From 4faf8caff54e2aed7d3c955b713a2195b3399183 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 6 May 2026 12:47:30 -0700 Subject: [PATCH 1/2] feat(stt): add param and cleanup test suites --- .github/workflows/ci.yml | 2 - jigsawstack/audio.py | 5 + tests/test_embedding.py | 129 ------------------------- tests/test_sentiment.py | 152 ----------------------------- tests/test_summary.py | 199 -------------------------------------- tests/test_translate.py | 123 ------------------------ tests/test_validate.py | 201 --------------------------------------- tests/test_web.py | 148 ---------------------------- 8 files changed, 5 insertions(+), 954 deletions(-) delete mode 100644 tests/test_sentiment.py delete mode 100644 tests/test_summary.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 67af11c..f178870 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,12 +37,10 @@ jobs: - test_classification.py - test_embedding.py - test_file_store.py - - test_image_generation.py - test_object_detection.py - test_prediction.py - test_sentiment.py - test_sql.py - - test_summary.py - test_translate.py - test_validate.py - test_web.py diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index e1bd74f..62659e4 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -49,6 +49,11 @@ class SpeechToTextParams(TypedDict): the duration of each chunk in seconds, maximum value is 15, defaults to 3 """ + word_timestamps: NotRequired[bool] + """ + When set to true, returns each word as its own entry in the chunks array with its own start and end timestamp. Useful for caption alignment and word-accurate search. Cannot be combined with stream=true. 
+ """ + class ChunkResponse(TypedDict): text: str diff --git a/tests/test_embedding.py b/tests/test_embedding.py index 60acc60..52121e7 100644 --- a/tests/test_embedding.py +++ b/tests/test_embedding.py @@ -35,61 +35,6 @@ "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" ) -# Test cases for Embedding V1 -EMBEDDING_V1_TEST_CASES = [ - { - "name": "text_embedding_basic", - "params": { - "type": "text", - "text": SAMPLE_TEXT, - }, - }, - { - "name": "text_embedding_with_truncate", - "params": { - "type": "text", - "text": SAMPLE_TEXT * 100, # Long text to test truncation - "token_overflow_mode": "truncate", - }, - }, - { - "name": "text_embedding_with_error_mode", - "params": { - "type": "text", - "text": SAMPLE_TEXT, - "token_overflow_mode": "error", - }, - }, - { - "name": "image_embedding_from_url", - "params": { - "type": "image", - "url": SAMPLE_IMAGE_URL, - }, - }, - { - "name": "audio_embedding_from_url", - "params": { - "type": "audio", - "url": SAMPLE_AUDIO_URL, - }, - }, - { - "name": "pdf_embedding_from_url", - "params": { - "type": "pdf", - "url": SAMPLE_PDF_URL, - }, - }, - { - "name": "text_other_type", - "params": { - "type": "text-other", - "text": "This is a different text type for embedding", - }, - }, -] - # Test cases for Embedding V2 EMBEDDING_V2_TEST_CASES = [ { @@ -173,80 +118,6 @@ ] -class TestEmbeddingV1Sync: - """Test synchronous Embedding V1 methods""" - - sync_test_cases = EMBEDDING_V1_TEST_CASES - - @pytest.mark.parametrize( - "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] - ) - def test_embedding_v1(self, test_case): - """Test synchronous embedding v1 with various inputs""" - try: - result = jigsaw.embedding(test_case["params"]) - assert result["success"] - assert "embeddings" in result - assert isinstance(result["embeddings"], list) - if "chunks" in result: - assert isinstance(result["chunks"], list) - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in 
{test_case['name']}: {e}") - - @pytest.mark.parametrize( - "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES] - ) - def test_embedding_v1_blob(self, test_case): - """Test synchronous embedding v1 with blob inputs""" - try: - # Download blob content - blob_content = requests.get(test_case["blob_url"]).content - result = jigsaw.embedding(blob_content, test_case["options"]) - assert result["success"] - assert "embeddings" in result - assert isinstance(result["embeddings"], list) - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - -class TestEmbeddingV1Async: - """Test asynchronous Embedding V1 methods""" - - async_test_cases = EMBEDDING_V1_TEST_CASES - - @pytest.mark.parametrize( - "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] - ) - @pytest.mark.asyncio - async def test_embedding_v1_async(self, test_case): - """Test asynchronous embedding v1 with various inputs""" - try: - result = await async_jigsaw.embedding(test_case["params"]) - assert result["success"] - assert "embeddings" in result - assert isinstance(result["embeddings"], list) - if "chunks" in result: - assert isinstance(result["chunks"], list) - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - @pytest.mark.parametrize( - "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES] - ) - @pytest.mark.asyncio - async def test_embedding_v1_blob_async(self, test_case): - """Test asynchronous embedding v1 with blob inputs""" - try: - # Download blob content - blob_content = requests.get(test_case["blob_url"]).content - result = await async_jigsaw.embedding(blob_content, test_case["options"]) - assert result["success"] - assert "embeddings" in result - assert isinstance(result["embeddings"], list) - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - class TestEmbeddingV2Sync: 
"""Test synchronous Embedding V2 methods""" diff --git a/tests/test_sentiment.py b/tests/test_sentiment.py deleted file mode 100644 index 71dbea7..0000000 --- a/tests/test_sentiment.py +++ /dev/null @@ -1,152 +0,0 @@ -import logging -import os - -import pytest -from dotenv import load_dotenv - -import jigsawstack -from jigsawstack.exceptions import JigsawStackError - -load_dotenv() - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -jigsaw = jigsawstack.JigsawStack( - api_key=os.getenv("JIGSAWSTACK_API_KEY"), - base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" - if os.getenv("JIGSAWSTACK_BASE_URL") - else "https://api.jigsawstack.com", - headers={"x-jigsaw-skip-cache": "true"}, -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_key=os.getenv("JIGSAWSTACK_API_KEY"), - base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" - if os.getenv("JIGSAWSTACK_BASE_URL") - else "https://api.jigsawstack.com", - headers={"x-jigsaw-skip-cache": "true"}, -) - -TEST_CASES = [ - { - "name": "positive_sentiment_excited", - "params": { - "text": "I am so excited about this new product! It's absolutely amazing and I can't wait to use it every day." - }, - }, - { - "name": "negative_sentiment_disappointed", - "params": { - "text": "I'm really disappointed with this purchase. The quality is terrible and it broke after just one day." - }, - }, - { - "name": "neutral_sentiment_factual", - "params": { - "text": "The meeting is scheduled for 3 PM tomorrow in conference room B." - }, - }, - { - "name": "mixed_sentiment_paragraph", - "params": { - "text": "The product arrived on time which was great. However, the packaging was damaged. The item itself works fine, but the instructions were confusing." - }, - }, - { - "name": "positive_sentiment_love", - "params": { - "text": "I absolutely love this! Best purchase I've made all year. Highly recommend to everyone!" - }, - }, - { - "name": "negative_sentiment_angry", - "params": { - "text": "This is unacceptable! 
I want a refund immediately. Worst customer service ever!" - }, - }, - { - "name": "single_sentence_positive", - "params": {"text": "This made my day!"}, - }, - { - "name": "single_sentence_negative", - "params": {"text": "I hate this."}, - }, - { - "name": "complex_multi_sentence", - "params": { - "text": "The first part of the movie was boring and I almost fell asleep. But then it got really exciting! The ending was spectacular and now it's one of my favorites." - }, - }, - { - "name": "question_sentiment", - "params": { - "text": "Why is this product so amazing? I can't believe how well it works!" - }, - }, -] - - -class TestSentimentSync: - """Test synchronous sentiment analysis methods""" - - sync_test_cases = TEST_CASES - - @pytest.mark.parametrize( - "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] - ) - def test_sentiment_analysis(self, test_case): - """Test synchronous sentiment analysis with various inputs""" - try: - result = jigsaw.sentiment(test_case["params"]) - - assert result["success"] - assert "sentiment" in result - assert "emotion" in result["sentiment"] - assert "sentiment" in result["sentiment"] - assert "score" in result["sentiment"] - - # Check if sentences analysis is included - if "sentences" in result["sentiment"]: - assert isinstance(result["sentiment"]["sentences"], list) - for sentence in result["sentiment"]["sentences"]: - assert "text" in sentence - assert "sentiment" in sentence - assert "emotion" in sentence - assert "score" in sentence - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - -class TestSentimentAsync: - """Test asynchronous sentiment analysis methods""" - - async_test_cases = TEST_CASES - - @pytest.mark.parametrize( - "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] - ) - @pytest.mark.asyncio - async def test_sentiment_analysis_async(self, test_case): - """Test asynchronous sentiment analysis with various 
inputs""" - try: - result = await async_jigsaw.sentiment(test_case["params"]) - - assert result["success"] - assert "sentiment" in result - assert "emotion" in result["sentiment"] - assert "sentiment" in result["sentiment"] - assert "score" in result["sentiment"] - - # Check if sentences analysis is included - if "sentences" in result["sentiment"]: - assert isinstance(result["sentiment"]["sentences"], list) - for sentence in result["sentiment"]["sentences"]: - assert "text" in sentence - assert "sentiment" in sentence - assert "emotion" in sentence - assert "score" in sentence - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_summary.py b/tests/test_summary.py deleted file mode 100644 index a71692b..0000000 --- a/tests/test_summary.py +++ /dev/null @@ -1,199 +0,0 @@ -import logging -import os - -import pytest -from dotenv import load_dotenv - -import jigsawstack -from jigsawstack.exceptions import JigsawStackError - -load_dotenv() - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -jigsaw = jigsawstack.JigsawStack( - api_key=os.getenv("JIGSAWSTACK_API_KEY"), - base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" - if os.getenv("JIGSAWSTACK_BASE_URL") - else "https://api.jigsawstack.com", - headers={"x-jigsaw-skip-cache": "true"}, -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_key=os.getenv("JIGSAWSTACK_API_KEY"), - base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" - if os.getenv("JIGSAWSTACK_BASE_URL") - else "https://api.jigsawstack.com", - headers={"x-jigsaw-skip-cache": "true"}, -) - -LONG_TEXT = """ -Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. -From healthcare to finance, transportation to entertainment, AI is reshaping industries and changing the way we live and work. 
-Machine learning algorithms can now diagnose diseases with remarkable accuracy, predict market trends, and even create art. -Natural language processing has enabled computers to understand and generate human language, leading to the development of sophisticated chatbots and virtual assistants. -Computer vision systems can identify objects, faces, and activities in images and videos with superhuman precision. -However, the rapid advancement of AI also raises important ethical questions about privacy, job displacement, and the potential for bias in algorithmic decision-making. -As we continue to develop more powerful AI systems, it's crucial that we consider their societal impact and work to ensure that the benefits of AI are distributed equitably. -The future of AI holds immense promise, but it will require careful planning, regulation, and collaboration between technologists, policymakers, and society at large to realize its full potential while mitigating its risks. -""" - -ARTICLE_URL = "https://en.wikipedia.org/wiki/Artificial_intelligence" -PDF_URL = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" - -TEST_CASES = [ - { - "name": "text_summary_default", - "params": { - "text": LONG_TEXT, - }, - }, - { - "name": "text_summary_with_text_type", - "params": { - "text": LONG_TEXT, - "type": "text", - }, - }, - { - "name": "text_summary_with_points_type", - "params": { - "text": LONG_TEXT, - "type": "points", - }, - }, - { - "name": "text_summary_with_max_points", - "params": { - "text": LONG_TEXT, - "type": "points", - "max_points": 5, - }, - }, - { - "name": "text_summary_with_max_characters", - "params": { - "text": LONG_TEXT, - "type": "text", - "max_characters": 200, - }, - }, - { - "name": "short_text_summary", - "params": { - "text": "This is a short text that doesn't need much summarization.", - }, - }, - { - "name": "url_summary_default", - "params": { - "url": ARTICLE_URL, - }, - }, - { - "name": "url_summary_with_text_type", - 
"params": { - "url": ARTICLE_URL, - "type": "text", - }, - }, - { - "name": "url_summary_with_points_type", - "params": { - "url": ARTICLE_URL, - "type": "points", - "max_points": 7, - }, - }, - { - "name": "pdf_url_summary", - "params": { - "url": PDF_URL, - "type": "text", - }, - }, - { - "name": "complex_text_with_points_and_limit", - "params": { - "text": LONG_TEXT * 3, # Triple the text for more content - "type": "points", - "max_points": 10, - }, - }, - { - "name": "technical_text_summary", - "params": { - "text": """ - Machine learning is a subset of artificial intelligence that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience. - Deep learning, a subfield of machine learning, uses artificial neural networks with multiple layers to progressively extract higher-level features from raw input. - Supervised learning involves training models on labeled data, while unsupervised learning discovers patterns in unlabeled data. - Reinforcement learning enables agents to learn optimal behaviors through trial and error interactions with an environment. 
- """, - "type": "points", - "max_points": 4, - }, - }, -] - - -class TestSummarySync: - """Test synchronous summary methods""" - - sync_test_cases = TEST_CASES - - @pytest.mark.parametrize( - "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] - ) - def test_summary(self, test_case): - """Test synchronous summary with various inputs""" - try: - result = jigsaw.summary(test_case["params"]) - - assert result["success"] - assert "summary" in result - - if test_case["params"].get("type") == "points": - assert isinstance(result["summary"], list) - if "max_points" in test_case["params"]: - assert len(result["summary"]) <= test_case["params"]["max_points"] - else: - assert isinstance(result["summary"], str) - if "max_characters" in test_case["params"]: - assert ( - len(result["summary"]) <= test_case["params"]["max_characters"] - ) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - -class TestSummaryAsync: - """Test asynchronous summary methods""" - - async_test_cases = TEST_CASES - - @pytest.mark.parametrize( - "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] - ) - @pytest.mark.asyncio - async def test_summary_async(self, test_case): - """Test asynchronous summary with various inputs""" - try: - result = await async_jigsaw.summary(test_case["params"]) - - assert result["success"] - assert "summary" in result - - if test_case["params"].get("type") == "points": - assert isinstance(result["summary"], list) - if "max_points" in test_case["params"]: - assert len(result["summary"]) <= test_case["params"]["max_points"] - else: - assert isinstance(result["summary"], str) - if "max_characters" in test_case["params"]: - assert ( - len(result["summary"]) <= test_case["params"]["max_characters"] - ) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_translate.py b/tests/test_translate.py index 
4f63615..abcbcc1 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -2,7 +2,6 @@ import os import pytest -import requests from dotenv import load_dotenv import jigsawstack @@ -28,9 +27,6 @@ headers={"x-jigsaw-skip-cache": "true"}, ) -# Sample image URL for translation tests -IMAGE_URL = "https://images.unsplash.com/photo-1580679137870-86ef9f9a03d6?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" - # Text translation test cases TEXT_TEST_CASES = [ { @@ -79,64 +75,6 @@ }, ] -# Image translation test cases -IMAGE_TEST_CASES = [ - { - "name": "translate_image_with_url", - "params": { - "url": IMAGE_URL, - "target_language": "es", - }, - "blob": None, - "options": None, - }, - { - "name": "translate_image_with_blob", - "params": None, - "blob": IMAGE_URL, - "options": { - "target_language": "fr", - }, - }, - { - "name": "translate_image_with_url_return_base64", - "params": { - "url": IMAGE_URL, - "target_language": "de", - "return_type": "base64", - }, - "blob": None, - "options": None, - }, - { - "name": "translate_image_with_blob_return_url", - "params": None, - "blob": IMAGE_URL, - "options": { - "target_language": "ja", - "return_type": "url", - }, - }, - { - "name": "translate_image_with_blob_return_binary", - "params": None, - "blob": IMAGE_URL, - "options": { - "target_language": "zh", - "return_type": "binary", - }, - }, - { - "name": "translate_image_to_italian", - "params": { - "url": IMAGE_URL, - "target_language": "it", - }, - "blob": None, - "options": None, - }, -] - class TestTranslateTextSync: """Test synchronous text translation methods""" @@ -193,64 +131,3 @@ async def test_translate_text_async(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - -class TestTranslateImageSync: - """Test synchronous image translation methods""" - - sync_test_cases = IMAGE_TEST_CASES - - @pytest.mark.parametrize( - 
"test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] - ) - def test_translate_image(self, test_case): - """Test synchronous image translation with various inputs""" - try: - if test_case.get("blob"): - # Download blob content - blob_content = requests.get(test_case["blob"]).content - result = jigsaw.translate.image( - blob_content, test_case.get("options", {}) - ) - else: - # Use params directly - result = jigsaw.translate.image(test_case["params"]) - assert result is not None - if isinstance(result, dict): - assert "url" in result - else: - assert isinstance(result, bytes) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - -class TestTranslateImageAsync: - """Test asynchronous image translation methods""" - - async_test_cases = IMAGE_TEST_CASES - - @pytest.mark.parametrize( - "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] - ) - @pytest.mark.asyncio - async def test_translate_image_async(self, test_case): - """Test asynchronous image translation with various inputs""" - try: - if test_case.get("blob"): - # Download blob content - blob_content = requests.get(test_case["blob"]).content - result = await async_jigsaw.translate.image( - blob_content, test_case.get("options", {}) - ) - else: - # Use params directly - result = await async_jigsaw.translate.image(test_case["params"]) - assert result is not None - if isinstance(result, dict): - assert "url" in result - else: - assert isinstance(result, bytes) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_validate.py b/tests/test_validate.py index 83b50b6..376e6d2 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -34,69 +34,6 @@ ) POTENTIALLY_NSFW_URL = 
"https://images.unsplash.com/photo-1512310604669-443f26c35f52?q=80&w=868&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" -SPAM_CHECK_TEST_CASES = [ - { - "name": "single_text_not_spam", - "params": { - "text": "I had a great experience with your product. The customer service was excellent!" - }, - }, - { - "name": "single_text_potential_spam", - "params": { - "text": "CLICK HERE NOW!!! FREE MONEY!!! Win $1000000 instantly! No credit card required! Act NOW!" - }, - }, - { - "name": "multiple_texts_mixed", - "params": { - "text": [ - "Thank you for your email. I'll get back to you soon.", - "BUY NOW! LIMITED TIME OFFER! 90% OFF EVERYTHING!", - "The meeting is scheduled for 3 PM tomorrow.", - ] - }, - }, - { - "name": "professional_email", - "params": { - "text": "Dear John, I hope this email finds you well. I wanted to follow up on our discussion from yesterday." - }, - }, - { - "name": "marketing_spam", - "params": { - "text": "Congratulations! You've been selected as our lucky winner! Claim your prize now at this link: bit.ly/win" - }, - }, -] - -# Spell Check Test Cases -SPELL_CHECK_TEST_CASES = [ - { - "name": "text_with_no_errors", - "params": {"text": "The quick brown fox jumps over the lazy dog."}, - }, - { - "name": "text_with_spelling_errors", - "params": {"text": "Thiss sentense has severel speling erors in it."}, - }, - { - "name": "text_with_language_code", - "params": {"text": "I recieved the pacakge yesterday.", "language_code": "en"}, - }, - { - "name": "mixed_correct_and_incorrect", - "params": { - "text": "The weather is beatiful today, but tommorow might be diferent." 
- }, - }, - { - "name": "technical_text", - "params": {"text": "The algorythm processes the datbase queries eficiently."}, - }, -] - # Profanity Test Cases PROFANITY_TEST_CASES = [ { @@ -149,75 +86,6 @@ ] -class TestSpamCheckSync: - """Test synchronous spam check methods""" - - @pytest.mark.parametrize( - "test_case", - SPAM_CHECK_TEST_CASES, - ids=[tc["name"] for tc in SPAM_CHECK_TEST_CASES], - ) - def test_spam_check(self, test_case): - """Test synchronous spam check with various inputs""" - try: - result = jigsaw.validate.spamcheck(test_case["params"]) - - assert result["success"] - assert "check" in result - - # Check structure based on input type - if isinstance(test_case["params"]["text"], list): - assert isinstance(result["check"], list) - for check in result["check"]: - assert "is_spam" in check - assert "score" in check - assert isinstance(check["is_spam"], bool) - assert 0 <= check["score"] <= 1 - else: - assert "is_spam" in result["check"] - assert "score" in result["check"] - assert isinstance(result["check"]["is_spam"], bool) - assert 0 <= result["check"]["score"] <= 1 - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - -class TestSpellCheckSync: - """Test synchronous spell check methods""" - - @pytest.mark.parametrize( - "test_case", - SPELL_CHECK_TEST_CASES, - ids=[tc["name"] for tc in SPELL_CHECK_TEST_CASES], - ) - def test_spell_check(self, test_case): - """Test synchronous spell check with various inputs""" - try: - result = jigsaw.validate.spellcheck(test_case["params"]) - - assert result["success"] - assert "misspellings_found" in result - assert "misspellings" in result - assert "auto_correct_text" in result - assert isinstance(result["misspellings_found"], bool) - assert isinstance(result["misspellings"], list) - assert isinstance(result["auto_correct_text"], str) - - # Check misspellings structure - for misspelling in result["misspellings"]: - assert "word" in misspelling - assert 
"startIndex" in misspelling - assert "endIndex" in misspelling - assert "expected" in misspelling - assert "auto_corrected" in misspelling - assert isinstance(misspelling["expected"], list) - assert isinstance(misspelling["auto_corrected"], bool) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - class TestProfanitySync: """Test synchronous profanity check methods""" @@ -305,75 +173,6 @@ def test_nsfw_check_blob(self, test_case): # Async Test Classes -class TestSpamCheckAsync: - """Test asynchronous spam check methods""" - - @pytest.mark.parametrize( - "test_case", - SPAM_CHECK_TEST_CASES, - ids=[tc["name"] for tc in SPAM_CHECK_TEST_CASES], - ) - @pytest.mark.asyncio - async def test_spam_check_async(self, test_case): - """Test asynchronous spam check with various inputs""" - try: - result = await async_jigsaw.validate.spamcheck(test_case["params"]) - - assert result["success"] - assert "check" in result - - # Check structure based on input type - if isinstance(test_case["params"]["text"], list): - assert isinstance(result["check"], list) - for check in result["check"]: - assert "is_spam" in check - assert "score" in check - assert isinstance(check["is_spam"], bool) - assert 0 <= check["score"] <= 1 - else: - assert "is_spam" in result["check"] - assert "score" in result["check"] - assert isinstance(result["check"]["is_spam"], bool) - assert 0 <= result["check"]["score"] <= 1 - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - -class TestSpellCheckAsync: - """Test asynchronous spell check methods""" - - @pytest.mark.parametrize( - "test_case", - SPELL_CHECK_TEST_CASES, - ids=[tc["name"] for tc in SPELL_CHECK_TEST_CASES], - ) - @pytest.mark.asyncio - async def test_spell_check_async(self, test_case): - """Test asynchronous spell check with various inputs""" - try: - result = await async_jigsaw.validate.spellcheck(test_case["params"]) - - assert 
result["success"] - assert "misspellings_found" in result - assert "misspellings" in result - assert "auto_correct_text" in result - assert isinstance(result["misspellings_found"], bool) - assert isinstance(result["misspellings"], list) - assert isinstance(result["auto_correct_text"], str) - - # Check misspellings structure - for misspelling in result["misspellings"]: - assert "word" in misspelling - assert "startIndex" in misspelling - assert "endIndex" in misspelling - assert "expected" in misspelling - assert "auto_corrected" in misspelling - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - class TestProfanityAsync: """Test asynchronous profanity check methods""" diff --git a/tests/test_web.py b/tests/test_web.py index fad3b83..86533f4 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -31,91 +31,6 @@ headers={"x-jigsaw-skip-cache": "true"}, ) -URL = "https://jigsawstack.com" - -# HTML to Any Test Cases -HTML_TO_ANY_TEST_CASES = [ - { - "name": "html_to_pdf_url", - "params": { - "url": URL, - "type": "pdf", - "return_type": "url", - }, - }, - { - "name": "html_to_png_base64", - "params": { - "url": URL, - "type": "png", - "return_type": "base64", - }, - }, - { - "name": "html_to_jpeg_binary", - "params": { - "url": URL, - "type": "jpeg", - "return_type": "binary", - }, - }, - { - "name": "html_string_to_pdf", - "params": { - "html": "

Test Document

This is a test.

", - "type": "pdf", - "return_type": "url", - }, - }, - { - "name": "html_to_pdf_with_options", - "params": { - "url": URL, - "type": "pdf", - "return_type": "url", - "pdf_display_header_footer": True, - "pdf_print_background": True, - }, - }, - { - "name": "html_to_png_full_page", - "params": { - "url": URL, - "type": "png", - "full_page": True, - "return_type": "url", - }, - }, - { - "name": "html_to_webp_custom_size", - "params": { - "url": URL, - "type": "webp", - "width": 1920, - "height": 1080, - "return_type": "base64", - }, - }, - { - "name": "html_to_png_mobile", - "params": { - "url": URL, - "type": "png", - "is_mobile": True, - "return_type": "url", - }, - }, - { - "name": "html_to_png_dark_mode", - "params": { - "url": URL, - "type": "png", - "dark_mode": True, - "return_type": "url", - }, - }, -] - # Search Test Cases SEARCH_TEST_CASES = [ { @@ -162,37 +77,6 @@ ] -class TestHTMLToAnySync: - """Test synchronous HTML to Any methods""" - - @pytest.mark.parametrize( - "test_case", - HTML_TO_ANY_TEST_CASES, - ids=[tc["name"] for tc in HTML_TO_ANY_TEST_CASES], - ) - def test_html_to_any(self, test_case): - """Test synchronous HTML to Any with various inputs""" - try: - result = jigsaw.web.html_to_any(test_case["params"]) - - return_type = test_case["params"].get("return_type", "url") - - if return_type == "binary": - assert isinstance(result, bytes) - assert len(result) > 0 - else: - assert result["success"] - assert "url" in result - assert isinstance(result["url"], str) - - if return_type == "base64": - # Check if it's a valid base64 string - assert result["url"].startswith("data:") - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - class TestSearchSync: """Test synchronous search methods""" @@ -250,38 +134,6 @@ def test_search_suggestions(self, test_case): # Async Test Classes -class TestHTMLToAnyAsync: - """Test asynchronous HTML to Any methods""" - - @pytest.mark.parametrize( - "test_case", - 
HTML_TO_ANY_TEST_CASES, - ids=[tc["name"] for tc in HTML_TO_ANY_TEST_CASES], - ) - @pytest.mark.asyncio - async def test_html_to_any_async(self, test_case): - """Test asynchronous HTML to Any with various inputs""" - try: - result = await async_jigsaw.web.html_to_any(test_case["params"]) - - return_type = test_case["params"].get("return_type", "url") - - if return_type == "binary": - assert isinstance(result, bytes) - assert len(result) > 0 - else: - assert result["success"] - assert "url" in result - assert isinstance(result["url"], str) - - if return_type == "base64": - # Check if it's a valid base64 string - assert result["url"].startswith("data:") - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - class TestSearchAsync: """Test asynchronous search methods""" From 7a2bf2e4e0c8236df28cb1e9f1b4714ff43888e1 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 6 May 2026 12:51:25 -0700 Subject: [PATCH 2/2] tests: drop sentiment testcases --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f178870..5cec50a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,7 +39,6 @@ jobs: - test_file_store.py - test_object_detection.py - test_prediction.py - - test_sentiment.py - test_sql.py - test_translate.py - test_validate.py