Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,9 @@ jobs:
- test_classification.py
- test_embedding.py
- test_file_store.py
- test_image_generation.py
- test_object_detection.py
- test_prediction.py
- test_sentiment.py
- test_sql.py
- test_summary.py
- test_translate.py
- test_validate.py
- test_web.py
Expand Down
5 changes: 5 additions & 0 deletions jigsawstack/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ class SpeechToTextParams(TypedDict):
the duration of each chunk in seconds, maximum value is 15, defaults to 3
"""

word_timestamps: NotRequired[bool]
"""
When set to true, returns each word as its own entry in the chunks array with its own start and end timestamp. Useful for caption alignment and word-accurate search. Cannot be combined with stream=true.
"""


class ChunkResponse(TypedDict):
text: str
Expand Down
129 changes: 0 additions & 129 deletions tests/test_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,61 +35,6 @@
"https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
)

# Embedding V1 scenarios. Each entry pairs a pytest id ("name") with the
# request payload ("params") that is handed to the embedding call.
EMBEDDING_V1_TEST_CASES = [
    # --- text inputs ---
    {
        "name": "text_embedding_basic",
        "params": {"type": "text", "text": SAMPLE_TEXT},
    },
    {
        "name": "text_embedding_with_truncate",
        "params": {
            "type": "text",
            "text": SAMPLE_TEXT * 100,  # Long text to test truncation
            "token_overflow_mode": "truncate",
        },
    },
    {
        "name": "text_embedding_with_error_mode",
        "params": {
            "type": "text",
            "text": SAMPLE_TEXT,
            "token_overflow_mode": "error",
        },
    },
    # --- URL-based inputs, one per media type ---
    {
        "name": "image_embedding_from_url",
        "params": {"type": "image", "url": SAMPLE_IMAGE_URL},
    },
    {
        "name": "audio_embedding_from_url",
        "params": {"type": "audio", "url": SAMPLE_AUDIO_URL},
    },
    {
        "name": "pdf_embedding_from_url",
        "params": {"type": "pdf", "url": SAMPLE_PDF_URL},
    },
    # --- alternate text type ---
    {
        "name": "text_other_type",
        "params": {
            "type": "text-other",
            "text": "This is a different text type for embedding",
        },
    },
]

# Test cases for Embedding V2
EMBEDDING_V2_TEST_CASES = [
{
Expand Down Expand Up @@ -173,80 +118,6 @@
]


class TestEmbeddingV1Sync:
    """Test synchronous Embedding V1 methods.

    Every test is parametrized; a ``JigsawStackError`` raised by the client
    is converted into an explicit pytest failure that names the failing case.
    """

    sync_test_cases = EMBEDDING_V1_TEST_CASES

    # Upper bound (seconds) for downloading a blob fixture. Without a timeout
    # ``requests.get`` may wait forever on a stalled host and hang the run.
    _BLOB_DOWNLOAD_TIMEOUT = 30

    @staticmethod
    def _assert_embedding_result(result):
        """Assert the fields shared by every successful embedding response."""
        assert result["success"]
        assert "embeddings" in result
        assert isinstance(result["embeddings"], list)

    @pytest.mark.parametrize(
        "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases]
    )
    def test_embedding_v1(self, test_case):
        """Test synchronous embedding v1 with various inputs"""
        # Only the API call can raise JigsawStackError, so keep the try minimal.
        try:
            result = jigsaw.embedding(test_case["params"])
        except JigsawStackError as e:
            pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")
        else:
            self._assert_embedding_result(result)
            # "chunks" is optional in the response; validate it only if present.
            if "chunks" in result:
                assert isinstance(result["chunks"], list)

    @pytest.mark.parametrize(
        "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES]
    )
    def test_embedding_v1_blob(self, test_case):
        """Test synchronous embedding v1 with blob inputs"""
        # Download blob content. Fail fast on a stalled or broken fixture URL
        # instead of hanging or uploading an HTTP error page as the "blob".
        response = requests.get(
            test_case["blob_url"], timeout=self._BLOB_DOWNLOAD_TIMEOUT
        )
        response.raise_for_status()
        blob_content = response.content

        try:
            result = jigsaw.embedding(blob_content, test_case["options"])
        except JigsawStackError as e:
            pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")
        else:
            self._assert_embedding_result(result)


class TestEmbeddingV1Async:
    """Test asynchronous Embedding V1 methods.

    Every test is parametrized; a ``JigsawStackError`` raised by the client
    is converted into an explicit pytest failure that names the failing case.
    """

    async_test_cases = EMBEDDING_V1_TEST_CASES

    # Upper bound (seconds) for downloading a blob fixture. Without a timeout
    # ``requests.get`` may wait forever on a stalled host and hang the run.
    _BLOB_DOWNLOAD_TIMEOUT = 30

    @staticmethod
    def _assert_embedding_result(result):
        """Assert the fields shared by every successful embedding response."""
        assert result["success"]
        assert "embeddings" in result
        assert isinstance(result["embeddings"], list)

    @pytest.mark.parametrize(
        "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases]
    )
    @pytest.mark.asyncio
    async def test_embedding_v1_async(self, test_case):
        """Test asynchronous embedding v1 with various inputs"""
        # Only the API call can raise JigsawStackError, so keep the try minimal.
        try:
            result = await async_jigsaw.embedding(test_case["params"])
        except JigsawStackError as e:
            pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")
        else:
            self._assert_embedding_result(result)
            # "chunks" is optional in the response; validate it only if present.
            if "chunks" in result:
                assert isinstance(result["chunks"], list)

    @pytest.mark.parametrize(
        "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES]
    )
    @pytest.mark.asyncio
    async def test_embedding_v1_blob_async(self, test_case):
        """Test asynchronous embedding v1 with blob inputs"""
        # Download blob content. Fail fast on a stalled or broken fixture URL
        # instead of hanging or uploading an HTTP error page as the "blob".
        # NOTE(review): this is a blocking call inside a coroutine, as in the
        # original; acceptable here because nothing else shares the loop.
        response = requests.get(
            test_case["blob_url"], timeout=self._BLOB_DOWNLOAD_TIMEOUT
        )
        response.raise_for_status()
        blob_content = response.content

        try:
            result = await async_jigsaw.embedding(blob_content, test_case["options"])
        except JigsawStackError as e:
            pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")
        else:
            self._assert_embedding_result(result)


class TestEmbeddingV2Sync:
"""Test synchronous Embedding V2 methods"""

Expand Down
152 changes: 0 additions & 152 deletions tests/test_sentiment.py

This file was deleted.

Loading
Loading