Skip to content

Commit e0cd332

Browse files
📝 fix example scripts (#265)
1 parent 96713dc commit e0cd332

File tree

4 files changed

+76
-58
lines changed

4 files changed

+76
-58
lines changed

examples/auto_invoice_splitter_extraction.py

Lines changed: 0 additions & 38 deletions
This file was deleted.
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from mindee import Client
2+
from mindee.extraction.pdf_extractor import PdfExtractor
3+
from mindee.input import PathInput
4+
from mindee.product import InvoiceSplitterV1, InvoiceV4
5+
6+
# Shared API client used by every parse helper in this script.
# "my-api-key" is a placeholder: substitute a real key before running.
mindee_client = Client(api_key="my-api-key")
7+
# mindee_client = Client() # Optionally, set from env.
8+
9+
10+
def parse_invoice(file_path):
    """Load the file at ``file_path`` and route it to the right parser.

    A PDF with more than one page goes to ``parse_multi_page``; anything
    else (non-PDF, or a PDF with at most one page) goes to
    ``parse_single_page``.
    """
    source = PathInput(file_path)

    # ``and`` short-circuits, so pages are only counted for PDF inputs.
    is_multi_page_pdf = source.is_pdf() and source.count_doc_pages() > 1
    handler = parse_multi_page if is_multi_page_pdf else parse_single_page
    handler(source)
17+
18+
19+
def parse_single_page(input_source):
    """Run the InvoiceV4 product on ``input_source`` and print the document."""
    response = mindee_client.parse(InvoiceV4, input_source)
    print(response.document)
22+
23+
24+
def parse_multi_page(input_source):
    """Split a multi-page input into invoices and parse each one.

    The input is first sent to InvoiceSplitterV1 to obtain the page groups,
    the extractor then cuts one PDF per group, and every resulting PDF is
    parsed with InvoiceV4 and printed.
    """
    # Build the extractor before the API call, exactly as the example does.
    extractor = PdfExtractor(input_source)

    # NOTE(review): close_file=False presumably keeps the underlying file
    # open for the extractor; nothing here closes it afterwards -- confirm
    # whether an explicit close is needed.
    splitter_response = mindee_client.enqueue_and_parse(
        InvoiceSplitterV1, input_source, close_file=False
    )
    prediction = splitter_response.document.inference.prediction
    groups = prediction.invoice_page_groups

    for invoice_pdf in extractor.extract_invoices(groups, strict=False):
        # Optional: Save the files locally
        # invoice_pdf.write_to_file("output/path")

        invoice_source = invoice_pdf.as_input_source()
        parsed = mindee_client.parse(InvoiceV4, invoice_source)
        print(parsed.document)
40+
41+
42+
if __name__ == "__main__":
    # Placeholder path: point this at a real invoice file before running.
    parse_invoice("path/to/my/file.ext")
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import os
2+
3+
from mindee import Client, product
4+
from mindee.extraction.multi_receipts_extractor.multi_receipts_extractor import (
5+
extract_receipts,
6+
)
7+
8+
9+
def parse_receipts(input_path):
    """Detect the receipts in one file, then parse and print each of them.

    The file at ``input_path`` is sent to MultiReceiptsDetectorV1; the
    resulting inference is handed to ``extract_receipts`` and each extracted
    receipt is parsed with ReceiptV5. Results are printed with a 1-based
    index and a 40-dash separator.
    """
    client = Client(api_key="my-api-key-here")
    # client = Client()  # Optionally, set from env.
    source_doc = client.source_from_path(input_path)

    # NOTE(review): close_file=False presumably keeps the source readable
    # for the extraction step below -- confirm against the library docs.
    detection = client.parse(
        product.MultiReceiptsDetectorV1, source_doc, close_file=False
    )

    receipts = extract_receipts(source_doc, detection.document.inference)

    for number, single_receipt in enumerate(receipts, start=1):
        parsed = client.parse(product.ReceiptV5, single_receipt.as_source())
        print(f"Receipt {number}:")
        print(parsed.document)
        print("-" * 40)

        # Uncomment to save each extracted receipt
        # save_path = f"./receipt_{number}.pdf"
        # single_receipt.save_to_file(save_path)
29+
30+
31+
if __name__ == "__main__":
    # Placeholder path: point this at a real multi-receipt file before running.
    input_file = "path/to/my/file.ext"
    parse_receipts(input_file)

examples/multi_receipts_tutorial.py

Lines changed: 0 additions & 20 deletions
This file was deleted.

0 commit comments

Comments
 (0)