diff --git a/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml b/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml index 67e5264d..4ad018ee 100644 --- a/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml +++ b/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml @@ -96,7 +96,7 @@ classes: "cc": null, "reference_number": "TNJB 0008497" imagePath: >- - config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter1.jpg + config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter1.jpg - classPrompt: This is an example of the class 'letter' name: Letter2 attributesPrompt: |- @@ -112,7 +112,7 @@ classes: "cc": null, "reference_number": null imagePath: >- - config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter2.png + config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter2.png - $schema: https://json-schema.org/draft/2020-12/schema $id: form x-aws-idp-document-type: form @@ -587,7 +587,7 @@ classes: "thread_id": null, "message_id": null imagePath: >- - config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/email1.jpg + config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/email1.jpg - $schema: https://json-schema.org/draft/2020-12/schema $id: questionnaire x-aws-idp-document-type: questionnaire @@ -805,7 +805,7 @@ classes: "account_number": ["003525801543","352580154336"], "transactions": [{"Date": "2/6/2020", "Description": "Food Purchase - AnyCompany Restaurant - 1194989245", "Amount": "-171"}] imagePath: >- - config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/bank-statement-pages/ + config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/bank-statement-pages/ classification: maxPagesForClassification: "ALL" @@ -822,9 +822,9 @@ classification: You are a multimodal document classification expert that analyzes business documents using both visual layout and textual content. Your task is to classify single-page documents into predefined categories based on their structural patterns, visual features, and text content. Your output must be valid JSON according to the requested format. - DOCUMENT_TEXT: OCR-extracted text content from the document page that provides textual information for classification - DOCUMENT_IMAGE: Visual representation of the document page that provides layout, formatting, and visual structure information - CLASS_NAMES_AND_DESCRIPTIONS: List of valid document types with their descriptions that the document must be classified into + : OCR-extracted text content from the document page that provides textual information for classification + : Visual representation of the document page that provides layout, formatting, and visual structure information + : List of valid document types with their descriptions that the document must be classified into task_prompt: >- @@ -836,6 +836,10 @@ classification: - Provide specific evidence from both visual and textual analysis + + {CLASS_NAMES_AND_DESCRIPTIONS} + + Return your classification as valid JSON following this exact structure: { @@ -892,6 +896,11 @@ extraction: + + + {FEW_SHOT_EXAMPLES} + + diff --git a/lib/idp_common_pkg/idp_common/extraction/service.py b/lib/idp_common_pkg/idp_common/extraction/service.py index 65e1336c..1469fcb8 100644 --- a/lib/idp_common_pkg/idp_common/extraction/service.py +++ b/lib/idp_common_pkg/idp_common/extraction/service.py @@ -819,10 +819,12 @@ def process_document_section(self, document: Document, section_id: str) -> Docum # Create empty result structure without invoking LLM extracted_fields = {} metering = { - "input_tokens": 0, - "output_tokens": 0, - "invocation_count": 0, - "total_cost": 0.0, + f"Extraction/{self.config.extraction.model}": { + "input_tokens": 0, + "output_tokens": 0, + "invocation_count": 0, + "total_cost": 0.0, + } } total_duration = 0.0 parsing_succeeded = True diff --git a/notebooks/misc/test_few_shot_extraction.ipynb b/notebooks/misc/test_few_shot_extraction.ipynb index 279865de..8d9b2a78 100644 --- a/notebooks/misc/test_few_shot_extraction.ipynb +++ b/notebooks/misc/test_few_shot_extraction.ipynb @@ -27,7 +27,7 @@ "os.environ['ROOT_DIR'] = f\"{ROOTDIR}/\"\n", "\n", "# Add the idp_common package to the path\n", - "sys.path.insert(0, '{ROOTDIR}/lib/idp_common_pkg')\n", + "sys.path.insert(0, f'{ROOTDIR}/lib/idp_common_pkg')\n", "\n", "from idp_common.extraction.service import ExtractionService" ] @@ -56,7 +56,7 @@ ], "source": [ "# Load the few-shot configuration\n", - "config_path = f'{ROOTDIR}/config_library/pattern-2/few_shot_example_with_multimodal_page_classification/config.yaml'\n", + "config_path = f'{ROOTDIR}/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml'\n", "with open(config_path, 'r') as f:\n", " config = yaml.safe_load(f)\n", "\n", @@ -215,16 +215,16 @@ " Attributes Prompt: expected attributes are:\n", " \"sender_name\": \"Will E. Clark\",\n", " \"sender_address\": \"206 Maple Street...\n", - " Image Path: config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter1.jpg\n", - " S3 URI: config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter1.jpg\n", + " Image Path: config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter1.jpg\n", + " S3 URI: config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter1.jpg\n", " Example 2:\n", " Name: Letter2\n", " Class Prompt: This is an example of the class 'letter'\n", " Attributes Prompt: expected attributes are:\n", " \"sender_name\": \"William H. W. Anderson\",\n", " \"sender_address\": \"P O. BO...\n", - " Image Path: config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter2.png\n", - " S3 URI: config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter2.png\n", + " Image Path: config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter2.png\n", + " S3 URI: config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter2.png\n", "\n", "Class: form\n", "Number of examples: 0\n",