chore: add class names and descriptions for classification task, fix paths

Daniel Lorch · Daniel Lorch · commit 59071590844d · 2025-11-27T16:14:25.000+01:00
diff --git a/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml b/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml
@@ -96,7 +96,7 @@ classes:
               "cc": null,
               "reference_number": "TNJB 0008497"
         imagePath: >-
-          config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter1.jpg
+          config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter1.jpg
       - classPrompt: This is an example of the class 'letter'
         name: Letter2
         attributesPrompt: |-
@@ -112,7 +112,7 @@ classes:
               "cc": null,
               "reference_number": null
         imagePath: >-
-          config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter2.png
+          config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter2.png
   - $schema: https://json-schema.org/draft/2020-12/schema
     $id: form
     x-aws-idp-document-type: form
@@ -587,7 +587,7 @@ classes:
               "thread_id": null,
               "message_id": null
         imagePath: >-
-          config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/email1.jpg
+          config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/email1.jpg
   - $schema: https://json-schema.org/draft/2020-12/schema
     $id: questionnaire
     x-aws-idp-document-type: questionnaire
@@ -805,7 +805,7 @@ classes:
               "account_number": ["003525801543","352580154336"],
               "transactions": [{"Date": "2/6/2020", "Description": "Food Purchase - AnyCompany Restaurant - 1194989245", "Amount": "-171"}]
         imagePath: >-
-          config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/bank-statement-pages/
+          config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/bank-statement-pages/
 
 classification:
   maxPagesForClassification: "ALL"
@@ -822,9 +822,9 @@ classification:
     You are a multimodal document classification expert that analyzes business documents using both visual layout and textual content. Your task is to classify single-page documents into predefined categories based on their structural patterns, visual features, and text content. Your output must be valid JSON according to the requested format.
 
     <variables>
-    DOCUMENT_TEXT: OCR-extracted text content from the document page that provides textual information for classification
-    DOCUMENT_IMAGE: Visual representation of the document page that provides layout, formatting, and visual structure information
-    CLASS_NAMES_AND_DESCRIPTIONS: List of valid document types with their descriptions that the document must be classified into
+    <document-ocr-data>: OCR-extracted text content from the document page that provides textual information for classification
+    <document-image>: Visual representation of the document page that provides layout, formatting, and visual structure information
+    <document-types>: List of valid document types with their descriptions that the document must be classified into
     </variables>
   task_prompt: >-
     <reasoning-guidelines>
@@ -836,6 +836,10 @@ classification:
     - Provide specific evidence from both visual and textual analysis
     </reasoning-guidelines>
 
+    <document-types>
+    {CLASS_NAMES_AND_DESCRIPTIONS}
+    </document-types>
+
     <output-format>
     Return your classification as valid JSON following this exact structure:
     {
@@ -892,6 +896,11 @@ extraction:
 
     </task>
 
+    <few-shot-examples>
+
+    {FEW_SHOT_EXAMPLES}
+
+    </few-shot-examples>
 
     <extraction-guidelines>
 
diff --git a/notebooks/misc/test_few_shot_extraction.ipynb b/notebooks/misc/test_few_shot_extraction.ipynb
@@ -27,7 +27,7 @@
     "os.environ['ROOT_DIR'] = f\"{ROOTDIR}/\"\n",
     "\n",
     "# Add the idp_common package to the path\n",
-    "sys.path.insert(0, '{ROOTDIR}/lib/idp_common_pkg')\n",
+    "sys.path.insert(0, f'{ROOTDIR}/lib/idp_common_pkg')\n",
     "\n",
     "from idp_common.extraction.service import ExtractionService"
    ]
@@ -56,7 +56,7 @@
    ],
    "source": [
     "# Load the few-shot configuration\n",
-    "config_path = f'{ROOTDIR}/config_library/pattern-2/few_shot_example_with_multimodal_page_classification/config.yaml'\n",
+    "config_path = f'{ROOTDIR}/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml'\n",
     "with open(config_path, 'r') as f:\n",
     "    config = yaml.safe_load(f)\n",
     "\n",
@@ -215,16 +215,16 @@
       "    Attributes Prompt: expected attributes are:\n",
       "    \"sender_name\": \"Will E. Clark\",\n",
       "    \"sender_address\": \"206 Maple Street...\n",
-      "    Image Path: config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter1.jpg\n",
-      "    S3 URI: config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter1.jpg\n",
+      "    Image Path: config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter1.jpg\n",
+      "    S3 URI: config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter1.jpg\n",
       "  Example 2:\n",
       "    Name: Letter2\n",
       "    Class Prompt: This is an example of the class 'letter'\n",
       "    Attributes Prompt: expected attributes are:\n",
       "    \"sender_name\": \"William H. W. Anderson\",\n",
       "    \"sender_address\": \"P O. BO...\n",
-      "    Image Path: config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter2.png\n",
-      "    S3 URI: config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter2.png\n",
+      "    Image Path: config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter2.png\n",
+      "    S3 URI: config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter2.png\n",
       "\n",
       "Class: form\n",
       "Number of examples: 0\n",