|
6 | 6 | class CustomDocument(Document): |
7 | 7 | fields: Dict[str, dict] |
8 | 8 | """Dictionary of all fields in the document""" |
| 9 | + classifications: Dict[str, dict] |
| 10 | + """Dictionary of all classifications in the document""" |
9 | 11 |
|
10 | 12 | def __init__( |
11 | 13 | self, |
@@ -39,14 +41,24 @@ def _build_from_api_prediction( |
39 | 41 | :param page_n: Page number for multi pages pdf input |
40 | 42 | """ |
41 | 43 | self.fields = {} |
| 44 | + self.classifications = {} |
42 | 45 | for field_name in api_prediction: |
43 | 46 | field = api_prediction[field_name] |
44 | | - field["page_n"] = page_n |
45 | | - self.fields[field_name] = field |
| 47 | + # Only classifications have the 'value' attribute. |
| 48 | + if "value" in field: |
| 49 | + self.classifications[field_name] = field |
| 50 | + # Only value lists have the 'values' attribute. |
| 51 | + elif "values" in field: |
| 52 | + field["page_n"] = page_n |
| 53 | + self.fields[field_name] = field |
46 | 54 | setattr(self, field_name, field) |
47 | 55 |
|
48 | 56 | def __str__(self) -> str: |
49 | | - custom_doc_str = f"----- {self.type} -----\n" |
| 57 | + custom_doc_str = ( |
| 58 | + f"----- {self.type} -----\nFilename: {self.filename or ''}".rstrip() + "\n" |
| 59 | + ) |
| 60 | + for name, info in self.classifications.items(): |
| 61 | + custom_doc_str += f"{name}: {info['value']}\n" |
50 | 62 | for name, info in self.fields.items(): |
51 | 63 | custom_doc_str += "%s: %s\n" % ( |
52 | 64 | name, |
|
0 commit comments