Skip to content

Commit 0ecd9f5

Browse files
authored
✨ add a specific class for classifications, which are never None (#134)
1 parent 6742ce1 commit 0ecd9f5

File tree

11 files changed

+87
-23
lines changed

11 files changed

+87
-23
lines changed

mindee/documents/custom/custom_v1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from typing import Dict, Optional, TypeVar
22

33
from mindee.documents.base import Document, TypeApiPrediction, clean_out_string
4-
from mindee.fields.api_builder import ClassificationField, ListField
4+
from mindee.documents.custom.custom_v1_fields import ClassificationField, ListField
55

66

77
class CustomV1(Document):
File renamed without changes.

mindee/documents/financial/financial_document_v1.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from mindee.documents.base import Document, TypeApiPrediction, clean_out_string
44
from mindee.documents.invoice.line_item_v4 import InvoiceLineItemV4
55
from mindee.fields.amount import AmountField
6+
from mindee.fields.classification import ClassificationField
67
from mindee.fields.company_registration import CompanyRegistrationField
78
from mindee.fields.date import DateField
89
from mindee.fields.locale import LocaleField
@@ -50,11 +51,11 @@ class FinancialDocumentV1(Document):
5051
"""Total amount of tip and gratuity."""
5152
time: TextField
5253
"""Time as seen on the receipt in HH:MM format."""
53-
document_type: TextField
54+
document_type: ClassificationField
5455
"""A classification field, among predefined classes."""
55-
category: TextField
56+
category: ClassificationField
5657
"""The invoice or receipt category among predefined classes."""
57-
subcategory: TextField
58+
subcategory: ClassificationField
5859
"""The invoice or receipt sub-category among predefined classes."""
5960

6061
def __init__(
@@ -134,9 +135,13 @@ def _build_from_api_prediction(
134135
self.total_tax = AmountField(api_prediction["total_tax"], page_n=page_n)
135136
self.tip = AmountField(api_prediction["tip"], page_n=page_n)
136137
self.time = TextField(api_prediction["time"], page_n=page_n)
137-
self.document_type = TextField(api_prediction["document_type"], page_n=page_n)
138-
self.category = TextField(api_prediction["category"], page_n=page_n)
139-
self.subcategory = TextField(api_prediction["subcategory"], page_n=page_n)
138+
self.document_type = ClassificationField(
139+
api_prediction["document_type"], page_n=page_n
140+
)
141+
self.category = ClassificationField(api_prediction["category"], page_n=page_n)
142+
self.subcategory = ClassificationField(
143+
api_prediction["subcategory"], page_n=page_n
144+
)
140145

141146
def __str__(self) -> str:
142147
supplier_company_registrations = "; ".join(

mindee/documents/invoice/invoice_v3.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from mindee.documents.base import Document, TypeApiPrediction, clean_out_string
44
from mindee.documents.invoice import checks, reconstruct
55
from mindee.fields.amount import AmountField
6+
from mindee.fields.classification import ClassificationField
67
from mindee.fields.company_registration import CompanyRegistrationField
78
from mindee.fields.date import DateField
89
from mindee.fields.locale import LocaleField
@@ -14,6 +15,8 @@
1415
class InvoiceV3(Document):
1516
locale: LocaleField
1617
"""locale information"""
18+
document_type: ClassificationField
19+
"""Whether the document is an INVOICE or a CREDIT NOTE."""
1720
total_amount: AmountField
1821
"""Total including taxes. Same as ``total_incl``."""
1922
total_net: AmountField
@@ -75,6 +78,9 @@ def _build_from_api_prediction(
7578
:param api_prediction: Raw prediction from HTTP response
7679
:param page_n: Page number for multi pages pdf input
7780
"""
81+
self.document_type = ClassificationField(
82+
api_prediction["document_type"], page_n=page_n
83+
)
7884
self.company_number = [
7985
CompanyRegistrationField(field_dict, page_n=page_n)
8086
for field_dict in api_prediction["company_registration"]

mindee/documents/invoice/invoice_v4.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from mindee.documents.invoice import checks, reconstruct
55
from mindee.documents.invoice.line_item_v4 import InvoiceLineItemV4
66
from mindee.fields.amount import AmountField
7+
from mindee.fields.classification import ClassificationField
78
from mindee.fields.company_registration import CompanyRegistrationField
89
from mindee.fields.date import DateField
910
from mindee.fields.locale import LocaleField
@@ -15,6 +16,8 @@
1516
class InvoiceV4(Document):
1617
locale: LocaleField
1718
"""locale information"""
19+
document_type: ClassificationField
20+
"""Whether the document is an INVOICE or a CREDIT NOTE."""
1821
total_amount: AmountField
1922
"""Total including taxes"""
2023
total_net: AmountField
@@ -80,6 +83,9 @@ def _build_from_api_prediction(
8083
:param api_prediction: Raw prediction from HTTP response
8184
:param page_n: Page number for multi pages pdf input
8285
"""
86+
self.document_type = ClassificationField(
87+
api_prediction["document_type"], page_n=page_n
88+
)
8389
self.supplier_company_registrations = [
8490
CompanyRegistrationField(field_dict, page_n=page_n)
8591
for field_dict in api_prediction["supplier_company_registrations"]

mindee/documents/receipt/receipt_v3.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from mindee.documents.base import Document, TypeApiPrediction, clean_out_string
44
from mindee.fields.amount import AmountField
55
from mindee.fields.base import field_array_confidence, field_array_sum
6+
from mindee.fields.classification import ClassificationField
67
from mindee.fields.date import DateField
78
from mindee.fields.locale import LocaleField
89
from mindee.fields.tax import TaxField
@@ -18,7 +19,7 @@ class ReceiptV3(Document):
1819
"""Date the receipt was issued"""
1920
time: TextField
2021
"""Time the receipt was issued"""
21-
category: TextField
22+
category: ClassificationField
2223
"""Service category"""
2324
merchant_name: TextField
2425
"""Merchant's name"""
@@ -81,7 +82,7 @@ def _build_from_api_prediction(
8182
self.locale = LocaleField(api_prediction["locale"], page_n=page_n)
8283
self.total_incl = AmountField(api_prediction["total_incl"], page_n=page_n)
8384
self.date = DateField(api_prediction["date"], page_n=page_n)
84-
self.category = TextField(api_prediction["category"], page_n=page_n)
85+
self.category = ClassificationField(api_prediction["category"], page_n=page_n)
8586
self.merchant_name = TextField(
8687
api_prediction["supplier"], value_key="value", page_n=page_n
8788
)

mindee/documents/receipt/receipt_v4.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from mindee.documents.base import Document, TypeApiPrediction, clean_out_string
44
from mindee.fields.amount import AmountField
5+
from mindee.fields.classification import ClassificationField
56
from mindee.fields.date import DateField
67
from mindee.fields.locale import LocaleField
78
from mindee.fields.tax import TaxField
@@ -17,11 +18,11 @@ class ReceiptV4(Document):
1718
"""Date the receipt was issued"""
1819
time: TextField
1920
"""Time the receipt was issued, in HH:MM format."""
20-
category: TextField
21+
category: ClassificationField
2122
"""The type, or service category, of the purchase."""
22-
subcategory: TextField
23+
subcategory: ClassificationField
2324
"""The receipt sub category among predefined classes."""
24-
document_type: TextField
25+
document_type: ClassificationField
2526
"""Whether the document is an expense receipt or a credit card receipt."""
2627
supplier: TextField
2728
"""The merchant, or supplier, as found on the receipt."""
@@ -70,9 +71,13 @@ def _build_from_api_prediction(
7071
self.total_tax = AmountField(api_prediction["total_tax"], page_n=page_n)
7172
self.tip = AmountField(api_prediction["tip"], page_n=page_n)
7273
self.date = DateField(api_prediction["date"], page_n=page_n)
73-
self.category = TextField(api_prediction["category"], page_n=page_n)
74-
self.subcategory = TextField(api_prediction["subcategory"], page_n=page_n)
75-
self.document_type = TextField(api_prediction["document_type"], page_n=page_n)
74+
self.category = ClassificationField(api_prediction["category"], page_n=page_n)
75+
self.subcategory = ClassificationField(
76+
api_prediction["subcategory"], page_n=page_n
77+
)
78+
self.document_type = ClassificationField(
79+
api_prediction["document_type"], page_n=page_n
80+
)
7681
self.supplier = TextField(
7782
api_prediction["supplier"], value_key="value", page_n=page_n
7883
)

mindee/documents/receipt/receipt_v5.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from mindee.documents.base import Document, TypeApiPrediction, clean_out_string
44
from mindee.documents.receipt.line_item_v5 import ReceiptV5LineItem
55
from mindee.fields.amount import AmountField
6+
from mindee.fields.classification import ClassificationField
67
from mindee.fields.company_registration import CompanyRegistrationField
78
from mindee.fields.date import DateField
89
from mindee.fields.locale import LocaleField
@@ -19,11 +20,11 @@ class ReceiptV5(Document):
1920
"""The date the purchase was made."""
2021
time: TextField
2122
"""Time of purchase with 24 hours formatting (HH:MM)."""
22-
category: TextField
23+
category: ClassificationField
2324
"""The receipt category among predefined classes."""
24-
subcategory: TextField
25+
subcategory: ClassificationField
2526
"""The receipt sub category among predefined classes for transport and food."""
26-
document_type: TextField
27+
document_type: ClassificationField
2728
"""Whether the document is an expense receipt or a credit card receipt."""
2829
supplier_name: TextField
2930
"""The name of the supplier or merchant."""
@@ -80,9 +81,13 @@ def _build_from_api_prediction(
8081
self.total_tax = AmountField(api_prediction["total_tax"], page_n=page_n)
8182
self.tip = AmountField(api_prediction["tip"], page_n=page_n)
8283
self.date = DateField(api_prediction["date"], page_n=page_n)
83-
self.category = TextField(api_prediction["category"], page_n=page_n)
84-
self.subcategory = TextField(api_prediction["subcategory"], page_n=page_n)
85-
self.document_type = TextField(api_prediction["document_type"], page_n=page_n)
84+
self.category = ClassificationField(api_prediction["category"], page_n=page_n)
85+
self.subcategory = ClassificationField(
86+
api_prediction["subcategory"], page_n=page_n
87+
)
88+
self.document_type = ClassificationField(
89+
api_prediction["document_type"], page_n=page_n
90+
)
8691
self.supplier_name = TextField(
8792
api_prediction["supplier_name"], value_key="value", page_n=page_n
8893
)

mindee/fields/classification.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from typing import Optional
2+
3+
from mindee.fields.base import BaseField, TypePrediction
4+
5+
6+
class ClassificationField(BaseField):
7+
"""Represents a classifier value."""
8+
9+
value: str
10+
"""The value as a string."""
11+
12+
def __init__(
13+
self,
14+
prediction: TypePrediction,
15+
value_key: str = "value",
16+
reconstructed: bool = False,
17+
page_n: Optional[int] = None,
18+
):
19+
"""
20+
Classification field object.
21+
22+
:param prediction: Classification prediction object from HTTP response
23+
:param value_key: Key to use in the prediction dict
24+
:param reconstructed: Bool for reconstructed object (not extracted in the API)
25+
:param page_n: Page number for multi-page document
26+
"""
27+
super().__init__(
28+
prediction,
29+
value_key=value_key,
30+
reconstructed=reconstructed,
31+
page_n=page_n,
32+
)

0 commit comments

Comments
 (0)