Skip to content

Commit 4b80f1a

Browse files
authored
✨ add support for: financial document v1 (#119)
1 parent c9061a8 commit 4b80f1a

File tree

10 files changed

+676
-400
lines changed

10 files changed

+676
-400
lines changed

mindee/client.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from mindee.documents import (
55
CropperV1,
66
CustomV1,
7+
FinancialV0,
78
FinancialV1,
89
InvoiceV3,
910
InvoiceV4,
@@ -228,9 +229,9 @@ def _init_default_endpoints(self) -> None:
228229
)
229230
],
230231
),
231-
(OTS_OWNER, FinancialV1.__name__): DocumentConfig(
232+
(OTS_OWNER, FinancialV0.__name__): DocumentConfig(
232233
document_type="financial_doc",
233-
document_class=FinancialV1,
234+
document_class=FinancialV0,
234235
endpoints=[
235236
StandardEndpoint(
236237
url_name="invoices", version="3", api_key=self.api_key
@@ -240,6 +241,15 @@ def _init_default_endpoints(self) -> None:
240241
),
241242
],
242243
),
244+
(OTS_OWNER, FinancialV1.__name__): DocumentConfig(
245+
document_type="financial_doc",
246+
document_class=FinancialV1,
247+
endpoints=[
248+
StandardEndpoint(
249+
url_name="financial_document", version="1", api_key=self.api_key
250+
),
251+
],
252+
),
243253
(OTS_OWNER, PassportV1.__name__): DocumentConfig(
244254
document_type="passport_v1",
245255
document_class=PassportV1,

mindee/documents/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
from mindee.documents import fr, us
22
from mindee.documents.cropper import CropperV1, TypeCropperV1
33
from mindee.documents.custom import CustomV1, TypeCustomV1
4-
from mindee.documents.financial import FinancialV1, TypeFinancialV1
4+
from mindee.documents.financial import (
5+
FinancialV0,
6+
FinancialV1,
7+
TypeFinancialV0,
8+
TypeFinancialV1,
9+
)
510
from mindee.documents.invoice import InvoiceV3, InvoiceV4, TypeInvoiceV3, TypeInvoiceV4
611
from mindee.documents.passport import PassportV1, TypePassportV1
712
from mindee.documents.receipt import ReceiptV3, ReceiptV4, TypeReceiptV3, TypeReceiptV4
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1+
from .financial_v0 import FinancialV0, TypeFinancialV0
12
from .financial_v1 import FinancialV1, TypeFinancialV1
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
from typing import List, Optional, TypeVar
2+
3+
from mindee.documents.base import Document, TypeApiPrediction, clean_out_string
4+
from mindee.documents.invoice.invoice_v3 import InvoiceV3
5+
from mindee.documents.receipt.receipt_v3 import ReceiptV3
6+
from mindee.endpoints import Endpoint
7+
from mindee.fields.amount import AmountField
8+
from mindee.fields.company_registration import CompanyRegistrationField
9+
from mindee.fields.date import DateField
10+
from mindee.fields.locale import LocaleField
11+
from mindee.fields.payment_details import PaymentDetails
12+
from mindee.fields.tax import TaxField
13+
from mindee.fields.text import TextField
14+
from mindee.input.sources import InputSource
15+
16+
17+
class FinancialV0(Document):
18+
locale: LocaleField
19+
"""locale information"""
20+
total_incl: AmountField
21+
"""Total including taxes"""
22+
total_excl: AmountField
23+
"""Total excluding taxes"""
24+
date: DateField
25+
"""Date the document was issued"""
26+
time: TextField
27+
"""Time the document was issued"""
28+
invoice_number: TextField
29+
"""Invoice number"""
30+
due_date: DateField
31+
"""Date the invoice is due"""
32+
taxes: List[TaxField]
33+
"""List of all taxes"""
34+
merchant_name: TextField
35+
"""Merchant/Supplier's name"""
36+
supplier_address: TextField
37+
"""Merchant/Supplier's address"""
38+
customer_name: TextField
39+
"""Customer's name"""
40+
customer_address: TextField
41+
"""Customer's address"""
42+
customer_company_registration: List[CompanyRegistrationField]
43+
"""Customer company registration numbers"""
44+
payment_details: List[PaymentDetails]
45+
"""Payment details"""
46+
company_number: List[CompanyRegistrationField]
47+
"""Company numbers"""
48+
total_tax: AmountField
49+
"""Sum total of all taxes"""
50+
51+
def __init__(
52+
self,
53+
api_prediction=None,
54+
input_source=None,
55+
page_n: Optional[int] = None,
56+
document_type="financial_doc",
57+
):
58+
"""
59+
Union of `Invoice` and `Receipt`.
60+
61+
:param api_prediction: Raw prediction from HTTP response
62+
:param input_source: Input object
63+
:param page_n: Page number for multi-page PDF input
64+
"""
65+
# need this for building from prediction
66+
self.input_file = input_source
67+
68+
super().__init__(
69+
input_source=input_source,
70+
document_type=document_type,
71+
api_prediction=api_prediction,
72+
page_n=page_n,
73+
)
74+
self._build_from_api_prediction(api_prediction, page_n=page_n)
75+
self._checklist()
76+
77+
def _build_from_api_prediction(
78+
self, api_prediction: TypeApiPrediction, page_n: Optional[int] = None
79+
) -> None:
80+
"""
81+
Build the document from an API response JSON.
82+
83+
:param api_prediction: Raw prediction from HTTP response
84+
:param page_n: Page number for multi pages pdf input
85+
"""
86+
if "invoice_number" in api_prediction["prediction"].keys():
87+
invoice = InvoiceV3(api_prediction, self.input_file, page_n=page_n)
88+
self.locale = invoice.locale
89+
self.total_incl = invoice.total_incl
90+
self.total_excl = invoice.total_excl
91+
self.date = invoice.invoice_date
92+
self.invoice_number = invoice.invoice_number
93+
self.due_date = invoice.due_date
94+
self.taxes = invoice.taxes
95+
self.merchant_name = invoice.supplier
96+
self.payment_details = invoice.payment_details
97+
self.company_number = invoice.company_number
98+
self.orientation = invoice.orientation
99+
self.total_tax = invoice.total_tax
100+
self.time = TextField({"value": None, "confidence": 0.0})
101+
self.supplier_address = invoice.supplier_address
102+
self.customer_name = invoice.customer_name
103+
self.customer_company_registration = invoice.customer_company_registration
104+
self.customer_address = invoice.customer_address
105+
else:
106+
receipt = ReceiptV3(api_prediction, self.input_file, page_n=page_n)
107+
self.orientation = receipt.orientation
108+
self.date = receipt.date
109+
self.due_date = receipt.date
110+
self.taxes = receipt.taxes
111+
self.locale = receipt.locale
112+
self.total_incl = receipt.total_incl
113+
self.total_excl = receipt.total_excl
114+
self.merchant_name = receipt.merchant_name
115+
self.time = receipt.time
116+
self.total_tax = receipt.total_tax
117+
self.customer_company_registration = []
118+
self.company_number = []
119+
self.payment_details = []
120+
self.invoice_number = TextField({"value": None, "confidence": 0.0})
121+
self.supplier_address = TextField({"value": None, "confidence": 0.0})
122+
self.customer_name = TextField({"value": None, "confidence": 0.0})
123+
self.customer_address = TextField({"value": None, "confidence": 0.0})
124+
125+
def __str__(self) -> str:
126+
return clean_out_string(
127+
"-----Financial Document data-----\n"
128+
f"Filename: {self.filename or ''}\n"
129+
f"Invoice number: {self.invoice_number.value}\n"
130+
f"Total amount including taxes: {self.total_incl.value}\n"
131+
f"Total amount excluding taxes: {self.total_excl.value}\n"
132+
"Date: %s\n"
133+
"Invoice due date: %s\n"
134+
"Supplier name: %s\n"
135+
f"Supplier address: {self.supplier_address}\n"
136+
f"Customer name: {self.customer_name}\n"
137+
f"Customer company registration: {self.customer_company_registration}\n"
138+
f"Customer address: {self.customer_address}\n"
139+
"Taxes: %s\n"
140+
"Total taxes: %s\n"
141+
"----------------------"
142+
% (
143+
self.date.value,
144+
self.due_date.value,
145+
self.merchant_name.value,
146+
",".join([str(t.value) + " " + str(t.rate) + "%" for t in self.taxes]),
147+
self.total_tax.value,
148+
)
149+
)
150+
151+
@staticmethod
152+
def request(
153+
endpoints: List[Endpoint],
154+
input_source: InputSource,
155+
include_words: bool = False,
156+
close_file: bool = True,
157+
cropper: bool = False,
158+
):
159+
"""
160+
Make request to prediction endpoint.
161+
162+
:param input_source: Input object
163+
:param endpoints: Endpoints config
164+
:param include_words: Include Mindee vision words in http_response
165+
:param close_file: Whether to `close()` the file after parsing it.
166+
:param cropper: Including Mindee cropper results.
167+
"""
168+
if "pdf" in input_source.file_mimetype:
169+
# invoices is index 0, receipts 1 (this should be cleaned up)
170+
index = 0
171+
else:
172+
index = 1
173+
return endpoints[index].predict_req_post(
174+
input_source, include_words, close_file, cropper=cropper
175+
)
176+
177+
def _checklist(self) -> None:
178+
"""Set the validation rules."""
179+
self.checklist = {"taxes_match_total_incl": self.__taxes_match_total_incl()}
180+
181+
# Checks
182+
def __taxes_match_total_incl(self) -> bool:
183+
"""
184+
Check invoice rule of matching between taxes and total_incl.
185+
186+
:return: True if rule matches, False otherwise
187+
"""
188+
# Check taxes and total_incl exist
189+
if len(self.taxes) == 0 or self.total_incl.value is None:
190+
return False
191+
192+
# Reconstruct total_incl from taxes
193+
total_vat = 0.0
194+
reconstructed_total = 0.0
195+
for tax in self.taxes:
196+
if tax.rate is not None and tax.rate != 0 and tax.value is not None:
197+
total_vat += tax.value
198+
reconstructed_total += tax.value + 100 * tax.value / tax.rate
199+
200+
# Sanity check
201+
if total_vat <= 0:
202+
return False
203+
204+
# Crate epsilon
205+
eps = 1 / (100 * total_vat)
206+
if (
207+
self.total_incl.value * (1 - eps) - 0.02
208+
<= reconstructed_total
209+
<= self.total_incl.value * (1 + eps) + 0.02
210+
):
211+
for tax in self.taxes:
212+
tax.confidence = 1.0
213+
self.total_tax.confidence = 1.0
214+
self.total_incl.confidence = 1.0
215+
return True
216+
return False
217+
218+
219+
# Type variable bound to FinancialV0, for annotating code that accepts
# FinancialV0 or any subclass of it.
TypeFinancialV0 = TypeVar("TypeFinancialV0", bound=FinancialV0)

0 commit comments

Comments
 (0)