Skip to content

Commit c756f86

Browse files
authored
fix: 🐛 fix probabilities not loaded from API (#49)
1 parent 09dce2a commit c756f86

File tree

15 files changed

+160
-156
lines changed

15 files changed

+160
-156
lines changed

mindee/documents/financial_document.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def build_from_api_prediction(self, api_prediction, input_file, page_n=0):
141141
self.company_number = invoice.company_number
142142
self.orientation = invoice.orientation
143143
self.total_tax = invoice.total_tax
144-
self.time = Field({"value": None, "probability": 0.0})
144+
self.time = Field({"value": None, "confidence": 0.0})
145145
else:
146146
receipt = Receipt(api_prediction, input_file, page_n=page_n)
147147
self.orientation = receipt.orientation
@@ -154,7 +154,7 @@ def build_from_api_prediction(self, api_prediction, input_file, page_n=0):
154154
self.merchant_name = receipt.merchant_name
155155
self.time = receipt.time
156156
self.total_tax = receipt.total_tax
157-
self.invoice_number = Field({"value": None, "probability": 0.0})
157+
self.invoice_number = Field({"value": None, "confidence": 0.0})
158158
self.payment_details = []
159159
self.company_number = []
160160

mindee/documents/invoice.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ def __init__(
6767
self.total_incl = Amount(
6868
{"value": total_incl}, value_key="value", page_n=page_n
6969
)
70-
self.date = Date({"value": invoice_date}, value_key="value", page_n=page_n)
7170
self.invoice_date = Date(
7271
{"value": invoice_date}, value_key="value", page_n=page_n
7372
)
@@ -111,7 +110,7 @@ def __init__(
111110
# Reconstruct extra fields
112111
self._reconstruct()
113112

114-
def build_from_api_prediction(self, api_prediction, page_n=0):
113+
def build_from_api_prediction(self, api_prediction: dict, page_n=0):
115114
"""
116115
:param api_prediction: Raw prediction from HTTP response
117116
:param page_n: Page number for multi pages pdf input
@@ -149,7 +148,7 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
149148
api_prediction["total_excl"], value_key="value", page_n=page_n
150149
)
151150
self.total_tax = Amount(
152-
{"value": None, "probability": 0.0}, value_key="value", page_n=page_n
151+
{"value": None, "confidence": 0.0}, value_key="value", page_n=page_n
153152
)
154153

155154
def __str__(self) -> str:
@@ -364,7 +363,7 @@ def __reconstruct_total_incl_from_taxes_plus_excl(self):
364363
[tax.value if tax.value is not None else 0 for tax in self.taxes]
365364
)
366365
+ self.total_excl.value,
367-
"probability": Field.array_probability(self.taxes)
366+
"confidence": Field.array_probability(self.taxes)
368367
* self.total_excl.probability,
369368
}
370369
self.total_incl = Amount(total_incl, value_key="value", reconstructed=True)
@@ -388,7 +387,7 @@ def __reconstruct_total_excl_from_tcc_and_taxes(self):
388387
- sum(
389388
[tax.value if tax.value is not None else 0 for tax in self.taxes]
390389
),
391-
"probability": Field.array_probability(self.taxes)
390+
"confidence": Field.array_probability(self.taxes)
392391
* self.total_incl.probability,
393392
}
394393
self.total_excl = Amount(total_excl, value_key="value", reconstructed=True)
@@ -404,7 +403,7 @@ def __reconstruct_total_tax_from_tax_lines(self):
404403
"value": sum(
405404
[tax.value if tax.value is not None else 0 for tax in self.taxes]
406405
),
407-
"probability": Field.array_probability(self.taxes),
406+
"confidence": Field.array_probability(self.taxes),
408407
}
409408
if total_tax["value"] > 0:
410409
self.total_tax = Amount(
@@ -427,8 +426,7 @@ def __reconstruct_total_tax_from_incl_and_excl(self):
427426

428427
total_tax = {
429428
"value": self.total_incl.value - self.total_excl.value,
430-
"probability": self.total_incl.probability
431-
* self.total_excl.probability,
429+
"confidence": self.total_incl.probability * self.total_excl.probability,
432430
}
433431
if total_tax["value"] >= 0:
434432
self.total_tax = Amount(

mindee/documents/passport.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,8 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
123123
Field(given_name, page_n=page_n)
124124
for given_name in api_prediction["given_names"]
125125
]
126-
self.mrz = Field({"value": None, "probability": 0.0}, page_n=page_n)
127-
self.full_name = Field({"value": None, "probability": 0.0}, page_n=page_n)
126+
self.mrz = Field({"value": None, "confidence": 0.0}, page_n=page_n)
127+
self.full_name = Field({"value": None, "confidence": 0.0}, page_n=page_n)
128128

129129
def __str__(self) -> str:
130130
return (
@@ -322,7 +322,7 @@ def __reconstruct_mrz(self):
322322
):
323323
mrz = {
324324
"value": self.mrz1.value + self.mrz2.value,
325-
"probability": Field.array_probability(
325+
"confidence": Field.array_probability(
326326
[self.mrz1.probability, self.mrz2.probability]
327327
),
328328
}
@@ -342,7 +342,7 @@ def __reconstruct_full_name(self):
342342
):
343343
full_name = {
344344
"value": self.given_names[0].value + " " + self.surname.value,
345-
"probability": Field.array_probability(
345+
"confidence": Field.array_probability(
346346
[self.surname.probability, self.given_names[0].probability]
347347
),
348348
}

mindee/documents/receipt.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,10 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
147147
if str(page_n) != "-1":
148148
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
149149
self.total_tax = Amount(
150-
{"value": None, "probability": 0.0}, value_key="value", page_n=page_n
150+
{"value": None, "confidence": 0.0}, value_key="value", page_n=page_n
151151
)
152152
self.total_excl = Amount(
153-
{"value": None, "probability": 0.0}, value_key="value", page_n=page_n
153+
{"value": None, "confidence": 0.0}, value_key="value", page_n=page_n
154154
)
155155

156156
@staticmethod
@@ -251,7 +251,7 @@ def __reconstruct_total_excl_from_tcc_and_taxes(self):
251251
if len(self.taxes) and self.total_incl.value is not None:
252252
total_excl = {
253253
"value": self.total_incl.value - Field.array_sum(self.taxes),
254-
"probability": Field.array_probability(self.taxes)
254+
"confidence": Field.array_probability(self.taxes)
255255
* self.total_incl.probability,
256256
}
257257
self.total_excl = Amount(total_excl, value_key="value", reconstructed=True)
@@ -267,7 +267,7 @@ def __reconstruct_total_tax(self):
267267
"value": sum(
268268
[tax.value if tax.value is not None else 0 for tax in self.taxes]
269269
),
270-
"probability": Field.array_probability(self.taxes),
270+
"confidence": Field.array_probability(self.taxes),
271271
}
272272
if total_tax["value"] > 0:
273273
self.total_tax = Amount(

mindee/fields/__init__.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
class Field:
2+
probability: float
3+
24
def __init__(
35
self,
46
abstract_prediction,
@@ -25,9 +27,9 @@ def __init__(
2527
else:
2628
self.value = abstract_prediction[value_key]
2729

28-
if "probability" in abstract_prediction:
29-
self.probability = abstract_prediction["probability"]
30-
else:
30+
try:
31+
self.probability = float(abstract_prediction["confidence"])
32+
except (KeyError, TypeError):
3133
self.probability = 0.0
3234

3335
if "polygon" in abstract_prediction:

tests/documents/test_financial_doc.py

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -111,26 +111,26 @@ def test__str__receipt(financial_doc_from_receipt_object):
111111
# Business tests from receipt
112112
def test__receipt_reconstruct_total_excl_from_total_and_taxes_1(receipt_pred):
113113
# no incl implies no reconstruct for total excl
114-
receipt_pred["total_incl"] = {"value": "N/A", "probability": 0.0}
115-
receipt_pred["taxes"] = [{"rate": 20, "value": 9.5, "probability": 0.9}]
114+
receipt_pred["total_incl"] = {"value": "N/A", "confidence": 0.0}
115+
receipt_pred["taxes"] = [{"rate": 20, "value": 9.5, "confidence": 0.9}]
116116
financial_doc = FinancialDocument(receipt_pred)
117117
assert financial_doc.total_excl.value is None
118118

119119

120120
def test__receipt_reconstruct_total_excl_from_total_and_taxes_2(receipt_pred):
121121
# no taxes implies no reconstruct for total excl
122-
receipt_pred["total_incl"] = {"value": 12.54, "probability": 0.0}
122+
receipt_pred["total_incl"] = {"value": 12.54, "confidence": 0.0}
123123
receipt_pred["taxes"] = []
124124
financial_doc = FinancialDocument(receipt_pred)
125125
assert financial_doc.total_excl.value is None
126126

127127

128128
def test__receipt_reconstruct_total_excl_from_total_and_taxes_3(receipt_pred):
129129
# working example
130-
receipt_pred["total_incl"] = {"value": 12.54, "probability": 0.5}
130+
receipt_pred["total_incl"] = {"value": 12.54, "confidence": 0.5}
131131
receipt_pred["taxes"] = [
132-
{"rate": 20, "value": 0.5, "probability": 0.1},
133-
{"rate": 10, "value": 4.25, "probability": 0.6},
132+
{"rate": 20, "value": 0.5, "confidence": 0.1},
133+
{"rate": 10, "value": 4.25, "confidence": 0.6},
134134
]
135135
financial_doc = FinancialDocument(receipt_pred)
136136
assert financial_doc.total_excl.probability == 0.03
@@ -147,8 +147,8 @@ def test__receipt_reconstruct_total_tax_1(receipt_pred):
147147
def test__receipt_reconstruct_total_tax_2(receipt_pred):
148148
# working example
149149
receipt_pred["taxes"] = [
150-
{"rate": 20, "value": 10.2, "probability": 0.5},
151-
{"rate": 10, "value": 40.0, "probability": 0.1},
150+
{"rate": 20, "value": 10.2, "confidence": 0.5},
151+
{"rate": 10, "value": 40.0, "confidence": 0.1},
152152
]
153153
financial_doc = FinancialDocument(receipt_pred)
154154
assert financial_doc.total_tax.value == 50.2
@@ -157,10 +157,10 @@ def test__receipt_reconstruct_total_tax_2(receipt_pred):
157157

158158
def test__receipt_taxes_match_total_incl_1(receipt_pred):
159159
# matching example
160-
receipt_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
160+
receipt_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
161161
receipt_pred["taxes"] = [
162-
{"rate": 20, "value": 10.99, "probability": 0.5},
163-
{"rate": 10, "value": 40.12, "probability": 0.1},
162+
{"rate": 20, "value": 10.99, "confidence": 0.5},
163+
{"rate": 10, "value": 40.12, "confidence": 0.1},
164164
]
165165
financial_doc = FinancialDocument(receipt_pred)
166166
assert financial_doc.checklist["taxes_match_total_incl"] is True
@@ -171,46 +171,46 @@ def test__receipt_taxes_match_total_incl_1(receipt_pred):
171171

172172
def test__receipt_taxes_match_total_incl_2(receipt_pred):
173173
# not matching example with close error
174-
receipt_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
174+
receipt_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
175175
receipt_pred["taxes"] = [
176-
{"rate": 20, "value": 10.9, "probability": 0.5},
177-
{"rate": 10, "value": 40.12, "probability": 0.1},
176+
{"rate": 20, "value": 10.9, "confidence": 0.5},
177+
{"rate": 10, "value": 40.12, "confidence": 0.1},
178178
]
179179
financial_doc = FinancialDocument(receipt_pred)
180180
assert financial_doc.checklist["taxes_match_total_incl"] is False
181181

182182

183183
def test__receipt_taxes_match_total_incl_3(receipt_pred):
184184
# sanity check with null tax
185-
receipt_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
186-
receipt_pred["taxes"] = [{"rate": 20, "value": 0.0, "probability": 0.5}]
185+
receipt_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
186+
receipt_pred["taxes"] = [{"rate": 20, "value": 0.0, "confidence": 0.5}]
187187
financial_doc = FinancialDocument(receipt_pred)
188188
assert financial_doc.checklist["taxes_match_total_incl"] is False
189189

190190

191191
# Business tests from invoice
192192
def test__invoice_reconstruct_total_excl_from_total_and_taxes_1(invoice_pred):
193193
# no incl implies no reconstruct for total excl
194-
invoice_pred["total_incl"] = {"amount": "N/A", "probability": 0.0}
195-
invoice_pred["taxes"] = [{"rate": 20, "amount": 9.5, "probability": 0.9}]
194+
invoice_pred["total_incl"] = {"amount": "N/A", "confidence": 0.0}
195+
invoice_pred["taxes"] = [{"rate": 20, "amount": 9.5, "confidence": 0.9}]
196196
financial_doc = FinancialDocument(invoice_pred)
197197
assert financial_doc.total_excl.value is None
198198

199199

200200
def test__invoice_reconstruct_total_excl_from_total_and_taxes_2(invoice_pred):
201201
# no taxes implies no reconstruct for total excl
202-
invoice_pred["total_incl"] = {"amount": 12.54, "probability": 0.0}
202+
invoice_pred["total_incl"] = {"amount": 12.54, "confidence": 0.0}
203203
invoice_pred["taxes"] = []
204204
financial_doc = FinancialDocument(invoice_pred)
205205
assert financial_doc.total_excl.value is None
206206

207207

208208
def test__invoice_reconstruct_total_excl_from_total_and_taxes_3(invoice_pred):
209209
# working example
210-
invoice_pred["total_incl"] = {"value": 12.54, "probability": 0.5}
210+
invoice_pred["total_incl"] = {"value": 12.54, "confidence": 0.5}
211211
invoice_pred["taxes"] = [
212-
{"rate": 20, "value": 0.5, "probability": 0.1},
213-
{"rate": 10, "value": 4.25, "probability": 0.6},
212+
{"rate": 20, "value": 0.5, "confidence": 0.1},
213+
{"rate": 10, "value": 4.25, "confidence": 0.6},
214214
]
215215
financial_doc = FinancialDocument(invoice_pred)
216216
assert financial_doc.total_excl.probability == 0.03
@@ -227,8 +227,8 @@ def test__invoice_reconstruct_total_tax_1(invoice_pred):
227227
def test__invoice_reconstruct_total_tax_2(invoice_pred):
228228
# working example
229229
invoice_pred["taxes"] = [
230-
{"rate": 20, "value": 10.2, "probability": 0.5},
231-
{"rate": 10, "value": 40.0, "probability": 0.1},
230+
{"rate": 20, "value": 10.2, "confidence": 0.5},
231+
{"rate": 10, "value": 40.0, "confidence": 0.1},
232232
]
233233
financial_doc = FinancialDocument(invoice_pred)
234234
assert financial_doc.total_tax.value == 50.2
@@ -237,10 +237,10 @@ def test__invoice_reconstruct_total_tax_2(invoice_pred):
237237

238238
def test__invoice_taxes_match_total_incl_1(invoice_pred):
239239
# matching example
240-
invoice_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
240+
invoice_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
241241
invoice_pred["taxes"] = [
242-
{"rate": 20, "value": 10.99, "probability": 0.5},
243-
{"rate": 10, "value": 40.12, "probability": 0.1},
242+
{"rate": 20, "value": 10.99, "confidence": 0.5},
243+
{"rate": 10, "value": 40.12, "confidence": 0.1},
244244
]
245245
financial_doc = FinancialDocument(invoice_pred)
246246
assert financial_doc.checklist["taxes_match_total_incl"] is True
@@ -251,27 +251,27 @@ def test__invoice_taxes_match_total_incl_1(invoice_pred):
251251

252252
def test__invoice_taxes_match_total_incl_2(invoice_pred):
253253
# not matching example with close error
254-
invoice_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
254+
invoice_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
255255
invoice_pred["taxes"] = [
256-
{"rate": 20, "value": 10.9, "probability": 0.5},
257-
{"rate": 10, "value": 40.12, "probability": 0.1},
256+
{"rate": 20, "value": 10.9, "confidence": 0.5},
257+
{"rate": 10, "value": 40.12, "confidence": 0.1},
258258
]
259259
financial_doc = FinancialDocument(invoice_pred)
260260
assert financial_doc.checklist["taxes_match_total_incl"] is False
261261

262262

263263
def test__invoice_taxes_match_total_incl_3(invoice_pred):
264264
# sanity check with null tax
265-
invoice_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
266-
invoice_pred["taxes"] = [{"rate": 20, "value": 0.0, "probability": 0.5}]
265+
invoice_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
266+
invoice_pred["taxes"] = [{"rate": 20, "value": 0.0, "confidence": 0.5}]
267267
financial_doc = FinancialDocument(invoice_pred)
268268
assert financial_doc.checklist["taxes_match_total_incl"] is False
269269

270270

271271
def test__shouldnt_raise_when_tax_rate_none(invoice_pred):
272272
# sanity check with null tax
273-
invoice_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
274-
invoice_pred["taxes"] = [{"rate": "N/A", "value": 0.0, "probability": 0.5}]
273+
invoice_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
274+
invoice_pred["taxes"] = [{"rate": "N/A", "value": 0.0, "confidence": 0.5}]
275275
financial_doc = FinancialDocument(invoice_pred)
276276
assert financial_doc.checklist["taxes_match_total_incl"] is False
277277

0 commit comments

Comments
 (0)