Skip to content

Commit 4979c80

Browse files
authored
🐛 page_n should always be set when available (#106)
1 parent 738ed5f commit 4979c80

File tree

7 files changed

+60
-9
lines changed

7 files changed

+60
-9
lines changed

mindee/fields/api_builder.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ class ListField:
3737
"""Confidence score"""
3838
reconstructed: bool
3939
"""Whether the field was reconstructed from other fields."""
40-
page_n: Optional[int] = None
40+
page_n: int
4141
"""The document page on which the information was found."""
4242
values: List[ListFieldValue]
4343
"""List of word values"""
@@ -50,7 +50,10 @@ def __init__(
5050
):
5151
self.values = []
5252
self.reconstructed = reconstructed
53-
self.page_n = page_n
53+
if page_n is None:
54+
self.page_n = prediction["page_id"]
55+
else:
56+
self.page_n = page_n
5457
self.confidence = prediction["confidence"]
5558

5659
for value in prediction["values"]:

mindee/fields/base.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,14 @@ def __init__(
4949
:param reconstructed: Bool for reconstructed object (not extracted in the API)
5050
:param page_n: Page number for multi-page PDF
5151
"""
52-
self.page_n = page_n
52+
if page_n is None:
53+
try:
54+
self.page_n = prediction["page_id"]
55+
except KeyError:
56+
pass
57+
else:
58+
self.page_n = page_n
59+
5360
self.reconstructed = reconstructed
5461

5562
if value_key not in prediction or prediction[value_key] == "N/A":

tests/documents/test_custom_v1.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,7 @@ def test_complete(custom_v1_doc_object):
5757
assert value.content != ""
5858
assert len(value.bounding_box) == 4
5959
assert value.confidence != 0.0
60+
assert field.page_n == 0 or field.page_n == 1
6061
for field_name, field in custom_v1_doc_object.classifications.items():
6162
assert len(field_name) > 0
6263
assert isinstance(field, ClassificationField)
@@ -66,3 +67,7 @@ def test_complete(custom_v1_doc_object):
6667

6768
def test_page_complete(custom_v1_page_object):
6869
assert custom_v1_page_object.orientation.value == 0
70+
for field_name, field in custom_v1_page_object.fields.items():
71+
assert isinstance(field, ListField)
72+
assert len(field.contents_list) == len(field.values)
73+
assert field.page_n == 0

tests/documents/test_invoice_v3.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,14 +20,21 @@ def invoice_v3_doc_object_empty(invoice_pred):
2020
return InvoiceV3(api_prediction=invoice_pred)
2121

2222

23+
@pytest.fixture
24+
def invoice_v3_page_object():
25+
json_data = json.load(open(FILE_PATH_INVOICE_V3_COMPLETE))
26+
return InvoiceV3(
27+
api_prediction=json_data["document"]["inference"]["pages"][0], page_n=0
28+
)
29+
30+
2331
@pytest.fixture
2432
def invoice_pred():
2533
json_data = json.load(open(FILE_PATH_INVOICE_V3_EMPTY))
2634
return json_data["document"]["inference"]["pages"][0]
2735

2836

29-
# Technical tests
30-
def test_constructor(invoice_v3_doc_object):
37+
def test_doc_constructor(invoice_v3_doc_object):
3138
assert invoice_v3_doc_object.invoice_date.value == "2020-02-17"
3239
assert invoice_v3_doc_object.checklist["taxes_match_total_incl"] is True
3340
assert invoice_v3_doc_object.checklist["taxes_match_total_excl"] is True
@@ -44,6 +51,14 @@ def test_constructor(invoice_v3_doc_object):
4451
assert str(invoice_v3_doc_object) == doc_str
4552

4653

54+
def test_page_constructor(invoice_v3_page_object):
55+
doc_str = open(f"{INVOICE_DATA_DIR}/response_v3/page0_to_string.txt").read().strip()
56+
assert invoice_v3_page_object.orientation.value == 0
57+
assert invoice_v3_page_object.invoice_number.page_n == 0
58+
assert str(invoice_v3_page_object) == doc_str
59+
assert len(invoice_v3_page_object.cropper) == 0
60+
61+
4762
def test_all_na(invoice_v3_doc_object_empty):
4863
assert invoice_v3_doc_object_empty.locale.value is None
4964
assert invoice_v3_doc_object_empty.total_incl.value is None

tests/documents/test_passport_v1.py

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,13 +11,21 @@
1111
@pytest.fixture
1212
def passport_v1_doc_object():
1313
json_data = json.load(open(FILE_PATH_PASSPORT_V1_COMPLETE))
14-
return PassportV1(api_prediction=json_data["document"]["inference"]["pages"][0])
14+
return PassportV1(api_prediction=json_data["document"]["inference"], page_n=None)
1515

1616

1717
@pytest.fixture
1818
def passport_v1_doc_object_empty():
1919
json_data = json.load(open(f"{PASSPORT_DATA_DIR}/response_v1/empty.json"))
20-
return PassportV1(api_prediction=json_data["document"]["inference"]["pages"][0])
20+
return PassportV1(api_prediction=json_data["document"]["inference"], page_n=None)
21+
22+
23+
@pytest.fixture
24+
def passport_v1_page_object():
25+
json_data = json.load(open(FILE_PATH_PASSPORT_V1_COMPLETE))
26+
return PassportV1(
27+
api_prediction=json_data["document"]["inference"]["pages"][0], page_n=0
28+
)
2129

2230

2331
def test_constructor(passport_v1_doc_object):
@@ -26,9 +34,20 @@ def test_constructor(passport_v1_doc_object):
2634
doc_str = (
2735
open(f"{PASSPORT_DATA_DIR}/response_v1/page0_to_string.txt").read().strip()
2836
)
37+
assert passport_v1_doc_object.birth_date.page_n == 0
2938
assert str(passport_v1_doc_object) == doc_str
3039

3140

41+
def test_page_constructor(passport_v1_page_object):
42+
doc_str = (
43+
open(f"{PASSPORT_DATA_DIR}/response_v1/page0_to_string.txt").read().strip()
44+
)
45+
assert passport_v1_page_object.orientation.value == 0
46+
assert passport_v1_page_object.birth_date.page_n == 0
47+
assert str(passport_v1_page_object) == doc_str
48+
assert len(passport_v1_page_object.cropper) == 0
49+
50+
3251
def test_all_na(passport_v1_doc_object_empty):
3352
assert passport_v1_doc_object_empty.mrz.value is None
3453
assert passport_v1_doc_object_empty.country.value is None

tests/documents/test_receipt_v3.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,12 +27,12 @@ def receipt_pred():
2727
return json_data["document"]["inference"]["pages"][0]
2828

2929

30-
# Technical tests
31-
def test_constructor(receipt_v3_doc_object):
30+
def test_doc_constructor(receipt_v3_doc_object):
3231
assert receipt_v3_doc_object.date.value == "2016-02-26"
3332
assert receipt_v3_doc_object.total_tax.value == 1.7
3433
assert receipt_v3_doc_object.checklist["taxes_match_total_incl"] is True
3534
doc_str = open(f"{RECEIPT_DATA_DIR}/response_v3/doc_to_string.txt").read().strip()
35+
assert receipt_v3_doc_object.date.page_n == 0
3636
assert str(receipt_v3_doc_object) == doc_str
3737

3838

tests/documents/test_receipt_v4.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ def test_doc_constructor(receipt_v4_doc_object):
3434
assert receipt_v4_doc_object.total_tax.value == 3.34
3535
doc_str = open(f"{RECEIPT_DATA_DIR}/response_v4/doc_to_string.txt").read().strip()
3636
assert receipt_v4_doc_object.orientation is None
37+
assert receipt_v4_doc_object.date.page_n == 0
3738
assert str(receipt_v4_doc_object) == doc_str
3839

3940

@@ -42,6 +43,7 @@ def test_page_constructor(receipt_v4_page_object):
4243
assert receipt_v4_page_object.total_tax.value == 3.34
4344
doc_str = open(f"{RECEIPT_DATA_DIR}/response_v4/page0_to_string.txt").read().strip()
4445
assert receipt_v4_page_object.orientation.value == 0
46+
assert receipt_v4_page_object.date.page_n == 0
4547
assert str(receipt_v4_page_object) == doc_str
4648
assert len(receipt_v4_page_object.cropper) == 0
4749

0 commit comments

Comments (0)