Skip to content

Commit e1bb1ba

Browse files
authored
fix: 🐛 fixed API error when using base64 (#45)
1 parent 42b8d65 commit e1bb1ba

File tree

5 files changed

+1095
-43
lines changed

5 files changed

+1095
-43
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,5 +84,5 @@ with open('/path/to/file', 'rb') as fp:
8484

8585
From a base64 string
8686
```python
87-
receipt_data = mindee_client.parse_receipt(base64_string, input_type="base64")
87+
receipt_data = mindee_client.parse_receipt(base64_string, input_type="base64", filename="receipt.jpg")
8888
```

mindee/__init__.py

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def parse_receipt(
5050
cut_pdf=True,
5151
include_words=False,
5252
cut_pdf_mode=3,
53+
filename=None,
5354
):
5455
"""
5556
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
@@ -60,6 +61,7 @@ def parse_receipt(
6061
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
6162
:param input_type: String in {'path', 'stream', 'base64'}
6263
:param file: Receipt filepath (allowed jpg, png, tiff, pdf)
64+
:param filename: the name of the file (without the path)
6365
:param version: expense_receipt api version
6466
:return: Wrapped response with Receipts objects parsed
6567
"""
@@ -68,7 +70,13 @@ def parse_receipt(
6870
"Missing 'expense_receipt_token' arg in parse_receipt() function."
6971
)
7072

71-
input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)
73+
input_file = Inputs(
74+
file,
75+
input_type,
76+
filename=filename,
77+
cut_pdf=cut_pdf,
78+
n_pdf_pages=cut_pdf_mode,
79+
)
7280

7381
response = Receipt.request(
7482
input_file,
@@ -109,6 +117,7 @@ def parse_passport(
109117
version="1",
110118
cut_pdf=True,
111119
cut_pdf_mode=3,
120+
filename=None,
112121
):
113122
"""
114123
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
@@ -118,6 +127,7 @@ def parse_passport(
118127
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
119128
:param input_type: String in {'path', 'stream', 'base64'}
120129
:param file: Passport filepath (allowed jpg, png, pdf)
130+
:param filename: the name of the file (without the path)
121131
:param version: passport api version
122132
:return: Wrapped response with passports objects parsed
123133
"""
@@ -126,7 +136,13 @@ def parse_passport(
126136
"Missing 'passport_token' arg in parse_passport() function."
127137
)
128138

129-
input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)
139+
input_file = Inputs(
140+
file,
141+
input_type,
142+
filename=filename,
143+
cut_pdf=cut_pdf,
144+
n_pdf_pages=cut_pdf_mode,
145+
)
130146

131147
response = Passport.request(input_file, self.passport_token, version)
132148

@@ -139,6 +155,7 @@ def parse_license_plate(
139155
version="1",
140156
cut_pdf=True,
141157
cut_pdf_mode=3,
158+
filename=None,
142159
):
143160
"""
144161
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
@@ -148,6 +165,7 @@ def parse_license_plate(
148165
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
149166
:param input_type: String in {'path', 'stream', 'base64'}
150167
:param file: CarPlate filepath (allowed jpg, png, pdf)
168+
:param filename: the name of the file (without the path)
151169
:param version: license_plates api version
152170
:return: Wrapped response with CarPlates objects parsed
153171
"""
@@ -156,7 +174,13 @@ def parse_license_plate(
156174
"Missing 'license_plate_token' arg in license_plate_token() function."
157175
)
158176

159-
input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)
177+
input_file = Inputs(
178+
file,
179+
input_type,
180+
filename=filename,
181+
cut_pdf=cut_pdf,
182+
n_pdf_pages=cut_pdf_mode,
183+
)
160184

161185
response = CarPlate.request(input_file, self.license_plate_token, version)
162186

@@ -170,6 +194,7 @@ def parse_invoice(
170194
cut_pdf=True,
171195
include_words=False,
172196
cut_pdf_mode=3,
197+
filename=None,
173198
):
174199
"""
175200
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
@@ -179,14 +204,21 @@ def parse_invoice(
179204
:param include_words: Bool, extract all words into http_response
180205
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
181206
:param input_type: String in {'path', 'stream', 'base64'}
182-
:param file: Invoice filepath (allowed jpg, png, pdf)
207+
:param file: Invoice full path (allowed jpg, png, pdf)
208+
:param filename: the name of the file (without the path)
183209
:param version: invoices api version
184210
:return: Wrapped response with Invoices objects parsed
185211
"""
186212
if not self.invoice_token:
187213
raise Exception("Missing 'invoice_token' arg in parse_invoice() function.")
188214

189-
input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)
215+
input_file = Inputs(
216+
file,
217+
input_type,
218+
filename=filename,
219+
cut_pdf=cut_pdf,
220+
n_pdf_pages=cut_pdf_mode,
221+
)
190222

191223
response = Invoice.request(
192224
input_file, self.invoice_token, version, include_words
@@ -201,6 +233,7 @@ def parse_financial_document(
201233
cut_pdf=True,
202234
include_words=False,
203235
cut_pdf_mode=3,
236+
filename=None,
204237
):
205238
"""
206239
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
@@ -211,14 +244,21 @@ def parse_financial_document(
211244
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
212245
:param input_type: String in {'path', 'stream', 'base64'}
213246
:param file: Invoice or Receipt filepath (allowed jpg, png, pdf)
247+
:param filename: the name of the file (without the path)
214248
:return: Wrapped response with FinancialDocument objects parsed
215249
"""
216250
if not self.invoice_token or not self.expense_receipt_token:
217251
raise Exception(
218252
"parse_invoice() function must include 'invoice_token' and 'expense_receipt_token' args."
219253
)
220254

221-
input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)
255+
input_file = Inputs(
256+
file,
257+
input_type,
258+
filename=filename,
259+
cut_pdf=cut_pdf,
260+
n_pdf_pages=cut_pdf_mode,
261+
)
222262

223263
response = FinancialDocument.request(
224264
input_file,

mindee/inputs.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
11
import io
22
import os
3-
from base64 import decodebytes
3+
import base64
44
from mimetypes import guess_type
55
import pikepdf
66

7+
ALLOWED_EXTENSIONS = [
8+
"image/png",
9+
"image/jpg",
10+
"image/jpeg",
11+
"image/webp",
12+
"application/pdf",
13+
]
14+
715

816
class Inputs:
917
def __init__(
@@ -15,17 +23,11 @@ def __init__(
1523
:param filename: File name of the input
1624
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
1725
"""
18-
self.allowed_extensions = [
19-
"image/png",
20-
"image/jpg",
21-
"image/jpeg",
22-
"image/webp",
23-
"application/pdf",
24-
]
2526
assert input_type in ["base64", "path", "stream", "dummy"]
2627
assert 0 < n_pdf_pages <= 3
2728

2829
if input_type == "base64":
30+
assert filename, "filename must be set"
2931
# Only for images
3032
self.file_object = Inputs.b64_to_stream(file)
3133
self.input_type = input_type
@@ -53,10 +55,9 @@ def __init__(
5355
self.filename = ""
5456
self.filepath = ""
5557
self.file_extension = ""
56-
elif self.file_extension not in self.allowed_extensions:
57-
raise Exception(
58-
"File type not allowed, must be in {%s}"
59-
% ", ".join(self.allowed_extensions)
58+
elif self.file_extension not in ALLOWED_EXTENSIONS:
59+
raise AssertionError(
60+
"File type not allowed, must be in {%s}" % ", ".join(ALLOWED_EXTENSIONS)
6061
)
6162

6263
if self.file_extension == "application/pdf":
@@ -88,12 +89,12 @@ def load(input_type, filename, filepath, file_extension):
8889
return file_input
8990

9091
@staticmethod
91-
def b64_to_stream(b64_string):
92+
def b64_to_stream(b64_string: str):
9293
"""
9394
:param b64_string: base64-encoded image string
9495
:return: stream from base64
9596
"""
96-
bytes_object = decodebytes(b64_string.encode("utf-8"))
97+
bytes_object = base64.standard_b64decode(b64_string)
9798
return io.BytesIO(bytes_object)
9899

99100
def count_pdf_pages(self):

tests/data/expense_receipts/receipt.txt

Lines changed: 1004 additions & 1 deletion
Large diffs are not rendered by default.

tests/test_client.py

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pytest
22
from mindee import Client, Response, Receipt, Passport
3+
from mindee.http import HTTPException
34

45

56
@pytest.fixture
@@ -61,68 +62,68 @@ def test_parse_license_plate_without_token(empty_client):
6162

6263

6364
def test_parse_receipt_with_wrong_filetype(dummy_client):
64-
with pytest.raises(Exception):
65+
with pytest.raises(AssertionError):
6566
dummy_client.parse_receipt("./tests/data/expense_receipts/receipt.jpga")
6667

6768

6869
def test_parse_invoice_with_wrong_filetype(dummy_client):
69-
with pytest.raises(Exception):
70+
with pytest.raises(AssertionError):
7071
dummy_client.parse_invoice("./tests/data/expense_receipts/receipt.jpga")
7172

7273

7374
def test_parse_financial_doc_with_wrong_filetype(dummy_client):
74-
with pytest.raises(Exception):
75+
with pytest.raises(AssertionError):
7576
dummy_client.parse_financial_document(
7677
"./tests/data/expense_receipts/receipt.jpga"
7778
)
7879

7980

8081
def test_parse_passport_with_wrong_filetype(dummy_client):
81-
with pytest.raises(Exception):
82+
with pytest.raises(AssertionError):
8283
dummy_client.parse_passport("./tests/data/expense_receipts/receipt.jpga")
8384

8485

8586
def test_parse_plate_with_wrong_filetype(dummy_client):
86-
with pytest.raises(Exception):
87+
with pytest.raises(AssertionError):
8788
dummy_client.parse_license_plate("./tests/data/expense_receipts/receipt.jpga")
8889

8990

9091
def test_parse_receipt_with_wrong_token(dummy_client):
91-
with pytest.raises(Exception):
92+
with pytest.raises(HTTPException):
9293
dummy_client.parse_receipt("./tests/data/expense_receipts/receipt.jpg")
9394

9495

9596
def test_parse_receipt_with_wrong_version(dummy_client):
96-
with pytest.raises(Exception):
97+
with pytest.raises(HTTPException):
9798
dummy_client.parse_receipt(
9899
"./tests/data/expense_receipts/receipt.jpg", version="4000"
99100
)
100101

101102

102103
def test_parse_invoice_with_wrong_token(dummy_client):
103-
with pytest.raises(Exception):
104+
with pytest.raises(HTTPException):
104105
dummy_client.parse_invoice("./tests/data/expense_receipts/receipt.jpg")
105106

106107

107108
def test_parse_financial_doc_with_wrong_token_jpg(dummy_client):
108-
with pytest.raises(Exception):
109+
with pytest.raises(HTTPException):
109110
dummy_client.parse_financial_document(
110111
"./tests/data/expense_receipts/receipt.jpg"
111112
)
112113

113114

114115
def test_parse_financial_doc_with_wrong_token_pdf(dummy_client):
115-
with pytest.raises(Exception):
116+
with pytest.raises(HTTPException):
116117
dummy_client.parse_financial_document("./tests/data/invoices/invoice.pdf")
117118

118119

119120
def test_parse_passport_with_wrong_token(dummy_client):
120-
with pytest.raises(Exception):
121+
with pytest.raises(HTTPException):
121122
dummy_client.parse_passport("./tests/data/expense_receipts/receipt.jpg")
122123

123124

124125
def test_parse_license_plate_with_wrong_token(dummy_client):
125-
with pytest.raises(Exception):
126+
with pytest.raises(HTTPException):
126127
dummy_client.parse_license_plate("./tests/data/license_plates/plate.png")
127128

128129

@@ -147,26 +148,33 @@ def test_response_with_passport_type():
147148

148149

149150
def test_request_with_filepath(dummy_client):
150-
with pytest.raises(Exception):
151+
with pytest.raises(HTTPException):
151152
dummy_client.parse_receipt(
152153
"./tests/data/expense_receipts/receipt.jpg", input_type="path"
153154
)
154155

155156

156157
def test_request_with_file(dummy_client):
157-
with pytest.raises(Exception):
158+
with pytest.raises(HTTPException):
158159
dummy_client.parse_receipt(
159-
open("./tests/data/expense_receipts/receipt.jpg"), input_type="file"
160+
open("./tests/data/expense_receipts/receipt.jpg", "rb"), input_type="stream"
160161
)
161162

162163

163-
def test_request_with_base64(dummy_client):
164+
def test_request_with_base64_no_filename(dummy_client):
164165
with open("./tests/data/expense_receipts/receipt.txt", "r") as fh:
165166
b64 = fh.read()
166-
with pytest.raises(Exception):
167+
with pytest.raises(AssertionError):
167168
dummy_client.parse_receipt(b64, input_type="base64")
168169

169170

171+
def test_request_with_base64(dummy_client):
172+
with open("./tests/data/expense_receipts/receipt.txt", "r") as fh:
173+
b64 = fh.read()
174+
with pytest.raises(HTTPException):
175+
dummy_client.parse_receipt(b64, input_type="base64", filename="receipt.txt")
176+
177+
170178
def test_request_without_raise_on_error(dummy_client_dont_raise):
171179
result = dummy_client_dont_raise.parse_receipt(
172180
"./tests/data/expense_receipts/receipt.jpg", input_type="path"
@@ -186,13 +194,13 @@ def test_request_without_raise_on_error_include_words(dummy_client_dont_raise):
186194

187195

188196
def test_request_with_file_wrong_type(dummy_client):
189-
with pytest.raises(Exception):
197+
with pytest.raises(AssertionError):
190198
dummy_client.parse_receipt(open("./tests/data/test.txt"), input_type="file")
191199

192-
with pytest.raises(Exception):
200+
with pytest.raises(AssertionError):
193201
dummy_client.parse_receipt("./tests/data/test.txt", input_type="path")
194202

195203

196-
def test_mpdf_reconstruct(dummy_client):
197-
with pytest.raises(Exception):
204+
def test_pdf_reconstruct(dummy_client):
205+
with pytest.raises(HTTPException):
198206
dummy_client.parse_invoice("./tests/data/invoices/invoice_6p.pdf")

0 commit comments

Comments
 (0)