11import json
2- from typing import BinaryIO , Dict , List , NamedTuple , Optional , Type
2+ from typing import BinaryIO, Dict, List, NamedTuple, Optional, Type, Union
33
44from mindee import documents
55from mindee.documents.base import Document, TypeDocument
1010 Base64Input,
1111 BytesInput,
1212 FileInput,
13- InputSource ,
13+ LocalInputSource ,
1414 PathInput,
15+ UrlInputSource,
1516)
1617from mindee.logger import logger
1718from mindee.response import PredictResponse
@@ -23,13 +24,13 @@ def get_bound_classname(type_var) -> str:
2324
2425
2526class DocumentClient:
26- input_doc : InputSource
27+ input_doc: Union[LocalInputSource, UrlInputSource]
2728 doc_configs: DocumentConfigDict
2829 raise_on_error: bool = True
2930
3031 def __init__(
3132 self,
32- input_doc : InputSource ,
33+ input_doc: Union[LocalInputSource, UrlInputSource] ,
3334 doc_configs: DocumentConfigDict,
3435 raise_on_error: bool,
3536 ):
@@ -108,12 +109,13 @@ def parse(
108109
109110 doc_config = self.doc_configs[config_key]
110111 doc_config.check_api_keys()
111- if page_options and self .input_doc .is_pdf ():
112- self .input_doc .process_pdf (
113- page_options .operation ,
114- page_options .on_min_pages ,
115- page_options .page_indexes ,
116- )
112+ if not isinstance(self.input_doc, UrlInputSource):
113+ if page_options and self.input_doc.is_pdf():
114+ self.input_doc.process_pdf(
115+ page_options.operation,
116+ page_options.on_min_pages,
117+ page_options.page_indexes,
118+ )
117119 return self._make_request(
118120 document_class, doc_config, include_words, close_file, cropper
119121 )
@@ -152,7 +154,8 @@ def _make_request(
152154
153155 def close(self) -> None:
154156 """Close the file object."""
155- self .input_doc .file_object .close ()
157+ if not isinstance(self.input_doc, UrlInputSource):
158+ self.input_doc.file_object.close()
156159
157160
158161class ConfigSpec(NamedTuple):
@@ -397,3 +400,21 @@ def doc_from_bytes(
397400 doc_configs=self._doc_configs,
398401 raise_on_error=self.raise_on_error,
399402 )
403+
404+ def doc_from_url(
405+ self,
406+ url: str,
407+ ) -> DocumentClient:
408+ """
409+ Load a document from a URL.
410+
411+ :param url: URL of the document to load
412+ """
413+ input_doc = UrlInputSource(
414+ url,
415+ )
416+ return DocumentClient(
417+ input_doc=input_doc,
418+ doc_configs=self._doc_configs,
419+ raise_on_error=self.raise_on_error,
420+ )
0 commit comments