diff --git a/libs/extractor-api-lib/poetry.lock b/libs/extractor-api-lib/poetry.lock index 0a274ebb..5c9fb7b6 100644 --- a/libs/extractor-api-lib/poetry.lock +++ b/libs/extractor-api-lib/poetry.lock @@ -7595,14 +7595,14 @@ files = [ [[package]] name = "unstructured" -version = "0.18.15" +version = "0.18.18" description = "A library that prepares raw documents for downstream ML tasks." optional = false python-versions = ">=3.10.0" groups = ["main"] files = [ - {file = "unstructured-0.18.15-py3-none-any.whl", hash = "sha256:f05b1defcbe8190319d30da8adddbb888f74bf8ec7f65886867d7dca41d67ad0"}, - {file = "unstructured-0.18.15.tar.gz", hash = "sha256:81d8481280a4ac5cefe74bdb6db3687e8f240d5643706f86728eac39549112b5"}, + {file = "unstructured-0.18.18-py3-none-any.whl", hash = "sha256:d5189bdd5e2a1c5ed3cc289cfb4fb483c6f2dd544b42744bdc5b81d3388ea527"}, + {file = "unstructured-0.18.18.tar.gz", hash = "sha256:cfe6c84a36d374e5767930e13cfc10622357b3b68a5b7c735fdb1eeca08c6b57"}, ] [package.dependencies] @@ -7631,19 +7631,19 @@ unstructured-client = "*" wrapt = "*" [package.extras] -all-docs = ["effdet", "google-cloud-vision", "markdown", "msoffcrypto-tool", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] +all-docs = ["effdet", "google-cloud-vision", "markdown", "msoffcrypto-tool", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi_heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] csv = ["pandas"] doc = ["python-docx (>=1.1.2)"] docx = ["python-docx (>=1.1.2)"] epub = ["pypandoc"] huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"] -image = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)"] -local-inference = ["effdet", "google-cloud-vision", "markdown", "msoffcrypto-tool", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] +image = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi_heif", "pikepdf", "pypdf", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)"] +local-inference = ["effdet", "google-cloud-vision", "markdown", "msoffcrypto-tool", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi_heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)", "xlrd"] md = ["markdown"] odt = ["pypandoc", "python-docx (>=1.1.2)"] org = ["pypandoc"] paddleocr = ["paddlepaddle (>=3.0.0b1)", "unstructured.paddleocr (==2.10.0)"] -pdf = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)"] +pdf = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi_heif", "pikepdf", "pypdf", "unstructured-inference (>=1.0.5)", "unstructured.pytesseract (>=0.3.12)"] ppt = ["python-pptx (>=1.0.1)"] pptx = ["python-pptx (>=1.0.1)"] rst = ["pypandoc"] @@ -8334,4 +8334,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt [metadata] lock-version = "2.1" python-versions = ">=3.13,<3.14" -content-hash = "d61d4c32efc569d14cc11cee72947dfe83c08894189f7ccbe9353cccdc92e48e" +content-hash = "fe8793677335dedfa13b743f6c1843a7cdf57581fcb9cb2151829a29b41252b8" diff --git a/libs/extractor-api-lib/pyproject.toml b/libs/extractor-api-lib/pyproject.toml index 8ca70313..e7c2e87a 100644 --- a/libs/extractor-api-lib/pyproject.toml +++ b/libs/extractor-api-lib/pyproject.toml @@ -109,7 +109,7 @@ partial = "^1.0" pyyaml = "^6.0.2" numpy = "^2.2.5" docx2txt = "^0.9" -unstructured = {extras = ["docx", "pptx"], version = "0.18.15"} +unstructured = {extras = ["docx", "pptx"], version = "0.18.18"} html5lib = "^1.1" langchain-community = "^0.4.1" atlassian-python-api = "^4.0.3"