feat!: Upgrade to v2.0.0 (#48)

* feat!: Upgrade to v2.0.0

Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>

* Dummy change

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Rename old parser to pdf_parser_v1

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2024-10-23 14:15:52 +02:00
committed by GitHub
parent 48451ad095
commit 6fdd74870d
7 changed files with 17 additions and 17 deletions
+1 -1
View File
@@ -42,7 +42,7 @@ docker build --progress=plain \
COPY ./dist/*.tar.gz /src/
RUN USE_SYSTEM_DEPS=\$USE_SYSTEM_DEPS pip3.11 install /src/docling_parse*.tar.gz \
&& python3.11 -c 'from docling_parse.docling_parse import pdf_parser, pdf_parser_v2'
&& python3.11 -c 'from docling_parse.docling_parse import pdf_parser_v1, pdf_parser_v2'
COPY ./tests /src/tests
+1 -1
View File
@@ -164,7 +164,7 @@ To build the package, simply run (make sure [poetry](https://python-poetry.org/)
poetry build
```
To test the package, run,
To test the package, run:
```
poetry run pytest ./tests -v -s
+1 -1
View File
@@ -13,7 +13,7 @@
PYBIND11_MODULE(docling_parse, m) {
// v1 parser, kept purely for backward compatibility
pybind11::class_<docling::docling_parser_v1>(m, "pdf_parser")
pybind11::class_<docling::docling_parser_v1>(m, "pdf_parser_v1")
.def(pybind11::init())
.def("set_loglevel", &docling::docling_parser_v1::set_loglevel)
+2 -2
View File
@@ -5,7 +5,7 @@ import os
from tabulate import tabulate
from docling_parse.docling_parse import pdf_parser
from docling_parse.docling_parse import pdf_parser_v1
def main():
@@ -52,7 +52,7 @@ def main():
# Print the path to the PDF file (or add your processing logic here)
parser = pdf_parser()
parser = pdf_parser_v1()
parser.set_loglevel(args.log_level)
overview = []
+2 -2
View File
@@ -4,7 +4,7 @@ import os
from tabulate import tabulate
import docling_parse
from docling_parse.docling_parse import pdf_parser
from docling_parse.docling_parse import pdf_parser_v1
def main():
@@ -37,7 +37,7 @@ def main():
# Print the path to the PDF file (or add your processing logic here)
parser = pdf_parser()
parser = pdf_parser_v1()
parser.set_loglevel(args.log_level)
doc_file = args.pdf # filename
+2 -2
View File
@@ -5,7 +5,7 @@ import os
from tabulate import tabulate
from docling_parse import pdf_parser, pdf_parser_v2
from docling_parse import pdf_parser_v1, pdf_parser_v2
try:
from PIL import Image, ImageDraw
@@ -98,7 +98,7 @@ def visualise_v1(
log_level: str, pdf_path: str, interactive: str, output_dir: str, page_num: int
):
parser = pdf_parser()
parser = pdf_parser_v1()
parser.set_loglevel_with_label(log_level)
doc_key = "key"
+8 -8
View File
@@ -38,7 +38,7 @@ def verify_reference_output(true_doc, pred_doc):
"""
def test_reference_documents_from_filenames():
parser = docling_parse.pdf_parser()
parser = docling_parse.pdf_parser_v1()
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
@@ -63,7 +63,7 @@ def test_reference_documents_from_filenames():
"""
def test_reference_documents_from_filenames_page_by_page():
parser = docling_parse.pdf_parser()
parser = docling_parse.pdf_parser_v1()
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
@@ -90,7 +90,7 @@ def test_reference_documents_from_filenames_page_by_page():
def test_reference_documents_from_filenames_with_keys():
parser = docling_parse.pdf_parser()
parser = docling_parse.pdf_parser_v1()
#parser.set_loglevel(4)
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
@@ -129,7 +129,7 @@ def test_reference_documents_from_filenames_with_keys():
def test_reference_documents_from_filenames_with_keys_page_by_page():
parser = docling_parse.pdf_parser()
parser = docling_parse.pdf_parser_v1()
#parser.set_loglevel(0)
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
@@ -176,7 +176,7 @@ def test_reference_documents_from_filenames_with_keys_page_by_page():
"""
def test_reference_documents_from_bytesio():
parser = docling_parse.pdf_parser()
parser = docling_parse.pdf_parser_v1()
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
@@ -209,7 +209,7 @@ def test_reference_documents_from_bytesio():
"""
def test_reference_documents_from_bytesio_page_by_page():
parser = docling_parse.pdf_parser()
parser = docling_parse.pdf_parser_v1()
#parser.set_loglevel(1)
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
@@ -244,7 +244,7 @@ def test_reference_documents_from_bytesio_page_by_page():
def test_reference_documents_from_bytesio_with_keys():
parser = docling_parse.pdf_parser()
parser = docling_parse.pdf_parser_v1()
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
@@ -284,7 +284,7 @@ def test_reference_documents_from_bytesio_with_keys():
assert verify_reference_output(true_doc, pred_doc), "verify_reference_output(true_doc, pred_doc)"
def test_reference_documents_from_bytesio_with_keys_page_by_page():
parser = docling_parse.pdf_parser()
parser = docling_parse.pdf_parser_v1()
#parser.set_loglevel(4)
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")