mirror of
https://github.com/docling-project/docling-parse.git
synced 2026-05-17 13:10:49 +00:00
feat!: Upgrade to v2.0.0 (#48)
* feat!: Upgrade to v2.0.0
  Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
* Dummy change
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* rename old parser as pdf_parser_v1
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
---------
Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -42,7 +42,7 @@ docker build --progress=plain \
|
||||
COPY ./dist/*.tar.gz /src/
|
||||
|
||||
RUN USE_SYSTEM_DEPS=\$USE_SYSTEM_DEPS pip3.11 install /src/docling_parse*.tar.gz \
|
||||
&& python3.11 -c 'from docling_parse.docling_parse import pdf_parser, pdf_parser_v2'
|
||||
&& python3.11 -c 'from docling_parse.docling_parse import pdf_parser_v1, pdf_parser_v2'
|
||||
|
||||
COPY ./tests /src/tests
|
||||
|
||||
|
||||
@@ -164,7 +164,7 @@ To build the package, simply run (make sure [poetry](https://python-poetry.org/)
|
||||
poetry build
|
||||
```
|
||||
|
||||
To test the package, run,
|
||||
To test the package, run:
|
||||
|
||||
```
|
||||
poetry run pytest ./tests -v -s
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
PYBIND11_MODULE(docling_parse, m) {
|
||||
|
||||
// purely for backward compatibility
|
||||
pybind11::class_<docling::docling_parser_v1>(m, "pdf_parser")
|
||||
pybind11::class_<docling::docling_parser_v1>(m, "pdf_parser_v1")
|
||||
.def(pybind11::init())
|
||||
|
||||
.def("set_loglevel", &docling::docling_parser_v1::set_loglevel)
|
||||
|
||||
@@ -5,7 +5,7 @@ import os
|
||||
|
||||
from tabulate import tabulate
|
||||
|
||||
from docling_parse.docling_parse import pdf_parser
|
||||
from docling_parse.docling_parse import pdf_parser_v1
|
||||
|
||||
|
||||
def main():
|
||||
@@ -52,7 +52,7 @@ def main():
|
||||
|
||||
# Print the path to the PDF file (or add your processing logic here)
|
||||
|
||||
parser = pdf_parser()
|
||||
parser = pdf_parser_v1()
|
||||
parser.set_loglevel(args.log_level)
|
||||
|
||||
overview = []
|
||||
|
||||
@@ -4,7 +4,7 @@ import os
|
||||
from tabulate import tabulate
|
||||
|
||||
import docling_parse
|
||||
from docling_parse.docling_parse import pdf_parser
|
||||
from docling_parse.docling_parse import pdf_parser_v1
|
||||
|
||||
|
||||
def main():
|
||||
@@ -37,7 +37,7 @@ def main():
|
||||
|
||||
# Print the path to the PDF file (or add your processing logic here)
|
||||
|
||||
parser = pdf_parser()
|
||||
parser = pdf_parser_v1()
|
||||
parser.set_loglevel(args.log_level)
|
||||
|
||||
doc_file = args.pdf # filename
|
||||
|
||||
@@ -5,7 +5,7 @@ import os
|
||||
|
||||
from tabulate import tabulate
|
||||
|
||||
from docling_parse import pdf_parser, pdf_parser_v2
|
||||
from docling_parse import pdf_parser_v1, pdf_parser_v2
|
||||
|
||||
try:
|
||||
from PIL import Image, ImageDraw
|
||||
@@ -98,7 +98,7 @@ def visualise_v1(
|
||||
log_level: str, pdf_path: str, interactive: str, output_dir: str, page_num: int
|
||||
):
|
||||
|
||||
parser = pdf_parser()
|
||||
parser = pdf_parser_v1()
|
||||
parser.set_loglevel_with_label(log_level)
|
||||
|
||||
doc_key = "key"
|
||||
|
||||
@@ -38,7 +38,7 @@ def verify_reference_output(true_doc, pred_doc):
|
||||
"""
|
||||
def test_reference_documents_from_filenames():
|
||||
|
||||
parser = docling_parse.pdf_parser()
|
||||
parser = docling_parse.pdf_parser_v1()
|
||||
|
||||
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
|
||||
|
||||
@@ -63,7 +63,7 @@ def test_reference_documents_from_filenames():
|
||||
"""
|
||||
def test_reference_documents_from_filenames_page_by_page():
|
||||
|
||||
parser = docling_parse.pdf_parser()
|
||||
parser = docling_parse.pdf_parser_v1()
|
||||
|
||||
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
|
||||
|
||||
@@ -90,7 +90,7 @@ def test_reference_documents_from_filenames_page_by_page():
|
||||
|
||||
def test_reference_documents_from_filenames_with_keys():
|
||||
|
||||
parser = docling_parse.pdf_parser()
|
||||
parser = docling_parse.pdf_parser_v1()
|
||||
#parser.set_loglevel(4)
|
||||
|
||||
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
|
||||
@@ -129,7 +129,7 @@ def test_reference_documents_from_filenames_with_keys():
|
||||
|
||||
def test_reference_documents_from_filenames_with_keys_page_by_page():
|
||||
|
||||
parser = docling_parse.pdf_parser()
|
||||
parser = docling_parse.pdf_parser_v1()
|
||||
#parser.set_loglevel(0)
|
||||
|
||||
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
|
||||
@@ -176,7 +176,7 @@ def test_reference_documents_from_filenames_with_keys_page_by_page():
|
||||
"""
|
||||
def test_reference_documents_from_bytesio():
|
||||
|
||||
parser = docling_parse.pdf_parser()
|
||||
parser = docling_parse.pdf_parser_v1()
|
||||
|
||||
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
|
||||
|
||||
@@ -209,7 +209,7 @@ def test_reference_documents_from_bytesio():
|
||||
"""
|
||||
def test_reference_documents_from_bytesio_page_by_page():
|
||||
|
||||
parser = docling_parse.pdf_parser()
|
||||
parser = docling_parse.pdf_parser_v1()
|
||||
#parser.set_loglevel(1)
|
||||
|
||||
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
|
||||
@@ -244,7 +244,7 @@ def test_reference_documents_from_bytesio_page_by_page():
|
||||
|
||||
def test_reference_documents_from_bytesio_with_keys():
|
||||
|
||||
parser = docling_parse.pdf_parser()
|
||||
parser = docling_parse.pdf_parser_v1()
|
||||
|
||||
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
|
||||
|
||||
@@ -284,7 +284,7 @@ def test_reference_documents_from_bytesio_with_keys():
|
||||
assert verify_reference_output(true_doc, pred_doc), "verify_reference_output(true_doc, pred_doc)"
|
||||
def test_reference_documents_from_bytesio_with_keys_page_by_page():
|
||||
|
||||
parser = docling_parse.pdf_parser()
|
||||
parser = docling_parse.pdf_parser_v1()
|
||||
#parser.set_loglevel(4)
|
||||
|
||||
pdf_docs = glob.glob("./tests/pdf_docs/tests/*.pdf")
|
||||
|
||||
Reference in New Issue
Block a user