mirror of
https://github.com/docling-project/docling-parse.git
synced 2026-05-17 13:10:49 +00:00
6fdd74870d
* feat!: Upgrade to v2.0.0 Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com> * Dummy change Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * rename old parser as pdf_parser_v1 Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com> Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
98 lines
3.8 KiB
C++
98 lines
3.8 KiB
C++
//-*-C++-*-
|
|
|
|
#include <pybind11/pybind11.h>
|
|
#include <pybind11/stl.h>
|
|
#include <pybind11/buffer_info.h>
|
|
//#include <pybind11/numpy.h>
|
|
|
|
#include <pybind/utils/pybind11_json.h>
|
|
|
|
#include <pybind/docling_parser_v1.h>
|
|
#include <pybind/docling_parser_v2.h>
|
|
|
|
// Python extension module `docling_parse`.
//
// Registers two Python classes backed by the C++ parser front-ends:
//   * `pdf_parser_v1` -> docling::docling_parser_v1
//   * `pdf_parser_v2` -> docling::docling_parser_v2
//
// Each Python method is bound one-to-one to the C++ member function of the
// same name; no wrapping or argument adaptation happens in this file.
PYBIND11_MODULE(docling_parse, m) {
  namespace py = pybind11;

  using parser_v1 = docling::docling_parser_v1;
  using parser_v2 = docling::docling_parser_v2;

  // ---------------------------------------------------------------------
  // pdf_parser_v1: kept purely for backward compatibility.
  // ---------------------------------------------------------------------
  py::class_<parser_v1> v1_bindings(m, "pdf_parser_v1");

  // construction (default constructor only)
  v1_bindings.def(py::init());

  // logging
  v1_bindings.def("set_loglevel", &parser_v1::set_loglevel);
  v1_bindings.def("set_loglevel_with_label", &parser_v1::set_loglevel_with_label);

  // inspection of loaded documents
  v1_bindings.def("is_loaded", &parser_v1::is_loaded);
  v1_bindings.def("list_loaded_keys", &parser_v1::list_loaded_keys);

  // loading
  v1_bindings.def("load_document", &parser_v1::load_document);
  v1_bindings.def("load_document_from_bytesio", &parser_v1::load_document_from_bytesio);

  // unloading
  v1_bindings.def("unload_document", &parser_v1::unload_document);
  v1_bindings.def("unload_documents", &parser_v1::unload_documents);

  v1_bindings.def("number_of_pages", &parser_v1::number_of_pages);

  // parsing; `overload_cast` selects the `parse_pdf_from_key` overload
  // that takes a single std::string argument.
  v1_bindings.def("parse_pdf_from_key",
                  py::overload_cast<std::string>(&parser_v1::parse_pdf_from_key),
                  "parse pdf-document using doc-key into json");
  v1_bindings.def("parse_pdf_from_key_on_page",
                  &parser_v1::parse_pdf_from_key_on_page,
                  "parse specific page in pdf-document using doc-key from path into json");

  // NOTE: the source carried a second, commented-out registration of the
  // very same v1 bindings at this point (labelled "exact copy of
  // `pdf_parser`"). It stays disabled and is not reproduced here.

  // ---------------------------------------------------------------------
  // pdf_parser_v2: next generation parser; per the original author's note
  // it is 10x faster with more fine-grained output.
  // ---------------------------------------------------------------------
  py::class_<parser_v2> v2_bindings(m, "pdf_parser_v2");

  // construction: default constructor first, then the overload taking a
  // `const std::string&` (registration order is preserved on purpose).
  v2_bindings.def(py::init());
  v2_bindings.def(py::init<const std::string&>());

  // logging
  v2_bindings.def("set_loglevel", &parser_v2::set_loglevel);
  v2_bindings.def("set_loglevel_with_label", &parser_v2::set_loglevel_with_label);

  // inspection of loaded documents
  v2_bindings.def("is_loaded", &parser_v2::is_loaded);
  v2_bindings.def("list_loaded_keys", &parser_v2::list_loaded_keys);

  // loading
  v2_bindings.def("load_document", &parser_v2::load_document);
  v2_bindings.def("load_document_from_bytesio", &parser_v2::load_document_from_bytesio);

  // unloading (only the single-document variant is bound for v2)
  v2_bindings.def("unload_document", &parser_v2::unload_document);

  v2_bindings.def("number_of_pages", &parser_v2::number_of_pages);

  // parsing; `overload_cast` selects the `parse_pdf_from_key` overload
  // that takes a single std::string argument.
  v2_bindings.def("parse_pdf_from_key",
                  py::overload_cast<std::string>(&parser_v2::parse_pdf_from_key),
                  "parse pdf-document using doc-key into json");
  v2_bindings.def("parse_pdf_from_key_on_page",
                  &parser_v2::parse_pdf_from_key_on_page,
                  "parse specific page in pdf-document using doc-key from path into json");
}
|