mirror of
https://github.com/docling-project/docling-parse.git
synced 2026-05-17 13:10:49 +00:00
92e02ec4c1
* first working version to parse page-by-page Signed-off-by: Peter Staar <taa@zurich.ibm.com> * added the read page-by-page using bytesio Signed-off-by: Peter Staar <taa@zurich.ibm.com> --------- Signed-off-by: Peter Staar <taa@zurich.ibm.com>
37 lines
1.1 KiB
C++
37 lines
1.1 KiB
C++
//-*-C++-*-
|
|
|
|
#include <pybind11/pybind11.h>
|
|
#include <pybind11/stl.h>
|
|
#include <pybind11/buffer_info.h>
|
|
//#include <pybind11/numpy.h>
|
|
|
|
#include <pybind/utils/pybind11_json.h>
|
|
|
|
#include <pybind/docling_parser.h>
|
|
|
|
// Entry point of the Python extension module `docling_parse`.
//
// Registers docling::docling_parser with Python under the class name
// `pdf_parser`, together with a default constructor and the member functions
// listed below. Each Python method name maps 1:1 onto the C++ member function
// of the same name.
PYBIND11_MODULE(docling_parse, m) {
  namespace py = pybind11;
  using parser_type = docling::docling_parser;

  py::class_<parser_type> parser_binding(m, "pdf_parser");

  // Construction: no-argument constructor.
  parser_binding.def(py::init());

  // Housekeeping calls, bound without a docstring.
  parser_binding.def("set_loglevel", &parser_type::set_loglevel);
  parser_binding.def("unload_documents", &parser_type::unload_documents);

  // Whole-document parsing. `find_cells` is bound through overload_cast,
  // which selects the overload whose single parameter is a std::string.
  parser_binding.def("find_cells",
                     py::overload_cast<std::string>(&parser_type::find_cells),
                     "parse pdf-document from path into json");
  parser_binding.def("find_cells_from_bytesio",
                     &parser_type::find_cells_from_bytesio,
                     "parse pdf-document from a BytesIO object");

  // Single-page parsing, mirroring the two whole-document entry points above.
  parser_binding.def("find_cells_on_page",
                     &parser_type::find_cells_on_page,
                     "parse specific page in pdf-document from path into json");
  parser_binding.def("find_cells_from_bytesio_on_page",
                     &parser_type::find_cells_from_bytesio_on_page,
                     "parse pdf-document from a BytesIO object for a specific page");
}
|