Files
docling-parse/app/pybind_parse.cpp
T
Peter W. J. Staar 92e02ec4c1 feat: read page by page (#7)
* first working version to parse page-by-page

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* added the read page-by-page using bytesio

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

---------

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
2024-08-21 08:50:57 +02:00

37 lines
1.1 KiB
C++

//-*-C++-*-
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/buffer_info.h>
//#include <pybind11/numpy.h>
#include <pybind/utils/pybind11_json.h>
#include <pybind/docling_parser.h>
// Defines the Python extension module `docling_parse`.
//
// The module exposes docling::docling_parser to Python as the class
// `pdf_parser`, with a no-argument constructor and the member functions
// registered below under the same names they have in C++.
PYBIND11_MODULE(docling_parse, m) {
  namespace py = pybind11;
  using parser_type = docling::docling_parser;

  py::class_<parser_type> pdf_parser(m, "pdf_parser");

  // Construction and housekeeping (registered without docstrings).
  pdf_parser.def(py::init<>());
  pdf_parser.def("set_loglevel", &parser_type::set_loglevel);
  pdf_parser.def("unload_documents", &parser_type::unload_documents);

  // Whole-document parsing. `find_cells` is overloaded on the C++ side, so
  // the overload taking a std::string is selected explicitly.
  pdf_parser.def("find_cells",
                 py::overload_cast<std::string>(&parser_type::find_cells),
                 "parse pdf-document from path into json");
  pdf_parser.def("find_cells_from_bytesio",
                 &parser_type::find_cells_from_bytesio,
                 "parse pdf-document from a BytesIO object");

  // Page-by-page parsing.
  pdf_parser.def("find_cells_on_page",
                 &parser_type::find_cells_on_page,
                 "parse specific page in pdf-document from path into json");
  pdf_parser.def("find_cells_from_bytesio_on_page",
                 &parser_type::find_cells_from_bytesio_on_page,
                 "parse pdf-document from a BytesIO object for a specific page");
}