mirror of
https://github.com/docling-project/docling-parse.git
synced 2026-05-17 13:10:49 +00:00
feat: fixed the v2 parser to only return the pages that are requested (#47)
* fixed the v2 parser to only return the pages that are requested
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* updated the visualize script
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* fixed the default args for compilation
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* put std::make_pair to avoid warnings
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---------
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
836571afac
commit
48451ad095
+3
-3
@@ -25,9 +25,9 @@ void set_loglevel(std::string level)
|
||||
//loguru::set_verbosity(loguru::Verbosity_ERROR);
|
||||
}
|
||||
else
|
||||
loguru::g_stderr_verbosity = loguru::Verbosity_ERROR; {
|
||||
|
||||
}
|
||||
{
|
||||
loguru::g_stderr_verbosity = loguru::Verbosity_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
nlohmann::json create_config(std::filesystem::path ifile,
|
||||
|
||||
@@ -5,7 +5,7 @@ import os
|
||||
|
||||
from tabulate import tabulate
|
||||
|
||||
from docling_parse.docling_parse import pdf_parser, pdf_parser_v2
|
||||
from docling_parse import pdf_parser, pdf_parser_v2
|
||||
|
||||
try:
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
@@ -43,10 +43,7 @@ namespace docling
|
||||
|
||||
std::string pdf_resources_dir;
|
||||
|
||||
//std::map<std::string, std::filesystem::path> key2doc;
|
||||
std::map<std::string, decoder_ptr_type> key2doc;
|
||||
|
||||
//plib::parser parser;
|
||||
};
|
||||
|
||||
docling_parser_v2::docling_parser_v2():
|
||||
@@ -82,7 +79,6 @@ namespace docling
|
||||
std::map<std::string, double> timings = {};
|
||||
pdflib::pdf_resource<pdflib::PAGE_FONT>::initialise(data, timings);
|
||||
}
|
||||
|
||||
|
||||
void docling_parser_v2::set_loglevel(int level)
|
||||
{
|
||||
@@ -114,7 +110,7 @@ namespace docling
|
||||
{
|
||||
loguru::g_stderr_verbosity = loguru::Verbosity_INFO;
|
||||
}
|
||||
else if(level=="warning")
|
||||
else if(level=="warning" or level=="warn")
|
||||
{
|
||||
loguru::g_stderr_verbosity = loguru::Verbosity_WARNING;
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ namespace pdflib
|
||||
|
||||
private:
|
||||
|
||||
void update_timings(std::map<std::string, double>& timings_);
|
||||
void update_timings(std::map<std::string, double>& timings_, bool set_timer);
|
||||
|
||||
private:
|
||||
|
||||
@@ -181,7 +181,10 @@ namespace pdflib
|
||||
utils::timer timer;
|
||||
|
||||
nlohmann::json& json_pages = json_document["pages"];
|
||||
|
||||
json_pages = nlohmann::json::array({});
|
||||
|
||||
bool set_timer=true;
|
||||
|
||||
int page_number=0;
|
||||
for(QPDFObjectHandle page : qpdf_document.getAllPages())
|
||||
{
|
||||
@@ -190,7 +193,8 @@ namespace pdflib
|
||||
pdf_decoder<PAGE> page_decoder(page);
|
||||
|
||||
auto timings_ = page_decoder.decode_page();
|
||||
update_timings(timings_);
|
||||
update_timings(timings_, set_timer);
|
||||
set_timer = false;
|
||||
|
||||
json_pages.push_back(page_decoder.get());
|
||||
|
||||
@@ -208,10 +212,13 @@ namespace pdflib
|
||||
LOG_S(INFO) << "start decoding selected pages ...";
|
||||
utils::timer timer;
|
||||
|
||||
// make sure that we only return the page from the page-numbers
|
||||
nlohmann::json& json_pages = json_document["pages"];
|
||||
|
||||
json_pages = nlohmann::json::array({});
|
||||
|
||||
std::vector<QPDFObjectHandle> pages = qpdf_document.getAllPages();
|
||||
|
||||
|
||||
bool set_timer=true; // make sure we override all timings for this page-set
|
||||
for(auto page_number:page_numbers)
|
||||
{
|
||||
utils::timer timer;
|
||||
@@ -223,7 +230,9 @@ namespace pdflib
|
||||
pdf_decoder<PAGE> page_decoder(pages.at(page_number));
|
||||
|
||||
auto timings_ = page_decoder.decode_page();
|
||||
update_timings(timings_);
|
||||
|
||||
update_timings(timings_, set_timer);
|
||||
set_timer=false;
|
||||
|
||||
json_pages.push_back(page_decoder.get());
|
||||
|
||||
@@ -244,11 +253,11 @@ namespace pdflib
|
||||
timings[__FUNCTION__] = timer.get_time();
|
||||
}
|
||||
|
||||
void pdf_decoder<DOCUMENT>::update_timings(std::map<std::string, double>& timings_)
|
||||
void pdf_decoder<DOCUMENT>::update_timings(std::map<std::string, double>& timings_, bool set_timer)
|
||||
{
|
||||
for(auto itr=timings_.begin(); itr!=timings_.end(); itr++)
|
||||
{
|
||||
if(timings.count(itr->first)==0)
|
||||
if(timings.count(itr->first)==0 or set_timer)
|
||||
{
|
||||
timings[itr->first] = itr->second;
|
||||
}
|
||||
|
||||
@@ -82,19 +82,22 @@ namespace pdflib
|
||||
std::pair<double, double> pdf_resource<PAGE_LINE>::front()
|
||||
{
|
||||
assert(x.size()>0);
|
||||
return std::pair<double, double>(x.front(), y.front());
|
||||
//return std::pair<double, double>(x.front(), y.front());
|
||||
return std::make_pair(x.front(), y.front());
|
||||
}
|
||||
|
||||
std::pair<double, double> pdf_resource<PAGE_LINE>::back()
|
||||
{
|
||||
assert(x.size()>0);
|
||||
return std::pair<double, double>(x.back(), y.back());
|
||||
//return std::pair<double, double>(x.back(), y.back());
|
||||
return std::make_pair(x.back(), y.back());
|
||||
}
|
||||
|
||||
std::pair<double, double> pdf_resource<PAGE_LINE>::operator[](int i)
|
||||
{
|
||||
assert(x.size()>0 and i<x.size());
|
||||
return std::pair<double, double>(x[i], y[i]);
|
||||
//return std::pair<double, double>(x[i], y[i]);
|
||||
return std::make_pair(x[i], y[i]);
|
||||
}
|
||||
|
||||
void pdf_resource<PAGE_LINE>::transform(std::array<double, 9> trafo_matrix)
|
||||
|
||||
@@ -16684,11 +16684,11 @@
|
||||
"lines": []
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.003507,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.015862,
|
||||
"decode_resources": 0.008806,
|
||||
"sanitise_contents": 4.1e-05
|
||||
"decode_contents": 0.003358,
|
||||
"decode_dimensions": 4e-06,
|
||||
"decode_page": 0.015178,
|
||||
"decode_resources": 0.008326,
|
||||
"sanitise_contents": 3.9e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -29861,11 +29861,11 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.015415,
|
||||
"decode_contents": 0.015422,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.02518,
|
||||
"decode_resources": 0.007428,
|
||||
"sanitise_contents": 3.2e-05
|
||||
"decode_page": 0.025149,
|
||||
"decode_resources": 0.007392,
|
||||
"sanitise_contents": 2.9e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -40840,10 +40840,10 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.002348,
|
||||
"decode_contents": 0.00241,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.011055,
|
||||
"decode_resources": 0.006615,
|
||||
"decode_page": 0.011236,
|
||||
"decode_resources": 0.006414,
|
||||
"sanitise_contents": 2.5e-05
|
||||
}
|
||||
},
|
||||
@@ -54715,10 +54715,10 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.004488,
|
||||
"decode_contents": 0.004505,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.012061,
|
||||
"decode_resources": 0.005828,
|
||||
"decode_page": 0.012243,
|
||||
"decode_resources": 0.005641,
|
||||
"sanitise_contents": 2.8e-05
|
||||
}
|
||||
},
|
||||
@@ -71744,11 +71744,11 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.002497,
|
||||
"decode_contents": 0.002465,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.011164,
|
||||
"decode_resources": 0.006387,
|
||||
"sanitise_contents": 4.9e-05
|
||||
"decode_page": 0.011008,
|
||||
"decode_resources": 0.006174,
|
||||
"sanitise_contents": 4.1e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -88941,11 +88941,11 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.004848,
|
||||
"decode_contents": 0.004823,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.015907,
|
||||
"decode_resources": 0.008227,
|
||||
"sanitise_contents": 4e-05
|
||||
"decode_page": 0.016107,
|
||||
"decode_resources": 0.00802,
|
||||
"sanitise_contents": 3.5e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -109738,11 +109738,11 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.010908,
|
||||
"decode_contents": 0.01093,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.018141,
|
||||
"decode_resources": 0.005595,
|
||||
"sanitise_contents": 4e-05
|
||||
"decode_page": 0.018393,
|
||||
"decode_resources": 0.005545,
|
||||
"sanitise_contents": 3.8e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -126017,11 +126017,11 @@
|
||||
"lines": []
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.002365,
|
||||
"decode_contents": 0.00238,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.008046,
|
||||
"decode_resources": 0.003733,
|
||||
"sanitise_contents": 3.8e-05
|
||||
"decode_page": 0.007619,
|
||||
"decode_resources": 0.003613,
|
||||
"sanitise_contents": 3.7e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -140088,11 +140088,11 @@
|
||||
"lines": []
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.002037,
|
||||
"decode_contents": 0.002042,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.008592,
|
||||
"decode_resources": 0.004611,
|
||||
"sanitise_contents": 3.2e-05
|
||||
"decode_page": 0.008342,
|
||||
"decode_resources": 0.004467,
|
||||
"sanitise_contents": 3.5e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -156805,10 +156805,10 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.002362,
|
||||
"decode_contents": 0.002402,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.010877,
|
||||
"decode_resources": 0.006402,
|
||||
"decode_page": 0.010979,
|
||||
"decode_resources": 0.006272,
|
||||
"sanitise_contents": 3.8e-05
|
||||
}
|
||||
},
|
||||
@@ -176492,10 +176492,10 @@
|
||||
"lines": []
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.003171,
|
||||
"decode_contents": 0.003227,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.008376,
|
||||
"decode_resources": 0.002849,
|
||||
"decode_page": 0.007453,
|
||||
"decode_resources": 0.002728,
|
||||
"sanitise_contents": 4.7e-05
|
||||
}
|
||||
},
|
||||
@@ -186267,33 +186267,33 @@
|
||||
"lines": []
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.001524,
|
||||
"decode_contents": 0.001489,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.004688,
|
||||
"decode_resources": 0.001911,
|
||||
"decode_page": 0.004278,
|
||||
"decode_resources": 0.001836,
|
||||
"sanitise_contents": 2.2e-05
|
||||
}
|
||||
}
|
||||
],
|
||||
"timings": {
|
||||
"decode_contents": 0.05547,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_document": 0.154582,
|
||||
"decode_page": 0.149949,
|
||||
"decode_resources": 0.068392,
|
||||
"decoding page 0": 0.016121,
|
||||
"decoding page 1": 0.025364,
|
||||
"decoding page 10": 0.008664,
|
||||
"decoding page 11": 0.004831,
|
||||
"decoding page 2": 0.011215,
|
||||
"decoding page 3": 0.01223,
|
||||
"decoding page 4": 0.011415,
|
||||
"decoding page 5": 0.016156,
|
||||
"decoding page 6": 0.018376,
|
||||
"decoding page 7": 0.008278,
|
||||
"decoding page 8": 0.008797,
|
||||
"decoding page 9": 0.011129,
|
||||
"process_document_from_bytesio": 0.000393,
|
||||
"sanitise_contents": 0.00043200000000000004
|
||||
"decode_contents": 0.055453,
|
||||
"decode_dimensions": 4e-06,
|
||||
"decode_document": 0.152719,
|
||||
"decode_page": 0.14798499999999998,
|
||||
"decode_resources": 0.066428,
|
||||
"decoding page 0": 0.015425,
|
||||
"decoding page 1": 0.025324,
|
||||
"decoding page 10": 0.007762,
|
||||
"decoding page 11": 0.004436,
|
||||
"decoding page 2": 0.011409,
|
||||
"decoding page 3": 0.012426,
|
||||
"decoding page 4": 0.011271,
|
||||
"decoding page 5": 0.016368,
|
||||
"decoding page 6": 0.018673,
|
||||
"decoding page 7": 0.007881,
|
||||
"decoding page 8": 0.008561,
|
||||
"decoding page 9": 0.011247,
|
||||
"process_document_from_bytesio": 0.000421,
|
||||
"sanitise_contents": 0.000414
|
||||
}
|
||||
}
|
||||
@@ -16684,11 +16684,11 @@
|
||||
"lines": []
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.003505,
|
||||
"decode_contents": 0.003498,
|
||||
"decode_dimensions": 1e-06,
|
||||
"decode_page": 0.031621,
|
||||
"decode_resources": 0.02447,
|
||||
"sanitise_contents": 4.5e-05
|
||||
"decode_page": 0.03092,
|
||||
"decode_resources": 0.023731,
|
||||
"sanitise_contents": 4.1e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -29861,11 +29861,11 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.015547,
|
||||
"decode_contents": 0.015755,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.025567,
|
||||
"decode_resources": 0.007578,
|
||||
"sanitise_contents": 3.4e-05
|
||||
"decode_page": 0.025675,
|
||||
"decode_resources": 0.00753,
|
||||
"sanitise_contents": 3e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -40840,11 +40840,11 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.002357,
|
||||
"decode_contents": 0.002356,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.011122,
|
||||
"decode_resources": 0.006673,
|
||||
"sanitise_contents": 2.5e-05
|
||||
"decode_page": 0.011245,
|
||||
"decode_resources": 0.006435,
|
||||
"sanitise_contents": 2.6e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -54715,11 +54715,11 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.004545,
|
||||
"decode_contents": 0.00453,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.012175,
|
||||
"decode_resources": 0.005879,
|
||||
"sanitise_contents": 2.8e-05
|
||||
"decode_page": 0.012265,
|
||||
"decode_resources": 0.005659,
|
||||
"sanitise_contents": 3.1e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -71744,11 +71744,11 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.002495,
|
||||
"decode_contents": 0.002468,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.011211,
|
||||
"decode_resources": 0.006395,
|
||||
"sanitise_contents": 5.3e-05
|
||||
"decode_page": 0.011043,
|
||||
"decode_resources": 0.006183,
|
||||
"sanitise_contents": 6e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -88941,11 +88941,11 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.004854,
|
||||
"decode_contents": 0.004845,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.016029,
|
||||
"decode_resources": 0.008287,
|
||||
"sanitise_contents": 4e-05
|
||||
"decode_page": 0.016165,
|
||||
"decode_resources": 0.008012,
|
||||
"sanitise_contents": 3.6e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -109738,11 +109738,11 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.011055,
|
||||
"decode_contents": 0.011018,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.018426,
|
||||
"decode_resources": 0.005673,
|
||||
"sanitise_contents": 3.8e-05
|
||||
"decode_page": 0.018656,
|
||||
"decode_resources": 0.005701,
|
||||
"sanitise_contents": 3.9e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -126017,11 +126017,11 @@
|
||||
"lines": []
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.002381,
|
||||
"decode_contents": 0.002392,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.008162,
|
||||
"decode_resources": 0.003732,
|
||||
"sanitise_contents": 3.7e-05
|
||||
"decode_page": 0.007637,
|
||||
"decode_resources": 0.003598,
|
||||
"sanitise_contents": 5e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -140088,10 +140088,10 @@
|
||||
"lines": []
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.002052,
|
||||
"decode_contents": 0.002086,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.008661,
|
||||
"decode_resources": 0.004672,
|
||||
"decode_page": 0.008393,
|
||||
"decode_resources": 0.004465,
|
||||
"sanitise_contents": 3.2e-05
|
||||
}
|
||||
},
|
||||
@@ -156805,11 +156805,11 @@
|
||||
]
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.002397,
|
||||
"decode_contents": 0.002384,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.01092,
|
||||
"decode_resources": 0.006372,
|
||||
"sanitise_contents": 3.8e-05
|
||||
"decode_page": 0.010852,
|
||||
"decode_resources": 0.006186,
|
||||
"sanitise_contents": 4e-05
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -176494,8 +176494,8 @@
|
||||
"timings": {
|
||||
"decode_contents": 0.003181,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.008467,
|
||||
"decode_resources": 0.002853,
|
||||
"decode_page": 0.007396,
|
||||
"decode_resources": 0.002713,
|
||||
"sanitise_contents": 4.7e-05
|
||||
}
|
||||
},
|
||||
@@ -186267,33 +186267,33 @@
|
||||
"lines": []
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.001504,
|
||||
"decode_contents": 0.001509,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.004717,
|
||||
"decode_resources": 0.001924,
|
||||
"decode_page": 0.004286,
|
||||
"decode_resources": 0.001817,
|
||||
"sanitise_contents": 2.2e-05
|
||||
}
|
||||
}
|
||||
],
|
||||
"timings": {
|
||||
"decode_contents": 0.055873,
|
||||
"decode_contents": 0.05602199999999999,
|
||||
"decode_dimensions": 1e-06,
|
||||
"decode_document": 0.171846,
|
||||
"decode_page": 0.16707800000000003,
|
||||
"decode_resources": 0.08450799999999999,
|
||||
"decoding page 0": 0.031881,
|
||||
"decoding page 1": 0.025748,
|
||||
"decoding page 10": 0.008766,
|
||||
"decoding page 11": 0.004867,
|
||||
"decoding page 2": 0.011285,
|
||||
"decoding page 3": 0.012362,
|
||||
"decoding page 4": 0.011464,
|
||||
"decoding page 5": 0.016272,
|
||||
"decoding page 6": 0.018668,
|
||||
"decoding page 7": 0.008406,
|
||||
"decoding page 8": 0.008877,
|
||||
"decoding page 9": 0.011179,
|
||||
"process_document_from_file": 0.001949,
|
||||
"sanitise_contents": 0.000439
|
||||
"decode_document": 0.169451,
|
||||
"decode_page": 0.16453300000000004,
|
||||
"decode_resources": 0.08202999999999998,
|
||||
"decoding page 0": 0.031191,
|
||||
"decoding page 1": 0.025893,
|
||||
"decoding page 10": 0.007725,
|
||||
"decoding page 11": 0.004449,
|
||||
"decoding page 2": 0.01142,
|
||||
"decoding page 3": 0.012451,
|
||||
"decoding page 4": 0.01133,
|
||||
"decoding page 5": 0.016427,
|
||||
"decoding page 6": 0.01891,
|
||||
"decoding page 7": 0.007878,
|
||||
"decoding page 8": 0.008632,
|
||||
"decoding page 9": 0.011135,
|
||||
"process_document_from_file": 0.001263,
|
||||
"sanitise_contents": 0.00045400000000000003
|
||||
}
|
||||
}
|
||||
@@ -16684,22 +16684,22 @@
|
||||
"lines": []
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.003576,
|
||||
"decode_contents": 0.00338,
|
||||
"decode_dimensions": 1e-06,
|
||||
"decode_page": 0.01596,
|
||||
"decode_resources": 0.008973,
|
||||
"sanitise_contents": 4.7e-05
|
||||
"decode_page": 0.01523,
|
||||
"decode_resources": 0.008172,
|
||||
"sanitise_contents": 3.9e-05
|
||||
}
|
||||
}
|
||||
],
|
||||
"timings": {
|
||||
"decode_contents": 0.003576,
|
||||
"decode_contents": 0.00338,
|
||||
"decode_dimensions": 1e-06,
|
||||
"decode_document": 0.016939,
|
||||
"decode_page": 0.01596,
|
||||
"decode_resources": 0.008973,
|
||||
"decoding page 0": 0.016238,
|
||||
"process_document_from_bytesio": 0.000384,
|
||||
"sanitise_contents": 4.7e-05
|
||||
"decode_document": 0.016125,
|
||||
"decode_page": 0.01523,
|
||||
"decode_resources": 0.008172,
|
||||
"decoding page 0": 0.015462,
|
||||
"process_document_from_bytesio": 0.00043,
|
||||
"sanitise_contents": 3.9e-05
|
||||
}
|
||||
}
|
||||
@@ -16684,22 +16684,22 @@
|
||||
"lines": []
|
||||
},
|
||||
"timings": {
|
||||
"decode_contents": 0.003454,
|
||||
"decode_contents": 0.003508,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_page": 0.015488,
|
||||
"decode_resources": 0.008604,
|
||||
"sanitise_contents": 4e-05
|
||||
"decode_page": 0.015797,
|
||||
"decode_resources": 0.008661,
|
||||
"sanitise_contents": 4.2e-05
|
||||
}
|
||||
}
|
||||
],
|
||||
"timings": {
|
||||
"decode_contents": 0.003454,
|
||||
"decode_contents": 0.003508,
|
||||
"decode_dimensions": 0.0,
|
||||
"decode_document": 0.016365,
|
||||
"decode_page": 0.015488,
|
||||
"decode_resources": 0.008604,
|
||||
"decoding page 0": 0.015724,
|
||||
"process_document_from_file": 0.00124,
|
||||
"sanitise_contents": 4e-05
|
||||
"decode_document": 0.016818,
|
||||
"decode_page": 0.015797,
|
||||
"decode_resources": 0.008661,
|
||||
"decoding page 0": 0.016078,
|
||||
"process_document_from_file": 0.001244,
|
||||
"sanitise_contents": 4.2e-05
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -82,8 +82,8 @@ def verify_reference_output(true_doc, pred_doc):
|
||||
num_true_pages=len(true_doc["pages"])
|
||||
num_pred_pages=len(pred_doc["pages"])
|
||||
|
||||
message = f"len(pred_doc[\"pages\"])!=len(true_doc[\"pages\"]) => {num_true_pages}!={num_pred_pages}"
|
||||
assert num_true_pages==num_pred_pages, message
|
||||
message = f"len(pred_doc[\"pages\"])!=len(true_doc[\"pages\"]) => {num_pred_pages}!={num_true_pages}"
|
||||
assert num_pred_pages==num_true_pages, message
|
||||
|
||||
for pred_page,true_page in zip(pred_doc["pages"], true_doc["pages"]):
|
||||
# print(pred_page.keys())
|
||||
|
||||
Reference in New Issue
Block a user