diff --git a/docling_parse/processing_dir.py b/docling_parse/processing_dir.py index 5f0f480..d14d55f 100644 --- a/docling_parse/processing_dir.py +++ b/docling_parse/processing_dir.py @@ -101,7 +101,7 @@ async def async_process_files_from_queue( file_queue: Queue, page_level: bool, loglevel: str, sync: bool ): - parser = DoclingPdfParser(loglevel="fatal") + parser = DoclingPdfParser(loglevel=loglevel) overview = [] @@ -112,7 +112,7 @@ async def async_process_files_from_queue( break # logging.info( - print(f"Queue-size [{file_queue.qsize()}], Processing task: {task.file_name}") + print(f"Queue-size [{file_queue.qsize()}], Processing task (sync: {sync}): {task.file_name}") try: start_time = time.time() @@ -153,7 +153,7 @@ async def async_process_files_from_queue( # pages = await asyncio.gather(*page_tasks) # pages = await asyncio.gather(page_tasks[0:4]) - STEP = 2 + STEP = 1 for i in range(0, len(page_tasks), STEP): print(i) sublist = page_tasks[i : i + STEP] diff --git a/src/v2/pdf_decoders/document.h b/src/v2/pdf_decoders/document.h index ae1c294..2e21ae4 100644 --- a/src/v2/pdf_decoders/document.h +++ b/src/v2/pdf_decoders/document.h @@ -329,12 +329,14 @@ namespace pdflib { std::lock_guard lock(mtx); - json_pages[l] = page_decoder.get(); std::stringstream ss; - ss << "decoding page " << page_number; + ss << "decoding page: " << page_number; - timings[ss.str()] = page_timer.get_time(); + LOG_S(INFO) << ss.str(); + + json_pages[l] = page_decoder.get(); + timings[ss.str()] = page_timer.get_time(); } } else