mirror of
https://github.com/docling-project/docling.git
synced 2026-05-17 13:10:38 +00:00
42157a3e10
* Move client SDK to docling Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add client SDK examples Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Mark client SDK as experimental Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
85 lines
2.4 KiB
Python
Vendored
85 lines
2.4 KiB
Python
Vendored
# NOTE: docling.service_client is experimental and may change in future releases.
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
|
|
from docling.datamodel.base_models import OutputFormat
|
|
from docling.datamodel.service.options import (
|
|
ConvertDocumentsOptions as ConvertDocumentsRequestOptions,
|
|
)
|
|
from docling.service_client import DoclingServiceClient
|
|
|
|
# Name of the environment variable that must hold the docling-serve URL.
SERVICE_URL_ENV: str = "DOCLING_SERVICE_URL"

# Name of the environment variable that may hold an API key for the service.
SERVICE_API_KEY_ENV: str = "DOCLING_SERVICE_API_KEY"

# arXiv PDF URLs used as conversion inputs by this example.
SAMPLE_SOURCES: list[str] = [
    "https://arxiv.org/pdf/2206.01062",
    "https://arxiv.org/pdf/2305.03393",
]
|
|
|
|
|
|
def _service_url() -> str:
    """Return the service URL read from the environment.

    The value is looked up under the variable named by ``SERVICE_URL_ENV``.

    Raises:
        SystemExit: If the variable is unset or holds an empty string.
    """
    configured = os.environ.get(SERVICE_URL_ENV)
    if configured:
        return configured
    # Exit with a readable hint rather than a traceback when misconfigured.
    raise SystemExit(f"Set {SERVICE_URL_ENV} to a running docling-serve URL.")
|
|
|
|
|
|
def create_conversion_options() -> ConvertDocumentsRequestOptions:
    """Build the request options used by the conversion examples.

    OCR, table-structure extraction and image inclusion are switched off,
    JSON is the only requested output format, and ``abort_on_error`` is
    set to ``False``.

    Returns:
        A freshly constructed ``ConvertDocumentsRequestOptions`` instance.
    """
    request_options = ConvertDocumentsRequestOptions(
        do_ocr=False,
        do_table_structure=False,
        include_images=False,
        to_formats=[OutputFormat.JSON],
        abort_on_error=False,
    )
    return request_options
|
|
|
|
|
|
def run_convert(client: DoclingServiceClient, source: str) -> None:
    """Convert a single source via ``client.convert`` and print a summary.

    Args:
        client: Service client used to submit the conversion.
        source: The source to convert (this example passes a PDF URL).

    Prints the result status, the document name and the number of pages
    in the returned document.
    """
    print("\n=== convert() (single source) ===")
    request_options = create_conversion_options()
    outcome = client.convert(source=source, options=request_options)
    print("status:", outcome.status.value)
    converted = outcome.document
    print("document name:", converted.name)
    print("num pages in output:", len(converted.pages))
|
|
|
|
|
|
def run_convert_all(client: DoclingServiceClient, sources: list[str]) -> None:
    """Convert several sources via ``client.convert_all`` and list the results.

    Args:
        client: Service client used to submit the conversions.
        sources: The sources to convert.

    The call is made with ``max_concurrency=2`` and ``raises_on_error=False``.
    One line is printed per result with a 1-based index, the input file
    name and the result status.
    """
    print("\n=== convert_all() (multiple sources) ===")
    outcomes = client.convert_all(
        sources=sources,
        options=create_conversion_options(),
        max_concurrency=2,
        raises_on_error=False,
    )
    for position, outcome in enumerate(outcomes, start=1):
        print(
            f"{position}.",
            "input=",
            outcome.input.file.name,
            "status=",
            outcome.status.value,
        )
|
|
|
|
|
|
def main() -> None:
    """Run the example end to end against a docling-serve instance.

    Opens a ``DoclingServiceClient`` as a context manager, prints the
    health status, tries to print up to five version keys (a failure there
    is reported and skipped), then runs the single-source and multi-source
    conversion demos.

    Raises:
        SystemExit: If the service URL environment variable is not set.
    """
    base_url = _service_url()
    api_key = os.environ.get(SERVICE_API_KEY_ENV)

    with DoclingServiceClient(url=base_url, api_key=api_key) as client:
        print("health:", client.health().status)

        # The version endpoint is treated as optional: report and carry on.
        try:
            version_info = client.version()
            leading_keys = sorted(version_info.keys())[:5]
            print("version keys:", ", ".join(leading_keys), "...")
        except Exception as exc:
            print("version endpoint unavailable:", exc)

        run_convert(client, SAMPLE_SOURCES[0])
        run_convert_all(client, SAMPLE_SOURCES)


# Only run the demo when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|