mirror of
https://github.com/docling-project/docling.git
synced 2026-05-17 13:10:38 +00:00
42157a3e10
* Move client SDK to docling Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add client SDK examples Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Mark client SDK as experimental Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
180 lines
5.3 KiB
Python
180 lines
5.3 KiB
Python
import os
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from docling.datamodel.base_models import OutputFormat
|
|
from docling.datamodel.service.options import (
|
|
ConvertDocumentsOptions as ConvertDocumentsRequestOptions,
|
|
)
|
|
from docling.service_client import DoclingServiceClient, RawServiceResult
|
|
|
|
SERVICE_URL_ENV = "DOCLING_SERVICE_URL"
|
|
SERVICE_API_KEY_ENV = "DOCLING_SERVICE_API_KEY"
|
|
SERVICE_URL = os.environ.get(SERVICE_URL_ENV)
|
|
SERVICE_API_KEY = os.environ.get(SERVICE_API_KEY_ENV)
|
|
FIXTURES_DIR = Path(__file__).resolve().parent / "data" / "pdf"
|
|
|
|
pytestmark = [
|
|
pytest.mark.skipif(
|
|
bool(os.environ.get("CI")),
|
|
reason="requires a running external docling-serve host; disabled in CI",
|
|
),
|
|
pytest.mark.skipif(
|
|
not SERVICE_URL,
|
|
reason=f"requires a running docling-serve host; set {SERVICE_URL_ENV} to run",
|
|
),
|
|
]
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def live_service_url() -> str:
|
|
assert SERVICE_URL is not None
|
|
return SERVICE_URL.rstrip("/")
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def service_api_key() -> str | None:
|
|
return SERVICE_API_KEY
|
|
|
|
|
|
def _json_options() -> ConvertDocumentsRequestOptions:
|
|
return ConvertDocumentsRequestOptions(
|
|
do_ocr=False,
|
|
do_table_structure=False,
|
|
include_images=False,
|
|
to_formats=[OutputFormat.JSON],
|
|
abort_on_error=False,
|
|
)
|
|
|
|
|
|
def test_convert_and_submit_with_polling_watcher(
|
|
live_service_url: str, service_api_key: str | None, tmp_path: Path
|
|
) -> None:
|
|
source = FIXTURES_DIR / "2206.01062.pdf"
|
|
assert source.exists()
|
|
|
|
with DoclingServiceClient(
|
|
url=live_service_url,
|
|
api_key=service_api_key,
|
|
status_watcher="polling",
|
|
poll_server_wait=0.2,
|
|
job_timeout=300.0,
|
|
options=_json_options(),
|
|
) as client:
|
|
health = client.health()
|
|
assert health.status == "ok"
|
|
|
|
converted = client.convert(source=source)
|
|
assert converted.status.value in {"success", "partial_success"}
|
|
assert converted.document.name == "2206.01062"
|
|
|
|
job = client.submit(source=source, target_format=OutputFormat.JSON)
|
|
submitted = job.result(timeout=300.0)
|
|
assert submitted.status.value in {"success", "partial_success"}
|
|
assert submitted.document.name == "2206.01062"
|
|
|
|
|
|
def test_submit_non_json_returns_raw_payload(
|
|
live_service_url: str, service_api_key: str | None, tmp_path: Path
|
|
) -> None:
|
|
source = FIXTURES_DIR / "2206.01062.pdf"
|
|
assert source.exists()
|
|
|
|
with DoclingServiceClient(
|
|
url=live_service_url,
|
|
api_key=service_api_key,
|
|
status_watcher="polling",
|
|
poll_server_wait=0.2,
|
|
job_timeout=300.0,
|
|
) as client:
|
|
options = ConvertDocumentsRequestOptions(
|
|
do_ocr=False,
|
|
do_table_structure=False,
|
|
include_images=False,
|
|
to_formats=[OutputFormat.MARKDOWN],
|
|
abort_on_error=False,
|
|
)
|
|
job = client.submit(
|
|
source=source, options=options, target_format=OutputFormat.MARKDOWN
|
|
)
|
|
raw_result = job.result(timeout=300.0)
|
|
|
|
assert isinstance(raw_result, RawServiceResult)
|
|
assert len(raw_result.content) > 0
|
|
assert "zip" in raw_result.content_type
|
|
|
|
|
|
def test_convert_all_preserves_input_order(
|
|
live_service_url: str, service_api_key: str | None, tmp_path: Path
|
|
) -> None:
|
|
source = FIXTURES_DIR / "2206.01062.pdf"
|
|
assert source.exists()
|
|
source_a = tmp_path / "order-a.pdf"
|
|
source_b = tmp_path / "order-b.pdf"
|
|
source_a.write_bytes(source.read_bytes())
|
|
source_b.write_bytes(source.read_bytes())
|
|
|
|
with DoclingServiceClient(
|
|
url=live_service_url,
|
|
api_key=service_api_key,
|
|
status_watcher="polling",
|
|
poll_server_wait=0.2,
|
|
job_timeout=300.0,
|
|
) as client:
|
|
results = list(
|
|
client.convert_all(
|
|
sources=[source_a, source_b],
|
|
options=_json_options(),
|
|
max_concurrency=2,
|
|
)
|
|
)
|
|
|
|
assert len(results) == 2
|
|
assert results[0].input.file.name == "order-a.pdf"
|
|
assert results[1].input.file.name == "order-b.pdf"
|
|
|
|
|
|
def test_websocket_watcher_end_to_end(
|
|
live_service_url: str, service_api_key: str | None, tmp_path: Path
|
|
) -> None:
|
|
source = FIXTURES_DIR / "2206.01062.pdf"
|
|
assert source.exists()
|
|
|
|
with DoclingServiceClient(
|
|
url=live_service_url,
|
|
api_key=service_api_key,
|
|
status_watcher="websocket",
|
|
ws_fallback_to_poll=True,
|
|
poll_server_wait=0.2,
|
|
job_timeout=300.0,
|
|
) as client:
|
|
result = client.convert(source=source, options=_json_options())
|
|
|
|
assert result.status.value in {"success", "partial_success"}
|
|
assert result.document.name == "2206.01062"
|
|
|
|
|
|
def test_submit_accepts_custom_request_headers(
|
|
live_service_url: str,
|
|
service_api_key: str | None,
|
|
) -> None:
|
|
source = FIXTURES_DIR / "2206.01062.pdf"
|
|
assert source.exists()
|
|
|
|
with DoclingServiceClient(
|
|
url=live_service_url,
|
|
api_key=service_api_key,
|
|
status_watcher="polling",
|
|
poll_server_wait=0.2,
|
|
job_timeout=300.0,
|
|
) as client:
|
|
job = client.submit(
|
|
source=source,
|
|
options=_json_options(),
|
|
headers={"X-Tenant-Id": "tenant-integration"},
|
|
)
|
|
result = job.result(timeout=300.0)
|
|
|
|
assert result.status.value in {"success", "partial_success"}
|