Files
docling-core/test/test_utils.py
Peter W. J. Staar 928aaed9ab chore: remove unnecesary copyright headers (#440)
* chore: remove unnecesary copyright headers

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* reformatted, still need to fix a test ...

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

---------

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
2025-11-27 11:17:23 +01:00

91 lines
2.7 KiB
Python

"""Test the pydantic models in package utils."""
import json
from pydantic import Field
from requests import Response
from docling_core.utils.alias import AliasModel
from docling_core.utils.file import resolve_source_to_path, resolve_source_to_stream
def test_alias_model() -> None:
"""Test the functionality of AliasModel."""
class AliasModelChild(AliasModel):
foo: str = Field(alias="boo")
data = {"foo": "lorem ipsum"}
data_alias = {"boo": "lorem ipsum"}
# data validated from dict, JSON, and constructor can use field names or aliases
AliasModelChild.model_validate(data_alias)
AliasModelChild.model_validate(data)
AliasModelChild.model_validate_json(json.dumps(data_alias))
AliasModelChild.model_validate_json(json.dumps(data))
AliasModelChild(boo="lorem ipsum") # type: ignore[call-arg]
AliasModelChild(foo="lorem ipsum")
# children classes will also inherite the populate_by_name
class AliasModelGrandChild(AliasModelChild):
var: int
AliasModelGrandChild(boo="lorem ipsum", var=3) # type: ignore[call-arg]
AliasModelGrandChild(foo="lorem ipsum", var=3)
# serialized data will always use aliases
obj = AliasModelChild.model_validate(data_alias)
assert obj.model_dump() == data_alias
assert obj.model_dump() != data
assert obj.model_dump_json() == json.dumps(data_alias, separators=(",", ":"))
assert obj.model_dump_json() != json.dumps(data, separators=(",", ":"))
def test_resolve_source_to_path_url_wout_path(monkeypatch):
expected_str = "foo"
expected_bytes = bytes(expected_str, "utf-8")
def get_dummy_response(*args, **kwargs):
r = Response()
r.status_code = 200
r._content = expected_bytes
return r
monkeypatch.setattr("requests.get", get_dummy_response)
monkeypatch.setattr(
"requests.models.Response.iter_content",
lambda *args, **kwargs: [expected_bytes],
)
path = resolve_source_to_path("https://pypi.org")
with open(path, encoding="utf-8") as f:
text = f.read()
assert text == expected_str
def test_resolve_source_to_stream_url_wout_path(monkeypatch):
expected_str = "foo"
expected_bytes = bytes(expected_str, "utf-8")
def get_dummy_response(*args, **kwargs):
r = Response()
r.status_code = 200
r._content = expected_bytes
return r
monkeypatch.setattr("requests.get", get_dummy_response)
monkeypatch.setattr(
"requests.models.Response.iter_content",
lambda *args, **kwargs: [expected_bytes],
)
doc_stream = resolve_source_to_stream("https://pypi.org")
assert doc_stream.name == "file"
text = doc_stream.stream.read().decode("utf8")
assert text == expected_str