mirror of
https://github.com/docling-project/docling-core.git
synced 2026-05-17 13:10:44 +00:00
268c294cc9
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
95 lines
2.7 KiB
Python
95 lines
2.7 KiB
Python
#
|
|
# Copyright IBM Corp. 2024 - 2024
|
|
# SPDX-License-Identifier: MIT
|
|
#
|
|
|
|
"""Test the pydantic models in package utils."""
|
|
import json
|
|
|
|
from pydantic import Field
|
|
from requests import Response
|
|
|
|
from docling_core.utils.alias import AliasModel
|
|
from docling_core.utils.file import resolve_source_to_path, resolve_source_to_stream
|
|
|
|
|
|
def test_alias_model():
|
|
"""Test the functionality of AliasModel."""
|
|
|
|
class AliasModelChild(AliasModel):
|
|
foo: str = Field(alias="boo")
|
|
|
|
data = {"foo": "lorem ipsum"}
|
|
data_alias = {"boo": "lorem ipsum"}
|
|
|
|
# data validated from dict, JSON, and constructor can use field names or aliases
|
|
|
|
AliasModelChild.model_validate(data_alias)
|
|
AliasModelChild.model_validate(data)
|
|
|
|
AliasModelChild.model_validate_json(json.dumps(data_alias))
|
|
AliasModelChild.model_validate_json(json.dumps(data))
|
|
|
|
AliasModelChild(boo="lorem ipsum")
|
|
AliasModelChild(foo="lorem ipsum")
|
|
|
|
# children classes will also inherite the populate_by_name
|
|
|
|
class AliasModelGrandChild(AliasModelChild):
|
|
var: int
|
|
|
|
AliasModelGrandChild(boo="lorem ipsum", var=3)
|
|
AliasModelGrandChild(foo="lorem ipsum", var=3)
|
|
|
|
# serialized data will always use aliases
|
|
|
|
obj = AliasModelChild.model_validate(data_alias)
|
|
assert obj.model_dump() == data_alias
|
|
assert obj.model_dump() != data
|
|
|
|
assert obj.model_dump_json() == json.dumps(data_alias, separators=(",", ":"))
|
|
assert obj.model_dump_json() != json.dumps(data, separators=(",", ":"))
|
|
|
|
|
|
def test_resolve_source_to_path_url_wout_path(monkeypatch):
|
|
expected_str = "foo"
|
|
expected_bytes = bytes(expected_str, "utf-8")
|
|
|
|
def get_dummy_response(*args, **kwargs):
|
|
r = Response()
|
|
r.status_code = 200
|
|
r._content = expected_bytes
|
|
return r
|
|
|
|
monkeypatch.setattr("requests.get", get_dummy_response)
|
|
monkeypatch.setattr(
|
|
"requests.models.Response.iter_content",
|
|
lambda *args, **kwargs: [expected_bytes],
|
|
)
|
|
path = resolve_source_to_path("https://pypi.org")
|
|
with open(path, encoding="utf-8") as f:
|
|
text = f.read()
|
|
assert text == expected_str
|
|
|
|
|
|
def test_resolve_source_to_stream_url_wout_path(monkeypatch):
|
|
expected_str = "foo"
|
|
expected_bytes = bytes(expected_str, "utf-8")
|
|
|
|
def get_dummy_response(*args, **kwargs):
|
|
r = Response()
|
|
r.status_code = 200
|
|
r._content = expected_bytes
|
|
return r
|
|
|
|
monkeypatch.setattr("requests.get", get_dummy_response)
|
|
monkeypatch.setattr(
|
|
"requests.models.Response.iter_content",
|
|
lambda *args, **kwargs: [expected_bytes],
|
|
)
|
|
doc_stream = resolve_source_to_stream("https://pypi.org")
|
|
assert doc_stream.name == "file"
|
|
|
|
text = doc_stream.stream.read().decode("utf8")
|
|
assert text == expected_str
|