Files
docling-core/test/test_utils.py
Michele Dolfi 268c294cc9 fix: always write with utf8 encoding (#111)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2024-12-17 09:37:01 +01:00

95 lines
2.7 KiB
Python

#
# Copyright IBM Corp. 2024 - 2024
# SPDX-License-Identifier: MIT
#
"""Test the pydantic models in package utils."""
import json
from pydantic import Field
from requests import Response
from docling_core.utils.alias import AliasModel
from docling_core.utils.file import resolve_source_to_path, resolve_source_to_stream
def test_alias_model():
"""Test the functionality of AliasModel."""
class AliasModelChild(AliasModel):
foo: str = Field(alias="boo")
data = {"foo": "lorem ipsum"}
data_alias = {"boo": "lorem ipsum"}
# data validated from dict, JSON, and constructor can use field names or aliases
AliasModelChild.model_validate(data_alias)
AliasModelChild.model_validate(data)
AliasModelChild.model_validate_json(json.dumps(data_alias))
AliasModelChild.model_validate_json(json.dumps(data))
AliasModelChild(boo="lorem ipsum")
AliasModelChild(foo="lorem ipsum")
# children classes will also inherite the populate_by_name
class AliasModelGrandChild(AliasModelChild):
var: int
AliasModelGrandChild(boo="lorem ipsum", var=3)
AliasModelGrandChild(foo="lorem ipsum", var=3)
# serialized data will always use aliases
obj = AliasModelChild.model_validate(data_alias)
assert obj.model_dump() == data_alias
assert obj.model_dump() != data
assert obj.model_dump_json() == json.dumps(data_alias, separators=(",", ":"))
assert obj.model_dump_json() != json.dumps(data, separators=(",", ":"))
def test_resolve_source_to_path_url_wout_path(monkeypatch):
expected_str = "foo"
expected_bytes = bytes(expected_str, "utf-8")
def get_dummy_response(*args, **kwargs):
r = Response()
r.status_code = 200
r._content = expected_bytes
return r
monkeypatch.setattr("requests.get", get_dummy_response)
monkeypatch.setattr(
"requests.models.Response.iter_content",
lambda *args, **kwargs: [expected_bytes],
)
path = resolve_source_to_path("https://pypi.org")
with open(path, encoding="utf-8") as f:
text = f.read()
assert text == expected_str
def test_resolve_source_to_stream_url_wout_path(monkeypatch):
expected_str = "foo"
expected_bytes = bytes(expected_str, "utf-8")
def get_dummy_response(*args, **kwargs):
r = Response()
r.status_code = 200
r._content = expected_bytes
return r
monkeypatch.setattr("requests.get", get_dummy_response)
monkeypatch.setattr(
"requests.models.Response.iter_content",
lambda *args, **kwargs: [expected_bytes],
)
doc_stream = resolve_source_to_stream("https://pypi.org")
assert doc_stream.name == "file"
text = doc_stream.stream.read().decode("utf8")
assert text == expected_str