mirror of
https://github.com/docling-project/docling-core.git
synced 2026-05-17 13:10:44 +00:00
656f56380f
* fix(test): set static typing compatible to python 3.10 Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> * style: enforce some pre-commit hooks on tests Enforce pre-commit hooks black, isort, autoflake, and mypy on test modules. Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> * fix(rec): fix definition issues in attribute, predicate, subject Remove duplicate generic types across base and predicate modules. Create an identifier class for subject names. Remove unnecessary type variables in attribute model. Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> * docs: refer to Docling data objects Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> --------- Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
973 lines
26 KiB
JSON
973 lines
26 KiB
JSON
{
|
|
"$defs": {
|
|
"Acquisition": {
|
|
"additionalProperties": false,
|
|
"description": "Information on how the data was obtained.",
|
|
"properties": {
|
|
"type": {
|
|
"description": "The method to obtain the data.",
|
|
"enum": [
|
|
"API",
|
|
"FTP",
|
|
"Download",
|
|
"Link",
|
|
"Web scraping/Crawling",
|
|
"Other"
|
|
],
|
|
"title": "Type",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"date": {
|
|
"anyOf": [
|
|
{
|
|
"format": "date-time",
|
|
"type": "string"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "A string representation of the acquisition datetime in ISO 8601 format.",
|
|
"title": "Date"
|
|
},
|
|
"link": {
|
|
"anyOf": [
|
|
{
|
|
"format": "uri",
|
|
"minLength": 1,
|
|
"type": "string"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "Link to the data source of this document.",
|
|
"title": "Link",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"size": {
|
|
"anyOf": [
|
|
{
|
|
"minimum": 0,
|
|
"type": "integer"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "Size in bytes of the raw document from the data source.",
|
|
"title": "Size",
|
|
"x-es-type": "long"
|
|
}
|
|
},
|
|
"required": [
|
|
"type"
|
|
],
|
|
"title": "Acquisition",
|
|
"type": "object"
|
|
},
|
|
"Attribute": {
|
|
"additionalProperties": false,
|
|
"description": "Attribute model that describes a list of characteristics.",
|
|
"properties": {
|
|
"conf": {
|
|
"description": "The confidence level of this attribute's characteristics.",
|
|
"maximum": 1.0,
|
|
"minimum": 0.0,
|
|
"title": "Confidence",
|
|
"type": "number",
|
|
"x-es-type": "float"
|
|
},
|
|
"prov": {
|
|
"anyOf": [
|
|
{
|
|
"items": {
|
|
"$ref": "#/$defs/ProvenanceItem"
|
|
},
|
|
"type": "array"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "The sources of this attribute's characteristics.",
|
|
"title": "Provenance"
|
|
},
|
|
"predicates": {
|
|
"description": "A list of characteristics (type, value, and name).",
|
|
"items": {
|
|
"$ref": "#/$defs/Predicate"
|
|
},
|
|
"title": "Predicates",
|
|
"type": "array"
|
|
}
|
|
},
|
|
"required": [
|
|
"conf",
|
|
"predicates"
|
|
],
|
|
"title": "Attribute",
|
|
"type": "object"
|
|
},
|
|
"BooleanValue": {
|
|
"additionalProperties": false,
|
|
"description": "Model for boolean values.",
|
|
"properties": {
|
|
"value": {
|
|
"title": "Value",
|
|
"type": "boolean",
|
|
"x-es-type": "boolean"
|
|
}
|
|
},
|
|
"required": [
|
|
"value"
|
|
],
|
|
"title": "BooleanValue",
|
|
"type": "object"
|
|
},
|
|
"CollectionRecordInfo": {
|
|
"additionalProperties": false,
|
|
"description": "Information of a collection of type Record.",
|
|
"properties": {
|
|
"name": {
|
|
"anyOf": [
|
|
{
|
|
"type": "string"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "Name of the collection.",
|
|
"title": "Name",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"type": {
|
|
"const": "Record",
|
|
"description": "The collection type.",
|
|
"enum": [
|
|
"Record"
|
|
],
|
|
"title": "Type",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"version": {
|
|
"anyOf": [
|
|
{
|
|
"pattern": "^(?P<major>0|[1-9]\\d*)\\.(?P<minor>0|[1-9]\\d*)\\.(?P<patch>0|[1-9]\\d*)(?:-(?P<prerelease>(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$",
|
|
"type": "string"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "The version of this collection model.",
|
|
"title": "Version",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"alias": {
|
|
"anyOf": [
|
|
{
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"type": "array"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "A list of tags (aliases) for the collection.",
|
|
"title": "Alias",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
}
|
|
},
|
|
"required": [
|
|
"type"
|
|
],
|
|
"title": "CollectionRecordInfo",
|
|
"type": "object"
|
|
},
|
|
"DatetimeValue": {
|
|
"additionalProperties": false,
|
|
"description": "Model for datetime values.",
|
|
"properties": {
|
|
"value": {
|
|
"format": "date-time",
|
|
"title": "Value",
|
|
"type": "string"
|
|
}
|
|
},
|
|
"required": [
|
|
"value"
|
|
],
|
|
"title": "DatetimeValue",
|
|
"type": "object"
|
|
},
|
|
"FileInfoObject": {
|
|
"description": "Filing information for any data object to be stored in a Docling database.",
|
|
"properties": {
|
|
"filename": {
|
|
"description": "The name of a persistent object that created this data object",
|
|
"title": "Filename",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"filename-prov": {
|
|
"anyOf": [
|
|
{
|
|
"type": "string"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "The provenance of this data object, e.g. an archive file, a URL, or any other repository.",
|
|
"title": "Filename-Prov",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"document-hash": {
|
|
"description": "A unique identifier of this data object within a collection of a Docling database",
|
|
"title": "Document-Hash",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
}
|
|
},
|
|
"required": [
|
|
"filename",
|
|
"document-hash"
|
|
],
|
|
"title": "FileInfoObject",
|
|
"type": "object"
|
|
},
|
|
"GeopointValue": {
|
|
"additionalProperties": false,
|
|
"description": "A representation of a geopoint (longitude and latitude coordinates).",
|
|
"properties": {
|
|
"value": {
|
|
"items": {
|
|
"type": "number"
|
|
},
|
|
"maxItems": 2,
|
|
"minItems": 2,
|
|
"title": "Value",
|
|
"type": "array",
|
|
"x-es-type": "geo_point"
|
|
},
|
|
"conf": {
|
|
"anyOf": [
|
|
{
|
|
"maximum": 1.0,
|
|
"minimum": 0.0,
|
|
"type": "number"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"title": "Conf",
|
|
"x-es-type": "float"
|
|
}
|
|
},
|
|
"required": [
|
|
"value"
|
|
],
|
|
"title": "GeopointValue",
|
|
"type": "object"
|
|
},
|
|
"Identifier": {
|
|
"additionalProperties": false,
|
|
"description": "Unique identifier of a Docling data object.",
|
|
"properties": {
|
|
"type": {
|
|
"description": "A string representing a collection or database that contains this data object.",
|
|
"title": "Type",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"value": {
|
|
"description": "The identifier value of the data object within a collection or database.",
|
|
"title": "Value",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"_name": {
|
|
"description": "A unique identifier of the data object across Docling, consisting of the concatenation of type and value in lower case, separated by hash (#).",
|
|
"pattern": "^.+#.+$",
|
|
"title": "_Name",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
}
|
|
},
|
|
"required": [
|
|
"type",
|
|
"value",
|
|
"_name"
|
|
],
|
|
"title": "Identifier",
|
|
"type": "object"
|
|
},
|
|
"Log": {
|
|
"additionalProperties": false,
|
|
"description": "Log entry to describe an ETL task on a document.",
|
|
"properties": {
|
|
"task": {
|
|
"anyOf": [
|
|
{
|
|
"type": "string"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "An identifier of this task. It may be used to distinguish this task from other tasks of the same agent and type.",
|
|
"title": "Task",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"agent": {
|
|
"description": "The Docling agent that performed the task, e.g., CCS or CXS.",
|
|
"title": "Agent",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"type": {
|
|
"description": "A task category.",
|
|
"title": "Type",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"comment": {
|
|
"anyOf": [
|
|
{
|
|
"type": "string"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "A description of the task or any comments in natural language.",
|
|
"title": "Comment"
|
|
},
|
|
"date": {
|
|
"description": "A string representation of the task execution datetime in ISO 8601 format.",
|
|
"format": "date-time",
|
|
"title": "Date",
|
|
"type": "string"
|
|
}
|
|
},
|
|
"required": [
|
|
"agent",
|
|
"type",
|
|
"date"
|
|
],
|
|
"title": "Log",
|
|
"type": "object"
|
|
},
|
|
"NominalValue": {
|
|
"additionalProperties": false,
|
|
"description": "Model for nominal (categorical) values.",
|
|
"properties": {
|
|
"value": {
|
|
"title": "Value",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
}
|
|
},
|
|
"required": [
|
|
"value"
|
|
],
|
|
"title": "NominalValue",
|
|
"type": "object"
|
|
},
|
|
"NumericalValue": {
|
|
"additionalProperties": false,
|
|
"description": "Model for numerical values.",
|
|
"properties": {
|
|
"min": {
|
|
"title": "Min",
|
|
"type": "number",
|
|
"x-es-type": "float"
|
|
},
|
|
"max": {
|
|
"title": "Max",
|
|
"type": "number",
|
|
"x-es-type": "float"
|
|
},
|
|
"val": {
|
|
"title": "Val",
|
|
"type": "number",
|
|
"x-es-type": "float"
|
|
},
|
|
"err": {
|
|
"title": "Err",
|
|
"type": "number",
|
|
"x-es-type": "float"
|
|
},
|
|
"unit": {
|
|
"title": "Unit",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
}
|
|
},
|
|
"required": [
|
|
"min",
|
|
"max",
|
|
"val",
|
|
"err",
|
|
"unit"
|
|
],
|
|
"title": "NumericalValue",
|
|
"type": "object"
|
|
},
|
|
"Predicate": {
|
|
"additionalProperties": false,
|
|
"description": "Model for a predicate.",
|
|
"properties": {
|
|
"key": {
|
|
"$ref": "#/$defs/PredicateKey"
|
|
},
|
|
"value": {
|
|
"$ref": "#/$defs/PredicateValue"
|
|
},
|
|
"numerical_value": {
|
|
"anyOf": [
|
|
{
|
|
"$ref": "#/$defs/NumericalValue"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null
|
|
},
|
|
"numerical_value_si": {
|
|
"anyOf": [
|
|
{
|
|
"$ref": "#/$defs/NumericalValue"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null
|
|
},
|
|
"nominal_value": {
|
|
"anyOf": [
|
|
{
|
|
"$ref": "#/$defs/NominalValue"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null
|
|
},
|
|
"text_value": {
|
|
"anyOf": [
|
|
{
|
|
"$ref": "#/$defs/TextValue"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null
|
|
},
|
|
"boolean_value": {
|
|
"anyOf": [
|
|
{
|
|
"$ref": "#/$defs/BooleanValue"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null
|
|
},
|
|
"datetime_value": {
|
|
"anyOf": [
|
|
{
|
|
"$ref": "#/$defs/DatetimeValue"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null
|
|
},
|
|
"geopoint_value": {
|
|
"anyOf": [
|
|
{
|
|
"$ref": "#/$defs/GeopointValue"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null
|
|
}
|
|
},
|
|
"required": [
|
|
"key",
|
|
"value"
|
|
],
|
|
"title": "Predicate",
|
|
"type": "object"
|
|
},
|
|
"PredicateKey": {
|
|
"additionalProperties": false,
|
|
"description": "Model for the key (unique identifier) of a predicate.",
|
|
"properties": {
|
|
"name": {
|
|
"description": "Name of the predicate key.",
|
|
"title": "Name",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"type": {
|
|
"description": "Type of predicate key.",
|
|
"title": "Type",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
}
|
|
},
|
|
"required": [
|
|
"name",
|
|
"type"
|
|
],
|
|
"title": "PredicateKey",
|
|
"type": "object"
|
|
},
|
|
"PredicateValue": {
|
|
"additionalProperties": false,
|
|
"description": "Model for the value of a predicate.",
|
|
"properties": {
|
|
"name": {
|
|
"description": "Name of the predicate value (actual value).",
|
|
"title": "Name",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"type": {
|
|
"description": "Type of predicate value.",
|
|
"title": "Type",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
}
|
|
},
|
|
"required": [
|
|
"name",
|
|
"type"
|
|
],
|
|
"title": "PredicateValue",
|
|
"type": "object"
|
|
},
|
|
"ProvenanceItem": {
|
|
"additionalProperties": false,
|
|
"description": "A representation of an object provenance.",
|
|
"properties": {
|
|
"type": {
|
|
"anyOf": [
|
|
{
|
|
"type": "string"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "Any string representing the type of provenance, e.g. `sentence`, `table`, or `doi`.",
|
|
"title": "The provenance type",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"text": {
|
|
"anyOf": [
|
|
{
|
|
"type": "string"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "A text representing the evidence of the provenance, e.g. the sentence text or the content of a table cell",
|
|
"title": "Evidence of the provenance",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"reference": {
|
|
"anyOf": [
|
|
{
|
|
"$ref": "#/$defs/Identifier"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "Reference to another object, e.g. record, statement, URL, or any other object that identifies the provenance",
|
|
"title": "Reference to the provenance object"
|
|
},
|
|
"path": {
|
|
"anyOf": [
|
|
{
|
|
"type": "string"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "A path that locates the evidence within the provenance object identified by the `reference` field using a JSON pointer notation, e.g., `#/main-text/5` to locate the `main-text` paragraph at index 5",
|
|
"title": "The location of the provenance within the referenced object",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"span": {
|
|
"anyOf": [
|
|
{
|
|
"items": {
|
|
"type": "integer"
|
|
},
|
|
"maxItems": 2,
|
|
"minItems": 2,
|
|
"type": "array"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "Location of the item in the text/table referenced by the `path`, e.g., `[34, 67]`",
|
|
"title": "The location of the item in the text/table"
|
|
}
|
|
},
|
|
"title": "ProvenanceItem",
|
|
"type": "object"
|
|
},
|
|
"RecordDescription": {
|
|
"description": "Additional record metadata, including optional collection-specific fields.",
|
|
"properties": {
|
|
"logs": {
|
|
"description": "Logs that describe the ETL tasks applied to this record.",
|
|
"items": {
|
|
"$ref": "#/$defs/Log"
|
|
},
|
|
"title": "Logs",
|
|
"type": "array"
|
|
},
|
|
"publication_date": {
|
|
"anyOf": [
|
|
{
|
|
"format": "date-time",
|
|
"type": "string"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "The date that best represents the last publication time of a record.",
|
|
"title": "Publication date"
|
|
},
|
|
"collection": {
|
|
"anyOf": [
|
|
{
|
|
"$ref": "#/$defs/CollectionRecordInfo"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "The collection information of this record."
|
|
},
|
|
"acquisition": {
|
|
"anyOf": [
|
|
{
|
|
"$ref": "#/$defs/Acquisition"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "Information on how the document was obtained, for data governance purposes."
|
|
}
|
|
},
|
|
"required": [
|
|
"logs"
|
|
],
|
|
"title": "RecordDescription",
|
|
"type": "object"
|
|
},
|
|
"S3Reference": {
|
|
"description": "References an s3 resource.",
|
|
"properties": {
|
|
"__ref_s3_data": {
|
|
"examples": [
|
|
"#/_s3_data/figures/0"
|
|
],
|
|
"title": " Ref S3 Data",
|
|
"type": "string"
|
|
}
|
|
},
|
|
"required": [
|
|
"__ref_s3_data"
|
|
],
|
|
"title": "S3Reference",
|
|
"type": "object"
|
|
},
|
|
"Subject": {
|
|
"additionalProperties": false,
|
|
"description": "A representation of a subject.",
|
|
"properties": {
|
|
"display_name": {
|
|
"description": "Name of the subject in natural language. It can be used for end-user applications to display a human-readable name. For instance, `B(2) Mg(1)` for `MgB2` or `International Business Machines` for `IBM`",
|
|
"title": "Display Name",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"display_image": {
|
|
"anyOf": [
|
|
{
|
|
"$ref": "#/$defs/S3Reference"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "Image representing the subject. It can be used for end-user applications. For example, the chemical structure drawing of a compound or the eight bar IBM logo for IBM.",
|
|
"title": "Display Image",
|
|
"x-es-suppress": true
|
|
},
|
|
"type": {
|
|
"description": "Main subject type. For instance, `material`, `material-class`, `material-device`, `company`, or `person`.",
|
|
"title": "Type",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"names": {
|
|
"description": "List of given names for this subject. They may not be unique across different subjects.",
|
|
"items": {
|
|
"$ref": "#/$defs/SubjectNameIdentifier"
|
|
},
|
|
"title": "Names",
|
|
"type": "array"
|
|
},
|
|
"identifiers": {
|
|
"anyOf": [
|
|
{
|
|
"items": {
|
|
"$ref": "#/$defs/Identifier"
|
|
},
|
|
"type": "array"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "List of unique identifiers in a database. For instance, the `PubChem ID` of a record in the PubChem database.",
|
|
"title": "Identifiers"
|
|
},
|
|
"labels": {
|
|
"anyOf": [
|
|
{
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"type": "array"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "List of labels or categories for this subject.",
|
|
"title": "Labels",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
}
|
|
},
|
|
"required": [
|
|
"display_name",
|
|
"type",
|
|
"names"
|
|
],
|
|
"title": "Subject",
|
|
"type": "object"
|
|
},
|
|
"SubjectNameIdentifier": {
|
|
"additionalProperties": false,
|
|
"description": "Identifier of subject names.",
|
|
"properties": {
|
|
"type": {
|
|
"description": "A string representing a collection or database that contains this data object.",
|
|
"title": "Type",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"value": {
|
|
"description": "The identifier value of the data object within a collection or database.",
|
|
"title": "Value",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
},
|
|
"_name": {
|
|
"description": "A unique identifier of the data object across Docling, consisting of the concatenation of type and value in lower case, separated by hash (#).",
|
|
"pattern": "^.+#.+$",
|
|
"title": "_Name",
|
|
"type": "string",
|
|
"x-es-ignore_above": 8191,
|
|
"x-es-type": "keyword"
|
|
}
|
|
},
|
|
"required": [
|
|
"type",
|
|
"value",
|
|
"_name"
|
|
],
|
|
"title": "SubjectNameIdentifier",
|
|
"type": "object"
|
|
},
|
|
"TextValue": {
|
|
"additionalProperties": false,
|
|
"description": "Model for textual values.",
|
|
"properties": {
|
|
"value": {
|
|
"title": "Value",
|
|
"type": "string",
|
|
"x-es-type": "text"
|
|
}
|
|
},
|
|
"required": [
|
|
"value"
|
|
],
|
|
"title": "TextValue",
|
|
"type": "object"
|
|
}
|
|
},
|
|
"description": "A representation of a structured record in a database.",
|
|
"properties": {
|
|
"conf": {
|
|
"description": "This value represents a score for the data item. Items originating from databases will typically have a score of 1.0, while items resulting from an NLP model may have a value between 0.0 and 1.0.",
|
|
"maximum": 1.0,
|
|
"minimum": 0.0,
|
|
"title": "The confidence of the evidence",
|
|
"type": "number",
|
|
"x-es-type": "float"
|
|
},
|
|
"prov": {
|
|
"description": "A list of provenance items.",
|
|
"items": {
|
|
"$ref": "#/$defs/ProvenanceItem"
|
|
},
|
|
"title": "Provenance",
|
|
"type": "array"
|
|
},
|
|
"file-info": {
|
|
"$ref": "#/$defs/FileInfoObject"
|
|
},
|
|
"description": {
|
|
"$ref": "#/$defs/RecordDescription"
|
|
},
|
|
"subject": {
|
|
"$ref": "#/$defs/Subject"
|
|
},
|
|
"attributes": {
|
|
"anyOf": [
|
|
{
|
|
"items": {
|
|
"$ref": "#/$defs/Attribute"
|
|
},
|
|
"type": "array"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"title": "Attributes"
|
|
},
|
|
"_name": {
|
|
"anyOf": [
|
|
{
|
|
"type": "string"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "A short description or summary of the record.",
|
|
"title": " Name",
|
|
"x-es-type": "text"
|
|
},
|
|
"identifiers": {
|
|
"anyOf": [
|
|
{
|
|
"items": {
|
|
"$ref": "#/$defs/Identifier"
|
|
},
|
|
"type": "array"
|
|
},
|
|
{
|
|
"type": "null"
|
|
}
|
|
],
|
|
"default": null,
|
|
"description": "A list of unique identifiers of this record in a database.",
|
|
"title": "Identifiers"
|
|
}
|
|
},
|
|
"required": [
|
|
"conf",
|
|
"prov",
|
|
"file-info",
|
|
"description",
|
|
"subject"
|
|
],
|
|
"title": "Record",
|
|
"type": "object"
|
|
} |