mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-05-26 07:00:36 +00:00
Chore: Upgrades tantivy-py to the latest release (#12605)
This commit is contained in:
@@ -30,11 +30,25 @@
|
||||
"**/.idea": true,
|
||||
"**/.venv": true,
|
||||
"**/.coverage": true,
|
||||
"**/coverage.json": true
|
||||
"**/coverage.json": true,
|
||||
"htmlcov/": true,
|
||||
"coverage.xml": true,
|
||||
"junit.xml": true
|
||||
},
|
||||
"python.defaultInterpreterPath": ".venv/bin/python3",
|
||||
"python.languageServer": "Pylance",
|
||||
"python.defaultInterpreterPath": "${workspaceFolder:paperless-ngx}/.venv/bin/python3",
|
||||
"python.analysis.extraPaths": ["${workspaceFolder:paperless-ngx}/src"],
|
||||
"python.analysis.inlayHints.pytestParameters": true,
|
||||
"python.testing.pytestEnabled": true,
|
||||
"python.testing.unittestEnabled": false,
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "charliermarsh.ruff",
|
||||
"editor.formatOnSave": true,
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.fixAll.ruff": "explicit",
|
||||
"source.organizeImports.ruff": "explicit"
|
||||
}
|
||||
}
|
||||
},
|
||||
"extensions": {
|
||||
"recommendations": ["ms-python.python", "charliermarsh.ruff", "editorconfig.editorconfig"],
|
||||
|
||||
+3
-2
@@ -73,7 +73,7 @@ dependencies = [
|
||||
"scikit-learn~=1.8.0",
|
||||
"sentence-transformers>=4.1",
|
||||
"setproctitle~=1.3.4",
|
||||
"tantivy>=0.25.1",
|
||||
"tantivy~=0.26.0",
|
||||
"tika-client~=0.11.0",
|
||||
"torch~=2.11.0",
|
||||
"watchfiles>=1.1.1",
|
||||
@@ -143,7 +143,8 @@ typing = [
|
||||
"types-python-dateutil",
|
||||
"types-pytz",
|
||||
"types-redis",
|
||||
"types-setuptools",
|
||||
"types-regex",
|
||||
"types-setuptools"
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
|
||||
@@ -3,7 +3,6 @@ from __future__ import annotations
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
from collections import Counter
|
||||
from datetime import UTC
|
||||
from datetime import datetime
|
||||
from enum import StrEnum
|
||||
@@ -11,6 +10,7 @@ from typing import TYPE_CHECKING
|
||||
from typing import Self
|
||||
from typing import TypedDict
|
||||
from typing import TypeVar
|
||||
from typing import cast
|
||||
|
||||
import filelock
|
||||
import regex
|
||||
@@ -36,7 +36,7 @@ from documents.utils import identity
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
from django.contrib.auth.base_user import AbstractBaseUser
|
||||
from django.contrib.auth.models import AbstractUser
|
||||
from django.db.models import QuerySet
|
||||
|
||||
from documents.models import Document
|
||||
@@ -169,9 +169,16 @@ class WriteBatch:
|
||||
def __init__(self, backend: TantivyBackend, lock_timeout: float):
|
||||
self._backend = backend
|
||||
self._lock_timeout = lock_timeout
|
||||
self._writer = None
|
||||
self._raw_writer: tantivy.IndexWriter | None = None
|
||||
self._lock = None
|
||||
|
||||
@property
|
||||
def _writer(self) -> tantivy.IndexWriter:
|
||||
assert self._raw_writer is not None, (
|
||||
"WriteBatch not entered; use as context manager"
|
||||
)
|
||||
return self._raw_writer
|
||||
|
||||
def __enter__(self) -> Self:
|
||||
if self._backend._path is not None:
|
||||
lock_path = self._backend._path / ".tantivy.lock"
|
||||
@@ -183,7 +190,7 @@ class WriteBatch:
|
||||
f"Could not acquire index lock within {self._lock_timeout}s",
|
||||
) from e
|
||||
|
||||
self._writer = self._backend._index.writer()
|
||||
self._raw_writer = self._backend._index.writer()
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
@@ -193,9 +200,9 @@ class WriteBatch:
|
||||
self._backend._index.reload()
|
||||
# Explicitly delete writer to release tantivy's internal lock.
|
||||
# On exception the uncommitted writer is simply discarded.
|
||||
if self._writer is not None:
|
||||
del self._writer
|
||||
self._writer = None
|
||||
if self._raw_writer is not None:
|
||||
del self._raw_writer
|
||||
self._raw_writer = None
|
||||
finally:
|
||||
if self._lock is not None:
|
||||
self._lock.release()
|
||||
@@ -222,24 +229,9 @@ class WriteBatch:
|
||||
self._writer.add_document(doc)
|
||||
|
||||
def remove(self, doc_id: int) -> None:
|
||||
"""
|
||||
Remove a document from the batch by its primary key.
|
||||
|
||||
Uses range_query instead of term_query to work around a tantivy-py bug
|
||||
where Python integers are inferred as i64, producing Terms that never
|
||||
match u64 fields.
|
||||
|
||||
TODO: Replace with term_query("id", doc_id) once
|
||||
https://github.com/quickwit-oss/tantivy-py/pull/642 lands.
|
||||
"""
|
||||
"""Remove a document from the batch by its primary key."""
|
||||
self._writer.delete_documents_by_query(
|
||||
tantivy.Query.range_query(
|
||||
self._backend._schema,
|
||||
"id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
doc_id,
|
||||
doc_id,
|
||||
),
|
||||
tantivy.Query.term_query(self._backend._schema, "id", doc_id),
|
||||
)
|
||||
|
||||
|
||||
@@ -288,8 +280,18 @@ class TantivyBackend:
|
||||
# path=None → in-memory index (for tests)
|
||||
# path=some_dir → on-disk index (for production)
|
||||
self._path = path
|
||||
self._index = None
|
||||
self._schema = None
|
||||
self._raw_index: tantivy.Index | None = None
|
||||
self._raw_schema: tantivy.Schema | None = None
|
||||
|
||||
@property
|
||||
def _index(self) -> tantivy.Index:
|
||||
assert self._raw_index is not None, "Index not open; call open() first"
|
||||
return self._raw_index
|
||||
|
||||
@property
|
||||
def _schema(self) -> tantivy.Schema:
|
||||
assert self._raw_schema is not None, "Schema not open; call open() first"
|
||||
return self._raw_schema
|
||||
|
||||
def open(self) -> None:
|
||||
"""
|
||||
@@ -299,14 +301,14 @@ class TantivyBackend:
|
||||
version or language changes. Registers custom tokenizers after opening.
|
||||
Safe to call multiple times - subsequent calls are no-ops.
|
||||
"""
|
||||
if self._index is not None:
|
||||
if self._raw_index is not None:
|
||||
return # pragma: no cover
|
||||
if self._path is not None:
|
||||
self._index = open_or_rebuild_index(self._path)
|
||||
self._raw_index = open_or_rebuild_index(self._path)
|
||||
else:
|
||||
self._index = tantivy.Index(build_schema())
|
||||
register_tokenizers(self._index, settings.SEARCH_LANGUAGE)
|
||||
self._schema = self._index.schema
|
||||
self._raw_index = tantivy.Index(build_schema())
|
||||
register_tokenizers(self._raw_index, settings.SEARCH_LANGUAGE)
|
||||
self._raw_schema = self._raw_index.schema
|
||||
|
||||
def close(self) -> None:
|
||||
"""
|
||||
@@ -314,12 +316,12 @@ class TantivyBackend:
|
||||
|
||||
Safe to call multiple times - subsequent calls are no-ops.
|
||||
"""
|
||||
self._index = None
|
||||
self._schema = None
|
||||
self._raw_index = None
|
||||
self._raw_schema = None
|
||||
|
||||
def _ensure_open(self) -> None:
|
||||
"""Ensure the index is open before operations."""
|
||||
if self._index is None:
|
||||
if self._raw_index is None:
|
||||
self.open() # pragma: no cover
|
||||
|
||||
def _parse_query(
|
||||
@@ -339,7 +341,7 @@ class TantivyBackend:
|
||||
def _apply_permission_filter(
|
||||
self,
|
||||
query: tantivy.Query,
|
||||
user: AbstractBaseUser | None,
|
||||
user: AbstractUser | None,
|
||||
) -> tantivy.Query:
|
||||
"""Wrap a query with a permission filter if the user is not a superuser."""
|
||||
if user is not None:
|
||||
@@ -526,15 +528,6 @@ class TantivyBackend:
|
||||
Use this when you already know which documents to display (from
|
||||
search_ids + ORM filtering) and just need highlight data.
|
||||
|
||||
Note: Each doc_id requires an individual index lookup because tantivy-py
|
||||
does not yet expose a batch fast-field read API. This is acceptable for
|
||||
page-sized batches (typically 25 docs) but should not be called with
|
||||
thousands of IDs.
|
||||
|
||||
TODO: When https://github.com/quickwit-oss/tantivy-py/pull/641 lands,
|
||||
the per-doc range_query lookups here can be replaced with a single
|
||||
collect_u64_fast_field("id", doc_addresses) call.
|
||||
|
||||
Args:
|
||||
query: The search query (used for snippet generation)
|
||||
doc_ids: Ordered list of document IDs to generate hits for
|
||||
@@ -571,32 +564,42 @@ class TantivyBackend:
|
||||
notes_text_query = user_query
|
||||
|
||||
searcher = self._index.searcher()
|
||||
|
||||
# Fetch all requested docs in a single search: user_query MUST match
|
||||
# and exactly the requested IDs MUST match (OR of term_queries).
|
||||
id_filter = tantivy.Query.boolean_query(
|
||||
[
|
||||
(
|
||||
tantivy.Occur.Should,
|
||||
tantivy.Query.term_query(self._schema, "id", did),
|
||||
)
|
||||
for did in doc_ids
|
||||
],
|
||||
)
|
||||
batch_query = tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, user_query),
|
||||
(tantivy.Occur.Must, id_filter),
|
||||
],
|
||||
)
|
||||
batch_results = searcher.search(batch_query, limit=len(doc_ids))
|
||||
|
||||
result_addrs = [addr for _score, addr in batch_results.hits]
|
||||
result_ids = cast("list[int]", searcher.fast_field_values("id", result_addrs))
|
||||
addr_by_id: dict[int, tuple[float, tantivy.DocAddress]] = {
|
||||
doc_id: (score, addr)
|
||||
for (score, addr), doc_id in zip(batch_results.hits, result_ids)
|
||||
}
|
||||
|
||||
snippet_generator = None
|
||||
notes_snippet_generator = None
|
||||
hits: list[SearchHit] = []
|
||||
|
||||
for rank, doc_id in enumerate(doc_ids, start=rank_start):
|
||||
# Look up document by ID, scoring against the user query so that
|
||||
# the returned SearchHit carries a real BM25 relevance score.
|
||||
id_query = tantivy.Query.range_query(
|
||||
self._schema,
|
||||
"id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
doc_id,
|
||||
doc_id,
|
||||
)
|
||||
scored_query = tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, user_query),
|
||||
(tantivy.Occur.Must, id_query),
|
||||
],
|
||||
)
|
||||
results = searcher.search(scored_query, limit=1)
|
||||
|
||||
if not results.hits:
|
||||
if doc_id not in addr_by_id:
|
||||
continue
|
||||
|
||||
score, doc_address = results.hits[0]
|
||||
score, doc_address = addr_by_id[doc_id]
|
||||
actual_doc = searcher.doc(doc_address)
|
||||
doc_dict = actual_doc.to_dict()
|
||||
|
||||
@@ -647,7 +650,7 @@ class TantivyBackend:
|
||||
def search_ids(
|
||||
self,
|
||||
query: str,
|
||||
user: AbstractBaseUser | None,
|
||||
user: AbstractUser | None,
|
||||
*,
|
||||
sort_field: str | None = None,
|
||||
sort_reverse: bool = False,
|
||||
@@ -701,16 +704,16 @@ class TantivyBackend:
|
||||
if threshold is not None:
|
||||
all_hits = [hit for hit in all_hits if hit[1] >= threshold]
|
||||
|
||||
# TODO: Replace with searcher.collect_u64_fast_field("id", addrs) once
|
||||
# https://github.com/quickwit-oss/tantivy-py/pull/641 lands — eliminates
|
||||
# one stored-doc fetch per result (~80% reduction in search_ids latency).
|
||||
return [searcher.doc(doc_addr).to_dict()["id"][0] for doc_addr, *_ in all_hits]
|
||||
return cast(
|
||||
"list[int]",
|
||||
searcher.fast_field_values("id", [doc_addr for doc_addr, *_ in all_hits]),
|
||||
)
|
||||
|
||||
def autocomplete(
|
||||
self,
|
||||
term: str,
|
||||
limit: int,
|
||||
user: AbstractBaseUser | None = None,
|
||||
user: AbstractUser | None = None,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Get autocomplete suggestions for search queries.
|
||||
@@ -738,69 +741,25 @@ class TantivyBackend:
|
||||
|
||||
searcher = self._index.searcher()
|
||||
|
||||
# Build a prefix query on autocomplete_word so we only scan docs
|
||||
# containing words that start with the prefix, not the entire index.
|
||||
# tantivy regex is implicitly anchored; .+ avoids the empty-match
|
||||
# error that .* triggers. We OR with term_query to also match the
|
||||
# exact prefix as a complete word.
|
||||
escaped = re.escape(normalized_term)
|
||||
prefix_query = tantivy.Query.boolean_query(
|
||||
[
|
||||
(
|
||||
tantivy.Occur.Should,
|
||||
tantivy.Query.term_query(
|
||||
self._schema,
|
||||
"autocomplete_word",
|
||||
normalized_term,
|
||||
),
|
||||
),
|
||||
(
|
||||
tantivy.Occur.Should,
|
||||
tantivy.Query.regex_query(
|
||||
self._schema,
|
||||
"autocomplete_word",
|
||||
f"{escaped}.+",
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
permission_query = None
|
||||
# Intersect with permission filter so autocomplete words from
|
||||
# invisible documents don't leak to other users.
|
||||
if user is not None and not user.is_superuser:
|
||||
final_query = tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, prefix_query),
|
||||
(tantivy.Occur.Must, build_permission_filter(self._schema, user)),
|
||||
],
|
||||
)
|
||||
else:
|
||||
final_query = prefix_query
|
||||
permission_query = build_permission_filter(self._schema, user)
|
||||
|
||||
results = searcher.search(final_query, limit=searcher.num_docs)
|
||||
|
||||
# Count how many visible documents each matching word appears in.
|
||||
word_counts: Counter[str] = Counter()
|
||||
for _score, doc_address in results.hits:
|
||||
stored_doc = searcher.doc(doc_address)
|
||||
doc_dict = stored_doc.to_dict()
|
||||
if "autocomplete_word" in doc_dict:
|
||||
for word in doc_dict["autocomplete_word"]:
|
||||
if word.startswith(normalized_term):
|
||||
word_counts[word] += 1
|
||||
|
||||
# Sort by document frequency descending; break ties alphabetically.
|
||||
matches = sorted(
|
||||
word_counts,
|
||||
key=lambda w: (-word_counts[w], w),
|
||||
matches = searcher.terms_with_prefix(
|
||||
"autocomplete_word",
|
||||
normalized_term,
|
||||
permission_query,
|
||||
limit,
|
||||
)
|
||||
|
||||
return matches[:limit]
|
||||
return [x[0] for x in matches]
|
||||
|
||||
def more_like_this_ids(
|
||||
self,
|
||||
doc_id: int,
|
||||
user: AbstractBaseUser | None,
|
||||
user: AbstractUser | None,
|
||||
*,
|
||||
limit: int | None = None,
|
||||
) -> list[int]:
|
||||
@@ -821,13 +780,7 @@ class TantivyBackend:
|
||||
self._ensure_open()
|
||||
searcher = self._index.searcher()
|
||||
|
||||
id_query = tantivy.Query.range_query(
|
||||
self._schema,
|
||||
"id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
doc_id,
|
||||
doc_id,
|
||||
)
|
||||
id_query = tantivy.Query.term_query(self._schema, "id", doc_id)
|
||||
results = searcher.search(id_query, limit=1)
|
||||
|
||||
if not results.hits:
|
||||
@@ -851,14 +804,9 @@ class TantivyBackend:
|
||||
# Fetch one extra to account for excluding the original document
|
||||
results = searcher.search(final_query, limit=effective_limit + 1)
|
||||
|
||||
# TODO: Replace with collect_u64_fast_field("id", addrs) once
|
||||
# https://github.com/quickwit-oss/tantivy-py/pull/641 lands.
|
||||
ids = []
|
||||
for _score, doc_address in results.hits:
|
||||
result_doc_id = searcher.doc(doc_address).to_dict()["id"][0]
|
||||
if result_doc_id != doc_id:
|
||||
ids.append(result_doc_id)
|
||||
|
||||
addrs = [addr for _score, addr in results.hits]
|
||||
all_ids = cast("list[int]", searcher.fast_field_values("id", addrs))
|
||||
ids = [rid for rid in all_ids if rid != doc_id]
|
||||
return ids[:limit] if limit is not None else ids
|
||||
|
||||
def batch_update(self, lock_timeout: float = 30.0) -> WriteBatch:
|
||||
@@ -906,9 +854,9 @@ class TantivyBackend:
|
||||
register_tokenizers(new_index, settings.SEARCH_LANGUAGE)
|
||||
|
||||
# Point instance at the new index so _build_tantivy_doc uses it
|
||||
old_index, old_schema = self._index, self._schema
|
||||
self._index = new_index
|
||||
self._schema = new_index.schema
|
||||
old_index, old_schema = self._raw_index, self._raw_schema
|
||||
self._raw_index = new_index
|
||||
self._raw_schema = new_index.schema
|
||||
|
||||
try:
|
||||
writer = new_index.writer()
|
||||
@@ -922,8 +870,8 @@ class TantivyBackend:
|
||||
new_index.reload()
|
||||
except BaseException: # pragma: no cover
|
||||
# Restore old index on failure so the backend remains usable
|
||||
self._index = old_index
|
||||
self._schema = old_schema
|
||||
self._raw_index = old_index
|
||||
self._raw_schema = old_schema
|
||||
raise
|
||||
|
||||
|
||||
|
||||
@@ -410,9 +410,6 @@ def normalize_query(query: str) -> str:
|
||||
raise ValueError("Query too complex to process (normalization timed out)")
|
||||
|
||||
|
||||
_MAX_U64 = 2**64 - 1 # u64 max — used as inclusive upper bound for "any owner" range
|
||||
|
||||
|
||||
def build_permission_filter(
|
||||
schema: tantivy.Schema,
|
||||
user: AbstractBaseUser,
|
||||
@@ -432,48 +429,16 @@ def build_permission_filter(
|
||||
|
||||
Returns:
|
||||
Tantivy query that filters results to visible documents
|
||||
|
||||
Implementation Notes:
|
||||
- Uses range_query instead of term_query for owner_id/viewer_id to work
|
||||
around a tantivy-py bug where Python ints are inferred as i64, causing
|
||||
term_query to return no hits on u64 fields.
|
||||
TODO: Replace with term_query once
|
||||
https://github.com/quickwit-oss/tantivy-py/pull/642 lands.
|
||||
|
||||
- Uses range_query(owner_id, 1, MAX_U64) as an "owner exists" check
|
||||
because exists_query is not yet available in tantivy-py 0.25.
|
||||
TODO: Replace with exists_query("owner_id") once that is exposed in
|
||||
a tantivy-py release.
|
||||
|
||||
- Uses disjunction_max_query to combine permission clauses with OR logic
|
||||
"""
|
||||
owner_any = tantivy.Query.range_query(
|
||||
schema,
|
||||
"owner_id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
1,
|
||||
_MAX_U64,
|
||||
)
|
||||
owner_any = tantivy.Query.exists_query("owner_id")
|
||||
no_owner = tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, tantivy.Query.all_query()),
|
||||
(tantivy.Occur.MustNot, owner_any),
|
||||
],
|
||||
)
|
||||
owned = tantivy.Query.range_query(
|
||||
schema,
|
||||
"owner_id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
user.pk,
|
||||
user.pk,
|
||||
)
|
||||
shared = tantivy.Query.range_query(
|
||||
schema,
|
||||
"viewer_id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
user.pk,
|
||||
user.pk,
|
||||
)
|
||||
owned = tantivy.Query.term_query(schema, "owner_id", user.pk)
|
||||
shared = tantivy.Query.term_query(schema, "viewer_id", user.pk)
|
||||
return tantivy.Query.disjunction_max_query([no_owner, owned, shared])
|
||||
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@ import json
|
||||
import logging
|
||||
import shutil
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Final
|
||||
from typing import cast
|
||||
|
||||
import tantivy
|
||||
from django.conf import settings
|
||||
@@ -13,7 +15,8 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = logging.getLogger("paperless.search")
|
||||
|
||||
SCHEMA_VERSION = 1
|
||||
# v1 - Initial tantivy schema format
|
||||
SCHEMA_VERSION: Final[int] = 1
|
||||
|
||||
|
||||
def build_schema() -> tantivy.Schema:
|
||||
@@ -172,7 +175,7 @@ def open_or_rebuild_index(index_dir: Path | None = None) -> tantivy.Index:
|
||||
Opened Tantivy index (caller must register custom tokenizers)
|
||||
"""
|
||||
if index_dir is None:
|
||||
index_dir = settings.INDEX_DIR
|
||||
index_dir = cast("Path", settings.INDEX_DIR)
|
||||
if not index_dir.exists():
|
||||
return tantivy.Index(build_schema())
|
||||
if needs_rebuild(index_dir):
|
||||
|
||||
@@ -24,13 +24,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
||||
backend = get_backend()
|
||||
searcher = backend._index.searcher()
|
||||
results = searcher.search(
|
||||
tantivy.Query.range_query(
|
||||
backend._schema,
|
||||
"id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
doc.pk,
|
||||
doc.pk,
|
||||
),
|
||||
tantivy.Query.term_query(backend._schema, "id", doc.pk),
|
||||
limit=1,
|
||||
)
|
||||
if results.hits:
|
||||
|
||||
@@ -3012,6 +3012,7 @@ typing = [
|
||||
{ name = "types-python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "types-pytz", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "types-redis", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "types-regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "types-setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
@@ -3081,7 +3082,7 @@ requires-dist = [
|
||||
{ name = "scikit-learn", specifier = "~=1.8.0" },
|
||||
{ name = "sentence-transformers", specifier = ">=4.1" },
|
||||
{ name = "setproctitle", specifier = "~=1.3.4" },
|
||||
{ name = "tantivy", specifier = ">=0.25.1" },
|
||||
{ name = "tantivy", specifier = "~=0.26.0" },
|
||||
{ name = "tika-client", specifier = "~=0.11.0" },
|
||||
{ name = "torch", specifier = "~=2.11.0", index = "https://download.pytorch.org/whl/cpu" },
|
||||
{ name = "watchfiles", specifier = ">=1.1.1" },
|
||||
@@ -3150,6 +3151,7 @@ typing = [
|
||||
{ name = "types-python-dateutil" },
|
||||
{ name = "types-pytz" },
|
||||
{ name = "types-redis" },
|
||||
{ name = "types-regex" },
|
||||
{ name = "types-setuptools" },
|
||||
]
|
||||
|
||||
@@ -4685,30 +4687,30 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "tantivy"
|
||||
version = "0.25.1"
|
||||
version = "0.26.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/1b/f9/0cd3955d155d3e3ef74b864769514dd191e5dacba9f0beb7af2d914942ce/tantivy-0.25.1.tar.gz", hash = "sha256:68a3314699a7d18fcf338b52bae8ce46a97dde1128a3e47e33fa4db7f71f265e", size = 75120, upload-time = "2025-12-02T11:57:12.997Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/57/74/ec8c3f7bb3599af86c19f1a774c37e36a6e7524d3563f3aeb99220981f6f/tantivy-0.26.0.tar.gz", hash = "sha256:7c9507fcc62bac4ef1d40b1ed37ff7fa07e44b5043b30288f63bcf4fdc62644a", size = 93615, upload-time = "2026-04-29T11:51:31.115Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/4e/7a/8a277f377e8a151fc0e71d4ffc1114aefb6e5e1c7dd609fed0955cf34ed8/tantivy-0.25.1-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:d363d7b4207d3a5aa7f0d212420df35bed18bdb6bae26a2a8bd57428388b7c29", size = 7637033, upload-time = "2025-12-02T11:56:18.104Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/71/31/8b4acdedfc9f9a2d04b1340d07eef5213d6f151d1e18da0cb423e5f090d2/tantivy-0.25.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8f4389cf1d889a1df7c5a3195806b4b56c37cee10d8a26faaa0dea35a867b5ff", size = 3932180, upload-time = "2025-12-02T11:56:19.833Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/dc/3e8499c21b4b9795e8f2fc54c68ce5b92905aaeadadaa56ecfa9180b11b1/tantivy-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99864c09fc54652c3c2486cdf13f86cdc8200f4b481569cb291e095ca5d496e5", size = 4197620, upload-time = "2025-12-02T11:56:21.496Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/8e/f2ce62fffc811eb62bead92c7b23c2e218f817cbd54c4f3b802e03ba1438/tantivy-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05abf37ddbc5063c575548be0d62931629c086bff7a5a1b67cf5a8f5ebf4cd8c", size = 4183794, upload-time = "2025-12-02T11:56:23.215Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/41/e7/6849c713ed0996c7628324c60512c4882006f0a62145e56c624a93407f90/tantivy-0.25.1-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:90fd919e5f611809f746560ecf36eb9be824dec62e21ae17a27243759edb9aa1", size = 7621494, upload-time = "2025-12-02T11:56:27.069Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c5/22/c3d8294600dc6e7fa350daef9ff337d3c06e132b81df727de9f7a50c692a/tantivy-0.25.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:4613c7cf6c23f3a97989819690a0f956d799354957de7a204abcc60083cebe02", size = 3925219, upload-time = "2025-12-02T11:56:29.403Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/41/fc/cbb1df71dd44c9110eff4eaaeda9d44f2d06182fe0452193be20ddfba93f/tantivy-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c477bd20b4df804d57dfc5033431bef27cde605695ae141b03abbf6ebc069129", size = 4198699, upload-time = "2025-12-02T11:56:31.359Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/47/4d/71abb78b774073c3ce12a4faa4351a9d910a71ffa3659526affba163873d/tantivy-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9b1a1ba1113c523c7ff7b10f282d6c4074006f7ef8d71e1d973d51bf7291ddb", size = 4183585, upload-time = "2025-12-02T11:56:33.317Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/25/73cfbcf1a8ea49be6c42817431cac46b70a119fe64da903fcc2d92b5b511/tantivy-0.25.1-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:f51ff7196c6f31719202080ed8372d5e3d51e92c749c032fb8234f012e99744c", size = 7622530, upload-time = "2025-12-02T11:56:36.839Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/12/c8/c0d7591cdf4f7e7a9fc4da786d1ca8cd1aacffaa2be16ea6d401a8e4a566/tantivy-0.25.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:550e63321bfcacc003859f2fa29c1e8e56450807b3c9a501c1add27cfb9236d9", size = 3925637, upload-time = "2025-12-02T11:56:38.425Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3a/09/bedfc223bffec7641b417dd7ab071134b2ef8f8550e9b1fb6014657ef52e/tantivy-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fde31cc8d6e122faf7902aeea32bc008a429a6e8904e34d3468126a3ec01b016", size = 4197322, upload-time = "2025-12-02T11:56:40.411Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f5/f1/1fa5183500c8042200c9f2b840d34f5bbcfb434a1ee750e7132262d2a5c9/tantivy-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b11bd5a518b0be645320b47af8493f6a40c4f3234313e37adcf4534a564d27dd", size = 4183143, upload-time = "2025-12-02T11:56:42.048Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8b/2f/581519492226f97d23bd0adc95dad991ebeaa73ea6abc8bff389a3096d9a/tantivy-0.25.1-cp313-cp313t-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:dae99e75b7eaa9bf5bd16ab106b416370f08c135aed0e117d62a3201cd1ffe36", size = 7610316, upload-time = "2025-12-02T11:56:45.927Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/91/40/5d7bc315ab9e6a22c5572656e8ada1c836cfa96dccf533377504fbc3c9d9/tantivy-0.25.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:506e9533c5ef4d3df43bad64ffecc0aa97c76e361ea610815dc3a20a9d6b30b3", size = 3919882, upload-time = "2025-12-02T11:56:48.469Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/02/b9/e0ef2f57a6a72444cb66c2ffbc310ab33ffaace275f1c4b0319d84ea3f18/tantivy-0.25.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dbd4f8f264dacbcc9dee542832da2173fd53deaaea03f082d95214f8b5ed6bc", size = 4196031, upload-time = "2025-12-02T11:56:50.151Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/02/bf3f8cacfd08642e14a73f7956a3fb95d58119132c98c121b9065a1f8615/tantivy-0.25.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:824c643ccb640dd9e35e00c5d5054ddf3323f56fe4219d57d428a9eeea13d22c", size = 4183437, upload-time = "2025-12-02T11:56:51.818Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/44/9f1d67aa5030f7eebc966c863d1316a510a971dd8bb45651df4acdfae9ed/tantivy-0.25.1-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7f5d29ae85dd0f23df8d15b3e7b341d4f9eb5a446bbb9640df48ac1f6d9e0c6c", size = 7623723, upload-time = "2025-12-02T11:56:55.066Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/db/30/6e085bd3ed9d12da3c91c185854abd70f9dfd35fb36a75ea98428d42c30b/tantivy-0.25.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:f2d2938fb69a74fc1bb36edfaf7f0d1596fa1264db0f377bda2195c58bcb6245", size = 3926243, upload-time = "2025-12-02T11:56:57.058Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/32/f5/a00d65433430f51718e5cc6938df571765d7c4e03aedec5aef4ab567aa9b/tantivy-0.25.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f5ff124c4802558e627091e780b362ca944169736caba5a372eef39a79d0ae0", size = 4207186, upload-time = "2025-12-02T11:56:58.803Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/19/63/61bdb12fc95f2a7f77bd419a5149bfa9f28caa76cb569bf2b6b06e1d033e/tantivy-0.25.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43b80ef62a340416139c93d19264e5f808da48e04f9305f1092b8ed22be0a5be", size = 4187312, upload-time = "2025-12-02T11:57:00.595Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/94/832f4ee26ed2b3aae69923e827183cb28af8239c893430fbe6240351df9c/tantivy-0.26.0-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5748304687042b2c643e30b646047f254e8a21441515929ea0e54c6de0ed493b", size = 8335068, upload-time = "2026-04-29T11:50:38.042Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d2/dc/88eb02ad37acb5b022ded4fdad984562b2246ec28fe16d55580e678a1251/tantivy-0.26.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d8617af05dfc030acca0d93c14594b247e18550d803359946cb8cf76620a767e", size = 4298822, upload-time = "2026-04-29T11:50:40.27Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/60/85/5a553d130be2704302c807fccebad575241f2a7bce2b765240ad9d4c697f/tantivy-0.26.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3dbb17edf7ee25c77f60e7b757488bb8a127c28fc6cac3d8484e2d1ae862d77f", size = 4612952, upload-time = "2026-04-29T11:50:42.644Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9a/c9/36c3e17d0a8efb2fab1ce0c748ddd1aa3234ead83aaa011fd5c6c780e043/tantivy-0.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62cfb03e3ca4cb0f41213acf0a3fd04ffe7f359ca6e3e91cab37c06799e68d3c", size = 4567017, upload-time = "2026-04-29T11:50:44.943Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/88/34/fbdcbcb6862cdea4f2ed7d62b99c4785b89560c1ae72558a6cec724cf64c/tantivy-0.26.0-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:a4676c1989d8d7e61ebc4d6d4b46224fa9351eedb6e077a815a5f7233c4e20b6", size = 8301385, upload-time = "2026-04-29T11:50:48.769Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d0/40/09e6a400d2505024a412d4c2a67f58ece4703799a53ba432215b90de3835/tantivy-0.26.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:df1e2f21f69ffe9ea12ca7f3f3900eadea4670cf6f8c15aa4fec0bb3a1185577", size = 4284335, upload-time = "2026-04-29T11:50:51.201Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/9d/1e7db895aa5cfac5609bd03cc7bf44a6f8b9f2892baf9225f9189121316d/tantivy-0.26.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2d22d53c9fd718b6d74eea3b0e8a26ec8bd7ca8a594ba15c7a38ac5e9e3d33b", size = 4610238, upload-time = "2026-04-29T11:50:53.01Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/05/205758c98a5e450ec22c08904dbf3fe10dce4d39c3e1c6a05c9daaa436be/tantivy-0.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff0765cfbc9e10e96b71c78074e29f25fee388e5bc8a0a7c8aa166bf80304769", size = 4570688, upload-time = "2026-04-29T11:50:55.057Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fb/56/aef45e8ec7fddbca4885516bc1d8cc61950f666ba8b44ef9e50b8db51f91/tantivy-0.26.0-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7509b06fab07d4209bf37759e3c0c407f6c53fa3184d694ec82416fc8d189e7d", size = 8301236, upload-time = "2026-04-29T11:50:59.299Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ef/3b/8ce1a1662e6e6c303a65055a42206853adfeaa14596e62b6f218b5af5526/tantivy-0.26.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:0c77962afc03f8a7081991fee088d09891acaa3401cdf882b1cc40d9d839a683", size = 4284342, upload-time = "2026-04-29T11:51:01.626Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/32/15/4f4e74669bdc26e7508d527f27be1a12d249e1d9ab6bba05f47ba1cdedfa/tantivy-0.26.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f017f6991752da8092c46c670e0eae349a1266d2315720ff5b252ed1ea4acd4", size = 4610161, upload-time = "2026-04-29T11:51:03.966Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/50/3e/4648f7fc34834f3f4c99465111f50add73f0517f21ceae7c81771942c99b/tantivy-0.26.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb7df28cc98497c767b86ebc5f167e3f2552739f748a1c28c10e14e7db726cd1", size = 4571125, upload-time = "2026-04-29T11:51:06.162Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/59/3d/a85ffd178a6b00813cd144e4af2db100023a5a7cd9cae3a5f998e74d5cbd/tantivy-0.26.0-cp313-cp313t-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7b9a350e59513e330fbcb0930ef2f574d86a8aa46047a1ad1c5b4c91838e3aec", size = 8296228, upload-time = "2026-04-29T11:51:10.844Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d6/81/c78c3ae37c52e1244da340139e11cca0d44ee742e227bba19ecefbbf54b5/tantivy-0.26.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:e1bdf6dcbd25fdc3244e6b09f5114253a81ec533ad5346eff135ee98668f04c7", size = 4279507, upload-time = "2026-04-29T11:51:13.065Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/39/df/a409dad06800793d7dd2cc831aeb46b808b4ce00df7d506e6760acee2cc2/tantivy-0.26.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9450186861d6f350ec5969ff2e349377d2ed5617f084a8ad0f303ed7a91a2e3", size = 4605550, upload-time = "2026-04-29T11:51:15.289Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bf/24/68245511021df5b19879d262ef2b1adf214ec67cc69761e1a83fd28571b1/tantivy-0.26.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e69569ec68c0e8d1c3aa3cf52423ced6b9b9b5bb0156e8b32088390919d41964", size = 4566249, upload-time = "2026-04-29T11:51:17.398Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/98/53/0c9ec136930dad07c8f24cd31210181e7ba2228aeea9fd75d751b9d716e5/tantivy-0.26.0-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7983206d75c7334fdcb1c49372adf6afe8aeb89ad2abfc7a5fd9701254735134", size = 8319819, upload-time = "2026-04-29T11:51:21.451Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/5b/35b7f2af5101883dde8f0523a69944bd5b2471ce399339e97e2e3775a227/tantivy-0.26.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:4a970d35402612208b077de5feb9da978380c173e7835335c50d15d0f13cbaba", size = 4289015, upload-time = "2026-04-29T11:51:23.435Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ef/4b/fbc1293fc8108eaa2f11718d77070e09cefe4aac4055fd2036548a776946/tantivy-0.26.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22f6acf0c4d9dff2aecb161071db921c9a0bd3e2485dbc8304fd4561fe50c6a7", size = 4622332, upload-time = "2026-04-29T11:51:25.483Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/86/68/d1ae76c42b523b076434a357b675c37fd9684ac0fc6e0e63f703a1f1014a/tantivy-0.26.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a92825f3215fc4b5ef55da8a577eb9ad30c7ee5c7371daea82329e67aa4c7d12", size = 4573494, upload-time = "2026-04-29T11:51:27.512Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -5233,6 +5235,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/55/82/7d25dce10aad92d2226b269bce2f85cfd843b4477cd50245d7d40ecf8f89/types_redis-4.6.0.20241004-py3-none-any.whl", hash = "sha256:ef5da68cb827e5f606c8f9c0b49eeee4c2669d6d97122f301d3a55dc6a63f6ed", size = 58737, upload-time = "2024-10-04T02:43:57.968Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "types-regex"
|
||||
version = "2026.4.4.20260408"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/92/42/d7c691fc5a8a8ecfba3f23c1c4c087a089af0767610d88c29201193d8f60/types_regex-2026.4.4.20260408.tar.gz", hash = "sha256:86b2975ff11b06e7f538839821510daea2566d9cb18bb8acde47834315409cf9", size = 13182, upload-time = "2026-04-08T04:31:11.887Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e1/92/e109654a804d11d9b60d67c7b29d64b2beac6b2e3209ea075e268e5a1021/types_regex-2026.4.4.20260408-py3-none-any.whl", hash = "sha256:d436bcc409abf9b06747b7e038014afc6d40ef7b72329655c353a1955534068f", size = 11116, upload-time = "2026-04-08T04:31:11.01Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "types-setuptools"
|
||||
version = "80.10.0.20260124"
|
||||
|
||||
Reference in New Issue
Block a user