Improve ingest metadata and auto-send

This commit is contained in:
Lee Nave
2026-01-02 21:16:15 +00:00
parent 9d53cb3fe5
commit 0f3c2efa5c
11 changed files with 274 additions and 52 deletions
+2 -3
View File
@@ -165,8 +165,8 @@ def create_app():
lm.anonymous_user = ub.Anonymous
lm.session_protection = 'strong' if config.config_session == 1 else "basic"
db.CalibreDB.update_config(config)
db.CalibreDB.setup_db(config.config_calibre_dir, cli_param.settings_path)
from .calibre_init import init_calibre_db_from_config
init_calibre_db_from_config(config, cli_param.settings_path)
calibre_db.init_db()
updater_thread.init_updater(config, web_server)
@@ -272,4 +272,3 @@ def create_app():
return app
+50
View File
@@ -0,0 +1,50 @@
import sqlite3
from cps import db, logger
log = logger.create()
# Fallback title-sort pattern, used by init_calibre_db_from_app_db() when
# app.db holds no config_title_regex value. Group 1 captures a leading
# article followed by whitespace.
DEFAULT_TITLE_SORT_REGEX = (
r'^(A|The|An|Der|Die|Das|Den|Ein|Eine|Einen|Dem|Des|Einem|Eines|Le|La|Les|L\'|Un|Une)\s+'
)
class _MinimalConfig:
    """Stand-in config object for processes that have no full ConfigSQL.

    It carries only the settings needed to open the Calibre library:
    ``config_title_regex`` and ``config_calibre_dir``.

    ``CalibreDB.setup_db`` calls ``config.invalidate(...)`` when the library
    cannot be opened and assigns ``config.db_configured`` on success, so this
    class provides both. Without them a failed open would raise
    ``AttributeError`` instead of returning cleanly.
    """

    def __init__(self, title_regex, calibre_dir):
        self.config_title_regex = title_regex
        self.config_calibre_dir = calibre_dir
        # Set to True by CalibreDB.setup_db once the library is opened.
        self.db_configured = False
        # Last error passed to invalidate(), kept for diagnostics only.
        self.last_error = None

    def invalidate(self, error=None):
        """Mark the library as not configured.

        Args:
            error: Optional exception describing why the database could not
                be opened; stored on ``last_error``.
        """
        self.db_configured = False
        self.last_error = error
def init_calibre_db_from_config(config, settings_path):
    """Set up CalibreDB from a config object that is already loaded.

    Does nothing when CalibreDB already has a session factory and a config
    with a non-empty ``config_title_regex``.

    Args:
        config: Loaded config object; its ``config_calibre_dir`` is passed
            to ``CalibreDB.setup_db``.
        settings_path: Forwarded to ``CalibreDB.setup_db`` as its second
            argument.

    Returns:
        True if CalibreDB was already initialized, otherwise whether a
        session factory exists after setup.
    """
    calibre = db.CalibreDB
    already_ready = calibre.session_factory and getattr(
        calibre.config, "config_title_regex", None
    )
    if already_ready:
        return True

    calibre.update_config(config)
    calibre.setup_db(config.config_calibre_dir, settings_path)
    return calibre.session_factory is not None
def init_calibre_db_from_app_db(app_db_path="/config/app.db"):
    """Initialize CalibreDB by reading config from app.db (for background workers).

    Reads ``config_calibre_dir`` and ``config_title_regex`` from the first
    row of the ``settings`` table and sets CalibreDB up with a minimal
    config built from them. Does nothing when CalibreDB already has a
    session factory and a config with a non-empty ``config_title_regex``.

    Args:
        app_db_path: Path to the application settings database.

    Returns:
        True if CalibreDB is (or already was) initialized. False if the
        settings could not be read, the library path is missing, or no
        session factory exists after setup.
    """
    from contextlib import closing

    if db.CalibreDB.session_factory and getattr(db.CalibreDB.config, "config_title_regex", None):
        return True

    calibre_dir = None
    title_regex = None
    try:
        # sqlite3's own context manager only commits or rolls back; it does
        # not close the connection. closing() releases the handle.
        with closing(sqlite3.connect(app_db_path, timeout=30)) as con:
            row = con.execute(
                "SELECT config_calibre_dir, config_title_regex FROM settings LIMIT 1"
            ).fetchone()
        if row:
            calibre_dir, title_regex = row[0], row[1]
    except Exception as e:
        log.error(f"Failed to read calibre settings from {app_db_path}: {e}")
        return False

    if not calibre_dir:
        log.error("Calibre library path missing in app.db; cannot initialize CalibreDB")
        return False

    # Use the built-in article pattern when no regex is stored.
    title_regex = title_regex or DEFAULT_TITLE_SORT_REGEX
    db.CalibreDB.update_config(_MinimalConfig(title_regex, calibre_dir))
    db.CalibreDB.setup_db(calibre_dir, app_db_path)
    return db.CalibreDB.session_factory is not None
+14 -9
View File
@@ -185,6 +185,7 @@ class ConfigSQL(object):
# pylint: disable=no-member
def __init__(self):
self.__dict__["dirty"] = list()
self.cli = None
def init_config(self, session, secret_key, cli):
self._session = session
@@ -235,21 +236,25 @@ class ConfigSQL(object):
return self._settings
def get_config_certfile(self):
if self.cli.certfilepath:
return self.cli.certfilepath
if self.cli.certfilepath == "":
return None
if self.cli:
if self.cli.certfilepath:
return self.cli.certfilepath
if self.cli.certfilepath == "":
return None
return self.config_certfile
def get_config_keyfile(self):
if self.cli.keyfilepath:
return self.cli.keyfilepath
if self.cli.certfilepath == "":
return None
if self.cli:
if self.cli.keyfilepath:
return self.cli.keyfilepath
if self.cli.certfilepath == "":
return None
return self.config_keyfile
def get_config_ipaddress(self):
return self.cli.ip_address or ""
if self.cli:
return self.cli.ip_address or ""
return ""
def _has_role(self, role_flag):
return constants.has_flag(self.config_default_role, role_flag)
+33 -12
View File
@@ -687,12 +687,14 @@ class CalibreDB:
cls.dispose()
if not config_calibre_dir:
cls.config.invalidate()
if cls.config:
cls.config.invalidate()
return None
dbpath = os.path.join(config_calibre_dir, "metadata.db")
if not os.path.exists(dbpath):
cls.config.invalidate()
if cls.config:
cls.config.invalidate()
return None
try:
@@ -717,10 +719,12 @@ class CalibreDB:
conn = cls.engine.connect()
# conn.text_factory = lambda b: b.decode(errors = 'ignore') possible fix for #1302
except Exception as ex:
cls.config.invalidate(ex)
if cls.config:
cls.config.invalidate(ex)
return None
cls.config.db_configured = True
if cls.config:
cls.config.db_configured = True
if not cc_classes:
try:
@@ -780,6 +784,22 @@ class CalibreDB:
self.ensure_session()
return self.session.query(Data).filter(Data.book == book_id).filter(Data.format == file_format).first()
def get_author_by_name(self, name):
    """Return the first Authors row whose name equals *name*, or None if there is none."""
    self.ensure_session()
    author_query = self.session.query(Authors)
    return author_query.filter(Authors.name == name).first()
def get_tag_by_name(self, name):
    """Return the first Tags row whose name equals *name*, or None if there is none."""
    self.ensure_session()
    tag_query = self.session.query(Tags)
    return tag_query.filter(Tags.name == name).first()
def get_series_by_name(self, name):
    """Return the first Series row whose name equals *name*, or None if there is none."""
    self.ensure_session()
    series_query = self.session.query(Series)
    return series_query.filter(Series.name == name).first()
def get_publisher_by_name(self, name):
    """Return the first Publishers row whose name equals *name*, or None if there is none."""
    self.ensure_session()
    publisher_query = self.session.query(Publishers)
    return publisher_query.filter(Publishers.name == name).first()
def set_metadata_dirty(self, book_id):
self.ensure_session()
if not self.session.query(Metadata_Dirtied).filter(Metadata_Dirtied.book == book_id).one_or_none():
@@ -1100,14 +1120,15 @@ class CalibreDB:
def create_functions(self, config=None):
self.ensure_session()
# user defined sort function for calibre databases (Series, etc.)
def _title_sort(title):
# calibre sort stuff
title_pat = re.compile(config.config_title_regex, re.IGNORECASE)
match = title_pat.search(title)
if match:
prep = match.group(1)
title = title[len(prep):] + ', ' + prep
return strip_whitespaces(title)
if config:
def _title_sort(title):
# calibre sort stuff
title_pat = re.compile(config.config_title_regex, re.IGNORECASE)
match = title_pat.search(title)
if match:
prep = match.group(1)
title = title[len(prep):] + ', ' + prep
return strip_whitespaces(title)
try:
# sqlalchemy <1.4.24 and sqlalchemy 2.0
+1 -1
View File
@@ -224,7 +224,7 @@ def send_mail(book_id, book_format, convert, ereader_mail, calibrepath, user_id,
for entry in iter(book.data):
if entry.format.upper() == book_format.upper():
converted_file_name = entry.name + '.' + book_format.lower()
link = '<a href="{}">{}</a>'.format(url_for('web.show_book', book_id=book_id), escape(book.title))
link = '<a href="/book/{}">{}</a>'.format(book_id, escape(book.title))
email_text = N_("%(book)s send to eReader", book=link)
for email in ereader_mail.split(','):
email = strip_whitespaces(email)
+38 -14
View File
@@ -6,10 +6,8 @@
# See CONTRIBUTORS for full list of authors.
import json
import os
from typing import Optional, List, Dict
from cps import logger, calibre_db, db, constants
from cps import logger, db
from cps.search_metadata import cl as metadata_providers
import sys
sys.path.insert(1, '/app/calibre-web-automated/scripts/')
@@ -17,7 +15,6 @@ from cwa_db import CWA_DB
log = logger.create()
def fetch_and_apply_metadata(book_id: int, user_enabled: bool = False) -> bool:
"""
Fetch metadata for a newly ingested book and apply it if settings allow.
@@ -30,6 +27,10 @@ def fetch_and_apply_metadata(book_id: int, user_enabled: bool = False) -> bool:
bool: True if metadata was successfully fetched and applied, False otherwise
"""
try:
if not db.CalibreDB.session_factory:
log.error("CalibreDB not initialized; skipping metadata fetch")
return False
# Check global settings (admin-controlled only)
cwa_db = CWA_DB()
cwa_settings = cwa_db.get_cwa_settings()
@@ -60,8 +61,7 @@ def fetch_and_apply_metadata(book_id: int, user_enabled: bool = False) -> bool:
provider_hierarchy = ["google", "douban", "dnb", "ibdb", "comicvine"]
# Global provider enablement map
from cps.cwa_functions import parse_metadata_providers_enabled
enabled_map = parse_metadata_providers_enabled(
enabled_map = _parse_metadata_providers_enabled(
cwa_settings.get('metadata_providers_enabled', '{}')
)
@@ -108,7 +108,7 @@ def fetch_and_apply_metadata(book_id: int, user_enabled: bool = False) -> bool:
return metadata_found
except Exception as e:
log.error(f"Error in fetch_and_apply_metadata: {e}")
log.error(f"Error in fetch_and_apply_metadata: {e}", exc_info=True)
return False
@@ -152,7 +152,7 @@ def _apply_metadata_to_book(book, metadata, calibre_db_instance) -> bool:
if author_name and author_name.strip():
author = calibre_db_instance.get_author_by_name(author_name.strip())
if not author:
author = db.Authors(name=author_name.strip(), sort=author_name.strip())
author = db.Authors(author_name.strip(), author_name.strip())
calibre_db_instance.session.add(author)
book.authors.append(author)
updated = True
@@ -166,14 +166,14 @@ def _apply_metadata_to_book(book, metadata, calibre_db_instance) -> bool:
if book.comments:
book.comments[0].text = metadata.description.strip()
else:
comment = db.Comments(text=metadata.description.strip(), book=book.id)
comment = db.Comments(metadata.description.strip(), book.id)
calibre_db_instance.session.add(comment)
updated = True
else:
if book.comments:
book.comments[0].text = metadata.description.strip()
else:
comment = db.Comments(text=metadata.description.strip(), book=book.id)
comment = db.Comments(metadata.description.strip(), book.id)
calibre_db_instance.session.add(comment)
updated = True
@@ -184,7 +184,7 @@ def _apply_metadata_to_book(book, metadata, calibre_db_instance) -> bool:
if not book.publishers or len(book.publishers) == 0:
publisher = calibre_db_instance.get_publisher_by_name(metadata.publisher.strip())
if not publisher:
publisher = db.Publishers(name=metadata.publisher.strip())
publisher = db.Publishers(metadata.publisher.strip(), metadata.publisher.strip())
calibre_db_instance.session.add(publisher)
book.publishers = [publisher]
updated = True
@@ -193,7 +193,7 @@ def _apply_metadata_to_book(book, metadata, calibre_db_instance) -> bool:
book.publishers.clear()
publisher = calibre_db_instance.get_publisher_by_name(metadata.publisher.strip())
if not publisher:
publisher = db.Publishers(name=metadata.publisher.strip())
publisher = db.Publishers(metadata.publisher.strip(), metadata.publisher.strip())
calibre_db_instance.session.add(publisher)
book.publishers = [publisher]
updated = True
@@ -216,7 +216,7 @@ def _apply_metadata_to_book(book, metadata, calibre_db_instance) -> bool:
hasattr(metadata, 'series') and metadata.series and metadata.series.strip()):
series = calibre_db_instance.get_series_by_name(metadata.series.strip())
if not series:
series = db.Series(name=metadata.series.strip(), sort=metadata.series.strip())
series = db.Series(metadata.series.strip(), metadata.series.strip())
calibre_db_instance.session.add(series)
book.series.clear()
book.series.append(series)
@@ -278,7 +278,7 @@ def _apply_metadata_to_book(book, metadata, calibre_db_instance) -> bool:
existing = True
break
if not existing:
new_identifier = db.Identifiers(type=identifier_type, val=identifier_value, book=book.id)
new_identifier = db.Identifiers(identifier_value, identifier_type, book.id)
calibre_db_instance.session.add(new_identifier)
book.identifiers.append(new_identifier)
updated = True
@@ -301,3 +301,27 @@ def _apply_metadata_to_book(book, metadata, calibre_db_instance) -> bool:
log.error(f"Error applying metadata to book {getattr(book, 'id', 'unknown')}: {e}")
calibre_db_instance.session.rollback()
return False
def _parse_metadata_providers_enabled(raw_value):
    """Parse the ``metadata_providers_enabled`` setting into a dict.

    Lightweight parser that avoids importing cwa_functions.

    Args:
        raw_value: A dict (returned as-is), a JSON string or UTF-8 bytes
            (optionally wrapped in one pair of single quotes), or anything
            else.

    Returns:
        The decoded dict. An empty dict is returned for None, blank input,
        unsupported types, invalid JSON, or JSON that is not an object.
    """
    if isinstance(raw_value, dict):
        return raw_value
    if isinstance(raw_value, bytes):
        raw_value = raw_value.decode('utf-8', errors='ignore')
    if not isinstance(raw_value, str):
        return {}

    text = raw_value.strip()
    # The stored value may be wrapped in one pair of single quotes.
    if text.startswith("'") and text.endswith("'"):
        text = text[1:-1]
    if not text:
        return {}

    try:
        parsed = json.loads(text)
    except (json.JSONDecodeError, ValueError, TypeError, AttributeError):
        return {}
    return parsed if isinstance(parsed, dict) else {}
+8 -1
View File
@@ -25,7 +25,7 @@ class IBDb(Metadata):
DESCRIPTION = "Internet Book Database"
META_URL = "https://ibdb.dev/"
BOOK_URL = "https://ibdb.dev/book/"
SEARCH_URL = "https://ibdb.dev/search?q="
SEARCH_URL = "https://ibdb.dev/api/search?q="
def search(
self, query: str, generic_cover: str = "", locale: str = "en"
@@ -40,6 +40,13 @@ class IBDb(Metadata):
try:
results = requests.get(IBDb.SEARCH_URL + query, timeout=15)
results.raise_for_status()
except requests.HTTPError as e:
status_code = getattr(e.response, "status_code", None)
if status_code == 501:
log.debug("IBDb search not implemented (501); skipping provider.")
return []
log.warning(e)
return []
except Exception as e:
log.warning(e)
return []
+2 -2
View File
@@ -97,6 +97,7 @@ def register_startup_tasks():
from datetime import datetime, timezone
db = CWA_DB()
delay_minutes = int(db.cwa_settings.get('auto_send_delay_minutes', 0) or 0)
pending = db.scheduled_get_pending_autosend()
for row in pending:
try:
@@ -117,7 +118,7 @@ def register_startup_tasks():
except Exception:
pass
if should_enqueue and bid is not None and uid is not None:
WorkerThread.add(u, TaskAutoSend(f"Auto-sending '{t}' to user's eReader(s)", bid, uid, config.auto_send_delay_minutes), hidden=False)
WorkerThread.add(u, TaskAutoSend(f"Auto-sending '{t}' to user's eReader(s)", bid, uid, delay_minutes), hidden=False)
job = scheduler.schedule(func=_rehydrate_enqueue, trigger=DateTrigger(run_date=run_at_local), name=f"rehydrated auto-send {schedule_id}")
try:
@@ -197,4 +198,3 @@ def should_task_be_running(start, duration):
def calclulate_end_time(start, duration):
    """Return the end time of a window that starts today at *start* o'clock.

    Args:
        start: Hour of day for the start of the window.
        duration: Length of the window in minutes.

    Returns:
        The current date and time with hour set to *start* and minute set
        to 0 (seconds and microseconds are left as they are), plus
        *duration* minutes.
    """
    begin = datetime.datetime.now().replace(hour=start, minute=0)
    whole_hours, leftover_minutes = divmod(duration, 60)
    span = datetime.timedelta(hours=whole_hours, minutes=leftover_minutes)
    return begin + span
+16
View File
@@ -44,6 +44,22 @@ $(function() {
});
}, 1000);
}
// Initialise the two "upcoming" tables with the same options, each only if
// it is present on the page. formatNoMatches returns an empty string.
['#upcomingtable', '#upcomingopstable'].forEach(function (selector) {
    var $table = $(selector);
    if ($table.length) {
        $table.bootstrapTable({
            formatNoMatches: function () {
                return '';
            },
            striped: true
        });
    }
});
$(document).on('click', '#select_all', function() {
$('#books-table').bootstrapTable('checkAll');
+3 -1
View File
@@ -93,7 +93,9 @@ class TaskAutoSend(CalibreTask):
self._handleError(f"Auto-send task failed: {str(e)}")
finally:
if 'calibre_db_instance' in locals():
calibre_db_instance.session.close()
session = getattr(calibre_db_instance, "session", None)
if session:
session.close()
@property
def name(self):
+107 -9
View File
@@ -201,6 +201,8 @@ try:
from cps.tasks.auto_send import TaskAutoSend
from cps.services.worker import WorkerThread
from cps import ub as _ub
from cps.calibre_init import init_calibre_db_from_app_db
init_calibre_db_from_app_db()
_CPS_AVAILABLE = True
print("[ingest-processor] Auto-send and metadata functionality available", flush=True)
except ImportError as e:
@@ -262,17 +264,36 @@ def get_internal_api_url(path):
port = '8083'
protocol = "http"
certfile = None
keyfile = None
if _cps_config:
certfile = _cps_config.get_config_certfile()
keyfile = _cps_config.get_config_keyfile()
if certfile and keyfile and os.path.isfile(certfile) and os.path.isfile(keyfile):
protocol = "https"
certfile = getattr(_cps_config, "config_certfile", None)
keyfile = getattr(_cps_config, "config_keyfile", None)
if not certfile and not keyfile:
try:
with sqlite3.connect("/config/app.db", timeout=30) as con:
cur = con.cursor()
row = cur.execute(
"SELECT config_certfile, config_keyfile FROM settings LIMIT 1"
).fetchone()
if row:
certfile, keyfile = row[0], row[1]
except Exception as e:
print(f"[ingest-processor] WARN: Could not read TLS settings from app.db: {e}", flush=True)
if certfile and keyfile and os.path.isfile(certfile) and os.path.isfile(keyfile):
protocol = "https"
if not path.startswith("/"):
path = "/" + path
return f"{protocol}://127.0.0.1:{port}{path}"
def get_internal_api_headers():
    """Build the headers sent with requests to the internal endpoints.

    Returns:
        A new dict holding one ``X-Forwarded-For`` entry that names the
        loopback address, so localhost-only endpoint checks are satisfied.
    """
    headers = {}
    headers["X-Forwarded-For"] = "127.0.0.1"
    return headers
class NewBookProcessor:
def __init__(self, filepath: str):
# Settings / DB
@@ -344,16 +365,34 @@ class NewBookProcessor:
# Track the last added Calibre book id(s) from calibredb output
self.last_added_book_id: int | None = None
self.last_added_book_ids: list[int] = []
self._title_sort_regex = self._get_title_sort_regex()
@staticmethod
def _get_title_sort_regex() -> str:
default_regex = (
r'^(A|The|An|Der|Die|Das|Den|Ein|Eine|Einen|Dem|Des|Einem|Eines|Le|La|Les|L\'|Un|Une)\s+'
)
try:
with sqlite3.connect("/config/app.db", timeout=30) as con:
cur = con.cursor()
row = cur.execute(
"SELECT config_title_regex FROM settings LIMIT 1"
).fetchone()
if row and row[0]:
return row[0]
except Exception as e:
print(f"[ingest-processor] WARN: Could not read config_title_regex from app.db: {e}", flush=True)
return default_regex
@staticmethod
def _parse_added_book_ids(output: str) -> list[int]:
"""Parse calibredb stdout for the 'Added book ids: X[, Y, ...]' line and return IDs.
"""Parse calibredb stdout for the 'Added/Merged/Updated book ids: X[, Y, ...]' line and return IDs.
Handles variations like 'Added book id: 4' or 'Added book ids: 4, 5'.
Handles variations like 'Added book id: 4' or 'Merged book ids: 4, 5'.
"""
try:
import re
m = re.search(r"Added book id[s]?:\s*([0-9,\s]+)", output, flags=re.IGNORECASE)
m = re.search(r"(?:Added|Merged|Updated) book id[s]?:\s*([0-9,\s]+)", output, flags=re.IGNORECASE)
if not m:
return []
nums = m.group(1)
@@ -361,6 +400,47 @@ class NewBookProcessor:
return ids
except Exception:
return []
def _fallback_last_added_book_id(self) -> None:
    """Fallback to the most recently modified book when calibredb output lacks IDs.

    Does nothing if ``self.last_added_book_id`` is already set. Otherwise it
    queries ``self.metadata_db`` for the book with the newest
    ``last_modified`` value and stores that id in ``last_added_book_id`` and
    ``last_added_book_ids``. When several books share the same
    ``last_modified``, the highest id is chosen. Failures are reported as
    warnings and leave the attributes untouched.
    """
    from contextlib import closing

    if self.last_added_book_id is not None:
        return
    try:
        # sqlite3's own context manager only commits or rolls back; it does
        # not close the connection. closing() releases the handle.
        with closing(sqlite3.connect(self.metadata_db, timeout=30)) as con:
            # id DESC makes the pick deterministic when last_modified ties.
            row = con.execute(
                "SELECT id FROM books ORDER BY last_modified DESC, id DESC LIMIT 1"
            ).fetchone()
        if row:
            self.last_added_book_id = int(row[0])
            self.last_added_book_ids = [self.last_added_book_id]
            print(
                "[ingest-processor] WARN: Could not parse calibredb output; using most recently modified book ID.",
                flush=True,
            )
    except Exception as e:
        print(f"[ingest-processor] WARN: Failed to infer book ID after import: {e}", flush=True)
def _register_title_sort_function(self, connection: sqlite3.Connection) -> bool:
    """Install a one-argument ``title_sort`` SQL function on *connection*.

    The function moves a leading prefix matched by group 1 of
    ``self._title_sort_regex`` (case-insensitive) to the end of the title
    after a comma, then collapses runs of whitespace. NULL is treated as an
    empty title.

    Args:
        connection: Raw SQLite connection to register the function on.

    Returns:
        True if the function was registered. False if the regex could not
        be compiled or registration failed; a warning is printed.
    """
    try:
        import re
        article_re = re.compile(self._title_sort_regex, re.IGNORECASE)

        def _title_sort(title):
            text = "" if title is None else title
            found = article_re.search(text)
            if found:
                article = found.group(1)
                text = text[len(article):] + ', ' + article
            # split/join trims the ends and collapses inner whitespace.
            return " ".join(str(text).split())

        connection.create_function("title_sort", 1, _title_sort)
    except Exception as e:
        print(f"[ingest-processor] WARN: Could not register title_sort function: {e}", flush=True)
        return False
    return True
def get_split_library(self) -> dict[str, str] | None:
"""Checks whether or not the user has split library enabled. Returns None if they don't and the path of the Split Library location if True."""
with sqlite3.connect("/config/app.db", timeout=30) as con:
@@ -604,6 +684,8 @@ class NewBookProcessor:
if added_ids:
self.last_added_book_ids = added_ids
self.last_added_book_id = added_ids[-1]
else:
self._fallback_last_added_book_id()
else: # audiobook path
meta = audiobook.get_audio_file_info(str(staged_path), format, os.path.basename(str(staged_path)), False)
@@ -649,6 +731,8 @@ class NewBookProcessor:
if added_ids:
self.last_added_book_ids = added_ids
self.last_added_book_id = added_ids[-1]
else:
self._fallback_last_added_book_id()
print(f"[ingest-processor] Added {staged_path.stem} to Calibre database", flush=True)
if self.cwa_settings['auto_backup_imports']:
@@ -688,6 +772,9 @@ class NewBookProcessor:
try:
with sqlite3.connect(self.metadata_db, timeout=30) as con:
cur = con.cursor()
if not self._register_title_sort_function(con):
print("[ingest-processor] INFO: Skipping timestamp adjust (title_sort SQL function unavailable).", flush=True)
return
# pre_import_max_timestamp may be None (empty library) -> update all rows where timestamp < last_modified
if pre_import_max_timestamp is None:
cur.execute('UPDATE books SET timestamp = last_modified WHERE timestamp < last_modified')
@@ -850,7 +937,13 @@ class NewBookProcessor:
'username': username,
'title': actual_title,
}
resp = requests.post(url, json=payload, timeout=5, verify=False)
resp = requests.post(
url,
json=payload,
headers=get_internal_api_headers(),
timeout=5,
verify=False,
)
if resp.status_code == 200:
try:
run_at = resp.json().get('run_at', 'soon')
@@ -976,7 +1069,12 @@ class NewBookProcessor:
try:
url = get_internal_api_url("/cwa-internal/reconnect-db")
print("[ingest-processor] Refreshing Calibre-Web database session...", flush=True)
resp = requests.post(url, timeout=5, verify=False)
resp = requests.post(
url,
headers=get_internal_api_headers(),
timeout=5,
verify=False,
)
if resp.status_code == 200:
print("[ingest-processor] Database session refresh enqueued", flush=True)
else: