Files
Calibre-Web-Automated/cps/uploader.py
T
2025-08-26 20:06:18 +02:00

271 lines
9.3 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
# Calibre-Web Automated fork of Calibre-Web
# Copyright (C) 2018-2025 Calibre-Web contributors
# Copyright (C) 2024-2025 Calibre-Web Automated contributors
# SPDX-License-Identifier: GPL-3.0-or-later
# See CONTRIBUTORS for full list of authors.
import os
import hashlib
from flask_babel import gettext as _
from . import logger, comic, isoLanguages
from .constants import BookMeta
from .helper import split_authors
from .file_helper import get_temp_dir
from .string_helper import strip_whitespaces
log = logger.create()
# Optional dependency: Wand (ImageMagick binding). pdf_preview() needs Image,
# Color and PolicyError; get_magick_version() needs ImageVersion. When the
# import fails the module falls back to "no generated PDF cover".
try:
    from wand.image import Image, Color
    from wand import version as ImageVersion
    from wand.exceptions import PolicyError
except (ImportError, RuntimeError) as e:
    log.debug('Cannot import Image, generating pdf covers for pdf uploads will not work: %s', e)
    use_generic_pdf_cover = True
else:
    # All three imports succeeded, so real cover rendering is available.
    use_generic_pdf_cover = False
# Optional dependency: a PDF reader for metadata extraction. Candidates are
# tried in order of preference: pypdf, then PyPDF2, then PyPDF3.
# NullObject is always imported from the *same* package as the reader: the
# fallbacks only run because pypdf is missing, so importing NullObject from
# pypdf there would make every fallback fail as well.
try:
    from pypdf import PdfReader
    from pypdf.generic import NullObject
    use_pdf_meta = True
except ImportError as pypdf_error:
    log.debug('PyPDF is recommended for best performance in metadata extracting from pdf files: %s', pypdf_error)
    try:
        from PyPDF2 import PdfReader
        from PyPDF2.generic import NullObject
        use_pdf_meta = True
    except ImportError as pypdf2_error:
        log.debug('PyPDF is recommended for best performance in metadata extracting from pdf files: %s',
                  pypdf2_error)
        log.debug('PyPdf2 is also possible for metadata extracting from pdf files, but not recommended anymore')
        try:
            from PyPDF3 import PdfFileReader as PdfReader
            from PyPDF3.generic import NullObject
            use_pdf_meta = True
        except ImportError as pypdf3_error:
            # The message has two placeholders (PyPDF3 / PyPDF2), so both
            # import errors must be supplied.
            log.debug('Cannot import PyPDF3/PyPDF2, extracting pdf metadata will not work: %s / %s',
                      pypdf3_error, pypdf2_error)
            use_pdf_meta = False
# Optional format-specific metadata extractors. Each flag records whether the
# matching sibling module could be imported; process() checks the flag before
# dispatching to that module.
try:
    from . import epub
except ImportError as e:
    log.debug('Cannot import epub, extracting epub metadata will not work: %s', e)
    use_epub_meta = False
else:
    use_epub_meta = True

try:
    from . import fb2
except ImportError as e:
    log.debug('Cannot import fb2, extracting fb2 metadata will not work: %s', e)
    use_fb2_meta = False
else:
    use_fb2_meta = True

try:
    from . import audio
except ImportError as e:
    log.debug('Cannot import mutagen, extracting audio metadata will not work: %s', e)
    use_audio_meta = False
else:
    use_audio_meta = True
def process(tmp_file_path, original_file_name, original_file_extension, rar_executable, no_cover=False):
    """Extract metadata for an uploaded file, dispatching on its extension.

    The extension is compared case-insensitively. PDF, EPUB/KEPUB, FB2, comic
    archives and audio files are handed to their format-specific extractor
    (EPUB, FB2 and audio only when the matching module was importable). Any
    other extension, or any exception raised by an extractor, leaves the
    result of default_meta() in place.

    :param tmp_file_path: path of the stored upload
    :param original_file_name: file name without extension; used as title fallback
    :param original_file_extension: extension including the leading dot
    :param rar_executable: forwarded to the comic extractor
    :param no_cover: forwarded to the extractors to skip cover processing
    :return: metadata record whose title and author are never blank
    """
    epub_formats = (".KEPUB", ".EPUB")
    comic_formats = ('.CBZ', '.CBT', '.CBR', ".CB7")
    audio_formats = (".MP3", ".OGG", ".FLAC", ".WAV", ".AAC", ".AIFF", ".ASF", ".MP4",
                     ".M4A", ".M4B", ".OGV", ".OPUS")

    meta = default_meta(tmp_file_path, original_file_name, original_file_extension)
    ext = original_file_extension.upper()
    try:
        if ext == ".PDF":
            meta = pdf_meta(tmp_file_path, original_file_name, original_file_extension, no_cover)
        elif ext in epub_formats and use_epub_meta is True:
            meta = epub.get_epub_info(tmp_file_path, original_file_name, original_file_extension, no_cover)
        elif ext == ".FB2" and use_fb2_meta is True:
            meta = fb2.get_fb2_info(tmp_file_path, original_file_extension)
        elif ext in comic_formats:
            meta = comic.get_comic_info(
                tmp_file_path, original_file_name, original_file_extension, rar_executable, no_cover)
        elif ext in audio_formats and use_audio_meta:
            # Note the argument order: extension comes before the file name here.
            meta = audio.get_audio_file_info(tmp_file_path, original_file_extension, original_file_name, no_cover)
    except Exception as ex:
        # Best effort: a broken file still gets uploaded with default metadata.
        log.warning('cannot parse metadata, using default: %s', ex)

    # Normalise blank fields so callers always receive a usable title/author.
    title_is_blank = not strip_whitespaces(meta.title)
    if title_is_blank:
        meta = meta._replace(title=original_file_name)

    author_is_unknown = not strip_whitespaces(meta.author) or meta.author.lower() == 'unknown'
    if author_is_unknown:
        meta = meta._replace(author=_('Unknown'))
    return meta
def default_meta(tmp_file_path, original_file_name, original_file_extension):
    """Build the fallback metadata record used when nothing can be extracted.

    The title is the original file name, the author is the translated
    'Unknown', there is no cover, the identifier list is empty and every
    remaining text field is an empty string.
    """
    # All purely textual fields start out empty.
    blank_fields = dict.fromkeys(
        ('description', 'tags', 'series', 'series_id', 'languages', 'publisher', 'pubdate'),
        "",
    )
    return BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=original_file_name,
        author=_('Unknown'),
        cover=None,
        identifiers=[],
        **blank_fields
    )
def _xmp_default_text(alternatives):
    """Pick one string out of an XMP language-alternative mapping.

    Prefers the 'x-default' entry. If the mapping has no such key (for
    example only a language-specific entry exists) the first available value
    is used instead. Returns '' for an empty or missing mapping.
    """
    if not alternatives:
        return ''
    if 'x-default' in alternatives:
        return alternatives['x-default']
    return next(iter(alternatives.values()))


def parse_xmp(pdf_file):
    """
    Parse XMP Metadata and prepare for BookMeta object

    :param pdf_file: reader object exposing an ``xmp_metadata`` attribute
    :return: dict with the keys 'author' (list), 'title', 'subject', 'tags',
             'languages' (list) and 'publisher', or None when the XMP packet
             is absent or cannot be read
    """
    try:
        xmp_info = pdf_file.xmp_metadata
    except Exception as ex:
        log.debug('Can not read PDF XMP metadata {}'.format(ex))
        return None

    if not xmp_info:
        return None

    try:
        xmp_author = xmp_info.dc_creator  # list
    except AttributeError:
        xmp_author = ['Unknown']
    if xmp_author is None:
        # An absent creator must stay iterable for the caller; an empty list
        # yields an empty author string there.
        xmp_author = []

    # Language alternatives do not always carry an 'x-default' entry; a plain
    # ['x-default'] lookup would raise KeyError and discard all metadata.
    xmp_title = _xmp_default_text(xmp_info.dc_title)
    xmp_description = _xmp_default_text(xmp_info.dc_description)

    languages = []
    try:
        # "or []" guards against the property being None instead of a list.
        for code in xmp_info.dc_language or []:
            languages.append(isoLanguages.get_lang3(code))
    except AttributeError:
        languages.append('')

    xmp_tags = ', '.join(xmp_info.dc_subject or [])
    xmp_publisher = ', '.join(xmp_info.dc_publisher or [])

    return {'author': xmp_author,
            'title': xmp_title,
            'subject': xmp_description,
            'tags': xmp_tags,
            'languages': languages,
            'publisher': xmp_publisher
            }
def pdf_meta(tmp_file_path, original_file_name, original_file_extension, no_cover_processing):
    """Extract metadata from a PDF file.

    XMP metadata takes precedence. Fields that XMP leaves empty are filled
    from the PDF DocumentInfo dictionary, and the title finally falls back to
    the original file name. Without a usable PDF library (``use_pdf_meta`` is
    False) only defaults are returned.

    :param tmp_file_path: path of the stored PDF
    :param original_file_name: file name without extension; title fallback
    :param original_file_extension: extension stored in the result
    :param no_cover_processing: when true, no cover image is rendered
    :return: BookMeta describing the PDF
    """
    doc_info = None
    xmp_info = None
    create_date = ""

    if use_pdf_meta:
        with open(tmp_file_path, 'rb') as f:
            pdf_file = PdfReader(f)
            try:
                doc_info = pdf_file.metadata
            except Exception as exc:
                log.debug('Can not read PDF DocumentInfo {}'.format(exc))
            xmp_info = parse_xmp(pdf_file)

    if xmp_info:
        author = ' & '.join(split_authors(xmp_info['author']))
        title = xmp_info['title']
        subject = xmp_info['subject']
        tags = xmp_info['tags']
        languages = xmp_info['languages']
        publisher = xmp_info['publisher']
    else:
        author = 'Unknown'
        title = ''
        languages = [""]
        publisher = ""
        subject = ""
        tags = ""

    if doc_info:
        # 'Unknown' is only a placeholder (it is also the non-XMP default
        # above), so it must not block the DocumentInfo author.
        if author in ('', 'Unknown'):
            author = ' & '.join(split_authors([doc_info.author])) if doc_info.author else 'Unknown'
        if title == '':
            title = doc_info.title if doc_info.title else original_file_name
        if subject == '':
            subject = doc_info.subject or ""
        if tags == '' and '/Keywords' in doc_info:
            keywords = doc_info['/Keywords']
            if not isinstance(keywords, NullObject):
                if isinstance(keywords, bytes):
                    tags = keywords.decode('utf-8')
                else:
                    tags = keywords
        try:
            # A missing date must stay '' rather than become the text 'None'.
            create_date = doc_info.creation_date or ""
        except Exception as exc:
            # An unparsable or unsupported creation date must not throw away
            # the metadata collected so far.
            log.debug('Can not read PDF creation date {}'.format(exc))
    elif not title:
        # Only fall back to the file name when XMP did not supply a title.
        title = original_file_name

    return BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=title,
        author=author,
        cover=pdf_preview(tmp_file_path, original_file_name) if not no_cover_processing else None,
        description=subject,
        tags=tags,
        series="",
        series_id="",
        languages=','.join(languages),
        publisher=publisher,
        pubdate=str(create_date),
        identifiers=[])
def pdf_preview(tmp_file_path, tmp_dir):
    """Render the first page of a PDF to ``cover.jpg`` next to the PDF.

    :param tmp_file_path: path of the PDF to render
    :param tmp_dir: kept for backward compatibility; not used to build the
                    output path (see the comment at the save call)
    :return: path of the written cover image, or None when Wand/ImageMagick
             is unavailable or the rendering fails
    """
    if use_generic_pdf_cover:
        return None
    try:
        cover_file_name = os.path.join(os.path.dirname(tmp_file_path), "cover.jpg")
        with Image() as img:
            img.options["pdf:use-cropbox"] = "true"
            # '[0]' selects the first page only.
            img.read(filename=tmp_file_path + '[0]', resolution=150)
            img.compression_quality = 88
            if img.alpha_channel:
                # Flatten transparency onto white.
                img.alpha_channel = 'remove'
                img.background_color = Color('white')
            # Save to exactly the path that is returned. Prefixing tmp_dir
            # only had an effect for relative paths, where it made the saved
            # and the returned location disagree.
            img.save(filename=cover_file_name)
        return cover_file_name
    except PolicyError as ex:
        log.warning('Pdf extraction forbidden by Imagemagick policy: %s', ex)
        return None
    except Exception as ex:
        log.warning('Cannot extract cover image, using default: %s', ex)
        log.warning('On Windows this error could be caused by missing ghostscript')
        return None
def get_magick_version():
    """Report the ImageMagick version as a one-entry dict.

    The value under 'Image Magick' is the version exposed by Wand, or
    'not installed' when Wand could not be imported.
    """
    if use_generic_pdf_cover:
        magick_version = 'not installed'
    else:
        magick_version = ImageVersion.MAGICK_VERSION
    return {'Image Magick': magick_version}
def upload(uploadfile, rar_excecutable):
    """Store an uploaded file in the temp directory and extract its metadata.

    The file is saved under the MD5 hex digest of its original file name, and
    the name stem and extension are passed on to process().

    :param uploadfile: upload object providing ``filename`` and ``save(path)``
    :param rar_excecutable: forwarded unchanged to process()
    :return: whatever process() returns for the stored file
    """
    target_dir = get_temp_dir()
    original_name = uploadfile.filename
    stem, suffix = os.path.splitext(original_name)

    # The digest only derives a file-system-safe name; it is not a security
    # measure (hence the nosec marker).
    name_digest = hashlib.md5(original_name.encode('utf-8')).hexdigest()  # nosec
    tmp_file_path = os.path.join(target_dir, name_digest)

    log.debug("Temporary file: %s", tmp_file_path)
    uploadfile.save(tmp_file_path)
    return process(tmp_file_path, stem, suffix, rar_excecutable)