Files
Calibre-Web-Automated/scripts/ingest-processor.py
T

182 lines
7.9 KiB
Python

import atexit
import json
import os
import subprocess
import sys
import tempfile
import time
import shutil
from pathlib import Path
from cwa_db import CWA_DB
# Single source of truth for the lock file location (used for both creation and removal)
LOCK_FILE_PATH = os.path.join(tempfile.gettempdir(), 'ingest-processor.lock')

# Creates a lock file unless one already exists, meaning an instance of the script is
# already running. In that case the user is notified and the program exits with code 2.
# Mode 'x' (exclusive creation) makes the "check and create" step a single operation.
try:
    lock = open(LOCK_FILE_PATH, 'x')
    lock.close()
except FileExistsError:
    print("CANCELLING... ingest-processor initiated but is already running")
    sys.exit(2)


# Defining function to delete the lock on script exit
def removeLock():
    """Delete the lock file created at startup so the next run can start."""
    try:
        os.remove(LOCK_FILE_PATH)
    except FileNotFoundError:
        # The lock is already gone, which is the state we want at exit anyway
        pass


# Will automatically run when the script exits. Registered directly after the lock is
# acquired so that a failure in any later setup step cannot leave a stale lock behind
# that would block every subsequent run.
atexit.register(removeLock)

# Make sure required directories are present
required_directories = [
    "/config/.cwa_conversion_tmp",
    "/config/processed_books",
    "/config/processed_books/imported",
    "/config/processed_books/failed",
    "/config/processed_books/converted"
]
for directory in required_directories:
    Path(directory).mkdir(exist_ok=True)
    # Best-effort ownership fix: the exit status is deliberately ignored (check=False)
    subprocess.run(["chown", "-R", "abc:abc", directory], check=False)
class NewBookProcessor:
    """Handles one new file from the ingest folder.

    Provides the steps used by the script's entry point: checking whether the
    file can be converted, converting it to epub with ``ebook-convert``, adding
    a book to the Calibre library with ``calibredb add``, and cleaning up the
    ingest folder and the temporary conversion directory afterwards.
    """

    def __init__(self, filepath: str):
        """Load settings and directories and record the file to process.

        Args:
            filepath: Path of the book we're targeting.
        """
        self.db = CWA_DB()
        self.cwa_settings = self.db.cwa_settings
        self.supported_book_formats = ['azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txtz']
        # Input formats ordered by their chance of a successful conversion (best first)
        self.hierarchy_of_success = ['lit', 'mobi', 'azw', 'epub', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz']
        self.ingest_folder, self.library_dir, self.tmp_conversion_dir = self.get_dirs("/app/calibre-web-automated/dirs.json")
        self.filepath = filepath  # path of the book we're targeting
        self.filename = os.path.basename(filepath)
        # Case-insensitive so that e.g. "Book.EPUB" is recognised as an epub as well
        self.is_epub: bool = self.filepath.lower().endswith('.epub')

    def get_dirs(self, dirs_json_path: str) -> tuple[str, str, str]:
        """Read the ingest, library and temporary conversion directories from a JSON file.

        Args:
            dirs_json_path: Path of a JSON file containing the keys
                ``ingest_folder``, ``calibre_library_dir`` and ``tmp_conversion_dir``.

        Returns:
            The three directories, in that order, each with a trailing ``/`` appended.
        """
        with open(dirs_json_path, 'r') as f:
            dirs: dict[str, str] = json.load(f)

        ingest_folder = f"{dirs['ingest_folder']}/"
        library_dir = f"{dirs['calibre_library_dir']}/"
        tmp_conversion_dir = f"{dirs['tmp_conversion_dir']}/"

        return ingest_folder, library_dir, tmp_conversion_dir

    @staticmethod
    def _run_tool(command: list[str]) -> None:
        """Run an external command, capturing its stderr.

        stderr is captured so that a ``subprocess.CalledProcessError`` raised for
        a non-zero exit status carries the tool's error output in ``e.stderr``
        (it is ``None`` when stderr is not captured). On success the captured
        stderr is echoed so that no output is lost.

        Raises:
            subprocess.CalledProcessError: If the command exits with a non-zero status.
        """
        completed = subprocess.run(command, check=True, stderr=subprocess.PIPE, text=True, errors="replace")
        if completed.stderr:
            print(completed.stderr, file=sys.stderr, end="")

    def convert_book(self, import_format: str) -> tuple[bool, str]:
        """Convert the target file to epub with the calibre converter tool.

        Runs ``ebook-convert <input file> <output file>`` and writes the
        resulting epub to the temporary conversion directory. On success a
        conversion entry is added to the database and, when the
        ``auto_backup_conversions`` setting is enabled, the original file is
        copied to ``/config/processed_books/converted``. On failure the original
        file is copied to ``/config/processed_books/failed``.

        Args:
            import_format: Format of the file being converted (recorded in the database).

        Returns:
            ``(True, <path of the new epub>)`` on success, ``(False, "")`` on failure.
        """
        print(f"[ingest-processor]: START_CON: Converting {self.filename}...\n")
        original_filepath = Path(self.filepath)
        target_filepath = f"{self.tmp_conversion_dir}{original_filepath.stem}.epub"
        try:
            # Monotonic clock: the elapsed time is unaffected by system clock adjustments
            t_convert_book_start = time.monotonic()
            self._run_tool(['ebook-convert', self.filepath, target_filepath])
            time_book_conversion = time.monotonic() - t_convert_book_start
            print(f"\n[ingest-processor]: END_CON: Conversion of {self.filename} complete in {time_book_conversion:.2f} seconds.\n")

            if self.cwa_settings['auto_backup_conversions']:
                shutil.copyfile(self.filepath, f"/config/processed_books/converted/{original_filepath.name}")

            self.db.conversion_add_entry(original_filepath.stem,
                                         import_format,
                                         str(self.cwa_settings["auto_backup_conversions"]))

            return True, target_filepath

        except subprocess.CalledProcessError as e:
            print(f"[ingest-processor]: CON_ERROR: {self.filename} could not be converted to epub due to the following error:\nEXIT/ERROR CODE: {e.returncode}\n{e.stderr}")
            shutil.copyfile(self.filepath, f"/config/processed_books/failed/{original_filepath.name}")
            return False, ""

    def can_convert_check(self) -> tuple[bool, str]:
        """Check whether the target file is in a format that can be converted.

        The file extension is compared, case-insensitively, against the input
        format hierarchy list (ordered by chance of successful conversion).

        Returns:
            ``(True, <matching format>)`` if the extension is in the hierarchy,
            otherwise ``(False, "")``.
        """
        lowered_path = self.filepath.lower()
        for book_format in self.hierarchy_of_success:
            if lowered_path.endswith(f'.{book_format}'):
                return True, book_format

        return False, ''

    def delete_current_file(self) -> None:
        """Deletes file just processed from ingest folder"""
        os.remove(self.filepath)  # Removes processed file
        subprocess.run(["find", f"{self.ingest_folder}", "-type", "d", "-empty", "-delete"])  # Removes any now empty folders

    def add_book_to_library(self, book_path) -> None:
        """Add a book to the Calibre library with ``calibredb add``.

        On success an import entry is added to the database and, when the
        ``auto_backup_imports`` setting is enabled, the book is copied to
        ``/config/processed_books/imported``. On failure the book is copied to
        ``/config/processed_books/failed``.

        Args:
            book_path: Path of the book file to import.
        """
        print("[ingest-processor]: Importing new epub to CWA...")
        import_path = Path(book_path)
        import_filename = os.path.basename(book_path)
        try:
            self._run_tool(["calibredb", "add", book_path, f"--library-path={self.library_dir}"])
            print(f"[ingest-processor] Added {import_path.stem} to Calibre database")

            if self.cwa_settings['auto_backup_imports']:
                shutil.copyfile(book_path, f"/config/processed_books/imported/{import_filename}")

            self.db.import_add_entry(import_path.stem,
                                     str(self.cwa_settings["auto_backup_imports"]))

        except subprocess.CalledProcessError as e:
            print(f"[ingest-processor] {import_path.stem} was not able to be added to the Calibre Library due to the following error:\nCALIBREDB EXIT/ERROR CODE: {e.returncode}\n{e.stderr}")
            shutil.copyfile(book_path, f"/config/processed_books/failed/{import_filename}")

    def empty_tmp_con_dir(self) -> None:
        """Delete every regular file directly inside the temporary conversion directory.

        Subdirectories are left in place. Any ``OSError`` is reported with a
        message instead of being raised.
        """
        try:
            for entry in os.listdir(self.tmp_conversion_dir):
                entry_path = os.path.join(self.tmp_conversion_dir, entry)
                if os.path.isfile(entry_path):
                    os.remove(entry_path)
        except OSError:
            print(f"Error occurred while emptying {self.tmp_conversion_dir}.")
def main(filepath=None):
    """Process a new file, or every entry of a new directory, from the ingest folder.

    Epub files are added to the library directly; other files are converted to
    epub first when their format allows it. The processed file is removed from
    the ingest folder afterwards in every case.

    Args:
        filepath: Path of the file or directory to process. Defaults to the
            first command-line argument, looked up when the function is called.
    """
    if filepath is None:
        # Resolved at call time: a default of sys.argv[1] in the signature would be
        # evaluated when the function is defined and raise IndexError without arguments
        if len(sys.argv) < 2:
            print("[ingest-processor]: No file or directory path was provided.")
            sys.exit(1)
        filepath = sys.argv[1]

    # Check if filepath is a directory
    # If it is, main will be ran on every file in the given directory
    # Inotifywait won't detect files inside folders if the folder was moved rather than copied
    if os.path.isdir(filepath):
        entries = os.listdir(filepath)
        print(entries)
        for filename in entries:
            main(os.path.join(filepath, filename))
        return

    nbp = NewBookProcessor(filepath)

    if not nbp.is_epub:  # Books require conversion
        print(f"\n[ingest-processor]: Starting conversion process for {nbp.filename}...")
        can_convert, import_format = nbp.can_convert_check()

        if can_convert:
            print(f"[ingest-processor]: Converting file from {import_format} to epub format...\n")
            result, epub_filepath = nbp.convert_book(import_format)
            if result:
                nbp.add_book_to_library(epub_filepath)
                nbp.empty_tmp_con_dir()
        else:
            # import_format is empty when no format matched, so report the file's own extension
            file_format = os.path.splitext(nbp.filename)[1].lstrip('.') or "This file type"
            print(f"[ingest-processor]: Cannot convert {nbp.filepath}. {file_format} is currently unsupported.")

    else:  # Books need imported
        print(f"\n[ingest-processor]: No conversion needed for {nbp.filename}, importing now...")
        nbp.add_book_to_library(filepath)

    nbp.delete_current_file()
    del nbp  # New in Version 2.0.0, should drastically reduce memory usage with large ingests


if __name__ == "__main__":
    main()