Integrated cwa.db more tightly into the processing scripts

This commit is contained in:
crocodilestick
2024-09-25 14:31:32 +00:00
parent ac8c28297f
commit 48b039bb3e
4 changed files with 138 additions and 64 deletions
+2 -1
View File
@@ -1,4 +1,5 @@
{
"ingest_folder":"/cwa-book-ingest",
"calibre_library_dir":"/calibre-library"
"calibre_library_dir":"/calibre-library",
"tmp_conversion_dir":"/config/.cwa_conversion_tmp"
}
+124 -50
View File
@@ -1,25 +1,38 @@
import argparse
import glob
# import argparse
import json
import logging
import os
import re
import sys
import shutil
from pathlib import Path
import subprocess
from cwa_db import CWA_DB
logger = logging.getLogger(__name__)
logging.basicConfig(filename='/config/calibre-web.log', level=logging.INFO)
# Make sure required directories are present
required_directories = [
"/config/.cwa_conversion_tmp",
"/config/processed_books",
"/config/processed_books/imported",
"/config/processed_books/failed",
"/config/processed_books/converted"
]
for directory in required_directories:
Path(directory).mkdir(exist_ok=True)
os.system(f"chown -R abc:abc {directory}")
class LibraryConverter:
def __init__(self, args) -> None:
self.args = args
def __init__(self) -> None: #args
# self.args = args
self.supported_book_formats = ['azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txt', 'txtz']
self.hierarchy_of_success = ['lit', 'mobi', 'azw', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz', 'txt']
self.dirs = self.get_dirs() # Dirs are assigned by user during setup
self.ingest_folder = f"{self.dirs['ingest_folder']}/" # Dir where new files are looked for to process and subsequently deleted
self.library = f"{self.dirs['calibre_library_dir']}/"
self.ingest_folder, self.library_dir, tmp_conversion_dir = self.get_dirs('/app/calibre-web-automated/dirs.json')
self.epubs, self.to_convert = self.get_library_books()
self.current_book = 1
@@ -27,7 +40,7 @@ class LibraryConverter:
self.cwa_settings = self.db.cwa_settings
def get_library_books(self):
library_files = [os.path.join(dirpath,f) for (dirpath, dirnames, filenames) in os.walk(self.library) for f in filenames]
library_files = [os.path.join(dirpath,f) for (dirpath, dirnames, filenames) in os.walk(self.library_dir) for f in filenames]
epub_files = [f for f in library_files if f.endswith('.epub')]
dupe_list = []
to_convert = []
@@ -42,61 +55,122 @@ class LibraryConverter:
return epub_files, to_convert
def get_dirs(self) -> dict[str, str]:
def get_dirs(self, dirs_json_path: str) -> tuple[str, str, str]:
dirs = {}
with open('/app/calibre-web-automated/dirs.json', 'r') as f:
with open(dirs_json_path, 'r') as f:
dirs: dict[str, str] = json.load(f)
return dirs
ingest_folder = f"{dirs['ingest_folder']}/"
library_dir = f"{dirs['calibre_library_dir']}/"
tmp_conversion_dir = f"{dirs['tmp_conversion_dir']}/"
return ingest_folder, library_dir, tmp_conversion_dir
def convert_library(self):
for file in self.to_convert:
logging.basicConfig(filename='/config/calibre-web.log', level=logging.INFO)
print(f"[convert-library]: ({self.current_book}/{len(self.to_convert)}) Converting {os.path.basename(file)}...")
logging.info(f"[convert-library]: ({self.current_book}/{len(self.to_convert)}) Converting {os.path.basename(file)}...")
filename, file_extension = os.path.splitext(file)
filename = filename.split('/')[-1]
book_id = (re.search(r'\(\d*\)', file).group(0))[1:-1]
os.system(f"cp '{file}' '/config/processed_books/{filename}{file_extension}'")
os.system(f"calibredb remove {book_id} --permanent --with-library '{self.library}'")
os.system(f"ebook-convert '/config/processed_books/{filename}{file_extension}' '{self.import_folder}{filename}.epub'") # >>/config/calibre-web.log 2>&1
os.system(f"chown -R abc:abc '{self.library}'")
logging.info(f"[convert-library]: Conversion of {os.path.basename(file)} complete!")
self.current_book += 1
if not self.args.keep:
os.remove(f"/config/processed_books/{filename}{file_extension}")
print_and_log(f"[convert-library]: ({self.current_book}/{len(self.to_convert)}) Converting {os.path.basename(file)}...")
def empty_import_folder(self):
os.system(f"chown -R abc:abc '{self.import_folder}'")
files = glob.glob(f"{self.import_folder}*")
for f in files:
os.remove(f)
filename = os.path.basename(file)
file_extension = Path(file).suffix
try: # Get Calibre Library Book ID
book_id = (re.search(r'\(\d*\)', file).group(0))[1:-1]
except Exception as e:
print_and_log(f"[convert-library] A Calibre Library Book ID could not be determined for {file}. Make sure the structure of your calibre library matches the following example:\n")
print_and_log("Terry Goodkind/")
print_and_log("└── Wizard's First Rule (6120)")
print_and_log(" ├── cover.jpg")
print_and_log(" ├── metadata.opf")
print_and_log(" └── Wizard's First Rule - Terry Goodkind.epub")
shutil.copyfile(file, f"/config/processed_books/failed/{os.path.basename(file)}")
self.current_book += 1
continue
try: # Convert Book
target_filepath = f"{self.tmp_conversion_dir}{Path(file).stem}.epub"
subprocess.run(["ebook-convert", file, target_filepath], check=True)
if self.cwa_settings['auto_backup_conversions']:
shutil.copyfile(file, f"/config/processed_books/converted/{os.path.basename(file)}")
self.db.conversion_add_entry(os.path.basename(target_filepath),
Path(file).suffix,
str(self.cwa_settings["auto_backup_conversions"]))
print_and_log(f"[convert-library]: Conversion of {os.path.basename(file)} successful! Removing old version from library...")
except subprocess.CalledProcessError as e:
print_and_log(f"[convert-library]: Conversion of {os.path.basename(file)} was unsuccessful. See the following error:\n{e}")
shutil.copyfile(file, f"/config/processed_books/failed/{os.path.basename(file)}")
self.current_book += 1
continue
try: # Remove Book from Existing Library
subprocess.run(["calibredb", "remove", book_id, "--permanent", "--with-library", self.library_dir], check=True)
print_and_log(f"[convert-library]: Non-epub version of {Path(file).stem} (Book ID: {book_id}) was successfully removed from library.\nAdding converted version to library...")
except subprocess.CalledProcessError as e:
print_and_log(f"[convert-library]: Non-epub version of {Path(file).stem} couldn't be successfully removed from library. See the following error:\n{e}")
self.current_book += 1
continue
try: # Import converted book to library
subprocess.run(["calibredb", "add", target_filepath, f"--library-path={self.library_dir}"], check=True)
if self.cwa_settings['auto_backup_imports']:
shutil.copyfile(target_filepath, f"/config/processed_books/imported/{os.path.basename(target_filepath)}")
self.db.import_add_entry(os.path.basename(target_filepath),
str(self.cwa_settings["auto_backup_imports"]))
print_and_log(f"[convert-library]: Import of {os.path.basename(target_filepath)} successfully completed!")
except subprocess.CalledProcessError as e:
print_and_log(f"[convert-library]: Import of {os.path.basename(target_filepath)} was not successfully completed. See the following error:\n{e}")
self.current_book += 1
continue
self.current_book += 1
continue
def empty_tmp_con_dir(self):
try:
files = os.listdir(self.tmp_conversion_dir)
for file in files:
file_path = os.path.join(self.tmp_conversion_dir, file)
if os.path.isfile(file_path):
os.remove(file_path)
except OSError:
print(f"Error occurred while emptying {self.tmp_conversion_dir}.")
def main():
parser = argparse.ArgumentParser(
prog='convert-library',
description='Made for the purpose of converting ebooks in a calibre library not in epub format, to epub format'
)
# parser = argparse.ArgumentParser(
# prog='convert-library',
# description='Made for the purpose of converting ebooks in a calibre library not in epub format, to epub format'
# )
parser.add_argument('--replace', '-r', action='store_true', required=False, dest='replace', help='Replaces the old library with the new one', default=False)
parser.add_argument('--keep', '-k', action='store_true', required=False, dest='keep', help='Creates a new epub library with the old one but stores the old files in /config/processed_books', default=False)
args = parser.parse_args()
# parser.add_argument('--replace', '-r', action='store_true', required=False, dest='replace', help='Replaces the old library with the new one', default=False)
# parser.add_argument('--keep', '-k', action='store_true', required=False, dest='keep', help='Creates a new epub library with the old one but stores the old files in /config/processed_books', default=False)
# args = parser.parse_args()
if not args.replace and not args.keep:
print("[convert-library]: You must specify either the --replace/-r or --keep/-k flag")
sys.exit(0)
# if not args.replace and not args.keep:
# print("[convert-library]: You must specify either the --replace/-r or --keep/-k flag")
# sys.exit(0)
# else:
converter = LibraryConverter() # args
if len(converter.to_convert) > 0:
converter.convert_library()
else:
converter = LibraryConverter(args)
if len(converter.to_convert) > 0:
converter.convert_library()
else:
print("[convert-library] No non-epubs found in library. Exiting now...")
logging.info("[convert-library] No non-epubs found in library. Exiting now...")
sys.exit(0)
print(f"\n[convert-library] Library conversion complete! {len(converter.to_convert)} books converted! Exiting now...")
logging.info(f"[convert-library] Library conversion complete! {len(converter.to_convert)} books converted! Exiting now...")
print_and_log("[convert-library] No non-epubs found in library. Exiting now...")
sys.exit(0)
print_and_log(f"\n[convert-library] Library conversion complete! {len(converter.to_convert)} books converted! Exiting now...")
sys.exit(0)
def print_and_log(string) -> None:
logging.info(string)
print(string)
if __name__ == "__main__":
main()
+5 -2
View File
@@ -2,6 +2,7 @@ import sqlite3
import sys
from sqlite3 import Error as sqlError
import os
from datetime import datetime
from tabulate import tabulate
@@ -132,11 +133,13 @@ class CWA_DB:
# self.cur.execute("INSERT INTO cwa_enforcement(timestamp, book_id, book_title, author, epub_path, trigger_type) VALUES (?, ?, ?, ?, ?, ?);", (timestamp, book_id, book_title, author, epub_path, trigger_type))
# self.con.commit()
def import_add_entry(self, timestamp, filename, original_backed_up):
def import_add_entry(self, filename, original_backed_up):
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
self.cur.execute("INSERT INTO cwa_conversions(timestamp, filename, original_backed_up) VALUES (?, ?, ?);", (timestamp, filename, original_backed_up))
self.con.commit()
def conversion_add_entry(self, timestamp, filename, original_format, original_backed_up):
def conversion_add_entry(self, filename, original_format, original_backed_up):
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
self.cur.execute("INSERT INTO cwa_conversions(timestamp, filename, original_format, original_backed_up) VALUES (?, ?, ?, ?);", (timestamp, filename, original_format, original_backed_up))
self.con.commit()
+6 -10
View File
@@ -5,7 +5,6 @@ import subprocess
import sys
import tempfile
import time
from datetime import datetime
import shutil
from pathlib import Path
@@ -45,12 +44,10 @@ class NewBookProcessor:
def __init__(self, filepath: str):
self.db = CWA_DB()
self.cwa_settings = self.db.cwa_settings
input(self.cwa_settings)
self.supported_book_formats = ['azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txtz']
self.hierarchy_of_success = ['lit', 'mobi', 'azw', 'epub', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz']
self.ingest_folder, self.library_dir = self.get_dirs("/app/calibre-web-automated/dirs.json")
self.tmp_conversion_dir = "/config/.cwa_conversion_tmp/"
self.ingest_folder, self.library_dir, self.tmp_conversion_dir = self.get_dirs("/app/calibre-web-automated/dirs.json")
self.filepath = filepath # path of the book we're targeting
self.filename = os.path.basename(filepath)
@@ -62,10 +59,11 @@ class NewBookProcessor:
with open(dirs_json_path, 'r') as f:
dirs: dict[str, str] = json.load(f)
ingest_folder = f"{dirs['ingest_folder']}/" # Dir where new files are looked for to process and subsequently deleted
ingest_folder = f"{dirs['ingest_folder']}/"
library_dir = f"{dirs['calibre_library_dir']}/"
tmp_conversion_dir = f"{dirs['tmp_conversion_dir']}/"
return ingest_folder, library_dir
return ingest_folder, library_dir, tmp_conversion_dir
def convert_book(self, import_format: str) -> tuple[bool, str]:
@@ -83,8 +81,7 @@ class NewBookProcessor:
if self.cwa_settings['auto_backup_conversions']:
shutil.copyfile(self.filepath, f"/config/processed_books/converted/{os.path.basename(original_filepath)}")
self.db.conversion_add_entry(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
os.path.basename(target_filepath),
self.db.conversion_add_entry(original_filepath.stem,
import_format,
str(self.cwa_settings["auto_backup_conversions"]))
@@ -129,8 +126,7 @@ class NewBookProcessor:
if self.cwa_settings['auto_backup_imports']:
shutil.copyfile(book_path, f"/config/processed_books/imported/{import_filename}")
self.db.import_add_entry(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
os.path.basename(target_filepath),
self.db.import_add_entry(import_path.stem,
str(self.cwa_settings["auto_backup_imports"]))
except subprocess.CalledProcessError as e: