252 lines
8.8 KiB
Python
252 lines
8.8 KiB
Python
# coding=utf-8
|
|
|
|
import traceback
|
|
|
|
import pysubs2
|
|
import logging
|
|
import time
|
|
|
|
from mods import EMPTY_TAG_PROCESSOR, EmptyEntryError
|
|
from registry import registry
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class SubtitleModifications(object):
    """
    Load a subtitle through pysubs2 and run registered modifications ("mods") over it.

    Mods are looked up by identifier in ``registry.mods``. A mod either works on the whole
    file (``modifies_whole_file``) or is applied to every line of every entry in the order
    given by its ``order`` attribute.
    """
    debug = False
    language = None
    # class-level default only; every instance rebinds its own dict in __init__/load
    initialized_mods = {}
    # the loaded subtitle file; stays None until load() succeeded
    f = None

    font_style_tag_start = u"{\\"
    # marker the entry text is split on to obtain the individual lines of an entry;
    # spelled without the Python 2-only ``ur`` prefix so the module also parses on Python 3
    _line_break = u"\\N"

    def __init__(self, debug=False):
        """
        :param debug: when true, timing and per-line details are written to the debug log
        """
        self.debug = debug
        self.initialized_mods = {}
        self.f = None

    def load(self, fn=None, content=None, language=None, encoding="utf-8"):
        """
        Load a subtitle either from a file or from already decoded content.

        Any previously loaded subtitle and all initialized mods are dropped first, so a
        failed load never leaves stale data behind.

        :param encoding: used for decoding the content when fn is given, not used in case content is given
        :param language: babelfish.Language language of the subtitle
        :param fn: filename
        :param content: unicode
        :return: True if a subtitle was loaded, False otherwise (errors are logged, not raised)
        """
        self.language = language
        self.initialized_mods = {}
        self.f = None
        try:
            if fn:
                self.f = pysubs2.load(fn, encoding=encoding)
            elif content:
                self.f = pysubs2.SSAFile.from_string(content)
        except (IOError,
                UnicodeDecodeError,
                pysubs2.exceptions.UnknownFPSError,
                pysubs2.exceptions.UnknownFormatIdentifierError,
                pysubs2.exceptions.FormatAutodetectionError):
            if fn:
                logger.exception("Couldn't load subtitle: %s: %s", fn, traceback.format_exc())
            elif content:
                logger.exception("Couldn't load subtitle: %s", traceback.format_exc())

        return self.f is not None

    @classmethod
    def parse_identifier(cls, identifier):
        """
        Split a mod identifier into its name and its parameters.

        Accepted forms are ``name`` and ``name(key=value,other=value)``.

        :param identifier: mod identifier, optionally carrying parameters
        :return: tuple of (name, dict of parameters); values are kept as strings
        :raises ValueError: if a parameter is not of the form key=value
        """
        # simple identifier
        if identifier in registry.mods:
            return identifier, {}

        paren_pos = identifier.find("(")
        if paren_pos == -1:
            # no parameters at all; hand the name back untouched so the caller can decide
            # what to do with an unknown mod instead of failing on a mangled slice
            return identifier, {}

        # identifier with params; identifier(param=value)
        args = {}
        for pair in identifier[paren_pos + 1:-1].split(","):
            if not pair:
                # "name()" or a stray comma
                continue

            # only split on the first "=" so values may contain "=" themselves
            key, separator, value = pair.partition("=")
            if not separator:
                raise ValueError("Malformed mod parameter %r in %r" % (pair, identifier))
            args[key] = value

        return identifier[:paren_pos], args

    @classmethod
    def get_mod_class(cls, identifier):
        """
        Return the mod class registered for the given identifier (parameters are ignored).

        :raises KeyError: if no such mod is registered
        """
        identifier, args = cls.parse_identifier(identifier)
        return registry.mods[identifier]

    @classmethod
    def get_mod_signature(cls, identifier, **kwargs):
        """
        Return whatever the mod class' ``get_signature`` builds from the given kwargs.
        """
        return cls.get_mod_class(identifier).get_signature(**kwargs)

    def prepare_mods(self, *mods):
        """
        Resolve, de-duplicate and instantiate the requested mods.

        :param mods: mod identifiers, optionally with parameters
        :return: tuple of (line_mods, non_line_mods); line_mods holds
                 (order, identifier, args) tuples, non_line_mods holds (identifier, args) tuples
        """
        parsed_mods = [SubtitleModifications.parse_identifier(mod) for mod in mods]
        final_mods = {}
        line_mods = []
        non_line_mods = []

        for identifier, args in parsed_mods:
            if identifier not in registry.mods:
                logger.error("Mod %s not loaded", identifier)
                continue

            mod_cls = registry.mods[identifier]
            if identifier in final_mods:
                if mod_cls.exclusive:
                    # exclusive mod, kill old, use newest
                    final_mods.pop(identifier)
                elif mod_cls.args_mergeable:
                    # merge args of duplicate mods if possible
                    final_mods[identifier] = mod_cls.merge_args(final_mods[identifier], args)
                    continue

            final_mods[identifier] = args

        # separate all mods into line and non-line mods
        for identifier, args in final_mods.items():
            mod_cls = registry.mods[identifier]
            if mod_cls.modifies_whole_file:
                non_line_mods.append((identifier, args))
            else:
                line_mods.append((mod_cls.order, identifier, args))

            # initialize the mods; instances are reused across modify() calls until the next load()
            if identifier not in self.initialized_mods:
                self.initialized_mods[identifier] = mod_cls(self)

        return line_mods, non_line_mods

    def modify(self, *mods):
        """
        Apply the given mods to the loaded subtitle in place.

        File-wide mods run first, then the line mods sorted by their order.

        :param mods: mod identifiers, optionally with parameters
        """
        if self.f is None:
            logger.error("No subtitle loaded, not applying any mods")
            return

        new_entries = []
        start = time.time()
        line_mods, non_line_mods = self.prepare_mods(*mods)

        # apply file mods
        if non_line_mods:
            non_line_mods_start = time.time()
            self.apply_non_line_mods(non_line_mods)

            if self.debug:
                logger.debug("Non-Line mods took %ss", time.time() - non_line_mods_start)

        # sort line mods by order, mods without an order last; the identifier (unique per
        # run) breaks ties so the args dicts never get compared
        line_mods.sort(key=lambda mod: (mod[0] is None, mod[0], mod[1]))

        # apply line mods
        if line_mods:
            line_mods_start = time.time()
            self.apply_line_mods(new_entries, line_mods)

            if self.debug:
                logger.debug("Line mods took %ss", time.time() - line_mods_start)

            # only replace the events when line mods actually rebuilt them; otherwise a run
            # with nothing but file-wide mods would wipe the whole subtitle
            self.f.events = new_entries

        if self.debug:
            logger.debug("Subtitle Modification took %ss", time.time() - start)

    def apply_non_line_mods(self, mods):
        """
        Run every file-wide mod once; they get no line content and reach the file via ``parent``.

        :param mods: (identifier, args) tuples as returned by prepare_mods
        """
        for identifier, args in mods:
            mod = self.initialized_mods[identifier]
            mod.modify(None, debug=self.debug, parent=self, **args)

    def _split_style_tags(self, line):
        """
        Cut a leading and a trailing five character style tag (e.g. ``{\\i1}``) off a line.

        :return: tuple of (start_tag, remaining line, end_tag); missing tags are empty strings
        """
        start_tag = u""
        end_tag = u""
        if line.startswith(self.font_style_tag_start):
            start_tag, line = line[:5], line[5:]
        if line[-5:-3] == self.font_style_tag_start:
            end_tag, line = line[-5:], line[:-5]
        return start_tag, line, end_tag

    @staticmethod
    def _find_missing_tags(start_tags, end_tags, text):
        """
        Cheap man's approach to avoid open tags: work out which counterparts are missing.

        :return: tuple of (start tags to prepend, end tags to append)
        """
        add_start_tags = []
        add_end_tags = []
        if len(start_tags) != len(end_tags):
            for tag in start_tags:
                closing = tag.replace("1", "0")
                if closing not in end_tags and text.count(tag) > text.count(closing):
                    add_end_tags.append(closing)
            for tag in end_tags:
                opening = tag.replace("0", "1")
                if opening not in start_tags and text.count(tag) > text.count(opening):
                    add_start_tags.append(opening)
        return add_start_tags, add_end_tags

    def apply_line_mods(self, new_entries, mods):
        """
        Run the line mods over every line of every entry of the loaded subtitle.

        Entries that still have content afterwards get their text replaced and are appended
        to new_entries; entries that end up empty are dropped.

        :param new_entries: list that receives the surviving entries
        :param mods: (order, identifier, args) tuples, already sorted
        """
        for entry in self.f:
            lines = []

            line_count = 0
            start_tags = []
            end_tags = []

            skip_entry = False
            for line in entry.text.split(self._line_break):
                old_line = line
                skip_line = False
                line_count += 1

                # don't bother the mods with surrounding tags
                # fixme: handle nested tags?
                start_tag, line, end_tag = self._split_style_tags(line.strip())

                for order, identifier, args in mods:
                    mod = self.initialized_mods[identifier]

                    try:
                        line = mod.modify(line.strip(), entry=entry.text, debug=self.debug, parent=self, **args)
                    except EmptyEntryError:
                        # the mod decided that the whole entry has to go
                        if self.debug:
                            logger.debug(u"%s: %r -> ''", identifier, entry.text)
                        skip_entry = True
                        break

                    if not line:
                        # nothing left of this line, no point in running the remaining mods
                        if self.debug:
                            logger.debug(u"%s: %r -> ''", identifier, old_line)
                        skip_line = True
                        break

                if skip_entry:
                    lines = []
                    break

                if skip_line:
                    continue

                if start_tag:
                    start_tags.append(start_tag)

                if end_tag:
                    end_tags.append(end_tag)

                # append new line and clean possibly newly added empty tags
                cleaned_line = EMPTY_TAG_PROCESSOR.process(start_tag + line + end_tag, debug=self.debug).strip()
                if not cleaned_line:
                    if self.debug:
                        logger.debug(u"Ditching now empty line (%r)", line)
                    continue

                # we may have a single closing tag, if so, try appending it to the previous line
                is_lone_closing_tag = (len(cleaned_line) == 5 and cleaned_line.startswith("{\\")
                                       and cleaned_line.endswith("0}"))
                if is_lone_closing_tag and lines:
                    lines[-1] = lines[-1] + cleaned_line
                else:
                    lines.append(cleaned_line)

            if not lines:
                # don't bother logging when the entry only had one line
                if self.debug and line_count > 1:
                    logger.debug(u"%r -> ''", entry.text)
                continue

            new_text = self._line_break.join(lines)

            add_start_tags, add_end_tags = self._find_missing_tags(start_tags, end_tags, new_text)
            if add_end_tags or add_start_tags:
                entry.text = u"".join(add_start_tags) + new_text + u"".join(add_end_tags)
                if self.debug:
                    logger.debug(u"Fixing tags: %s (%r -> %r)", str(add_start_tags+add_end_tags), new_text,
                                 entry.text)
            else:
                entry.text = new_text

            new_entries.append(entry)
|
|
|
|
# Short alias for SubtitleModifications.
SubMod = SubtitleModifications
|
|
|
|
|
|
|
|
|