submod retry; WIP

2019-05-19 06:03:55 +02:00
5 changed files with 130 additions and 104 deletions
@@ -6,7 +6,7 @@ import pysubs2
 import logging
 import time

-from mods import EMPTY_TAG_PROCESSOR, EmptyEntryError
+from mods import EMPTY_TAG_PROCESSOR, EmptyEntryError, FullContentRep
 from registry import registry
 from subzero.language import Language

@@ -257,7 +257,16 @@ class SubtitleModifications(object):
                mod.modify(None, debug=self.debug, parent=self, **args)

    def apply_line_mods(self, new_entries, mods):
-        for index, entry in enumerate(self.f, 1):
+        index = 1
+        entries = self.f[:]
+        entry_count = len(entries)
+
+        while 1:
+            if index > entry_count - 1:
+                break
+
+            entry = entries[index]
+
            applied_mods = []
            lines = []

@@ -265,116 +274,110 @@ class SubtitleModifications(object):
            start_tags = []
            end_tags = []

-            t = entry.text.strip()
-            if not t:
+            text = entry.text.replace(ur"\N", "\n").strip()
+            if not text:
                if self.debug:
                    logger.debug(u"Skipping empty line: %s", index)
+                index += 1
                continue

-            skip_entry = False
-            for line in t.split(ur"\N"):
-                # don't bother the mods with surrounding tags
-                old_line = line
-                line = line.strip()
-                skip_line = False
-                line_count += 1
+            try:
+                for line in text.split("\n"):
+                    # don't bother the mods with surrounding tags
+                    old_line = line
+                    line = line.strip()
+                    skip_line = False
+                    line_count += 1

-                if not line:
-                    continue
+                    if not line:
+                        continue

-                # clean {\X0} tags before processing
-                # fixme: handle nested tags?
-                start_tag = u""
-                end_tag = u""
-                if line.startswith(self.font_style_tag_start):
-                    start_tag = line[:5]
-                    line = line[5:]
-                if line[-5:-3] == self.font_style_tag_start:
-                    end_tag = line[-5:]
-                    line = line[:-5]
+                    # clean {\X0} tags before processing
+                    # fixme: handle nested tags?
+                    start_tag = u""
+                    end_tag = u""
+                    if line.startswith(self.font_style_tag_start):
+                        start_tag = line[:5]
+                        line = line[5:]
+                    if line[-5:-3] == self.font_style_tag_start:
+                        end_tag = line[-5:]
+                        line = line[:-5]

-                last_procs_mods = []
+                    last_procs_mods = []

-                # fixme: this double loop is ugly
-                for order, identifier, args in mods:
-                    mod = self.initialized_mods[identifier]
+                    # fixme: this double loop is ugly
+                    for order, identifier, args in mods:
+                        mod = self.initialized_mods[identifier]

-                    try:
-                        line = mod.modify(line.strip(), entry=entry.text, debug=self.debug, parent=self, index=index,
+                        line = mod.modify(line.strip(), entry=text, debug=self.debug, parent=self, index=index,
                                          **args)
-                    except EmptyEntryError:
-                        if self.debug:
-                            logger.debug(u"%d: %s: %r -> ''", index, identifier, entry.text)
-                        skip_entry = True
-                        break

-                    if not line:
-                        if self.debug:
-                            logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
-                        skip_line = True
-                        break
+                        if not line:
+                            if self.debug:
+                                logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
+                            skip_line = True
+                            break

-                    applied_mods.append(identifier)
-                    if mod.last_processors:
-                        last_procs_mods.append([identifier, args])
+                        applied_mods.append(identifier)
+                        if mod.last_processors:
+                            last_procs_mods.append([identifier, args])

-                if skip_entry:
-                    lines = []
-                    break
+                    if skip_line:
+                        continue

-                if skip_line:
-                    continue
+                    for identifier, args in last_procs_mods:
+                        mod = self.initialized_mods[identifier]

-                for identifier, args in last_procs_mods:
-                    mod = self.initialized_mods[identifier]
-
-                    try:
-                        line = mod.modify(line.strip(), entry=entry.text, debug=self.debug, parent=self, index=index,
+                        line = mod.modify(line.strip(), entry=text, debug=self.debug, parent=self, index=index,
                                          procs=["last_process"], **args)
-                    except EmptyEntryError:
+
+                        if not line:
+                            if self.debug:
+                                logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
+                            skip_line = True
+                            break
+
+                    if skip_line:
+                        continue
+
+                    if start_tag:
+                        start_tags.append(start_tag)
+
+                    if end_tag:
+                        end_tags.append(end_tag)
+
+                    # append new line and clean possibly newly added empty tags
+                    cleaned_line = EMPTY_TAG_PROCESSOR.process(start_tag + line + end_tag, debug=self.debug).strip()
+                    if cleaned_line:
+                        # we may have a single closing tag, if so, try appending it to the previous line
+                        if len(cleaned_line) == 5 and cleaned_line.startswith("{\\") and cleaned_line.endswith("0}"):
+                            if lines:
+                                prev_line = lines.pop()
+                                lines.append(prev_line + cleaned_line)
+                                continue
+
+                        lines.append(cleaned_line)
+                    else:
                        if self.debug:
-                            logger.debug(u"%d: %s: %r -> ''", index, identifier, entry.text)
-                        skip_entry = True
-                        break
+                            logger.debug(u"%d: Ditching now empty line (%r)", index, line)

-                    if not line:
-                        if self.debug:
-                            logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
-                        skip_line = True
-                        break
-
-                if skip_entry:
-                    lines = []
-                    break
-
-                if skip_line:
+                if not lines:
+                    # don't bother logging when the entry only had one line
+                    if self.debug and line_count > 1:
+                        logger.debug(u"%d: %r -> ''", index, text)
+                    index += 1
                    continue
+            except EmptyEntryError, e:
+                if self.debug:
+                    logger.debug(u"%d: %s: %r -> ''", index, e.mod.identifier, e.entry)
+                index += 1
+                continue

-                if start_tag:
-                    start_tags.append(start_tag)
-
-                if end_tag:
-                    end_tags.append(end_tag)
-
-                # append new line and clean possibly newly added empty tags
-                cleaned_line = EMPTY_TAG_PROCESSOR.process(start_tag + line + end_tag, debug=self.debug).strip()
-                if cleaned_line:
-                    # we may have a single closing tag, if so, try appending it to the previous line
-                    if len(cleaned_line) == 5 and cleaned_line.startswith("{\\") and cleaned_line.endswith("0}"):
-                        if lines:
-                            prev_line = lines.pop()
-                            lines.append(prev_line + cleaned_line)
-                            continue
-
-                    lines.append(cleaned_line)
-                else:
-                    if self.debug:
-                        logger.debug(u"%d: Ditching now empty line (%r)", index, line)
-
-            if not lines:
-                # don't bother logging when the entry only had one line
-                if self.debug and line_count > 1:
-                    logger.debug(u"%d: %r -> ''", index, entry.text)
+            except FullContentRep, e:
+                if self.debug:
+                    logger.debug(u"%d: %s: %r -> %r", index, e.mod.identifier, text, e.new_content)
+                new_entries.append(e.new_content.replace("\n", ur"\N"))
+                index += 1
                continue

            new_text = ur"\N".join(lines)
@@ -403,6 +406,8 @@ class SubtitleModifications(object):
                entry.text = new_text

            new_entries.append(entry)
+            index += 1
+

 SubMod = SubtitleModifications

@@ -47,7 +47,7 @@ class SubtitleModification(object):
                continue

            old_content = new_content
-            new_content = processor.process(new_content, debug=debug, **kwargs)
+            new_content = processor.process(new_content, debug=debug, mod=self, **kwargs)
            if not new_content:
                if debug:
                    logger.debug("Processor returned empty line: %s", processor.name)
@@ -107,9 +107,22 @@ empty_line_post_processors = [
 ]


-class EmptyEntryError(Exception):
+class ModEvent(Exception):
+    def __init__(self, *args, **kwargs):
+        self.mod = kwargs.pop("mod", None)
+        self.entry = kwargs.pop("entry", None)
+        super(ModEvent, self).__init__(*args, **kwargs)
+
+
+class EmptyEntryError(ModEvent):
    pass


-class EmptyLineError(Exception):
+class EmptyLineError(ModEvent):
    pass
+
+
+class FullContentRep(ModEvent):
+    def __init__(self, *args, **kwargs):
+        self.new_content = kwargs.pop("new_content", None)
+        super(FullContentRep, self).__init__(*args, **kwargs)
@@ -1,7 +1,8 @@
 # coding=utf-8
 import re

-from subzero.modification.mods import SubtitleTextModification, empty_line_post_processors, EmptyEntryError, TAG
+from subzero.modification.mods import SubtitleTextModification, empty_line_post_processors, EmptyEntryError, TAG, \
+    FullContentRep
 from subzero.modification.processors.re_processor import NReProcessor
 from subzero.modification import registry

@@ -10,9 +11,11 @@ class FullBracketEntryProcessor(NReProcessor):
    def process(self, content, debug=False, **kwargs):
        entry = kwargs.get("entry")
        if entry:
-            rep_content = super(FullBracketEntryProcessor, self).process(entry, debug=debug, **kwargs)
-            if not rep_content.strip():
-                raise EmptyEntryError()
+            rep_content = super(FullBracketEntryProcessor, self).process(entry, debug=debug, **kwargs).strip()
+            if not rep_content:
+                raise EmptyEntryError(mod=self.mod, entry=entry)
+            if content != rep_content:
+                raise FullContentRep(new_content=rep_content, mod=self.mod, entry=entry)
        return content


@@ -49,8 +52,8 @@ class HearingImpaired(SubtitleTextModification):
        NReProcessor(re.compile(ur'(?sux)-?%(t)s[([][^([)\]]+?(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' %
                                {"t": TAG}), "", name="HI_brackets"),

-        #NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
-        #             "", name="HI_bracket_open_start"),
+        FullBracketEntryProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
+                     "", name="HI_bracket_open_start"),

        #NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "",
        #             name="HI_bracket_open_end"),
@@ -7,12 +7,14 @@ class Processor(object):
    """
    name = None
    parent = None
+    mod = None
    supported = None
    enabled = True

-    def __init__(self, name=None, parent=None, supported=None):
+    def __init__(self, name=None, parent=None, mod=None, supported=None):
        self.name = name
        self.parent = parent
+        self.mod = mod
        self.supported = supported if supported else lambda parent: True

    @property
@@ -20,6 +22,8 @@ class Processor(object):
        return self.name

    def process(self, content, debug=False, **kwargs):
+        if not self.mod:
+            self.mod = kwargs.get("mod", None)
        return content

    def __repr__(self):
@@ -14,12 +14,13 @@ class ReProcessor(Processor):
    pattern = None
    replace_with = None

-    def __init__(self, pattern, replace_with, name=None, supported=None):
-        super(ReProcessor, self).__init__(name=name, supported=supported)
+    def __init__(self, pattern, replace_with, name=None, supported=None, **kwargs):
+        super(ReProcessor, self).__init__(name=name, supported=supported, **kwargs)
        self.pattern = pattern
        self.replace_with = replace_with

    def process(self, content, debug=False, **kwargs):
+        super(ReProcessor, self).process(content, debug=debug, **kwargs)
        return self.pattern.sub(self.replace_with, content)