incremental tmp

2021-03-09 03:09:52 +01:00
2 changed files with 101 additions and 2 deletions
@@ -21,7 +21,7 @@ if debug:
    logging.basicConfig(level=logging.DEBUG)

 #sub = Subtitle(Language.fromietf("eng:forced"), mods=["common", "remove_HI", "OCR_fixes", "fix_uppercase", "shift_offset(ms=-500)", "shift_offset(ms=500)", "shift_offset(s=2,ms=800)"])
-sub = Subtitle(Language.fromietf("eng"), mods=["common", "remove_HI", "OCR_fixes", "fix_uppercase", "shift_offset(ms=0,s=1)"])
+sub = Subtitle(Language.fromietf("eng"), mods=["common", "remove_HI", "OCR_fixes", "fix_uppercase", "shift_offset(ms=0,s=1)", "fix_incremental", "fix_short"])
 sub.content = open(fn).read()
 sub.normalize()
 sub.is_valid()
@@ -1,6 +1,8 @@
 # coding=utf-8

 import re
+import logging
+from collections import OrderedDict

 from subzero.language import Language
 from subzero.modification.mods import SubtitleTextModification, empty_line_post_processors, SubtitleModification
@@ -9,7 +11,7 @@ from subzero.modification.processors.re_processor import NReProcessor
 from subzero.modification import registry
 from tld import get_tld

-
+logger = logging.getLogger(__name__)
 ENGLISH = Language("eng")


@@ -181,7 +183,104 @@ class FixUppercase(SubtitleModification):
            entry.plaintext = self.capitalize(entry.plaintext)


+class FixIncremental(SubtitleModification):
+    identifier = "fix_incremental"
+    description = "Fixes inremental-repeating subtitles"
+    modifies_whole_file = True
+    exclusive = True
+
+    long_description = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+
+    def modify(self, content, debug=False, parent=None, **kwargs):
+        prev_entry = None
+        for entry in parent.f:
+            subs = []
+            for sub in entry.text.split("\N"):
+                if prev_entry and prev_entry.text and prev_entry.text.lower().endswith(sub.lower()):
+                    if debug:
+                        logger.debug(u"Skipping incremental/dup: %s" % sub)
+                    continue
+                subs.append(sub)
+
+            if subs:
+                entry.text = "\N".join(subs)
+
+            prev_entry = entry
+
+
+
+class FixShort(SubtitleModification):
+    identifier = "fix_short"
+    description = "ASDasdasdasdasdd"
+    modifies_whole_file = True
+    exclusive = True
+
+    long_description = "adsadsdasdsadsa"
+
+    def modify(self, content, debug=False, parent=None, **kwargs):
+        prev_entry = None
+        prev_entry_dur = None
+        max_duration = 500
+        max_line_len = 200
+        max_lines = 3
+        entries = []
+        last_lines = []
+        for index, entry in enumerate(parent.f):
+            current_new_lines = []
+            if not last_lines and parent.f[index-1]:
+                print "YOO"
+                # find last lines
+                last_lines = parent.f[index-1].text.split("\N")
+            has_space = len(last_lines) < max_lines
+            last_line = ""
+            # go through each line and pack them
+            for line in entry.text.split("\N"):
+                new_line = ""
+                if line:
+                    if last_line != line and last_line and len(last_line + line) <= max_line_len:
+                        # new line plus line fits
+                        if re.match(".+\W$", line):
+                            if last_line.endswith(" "):
+                                new_line = last_line + line
+                            else:
+                                new_line = last_line + " " + line
+                            logger.debug("MERGING '%s' with '%s' to '%s'", last_line, line, new_line)
+                    else:
+                        new_line = line
+                last_line = new_line
+                current_new_lines.append(new_line)
+
+            # merge entries
+            if prev_entry:
+                #print prev_entry.duration, max_duration, len(new_lines), max_lines
+                if prev_entry.duration < max_duration and len(current_new_lines) < max_lines:
+                    #len(prev_entry.text) < max_len
+                    print "HIT", prev_entry.text, " + ", entry.text
+                    entry_text = prev_entry.text + "\N" + "\N".join(current_new_lines)
+
+                else:
+                    entry_text = "\N".join(current_new_lines)
+            else:
+                entry_text = "\N".join(current_new_lines)
+
+            #prev_entry = entry.copy()
+            if not prev_entry:
+                prev_entry = entry.copy()
+                continue
+            new_entry = prev_entry.copy()
+            new_entry.text = entry_text
+            prev_entry = new_entry.copy()
+            entries.append(new_entry)
+            #new_entries.append(entry.copy())
+
+        parent.f.entries = entries
+
+
+
+
 registry.register(CommonFixes)
 registry.register(RemoveTags)
 registry.register(ReverseRTL)
 registry.register(FixUppercase)
+registry.register(FixIncremental)
+registry.register(FixShort)