submod: common: remove redundant interpunction
This commit is contained in:
@@ -20,7 +20,7 @@ debug = "--debug" in sys.argv
|
||||
if debug:
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
sub = Subtitle(Language.fromietf("pol"), mods=["OCR_fixes", "common", "remove_tags", "OCR_fixes", "shift_offset(s=-5, ms=-350)"])
|
||||
sub = Subtitle(Language.fromietf("eng"), mods=["OCR_fixes", "common", "remove_tags", "OCR_fixes", "shift_offset(s=-5, ms=-350)"])
|
||||
sub.content = open(fn).read()
|
||||
sub.normalize()
|
||||
content = sub.get_modified_content(debug=True)
|
||||
@@ -35,10 +35,10 @@ content = sub.get_modified_content(debug=True)
|
||||
#content = fix_text(Subtitle.pysubs2_to_unicode(submod.f, format=format), **ftfy_defaults)\
|
||||
# .encode(encoding="utf-8")
|
||||
#print submod.f.to_string("srt", encoding="utf-8")
|
||||
#print repr(srt)
|
||||
f = codecs.open("testout.srt", "w+")
|
||||
f.write(content)
|
||||
f.close()
|
||||
#print repr(content)
|
||||
#f = codecs.open("testout.srt", "w+")
|
||||
#f.write(content)
|
||||
#f.close()
|
||||
#print submod.f.to_string("srt")
|
||||
#submod.modify("OCR_fixes")
|
||||
#submod.modify("change_FPS(from=24,to=25)")
|
||||
|
||||
@@ -71,6 +71,11 @@ class CommonFixes(SubtitleTextModification):
|
||||
NReProcessor(re.compile(ur'(?u)((?:[^.\s])+\.\s+)([a-zà-ž])'),
|
||||
lambda match: ur'%s%s' % (match.group(1), match.group(2).upper()), name="CM_uppercase_after_dot"),
|
||||
|
||||
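# remove double interpunction: collapse a run of punctuation starting with , ! or ? down to its first mark (keeping one trailing space if the run ended with a space)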
|
||||
NReProcessor(re.compile(ur'(?u)(\s*[,!?])\s*([,.!?][,.!?\s]*)'),
|
||||
lambda match: match.group(1).strip() + (" " if match.group(2).endswith(" ") else ""),
|
||||
name="CM_double_interpunct"),
|
||||
|
||||
# remove spaces before punctuation
|
||||
NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=\w)) +([!?.,](?![!?.,]))'), r"\1", name="CM_punctuation_space"),
|
||||
]
|
||||
|
||||
@@ -22,7 +22,7 @@ Mah numbar is wrong: 1 91 7
|
||||
"less text before colons: Earth. Utah, North America."
|
||||
MUSIC PLAYS What is that sound?!
|
||||
ls it , and a punctuation issue ? lol
|
||||
take them balls it. L like turtles
|
||||
take them balls it. L like turtles !! ! this, . is bad .
|
||||
|
||||
6
|
||||
00:00:19,686 --> 00:00:21,103
|
||||
|
||||
Reference in New Issue
Block a user