Compare commits

..

1 Commits

Author SHA1 Message Date
panni f8f99f0fb2 submod retry; WIP 2019-05-19 06:03:55 +02:00
13 changed files with 173 additions and 198 deletions
-7
View File
@@ -1,11 +1,4 @@
2.6.5.3062
Changelog
- core: cf: optimize
- core: http: don't query DNS with IPs. thanks @fgump (fixes sonarr/radarr)
2.6.5.3041
Changelog
+3 -3
View File
@@ -13,7 +13,7 @@
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>
<string>2.6.5.3092</string>
<string>2.6.5.3074</string>
<key>PlexFrameworkVersion</key>
<string>2</string>
<key>PlexPluginClass</key>
@@ -23,7 +23,7 @@
<key>PlexPluginConsoleLogging</key>
<string>0</string>
<key>PlexPluginDevMode</key>
<string>0</string>
<string>1</string>
<key>PlexPluginCodePolicy</key>
<!-- this allows channels to access some python methods which are otherwise blocked, as well as import external code libraries, and interact with the PMS HTTP API -->
<string>Elevated</string>
@@ -32,7 +32,7 @@
&lt;h1&gt;Sub-Zero for Plex&lt;/h1&gt;&lt;i&gt;Subtitles done right&lt;/i&gt;
Version 2.6.5.3092
Version 2.6.5.3074 DEV
Originally based on @bramwalet's awesome &lt;a href=&quot;https://github.com/bramwalet/Subliminal.bundle&quot;&gt;Subliminal.bundle&lt;/a&gt;
@@ -309,8 +309,7 @@ class SZProviderPool(ProviderPool):
logger.error('Invalid subtitle')
return False
if not os.environ.get("SZ_KEEP_ENCODING", False):
subtitle.normalize()
subtitle.normalize()
return True
@@ -23,10 +23,9 @@ class ArgenteamSubtitle(Subtitle):
hearing_impaired_verifiable = False
_release_info = None
def __init__(self, language, page_link, download_link, movie_kind, title, season, episode, year, release, version, source,
def __init__(self, language, download_link, movie_kind, title, season, episode, year, release, version, source,
video_codec, tvdb_id, imdb_id, asked_for_episode=None, asked_for_release_group=None, *args, **kwargs):
super(ArgenteamSubtitle, self).__init__(language, page_link=page_link, *args, **kwargs)
self.page_link = page_link
super(ArgenteamSubtitle, self).__init__(language, download_link, *args, **kwargs)
self.download_link = download_link
self.movie_kind = movie_kind
self.title = title
@@ -136,8 +135,7 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
provider_name = 'argenteam'
languages = {Language.fromalpha2(l) for l in ['es']}
video_types = (Episode, Movie)
BASE_URL = "http://www.argenteam.net/"
API_URL = BASE_URL + "api/v1/"
API_URL = "http://argenteam.net/api/v1/"
subtitle_class = ArgenteamSubtitle
hearing_impaired_verifiable = False
language_list = list(languages)
@@ -242,13 +240,12 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
for r in content['releases']:
for s in r['subtitles']:
movie_kind = "episode" if is_episode else "movie"
page_link = self.BASE_URL + movie_kind + "/" + str(aid)
sub = ArgenteamSubtitle(language, page_link, s['uri'], movie_kind, returned_title,
sub = ArgenteamSubtitle(language, s['uri'], "episode" if is_episode else "movie", returned_title,
season, episode, year, r.get('team'), r.get('tags'),
r.get('source'), r.get('codec'), content.get("tvdb"), imdb_id,
asked_for_release_group=video.release_group,
asked_for_episode=episode)
asked_for_episode=episode
)
subtitles.append(sub)
if has_multiple_ids:
@@ -190,7 +190,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
return subtitles
def query(self, video):
#vfn = get_video_filename(video)
vfn = get_video_filename(video)
subtitles = []
#logger.debug(u"Searching for: %s", vfn)
# film = search(vfn, session=self.session)
@@ -218,18 +218,18 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
logger.debug('No alternative results found')
# packs
# if video.season_fully_aired:
# term = u"%s S%02i" % (series, video.season)
# logger.debug('Searching for packs: %s', term)
# time.sleep(self.search_throttle)
# film = search(term, session=self.session, throttle=self.search_throttle)
# if film and film.subtitles:
# logger.debug('Pack results found: %s', len(film.subtitles))
# subtitles += self.parse_results(video, film)
# else:
# logger.debug('No pack results found')
# else:
# logger.debug("Not searching for packs, because the season hasn't fully aired")
if video.season_fully_aired:
term = u"%s S%02i" % (series, video.season)
logger.debug('Searching for packs: %s', term)
time.sleep(self.search_throttle)
film = search(term, session=self.session, throttle=self.search_throttle)
if film and film.subtitles:
logger.debug('Pack results found: %s', len(film.subtitles))
subtitles += self.parse_results(video, film)
else:
logger.debug('No pack results found')
else:
logger.debug("Not searching for packs, because the season hasn't fully aired")
if more_than_one:
time.sleep(self.search_throttle)
else:
@@ -117,14 +117,14 @@ class Subtitle(Subtitle_):
logger.info('Guessing encoding for language %s', self.language)
encodings = ['utf-8']
encodings = ['utf-8', 'utf-16']
# add language-specific encodings
# http://scratchpad.wikia.com/wiki/Character_Encoding_Recommendation_for_Languages
if self.language.alpha3 == 'zho':
encodings.extend(['cp936', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp_2', 'cp950', 'gb18030', 'big5',
'big5hkscs', 'utf-16'])
'big5hkscs'])
elif self.language.alpha3 == 'jpn':
encodings.extend(['shift-jis', 'cp932', 'euc_jp', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', ])
@@ -133,7 +133,7 @@ class Subtitle(Subtitle_):
# arabic/farsi
elif self.language.alpha3 in ('ara', 'fas', 'per'):
encodings.extend(['windows-1256', 'utf-16'])
encodings.append('windows-1256')
elif self.language.alpha3 == 'heb':
encodings.extend(['windows-1255', 'iso-8859-8'])
elif self.language.alpha3 == 'tur':
@@ -251,7 +251,8 @@ class Subtitle(Subtitle_):
subs = pysubs2.SSAFile.from_string(text, fps=self.plex_media_fps)
unicontent = self.pysubs2_to_unicode(subs)
self.content = unicontent.encode(self._guessed_encoding)
self.content = unicontent.encode("utf-8")
self._guessed_encoding = "utf-8"
except:
logger.exception("Couldn't convert subtitle %s to .srt format: %s", self, traceback.format_exc())
return False
@@ -319,8 +320,7 @@ class Subtitle(Subtitle_):
:return: string
"""
if not self.mods:
return fix_text(self.content.decode(encoding=self._guessed_encoding), **ftfy_defaults).encode(
encoding=self._guessed_encoding)
return fix_text(self.content.decode("utf-8"), **ftfy_defaults).encode(encoding="utf-8")
submods = SubtitleModifications(debug=debug)
if submods.load(content=self.text, language=self.language):
@@ -329,7 +329,7 @@ class Subtitle(Subtitle_):
self.mods = submods.mods_used
content = fix_text(self.pysubs2_to_unicode(submods.f, format=format), **ftfy_defaults)\
.encode(encoding=self._guessed_encoding)
.encode(encoding="utf-8")
submods.f = None
del submods
return content
@@ -30,7 +30,6 @@ import enum
import sys
import requests
import time
import logging
is_PY2 = sys.version_info[0] < 3
if is_PY2:
@@ -40,13 +39,8 @@ else:
from contextlib import suppress
from urllib2.request import Request, urlopen
from dogpile.cache.api import NO_VALUE
from subliminal.cache import region
from bs4 import BeautifulSoup, NavigableString
logger = logging.getLogger(__name__)
# constants
HEADERS = {
}
@@ -56,13 +50,6 @@ DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWeb"\
"Kit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
ENDPOINT_RE = re.compile(ur'(?uis)<form action="/subtitles/(.+)">.*?<input type="text"')
class NewEndpoint(Exception):
pass
# utils
def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT):
url = re.sub("\s", "+", url)
@@ -71,17 +58,7 @@ def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT):
html = urlopen(r).read().decode("utf-8")
else:
ret = session.get(url)
try:
ret.raise_for_status()
except requests.HTTPError, e:
if e.response.status_code == 404:
m = ENDPOINT_RE.search(ret.text)
if m:
try:
raise NewEndpoint(m.group(1))
except:
pass
raise
ret.raise_for_status()
html = ret.text
return BeautifulSoup(html, "html.parser")
@@ -273,31 +250,20 @@ def get_first_film(soup, section, year=None, session=None):
def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact, throttle=0):
# note to subscene: if you actually start to randomize the endpoint, we'll have to query your server even more
endpoints = ["searching", "search", "srch", "find"]
if release:
endpoints = ["release"]
else:
endpoint = region.get("subscene_endpoint")
if endpoint is not NO_VALUE and endpoint not in endpoints:
endpoints.insert(0, endpoint)
soup = None
for endpoint in endpoints:
try:
soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, endpoint, term),
session=session)
except NewEndpoint, e:
new_endpoint = e.message
if new_endpoint not in endpoints:
new_endpoint = new_endpoint.strip()
logger.debug("Switching main endpoint to %s", new_endpoint)
region.set("subscene_endpoint", new_endpoint)
except requests.HTTPError, e:
if e.response.status_code == 404:
time.sleep(throttle)
return search(term, release=release, session=session, year=year, limit_to=limit_to, throttle=throttle)
else:
region.delete("subscene_endpoint")
raise Exception("New endpoint %s didn't work; exiting" % new_endpoint)
# fixme: detect endpoint from html
continue
raise
break
if soup:
@@ -6,7 +6,7 @@ import pysubs2
import logging
import time
from mods import EMPTY_TAG_PROCESSOR, EmptyEntryError
from mods import EMPTY_TAG_PROCESSOR, EmptyEntryError, FullContentRep
from registry import registry
from subzero.language import Language
@@ -257,7 +257,16 @@ class SubtitleModifications(object):
mod.modify(None, debug=self.debug, parent=self, **args)
def apply_line_mods(self, new_entries, mods):
for index, entry in enumerate(self.f, 1):
index = 1
entries = self.f[:]
entry_count = len(entries)
while 1:
if index > entry_count - 1:
break
entry = entries[index]
applied_mods = []
lines = []
@@ -265,116 +274,110 @@ class SubtitleModifications(object):
start_tags = []
end_tags = []
t = entry.text.strip()
if not t:
text = entry.text.replace(ur"\N", "\n").strip()
if not text:
if self.debug:
logger.debug(u"Skipping empty line: %s", index)
index += 1
continue
skip_entry = False
for line in t.split(ur"\N"):
# don't bother the mods with surrounding tags
old_line = line
line = line.strip()
skip_line = False
line_count += 1
try:
for line in text.split("\n"):
# don't bother the mods with surrounding tags
old_line = line
line = line.strip()
skip_line = False
line_count += 1
if not line:
continue
if not line:
continue
# clean {\X0} tags before processing
# fixme: handle nested tags?
start_tag = u""
end_tag = u""
if line.startswith(self.font_style_tag_start):
start_tag = line[:5]
line = line[5:]
if line[-5:-3] == self.font_style_tag_start:
end_tag = line[-5:]
line = line[:-5]
# clean {\X0} tags before processing
# fixme: handle nested tags?
start_tag = u""
end_tag = u""
if line.startswith(self.font_style_tag_start):
start_tag = line[:5]
line = line[5:]
if line[-5:-3] == self.font_style_tag_start:
end_tag = line[-5:]
line = line[:-5]
last_procs_mods = []
last_procs_mods = []
# fixme: this double loop is ugly
for order, identifier, args in mods:
mod = self.initialized_mods[identifier]
# fixme: this double loop is ugly
for order, identifier, args in mods:
mod = self.initialized_mods[identifier]
try:
line = mod.modify(line.strip(), entry=entry.text, debug=self.debug, parent=self, index=index,
line = mod.modify(line.strip(), entry=text, debug=self.debug, parent=self, index=index,
**args)
except EmptyEntryError:
if self.debug:
logger.debug(u"%d: %s: %r -> ''", index, identifier, entry.text)
skip_entry = True
break
if not line:
if self.debug:
logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
skip_line = True
break
if not line:
if self.debug:
logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
skip_line = True
break
applied_mods.append(identifier)
if mod.last_processors:
last_procs_mods.append([identifier, args])
applied_mods.append(identifier)
if mod.last_processors:
last_procs_mods.append([identifier, args])
if skip_entry:
lines = []
break
if skip_line:
continue
if skip_line:
continue
for identifier, args in last_procs_mods:
mod = self.initialized_mods[identifier]
for identifier, args in last_procs_mods:
mod = self.initialized_mods[identifier]
try:
line = mod.modify(line.strip(), entry=entry.text, debug=self.debug, parent=self, index=index,
line = mod.modify(line.strip(), entry=text, debug=self.debug, parent=self, index=index,
procs=["last_process"], **args)
except EmptyEntryError:
if not line:
if self.debug:
logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
skip_line = True
break
if skip_line:
continue
if start_tag:
start_tags.append(start_tag)
if end_tag:
end_tags.append(end_tag)
# append new line and clean possibly newly added empty tags
cleaned_line = EMPTY_TAG_PROCESSOR.process(start_tag + line + end_tag, debug=self.debug).strip()
if cleaned_line:
# we may have a single closing tag, if so, try appending it to the previous line
if len(cleaned_line) == 5 and cleaned_line.startswith("{\\") and cleaned_line.endswith("0}"):
if lines:
prev_line = lines.pop()
lines.append(prev_line + cleaned_line)
continue
lines.append(cleaned_line)
else:
if self.debug:
logger.debug(u"%d: %s: %r -> ''", index, identifier, entry.text)
skip_entry = True
break
logger.debug(u"%d: Ditching now empty line (%r)", index, line)
if not line:
if self.debug:
logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
skip_line = True
break
if skip_entry:
lines = []
break
if skip_line:
if not lines:
# don't bother logging when the entry only had one line
if self.debug and line_count > 1:
logger.debug(u"%d: %r -> ''", index, text)
index += 1
continue
except EmptyEntryError, e:
if self.debug:
logger.debug(u"%d: %s: %r -> ''", index, e.mod.identifier, e.entry)
index += 1
continue
if start_tag:
start_tags.append(start_tag)
if end_tag:
end_tags.append(end_tag)
# append new line and clean possibly newly added empty tags
cleaned_line = EMPTY_TAG_PROCESSOR.process(start_tag + line + end_tag, debug=self.debug).strip()
if cleaned_line:
# we may have a single closing tag, if so, try appending it to the previous line
if len(cleaned_line) == 5 and cleaned_line.startswith("{\\") and cleaned_line.endswith("0}"):
if lines:
prev_line = lines.pop()
lines.append(prev_line + cleaned_line)
continue
lines.append(cleaned_line)
else:
if self.debug:
logger.debug(u"%d: Ditching now empty line (%r)", index, line)
if not lines:
# don't bother logging when the entry only had one line
if self.debug and line_count > 1:
logger.debug(u"%d: %r -> ''", index, entry.text)
except FullContentRep, e:
if self.debug:
logger.debug(u"%d: %s: %r -> %r", index, e.mod.identifier, text, e.new_content)
new_entries.append(e.new_content.replace("\n", ur"\N"))
index += 1
continue
new_text = ur"\N".join(lines)
@@ -403,6 +406,8 @@ class SubtitleModifications(object):
entry.text = new_text
new_entries.append(entry)
index += 1
SubMod = SubtitleModifications
@@ -47,7 +47,7 @@ class SubtitleModification(object):
continue
old_content = new_content
new_content = processor.process(new_content, debug=debug, **kwargs)
new_content = processor.process(new_content, debug=debug, mod=self, **kwargs)
if not new_content:
if debug:
logger.debug("Processor returned empty line: %s", processor.name)
@@ -107,9 +107,22 @@ empty_line_post_processors = [
]
class EmptyEntryError(Exception):
class ModEvent(Exception):
def __init__(self, *args, **kwargs):
self.mod = kwargs.pop("mod", None)
self.entry = kwargs.pop("entry", None)
super(ModEvent, self).__init__(*args, **kwargs)
class EmptyEntryError(ModEvent):
pass
class EmptyLineError(Exception):
class EmptyLineError(ModEvent):
pass
class FullContentRep(ModEvent):
def __init__(self, *args, **kwargs):
self.new_content = kwargs.pop("new_content", None)
super(FullContentRep, self).__init__(*args, **kwargs)
@@ -1,7 +1,8 @@
# coding=utf-8
import re
from subzero.modification.mods import SubtitleTextModification, empty_line_post_processors, EmptyEntryError, TAG
from subzero.modification.mods import SubtitleTextModification, empty_line_post_processors, EmptyEntryError, TAG, \
FullContentRep
from subzero.modification.processors.re_processor import NReProcessor
from subzero.modification import registry
@@ -10,9 +11,11 @@ class FullBracketEntryProcessor(NReProcessor):
def process(self, content, debug=False, **kwargs):
entry = kwargs.get("entry")
if entry:
rep_content = super(FullBracketEntryProcessor, self).process(entry, debug=debug, **kwargs)
if not rep_content.strip():
raise EmptyEntryError()
rep_content = super(FullBracketEntryProcessor, self).process(entry, debug=debug, **kwargs).strip()
if not rep_content:
raise EmptyEntryError(mod=self.mod, entry=entry)
if content != rep_content:
raise FullContentRep(new_content=rep_content, mod=self.mod, entry=entry)
return content
@@ -49,8 +52,8 @@ class HearingImpaired(SubtitleTextModification):
NReProcessor(re.compile(ur'(?sux)-?%(t)s[([][^([)\]]+?(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' %
{"t": TAG}), "", name="HI_brackets"),
#NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
# "", name="HI_bracket_open_start"),
FullBracketEntryProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
"", name="HI_bracket_open_start"),
#NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "",
# name="HI_bracket_open_end"),
@@ -7,12 +7,14 @@ class Processor(object):
"""
name = None
parent = None
mod = None
supported = None
enabled = True
def __init__(self, name=None, parent=None, supported=None):
def __init__(self, name=None, parent=None, mod=None, supported=None):
self.name = name
self.parent = parent
self.mod = mod
self.supported = supported if supported else lambda parent: True
@property
@@ -20,6 +22,8 @@ class Processor(object):
return self.name
def process(self, content, debug=False, **kwargs):
if not self.mod:
self.mod = kwargs.get("mod", None)
return content
def __repr__(self):
@@ -14,12 +14,13 @@ class ReProcessor(Processor):
pattern = None
replace_with = None
def __init__(self, pattern, replace_with, name=None, supported=None):
super(ReProcessor, self).__init__(name=name, supported=supported)
def __init__(self, pattern, replace_with, name=None, supported=None, **kwargs):
super(ReProcessor, self).__init__(name=name, supported=supported, **kwargs)
self.pattern = pattern
self.replace_with = replace_with
def process(self, content, debug=False, **kwargs):
super(ReProcessor, self).process(content, debug=debug, **kwargs)
return self.pattern.sub(self.replace_with, content)
+7 -13
View File
@@ -91,29 +91,23 @@ the.vbm, mmgoodnow, Vertig0ne, thliu78, tattoomees, ostman, count_confucius, ehe
## Changelog
2.6.5.3092
2.6.5.3074
subscene, addic7ed and titlovi
- any of these providers might impose a reCAPTCHA verification. To use them, please create an account at an anti-captcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/API key in the configuration.
Changelog
- providers: subscene: fix endpoint (hopefully for longer now)
- providers: subscene: don't search for season packs (broken for now; relieves 50% of server load on provider)
- providers: subscene: don't calculate video fn for now
- providers: argenteam: backport fixes from bazarr
- subtitle: try decoding with utf-16 by default as well (zho/farsi)
- submod: HI: remove music tags by default
- core: compat (bazarr): add env var SZ_KEEP_ENCODING to keep encoding of subtitles
2.6.5.3074
Changelog
- core: cf: bypass cf 95% of the time without captchas
- core: fix breaking line endings of certain languages (chinese, UTF-16); fixes #646
- core: update pysubs2 to 0.2.3
2.6.5.3062
Changelog
- core: cf: optimize
- core: http: don't query DNS with IPs. thanks @fgump (fixes sonarr/radarr)
[older changes](CHANGELOG.md)