Compare commits

...

15 Commits

Author SHA1 Message Date
panni f337b53ae3 submod: HI: remove music
submod: common: be less aggressive about music symbols
submod: HI: be less aggressive about brackets
submod: HI: be less aggressive about MAN
2019-05-18 06:23:04 +02:00
panni aea6050d71 subtitle: try decoding with utf-16 by default as well 2019-05-17 23:45:06 +02:00
panni 13d5e0761e providers: subscene: fix endpoint once again 2019-05-13 16:14:26 +02:00
panni ce28d0284c back from dev 2019-05-12 06:17:08 +02:00
panni 1a0bb9c3e4 release 2.6.5.3074 2019-05-12 06:05:16 +02:00
panni d0c71b4b67 bump dev 2019-05-12 05:12:58 +02:00
panni b3f062956d core: re-fix ass/ssa tags in srt in pysubs2 0.2.3 2019-05-12 05:12:34 +02:00
panni 1a853a780c core: update pysubs2 to 0.2.3 2019-05-12 05:01:38 +02:00
panni 5c47ddeb2d core: update chinese encodings; #646 2019-05-12 04:49:30 +02:00
panni b51deb5d01 core: subliminal: don't replace \r with \n by default; fixes utf-16 character transformation issues; fixes #646 2019-05-12 04:48:23 +02:00
panni cbf5ea69be core: cf: update cloudscraper to 1.1.9; fix keyerror 2019-05-08 15:57:33 +02:00
panni e139ffefe6 bump dev 2019-05-08 04:18:25 +02:00
panni dc0a8deb40 core: cf: testing
providers: subscene: testing
2019-05-08 04:14:04 +02:00
panni 97e93cd10a core: cf: update js2py; update cloudscraper to 1.1.5; 2019-05-08 01:31:21 +02:00
panni 03c934cf21 back to dev 2019-05-01 15:39:23 +02:00
39 changed files with 474 additions and 285 deletions
+10
View File
@@ -1,4 +1,14 @@
2.6.5.3041
Changelog
- core: only reference guessed title if there actually is one
- core: cf: optimize
- core/config: add setting for one existing language to be enough, fixes #491
- core/compat: dns: support nameservers via ENV[dns_resolvers]; don't fall back to default DNS when configured custom DNS failed
- providers: titlovi: prevent repeated captcha solving for CF
2.6.5.3017
Changelog
+3 -3
View File
@@ -13,7 +13,7 @@
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>
<string>2.6.5.3062</string>
<string>2.6.5.3074</string>
<key>PlexFrameworkVersion</key>
<string>2</string>
<key>PlexPluginClass</key>
@@ -23,7 +23,7 @@
<key>PlexPluginConsoleLogging</key>
<string>0</string>
<key>PlexPluginDevMode</key>
<string>0</string>
<string>1</string>
<key>PlexPluginCodePolicy</key>
<!-- this allows channels to access some python methods which are otherwise blocked, as well as import external code libraries, and interact with the PMS HTTP API -->
<string>Elevated</string>
@@ -32,7 +32,7 @@
&lt;h1&gt;Sub-Zero for Plex&lt;/h1&gt;&lt;i&gt;Subtitles done right&lt;/i&gt;
Version 2.6.5.3062
Version 2.6.5.3074 DEV
Originally based on @bramwalet's awesome &lt;a href=&quot;https://github.com/bramwalet/Subliminal.bundle&quot;&gt;Subliminal.bundle&lt;/a&gt;
@@ -1,11 +1,7 @@
import re
import ssl
try:
import brotli
except ImportError:
brotli = None
import logging
import re
import sys
import ssl
from copy import deepcopy
from time import sleep
@@ -14,7 +10,6 @@ from collections import OrderedDict
from requests.sessions import Session
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.ssl_ import create_urllib3_context
from subliminal_patch.pitcher import pitchers
from .interpreters import JavaScriptInterpreter
from .user_agent import User_Agent
@@ -24,6 +19,11 @@ try:
except ImportError:
pass
try:
import brotli
except ImportError:
pass
try:
from urlparse import urlparse
from urlparse import urlunparse
@@ -31,11 +31,9 @@ except ImportError:
from urllib.parse import urlparse
from urllib.parse import urlunparse
logger = logging.getLogger(__name__)
##########################################################################################################################################################
__version__ = '1.1.1'
__version__ = '1.1.9'
BUG_REPORT = 'Cloudflare may have changed their technique, or there may be a bug in the script.'
@@ -46,38 +44,41 @@ class CipherSuiteAdapter(HTTPAdapter):
def __init__(self, cipherSuite=None, **kwargs):
self.cipherSuite = cipherSuite
if hasattr(ssl, 'PROTOCOL_TLS'):
self.ssl_context = create_urllib3_context(
ssl_version=getattr(ssl, 'PROTOCOL_TLSv1_3', ssl.PROTOCOL_TLSv1_2),
ciphers=self.cipherSuite
)
else:
self.ssl_context = create_urllib3_context(ssl_version=ssl.PROTOCOL_TLSv1)
super(CipherSuiteAdapter, self).__init__(**kwargs)
##########################################################################################################################################################
def init_poolmanager(self, *args, **kwargs):
kwargs['ssl_context'] = create_urllib3_context(ciphers=self.cipherSuite)
kwargs['ssl_context'] = self.ssl_context
return super(CipherSuiteAdapter, self).init_poolmanager(*args, **kwargs)
##########################################################################################################################################################
def proxy_manager_for(self, *args, **kwargs):
kwargs['ssl_context'] = create_urllib3_context(ciphers=self.cipherSuite)
kwargs['ssl_context'] = self.ssl_context
return super(CipherSuiteAdapter, self).proxy_manager_for(*args, **kwargs)
##########################################################################################################################################################
class NeedsCaptchaException(Exception):
pass
class CloudScraper(Session):
was_cf_request = False
def __init__(self, *args, **kwargs):
self.debug = kwargs.pop('debug', False)
self.delay = kwargs.pop('delay', None)
self.interpreter = kwargs.pop('interpreter', 'js2py')
self.allow_brotli = kwargs.pop('allow_brotli', True) and bool(brotli)
self.allow_brotli = kwargs.pop('allow_brotli', True if 'brotli' in sys.modules.keys() else False)
self.cipherSuite = None
super(CloudScraper, self).__init__()
super(CloudScraper, self).__init__(*args, **kwargs)
if 'requests' in self.headers['User-Agent']:
# Set a random User-Agent if no custom User-Agent has been set
@@ -100,24 +101,27 @@ class CloudScraper(Session):
if self.cipherSuite:
return self.cipherSuite
ciphers = [
'GREASE_3A', 'GREASE_6A', 'AES128-GCM-SHA256', 'AES256-GCM-SHA256', 'AES256-GCM-SHA384', 'CHACHA20-POLY1305-SHA256',
'ECDHE-ECDSA-AES128-GCM-SHA256', 'ECDHE-RSA-AES128-GCM-SHA256', 'ECDHE-ECDSA-AES256-GCM-SHA384',
'ECDHE-RSA-AES256-GCM-SHA384', 'ECDHE-ECDSA-CHACHA20-POLY1305-SHA256', 'ECDHE-RSA-CHACHA20-POLY1305-SHA256',
'ECDHE-RSA-AES128-CBC-SHA', 'ECDHE-RSA-AES256-CBC-SHA', 'RSA-AES128-GCM-SHA256', 'RSA-AES256-GCM-SHA384',
'ECDHE-RSA-AES128-GCM-SHA256', 'RSA-AES256-SHA', '3DES-EDE-CBC'
]
self.cipherSuite = ''
ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
if hasattr(ssl, 'PROTOCOL_TLS'):
ciphers = [
'ECDHE-ECDSA-AES128-GCM-SHA256', 'ECDHE-RSA-AES128-GCM-SHA256', 'ECDHE-ECDSA-AES256-GCM-SHA384',
'ECDHE-RSA-AES256-GCM-SHA384', 'ECDHE-ECDSA-CHACHA20-POLY1305-SHA256', 'ECDHE-RSA-CHACHA20-POLY1305-SHA256',
'ECDHE-RSA-AES128-CBC-SHA', 'ECDHE-RSA-AES256-CBC-SHA', 'RSA-AES128-GCM-SHA256', 'RSA-AES256-GCM-SHA384',
'ECDHE-RSA-AES128-GCM-SHA256', 'RSA-AES256-SHA', '3DES-EDE-CBC'
]
for cipher in ciphers:
try:
ctx.set_ciphers(cipher)
self.cipherSuite = '{}:{}'.format(self.cipherSuite, cipher).rstrip(':')
except ssl.SSLError:
pass
if hasattr(ssl, 'PROTOCOL_TLSv1_3'):
ciphers.insert(0, ['GREASE_3A', 'GREASE_6A', 'AES128-GCM-SHA256', 'AES256-GCM-SHA256', 'AES256-GCM-SHA384', 'CHACHA20-POLY1305-SHA256'])
ctx = ssl.SSLContext(getattr(ssl, 'PROTOCOL_TLSv1_3', ssl.PROTOCOL_TLSv1_2))
for cipher in ciphers:
try:
ctx.set_ciphers(cipher)
self.cipherSuite = '{}:{}'.format(self.cipherSuite, cipher).rstrip(':')
except ssl.SSLError:
pass
return self.cipherSuite
@@ -139,54 +143,15 @@ class CloudScraper(Session):
self.debugRequest(resp)
# Check if Cloudflare anti-bot is on
try:
if self.isChallengeRequest(resp):
self.was_cf_request = True
if resp.request.method != 'GET':
# Work around if the initial request is not a GET,
# Supersede with a GET then re-request the original METHOD.
self.request('GET', resp.url)
resp = ourSuper.request(method, url, *args, **kwargs)
else:
# Solve Challenge
resp = self.sendChallengeResponse(resp, **kwargs)
except NeedsCaptchaException:
self.was_cf_request = True
parsed_url = urlparse(url)
domain = parsed_url.netloc
# solve the captcha
site_key = re.search(r'data-sitekey="(.+?)"', resp.content).group(1)
challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', resp.content).group(1)
challenge_ray = re.search(r'data-ray="(.+?)"', resp.content).group(1)
if not all([site_key, challenge_s, challenge_ray]):
raise Exception("cf: Captcha site-key not found!")
pitcher = pitchers.get_pitcher()("cf: %s" % domain, resp.request.url, site_key,
user_agent=self.headers["User-Agent"],
cookies=self.cookies.get_dict(),
is_invisible=True)
parsed_url = urlparse(resp.url)
logger.info("cf: %s: Solving captcha", domain)
result = pitcher.throw()
if not result:
raise Exception("cf: Couldn't solve captcha!")
submit_url = '{}://{}/cdn-cgi/l/chk_captcha'.format(parsed_url.scheme, domain)
method = resp.request.method
cloudflare_kwargs = {
'allow_redirects': False,
'headers': {'Referer': resp.url},
'params': OrderedDict(
[
('s', challenge_s),
('g-recaptcha-response', result)
]
)
}
return self.request(method, submit_url, **cloudflare_kwargs)
if self.isChallengeRequest(resp):
if resp.request.method != 'GET':
# Work around if the initial request is not a GET,
# Supersede with a GET then re-request the original METHOD.
self.request('GET', resp.url)
resp = ourSuper.request(method, url, *args, **kwargs)
else:
# Solve Challenge
resp = self.sendChallengeResponse(resp, **kwargs)
return resp
@@ -196,7 +161,7 @@ class CloudScraper(Session):
def isChallengeRequest(resp):
if resp.headers.get('Server', '').startswith('cloudflare'):
if b'why_captcha' in resp.content or b'/cdn-cgi/l/chk_captcha' in resp.content:
raise NeedsCaptchaException
raise ValueError('Captcha')
return (
resp.status_code in [429, 503]
@@ -52,10 +52,10 @@ class JavaScriptInterpreter(ABC):
js += '\na.value;'
jsEnv = '''
function italics (str) {{ return "<i>" + this + "</i>"; }};
String.prototype.italics=function(str) {{return "<i>" + this + "</i>";}};
var document = {{
createElement: function () {{
return {{ firstChild: {{ href: "http://{domain}/" }} }}
return {{ firstChild: {{ href: "https://{domain}/" }} }}
}},
getElementById: function () {{
return {{"innerHTML": "{innerHTML}"}};
+3 -10
View File
@@ -5,6 +5,7 @@ import re
from .translators.friendly_nodes import REGEXP_CONVERTER
from .utils.injector import fix_js_args
from types import FunctionType, ModuleType, GeneratorType, BuiltinFunctionType, MethodType, BuiltinMethodType
from math import floor, log10
import traceback
try:
import numpy
@@ -603,15 +604,7 @@ class PyJs(object):
elif typ == 'Boolean':
return Js('true') if self.value else Js('false')
elif typ == 'Number': #or self.Class=='Number':
if self.is_nan():
return Js('NaN')
elif self.is_infinity():
sign = '-' if self.value < 0 else ''
return Js(sign + 'Infinity')
elif isinstance(self.value,
long) or self.value.is_integer(): # dont print .0
return Js(unicode(int(self.value)))
return Js(unicode(self.value)) # accurate enough
return Js(unicode(js_dtoa(self.value)))
elif typ == 'String':
return self
else: #object
@@ -1046,7 +1039,7 @@ def PyJsComma(a, b):
return b
from .internals.simplex import JsException as PyJsException
from .internals.simplex import JsException as PyJsException, js_dtoa
import pyjsparser
pyjsparser.parser.ENABLE_JS2PY_ERRORS = lambda msg: MakeError('SyntaxError', msg)
@@ -116,10 +116,12 @@ def eval_js(js):
def eval_js6(js):
"""Just like eval_js but with experimental support for js6 via babel."""
return eval_js(js6_to_js5(js))
def translate_js6(js):
"""Just like translate_js but with experimental support for js6 via babel."""
return translate_js(js6_to_js5(js))
@@ -3,15 +3,19 @@ import re
import datetime
from desc import *
from simplex import *
from conversions import *
import six
from pyjsparser import PyJsParser
from itertools import izip
from .desc import *
from .simplex import *
from .conversions import *
from pyjsparser import PyJsParser
import six
if six.PY2:
from itertools import izip
else:
izip = zip
from conversions import *
from simplex import *
def Type(obj):
@@ -1,8 +1,8 @@
from code import Code
from simplex import MakeError
from opcodes import *
from operations import *
from trans_utils import *
from .code import Code
from .simplex import MakeError
from .opcodes import *
from .operations import *
from .trans_utils import *
SPECIAL_IDENTIFIERS = {'true', 'false', 'this'}
@@ -465,10 +465,11 @@ class ByteCodeGenerator:
self.emit('LOAD_OBJECT', tuple(data))
def Program(self, body, **kwargs):
old_tape_len = len(self.exe.tape)
self.emit('LOAD_UNDEFINED')
self.emit(body)
# add function tape !
self.exe.tape = self.function_declaration_tape + self.exe.tape
self.exe.tape = self.exe.tape[:old_tape_len] + self.function_declaration_tape + self.exe.tape[old_tape_len:]
def Pyimport(self, imp, **kwargs):
raise NotImplementedError(
@@ -735,17 +736,17 @@ def main():
#
# }
a.emit(d)
print a.declared_vars
print a.exe.tape
print len(a.exe.tape)
print(a.declared_vars)
print(a.exe.tape)
print(len(a.exe.tape))
a.exe.compile()
def log(this, args):
print args[0]
print(args[0])
return 999
print a.exe.run(a.exe.space.GlobalObj)
print(a.exe.run(a.exe.space.GlobalObj))
if __name__ == '__main__':
@@ -1,16 +1,17 @@
from opcodes import *
from space import *
from base import *
from .opcodes import *
from .space import *
from .base import *
class Code:
'''Can generate, store and run sequence of ops representing js code'''
def __init__(self, is_strict=False):
def __init__(self, is_strict=False, debug_mode=False):
self.tape = []
self.compiled = False
self.label_locs = None
self.is_strict = is_strict
self.debug_mode = debug_mode
self.contexts = []
self.current_ctx = None
@@ -22,6 +23,10 @@ class Code:
self.GLOBAL_THIS = None
self.space = None
# dbg
self.ctx_depth = 0
def get_new_label(self):
self._label_count += 1
return self._label_count
@@ -74,21 +79,35 @@ class Code:
# 0=normal, 1=return, 2=jump_outside, 3=errors
# execute_fragment_under_context returns:
# (return_value, typ, return_value/jump_loc/py_error)
# ctx.stack must be len 1 and its always empty after the call.
# IMPARTANT: It is guaranteed that the length of the ctx.stack is unchanged.
'''
old_curr_ctx = self.current_ctx
self.ctx_depth += 1
old_stack_len = len(ctx.stack)
old_ret_len = len(self.return_locs)
old_ctx_len = len(self.contexts)
try:
self.current_ctx = ctx
return self._execute_fragment_under_context(
ctx, start_label, end_label)
except JsException as err:
# undo the things that were put on the stack (if any)
# don't worry, I know the recovery is possible through try statement and for this reason try statement
# has its own context and stack so it will not delete the contents of the outer stack
del ctx.stack[:]
if self.debug_mode:
self._on_fragment_exit("js errors")
# undo the things that were put on the stack (if any) to ensure a proper error recovery
del ctx.stack[old_stack_len:]
del self.return_locs[old_ret_len:]
del self.contexts[old_ctx_len :]
return undefined, 3, err
finally:
self.ctx_depth -= 1
self.current_ctx = old_curr_ctx
assert old_stack_len == len(ctx.stack)
def _get_dbg_indent(self):
return self.ctx_depth * ' '
def _on_fragment_exit(self, mode):
print(self._get_dbg_indent() + 'ctx exit (%s)' % mode)
def _execute_fragment_under_context(self, ctx, start_label, end_label):
start, end = self.label_locs[start_label], self.label_locs[end_label]
@@ -97,16 +116,20 @@ class Code:
entry_level = len(self.contexts)
# for e in self.tape[start:end]:
# print e
if self.debug_mode:
print(self._get_dbg_indent() + 'ctx entry (from:%d, to:%d)' % (start, end))
while loc < len(self.tape):
#print loc, self.tape[loc]
if len(self.contexts) == entry_level and loc >= end:
if self.debug_mode:
self._on_fragment_exit('normal')
assert loc == end
assert len(ctx.stack) == (
1 + initial_len), 'Stack change must be equal to +1!'
delta_stack = len(ctx.stack) - initial_len
assert delta_stack == +1, 'Stack change must be equal to +1! got %d' % delta_stack
return ctx.stack.pop(), 0, None # means normal return
# execute instruction
if self.debug_mode:
print(self._get_dbg_indent() + str(loc), self.tape[loc])
status = self.tape[loc].eval(ctx)
# check status for special actions
@@ -116,9 +139,10 @@ class Code:
if len(self.contexts) == entry_level:
# check if jumped outside of the fragment and break if so
if not start <= loc < end:
assert len(ctx.stack) == (
1 + initial_len
), 'Stack change must be equal to +1!'
if self.debug_mode:
self._on_fragment_exit('jump outside loc:%d label:%d' % (loc, status))
delta_stack = len(ctx.stack) - initial_len
assert delta_stack == +1, 'Stack change must be equal to +1! got %d' % delta_stack
return ctx.stack.pop(), 2, status # jump outside
continue
@@ -137,7 +161,10 @@ class Code:
# return: (None, None)
else:
if len(self.contexts) == entry_level:
assert len(ctx.stack) == 1 + initial_len
if self.debug_mode:
self._on_fragment_exit('return')
delta_stack = len(ctx.stack) - initial_len
assert delta_stack == +1, 'Stack change must be equal to +1! got %d' % delta_stack
return undefined, 1, ctx.stack.pop(
) # return signal
return_value = ctx.stack.pop()
@@ -149,6 +176,8 @@ class Code:
continue
# next instruction
loc += 1
if self.debug_mode:
self._on_fragment_exit('internal error - unexpected end of tape, will crash')
assert False, 'Remember to add NOP at the end!'
def run(self, ctx, starting_loc=0):
@@ -156,7 +185,8 @@ class Code:
self.current_ctx = ctx
while loc < len(self.tape):
# execute instruction
#print loc, self.tape[loc]
if self.debug_mode:
print(loc, self.tape[loc])
status = self.tape[loc].eval(ctx)
# check status for special actions
@@ -42,6 +42,7 @@ def executable_code(code_str, space, global_context=True):
space.byte_generator.emit('LABEL', skip)
space.byte_generator.emit('NOP')
space.byte_generator.restore_state()
space.byte_generator.exe.compile(
start_loc=old_tape_len
) # dont read the code from the beginning, dont be stupid!
@@ -71,5 +72,5 @@ def _eval(this, args):
def log(this, args):
print ' '.join(map(to_string, args))
print(' '.join(map(to_string, args)))
return undefined
@@ -1,6 +1,6 @@
from __future__ import unicode_literals
# Type Conversions. to_type. All must return PyJs subclass instance
from simplex import *
from .simplex import *
def to_primitive(self, hint=None):
@@ -73,14 +73,7 @@ def to_string(self):
elif typ == 'Boolean':
return 'true' if self else 'false'
elif typ == 'Number': # or self.Class=='Number':
if is_nan(self):
return 'NaN'
elif is_infinity(self):
sign = '-' if self < 0 else ''
return sign + 'Infinity'
elif int(self) == self: # integer value!
return unicode(int(self))
return unicode(self) # todo make it print exactly like node.js
return js_dtoa(self)
else: # object
return to_string(to_primitive(self, 'String'))
@@ -1,29 +1,22 @@
from __future__ import unicode_literals
from base import Scope
from func_utils import *
from conversions import *
from .base import Scope
from .func_utils import *
from .conversions import *
import six
from prototypes.jsboolean import BooleanPrototype
from prototypes.jserror import ErrorPrototype
from prototypes.jsfunction import FunctionPrototype
from prototypes.jsnumber import NumberPrototype
from prototypes.jsobject import ObjectPrototype
from prototypes.jsregexp import RegExpPrototype
from prototypes.jsstring import StringPrototype
from prototypes.jsarray import ArrayPrototype
import prototypes.jsjson as jsjson
import prototypes.jsutils as jsutils
from .prototypes.jsboolean import BooleanPrototype
from .prototypes.jserror import ErrorPrototype
from .prototypes.jsfunction import FunctionPrototype
from .prototypes.jsnumber import NumberPrototype
from .prototypes.jsobject import ObjectPrototype
from .prototypes.jsregexp import RegExpPrototype
from .prototypes.jsstring import StringPrototype
from .prototypes.jsarray import ArrayPrototype
from .prototypes import jsjson
from .prototypes import jsutils
from .constructors import jsnumber, jsstring, jsarray, jsboolean, jsregexp, jsmath, jsobject, jsfunction, jsconsole
from constructors import jsnumber
from constructors import jsstring
from constructors import jsarray
from constructors import jsboolean
from constructors import jsregexp
from constructors import jsmath
from constructors import jsobject
from constructors import jsfunction
from constructors import jsconsole
def fill_proto(proto, proto_class, space):
@@ -155,7 +148,10 @@ def fill_space(space, byte_generator):
j = easy_func(creator, space)
j.name = unicode(typ)
j.prototype = space.ERROR_TYPES[typ]
set_protected(j, 'prototype', space.ERROR_TYPES[typ])
set_non_enumerable(space.ERROR_TYPES[typ], 'constructor', j)
def new_create(args, space):
message = get_arg(args, 0)
@@ -178,6 +174,7 @@ def fill_space(space, byte_generator):
setattr(space, err_type_name + u'Prototype', extra_err)
error_constructors[err_type_name] = construct_constructor(
err_type_name)
assert space.TypeErrorPrototype is not None
# RegExp
@@ -1,5 +1,5 @@
from simplex import *
from conversions import *
from .simplex import *
from .conversions import *
import six
if six.PY3:
@@ -1,5 +1,5 @@
from operations import *
from base import get_member, get_member_dot, PyJsFunction, Scope
from .operations import *
from .base import get_member, get_member_dot, PyJsFunction, Scope
class OP_CODE(object):
@@ -1,6 +1,6 @@
from __future__ import unicode_literals
from simplex import *
from conversions import *
from .simplex import *
from .conversions import *
# ------------------------------------------------------------------------------
# Unary operations
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
import re
from ..conversions import *
from ..func_utils import *
from jsregexp import RegExpExec
from .jsregexp import RegExpExec
DIGS = set(u'0123456789')
WHITE = u"\u0009\u000A\u000B\u000C\u000D\u0020\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF"
@@ -1,11 +1,9 @@
import pyjsparser
from space import Space
import fill_space
from byte_trans import ByteCodeGenerator
from code import Code
from simplex import MakeError
import sys
sys.setrecursionlimit(100000)
from .space import Space
from . import fill_space
from .byte_trans import ByteCodeGenerator
from .code import Code
from .simplex import *
pyjsparser.parser.ENABLE_JS2PY_ERRORS = lambda msg: MakeError(u'SyntaxError', unicode(msg))
@@ -16,8 +14,8 @@ def get_js_bytecode(js):
a.emit(d)
return a.exe.tape
def eval_js_vm(js):
a = ByteCodeGenerator(Code())
def eval_js_vm(js, debug=False):
a = ByteCodeGenerator(Code(debug_mode=debug))
s = Space()
a.exe.space = s
s.exe = a.exe
@@ -26,7 +24,10 @@ def eval_js_vm(js):
a.emit(d)
fill_space.fill_space(s, a)
# print a.exe.tape
if debug:
from pprint import pprint
pprint(a.exe.tape)
print()
a.exe.compile()
return a.exe.run(a.exe.space.GlobalObj)
@@ -1,6 +1,10 @@
from __future__ import unicode_literals
import six
if six.PY3:
basestring = str
long = int
xrange = range
unicode = str
#Undefined
class PyJsUndefined(object):
@@ -75,7 +79,7 @@ def is_callable(self):
def is_infinity(self):
return self == float('inf') or self == -float('inf')
return self == Infinity or self == -Infinity
def is_nan(self):
@@ -114,7 +118,7 @@ class JsException(Exception):
return self.mes.to_string().value
else:
if self.throw is not None:
from conversions import to_string
from .conversions import to_string
return to_string(self.throw)
else:
return self.typ + ': ' + self.message
@@ -131,3 +135,26 @@ def value_from_js_exception(js_exception, space):
return js_exception.throw
else:
return space.NewError(js_exception.typ, js_exception.message)
def js_dtoa(number):
if is_nan(number):
return u'NaN'
elif is_infinity(number):
if number > 0:
return u'Infinity'
return u'-Infinity'
elif number == 0.:
return u'0'
elif abs(number) < 1e-6 or abs(number) >= 1e21:
frac, exponent = unicode(repr(float(number))).split('e')
# Remove leading zeros from the exponent.
exponent = int(exponent)
return frac + ('e' if exponent < 0 else 'e+') + unicode(exponent)
elif abs(number) < 1e-4: # python starts to return exp notation while we still want the prec
frac, exponent = unicode(repr(float(number))).split('e-')
base = u'0.' + u'0' * (int(exponent) - 1) + frac.lstrip('-').replace('.', '')
return base if number > 0. else u'-' + base
elif isinstance(number, long) or number.is_integer(): # dont print .0
return unicode(int(number))
return unicode(repr(number)) # python representation should be equivalent.
@@ -1,5 +1,5 @@
from base import *
from simplex import *
from .base import *
from .simplex import *
class Space(object):
@@ -1,3 +1,10 @@
import six
if six.PY3:
basestring = str
long = int
xrange = range
unicode = str
def to_key(literal_or_identifier):
''' returns string representation of this object'''
if literal_or_identifier['type'] == 'Identifier':
@@ -6,8 +6,6 @@ if six.PY3:
xrange = range
unicode = str
# todo fix apply and bind
class FunctionPrototype:
def toString():
@@ -41,6 +39,7 @@ class FunctionPrototype:
return this.call(obj, args)
def bind(thisArg):
arguments_ = arguments
target = this
if not target.is_callable():
raise this.MakeError(
@@ -48,5 +47,5 @@ class FunctionPrototype:
if len(arguments) <= 1:
args = ()
else:
args = tuple([arguments[e] for e in xrange(1, len(arguments))])
args = tuple([arguments_[e] for e in xrange(1, len(arguments_))])
return this.PyJsBoundFunction(target, thisArg, args)
@@ -345,7 +345,7 @@ def BlockStatement(type, body):
body) # never returns empty string! In the worst case returns pass\n
def ExpressionStatement(type, expression, **ommit):
def ExpressionStatement(type, expression):
return trans(expression) + '\n' # end expression space with new line
+10
View File
@@ -163,3 +163,13 @@ class Pysubs2CLI(object):
elif args.transform_framerate is not None:
in_fps, out_fps = args.transform_framerate
subs.transform_framerate(in_fps, out_fps)
def __main__():
cli = Pysubs2CLI()
rv = cli(sys.argv[1:])
sys.exit(rv)
if __name__ == "__main__":
__main__()
+3 -1
View File
@@ -17,12 +17,14 @@ class Color(_Color):
return _Color.__new__(cls, r, g, b, a)
#: Version of the pysubs2 library.
VERSION = "0.2.1"
VERSION = "0.2.3"
PY3 = sys.version_info.major == 3
if PY3:
text_type = str
binary_string_type = bytes
else:
text_type = unicode
binary_string_type = str
+1 -3
View File
@@ -3,7 +3,7 @@ from .microdvd import MicroDVDFormat
from .subrip import SubripFormat
from .jsonformat import JSONFormat
from .substation import SubstationFormat
from .txt_generic import TXTGenericFormat, MPL2Format
from .mpl2 import MPL2Format
from .exceptions import *
#: Dict mapping file extensions to format identifiers.
@@ -13,7 +13,6 @@ FILE_EXTENSION_TO_FORMAT_IDENTIFIER = {
".ssa": "ssa",
".sub": "microdvd",
".json": "json",
".txt": "txt_generic",
}
#: Dict mapping format identifiers to implementations (FormatBase subclasses).
@@ -23,7 +22,6 @@ FORMAT_IDENTIFIER_TO_FORMAT_CLASS = {
"ssa": SubstationFormat,
"microdvd": MicroDVDFormat,
"json": JSONFormat,
"txt_generic": TXTGenericFormat,
"mpl2": MPL2Format,
}
@@ -2,44 +2,48 @@
from __future__ import print_function, division, unicode_literals
import re
from numbers import Number
from pysubs2.time import times_to_ms
from .time import times_to_ms
from .formatbase import FormatBase
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
# thanks to http://otsaloma.io/gaupol/doc/api/aeidon.files.mpl2_source.html
MPL2_FORMAT = re.compile(r"^(?um)\[(-?\d+)\]\[(-?\d+)\](.*?)$")
class TXTGenericFormat(FormatBase):
@classmethod
def guess_format(cls, text):
if MPL2_FORMAT.match(text):
return "mpl2"
MPL2_FORMAT = re.compile(r"^(?um)\[(-?\d+)\]\[(-?\d+)\](.*)")
class MPL2Format(FormatBase):
@classmethod
def guess_format(cls, text):
return TXTGenericFormat.guess_format(text)
if MPL2_FORMAT.search(text):
return "mpl2"
@classmethod
def from_file(cls, subs, fp, format_, **kwargs):
def prepare_text(lines):
out = []
for s in lines.split("|"):
s = s.strip()
if s.startswith("/"):
out.append(r"{\i1}%s{\i0}" % s[1:])
continue
# line beginning with '/' is in italics
s = r"{\i1}%s{\i0}" % s[1:].strip()
out.append(s)
return "\n".join(out)
return "\\N".join(out)
subs.events = [SSAEvent(start=times_to_ms(s=float(start) / 10), end=times_to_ms(s=float(end) / 10),
text=prepare_text(text)) for start, end, text in MPL2_FORMAT.findall(fp.getvalue())]
@classmethod
def to_file(cls, subs, fp, format_, **kwargs):
raise NotImplemented
# TODO handle italics
for line in subs:
if line.is_comment:
continue
print("[{start}][{end}] {text}".format(start=int(line.start // 100),
end=int(line.end // 100),
text=line.plaintext.replace("\n", "|")),
file=fp)
@@ -78,7 +78,7 @@ class SSAStyle(object):
s += "%rpx " % self.fontsize
if self.bold: s += "bold "
if self.italic: s += "italic "
s += "'%s'>" % self.fontname
s += "{!r}>".format(self.fontname)
if not PY3: s = s.encode("utf-8")
return s
+9 -1
View File
@@ -46,8 +46,16 @@ class SubripFormat(FormatBase):
following_lines[-1].append(line)
def prepare_text(lines):
# Handle the "happy" empty subtitle case, which is timestamp line followed by blank line(s)
# followed by number line and timestamp line of the next subtitle. Fixes issue #11.
if (len(lines) >= 2
and all(re.match("\s*$", line) for line in lines[:-1])
and re.match("\s*\d+\s*$", lines[-1])):
return ""
# Handle the general case.
s = "".join(lines).strip()
s = re.sub(r"\n* *\d+ *$", "", s) # strip number of next subtitle
s = re.sub(r"\n+ *\d+ *$", "", s) # strip number of next subtitle
s = re.sub(r"< *i *>", r"{\i1}", s)
s = re.sub(r"< */ *i *>", r"{\i0}", s)
s = re.sub(r"< *s *>", r"{\s1}", s)
+14 -13
View File
@@ -4,7 +4,7 @@ from numbers import Number
from .formatbase import FormatBase
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .common import text_type, Color
from .common import text_type, Color, PY3, binary_string_type
from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP
SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7)
@@ -150,14 +150,7 @@ class SubstationFormat(FormatBase):
if format_ == "ass":
return ass_rgba_to_color(v)
else:
try:
return ssa_rgb_to_color(v)
except ValueError:
try:
return ass_rgba_to_color(v)
except:
return Color(255, 255, 255, 0)
return ssa_rgb_to_color(v)
elif f in {"bold", "underline", "italic", "strikeout"}:
return v == "-1"
elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
@@ -229,7 +222,7 @@ class SubstationFormat(FormatBase):
for k, v in subs.aegisub_project.items():
print(k, v, sep=": ", file=fp)
def field_to_string(f, v):
def field_to_string(f, v, line):
if f in {"start", "end"}:
return ms_to_timestamp(v)
elif f == "marked":
@@ -240,23 +233,31 @@ class SubstationFormat(FormatBase):
return "-1" if v else "0"
elif isinstance(v, (text_type, Number)):
return text_type(v)
elif not PY3 and isinstance(v, binary_string_type):
# A convenience feature, see issue #12 - accept non-unicode strings
# when they are ASCII; this is useful in Python 2, especially for non-text
# fields like style names, where requiring Unicode type seems too stringent
if all(ord(c) < 128 for c in v):
return text_type(v)
else:
raise TypeError("Encountered binary string with non-ASCII codepoint in SubStation field {!r} for line {!r} - please use unicode string instead of str".format(f, line))
elif isinstance(v, Color):
if format_ == "ass":
return color_to_ass_rgba(v)
else:
return color_to_ssa_rgb(v)
else:
raise TypeError("Unexpected type when writing a SubStation field")
raise TypeError("Unexpected type when writing a SubStation field {!r} for line {!r}".format(f, line))
print("\n[V4+ Styles]" if format_ == "ass" else "\n[V4 Styles]", file=fp)
print(STYLE_FORMAT_LINE[format_], file=fp)
for name, sty in subs.styles.items():
fields = [field_to_string(f, getattr(sty, f)) for f in STYLE_FIELDS[format_]]
fields = [field_to_string(f, getattr(sty, f), sty) for f in STYLE_FIELDS[format_]]
print("Style: %s" % name, *fields, sep=",", file=fp)
print("\n[Events]", file=fp)
print(EVENT_FORMAT_LINE[format_], file=fp)
for ev in subs.events:
fields = [field_to_string(f, getattr(ev, f)) for f in EVENT_FIELDS[format_]]
fields = [field_to_string(f, getattr(ev, f), ev) for f in EVENT_FIELDS[format_]]
print(ev.type, end=": ", file=fp)
print(*fields, sep=",", file=fp)
@@ -258,4 +258,4 @@ def fix_line_ending(content):
:rtype: bytes
"""
return content.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
return content.replace(b'\r\n', b'\n')
@@ -11,6 +11,7 @@ import requests
import xmlrpclib
import dns.resolver
import ipaddress
import re
from requests import exceptions
from urllib3.util import connection
@@ -18,8 +19,14 @@ from retry.api import retry_call
from exceptions import APIThrottled
from dogpile.cache.api import NO_VALUE
from subliminal.cache import region
from subliminal_patch.pitcher import pitchers
from cloudscraper import CloudScraper
try:
import brotli
except:
pass
try:
from urlparse import urlparse
except ImportError:
@@ -56,13 +63,81 @@ class CertifiSession(TimeoutSession):
self.verify = pem_file
class CFSession(CloudScraper):
_hdrs = None
class NeedsCaptchaException(Exception):
pass
class CFSession(CloudScraper):
def __init__(self, *args, **kwargs):
super(CFSession, self).__init__(*args, **kwargs)
self.debug = os.environ.get("CF_DEBUG", False)
self._was_cf = False
def _request(self, method, url, *args, **kwargs):
ourSuper = super(CloudScraper, self)
resp = ourSuper.request(method, url, *args, **kwargs)
if resp.headers.get('Content-Encoding') == 'br':
if self.allow_brotli and resp._content:
resp._content = brotli.decompress(resp.content)
else:
logging.warning('Brotli content detected, But option is disabled, we will not continue.')
return resp
# Debug request
if self.debug:
self.debugRequest(resp)
# Check if Cloudflare anti-bot is on
try:
if self.isChallengeRequest(resp):
if resp.request.method != 'GET':
# Work around if the initial request is not a GET,
# Supersede with a GET then re-request the original METHOD.
CloudScraper.request(self, 'GET', resp.url)
resp = ourSuper.request(method, url, *args, **kwargs)
else:
# Solve Challenge
resp = self.sendChallengeResponse(resp, **kwargs)
except ValueError, e:
if e.message == "Captcha":
parsed_url = urlparse(url)
domain = parsed_url.netloc
# solve the captcha
site_key = re.search(r'data-sitekey="(.+?)"', resp.content).group(1)
challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', resp.content).group(1)
challenge_ray = re.search(r'data-ray="(.+?)"', resp.content).group(1)
if not all([site_key, challenge_s, challenge_ray]):
raise Exception("cf: Captcha site-key not found!")
pitcher = pitchers.get_pitcher()("cf: %s" % domain, resp.request.url, site_key,
user_agent=self.headers["User-Agent"],
cookies=self.cookies.get_dict(),
is_invisible=True)
parsed_url = urlparse(resp.url)
logger.info("cf: %s: Solving captcha", domain)
result = pitcher.throw()
if not result:
raise Exception("cf: Couldn't solve captcha!")
submit_url = '{}://{}/cdn-cgi/l/chk_captcha'.format(parsed_url.scheme, domain)
method = resp.request.method
cloudflare_kwargs = {
'allow_redirects': False,
'headers': {'Referer': resp.url},
'params': OrderedDict(
[
('s', challenge_s),
('g-recaptcha-response', result)
]
)
}
return CloudScraper.request(self, method, submit_url, **cloudflare_kwargs)
return resp
def request(self, method, url, *args, **kwargs):
parsed_url = urlparse(url)
@@ -80,20 +155,19 @@ class CFSession(CloudScraper):
self.headers = hdrs
ret = super(CFSession, self).request(method, url, *args, **kwargs)
ret = self._request(method, url, *args, **kwargs)
if self.was_cf_request:
self.was_cf_request = False
logger.debug("We've hit CF, seeing if we need to store previous data")
try:
cf_data = self.get_cf_live_tokens(domain)
except:
logger.debug("Couldn't get CF live tokens for re-use. Cookies: %r", self.cookies)
pass
else:
if cf_data != region.get(cache_key) and cf_data[0]["cf_clearance"]:
try:
cf_data = self.get_cf_live_tokens(domain)
except:
pass
else:
if cf_data and "cf_clearance" in cf_data[0] and cf_data[0]["cf_clearance"]:
if cf_data != region.get(cache_key):
logger.debug("Storing cf data for %s: %s", domain, cf_data)
region.set(cache_key, cf_data)
elif cf_data[0]["cf_clearance"]:
logger.debug("CF Live tokens not updated")
return ret
@@ -210,7 +210,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
for series in [video.series] + video.alternative_series:
term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize())
logger.debug('Searching for alternative results: %s', term)
film = search(term, session=self.session, release=False)
film = search(term, session=self.session, release=False, throttle=self.search_throttle)
if film and film.subtitles:
logger.debug('Alternative results found: %s', len(film.subtitles))
subtitles += self.parse_results(video, film)
@@ -222,7 +222,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
term = u"%s S%02i" % (series, video.season)
logger.debug('Searching for packs: %s', term)
time.sleep(self.search_throttle)
film = search(term, session=self.session)
film = search(term, session=self.session, throttle=self.search_throttle)
if film and film.subtitles:
logger.debug('Pack results found: %s', len(film.subtitles))
subtitles += self.parse_results(video, film)
@@ -236,7 +236,8 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
more_than_one = len([video.title] + video.alternative_titles) > 1
for title in [video.title] + video.alternative_titles:
logger.debug('Searching for movie results: %s', title)
film = search(title, year=video.year, session=self.session, limit_to=None, release=False)
film = search(title, year=video.year, session=self.session, limit_to=None, release=False,
throttle=self.search_throttle)
if film and film.subtitles:
subtitles += self.parse_results(video, film)
if more_than_one:
@@ -117,13 +117,14 @@ class Subtitle(Subtitle_):
logger.info('Guessing encoding for language %s', self.language)
encodings = ['utf-8']
encodings = ['utf-8', 'utf-16']
# add language-specific encodings
# http://scratchpad.wikia.com/wiki/Character_Encoding_Recommendation_for_Languages
if self.language.alpha3 == 'zho':
encodings.extend(['cp936', 'gb2312', 'cp950', 'gb18030', 'big5', 'big5hkscs'])
encodings.extend(['cp936', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp_2', 'cp950', 'gb18030', 'big5',
'big5hkscs'])
elif self.language.alpha3 == 'jpn':
encodings.extend(['shift-jis', 'cp932', 'euc_jp', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', ])
@@ -28,6 +28,8 @@ import re
import enum
import sys
import requests
import time
is_PY2 = sys.version_info[0] < 3
if is_PY2:
@@ -55,7 +57,9 @@ def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT):
r = Request(url, data=None, headers=dict(HEADERS, **{"User-Agent": user_agent}))
html = urlopen(r).read().decode("utf-8")
else:
html = session.get(url).text
ret = session.get(url)
ret.raise_for_status()
html = ret.text
return BeautifulSoup(html, "html.parser")
@@ -243,17 +247,34 @@ def get_first_film(soup, section, year=None, session=None):
return Film.from_url(url, session=session)
def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact):
soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, "release" if release else "title", term), session=session)
def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact, throttle=0):
# note to subscene: if you actually start to randomize the endpoint, we'll have to query your server even more
endpoints = ["searching", "search", "srch", "find"]
if release:
endpoints = ["release"]
if "Subtitle search by" in str(soup):
rows = soup.find("table").tbody.find_all("tr")
subtitles = Subtitle.from_rows(rows)
return Film(term, subtitles=subtitles)
soup = None
for endpoint in endpoints:
try:
soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, endpoint, term),
session=session)
except requests.HTTPError, e:
if e.response.status_code == 404:
time.sleep(throttle)
# fixme: detect endpoint from html
continue
raise
break
for junk, search_type in SearchTypes.__members__.items():
if section_exists(soup, search_type):
return get_first_film(soup, search_type, year=year, session=session)
if soup:
if "Subtitle search by" in str(soup):
rows = soup.find("table").tbody.find_all("tr")
subtitles = Subtitle.from_rows(rows)
return Film(term, subtitles=subtitles)
if limit_to == search_type:
return
for junk, search_type in SearchTypes.__members__.items():
if section_exists(soup, search_type):
return get_first_film(soup, search_type, year=year, session=session)
if limit_to == search_type:
return
@@ -293,6 +293,9 @@ class SubtitleModifications(object):
end_tag = line[-5:]
line = line[:-5]
last_procs_mods = []
# fixme: this double loop is ugly
for order, identifier, args in mods:
mod = self.initialized_mods[identifier]
@@ -312,6 +315,33 @@ class SubtitleModifications(object):
break
applied_mods.append(identifier)
if mod.last_processors:
last_procs_mods.append([identifier, args])
if skip_entry:
lines = []
break
if skip_line:
continue
for identifier, args in last_procs_mods:
mod = self.initialized_mods[identifier]
try:
line = mod.modify(line.strip(), entry=entry.text, debug=self.debug, parent=self, index=index,
procs=["last_process"], **args)
except EmptyEntryError:
if self.debug:
logger.debug(u"%d: %s: %r -> ''", index, identifier, entry.text)
skip_entry = True
break
if not line:
if self.debug:
logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
skip_line = True
break
if skip_entry:
lines = []
@@ -21,6 +21,7 @@ class SubtitleModification(object):
pre_processors = []
processors = []
post_processors = []
last_processors = []
languages = []
def __init__(self, parent):
@@ -67,15 +68,16 @@ class SubtitleModification(object):
def post_process(self, content, debug=False, parent=None, **kwargs):
return self._process(content, self.post_processors, debug=debug, parent=parent, **kwargs)
def modify(self, content, debug=False, parent=None, **kwargs):
def modify(self, content, debug=False, parent=None, procs=None, **kwargs):
if not content:
return
new_content = content
for method in ("pre_process", "process", "post_process"):
for method in procs or ("pre_process", "process", "post_process"):
if not new_content:
return
new_content = getattr(self, method)(new_content, debug=debug, parent=parent, **kwargs)
new_content = self._process(new_content, getattr(self, "%sors" % method),
debug=debug, parent=parent, **kwargs)
return new_content
@@ -107,3 +109,7 @@ empty_line_post_processors = [
class EmptyEntryError(Exception):
pass
class EmptyLineError(Exception):
pass
@@ -28,7 +28,7 @@ class CommonFixes(SubtitleTextModification):
NReProcessor(re.compile(r'(?u)(\w|\b|\s|^)(-\s?-{1,2})'), ur"\1", name="CM_multidash"),
# line = _/-/\s
NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="CM_non_word_only"),
NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="<CM_non_word_only"),
# remove >>
NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),
@@ -37,7 +37,7 @@ class CommonFixes(SubtitleTextModification):
NReProcessor(re.compile(r'(?u)(^\W*:\s*(?=\w+))'), "", name="CM_empty_colon_start"),
# fix music symbols
NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s*)|(\s*[*#¶]+\s*$)'),
NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s+)|(\s*[*#¶]+\s*$)'),
lambda x: u"" if x.group(1) else u"",
name="CM_music_symbols"),
@@ -49,11 +49,11 @@ class HearingImpaired(SubtitleTextModification):
NReProcessor(re.compile(ur'(?sux)-?%(t)s[([][^([)\]]+?(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' %
{"t": TAG}), "", name="HI_brackets"),
NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
"", name="HI_bracket_open_start"),
#NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
# "", name="HI_bracket_open_start"),
NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "",
name="HI_bracket_open_end"),
#NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "",
# name="HI_bracket_open_end"),
# text before colon (and possible dash in front), max 11 chars after the first whitespace (if any)
# NReProcessor(re.compile(r'(?u)(^[A-z\-\'"_]+[\w\s]{0,11}:[^0-9{2}][\s]*)'), "", name="HI_before_colon"),
@@ -73,7 +73,7 @@ class HearingImpaired(SubtitleTextModification):
supported=lambda p: not p.only_uppercase),
# remove MAN:
NReProcessor(re.compile(ur'(?suxi)(.*MAN:\s*)'), "", name="HI_remove_man"),
NReProcessor(re.compile(ur'(?suxi)(\b(?:WO)MAN:\s*)'), "", name="HI_remove_man"),
# dash in front
# NReProcessor(re.compile(r'(?u)^\s*-\s*'), "", name="HI_starting_dash"),
@@ -81,13 +81,18 @@ class HearingImpaired(SubtitleTextModification):
# all caps at start before new sentence
NReProcessor(re.compile(ur'(?u)^(?=[A-ZÀ-Ž]{4,})[A-ZÀ-Ž-_\s]+\s([A-ZÀ-Ž][a-zà-ž].+)'), r"\1",
name="HI_starting_upper_then_sentence", supported=lambda p: not p.only_uppercase),
# remove music symbols
NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}),
"", name="HI_music_symbols_only"),
]
post_processors = empty_line_post_processors
last_processors = [
# remove music symbols
NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}),
"", name="HI_music_symbols_only"),
# remove music entries
NReProcessor(re.compile(ur'(?ums)(^[-\s>~]*[♫♪]+\s*.+|.+\s*[♫♪]+\s*$)'),
"", name="HI_music"),
]
registry.register(HearingImpaired)
+6 -8
View File
@@ -91,24 +91,22 @@ the.vbm, mmgoodnow, Vertig0ne, thliu78, tattoomees, ostman, count_confucius, ehe
## Changelog
2.6.5.3062
2.6.5.3074
subscene, addic7ed and titlovi
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
Changelog
- core: cf: optimize
- core: http: don't query DNS with IPs. thanks @fgump (fixes sonarr/radarr)
- core: cf: bypass cf 95% of the time without captchas
- core: fix breaking line endings of certain languages (chinese, UTF-16); fixes #646
- core: update pysubs2 to 0.2.3
2.6.5.3041
2.6.5.3062
Changelog
- core: only reference guessed title if there actually is one
- core: cf: optimize
- core/config: add setting for one existing language to be enough, fixes #491
- core/compat: dns: support nameservers via ENV[dns_resolvers]; don't fall back to default DNS when configured custom DNS failed
- providers: titlovi: prevent repeated captcha solving for CF
- core: http: don't query DNS with IPs. thanks @fgump (fixes sonarr/radarr)
[older changes](CHANGELOG.md)