Compare commits
209 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| fb8bfeb044 | |||
| e083e133eb | |||
| c787e671c3 | |||
| 31ff93c3f1 | |||
| 289f174e2b | |||
| ea03f3fc4d | |||
| 740fc93c13 | |||
| e94bd3fcb9 | |||
| dba469750b | |||
| c7fe6076cb | |||
| 356f578014 | |||
| b151ed4c55 | |||
| 9455e3b52b | |||
| 60e2656541 | |||
| fcb1a8a6a7 | |||
| 9e5829151d | |||
| 1f0a713f9b | |||
| ff49dd4512 | |||
| 3cf83b5bf7 | |||
| c51ce55d32 | |||
| ee9268957d | |||
| 5d1858d5da | |||
| d59abea1f5 | |||
| e6b79334d8 | |||
| 88d2a44f08 | |||
| 78e47d3cd5 | |||
| 6b6af347da | |||
| dccee96cf1 | |||
| 92b24de7cd | |||
| 1225a4887c | |||
| 2a82857570 | |||
| 831bec3630 | |||
| 55cbf2478a | |||
| 20a850b9e9 | |||
| 11c649a7af | |||
| c1e5a2077b | |||
| dc96f626dd | |||
| 46f48023f4 | |||
| e3d83f6dc2 | |||
| fc484e569f | |||
| a92f3e2480 | |||
| 064c447528 | |||
| 64c1bcd9e6 | |||
| 0cca4a2ebe | |||
| c4c26a76f1 | |||
| 1a638431d7 | |||
| 5d5fa21630 | |||
| e78ace4664 | |||
| 37596c412c | |||
| d200021243 | |||
| 1a999e202f | |||
| fd748b29e9 | |||
| 775d1e3cf1 | |||
| c6a1df9a79 | |||
| 77861a4c6d | |||
| bf1e1c3139 | |||
| a564a1d808 | |||
| 22ac935f9b | |||
| 02e2bcb417 | |||
| 3445259cde | |||
| c20c32c17d | |||
| 3fec766890 | |||
| f208a24213 | |||
| 9e9dfb3f4d | |||
| 83eecf09ed | |||
| 1aebe8d0dd | |||
| bb64e482df | |||
| 1841a72ca7 | |||
| 997d4aa1cf | |||
| d517e86333 | |||
| 8bcfc712fb | |||
| c0cf2fd78e | |||
| 0a7de0e9b6 | |||
| 1e2a127dac | |||
| 5b8cd215e4 | |||
| 7583edf3fe | |||
| 2f219a1a81 | |||
| 9127c38297 | |||
| 0c379f8b9f | |||
| d2b617bdf4 | |||
| 6d6f6d9356 | |||
| 8ffb20ebe3 | |||
| eed7b9da0c | |||
| 802381b2bc | |||
| f265c861d2 | |||
| 1dc7b4b5e4 | |||
| c48aa2b255 | |||
| 66859802f9 | |||
| 433c8e987b | |||
| aa477ca48c | |||
| 65b502afa4 | |||
| 06c0b44589 | |||
| d651f2cbb7 | |||
| 8b5be8ea4b | |||
| f4e82c560d | |||
| c23b3e93a6 | |||
| de447d2d0b | |||
| 95b1272018 | |||
| 11d111da7c | |||
| 638dec0f04 | |||
| a0ab6e406a | |||
| 23242c0f52 | |||
| 48bf70e825 | |||
| ada0b96872 | |||
| 0e4917bba9 | |||
| 8169d31e86 | |||
| 75b83aa163 | |||
| d2022de970 | |||
| 8db1cdacb4 | |||
| 527d171a6a | |||
| 20620cfa7e | |||
| 4d03ca078d | |||
| 775e2cca47 | |||
| 7cb2486d3e | |||
| 02a3ecc9fe | |||
| 54435398af | |||
| ffc42883de | |||
| 0cf0371a43 | |||
| f5156bcea7 | |||
| efdf3b2c9d | |||
| c3d3163392 | |||
| c91d5ca483 | |||
| 5f0982970d | |||
| ee05da70f4 | |||
| 04c283c48d | |||
| 836945c95c | |||
| bd4c180c07 | |||
| e1f5290365 | |||
| eefffcfb1b | |||
| 9e088a5e9d | |||
| 317c02bf06 | |||
| 22724c269c | |||
| 2a48782b6b | |||
| e7c3039fde | |||
| 2afba02b59 | |||
| 94928c2930 | |||
| 2c25191291 | |||
| ba2f3f2172 | |||
| aa5cba9347 | |||
| 5f40452f57 | |||
| 2dd9b1723b | |||
| ee54839f28 | |||
| c2f054a25e | |||
| f095d5c99c | |||
| ab93f9809a | |||
| bbb9a62357 | |||
| 82ffed699f | |||
| 4751ea8396 | |||
| c15d8fbe58 | |||
| b379468b47 | |||
| 0deb3eae21 | |||
| 0c1042ec5c | |||
| 05d0de5120 | |||
| 2fa217d5d9 | |||
| a65b5a5d82 | |||
| 7bb42e95d8 | |||
| db536502a1 | |||
| 47c8f1a2e6 | |||
| 30a0f11515 | |||
| 9bf5123a00 | |||
| f337b53ae3 | |||
| aea6050d71 | |||
| 13d5e0761e | |||
| ce28d0284c | |||
| 1a0bb9c3e4 | |||
| d0c71b4b67 | |||
| b3f062956d | |||
| 1a853a780c | |||
| 5c47ddeb2d | |||
| b51deb5d01 | |||
| cbf5ea69be | |||
| e139ffefe6 | |||
| dc0a8deb40 | |||
| 97e93cd10a | |||
| 03c934cf21 | |||
| 92d0d70258 | |||
| d44298993c | |||
| 12300d4115 | |||
| b4f08f61a6 | |||
| 861a25be41 | |||
| 3e175109a6 | |||
| fb2210f2fd | |||
| e928918201 | |||
| df607e5772 | |||
| a7cc470645 | |||
| 4e6421b928 | |||
| df48e8fccd | |||
| 58111bf204 | |||
| 8c02e75fed | |||
| 6f3f1cb4b5 | |||
| dd27997deb | |||
| a1f70d1d4d | |||
| 7da0bac643 | |||
| b3ab2a451c | |||
| 850f836ebd | |||
| d9fa9d03da | |||
| 76c20dc3d7 | |||
| 4568e222d1 | |||
| 344025226a | |||
| f546fcffce | |||
| 068c2d4d00 | |||
| ccf5a902e5 | |||
| 8c72cf9057 | |||
| 1ce14aa231 | |||
| 643485b879 | |||
| 5b3d9f26be | |||
| 14f2f45f20 | |||
| 8ac6c9d7a7 | |||
| 237a47b8ed |
+148
@@ -1,3 +1,151 @@
|
||||
2.6.5.3183
|
||||
|
||||
subscene, addic7ed
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- core: don't fall back to default providers if none enabled
|
||||
- core: don't process any further if stream info is missing
|
||||
- core: support using mediainfo for retrieving MP4 MOV_TEXT subtitle stream titles (PMS bug)
|
||||
- core: fix embedded subtitle extraction in some cases (#681, #680)
|
||||
- core: scanning: add additional INFO logging for undetected languages
|
||||
- core: bazarr-backport: remove existing subtitle file, to support MergerFS
|
||||
- core: bazarr-backport: generic 10 minute throttling if uncaught exception occurs
|
||||
- providers: addic7ed: fix recaptcha solving; fix show ID retrieval (#681)
|
||||
- providers: addic7ed: add timeout on authentication error
|
||||
- providers: addic7ed: fix shows with dots in them (Mayans M.C.)
|
||||
- providers: addic7ed: fix detection of completed subtitle for non-english users (#686)
|
||||
- providers: addic7ed: add more timeouts in the login process
|
||||
- providers: argenteam: bazarr-backport: use new url; fixes
|
||||
|
||||
|
||||
2.6.5.3152
|
||||
|
||||
subscene, addic7ed
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- core: fix core issue possibly impacting results on OpenSubtitles in certain conditions
|
||||
- core: fix default values of opensubtitles-skip-wrong-fps, use_https; fix #676
|
||||
- core: fix for determining whether to search under certain circumstances; fixes #666
|
||||
- core: #664 fix missing language processing of multiple videos refreshed at once
|
||||
- core: #661 fix match strictness when determining preexisting external subtitles
|
||||
- providers: titlovi: New implementation of Titlovi using API (thanks @viking1304)
|
||||
|
||||
|
||||
2.6.5.3124
|
||||
|
||||
subscene, addic7ed and titlovi
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- core: http: fallback to default DNS when normal resolving fails; fixes #657
|
||||
- core: extract embedded/menu: fix detection of unknown streams; don't use unknown streams if a known language was previously found
|
||||
- core: language: use replacement map from bazarr
|
||||
- providers: titlovi: fix matching
|
||||
- providers: subscene: fix unknown language code error when "empty" result is returned
|
||||
- providers: subscene: add support for pt-BR (based on Diaoul/subliminal@b22cf08)
|
||||
- providers: subscene: explicitly set account filters for languages
|
||||
- providers: subscene: limit alternative searches to 3; set throttle to 8
|
||||
- providers: subscene: move login/cookies to initialization sequence
|
||||
- submod: generic: en: fix ";='s
|
||||
|
||||
|
||||
2.6.5.3109
|
||||
|
||||
subscene, addic7ed and titlovi
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- providers: add Napisy24 (Polish)
|
||||
- providers: subscene: reduce provider load by possibly half
|
||||
- providers: subscene: support logging in (username/password are now required)
|
||||
- providers: subscene: fallback to non year results if none found with year
|
||||
|
||||
|
||||
2.6.5.3099
|
||||
|
||||
subscene, addic7ed and titlovi
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- core: allow system DNS again by putting "system" as the DNS
|
||||
- providers: subscene: fix again (subscene, contact us please, so we can end this)
|
||||
|
||||
|
||||
2.6.5.3092
|
||||
|
||||
subscene, addic7ed and titlovi
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- providers: subscene: fix endpoint (hopefully for longer now)
|
||||
- providers: subscene: don't search for season packs (broken for now; relieves 50% of server load on provider)
|
||||
- providers: subscene: don't calculate video fn for now
|
||||
- providers: argenteam: backport fixes from bazarr
|
||||
- subtitle: try decoding with utf-16 by default as well (zho/farsi)
|
||||
- submod: HI: remove music tags by default
|
||||
- core: compat (bazarr): add env var SZ_KEEP_ENCODING to keep encoding of subtitles
|
||||
|
||||
|
||||
2.6.5.3074
|
||||
|
||||
Changelog
|
||||
- core: cf: bypass cf 95% of the time without captchas
|
||||
- core: fix breaking line endings of certain languages (Chinese, UTF-16); fixes #646
|
||||
- core: update pysubs2 to 0.2.3
|
||||
|
||||
|
||||
2.6.5.3062
|
||||
|
||||
Changelog
|
||||
- core: cf: optimize
|
||||
- core: http: don't query DNS with IPs. thanks @fgump (fixes sonarr/radarr)
|
||||
|
||||
|
||||
2.6.5.3041
|
||||
|
||||
Changelog
|
||||
- core: only reference guessed title if there actually is one
|
||||
- core: cf: optimize
|
||||
- core/config: add setting for one existing language to be enough, fixes #491
|
||||
- core/compat: dns: support nameservers via ENV[dns_resolvers]; don't fall back to default DNS when configured custom DNS failed
|
||||
- providers: titlovi: prevent repeated captcha solving for CF
|
||||
|
||||
|
||||
2.6.5.3017
|
||||
|
||||
Changelog
|
||||
- core: SRT parsing: handle (bad) ASS color tag in SRT
|
||||
- core: auto extract embedded: only use one unknown sub for first language
|
||||
- core: better embedded streams language detection
|
||||
- core: optimizations
|
||||
- core: extract embedded: fix is_unknown check
|
||||
- core: don't raise exception when subtitle not found inside archive
|
||||
- core: search external subtitles: fix condition
|
||||
- core: better plex transcoder path detection
|
||||
- core: use Log.Warn instead of Log.Warning (#619, #629, #633)
|
||||
- core: also check for "plex transcoder.exe" in case of windows (fixes #619)
|
||||
- core: auto extract: use mbcs encoding for paths on windows
|
||||
- core: Fix issue with scandir not returning the name of the file inside Docker images on ARM systems. (thanks @giejay)
|
||||
- core: also clean PYTHONHOME when calling external notification app
|
||||
- core: update certifi to 2019.3.9
|
||||
- core: scan_video: add series/title as alternative by scanning filename itself without parent folders
|
||||
- core: add generic solution for solving captchas using anti captcha services
|
||||
- core: increase cache time to 180d (was: 30d)
|
||||
- core: guess_matches: handle multiple title matches; fixes bazarr#403
|
||||
- windows: fix compatibility issues with plex transcoder
|
||||
- compat: use lowercase paths on subtitle detection
|
||||
- providers: addic7ed: re-enable (using paid anti-captcha service)
|
||||
- providers: assrt: assume undefined Chinese flavor as Simplified (chs/zho-Hans)
|
||||
- providers: subscene: make it work again by bypassing cf
|
||||
- providers: subscene: don't fail on missing cover
|
||||
- providers: titlovi: re-enable (might need paid anti-captcha service)
|
||||
- providers: opensubtitles: fix only_foreign handling
|
||||
- providers: opensubtitles: show subtitles with possibly mismatched series when manually listing subs
|
||||
- menu: list subtitles: show subtitles with bad season/episode values as well
|
||||
- refiners: omdb: fix imdb ids with spaces
|
||||
|
||||
|
||||
2.6.4.2911
|
||||
- core: improve file cache (windows especially); use fixed-length cache filenames; fixes #600
|
||||
|
||||
@@ -21,20 +21,21 @@ import support
|
||||
import interface
|
||||
sys.modules["interface"] = interface
|
||||
|
||||
from subzero.constants import OS_PLEX_USERAGENT, PERSONAL_MEDIA_IDENTIFIER
|
||||
from subzero.constants import OS_PLEX_USERAGENT
|
||||
from interface.menu import *
|
||||
from support.plex_media import media_to_videos, get_media_item_ids
|
||||
from support.extract import agent_extract_embedded
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles, store_subtitle_info, get_subtitle_storage
|
||||
from support.storage import save_subtitles, store_subtitle_info
|
||||
from support.items import is_wanted
|
||||
from support.config import config
|
||||
from support.lib import get_intent
|
||||
from support.helpers import track_usage, get_title_for_video_metadata, get_identifier, cast_bool, \
|
||||
audio_streams_match_languages
|
||||
from support.helpers import track_usage, get_title_for_video_metadata, get_identifier, cast_bool
|
||||
from support.history import get_history
|
||||
from support.data import dispatch_migrate
|
||||
from support.activities import activity
|
||||
from support.download import download_best_subtitles
|
||||
from support.localmedia import find_subtitles
|
||||
|
||||
|
||||
def Start():
|
||||
@@ -96,57 +97,7 @@ def Start():
|
||||
|
||||
def update_local_media(videos, ignore_parts_cleanup=None):
|
||||
for video in videos:
|
||||
support.localmedia.find_subtitles(video["plex_part"], ignore_parts_cleanup=ignore_parts_cleanup)
|
||||
|
||||
|
||||
def agent_extract_embedded(video_part_map):
|
||||
try:
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
|
||||
to_extract = []
|
||||
item_count = 0
|
||||
|
||||
for scanned_video, part_info in video_part_map.iteritems():
|
||||
plexapi_item = scanned_video.plexapi_metadata["item"]
|
||||
stored_subs = subtitle_storage.load_or_new(plexapi_item)
|
||||
valid_langs_in_media = audio_streams_match_languages(scanned_video, config.get_lang_list(ordered=True))
|
||||
|
||||
if not config.lang_list.difference(valid_langs_in_media):
|
||||
Log.Debug("Skipping embedded subtitle extraction for %s, audio streams are in correct language(s)",
|
||||
plexapi_item.rating_key)
|
||||
continue
|
||||
|
||||
for plexapi_part in get_all_parts(plexapi_item):
|
||||
item_count = item_count + 1
|
||||
used_one_unknown_stream = False
|
||||
for requested_language in config.lang_list:
|
||||
embedded_subs = stored_subs.get_by_provider(plexapi_part.id, requested_language, "embedded")
|
||||
current = stored_subs.get_any(plexapi_part.id, requested_language) or \
|
||||
requested_language in scanned_video.external_subtitle_languages
|
||||
|
||||
if not embedded_subs:
|
||||
stream_data = get_embedded_subtitle_streams(plexapi_part, requested_language=requested_language,
|
||||
skip_unknown=used_one_unknown_stream)
|
||||
|
||||
if stream_data:
|
||||
stream = stream_data[0]["stream"]
|
||||
if stream_data[0]["is_unknown"]:
|
||||
used_one_unknown_stream = True
|
||||
|
||||
to_extract.append(({scanned_video: part_info}, plexapi_part, str(stream.index),
|
||||
str(requested_language), not current))
|
||||
|
||||
if not cast_bool(Prefs["subtitles.search_after_autoextract"]):
|
||||
scanned_video.subtitle_languages.update({requested_language})
|
||||
else:
|
||||
Log.Debug("Skipping embedded subtitle extraction for %s, already got %r from %s",
|
||||
plexapi_item.rating_key, requested_language, embedded_subs[0].id)
|
||||
if to_extract:
|
||||
Log.Info("Triggering extraction of %d embedded subtitles of %d items", len(to_extract), item_count)
|
||||
Thread.Create(multi_extract_embedded, stream_list=to_extract, refresh=True, with_mods=True,
|
||||
single_thread=not config.advanced.auto_extract_multithread)
|
||||
except:
|
||||
Log.Error("Something went wrong when auto-extracting subtitles, continuing: %s", traceback.format_exc())
|
||||
find_subtitles(video["plex_part"], ignore_parts_cleanup=ignore_parts_cleanup)
|
||||
|
||||
|
||||
class SubZeroAgent(object):
|
||||
|
||||
@@ -7,14 +7,16 @@ from subzero.language import Language
|
||||
|
||||
from sub_mod import SubtitleModificationsMenu
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, add_incl_excl_options, get_item_task_data, \
|
||||
set_refresh_menu_state, route, extract_embedded_sub
|
||||
set_refresh_menu_state, route
|
||||
from support.extract import extract_embedded_sub
|
||||
|
||||
from refresh_item import RefreshItem
|
||||
from subzero.constants import PREFIX
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.helpers import timestamp, df, get_language, display_language, get_language_from_stream, is_stream_forced
|
||||
from support.helpers import timestamp, df, get_language, display_language, get_language_from_stream
|
||||
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub, get_item_title, save_stored_sub
|
||||
from support.plex_media import get_plex_metadata, get_part, get_embedded_subtitle_streams
|
||||
from support.plex_media import get_plex_metadata, get_part, get_embedded_subtitle_streams, is_stream_forced, \
|
||||
update_stream_info
|
||||
from support.scanning import scan_videos
|
||||
from support.scheduler import scheduler
|
||||
from support.storage import get_subtitle_storage
|
||||
@@ -118,6 +120,8 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
if not os.path.exists(part.file):
|
||||
continue
|
||||
|
||||
update_stream_info(part)
|
||||
|
||||
part_id = str(part.id)
|
||||
part_index += 1
|
||||
|
||||
@@ -670,29 +674,28 @@ def ListEmbeddedSubsForItemMenu(**kwargs):
|
||||
stream = stream_data["stream"]
|
||||
is_forced = stream_data["is_forced"]
|
||||
|
||||
if language:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
|
||||
stream_index=str(stream.index), language=language, with_mods=True, **kwargs),
|
||||
title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
|
||||
u"%(stream_title)s with default mods",
|
||||
stream_index=stream.index,
|
||||
language=display_language(language),
|
||||
unknown_state=_(" (unknown)") if is_unknown else "",
|
||||
forced_state=_(" (forced)") if is_forced else "",
|
||||
stream_title=" (\"%s\")" % stream.title if stream.title else ""),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
|
||||
stream_index=str(stream.index), language=language, **kwargs),
|
||||
title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
|
||||
u"%(stream_title)s",
|
||||
stream_index=stream.index,
|
||||
language=display_language(language),
|
||||
unknown_state=_(" (unknown)") if is_unknown else "",
|
||||
forced_state=_(" (forced)") if is_forced else "",
|
||||
stream_title=" (\"%s\")" % stream.title if stream.title else ""),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
|
||||
stream_index=str(stream.index), language=language, with_mods=True, **kwargs),
|
||||
title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
|
||||
u"%(stream_title)s with default mods",
|
||||
stream_index=stream.index,
|
||||
language=display_language(language),
|
||||
unknown_state=_(" (unknown)") if is_unknown else "",
|
||||
forced_state=_(" (forced)") if is_forced else "",
|
||||
stream_title=" (\"%s\")" % stream.title if stream.title else ""),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
|
||||
stream_index=str(stream.index), language=language, **kwargs),
|
||||
title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
|
||||
u"%(stream_title)s",
|
||||
stream_index=stream.index,
|
||||
language=display_language(language),
|
||||
unknown_state=_(" (unknown)") if is_unknown else "",
|
||||
forced_state=_(" (forced)") if is_forced else "",
|
||||
stream_title=" (\"%s\")" % stream.title if stream.title else ""),
|
||||
))
|
||||
return oc
|
||||
|
||||
|
||||
|
||||
@@ -12,19 +12,16 @@ from requests import HTTPError
|
||||
from item_details import ItemDetailsMenu
|
||||
from refresh_item import RefreshItem
|
||||
from menu_helpers import add_incl_excl_options, dig_tree, set_refresh_menu_state, \
|
||||
default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route, \
|
||||
extract_embedded_sub
|
||||
default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route
|
||||
from main import fatality, InclExclMenu
|
||||
from advanced import DispatchRestart
|
||||
from subzero.constants import ART, PREFIX, DEPENDENCY_MODULE_NAMES
|
||||
from support.plex_media import get_all_parts, get_embedded_subtitle_streams
|
||||
from support.extract import season_extract_embedded
|
||||
from support.scheduler import scheduler
|
||||
from support.config import config
|
||||
from support.helpers import timestamp, df, display_language
|
||||
from support.ignore import get_decision_list
|
||||
from support.items import get_all_items, get_items_info, get_item_kind_from_rating_key, get_item, MI_KEY, \
|
||||
get_item_title, get_item_thumb
|
||||
from support.storage import get_subtitle_storage
|
||||
from support.items import get_all_items, get_items_info, get_item_kind_from_rating_key, get_item, get_item_title
|
||||
from support.i18n import _
|
||||
|
||||
# init GUI
|
||||
@@ -174,53 +171,6 @@ def SeasonExtractEmbedded(**kwargs):
|
||||
return MetadataMenu(randomize=timestamp(), title=item_title, **kwargs)
|
||||
|
||||
|
||||
def multi_extract_embedded(stream_list, refresh=False, with_mods=False, single_thread=True, extract_mode="a",
|
||||
history_storage=None):
|
||||
def execute():
|
||||
for video_part_map, plexapi_part, stream_index, language, set_current in stream_list:
|
||||
plexapi_item = video_part_map.keys()[0].plexapi_metadata["item"]
|
||||
|
||||
extract_embedded_sub(rating_key=plexapi_item.rating_key, part_id=plexapi_part.id,
|
||||
plex_item=plexapi_item, part=plexapi_part, scanned_videos=video_part_map,
|
||||
stream_index=stream_index, set_current=set_current,
|
||||
language=language, with_mods=with_mods, refresh=refresh, extract_mode=extract_mode,
|
||||
history_storage=history_storage)
|
||||
|
||||
if single_thread:
|
||||
with Thread.Lock(key="extract_embedded"):
|
||||
execute()
|
||||
else:
|
||||
execute()
|
||||
|
||||
|
||||
def season_extract_embedded(rating_key, requested_language, with_mods=False, force=False):
|
||||
# get stored subtitle info for item id
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
|
||||
try:
|
||||
for data in get_all_items(key="children", value=rating_key, base="library/metadata"):
|
||||
item = get_item(data[MI_KEY])
|
||||
if item:
|
||||
stored_subs = subtitle_storage.load_or_new(item)
|
||||
for part in get_all_parts(item):
|
||||
embedded_subs = stored_subs.get_by_provider(part.id, requested_language, "embedded")
|
||||
current = stored_subs.get_any(part.id, requested_language)
|
||||
if not embedded_subs or force:
|
||||
stream_data = get_embedded_subtitle_streams(part, requested_language=requested_language)
|
||||
if stream_data:
|
||||
stream = stream_data[0]["stream"]
|
||||
|
||||
set_current = not current or force
|
||||
refresh = not current
|
||||
|
||||
extract_embedded_sub(rating_key=item.rating_key, part_id=part.id,
|
||||
stream_index=str(stream.index), set_current=set_current,
|
||||
refresh=refresh, language=requested_language, with_mods=with_mods,
|
||||
extract_mode="m")
|
||||
finally:
|
||||
subtitle_storage.destroy()
|
||||
|
||||
|
||||
@route(PREFIX + '/ignore_list')
|
||||
def IgnoreListMenu():
|
||||
ref_list = get_decision_list()
|
||||
@@ -368,7 +318,7 @@ def ValidatePrefs():
|
||||
"subtitle_destination_folder", "include", "include_exclude_paths", "include_exclude_sz_files",
|
||||
"new_style_cache", "dbm_supported", "lang_list", "providers", "normal_subs", "forced_only", "forced_also",
|
||||
"plex_transcoder", "refiner_settings", "unrar", "adv_cfg_path", "use_custom_dns",
|
||||
"has_anticaptcha", "anticaptcha_cls"]:
|
||||
"has_anticaptcha", "anticaptcha_cls", "mediainfo_bin"]:
|
||||
|
||||
value = getattr(config, attr)
|
||||
if isinstance(value, dict):
|
||||
|
||||
@@ -1,29 +1,16 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
import types
|
||||
import datetime
|
||||
import subprocess
|
||||
import os
|
||||
import operator
|
||||
|
||||
from func import enable_channel_wrapper, route_wrapper, register_route_function
|
||||
from subzero.lib.io import get_viable_encoding
|
||||
from subzero.language import Language
|
||||
from func import enable_channel_wrapper, route_wrapper
|
||||
from support.i18n import is_localized_string, _
|
||||
from support.items import get_kind, get_item_thumb, get_item, get_item_kind_from_item, refresh_item
|
||||
from support.helpers import get_video_display_title, pad_title, display_language, quote_args, is_stream_forced, \
|
||||
get_title_for_video_metadata, mswindows
|
||||
from support.history import get_history
|
||||
from support.items import get_item_thumb
|
||||
from support.helpers import get_video_display_title, pad_title
|
||||
from support.ignore import get_decision_list
|
||||
from support.lib import get_intent
|
||||
from support.config import config
|
||||
from subzero.constants import ICON_SUB, ICON
|
||||
from support.plex_media import get_part, get_plex_metadata
|
||||
from support.scheduler import scheduler
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles
|
||||
|
||||
from subliminal_patch.subtitle import ModifiedSubtitle
|
||||
|
||||
default_thumb = R(ICON_SUB)
|
||||
main_icon = ICON if not config.is_development else "icon-dev.jpg"
|
||||
@@ -156,89 +143,6 @@ def debounce(func):
|
||||
return func
|
||||
|
||||
|
||||
def extract_embedded_sub(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs.pop("part_id")
|
||||
stream_index = kwargs.pop("stream_index")
|
||||
with_mods = kwargs.pop("with_mods", False)
|
||||
language = Language.fromietf(kwargs.pop("language"))
|
||||
refresh = kwargs.pop("refresh", True)
|
||||
set_current = kwargs.pop("set_current", True)
|
||||
|
||||
plex_item = kwargs.pop("plex_item", get_item(rating_key))
|
||||
item_type = get_item_kind_from_item(plex_item)
|
||||
part = kwargs.pop("part", get_part(plex_item, part_id))
|
||||
scanned_videos = kwargs.pop("scanned_videos", None)
|
||||
extract_mode = kwargs.pop("extract_mode", "a")
|
||||
|
||||
any_successful = False
|
||||
|
||||
if part:
|
||||
if not scanned_videos:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
|
||||
scanned_videos = scan_videos([metadata], ignore_all=True, skip_hashing=True)
|
||||
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if str(stream.index) == stream_index:
|
||||
is_forced = is_stream_forced(stream)
|
||||
bn = os.path.basename(part.file)
|
||||
|
||||
set_refresh_menu_state(_(u"Extracting subtitle %(stream_index)s of %(filename)s",
|
||||
stream_index=stream_index,
|
||||
filename=bn))
|
||||
Log.Info(u"Extracting stream %s (%s) of %s", stream_index, str(language), bn)
|
||||
|
||||
out_codec = stream.codec if stream.codec != "mov_text" else "srt"
|
||||
|
||||
args = [
|
||||
config.plex_transcoder, "-i", part.file, "-map", "0:%s" % stream_index, "-f", out_codec, "-"
|
||||
]
|
||||
|
||||
cmdline = quote_args(args)
|
||||
Log.Debug(u"Calling: %s", cmdline)
|
||||
if mswindows:
|
||||
Log.Debug("MSWindows: Fixing encoding")
|
||||
cmdline = cmdline.encode("mbcs")
|
||||
|
||||
output = None
|
||||
try:
|
||||
output = subprocess.check_output(cmdline, stderr=subprocess.PIPE, shell=True)
|
||||
except:
|
||||
Log.Error("Extraction failed: %s", traceback.format_exc())
|
||||
|
||||
if output:
|
||||
subtitle = ModifiedSubtitle(language, mods=config.default_mods if with_mods else None)
|
||||
subtitle.content = output
|
||||
subtitle.provider_name = "embedded"
|
||||
subtitle.id = "stream_%s" % stream_index
|
||||
subtitle.score = 0
|
||||
subtitle.set_encoding("utf-8")
|
||||
|
||||
# fixme: speedup video; only video.name is needed
|
||||
video = scanned_videos.keys()[0]
|
||||
save_successful = save_subtitles(scanned_videos, {video: [subtitle]}, mode="m",
|
||||
set_current=set_current)
|
||||
set_refresh_menu_state(None)
|
||||
|
||||
if save_successful and refresh:
|
||||
refresh_item(rating_key)
|
||||
|
||||
# add item to history
|
||||
item_title = get_title_for_video_metadata(video.plexapi_metadata,
|
||||
add_section_title=False, add_episode_title=True)
|
||||
|
||||
history = get_history()
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
thumb=video.plexapi_metadata["super_thumb"],
|
||||
subtitle=subtitle, mode=extract_mode)
|
||||
history.destroy()
|
||||
|
||||
any_successful = True
|
||||
|
||||
return any_successful
|
||||
|
||||
|
||||
class SZObjectContainer(ObjectContainer):
|
||||
def __init__(self, *args, **kwargs):
|
||||
skip_pin_lock = kwargs.pop("skip_pin_lock", False)
|
||||
|
||||
@@ -19,6 +19,10 @@ sys.modules["support.i18n"] = i18n
|
||||
|
||||
helpers._ = i18n._
|
||||
|
||||
import history
|
||||
|
||||
sys.modules["support.history"] = history
|
||||
|
||||
import plex_media
|
||||
sys.modules["support.plex_media"] = plex_media
|
||||
|
||||
@@ -49,6 +53,10 @@ import missing_subtitles
|
||||
|
||||
sys.modules["support.missing_subtitles"] = missing_subtitles
|
||||
|
||||
import extract
|
||||
|
||||
sys.modules["support.extract"] = extract
|
||||
|
||||
import tasks
|
||||
|
||||
sys.modules["support.tasks"] = tasks
|
||||
@@ -57,10 +65,6 @@ import ignore
|
||||
|
||||
sys.modules["support.ignore"] = ignore
|
||||
|
||||
import history
|
||||
|
||||
sys.modules["support.history"] = history
|
||||
|
||||
import data
|
||||
|
||||
sys.modules["support.data"] = data
|
||||
|
||||
@@ -8,12 +8,15 @@ import sys
|
||||
import rarfile
|
||||
import jstyleson
|
||||
import datetime
|
||||
import stat
|
||||
import traceback
|
||||
|
||||
import subliminal
|
||||
import subliminal_patch
|
||||
import subzero.constants
|
||||
import lib
|
||||
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded, AuthenticationError
|
||||
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded, AuthenticationError, \
|
||||
DownloadLimitPerDayExceeded
|
||||
from subliminal_patch.core import is_windows_special_path
|
||||
from whichdb import whichdb
|
||||
|
||||
@@ -22,6 +25,7 @@ from subzero.language import Language
|
||||
from subliminal.cli import MutexLock
|
||||
from subzero.lib.io import FileIO, get_viable_encoding
|
||||
from subzero.lib.dict import Dicked
|
||||
from subzero.lib.which import find_executable
|
||||
from subzero.util import get_root_path
|
||||
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW, MEDIA_TYPE_TO_STRING
|
||||
from subzero.prefs import get_user_prefs, update_user_prefs
|
||||
@@ -58,14 +62,17 @@ def int_or_default(s, default):
|
||||
return default
|
||||
|
||||
|
||||
VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable, APIThrottled)
|
||||
VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, DownloadLimitPerDayExceeded,
|
||||
ServiceUnavailable, APIThrottled)
|
||||
|
||||
PROVIDER_THROTTLE_MAP = {
|
||||
"default": {
|
||||
TooManyRequests: (datetime.timedelta(hours=1), "1 hour"),
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
|
||||
DownloadLimitPerDayExceeded: (datetime.timedelta(hours=4), "4 hours"),
|
||||
ServiceUnavailable: (datetime.timedelta(minutes=20), "20 minutes"),
|
||||
APIThrottled: (datetime.timedelta(minutes=10), "10 minutes"),
|
||||
AuthenticationError: (datetime.timedelta(hours=2), "2 hours"),
|
||||
},
|
||||
"opensubtitles": {
|
||||
TooManyRequests: (datetime.timedelta(hours=3), "3 hours"),
|
||||
@@ -75,6 +82,7 @@ PROVIDER_THROTTLE_MAP = {
|
||||
"addic7ed": {
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
|
||||
TooManyRequests: (datetime.timedelta(minutes=5), "5 minutes"),
|
||||
AuthenticationError: (datetime.timedelta(hours=24), "24 hours"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -153,6 +161,7 @@ class Config(object):
|
||||
anticaptcha_token = None
|
||||
anticaptcha_cls = None
|
||||
has_anticaptcha = False
|
||||
mediainfo_bin = None
|
||||
|
||||
store_recently_played_amount = 40
|
||||
|
||||
@@ -239,6 +248,8 @@ class Config(object):
|
||||
self.embedded_auto_extract = cast_bool(Prefs["subtitles.embedded.autoextract"])
|
||||
self.ietf_as_alpha3 = cast_bool(Prefs["subtitles.language.ietf_normalize"])
|
||||
self.use_custom_dns = self.parse_custom_dns()
|
||||
if not self.advanced.dont_use_mediainfo_mp4:
|
||||
self.mediainfo_bin = self.advanced.mediainfo_bin or find_executable("mediainfo")
|
||||
self.initialized = True
|
||||
|
||||
def migrate_prefs(self):
|
||||
@@ -323,6 +334,19 @@ class Config(object):
|
||||
for exe in try_executables:
|
||||
rarfile.UNRAR_TOOL = exe
|
||||
rarfile.ORIG_UNRAR_TOOL = exe
|
||||
if os.path.isfile(exe) and not os.access(exe, os.X_OK):
|
||||
st = os.stat(exe)
|
||||
try:
|
||||
Log.Debug("setting generic executable permissions for %s", exe)
|
||||
# fixme: too broad?
|
||||
os.chmod(exe, st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
||||
except:
|
||||
Log.Debug("failed setting generic executable permissions for %s: %s", exe, traceback.format_exc())
|
||||
try:
|
||||
Log.Debug("setting executable permissions for %s", exe)
|
||||
os.chmod(exe, st.st_mode | stat.S_IEXEC)
|
||||
except:
|
||||
Log.Debug("failed setting executable permissions for %s: %s", exe, traceback.format_exc())
|
||||
try:
|
||||
rarfile.custom_check([rarfile.UNRAR_TOOL], True)
|
||||
except:
|
||||
@@ -761,6 +785,7 @@ class Config(object):
|
||||
return {'opensubtitles': cast_bool(Prefs['provider.opensubtitles.enabled']),
|
||||
# 'thesubdb': Prefs['provider.thesubdb.enabled'],
|
||||
'podnapisi': cast_bool(Prefs['provider.podnapisi.enabled']),
|
||||
'napisy24': cast_bool(Prefs['provider.napisy24.enabled']),
|
||||
'titlovi': cast_bool(Prefs['provider.titlovi.enabled']),
|
||||
'addic7ed': cast_bool(Prefs['provider.addic7ed.enabled']) and self.has_anticaptcha,
|
||||
'tvsubtitles': cast_bool(Prefs['provider.tvsubtitles.enabled']),
|
||||
@@ -773,7 +798,9 @@ class Config(object):
|
||||
'argenteam': cast_bool(Prefs['provider.argenteam.enabled']),
|
||||
'subscenter': False,
|
||||
'assrt': cast_bool(Prefs['provider.assrt.enabled']),
|
||||
}
|
||||
'bsplayer': cast_bool(Prefs['provider.bsplayer.enabled']),
|
||||
'screwzira': cast_bool(Prefs['provider.screwzira.enabled']),
|
||||
}
|
||||
|
||||
@property
|
||||
def providers_enabled(self):
|
||||
@@ -801,6 +828,9 @@ class Config(object):
|
||||
providers["argenteam"] = False
|
||||
providers["assrt"] = False
|
||||
providers["subscene"] = False
|
||||
providers["napisy24"] = False
|
||||
providers["bsplayer"] = False
|
||||
providers["screwzira"] = False
|
||||
providers_forced_off = dict(providers)
|
||||
|
||||
if not self.unrar and providers["legendastv"]:
|
||||
@@ -841,14 +871,12 @@ class Config(object):
|
||||
providers = property(get_providers)
|
||||
|
||||
def get_provider_settings(self):
|
||||
os_use_https = self.advanced.providers.opensubtitles.use_https \
|
||||
if self.advanced.providers.opensubtitles.use_https is not None else True
|
||||
|
||||
os_skip_wrong_fps = self.advanced.providers.opensubtitles.skip_wrong_fps \
|
||||
if self.advanced.providers.opensubtitles.skip_wrong_fps is not None else True
|
||||
os_use_https = self.advanced.providers.opensubtitles.get("use_https", True)
|
||||
os_skip_wrong_fps = self.advanced.providers.opensubtitles.get("skip_wrong_fps", True)
|
||||
|
||||
provider_settings = {'addic7ed': {'username': Prefs['provider.addic7ed.username'],
|
||||
'password': Prefs['provider.addic7ed.password'],
|
||||
'is_vip': cast_bool(Prefs['provider.addic7ed.is_vip']),
|
||||
},
|
||||
'opensubtitles': {'username': Prefs['provider.opensubtitles.username'],
|
||||
'password': Prefs['provider.opensubtitles.password'],
|
||||
@@ -864,8 +892,18 @@ class Config(object):
|
||||
'only_foreign': self.forced_only,
|
||||
'also_foreign': self.forced_also,
|
||||
},
|
||||
'titlovi': {
|
||||
'username': Prefs['provider.titlovi.username'],
|
||||
'password': Prefs['provider.titlovi.password'],
|
||||
},
|
||||
'napisy24': {
|
||||
'username': Prefs['provider.napisy24.username'],
|
||||
'password': Prefs['provider.napisy24.password'],
|
||||
},
|
||||
'subscene': {
|
||||
'only_foreign': self.forced_only,
|
||||
'username': Prefs['provider.subscene.username'],
|
||||
'password': Prefs['provider.subscene.password'],
|
||||
},
|
||||
'legendastv': {'username': Prefs['provider.legendastv.username'],
|
||||
'password': Prefs['provider.legendastv.password'],
|
||||
@@ -894,10 +932,10 @@ class Config(object):
|
||||
throttle_data = PROVIDER_THROTTLE_MAP.get(name, PROVIDER_THROTTLE_MAP["default"]).get(cls, None) or \
|
||||
PROVIDER_THROTTLE_MAP["default"].get(cls, None)
|
||||
|
||||
if not throttle_data:
|
||||
return
|
||||
|
||||
throttle_delta, throttle_description = throttle_data
|
||||
if throttle_data:
|
||||
throttle_delta, throttle_description = throttle_data
|
||||
else:
|
||||
throttle_delta, throttle_description = datetime.timedelta(minutes=10), "10 minutes"
|
||||
|
||||
if "provider_throttle" not in Dict:
|
||||
Dict["provider_throttle"] = {}
|
||||
@@ -1083,11 +1121,13 @@ class Config(object):
|
||||
|
||||
def parse_custom_dns(self):
|
||||
custom_dns = Prefs['use_custom_dns2'].strip()
|
||||
if custom_dns:
|
||||
os.environ["dns_resolvers"] = ""
|
||||
|
||||
if custom_dns and custom_dns != "system":
|
||||
ips = filter(lambda x: x, [d.strip() for d in custom_dns.split(",")])
|
||||
if ips:
|
||||
os.environ["dns_resolvers"] = json.dumps(ips)
|
||||
return os.environ["dns_resolvers"]
|
||||
return os.environ["dns_resolvers"]
|
||||
|
||||
def init_subliminal_patches(self):
|
||||
# configure custom subtitle destination folders for scanning pre-existing subs
|
||||
|
||||
@@ -33,14 +33,14 @@ def get_missing_languages(video, part):
|
||||
alpha3_map = {}
|
||||
if config.ietf_as_alpha3:
|
||||
for language in languages:
|
||||
if language.country:
|
||||
if language and language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
|
||||
have_languages = video.subtitle_languages.copy()
|
||||
if config.ietf_as_alpha3:
|
||||
for language in have_languages:
|
||||
if language.country:
|
||||
if language and language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
|
||||
@@ -53,14 +53,14 @@ def get_missing_languages(video, part):
|
||||
filter(lambda l: not l.forced, video.subtitle_languages)
|
||||
if langs:
|
||||
Log.Debug("We have at least one subtitle for any configured language.")
|
||||
return False
|
||||
return set()
|
||||
|
||||
elif "External subtitle" in config.any_language_is_enough:
|
||||
langs = video.subtitle_languages if not not_in_forced else \
|
||||
langs = video.external_subtitle_languages if not not_in_forced else \
|
||||
filter(lambda l: not l.forced, video.external_subtitle_languages)
|
||||
if langs:
|
||||
Log.Debug("We have at least one external subtitle for any configured language.")
|
||||
return False
|
||||
return set()
|
||||
|
||||
# all languages are found if we either really have subs for all languages or we only want to have exactly one language
|
||||
# and we've only found one (the case for a selected language, Prefs['subtitles.only_one'] (one found sub matches any language))
|
||||
@@ -70,7 +70,7 @@ def get_missing_languages(video, part):
|
||||
Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
|
||||
else:
|
||||
Log.Debug('All languages %r exist for %s', languages, video)
|
||||
return False
|
||||
return set()
|
||||
|
||||
# re-add country codes to the missing languages, in case we've removed them above
|
||||
if config.ietf_as_alpha3:
|
||||
@@ -106,21 +106,22 @@ def language_hook(provider):
|
||||
def download_best_subtitles(video_part_map, min_score=0, throttle_time=None, providers=None):
|
||||
hearing_impaired = Prefs['subtitles.search.hearingImpaired']
|
||||
languages = set([Language.rebuild(l) for l in config.lang_list])
|
||||
missing_languages = []
|
||||
if not languages:
|
||||
return
|
||||
|
||||
use_videos = []
|
||||
missing_languages = set()
|
||||
for video, part in video_part_map.iteritems():
|
||||
if not video.ignore_all:
|
||||
missing_languages = get_missing_languages(video, part)
|
||||
p_missing_languages = get_missing_languages(video, part)
|
||||
else:
|
||||
missing_languages = languages
|
||||
p_missing_languages = languages
|
||||
|
||||
if missing_languages:
|
||||
Log.Info(u"%s has missing languages: %s", os.path.basename(video.name), missing_languages)
|
||||
if p_missing_languages:
|
||||
Log.Info(u"%s has missing languages: %s", os.path.basename(video.name), p_missing_languages)
|
||||
refine_video(video, refiner_settings=config.refiner_settings)
|
||||
use_videos.append(video)
|
||||
missing_languages.update(p_missing_languages)
|
||||
|
||||
# prepare blacklist
|
||||
blacklist = get_blacklist_from_part_map(video_part_map, languages)
|
||||
|
||||
@@ -0,0 +1,208 @@
|
||||
# coding=utf-8
|
||||
import os
|
||||
import subprocess
|
||||
import traceback
|
||||
|
||||
from support.helpers import quote_args, mswindows, get_title_for_video_metadata, cast_bool, \
|
||||
audio_streams_match_languages
|
||||
from support.i18n import _
|
||||
from support.items import get_item_kind_from_item, refresh_item, get_all_items, get_item, MI_KEY
|
||||
from support.storage import get_subtitle_storage, save_subtitles
|
||||
from support.config import config
|
||||
from support.history import get_history
|
||||
from support.plex_media import get_all_parts, get_embedded_subtitle_streams, get_part, get_plex_metadata, \
|
||||
update_stream_info, is_stream_forced
|
||||
from support.scanning import scan_videos
|
||||
from subzero.language import Language
|
||||
from subliminal_patch.subtitle import ModifiedSubtitle
|
||||
|
||||
|
||||
def agent_extract_embedded(video_part_map, set_as_existing=False):
    """
    Collect embedded subtitle streams that have not been extracted yet and hand them to a
    background thread running multi_extract_embedded.

    :param video_part_map: dict of scanned video -> part info
    :param set_as_existing: if true, every queued language is added to the scanned video's
                            subtitle_languages regardless of the "search after autoextract" pref
    :return: list of created threads (empty if nothing was queued); None if an exception was
             caught and logged
    """
    try:
        storage = get_subtitle_storage()

        pending = []
        threads = []
        item_count = 0

        for scanned_video, part_info in video_part_map.iteritems():
            plexapi_item = scanned_video.plexapi_metadata["item"]
            stored_subs = storage.load_or_new(plexapi_item)
            valid_langs_in_media = audio_streams_match_languages(scanned_video,
                                                                 config.get_lang_list(ordered=True))

            # no configured language is left over after subtracting the matched audio languages
            if not config.lang_list.difference(valid_langs_in_media):
                Log.Debug("Skipping embedded subtitle extraction for %s, audio streams are in correct language(s)",
                          plexapi_item.rating_key)
                continue

            for plexapi_part in get_all_parts(plexapi_item):
                item_count += 1
                took_unknown = False
                took_known = False

                for requested_language in config.lang_list:
                    # once any stream of this part was queued, unknown streams are skipped
                    skip_unknown = took_unknown or took_known
                    embedded_subs = stored_subs.get_by_provider(plexapi_part.id, requested_language, "embedded")
                    current = stored_subs.get_any(plexapi_part.id, requested_language) or \
                        requested_language in scanned_video.external_subtitle_languages

                    if embedded_subs:
                        Log.Debug("Skipping embedded subtitle extraction for %s, already got %r from %s",
                                  plexapi_item.rating_key, requested_language, embedded_subs[0].id)
                        continue

                    stream_data = get_embedded_subtitle_streams(plexapi_part,
                                                                requested_language=requested_language,
                                                                skip_unknown=skip_unknown)
                    if not (stream_data and stream_data[0]["language"]):
                        continue

                    candidate = stream_data[0]
                    stream = candidate["stream"]
                    if candidate["is_unknown"]:
                        took_unknown = True
                    else:
                        took_known = True

                    # last tuple element is the set_current flag: only when nothing is stored/external yet
                    pending.append(({scanned_video: part_info}, plexapi_part, str(stream.index),
                                    str(requested_language), not current))

                    if not cast_bool(Prefs["subtitles.search_after_autoextract"]) or set_as_existing:
                        scanned_video.subtitle_languages.update({requested_language})

        if pending:
            Log.Info("Triggering extraction of %d embedded subtitles of %d items", len(pending), item_count)
            worker = Thread.Create(multi_extract_embedded, stream_list=pending, refresh=True, with_mods=True,
                                   single_thread=not config.advanced.auto_extract_multithread)
            threads.append(worker)
        return threads
    except:
        Log.Error("Something went wrong when auto-extracting subtitles, continuing: %s", traceback.format_exc())
|
||||
|
||||
|
||||
def multi_extract_embedded(stream_list, refresh=False, with_mods=False, single_thread=True, extract_mode="a",
                           history_storage=None):
    """
    Extract every stream described in stream_list by calling extract_embedded_sub once per entry.

    :param stream_list: iterable of (video_part_map, plexapi_part, stream_index, language, set_current)
    :param refresh: forwarded to extract_embedded_sub
    :param with_mods: forwarded to extract_embedded_sub
    :param single_thread: if true, the whole batch runs while holding the "extract_embedded" lock
    :param extract_mode: forwarded to extract_embedded_sub
    :param history_storage: forwarded to extract_embedded_sub
    """
    def run_batch():
        for entry in stream_list:
            video_part_map, plexapi_part, stream_index, language, set_current = entry
            scanned_video = video_part_map.keys()[0]
            plexapi_item = scanned_video.plexapi_metadata["item"]

            extract_embedded_sub(rating_key=plexapi_item.rating_key, part_id=plexapi_part.id,
                                 plex_item=plexapi_item, part=plexapi_part, scanned_videos=video_part_map,
                                 stream_index=stream_index, set_current=set_current,
                                 language=language, with_mods=with_mods, refresh=refresh,
                                 extract_mode=extract_mode, history_storage=history_storage)

    if not single_thread:
        run_batch()
        return

    with Thread.Lock(key="extract_embedded"):
        run_batch()
|
||||
|
||||
|
||||
def season_extract_embedded(rating_key, requested_language, with_mods=False, force=False):
    """
    Extract the embedded subtitle stream for requested_language from every child item of rating_key.

    :param rating_key: key whose "children" are enumerated under library/metadata
    :param requested_language: language to look up and extract
    :param with_mods: forwarded to extract_embedded_sub
    :param force: extract even if an embedded subtitle is already stored, and set it as current
    """
    # stored subtitle info is loaded per child item from this storage
    storage = get_subtitle_storage()

    try:
        for data in get_all_items(key="children", value=rating_key, base="library/metadata"):
            item = get_item(data[MI_KEY])
            if not item:
                continue

            stored_subs = storage.load_or_new(item)
            for part in get_all_parts(item):
                embedded_subs = stored_subs.get_by_provider(part.id, requested_language, "embedded")
                current = stored_subs.get_any(part.id, requested_language)

                # already extracted and not forced: leave this part alone
                if embedded_subs and not force:
                    continue

                stream_data = get_embedded_subtitle_streams(part, requested_language=requested_language)
                if not stream_data:
                    continue

                stream = stream_data[0]["stream"]
                extract_embedded_sub(rating_key=item.rating_key, part_id=part.id,
                                     stream_index=str(stream.index), set_current=not current or force,
                                     refresh=not current, language=requested_language, with_mods=with_mods,
                                     extract_mode="m")
    finally:
        storage.destroy()
|
||||
|
||||
|
||||
def extract_embedded_sub(**kwargs):
    """
    Extract one embedded subtitle stream of a media part with the Plex transcoder and store it.

    Keyword arguments:
        rating_key: rating key of the item (required)
        part_id: id of the part to extract from (required)
        stream_index: index of the stream to extract, as a string (required)
        language: IETF language code, parsed with Language.fromietf (required)
        with_mods: apply config.default_mods to the extracted subtitle (default False)
        refresh: refresh the item after a successful save (default True)
        set_current: passed on to save_subtitles (default True)
        plex_item: already-fetched item; looked up via rating_key when absent
        part: already-fetched part; looked up via plex_item/part_id when absent
        scanned_videos: dict of scanned video -> part info; built via scan_videos when empty
        extract_mode: mode recorded in the history entry (default "a")

    :return: True if at least one stream produced output and was recorded in history, else False
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs.pop("part_id")
    stream_index = kwargs.pop("stream_index")
    with_mods = kwargs.pop("with_mods", False)
    language = Language.fromietf(kwargs.pop("language"))
    refresh = kwargs.pop("refresh", True)
    set_current = kwargs.pop("set_current", True)

    # only look the item/part up when the caller did not supply them; a pop() default
    # would run the lookup unconditionally
    plex_item = kwargs.pop("plex_item") if "plex_item" in kwargs else get_item(rating_key)
    item_type = get_item_kind_from_item(plex_item)
    part = kwargs.pop("part") if "part" in kwargs else get_part(plex_item, part_id)
    scanned_videos = kwargs.pop("scanned_videos", None)
    extract_mode = kwargs.pop("extract_mode", "a")

    any_successful = False

    from interface.menu_helpers import set_refresh_menu_state

    if not part:
        return any_successful

    if not scanned_videos:
        metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
        scanned_videos = scan_videos([metadata], ignore_all=True, skip_hashing=True)

    update_stream_info(part)
    for stream in part.streams:
        # only the requested stream is of interest
        if str(stream.index) != stream_index:
            continue

        bn = os.path.basename(part.file)

        set_refresh_menu_state(_(u"Extracting subtitle %(stream_index)s of %(filename)s",
                                 stream_index=stream_index,
                                 filename=bn))
        Log.Info(u"Extracting stream %s (%s) of %s", stream_index, str(language), bn)

        subtitle = None
        video = None
        save_successful = False

        try:
            # mov_text streams are written out as srt
            out_codec = stream.codec if stream.codec != "mov_text" else "srt"

            args = [
                config.plex_transcoder, "-i", part.file, "-map", "0:%s" % stream_index, "-f", out_codec, "-"
            ]

            # NOTE(review): the command runs through a shell; safety relies entirely on quote_args
            cmdline = quote_args(args)
            Log.Debug(u"Calling: %s", cmdline)
            if mswindows:
                Log.Debug("MSWindows: Fixing encoding")
                cmdline = cmdline.encode("mbcs")

            output = None
            try:
                output = subprocess.check_output(cmdline, stderr=subprocess.PIPE, shell=True)
            except Exception:
                Log.Error("Extraction failed: %s", traceback.format_exc())

            if output:
                subtitle = ModifiedSubtitle(language, mods=config.default_mods if with_mods else None)
                subtitle.content = output
                subtitle.provider_name = "embedded"
                subtitle.id = "stream_%s" % stream_index
                subtitle.score = 0
                subtitle.set_encoding("utf-8")

                # fixme: speedup video; only video.name is needed
                video = scanned_videos.keys()[0]
                save_successful = save_subtitles(scanned_videos, {video: [subtitle]}, mode="m",
                                                 set_current=set_current)
        finally:
            # always clear the "Extracting ..." state, also when extraction or saving failed
            set_refresh_menu_state(None)

        if subtitle is None:
            continue

        if save_successful and refresh:
            refresh_item(rating_key)

        # add item to history
        item_title = get_title_for_video_metadata(video.plexapi_metadata,
                                                  add_section_title=False, add_episode_title=True)

        history = get_history()
        try:
            history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
                        thumb=video.plexapi_metadata["super_thumb"],
                        subtitle=subtitle, mode=extract_mode)
        finally:
            # release the history storage even if adding the entry fails
            history.destroy()

        any_successful = True

    return any_successful
|
||||
@@ -394,7 +394,7 @@ def get_language_from_stream(lang_code):
|
||||
return Language.fromietf(lang)
|
||||
elif lang:
|
||||
try:
|
||||
return language_from_stream(lang)
|
||||
return language_from_stream(lang_code)
|
||||
except LanguageError:
|
||||
pass
|
||||
|
||||
@@ -437,17 +437,10 @@ def get_language(lang_short):
|
||||
|
||||
|
||||
def display_language(l):
    """
    Build the translated display name of a language.

    :param l: language object exposing basename and forced, or a falsy value
    :return: "Unknown" for a falsy value, otherwise the translated lower-cased basename,
             followed by a translated " (forced)" marker when l.forced is set
    """
    if not l:
        return "Unknown"

    name = _(str(l.basename).lower())
    suffix = (u" (%s)" % _("forced")) if l.forced else ""
    return name + suffix
|
||||
|
||||
|
||||
def is_stream_forced(stream):
    """
    Tell whether a stream counts as forced.

    :param stream: object that may carry "forced" and "title" attributes
    :return: the stream's truthy "forced" attribute if set; True if the title contains
             "forced" (case-insensitive); otherwise the falsy "forced" value (False if missing)
    """
    flagged = getattr(stream, "forced", False)
    if flagged:
        return flagged

    # fall back to the title; a missing or None title is treated as empty
    title = getattr(stream, "title", "") or ""
    if title and "forced" in title.strip().lower():
        return True

    return flagged
|
||||
|
||||
|
||||
class PartUnknownException(Exception):
|
||||
pass
|
||||
|
||||
@@ -7,6 +7,7 @@ import helpers
|
||||
import subtitlehelpers
|
||||
|
||||
from config import config as sz_config
|
||||
from subzero.language import ENDSWITH_LANGUAGECODE_RE
|
||||
|
||||
|
||||
SECONDARY_TAGS = ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom', 'hi', 'cc', 'sdh']
|
||||
@@ -125,7 +126,7 @@ def find_subtitles(part, ignore_parts_cleanup=None):
|
||||
root = split_tag[0]
|
||||
|
||||
# get associated media file name without language
|
||||
sub_fn = subtitlehelpers.ENDSWITH_LANGUAGECODE_RE.sub("", root)
|
||||
sub_fn = ENDSWITH_LANGUAGECODE_RE.sub("", root)
|
||||
|
||||
# subtitle basename and basename without possible language tag not found in collected
|
||||
# media files? kill.
|
||||
|
||||
@@ -7,7 +7,8 @@ import os
|
||||
from babelfish import LanguageReverseError
|
||||
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.helpers import get_plex_item_display_title, cast_bool, get_language_from_stream, is_stream_forced
|
||||
from support.helpers import get_plex_item_display_title, cast_bool, get_language_from_stream
|
||||
from support.plex_media import is_stream_forced, update_stream_info
|
||||
from support.items import get_item
|
||||
from support.lib import Plex
|
||||
from support.storage import get_subtitle_storage
|
||||
@@ -35,7 +36,7 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
for media in item.media:
|
||||
existing_subs = {"internal": [], "external": [], "own_external": [], "count": 0}
|
||||
for part in media.parts:
|
||||
|
||||
update_stream_info(part)
|
||||
# did we already download an external subtitle before?
|
||||
if subtitle_target_dir and stored_subs:
|
||||
for language in languages_set:
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# coding=utf-8
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
import helpers
|
||||
from items import get_item
|
||||
@@ -26,6 +27,9 @@ tvdb_guid_identifier = "com.plexapp.agents.thetvdb://"
|
||||
|
||||
|
||||
def get_plexapi_stream_info(plex_item, part_id=None):
|
||||
if not plex_item:
|
||||
return
|
||||
|
||||
d = {"stream": {}}
|
||||
data = d["stream"]
|
||||
|
||||
@@ -100,6 +104,9 @@ def media_to_videos(media, kind="series"):
|
||||
plex_episode = get_item(ep.id)
|
||||
stream_info = get_plexapi_stream_info(plex_episode)
|
||||
|
||||
if not stream_info:
|
||||
continue
|
||||
|
||||
for item in media.seasons[season].episodes[episode].items:
|
||||
for part in item.parts:
|
||||
videos.append(
|
||||
@@ -121,22 +128,24 @@ def media_to_videos(media, kind="series"):
|
||||
)
|
||||
else:
|
||||
stream_info = get_plexapi_stream_info(plex_item)
|
||||
imdb_id = None
|
||||
if imdb_guid_identifier in media.guid:
|
||||
imdb_id = media.guid[len(imdb_guid_identifier):].split("?")[0]
|
||||
for item in media.items:
|
||||
for part in item.parts:
|
||||
videos.append(
|
||||
get_metadata_dict(plex_item, part, dict(stream_info, **{"plex_part": part, "type": "movie",
|
||||
"title": media.title, "id": media.id,
|
||||
"super_thumb": plex_item.thumb,
|
||||
"series_id": None, "year": year,
|
||||
"season_id": None, "imdb_id": imdb_id,
|
||||
"original_title": original_title,
|
||||
"series_tvdb_id": None, "tvdb_id": None,
|
||||
"section": plex_item.section.title})
|
||||
)
|
||||
)
|
||||
|
||||
if stream_info:
|
||||
imdb_id = None
|
||||
if imdb_guid_identifier in media.guid:
|
||||
imdb_id = media.guid[len(imdb_guid_identifier):].split("?")[0]
|
||||
for item in media.items:
|
||||
for part in item.parts:
|
||||
videos.append(
|
||||
get_metadata_dict(plex_item, part, dict(stream_info, **{"plex_part": part, "type": "movie",
|
||||
"title": media.title, "id": media.id,
|
||||
"super_thumb": plex_item.thumb,
|
||||
"series_id": None, "year": year,
|
||||
"season_id": None, "imdb_id": imdb_id,
|
||||
"original_title": original_title,
|
||||
"series_tvdb_id": None, "tvdb_id": None,
|
||||
"section": plex_item.section.title})
|
||||
)
|
||||
)
|
||||
return videos
|
||||
|
||||
|
||||
@@ -174,42 +183,99 @@ def get_all_parts(plex_item):
|
||||
return parts
|
||||
|
||||
|
||||
def update_stream_info(part):
    """
    Run update_stream_info_ for the part, logging instead of raising on any failure.

    :param part: media part; its file attribute is used in the log message
    :return: whatever update_stream_info_ returns, or None if it raised
    """
    try:
        outcome = update_stream_info_(part)
    except:
        Log.Exception("Getting Mediainfo failed for: %s", part.file)
    else:
        return outcome
|
||||
|
||||
|
||||
def update_stream_info_(part):
    """
    Fill in stream titles of an MP4 part from the output of the mediainfo binary.

    Does nothing unless config.mediainfo_bin is set and part.container is "mp4".
    mediainfo is asked to print "-<ID>_<Title>" for every text stream; the titles are
    then assigned to the part's streams whose index + 1 equals the reported ID.

    :param part: media part exposing container, file and streams
    :raises subprocess.CalledProcessError: if mediainfo exits with a non-zero status
    """
    if not (config.mediainfo_bin and part.container == "mp4"):
        return

    cmdline = '%s --Inform="Text;-%%ID%%_%%Title%%" %s' % (config.mediainfo_bin, helpers.quote(part.file))
    result = subprocess.check_output(cmdline, stderr=subprocess.PIPE, shell=True)
    if not result:
        return

    stream_titles = {}
    try:
        # result[1:] drops the leading "-" of the first record
        for pair in result[1:].split("-"):
            # split on the first underscore only, titles may contain underscores themselves
            sid, title = pair.split("_", 1)
            stream_titles[int(sid.strip())] = title.strip()
    except ValueError:
        # NOTE: a title containing "-" still cannot be told apart from a record separator
        Log.Debug("Couldn't parse Mediainfo output for %s: %r", part.file, result)
        return

    filled = []
    for stream in part.streams:
        if stream.index is None:
            Log.Debug("Found stream with no index: %r", stream)

        # reported IDs are matched against stream index + 1; streams without index map to 1
        index = stream.index+1 if stream.index is not None else 1
        if index in stream_titles:
            stream.title = stream_titles[index]
            filled.append(index-1)

    if filled:
        Log.Debug("Filled missing MP4 stream title info for streams: %s", filled)
|
||||
|
||||
|
||||
def is_stream_forced(stream):
    """
    Tell whether a stream counts as forced.

    :param stream: object that may carry "forced" and "title" attributes
    :return: the stream's truthy "forced" attribute if set; True if the title contains
             "forced" (case-insensitive); otherwise the falsy "forced" value (False if missing)
    """
    flagged = getattr(stream, "forced", False)
    if flagged:
        return flagged

    # fall back to the title; a missing or None title is treated as empty
    title = getattr(stream, "title", "") or ""
    if title and "forced" in title.strip().lower():
        return True

    return flagged
|
||||
|
||||
|
||||
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, skip_unknown=False):
|
||||
streams = []
|
||||
streams_unknown = []
|
||||
all_streams = []
|
||||
has_unknown = False
|
||||
found_requested_language = False
|
||||
update_stream_info(part)
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
|
||||
is_forced = helpers.is_stream_forced(stream)
|
||||
is_forced = is_stream_forced(stream)
|
||||
language = helpers.get_language_from_stream(stream.language_code)
|
||||
if language:
|
||||
language = Language.rebuild(language, forced=is_forced)
|
||||
|
||||
is_unknown = False
|
||||
found_requested_language = requested_language and requested_language == language
|
||||
stream_data = None
|
||||
|
||||
if not language and config.treat_und_as_first:
|
||||
if not language:
|
||||
# only consider first unknown subtitle stream
|
||||
if has_unknown and skip_duplicate_unknown:
|
||||
continue
|
||||
if config.treat_und_as_first:
|
||||
if has_unknown and skip_duplicate_unknown:
|
||||
Log.Debug("skipping duplicate unknown")
|
||||
continue
|
||||
|
||||
language = Language.rebuild(list(config.lang_list)[0], forced=is_forced)
|
||||
language = Language.rebuild(list(config.lang_list)[0], forced=is_forced)
|
||||
else:
|
||||
language = None
|
||||
is_unknown = True
|
||||
has_unknown = True
|
||||
streams_unknown.append({"stream": stream, "is_unknown": is_unknown, "language": language,
|
||||
"is_forced": is_forced})
|
||||
stream_data = {"stream": stream, "is_unknown": is_unknown, "language": language,
|
||||
"is_forced": is_forced}
|
||||
streams_unknown.append(stream_data)
|
||||
|
||||
if not requested_language or found_requested_language:
|
||||
streams.append({"stream": stream, "is_unknown": is_unknown, "language": language,
|
||||
"is_forced": is_forced})
|
||||
stream_data = {"stream": stream, "is_unknown": is_unknown, "language": language,
|
||||
"is_forced": is_forced}
|
||||
streams.append(stream_data)
|
||||
|
||||
if found_requested_language:
|
||||
break
|
||||
|
||||
if streams_unknown and not found_requested_language and not skip_unknown:
|
||||
streams = streams_unknown
|
||||
if stream_data:
|
||||
all_streams.append(stream_data)
|
||||
|
||||
if requested_language:
|
||||
if streams_unknown and not found_requested_language and not skip_unknown:
|
||||
streams = streams_unknown
|
||||
else:
|
||||
streams = all_streams
|
||||
|
||||
return streams
|
||||
|
||||
@@ -245,6 +311,9 @@ def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
|
||||
|
||||
stream_info = get_plexapi_stream_info(plex_item, part_id)
|
||||
|
||||
if not stream_info:
|
||||
return
|
||||
|
||||
# get normalized metadata
|
||||
# fixme: duplicated logic of media_to_videos
|
||||
if item_type == "episode":
|
||||
@@ -366,3 +435,4 @@ class PMSMediaProxy(object):
|
||||
|
||||
m = m.children[0]
|
||||
return parts
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import helpers
|
||||
from babelfish.exceptions import LanguageError
|
||||
|
||||
from support.lib import Plex, get_intent
|
||||
from support.plex_media import get_stream_fps
|
||||
from support.plex_media import get_stream_fps, is_stream_forced, update_stream_info
|
||||
from support.storage import get_subtitle_storage
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.subtitlehelpers import get_subtitles_from_metadata
|
||||
@@ -29,7 +29,7 @@ def prepare_video(pms_video_info, ignore_all=False, hints=None, rating_key=None,
|
||||
if ignore_all:
|
||||
Log.Debug("Force refresh intended.")
|
||||
|
||||
Log.Debug("Detecting streams: %s, external_subtitles=%s, embedded_subtitles=%s" % (
|
||||
Log.Debug("Detecting streams: %s, account_for_external_subtitles=%s, account_for_embedded_subtitles=%s" % (
|
||||
plex_part.file, external_subtitles, embedded_subtitles))
|
||||
|
||||
known_embedded = []
|
||||
@@ -46,23 +46,25 @@ def prepare_video(pms_video_info, ignore_all=False, hints=None, rating_key=None,
|
||||
# fixme: skip the whole scanning process if known_embedded == wanted languages?
|
||||
audio_languages = []
|
||||
if plexpy_part:
|
||||
update_stream_info(plexpy_part)
|
||||
for stream in plexpy_part.streams:
|
||||
if stream.stream_type == 2:
|
||||
lang = None
|
||||
try:
|
||||
lang = language_from_stream(stream.language_code)
|
||||
except LanguageError:
|
||||
Log.Debug("Couldn't detect embedded audio stream language: %s", stream.language_code)
|
||||
Log.Info("Couldn't detect embedded audio stream language: %s", stream.language_code)
|
||||
|
||||
# treat unknown language as lang1?
|
||||
if not lang and config.treat_und_as_first:
|
||||
lang = Language.rebuild(list(config.lang_list)[0])
|
||||
Log.Info("Assuming language %s for audio stream: %s", lang, getattr(stream, "index", None))
|
||||
|
||||
audio_languages.append(lang)
|
||||
|
||||
# subtitle stream
|
||||
elif stream.stream_type == 3 and embedded_subtitles:
|
||||
is_forced = helpers.is_stream_forced(stream)
|
||||
is_forced = is_stream_forced(stream)
|
||||
|
||||
if ((config.forced_only or config.forced_also) and is_forced) or not is_forced:
|
||||
# embedded subtitle
|
||||
@@ -73,11 +75,13 @@ def prepare_video(pms_video_info, ignore_all=False, hints=None, rating_key=None,
|
||||
try:
|
||||
lang = language_from_stream(stream.language_code)
|
||||
except LanguageError:
|
||||
Log.Debug("Couldn't detect embedded subtitle stream language: %s", stream.language_code)
|
||||
Log.Info("Couldn't detect embedded subtitle stream language: %s", stream.language_code)
|
||||
|
||||
# treat unknown language as lang1?
|
||||
if not lang and config.treat_und_as_first:
|
||||
lang = Language.rebuild(list(config.lang_list)[0])
|
||||
Log.Info("Assuming language %s for subtitle stream: %s", lang,
|
||||
getattr(stream, "index", None))
|
||||
|
||||
if lang:
|
||||
if is_forced:
|
||||
@@ -127,7 +131,8 @@ def prepare_video(pms_video_info, ignore_all=False, hints=None, rating_key=None,
|
||||
set_existing_languages(video, pms_video_info, external_subtitles=external_subtitles,
|
||||
embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
|
||||
stored_subs=stored_subs, languages=config.lang_list,
|
||||
only_one=config.only_one, known_metadata_subs=known_metadata_subs)
|
||||
only_one=config.only_one, known_metadata_subs=known_metadata_subs,
|
||||
match_strictness=config.ext_match_strictness)
|
||||
|
||||
# add video fps info
|
||||
video.fps = plex_part.fps
|
||||
|
||||
@@ -5,6 +5,7 @@ import helpers
|
||||
|
||||
from config import config, SUBTITLE_EXTS, TEXT_SUBTITLE_EXTS
|
||||
from bs4 import UnicodeDammit
|
||||
from subzero.language import match_ietf_language
|
||||
|
||||
|
||||
class SubtitleHelper(object):
|
||||
@@ -85,19 +86,6 @@ class VobSubSubtitleHelper(SubtitleHelper):
|
||||
#####################################################################################################################
|
||||
|
||||
|
||||
IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$"
|
||||
ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
|
||||
|
||||
|
||||
def match_ietf_language(s):
    """
    Extract the trailing language part of a subtitle file name.

    The pattern used depends on the "subtitles.language.ietf_display"
    preference: when it is truthy, IETF_MATCH is applied, otherwise the plain
    "everything after the last dot" pattern is used.

    :param s: file name (without the subtitle extension) to inspect
    :return: the single captured group when the pattern matches, otherwise
             ``s`` unchanged
    """
    use_ietf = helpers.cast_bool(Prefs["subtitles.language.ietf_display"])
    pattern = IETF_MATCH if use_ietf else ".+\.([^\.]+)$"

    found = re.match(pattern, s)
    if not found:
        return s

    captured = found.groups()
    if len(captured) != 1:
        return s
    return captured[0]
|
||||
|
||||
|
||||
class DefaultSubtitleHelper(SubtitleHelper):
|
||||
@classmethod
|
||||
def is_helper_for(cls, filename):
|
||||
@@ -133,7 +121,7 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
# Attempt to extract the language from the filename (e.g. Avatar (2009).eng)
|
||||
# IETF support thanks to
|
||||
# https://github.com/hpsbranco/LocalMedia.bundle/commit/4fad9aefedece78a1fa96401304351347f644369
|
||||
lang_part = match_ietf_language(file)
|
||||
lang_part = match_ietf_language(file, ietf=helpers.cast_bool(Prefs["subtitles.language.ietf_display"]))
|
||||
if lang_part != file:
|
||||
language = Locale.Language.Match(lang_part)
|
||||
elif config.only_one:
|
||||
|
||||
@@ -19,6 +19,7 @@ from support.config import config
|
||||
from support.items import get_recent_items, get_item, is_wanted, get_item_title
|
||||
from support.helpers import track_usage, get_title_for_video_metadata, cast_bool, PartUnknownException
|
||||
from support.plex_media import get_plex_metadata
|
||||
from support.extract import agent_extract_embedded
|
||||
from support.scanning import scan_videos
|
||||
from support.i18n import _
|
||||
from download import download_best_subtitles, pre_download_hook, post_download_hook, language_hook
|
||||
@@ -170,12 +171,15 @@ class SubtitleListingMixin(object):
|
||||
else:
|
||||
s.wrong_season_ep = True
|
||||
|
||||
orig_matches = matches.copy()
|
||||
score, score_without_hash = compute_score(matches, s, video, hearing_impaired=use_hearing_impaired)
|
||||
|
||||
unsorted_subtitles.append(
|
||||
(s, compute_score(matches, s, video, hearing_impaired=use_hearing_impaired), matches))
|
||||
scored_subtitles = sorted(unsorted_subtitles, key=operator.itemgetter(1), reverse=True)
|
||||
(s, score, score_without_hash, matches, orig_matches))
|
||||
scored_subtitles = sorted(unsorted_subtitles, key=operator.itemgetter(1, 2), reverse=True)
|
||||
|
||||
subtitles = []
|
||||
for subtitle, score, matches in scored_subtitles:
|
||||
for subtitle, score, score_without_hash, matches, orig_matches in scored_subtitles:
|
||||
# check score
|
||||
if score < min_score and not subtitle.wrong_series:
|
||||
Log.Info(u'%s: Score %d is below min_score (%d)', self.name, score, min_score)
|
||||
@@ -449,6 +453,17 @@ class SearchAllRecentlyAddedMissing(Task):
|
||||
Log.Debug(u"%s: Looking for missing subtitles: %s", self.name, get_item_title(plex_item))
|
||||
scanned_parts = scan_videos([metadata], providers=providers)
|
||||
|
||||
# auto extract embedded
|
||||
if config.embedded_auto_extract:
|
||||
if config.plex_transcoder:
|
||||
ts = agent_extract_embedded(scanned_parts, set_as_existing=True)
|
||||
if ts:
|
||||
Log.Debug("Waiting for %i extraction threads to finish" % len(ts))
|
||||
for t in ts:
|
||||
t.join()
|
||||
else:
|
||||
Log.Warn("Plex Transcoder not found, can't auto extract")
|
||||
|
||||
downloaded_subtitles = download_best_subtitles(scanned_parts, min_score=min_score,
|
||||
providers=providers)
|
||||
hit_providers = downloaded_subtitles is not None
|
||||
|
||||
@@ -288,7 +288,7 @@
|
||||
},
|
||||
{
|
||||
"id": "anticaptcha.service",
|
||||
"label": "AntiCaptcha-Service (needs paid account; enables Addic7ed, titlovi)",
|
||||
"label": "AntiCaptcha-Service (needs paid account; enables Addic7ed)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"none",
|
||||
@@ -335,6 +335,26 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.napisy24.enabled",
|
||||
"label": "Provider: Enable Napisy24 (pl)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.napisy24.username",
|
||||
"label": "Napisy24 Username",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.napisy24.password",
|
||||
"label": "Napisy24 Password",
|
||||
"type": "text",
|
||||
"option": "hidden",
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.enabled",
|
||||
"label": "Provider: Enable Addic7ed (needs AntiCaptcha)",
|
||||
@@ -355,6 +375,12 @@
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.is_vip",
|
||||
"label": "Addic7ed VIP? (80 vs 40 downloads per day)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.boost_by2",
|
||||
"label": "Addic7ed: boost score (if requirements met)",
|
||||
@@ -389,10 +415,24 @@
|
||||
},
|
||||
{
|
||||
"id": "provider.titlovi.enabled",
|
||||
"label": "Provider: Enable Titlovi.com (might need AntiCaptcha)",
|
||||
"label": "Provider: Enable Titlovi.com (User and Password required)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.titlovi.username",
|
||||
"label": "Titlovi Username",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.titlovi.password",
|
||||
"label": "Titlovi Password",
|
||||
"type": "text",
|
||||
"option": "hidden",
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.legendastv.enabled",
|
||||
"label": "Provider: Enable Legendas TV (mostly pt-BR; UNRAR NEEDED)",
|
||||
@@ -431,6 +471,20 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.subscene.username",
|
||||
"label": "SubScene Username",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.subscene.password",
|
||||
"label": "SubScene Password",
|
||||
"type": "text",
|
||||
"option": "hidden",
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.supersubtitles.enabled",
|
||||
"label": "Provider: Enable feliratok.info (Hungarian)",
|
||||
@@ -461,6 +515,18 @@
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.bsplayer.enabled",
|
||||
"label": "Provider: Enable BSPlayer Subtitles",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.screwzira.enabled",
|
||||
"label": "Provider: Enable ScrewZira (Hebrew)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "providers.multithreading",
|
||||
"label": "Search enabled providers simultaneously (multithreading)",
|
||||
@@ -746,6 +812,29 @@
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.SubtitleStorageMaintenance.frequency",
|
||||
"label": "Scheduler: Periodically run subtitle storage maintenance (SZ internal)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"never",
|
||||
"every 6 hours",
|
||||
"every 12 hours",
|
||||
"every 24 hours",
|
||||
"every 1 days",
|
||||
"every 2 days",
|
||||
"every 3 days",
|
||||
"every 4 days",
|
||||
"every 1 weeks",
|
||||
"every 2 weeks",
|
||||
"every 3 weeks",
|
||||
"every 4 weeks",
|
||||
"every 5 weeks",
|
||||
"every 6 weeks",
|
||||
"every 12 weeks"
|
||||
],
|
||||
"default": "every 1 weeks"
|
||||
},
|
||||
{
|
||||
"id": "history_size",
|
||||
"label": "History: amount of items to store historical data for",
|
||||
@@ -861,7 +950,7 @@
|
||||
},
|
||||
{
|
||||
"id": "use_custom_dns2",
|
||||
"label": "Use custom DNS (IPs, comma-separated, leave empty for system DNS. Default: Google/CF)",
|
||||
"label": "Use custom DNS (IPs, comma-separated, set to 'system' for system DNS. Default: Google/CF)",
|
||||
"type": "text",
|
||||
"default": "1.1.1.1, 8.8.8.8"
|
||||
},
|
||||
|
||||
+2
-2
@@ -13,7 +13,7 @@
|
||||
<key>CFBundleSignature</key>
|
||||
<string>????</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>2.6.5.3023</string>
|
||||
<string>2.6.5.3237</string>
|
||||
<key>PlexFrameworkVersion</key>
|
||||
<string>2</string>
|
||||
<key>PlexPluginClass</key>
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
<h1>Sub-Zero for Plex</h1><i>Subtitles done right</i>
|
||||
|
||||
Version 2.6.5.3023 DEV
|
||||
Version 2.6.5.3237 DEV
|
||||
|
||||
Originally based on @bramwalet's awesome <a href="https://github.com/bramwalet/Subliminal.bundle">Subliminal.bundle</a>
|
||||
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
|
||||
@@ -0,0 +1,196 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import functools
|
||||
from collections import namedtuple
|
||||
from threading import RLock
|
||||
|
||||
_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
|
||||
|
||||
|
||||
@functools.wraps(functools.update_wrapper)
def update_wrapper(
    wrapper,
    wrapped,
    assigned=functools.WRAPPER_ASSIGNMENTS,
    updated=functools.WRAPPER_UPDATES,
):
    """
    Drop-in replacement for functools.update_wrapper with two workarounds.

    Only attributes that actually exist on ``wrapped`` are copied, and
    ``__wrapped__`` is always set on the result. Returns ``wrapper``.
    """
    # http://bugs.python.org/issue3445: skip attributes the wrapped object
    # does not have instead of failing on them
    present = []
    for name in assigned:
        if hasattr(wrapped, name):
            present.append(name)

    patched = functools.update_wrapper(wrapper, wrapped, tuple(present), updated)

    # https://bugs.python.org/issue17482: make sure __wrapped__ points at the
    # object that was wrapped by this call
    patched.__wrapped__ = wrapped
    return patched
|
||||
|
||||
|
||||
class _HashedSeq(list):
|
||||
__slots__ = 'hashvalue'
|
||||
|
||||
def __init__(self, tup, hash=hash):
|
||||
self[:] = tup
|
||||
self.hashvalue = hash(tup)
|
||||
|
||||
def __hash__(self):
|
||||
return self.hashvalue
|
||||
|
||||
|
||||
def _make_key(
|
||||
args,
|
||||
kwds,
|
||||
typed,
|
||||
kwd_mark=(object(),),
|
||||
fasttypes=set([int, str, frozenset, type(None)]),
|
||||
sorted=sorted,
|
||||
tuple=tuple,
|
||||
type=type,
|
||||
len=len,
|
||||
):
|
||||
'Make a cache key from optionally typed positional and keyword arguments'
|
||||
key = args
|
||||
if kwds:
|
||||
sorted_items = sorted(kwds.items())
|
||||
key += kwd_mark
|
||||
for item in sorted_items:
|
||||
key += item
|
||||
if typed:
|
||||
key += tuple(type(v) for v in args)
|
||||
if kwds:
|
||||
key += tuple(type(v) for k, v in sorted_items)
|
||||
elif len(key) == 1 and type(key[0]) in fasttypes:
|
||||
return key[0]
|
||||
return _HashedSeq(key)
|
||||
|
||||
|
||||
def lru_cache(maxsize=100, typed=False):
    """Least-recently-used cache decorator.

    If *maxsize* is set to None, the LRU features are disabled and the cache
    can grow without bound. If *maxsize* is 0 nothing is cached and only the
    miss counter is updated. A negative *maxsize* is treated as 0.

    If *typed* is True, arguments of different types will be cached separately.
    For example, f(3.0) and f(3) will be treated as distinct calls with
    distinct results.

    Arguments to the cached function must be hashable.

    View the cache statistics named tuple (hits, misses, maxsize, currsize) with
    f.cache_info().  Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.

    See:  http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used

    """

    # Users should only access the lru_cache through its public API:
    #       cache_info, cache_clear, and f.__wrapped__
    # The internals of the lru_cache are encapsulated for thread safety and
    # to allow the implementation to change (including a possible C version).

    # A negative bound would send every call into the eviction branch below
    # with an empty cache, where "del cache[oldkey]" fails with KeyError.
    # Treat it as "no caching" instead.
    if maxsize is not None and maxsize < 0:
        maxsize = 0

    def decorating_function(user_function):

        cache = dict()
        stats = [0, 0]                  # make statistics updateable non-locally
        HITS, MISSES = 0, 1             # names for the stats fields
        make_key = _make_key
        cache_get = cache.get           # bound method to lookup key or return None
        _len = len                      # localize the global len() function
        lock = RLock()                  # because linkedlist updates aren't threadsafe
        root = []                       # root of the circular doubly linked list
        root[:] = [root, root, None, None]      # initialize by pointing to self
        nonlocal_root = [root]                  # make updateable non-locally
        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3    # names for the link fields

        if maxsize == 0:

            def wrapper(*args, **kwds):
                # no caching, just do a statistics update after a successful call
                result = user_function(*args, **kwds)
                stats[MISSES] += 1
                return result

        elif maxsize is None:

            def wrapper(*args, **kwds):
                # simple caching without ordering or size limit
                key = make_key(args, kwds, typed)
                # root used here as a unique not-found sentinel
                result = cache_get(key, root)
                if result is not root:
                    stats[HITS] += 1
                    return result
                result = user_function(*args, **kwds)
                cache[key] = result
                stats[MISSES] += 1
                return result

        else:

            def wrapper(*args, **kwds):
                # size limited caching that tracks accesses by recency
                key = make_key(args, kwds, typed) if kwds or typed else args
                with lock:
                    link = cache_get(key)
                    if link is not None:
                        # record recent use of the key by moving it
                        # to the front of the list
                        root, = nonlocal_root
                        link_prev, link_next, key, result = link
                        link_prev[NEXT] = link_next
                        link_next[PREV] = link_prev
                        last = root[PREV]
                        last[NEXT] = root[PREV] = link
                        link[PREV] = last
                        link[NEXT] = root
                        stats[HITS] += 1
                        return result
                # the user function runs outside the lock
                result = user_function(*args, **kwds)
                with lock:
                    root, = nonlocal_root
                    if key in cache:
                        # getting here means that this same key was added to the
                        # cache while the lock was released.  since the link
                        # update is already done, we need only return the
                        # computed result and update the count of misses.
                        pass
                    elif _len(cache) >= maxsize:
                        # use the old root to store the new key and result
                        oldroot = root
                        oldroot[KEY] = key
                        oldroot[RESULT] = result
                        # empty the oldest link and make it the new root
                        root = nonlocal_root[0] = oldroot[NEXT]
                        oldkey = root[KEY]
                        root[KEY] = root[RESULT] = None
                        # now update the cache dictionary for the new links
                        del cache[oldkey]
                        cache[key] = oldroot
                    else:
                        # put result in a new link at the front of the list
                        last = root[PREV]
                        link = [last, root, key, result]
                        last[NEXT] = root[PREV] = cache[key] = link
                    stats[MISSES] += 1
                return result

        def cache_info():
            """Report cache statistics"""
            with lock:
                return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))

        def cache_clear():
            """Clear the cache and cache statistics"""
            with lock:
                cache.clear()
                root = nonlocal_root[0]
                root[:] = [root, root, None, None]
                stats[:] = [0, 0]

        wrapper.__wrapped__ = user_function
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return update_wrapper(wrapper, user_function)

    return decorating_function
|
||||
@@ -1,392 +0,0 @@
|
||||
# coding=utf-8
|
||||
|
||||
import logging
|
||||
import random
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import base64
|
||||
|
||||
from copy import deepcopy
|
||||
from time import sleep
|
||||
from collections import OrderedDict
|
||||
from .jsfuck import jsunfuck
|
||||
|
||||
import js2py
|
||||
from requests.sessions import Session
|
||||
from subliminal_patch.pitcher import pitchers
|
||||
|
||||
try:
|
||||
from requests_toolbelt.utils import dump
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
from urlparse import urlunparse
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlunparse
|
||||
|
||||
brotli_available = True
|
||||
|
||||
try:
|
||||
from brotli import decompress as brdec
|
||||
except:
|
||||
brotli_available = False
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__version__ = "2.0.3"
|
||||
|
||||
# Originally written by https://github.com/Anorov/cloudflare-scrape
|
||||
# Rewritten by VeNoMouS - <venom@gen-x.co.nz> for https://github.com/VeNoMouS/Sick-Beard - 24/3/2018 NZDT
|
||||
|
||||
DEFAULT_USER_AGENTS = [
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/65.0.3325.181 Chrome/65.0.3325.181 Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.0; Moto G (5) Build/NPPS25.137-93-8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.137 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B554a Safari/9537.53",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:59.0) Gecko/20100101 Firefox/59.0",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0",
|
||||
]
|
||||
|
||||
BUG_REPORT = """\
|
||||
Cloudflare may have changed their technique, or there may be a bug in the script.
|
||||
"""
|
||||
|
||||
|
||||
# Directory of this module; the browser fingerprint JSON files are read from it.
cur_path = os.path.abspath(os.path.dirname(__file__))

if brotli_available:
    # browsers_br.json: the "chrome" entry is used as-is
    brwsrs = os.path.join(cur_path, "browsers_br.json")
    with open(brwsrs, "r") as f:
        UA_COMBO = json.load(f, object_pairs_hook=OrderedDict)["chrome"]

else:
    # browsers.json: a list of header sets with lower-case names; convert each
    # one into the {"User-Agent": [...], "headers": {...}} shape used above
    brwsrs = os.path.join(cur_path, "browsers.json")
    UA_COMBO = []
    with open(brwsrs, "r") as f:
        _brwsrs = json.load(f, object_pairs_hook=OrderedDict)

    for entry in _brwsrs:
        # "accept-encoding" -> "Accept-Encoding"; order of the headers is kept.
        # .items() instead of .iteritems() so this also runs on Python 3.
        _entry = OrderedDict(("-".join(a.capitalize() for a in key.split("-")), value)
                             for key, value in entry.items())
        # placeholder; the scraper fills in the chosen User-Agent per instance
        _entry["User-Agent"] = None
        UA_COMBO.append({"User-Agent": [entry["user-agent"]], "headers": _entry})
|
||||
|
||||
|
||||
class NeedsCaptchaException(Exception):
    """Raised when a Cloudflare response asks for a captcha to be solved."""
|
||||
|
||||
|
||||
class CloudflareScraper(Session):
    """
    requests Session that detects Cloudflare challenge pages and answers them.

    Every request is sent with a browser-like header set picked at random from
    UA_COMBO when the scraper is created. Javascript ("IUAM") challenges are
    solved locally; captcha challenges are handed to a pitcher obtained from
    ``pitchers.get_pitcher()``.
    """

    def __init__(self, *args, **kwargs):
        """
        :param delay: seconds to wait before answering a Javascript challenge
                      (keyword only, default 8); all other arguments are passed
                      on to ``Session.__init__``
        """
        self.delay = kwargs.pop('delay', 8)
        self.debug = False
        self._ua = None
        self._hdrs = None

        super(CloudflareScraper, self).__init__(*args, **kwargs)

        if not self._ua:
            # Set a random User-Agent if no custom User-Agent has been set
            ua_combo = random.choice(UA_COMBO)
            self._ua = random.choice(ua_combo["User-Agent"])
            # per-instance copy, so later changes don't leak into UA_COMBO
            self._hdrs = ua_combo["headers"].copy()
            self._hdrs["User-Agent"] = self._ua
            self.headers['User-Agent'] = self._ua

    def set_cloudflare_challenge_delay(self, delay):
        """Set the challenge delay; non-numeric or non-positive values are ignored."""
        if isinstance(delay, (int, float)) and delay > 0:
            self.delay = delay

    def is_cloudflare_challenge(self, resp):
        """
        Tell whether ``resp`` is a Cloudflare Javascript challenge page.

        :return: True for a 429/503 response served by cloudflare whose body
                 contains both ``jschl_vc`` and ``jschl_answer``, else False
        :raises NeedsCaptchaException: if a cloudflare-served body contains one
                 of the captcha markers
        """
        if resp.headers.get('Server', '').startswith('cloudflare'):
            if b'why_captcha' in resp.content or b'/cdn-cgi/l/chk_captcha' in resp.content:
                raise NeedsCaptchaException

            return (
                resp.status_code in [429, 503]
                and b"jschl_vc" in resp.content
                and b"jschl_answer" in resp.content
            )
        return False

    def debugRequest(self, req):
        """Print a dump of the request/response pair; failures are only logged."""
        try:
            print(dump.dump_all(req).decode('utf-8'))
        except Exception:
            # best effort: `dump` is missing when requests_toolbelt isn't installed
            logger.debug("cf: couldn't dump request", exc_info=True)

    @staticmethod
    def _captcha_field(pattern, content):
        """Return group 1 of ``pattern`` in ``content``, or None when it doesn't match."""
        found = re.search(pattern, content)
        return found.group(1) if found else None

    def request(self, method, url, *args, **kwargs):
        """
        Send a request and transparently answer a Cloudflare challenge.

        :return: the final response (after a solved challenge, the response to
                 the follow-up request)
        :raises Exception: if a captcha page lacks the expected fields or the
                 captcha could not be solved
        """
        # The session headers are rebuilt from the chosen browser profile on
        # every request, so they are always sent in that profile's order.
        self.headers = self._hdrs.copy()

        resp = super(CloudflareScraper, self).request(method, url, *args, **kwargs)
        if resp.headers.get('content-encoding') == 'br' and brotli_available:
            resp._content = brdec(resp._content)

        # Debug request
        if self.debug:
            self.debugRequest(resp)

        # Check if Cloudflare anti-bot is on
        try:
            if self.is_cloudflare_challenge(resp):
                # Work around if the initial request is not a GET:
                # supersede it with a GET, then re-request with the original method.
                if resp.request.method != 'GET':
                    self.request('GET', resp.url)
                    resp = self.request(method, url, *args, **kwargs)
                else:
                    resp = self.solve_cf_challenge(resp, **kwargs)
        except NeedsCaptchaException:
            # solve the captcha
            # A missing field yields None here (instead of AttributeError from
            # .group on a failed match), so the check below can report it.
            site_key = self._captcha_field(r'data-sitekey="(.+?)"', resp.content)
            challenge_s = self._captcha_field(r'type="hidden" name="s" value="(.+?)"', resp.content)
            challenge_ray = self._captcha_field(r'data-ray="(.+?)"', resp.content)
            if not all([site_key, challenge_s, challenge_ray]):
                raise Exception("cf: Captcha site-key not found!")

            pitcher = pitchers.get_pitcher()("cf", resp.request.url, site_key,
                                             user_agent=self.headers["User-Agent"],
                                             cookies=self.cookies.get_dict(),
                                             is_invisible=True)

            logger.info("cf: Solving captcha")
            result = pitcher.throw()
            if not result:
                raise Exception("cf: Couldn't solve captcha!")

            parsed_url = urlparse(resp.url)
            domain = parsed_url.netloc
            submit_url = '{}://{}/cdn-cgi/l/chk_captcha'.format(parsed_url.scheme, domain)
            method = resp.request.method

            cloudflare_kwargs = {
                'allow_redirects': False,
                'headers': {'Referer': resp.url},
                'params': OrderedDict(
                    [
                        ('s', challenge_s),
                        ('g-recaptcha-response', result)
                    ]
                )
            }

            return self.request(method, submit_url, **cloudflare_kwargs)

        return resp

    def solve_cf_challenge(self, resp, **original_kwargs):
        """
        Answer the Javascript challenge in ``resp`` and follow the redirect.

        :param resp: the challenge response
        :param original_kwargs: keyword arguments of the original request; they
               are reused for the follow-up request
        :return: response of the request issued after the challenge was submitted
        :raises ValueError: if the challenge form fields can't be parsed
        """
        body = resp.text

        # Cloudflare requires a delay before solving the challenge. While the
        # delay is still at its default of 8, take it from the page if possible.
        if self.delay == 8:
            try:
                self.delay = float(
                    re.search(r'submit\(\);\r?\n\s*},\s*([0-9]+)', body).group(1)
                ) / float(1000)
            except Exception:
                # no delay found on the page; keep the default
                pass

        sleep(self.delay)

        parsed_url = urlparse(resp.url)
        domain = parsed_url.netloc
        submit_url = '{}://{}/cdn-cgi/l/chk_jschl'.format(parsed_url.scheme, domain)

        cloudflare_kwargs = deepcopy(original_kwargs)

        # Send the challenge page as Referer even when the caller supplied its
        # own headers (the deep copy keeps the caller's dict untouched).
        headers = cloudflare_kwargs.get('headers')
        if headers is None:
            headers = cloudflare_kwargs['headers'] = {}
        headers.setdefault('Referer', resp.url)

        try:
            # NOTE(review): when the caller passed its own `params`, the parsed
            # challenge fields below are not added to them - confirm intended.
            params = cloudflare_kwargs.setdefault(
                'params', OrderedDict(
                    [
                        ('s', re.search(r'name="s"\svalue="(?P<s_value>[^"]+)', body).group('s_value')),
                        ('jschl_vc', re.search(r'name="jschl_vc" value="(\w+)"', body).group(1)),
                        ('pass', re.search(r'name="pass" value="(.+?)"', body).group(1)),
                    ]
                )
            )

        except Exception as e:
            # Something is wrong with the page.
            # This may indicate Cloudflare has changed their anti-bot
            # technique. If you see this and are running the latest version,
            # please open a GitHub issue so I can update the code accordingly.
            raise ValueError("Unable to parse Cloudflare anti-bots page: {} {}".format(e, BUG_REPORT))

        # Solve the Javascript challenge
        params['jschl_answer'] = self.solve_challenge(body, domain)

        # Requests transforms any request into a GET after a redirect,
        # so the redirect has to be handled manually here to allow for
        # performing other types of requests even as the first request.
        method = resp.request.method

        cloudflare_kwargs['allow_redirects'] = False

        redirect = self.request(method, submit_url, **cloudflare_kwargs)
        redirect_location = urlparse(redirect.headers['Location'])
        if not redirect_location.netloc:
            # relative redirect: rebuild an absolute URL on the challenged host
            redirect_url = urlunparse(
                (
                    parsed_url.scheme,
                    domain,
                    redirect_location.path,
                    redirect_location.params,
                    redirect_location.query,
                    redirect_location.fragment
                )
            )
            return self.request(method, redirect_url, **original_kwargs)

        return self.request(method, redirect.headers['Location'], **original_kwargs)

    def solve_challenge(self, body, domain):
        """
        Compute the ``jschl_answer`` for a Javascript challenge page.

        :param body: text of the challenge page
        :param domain: host the challenge was served from
        :return: the value produced by evaluating the challenge script
        :raises ValueError: if the script can't be located or parsed, or its
                 result is not numeric
        """
        try:
            js = re.search(
                r"setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n",
                body
            ).group(1)
        except Exception:
            raise ValueError("Unable to identify Cloudflare IUAM Javascript on website. {}".format(BUG_REPORT))

        js = re.sub(r"a\.value = ((.+).toFixed\(10\))?", r"\1", js)
        js = re.sub(r'(e\s=\sfunction\(s\)\s{.*?};)', '', js, flags=re.DOTALL | re.MULTILINE)
        js = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", js).replace("t.length", str(len(domain)))

        js = js.replace('; 121', '')

        # Strip characters that could be used to exit the string context
        # These characters are not currently used in Cloudflare's arithmetic snippet
        js = re.sub(r"[\n\\']", "", js)

        if 'toFixed' not in js:
            raise ValueError("Error parsing Cloudflare IUAM Javascript challenge. {}".format(BUG_REPORT))

        try:
            jsEnv = """
            var t = "{domain}";
            var g = String.fromCharCode;

            o = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
            e = function(s) {{
                s += "==".slice(2 - (s.length & 3));
                var bm, r = "", r1, r2, i = 0;
                for (; i < s.length;) {{
                    bm = o.indexOf(s.charAt(i++)) << 18 | o.indexOf(s.charAt(i++)) << 12 | (r1 = o.indexOf(s.charAt(i++))) << 6 | (r2 = o.indexOf(s.charAt(i++)));
                    r += r1 === 64 ? g(bm >> 16 & 255) : r2 === 64 ? g(bm >> 16 & 255, bm >> 8 & 255) : g(bm >> 16 & 255, bm >> 8 & 255, bm & 255);
                }}
                return r;
            }};

            function italics (str) {{ return '<i>' + this + '</i>'; }};
            var document = {{
                getElementById: function () {{
                    return {{'innerHTML': '{innerHTML}'}};
                }}
            }};
            {js}
            """

            innerHTML = re.search(
                r'<div(?: [^<>]*)? id="([^<>]*?)">([^<>]*?)<\/div>',
                body,
                re.MULTILINE | re.DOTALL
            )
            innerHTML = innerHTML.group(2).replace("'", r"\'") if innerHTML else ""

            js = jsunfuck(jsEnv.format(domain=domain, innerHTML=innerHTML, js=js))

            def atob(s):
                return base64.b64decode('{}'.format(s)).decode('utf-8')

            # SECURITY: this evaluates script text taken from the remote page.
            # Python imports are disabled in the JS context before evaluating.
            js2py.disable_pyimport()
            context = js2py.EvalJs({'atob': atob})
            result = context.eval(js)
        except Exception:
            logger.error("Error executing Cloudflare IUAM Javascript. {}".format(BUG_REPORT))
            raise

        try:
            float(result)
        except Exception:
            raise ValueError("Cloudflare IUAM challenge returned unexpected answer. {}".format(BUG_REPORT))

        return result

    @classmethod
    def create_scraper(cls, sess=None, **kwargs):
        """
        Convenience function for creating a ready-to-go CloudflareScraper object.

        :param sess: optional existing session; its truthy ``auth``, ``cert``,
               ``cookies``, ``headers``, ``hooks``, ``params``, ``proxies`` and
               ``data`` attributes are copied onto the new scraper
        :param kwargs: passed to the class constructor
        """
        scraper = cls(**kwargs)

        if sess:
            attrs = ['auth', 'cert', 'cookies', 'headers', 'hooks', 'params', 'proxies', 'data']
            for attr in attrs:
                val = getattr(sess, attr, None)
                if val:
                    setattr(scraper, attr, val)

        return scraper

    # Functions for integrating cloudflare-scrape with other applications and scripts
    @classmethod
    def get_tokens(cls, url, user_agent=None, debug=False, **kwargs):
        """
        Fetch ``url`` and return the Cloudflare cookies that were set.

        :param user_agent: optional User-Agent to use instead of the random one
        :return: tuple of (dict with ``__cfduid`` and ``cf_clearance``,
                 User-Agent that was used)
        :raises ValueError: if no matching cookie domain was found
        """
        scraper = cls.create_scraper()
        scraper.debug = debug

        if user_agent:
            # request() rebuilds the session headers from _hdrs on every call,
            # so the override has to be stored there to actually be sent
            scraper._ua = user_agent
            scraper._hdrs['User-Agent'] = user_agent
            scraper.headers['User-Agent'] = user_agent

        try:
            resp = scraper.get(url, **kwargs)
            resp.raise_for_status()
        except Exception:
            logger.error("'{}' returned an error. Could not collect tokens.".format(url))
            raise

        domain = urlparse(resp.url).netloc
        cookie_domain = None

        for d in scraper.cookies.list_domains():
            # substring test on purpose: ".example.com" is found in ".www.example.com"
            if d.startswith('.') and d in ('.{}'.format(domain)):
                cookie_domain = d
                break
        else:
            raise ValueError("Unable to find Cloudflare cookies. Does the site actually have Cloudflare IUAM (\"I'm Under Attack Mode\") enabled?")

        return (
            {
                '__cfduid': scraper.cookies.get('__cfduid', '', domain=cookie_domain),
                'cf_clearance': scraper.cookies.get('cf_clearance', '', domain=cookie_domain)
            },
            scraper.headers['User-Agent']
        )

    @classmethod
    def get_cookie_string(cls, url, user_agent=None, debug=False, **kwargs):
        """
        Convenience function for building a Cookie HTTP header value.

        :return: tuple of ("name=value; name=value" string, User-Agent used)
        """
        tokens, user_agent = cls.get_tokens(url, user_agent=user_agent, debug=debug, **kwargs)
        return "; ".join("=".join(pair) for pair in tokens.items()), user_agent
|
||||
|
||||
create_scraper = CloudflareScraper.create_scraper
|
||||
get_tokens = CloudflareScraper.get_tokens
|
||||
get_cookie_string = CloudflareScraper.get_cookie_string
|
||||
@@ -1,80 +0,0 @@
|
||||
[
|
||||
{
|
||||
"connection": "close",
|
||||
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 5.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.102 Safari/537.36",
|
||||
"accept-encoding": "gzip,deflate",
|
||||
"accept-language": "en-US,en;q=0.8"
|
||||
},
|
||||
{
|
||||
"connection": "close",
|
||||
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 5.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36",
|
||||
"accept-encoding": "gzip,deflate",
|
||||
"accept-language": "en-US,en;q=0.8"
|
||||
},
|
||||
{
|
||||
"connection": "close",
|
||||
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||
"upgrade-insecure-requests": "1",
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36",
|
||||
"accept-language": "en-US,en;q=0.8",
|
||||
"accept-encoding": "gzip, deflate, sdch"
|
||||
},
|
||||
{
|
||||
"connection": "close",
|
||||
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||
"upgrade-insecure-requests": "1",
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36",
|
||||
"accept-language": "en-US,en;q=0.8",
|
||||
"accept-encoding": "gzip, deflate, sdch"
|
||||
},
|
||||
{
|
||||
"connection": "close",
|
||||
"accept": "*/*",
|
||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:30.0) Gecko/20100101 Firefox/30.0"
|
||||
},
|
||||
{
|
||||
"connection": "close",
|
||||
"accept": "image/jpeg, image/gif, image/pjpeg, application/x-ms-application, application/xaml+xml, application/x-ms-xbap, */*",
|
||||
"accept-language": "en-US",
|
||||
"user-agent": "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E)",
|
||||
"accept-encoding": "gzip, deflate"
|
||||
},
|
||||
{
|
||||
"connection": "close",
|
||||
"accept": "text/html, application/xhtml+xml, */*",
|
||||
"accept-language": "en-US",
|
||||
"user-agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
|
||||
"accept-encoding": "gzip, deflate"
|
||||
},
|
||||
{
|
||||
"connection": "close",
|
||||
"accept": "text/html, application/xhtml+xml, */*",
|
||||
"accept-language": "en-US",
|
||||
"user-agent": "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
|
||||
"accept-encoding": "gzip, deflate",
|
||||
"dnt": "1"
|
||||
},
|
||||
{
|
||||
"connection": "close",
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0",
|
||||
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"accept-language": "en-US,en;q=0.5",
|
||||
"accept-encoding": "gzip, deflate"
|
||||
},
|
||||
{
|
||||
"connection": "close",
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0",
|
||||
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"accept-language": "en-US,en;q=0.5",
|
||||
"accept-encoding": "gzip, deflate"
|
||||
},
|
||||
{
|
||||
"connection": "close",
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0",
|
||||
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"accept-language": "en-US,en;q=0.5",
|
||||
"accept-encoding": "gzip, deflate"
|
||||
}
|
||||
]
|
||||
@@ -1,336 +0,0 @@
|
||||
{
|
||||
"chrome": [
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.101 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.101 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.101 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.113 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": null,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.8",
|
||||
"Accept-Encoding": "gzip, deflate, sdch, br"
|
||||
}
|
||||
},
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": null,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.8",
|
||||
"Accept-Encoding": "gzip, deflate, br"
|
||||
}
|
||||
},
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.170 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": null,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
"Accept-Encoding": "gzip, deflate, br"
|
||||
}
|
||||
},
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"User-Agent": null,
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
"Accept-Encoding": "gzip, deflate, br"
|
||||
}
|
||||
},
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.40 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.40 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.28 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.28 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.28 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.28 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.28 Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": null,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
"Accept-Encoding": "gzip, deflate, br"
|
||||
}
|
||||
},
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Linux; Android 8.1.0; SM-N960F Build/M1AJQ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 8.0.0; SM-G965F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 Build/OPD1.170816.010) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 8.0.0; Pixel Build/OPR6.170623.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.1.1; SM-A530F Build/NMF26X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.1; Pixel Build/NDE63H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.0; SM-G955F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.0; SM-G950F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.0; SM-T825 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 6.0.1; SM-G930F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 6.0; Nexus 6 Build/MRA58K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 6.0; XT1092 Build/MPE24.49-18) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 6.0.1; SM-N910C Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 5.0.2; SM-G920F Build/LRX22G) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 5.0; Nexus 6 Build/LRX21O) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 9; Pixel 3 XL Build/PD1A.180720.030) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 9; Pixel 3 Build/PD1A.180720.030) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 9; Pixel 2 Build/PPR1.180610.009) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 4.4; Nexus 5 Build/KRT16M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 4.4.2; SM-T530 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 4.4.4; SM-N910C Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 5.1.1; Nexus 9 Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.1.1; SM-N950F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.90 Mobile Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": null,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
"Accept-Language": "en-US,en;q=0.9"
|
||||
}
|
||||
},
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Linux; Android 8.1.0; SM-T835 Build/M1AJQ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 5.0; XT1092 Build/LXE22.46-19) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": null,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
"Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,675 @@
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import ssl
|
||||
import requests
|
||||
|
||||
try:
|
||||
import copyreg
|
||||
except ImportError:
|
||||
import copy_reg as copyreg
|
||||
|
||||
try:
|
||||
from HTMLParser import HTMLParser
|
||||
except ImportError:
|
||||
if sys.version_info >= (3, 4):
|
||||
import html
|
||||
else:
|
||||
from html.parser import HTMLParser
|
||||
|
||||
from copy import deepcopy
|
||||
from time import sleep
|
||||
from collections import OrderedDict
|
||||
|
||||
from requests.sessions import Session
|
||||
from requests.adapters import HTTPAdapter
|
||||
|
||||
from .exceptions import (
|
||||
CloudflareLoopProtection,
|
||||
CloudflareCode1020,
|
||||
CloudflareIUAMError,
|
||||
CloudflareReCaptchaError,
|
||||
CloudflareReCaptchaProvider
|
||||
)
|
||||
|
||||
from .interpreters import JavaScriptInterpreter
|
||||
from .reCaptcha import reCaptcha
|
||||
from .user_agent import User_Agent
|
||||
|
||||
try:
|
||||
from requests_toolbelt.utils import dump
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import brotli
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
from urlparse import urlparse, urljoin
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
__version__ = '1.2.31'
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class CipherSuiteAdapter(HTTPAdapter):
    """
    Transport adapter that hands one SSL context (optionally restricted to a
    given cipher suite) to every pool manager and proxy manager it creates.
    """

    __attrs__ = [
        'ssl_context',
        'max_retries',
        'config',
        '_pool_connections',
        '_pool_maxsize',
        '_pool_block'
    ]

    def __init__(self, *args, **kwargs):
        """
        :param ssl_context: optional ready-made ``ssl.SSLContext``; used as-is.
        :param cipherSuite: optional OpenSSL cipher string, applied to the
            context built here when no ``ssl_context`` was supplied.

        Every other positional / keyword argument is forwarded unchanged to
        ``HTTPAdapter.__init__``.
        """
        self.ssl_context = kwargs.pop('ssl_context', None)
        self.cipherSuite = kwargs.pop('cipherSuite', None)

        if not self.ssl_context:
            self.ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)

            # set_ciphers() rejects None, so keep the context defaults when
            # the caller did not ask for a specific suite.
            if self.cipherSuite:
                self.ssl_context.set_ciphers(self.cipherSuite)

            # Refuse everything older than TLS 1.2.
            self.ssl_context.options |= (
                ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
            )

        # Forward positional arguments too; they were previously accepted but dropped.
        super(CipherSuiteAdapter, self).__init__(*args, **kwargs)

    # ------------------------------------------------------------------------------- #

    def init_poolmanager(self, *args, **kwargs):
        """Create the pool manager with this adapter's SSL context injected."""
        kwargs['ssl_context'] = self.ssl_context
        return super(CipherSuiteAdapter, self).init_poolmanager(*args, **kwargs)

    # ------------------------------------------------------------------------------- #

    def proxy_manager_for(self, *args, **kwargs):
        """Create the proxy manager with this adapter's SSL context injected."""
        kwargs['ssl_context'] = self.ssl_context
        return super(CipherSuiteAdapter, self).proxy_manager_for(*args, **kwargs)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class CloudScraper(Session):
    """
    ``Session`` subclass that inspects every response for a Cloudflare
    challenge page (IUAM JavaScript challenge, reCaptcha challenge, firewall
    code 1020) and tries to answer it before returning the response.
    """

    def __init__(self, *args, **kwargs):
        """
        Keyword arguments consumed here (all others are passed to ``Session``):

        :param debug: when truthy, every response is passed to ``debugRequest``.
        :param delay: seconds to sleep before answering an IUAM challenge;
            when unset it is extracted from the challenge page.
        :param cipherSuite: cipher string, or list of cipher names joined with ':'.
        :param interpreter: JavaScript interpreter name (default ``'native'``).
        :param recaptcha: dict of reCaptcha provider settings (default ``{}``).
        :param allow_brotli: defaults to whether ``brotli`` is already imported.
        :param browser: forwarded to ``User_Agent``.
        :param solveDepth: consecutive challenge attempts allowed (default 3).
        """
        self.debug = kwargs.pop('debug', False)
        self.delay = kwargs.pop('delay', None)
        self.cipherSuite = kwargs.pop('cipherSuite', None)
        self.interpreter = kwargs.pop('interpreter', 'native')
        self.recaptcha = kwargs.pop('recaptcha', {})
        self.allow_brotli = kwargs.pop('allow_brotli', 'brotli' in sys.modules)

        self.user_agent = User_Agent(
            allow_brotli=self.allow_brotli,
            browser=kwargs.pop('browser', None)
        )

        self._solveDepthCnt = 0
        self.solveDepth = kwargs.pop('solveDepth', 3)

        super(CloudScraper, self).__init__(*args, **kwargs)

        # pylint: disable=E0203
        if 'requests' in self.headers['User-Agent']:
            # ------------------------------------------------------------------------------- #
            # Set a random User-Agent if no custom User-Agent has been set
            # ------------------------------------------------------------------------------- #
            self.headers = self.user_agent.headers
            if not self.cipherSuite:
                self.cipherSuite = self.user_agent.cipherSuite

        if isinstance(self.cipherSuite, list):
            self.cipherSuite = ':'.join(self.cipherSuite)

        self.mount(
            'https://',
            CipherSuiteAdapter(cipherSuite=self.cipherSuite)
        )

        # purely to allow us to pickle dump
        copyreg.pickle(ssl.SSLContext, lambda obj: (obj.__class__, (obj.protocol,)))

    # ------------------------------------------------------------------------------- #
    # Allow us to pickle our session back with all variables
    # ------------------------------------------------------------------------------- #

    def __getstate__(self):
        """Pickle the complete instance ``__dict__``."""
        return self.__dict__

    # ------------------------------------------------------------------------------- #
    # Raise an Exception with no stacktrace and reset depth counter.
    # ------------------------------------------------------------------------------- #

    def simpleException(self, exception, msg):
        """
        Reset the solve-depth counter and raise ``exception(msg)``.

        NOTE: this sets ``sys.tracebacklimit`` to 0, which is a process-wide
        setting and stays in effect after the exception is raised.
        """
        self._solveDepthCnt = 0
        sys.tracebacklimit = 0
        raise exception(msg)

    # ------------------------------------------------------------------------------- #
    # debug the request via the response
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def debugRequest(req):
        """
        Print a dump of the request/response exchange held by ``req``.

        NOTE: ``dump`` comes from the optional ``requests_toolbelt`` import at
        the top of the module; only ``ValueError`` is handled here.
        """
        try:
            print(dump.dump_all(req).decode('utf-8'))
        except ValueError as e:
            print("Debug Error: {}".format(getattr(e, 'message', e)))

    # ------------------------------------------------------------------------------- #
    # Unescape / decode html entities
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def unescape(html_text):
        """Return ``html_text`` with HTML entities decoded."""
        # ``html`` is what the module imports on Python >= 3.4; every other
        # interpreter gets ``HTMLParser`` instead.
        if sys.version_info >= (3, 4):
            return html.unescape(html_text)

        return HTMLParser().unescape(html_text)

    # ------------------------------------------------------------------------------- #
    # Decode Brotli on older versions of urllib3 manually
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def _versionTuple(version):
        """Return the first three numeric components of ``version`` as a tuple of ints."""
        return tuple(int(part) for part in re.findall(r'\d+', version)[:3])

    def decodeBrotli(self, resp):
        """
        Decompress a Brotli-encoded body by hand when urllib3 is older than
        1.25.1, then return ``resp`` (always the same object).

        The version is compared numerically; a plain string comparison would
        rank e.g. '1.9.1' above '1.25.1'.
        """
        urllib3Version = requests.packages.urllib3.__version__
        needsManualDecode = (
            self._versionTuple(urllib3Version) < (1, 25, 1)
            and resp.headers.get('Content-Encoding') == 'br'
        )

        if needsManualDecode:
            if self.allow_brotli and resp._content:
                resp._content = brotli.decompress(resp.content)
            else:
                logging.warning(
                    'You\'re running urllib3 {}, Brotli content detected, '
                    'Which requires manual decompression, '
                    'But option allow_brotli is set to False, '
                    'We will not continue to decompress.'.format(urllib3Version)
                )

        return resp

    # ------------------------------------------------------------------------------- #
    # Our hijacker request function
    # ------------------------------------------------------------------------------- #

    def request(self, method, url, *args, **kwargs):
        """
        Perform the request through ``Session.request`` and, when the response
        is a Cloudflare challenge, try to solve it.

        :raises CloudflareLoopProtection: once ``solveDepth`` consecutive
            challenge attempts have been made.
        """
        # pylint: disable=E0203
        proxies = kwargs.get('proxies')
        if proxies and proxies != self.proxies:
            self.proxies = proxies

        resp = self.decodeBrotli(
            super(CloudScraper, self).request(method, url, *args, **kwargs)
        )

        # ------------------------------------------------------------------------------- #
        # Debug request
        # ------------------------------------------------------------------------------- #

        if self.debug:
            self.debugRequest(resp)

        # Check if Cloudflare anti-bot is on
        if self.is_Challenge_Request(resp):
            # ------------------------------------------------------------------------------- #
            # Try to solve the challenge and send it back
            # ------------------------------------------------------------------------------- #

            if self._solveDepthCnt >= self.solveDepth:
                # simpleException() zeroes the counter, so capture it first.
                attempts = self._solveDepthCnt
                self.simpleException(
                    CloudflareLoopProtection,
                    "!!Loop Protection!! We have tried to solve {} time(s) in a row.".format(attempts)
                )

            self._solveDepthCnt += 1

            resp = self.Challenge_Response(resp, **kwargs)
        elif not resp.is_redirect and resp.status_code not in [429, 503]:
            self._solveDepthCnt = 0

        return resp

    # ------------------------------------------------------------------------------- #
    # check if the response contains a valid Cloudflare challenge
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def is_IUAM_Challenge(resp):
        """Truthy when ``resp`` is a Cloudflare 429/503 page containing the jschl form."""
        try:
            return (
                resp.headers.get('Server', '').startswith('cloudflare')
                and resp.status_code in [429, 503]
                and re.search(
                    r'action="/.*?__cf_chl_jschl_tk__=\S+".*?name="jschl_vc"\svalue=.*?',
                    resp.text,
                    re.M | re.DOTALL
                )
            )
        except AttributeError:
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # check if the response contains a valid Cloudflare reCaptcha challenge
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def is_reCaptcha_Challenge(resp):
        """Truthy when ``resp`` is a Cloudflare 403 page containing the captcha form."""
        try:
            return (
                resp.headers.get('Server', '').startswith('cloudflare')
                and resp.status_code == 403
                and re.search(
                    r'action="/.*?__cf_chl_captcha_tk__=\S+".*?data\-sitekey=.*?',
                    resp.text,
                    re.M | re.DOTALL
                )
            )
        except AttributeError:
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # check if the response contains Firewall 1020 Error
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def is_Firewall_Blocked(resp):
        """Truthy when ``resp`` is a Cloudflare 403 page showing error code 1020."""
        try:
            return (
                resp.headers.get('Server', '').startswith('cloudflare')
                and resp.status_code == 403
                and re.search(
                    r'<span class="cf-error-code">1020</span>',
                    resp.text,
                    re.M | re.DOTALL
                )
            )
        except AttributeError:
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # Wrapper for is_reCaptcha_Challenge, is_IUAM_Challenge, is_Firewall_Blocked
    # ------------------------------------------------------------------------------- #

    def is_Challenge_Request(self, resp):
        """
        Return True when ``resp`` is a reCaptcha or IUAM challenge.

        :raises CloudflareCode1020: when the response is a firewall 1020 block.
        """
        if self.is_Firewall_Blocked(resp):
            self.simpleException(
                CloudflareCode1020,
                'Cloudflare has blocked this request (Code 1020 Detected).'
            )

        if self.is_reCaptcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
            return True

        return False

    # ------------------------------------------------------------------------------- #
    # Try to solve cloudflare javascript challenge.
    # ------------------------------------------------------------------------------- #

    def IUAM_Challenge_Response(self, body, url, interpreter):
        """
        Build the answer to an IUAM challenge page.

        :param body: HTML text of the challenge page.
        :param url: URL the challenge page was served from.
        :param interpreter: name handed to ``JavaScriptInterpreter.dynamicImport``.
        :returns: dict with the submit ``'url'`` and the form ``'data'``.
        :raises CloudflareIUAMError: when the form cannot be parsed or the
            JavaScript challenge cannot be solved.
        """
        try:
            formPayload = re.search(
                r'<form (?P<form>id="challenge-form" action="(?P<challengeUUID>.*?'
                r'__cf_chl_jschl_tk__=\S+)"(.*?)</form>)',
                body,
                re.M | re.DOTALL
            ).groupdict()

            if not all(key in formPayload for key in ['form', 'challengeUUID']):
                self.simpleException(
                    CloudflareIUAMError,
                    "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
                )

            payload = OrderedDict(
                re.findall(
                    r'name="(r|jschl_vc|pass)"\svalue="(.*?)"',
                    formPayload['form']
                )
            )

        except AttributeError:
            # re.search() returned None: the challenge form was not found.
            self.simpleException(
                CloudflareIUAMError,
                "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
            )

        hostParsed = urlparse(url)

        try:
            payload['jschl_answer'] = JavaScriptInterpreter.dynamicImport(
                interpreter
            ).solveChallenge(body, hostParsed.netloc)
        except Exception as e:
            self.simpleException(
                CloudflareIUAMError,
                'Unable to parse Cloudflare anti-bots page: {}'.format(
                    getattr(e, 'message', e)
                )
            )

        return {
            'url': '{}://{}{}'.format(
                hostParsed.scheme,
                hostParsed.netloc,
                self.unescape(formPayload['challengeUUID'])
            ),
            'data': payload
        }

    # ------------------------------------------------------------------------------- #
    # Try to solve the reCaptcha challenge via 3rd party.
    # ------------------------------------------------------------------------------- #

    def reCaptcha_Challenge_Response(self, provider, provider_params, body, url):
        """
        Build the answer to a reCaptcha challenge page using a provider.

        :param provider: provider name; lower-cased before ``reCaptcha.dynamicImport``.
        :param provider_params: settings passed through to ``solveCaptcha``.
        :param body: HTML text of the challenge page.
        :param url: URL the challenge page was served from.
        :returns: dict with the submit ``'url'`` and the form ``'data'``.
        :raises CloudflareReCaptchaError: when the form cannot be parsed.
        """
        try:
            formPayload = re.search(
                r'<form class="challenge-form" (?P<form>id="challenge-form" '
                r'action="(?P<challengeUUID>.*?__cf_chl_captcha_tk__=\S+)"(.*?)</form>)',
                body,
                re.M | re.DOTALL
            ).groupdict()

            if not all(key in formPayload for key in ['form', 'challengeUUID']):
                self.simpleException(
                    CloudflareReCaptchaError,
                    "Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly."
                )

            payload = OrderedDict(
                re.findall(
                    r'(name="r"\svalue|data-ray|data-sitekey)="(.*?)"',
                    formPayload['form']
                )
            )

        except AttributeError:
            # re.search() returned None: the challenge form was not found.
            self.simpleException(
                CloudflareReCaptchaError,
                "Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly."
            )

        hostParsed = urlparse(url)

        return {
            'url': '{}://{}{}'.format(
                hostParsed.scheme,
                hostParsed.netloc,
                self.unescape(formPayload['challengeUUID'])
            ),
            'data': OrderedDict([
                ('r', payload.get('name="r" value', '')),
                ('id', payload.get('data-ray')),
                (
                    'g-recaptcha-response',
                    reCaptcha.dynamicImport(
                        provider.lower()
                    ).solveCaptcha(
                        url,
                        payload['data-sitekey'],
                        provider_params
                    )
                )
            ])
        }

    # ------------------------------------------------------------------------------- #
    # Attempt to handle and send the challenge response back to cloudflare
    # ------------------------------------------------------------------------------- #

    def Challenge_Response(self, resp, **kwargs):
        """
        Answer the challenge contained in ``resp`` and return the response
        that results from submitting it.

        :param resp: the challenge response.
        :param kwargs: keyword arguments of the original request; a deep copy
            is reused for the follow-up requests.
        :raises CloudflareReCaptchaProvider: reCaptcha page but no provider configured.
        :raises CloudflareIUAMError: the IUAM delay cannot be extracted.
        """
        if self.is_reCaptcha_Challenge(resp):
            # ------------------------------------------------------------------------------- #
            # double down on the request as some websites are only checking
            # if cfuid is populated before issuing reCaptcha.
            # ------------------------------------------------------------------------------- #

            resp = self.decodeBrotli(
                super(CloudScraper, self).request(resp.request.method, resp.url, **kwargs)
            )

            if not self.is_reCaptcha_Challenge(resp):
                return resp

            # ------------------------------------------------------------------------------- #
            # if no reCaptcha provider raise a runtime error.
            # ------------------------------------------------------------------------------- #

            if not self.recaptcha or not isinstance(self.recaptcha, dict) or not self.recaptcha.get('provider'):
                self.simpleException(
                    CloudflareReCaptchaProvider,
                    "Cloudflare reCaptcha detected, unfortunately you haven't loaded an anti reCaptcha provider "
                    "correctly via the 'recaptcha' parameter."
                )

            # ------------------------------------------------------------------------------- #
            # if provider is return_response, return the response without doing anything.
            # ------------------------------------------------------------------------------- #

            if self.recaptcha.get('provider') == 'return_response':
                return resp

            self.recaptcha['proxies'] = self.proxies
            submit_url = self.reCaptcha_Challenge_Response(
                self.recaptcha.get('provider'),
                self.recaptcha,
                resp.text,
                resp.url
            )
        else:
            # ------------------------------------------------------------------------------- #
            # Cloudflare requires a delay before solving the challenge
            # ------------------------------------------------------------------------------- #

            if not self.delay:
                try:
                    # The page holds the delay in milliseconds; sleep() wants seconds.
                    self.delay = float(
                        re.search(
                            r'submit\(\);\r?\n\s*},\s*([0-9]+)',
                            resp.text
                        ).group(1)
                    ) / float(1000)
                except (AttributeError, ValueError):
                    self.simpleException(
                        CloudflareIUAMError,
                        "Cloudflare IUAM possibility malformed, issue extracing delay value."
                    )

            sleep(self.delay)

            # ------------------------------------------------------------------------------- #

            submit_url = self.IUAM_Challenge_Response(
                resp.text,
                resp.url,
                self.interpreter
            )

        # ------------------------------------------------------------------------------- #
        # Send the Challenge Response back to Cloudflare
        # ------------------------------------------------------------------------------- #

        if submit_url:

            def updateAttr(obj, name, newValue):
                """Merge ``newValue`` into ``obj[name]``, replacing it with a dict if it cannot be updated."""
                try:
                    obj[name].update(newValue)
                except (AttributeError, KeyError):
                    obj[name] = {}
                    obj[name].update(newValue)
                return obj[name]

            cloudflare_kwargs = deepcopy(kwargs)
            cloudflare_kwargs['allow_redirects'] = False
            cloudflare_kwargs['data'] = updateAttr(
                cloudflare_kwargs,
                'data',
                submit_url['data']
            )

            urlParsed = urlparse(resp.url)
            cloudflare_kwargs['headers'] = updateAttr(
                cloudflare_kwargs,
                'headers',
                {
                    'Origin': '{}://{}'.format(urlParsed.scheme, urlParsed.netloc),
                    'Referer': resp.url
                }
            )

            challengeSubmitResponse = self.request(
                'POST',
                submit_url['url'],
                **cloudflare_kwargs
            )

            # ------------------------------------------------------------------------------- #
            # Return response if Cloudflare is doing content pass through instead of 3xx
            # else request with redirect URL also handle protocol scheme change http -> https
            # ------------------------------------------------------------------------------- #

            if not challengeSubmitResponse.is_redirect:
                return challengeSubmitResponse

            cloudflare_kwargs = deepcopy(kwargs)
            cloudflare_kwargs['headers'] = updateAttr(
                cloudflare_kwargs,
                'headers',
                {'Referer': challengeSubmitResponse.url}
            )

            location = challengeSubmitResponse.headers['Location']
            if not urlparse(location).netloc:
                # Relative redirect: resolve it against the submit URL.
                redirect_location = urljoin(challengeSubmitResponse.url, location)
            else:
                redirect_location = location

            return self.request(
                resp.request.method,
                redirect_location,
                **cloudflare_kwargs
            )

        # ------------------------------------------------------------------------------- #
        # We shouldn't be here...
        # Re-request the original query and/or process again....
        # ------------------------------------------------------------------------------- #

        return self.request(resp.request.method, resp.url, **kwargs)

    # ------------------------------------------------------------------------------- #

    @classmethod
    def create_scraper(cls, sess=None, **kwargs):
        """
        Convenience function for creating a ready-to-go CloudScraper object.

        When ``sess`` is given, its truthy ``auth``, ``cert``, ``cookies``,
        ``headers``, ``hooks``, ``params``, ``proxies`` and ``data``
        attributes are copied onto the new scraper.
        """
        scraper = cls(**kwargs)

        if sess:
            for attr in ['auth', 'cert', 'cookies', 'headers', 'hooks', 'params', 'proxies', 'data']:
                val = getattr(sess, attr, None)
                if val:
                    setattr(scraper, attr, val)

        return scraper

    # ------------------------------------------------------------------------------- #
    # Functions for integrating cloudscraper with other applications and scripts
    # ------------------------------------------------------------------------------- #

    @classmethod
    def get_tokens(cls, url, **kwargs):
        """
        Request ``url`` with a fresh scraper and return the Cloudflare cookies.

        Scraper options (``allow_brotli``, ``browser``, ``debug``, ``delay``,
        ``interpreter``, ``recaptcha``) are taken out of ``kwargs``; whatever
        remains is passed to ``scraper.get``.

        :returns: ``({'__cfduid': ..., 'cf_clearance': ...}, user_agent)``.
        :raises CloudflareIUAMError: when no matching cookie domain is found.
        """
        scraperOptions = [
            'allow_brotli',
            'browser',
            'debug',
            'delay',
            'interpreter',
            'recaptcha'
        ]
        scraper = cls.create_scraper(
            **{
                field: kwargs.pop(field, None)
                for field in scraperOptions if field in kwargs
            }
        )

        try:
            resp = scraper.get(url, **kwargs)
            resp.raise_for_status()
        except Exception:
            logging.error('"{}" returned an error. Could not collect tokens.'.format(url))
            raise

        domain = urlparse(resp.url).netloc
        # noinspection PyUnusedLocal
        cookie_domain = None

        for d in scraper.cookies.list_domains():
            # Substring test against ".<netloc>" (a str, not a tuple).
            if d.startswith('.') and d in '.{}'.format(domain):
                cookie_domain = d
                break
        else:
            # simpleException is an instance method: it must be called on the
            # scraper. Calling it on ``cls`` leaves it one argument short.
            scraper.simpleException(
                CloudflareIUAMError,
                "Unable to find Cloudflare cookies. Does the site actually "
                "have Cloudflare IUAM (I'm Under Attack Mode) enabled?"
            )

        return (
            {
                '__cfduid': scraper.cookies.get('__cfduid', '', domain=cookie_domain),
                'cf_clearance': scraper.cookies.get('cf_clearance', '', domain=cookie_domain)
            },
            scraper.headers['User-Agent']
        )

    # ------------------------------------------------------------------------------- #

    @classmethod
    def get_cookie_string(cls, url, **kwargs):
        """
        Convenience function for building a Cookie HTTP header value.

        :returns: ``(cookie_header_value, user_agent)``.
        """
        tokens, user_agent = cls.get_tokens(url, **kwargs)
        return '; '.join('='.join(pair) for pair in tokens.items()), user_agent
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
# Import-time notice: printed once when the linked OpenSSL is older than 1.1.1.
if not ssl.OPENSSL_VERSION_INFO >= (1, 1, 1):
    print(
        (
            "DEPRECATION: The OpenSSL being used by this python install ({}) does not meet the minimum supported "
            "version (>= OpenSSL 1.1.1) in order to support TLS 1.3 required by Cloudflare, "
            "You may encounter an unexpected reCaptcha or cloudflare 1020 blocks."
        ).format(ssl.OPENSSL_VERSION)
    )
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
# Module-level shortcuts to the CloudScraper classmethods of the same name.
create_scraper, get_tokens, get_cookie_string = (
    CloudScraper.create_scraper,
    CloudScraper.get_tokens,
    CloudScraper.get_cookie_string,
)
|
||||
@@ -0,0 +1,99 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
"""
|
||||
cloudscraper.exceptions
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
This module contains the set of cloudscraper exceptions.
|
||||
"""
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class CloudflareException(Exception):
    """Root of every Cloudflare-related exception raised by cloudscraper."""
|
||||
|
||||
|
||||
class CloudflareLoopProtection(CloudflareException):
    """Raised as recursive-depth protection."""
|
||||
|
||||
|
||||
class CloudflareCode1020(CloudflareException):
    """Raised when Cloudflare blocks the request with code 1020."""
|
||||
|
||||
|
||||
class CloudflareIUAMError(CloudflareException):
    """
    Raised when the IUAM parameters cannot be extracted
    from the Cloudflare payload.
    """
|
||||
|
||||
|
||||
class CloudflareReCaptchaError(CloudflareException):
    """
    Raised when the reCaptcha parameters cannot be extracted
    from the Cloudflare payload.
    """
|
||||
|
||||
|
||||
class CloudflareReCaptchaProvider(CloudflareException):
    """Raised when no reCaptcha provider has been loaded for Cloudflare."""
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class reCaptchaException(Exception):
    """Root of every exception raised by the cloudscraper reCaptcha providers."""
|
||||
|
||||
|
||||
class reCaptchaServiceUnavailable(reCaptchaException):
|
||||
"""
|
||||
Raise an exception for external services that cannot be reached
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaAPIError(reCaptchaException):
|
||||
"""
|
||||
Raise an error for error from API response.
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaAccountError(reCaptchaException):
|
||||
"""
|
||||
Raise an error for reCaptcha provider account problem.
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaTimeout(reCaptchaException):
|
||||
"""
|
||||
Raise an exception for reCaptcha provider taking too long.
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaParameter(reCaptchaException):
|
||||
"""
|
||||
Raise an exception for bad or missing Parameter.
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaBadJobID(reCaptchaException):
|
||||
"""
|
||||
Raise an exception for invalid job id.
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaReportError(reCaptchaException):
|
||||
"""
|
||||
Raise an error for reCaptcha provider unable to report bad solve.
|
||||
"""
|
||||
@@ -0,0 +1,54 @@
|
||||
import sys
|
||||
import logging
|
||||
import abc
|
||||
|
||||
# abc.ABC is used on Python 3.4 and newer; otherwise an equivalent base class
# is built directly from the ABCMeta metaclass.
ABC = abc.ABC if sys.version_info >= (3, 4) else abc.ABCMeta('ABC', (), {})  # noqa

# ------------------------------------------------------------------------------- #

# Registry of interpreter instances, keyed by the name passed to __init__.
interpreters = {}
BUG_REPORT = 'Cloudflare may have changed their technique, or there may be a bug in the script.'

# ------------------------------------------------------------------------------- #


class JavaScriptInterpreter(ABC):
    """
    Abstract base for the JavaScript challenge interpreters.

    Constructing a subclass instance stores it in the module-level
    ``interpreters`` registry under the given name.
    """

    # ------------------------------------------------------------------------------- #

    @abc.abstractmethod
    def __init__(self, name):
        """Register this instance in ``interpreters`` under ``name``."""
        interpreters[name] = self

    # ------------------------------------------------------------------------------- #

    @classmethod
    def dynamicImport(cls, name):
        """
        Return the interpreter registered as ``name``.

        When nothing is registered yet, the submodule ``<this module>.<name>``
        is imported, which is expected to register an instance as a side
        effect. ``ImportError`` is logged and re-raised when the import fails
        or leaves no ``JavaScriptInterpreter`` instance in the registry.
        """
        if name in interpreters:
            return interpreters[name]

        try:
            __import__('{}.{}'.format(cls.__module__, name))
            registered = interpreters.get(name)
            if not isinstance(registered, JavaScriptInterpreter):
                raise ImportError('The interpreter was not initialized.')
        except ImportError:
            logging.error('Unable to load {} interpreter'.format(name))
            raise

        return interpreters[name]

    # ------------------------------------------------------------------------------- #

    @abc.abstractmethod
    def eval(self, jsEnv, js):
        """
        Evaluate the challenge; implemented by each concrete interpreter.

        ``solveChallenge`` invokes this positionally as ``eval(body, domain)``.
        """
        pass

    # ------------------------------------------------------------------------------- #

    def solveChallenge(self, body, domain):
        """
        Run ``eval(body, domain)`` and return its result converted to float.

        Any exception is logged together with ``BUG_REPORT`` and re-raised.
        """
        try:
            answer = self.eval(body, domain)
            return float(answer)
        except Exception:
            logging.error('Error executing Cloudflare IUAM Javascript. {}'.format(BUG_REPORT))
            raise
|
||||
@@ -0,0 +1,103 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import os
|
||||
import sys
|
||||
import ctypes.util
|
||||
|
||||
from ctypes import c_void_p, c_size_t, byref, create_string_buffer, CDLL
|
||||
|
||||
from . import JavaScriptInterpreter
|
||||
from .encapsulated import template
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class ChallengeInterpreter(JavaScriptInterpreter):
    """
    Challenge interpreter that runs the IUAM script through the ChakraCore
    shared library via ctypes. Registers itself under the name 'chakracore'.
    """

    # ------------------------------------------------------------------------------- #

    def __init__(self):
        super(ChallengeInterpreter, self).__init__('chakracore')

    # ------------------------------------------------------------------------------- #

    def eval(self, body, domain):
        """
        Build the challenge script with ``template(body, domain)``, run it in a
        fresh ChakraCore runtime and return the result converted to a string
        (the raw buffer value, i.e. bytes on Python 3).

        Raises RuntimeError when the library cannot be located or loaded.
        Return codes of the individual Js* calls are not inspected.
        """
        chakraCoreLibrary = None

        # check current working directory.
        # Every candidate is probed; when several exist the last one wins.
        for _libraryFile in ['libChakraCore.so', 'libChakraCore.dylib', 'ChakraCore.dll']:
            if os.path.isfile(os.path.join(os.getcwd(), _libraryFile)):
                chakraCoreLibrary = os.path.join(os.getcwd(), _libraryFile)

        if not chakraCoreLibrary:
            chakraCoreLibrary = ctypes.util.find_library('ChakraCore')

        if not chakraCoreLibrary:
            sys.tracebacklimit = 0
            raise RuntimeError(
                'ChakraCore library not found in current path or any of your system library paths, '
                'please download from https://www.github.com/VeNoMouS/cloudscraper/tree/ChakraCore/, '
                'or https://github.com/Microsoft/ChakraCore/'
            )

        try:
            chakraCore = CDLL(chakraCoreLibrary)
        except OSError:
            sys.tracebacklimit = 0
            raise RuntimeError('There was an error loading the ChakraCore library {}'.format(chakraCoreLibrary))

        if sys.platform != 'win32':
            # NOTE(review): presumably 1/2 are DLL_PROCESS_ATTACH/DLL_THREAD_ATTACH,
            # invoked by hand because nothing calls DllMain off Windows - confirm.
            chakraCore.DllMain(0, 1, 0)
            chakraCore.DllMain(0, 2, 0)

        script = create_string_buffer(template(body, domain).encode('utf-16'))

        runtime = c_void_p()
        chakraCore.JsCreateRuntime(0, 0, byref(runtime))

        try:
            context = c_void_p()
            chakraCore.JsCreateContext(runtime, byref(context))
            chakraCore.JsSetCurrentContext(context)

            # Must be bytes: ctypes passes bytes as char*, whereas a Python 3
            # str would be marshalled as wchar_t* and garble the name.
            scriptName = b'iuam-challenge.js'
            fname = c_void_p()
            chakraCore.JsCreateString(
                scriptName,
                len(scriptName),
                byref(fname)
            )

            scriptSource = c_void_p()
            chakraCore.JsCreateExternalArrayBuffer(
                script,
                len(script),
                0,
                0,
                byref(scriptSource)
            )

            jsResult = c_void_p()
            # NOTE(review): 0x02 looks like the "array buffer is UTF-16 encoded"
            # parse attribute, matching the utf-16 encoding above - confirm.
            chakraCore.JsRun(scriptSource, 0, fname, 0x02, byref(jsResult))

            resultJSString = c_void_p()
            chakraCore.JsConvertValueToString(jsResult, byref(resultJSString))

            # First call only queries the length, second call copies the data.
            stringLength = c_size_t()
            chakraCore.JsCopyString(resultJSString, 0, 0, byref(stringLength))

            resultSTR = create_string_buffer(stringLength.value + 1)
            chakraCore.JsCopyString(
                resultJSString,
                byref(resultSTR),
                stringLength.value + 1,
                0
            )

            return resultSTR.value
        finally:
            # A runtime whose context is still current cannot be disposed, so
            # the context is cleared first; the finally also covers failures.
            chakraCore.JsSetCurrentContext(None)
            chakraCore.JsDisposeRuntime(runtime)


# ------------------------------------------------------------------------------- #

ChallengeInterpreter()
|
||||
@@ -0,0 +1,58 @@
|
||||
import logging
|
||||
import re
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
def template(body, domain):
    """
    Build a standalone JavaScript snippet from a Cloudflare IUAM page.

    The challenge code is cut out of ``body`` with a regular expression,
    whitespace runs are collapsed and a trailing ``a.value;`` expression is
    appended. It is prefixed with a small stub environment providing
    ``String.prototype.italics`` and a fake ``document`` whose
    ``createElement`` exposes ``https://<domain>/`` and whose
    ``getElementById`` returns the text of the first ``<div ... id="...">``
    found in ``body`` (empty string when there is none).

    Raises ValueError when the challenge code cannot be located.
    """
    BUG_REPORT = 'Cloudflare may have changed their technique, or there may be a bug in the script.'

    try:
        js = re.search(
            r'setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n',
            body
        ).group(1)
    except Exception:
        raise ValueError('Unable to identify Cloudflare IUAM Javascript on website. {}'.format(BUG_REPORT))

    js = re.sub(r'\s{2,}', ' ', js, flags=re.MULTILINE | re.DOTALL).replace('\'; 121\'', '')
    js += '\na.value;'

    jsEnv = '''
        String.prototype.italics=function(str) {{return "<i>" + this + "</i>";}};
        var document = {{
            createElement: function () {{
                return {{ firstChild: {{ href: "https://{domain}/" }} }}
            }},
            getElementById: function () {{
                return {{"innerHTML": "{innerHTML}"}};
            }}
        }};
    '''

    try:
        innerHTML = re.search(
            r'<div(?: [^<>]*)? id="([^<>]*?)">([^<>]*?)</div>',
            body,
            re.MULTILINE | re.DOTALL
        )
        innerHTML = innerHTML.group(2) if innerHTML else ''
    except Exception:
        logging.error('Error extracting Cloudflare IUAM Javascript. {}'.format(BUG_REPORT))
        raise

    # flags must be passed by keyword: the fourth positional argument of
    # re.sub is `count`, which would cap the number of replacements.
    environment = re.sub(
        r'\s{2,}',
        ' ',
        jsEnv.format(
            domain=domain,
            innerHTML=innerHTML
        ),
        flags=re.MULTILINE | re.DOTALL
    )

    return '{}{}'.format(environment, js)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
@@ -0,0 +1,44 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import js2py
|
||||
import logging
|
||||
import base64
|
||||
|
||||
from . import JavaScriptInterpreter
|
||||
|
||||
from .encapsulated import template
|
||||
from .jsunfuck import jsunfuck
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class ChallengeInterpreter(JavaScriptInterpreter):
    """
    Challenge interpreter backed by js2py. Registers itself under the
    name 'js2py'.
    """

    # ------------------------------------------------------------------------------- #

    def __init__(self):
        super(ChallengeInterpreter, self).__init__('js2py')

    # ------------------------------------------------------------------------------- #

    def eval(self, body, domain):
        """
        Evaluate ``template(body, domain)`` in a js2py context that exposes a
        Python-implemented ``atob`` and return the result.
        """
        payload = template(body, domain)

        # Probe expression: a result of '1' is taken as the sign of a js2py
        # version that needs the jsunfuck work-around applied to the payload.
        probe = '(+(+!+[]+[+!+[]]+(!![]+[])[!+[]+!+[]+!+[]]+[!+[]+!+[]]+[+[]])+[])[+!+[]]'
        if js2py.eval_js(probe) == '1':
            logging.warning('WARNING - Please upgrade your js2py https://github.com/PiotrDabkowski/Js2Py, applying work around for the meantime.')
            payload = jsunfuck(payload)

        def atob(s):
            # Stand-in for the browser's atob(): base64-decode, then UTF-8 decode.
            encoded = '{}'.format(s)
            return base64.b64decode(encoded).decode('utf-8')

        js2py.disable_pyimport()
        return js2py.EvalJs({'atob': atob}).eval(payload)


# ------------------------------------------------------------------------------- #

ChallengeInterpreter()
|
||||
+6
-6
@@ -80,18 +80,18 @@ CONSTRUCTORS = {
|
||||
'RegExp': 'Function("return/"+false+"/")()'
|
||||
}
|
||||
|
||||
|
||||
def jsunfuck(jsfuckString):
    """
    Replace known JSFuck encodings inside ``jsfuckString`` with their plain
    equivalents and return the resulting string.

    MAPPING values are replaced by their key wrapped in double quotes, then
    SIMPLE values by their bare key. Within each table the longest encodings
    are substituted first. The CONSTRUCTORS table is not applied (that pass
    is commented out upstream).
    """
    for table, wrapper in ((MAPPING, '"{}"'), (SIMPLE, '{}')):
        longest_first = sorted(table.items(), key=lambda item: len(item[1]), reverse=True)
        for plain, encoded in longest_first:
            jsfuckString = jsfuckString.replace(encoded, wrapper.format(plain))

    return jsfuckString
|
||||
@@ -0,0 +1,120 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import re
|
||||
import operator as op
|
||||
|
||||
from . import JavaScriptInterpreter
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class ChallengeInterpreter(JavaScriptInterpreter):
    """
    Challenge interpreter that works out the IUAM answer with regular
    expressions and Python arithmetic. Registers itself under the name
    'native'.
    """

    def __init__(self):
        super(ChallengeInterpreter, self).__init__('native')

    def eval(self, body, domain):
        """
        Parse the challenge out of ``body``, apply each arithmetic step and
        return the answer formatted with ten decimal places.
        """

        # ------------------------------------------------------------------------------- #

        operators = {
            '/': op.truediv,
            '*': op.mul,
            '-': op.sub,
            '+': op.add
        }

        # ------------------------------------------------------------------------------- #

        def jsfuckToNumber(jsFuck):
            # Each parenthesised group becomes one decimal digit string (the
            # sum of its terms); the groups are concatenated into one integer.
            number_terms = re.compile(r'-?\d+').findall

            simplified = jsFuck.replace('!+[]', '1').replace('!![]', '1').replace('[]', '0')
            simplified = simplified.lstrip('+').replace('(+', '(')

            pieces = [
                str(sum(int(term) for term in number_terms(group)))
                for group in re.findall(r'\((?:\d|\+|\-)*\)', simplified)
            ]

            return int(''.join(pieces))

        # ------------------------------------------------------------------------------- #

        def divisorMath(payload, needle, domain):
            parts = payload.split('/')
            numerator, denominator = parts[0], parts[1]

            if needle in denominator:
                # The denominator combines a number with the character code of
                # one character of the domain, picked by a JSFuck-encoded index.
                prefix, symbol = re.findall(r"^(.*?)(.)\(function", denominator)[0]
                marker = '"("+p+")")}'
                operation = operators[symbol]
                left = float(jsfuckToNumber(prefix))
                index_source = denominator[denominator.find(marker) + len(marker):-2]
                right = float(ord(domain[jsfuckToNumber(index_source)]))
                divisor = operation(left, right)
            else:
                divisor = jsfuckToNumber(denominator)

            return jsfuckToNumber(numerator) / float(divisor)

        # ------------------------------------------------------------------------------- #

        parsed = re.search(
            r"setTimeout\(function\(\){\s+var.*?f,\s*(?P<variable>\w+).*?:(?P<init>\S+)};"
            r".*?\('challenge-form'\);\s+;(?P<challenge>.*?a\.value)"
            r"(?:.*id=\"cf-dn-.*?>(?P<k>\S+)<)?",
            body,
            re.DOTALL | re.MULTILINE
        ).groupdict()

        steps = re.finditer(
            r'{}.*?([+\-*/])=(.*?);(?=a\.value|{})'.format(
                parsed['variable'],
                parsed['variable']
            ),
            parsed['challenge']
        )

        # ------------------------------------------------------------------------------- #

        # Starting value of the challenge variable.
        initial = parsed['init']
        if '/' in initial:
            val = initial.split('/')
            answer = jsfuckToNumber(val[0]) / float(jsfuckToNumber(val[1]))
        else:
            answer = jsfuckToNumber(initial)

        # ------------------------------------------------------------------------------- #

        # Apply every "<variable> <op>= <expression>;" step in order.
        for step in steps:
            oper, expression = step.groups()

            if '/' in expression:
                value = divisorMath(expression, 'function(p)', domain)
            elif 'Element' in expression:
                value = divisorMath(parsed['k'], '"("+p+")")}', domain)
            else:
                value = jsfuckToNumber(expression)

            answer = operators[oper](answer, value)

        # ------------------------------------------------------------------------------- #

        if not parsed['k'] and '+ t.length' in body:
            answer += len(domain)

        # ------------------------------------------------------------------------------- #

        return '{0:.10f}'.format(answer)


# ------------------------------------------------------------------------------- #

ChallengeInterpreter()
|
||||
@@ -0,0 +1,49 @@
|
||||
import base64
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from . import JavaScriptInterpreter
|
||||
from .encapsulated import template
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class ChallengeInterpreter(JavaScriptInterpreter):
    """
    Challenge interpreter that shells out to the ``node`` executable.
    Registers itself under the name 'nodejs'.
    """

    # ------------------------------------------------------------------------------- #

    def __init__(self):
        super(ChallengeInterpreter, self).__init__('nodejs')

    # ------------------------------------------------------------------------------- #

    def eval(self, body, domain):
        """
        Run ``template(body, domain)`` inside a Node.js ``vm`` context and
        return whatever the ``node`` process wrote to stdout.

        Raises EnvironmentError when ``node`` cannot be found (errno 2),
        re-raises other OSErrors, and turns any other failure into a
        RuntimeError.
        """
        try:
            # The payload travels base64-encoded so it can be embedded in a
            # JavaScript string literal without any escaping.
            encoded = base64.b64encode(template(body, domain).encode('UTF-8')).decode('ascii')
            js = (
                'var atob = function(str) {return Buffer.from(str, "base64").toString("binary");};'
                'var challenge = atob("%s");'
                'var context = {atob: atob};'
                'var options = {filename: "iuam-challenge.js", timeout: 4000};'
                'var answer = require("vm").runInNewContext(challenge, context, options);'
                'process.stdout.write(String(answer));'
            ) % encoded

            return subprocess.check_output(['node', '-e', js])

        except OSError as e:
            if e.errno != 2:
                raise

            raise EnvironmentError(
                'Missing Node.js runtime. Node is required and must be in the PATH (check with `node -v`).\n\n'
                'Your Node binary may be called `nodejs` rather than `node`, '
                'in which case you may need to run `apt-get install nodejs-legacy` on some Debian-based systems.\n\n'
                '(Please read the cloudscraper README\'s Dependencies section: '
                'https://github.com/VeNoMouS/cloudscraper#dependencies.)'
            )
        except Exception:
            sys.tracebacklimit = 0
            raise RuntimeError('Error executing Cloudflare IUAM Javascript in nodejs')


# ------------------------------------------------------------------------------- #

ChallengeInterpreter()
|
||||
@@ -0,0 +1,33 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import sys
|
||||
|
||||
try:
|
||||
import v8eval
|
||||
except ImportError:
|
||||
sys.tracebacklimit = 0
|
||||
raise RuntimeError('Please install the python module v8eval either via pip or download it from https://github.com/sony/v8eval')
|
||||
|
||||
from . import JavaScriptInterpreter
|
||||
from .encapsulated import template
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class ChallengeInterpreter(JavaScriptInterpreter):
    """
    Challenge interpreter backed by the v8eval module. Registers itself
    under the name 'v8'.
    """

    def __init__(self):
        super(ChallengeInterpreter, self).__init__('v8')

    # ------------------------------------------------------------------------------- #

    def eval(self, body, domain):
        """
        Evaluate ``template(body, domain)`` in a new V8 instance and return
        the result.

        Raises RuntimeError when the engine raises TypeError or
        v8eval.V8Error.
        """
        try:
            return v8eval.V8().eval(template(body, domain))
        except (TypeError, v8eval.V8Error):
            # The exception has to be raised, not merely constructed;
            # otherwise the failure is swallowed and None is returned.
            raise RuntimeError('We encountered an error running the V8 Engine.')


# ------------------------------------------------------------------------------- #

ChallengeInterpreter()
|
||||
@@ -0,0 +1,236 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
from ..exceptions import (
|
||||
reCaptchaServiceUnavailable,
|
||||
reCaptchaAPIError,
|
||||
reCaptchaTimeout,
|
||||
reCaptchaParameter,
|
||||
reCaptchaBadJobID,
|
||||
reCaptchaReportError
|
||||
)
|
||||
|
||||
try:
|
||||
import polling
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install the python module 'polling' via pip or download it from "
|
||||
"https://github.com/justiniso/polling/"
|
||||
)
|
||||
|
||||
from . import reCaptcha
|
||||
|
||||
|
||||
class captchaSolver(reCaptcha):
    """
    reCaptcha provider for the 2captcha.com HTTP API. Registers itself
    under the name '2captcha'.
    """

    def __init__(self):
        super(captchaSolver, self).__init__('2captcha')
        self.host = 'https://2captcha.com'
        self.session = requests.Session()

    # ------------------------------------------------------------------------------- #

    @staticmethod
    def checkErrorStatus(response, request_type):
        """
        Raise for error conditions carried by ``response``.

        ``request_type`` selects the error table ('in.php' or 'res.php').
        Raises reCaptchaServiceUnavailable on HTTP 500/502 and
        reCaptchaAPIError when the JSON body has status 0 and a ``request``
        value listed in the selected table. Returns None otherwise.
        """
        if response.status_code in [500, 502]:
            raise reCaptchaServiceUnavailable('2Captcha: Server Side Error {}'.format(response.status_code))

        errors = {
            'in.php': {
                "ERROR_WRONG_USER_KEY": "You've provided api_key parameter value is in incorrect format, it should contain 32 symbols.",
                "ERROR_KEY_DOES_NOT_EXIST": "The api_key you've provided does not exists.",
                "ERROR_ZERO_BALANCE": "You don't have sufficient funds on your account.",
                "ERROR_PAGEURL": "pageurl parameter is missing in your request.",
                "ERROR_NO_SLOT_AVAILABLE":
                    "No Slots Available.\nYou can receive this error in two cases:\n"
                    "1. If you solve ReCaptcha: the queue of your captchas that are not distributed to workers is too long. "
                    "Queue limit changes dynamically and depends on total amount of captchas awaiting solution and usually it's between 50 and 100 captchas.\n"
                    "2. If you solve Normal Captcha: your maximum rate for normal captchas is lower than current rate on the server."
                    "You can change your maximum rate in your account's settings.",
                "ERROR_IP_NOT_ALLOWED": "The request is sent from the IP that is not on the list of your allowed IPs.",
                "IP_BANNED": "Your IP address is banned due to many frequent attempts to access the server using wrong authorization keys.",
                "ERROR_BAD_TOKEN_OR_PAGEURL":
                    "You can get this error code when sending ReCaptcha V2. "
                    "That happens if your request contains invalid pair of googlekey and pageurl. "
                    "The common reason for that is that ReCaptcha is loaded inside an iframe hosted on another domain/subdomain.",
                "ERROR_GOOGLEKEY":
                    "You can get this error code when sending ReCaptcha V2. "
                    "That means that sitekey value provided in your request is incorrect: it's blank or malformed.",
                "MAX_USER_TURN": "You made more than 60 requests within 3 seconds.Your account is banned for 10 seconds. Ban will be lifted automatically."
            },
            'res.php': {
                "ERROR_CAPTCHA_UNSOLVABLE":
                    "We are unable to solve your captcha - three of our workers were unable solve it "
                    "or we didn't get an answer within 90 seconds (300 seconds for ReCaptcha V2). "
                    "We will not charge you for that request.",
                "ERROR_WRONG_USER_KEY": "You've provided api_key parameter value in incorrect format, it should contain 32 symbols.",
                "ERROR_KEY_DOES_NOT_EXIST": "The api_key you've provided does not exists.",
                "ERROR_WRONG_ID_FORMAT": "You've provided captcha ID in wrong format. The ID can contain numbers only.",
                "ERROR_WRONG_CAPTCHA_ID": "You've provided incorrect captcha ID.",
                "ERROR_BAD_DUPLICATES":
                    "Error is returned when 100% accuracy feature is enabled. "
                    "The error means that max numbers of tries is reached but min number of matches not found.",
                "REPORT_NOT_RECORDED": "Error is returned to your complain request if you already complained lots of correctly solved captchas.",
                "ERROR_IP_ADDRES":
                    "You can receive this error code when registering a pingback (callback) IP or domain."
                    "That happes if your request is coming from an IP address that doesn't match the IP address of your pingback IP or domain.",
                "ERROR_TOKEN_EXPIRED": "You can receive this error code when sending GeeTest. That error means that challenge value you provided is expired.",
                "ERROR_EMPTY_ACTION": "Action parameter is missing or no value is provided for action parameter."
            }
        }

        payload = response.json()
        error_code = payload.get('request')

        # The other checks in this class compare status against the integer 1,
        # so failure is the integer 0; an identity test against False would
        # never match it. `== 0` still accepts a literal False.
        if payload.get('status') == 0 and error_code in errors.get(request_type):
            raise reCaptchaAPIError(
                '{} {}'.format(
                    error_code,
                    errors.get(request_type).get(error_code)
                )
            )

    # ------------------------------------------------------------------------------- #

    def reportJob(self, jobID):
        """
        Report ``jobID`` as a bad solve (action 'reportbad'), polling every
        5 seconds for up to 180 seconds.

        Returns True on success. Raises reCaptchaBadJobID for an empty job
        id and reCaptchaReportError when polling yields a falsy response.
        """
        if not jobID:
            raise reCaptchaBadJobID(
                "2Captcha: Error bad job id to request reCaptcha."
            )

        def _checkRequest(response):
            if response.ok and response.json().get('status') == 1:
                return response

            self.checkErrorStatus(response, 'res.php')

            return None

        response = polling.poll(
            lambda: self.session.get(
                '{}/res.php'.format(self.host),
                params={
                    'key': self.api_key,
                    'action': 'reportbad',
                    'id': jobID,
                    'json': '1'
                }
            ),
            check_success=_checkRequest,
            step=5,
            timeout=180
        )

        if response:
            return True
        else:
            raise reCaptchaReportError(
                "2Captcha: Error - Failed to report bad reCaptcha solve."
            )

    # ------------------------------------------------------------------------------- #

    def requestJob(self, jobID):
        """
        Poll for the result of ``jobID`` (action 'get') every 5 seconds for
        up to 180 seconds and return the ``request`` field of the reply.

        Raises reCaptchaBadJobID for an empty job id and reCaptchaTimeout
        when polling yields a falsy response.
        """
        if not jobID:
            raise reCaptchaBadJobID("2Captcha: Error bad job id to request reCaptcha.")

        def _checkRequest(response):
            if response.ok and response.json().get('status') == 1:
                return response

            self.checkErrorStatus(response, 'res.php')

            return None

        response = polling.poll(
            lambda: self.session.get(
                '{}/res.php'.format(self.host),
                params={
                    'key': self.api_key,
                    'action': 'get',
                    'id': jobID,
                    'json': '1'
                }
            ),
            check_success=_checkRequest,
            step=5,
            timeout=180
        )

        if response:
            return response.json().get('request')
        else:
            raise reCaptchaTimeout(
                "2Captcha: Error failed to solve reCaptcha."
            )

    # ------------------------------------------------------------------------------- #

    def requestSolve(self, site_url, site_key):
        """
        Submit a 'userrecaptcha' job for ``site_key`` / ``site_url`` and
        return the ``request`` field of the reply (used as the job id).

        Raises reCaptchaBadJobID when polling yields a falsy response.
        """
        def _checkRequest(response):
            if response.ok and response.json().get("status") == 1 and response.json().get('request'):
                return response

            self.checkErrorStatus(response, 'in.php')

            return None

        response = polling.poll(
            lambda: self.session.post(
                '{}/in.php'.format(self.host),
                data={
                    'key': self.api_key,
                    'method': 'userrecaptcha',
                    'googlekey': site_key,
                    'pageurl': site_url,
                    'json': '1',
                    'soft_id': '5507698'
                },
                allow_redirects=False
            ),
            check_success=_checkRequest,
            step=5,
            timeout=180
        )

        if response:
            return response.json().get('request')
        else:
            raise reCaptchaBadJobID(
                '2Captcha: Error no job id was returned.'
            )

    # ------------------------------------------------------------------------------- #

    def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
        """
        Solve the reCaptcha for ``site_key`` on ``site_url`` and return the
        answer.

        ``reCaptchaParams`` must contain 'api_key' and may contain 'proxy'.
        Raises reCaptchaParameter when the key is missing and
        reCaptchaTimeout when polling times out; in that case a bad-solve
        report is attempted first if a job id had been obtained.
        """
        jobID = None

        if not reCaptchaParams.get('api_key'):
            raise reCaptchaParameter(
                "2Captcha: Missing api_key parameter."
            )

        self.api_key = reCaptchaParams.get('api_key')

        if reCaptchaParams.get('proxy'):
            # The guard tests 'proxy' while the value used to be read from
            # 'proxies' only, which set the session proxies to None. Keep
            # honouring 'proxies' and fall back to the guarded key.
            # NOTE(review): assumes 'proxy' holds a requests-style proxies
            # mapping - confirm against the caller.
            self.session.proxies = reCaptchaParams.get('proxies') or reCaptchaParams.get('proxy')

        try:
            jobID = self.requestSolve(site_url, site_key)
            return self.requestJob(jobID)
        except polling.TimeoutException:
            try:
                if jobID:
                    self.reportJob(jobID)
            except polling.TimeoutException:
                raise reCaptchaTimeout(
                    "2Captcha: reCaptcha solve took to long and also failed reporting the job the job id {}.".format(jobID)
                )

            raise reCaptchaTimeout(
                "2Captcha: reCaptcha solve took to long to execute job id {}, aborting.".format(jobID)
            )


# ------------------------------------------------------------------------------- #

captchaSolver()
|
||||
@@ -0,0 +1,207 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import re
|
||||
import requests
|
||||
|
||||
try:
|
||||
import polling
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install the python module 'polling' via pip or download it from "
|
||||
"https://github.com/justiniso/polling/"
|
||||
)
|
||||
|
||||
from ..exceptions import (
|
||||
reCaptchaServiceUnavailable,
|
||||
reCaptchaAPIError,
|
||||
reCaptchaTimeout,
|
||||
reCaptchaParameter,
|
||||
reCaptchaBadJobID
|
||||
)
|
||||
|
||||
from . import reCaptcha
|
||||
|
||||
|
||||
class captchaSolver(reCaptcha):
|
||||
|
||||
def __init__(self):
|
||||
super(captchaSolver, self).__init__('9kw')
|
||||
self.host = 'https://www.9kw.eu/index.cgi'
|
||||
self.maxtimeout = 180
|
||||
self.session = requests.Session()
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
@staticmethod
|
||||
def checkErrorStatus(response):
|
||||
if response.status_code in [500, 502]:
|
||||
raise reCaptchaServiceUnavailable(
|
||||
'9kw: Server Side Error {}'.format(response.status_code)
|
||||
)
|
||||
|
||||
error_codes = {
|
||||
1: 'No API Key available.',
|
||||
2: 'No API key found.',
|
||||
3: 'No active API key found.',
|
||||
4: 'API Key has been disabled by the operator. ',
|
||||
5: 'No user found.',
|
||||
6: 'No data found.',
|
||||
7: 'Found No ID.',
|
||||
8: 'found No captcha.',
|
||||
9: 'No image found.',
|
||||
10: 'Image size not allowed.',
|
||||
11: 'credit is not sufficient.',
|
||||
12: 'what was done.',
|
||||
13: 'No answer contain.',
|
||||
14: 'Captcha already been answered.',
|
||||
15: 'Captcha to quickly filed.',
|
||||
16: 'JD check active.',
|
||||
17: 'Unknown problem.',
|
||||
18: 'Found No ID.',
|
||||
19: 'Incorrect answer.',
|
||||
20: 'Do not timely filed (Incorrect UserID).',
|
||||
21: 'Link not allowed.',
|
||||
22: 'Prohibited submit.',
|
||||
23: 'Entering prohibited.',
|
||||
24: 'Too little credit.',
|
||||
25: 'No entry found.',
|
||||
26: 'No Conditions accepted.',
|
||||
27: 'No coupon code found in the database.',
|
||||
28: 'Already unused voucher code.',
|
||||
29: 'maxTimeout under 60 seconds.',
|
||||
30: 'User not found.',
|
||||
31: 'An account is not yet 24 hours in system.',
|
||||
32: 'An account does not have the full rights.',
|
||||
33: 'Plugin needed a update.',
|
||||
34: 'No HTTPS allowed.',
|
||||
35: 'No HTTP allowed.',
|
||||
36: 'Source not allowed.',
|
||||
37: 'Transfer denied.',
|
||||
38: 'Incorrect answer without space',
|
||||
39: 'Incorrect answer with space',
|
||||
40: 'Incorrect answer with not only numbers',
|
||||
41: 'Incorrect answer with not only A-Z, a-z',
|
||||
42: 'Incorrect answer with not only 0-9, A-Z, a-z',
|
||||
43: 'Incorrect answer with not only [0-9,- ]',
|
||||
44: 'Incorrect answer with not only [0-9A-Za-z,- ]',
|
||||
45: 'Incorrect answer with not only coordinates',
|
||||
46: 'Incorrect answer with not only multiple coordinates',
|
||||
47: 'Incorrect answer with not only data',
|
||||
48: 'Incorrect answer with not only rotate number',
|
||||
49: 'Incorrect answer with not only text',
|
||||
50: 'Incorrect answer with not only text and too short',
|
||||
51: 'Incorrect answer with not enough chars',
|
||||
52: 'Incorrect answer with too many chars',
|
||||
53: 'Incorrect answer without no or yes',
|
||||
54: 'Assignment was not found.'
|
||||
}
|
||||
|
||||
if response.text.startswith('{'):
|
||||
if response.json().get('error'):
|
||||
raise reCaptchaAPIError(error_codes.get(int(response.json().get('error'))))
|
||||
else:
|
||||
error_code = int(re.search(r'^00(?P<error_code>\d+)', response.text).groupdict().get('error_code', 0))
|
||||
if error_code:
|
||||
raise reCaptchaAPIError(error_codes.get(error_code))
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def requestJob(self, jobID):
|
||||
if not jobID:
|
||||
raise reCaptchaBadJobID(
|
||||
"9kw: Error bad job id to request reCaptcha against."
|
||||
)
|
||||
|
||||
def _checkRequest(response):
|
||||
if response.ok and response.json().get('answer') != 'NO DATA':
|
||||
return response
|
||||
|
||||
self.checkErrorStatus(response)
|
||||
|
||||
return None
|
||||
|
||||
response = polling.poll(
|
||||
lambda: self.session.get(
|
||||
self.host,
|
||||
params={
|
||||
'apikey': self.api_key,
|
||||
'action': 'usercaptchacorrectdata',
|
||||
'id': jobID,
|
||||
'info': 1,
|
||||
'json': 1
|
||||
}
|
||||
),
|
||||
check_success=_checkRequest,
|
||||
step=10,
|
||||
timeout=(self.maxtimeout + 10)
|
||||
)
|
||||
|
||||
if response:
|
||||
return response.json().get('answer')
|
||||
else:
|
||||
raise reCaptchaTimeout("9kw: Error failed to solve reCaptcha.")
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def requestSolve(self, site_url, site_key):
    """Submit a reCaptcha v2 job to 9kw and return its captcha id.

    Posts a ``usercaptchaupload`` request until the reply is a JSON object
    containing a ``captchaid``.

    :param site_url: URL of the page that shows the captcha.
    :param site_key: reCaptcha site key, sent as ``file-upload-01``.
    :return: the ``captchaid`` field of the JSON reply.
    :raises reCaptchaBadJobID: if polling hands back a falsy response.
    """
    def _submitJob():
        # Form data is rebuilt on every attempt.
        return self.session.post(
            self.host,
            data={
                'apikey': self.api_key,
                'action': 'usercaptchaupload',
                'interactive': 1,
                'file-upload-01': site_key,
                'oldsource': 'recaptchav2',
                'pageurl': site_url,
                'maxtimeout': self.maxtimeout,
                'json': 1
            },
            allow_redirects=False
        )

    def _jobAccepted(reply):
        # Only parse the body as JSON when it looks like a JSON object.
        accepted = (
            reply.ok
            and reply.text.startswith('{')
            and reply.json().get('captchaid')
        )
        if accepted:
            return reply

        self.checkErrorStatus(reply)
        return None

    submitted = polling.poll(
        _submitJob,
        check_success=_jobAccepted,
        step=5,
        timeout=(self.maxtimeout + 10)
    )

    if not submitted:
        raise reCaptchaBadJobID('9kw: Error no valid job id was returned.')

    return submitted.json().get('captchaid')
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
    """Solve a reCaptcha through 9kw and return the answer.

    :param site_url: URL of the page that shows the captcha.
    :param site_key: reCaptcha site key.
    :param reCaptchaParams: mapping with ``api_key`` (required) and the
        optional ``maxtimeout``, ``proxy`` and ``proxies`` entries.
    :return: the answer returned by :meth:`requestJob`.
    :raises reCaptchaParameter: if ``api_key`` is missing or empty.
    :raises reCaptchaTimeout: if either polling stage times out.
    """
    jobID = None

    api_key = reCaptchaParams.get('api_key')
    if not api_key:
        raise reCaptchaParameter("9kw: Missing api_key parameter.")
    self.api_key = api_key

    maxtimeout = reCaptchaParams.get('maxtimeout')
    if maxtimeout:
        self.maxtimeout = maxtimeout

    # 'proxy' switches proxying on; the proxy mapping itself is read from 'proxies'.
    if reCaptchaParams.get('proxy'):
        self.session.proxies = reCaptchaParams.get('proxies')

    try:
        jobID = self.requestSolve(site_url, site_key)
        answer = self.requestJob(jobID)
    except polling.TimeoutException:
        raise reCaptchaTimeout(
            "9kw: reCaptcha solve took to long to execute 'captchaid' {}, aborting.".format(jobID)
        )

    return answer
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
captchaSolver()
|
||||
@@ -0,0 +1,46 @@
|
||||
import abc
|
||||
import logging
|
||||
import sys
|
||||
|
||||
# abc.ABC is used on Python 3.4+; older interpreters get an equivalent
# base class built directly from ABCMeta.
ABC = abc.ABC if sys.version_info >= (3, 4) else abc.ABCMeta('ABC', (), {})  # noqa

# ------------------------------------------------------------------------------- #

# Registry of solver instances, filled in by reCaptcha.__init__ under the
# name each provider passes in.
captchaSolvers = {}
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class reCaptcha(ABC):
    """Abstract base for anti-reCaptcha providers.

    Instances register themselves in the module-level ``captchaSolvers``
    mapping under the name handed to ``__init__``.
    """

    @abc.abstractmethod
    def __init__(self, name):
        """Register this instance in ``captchaSolvers`` as ``name``."""
        captchaSolvers[name] = self

    # ------------------------------------------------------------------------------- #

    @classmethod
    def dynamicImport(cls, name):
        """Return the provider registered as ``name``, importing it on demand.

        If ``name`` is not registered yet, the submodule ``<cls.__module__>.<name>``
        is imported; afterwards a ``reCaptcha`` instance must be registered
        under ``name``.

        :raises ImportError: if the import fails or the registry still holds
            no ``reCaptcha`` instance for ``name`` (logged, then re-raised).
        """
        if name in captchaSolvers:
            return captchaSolvers[name]

        try:
            __import__('{}.{}'.format(cls.__module__, name))
            provider = captchaSolvers.get(name)
            if not isinstance(provider, reCaptcha):
                raise ImportError('The anti reCaptcha provider was not initialized.')
        except ImportError:
            logging.error("Unable to load {} anti reCaptcha provider".format(name))
            raise

        return captchaSolvers[name]

    # ------------------------------------------------------------------------------- #

    @abc.abstractmethod
    def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
        """Provider-specific solve routine; concrete providers must override it."""
        pass

    # ------------------------------------------------------------------------------- #

    def solveCaptcha(self, site_url, site_key, reCaptchaParams):
        """Delegate to :meth:`getCaptchaAnswer` and return its result."""
        answer = self.getCaptchaAnswer(site_url, site_key, reCaptchaParams)
        return answer
|
||||
@@ -0,0 +1,49 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from ..exceptions import reCaptchaParameter
|
||||
|
||||
try:
|
||||
from python_anticaptcha import (
|
||||
AnticaptchaClient,
|
||||
NoCaptchaTaskProxylessTask
|
||||
)
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install the python module 'python_anticaptcha' via pip or download it from "
|
||||
"https://github.com/ad-m/python-anticaptcha"
|
||||
)
|
||||
|
||||
from . import reCaptcha
|
||||
|
||||
|
||||
class captchaSolver(reCaptcha):
    """anticaptcha provider built on the ``python_anticaptcha`` client."""

    def __init__(self):
        """Register this provider under the name ``'anticaptcha'``."""
        super(captchaSolver, self).__init__('anticaptcha')

    # ------------------------------------------------------------------------------- #

    def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
        """Solve a reCaptcha through anticaptcha and return the solution response.

        :param site_url: URL of the page that shows the captcha.
        :param site_key: reCaptcha site key.
        :param reCaptchaParams: mapping with ``api_key`` (required) and the
            optional ``proxy`` / ``proxies`` entries.
        :raises reCaptchaParameter: if ``api_key`` is missing or empty.
        :raises NotImplementedError: if the installed client has no
            ``createTaskSmee`` method.
        """
        api_key = reCaptchaParams.get('api_key')
        if not api_key:
            raise reCaptchaParameter("anticaptcha: Missing api_key parameter.")

        client = AnticaptchaClient(api_key)

        # 'proxy' switches proxying on; the proxy mapping itself is read from 'proxies'.
        if reCaptchaParams.get('proxy'):
            client.session.proxies = reCaptchaParams.get('proxies')

        task = NoCaptchaTaskProxylessTask(site_url, site_key)

        if hasattr(client, 'createTaskSmee'):
            job = client.createTaskSmee(task)
            return job.get_solution_response()

        raise NotImplementedError(
            "Please upgrade 'python_anticaptcha' via pip or download it from "
            "https://github.com/ad-m/python-anticaptcha"
        )
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
captchaSolver()
|
||||
@@ -0,0 +1,227 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import json
|
||||
import requests
|
||||
|
||||
try:
|
||||
import polling
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install the python module 'polling' via pip or download it from "
|
||||
"https://github.com/justiniso/polling/"
|
||||
)
|
||||
|
||||
from ..exceptions import (
|
||||
reCaptchaServiceUnavailable,
|
||||
reCaptchaAccountError,
|
||||
reCaptchaTimeout,
|
||||
reCaptchaParameter,
|
||||
reCaptchaBadJobID,
|
||||
reCaptchaReportError
|
||||
)
|
||||
|
||||
from . import reCaptcha
|
||||
|
||||
|
||||
class captchaSolver(reCaptcha):
    """DeathByCaptcha provider talking to the HTTP API at ``self.host``."""

    def __init__(self):
        """Register as ``'deathbycaptcha'`` and create the HTTP session."""
        super(captchaSolver, self).__init__('deathbycaptcha')
        self.host = 'http://api.dbcapi.me/api'
        self.session = requests.Session()

    # ------------------------------------------------------------------------------- #

    @staticmethod
    def checkErrorStatus(response):
        """Raise ``reCaptchaServiceUnavailable`` for the listed HTTP status codes.

        Status codes that are not listed are ignored, so the pollers below
        simply try again.
        """
        errors = dict(
            [
                (400, "DeathByCaptcha: 400 Bad Request"),
                (403, "DeathByCaptcha: 403 Forbidden - Invalid credentails or insufficient credits."),
                # 500 is deliberately not listed, so a 500 reply does not raise here.
                # (500, "DeathByCaptcha: 500 Internal Server Error."),
                (503, "DeathByCaptcha: 503 Service Temporarily Unavailable.")
            ]
        )

        if response.status_code in errors:
            raise reCaptchaServiceUnavailable(errors.get(response.status_code))

    # ------------------------------------------------------------------------------- #

    def login(self, username, password):
        """Store the credentials and verify the account against ``/user``.

        :raises reCaptchaAccountError: if the account is banned or its
            balance is 0.
        :raises reCaptchaServiceUnavailable: via :meth:`checkErrorStatus`.
        """
        self.username = username
        self.password = password

        def _checkRequest(response):
            if response.ok:
                if response.json().get('is_banned'):
                    raise reCaptchaAccountError('DeathByCaptcha: Your account is banned.')

                # The key was previously misspelled 'balanace', so this check
                # could never match the reply.
                if response.json().get('balance') == 0:
                    raise reCaptchaAccountError('DeathByCaptcha: insufficient credits.')

                return response

            self.checkErrorStatus(response)

            return None

        response = polling.poll(
            lambda: self.session.post(
                '{}/user'.format(self.host),
                headers={'Accept': 'application/json'},
                data={
                    'username': self.username,
                    'password': self.password
                }
            ),
            check_success=_checkRequest,
            step=10,
            timeout=120
        )

        # Neither this class nor its reCaptcha base defines debugRequest, so
        # only call the hook when something else provides it.
        debugRequest = getattr(self, 'debugRequest', None)
        if callable(debugRequest):
            debugRequest(response)

    # ------------------------------------------------------------------------------- #

    def reportJob(self, jobID):
        """Report ``jobID`` as a failed solve.

        :return: ``True`` once the report request answers with HTTP 200.
        :raises reCaptchaBadJobID: if ``jobID`` is falsy.
        :raises reCaptchaReportError: if polling hands back a falsy response.
        """
        if not jobID:
            raise reCaptchaBadJobID(
                "DeathByCaptcha: Error bad job id to report failed reCaptcha."
            )

        def _checkRequest(response):
            if response.status_code == 200:
                return response

            self.checkErrorStatus(response)

            return None

        response = polling.poll(
            lambda: self.session.post(
                '{}/captcha/{}/report'.format(self.host, jobID),
                headers={'Accept': 'application/json'},
                data={
                    'username': self.username,
                    'password': self.password
                }
            ),
            check_success=_checkRequest,
            step=10,
            timeout=180
        )

        if response:
            return True
        else:
            raise reCaptchaReportError(
                "DeathByCaptcha: Error report failed reCaptcha."
            )

    # ------------------------------------------------------------------------------- #

    def requestJob(self, jobID):
        """Poll ``/captcha/<jobID>`` until the reply carries a ``text`` value.

        :return: the ``text`` field of the JSON reply.
        :raises reCaptchaBadJobID: if ``jobID`` is falsy.
        :raises reCaptchaTimeout: if polling hands back a falsy response.
        """
        if not jobID:
            raise reCaptchaBadJobID(
                "DeathByCaptcha: Error bad job id to request reCaptcha."
            )

        def _checkRequest(response):
            if response.ok and response.json().get('text'):
                return response

            self.checkErrorStatus(response)

            return None

        response = polling.poll(
            lambda: self.session.get(
                '{}/captcha/{}'.format(self.host, jobID),
                headers={'Accept': 'application/json'}
            ),
            check_success=_checkRequest,
            step=10,
            timeout=180
        )

        if response:
            return response.json().get('text')
        else:
            raise reCaptchaTimeout(
                "DeathByCaptcha: Error failed to solve reCaptcha."
            )

    # ------------------------------------------------------------------------------- #

    def requestSolve(self, site_url, site_key):
        """Submit a token captcha (``type`` 4) and return its job id.

        :return: the ``captcha`` field of the JSON reply.
        :raises reCaptchaBadJobID: if polling hands back a falsy response.
        """
        def _checkRequest(response):
            if response.ok and response.json().get("is_correct") and response.json().get('captcha'):
                return response

            self.checkErrorStatus(response)

            return None

        response = polling.poll(
            lambda: self.session.post(
                '{}/captcha'.format(self.host),
                headers={'Accept': 'application/json'},
                data={
                    'username': self.username,
                    'password': self.password,
                    'type': '4',
                    'token_params': json.dumps({
                        'googlekey': site_key,
                        'pageurl': site_url
                    })
                },
                allow_redirects=False
            ),
            check_success=_checkRequest,
            step=10,
            timeout=180
        )

        if response:
            return response.json().get('captcha')
        else:
            raise reCaptchaBadJobID(
                'DeathByCaptcha: Error no job id was returned.'
            )

    # ------------------------------------------------------------------------------- #

    def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
        """Solve a reCaptcha through DeathByCaptcha and return the answer.

        :param reCaptchaParams: mapping with ``username`` and ``password``
            (both required) and the optional ``proxy`` / ``proxies`` entries.
        :raises reCaptchaParameter: if a required credential is missing.
        :raises reCaptchaTimeout: if solving times out; a job that was
            already created is reported as failed first.
        """
        jobID = None

        for param in ['username', 'password']:
            if not reCaptchaParams.get(param):
                raise reCaptchaParameter(
                    "DeathByCaptcha: Missing '{}' parameter.".format(param)
                )
            setattr(self, param, reCaptchaParams.get(param))

        # 'proxy' switches proxying on; the proxy mapping itself is read from 'proxies'.
        if reCaptchaParams.get('proxy'):
            self.session.proxies = reCaptchaParams.get('proxies')

        try:
            jobID = self.requestSolve(site_url, site_key)
            return self.requestJob(jobID)
        except polling.TimeoutException:
            try:
                # jobID is still None when the submit stage itself timed out.
                if jobID:
                    self.reportJob(jobID)
            except polling.TimeoutException:
                raise reCaptchaTimeout(
                    "DeathByCaptcha: reCaptcha solve took to long and also failed reporting the job id {}.".format(jobID)
                )

            raise reCaptchaTimeout(
                "DeathByCaptcha: reCaptcha solve took to long to execute job id {}, aborting.".format(jobID)
            )
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
captchaSolver()
|
||||
@@ -0,0 +1,117 @@
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
import ssl
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class User_Agent():
    """Choose a User-Agent string with matching headers and cipher suite.

    The candidates are read from ``browsers.json`` next to this module.
    After construction ``self.headers`` and ``self.cipherSuite`` hold the
    selected values.
    """

    # ------------------------------------------------------------------------------- #

    def __init__(self, *args, **kwargs):
        self.headers = None
        self.cipherSuite = []
        self.loadUserAgent(*args, **kwargs)

    # ------------------------------------------------------------------------------- #

    def loadHeaders(self, user_agents, user_agent_version):
        """Set ``self.headers`` from the release, else from the browser defaults."""
        browser_entry = user_agents.get(self.browser)
        release_headers = browser_entry.get('releases').get(user_agent_version).get('headers')

        if release_headers:
            self.headers = release_headers
        else:
            self.headers = user_agents.get(self.browser).get('default_headers')

    # ------------------------------------------------------------------------------- #

    def filterAgents(self, releases):
        """Return ``{release: [agents]}`` limited to the enabled platforms.

        Mobile agents come before desktop agents; releases with no agents for
        any enabled platform are left out.
        """
        selected = {}

        for version in releases:
            for platform in ('mobile', 'desktop'):
                # getattr reads self.mobile / self.desktop, in that order.
                if getattr(self, platform) and releases[version]['User-Agent'][platform]:
                    selected[version] = selected.get(version, []) + releases[version]['User-Agent'][platform]

        return selected

    # ------------------------------------------------------------------------------- #

    def tryMatchCustom(self, user_agents):
        """Adopt the first known browser whose agent strings contain ``self.custom``.

        On a match the browser, headers (with ``User-Agent`` overwritten by the
        custom string) and cipher suite are set and ``True`` is returned;
        otherwise ``False``.
        """
        # Lazy generator: entries are looked up in browser / release / platform order.
        candidates = (
            (browser, release, user_agents[browser]['releases'][release]['User-Agent'][platform])
            for browser in user_agents
            for release in user_agents[browser]['releases']
            for platform in ['mobile', 'desktop']
        )

        for browser, release, agents in candidates:
            if re.search(re.escape(self.custom), ' '.join(agents)):
                self.browser = browser
                self.loadHeaders(user_agents, release)
                self.headers['User-Agent'] = self.custom
                self.cipherSuite = user_agents[self.browser].get('cipherSuite', [])
                return True

        return False

    # ------------------------------------------------------------------------------- #

    def loadUserAgent(self, *args, **kwargs):
        """Resolve the options, load ``browsers.json`` and pick the headers.

        ``browser`` may be a browser name or a dict carrying ``browser``,
        ``custom``, ``desktop`` and ``mobile``; otherwise those options are
        taken from ``kwargs``. Unless ``allow_brotli`` is truthy, ``br`` is
        removed from ``Accept-Encoding``.

        :raises RuntimeError: if both platforms are disabled or the requested
            browser is not in ``browsers.json``.
        """
        browser_option = kwargs.pop('browser', None)

        if isinstance(browser_option, dict):
            self.custom = browser_option.get('custom', None)
            self.desktop = browser_option.get('desktop', True)
            self.mobile = browser_option.get('mobile', True)
            self.browser = browser_option.get('browser', None)
        else:
            self.browser = browser_option
            self.custom = kwargs.pop('custom', None)
            self.desktop = kwargs.pop('desktop', True)
            self.mobile = kwargs.pop('mobile', True)

        if not (self.desktop or self.mobile):
            sys.tracebacklimit = 0
            raise RuntimeError("Sorry you can't have mobile and desktop disabled at the same time.")

        browsers_file = os.path.join(os.path.dirname(__file__), 'browsers.json')
        with open(browsers_file, 'r') as handle:
            user_agents = json.load(handle, object_pairs_hook=OrderedDict)

        if self.custom:
            if not self.tryMatchCustom(user_agents):
                # Unknown custom agent: fall back to generic ciphers and headers.
                self.cipherSuite = [
                    ssl._DEFAULT_CIPHERS,
                    '!AES128-SHA',
                    '!ECDHE-RSA-AES256-SHA',
                ]
                self.headers = OrderedDict([
                    ('User-Agent', self.custom),
                    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'),
                    ('Accept-Language', 'en-US,en;q=0.9'),
                    ('Accept-Encoding', 'gzip, deflate, br')
                ])
        else:
            if self.browser and not user_agents.get(self.browser):
                sys.tracebacklimit = 0
                raise RuntimeError('Sorry "{}" browser User-Agent was not found.'.format(self.browser))

            picker = random.SystemRandom()

            if not self.browser:
                self.browser = picker.choice(list(user_agents))

            browser_entry = user_agents.get(self.browser)
            self.cipherSuite = browser_entry.get('cipherSuite', [])

            filteredAgents = self.filterAgents(browser_entry.get('releases'))
            user_agent_version = picker.choice(list(filteredAgents))

            self.loadHeaders(user_agents, user_agent_version)
            self.headers['User-Agent'] = picker.choice(filteredAgents[user_agent_version])

        if not kwargs.get('allow_brotli', False) and 'br' in self.headers['Accept-Encoding']:
            kept_encodings = filter(
                lambda encoding: encoding.strip() != 'br',
                self.headers['Accept-Encoding'].split(',')
            )
            self.headers['Accept-Encoding'] = ','.join(kept_encodings).strip()
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -5,6 +5,7 @@ import re
|
||||
from .translators.friendly_nodes import REGEXP_CONVERTER
|
||||
from .utils.injector import fix_js_args
|
||||
from types import FunctionType, ModuleType, GeneratorType, BuiltinFunctionType, MethodType, BuiltinMethodType
|
||||
from math import floor, log10
|
||||
import traceback
|
||||
try:
|
||||
import numpy
|
||||
@@ -603,15 +604,7 @@ class PyJs(object):
|
||||
elif typ == 'Boolean':
|
||||
return Js('true') if self.value else Js('false')
|
||||
elif typ == 'Number': #or self.Class=='Number':
|
||||
if self.is_nan():
|
||||
return Js('NaN')
|
||||
elif self.is_infinity():
|
||||
sign = '-' if self.value < 0 else ''
|
||||
return Js(sign + 'Infinity')
|
||||
elif isinstance(self.value,
|
||||
long) or self.value.is_integer(): # dont print .0
|
||||
return Js(unicode(int(self.value)))
|
||||
return Js(unicode(self.value)) # accurate enough
|
||||
return Js(unicode(js_dtoa(self.value)))
|
||||
elif typ == 'String':
|
||||
return self
|
||||
else: #object
|
||||
@@ -1046,7 +1039,7 @@ def PyJsComma(a, b):
|
||||
return b
|
||||
|
||||
|
||||
from .internals.simplex import JsException as PyJsException
|
||||
from .internals.simplex import JsException as PyJsException, js_dtoa
|
||||
import pyjsparser
|
||||
pyjsparser.parser.ENABLE_JS2PY_ERRORS = lambda msg: MakeError('SyntaxError', msg)
|
||||
|
||||
|
||||
@@ -116,10 +116,12 @@ def eval_js(js):
|
||||
|
||||
|
||||
def eval_js6(js):
    """Convert ``js`` with js6_to_js5 and evaluate the result with eval_js.

    Experimental js6 support via babel.
    """
    js5_source = js6_to_js5(js)
    return eval_js(js5_source)
|
||||
|
||||
|
||||
def translate_js6(js):
    """Convert ``js`` with js6_to_js5 and translate the result with translate_js.

    Experimental js6 support via babel.
    """
    js5_source = js6_to_js5(js)
    return translate_js(js5_source)
|
||||
|
||||
|
||||
|
||||
@@ -3,15 +3,19 @@ import re
|
||||
|
||||
import datetime
|
||||
|
||||
from desc import *
|
||||
from simplex import *
|
||||
from conversions import *
|
||||
import six
|
||||
from pyjsparser import PyJsParser
|
||||
from itertools import izip
|
||||
from .desc import *
|
||||
from .simplex import *
|
||||
from .conversions import *
|
||||
|
||||
from pyjsparser import PyJsParser
|
||||
|
||||
import six
|
||||
if six.PY2:
|
||||
from itertools import izip
|
||||
else:
|
||||
izip = zip
|
||||
|
||||
|
||||
from conversions import *
|
||||
from simplex import *
|
||||
|
||||
|
||||
def Type(obj):
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from code import Code
|
||||
from simplex import MakeError
|
||||
from opcodes import *
|
||||
from operations import *
|
||||
from trans_utils import *
|
||||
from .code import Code
|
||||
from .simplex import MakeError
|
||||
from .opcodes import *
|
||||
from .operations import *
|
||||
from .trans_utils import *
|
||||
|
||||
SPECIAL_IDENTIFIERS = {'true', 'false', 'this'}
|
||||
|
||||
@@ -465,10 +465,11 @@ class ByteCodeGenerator:
|
||||
self.emit('LOAD_OBJECT', tuple(data))
|
||||
|
||||
def Program(self, body, **kwargs):
|
||||
old_tape_len = len(self.exe.tape)
|
||||
self.emit('LOAD_UNDEFINED')
|
||||
self.emit(body)
|
||||
# add function tape !
|
||||
self.exe.tape = self.function_declaration_tape + self.exe.tape
|
||||
self.exe.tape = self.exe.tape[:old_tape_len] + self.function_declaration_tape + self.exe.tape[old_tape_len:]
|
||||
|
||||
def Pyimport(self, imp, **kwargs):
|
||||
raise NotImplementedError(
|
||||
@@ -735,17 +736,17 @@ def main():
|
||||
#
|
||||
# }
|
||||
a.emit(d)
|
||||
print a.declared_vars
|
||||
print a.exe.tape
|
||||
print len(a.exe.tape)
|
||||
print(a.declared_vars)
|
||||
print(a.exe.tape)
|
||||
print(len(a.exe.tape))
|
||||
|
||||
a.exe.compile()
|
||||
|
||||
def log(this, args):
|
||||
print args[0]
|
||||
print(args[0])
|
||||
return 999
|
||||
|
||||
print a.exe.run(a.exe.space.GlobalObj)
|
||||
print(a.exe.run(a.exe.space.GlobalObj))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -1,16 +1,17 @@
|
||||
from opcodes import *
|
||||
from space import *
|
||||
from base import *
|
||||
from .opcodes import *
|
||||
from .space import *
|
||||
from .base import *
|
||||
|
||||
|
||||
class Code:
|
||||
'''Can generate, store and run sequence of ops representing js code'''
|
||||
|
||||
def __init__(self, is_strict=False):
|
||||
def __init__(self, is_strict=False, debug_mode=False):
|
||||
self.tape = []
|
||||
self.compiled = False
|
||||
self.label_locs = None
|
||||
self.is_strict = is_strict
|
||||
self.debug_mode = debug_mode
|
||||
|
||||
self.contexts = []
|
||||
self.current_ctx = None
|
||||
@@ -22,6 +23,10 @@ class Code:
|
||||
self.GLOBAL_THIS = None
|
||||
self.space = None
|
||||
|
||||
# dbg
|
||||
self.ctx_depth = 0
|
||||
|
||||
|
||||
def get_new_label(self):
    """Advance the label counter by one and return the new value."""
    fresh_label = self._label_count + 1
    self._label_count = fresh_label
    return fresh_label
|
||||
@@ -74,21 +79,35 @@ class Code:
|
||||
# 0=normal, 1=return, 2=jump_outside, 3=errors
|
||||
# execute_fragment_under_context returns:
|
||||
# (return_value, typ, return_value/jump_loc/py_error)
|
||||
# ctx.stack must be len 1 and its always empty after the call.
|
||||
# IMPORTANT: It is guaranteed that the length of the ctx.stack is unchanged.
|
||||
'''
|
||||
old_curr_ctx = self.current_ctx
|
||||
self.ctx_depth += 1
|
||||
old_stack_len = len(ctx.stack)
|
||||
old_ret_len = len(self.return_locs)
|
||||
old_ctx_len = len(self.contexts)
|
||||
try:
|
||||
self.current_ctx = ctx
|
||||
return self._execute_fragment_under_context(
|
||||
ctx, start_label, end_label)
|
||||
except JsException as err:
|
||||
# undo the things that were put on the stack (if any)
|
||||
# don't worry, I know the recovery is possible through try statement and for this reason try statement
|
||||
# has its own context and stack so it will not delete the contents of the outer stack
|
||||
del ctx.stack[:]
|
||||
if self.debug_mode:
|
||||
self._on_fragment_exit("js errors")
|
||||
# undo the things that were put on the stack (if any) to ensure a proper error recovery
|
||||
del ctx.stack[old_stack_len:]
|
||||
del self.return_locs[old_ret_len:]
|
||||
del self.contexts[old_ctx_len :]
|
||||
return undefined, 3, err
|
||||
finally:
|
||||
self.ctx_depth -= 1
|
||||
self.current_ctx = old_curr_ctx
|
||||
assert old_stack_len == len(ctx.stack)
|
||||
|
||||
def _get_dbg_indent(self):
    """Return the debug-output prefix: ' ' repeated ``self.ctx_depth`` times."""
    return ' ' * self.ctx_depth
|
||||
|
||||
def _on_fragment_exit(self, mode):
    """Print a 'ctx exit (<mode>)' debug line behind the current debug indent."""
    indent = self._get_dbg_indent()
    print(indent + 'ctx exit (%s)' % mode)
|
||||
|
||||
def _execute_fragment_under_context(self, ctx, start_label, end_label):
|
||||
start, end = self.label_locs[start_label], self.label_locs[end_label]
|
||||
@@ -97,16 +116,20 @@ class Code:
|
||||
entry_level = len(self.contexts)
|
||||
# for e in self.tape[start:end]:
|
||||
# print e
|
||||
|
||||
if self.debug_mode:
|
||||
print(self._get_dbg_indent() + 'ctx entry (from:%d, to:%d)' % (start, end))
|
||||
while loc < len(self.tape):
|
||||
#print loc, self.tape[loc]
|
||||
if len(self.contexts) == entry_level and loc >= end:
|
||||
if self.debug_mode:
|
||||
self._on_fragment_exit('normal')
|
||||
assert loc == end
|
||||
assert len(ctx.stack) == (
|
||||
1 + initial_len), 'Stack change must be equal to +1!'
|
||||
delta_stack = len(ctx.stack) - initial_len
|
||||
assert delta_stack == +1, 'Stack change must be equal to +1! got %d' % delta_stack
|
||||
return ctx.stack.pop(), 0, None # means normal return
|
||||
|
||||
# execute instruction
|
||||
if self.debug_mode:
|
||||
print(self._get_dbg_indent() + str(loc), self.tape[loc])
|
||||
status = self.tape[loc].eval(ctx)
|
||||
|
||||
# check status for special actions
|
||||
@@ -116,9 +139,10 @@ class Code:
|
||||
if len(self.contexts) == entry_level:
|
||||
# check if jumped outside of the fragment and break if so
|
||||
if not start <= loc < end:
|
||||
assert len(ctx.stack) == (
|
||||
1 + initial_len
|
||||
), 'Stack change must be equal to +1!'
|
||||
if self.debug_mode:
|
||||
self._on_fragment_exit('jump outside loc:%d label:%d' % (loc, status))
|
||||
delta_stack = len(ctx.stack) - initial_len
|
||||
assert delta_stack == +1, 'Stack change must be equal to +1! got %d' % delta_stack
|
||||
return ctx.stack.pop(), 2, status # jump outside
|
||||
continue
|
||||
|
||||
@@ -137,7 +161,10 @@ class Code:
|
||||
# return: (None, None)
|
||||
else:
|
||||
if len(self.contexts) == entry_level:
|
||||
assert len(ctx.stack) == 1 + initial_len
|
||||
if self.debug_mode:
|
||||
self._on_fragment_exit('return')
|
||||
delta_stack = len(ctx.stack) - initial_len
|
||||
assert delta_stack == +1, 'Stack change must be equal to +1! got %d' % delta_stack
|
||||
return undefined, 1, ctx.stack.pop(
|
||||
) # return signal
|
||||
return_value = ctx.stack.pop()
|
||||
@@ -149,6 +176,8 @@ class Code:
|
||||
continue
|
||||
# next instruction
|
||||
loc += 1
|
||||
if self.debug_mode:
|
||||
self._on_fragment_exit('internal error - unexpected end of tape, will crash')
|
||||
assert False, 'Remember to add NOP at the end!'
|
||||
|
||||
def run(self, ctx, starting_loc=0):
|
||||
@@ -156,7 +185,8 @@ class Code:
|
||||
self.current_ctx = ctx
|
||||
while loc < len(self.tape):
|
||||
# execute instruction
|
||||
#print loc, self.tape[loc]
|
||||
if self.debug_mode:
|
||||
print(loc, self.tape[loc])
|
||||
status = self.tape[loc].eval(ctx)
|
||||
|
||||
# check status for special actions
|
||||
|
||||
@@ -42,6 +42,7 @@ def executable_code(code_str, space, global_context=True):
|
||||
space.byte_generator.emit('LABEL', skip)
|
||||
space.byte_generator.emit('NOP')
|
||||
space.byte_generator.restore_state()
|
||||
|
||||
space.byte_generator.exe.compile(
|
||||
start_loc=old_tape_len
|
||||
) # dont read the code from the beginning, dont be stupid!
|
||||
@@ -71,5 +72,5 @@ def _eval(this, args):
|
||||
|
||||
|
||||
def log(this, args):
|
||||
print ' '.join(map(to_string, args))
|
||||
print(' '.join(map(to_string, args)))
|
||||
return undefined
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
# Type Conversions. to_type. All must return PyJs subclass instance
|
||||
from simplex import *
|
||||
from .simplex import *
|
||||
|
||||
|
||||
def to_primitive(self, hint=None):
|
||||
@@ -73,14 +73,7 @@ def to_string(self):
|
||||
elif typ == 'Boolean':
|
||||
return 'true' if self else 'false'
|
||||
elif typ == 'Number': # or self.Class=='Number':
|
||||
if is_nan(self):
|
||||
return 'NaN'
|
||||
elif is_infinity(self):
|
||||
sign = '-' if self < 0 else ''
|
||||
return sign + 'Infinity'
|
||||
elif int(self) == self: # integer value!
|
||||
return unicode(int(self))
|
||||
return unicode(self) # todo make it print exactly like node.js
|
||||
return js_dtoa(self)
|
||||
else: # object
|
||||
return to_string(to_primitive(self, 'String'))
|
||||
|
||||
|
||||
@@ -1,29 +1,22 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from base import Scope
|
||||
from func_utils import *
|
||||
from conversions import *
|
||||
from .base import Scope
|
||||
from .func_utils import *
|
||||
from .conversions import *
|
||||
import six
|
||||
from prototypes.jsboolean import BooleanPrototype
|
||||
from prototypes.jserror import ErrorPrototype
|
||||
from prototypes.jsfunction import FunctionPrototype
|
||||
from prototypes.jsnumber import NumberPrototype
|
||||
from prototypes.jsobject import ObjectPrototype
|
||||
from prototypes.jsregexp import RegExpPrototype
|
||||
from prototypes.jsstring import StringPrototype
|
||||
from prototypes.jsarray import ArrayPrototype
|
||||
import prototypes.jsjson as jsjson
|
||||
import prototypes.jsutils as jsutils
|
||||
from .prototypes.jsboolean import BooleanPrototype
|
||||
from .prototypes.jserror import ErrorPrototype
|
||||
from .prototypes.jsfunction import FunctionPrototype
|
||||
from .prototypes.jsnumber import NumberPrototype
|
||||
from .prototypes.jsobject import ObjectPrototype
|
||||
from .prototypes.jsregexp import RegExpPrototype
|
||||
from .prototypes.jsstring import StringPrototype
|
||||
from .prototypes.jsarray import ArrayPrototype
|
||||
from .prototypes import jsjson
|
||||
from .prototypes import jsutils
|
||||
|
||||
from .constructors import jsnumber, jsstring, jsarray, jsboolean, jsregexp, jsmath, jsobject, jsfunction, jsconsole
|
||||
|
||||
from constructors import jsnumber
|
||||
from constructors import jsstring
|
||||
from constructors import jsarray
|
||||
from constructors import jsboolean
|
||||
from constructors import jsregexp
|
||||
from constructors import jsmath
|
||||
from constructors import jsobject
|
||||
from constructors import jsfunction
|
||||
from constructors import jsconsole
|
||||
|
||||
|
||||
def fill_proto(proto, proto_class, space):
|
||||
@@ -155,7 +148,10 @@ def fill_space(space, byte_generator):
|
||||
|
||||
j = easy_func(creator, space)
|
||||
j.name = unicode(typ)
|
||||
j.prototype = space.ERROR_TYPES[typ]
|
||||
|
||||
set_protected(j, 'prototype', space.ERROR_TYPES[typ])
|
||||
|
||||
set_non_enumerable(space.ERROR_TYPES[typ], 'constructor', j)
|
||||
|
||||
def new_create(args, space):
|
||||
message = get_arg(args, 0)
|
||||
@@ -178,6 +174,7 @@ def fill_space(space, byte_generator):
|
||||
setattr(space, err_type_name + u'Prototype', extra_err)
|
||||
error_constructors[err_type_name] = construct_constructor(
|
||||
err_type_name)
|
||||
|
||||
assert space.TypeErrorPrototype is not None
|
||||
|
||||
# RegExp
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from simplex import *
|
||||
from conversions import *
|
||||
from .simplex import *
|
||||
from .conversions import *
|
||||
|
||||
import six
|
||||
if six.PY3:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from operations import *
|
||||
from base import get_member, get_member_dot, PyJsFunction, Scope
|
||||
from .operations import *
|
||||
from .base import get_member, get_member_dot, PyJsFunction, Scope
|
||||
|
||||
|
||||
class OP_CODE(object):
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
from simplex import *
|
||||
from conversions import *
|
||||
from .simplex import *
|
||||
from .conversions import *
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Unary operations
|
||||
|
||||
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
from ..conversions import *
|
||||
from ..func_utils import *
|
||||
from jsregexp import RegExpExec
|
||||
from .jsregexp import RegExpExec
|
||||
|
||||
DIGS = set(u'0123456789')
|
||||
WHITE = u"\u0009\u000A\u000B\u000C\u000D\u0020\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF"
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
import pyjsparser
|
||||
from space import Space
|
||||
import fill_space
|
||||
from byte_trans import ByteCodeGenerator
|
||||
from code import Code
|
||||
from simplex import MakeError
|
||||
import sys
|
||||
sys.setrecursionlimit(100000)
|
||||
from .space import Space
|
||||
from . import fill_space
|
||||
from .byte_trans import ByteCodeGenerator
|
||||
from .code import Code
|
||||
from .simplex import *
|
||||
|
||||
|
||||
pyjsparser.parser.ENABLE_JS2PY_ERRORS = lambda msg: MakeError(u'SyntaxError', unicode(msg))
|
||||
@@ -16,8 +14,8 @@ def get_js_bytecode(js):
|
||||
a.emit(d)
|
||||
return a.exe.tape
|
||||
|
||||
def eval_js_vm(js):
|
||||
a = ByteCodeGenerator(Code())
|
||||
def eval_js_vm(js, debug=False):
|
||||
a = ByteCodeGenerator(Code(debug_mode=debug))
|
||||
s = Space()
|
||||
a.exe.space = s
|
||||
s.exe = a.exe
|
||||
@@ -26,7 +24,10 @@ def eval_js_vm(js):
|
||||
|
||||
a.emit(d)
|
||||
fill_space.fill_space(s, a)
|
||||
# print a.exe.tape
|
||||
if debug:
|
||||
from pprint import pprint
|
||||
pprint(a.exe.tape)
|
||||
print()
|
||||
a.exe.compile()
|
||||
|
||||
return a.exe.run(a.exe.space.GlobalObj)
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
import six
|
||||
|
||||
if six.PY3:
|
||||
basestring = str
|
||||
long = int
|
||||
xrange = range
|
||||
unicode = str
|
||||
|
||||
#Undefined
|
||||
class PyJsUndefined(object):
|
||||
@@ -75,7 +79,7 @@ def is_callable(self):
|
||||
|
||||
|
||||
def is_infinity(self):
|
||||
return self == float('inf') or self == -float('inf')
|
||||
return self == Infinity or self == -Infinity
|
||||
|
||||
|
||||
def is_nan(self):
|
||||
@@ -114,7 +118,7 @@ class JsException(Exception):
|
||||
return self.mes.to_string().value
|
||||
else:
|
||||
if self.throw is not None:
|
||||
from conversions import to_string
|
||||
from .conversions import to_string
|
||||
return to_string(self.throw)
|
||||
else:
|
||||
return self.typ + ': ' + self.message
|
||||
@@ -131,3 +135,26 @@ def value_from_js_exception(js_exception, space):
|
||||
return js_exception.throw
|
||||
else:
|
||||
return space.NewError(js_exception.typ, js_exception.message)
|
||||
|
||||
|
||||
def js_dtoa(number):
|
||||
if is_nan(number):
|
||||
return u'NaN'
|
||||
elif is_infinity(number):
|
||||
if number > 0:
|
||||
return u'Infinity'
|
||||
return u'-Infinity'
|
||||
elif number == 0.:
|
||||
return u'0'
|
||||
elif abs(number) < 1e-6 or abs(number) >= 1e21:
|
||||
frac, exponent = unicode(repr(float(number))).split('e')
|
||||
# Remove leading zeros from the exponent.
|
||||
exponent = int(exponent)
|
||||
return frac + ('e' if exponent < 0 else 'e+') + unicode(exponent)
|
||||
elif abs(number) < 1e-4: # python starts to return exp notation while we still want the prec
|
||||
frac, exponent = unicode(repr(float(number))).split('e-')
|
||||
base = u'0.' + u'0' * (int(exponent) - 1) + frac.lstrip('-').replace('.', '')
|
||||
return base if number > 0. else u'-' + base
|
||||
elif isinstance(number, long) or number.is_integer(): # dont print .0
|
||||
return unicode(int(number))
|
||||
return unicode(repr(number)) # python representation should be equivalent.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from base import *
|
||||
from simplex import *
|
||||
from .base import *
|
||||
from .simplex import *
|
||||
|
||||
|
||||
class Space(object):
|
||||
|
||||
@@ -1,3 +1,10 @@
|
||||
import six
|
||||
if six.PY3:
|
||||
basestring = str
|
||||
long = int
|
||||
xrange = range
|
||||
unicode = str
|
||||
|
||||
def to_key(literal_or_identifier):
|
||||
''' returns string representation of this object'''
|
||||
if literal_or_identifier['type'] == 'Identifier':
|
||||
|
||||
@@ -6,8 +6,6 @@ if six.PY3:
|
||||
xrange = range
|
||||
unicode = str
|
||||
|
||||
# todo fix apply and bind
|
||||
|
||||
|
||||
class FunctionPrototype:
|
||||
def toString():
|
||||
@@ -41,6 +39,7 @@ class FunctionPrototype:
|
||||
return this.call(obj, args)
|
||||
|
||||
def bind(thisArg):
|
||||
arguments_ = arguments
|
||||
target = this
|
||||
if not target.is_callable():
|
||||
raise this.MakeError(
|
||||
@@ -48,5 +47,5 @@ class FunctionPrototype:
|
||||
if len(arguments) <= 1:
|
||||
args = ()
|
||||
else:
|
||||
args = tuple([arguments[e] for e in xrange(1, len(arguments))])
|
||||
args = tuple([arguments_[e] for e in xrange(1, len(arguments_))])
|
||||
return this.PyJsBoundFunction(target, thisArg, args)
|
||||
|
||||
@@ -345,7 +345,7 @@ def BlockStatement(type, body):
|
||||
body) # never returns empty string! In the worst case returns pass\n
|
||||
|
||||
|
||||
def ExpressionStatement(type, expression, **ommit):
|
||||
def ExpressionStatement(type, expression):
|
||||
return trans(expression) + '\n' # end expression space with new line
|
||||
|
||||
|
||||
|
||||
@@ -43,6 +43,8 @@ python -c "import logging; logging.basicConfig(level=logging.DEBUG); logging.get
|
||||
# subscenter:list
|
||||
python -c "import logging; logging.basicConfig(level=logging.DEBUG); logging.getLogger('rebulk').setLevel(logging.WARNING); import subliminal_patch, subliminal; subliminal.region.configure('dogpile.cache.memory'); from subliminal_patch.core import SZProviderPool; from babelfish import Language; from subliminal.core import scan_video; print SZProviderPool(providers=['subscenter'], )['subscenter'].list_subtitles(scan_video('FULL_PATH'), languages=[Language('heb')])"
|
||||
|
||||
# subscene:list
|
||||
python -c "import logging; logging.basicConfig(level=logging.DEBUG); logging.getLogger('rebulk').setLevel(logging.WARNING); import subliminal_patch, subliminal; subliminal.region.configure('dogpile.cache.memory'); from subliminal_patch.core import SZProviderPool; from subzero.language import Language; from subzero.video import parse_video; SZProviderPool(providers=['subscene'], provider_configs={'subscene': {'username': 'USERNAME', 'password': 'PASSWORD'}})['subscene'].list_subtitles(parse_video('FILENAME', {}, {'type': 'episode'}, dry_run=True), languages=[Language('eng')])"
|
||||
|
||||
# refining
|
||||
python -c "import logging; logging.basicConfig(level=logging.DEBUG); logging.getLogger('rebulk').setLevel(logging.WARNING); import os; os.environ['U1pfT01EQl9LRVk'] = '789CF30DAC2C8B0AF433F5C9AD34290A712DF30D7135F12D0FB3E502006FDE081E'; import subliminal_patch, subliminal; subliminal.region.configure('dogpile.cache.memory'); from subzero.video import parse_video, refine_video; video = parse_video('FILE_NAME', {'type': 'episode'}, dry_run=True); print refine_video(video)"
|
||||
|
||||
@@ -163,3 +163,13 @@ class Pysubs2CLI(object):
|
||||
elif args.transform_framerate is not None:
|
||||
in_fps, out_fps = args.transform_framerate
|
||||
subs.transform_framerate(in_fps, out_fps)
|
||||
|
||||
|
||||
def __main__():
|
||||
cli = Pysubs2CLI()
|
||||
rv = cli(sys.argv[1:])
|
||||
sys.exit(rv)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
__main__()
|
||||
|
||||
@@ -17,12 +17,14 @@ class Color(_Color):
|
||||
return _Color.__new__(cls, r, g, b, a)
|
||||
|
||||
#: Version of the pysubs2 library.
|
||||
VERSION = "0.2.1"
|
||||
VERSION = "0.2.3"
|
||||
|
||||
|
||||
PY3 = sys.version_info.major == 3
|
||||
|
||||
if PY3:
|
||||
text_type = str
|
||||
binary_string_type = bytes
|
||||
else:
|
||||
text_type = unicode
|
||||
binary_string_type = str
|
||||
|
||||
@@ -12,3 +12,6 @@ class UnknownFormatIdentifierError(Pysubs2Error):
|
||||
|
||||
class FormatAutodetectionError(Pysubs2Error):
|
||||
"""Subtitle format is ambiguous or unknown."""
|
||||
|
||||
class ContentNotUsable(Pysubs2Error):
|
||||
"""Current content not usable for specified format"""
|
||||
|
||||
@@ -3,7 +3,7 @@ from .microdvd import MicroDVDFormat
|
||||
from .subrip import SubripFormat
|
||||
from .jsonformat import JSONFormat
|
||||
from .substation import SubstationFormat
|
||||
from .txt_generic import TXTGenericFormat, MPL2Format
|
||||
from .mpl2 import MPL2Format
|
||||
from .exceptions import *
|
||||
|
||||
#: Dict mapping file extensions to format identifiers.
|
||||
@@ -13,7 +13,6 @@ FILE_EXTENSION_TO_FORMAT_IDENTIFIER = {
|
||||
".ssa": "ssa",
|
||||
".sub": "microdvd",
|
||||
".json": "json",
|
||||
".txt": "txt_generic",
|
||||
}
|
||||
|
||||
#: Dict mapping format identifiers to implementations (FormatBase subclasses).
|
||||
@@ -23,7 +22,6 @@ FORMAT_IDENTIFIER_TO_FORMAT_CLASS = {
|
||||
"ssa": SubstationFormat,
|
||||
"microdvd": MicroDVDFormat,
|
||||
"json": JSONFormat,
|
||||
"txt_generic": TXTGenericFormat,
|
||||
"mpl2": MPL2Format,
|
||||
}
|
||||
|
||||
|
||||
+20
-16
@@ -2,44 +2,48 @@
|
||||
|
||||
from __future__ import print_function, division, unicode_literals
|
||||
import re
|
||||
from numbers import Number
|
||||
|
||||
from pysubs2.time import times_to_ms
|
||||
from .time import times_to_ms
|
||||
from .formatbase import FormatBase
|
||||
from .ssaevent import SSAEvent
|
||||
from .ssastyle import SSAStyle
|
||||
|
||||
|
||||
# thanks to http://otsaloma.io/gaupol/doc/api/aeidon.files.mpl2_source.html
|
||||
MPL2_FORMAT = re.compile(r"^(?um)\[(-?\d+)\]\[(-?\d+)\](.*?)$")
|
||||
|
||||
|
||||
class TXTGenericFormat(FormatBase):
|
||||
@classmethod
|
||||
def guess_format(cls, text):
|
||||
if MPL2_FORMAT.match(text):
|
||||
return "mpl2"
|
||||
MPL2_FORMAT = re.compile(r"^(?um)\[(-?\d+)\]\[(-?\d+)\](.*)")
|
||||
|
||||
|
||||
class MPL2Format(FormatBase):
|
||||
@classmethod
|
||||
def guess_format(cls, text):
|
||||
return TXTGenericFormat.guess_format(text)
|
||||
if MPL2_FORMAT.search(text):
|
||||
return "mpl2"
|
||||
|
||||
@classmethod
|
||||
def from_file(cls, subs, fp, format_, **kwargs):
|
||||
def prepare_text(lines):
|
||||
out = []
|
||||
for s in lines.split("|"):
|
||||
s = s.strip()
|
||||
|
||||
if s.startswith("/"):
|
||||
out.append(r"{\i1}%s{\i0}" % s[1:])
|
||||
continue
|
||||
# line beginning with '/' is in italics
|
||||
s = r"{\i1}%s{\i0}" % s[1:].strip()
|
||||
|
||||
out.append(s)
|
||||
return "\n".join(out)
|
||||
return "\\N".join(out)
|
||||
|
||||
subs.events = [SSAEvent(start=times_to_ms(s=float(start) / 10), end=times_to_ms(s=float(end) / 10),
|
||||
text=prepare_text(text)) for start, end, text in MPL2_FORMAT.findall(fp.getvalue())]
|
||||
|
||||
@classmethod
|
||||
def to_file(cls, subs, fp, format_, **kwargs):
|
||||
raise NotImplemented
|
||||
|
||||
# TODO handle italics
|
||||
for line in subs:
|
||||
if line.is_comment:
|
||||
continue
|
||||
|
||||
print("[{start}][{end}] {text}".format(start=int(line.start // 100),
|
||||
end=int(line.end // 100),
|
||||
text=line.plaintext.replace("\n", "|")),
|
||||
file=fp)
|
||||
@@ -41,6 +41,7 @@ class SSAStyle(object):
|
||||
self.italic = False #: Italic
|
||||
self.underline = False #: Underline (ASS only)
|
||||
self.strikeout = False #: Strikeout (ASS only)
|
||||
self.drawing = False #: Drawing (ASS only, see http://docs.aegisub.org/3.1/ASS_Tags/#drawing-tags
|
||||
self.scalex = 100.0 #: Horizontal scaling (ASS only)
|
||||
self.scaley = 100.0 #: Vertical scaling (ASS only)
|
||||
self.spacing = 0.0 #: Letter spacing (ASS only)
|
||||
@@ -78,7 +79,7 @@ class SSAStyle(object):
|
||||
s += "%rpx " % self.fontsize
|
||||
if self.bold: s += "bold "
|
||||
if self.italic: s += "italic "
|
||||
s += "'%s'>" % self.fontname
|
||||
s += "{!r}>".format(self.fontname)
|
||||
if not PY3: s = s.encode("utf-8")
|
||||
return s
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ from .formatbase import FormatBase
|
||||
from .ssaevent import SSAEvent
|
||||
from .ssastyle import SSAStyle
|
||||
from .substation import parse_tags
|
||||
from .exceptions import ContentNotUsable
|
||||
from .time import ms_to_times, make_time, TIMESTAMP, timestamp_to_ms
|
||||
|
||||
#: Largest timestamp allowed in SubRip, ie. 99:59:59,999.
|
||||
@@ -46,8 +47,16 @@ class SubripFormat(FormatBase):
|
||||
following_lines[-1].append(line)
|
||||
|
||||
def prepare_text(lines):
|
||||
# Handle the "happy" empty subtitle case, which is timestamp line followed by blank line(s)
|
||||
# followed by number line and timestamp line of the next subtitle. Fixes issue #11.
|
||||
if (len(lines) >= 2
|
||||
and all(re.match("\s*$", line) for line in lines[:-1])
|
||||
and re.match("\s*\d+\s*$", lines[-1])):
|
||||
return ""
|
||||
|
||||
# Handle the general case.
|
||||
s = "".join(lines).strip()
|
||||
s = re.sub(r"\n* *\d+ *$", "", s) # strip number of next subtitle
|
||||
s = re.sub(r"\n+ *\d+ *$", "", s) # strip number of next subtitle
|
||||
s = re.sub(r"< *i *>", r"{\i1}", s)
|
||||
s = re.sub(r"< */ *i *>", r"{\i0}", s)
|
||||
s = re.sub(r"< *s *>", r"{\s1}", s)
|
||||
@@ -73,6 +82,7 @@ class SubripFormat(FormatBase):
|
||||
if sty.italic: fragment = "<i>%s</i>" % fragment
|
||||
if sty.underline: fragment = "<u>%s</u>" % fragment
|
||||
if sty.strikeout: fragment = "<s>%s</s>" % fragment
|
||||
if sty.drawing: raise ContentNotUsable
|
||||
body.append(fragment)
|
||||
|
||||
return re.sub("\n+", "\n", "".join(body).strip())
|
||||
@@ -82,7 +92,10 @@ class SubripFormat(FormatBase):
|
||||
for i, line in enumerate(visible_lines, 1):
|
||||
start = ms_to_timestamp(line.start)
|
||||
end = ms_to_timestamp(line.end)
|
||||
text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
|
||||
try:
|
||||
text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
|
||||
except ContentNotUsable:
|
||||
continue
|
||||
|
||||
print("%d" % i, file=fp) # Python 2.7 compat
|
||||
print(start, "-->", end, file=fp)
|
||||
|
||||
@@ -4,7 +4,7 @@ from numbers import Number
|
||||
from .formatbase import FormatBase
|
||||
from .ssaevent import SSAEvent
|
||||
from .ssastyle import SSAStyle
|
||||
from .common import text_type, Color
|
||||
from .common import text_type, Color, PY3, binary_string_type
|
||||
from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP
|
||||
|
||||
SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7)
|
||||
@@ -110,7 +110,7 @@ def parse_tags(text, style=SSAStyle.DEFAULT_STYLE, styles={}):
|
||||
|
||||
def apply_overrides(all_overrides):
|
||||
s = style.copy()
|
||||
for tag in re.findall(r"\\[ibus][10]|\\r[a-zA-Z_0-9 ]*", all_overrides):
|
||||
for tag in re.findall(r"\\[ibusp][0-9]|\\r[a-zA-Z_0-9 ]*", all_overrides):
|
||||
if tag == r"\r":
|
||||
s = style.copy() # reset to original line style
|
||||
elif tag.startswith(r"\r"):
|
||||
@@ -122,6 +122,13 @@ def parse_tags(text, style=SSAStyle.DEFAULT_STYLE, styles={}):
|
||||
elif "b" in tag: s.bold = "1" in tag
|
||||
elif "u" in tag: s.underline = "1" in tag
|
||||
elif "s" in tag: s.strikeout = "1" in tag
|
||||
elif "p" in tag:
|
||||
try:
|
||||
scale = int(tag[2:])
|
||||
except (ValueError, IndexError):
|
||||
continue
|
||||
|
||||
s.drawing = scale > 0
|
||||
return s
|
||||
|
||||
overrides = SSAEvent.OVERRIDE_SEQUENCE.findall(text)
|
||||
@@ -150,14 +157,7 @@ class SubstationFormat(FormatBase):
|
||||
if format_ == "ass":
|
||||
return ass_rgba_to_color(v)
|
||||
else:
|
||||
try:
|
||||
return ssa_rgb_to_color(v)
|
||||
except ValueError:
|
||||
try:
|
||||
return ass_rgba_to_color(v)
|
||||
except:
|
||||
return Color(255, 255, 255, 0)
|
||||
|
||||
return ssa_rgb_to_color(v)
|
||||
elif f in {"bold", "underline", "italic", "strikeout"}:
|
||||
return v == "-1"
|
||||
elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
|
||||
@@ -229,7 +229,7 @@ class SubstationFormat(FormatBase):
|
||||
for k, v in subs.aegisub_project.items():
|
||||
print(k, v, sep=": ", file=fp)
|
||||
|
||||
def field_to_string(f, v):
|
||||
def field_to_string(f, v, line):
|
||||
if f in {"start", "end"}:
|
||||
return ms_to_timestamp(v)
|
||||
elif f == "marked":
|
||||
@@ -240,23 +240,31 @@ class SubstationFormat(FormatBase):
|
||||
return "-1" if v else "0"
|
||||
elif isinstance(v, (text_type, Number)):
|
||||
return text_type(v)
|
||||
elif not PY3 and isinstance(v, binary_string_type):
|
||||
# A convenience feature, see issue #12 - accept non-unicode strings
|
||||
# when they are ASCII; this is useful in Python 2, especially for non-text
|
||||
# fields like style names, where requiring Unicode type seems too stringent
|
||||
if all(ord(c) < 128 for c in v):
|
||||
return text_type(v)
|
||||
else:
|
||||
raise TypeError("Encountered binary string with non-ASCII codepoint in SubStation field {!r} for line {!r} - please use unicode string instead of str".format(f, line))
|
||||
elif isinstance(v, Color):
|
||||
if format_ == "ass":
|
||||
return color_to_ass_rgba(v)
|
||||
else:
|
||||
return color_to_ssa_rgb(v)
|
||||
else:
|
||||
raise TypeError("Unexpected type when writing a SubStation field")
|
||||
raise TypeError("Unexpected type when writing a SubStation field {!r} for line {!r}".format(f, line))
|
||||
|
||||
print("\n[V4+ Styles]" if format_ == "ass" else "\n[V4 Styles]", file=fp)
|
||||
print(STYLE_FORMAT_LINE[format_], file=fp)
|
||||
for name, sty in subs.styles.items():
|
||||
fields = [field_to_string(f, getattr(sty, f)) for f in STYLE_FIELDS[format_]]
|
||||
fields = [field_to_string(f, getattr(sty, f), sty) for f in STYLE_FIELDS[format_]]
|
||||
print("Style: %s" % name, *fields, sep=",", file=fp)
|
||||
|
||||
print("\n[Events]", file=fp)
|
||||
print(EVENT_FORMAT_LINE[format_], file=fp)
|
||||
for ev in subs.events:
|
||||
fields = [field_to_string(f, getattr(ev, f)) for f in EVENT_FIELDS[format_]]
|
||||
fields = [field_to_string(f, getattr(ev, f), ev) for f in EVENT_FIELDS[format_]]
|
||||
print(ev.type, end=": ", file=fp)
|
||||
print(*fields, sep=",", file=fp)
|
||||
|
||||
@@ -27,3 +27,8 @@ class ServiceUnavailable(ProviderError):
|
||||
class DownloadLimitExceeded(ProviderError):
|
||||
"""Exception raised by providers when download limit is exceeded."""
|
||||
pass
|
||||
|
||||
|
||||
class DownloadLimitPerDayExceeded(ProviderError):
|
||||
"""Exception raised by providers when download limit is exceeded."""
|
||||
pass
|
||||
|
||||
@@ -94,7 +94,8 @@ provider_manager = RegistrableExtensionManager('subliminal.providers', [
|
||||
'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider',
|
||||
'shooter = subliminal.providers.shooter:ShooterProvider',
|
||||
'thesubdb = subliminal.providers.thesubdb:TheSubDBProvider',
|
||||
'tvsubtitles = subliminal.providers.tvsubtitles:TVsubtitlesProvider'
|
||||
'tvsubtitles = subliminal.providers.tvsubtitles:TVsubtitlesProvider',
|
||||
'screwzira = subliminal.providers.screwzira:ScrewZiraProvider'
|
||||
])
|
||||
|
||||
#: Refiner manager
|
||||
|
||||
@@ -258,4 +258,4 @@ def fix_line_ending(content):
|
||||
:rtype: bytes
|
||||
|
||||
"""
|
||||
return content.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
|
||||
return content.replace(b'\r\n', b'\n')
|
||||
|
||||
@@ -12,6 +12,13 @@ from_subscene = {
|
||||
'Malay': 'msa', 'Pashto': 'pus', 'Punjabi': 'pan', 'Swahili': 'swa'
|
||||
}
|
||||
|
||||
from_subscene_with_country = {
|
||||
'Brazillian Portuguese': ('por', 'BR')
|
||||
}
|
||||
|
||||
to_subscene_with_country = {val: key for key, val in from_subscene_with_country.items()}
|
||||
|
||||
|
||||
to_subscene = {v: k for k, v in from_subscene.items()}
|
||||
|
||||
exact_languages_alpha3 = [
|
||||
@@ -34,12 +41,12 @@ language_ids = {
|
||||
'mkd': 48, 'mal': 64, 'mni': 65, 'mon': 72, 'pus': 67, 'pol': 31,
|
||||
'por': 32, 'pan': 66, 'rus': 34, 'srp': 35, 'sin': 58, 'slk': 36,
|
||||
'slv': 37, 'som': 70, 'tgl': 53, 'tam': 59, 'tel': 63, 'tha': 40,
|
||||
'tur': 41, 'ukr': 56, 'urd': 42, 'yor': 71
|
||||
'tur': 41, 'ukr': 56, 'urd': 42, 'yor': 71, 'pt-BR': 4
|
||||
}
|
||||
|
||||
# TODO: specify codes for unspecified_languages
|
||||
unspecified_languages = [
|
||||
'Big 5 code', 'Brazillian Portuguese', 'Bulgarian/ English',
|
||||
'Big 5 code', 'Bulgarian/ English',
|
||||
'Chinese BG code', 'Dutch/ English', 'English/ German',
|
||||
'Hungarian/ English', 'Rohingya'
|
||||
]
|
||||
@@ -50,6 +57,8 @@ alpha3_of_code = {l.name: l.alpha3 for l in supported_languages}
|
||||
|
||||
supported_languages.update({Language(l) for l in to_subscene})
|
||||
|
||||
supported_languages.update({Language(lang, cr) for lang, cr in to_subscene_with_country})
|
||||
|
||||
|
||||
class SubsceneConverter(LanguageReverseConverter):
|
||||
codes = {l.name for l in supported_languages}
|
||||
@@ -61,9 +70,15 @@ class SubsceneConverter(LanguageReverseConverter):
|
||||
if alpha3 in to_subscene:
|
||||
return to_subscene[alpha3]
|
||||
|
||||
if (alpha3, country) in to_subscene_with_country:
|
||||
return to_subscene_with_country[(alpha3, country)]
|
||||
|
||||
raise ConfigurationError('Unsupported language for subscene: %s, %s, %s' % (alpha3, country, script))
|
||||
|
||||
def reverse(self, code):
|
||||
if code in from_subscene_with_country:
|
||||
return from_subscene_with_country[code]
|
||||
|
||||
if code in from_subscene:
|
||||
return (from_subscene[code],)
|
||||
|
||||
|
||||
@@ -27,16 +27,6 @@ class TitloviConverter(LanguageReverseConverter):
|
||||
}
|
||||
self.codes = set(self.from_titlovi.keys())
|
||||
|
||||
# temporary fix, should be removed as soon as API is used
|
||||
self.lang_from_countrycode = {'ba': ('bos',),
|
||||
'en': ('eng',),
|
||||
'hr': ('hrv',),
|
||||
'mk': ('mkd',),
|
||||
'rs': ('srp',),
|
||||
'rsc': ('srp', None, 'Cyrl'),
|
||||
'si': ('slv',)
|
||||
}
|
||||
|
||||
def convert(self, alpha3, country=None, script=None):
|
||||
if (alpha3, country, script) in self.to_titlovi:
|
||||
return self.to_titlovi[(alpha3, country, script)]
|
||||
@@ -49,9 +39,5 @@ class TitloviConverter(LanguageReverseConverter):
|
||||
if titlovi in self.from_titlovi:
|
||||
return self.from_titlovi[titlovi]
|
||||
|
||||
# temporary fix, should be removed as soon as API is used
|
||||
if titlovi in self.lang_from_countrycode:
|
||||
return self.lang_from_countrycode[titlovi]
|
||||
|
||||
raise ConfigurationError('Unsupported language number for titlovi: %s' % titlovi)
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ from subliminal.core import guessit, ProviderPool, io, is_windows_special_path,
|
||||
ThreadPoolExecutor, check_video
|
||||
from subliminal_patch.exceptions import TooManyRequests, APIThrottled
|
||||
|
||||
from subzero.language import Language
|
||||
from subzero.language import Language, ENDSWITH_LANGUAGECODE_RE, FULL_LANGUAGE_LIST
|
||||
from scandir import scandir, scandir_generic as _scandir_generic
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -62,7 +62,7 @@ class SZProviderPool(ProviderPool):
|
||||
def __init__(self, providers=None, provider_configs=None, blacklist=None, throttle_callback=None,
|
||||
pre_download_hook=None, post_download_hook=None, language_hook=None):
|
||||
#: Name of providers to use
|
||||
self.providers = providers or provider_registry.names()
|
||||
self.providers = providers
|
||||
|
||||
#: Provider configuration
|
||||
self.provider_configs = provider_configs or {}
|
||||
@@ -186,12 +186,9 @@ class SZProviderPool(ProviderPool):
|
||||
except (requests.Timeout, socket.timeout):
|
||||
logger.error('Provider %r timed out', provider)
|
||||
|
||||
except (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable, APIThrottled), e:
|
||||
self.throttle_callback(provider, e)
|
||||
return
|
||||
|
||||
except:
|
||||
except Exception as e:
|
||||
logger.exception('Unexpected error in provider %r: %s', provider, traceback.format_exc())
|
||||
self.throttle_callback(provider, e)
|
||||
|
||||
def list_subtitles(self, video, languages):
|
||||
"""List subtitles.
|
||||
@@ -267,7 +264,7 @@ class SZProviderPool(ProviderPool):
|
||||
requests.exceptions.SSLError,
|
||||
requests.Timeout,
|
||||
socket.timeout):
|
||||
logger.error('Provider %r connection error', subtitle.provider_name)
|
||||
logger.exception('Provider %r connection error', subtitle.provider_name)
|
||||
|
||||
except ResponseNotReady:
|
||||
logger.error('Provider %r response error, reinitializing', subtitle.provider_name)
|
||||
@@ -283,14 +280,10 @@ class SZProviderPool(ProviderPool):
|
||||
logger.debug("RAR Traceback: %s", traceback.format_exc())
|
||||
return False
|
||||
|
||||
except (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable, APIThrottled), e:
|
||||
self.throttle_callback(subtitle.provider_name, e)
|
||||
self.discarded_providers.add(subtitle.provider_name)
|
||||
return False
|
||||
|
||||
except:
|
||||
except Exception as e:
|
||||
logger.exception('Unexpected error in provider %r, Traceback: %s', subtitle.provider_name,
|
||||
traceback.format_exc())
|
||||
self.throttle_callback(subtitle.provider_name, e)
|
||||
self.discarded_providers.add(subtitle.provider_name)
|
||||
return False
|
||||
|
||||
@@ -309,7 +302,8 @@ class SZProviderPool(ProviderPool):
|
||||
logger.error('Invalid subtitle')
|
||||
return False
|
||||
|
||||
subtitle.normalize()
|
||||
if not os.environ.get("SZ_KEEP_ENCODING", False):
|
||||
subtitle.normalize()
|
||||
|
||||
return True
|
||||
|
||||
@@ -360,15 +354,16 @@ class SZProviderPool(ProviderPool):
|
||||
orig_matches = matches.copy()
|
||||
|
||||
logger.debug('%r: Found matches %r', s, matches)
|
||||
score, score_without_hash = compute_score(matches, s, video, hearing_impaired=use_hearing_impaired)
|
||||
unsorted_subtitles.append(
|
||||
(s, compute_score(matches, s, video, hearing_impaired=use_hearing_impaired), matches, orig_matches))
|
||||
(s, score, score_without_hash, matches, orig_matches))
|
||||
|
||||
# sort subtitles by score
|
||||
scored_subtitles = sorted(unsorted_subtitles, key=operator.itemgetter(1), reverse=True)
|
||||
scored_subtitles = sorted(unsorted_subtitles, key=operator.itemgetter(1, 2), reverse=True)
|
||||
|
||||
# download best subtitles, falling back on the next on error
|
||||
downloaded_subtitles = []
|
||||
for subtitle, score, matches, orig_matches in scored_subtitles:
|
||||
for subtitle, score, score_without_hash, matches, orig_matches in scored_subtitles:
|
||||
# check score
|
||||
if score < min_score:
|
||||
logger.info('%r: Score %d is below min_score (%d)', subtitle, score, min_score)
|
||||
@@ -472,7 +467,7 @@ if is_windows_special_path:
|
||||
SZAsyncProviderPool = SZProviderPool
|
||||
|
||||
|
||||
def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, skip_hashing=False):
|
||||
def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, skip_hashing=False, hash_from=None):
|
||||
"""Scan a video from a `path`.
|
||||
|
||||
patch:
|
||||
@@ -537,28 +532,38 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
|
||||
video.alternative_titles.append(alt_guess["title"])
|
||||
logger.debug("Adding alternative title: %s", alt_guess["title"])
|
||||
|
||||
if dont_use_actual_file:
|
||||
if dont_use_actual_file and not hash_from:
|
||||
return video
|
||||
|
||||
# size and hashes
|
||||
if not skip_hashing:
|
||||
video.size = os.path.getsize(path)
|
||||
hash_path = hash_from or path
|
||||
video.size = os.path.getsize(hash_path)
|
||||
if video.size > 10485760:
|
||||
logger.debug('Size is %d', video.size)
|
||||
osub_hash = None
|
||||
|
||||
if "bsplayer" in providers:
|
||||
video.hashes['bsplayer'] = osub_hash = hash_opensubtitles(hash_path)
|
||||
|
||||
if "opensubtitles" in providers:
|
||||
video.hashes['opensubtitles'] = hash_opensubtitles(path)
|
||||
video.hashes['opensubtitles'] = osub_hash = osub_hash or hash_opensubtitles(hash_path)
|
||||
|
||||
if "shooter" in providers:
|
||||
video.hashes['shooter'] = hash_shooter(path)
|
||||
video.hashes['shooter'] = hash_shooter(hash_path)
|
||||
|
||||
if "thesubdb" in providers:
|
||||
video.hashes['thesubdb'] = hash_thesubdb(path)
|
||||
video.hashes['thesubdb'] = hash_thesubdb(hash_path)
|
||||
|
||||
if "napiprojekt" in providers:
|
||||
try:
|
||||
video.hashes['napiprojekt'] = hash_napiprojekt(path)
|
||||
video.hashes['napiprojekt'] = hash_napiprojekt(hash_path)
|
||||
except MemoryError:
|
||||
logger.warning(u"Couldn't compute napiprojekt hash for %s", path)
|
||||
logger.warning(u"Couldn't compute napiprojekt hash for %s", hash_path)
|
||||
|
||||
if "napisy24" in providers:
|
||||
# Napisy24 uses the same hash as opensubtitles
|
||||
video.hashes['napisy24'] = osub_hash or hash_opensubtitles(hash_path)
|
||||
|
||||
logger.debug('Computed hashes %r', video.hashes)
|
||||
else:
|
||||
@@ -567,14 +572,16 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
|
||||
return video
|
||||
|
||||
|
||||
def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False):
|
||||
def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False, match_strictness="strict"):
|
||||
dirpath, filename = os.path.split(path)
|
||||
dirpath = dirpath or '.'
|
||||
fileroot, fileext = os.path.splitext(filename)
|
||||
fn_no_ext, fileext = os.path.splitext(filename)
|
||||
fn_no_ext_lower = fn_no_ext.lower()
|
||||
subtitles = {}
|
||||
_scandir = _scandir_generic if scandir_generic else scandir
|
||||
|
||||
for entry in _scandir(dirpath):
|
||||
if not entry.name and not scandir_generic:
|
||||
if (not entry.name or entry.name in ('\x0c', '$', ',', '\x7f')) and not scandir_generic:
|
||||
logger.debug('Could not determine the name of the file, retrying with scandir_generic')
|
||||
return _search_external_subtitles(path, languages, only_one, True)
|
||||
if not entry.is_file(follow_symlinks=False):
|
||||
@@ -583,9 +590,11 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
|
||||
p = entry.name
|
||||
|
||||
# keep only valid subtitle filenames
|
||||
if not p.lower().startswith(fileroot.lower()) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
|
||||
if not p.lower().endswith(SUBTITLE_EXTENSIONS):
|
||||
continue
|
||||
|
||||
# not p.lower().startswith(fileroot.lower()) or not
|
||||
|
||||
p_root, p_ext = os.path.splitext(p)
|
||||
if not INCLUDE_EXOTIC_SUBS and p_ext not in (".srt", ".ass", ".ssa", ".vtt"):
|
||||
continue
|
||||
@@ -603,22 +612,34 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
|
||||
if adv_tag:
|
||||
forced = "forced" in adv_tag
|
||||
|
||||
# remove possible language code for matching
|
||||
p_root_bare = ENDSWITH_LANGUAGECODE_RE.sub(
|
||||
lambda m: "" if str(m.group(1)).lower() in FULL_LANGUAGE_LIST else m.group(0), p_root)
|
||||
|
||||
p_root_lower = p_root_bare.lower()
|
||||
|
||||
filename_matches = p_root_lower == fn_no_ext_lower
|
||||
filename_contains = p_root_lower in fn_no_ext_lower
|
||||
|
||||
if not filename_matches:
|
||||
if match_strictness == "strict" or (match_strictness == "loose" and not filename_contains):
|
||||
continue
|
||||
|
||||
language = None
|
||||
|
||||
# extract the potential language code
|
||||
language_code = p_root[len(fileroot):].replace('_', '-')[1:]
|
||||
|
||||
# default language is undefined
|
||||
language = Language('und')
|
||||
|
||||
# attempt to parse
|
||||
if language_code:
|
||||
try:
|
||||
language_code = p_root.rsplit(".", 1)[1].replace('_', '-')
|
||||
try:
|
||||
language = Language.fromietf(language_code)
|
||||
language.forced = forced
|
||||
except ValueError:
|
||||
except (ValueError, LanguageReverseError):
|
||||
logger.error('Cannot parse language code %r', language_code)
|
||||
language = None
|
||||
language_code = None
|
||||
except IndexError:
|
||||
language_code = None
|
||||
|
||||
elif not language_code and only_one:
|
||||
if not language and not language_code and only_one:
|
||||
language = Language.rebuild(list(languages)[0], forced=forced)
|
||||
|
||||
subtitles[p] = language
|
||||
@@ -628,7 +649,7 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
|
||||
return subtitles
|
||||
|
||||
|
||||
def search_external_subtitles(path, languages=None, only_one=False):
|
||||
def search_external_subtitles(path, languages=None, only_one=False, match_strictness="strict"):
|
||||
"""
|
||||
wrap original search_external_subtitles function to search multiple paths for one given video
|
||||
# todo: cleanup and merge with _search_external_subtitles
|
||||
@@ -649,10 +670,11 @@ def search_external_subtitles(path, languages=None, only_one=False):
|
||||
if os.path.isdir(os.path.dirname(abspath)):
|
||||
try:
|
||||
subtitles.update(_search_external_subtitles(abspath, languages=languages,
|
||||
only_one=only_one))
|
||||
only_one=only_one, match_strictness=match_strictness))
|
||||
except OSError:
|
||||
subtitles.update(_search_external_subtitles(abspath, languages=languages,
|
||||
only_one=only_one, scandir_generic=True))
|
||||
only_one=only_one, match_strictness=match_strictness,
|
||||
scandir_generic=True))
|
||||
logger.debug("external subs: found %s", subtitles)
|
||||
return subtitles
|
||||
|
||||
@@ -845,6 +867,9 @@ def save_subtitles(file_path, subtitles, single=False, directory=None, chmod=Non
|
||||
logger.debug(u"Saving %r to %r", subtitle, subtitle_path)
|
||||
content = subtitle.get_modified_content(format=format, debug=debug_mods)
|
||||
if content:
|
||||
if os.path.exists(subtitle_path):
|
||||
os.remove(subtitle_path)
|
||||
|
||||
with open(subtitle_path, 'w') as f:
|
||||
f.write(content)
|
||||
subtitle.storage_path = subtitle_path
|
||||
|
||||
@@ -9,3 +9,8 @@ class TooManyRequests(ProviderError):
|
||||
|
||||
class APIThrottled(ProviderError):
    """ProviderError subclass that adds no state or behaviour of its own."""
|
||||
|
||||
|
||||
class ParseResponseError(ProviderError):
    """Raised by a provider that is unable to parse the response it received."""
|
||||
|
||||
@@ -10,6 +10,8 @@ import logging
|
||||
import requests
|
||||
import xmlrpclib
|
||||
import dns.resolver
|
||||
import ipaddress
|
||||
import re
|
||||
|
||||
from requests import exceptions
|
||||
from urllib3.util import connection
|
||||
@@ -17,7 +19,13 @@ from retry.api import retry_call
|
||||
from exceptions import APIThrottled
|
||||
from dogpile.cache.api import NO_VALUE
|
||||
from subliminal.cache import region
|
||||
from cfscrape import CloudflareScraper
|
||||
from subliminal_patch.pitcher import pitchers
|
||||
from cloudscraper import CloudScraper, User_Agent
|
||||
|
||||
try:
|
||||
import brotli
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
@@ -55,39 +63,114 @@ class CertifiSession(TimeoutSession):
|
||||
self.verify = pem_file
|
||||
|
||||
|
||||
class CFSession(CloudflareScraper):
|
||||
def __init__(self):
|
||||
super(CFSession, self).__init__()
|
||||
class NeedsCaptchaException(Exception):
    """Plain Exception subclass; it defines no attributes or methods of its own."""
|
||||
|
||||
|
||||
class CFSession(CloudScraper):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(CFSession, self).__init__(*args, **kwargs)
|
||||
self.debug = os.environ.get("CF_DEBUG", False)
|
||||
|
||||
def _request(self, method, url, *args, **kwargs):
    """Perform a request and deal with Cloudflare's anti-bot answers.

    The request is sent through the ``request`` implementation that follows
    ``CloudScraper`` in the MRO. Afterwards this method handles brotli-encoded
    bodies, the IUAM challenge and the captcha challenge.

    :param method: HTTP method name
    :param url: URL to request
    :return: the response; when a captcha was solved, the response of the captcha
        submission request instead
    :raise Exception: when the captcha fields cannot be found in the challenge page
        or the captcha solver returns no result
    """
    ourSuper = super(CloudScraper, self)
    resp = ourSuper.request(method, url, *args, **kwargs)

    if resp.headers.get('Content-Encoding') == 'br':
        if self.allow_brotli and resp._content:
            resp._content = brotli.decompress(resp.content)
        else:
            logging.warning('Brotli content detected, But option is disabled, we will not continue.')
            return resp

    # Debug request
    if self.debug:
        self.debugRequest(resp)

    # Check if Cloudflare anti-bot is on
    try:
        if self.is_IUAM_Challenge(resp):
            if resp.request.method != 'GET':
                # Work around if the initial request is not a GET,
                # Supersede with a GET then re-request the original METHOD.
                CloudScraper.request(self, 'GET', resp.url)
                resp = ourSuper.request(method, url, *args, **kwargs)
            else:
                # Solve Challenge
                resp = self.Challenge_Response(resp, **kwargs)

    except ValueError as e:
        # only the "Captcha" ValueError is handled; any other ValueError falls
        # through to the final ``return resp`` exactly as before
        if str(e) == "Captcha":
            domain = urlparse(url).netloc

            # Collect the captcha form fields. A pattern that does not match yields
            # None instead of blowing up on ``.group(1)``, so the explicit check
            # below is the one that reports the problem.
            fields = []
            for pattern in (r'data-sitekey="(.+?)"',
                            r'type="hidden" name="s" value="(.+?)"',
                            r'data-ray="(.+?)"'):
                found = re.search(pattern, resp.content)
                fields.append(found.group(1) if found else None)

            site_key, challenge_s, challenge_ray = fields
            if not all(fields):
                raise Exception("cf: Captcha site-key not found!")

            pitcher = pitchers.get_pitcher()("cf: %s" % domain, resp.request.url, site_key,
                                             user_agent=self.headers["User-Agent"],
                                             cookies=self.cookies.get_dict(),
                                             is_invisible=True)

            # the scheme of the submit URL is taken from the URL that was finally answered
            parsed_url = urlparse(resp.url)
            logger.info("cf: %s: Solving captcha", domain)
            result = pitcher.throw()
            if not result:
                raise Exception("cf: Couldn't solve captcha!")

            submit_url = '{}://{}/cdn-cgi/l/chk_captcha'.format(parsed_url.scheme, domain)
            method = resp.request.method

            cloudflare_kwargs = {
                'allow_redirects': False,
                'headers': {'Referer': resp.url},
                'params': OrderedDict(
                    [
                        ('s', challenge_s),
                        ('g-recaptcha-response', result)
                    ]
                )
            }

            return CloudScraper.request(self, method, submit_url, **cloudflare_kwargs)

    return resp
|
||||
|
||||
def request(self, method, url, *args, **kwargs):
|
||||
parsed_url = urlparse(url)
|
||||
domain = parsed_url.netloc
|
||||
|
||||
cache_key = "cf_data2_%s" % domain
|
||||
cache_key = "cf_data3_%s" % domain
|
||||
|
||||
if not self.cookies.get("__cfduid", "", domain=domain):
|
||||
if not self.cookies.get("cf_clearance", "", domain=domain):
|
||||
cf_data = region.get(cache_key)
|
||||
if cf_data is not NO_VALUE:
|
||||
cf_cookies, user_agent, hdrs = cf_data
|
||||
cf_cookies, hdrs = cf_data
|
||||
logger.debug("Trying to use old cf data for %s: %s", domain, cf_data)
|
||||
for cookie, value in cf_cookies.iteritems():
|
||||
self.cookies.set(cookie, value, domain=domain)
|
||||
|
||||
self._hdrs = hdrs
|
||||
self._ua = user_agent
|
||||
self.headers['User-Agent'] = self._ua
|
||||
self.headers = hdrs
|
||||
|
||||
ret = super(CFSession, self).request(method, url, *args, **kwargs)
|
||||
ret = self._request(method, url, *args, **kwargs)
|
||||
|
||||
try:
|
||||
cf_data = self.get_cf_live_tokens(domain)
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
if cf_data != region.get(cache_key) and cf_data[0]["__cfduid"] and cf_data[0]["cf_clearance"]:
|
||||
logger.debug("Storing cf data for %s: %s", domain, cf_data)
|
||||
region.set(cache_key, cf_data)
|
||||
if cf_data and "cf_clearance" in cf_data[0] and cf_data[0]["cf_clearance"]:
|
||||
if cf_data != region.get(cache_key):
|
||||
logger.debug("Storing cf data for %s: %s", domain, cf_data)
|
||||
region.set(cache_key, cf_data)
|
||||
elif cf_data[0]["cf_clearance"]:
|
||||
logger.debug("CF Live tokens not updated")
|
||||
|
||||
return ret
|
||||
|
||||
@@ -101,11 +184,11 @@ class CFSession(CloudflareScraper):
|
||||
"Unable to find Cloudflare cookies. Does the site actually have "
|
||||
"Cloudflare IUAM (\"I'm Under Attack Mode\") enabled?")
|
||||
|
||||
return (OrderedDict([
|
||||
return (OrderedDict(filter(lambda x: x[1], [
|
||||
("__cfduid", self.cookies.get("__cfduid", "", domain=cookie_domain)),
|
||||
("cf_clearance", self.cookies.get("cf_clearance", "", domain=cookie_domain))
|
||||
]),
|
||||
self._ua, self._hdrs
|
||||
])),
|
||||
self.headers
|
||||
)
|
||||
|
||||
|
||||
@@ -161,12 +244,20 @@ class SubZeroRequestsTransport(xmlrpclib.SafeTransport):
|
||||
# change our user agent to reflect Requests
|
||||
user_agent = "Python XMLRPC with Requests (python-requests.org)"
|
||||
proxies = None
|
||||
xm_ver = 1
|
||||
session_var = "PHPSESSID"
|
||||
|
||||
def __init__(self, use_https=True, verify=None, user_agent=None, timeout=10, *args, **kwargs):
|
||||
self.verify = pem_file if verify is None else verify
|
||||
self.use_https = use_https
|
||||
self.user_agent = user_agent if user_agent is not None else self.user_agent
|
||||
self.timeout = timeout
|
||||
self.session = requests.Session()
|
||||
self.session.headers['User-Agent'] = self.user_agent
|
||||
# if 'requests' in self.session.headers['User-Agent']:
|
||||
# # Set a random User-Agent if no custom User-Agent has been set
|
||||
# self.session.headers = User_Agent(allow_brotli=False).headers
|
||||
|
||||
proxy = os.environ.get('SZ_HTTP_PROXY')
|
||||
if proxy:
|
||||
self.proxies = {
|
||||
@@ -180,18 +271,40 @@ class SubZeroRequestsTransport(xmlrpclib.SafeTransport):
|
||||
"""
|
||||
Make an xmlrpc request.
|
||||
"""
|
||||
headers = {'User-Agent': self.user_agent}
|
||||
url = self._build_url(host, handler)
|
||||
cache_key = "xm%s_%s" % (self.xm_ver, host)
|
||||
|
||||
old_sessvar = self.session.cookies.get(self.session_var, "")
|
||||
if not old_sessvar:
|
||||
data = region.get(cache_key)
|
||||
if data is not NO_VALUE:
|
||||
logger.debug("Trying to re-use headers/cookies for %s" % host)
|
||||
self.session.cookies, self.session.headers = data
|
||||
old_sessvar = self.session.cookies.get(self.session_var, "")
|
||||
|
||||
try:
|
||||
resp = requests.post(url, data=request_body, headers=headers,
|
||||
stream=True, timeout=self.timeout, proxies=self.proxies,
|
||||
verify=self.verify)
|
||||
resp = self.session.post(url, data=request_body,
|
||||
stream=True, timeout=self.timeout, proxies=self.proxies,
|
||||
verify=self.verify)
|
||||
|
||||
if self.session_var in resp.cookies and resp.cookies[self.session_var] != old_sessvar:
|
||||
logger.debug("Storing %s cookies" % host)
|
||||
region.set(cache_key, [self.session.cookies, self.session.headers])
|
||||
except ValueError:
|
||||
logger.debug("Wiping cookies/headers cache (VE) for %s" % host)
|
||||
region.delete(cache_key)
|
||||
raise
|
||||
except Exception:
|
||||
logger.debug("Wiping cookies/headers cache (EX) for %s" % host)
|
||||
region.delete(cache_key)
|
||||
raise # something went wrong
|
||||
else:
|
||||
resp.raise_for_status()
|
||||
try:
|
||||
resp.raise_for_status()
|
||||
except requests.exceptions.HTTPError:
|
||||
logger.debug("Wiping cookies/headers cache (RE) for %s" % host)
|
||||
region.delete(cache_key)
|
||||
raise
|
||||
|
||||
try:
|
||||
if 'x-ratelimit-remaining' in resp.headers and int(resp.headers['x-ratelimit-remaining']) <= 2:
|
||||
@@ -236,41 +349,46 @@ def patch_create_connection():
|
||||
global _custom_resolver, _custom_resolver_ips, dns_cache
|
||||
host, port = address
|
||||
|
||||
__custom_resolver_ips = os.environ.get("dns_resolvers", None)
|
||||
try:
|
||||
ipaddress.ip_address(unicode(host))
|
||||
except (ipaddress.AddressValueError, ValueError):
|
||||
__custom_resolver_ips = os.environ.get("dns_resolvers", None)
|
||||
|
||||
# resolver ips changed in the meantime?
|
||||
if __custom_resolver_ips != _custom_resolver_ips:
|
||||
_custom_resolver = None
|
||||
_custom_resolver_ips = __custom_resolver_ips
|
||||
dns_cache = {}
|
||||
# resolver ips changed in the meantime?
|
||||
if __custom_resolver_ips != _custom_resolver_ips:
|
||||
_custom_resolver = None
|
||||
_custom_resolver_ips = __custom_resolver_ips
|
||||
dns_cache = {}
|
||||
|
||||
custom_resolver = _custom_resolver
|
||||
custom_resolver = _custom_resolver
|
||||
|
||||
if not custom_resolver:
|
||||
if _custom_resolver_ips:
|
||||
logger.debug("DNS: Trying to use custom DNS resolvers: %s", _custom_resolver_ips)
|
||||
custom_resolver = dns.resolver.Resolver(configure=False)
|
||||
custom_resolver.lifetime = 8.0
|
||||
try:
|
||||
custom_resolver.nameservers = json.loads(_custom_resolver_ips)
|
||||
except:
|
||||
logger.debug("DNS: Couldn't load custom DNS resolvers: %s", _custom_resolver_ips)
|
||||
if not custom_resolver:
|
||||
if _custom_resolver_ips:
|
||||
logger.debug("DNS: Trying to use custom DNS resolvers: %s", _custom_resolver_ips)
|
||||
custom_resolver = dns.resolver.Resolver(configure=False)
|
||||
custom_resolver.lifetime = os.environ.get("dns_resolvers_timeout", 8.0)
|
||||
try:
|
||||
custom_resolver.nameservers = json.loads(_custom_resolver_ips)
|
||||
except:
|
||||
logger.debug("DNS: Couldn't load custom DNS resolvers: %s", _custom_resolver_ips)
|
||||
else:
|
||||
_custom_resolver = custom_resolver
|
||||
|
||||
if custom_resolver:
|
||||
if host in dns_cache:
|
||||
ip = dns_cache[host]
|
||||
logger.debug("DNS: Using %s=%s from cache", host, ip)
|
||||
return _orig_create_connection((ip, port), *args, **kwargs)
|
||||
else:
|
||||
_custom_resolver = custom_resolver
|
||||
|
||||
if custom_resolver:
|
||||
if host in dns_cache:
|
||||
ip = dns_cache[host]
|
||||
logger.debug("DNS: Using %s=%s from cache", host, ip)
|
||||
else:
|
||||
try:
|
||||
ip = custom_resolver.query(host)[0].address
|
||||
logger.debug("DNS: Resolved %s to %s using %s", host, ip, custom_resolver.nameservers)
|
||||
dns_cache[host] = ip
|
||||
except dns.exception.DNSException:
|
||||
logger.warning("DNS: Couldn't resolve %s with DNS: %s", host, custom_resolver.nameservers)
|
||||
raise
|
||||
try:
|
||||
ip = custom_resolver.query(host)[0].address
|
||||
logger.debug("DNS: Resolved %s to %s using %s", host, ip, custom_resolver.nameservers)
|
||||
dns_cache[host] = ip
|
||||
return _orig_create_connection((ip, port), *args, **kwargs)
|
||||
except dns.exception.DNSException:
|
||||
logger.warning("DNS: Couldn't resolve %s with DNS: %s", host, custom_resolver.nameservers)
|
||||
|
||||
logger.debug("DNS: Falling back to default DNS or IP on %s", host)
|
||||
return _orig_create_connection((host, port), *args, **kwargs)
|
||||
|
||||
patch_create_connection._sz_patched = True
|
||||
|
||||
@@ -2,15 +2,20 @@
|
||||
import logging
|
||||
import re
|
||||
import datetime
|
||||
import types
|
||||
|
||||
import subliminal
|
||||
import time
|
||||
|
||||
from random import randint
|
||||
|
||||
from dogpile.cache.api import NO_VALUE
|
||||
from requests import Session
|
||||
from subliminal.cache import region
|
||||
from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError
|
||||
from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError, ConfigurationError, \
|
||||
DownloadLimitPerDayExceeded
|
||||
from subliminal.providers.addic7ed import Addic7edProvider as _Addic7edProvider, \
|
||||
Addic7edSubtitle as _Addic7edSubtitle, ParserBeautifulSoup, show_cells_re
|
||||
Addic7edSubtitle as _Addic7edSubtitle, ParserBeautifulSoup
|
||||
from subliminal.subtitle import fix_line_ending
|
||||
from subliminal_patch.utils import sanitize
|
||||
from subliminal_patch.exceptions import TooManyRequests
|
||||
@@ -19,6 +24,8 @@ from subzero.language import Language
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
show_cells_re = re.compile(b'<td class="(?:version|vr)">.*?</td>', re.DOTALL)
|
||||
|
||||
#: Series header parsing regex
|
||||
series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),*&!?-]+?)(?: \((?P<year>\d{4})\))?$')
|
||||
|
||||
@@ -60,16 +67,22 @@ class Addic7edProvider(_Addic7edProvider):
|
||||
'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'tha', 'tur', 'ukr', 'vie', 'zho'
|
||||
]} | {Language.fromietf(l) for l in ["sr-Latn", "sr-Cyrl"]}
|
||||
|
||||
vip = False
|
||||
USE_ADDICTED_RANDOM_AGENTS = False
|
||||
hearing_impaired_verifiable = True
|
||||
subtitle_class = Addic7edSubtitle
|
||||
server_url = 'https://www.addic7ed.com/'
|
||||
|
||||
sanitize_characters = {'-', ':', '(', ')', '.', '/'}
|
||||
last_show_ids_fetch_key = "addic7ed_last_id_fetch"
|
||||
|
||||
def __init__(self, username=None, password=None, use_random_agents=False):
|
||||
def __init__(self, username=None, password=None, use_random_agents=False, is_vip=False):
|
||||
super(Addic7edProvider, self).__init__(username=username, password=password)
|
||||
self.USE_ADDICTED_RANDOM_AGENTS = use_random_agents
|
||||
self.vip = is_vip
|
||||
|
||||
if not all((username, password)):
|
||||
raise ConfigurationError('Username and password must be specified')
|
||||
|
||||
def initialize(self):
|
||||
self.session = Session()
|
||||
@@ -101,13 +114,18 @@ class Addic7edProvider(_Addic7edProvider):
|
||||
'remember': 'true'}
|
||||
|
||||
tries = 0
|
||||
while tries < 3:
|
||||
while tries <= 3:
|
||||
tries += 1
|
||||
r = self.session.get(self.server_url + 'login.php', timeout=10, headers={"Referer": self.server_url})
|
||||
if "grecaptcha" in r.content:
|
||||
if "g-recaptcha" in r.content or "grecaptcha" in r.content:
|
||||
logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
|
||||
'happen once every so often')
|
||||
|
||||
site_key = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.content).group(1)
|
||||
for g, s in (("g-recaptcha-response", r'g-recaptcha.+?data-sitekey=\"(.+?)\"'),
|
||||
("recaptcha_response", r'grecaptcha.execute\(\'(.+?)\',')):
|
||||
site_key = re.search(s, r.content).group(1)
|
||||
if site_key:
|
||||
break
|
||||
if not site_key:
|
||||
logger.error("Addic7ed: Captcha site-key not found!")
|
||||
return
|
||||
@@ -119,23 +137,31 @@ class Addic7edProvider(_Addic7edProvider):
|
||||
|
||||
result = pitcher.throw()
|
||||
if not result:
|
||||
raise Exception("Addic7ed: Couldn't solve captcha!")
|
||||
if tries >= 3:
|
||||
raise Exception("Addic7ed: Couldn't solve captcha!")
|
||||
logger.info("Addic7ed: Couldn't solve captcha! Retrying")
|
||||
time.sleep(4)
|
||||
continue
|
||||
|
||||
data["recaptcha_response"] = result
|
||||
data[g] = result
|
||||
|
||||
time.sleep(1)
|
||||
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
|
||||
headers={"Referer": self.server_url + "login.php"})
|
||||
|
||||
if "relax, slow down" in r.content:
|
||||
raise TooManyRequests(self.username)
|
||||
|
||||
if r.status_code != 302:
|
||||
if "User <b></b> doesn't exist" in r.content and tries <= 2:
|
||||
logger.info("Addic7ed: Error, trying again. (%s/%s)", tries+1, 3)
|
||||
tries += 1
|
||||
continue
|
||||
|
||||
if "Wrong password" in r.content or "doesn't exist" in r.content:
|
||||
raise AuthenticationError(self.username)
|
||||
|
||||
if r.status_code != 302:
|
||||
if tries >= 3:
|
||||
logger.error("Addic7ed: Something went wrong when logging in")
|
||||
raise AuthenticationError(self.username)
|
||||
logger.info("Addic7ed: Something went wrong when logging in; retrying")
|
||||
time.sleep(4)
|
||||
continue
|
||||
break
|
||||
|
||||
store_verification("addic7ed", self.session)
|
||||
@@ -143,10 +169,12 @@ class Addic7edProvider(_Addic7edProvider):
|
||||
logger.debug('Addic7ed: Logged in')
|
||||
self.logged_in = True
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
def terminate(self):
|
||||
self.session.close()
|
||||
|
||||
def get_show_id(self, series, year=None, country_code=None):
|
||||
def get_show_id(self, series, year=None, country_code=None, ignore_cache=False):
|
||||
"""Get the best matching show id for `series`, `year` and `country_code`.
|
||||
|
||||
First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`.
|
||||
@@ -158,32 +186,45 @@ class Addic7edProvider(_Addic7edProvider):
|
||||
:type country_code: str
|
||||
:return: the show id, if found.
|
||||
:rtype: int
|
||||
|
||||
"""
|
||||
series_sanitized = sanitize(series).lower()
|
||||
show_ids = self._get_show_ids()
|
||||
show_id = None
|
||||
ids_to_look_for = {sanitize(series).lower(), sanitize(series.replace(".", "")).lower()}
|
||||
show_ids = self._get_show_ids()
|
||||
if ignore_cache or not show_ids:
|
||||
show_ids = self._get_show_ids.refresh(self)
|
||||
|
||||
# attempt with country
|
||||
if not show_id and country_code:
|
||||
logger.debug('Getting show id with country')
|
||||
show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower()))
|
||||
logger.debug("Trying show ids: %s", ids_to_look_for)
|
||||
for series_sanitized in ids_to_look_for:
|
||||
# attempt with country
|
||||
if not show_id and country_code:
|
||||
logger.debug('Getting show id with country')
|
||||
show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower()))
|
||||
|
||||
# attempt with year
|
||||
if not show_id and year:
|
||||
logger.debug('Getting show id with year')
|
||||
show_id = show_ids.get('%s %d' % (series_sanitized, year))
|
||||
# attempt with year
|
||||
if not show_id and year:
|
||||
logger.debug('Getting show id with year')
|
||||
show_id = show_ids.get('%s %d' % (series_sanitized, year))
|
||||
|
||||
# attempt clean
|
||||
if not show_id:
|
||||
logger.debug('Getting show id')
|
||||
show_id = show_ids.get(series_sanitized)
|
||||
# attempt clean
|
||||
if not show_id:
|
||||
logger.debug('Getting show id')
|
||||
show_id = show_ids.get(series_sanitized)
|
||||
|
||||
# search as last resort
|
||||
# broken right now
|
||||
# if not show_id:
|
||||
# logger.warning('Series %s not found in show ids', series)
|
||||
# show_id = self._search_show_id(series)
|
||||
if not show_id:
|
||||
now = datetime.datetime.now()
|
||||
last_fetch = region.get(self.last_show_ids_fetch_key)
|
||||
|
||||
# re-fetch show ids once per day if any show ID not found
|
||||
if not ignore_cache and last_fetch != NO_VALUE and last_fetch + datetime.timedelta(days=1) < now:
|
||||
logger.info("Show id not found; re-fetching show ids")
|
||||
return self.get_show_id(series, year=year, country_code=country_code, ignore_cache=True)
|
||||
logger.debug("Not refreshing show ids, as the last fetch has been too recent")
|
||||
|
||||
# search as last resort
|
||||
# broken right now
|
||||
# if not show_id:
|
||||
# logger.warning('Series %s not found in show ids', series)
|
||||
# show_id = self._search_show_id(series)
|
||||
|
||||
return show_id
|
||||
|
||||
@@ -197,6 +238,8 @@ class Addic7edProvider(_Addic7edProvider):
|
||||
"""
|
||||
# get the show page
|
||||
logger.info('Getting show ids')
|
||||
region.set(self.last_show_ids_fetch_key, datetime.datetime.now())
|
||||
|
||||
r = self.session.get(self.server_url + 'shows.php', timeout=10)
|
||||
r.raise_for_status()
|
||||
|
||||
@@ -205,14 +248,15 @@ class Addic7edProvider(_Addic7edProvider):
|
||||
# Assuming the site's markup is bad, and stripping it down to only contain what's needed.
|
||||
show_cells = re.findall(show_cells_re, r.content)
|
||||
if show_cells:
|
||||
soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
|
||||
soup = ParserBeautifulSoup(b''.join(show_cells).decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
|
||||
else:
|
||||
# If RegEx fails, fall back to original r.content and use 'html.parser'
|
||||
soup = ParserBeautifulSoup(r.content, ['html.parser'])
|
||||
|
||||
# populate the show ids
|
||||
show_ids = {}
|
||||
for show in soup.select('td > h3 > a[href^="/show/"]'):
|
||||
shows = soup.select('td > h3 > a[href^="/show/"]')
|
||||
for show in shows:
|
||||
show_clean = sanitize(show.text, default_characters=self.sanitize_characters)
|
||||
try:
|
||||
show_id = int(show['href'][6:])
|
||||
@@ -230,6 +274,9 @@ class Addic7edProvider(_Addic7edProvider):
|
||||
|
||||
logger.debug('Found %d show ids', len(show_ids))
|
||||
|
||||
if not show_ids:
|
||||
raise Exception("Addic7ed: No show IDs found!")
|
||||
|
||||
return show_ids
|
||||
|
||||
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
|
||||
@@ -329,7 +376,7 @@ class Addic7edProvider(_Addic7edProvider):
|
||||
|
||||
# ignore incomplete subtitles
|
||||
status = cells[5].text
|
||||
if status != 'Completed':
|
||||
if "%" in status:
|
||||
logger.debug('Ignoring subtitle with status %s', status)
|
||||
continue
|
||||
|
||||
@@ -355,6 +402,27 @@ class Addic7edProvider(_Addic7edProvider):
|
||||
return subtitles
|
||||
|
||||
def download_subtitle(self, subtitle):
|
||||
last_dls = region.get("addic7ed_dls")
|
||||
now = datetime.datetime.now()
|
||||
one_day = datetime.timedelta(hours=24)
|
||||
|
||||
def raise_limit():
|
||||
logger.info("Addic7ed: Downloads per day exceeded (%s)", cap)
|
||||
raise DownloadLimitPerDayExceeded
|
||||
|
||||
if not isinstance(last_dls, types.ListType):
|
||||
last_dls = []
|
||||
else:
|
||||
# filter all non-expired DLs
|
||||
last_dls = filter(lambda t: t + one_day > now, last_dls)
|
||||
region.set("addic7ed_dls", last_dls)
|
||||
|
||||
cap = self.vip and 80 or 40
|
||||
amount = len(last_dls)
|
||||
|
||||
if amount >= cap:
|
||||
raise_limit()
|
||||
|
||||
# download the subtitle
|
||||
r = self.session.get(self.server_url + subtitle.download_link, headers={'Referer': subtitle.page_link},
|
||||
timeout=10)
|
||||
@@ -366,7 +434,7 @@ class Addic7edProvider(_Addic7edProvider):
|
||||
if not r.content:
|
||||
# Provider wrongfully returns a status of 304 Not Modified with empty content
|
||||
# raise_for_status won't raise exception for that status code
|
||||
logger.error('Unable to download subtitle. No data returned from provider')
|
||||
logger.error('Addic7ed: Unable to download subtitle. No data returned from provider')
|
||||
return
|
||||
|
||||
# detect download limit exceeded
|
||||
@@ -374,3 +442,10 @@ class Addic7edProvider(_Addic7edProvider):
|
||||
raise DownloadLimitExceeded
|
||||
|
||||
subtitle.content = fix_line_ending(r.content)
|
||||
last_dls.append(datetime.datetime.now())
|
||||
region.set("addic7ed_dls", last_dls)
|
||||
logger.info("Addic7ed: Used %s/%s downloads", amount + 1, cap)
|
||||
|
||||
if amount + 1 >= cap:
|
||||
raise_limit()
|
||||
|
||||
|
||||
@@ -23,9 +23,10 @@ class ArgenteamSubtitle(Subtitle):
|
||||
hearing_impaired_verifiable = False
|
||||
_release_info = None
|
||||
|
||||
def __init__(self, language, download_link, movie_kind, title, season, episode, year, release, version, source,
|
||||
def __init__(self, language, page_link, download_link, movie_kind, title, season, episode, year, release, version, source,
|
||||
video_codec, tvdb_id, imdb_id, asked_for_episode=None, asked_for_release_group=None, *args, **kwargs):
|
||||
super(ArgenteamSubtitle, self).__init__(language, download_link, *args, **kwargs)
|
||||
super(ArgenteamSubtitle, self).__init__(language, page_link=page_link, *args, **kwargs)
|
||||
self.page_link = page_link
|
||||
self.download_link = download_link
|
||||
self.movie_kind = movie_kind
|
||||
self.title = title
|
||||
@@ -135,7 +136,8 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
|
||||
provider_name = 'argenteam'
|
||||
languages = {Language.fromalpha2(l) for l in ['es']}
|
||||
video_types = (Episode, Movie)
|
||||
API_URL = "http://argenteam.net/api/v1/"
|
||||
BASE_URL = "https://argenteam.net/"
|
||||
API_URL = BASE_URL + "api/v1/"
|
||||
subtitle_class = ArgenteamSubtitle
|
||||
hearing_impaired_verifiable = False
|
||||
language_list = list(languages)
|
||||
@@ -240,12 +242,15 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
|
||||
|
||||
for r in content['releases']:
|
||||
for s in r['subtitles']:
|
||||
sub = ArgenteamSubtitle(language, s['uri'], "episode" if is_episode else "movie", returned_title,
|
||||
movie_kind = "episode" if is_episode else "movie"
|
||||
page_link = self.BASE_URL + movie_kind + "/" + str(aid)
|
||||
# use https and new domain
|
||||
download_link = s['uri'].replace('http://www.argenteam.net/', self.BASE_URL)
|
||||
sub = ArgenteamSubtitle(language, page_link, download_link, movie_kind, returned_title,
|
||||
season, episode, year, r.get('team'), r.get('tags'),
|
||||
r.get('source'), r.get('codec'), content.get("tvdb"), imdb_id,
|
||||
asked_for_release_group=video.release_group,
|
||||
asked_for_episode=episode
|
||||
)
|
||||
asked_for_episode=episode)
|
||||
subtitles.append(sub)
|
||||
|
||||
if has_multiple_ids:
|
||||
|
||||
@@ -0,0 +1,213 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
import io
|
||||
import os
|
||||
|
||||
from requests import Session
|
||||
from guessit import guessit
|
||||
from subliminal_patch.providers import Provider
|
||||
from subliminal_patch.subtitle import Subtitle
|
||||
from subliminal.utils import sanitize_release_group
|
||||
from subliminal.subtitle import guess_matches
|
||||
from subzero.language import Language
|
||||
|
||||
import gzip
|
||||
import random
|
||||
from time import sleep
|
||||
from xml.etree import ElementTree
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BSPlayerSubtitle(Subtitle):
    """BSPlayer Subtitle."""
    provider_name = 'bsplayer'
    hash_verifiable = True

    def __init__(self, language, filename, subtype, video, link):
        """Store the search result data on the subtitle.

        :param language: language of the subtitle
        :param filename: subtitle file name; also used as release info
        :param subtype: subtitle format as reported by the search result
        :param video: the video the search was made for
        :param link: download link; stored as ``page_link`` and used as id
        """
        super(BSPlayerSubtitle, self).__init__(language)
        self.video = video
        self.subtype = subtype
        self.page_link = link
        self.filename = filename
        self.language = language

    @property
    def id(self):
        """The link of the subtitle doubles as its identifier."""
        return self.page_link

    @property
    def release_info(self):
        """The subtitle file name."""
        return self.filename

    def get_matches(self, video):
        """Return the guessit-based matches of the file name plus ``'hash'``."""
        found = {'hash'}
        found |= guess_matches(video, guessit(self.filename))
        return found
|
||||
|
||||
|
||||
class BSPlayerProvider(Provider):
|
||||
"""BSPlayer Provider."""
|
||||
languages = {Language('por', 'BR')} | {Language(l) for l in [
|
||||
'ara', 'bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'fin', 'fra', 'hun', 'ita', 'jpn', 'kor', 'nld', 'pol', 'por',
|
||||
'ron', 'rus', 'spa', 'swe', 'tur', 'ukr', 'zho'
|
||||
]}
|
||||
SEARCH_THROTTLE = 8
|
||||
hash_verifiable = True
|
||||
|
||||
# blatantly based on kodi's bsplayer plugin
|
||||
# also took from BSPlayer-Subtitles-Downloader
|
||||
def __init__(self):
    """Set the provider up right away by delegating to :meth:`initialize`."""
    self.initialize()
|
||||
|
||||
def initialize(self):
    """Create the HTTP session, pick an API sub-domain and log in.

    ``__init__`` already calls this method, so a later explicit call would
    replace ``self.session``; the previous session is closed first so it is
    not left open.
    """
    previous = getattr(self, 'session', None)
    if previous is not None:
        previous.close()

    self.session = Session()
    self.search_url = self.get_sub_domain()
    # reset the token before login(), which returns early when one is set
    self.token = None
    self.login()
|
||||
|
||||
def terminate(self):
    """Log out of the API, then close the HTTP session.

    ``logout`` sends its request through ``self.session`` (via ``api_request``),
    so it runs before the session is closed. The ``finally`` clause still
    guarantees the session is closed when the logout request fails.
    """
    try:
        self.logout()
    finally:
        self.session.close()
|
||||
|
||||
def api_request(self, func_name='logIn', params='', tries=5):
    """Send a SOAP request to the BSPlayer API and return the parsed XML answer.

    :param func_name: name of the API function, used in the SOAPAction header and
        as the element name inside the SOAP body
    :param params: XML fragment placed inside the function element
    :param tries: maximum number of attempts
    :return: root element of the parsed response
    :raise Exception: when every attempt failed
    """
    headers = {
        'User-Agent': 'BSPlayer/2.x (1022.12360)',
        'Content-Type': 'text/xml; charset=utf-8',
        'Connection': 'close',
        'SOAPAction': '"http://api.bsplayer-subtitles.com/v1.php#{func_name}"'.format(func_name=func_name)
    }
    # NOTE(review): the envelope embeds the search URL that is current at this point;
    # it is not rebuilt when a failed logIn attempt picks another sub-domain below.
    data = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" '
        'xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" '
        'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ns1="{search_url}">'
        '<SOAP-ENV:Body SOAP-ENV:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">'
        '<ns1:{func_name}>{params}</ns1:{func_name}></SOAP-ENV:Body></SOAP-ENV:Envelope>'
    ).format(search_url=self.search_url, func_name=func_name, params=params)
    logger.info('Sending request: %s.', func_name)

    # the headers are identical for every attempt, so set them once
    self.session.headers.update(headers)

    for attempt in range(1, tries + 1):
        try:
            res = self.session.post(self.search_url, data)
            return ElementTree.fromstring(res.text)
        except Exception as ex:
            # best effort: any failure (network or XML parsing) leads to a retry
            logger.info("ERROR: %s.", ex)
            if func_name == 'logIn':
                # try another API sub-domain for the next login attempt
                self.search_url = self.get_sub_domain()

            # no point in waiting when there is no attempt left
            if attempt < tries:
                sleep(1)

    logger.info('ERROR: Too many tries (%d)...', tries)
    raise Exception('Too many tries...')
|
||||
|
||||
def login(self):
    """Log in anonymously and store the returned token.

    :return: ``True`` when a token is already present or the login succeeded,
        ``False`` otherwise (including a response without the expected elements)
    """
    # If already logged in
    if self.token:
        return True

    root = self.api_request(
        func_name='logIn',
        params=('<username></username>'
                '<password></password>'
                '<AppID>BSPlayer v2.67</AppID>')
    )

    # find() returns None for a missing element; compare against None explicitly
    # because an Element without children is falsy
    res = root.find('.//return')
    status = res.find('status') if res is not None else None
    token = res.find('data') if res is not None else None

    if status is not None and status.text == 'OK' and token is not None:
        self.token = token.text
        logger.info("Logged In Successfully.")
        return True
    return False
|
||||
|
||||
def logout(self):
    """Log out of the API and forget the token.

    :return: ``True`` when there was no token or the API confirmed the logout,
        ``False`` otherwise (including a response without the expected elements)
    """
    # If already logged out / not logged in
    if not self.token:
        return True

    root = self.api_request(
        func_name='logOut',
        params='<handle>{token}</handle>'.format(token=self.token)
    )
    # the token is dropped whatever the answer says
    self.token = None

    # find() returns None for a missing element
    res = root.find('.//return')
    status = res.find('status') if res is not None else None
    if status is not None and status.text == 'OK':
        logger.info("Logged Out Successfully.")
        return True
    return False
|
||||
|
||||
def query(self, video, video_hash, language):
    """Search the BSPlayer API for subtitles matching a video hash.

    Args:
        video: video being searched for; its ``imdb_id`` and ``size``
            attributes are sent to the API.
        video_hash: BSPlayer hash of the video file.
        language: a single language or a collection of languages; each
            must expose an ``opensubtitles`` code.

    Returns:
        list: one ``BSPlayerSubtitle`` per item in the reply, or an empty
        list when login fails or the API does not answer ``OK``.
    """
    if not self.login():
        return []

    # Accept a lone language as well as any collection; previously anything
    # other than tuple/list/set left ``language_ids`` unbound.
    if not isinstance(language, (tuple, list, set, frozenset)):
        language = [language]
    language_ids = ','.join(sorted(l.opensubtitles for l in language))

    # '*' is sent in place of the IMDB id when the video has none.
    imdb_id = '*' if video.imdb_id is None else video.imdb_id

    sleep(self.SEARCH_THROTTLE)
    root = self.api_request(
        func_name='searchSubtitles',
        params=(
            '<handle>{token}</handle>'
            '<movieHash>{movie_hash}</movieHash>'
            '<movieSize>{movie_size}</movieSize>'
            '<languageId>{language_ids}</languageId>'
            '<imdbId>{imdbId}</imdbId>'
        ).format(token=self.token, movie_hash=video_hash,
                 movie_size=video.size, language_ids=language_ids, imdbId=imdb_id)
    )

    res = root.find('.//return/result')
    # A reply without <result>/<status> is treated like a non-OK status.
    status = res.find('status') if res is not None else None
    if status is None or status.text != 'OK':
        return []

    items = root.findall('.//return/data/item')
    if items:
        logger.info("Subtitles Found.")

    subtitles = []
    for item in items:
        download_link = item.find('subDownloadLink').text
        sub_lang = Language.fromopensubtitles(item.find('subLang').text)
        sub_name = item.find('subName').text
        sub_format = item.find('subFormat').text
        subtitles.append(
            BSPlayerSubtitle(sub_lang, sub_name, sub_format, video, download_link)
        )
    return subtitles
|
||||
|
||||
def list_subtitles(self, video, languages):
    """Look up subtitles for ``video`` using its ``bsplayer`` hash.

    Delegates to ``self.query`` and returns whatever it returns.
    """
    bsplayer_hash = video.hashes['bsplayer']
    return self.query(video, bsplayer_hash, languages)
|
||||
|
||||
def get_sub_domain(self):
    """Return the API URL of one randomly chosen BSPlayer sub-domain."""
    # s1-9, s101-109
    hosts = ['s%d' % n for n in range(1, 10)] + ['s%d' % n for n in range(101, 110)]
    pick = random.randint(0, len(hosts) - 1)
    url_template = "http://{sub_domain}.api.bsplayer-subtitles.com/v1.php"
    return url_template.format(sub_domain=hosts[pick])
|
||||
|
||||
def download_subtitle(self, subtitle):
    """Download the gzip-compressed subtitle at ``subtitle.page_link``.

    The decompressed bytes are stored in ``subtitle.content`` and
    ``subtitle.normalize()`` is called afterwards.

    Args:
        subtitle: subtitle object exposing ``page_link``, ``content`` and
            ``normalize()``.

    Returns:
        The same ``subtitle`` object, with its content filled in.

    Raises:
        ValueError: when the response is falsy, or when its body is the
            literal text ``'500'``.
    """
    session = Session()
    try:
        session.headers.update({
            'User-Agent': 'Mozilla/4.0 (compatible; Synapse)'
        })
        # A timeout keeps a stalled server from blocking the caller forever.
        res = session.get(subtitle.page_link, timeout=10)
        if not res:
            raise ValueError('Problems conecting to the server')
        if res.text == '500':
            raise ValueError('Error 500 on server')

        with gzip.GzipFile(fileobj=io.BytesIO(res.content)) as gf:
            subtitle.content = gf.read()
        subtitle.normalize()
        return subtitle
    finally:
        # The one-off session was never closed before, leaking its
        # connection pool on every download.
        session.close()
|
||||
@@ -0,0 +1,124 @@
|
||||
import logging
|
||||
import os
|
||||
from io import BytesIO
|
||||
from zipfile import ZipFile
|
||||
|
||||
from requests import Session
|
||||
|
||||
from subliminal_patch.subtitle import Subtitle
|
||||
from subliminal_patch.providers import Provider
|
||||
from subliminal import __short_version__
|
||||
from subliminal.exceptions import AuthenticationError, ConfigurationError
|
||||
from subliminal.subtitle import fix_line_ending
|
||||
from subzero.language import Language
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Napisy24Subtitle(Subtitle):
    '''Subtitle returned by the Napisy24 hash lookup.'''
    provider_name = 'napisy24'

    def __init__(self, language, hash, imdb_id, napis_id):
        super(Napisy24Subtitle, self).__init__(language)
        self.napis_id = napis_id
        self.imdb_id = imdb_id
        self.hash = hash

    @property
    def id(self):
        '''The video hash doubles as the subtitle identifier.'''
        return self.hash

    def get_matches(self, video):
        '''Return the set of criteria on which this subtitle matches ``video``.

        ``hash`` is reported when the video carries a ``napisy24`` hash equal
        to this subtitle's hash; ``imdb_id`` when the video has an IMDB id
        equal to this subtitle's.
        '''
        hash_hit = 'napisy24' in video.hashes and video.hashes['napisy24'] == self.hash
        imdb_hit = bool(video.imdb_id) and self.imdb_id == video.imdb_id
        checks = (('hash', hash_hit), ('imdb_id', imdb_hit))
        return {label for label, hit in checks if hit}
|
||||
|
||||
|
||||
class Napisy24Provider(Provider):
    '''Napisy24 provider: looks subtitles up by file hash via CheckSubAgent.'''
    languages = {Language(l) for l in ['pol']}
    required_hash = 'napisy24'
    api_url = 'http://napisy24.pl/run/CheckSubAgent.php'

    def __init__(self, username=None, password=None):
        # The built-in account is used unless BOTH credentials are supplied.
        if all((username, password)):
            self.username = username
            self.password = password
        else:
            self.username = 'subliminal'
            self.password = 'lanimilbus'

        self.session = None

    def initialize(self):
        '''Create the HTTP session used for all API calls.'''
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
        self.session.headers['Content-Type'] = 'application/x-www-form-urlencoded'

    def terminate(self):
        '''Close the HTTP session.'''
        self.session.close()

    def query(self, language, size, name, hash):
        '''Ask the API for a subtitle matching the given file size/hash/name.

        The reply is split on the first ``||``: the part before it is a
        ``|``-separated status line (status code first, then ``key:value``
        fields), the part after it is a zip archive holding the subtitle.

        Returns:
            Napisy24Subtitle with its content already filled in, or None
            when the API reports no usable subtitle or the reply is
            malformed.

        Raises:
            AuthenticationError: when the reply starts with ``login error``.
        '''
        params = {
            'postAction': 'CheckSub',
            'ua': self.username,
            'ap': self.password,
            'fs': size,
            'fh': hash,
            'fn': os.path.basename(name),
            'n24pref': 1
        }

        response = self.session.post(self.api_url, data=params, timeout=10)
        response.raise_for_status()

        response_content = response.content.split(b'||', 1)
        n24_data = response_content[0].decode()

        if n24_data[:2] != 'OK':
            if n24_data[:11] == 'login error':
                raise AuthenticationError('Login failed')
            logger.error('Unknown response: %s', response.content)
            return None

        n24_status = n24_data[:4]
        if n24_status == 'OK-0':
            logger.info('No subtitles found')
            return None

        # Fields without a ':' are skipped; they used to make dict() raise
        # ValueError on a malformed status line.
        subtitle_info = dict(p.split(':', 1) for p in n24_data.split('|')[1:] if ':' in p)
        logger.debug('Subtitle info: %s', subtitle_info)

        if n24_status == 'OK-1':
            logger.info('No subtitles found but got video info')
            return None
        elif n24_status == 'OK-2':
            logger.info('Found subtitles')
        elif n24_status == 'OK-3':
            logger.info('Found subtitles but not from Napisy24 database')
            return None

        # Without a '||' separator there is no archive to unpack; indexing
        # element 1 used to raise IndexError here.
        if len(response_content) < 2:
            logger.error('Unknown response: %s', response.content)
            return None
        subtitle_content = response_content[1]

        subtitle = Napisy24Subtitle(language, hash, 'tt%s' % subtitle_info['imdb'].zfill(7), subtitle_info['napisId'])
        with ZipFile(BytesIO(subtitle_content)) as zf:
            # ZipFile.read() opens and closes the member itself, unlike the
            # previous open(...).read() which left the member handle open.
            subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))

        return subtitle

    def list_subtitles(self, video, languages):
        '''Query once per requested language and drop the empty results.'''
        subtitles = [self.query(l, video.size, video.name, video.hashes['napisy24']) for l in languages]
        return [s for s in subtitles if s is not None]

    def download_subtitle(self, subtitle):
        # there is no download step, content is already filled from listing subtitles
        pass
|
||||
@@ -105,7 +105,7 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
|
||||
|
||||
def __init__(self, username=None, password=None, use_tag_search=False, only_foreign=False, also_foreign=False,
|
||||
skip_wrong_fps=True, is_vip=False, use_ssl=True, timeout=15):
|
||||
if any((username, password)) and not all((username, password)):
|
||||
if not all((username, password)):
|
||||
raise ConfigurationError('Username and password must be specified')
|
||||
|
||||
self.username = username or ''
|
||||
@@ -154,6 +154,7 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
|
||||
logger.debug('Logged in with token %r', self.token[:10]+"X"*(len(self.token)-10))
|
||||
|
||||
region.set("os_token", self.token)
|
||||
time.sleep(1)
|
||||
|
||||
def use_token_or_login(self, func):
|
||||
if not self.token:
|
||||
@@ -162,6 +163,7 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
|
||||
try:
|
||||
return func()
|
||||
except Unauthorized:
|
||||
logger.debug("Token not valid, logging in again")
|
||||
self.log_in()
|
||||
return func()
|
||||
|
||||
@@ -197,16 +199,11 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
|
||||
return
|
||||
|
||||
logger.error("Login failed, please check your credentials")
|
||||
raise
|
||||
|
||||
def terminate(self):
|
||||
if self.token:
|
||||
try:
|
||||
checked(lambda: self.server.LogOut(self.token))
|
||||
except:
|
||||
logger.error("Logout failed: %s", traceback.format_exc())
|
||||
|
||||
self.server = None
|
||||
self.token = None
|
||||
#self.token = None
|
||||
|
||||
def list_subtitles(self, video, languages):
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,241 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from requests import Session
|
||||
import json
|
||||
import logging
|
||||
from subzero.language import Language
|
||||
from bs4 import BeautifulSoup
|
||||
from guessit import guessit
|
||||
|
||||
from subliminal_patch.providers import Provider
|
||||
from subliminal.providers import Episode, Movie
|
||||
from subliminal_patch.utils import sanitize
|
||||
from subliminal_patch.subtitle import Subtitle, guess_matches
|
||||
from subliminal.subtitle import fix_line_ending
|
||||
|
||||
__author__ = "Dor Nizar"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ScrewZiraSubtitle(Subtitle):
    """Subtitle listed by ScrewZira for a movie or a series episode."""
    provider_name = 'screwzira'

    def __init__(self, language, title_id, subtitle_id, series, season, episode, release, year):
        # NOTE(review): subtitle_id used to be passed as the second positional
        # argument of Subtitle.__init__, which in subliminal is
        # ``hearing_impaired`` - confirm subliminal_patch keeps that order.
        # The identifier is exposed through the ``id`` property instead.
        super(ScrewZiraSubtitle, self).__init__(language)
        self.title_id = title_id
        self.subtitle_id = subtitle_id
        self.series = series
        self.season = season
        self.episode = episode
        self.release = release
        self.year = year

    def get_matches(self, video):
        """Return the set of criteria on which this subtitle matches ``video``.

        Compares series/season/episode for episodes and title/year for
        movies, then adds whatever ``guess_matches`` derives from the
        release name.
        """
        matches = set()
        logger.debug("--ScrewZiraSubtitle--\n%s", self.__dict__)

        # episode
        if isinstance(video, Episode):
            # series
            if video.series and sanitize(self.series) == sanitize(video.series):
                matches.add('series')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')
            # guess
            matches |= guess_matches(video, guessit(self.release, {'type': 'episode'}))
        # movie
        elif isinstance(video, Movie):
            # title (``self.series`` holds the title name for movies too)
            if video.title and (sanitize(self.series) in (
                    sanitize(name) for name in [video.title] + video.alternative_titles)):
                matches.add('title')
            # year
            if video.year and self.year == video.year:
                matches.add('year')
            # guess
            matches |= guess_matches(video, guessit(self.release, {'type': 'movie'}))

        logger.debug("ScrewZira subtitle criteria match:\n%s", matches)
        return matches

    @property
    def id(self):
        """Identifier of the subtitle on ScrewZira."""
        return self.subtitle_id
|
||||
|
||||
|
||||
class ScrewZiraProvider(Provider):
    """Provider for www.screwzira.com (Hebrew subtitles only)."""
    subtitle_class = ScrewZiraSubtitle
    languages = {Language.fromalpha2(l) for l in ['he']}
    URL_SERVER = 'https://www.screwzira.com/'

    URI_SEARCH_TITLE = 'Services/ContentProvider.svc/GetSearchForecast'
    URI_SEARCH_SERIES_SUBTITLE = 'Services/GetModuleAjax.ashx'
    URI_SEARCH_MOVIE_SUBTITLE = "MovieInfo.aspx"
    URI_REQ_SUBTITLE_ID = "Services/ContentProvider.svc/RequestSubtitleDownload"
    URI_DOWNLOAD_SUBTITLE = "Services/DownloadFile.ashx"

    def __init__(self):
        self.session = None

    def initialize(self):
        """Create the HTTP session and set a browser User-Agent on it."""
        logger.debug("ScrewZira initialize")
        self.session = Session()
        self.session.headers[
            'User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; ' \
                            'Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'

    def terminate(self):
        """Close the HTTP session."""
        logger.debug("ScrewZira terminate")
        self.session.close()

    def _search_series(self, title):
        """Return the title candidates the site lists for ``title``.

        The reply is JSON; when it has a ``d`` key, that value is itself a
        JSON document which is decoded again. The result is the ``Items``
        entry, or an empty list when the reply cannot be decoded or has no
        ``Items``.
        """
        logger.debug("Searching '{}'".format(title))
        title_request = {
            "request": {
                "SearchString": title,
                "SearchType": "Film"
            }
        }
        r = self.session.post(self.URL_SERVER + self.URI_SEARCH_TITLE, json=title_request, allow_redirects=False,
                              timeout=10)
        r.raise_for_status()
        series_found = r.json()
        if isinstance(series_found, dict) and 'd' in series_found:
            try:
                series_found = json.loads(series_found['d'])
            except (TypeError, ValueError):
                # Previously this stored None and the membership test below
                # raised TypeError.
                return []
        if isinstance(series_found, dict) and 'Items' in series_found:
            return series_found['Items']
        return []

    def _search_subtitles(self, title_id, season=None, episode=None):
        """Return ``(subtitle_id, release)`` pairs listed for a title.

        The series endpoint is used when both ``season`` and ``episode``
        are truthy, the movie page otherwise. Pairs are scraped from the
        ``a.fa`` anchors of the returned HTML.
        """
        if season and episode:
            params = {
                'moduleName': 'SubtitlesList',
                'SeriesID': title_id,
                'Season': season,
                'Episode': episode
            }
            url = self.URL_SERVER + self.URI_SEARCH_SERIES_SUBTITLE
        else:
            params = {
                'ID': title_id,
            }
            url = self.URL_SERVER + self.URI_SEARCH_MOVIE_SUBTITLE

        # Same timeout as the other requests of this provider.
        r = self.session.get(url=url, params=params, timeout=10)
        r.raise_for_status()
        results = r.content
        if not results:
            return []

        subtitles = BeautifulSoup(results, 'html.parser').select('a.fa')
        logger.debug("[BS4] Elements found:\n{}".format(subtitles))
        subtitle_list = []
        for anchor in subtitles:
            subtitle_id = anchor.attrs.get('data-subtitle-id')
            if subtitle_id is None:
                # Anchor of the same CSS class that is not a subtitle entry.
                continue
            release = anchor.findParent().findParent().text.strip().split('\n')[0]
            subtitle_list.append((subtitle_id, release))

        return subtitle_list  # [(Subtitle ID, name), (....)]

    def _req_download_identifier(self, title_id, subtitle_id):
        """Request the download identifier of a subtitle.

        Returns:
            The ``DownloadIdentifier`` value of the reply, or None when the
            reply cannot be decoded or does not contain one.
        """
        logger.debug("Request subtitle identifier for: title id: {}, subtitle id: {}".format(title_id, subtitle_id))
        data = {
            'request': {
                'FilmID': title_id,
                'SubtitleID': subtitle_id,
                'FontSize': 0,
                'FontColor': "",
                'PredefinedLayout': -1
            }
        }

        r = self.session.post(self.URL_SERVER + self.URI_REQ_SUBTITLE_ID, json=data, allow_redirects=False,
                              timeout=10)
        r.raise_for_status()
        try:
            reply = json.loads(r.json()['d'])
        except (KeyError, TypeError, ValueError):
            # A missing 'd' key (KeyError) used to escape this handler.
            reply = {}

        if not isinstance(reply, dict) or 'DownloadIdentifier' not in reply:
            logger.error("Download Identifier not found")
            return None
        return reply['DownloadIdentifier']

    def _download_subtitles(self, download_id):
        """Return the raw subtitle bytes for ``download_id``, or None if the body is empty."""
        logger.debug("Downloading subtitles by download identifier - {}".format(download_id))
        data = {'DownloadIdentifier': download_id}
        r = self.session.get(self.URL_SERVER + self.URI_DOWNLOAD_SUBTITLE, params=data,
                             timeout=10)
        r.raise_for_status()
        if not r.content:
            logger.debug("Download subtitle failed")
            return None

        logger.debug("Download subtitle success")
        return r.content

    def query(self, title, season=None, episode=None, year=None):
        """Return subtitle candidates for every title the site matches to ``title``.

        Each candidate is a ``subtitle_class`` instance in the provider's
        only language, carrying the candidate title's ``EngName``.
        """
        subtitles = []
        titles = self._search_series(title)
        if season and episode:
            logger.debug("Searching for:\nTitle: {}\nSeason: {}\nEpisode: {}\nYear: {}".format(title, season,
                                                                                             episode, year))
        else:
            logger.debug("Searching for:\nTitle: {}\nYear: {}\n".format(title, year))

        language = next(iter(self.languages))
        # ``candidate`` rather than ``title``: the loop used to shadow the
        # ``title`` parameter.
        for candidate in titles:
            logger.debug("Title Candidate: {}".format(candidate))
            title_id = candidate['ID']
            # _search_subtitles itself picks the movie lookup unless both
            # season and episode are truthy.
            result = self._search_subtitles(title_id, season, episode)
            if not result:
                continue

            for subtitle_id, release in result:
                subtitles.append(self.subtitle_class(language, title_id, subtitle_id,
                                                     candidate['EngName'], season, episode, release, year))

        if subtitles:
            logger.debug("Found Subtitle Candidates: {}".format(subtitles))
        return subtitles

    def list_subtitles(self, video, languages):
        """List subtitles for ``video`` restricted to the requested ``languages``."""
        season = episode = year = title = None

        if isinstance(video, Episode):
            logger.info("list_subtitles Series: {}, season: {}, episode: {}".format(video.series,
                                                                                   video.season,
                                                                                   video.episode))
            title = video.series
            season = video.season
            episode = video.episode
        elif isinstance(video, Movie):
            logger.info("list_subtitles Movie: {}, year: {}".format(video.title, video.year))
            title = video.title
            year = video.year

        if not title:
            # Neither an Episode nor a Movie, or no name to search for.
            return []

        return [s for s in self.query(title, season, episode, year) if s.language in languages]

    def download_subtitle(self, subtitle):
        # type: (ScrewZiraSubtitle) -> None
        """Fetch the subtitle content and store it in ``subtitle.content``.

        The content is left untouched when the download identifier or the
        file itself cannot be retrieved.
        """
        logger.info('Downloading subtitle from ScrewZira: %r', subtitle)
        download_id = self._req_download_identifier(subtitle.title_id, subtitle.subtitle_id)
        if not download_id:
            logger.debug('Unable to retrieve download identifier')
            return None

        content = self._download_subtitles(download_id)
        if not content:
            logger.debug('Unable to download subtitle')
            return None

        subtitle.content = fix_line_ending(content)
|
||||
@@ -4,29 +4,34 @@ import io
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import traceback
|
||||
|
||||
import requests
|
||||
|
||||
import inflect
|
||||
import cfscrape
|
||||
import re
|
||||
import json
|
||||
import HTMLParser
|
||||
import urlparse
|
||||
|
||||
from random import randint
|
||||
from zipfile import ZipFile
|
||||
|
||||
from babelfish import language_converters
|
||||
from guessit import guessit
|
||||
from dogpile.cache.api import NO_VALUE
|
||||
from subliminal import Episode, ProviderError
|
||||
from subliminal.cache import region
|
||||
from subliminal.exceptions import ConfigurationError, ServiceUnavailable
|
||||
from subliminal.utils import sanitize_release_group
|
||||
from subliminal.cache import region
|
||||
from subliminal_patch.http import RetryingCFSession
|
||||
from subliminal_patch.providers import Provider
|
||||
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
|
||||
from subliminal_patch.subtitle import Subtitle, guess_matches
|
||||
from subliminal_patch.converters.subscene import language_ids, supported_languages
|
||||
from subscene_api.subscene import search, Subtitle as APISubtitle
|
||||
from subscene_api.subscene import search, Subtitle as APISubtitle, SITE_DOMAIN
|
||||
from subzero.language import Language
|
||||
|
||||
p = inflect.engine()
|
||||
|
||||
|
||||
language_converters.register('subscene = subliminal_patch.converters.subscene:SubsceneConverter')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -117,28 +122,106 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
|
||||
skip_wrong_fps = False
|
||||
hearing_impaired_verifiable = True
|
||||
only_foreign = False
|
||||
username = None
|
||||
password = None
|
||||
|
||||
search_throttle = 2 # seconds
|
||||
search_throttle = 8 # seconds
|
||||
|
||||
def __init__(self, only_foreign=False, username=None, password=None):
|
||||
if not all((username, password)):
|
||||
raise ConfigurationError('Username and password must be specified')
|
||||
|
||||
def __init__(self, only_foreign=False):
|
||||
self.only_foreign = only_foreign
|
||||
self.username = username
|
||||
self.password = password
|
||||
|
||||
def initialize(self):
|
||||
logger.info("Creating session")
|
||||
self.session = RetryingCFSession()
|
||||
|
||||
prev_cookies = region.get("subscene_cookies2")
|
||||
if prev_cookies != NO_VALUE:
|
||||
logger.debug("Re-using old subscene cookies: %r", prev_cookies)
|
||||
self.session.cookies.update(prev_cookies)
|
||||
|
||||
else:
|
||||
logger.debug("Logging in")
|
||||
self.login()
|
||||
|
||||
def login(self):
|
||||
r = self.session.get("https://subscene.com/account/login")
|
||||
if "Server Error" in r.content:
|
||||
logger.error("Login unavailable; Maintenance?")
|
||||
raise ServiceUnavailable("Login unavailable; Maintenance?")
|
||||
|
||||
match = re.search(r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>", r.content)
|
||||
|
||||
if match:
|
||||
h = HTMLParser.HTMLParser()
|
||||
data = json.loads(h.unescape(match.group(1)))
|
||||
login_url = urlparse.urljoin(data["siteUrl"], data["loginUrl"])
|
||||
time.sleep(1.0)
|
||||
|
||||
r = self.session.post(login_url,
|
||||
{
|
||||
"username": self.username,
|
||||
"password": self.password,
|
||||
data["antiForgery"]["name"]: data["antiForgery"]["value"]
|
||||
})
|
||||
pep_content = re.search(r"<form method=\"post\" action=\"https://subscene\.com/\">"
|
||||
r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?"
|
||||
r"access_token\".+?value=\"(?P<access_token>.+?)\".+?"
|
||||
r"token_type.+?value=\"(?P<token_type>.+?)\".+?"
|
||||
r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?"
|
||||
r"scope.+?value=\"(?P<scope>.+?)\".+?"
|
||||
r"state.+?value=\"(?P<state>.+?)\".+?"
|
||||
r"session_state.+?value=\"(?P<session_state>.+?)\"",
|
||||
r.content, re.MULTILINE | re.DOTALL)
|
||||
|
||||
if pep_content:
|
||||
r = self.session.post(SITE_DOMAIN, pep_content.groupdict())
|
||||
try:
|
||||
r.raise_for_status()
|
||||
except Exception:
|
||||
raise ProviderError("Something went wrong when trying to log in: %s", traceback.format_exc())
|
||||
else:
|
||||
cj = self.session.cookies.copy()
|
||||
store_cks = ("scene", "idsrv", "idsrv.xsrf", "idsvr.clients", "idsvr.session", "idsvr.username")
|
||||
for cn in self.session.cookies.iterkeys():
|
||||
if cn not in store_cks:
|
||||
del cj[cn]
|
||||
|
||||
logger.debug("Storing cookies: %r", cj)
|
||||
region.set("subscene_cookies2", cj)
|
||||
return
|
||||
raise ProviderError("Something went wrong when trying to log in #1")
|
||||
|
||||
def terminate(self):
|
||||
logger.info("Closing session")
|
||||
self.session.close()
|
||||
|
||||
def _create_filters(self, languages):
|
||||
self.filters = dict(HearingImpaired="2")
|
||||
acc_filters = self.filters.copy()
|
||||
if self.only_foreign:
|
||||
self.filters["ForeignOnly"] = "True"
|
||||
acc_filters["ForeignOnly"] = self.filters["ForeignOnly"].lower()
|
||||
logger.info("Only searching for foreign/forced subtitles")
|
||||
|
||||
self.filters["LanguageFilter"] = ",".join((str(language_ids[l.alpha3]) for l in languages
|
||||
if l.alpha3 in language_ids))
|
||||
selected_ids = []
|
||||
for l in languages:
|
||||
lid = language_ids.get(l.basename, language_ids.get(l.alpha3, None))
|
||||
if lid:
|
||||
selected_ids.append(str(lid))
|
||||
|
||||
acc_filters["SelectedIds"] = selected_ids
|
||||
self.filters["LanguageFilter"] = ",".join(acc_filters["SelectedIds"])
|
||||
|
||||
last_filters = region.get("subscene_filters")
|
||||
if last_filters != acc_filters:
|
||||
region.set("subscene_filters", acc_filters)
|
||||
logger.debug("Setting account filters to %r", acc_filters)
|
||||
self.session.post("https://u.subscene.com/filter", acc_filters, allow_redirects=False)
|
||||
|
||||
logger.debug("Filter created: '%s'" % self.filters)
|
||||
|
||||
@@ -181,7 +264,11 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
|
||||
def parse_results(self, video, film):
|
||||
subtitles = []
|
||||
for s in film.subtitles:
|
||||
subtitle = SubsceneSubtitle.from_api(s)
|
||||
try:
|
||||
subtitle = SubsceneSubtitle.from_api(s)
|
||||
except NotImplementedError, e:
|
||||
logger.info(e)
|
||||
continue
|
||||
subtitle.asked_for_release_group = video.release_group
|
||||
if isinstance(video, Episode):
|
||||
subtitle.asked_for_episode = video.episode
|
||||
@@ -194,10 +281,16 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
|
||||
|
||||
return subtitles
|
||||
|
||||
def do_search(self, *args, **kwargs):
|
||||
try:
|
||||
return search(*args, **kwargs)
|
||||
except requests.HTTPError:
|
||||
region.delete("subscene_cookies2")
|
||||
|
||||
def query(self, video):
|
||||
vfn = get_video_filename(video)
|
||||
# vfn = get_video_filename(video)
|
||||
subtitles = []
|
||||
#logger.debug(u"Searching for: %s", vfn)
|
||||
# logger.debug(u"Searching for: %s", vfn)
|
||||
# film = search(vfn, session=self.session)
|
||||
#
|
||||
# if film and film.subtitles:
|
||||
@@ -206,16 +299,17 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
|
||||
# else:
|
||||
# logger.debug('No release results found')
|
||||
|
||||
#time.sleep(self.search_throttle)
|
||||
# time.sleep(self.search_throttle)
|
||||
|
||||
# re-search for episodes without explicit release name
|
||||
if isinstance(video, Episode):
|
||||
#term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
|
||||
more_than_one = len([video.series] + video.alternative_series) > 1
|
||||
for series in [video.series] + video.alternative_series:
|
||||
titles = list(set([video.series] + video.alternative_series))[:2]
|
||||
# term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
|
||||
more_than_one = len(titles) > 1
|
||||
for series in titles:
|
||||
term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize())
|
||||
logger.debug('Searching for alternative results: %s', term)
|
||||
film = search(term, session=self.session, release=False)
|
||||
film = self.do_search(term, session=self.session, release=False, throttle=self.search_throttle)
|
||||
if film and film.subtitles:
|
||||
logger.debug('Alternative results found: %s', len(film.subtitles))
|
||||
subtitles += self.parse_results(video, film)
|
||||
@@ -223,25 +317,27 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
|
||||
logger.debug('No alternative results found')
|
||||
|
||||
# packs
|
||||
if video.season_fully_aired:
|
||||
term = u"%s S%02i" % (series, video.season)
|
||||
logger.debug('Searching for packs: %s', term)
|
||||
time.sleep(self.search_throttle)
|
||||
film = search(term, session=self.session)
|
||||
if film and film.subtitles:
|
||||
logger.debug('Pack results found: %s', len(film.subtitles))
|
||||
subtitles += self.parse_results(video, film)
|
||||
else:
|
||||
logger.debug('No pack results found')
|
||||
else:
|
||||
logger.debug("Not searching for packs, because the season hasn't fully aired")
|
||||
# if video.season_fully_aired:
|
||||
# term = u"%s S%02i" % (series, video.season)
|
||||
# logger.debug('Searching for packs: %s', term)
|
||||
# time.sleep(self.search_throttle)
|
||||
# film = search(term, session=self.session, throttle=self.search_throttle)
|
||||
# if film and film.subtitles:
|
||||
# logger.debug('Pack results found: %s', len(film.subtitles))
|
||||
# subtitles += self.parse_results(video, film)
|
||||
# else:
|
||||
# logger.debug('No pack results found')
|
||||
# else:
|
||||
# logger.debug("Not searching for packs, because the season hasn't fully aired")
|
||||
if more_than_one:
|
||||
time.sleep(self.search_throttle)
|
||||
else:
|
||||
more_than_one = len([video.title] + video.alternative_titles) > 1
|
||||
for title in [video.title] + video.alternative_titles:
|
||||
logger.debug('Searching for movie results: %s', title)
|
||||
film = search(title, year=video.year, session=self.session, limit_to=None, release=False)
|
||||
titles = list(set([video.title] + video.alternative_titles))[:2]
|
||||
more_than_one = len(titles) > 1
|
||||
for title in titles:
|
||||
logger.debug('Searching for movie results: %r', title)
|
||||
film = self.do_search(title, year=video.year, session=self.session, limit_to=None, release=False,
|
||||
throttle=self.search_throttle)
|
||||
if film and film.subtitles:
|
||||
subtitles += self.parse_results(video, film)
|
||||
if more_than_one:
|
||||
|
||||
@@ -2,42 +2,35 @@
|
||||
|
||||
import io
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
import dateutil.parser
|
||||
|
||||
import rarfile
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from zipfile import ZipFile, is_zipfile
|
||||
from rarfile import RarFile, is_rarfile
|
||||
from babelfish import language_converters, Script
|
||||
from requests import RequestException
|
||||
from requests import RequestException, codes as request_codes
|
||||
from guessit import guessit
|
||||
from subliminal_patch.http import RetryingCFSession
|
||||
from subliminal_patch.providers import Provider
|
||||
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
|
||||
from subliminal_patch.subtitle import Subtitle
|
||||
from subliminal_patch.utils import sanitize, fix_inconsistent_naming as _fix_inconsistent_naming
|
||||
from subliminal.exceptions import ProviderError
|
||||
from subliminal.exceptions import ProviderError, AuthenticationError, ConfigurationError
|
||||
from subliminal.score import get_equivalent_release_groups
|
||||
from subliminal.utils import sanitize_release_group
|
||||
from subliminal.subtitle import guess_matches
|
||||
from subliminal.video import Episode, Movie
|
||||
from subliminal.subtitle import fix_line_ending
|
||||
from subliminal_patch.pitcher import pitchers, load_verification, store_verification
|
||||
from subzero.language import Language
|
||||
|
||||
from random import randint
|
||||
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
|
||||
from subzero.language import Language
|
||||
from dogpile.cache.api import NO_VALUE
|
||||
from subliminal.cache import region
|
||||
|
||||
# parsing regex definitions
|
||||
title_re = re.compile(r'(?P<title>(?:.+(?= [Aa][Kk][Aa] ))|.+)(?:(?:.+)(?P<altitle>(?<= [Aa][Kk][Aa] ).+))?')
|
||||
lang_re = re.compile(r'(?<=flags/)(?P<lang>.{2})(?:.)(?P<script>c?)(?:.+)')
|
||||
season_re = re.compile(r'Sezona (?P<season>\d+)')
|
||||
episode_re = re.compile(r'Epizoda (?P<episode>\d+)')
|
||||
year_re = re.compile(r'(?P<year>\d+)')
|
||||
fps_re = re.compile(r'fps: (?P<fps>.+)')
|
||||
|
||||
|
||||
def fix_inconsistent_naming(title):
|
||||
@@ -51,6 +44,7 @@ def fix_inconsistent_naming(title):
|
||||
return _fix_inconsistent_naming(title, {"DC's Legends of Tomorrow": "Legends of Tomorrow",
|
||||
"Marvel's Jessica Jones": "Jessica Jones"})
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Configure :mod:`rarfile` to use the same path separator as :mod:`zipfile`
|
||||
@@ -62,9 +56,9 @@ language_converters.register('titlovi = subliminal_patch.converters.titlovi:Titl
|
||||
class TitloviSubtitle(Subtitle):
|
||||
provider_name = 'titlovi'
|
||||
|
||||
def __init__(self, language, page_link, download_link, sid, releases, title, alt_title=None, season=None,
|
||||
episode=None, year=None, fps=None, asked_for_release_group=None, asked_for_episode=None):
|
||||
super(TitloviSubtitle, self).__init__(language, page_link=page_link)
|
||||
def __init__(self, language, download_link, sid, releases, title, alt_title=None, season=None,
|
||||
episode=None, year=None, rating=None, download_count=None, asked_for_release_group=None, asked_for_episode=None):
|
||||
super(TitloviSubtitle, self).__init__(language)
|
||||
self.sid = sid
|
||||
self.releases = self.release_info = releases
|
||||
self.title = title
|
||||
@@ -73,11 +67,21 @@ class TitloviSubtitle(Subtitle):
|
||||
self.episode = episode
|
||||
self.year = year
|
||||
self.download_link = download_link
|
||||
self.fps = fps
|
||||
self.rating = rating
|
||||
self.download_count = download_count
|
||||
self.matches = None
|
||||
self.asked_for_release_group = asked_for_release_group
|
||||
self.asked_for_episode = asked_for_episode
|
||||
|
||||
def __repr__(self):
|
||||
if self.season and self.episode:
|
||||
return '<%s "%s (%r)" s%.2de%.2d [%s:%s] ID:%r R:%.2f D:%r>' % (
|
||||
self.__class__.__name__, self.title, self.year, self.season, self.episode, self.language, self._guessed_encoding, self.sid,
|
||||
self.rating, self.download_count)
|
||||
else:
|
||||
return '<%s "%s (%r)" [%s:%s] ID:%r R:%.2f D:%r>' % (
|
||||
self.__class__.__name__, self.title, self.year, self.language, self._guessed_encoding, self.sid, self.rating, self.download_count)
|
||||
|
||||
@property
|
||||
def id(self):
|
||||
return self.sid
|
||||
@@ -134,20 +138,62 @@ class TitloviSubtitle(Subtitle):
|
||||
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
|
||||
subtitle_class = TitloviSubtitle
|
||||
languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')}
|
||||
server_url = 'https://titlovi.com'
|
||||
search_url = server_url + '/titlovi/?'
|
||||
download_url = server_url + '/download/?type=1&mediaid='
|
||||
api_url = 'https://kodi.titlovi.com/api/subtitles'
|
||||
api_gettoken_url = api_url + '/gettoken'
|
||||
api_search_url = api_url + '/search'
|
||||
|
||||
def __init__(self, username=None, password=None):
|
||||
if not all((username, password)):
|
||||
raise ConfigurationError('Username and password must be specified')
|
||||
|
||||
self.username = username
|
||||
self.password = password
|
||||
|
||||
self.session = None
|
||||
|
||||
self.user_id = None
|
||||
self.login_token = None
|
||||
self.token_exp = None
|
||||
|
||||
def initialize(self):
|
||||
self.session = RetryingCFSession()
|
||||
load_verification("titlovi", self.session)
|
||||
#load_verification("titlovi", self.session)
|
||||
|
||||
token = region.get("titlovi_token")
|
||||
if token is not NO_VALUE:
|
||||
self.user_id, self.login_token, self.token_exp = token
|
||||
if datetime.now() > self.token_exp:
|
||||
logger.debug('Token expired')
|
||||
self.log_in()
|
||||
else:
|
||||
logger.debug('Use cached token')
|
||||
else:
|
||||
logger.debug('Token not found in cache')
|
||||
self.log_in()
|
||||
|
||||
def log_in(self):
|
||||
login_params = dict(username=self.username, password=self.password, json=True)
|
||||
try:
|
||||
response = self.session.post(self.api_gettoken_url, params=login_params)
|
||||
if response.status_code == request_codes.ok:
|
||||
resp_json = response.json()
|
||||
self.login_token = resp_json.get('Token')
|
||||
self.user_id = resp_json.get('UserId')
|
||||
self.token_exp = dateutil.parser.parse(resp_json.get('ExpirationDate'))
|
||||
|
||||
region.set("titlovi_token", [self.user_id, self.login_token, self.token_exp])
|
||||
logger.debug('New token obtained')
|
||||
|
||||
elif response.status_code == request_codes.unauthorized:
|
||||
raise AuthenticationError('Login failed')
|
||||
|
||||
except RequestException as e:
|
||||
logger.error(e)
|
||||
def terminate(self):
|
||||
self.session.close()
|
||||
|
||||
def query(self, languages, title, season=None, episode=None, year=None, video=None):
|
||||
items_per_page = 10
|
||||
current_page = 1
|
||||
def query(self, languages, title, season=None, episode=None, year=None, imdb_id=None, video=None):
|
||||
search_params = dict()
|
||||
|
||||
used_languages = languages
|
||||
lang_strings = [str(lang) for lang in used_languages]
|
||||
@@ -162,168 +208,73 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
|
||||
langs = '|'.join(map(str, [l.titlovi for l in used_languages]))
|
||||
|
||||
# set query params
|
||||
params = {'prijevod': title, 'jezik': langs}
|
||||
search_params['query'] = title
|
||||
search_params['lang'] = langs
|
||||
is_episode = False
|
||||
if season and episode:
|
||||
is_episode = True
|
||||
params['s'] = season
|
||||
params['e'] = episode
|
||||
if year:
|
||||
params['g'] = year
|
||||
search_params['season'] = season
|
||||
search_params['episode'] = episode
|
||||
#if year:
|
||||
# search_params['year'] = year
|
||||
if imdb_id:
|
||||
search_params['imdbID'] = imdb_id
|
||||
|
||||
# loop through paginated results
|
||||
logger.info('Searching subtitles %r', params)
|
||||
logger.info('Searching subtitles %r', search_params)
|
||||
subtitles = []
|
||||
query_results = []
|
||||
|
||||
while True:
|
||||
# query the server
|
||||
try:
|
||||
r = self.session.get(self.search_url, params=params, timeout=10)
|
||||
r.raise_for_status()
|
||||
except RequestException as e:
|
||||
captcha_passed = False
|
||||
if e.response.status_code == 403 and "data-sitekey" in e.response.content:
|
||||
logger.info('titlovi: Solving captcha. This might take a couple of minutes, but should only '
|
||||
'happen once every so often')
|
||||
try:
|
||||
search_params['token'] = self.login_token
|
||||
search_params['userid'] = self.user_id
|
||||
search_params['json'] = True
|
||||
|
||||
site_key = re.search(r'data-sitekey="(.+?)"', e.response.content).group(1)
|
||||
challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', e.response.content).group(1)
|
||||
challenge_ray = re.search(r'data-ray="(.+?)"', e.response.content).group(1)
|
||||
if not all([site_key, challenge_s, challenge_ray]):
|
||||
raise Exception("titlovi: Captcha site-key not found!")
|
||||
response = self.session.get(self.api_search_url, params=search_params)
|
||||
resp_json = response.json()
|
||||
if resp_json['SubtitleResults']:
|
||||
query_results.extend(resp_json['SubtitleResults'])
|
||||
|
||||
pitcher = pitchers.get_pitcher()("titlovi", e.request.url, site_key,
|
||||
user_agent=self.session.headers["User-Agent"],
|
||||
cookies=self.session.cookies.get_dict(),
|
||||
is_invisible=True)
|
||||
|
||||
result = pitcher.throw()
|
||||
if not result:
|
||||
raise Exception("titlovi: Couldn't solve captcha!")
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
|
||||
s_params = {
|
||||
"s": challenge_s,
|
||||
"id": challenge_ray,
|
||||
"g-recaptcha-response": result,
|
||||
}
|
||||
r = self.session.get(self.server_url + "/cdn-cgi/l/chk_captcha", params=s_params, timeout=10,
|
||||
allow_redirects=False)
|
||||
r.raise_for_status()
|
||||
r = self.session.get(self.search_url, params=params, timeout=10)
|
||||
r.raise_for_status()
|
||||
store_verification("titlovi", self.session)
|
||||
captcha_passed = True
|
||||
for sub in query_results:
|
||||
|
||||
if not captcha_passed:
|
||||
logger.exception('RequestException %s', e)
|
||||
break
|
||||
# title and alternate title
|
||||
match = title_re.search(sub.get('Title'))
|
||||
if match:
|
||||
_title = match.group('title')
|
||||
alt_title = match.group('altitle')
|
||||
else:
|
||||
try:
|
||||
soup = BeautifulSoup(r.content, 'lxml')
|
||||
continue
|
||||
|
||||
# number of results
|
||||
result_count = int(soup.select_one('.results_count b').string)
|
||||
except:
|
||||
result_count = None
|
||||
# handle movies and series separately
|
||||
if is_episode:
|
||||
subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')), sub.get('Link'), sub.get('Id'), sub.get('Release'), _title,
|
||||
alt_title=alt_title, season=sub.get('Season'), episode=sub.get('Episode'),
|
||||
year=sub.get('Year'), rating=sub.get('Rating'),
|
||||
download_count=sub.get('DownloadCount'),
|
||||
asked_for_release_group=video.release_group,
|
||||
asked_for_episode=episode)
|
||||
else:
|
||||
subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')), sub.get('Link'), sub.get('Id'), sub.get('Release'), _title,
|
||||
alt_title=alt_title, year=sub.get('Year'), rating=sub.get('Rating'),
|
||||
download_count=sub.get('DownloadCount'),
|
||||
asked_for_release_group=video.release_group)
|
||||
logger.debug('Found subtitle %r', subtitle)
|
||||
|
||||
# exit if no results
|
||||
if not result_count:
|
||||
if not subtitles:
|
||||
logger.debug('No subtitles found')
|
||||
else:
|
||||
logger.debug("No more subtitles found")
|
||||
break
|
||||
# prime our matches so we can use the values later
|
||||
subtitle.get_matches(video)
|
||||
|
||||
# number of pages with results
|
||||
pages = int(math.ceil(result_count / float(items_per_page)))
|
||||
|
||||
# get current page
|
||||
if 'pg' in params:
|
||||
current_page = int(params['pg'])
|
||||
|
||||
try:
|
||||
sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
|
||||
for sub in sublist:
|
||||
# subtitle id
|
||||
sid = sub.find(attrs={'data-id': True}).attrs['data-id']
|
||||
# get download link
|
||||
download_link = self.download_url + sid
|
||||
# title and alternate title
|
||||
match = title_re.search(sub.a.string)
|
||||
if match:
|
||||
_title = match.group('title')
|
||||
alt_title = match.group('altitle')
|
||||
else:
|
||||
continue
|
||||
|
||||
# page link
|
||||
page_link = self.server_url + sub.a.attrs['href']
|
||||
# subtitle language
|
||||
match = lang_re.search(sub.select_one('.lang').attrs['src'])
|
||||
if match:
|
||||
try:
|
||||
# decode language
|
||||
lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
# relase year or series start year
|
||||
match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
|
||||
if match:
|
||||
r_year = int(match.group('year'))
|
||||
# fps
|
||||
match = fps_re.search(sub.select_one('.fps').string)
|
||||
if match:
|
||||
fps = match.group('fps')
|
||||
# releases
|
||||
releases = str(sub.select_one('.fps').parent.contents[0].string)
|
||||
|
||||
# handle movies and series separately
|
||||
if is_episode:
|
||||
# season and episode info
|
||||
sxe = sub.select_one('.s0xe0y').string
|
||||
r_season = None
|
||||
r_episode = None
|
||||
if sxe:
|
||||
match = season_re.search(sxe)
|
||||
if match:
|
||||
r_season = int(match.group('season'))
|
||||
match = episode_re.search(sxe)
|
||||
if match:
|
||||
r_episode = int(match.group('episode'))
|
||||
|
||||
subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
|
||||
alt_title=alt_title, season=r_season, episode=r_episode,
|
||||
year=r_year, fps=fps,
|
||||
asked_for_release_group=video.release_group,
|
||||
asked_for_episode=episode)
|
||||
else:
|
||||
subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
|
||||
alt_title=alt_title, year=r_year, fps=fps,
|
||||
asked_for_release_group=video.release_group)
|
||||
logger.debug('Found subtitle %r', subtitle)
|
||||
|
||||
# prime our matches so we can use the values later
|
||||
subtitle.get_matches(video)
|
||||
|
||||
# add found subtitles
|
||||
subtitles.append(subtitle)
|
||||
|
||||
finally:
|
||||
soup.decompose()
|
||||
|
||||
# stop on last page
|
||||
if current_page >= pages:
|
||||
break
|
||||
|
||||
# increment current page
|
||||
params['pg'] = current_page + 1
|
||||
logger.debug('Getting page %d', params['pg'])
|
||||
# add found subtitles
|
||||
subtitles.append(subtitle)
|
||||
|
||||
return subtitles
|
||||
|
||||
def list_subtitles(self, video, languages):
|
||||
season = episode = None
|
||||
|
||||
if isinstance(video, Episode):
|
||||
title = video.series
|
||||
season = video.season
|
||||
@@ -333,6 +284,7 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
|
||||
|
||||
return [s for s in
|
||||
self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode, year=video.year,
|
||||
imdb_id=video.imdb_id,
|
||||
video=video)]
|
||||
|
||||
def download_subtitle(self, subtitle):
|
||||
@@ -370,10 +322,12 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
|
||||
sub_to_extract = None
|
||||
|
||||
for sub_name in subs_in_archive:
|
||||
if not ('.cyr' in sub_name or '.cir' in sub_name):
|
||||
_sub_name = sub_name.lower()
|
||||
|
||||
if not ('.cyr' in _sub_name or '.cir' in _sub_name or 'cyr)' in _sub_name):
|
||||
sr_lat_subs.append(sub_name)
|
||||
|
||||
if ('.cyr' in sub_name or '.cir' in sub_name) and not '.lat' in sub_name:
|
||||
if ('.cyr' in sub_name or '.cir' in _sub_name) and not '.lat' in _sub_name.lower():
|
||||
sr_cyr_subs.append(sub_name)
|
||||
|
||||
if subtitle.language == 'sr':
|
||||
|
||||
@@ -87,7 +87,10 @@ def refine(video, **kwargs):
|
||||
# parse series year
|
||||
series_year = None
|
||||
if result['firstAired']:
|
||||
series_year = datetime.datetime.strptime(result['firstAired'], '%Y-%m-%d').year
|
||||
try:
|
||||
series_year = datetime.datetime.strptime(result['firstAired'], '%Y-%m-%d').year
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
# discard mismatches on year
|
||||
if video.year and series_year and video.year != series_year:
|
||||
|
||||
@@ -60,6 +60,8 @@ def compute_score(matches, subtitle, video, hearing_impaired=None):
|
||||
episode_hash_valid_if = {"series", "season", "episode", "format"}
|
||||
movie_hash_valid_if = {"video_codec", "format"}
|
||||
|
||||
orig_matches = matches.copy()
|
||||
|
||||
# on hash match, discard everything else
|
||||
if subtitle.hash_verifiable:
|
||||
if 'hash' in matches:
|
||||
@@ -83,41 +85,47 @@ def compute_score(matches, subtitle, video, hearing_impaired=None):
|
||||
matches &= {'hash'}
|
||||
|
||||
# handle equivalent matches
|
||||
eq_matches = set()
|
||||
if is_episode:
|
||||
if 'title' in matches:
|
||||
logger.debug('Adding title match equivalent')
|
||||
matches.add('episode')
|
||||
eq_matches.add('episode')
|
||||
if 'series_imdb_id' in matches:
|
||||
logger.debug('Adding series_imdb_id match equivalent')
|
||||
matches |= {'series', 'year'}
|
||||
eq_matches |= {'series', 'year'}
|
||||
if 'imdb_id' in matches:
|
||||
logger.debug('Adding imdb_id match equivalents')
|
||||
matches |= {'series', 'year', 'season', 'episode'}
|
||||
eq_matches |= {'series', 'year', 'season', 'episode'}
|
||||
if 'tvdb_id' in matches:
|
||||
logger.debug('Adding tvdb_id match equivalents')
|
||||
matches |= {'series', 'year', 'season', 'episode', 'title'}
|
||||
eq_matches |= {'series', 'year', 'season', 'episode', 'title'}
|
||||
if 'series_tvdb_id' in matches:
|
||||
logger.debug('Adding series_tvdb_id match equivalents')
|
||||
matches |= {'series', 'year'}
|
||||
eq_matches |= {'series', 'year'}
|
||||
|
||||
# specials
|
||||
if video.is_special and 'title' in matches and 'series' in matches \
|
||||
and 'year' in matches:
|
||||
logger.debug('Adding special title match equivalent')
|
||||
matches |= {'season', 'episode'}
|
||||
eq_matches |= {'season', 'episode'}
|
||||
|
||||
elif is_movie:
|
||||
if 'imdb_id' in matches:
|
||||
logger.debug('Adding imdb_id match equivalents')
|
||||
matches |= {'title', 'year'}
|
||||
eq_matches |= {'title', 'year'}
|
||||
|
||||
matches |= eq_matches
|
||||
|
||||
# handle hearing impaired
|
||||
if hearing_impaired is not None and subtitle.hearing_impaired == hearing_impaired:
|
||||
logger.debug('Matched hearing_impaired')
|
||||
matches.add('hearing_impaired')
|
||||
orig_matches.add('hearing_impaired')
|
||||
|
||||
# compute the score
|
||||
score = sum((scores.get(match, 0) for match in matches))
|
||||
logger.info('%r: Computed score %r with final matches %r', subtitle, score, matches)
|
||||
|
||||
return score
|
||||
score_without_hash = sum((scores.get(match, 0) for match in orig_matches | eq_matches if match != "hash"))
|
||||
|
||||
return score, score_without_hash
|
||||
|
||||
@@ -19,6 +19,15 @@ from subliminal import Subtitle as Subtitle_
|
||||
from subliminal.subtitle import Episode, Movie, sanitize_release_group, get_equivalent_release_groups
|
||||
from subliminal_patch.utils import sanitize
|
||||
from ftfy import fix_text
|
||||
from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE
|
||||
|
||||
BOMS = (
|
||||
(BOM_UTF8, "UTF-8"),
|
||||
(BOM_UTF32_BE, "UTF-32-BE"),
|
||||
(BOM_UTF32_LE, "UTF-32-LE"),
|
||||
(BOM_UTF16_BE, "UTF-16-BE"),
|
||||
(BOM_UTF16_LE, "UTF-16-LE"),
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -105,6 +114,9 @@ class Subtitle(Subtitle_):
|
||||
# normalize line endings
|
||||
self.content = self.content.replace("\r\n", "\n").replace('\r', '\n')
|
||||
|
||||
def _check_bom(self, data):
|
||||
return [encoding for bom, encoding in BOMS if data.startswith(bom)]
|
||||
|
||||
def guess_encoding(self):
|
||||
"""Guess encoding using the language, falling back on chardet.
|
||||
|
||||
@@ -119,11 +131,17 @@ class Subtitle(Subtitle_):
|
||||
|
||||
encodings = ['utf-8']
|
||||
|
||||
# check UTF BOMs
|
||||
bom_encodings = self._check_bom(self.content)
|
||||
if bom_encodings:
|
||||
encodings = list(set(enc.lower() for enc in bom_encodings + encodings))
|
||||
|
||||
# add language-specific encodings
|
||||
# http://scratchpad.wikia.com/wiki/Character_Encoding_Recommendation_for_Languages
|
||||
|
||||
if self.language.alpha3 == 'zho':
|
||||
encodings.extend(['cp936', 'gb2312', 'cp950', 'gb18030', 'big5', 'big5hkscs'])
|
||||
encodings.extend(['cp936', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp_2', 'cp950', 'gb18030', 'big5',
|
||||
'big5hkscs', 'utf-16'])
|
||||
elif self.language.alpha3 == 'jpn':
|
||||
encodings.extend(['shift-jis', 'cp932', 'euc_jp', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
|
||||
'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', ])
|
||||
@@ -132,7 +150,7 @@ class Subtitle(Subtitle_):
|
||||
|
||||
# arabian/farsi
|
||||
elif self.language.alpha3 in ('ara', 'fas', 'per'):
|
||||
encodings.append('windows-1256')
|
||||
encodings.extend(['windows-1256', 'utf-16'])
|
||||
elif self.language.alpha3 == 'heb':
|
||||
encodings.extend(['windows-1255', 'iso-8859-8'])
|
||||
elif self.language.alpha3 == 'tur':
|
||||
@@ -250,8 +268,7 @@ class Subtitle(Subtitle_):
|
||||
subs = pysubs2.SSAFile.from_string(text, fps=self.plex_media_fps)
|
||||
|
||||
unicontent = self.pysubs2_to_unicode(subs)
|
||||
self.content = unicontent.encode("utf-8")
|
||||
self._guessed_encoding = "utf-8"
|
||||
self.content = unicontent.encode(self._guessed_encoding)
|
||||
except:
|
||||
logger.exception("Couldn't convert subtitle %s to .srt format: %s", self, traceback.format_exc())
|
||||
return False
|
||||
@@ -261,6 +278,12 @@ class Subtitle(Subtitle_):
|
||||
|
||||
@classmethod
|
||||
def pysubs2_to_unicode(cls, sub, format="srt"):
|
||||
"""
|
||||
this is a modified version of pysubs2.SubripFormat.to_file with special handling for drawing tags in ASS
|
||||
:param sub:
|
||||
:param format:
|
||||
:return:
|
||||
"""
|
||||
def ms_to_timestamp(ms, mssep=","):
|
||||
"""Convert ms to 'HH:MM:SS,mmm'"""
|
||||
# XXX throw on overflow/underflow?
|
||||
@@ -272,9 +295,12 @@ class Subtitle(Subtitle_):
|
||||
def prepare_text(text, style):
|
||||
body = []
|
||||
for fragment, sty in parse_tags(text, style, sub.styles):
|
||||
fragment = fragment.replace(ur"\h", u" ")
|
||||
fragment = fragment.replace(ur"\n", u"\n")
|
||||
fragment = fragment.replace(ur"\N", u"\n")
|
||||
fragment = fragment.replace(r"\h", u" ")
|
||||
fragment = fragment.replace(r"\n", u"\n")
|
||||
fragment = fragment.replace(r"\N", u"\n")
|
||||
if sty.drawing:
|
||||
raise pysubs2.ContentNotUsable
|
||||
|
||||
if format == "srt":
|
||||
if sty.italic:
|
||||
fragment = u"<i>%s</i>" % fragment
|
||||
@@ -306,7 +332,10 @@ class Subtitle(Subtitle_):
|
||||
for i, line in enumerate(visible_lines, 1):
|
||||
start = ms_to_timestamp(line.start, mssep=mssep)
|
||||
end = ms_to_timestamp(line.end, mssep=mssep)
|
||||
text = prepare_text(line.text, sub.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
|
||||
try:
|
||||
text = prepare_text(line.text, sub.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
|
||||
except pysubs2.ContentNotUsable:
|
||||
continue
|
||||
|
||||
out.append(u"%d\n" % i)
|
||||
out.append(u"%s --> %s\n" % (start, end))
|
||||
@@ -319,7 +348,8 @@ class Subtitle(Subtitle_):
|
||||
:return: string
|
||||
"""
|
||||
if not self.mods:
|
||||
return fix_text(self.content.decode("utf-8"), **ftfy_defaults).encode(encoding="utf-8")
|
||||
return fix_text(self.content.decode(encoding=self._guessed_encoding), **ftfy_defaults).encode(
|
||||
encoding=self._guessed_encoding)
|
||||
|
||||
submods = SubtitleModifications(debug=debug)
|
||||
if submods.load(content=self.text, language=self.language):
|
||||
@@ -328,7 +358,7 @@ class Subtitle(Subtitle_):
|
||||
self.mods = submods.mods_used
|
||||
|
||||
content = fix_text(self.pysubs2_to_unicode(submods.f, format=format), **ftfy_defaults)\
|
||||
.encode(encoding="utf-8")
|
||||
.encode(encoding=self._guessed_encoding)
|
||||
submods.f = None
|
||||
del submods
|
||||
return content
|
||||
@@ -339,6 +369,15 @@ class ModifiedSubtitle(Subtitle):
|
||||
id = None
|
||||
|
||||
|
||||
MERGED_FORMATS = {
|
||||
"TV": ("HDTV", "SDTV", "AHDTV", "UHDTV"),
|
||||
"Air": ("SATRip", "DVB", "PPV"),
|
||||
"Disk": ("DVD", "HD-DVD", "BluRay")
|
||||
}
|
||||
|
||||
MERGED_FORMATS_REV = dict((v.lower(), k.lower()) for k in MERGED_FORMATS for v in MERGED_FORMATS[k])
|
||||
|
||||
|
||||
def guess_matches(video, guess, partial=False):
|
||||
"""Get matches between a `video` and a `guess`.
|
||||
|
||||
@@ -421,21 +460,25 @@ def guess_matches(video, guess, partial=False):
|
||||
formats = [formats]
|
||||
|
||||
if video.format:
|
||||
video_format = video.format
|
||||
if video_format in ("HDTV", "SDTV", "TV"):
|
||||
video_format = "TV"
|
||||
logger.debug("Treating HDTV/SDTV the same")
|
||||
video_format = video.format.lower()
|
||||
_video_gen_format = MERGED_FORMATS_REV.get(video_format)
|
||||
if _video_gen_format:
|
||||
logger.debug("Treating %s as %s the same", video_format, _video_gen_format)
|
||||
|
||||
for frmt in formats:
|
||||
if frmt in ("HDTV", "SDTV"):
|
||||
frmt = "TV"
|
||||
_guess_gen_frmt = MERGED_FORMATS_REV.get(frmt.lower())
|
||||
|
||||
if frmt.lower() == video_format.lower():
|
||||
if _guess_gen_frmt == _video_gen_format:
|
||||
matches.add('format')
|
||||
break
|
||||
if "release_group" in matches and "format" not in matches:
|
||||
logger.info("Release group matched but format didn't. Remnoving release group match.")
|
||||
matches.remove("release_group")
|
||||
|
||||
# video_codec
|
||||
if video.video_codec and 'video_codec' in guess and guess['video_codec'] == video.video_codec:
|
||||
matches.add('video_codec')
|
||||
|
||||
# audio_codec
|
||||
if video.audio_codec and 'audio_codec' in guess and guess['audio_codec'] == video.audio_codec:
|
||||
matches.add('audio_codec')
|
||||
|
||||
@@ -21,9 +21,10 @@ if debug:
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
#sub = Subtitle(Language.fromietf("eng:forced"), mods=["common", "remove_HI", "OCR_fixes", "fix_uppercase", "shift_offset(ms=-500)", "shift_offset(ms=500)", "shift_offset(s=2,ms=800)"])
|
||||
sub = Subtitle(Language.fromietf("eng:forced"), mods=["common", "remove_HI", "OCR_fixes", "fix_uppercase", "shift_offset(ms=0,s=1)"])
|
||||
sub = Subtitle(Language.fromietf("eng"), mods=["common", "remove_HI", "OCR_fixes", "fix_uppercase", "shift_offset(ms=0,s=1)"])
|
||||
sub.content = open(fn).read()
|
||||
sub.normalize()
|
||||
sub.is_valid()
|
||||
content = sub.get_modified_content(debug=True)
|
||||
|
||||
#submod = SubMod(debug=debug)
|
||||
|
||||
@@ -28,6 +28,9 @@ import re
|
||||
|
||||
import enum
|
||||
import sys
|
||||
import requests
|
||||
import time
|
||||
import logging
|
||||
|
||||
is_PY2 = sys.version_info[0] < 3
|
||||
if is_PY2:
|
||||
@@ -37,8 +40,13 @@ else:
|
||||
from contextlib import suppress
|
||||
from urllib2.request import Request, urlopen
|
||||
|
||||
from dogpile.cache.api import NO_VALUE
|
||||
from subliminal.cache import region
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# constants
|
||||
HEADERS = {
|
||||
}
|
||||
@@ -48,14 +56,23 @@ DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWeb"\
|
||||
"Kit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
|
||||
|
||||
|
||||
ENDPOINT_RE = re.compile(ur'(?uis)<form.+?action="/subtitles/(.+)">.*?<input type="text"')
|
||||
|
||||
|
||||
class NewEndpoint(Exception):
|
||||
pass
|
||||
|
||||
|
||||
# utils
|
||||
def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT):
|
||||
def soup_for(url, data=None, session=None, user_agent=DEFAULT_USER_AGENT):
|
||||
url = re.sub("\s", "+", url)
|
||||
if not session:
|
||||
r = Request(url, data=None, headers=dict(HEADERS, **{"User-Agent": user_agent}))
|
||||
html = urlopen(r).read().decode("utf-8")
|
||||
else:
|
||||
html = session.get(url).text
|
||||
ret = session.post(url, data=data)
|
||||
ret.raise_for_status()
|
||||
html = ret.text
|
||||
return BeautifulSoup(html, "html.parser")
|
||||
|
||||
|
||||
@@ -108,7 +125,7 @@ class Subtitle(object):
|
||||
subtitles = []
|
||||
|
||||
for row in rows:
|
||||
if row.td.a is not None:
|
||||
if row.td.a is not None and row.td.get("class", ["lazy"])[0] != "empty":
|
||||
subtitles.append(cls.from_row(row))
|
||||
|
||||
return subtitles
|
||||
@@ -238,22 +255,52 @@ def get_first_film(soup, section, year=None, session=None):
|
||||
url = SITE_DOMAIN + t.div.a.get("href")
|
||||
break
|
||||
if not url:
|
||||
return
|
||||
# fallback to non-year results
|
||||
logger.info("Falling back to non-year results as year wasn't found (%s)", year)
|
||||
url = SITE_DOMAIN + tag.findNext("ul").find("li").div.a.get("href")
|
||||
|
||||
return Film.from_url(url, session=session)
|
||||
|
||||
|
||||
def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact):
|
||||
soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, "release" if release else "title", term), session=session)
|
||||
def find_endpoint(session, content=None):
|
||||
endpoint = region.get("subscene_endpoint2")
|
||||
if endpoint is NO_VALUE:
|
||||
if not content:
|
||||
content = session.get(SITE_DOMAIN).text
|
||||
|
||||
if "Subtitle search by" in str(soup):
|
||||
rows = soup.find("table").tbody.find_all("tr")
|
||||
subtitles = Subtitle.from_rows(rows)
|
||||
return Film(term, subtitles=subtitles)
|
||||
m = ENDPOINT_RE.search(content)
|
||||
if m:
|
||||
endpoint = m.group(1).strip()
|
||||
logger.debug("Switching main endpoint to %s", endpoint)
|
||||
region.set("subscene_endpoint2", endpoint)
|
||||
return endpoint
|
||||
|
||||
for junk, search_type in SearchTypes.__members__.items():
|
||||
if section_exists(soup, search_type):
|
||||
return get_first_film(soup, search_type, year=year, session=session)
|
||||
|
||||
if limit_to == search_type:
|
||||
return
|
||||
def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact, throttle=0):
|
||||
# note to subscene: if you actually start to randomize the endpoint, we'll have to query your server even more
|
||||
|
||||
if release:
|
||||
endpoint = "release"
|
||||
else:
|
||||
endpoint = find_endpoint(session)
|
||||
time.sleep(throttle)
|
||||
|
||||
if not endpoint:
|
||||
logger.error("Couldn't find endpoint, exiting")
|
||||
return
|
||||
|
||||
soup = soup_for("%s/subtitles/%s" % (SITE_DOMAIN, endpoint), data={"query": term},
|
||||
session=session)
|
||||
|
||||
if soup:
|
||||
if "Subtitle search by" in str(soup):
|
||||
rows = soup.find("table").tbody.find_all("tr")
|
||||
subtitles = Subtitle.from_rows(rows)
|
||||
return Film(term, subtitles=subtitles)
|
||||
|
||||
for junk, search_type in SearchTypes.__members__.items():
|
||||
if section_exists(soup, search_type):
|
||||
return get_first_film(soup, search_type, year=year, session=session)
|
||||
|
||||
if limit_to == search_type:
|
||||
return
|
||||
|
||||
@@ -2,7 +2,8 @@
|
||||
|
||||
OS_PLEX_USERAGENT = 'plexapp.com v9.0'
|
||||
|
||||
DEPENDENCY_MODULE_NAMES = ['subliminal', 'subliminal_patch', 'enzyme', 'guessit', 'subzero', 'libfilebot', 'cfscrape']
|
||||
DEPENDENCY_MODULE_NAMES = ['subliminal', 'subliminal_patch', 'enzyme', 'guessit', 'subzero', 'libfilebot',
|
||||
'cloudscraper']
|
||||
PERSONAL_MEDIA_IDENTIFIER = "com.plexapp.agents.none"
|
||||
PLUGIN_IDENTIFIER_SHORT = "subzero"
|
||||
PLUGIN_IDENTIFIER = "com.plexapp.agents.%s" % PLUGIN_IDENTIFIER_SHORT
|
||||
|
||||
@@ -1,16 +1,42 @@
|
||||
# coding=utf-8
|
||||
import types
|
||||
import re
|
||||
|
||||
from babelfish.exceptions import LanguageError
|
||||
from babelfish import Language as Language_, basestr
|
||||
from babelfish import Language as Language_, basestr, LANGUAGE_MATRIX
|
||||
|
||||
repl_map = {
|
||||
"dk": "da",
|
||||
"nld": "nl",
|
||||
"english": "en",
|
||||
"alb": "sq",
|
||||
"arm": "hy",
|
||||
"baq": "eu",
|
||||
"bur": "my",
|
||||
"chi": "zh",
|
||||
"cze": "cs",
|
||||
"dut": "nl",
|
||||
"fre": "fr",
|
||||
"geo": "ka",
|
||||
"ger": "de",
|
||||
"gre": "el",
|
||||
"ice": "is",
|
||||
"mac": "mk",
|
||||
"mao": "mi",
|
||||
"may": "ms",
|
||||
"per": "fa",
|
||||
"rum": "ro",
|
||||
"slo": "sk",
|
||||
"tib": "bo",
|
||||
}
|
||||
|
||||
|
||||
ALPHA2_LIST = list(set(filter(lambda x: x, map(lambda x: x.alpha2, LANGUAGE_MATRIX)))) + list(repl_map.values())
|
||||
ALPHA3b_LIST = list(set(filter(lambda x: x, map(lambda x: x.alpha3, LANGUAGE_MATRIX)))) + \
|
||||
list(set(filter(lambda x: len(x) == 3, list(repl_map.keys()))))
|
||||
FULL_LANGUAGE_LIST = ALPHA2_LIST + ALPHA3b_LIST
|
||||
|
||||
|
||||
def language_from_stream(l):
|
||||
if not l:
|
||||
raise LanguageError()
|
||||
@@ -115,3 +141,16 @@ class Language(Language_):
|
||||
return Language(*Language_.fromietf(s).__getstate__())
|
||||
|
||||
return Language(*Language_.fromalpha3b(s).__getstate__())
|
||||
|
||||
|
||||
IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$"
|
||||
ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
|
||||
|
||||
|
||||
def match_ietf_language(s, ietf=False):
|
||||
language_match = re.match(".+\.([^\.]+)$" if not ietf
|
||||
else IETF_MATCH, s)
|
||||
if language_match and len(language_match.groups()) == 1:
|
||||
language = language_match.groups()[0]
|
||||
return language
|
||||
return s
|
||||
|
||||
@@ -107,6 +107,12 @@ class Dicked(object):
|
||||
for key, value in entries.iteritems():
|
||||
self.__dict__[key] = (Dicked(**value) if isinstance(value, dict) else value)
|
||||
|
||||
def has(self, key):
|
||||
return self._entries is not None and key in self._entries
|
||||
|
||||
def get(self, key, default=None):
|
||||
return self._entries.get(key, default) if self._entries else default
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -36,6 +36,7 @@ SZ_FIX_DATA = {
|
||||
u" l ": u" I ",
|
||||
u"'sjust": u"'s just",
|
||||
u"'tjust": u"'t just",
|
||||
u"\";": u"'s",
|
||||
},
|
||||
"WholeWords": {
|
||||
u"I'11": u"I'll",
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
# coding=utf-8
|
||||
class EmptyEntryError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class EmptyLineError(Exception):
|
||||
pass
|
||||
@@ -6,7 +6,8 @@ import pysubs2
|
||||
import logging
|
||||
import time
|
||||
|
||||
from mods import EMPTY_TAG_PROCESSOR, EmptyEntryError
|
||||
from mods import EMPTY_TAG_PROCESSOR
|
||||
from exc import EmptyEntryError
|
||||
from registry import registry
|
||||
from subzero.language import Language
|
||||
|
||||
@@ -293,15 +294,18 @@ class SubtitleModifications(object):
|
||||
end_tag = line[-5:]
|
||||
line = line[:-5]
|
||||
|
||||
last_procs_mods = []
|
||||
|
||||
# fixme: this double loop is ugly
|
||||
for order, identifier, args in mods:
|
||||
mod = self.initialized_mods[identifier]
|
||||
|
||||
try:
|
||||
line = mod.modify(line.strip(), entry=entry.text, debug=self.debug, parent=self, index=index,
|
||||
line = mod.modify(line.strip(), entry=t, debug=self.debug, parent=self, index=index,
|
||||
**args)
|
||||
except EmptyEntryError:
|
||||
if self.debug:
|
||||
logger.debug(u"%d: %s: %r -> ''", index, identifier, entry.text)
|
||||
logger.debug(u"%d: %s: %r -> ''", index, identifier, t)
|
||||
skip_entry = True
|
||||
break
|
||||
|
||||
@@ -312,6 +316,33 @@ class SubtitleModifications(object):
|
||||
break
|
||||
|
||||
applied_mods.append(identifier)
|
||||
if mod.last_processors:
|
||||
last_procs_mods.append([identifier, args])
|
||||
|
||||
if skip_entry:
|
||||
lines = []
|
||||
break
|
||||
|
||||
if skip_line:
|
||||
continue
|
||||
|
||||
for identifier, args in last_procs_mods:
|
||||
mod = self.initialized_mods[identifier]
|
||||
|
||||
try:
|
||||
line = mod.modify(line.strip(), entry=t, debug=self.debug, parent=self, index=index,
|
||||
procs=["last_process"], **args)
|
||||
except EmptyEntryError:
|
||||
if self.debug:
|
||||
logger.debug(u"%d: %s: %r -> ''", index, identifier, t)
|
||||
skip_entry = True
|
||||
break
|
||||
|
||||
if not line:
|
||||
if self.debug:
|
||||
logger.debug(u"%d: %s: %r -> ''", index, identifier, old_line)
|
||||
skip_line = True
|
||||
break
|
||||
|
||||
if skip_entry:
|
||||
lines = []
|
||||
|
||||
@@ -21,6 +21,7 @@ class SubtitleModification(object):
|
||||
pre_processors = []
|
||||
processors = []
|
||||
post_processors = []
|
||||
last_processors = []
|
||||
languages = []
|
||||
|
||||
def __init__(self, parent):
|
||||
@@ -67,15 +68,16 @@ class SubtitleModification(object):
|
||||
def post_process(self, content, debug=False, parent=None, **kwargs):
|
||||
return self._process(content, self.post_processors, debug=debug, parent=parent, **kwargs)
|
||||
|
||||
def modify(self, content, debug=False, parent=None, **kwargs):
|
||||
def modify(self, content, debug=False, parent=None, procs=None, **kwargs):
|
||||
if not content:
|
||||
return
|
||||
|
||||
new_content = content
|
||||
for method in ("pre_process", "process", "post_process"):
|
||||
for method in procs or ("pre_process", "process", "post_process"):
|
||||
if not new_content:
|
||||
return
|
||||
new_content = getattr(self, method)(new_content, debug=debug, parent=parent, **kwargs)
|
||||
new_content = self._process(new_content, getattr(self, "%sors" % method),
|
||||
debug=debug, parent=parent, **kwargs)
|
||||
|
||||
return new_content
|
||||
|
||||
@@ -105,5 +107,3 @@ empty_line_post_processors = [
|
||||
]
|
||||
|
||||
|
||||
class EmptyEntryError(Exception):
|
||||
pass
|
||||
|
||||
@@ -7,6 +7,7 @@ from subzero.modification.mods import SubtitleTextModification, empty_line_post_
|
||||
from subzero.modification.processors import FuncProcessor
|
||||
from subzero.modification.processors.re_processor import NReProcessor
|
||||
from subzero.modification import registry
|
||||
from tld import get_tld
|
||||
|
||||
|
||||
ENGLISH = Language("eng")
|
||||
@@ -28,7 +29,7 @@ class CommonFixes(SubtitleTextModification):
|
||||
NReProcessor(re.compile(r'(?u)(\w|\b|\s|^)(-\s?-{1,2})'), ur"\1—", name="CM_multidash"),
|
||||
|
||||
# line = _/-/\s
|
||||
NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="CM_non_word_only"),
|
||||
NReProcessor(re.compile(r'(?u)(^\W*[-_.:<>~"\']+\W*$)'), "", name="CM_non_word_only"),
|
||||
|
||||
# remove >>
|
||||
NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),
|
||||
@@ -37,7 +38,7 @@ class CommonFixes(SubtitleTextModification):
|
||||
NReProcessor(re.compile(r'(?u)(^\W*:\s*(?=\w+))'), "", name="CM_empty_colon_start"),
|
||||
|
||||
# fix music symbols
|
||||
NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s*)|(\s*[*#¶]+\s*$)'),
|
||||
NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s+)|(\s*[*#¶]+\s*$)'),
|
||||
lambda x: u"♪ " if x.group(1) else u" ♪",
|
||||
name="CM_music_symbols"),
|
||||
|
||||
@@ -113,7 +114,9 @@ class CommonFixes(SubtitleTextModification):
|
||||
NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=\w)) +([!?.,](?![!?.,]| \.))'), r"\1", name="CM_punctuation_space"),
|
||||
|
||||
# add space after punctuation
|
||||
NReProcessor(re.compile(r'(?u)([!?.,:])([A-zÀ-ž]{2,})'), r"\1 \2", name="CM_punctuation_space2"),
|
||||
NReProcessor(re.compile(r'(?u)(([^\s]*)([!?.,:])([A-zÀ-ž]{2,}))'),
|
||||
lambda match: u"%s%s %s" % (match.group(2), match.group(3), match.group(4)) if not get_tld(match.group(1), fail_silently=True, fix_protocol=True) else match.group(1),
|
||||
name="CM_punctuation_space2"),
|
||||
|
||||
# fix lowercase I in english
|
||||
NReProcessor(re.compile(r'(?u)(\b)i(\b)'), r"\1I\2", name="CM_EN_lowercase_i",
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
# coding=utf-8
|
||||
import re
|
||||
|
||||
from subzero.modification.mods import SubtitleTextModification, empty_line_post_processors, EmptyEntryError, TAG
|
||||
from subzero.modification.mods import SubtitleTextModification, empty_line_post_processors, TAG
|
||||
from subzero.modification.exc import EmptyEntryError
|
||||
from subzero.modification.processors.re_processor import NReProcessor
|
||||
from subzero.modification import registry
|
||||
|
||||
@@ -46,14 +47,14 @@ class HearingImpaired(SubtitleTextModification):
|
||||
name="HI_before_colon_noncaps"),
|
||||
|
||||
# brackets (only remove if at least 3 chars in brackets)
|
||||
NReProcessor(re.compile(ur'(?sux)-?%(t)s[([][^([)\]]+?(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' %
|
||||
NReProcessor(re.compile(ur'(?sux)-?%(t)s["\']*[([][^([)\]]+?(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]]["\']*[\s:]*%(t)s' %
|
||||
{"t": TAG}), "", name="HI_brackets"),
|
||||
|
||||
NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
|
||||
"", name="HI_bracket_open_start"),
|
||||
#NReProcessor(re.compile(ur'(?sux)-?%(t)s[([]%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+%(t)s$' % {"t": TAG}),
|
||||
# "", name="HI_bracket_open_start"),
|
||||
|
||||
NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "",
|
||||
name="HI_bracket_open_end"),
|
||||
#NReProcessor(re.compile(ur'(?sux)-?%(t)s(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' % {"t": TAG}), "",
|
||||
# name="HI_bracket_open_end"),
|
||||
|
||||
# text before colon (and possible dash in front), max 11 chars after the first whitespace (if any)
|
||||
# NReProcessor(re.compile(r'(?u)(^[A-z\-\'"_]+[\w\s]{0,11}:[^0-9{2}][\s]*)'), "", name="HI_before_colon"),
|
||||
@@ -73,7 +74,7 @@ class HearingImpaired(SubtitleTextModification):
|
||||
supported=lambda p: not p.only_uppercase),
|
||||
|
||||
# remove MAN:
|
||||
NReProcessor(re.compile(ur'(?suxi)(.*MAN:\s*)'), "", name="HI_remove_man"),
|
||||
NReProcessor(re.compile(ur'(?suxi)(\b(?:WO)MAN:\s*)'), "", name="HI_remove_man"),
|
||||
|
||||
# dash in front
|
||||
# NReProcessor(re.compile(r'(?u)^\s*-\s*'), "", name="HI_starting_dash"),
|
||||
@@ -81,13 +82,18 @@ class HearingImpaired(SubtitleTextModification):
|
||||
# all caps at start before new sentence
|
||||
NReProcessor(re.compile(ur'(?u)^(?=[A-ZÀ-Ž]{4,})[A-ZÀ-Ž-_\s]+\s([A-ZÀ-Ž][a-zà-ž].+)'), r"\1",
|
||||
name="HI_starting_upper_then_sentence", supported=lambda p: not p.only_uppercase),
|
||||
|
||||
# remove music symbols
|
||||
NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}),
|
||||
"", name="HI_music_symbols_only"),
|
||||
]
|
||||
|
||||
post_processors = empty_line_post_processors
|
||||
last_processors = [
|
||||
# remove music symbols
|
||||
NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}),
|
||||
"", name="HI_music_symbols_only"),
|
||||
|
||||
# remove music entries
|
||||
NReProcessor(re.compile(ur'(?ums)(^[-\s>~]*[*#¶♫♪]+\s*.+|.+\s*[*#¶♫♪]+\s*$)'),
|
||||
"", name="HI_music", entry=True),
|
||||
]
|
||||
|
||||
|
||||
registry.register(HearingImpaired)
|
||||
|
||||
@@ -10,7 +10,7 @@ class Processor(object):
|
||||
supported = None
|
||||
enabled = True
|
||||
|
||||
def __init__(self, name=None, parent=None, supported=None):
|
||||
def __init__(self, name=None, parent=None, supported=None, **kwargs):
|
||||
self.name = name
|
||||
self.parent = parent
|
||||
self.supported = supported if supported else lambda parent: True
|
||||
@@ -35,7 +35,7 @@ class Processor(object):
|
||||
class FuncProcessor(Processor):
|
||||
func = None
|
||||
|
||||
def __init__(self, func, name=None, parent=None, supported=None):
|
||||
def __init__(self, func, name=None, parent=None, supported=None, **kwargs):
|
||||
super(FuncProcessor, self).__init__(name=name, supported=supported)
|
||||
self.func = func
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user