Compare commits

..

16 Commits

Author SHA1 Message Date
Antoine Bertin f264092e74 Release v0.3 2011-08-18 08:55:19 +02:00
Antoine Bertin 052481c0a1 Add more checks before adjusting permissions 2011-08-18 08:55:05 +02:00
Antoine Bertin 4e3f622de0 Add possibility to choose mode of created files (chmod) 2011-08-17 22:15:10 +02:00
Antoine Bertin 143c46fff0 Fix encoding issues with logging 2011-08-17 21:16:47 +02:00
Antoine Bertin bc8b0c762d Add a script to ease subtitles download 2011-07-31 19:42:58 +02:00
Antoine Bertin 9ec92d1be4 Fix dependencies failure when installing package 2011-07-31 19:42:25 +02:00
Antoine Bertin 29ee43a67d Fix a bug when series is not guessed by guessit 2011-07-23 13:39:11 +02:00
Antoine Bertin 0f15f8f52d Revert to development version 2011-07-23 13:24:30 +02:00
Antoine Bertin 04fdd0bf4e Release v0.2 2011-07-11 20:42:16 +02:00
Antoine Bertin dff90cb90e Fix an encoding issue when passing unicode entries 2011-07-08 08:16:53 +02:00
Antoine Bertin a56f6db0ca Update Copyright notice 2011-07-07 22:57:34 +02:00
Antoine Bertin 95cae1213c Remove some extra logging 2011-07-07 22:57:09 +02:00
Antoine Bertin f4fb9dd9d6 Fix a bug where plugins didn't save to config file 2011-07-07 22:56:52 +02:00
Antoine Bertin 67a6647aa8 Switch version to master 2011-07-07 22:26:42 +02:00
Antoine Bertin bb355c214d License update 2011-07-07 08:51:48 +02:00
Antoine Bertin 9f298366a9 Initial commit 2011-07-05 02:27:11 +02:00
1966 changed files with 2598 additions and 422781 deletions
-3
View File
@@ -1,3 +0,0 @@
.gitattributes export-ignore
/Wiki export-ignore
.gitignore export-ignore
+4 -59
View File
@@ -1,59 +1,4 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
.settings
# Rope
.ropeproject
# Django stuff:
*.log
*.pot
# Sphinx documentation
docs/_build/
# pycharm
.idea
icon.psd
main-icon.psd
build
dist
subliminal.egg-info
*.pyc
View File
-1095
View File
File diff suppressed because it is too large Load Diff
-323
View File
@@ -1,323 +0,0 @@
# coding=utf-8
import sys
import datetime
from subzero.sandbox import fix_environment_stuff
# Hand this plugin's __main__ module to the sandbox fixer before anything else is imported.
module = sys.modules['__main__']
fix_environment_stuff(module, {})

# Take `globals` out of __main__'s __builtins__ mapping, then copy every other
# entry of that mapping into this module's global namespace.
globals = getattr(module, "__builtins__")["globals"]
for key, value in getattr(module, "__builtins__").iteritems():
    if key == "globals":
        continue
    globals()[key] = value
import logger
sys.modules["logger"] = logger
import support
import interface
sys.modules["interface"] = interface
from subzero.constants import OS_PLEX_USERAGENT, PERSONAL_MEDIA_IDENTIFIER
from interface.menu import *
from support.plex_media import media_to_videos, get_media_item_ids
from support.scanning import scan_videos
from support.storage import save_subtitles, store_subtitle_info, get_subtitle_storage
from support.items import is_wanted
from support.config import config
from support.lib import get_intent
from support.helpers import track_usage, get_title_for_video_metadata, get_identifier, cast_bool, \
audio_streams_match_languages
from support.history import get_history
from support.data import dispatch_migrate
from support.activities import activity
from support.download import download_best_subtitles
def Start():
    """
    Plugin start-up routine.

    Configures HTTP defaults, initializes the cache, drops expired intents and
    menu-history entries, kicks off migrations when legacy keys are present,
    validates preferences, starts the scheduler (and the activity listener when
    the channel is enabled) and finally records usage statistics if enabled.
    """
    HTTP.CacheTime = 0
    HTTP.Headers['User-agent'] = OS_PLEX_USERAGENT

    config.init_cache()

    # clear expired intents
    intent = get_intent()
    intent.cleanup()

    # clear expired menu history items
    now = datetime.datetime.now()
    if "menu_history" in Dict:
        # iterate over a snapshot because entries are removed while looping
        for key, timeout in Dict["menu_history"].copy().items():
            if now > timeout:
                # pop() tolerates a key that vanished in the meantime without
                # hiding unrelated errors the way a bare `except: pass` did
                Dict["menu_history"].pop(key, None)

    # run migrations
    if "subs" in Dict or "history" in Dict:
        Thread.Create(dispatch_migrate)

    # clear old task data
    scheduler.clear_task_data()

    # init defaults; perhaps not the best idea to use ValidatePrefs here, but we'll see
    ValidatePrefs()
    Log.Debug(config.full_version)

    if not config.permissions_ok:
        Log.Error("Insufficient permissions on library folders:")
        for title, path in config.missing_permissions:
            Log.Error("Insufficient permissions on library %s, folder: %s" % (title, path))

    # run task scheduler
    scheduler.run()

    # bind activities
    if config.enable_channel:
        Thread.Create(activity.start)

    if "anon_id" not in Dict:
        Dict["anon_id"] = get_identifier()

    # track usage
    if cast_bool(Prefs["track_usage"]):
        if "first_use" not in Dict:
            Dict["first_use"] = datetime.datetime.utcnow()
            Dict.Save()
            track_usage("General", "plugin", "first_start", config.version)
        track_usage("General", "plugin", "start", config.version)
def update_local_media(videos, ignore_parts_cleanup=None):
    """
    Run the local-media subtitle lookup for the Plex part of every given video.

    :param videos: iterable of video dicts, each providing a "plex_part" entry
    :param ignore_parts_cleanup: passed through unchanged to find_subtitles
    """
    for entry in videos:
        plex_part = entry["plex_part"]
        support.localmedia.find_subtitles(plex_part, ignore_parts_cleanup=ignore_parts_cleanup)
def agent_extract_embedded(video_part_map):
    """
    Collect embedded subtitle streams that should be auto-extracted and hand
    them to a background thread.

    For every scanned video whose audio streams do not already cover all
    configured languages, each part is checked per requested language: if no
    "embedded" subtitle is stored yet and a matching embedded stream exists, an
    extraction job is queued. Any error is logged and swallowed so the calling
    agent update can continue.

    :param video_part_map: mapping of scanned video -> part info
    """
    # imported locally so the error handler below never depends on a star import
    import traceback

    try:
        subtitle_storage = get_subtitle_storage()

        to_extract = []
        item_count = 0

        for scanned_video, part_info in video_part_map.iteritems():
            plexapi_item = scanned_video.plexapi_metadata["item"]
            stored_subs = subtitle_storage.load_or_new(plexapi_item)
            valid_langs_in_media = audio_streams_match_languages(scanned_video, config.get_lang_list(ordered=True))

            if not config.lang_list.difference(valid_langs_in_media):
                Log.Debug("Skipping embedded subtitle extraction for %s, audio streams are in correct language(s)",
                          plexapi_item.rating_key)
                continue

            for plexapi_part in get_all_parts(plexapi_item):
                item_count += 1
                # an unknown-language stream is only handed out once per part
                used_one_unknown_stream = False
                for requested_language in config.lang_list:
                    embedded_subs = stored_subs.get_by_provider(plexapi_part.id, requested_language, "embedded")
                    current = stored_subs.get_any(plexapi_part.id, requested_language) or \
                        requested_language in scanned_video.external_subtitle_languages

                    if embedded_subs:
                        Log.Debug("Skipping embedded subtitle extraction for %s, already got %r from %s",
                                  plexapi_item.rating_key, requested_language, embedded_subs[0].id)
                        continue

                    stream_data = get_embedded_subtitle_streams(plexapi_part, requested_language=requested_language,
                                                                skip_unknown=used_one_unknown_stream)
                    if not stream_data:
                        continue

                    stream = stream_data[0]["stream"]
                    if stream_data[0]["is_unknown"]:
                        used_one_unknown_stream = True

                    # last tuple element: True when there is no current subtitle for that language yet
                    to_extract.append(({scanned_video: part_info}, plexapi_part, str(stream.index),
                                       str(requested_language), not current))

                    if not cast_bool(Prefs["subtitles.search_after_autoextract"]):
                        scanned_video.subtitle_languages.update({requested_language})

        if to_extract:
            Log.Info("Triggering extraction of %d embedded subtitles of %d items", len(to_extract), item_count)
            Thread.Create(multi_extract_embedded, stream_list=to_extract, refresh=True, with_mods=True,
                          single_thread=not config.advanced.auto_extract_multithread)
    except Exception:
        Log.Error("Something went wrong when auto-extracting subtitles, continuing: %s", traceback.format_exc())
class SubZeroAgent(object):
    """
    Shared implementation of the Sub-Zero metadata agents.

    Concrete agents combine this class with one of the framework's ``Agent.*``
    base classes and set ``score_prefs_key`` and ``agent_type_verbose``.
    """
    agent_type = None
    agent_type_verbose = None
    languages = [Locale.Language.English]
    primary_provider = False
    # name of the preference holding the minimum score for this agent type
    score_prefs_key = None
    # seconds to wait between two consecutive download runs
    debounce = 10

    def __init__(self, *args, **kwargs):
        super(SubZeroAgent, self).__init__(*args, **kwargs)
        self.agent_type = "movies" if isinstance(self, Agent.Movies) else "series"
        self.name = "Sub-Zero Subtitles (%s, %s)" % (self.agent_type_verbose, config.get_version())

    def search(self, results, media, lang):
        """Append a single placeholder search result (id 'null', score 100)."""
        Log.Debug("Sub-Zero %s, %s search" % (config.version, self.agent_type))
        results.Append(MetadataSearchResult(id='null', score=100))

    def store_blank_subtitle_metadata(self, video_part_map):
        """Store subtitle info with an empty subtitle list for every video in the map."""
        store_subtitle_info(video_part_map, dict((k, []) for k in video_part_map.keys()), None, mode="a")

    def update(self, metadata, media, lang):
        """
        Agent update hook: scan the media, optionally auto-extract embedded
        subtitles, download the best subtitles and save them.

        Whatever happens inside, the ``finally`` part resets the refresh menu
        state, resolves "force" intents for the handled items, saves ``Dict``
        and syncs the cache when the new-style cache is active.
        """
        if not config.enable_agent:
            Log.Debug("Skipping Sub-Zero agent(s)")
            return

        Log.Debug("Sub-Zero %s, %s update called" % (config.version, self.agent_type))

        if not media:
            Log.Error("Called with empty media, something is really wrong with your setup!")
            return

        intent = get_intent()

        item_ids = []
        try:
            config.init_subliminal_patches()
            all_videos = media_to_videos(media, kind=self.agent_type)

            # media ignored?
            ignore_parts_cleanup = []
            videos = []
            for video in all_videos:
                if not is_wanted(video["id"], item=video["item"]):
                    Log.Debug(u'Skipping "%s"' % video["filename"])
                    ignore_parts_cleanup.append(video["path"])
                    continue
                videos.append(video)

            # find local media
            update_local_media(all_videos, ignore_parts_cleanup=ignore_parts_cleanup)

            if not videos:
                Log.Debug(u"Nothing to do.")
                return

            try:
                use_score = int(Prefs[self.score_prefs_key].strip())
            except ValueError:
                Log.Error("Please only put numbers into the scores setting. Exiting")
                return

            set_refresh_menu_state(media, media_type=self.agent_type)

            # scanned_video_part_map = {subliminal.Video: plex_part, ...}
            providers = config.get_providers(media_type=self.agent_type)
            try:
                scanned_video_part_map = scan_videos(videos, providers=providers)
            except IOError as e:
                Log.Exception("Permission error, please check your folder/file permissions. Exiting.")
                if cast_bool(Prefs["check_permissions"]):
                    config.permissions_ok = False
                    config.missing_permissions = e.message
                return

            # auto extract embedded
            if config.embedded_auto_extract:
                if config.plex_transcoder:
                    agent_extract_embedded(scanned_video_part_map)
                else:
                    Log.Warn("Plex Transcoder not found, can't auto extract")

            # clear missing subtitles menu data
            if not scheduler.is_task_running("MissingSubtitles"):
                scheduler.clear_task_data("MissingSubtitles")

            downloaded_subtitles = None

            # debounce for self.debounce seconds
            now = datetime.datetime.now()
            if "last_call" in Dict:
                last_call = Dict["last_call"]
                if last_call + datetime.timedelta(seconds=self.debounce) > now:
                    wait = self.debounce - (now - last_call).seconds
                    if wait >= 1:
                        Log.Debug("Waiting %s seconds until continuing", wait)
                        Thread.Sleep(wait)

            # downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
            try:
                downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score,
                                                               throttle_time=self.debounce, providers=providers)
            except Exception:
                Log.Exception("Something went wrong when downloading subtitles")

            # only a run that actually returned counts for the debounce timestamp
            if downloaded_subtitles is not None:
                Dict["last_call"] = datetime.datetime.now()

            item_ids = get_media_item_ids(media, kind=self.agent_type)

            downloaded_any = False
            if downloaded_subtitles:
                downloaded_any = any(downloaded_subtitles.values())

            if downloaded_any:
                save_successful = False
                try:
                    save_successful = save_subtitles(scanned_video_part_map, downloaded_subtitles,
                                                     mods=config.default_mods)
                except Exception:
                    Log.Exception("Something went wrong when saving subtitles")

                track_usage("Subtitle", "refreshed", "download", 1)

                # store SZ meta info even if download wasn't successful
                if not save_successful:
                    self.store_blank_subtitle_metadata(scanned_video_part_map)
                else:
                    for video, video_subtitles in downloaded_subtitles.items():
                        # store item(s) in history
                        for subtitle in video_subtitles:
                            history = get_history()
                            item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
                            history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
                                        thumb=video.plexapi_metadata["super_thumb"],
                                        subtitle=subtitle)
                            history.destroy()
            else:
                # store SZ meta info even if we've downloaded none
                self.store_blank_subtitle_metadata(scanned_video_part_map)

            update_local_media(videos)

        finally:
            # update the menu state
            set_refresh_menu_state(None)

            # notify any running tasks about our finished update
            for item_id in item_ids:
                # resolve existing intent for that id
                intent.resolve("force", item_id)

            Dict.Save()

            # fsync cache
            if config.new_style_cache:
                config.sync_cache()
class SubZeroSubtitlesAgentMovies(SubZeroAgent, Agent.Movies):
    """Movie variant of the Sub-Zero agent."""
    agent_type_verbose = "Movies"
    score_prefs_key = "subtitles.search.minimumMovieScore2"
    # identifiers of the agents this one contributes to
    contributes_to = [
        'com.plexapp.agents.imdb',
        'com.plexapp.agents.xbmcnfo',
        'com.plexapp.agents.themoviedb',
        'com.plexapp.agents.hama',
    ]
class SubZeroSubtitlesAgentTvShows(SubZeroAgent, Agent.TV_Shows):
    """TV show variant of the Sub-Zero agent."""
    agent_type_verbose = "TV"
    score_prefs_key = "subtitles.search.minimumTVScore2"
    # identifiers of the agents this one contributes to
    contributes_to = [
        'com.plexapp.agents.thetvdb',
        'com.plexapp.agents.themoviedb',
        'com.plexapp.agents.thetvdbdvdorder',
        'com.plexapp.agents.xbmcnfotv',
        'com.plexapp.agents.hama',
    ]
-23
View File
@@ -1,23 +0,0 @@
# Each sibling module is imported by its bare name and then registered in
# sys.modules under a package-qualified "interface.<name>" key.
# NOTE(review): presumably needed because the plugin sandbox does not resolve
# package-relative imports on its own -- confirm.
import sys
import menu
# `menu` is registered twice: package-qualified and under its bare name
sys.modules["interface.menu"] = menu
sys.modules["menu"] = menu
import menu_helpers
sys.modules["interface.menu_helpers"] = menu_helpers
import advanced
sys.modules["interface.advanced"] = advanced
import main
sys.modules["interface.main"] = main
import refresh_item
sys.modules["interface.refresh_item"] = refresh_item
import item_details
sys.modules["interface.item_details"] = item_details
import sub_mod
# note the differing name: sub_mod is exposed as "interface.modification"
sys.modules["interface.modification"] = sub_mod
-454
View File
@@ -1,454 +0,0 @@
# coding=utf-8
import datetime
import StringIO
import glob
import os
import traceback
import urlparse
from zipfile import ZipFile, ZIP_DEFLATED
from subzero.language import Language
from subzero.lib.io import FileIO
from subzero.constants import PREFIX, PLUGIN_IDENTIFIER
from menu_helpers import SubFolderObjectContainer, debounce, set_refresh_menu_state, ZipObject, ObjectContainer, route
from main import fatality
from support.helpers import timestamp, pad_title
from support.config import config
from support.lib import Plex
from support.storage import reset_storage, log_storage, get_subtitle_storage
from support.scheduler import scheduler
from support.items import set_mods_for_part, get_item_kind_from_rating_key
from support.i18n import _
@route(PREFIX + '/advanced')
def AdvancedMenu(randomize=None, header=None, message=None):
    """
    Build the "Advanced" menu.

    When the advanced menu is PIN-locked and no correct PIN has been entered,
    only an "Enter PIN" entry is returned. Otherwise every maintenance action
    is listed.

    :param randomize: cache-busting value, unused here
    :param header: optional header, defaults to a warning text
    :param message: optional message shown with the container
    :return: the populated object container
    """
    oc = SubFolderObjectContainer(
        header=header or _("Internal stuff, pay attention!"),
        message=message,
        no_cache=True,
        no_history=True,
        replace_parent=False,
        title2=_("Advanced"))

    if config.lock_advanced_menu and not config.pin_correct:
        oc.add(DirectoryObject(
            key=Callback(
                PinMenu,
                randomize=timestamp(),
                # routing token compared verbatim against "advanced" in PinMenu;
                # it must not be passed through the translation function
                success_go_to="advanced"),
            title=pad_title(_("Enter PIN")),
            summary=_("The owner has restricted the access to this menu. Please enter the correct pin"),
        ))
        return oc

    def add_action(callback, title, **callback_kwargs):
        # one padded-title entry whose callback gets a fresh `randomize` value
        oc.add(DirectoryObject(
            key=Callback(callback, randomize=timestamp(), **callback_kwargs),
            title=pad_title(title),
        ))

    add_action(TriggerRestart, _("Restart the plugin"))

    # the logs link entry differs: no randomize, unpadded title, extra summary
    oc.add(DirectoryObject(
        key=Callback(GetLogsLink),
        title=_("Get my logs (copy the appearing link and open it in your browser, please)"),
        summary=_("Copy the appearing link and open it in your browser, please"),
    ))

    add_action(TriggerBetterSubtitles, _("Trigger find better subtitles"))
    add_action(SkipFindBetterSubtitles, _("Skip next find better subtitles (sets last run to now)"))
    add_action(SkipRecentlyAddedMissing,
               _("Skip next find recently added with missing subtitles (sets last run to now)"))
    add_action(TriggerStorageMaintenance, _("Trigger subtitle storage maintenance"))
    add_action(TriggerStorageMigration, _("Trigger subtitle storage migration (expensive)"))
    add_action(TriggerCacheMaintenance, _("Trigger cache maintenance (refiners, providers and packs/archives)"))
    add_action(ApplyDefaultMods, _("Apply configured default subtitle mods to all (active) stored subtitles"))
    add_action(ReApplyMods, _("Re-Apply mods of all stored subtitles"))
    add_action(LogStorage, _("Log the plugin's scheduled tasks state storage"), key="tasks")
    add_action(LogStorage, _("Log the plugin's internal ignorelist storage"), key="ignore")
    add_action(LogStorage, _("Log the plugin's complete state storage"), key=None)
    add_action(ResetStorage, _("Reset the plugin's scheduled tasks state storage"), key="tasks")
    add_action(ResetStorage, _("Reset the plugin's internal ignorelist storage"), key="ignore")
    add_action(ResetStorage, _("Reset the plugin's menu history storage"), key="menu_history")
    add_action(InvalidateCache, _("Invalidate Sub-Zero metadata caches (subliminal)"))
    add_action(ResetProviderThrottle, _("Reset provider throttle states"))

    return oc
def DispatchRestart():
    """Schedule Restart to run on a timer after one second."""
    delay_seconds = 1.0
    Thread.CreateTimer(delay_seconds, Restart)
@route(PREFIX + '/advanced/restart/trigger')
@debounce
def TriggerRestart(randomize=None):
    """Flag the restart in the refresh menu state, dispatch it and show the main menu."""
    set_refresh_menu_state(_("Restarting the plugin"))
    DispatchRestart()

    menu_options = dict(
        header=_("Restart triggered, please wait about 5 seconds"),
        force_title=" ",
        only_refresh=True,
        replace_parent=True,
        no_history=True,
        randomize=timestamp(),
    )
    return fatality(**menu_options)
@route(PREFIX + '/advanced/restart/execute')
@debounce
def Restart(randomize=None):
    """Ask the server's plugin endpoint to restart this plugin."""
    plugins_endpoint = Plex[":/plugins"]
    plugins_endpoint.restart(PLUGIN_IDENTIFIER)
@route(PREFIX + '/storage/reset', sure=bool)
@debounce
def ResetStorage(key, randomize=None, sure=False):
    """
    Reset the plugin storage identified by ``key`` after a confirmation step.

    :param key: storage key handed to reset_storage
    :param randomize: cache-busting value, unused here
    :param sure: False shows the confirmation menu, True performs the reset
    """
    if sure:
        reset_storage(key)

        if key == "tasks":
            # reinitialize the scheduler
            scheduler.init_storage()
            scheduler.setup_tasks()

        return AdvancedMenu(
            randomize=timestamp(),
            header=_("Success"),
            message=_("Information Storage (%s) reset", key)
        )

    # not confirmed yet: offer a single entry that calls back with sure=True
    confirmation = SubFolderObjectContainer(
        no_history=True,
        title1=_("Reset subtitle storage"),
        title2=_("Are you sure?"))
    confirm_callback = Callback(
        ResetStorage,
        key=key,
        sure=True,
        randomize=timestamp())
    confirmation.add(DirectoryObject(
        key=confirm_callback,
        title=pad_title(_("Are you really sure?")),
    ))
    return confirmation
@route(PREFIX + '/storage/log')
def LogStorage(key, randomize=None):
    """Log the storage identified by ``key`` and return to the advanced menu."""
    log_storage(key)
    menu_kwargs = dict(
        randomize=timestamp(),
        header=_("Success"),
        message=_("Information Storage (%s) logged", key),
    )
    return AdvancedMenu(**menu_kwargs)
@route(PREFIX + '/triggerbetter')
@debounce
def TriggerBetterSubtitles(randomize=None):
    """Dispatch the FindBetterSubtitles task and return to the advanced menu."""
    task_name = "FindBetterSubtitles"
    scheduler.dispatch_task(task_name)
    return AdvancedMenu(randomize=timestamp(), header=_("Success"),
                        message=_("FindBetterSubtitles triggered"))
@route(PREFIX + '/skipbetter')
@debounce
def SkipFindBetterSubtitles(randomize=None):
    """Set the FindBetterSubtitles task's last run to now and return to the advanced menu."""
    better_task = scheduler.task("FindBetterSubtitles")
    better_task.last_run = datetime.datetime.now()

    return AdvancedMenu(randomize=timestamp(), header=_("Success"),
                        message=_("FindBetterSubtitles skipped"))
@route(PREFIX + '/skipram')
@debounce
def SkipRecentlyAddedMissing(randomize=None):
    """Set the SearchAllRecentlyAddedMissing task's last run to now and return to the advanced menu."""
    missing_task = scheduler.task("SearchAllRecentlyAddedMissing")
    missing_task.last_run = datetime.datetime.now()

    return AdvancedMenu(randomize=timestamp(), header=_("Success"),
                        message=_("SearchAllRecentlyAddedMissing skipped"))
@route(PREFIX + '/triggermaintenance')
@debounce
def TriggerStorageMaintenance(randomize=None):
    """Dispatch the SubtitleStorageMaintenance task and return to the advanced menu."""
    task_name = "SubtitleStorageMaintenance"
    scheduler.dispatch_task(task_name)
    return AdvancedMenu(randomize=timestamp(), header=_("Success"),
                        message=_("SubtitleStorageMaintenance triggered"))
@route(PREFIX + '/triggerstoragemigration')
@debounce
def TriggerStorageMigration(randomize=None):
    """Dispatch the MigrateSubtitleStorage task and return to the advanced menu."""
    task_name = "MigrateSubtitleStorage"
    scheduler.dispatch_task(task_name)
    return AdvancedMenu(randomize=timestamp(), header=_("Success"),
                        message=_("MigrateSubtitleStorage triggered"))
@route(PREFIX + '/triggercachemaintenance')
@debounce
def TriggerCacheMaintenance(randomize=None):
    """Dispatch the CacheMaintenance task and return to the advanced menu."""
    task_name = "CacheMaintenance"
    scheduler.dispatch_task(task_name)
    return AdvancedMenu(randomize=timestamp(), header=_("Success"),
                        message=_("TriggerCacheMaintenance triggered"))
def apply_default_mods(reapply_current=False, scandir_generic=False):
    """
    Apply subtitle mods to the current subtitle of every stored item.

    With ``reapply_current=False`` the configured default mods that a subtitle
    does not have yet are added; subtitles already carrying all of them are
    skipped. With ``reapply_current=True`` subtitles that have mods get
    set_mods_for_part called with an empty add-list; subtitles without mods
    are skipped.

    If listing/processing the storage raises OSError, the whole run is retried
    once with ``scandir_generic=True``. A failure of that retry is re-raised
    instead of recursing again.

    :param reapply_current: re-apply existing mods instead of adding defaults
    :param scandir_generic: passed to storage.get_all_files
    """
    storage = get_subtitle_storage()
    subs_applied = 0
    retry_generic = False

    try:
        for fn in storage.get_all_files(scandir_generic=scandir_generic):
            data = storage.load(None, filename=fn)
            if not data:
                continue

            video_id = data.video_id
            item_type = get_item_kind_from_rating_key(video_id)
            if not item_type:
                continue

            for part_id, part in data.parts.iteritems():
                for lang, subs in part.iteritems():
                    current_sub = subs.get("current")
                    if not current_sub:
                        continue

                    sub = subs[current_sub]
                    if not sub.content:
                        continue

                    current_mods = sub.mods or []
                    if not reapply_current:
                        add_mods = list(set(config.default_mods).difference(set(current_mods)))
                        if not add_mods:
                            continue
                    else:
                        if not current_mods:
                            continue
                        add_mods = []

                    try:
                        set_mods_for_part(video_id, part_id, Language.fromietf(lang), item_type, add_mods,
                                          mode="add")
                    except Exception:
                        Log.Error("Couldn't set mods for %s:%s: %s", video_id, part_id, traceback.format_exc())
                        continue

                    subs_applied += 1
    except OSError:
        if scandir_generic:
            # the generic fallback failed as well; retrying again would recurse forever
            raise
        retry_generic = True
    finally:
        # release this run's storage handle on every path, including the retry
        storage.destroy()

    if retry_generic:
        return apply_default_mods(reapply_current=reapply_current, scandir_generic=True)

    Log.Debug("Applied mods to %i items" % subs_applied)
@route(PREFIX + '/applydefaultmods')
@debounce
def ApplyDefaultMods(randomize=None):
    """Schedule apply_default_mods on a one second timer and return to the advanced menu."""
    delay_seconds = 1.0
    Thread.CreateTimer(delay_seconds, apply_default_mods)
    return AdvancedMenu(randomize=timestamp(), header=_("Success"),
                        message=_("This may take some time ..."))
@route(PREFIX + '/reapplyallmods')
@debounce
def ReApplyMods(randomize=None):
    """Schedule apply_default_mods(reapply_current=True) on a one second timer and return to the advanced menu."""
    delay_seconds = 1.0
    Thread.CreateTimer(delay_seconds, apply_default_mods, reapply_current=True)
    return AdvancedMenu(randomize=timestamp(), header=_("Success"),
                        message=_("This may take some time ..."))
@route(PREFIX + '/get_logs_link')
def GetLogsLink():
    """
    Show a link (containing the Plex token) under which the logs can be downloaded.

    The link base is taken from the request's Origin header, else derived from
    its Referer header. Without either header, or when the base contains
    "plex.tv", the address returned by the IP lookup service is used instead.
    """
    if not config.plex_token:
        return ObjectContainer(
            title2=_("Download Logs"),
            no_cache=True,
            no_history=True,
            header=_("Sorry, feature unavailable"),
            message=_("Universal Plex token not available"))

    # try getting the link base via the request in context, first, otherwise use the public ip
    headers = Core.sandbox.context.request.headers
    base_from_request = False
    link_base = ""

    if "Origin" in headers:
        link_base = headers["Origin"]
        Log.Debug("Using origin-based link_base")
        base_from_request = True

    elif "Referer" in headers:
        referer = urlparse.urlparse(headers["Referer"])
        port_part = (":%s" % referer.port) if referer.port else ""
        link_base = "%s://%s%s" % (referer.scheme, referer.hostname, port_part)
        Log.Debug("Using referer-based link_base")
        base_from_request = True

    if not base_from_request or "plex.tv" in link_base:
        response = Core.networking.http_request("http://www.plexapp.com/ip.php", cacheTime=7200)
        ip = response.content.strip()
        link_base = "https://%s:32400" % ip
        Log.Debug("Using ip-based fallback link_base")

    logs_link = "%s%s?X-Plex-Token=%s" % (link_base, PREFIX + '/logs', config.plex_token)
    return ObjectContainer(
        title2=logs_link,
        no_cache=True,
        no_history=True,
        header=_("Copy this link and open this in your browser, please"),
        message=logs_link)
@route(PREFIX + '/logs')
def DownloadLogs():
    """
    Return a zip archive with all plugin log files plus the server log.

    Files matching ``config.plugin_log_path + '*'`` are added in sorted order,
    followed by ``config.server_log_path``; each is stored under its base name.
    """
    buff = StringIO.StringIO()
    logs = sorted(glob.glob(config.plugin_log_path + '*')) + [config.server_log_path]

    # the context manager finalizes the archive even if reading a log fails
    with ZipFile(buff, mode='w', compression=ZIP_DEFLATED) as zip_archive:
        for path in logs:
            # write the file content directly, no intermediate buffer copy
            zip_archive.writestr(os.path.basename(path), FileIO.read(path))

    return ZipObject(buff.getvalue())
@route(PREFIX + '/invalidatecache')
@debounce
def InvalidateCache(randomize=None):
    """Clear (new-style cache) or invalidate the subliminal cache region, then return to the advanced menu."""
    from subliminal.cache import region

    if not config.new_style_cache:
        region.invalidate()
    else:
        region.backend.clear()

    return AdvancedMenu(randomize=timestamp(), header=_("Success"),
                        message=_("Cache invalidated"))
@route(PREFIX + '/pin')
def PinMenu(pin="", randomize=None, success_go_to="channel"):
    """
    PIN entry menu: one entry per digit plus a reset entry.

    Every digit entry calls this menu again with the digit appended to ``pin``.
    Once ``pin`` equals ``config.pin`` the unlock time is stored and the menu
    named by ``success_go_to`` ("channel" or "advanced") is returned.

    :param pin: digits entered so far
    :param randomize: cache-busting value, unused here
    :param success_go_to: which menu to show after a correct PIN
    """
    oc = ObjectContainer(
        title2=_("Enter PIN number ") + str(len(pin) + 1),
        no_cache=True,
        no_history=True,
        skip_pin_lock=True)

    if pin == config.pin:
        Dict["pin_correct_time"] = datetime.datetime.now()
        config.locked = False
        if success_go_to == "channel":
            return fatality(
                force_title=_("PIN correct"),
                header=_("PIN correct"),
                no_history=True)
        if success_go_to == "advanced":
            return AdvancedMenu(randomize=timestamp())

    for digit in range(10):
        next_step = Callback(
            PinMenu,
            randomize=timestamp(),
            pin=pin + str(digit),
            success_go_to=success_go_to)
        oc.add(DirectoryObject(key=next_step, title=pad_title(str(digit))))

    # starts over with an empty pin
    start_over = Callback(
        PinMenu,
        randomize=timestamp(),
        success_go_to=success_go_to)
    oc.add(DirectoryObject(key=start_over, title=pad_title(_("Reset"))))
    return oc
@route(PREFIX + '/pin_lock')
def ClearPin(randomize=None):
    """Forget the stored PIN unlock time, mark the config as locked and show the main menu."""
    config.locked = True
    Dict["pin_correct_time"] = None
    return fatality(force_title=_("Menu locked"), header=" ", no_history=True)
@route(PREFIX + '/reset_throttle')
def ResetProviderThrottle(randomize=None):
    """Empty the stored provider throttle states, save Dict and return to the advanced menu."""
    Dict["provider_throttle"] = {}
    Dict.Save()
    return AdvancedMenu(randomize=timestamp(), header=_("Success"),
                        message=_("Provider throttles reset"))
-185
View File
@@ -1,185 +0,0 @@
# coding=utf-8
import datetime
import operator
from support.config import config
from support.helpers import timestamp
def enable_channel_wrapper(func, enforce_route=False):
    """
    Make a wrapper (route or handler) conditional on the channel being enabled.

    The returned callable forwards its arguments to ``func`` when
    ``config.enable_channel`` is truthy or ``enforce_route`` is set. Otherwise
    it forwards them to a no-op stand-in, which ignores them and returns a
    function that hands back its first positional argument.

    :param func: original wrapper
    :param enforce_route: use ``func`` even if the channel is disabled
    :return: callable choosing between ``func`` and the no-op at call time
    """
    def noop(*args, **kwargs):
        def inner(*a, **k):
            # hand back the first positional argument, or the empty tuple if there is none
            if len(a):
                return a[0]
            return a
        return inner

    def wrap(*args, **kwargs):
        if config.enable_channel or enforce_route:
            chosen = func
        else:
            chosen = noop
        return chosen(*args, **kwargs)

    return wrap
# function name -> route function; filled by register_route_function and used
# by main_menu_fallback to look up the "fatality" route
ROUTE_REGISTRY = {}
def get_func_name(args):
    """Return the ``__name__`` of the first element of ``args``."""
    first = list(args)[0]
    return first.__name__
def get_lookup_key(f, args, kwargs):
    """
    Build the hashable menu-history key for a call of ``f``.

    :param f: the called function, only its ``__name__`` is used
    :param args: positional arguments of the call
    :param kwargs: keyword arguments of the call (a dict)
    :return: tuple ``(name, tuple(args), tuple of (key, value) pairs)``
    """
    # items() instead of the Python-2-only iteritems(); the result is the same
    kwarg_pairs = tuple((key, value) for key, value in kwargs.items())
    return (f.__name__, tuple(args), kwarg_pairs)
def should_debounce(f, key, kw):
    """
    Tell whether a call should be suppressed as a repeated, debounced call.

    Truthy only if ``f`` has a truthy ``debounce`` attribute, the call carries
    a ``randomize`` keyword and ``key`` is already in the stored menu history.
    """
    debounce_flag = getattr(f, "debounce", False)
    if not debounce_flag:
        return debounce_flag
    if "randomize" not in kw:
        return False
    return key in Dict["menu_history"]
def register_route_function(f):
    """
    Remember ``f`` in ROUTE_REGISTRY under its name and return it unchanged.

    "ValidatePrefs" is never registered; an already registered name keeps its
    first function.
    """
    name = f.__name__
    if name == "ValidatePrefs":
        return f
    if name not in ROUTE_REGISTRY:
        ROUTE_REGISTRY[name] = f
    return f
def main_menu_fallback():
    """Record the registered "fatality" route as the last menu item and return its result."""
    fatality_route = ROUTE_REGISTRY["fatality"]
    history_key = get_lookup_key(fatality_route, [], {})
    Dict["last_menu_item"] = history_key
    add_to_menu_history(history_key)
    return fatality_route(randomize=timestamp())
def add_to_menu_history(key):
    """
    Store ``key`` in the menu history with an expiry six hours from now.

    Only the 25 entries with the latest expiry are kept. If saving ``Dict``
    fails with a TypeError, the error is logged and the key is removed again.

    :param key: lookup key as produced by get_lookup_key
    """
    # add function to menu history
    mh = Dict["menu_history"]
    if key in mh:
        del mh[key]
    mh[key] = datetime.datetime.now() + datetime.timedelta(hours=6)

    # limit to 25 items; the result becomes a dict, so the pairs are not
    # ordered by key a second time (that ordering was discarded anyway)
    newest_first = sorted(mh.items(), key=operator.itemgetter(1), reverse=True)
    Dict["menu_history"] = dict(newest_first[:25])

    try:
        Dict.Save()
    except TypeError:
        Log.Error("Can't save menu history for: %r", key)
        del Dict["menu_history"][key]
def route_wrapper(*args, **kwargs):
    """
    Decorator factory layered on the framework's ``route``.

    The decorated function is registered in ROUTE_REGISTRY and, unless it
    already carries an ``orig_f`` attribute, wrapped so that every call

    * makes sure the "menu_history" and "last_menu_item" entries exist in Dict,
    * falls back to the main menu instead of running a debounced call twice,
    * otherwise records the call in the menu history and runs the function.

    The keyword ``enforce_route`` is removed from ``kwargs`` and handed to
    enable_channel_wrapper; everything else goes to ``route``.
    """
    def wrap(f):
        already_wrapped = getattr(f, "orig_f", False)
        register_route_function(f)

        def inner(*a, **kw):
            if "menu_history" not in Dict:
                Dict["menu_history"] = {}

            if "last_menu_item" not in Dict:
                Dict["last_menu_item"] = None

            key = get_lookup_key(f, list(a), kw)

            if should_debounce(f, key, kw):
                # special case for TriggerRestart
                if f.__name__ in ("TriggerRestart", "Restart"):
                    Log.Debug("Don't trigger a re-restart, falling back to main menu")
                else:
                    Log.Debug("not triggering %s twice with %s, %s, returning to main menu" %
                              (f.__name__, a, kw))
                return main_menu_fallback()

            add_to_menu_history(key)
            Dict["last_menu_item"] = key
            return f(*a, **kw)

        # @route may be used multiple times
        enforce_route = kwargs.pop("enforce_route", None)

        target = f
        if not already_wrapped:
            # mark the wrapper so a second @route on top leaves it alone
            inner.orig_f = f
            target = inner

        return enable_channel_wrapper(route(*args, **kwargs), enforce_route=enforce_route)(target)

    return wrap
-722
View File
@@ -1,722 +0,0 @@
# coding=utf-8
import os
from collections import OrderedDict
from subzero.language import Language
from sub_mod import SubtitleModificationsMenu
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, add_incl_excl_options, get_item_task_data, \
set_refresh_menu_state, route, extract_embedded_sub
from refresh_item import RefreshItem
from subzero.constants import PREFIX
from support.config import config, TEXT_SUBTITLE_EXTS
from support.helpers import timestamp, df, get_language, display_language, get_language_from_stream, is_stream_forced
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub, get_item_title, save_stored_sub
from support.plex_media import get_plex_metadata, get_part, get_embedded_subtitle_streams
from support.scanning import scan_videos
from support.scheduler import scheduler
from support.storage import get_subtitle_storage
from support.i18n import _
# fixme: needs kwargs cleanup
@route(PREFIX + '/item/{rating_key}/actions')
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None, header=None,
                    message=None):
    """
    displays the item details menu of an item that doesn't contain any deeper tree, such as a movie or an episode

    The menu is built in this order: a "back to season" entry (episodes only), a refresh and a force-find entry,
    one subtitle entry per existing media part and configured language, one embedded-subtitles entry per part
    (only if config.plex_transcoder is set and matching streams exist) and finally the include/exclude options.

    :param rating_key: key of the item; used to load the item and passed on to all sub menus
    :param title: menu title; prefixed with base_title if that is given
    :param base_title: optional title prefix
    :param item_title: title of the item as shown in the entry labels
    :param randomize: not read here; callers pass timestamp() for it
    :param header: handed to the object container
    :param message: handed to the object container
    :return: the populated SubFolderObjectContainer
    """
    # NOTE(review): local import, presumably to avoid a circular import with interface.main -- confirm
    from interface.main import InclExclMenu

    title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)
    item = plex_item = get_item(rating_key)
    current_kind = get_item_kind_from_rating_key(rating_key)

    # multiplied by 1000 when passed to RefreshItem below
    timeout = 30

    oc = SubFolderObjectContainer(
        title2=title,
        replace_parent=True,
        header=header,
        message=message)

    if not item:
        # nothing to show; offer a single entry that leads back to this very menu
        oc.add(DirectoryObject(
            key=Callback(
                ItemDetailsMenu,
                rating_key=rating_key,
                title=title,
                base_title=base_title,
                item_title=item_title,
                randomize=timestamp()),
            title=_(u"Item not found: %s!", item_title),
            summary=_("Plex didn't return any information about the item, please refresh it and come back later"),
            thumb=default_thumb
        ))
        return oc

    # add back to season for episode
    if current_kind == "episode":
        from interface.menu import MetadataMenu
        show = get_item(item.show.rating_key)
        season = get_item(item.season.rating_key)

        oc.add(DirectoryObject(
            key=Callback(
                MetadataMenu,
                rating_key=season.rating_key,
                title=season.title,
                base_title=show.title,
                previous_item_type="show",
                previous_rating_key=show.rating_key,
                display_items=True,
                randomize=timestamp()),
            title=_(u"< Back to %s", season.title),
            summary=_("Back to %s > %s", show.title, season.title),
            thumb=season.thumb or default_thumb
        ))

    oc.add(DirectoryObject(
        key=Callback(
            RefreshItem,
            rating_key=rating_key,
            item_title=item_title,
            randomize=timestamp(),
            timeout=timeout * 1000),
        title=_(u"Refresh: %s", item_title),
        summary=_("Refreshes %(the_movie_series_season_episode)s, possibly searching for missing and picking up "
                  "new subtitles on disk", the_movie_series_season_episode=_(u"the %s" % current_kind)),
        thumb=item.thumb or default_thumb
    ))
    oc.add(DirectoryObject(
        key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(),
                     timeout=timeout * 1000),
        title=_(u"Force-find subtitles: %(item_title)s", item_title=item_title),
        summary=_("Issues a forced refresh, ignoring known subtitles and searching for new ones"),
        thumb=item.thumb or default_thumb
    ))

    # get stored subtitle info for item id
    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)

    # look for subtitles for all available media parts and all of their languages
    # NOTE(review): this counts media entries, not parts -- confirm that is the intended trigger for the file prefix
    has_multiple_parts = len(plex_item.media) > 1
    part_index = 0
    for media in plex_item.media:
        for part in media.parts:
            filename = os.path.basename(part.file)
            # parts whose file is not present on disk get no entries and do not count towards part_index
            if not os.path.exists(part.file):
                continue

            part_id = str(part.id)
            part_index += 1

            part_index_addon = u""
            part_summary_addon = u""
            if has_multiple_parts:
                part_index_addon = _(u"File %(file_part_index)s: ", file_part_index=part_index)
                part_summary_addon = u"%s " % filename

            # iterate through all configured languages
            for lang in config.lang_list:
                # get corresponding stored subtitle data for that media part (physical media item), for language
                current_sub = stored_subs.get_any(part_id, lang)
                current_sub_id = None
                current_sub_provider_name = None

                summary = _(u"%(part_summary)sNo current subtitle in storage", part_summary=part_summary_addon)
                current_score = None
                if current_sub:
                    current_sub_id = current_sub.id
                    current_sub_provider_name = current_sub.provider_name
                    current_score = current_sub.score

                    summary = _(u"%(part_summary)sCurrent subtitle: %(provider_name)s (added: %(date_added)s, "
                                u"%(mode)s), Language: %(language)s, Score: %(score)i, Storage: %(storage_type)s",
                                part_summary=part_summary_addon,
                                provider_name=_(current_sub.provider_name),
                                date_added=df(current_sub.date_added),
                                mode=_(current_sub.mode_verbose),
                                language=display_language(lang),
                                score=current_sub.score,
                                storage_type=current_sub.storage_type)

                    # a subtitle is stored: link to the management menu
                    oc.add(DirectoryObject(
                        key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
                                     item_title=item_title, language=lang, language_name=display_language(lang),
                                     current_id=current_sub_id,
                                     item_type=plex_item.type, filename=filename, current_data=summary,
                                     randomize=timestamp(), current_provider=current_sub_provider_name,
                                     current_score=current_score),
                        title=_(u"%(part_summary)sManage %(language)s subtitle", part_summary=part_index_addon,
                                language=display_language(lang)),
                        summary=summary
                    ))
                else:
                    # nothing stored: link straight to the search menu
                    oc.add(DirectoryObject(
                        key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
                                     item_title=item_title, language=lang, language_name=display_language(lang),
                                     current_id=current_sub_id,
                                     item_type=plex_item.type, filename=filename, current_data=summary,
                                     randomize=timestamp(), current_provider=current_sub_provider_name,
                                     current_score=current_score),
                        title=_(u"%(part_summary)sList %(language)s subtitles", part_summary=part_index_addon,
                                language=display_language(lang)),
                        summary=summary
                    ))

            if config.plex_transcoder:
                # embedded subtitles
                embedded_count = 0
                embedded_langs = []
                for stream in part.streams:
                    # subtitle stream
                    if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
                        lang = get_language_from_stream(stream.language_code)
                        is_forced = is_stream_forced(stream)

                        # streams without a usable language fall back to the first configured language, if enabled
                        if not lang and config.treat_und_as_first:
                            lang = list(config.lang_list)[0]

                        if lang:
                            lang = Language.rebuild(lang, forced=is_forced)
                            embedded_langs.append(lang)
                            embedded_count += 1

                if embedded_count:
                    oc.add(DirectoryObject(
                        key=Callback(ListEmbeddedSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
                                     item_type=plex_item.type, item_title=item_title, base_title=base_title,
                                     randomize=timestamp()),
                        # OrderedDict.fromkeys drops duplicate languages while keeping their order
                        title=_(u"%(part_summary)sEmbedded subtitles (%(languages)s)",
                                part_summary=part_index_addon,
                                languages=", ".join(display_language(l)
                                                    for l in list(OrderedDict.fromkeys(embedded_langs)))),
                        summary=_(u"Extract embedded subtitle streams")
                    ))

    ignore_title = item_title
    if current_kind == "episode":
        ignore_title = get_item_title(item)
    add_incl_excl_options(oc, "videos", title=ignore_title, rating_key=rating_key, callback_menu=InclExclMenu)
    subtitle_storage.destroy()

    return oc
@route(PREFIX + '/item/current_sub/{rating_key}/{part_id}')
def SubtitleOptionsMenu(**kwargs):
    """
    displays the options for the subtitle currently stored for one media part and language

    :param kwargs: needs title, rating_key, part_id, language, language_name, current_data, item_title and
        randomize; header and message are optional. All of them except randomize are passed on to the sub menus
    :return: the populated SubFolderObjectContainer
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True, header=kwargs.get("header"),
                                  message=kwargs.get("message"))

    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    current_data = unicode(kwargs["current_data"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    stored_count = stored_subs.count(part_id, language)

    # every sub menu link gets its own fresh randomize value, so the incoming one is dropped
    kwargs.pop("randomize")

    def sub_menu(target):
        # link to a sub menu, handing over the remaining kwargs unchanged
        return Callback(target, randomize=timestamp(), **kwargs)

    oc.add(DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=rating_key, item_title=kwargs["item_title"],
                     title=kwargs["title"], randomize=timestamp()),
        title=_(u"< Back to %s", kwargs["title"]),
        summary=current_data,
        thumb=default_thumb
    ))

    if stored_count:
        oc.add(DirectoryObject(
            key=sub_menu(ListStoredSubsForItemMenu),
            title=_(u"Select active %(language)s subtitle", language=kwargs["language_name"]),
            summary=_(u"%(count)d subtitles in storage", count=stored_count)
        ))

    oc.add(DirectoryObject(
        key=sub_menu(ListAvailableSubsForItemMenu),
        title=_(u"List available %(language)s subtitles", language=kwargs["language_name"]),
        summary=current_data
    ))

    if current_sub:
        if current_sub.mods:
            applied_mods = ", ".join(current_sub.mods)
        else:
            applied_mods = "none"

        oc.add(DirectoryObject(
            key=sub_menu(SubtitleModificationsMenu),
            title=_(u"Modify current %(language)s subtitle", language=kwargs["language_name"]),
            summary=_(u"Currently applied mods: %(mod_list)s",
                      mod_list=applied_mods)
        ))

        # embedded subtitles get no blacklist entry
        if current_sub.provider_name != "embedded":
            oc.add(DirectoryObject(
                key=sub_menu(BlacklistSubtitleMenu),
                title=_(u"Blacklist current %(language)s subtitle and search for a new one",
                        language=kwargs["language_name"]),
                summary=current_data
            ))

        blacklisted, subs = stored_subs.get_blacklist(part_id, language)
        if blacklisted:
            oc.add(DirectoryObject(
                key=sub_menu(ManageBlacklistMenu),
                title=_(u"Manage blacklist (%(amount)s contained)", amount=len(blacklisted)),
                summary=_(u"Inspect currently blacklisted subtitles")
            ))

    storage.destroy()
    return oc
@route(PREFIX + '/item/list_stored_subs/{rating_key}/{part_id}')
def ListStoredSubsForItemMenu(**kwargs):
    """
    lists all subtitles stored for one media part and language, newest first

    Each entry links to SelectStoredSubForItemMenu, which makes that subtitle the current one.

    :param kwargs: needs title, rating_key, part_id, language (IETF code) and randomize; the remaining kwargs are
        passed on to SelectStoredSubForItemMenu
    :return: the populated SubFolderObjectContainer
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    all_subs = stored_subs.get_all(part_id, language)
    kwargs.pop("randomize")

    # the "current" marker is not guaranteed to exist (BlacklistAllPartsSubtitleMenu checks for it as well);
    # without it no entry is labelled as current
    current_key = all_subs["current"] if "current" in all_subs else None

    # "current" and "blacklist" are bookkeeping entries, not subtitles
    for key, subtitle in sorted(filter(lambda x: x[0] not in ("current", "blacklist"), all_subs.items()),
                                key=lambda x: x[1].date_added, reverse=True):
        is_current = key == current_key

        summary = _(u"added: %(date_added)s, %(mode)s, Language: %(language)s, Score: %(score)i, Storage: "
                    u"%(storage_type)s",
                    date_added=df(subtitle.date_added),
                    mode=_(subtitle.mode_verbose),
                    language=display_language(language),
                    score=subtitle.score,
                    storage_type=subtitle.storage_type)

        sub_name = subtitle.provider_name
        if sub_name == "embedded":
            sub_name += " (%s)" % subtitle.id

        oc.add(DirectoryObject(
            # the key tuple is joined with "__" for the URL; SelectStoredSubForItemMenu splits it again
            key=Callback(SelectStoredSubForItemMenu, randomize=timestamp(), sub_key="__".join(key), **kwargs),
            title=_(u"%(current_state)s%(subtitle_name)s, Score: %(score)s",
                    current_state=_("Current: ") if is_current else _("Stored: "),
                    subtitle_name=sub_name,
                    score=subtitle.score),
            summary=summary
        ))

    # release the storage like the sibling menus do
    storage.destroy()
    return oc
@route(PREFIX + '/item/set_current_sub/{rating_key}/{part_id}')
@debounce
def SelectStoredSubForItemMenu(**kwargs):
    """
    makes one of the stored subtitles the current one and returns to the item details menu

    :param kwargs: needs rating_key, part_id, language (IETF code), item_type, sub_key ("__"-joined subtitle key),
        title and item_title; base_title is optional
    :return: the item details menu with a success header and message
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])
    item_type = kwargs["item_type"]

    # undo the "__" join that ListStoredSubsForItemMenu applied to the key tuple
    sub_key = tuple(kwargs.pop("sub_key").split("__"))

    plex_item = get_item(rating_key)
    storage = get_subtitle_storage()
    stored_subs = storage.load(plex_item.rating_key)
    chosen_subtitle = stored_subs.get_all(part_id, language)[sub_key]

    save_stored_sub(chosen_subtitle, rating_key, part_id, language, item_type, plex_item=plex_item,
                    storage=storage, stored_subs=stored_subs)

    stored_subs.set_current(part_id, language, sub_key)
    storage.save(stored_subs)
    storage.destroy()

    # fixme: return to SubtitleOptionsMenu properly? (needs recomputation of current_data
    return ItemDetailsMenu(rating_key, randomize=timestamp(),
                           header=_("Success"),
                           message=_("Subtitle saved to disk"),
                           title=kwargs["title"],
                           item_title=kwargs["item_title"],
                           base_title=kwargs.get("base_title"))
@route(PREFIX + '/item/blacklist_recent/{language}')
@route(PREFIX + '/item/blacklist_recent')
def BlacklistRecentSubtitleMenu(**kwargs):
    """
    blacklists the current subtitles of the first entry in Dict["last_played_items"]

    :param kwargs: passed on to BlacklistAllPartsSubtitleMenu with rating_key set to that entry
    :return: result of BlacklistAllPartsSubtitleMenu, or None if no played items are recorded
    """
    if "last_played_items" in Dict and Dict["last_played_items"]:
        kwargs["rating_key"] = Dict["last_played_items"][0]
        return BlacklistAllPartsSubtitleMenu(**kwargs)
@route(PREFIX + '/item/blacklist_all/{rating_key}/{language}')
@route(PREFIX + '/item/blacklist_all/{rating_key}')
def BlacklistAllPartsSubtitleMenu(**kwargs):
    """
    blacklists the current subtitle of every stored part of an item and triggers a forced refresh

    :param kwargs: rating_key of the item; language (IETF code) optionally restricts the blacklisting to one
        language, otherwise all stored languages are handled
    :return: result of RefreshItem, or None if the item could not be loaded
    """
    rating_key = kwargs.get("rating_key")
    language = kwargs.get("language")
    if language:
        language = Language.fromietf(language)

    item = get_item(rating_key)
    if not item:
        return

    item_title = get_item_title(item)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)
    for part_id, languages in stored_subs.parts.iteritems():
        sub_dict = languages
        if language:
            key = str(language)
            if key not in sub_dict:
                continue

            sub_dict = {key: sub_dict[key]}

        # the loop variable must not be called "language": that would overwrite the requested filter and
        # restrict all following parts to whatever language was iterated last
        for sub_language, subs in sub_dict.iteritems():
            if "current" in subs:
                stored_subs.blacklist(part_id, sub_language, subs["current"])
                Log.Info("Added %s to blacklist", subs["current"])

    subtitle_storage.save(stored_subs)
    subtitle_storage.destroy()

    return RefreshItem(rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(), timeout=30000)
def blacklist(rating_key, part_id, language):
    """
    adds the current subtitle of a media part and language to the blacklist and saves the storage

    :param rating_key: key of the item
    :param part_id: id of the media part
    :param language: language of the subtitle
    :return: True if a subtitle was blacklisted, None if there is no current subtitle
    """
    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    if not current_sub:
        # nothing to blacklist; still release the storage that get_current_sub handed out
        storage.destroy()
        return

    stored_subs.blacklist(part_id, language, current_sub.key)
    storage.save(stored_subs)
    storage.destroy()

    Log.Info("Added %s to blacklist", current_sub.key)

    return True
@route(PREFIX + '/item/blacklist/{rating_key}/{part_id}')
@debounce
def BlacklistSubtitleMenu(**kwargs):
    """
    blacklists the current subtitle of one media part and language, then triggers a forced refresh of the item

    :param kwargs: needs rating_key, part_id, language, item_title and randomize
    :return: result of RefreshItem
    """
    item_key = kwargs["rating_key"]
    media_part_id = kwargs["part_id"]
    sub_language = kwargs["language"]
    label = kwargs["item_title"]

    blacklist(item_key, media_part_id, sub_language)

    kwargs.pop("randomize")

    return RefreshItem(rating_key=item_key, item_title=label, force=True, randomize=timestamp(), timeout=30000)
@route(PREFIX + '/item/manage_blacklist/{rating_key}/{part_id}', force=bool)
@debounce
def ManageBlacklistMenu(**kwargs):
    """
    lists the blacklisted subtitles of one media part and language, newest first

    Selecting an entry calls this menu again with remove_sub_key set, which takes that subtitle off the blacklist.

    :param kwargs: needs title, rating_key, part_id, language (IETF code), current_data, item_title and randomize;
        remove_sub_key ("__"-joined blacklist key) is optional
    :return: the populated SubFolderObjectContainer
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    key_to_remove = kwargs.pop("remove_sub_key", None)
    current_data = unicode(kwargs["current_data"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    blacklisted, subs = stored_subs.get_blacklist(part_id, language)

    if key_to_remove:
        # the key arrives "__"-joined, see the Callback further down
        key_to_remove = tuple(key_to_remove.split("__"))
        stored_subs.blacklist(part_id, language, key_to_remove, add=False)
        storage.save(stored_subs)
        Log.Info("Removed %s from blacklist", key_to_remove)

    kwargs.pop("randomize")

    oc.add(DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=rating_key, item_title=kwargs["item_title"],
                     title=kwargs["title"], randomize=timestamp()),
        title=_(u"< Back to %s", kwargs["title"]),
        summary=current_data,
        thumb=default_thumb
    ))

    def by_date_added(entry):
        # thanks RestrictedModule parser for messing with lambda (x, y)
        return entry[1]["date_added"]

    newest_first = sorted(blacklisted.iteritems(), key=by_date_added, reverse=True)
    for sub_key, data in newest_first:
        provider_name, subtitle_id = sub_key
        entry_title = _(u"%(provider_name)s, %(subtitle_id)s (added: %(date_added)s, %(mode)s), Language: %(language)s, "
                        u"Score: %(score)i, Storage: %(storage_type)s",
                        provider_name=_(provider_name),
                        subtitle_id=subtitle_id,
                        date_added=df(data["date_added"]),
                        mode=_(current_sub.get_mode_verbose(data["mode"])),
                        language=display_language(Language.fromietf(language)),
                        score=data["score"],
                        storage_type=data["storage_type"])
        oc.add(DirectoryObject(
            key=Callback(ManageBlacklistMenu, remove_sub_key="__".join(sub_key), randomize=timestamp(), **kwargs),
            title=entry_title,
            summary=_(u"Remove subtitle from blacklist")
        ))

    storage.destroy()

    return oc
@route(PREFIX + '/item/search/{rating_key}/{part_id}', force=bool)
def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item_title=None, filename=None,
                                 item_type="episode", language=None, language_name=None, force=False, current_id=None,
                                 current_data=None,
                                 current_provider=None, current_score=None, randomize=None):
    """
    lists the subtitles found by the "AvailableSubsForItem" task for one media part and language

    If there is no task data yet (or force is set) and the task is not running, the search is dispatched and the
    menu only shows a "searching, refresh here" entry. Otherwise every search result is listed once and links to
    TriggerDownloadSubtitle.

    :param rating_key: key of the item (required)
    :param part_id: id of the media part (required)
    :param title: menu title
    :param item_title: title of the item, passed on to linked menus
    :param filename: file name shown in the summaries
    :param item_type: item type handed to the task and to get_plex_metadata
    :param language: language to search for
    :param language_name: not read here
    :param force: dispatch a new search even if task data exists
    :param current_id: id of the current subtitle, used to label the matching result as "Current"
    :param current_data: summary text of the current subtitle, shown on the back entry
    :param current_provider: provider of the current subtitle, if any
    :param current_score: score of the current subtitle, if any
    :param randomize: not read here; callers pass timestamp() for it
    :return: the populated SubFolderObjectContainer
    :raises AssertionError: if rating_key or part_id is missing
    """
    # the former "assert rating_key, part_id" only checked rating_key and used part_id as the assert message
    assert rating_key and part_id, "rating_key and part_id are required"

    running = scheduler.is_task_running("AvailableSubsForItem")
    search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)

    current_data = unicode(current_data) if current_data else None

    if (search_results is None or force) and not running:
        scheduler.dispatch_task("AvailableSubsForItem", rating_key=rating_key, item_type=item_type, part_id=part_id,
                                language=language)
        running = True

    oc = SubFolderObjectContainer(title2=unicode(title), replace_parent=True)
    oc.add(DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=rating_key, item_title=item_title, title=title, randomize=timestamp()),
        title=_(u"< Back to %s", title),
        summary=current_data,
        thumb=default_thumb
    ))

    metadata = get_plex_metadata(rating_key, part_id, item_type)
    plex_part = None
    if not config.low_impact_mode:
        scanned_parts = scan_videos([metadata], ignore_all=True)

        if not scanned_parts:
            Log.Error("Couldn't list available subtitles for %s", rating_key)
            return oc

        video, plex_part = scanned_parts.items()[0]

        video_display_data = [video.format] if video.format else []
        if video.release_group:
            video_display_data.append(unicode(_(u"by %(release_group)s", release_group=video.release_group)))
        video_display_data = " ".join(video_display_data)
    else:
        # low impact mode skips the scan, so only the file name is known
        video_display_data = metadata["filename"]

    # only describe the current subtitle if there is one; the condition used to sit inside the score argument,
    # which produced a "Current: ..." text even without a current provider
    current_display = ""
    if current_provider:
        current_display = _(u"Current: %(provider_name)s (%(score)s) ",
                            provider_name=_(current_provider),
                            score=current_score)

    if not running:
        oc.add(DirectoryObject(
            key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title, language=language,
                         filename=filename, part_id=part_id, title=title, current_id=current_id, force=True,
                         current_provider=current_provider, current_score=current_score,
                         current_data=current_data, item_type=item_type, randomize=timestamp()),
            title=_(u"Search for %(language)s subs (%(video_data)s)",
                    language=get_language(language).name,
                    video_data=video_display_data),
            summary=_(u"%(current_info)sFilename: %(filename)s", current_info=current_display, filename=filename),
            thumb=default_thumb
        ))

        if search_results == "found_none":
            oc.add(DirectoryObject(
                key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title,
                             language=language, filename=filename, current_data=current_data, force=True,
                             part_id=part_id, title=title, current_id=current_id, item_type=item_type,
                             current_provider=current_provider, current_score=current_score,
                             randomize=timestamp()),
                title=_(u"No subtitles found"),
                summary=_(u"%(current_info)sFilename: %(filename)s", current_info=current_display, filename=filename),
                thumb=default_thumb
            ))
    else:
        oc.add(DirectoryObject(
            key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title,
                         language=language, filename=filename, current_data=current_data,
                         part_id=part_id, title=title, current_id=current_id, item_type=item_type,
                         current_provider=current_provider, current_score=current_score,
                         randomize=timestamp()),
            title=_(u"Searching for %(language)s subs (%(video_data)s), refresh here ...",
                    language=display_language(get_language(language)),
                    video_data=video_display_data),
            summary=_(u"%(current_info)sFilename: %(filename)s", current_info=current_display, filename=filename),
            thumb=default_thumb
        ))

    if not search_results or search_results == "found_none":
        return oc

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    current_bl, subs = stored_subs.get_blacklist(part_id, language)

    # ids of the results already listed; every subtitle id is shown once
    seen = []
    for subtitle in search_results:
        if subtitle.id in seen:
            continue

        bl_addon = ""
        if (str(subtitle.provider_name), str(subtitle.id)) in current_bl:
            bl_addon = "Blacklisted "

        wrong_fps_addon = ""
        wrong_series_addon = ""
        wrong_season_ep_addon = ""
        if subtitle.wrong_fps:
            if plex_part:
                wrong_fps_addon = _(" (wrong FPS, sub: %(subtitle_fps)s, media: %(media_fps)s)",
                                    subtitle_fps=subtitle.fps,
                                    media_fps=plex_part.fps)
            else:
                wrong_fps_addon = _(" (wrong FPS, sub: %(subtitle_fps)s, media: unknown, low impact mode)",
                                    subtitle_fps=subtitle.fps)

        if subtitle.wrong_series:
            wrong_series_addon = _(" (possibly wrong series)")

        if subtitle.wrong_season_ep:
            wrong_season_ep_addon = _(" (possibly wrong season/episode)")

        oc.add(DirectoryObject(
            key=Callback(TriggerDownloadSubtitle, rating_key=rating_key, randomize=timestamp(), item_title=item_title,
                         subtitle_id=str(subtitle.id), language=language),
            # NOTE(review): current_id is compared to subtitle.id without the str() conversion used for
            # subtitle_id above -- confirm both sides have the same type
            title=_(u"%(blacklisted_state)s%(current_state)s: %(provider_name)s, score: %(score)s%(wrong_fps_state)s"
                    u"%(wrong_series_state)s%(wrong_season_ep_state)s",
                    blacklisted_state=bl_addon,
                    current_state=_("Available") if current_id != subtitle.id else _("Current"),
                    provider_name=_(subtitle.provider_name),
                    score=subtitle.score,
                    wrong_fps_state=wrong_fps_addon,
                    wrong_series_state=wrong_series_addon,
                    wrong_season_ep_state=wrong_season_ep_addon),
            summary=_(u"Release: %(release_info)s, Matches: %(matches)s",
                      release_info=subtitle.release_info,
                      matches=", ".join(subtitle.matches)),
            thumb=default_thumb
        ))

        seen.append(subtitle.id)

    # release the storage like the sibling menus do
    storage.destroy()

    return oc
@route(PREFIX + '/download_subtitle/{rating_key}')
@debounce
def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None, language=None, randomize=None):
    """
    dispatches the download of one subtitle out of the "AvailableSubsForItem" task data

    :param rating_key: key of the item
    :param subtitle_id: id of the subtitle to download, as a string
    :param item_title: title of the item, used for the refresh state text
    :param language: language the search was run for
    :param randomize: not read here; callers pass timestamp() for it
    :return: the main menu (fatality)
    """
    from interface.main import fatality

    set_refresh_menu_state(_("Downloading subtitle for %(title_or_id)s", title_or_id=item_title or rating_key))
    search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)

    download_subtitle = None
    # the task data is None without a finished search and the string "found_none" for an empty result (see
    # ListAvailableSubsForItemMenu); neither can be searched for the subtitle
    if search_results and search_results != "found_none":
        for subtitle in search_results:
            if str(subtitle.id) == subtitle_id:
                download_subtitle = subtitle
                break

    if not download_subtitle:
        Log.Error(u"Something went horribly wrong")
    else:
        scheduler.dispatch_task("DownloadSubtitleForItem", rating_key=rating_key, subtitle=download_subtitle)

    scheduler.clear_task_data("AvailableSubsForItem")

    return fatality(randomize=timestamp(), header=" ", replace_parent=True)
@route(PREFIX + '/item/embedded/{rating_key}/{part_id}')
def ListEmbeddedSubsForItemMenu(**kwargs):
    """
    lists the embedded subtitle streams of a media part, each with and without default mods

    :param kwargs: needs rating_key, part_id, title, item_title, base_title and randomize; everything except
        randomize is passed on to TriggerExtractEmbeddedSubForItemMenu
    :return: the populated SubFolderObjectContainer
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    title = kwargs["title"]

    kwargs.pop("randomize")

    oc = SubFolderObjectContainer(title2=title, replace_parent=True)

    oc.add(DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=rating_key, item_title=kwargs["item_title"],
                     base_title=kwargs["base_title"], title=kwargs["item_title"], randomize=timestamp()),
        title=_("< Back to %s", kwargs["title"]),
        thumb=default_thumb
    ))

    part = get_part(get_item(rating_key), part_id)
    if not part:
        return oc

    for stream_data in get_embedded_subtitle_streams(part, skip_duplicate_unknown=False):
        language = stream_data["language"]
        is_unknown = stream_data["is_unknown"]
        stream = stream_data["stream"]
        is_forced = stream_data["is_forced"]

        # streams without a language get no entries
        if not language:
            continue

        # both entries of a stream share the same label parts
        label_parts = {
            "stream_index": stream.index,
            "language": display_language(language),
            "unknown_state": _(" (unknown)") if is_unknown else "",
            "forced_state": _(" (forced)") if is_forced else "",
            "stream_title": " (\"%s\")" % stream.title if stream.title else "",
        }

        oc.add(DirectoryObject(
            key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
                         stream_index=str(stream.index), language=language, with_mods=True, **kwargs),
            title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
                    u"%(stream_title)s with default mods",
                    **label_parts),
        ))
        oc.add(DirectoryObject(
            key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
                         stream_index=str(stream.index), language=language, **kwargs),
            title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
                    u"%(stream_title)s",
                    **label_parts),
        ))

    return oc
@route(PREFIX + '/item/extract_embedded/{rating_key}/{part_id}/{stream_index}')
@debounce
def TriggerExtractEmbeddedSubForItemMenu(**kwargs):
    """
    starts the extraction of an embedded subtitle stream in a thread and returns to the item details menu

    :param kwargs: needs rating_key, randomize, item_type, stream_index, part_id, language and item_title;
        with_mods is optional. All of them are handed to extract_embedded_sub
    :return: the item details menu with the "triggered" text as header and message
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs.get("part_id")
    stream_index = kwargs.get("stream_index")

    Thread.Create(extract_embedded_sub, extract_mode="m", **kwargs)

    header = _(u"Extracting of embedded subtitle %s of part %s:%s triggered",
               stream_index, rating_key, part_id)

    # drop what ItemDetailsMenu does not take before handing the rest over
    for extraction_only in ("randomize", "item_type", "stream_index", "part_id"):
        kwargs.pop(extraction_only)
    kwargs.pop("with_mods", False)
    kwargs.pop("language")

    kwargs.update({"title": kwargs["item_title"], "header": header, "message": header})

    return ItemDetailsMenu(randomize=timestamp(), **kwargs)
-472
View File
@@ -1,472 +0,0 @@
# coding=utf-8
from subzero.constants import PREFIX, TITLE, ART
from support.config import config
from support.helpers import pad_title, timestamp, df, display_language
from support.scheduler import scheduler
from support.ignore import get_decision_list
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, get_item_title
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_incl_excl_options, \
ObjectContainer, route, handler
from support.i18n import _
from item_details import ItemDetailsMenu
@handler(PREFIX, TITLE if not config.is_development else TITLE + " DEV", art=ART, thumb=main_icon)
@route(PREFIX)
def fatality(randomize=None, force_title=None, header=None, message=None, only_refresh=False, no_history=False,
             replace_parent=False):
    """
    subzero main menu

    Returns early with a single kind of entry if the menu is PIN-locked, permissions are missing or no section
    is enabled. Otherwise the full menu is built; with only_refresh set only the refresh entry (and the re-lock
    entry, if a PIN is configured) is added.

    :param randomize: not read here; callers pass timestamp() for it
    :param force_title: not read here (see the commented-out code at the title assignment)
    :param header: container header; the title is used if it is empty
    :param message: handed to the object container
    :param only_refresh: leave out everything but the refresh and re-lock entries
    :param no_history: handed to the object container
    :param replace_parent: handed to the object container
    :return: the populated ObjectContainer
    """
    from interface.advanced import PinMenu, ClearPin, AdvancedMenu
    from interface.menu import RefreshMissing, IgnoreListMenu, HistoryMenu

    title = config.full_version  # force_title if force_title is not None else config.full_version
    oc = ObjectContainer(title1=title, title2=title, header=unicode(header) if header else title, message=message,
                         no_history=no_history,
                         replace_parent=replace_parent, no_cache=True)

    # always re-check permissions
    config.refresh_permissions_status()

    # always re-check enabled sections
    config.refresh_enabled_sections()

    if config.lock_menu and not config.pin_correct:
        oc.add(DirectoryObject(
            key=Callback(PinMenu, randomize=timestamp()),
            title=pad_title(_("Enter PIN")),
            summary=_("The owner has restricted the access to this menu. Please enter the correct pin"),
        ))
        return oc

    if not config.permissions_ok and config.missing_permissions:
        # missing_permissions is either a list of (title, path) pairs or a single value used as the summary
        if not isinstance(config.missing_permissions, list):
            oc.add(DirectoryObject(
                key=Callback(fatality, randomize=timestamp()),
                title=pad_title(_("Insufficient permissions")),
                summary=config.missing_permissions,
            ))
        else:
            # note: rebinds the local "title"; harmless because the function returns right after the loop
            for title, path in config.missing_permissions:
                oc.add(DirectoryObject(
                    key=Callback(fatality, randomize=timestamp()),
                    title=pad_title(_("Insufficient permissions")),
                    summary=_("Insufficient permissions on library %(title)s, folder: %(path)s",
                              title=title,
                              path=path),
                ))
        return oc

    if not config.enabled_sections:
        oc.add(DirectoryObject(
            key=Callback(fatality, randomize=timestamp()),
            title=pad_title(_("I'm not enabled!")),
            summary=_("Please enable me for some of your libraries in your server settings; currently I do nothing"),
        ))
        return oc

    if not only_refresh:
        if Dict["current_refresh_state"]:
            oc.add(DirectoryObject(
                key=Callback(fatality, force_title=" ", randomize=timestamp()),
                title=pad_title(_("Working ... refresh here")),
                summary=_("Current state: %s; Last state: %s",
                          (Dict["current_refresh_state"] or _("Idle")) if "current_refresh_state" in Dict else _("Idle"),
                          (Dict["last_refresh_state"] or _("None")) if "last_refresh_state" in Dict else _("None")
                          )
            ))

        oc.add(DirectoryObject(
            key=Callback(OnDeckMenu),
            title=_("On-deck items"),
            summary=_("Shows the current on deck items and allows you to individually (force-) refresh their metadata/subtitles."),
            thumb=R("icon-ondeck.jpg")
        ))
        if "last_played_items" in Dict and Dict["last_played_items"]:
            oc.add(DirectoryObject(
                key=Callback(RecentlyPlayedMenu),
                title=pad_title(_("Recently played items")),
                summary=_("Shows the %s recently played items and allows you to individually (force-) refresh their metadata/subtitles.", config.store_recently_played_amount),
                thumb=R("icon-played.jpg")
            ))
        oc.add(DirectoryObject(
            key=Callback(RecentlyAddedMenu),
            title=_("Recently-added items"),
            summary=_("Shows the recently added items per section."),
            thumb=R("icon-added.jpg")
        ))
        oc.add(DirectoryObject(
            key=Callback(RecentMissingSubtitlesMenu, randomize=timestamp()),
            title=_("Show recently added items with missing subtitles"),
            summary=_("Lists items with missing subtitles. Click on \"Find recent items with missing subs\" to update list"),
            thumb=R("icon-missing.jpg")
        ))
        oc.add(DirectoryObject(
            key=Callback(SectionsMenu),
            title=_("Browse all items"),
            summary=_("Go through your whole library and manage your ignore list. You can also (force-) refresh the metadata/subtitles of individual items."),
            thumb=R("icon-browse.jpg")
        ))

        task_name = "SearchAllRecentlyAddedMissing"
        task = scheduler.task(task_name)

        if task.ready_for_display:
            task_state = _("Running: %(items_done)s/%(items_searching)s (%(percentage)s%%)",
                           items_done=task.items_done,
                           items_searching=task.items_searching,
                           percentage=task.percentage)
        else:
            # lr/nr only decide between a date and "never"; the dates themselves are fetched again below
            lr = scheduler.last_run(task_name)
            nr = scheduler.next_run(task_name)
            task_state = _("Last run: %s; Next scheduled run: %s; Last runtime: %s",
                           df(scheduler.last_run(task_name)) if lr else "never",
                           df(scheduler.next_run(task_name)) if nr else "never",
                           str(task.last_run_time).split(".")[0])

        oc.add(DirectoryObject(
            key=Callback(RefreshMissing, randomize=timestamp()),
            title=_("Search for missing subtitles (in recently-added items, max-age: %s)", Prefs[
                "scheduler.item_is_recent_age"]),
            summary=_("Automatically run periodically by the scheduler, if configured. %s", task_state),
            thumb=R("icon-search.jpg")
        ))

        ref_list = get_decision_list()
        incl_excl_ref = _("include list") if ref_list.store == "include" else _("ignore list")

        oc.add(DirectoryObject(
            key=Callback(IgnoreListMenu),
            title=_("Display %(incl_excl_list_name)s (%(count)d)",
                    incl_excl_list_name=incl_excl_ref, count=len(ref_list)),
            summary=_("Show the current %(incl_excl_list_name)s (mainly used for the automatic tasks)",
                      incl_excl_list_name=incl_excl_ref),
            thumb=R("icon-ignore.jpg")
        ))

        oc.add(DirectoryObject(
            key=Callback(HistoryMenu),
            title=_("History"),
            summary=_("Show the last %i downloaded subtitles", int(Prefs["history_size"])),
            thumb=R("icon-history.jpg")
        ))

    # the refresh entry is added regardless of only_refresh
    oc.add(DirectoryObject(
        key=Callback(fatality, force_title=" ", randomize=timestamp()),
        title=pad_title(_("Refresh")),
        summary=_("Current state: %s; Last state: %s",
                  (Dict["current_refresh_state"] or _("Idle")) if "current_refresh_state" in Dict else _("Idle"),
                  (Dict["last_refresh_state"] or _("None")) if "last_refresh_state" in Dict else _("None")
                  ),
        thumb=R("icon-refresh.jpg")
    ))

    # add re-lock after pin unlock
    if config.pin:
        oc.add(DirectoryObject(
            key=Callback(ClearPin, randomize=timestamp()),
            title=pad_title(_("Re-lock menu(s)")),
            summary=_("Enabled the PIN again for menu(s)")
        ))

    if not only_refresh:
        if "provider_throttle" in Dict and Dict["provider_throttle"].keys():
            summary_data = []
            for provider, data in Dict["provider_throttle"].iteritems():
                # desc is unpacked but not shown
                reason, until, desc = data
                summary_data.append(unicode(_("%(throttled_provider)s until %(until_date)s (%(reason)s)",
                                              throttled_provider=provider,
                                              until_date=until.strftime("%y/%m/%d %H:%M"),
                                              reason=reason)))

            oc.add(DirectoryObject(
                key=Callback(fatality, force_title=" ", randomize=timestamp()),
                title=pad_title(_("Throttled providers: %s", ", ".join(Dict["provider_throttle"].keys()))),
                summary=", ".join(summary_data),
                thumb=R("icon-throttled.jpg")
            ))

        oc.add(DirectoryObject(
            key=Callback(AdvancedMenu),
            title=pad_title(_("Advanced functions")),
            summary=_("Use at your own risk"),
            thumb=R("icon-advanced.jpg")
        ))

    return oc
@route(PREFIX + '/on_deck')
def OnDeckMenu(message=None):
    """
    displays the items on deck

    :param message: not read here
    :return: the menu built by mergedItemsMenu from get_on_deck_items
    """
    menu_title = _("Items On Deck")
    return mergedItemsMenu(title=menu_title, base_title=menu_title, itemGetter=get_on_deck_items)
@route(PREFIX + '/recently_played')
def RecentlyPlayedMenu():
    """
    displays the recently played episodes and movies recorded in Dict["last_played_items"]

    :return: the populated SubFolderObjectContainer; empty if no played items are recorded
    """
    base_title = _("Recently Played")
    oc = SubFolderObjectContainer(title2=base_title, replace_parent=True)

    # same presence check as in fatality and BlacklistRecentSubtitleMenu; the route can be opened directly
    # before anything has been recorded
    played_keys = Dict["last_played_items"] if "last_played_items" in Dict else None

    for rating_key in played_keys or []:
        item = get_item(rating_key)
        if not item:
            continue

        # NOTE(review): the class name is read via getattr instead of item.__class__.__name__, presumably
        # because of plug-in sandbox restrictions -- confirm before simplifying
        if getattr(getattr(item, "__class__"), "__name__") not in ("Episode", "Movie"):
            continue

        item_title = get_item_title(item)

        oc.add(DirectoryObject(
            thumb=get_item_thumb(item) or default_thumb,
            title=item_title,
            key=Callback(ItemDetailsMenu, title=base_title + " > " + item.title, item_title=item.title,
                         rating_key=item.rating_key)
        ))

    return oc
@route(PREFIX + '/recently_added')
def RecentlyAddedMenu(message=None):
    """
    displays the items recently added per section

    :param message: not read here
    :return: the sections menu restricted to the "recently_added" items, without ignore options
    """
    section_options = {
        "base_title": _("Recently added"),
        "section_items_key": "recently_added",
        "ignore_options": False,
    }
    return SectionsMenu(**section_options)
@route(PREFIX + '/recent', force=bool)
@debounce
def RecentMissingSubtitlesMenu(force=False, randomize=None):
    """
    displays the items found by the "MissingSubtitles" task and dispatches that task if needed

    :param force: dispatch the task even if task data already exists
    :param randomize: not read here; callers pass timestamp() for it
    :return: the populated SubFolderObjectContainer
    """
    task_name = "MissingSubtitles"
    title = _("Items with missing subtitles")
    oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)

    running = scheduler.is_task_running(task_name)
    task_data = scheduler.get_task_data(task_name)
    if task_data:
        missing_items = task_data["missing_subtitles"]
    else:
        missing_items = None

    # start a search if there is no result yet or the user asked for a new one
    wants_search = (missing_items is None) or force
    if wants_search and not running:
        scheduler.dispatch_task(task_name)
        running = True

    if running:
        oc.add(DirectoryObject(
            key=Callback(RecentMissingSubtitlesMenu, force=False, randomize=timestamp()),
            title=_(u"Updating, refresh here ..."),
            thumb=default_thumb
        ))
    else:
        oc.add(DirectoryObject(
            key=Callback(RecentMissingSubtitlesMenu, force=True, randomize=timestamp()),
            title=_(u"Find recent items with missing subtitles"),
            thumb=default_thumb
        ))

    if missing_items is None:
        return oc

    for added_at, item_id, item_title, item, missing_languages in missing_items:
        language_names = ", ".join(display_language(l) for l in missing_languages)
        oc.add(DirectoryObject(
            key=Callback(ItemDetailsMenu, title=title + " > " + item_title, item_title=item_title,
                         rating_key=item_id),
            title=item_title,
            summary=_("Missing: %s", language_names),
            thumb=get_item_thumb(item) or default_thumb
        ))

    return oc
def mergedItemsMenu(title, itemGetter, itemGetterKwArgs=None, base_title=None, *args, **kwargs):
    """
    Build a flat menu out of the entries returned by itemGetter; every entry opens ItemDetailsMenu.

    :param title: title of the container
    :param itemGetter: callable returning an iterable of (kind, title, item_id, deeper, item) tuples
    :param itemGetterKwArgs: not used by this function
    :param base_title: prefix for the title passed to ItemDetailsMenu; concatenated as-is
    :param args: positional arguments forwarded to itemGetter
    :param kwargs: keyword arguments forwarded to itemGetter
    :return: the populated container
    """
    oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)

    for entry_kind, entry_title, item_id, deeper, item in itemGetter(*args, **kwargs):
        details_key = Callback(ItemDetailsMenu, title=base_title + " > " + entry_title, item_title=entry_title,
                               rating_key=item_id)
        entry_thumb = get_item_thumb(item) or default_thumb
        oc.add(DirectoryObject(title=entry_title, key=details_key, thumb=entry_thumb))

    return oc
def determine_section_display(kind, item, pass_kwargs=None):
    """
    Pick the menu function used to display a section.

    Sections showing anything other than "all" items always use SectionMenu; otherwise sections with
    more than 80 items are indexed by first letter.

    :param kind: not used by this function
    :param item: the section; its size attribute is read
    :param pass_kwargs: optional dict; its "section_items_key" entry (default "all") is consulted
    :return: SectionMenu or SectionFirstLetterMenu
    """
    shows_all_items = not pass_kwargs or pass_kwargs.get("section_items_key", "all") == "all"
    if shows_all_items and item.size > 80:
        return SectionFirstLetterMenu
    return SectionMenu
@route(PREFIX + '/incl_excl/set/{kind}/{rating_key}/{todo}/sure={sure}', kind=str, rating_key=str, todo=str, sure=bool)
def InclExclMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
    """
    Add an entry to, or remove it from, the active include/ignore list.

    With an ignore list, a call with sure=False only renders a confirmation entry that calls back
    with sure=True. With an include list the confirmation is skipped and the action is derived from
    whether rating_key is already listed.

    :param kind: key of the decision list to modify
    :param rating_key: the entry to add or remove
    :param title: display title of the entry
    :param sure: whether the change has been confirmed
    :param todo: "add" or "remove"; any other value changes nothing
    :return: the confirmation container, or the result of fatality()
    """
    ref_list = get_decision_list()
    include = ref_list.store == "include"
    list_str_ref = "include" if include else "ignore"
    in_list = rating_key in ref_list[kind]
    toggle_action = "remove" if in_list else "add"

    if include:
        # shortcut
        sure = True
        todo = toggle_action

    if not sure:
        if in_list:
            t = u"Remove %(kind)s %(title)s from the ignore list"
        else:
            t = u"Add %(kind)s %(title)s to the ignore list"
        oc = SubFolderObjectContainer(no_history=True, replace_parent=True,
                                      title1=_(t,
                                               kind=ref_list.verbose(kind),
                                               title=title
                                               ),
                                      title2=_("Are you sure?"))
        confirm_key = Callback(InclExclMenu, kind=kind, rating_key=rating_key, title=title, sure=True,
                               todo=toggle_action)
        oc.add(DirectoryObject(
            key=confirm_key,
            title=pad_title(_("Are you sure?")),
        ))
        return oc

    rel = ref_list[kind]
    changed = False
    if todo == "remove" and in_list:
        rel.remove(rating_key)
        Log.Info("Removed %s (%s) from the %s list", title, rating_key, list_str_ref)
        ref_list.remove_title(kind, rating_key)
        ref_list.save()
        changed = True
    elif todo == "add" and not in_list:
        rel.append(rating_key)
        Log.Info("Added %s (%s) to the %s list", title, rating_key, list_str_ref)
        ref_list.add_title(kind, rating_key, title)
        ref_list.save()
        changed = True

    if not changed:
        return fatality(force_title=" ", header=_("Didn't change the %(incl_excl_list_name)s",
                                                  incl_excl_list_name=_(list_str_ref)), no_history=True)

    if todo == "remove":
        if include:
            t = "%(title)s removed from the include list"
        else:
            t = "%(title)s removed from the ignore list"
    else:
        if include:
            t = "%(title)s added to the include list"
        else:
            t = "%(title)s added to the ignore list"

    return fatality(force_title=" ", header=_(t, title=title,), no_history=True)
@route(PREFIX + '/sections')
def SectionsMenu(base_title=_("Sections"), section_items_key="all", ignore_options=True):
    """
    List all sections; the target menu of each entry is chosen by determine_section_display.

    :param base_title: forwarded to the per-section menu as base_title
    :param section_items_key: forwarded to the per-section menu
    :param ignore_options: forwarded to the per-section menu
    :return: the container filled by dig_tree
    """
    items = get_all_items("sections")
    oc = SubFolderObjectContainer(title2=_("Sections"), no_cache=True, no_history=True)
    forwarded = {
        "base_title": base_title,
        "section_items_key": section_items_key,
        "ignore_options": ignore_options,
    }
    return dig_tree(oc, items, None,
                    menu_determination_callback=determine_section_display,
                    pass_kwargs=forwarded,
                    fill_args={"title": "section_title"})
@route(PREFIX + '/section', ignore_options=bool)
def SectionMenu(rating_key, title=None, base_title=None, section_title=None, ignore_options=True,
                section_items_key="all"):
    """
    List the contents of one section.

    :param rating_key: key of the section
    :param title: title of the section; appended to base_title
    :param base_title: title prefix; concatenated as-is
    :param section_title: overwritten with the unicode form of title before use
    :param ignore_options: whether to add the include/ignore entry for the section
    :param section_items_key: which item listing of the section to request
    :return: the container filled by dig_tree
    """
    from menu import MetadataMenu

    items = get_all_items(key=section_items_key, value=rating_key, base="library/sections")
    kind, deeper = get_items_info(items)

    section_title = unicode(title)
    title = base_title + " > " + section_title
    oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)

    if ignore_options:
        add_incl_excl_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=InclExclMenu)

    child_kwargs = {
        "base_title": title,
        "display_items": deeper,
        "previous_item_type": "section",
        "previous_rating_key": rating_key,
    }
    return dig_tree(oc, items, MetadataMenu, pass_kwargs=child_kwargs)
@route(PREFIX + '/section/firstLetter', deeper=bool)
def SectionFirstLetterMenu(rating_key, title=None, base_title=None, section_title=None, ignore_options=True,
                           section_items_key="all"):
    """
    List a section indexed by the first character of its items, preceded by an "All" entry.

    :param rating_key: key of the section
    :param title: title of the section; appended to base_title
    :param base_title: title prefix; concatenated as-is
    :param section_title: used as the container title and for the include/ignore entry
    :param ignore_options: ignored
    :param section_items_key: ignored
    :return: the container filled by dig_tree
    """
    from menu import FirstLetterMetadataMenu

    items = get_all_items(key="first_character", value=rating_key, base="library/sections")
    kind, deeper = get_items_info(items)

    own_title = unicode(title)
    oc = SubFolderObjectContainer(title2=section_title, no_cache=True, no_history=True)
    title = base_title + " > " + own_title

    add_incl_excl_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=InclExclMenu)

    show_all_key = Callback(SectionMenu, title=_("All"), base_title=title, rating_key=rating_key,
                            ignore_options=False)
    oc.add(DirectoryObject(key=show_all_key, title="All"))

    child_kwargs = {"base_title": title, "display_items": deeper, "previous_rating_key": rating_key}
    return dig_tree(oc, items, FirstLetterMetadataMenu, force_rating_key=rating_key, fill_args={"key": "key"},
                    pass_kwargs=child_kwargs)
-427
View File
@@ -1,427 +0,0 @@
# coding=utf-8
import locale
import logging
import os
import platform
import traceback
import logger
import copy
from requests import HTTPError
from item_details import ItemDetailsMenu
from refresh_item import RefreshItem
from menu_helpers import add_incl_excl_options, dig_tree, set_refresh_menu_state, \
default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route, \
extract_embedded_sub
from main import fatality, InclExclMenu
from advanced import DispatchRestart
from subzero.constants import ART, PREFIX, DEPENDENCY_MODULE_NAMES
from support.plex_media import get_all_parts, get_embedded_subtitle_streams
from support.scheduler import scheduler
from support.config import config
from support.helpers import timestamp, df, display_language
from support.ignore import get_decision_list
from support.items import get_all_items, get_items_info, get_item_kind_from_rating_key, get_item, MI_KEY, \
get_item_title, get_item_thumb
from support.storage import get_subtitle_storage
from support.i18n import _
# init GUI: class-level defaults (art, no_cache) applied to ObjectContainer
ObjectContainer.art = R(ART)
ObjectContainer.no_cache = True

# default thumb for DirectoryObjects
DirectoryObject.thumb = default_thumb

# register the "full_details" view group; MetadataMenu passes view_group="full_details" to its container
Plugin.AddViewGroup("full_details", viewMode="InfoList", mediaType="items", type="list", summary=2)
@route(PREFIX + '/section/firstLetter/key', deeper=bool)
def FirstLetterMetadataMenu(rating_key, key, title=None, base_title=None, display_items=False, previous_item_type=None,
                            previous_rating_key=None):
    """
    List the items of a section that start with a given first character.

    :param rating_key: actually is the section's key
    :param key: the firstLetter wanted
    :param title: the first letter, or #; appended to base_title
    :param base_title: title prefix; concatenated as-is
    :param display_items: not read here
    :param previous_item_type: not read here
    :param previous_rating_key: not read here
    :return: the populated container
    """
    menu_title = base_title + " > " + unicode(title)
    oc = SubFolderObjectContainer(title2=menu_title, no_cache=True, no_history=True)

    items = get_all_items(key="first_character", value=[rating_key, key], base="library/sections", flat=False)
    kind, deeper = get_items_info(items)

    child_kwargs = {
        "base_title": menu_title,
        "display_items": deeper,
        "previous_item_type": kind,
        "previous_rating_key": rating_key,
    }
    dig_tree(oc, items, MetadataMenu, pass_kwargs=child_kwargs)
    return oc
@route(PREFIX + '/section/contents', display_items=bool)
def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, previous_item_type=None,
                 previous_rating_key=None, message=None, header=None, randomize=None):
    """
    displays the contents of a section based on whether it has a deeper tree or not (movies->movie (item) list; series->series list)

    When display_items is falsy the call is handed straight to ItemDetailsMenu. Otherwise the children of
    rating_key are listed, followed by the include/ignore entry (series and seasons only), the embedded
    subtitle extraction entries (seasons only, if config.plex_transcoder is set) and two refresh entries.

    :param rating_key: rating key of the item whose children are listed
    :param title: display title of the item; appended to base_title
    :param base_title: title prefix; concatenated as-is, so it has to be a string
    :param display_items: whether to list children instead of showing the item details
    :param previous_item_type: only forwarded to SeasonExtractEmbedded
    :param previous_rating_key: rating key of the parent; for a season it is used to load the show
    :param message: passed to the container
    :param header: passed to the container
    :param randomize: not read here
    :return: the populated container, or the result of ItemDetailsMenu
    """
    title = unicode(title)
    item_title = title
    title = base_title + " > " + title
    oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True, header=header, message=message,
                                  view_group="full_details")

    current_kind = get_item_kind_from_rating_key(rating_key)

    if display_items:
        # refresh timeout; multiplied by 1000 when handed to RefreshItem below
        timeout = 30
        show = None

        # add back to series for season
        if current_kind == "season":
            timeout = 720

            # NOTE(review): show is used unguarded below; assumes get_item() returns an item here - confirm
            show = get_item(previous_rating_key)
            oc.add(DirectoryObject(
                key=Callback(MetadataMenu, rating_key=show.rating_key, title=show.title, base_title=show.section.title,
                             previous_item_type="section", display_items=True, randomize=timestamp()),
                title=_(u"< Back to %s", show.title),
                thumb=show.thumb or default_thumb
            ))
        elif current_kind == "series":
            # it shouldn't take more than 6 minutes to scan all of a series' files and determine the force refresh
            # NOTE(review): if the unit is seconds, 3600 is 60 minutes, not 6 - confirm which one is intended
            timeout = 3600

        items = get_all_items(key="children", value=rating_key, base="library/metadata")
        kind, deeper = get_items_info(items)
        dig_tree(oc, items, MetadataMenu,
                 pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": kind,
                              "previous_rating_key": rating_key})

        # we don't know exactly where we are here, only add ignore option to series
        if current_kind in ("series", "season"):
            item = get_item(rating_key)
            sub_title = get_item_title(item)
            add_incl_excl_options(oc, current_kind, title=sub_title, rating_key=rating_key, callback_menu=InclExclMenu)

        # mass-extract embedded
        # (show was loaded in the "season" branch above, so it is set whenever this condition holds)
        if current_kind == "season" and config.plex_transcoder:
            for lang in config.lang_list:
                # entry 1: called without force
                oc.add(DirectoryObject(
                    key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
                                 base_title=show.section.title, display_items=display_items, item_title=item_title,
                                 title=title,
                                 previous_item_type=previous_item_type, with_mods=True,
                                 previous_rating_key=previous_rating_key, randomize=timestamp()),
                    title=_(u"Extract missing %(language)s embedded subtitles", language=display_language(lang)),
                    summary=_("Extracts the not yet extracted embedded subtitles of all episodes for the current "
                              "season with all configured default modifications")
                ))
                # entry 2: same callback with force=True
                oc.add(DirectoryObject(
                    key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
                                 base_title=show.section.title, display_items=display_items, item_title=item_title,
                                 title=title, force=True,
                                 previous_item_type=previous_item_type, with_mods=True,
                                 previous_rating_key=previous_rating_key, randomize=timestamp()),
                    title=_(u"Extract and activate %(language)s embedded subtitles", language=display_language(lang)),
                    summary=_("Extracts embedded subtitles of all episodes for the current season "
                              "with all configured default modifications")
                ))

        # add refresh
        oc.add(DirectoryObject(
            key=Callback(RefreshItem, rating_key=rating_key, item_title=title, refresh_kind=current_kind,
                         previous_rating_key=previous_rating_key, timeout=timeout * 1000, randomize=timestamp()),
            title=_(u"Refresh: %s", item_title),
            summary=_("Refreshes %(the_movie_series_season_episode)s, possibly searching for missing and picking up "
                      "new subtitles on disk", the_movie_series_season_episode=_(u"the %s" % current_kind))
        ))
        # add forced refresh
        oc.add(DirectoryObject(
            key=Callback(RefreshItem, rating_key=rating_key, item_title=title, force=True,
                         refresh_kind=current_kind, previous_rating_key=previous_rating_key, timeout=timeout * 1000,
                         randomize=timestamp()),
            title=_(u"Auto-Find subtitles: %s", item_title),
            summary=_("Issues a forced refresh, ignoring known subtitles and searching for new ones")
        ))
    else:
        # nothing to list below this item: show its details instead
        return ItemDetailsMenu(rating_key=rating_key, title=title, item_title=item_title)

    return oc
@route(PREFIX + '/season/extract_embedded/{rating_key}/{language}')
def SeasonExtractEmbedded(**kwargs):
    """
    Start season_extract_embedded in a thread and re-render MetadataMenu with a status message.

    :param kwargs: must contain language, with_mods, item_title, title and randomize; rating_key and
        force are optional. Everything not consumed here is forwarded to MetadataMenu.
    :return: the result of MetadataMenu
    """
    rating_key = kwargs.get("rating_key")
    requested_language = kwargs.pop("language")
    with_mods = kwargs.pop("with_mods")
    item_title = kwargs.pop("item_title")
    title = kwargs.pop("title")
    force = kwargs.pop("force", False)

    Thread.Create(season_extract_embedded, rating_key=rating_key, requested_language=requested_language,
                  with_mods=with_mods, force=force)

    kwargs["header"] = _("Success")
    kwargs["message"] = _(u"Extracting of embedded subtitles for %s triggered", title)

    # the incoming randomize value is replaced by a fresh timestamp
    del kwargs["randomize"]
    return MetadataMenu(randomize=timestamp(), title=item_title, **kwargs)
def multi_extract_embedded(stream_list, refresh=False, with_mods=False, single_thread=True, extract_mode="a",
                           history_storage=None):
    """
    Run extract_embedded_sub for every entry of stream_list.

    :param stream_list: iterable of (video_part_map, plexapi_part, stream_index, language, set_current) tuples
    :param refresh: forwarded to extract_embedded_sub
    :param with_mods: forwarded to extract_embedded_sub
    :param single_thread: if set, the whole run happens while holding the "extract_embedded" Thread.Lock
    :param extract_mode: forwarded to extract_embedded_sub
    :param history_storage: forwarded to extract_embedded_sub
    :return: None
    """
    def extract_all():
        for video_part_map, plexapi_part, stream_index, language, set_current in stream_list:
            plexapi_item = video_part_map.keys()[0].plexapi_metadata["item"]
            extract_embedded_sub(rating_key=plexapi_item.rating_key, part_id=plexapi_part.id,
                                 plex_item=plexapi_item, part=plexapi_part, scanned_videos=video_part_map,
                                 stream_index=stream_index, set_current=set_current,
                                 language=language, with_mods=with_mods, refresh=refresh, extract_mode=extract_mode,
                                 history_storage=history_storage)

    if not single_thread:
        extract_all()
        return

    with Thread.Lock(key="extract_embedded"):
        extract_all()
def season_extract_embedded(rating_key, requested_language, with_mods=False, force=False):
    """
    Extract the embedded subtitle of the requested language for every child item of rating_key.

    A part is skipped when an "embedded" subtitle is already stored for it, unless force is set. Only
    the first matching stream of a part is extracted.

    :param rating_key: rating key whose children are processed
    :param requested_language: language to extract
    :param with_mods: forwarded to extract_embedded_sub
    :param force: extract even if an embedded subtitle is stored, and set it as current
    :return: None
    """
    subtitle_storage = get_subtitle_storage()

    try:
        for data in get_all_items(key="children", value=rating_key, base="library/metadata"):
            item = get_item(data[MI_KEY])
            if not item:
                continue

            # get stored subtitle info for item id
            stored_subs = subtitle_storage.load_or_new(item)

            for part in get_all_parts(item):
                embedded_subs = stored_subs.get_by_provider(part.id, requested_language, "embedded")
                current = stored_subs.get_any(part.id, requested_language)
                if embedded_subs and not force:
                    continue

                stream_data = get_embedded_subtitle_streams(part, requested_language=requested_language)
                if not stream_data:
                    continue

                stream = stream_data[0]["stream"]
                extract_embedded_sub(rating_key=item.rating_key, part_id=part.id,
                                     stream_index=str(stream.index), set_current=not current or force,
                                     refresh=not current, language=requested_language, with_mods=with_mods,
                                     extract_mode="m")
    finally:
        subtitle_storage.destroy()
@route(PREFIX + '/ignore_list')
def IgnoreListMenu():
    """
    List every entry of the active include/ignore list as an include/ignore toggle.

    :return: the populated container
    """
    ref_list = get_decision_list()
    if ref_list.store == "include":
        list_title = _("Include list")
    else:
        list_title = _("Ignore list")

    oc = SubFolderObjectContainer(title2=list_title, replace_parent=True)
    for list_key in ref_list.key_order:
        for entry in ref_list[list_key]:
            entry_title = ref_list.get_title(list_key, entry)
            add_incl_excl_options(oc, list_key, title=entry_title, rating_key=entry,
                                  callback_menu=InclExclMenu)
    return oc
@route(PREFIX + '/history')
def HistoryMenu():
    """
    List the first 100 entries of history.items; every entry opens ItemDetailsMenu.

    :return: the populated container
    """
    from support.history import get_history
    history = get_history()

    # destroy the history storage even if building the menu fails
    try:
        oc = SubFolderObjectContainer(title2=_("History"), replace_parent=True)

        for item in history.items[:100]:
            possible_language = item.language
            language_display = item.lang_name if not possible_language else display_language(possible_language)

            oc.add(DirectoryObject(
                key=Callback(ItemDetailsMenu, title=item.title, item_title=item.item_title,
                             rating_key=item.rating_key),
                title=u"%s (%s)" % (item.item_title, _(item.mode_verbose)),
                summary=_(u"%s in %s (%s, score: %s), %s", language_display, item.section_title,
                          _(item.provider_name), item.score, df(item.time)),
                thumb=item.thumb or default_thumb
            ))
    finally:
        history.destroy()

    return oc
@route(PREFIX + '/missing/refresh')
@debounce
def RefreshMissing(randomize=None):
    """
    Dispatch the "SearchAllRecentlyAddedMissing" scheduler task and return to the main menu.

    :param randomize: not read here
    :return: the result of fatality()
    """
    scheduler.dispatch_task("SearchAllRecentlyAddedMissing")
    return fatality(header="Refresh of recently added items with missing subtitles triggered",
                    replace_parent=True)
def replace_item(obj, key, replace_value):
    """
    Overwrite the value of key in obj and in every dict nested directly inside it, in place.

    Only dict values are descended into (dicts inside lists are not touched), and key is only
    overwritten where it already exists.

    :param obj: dict to modify
    :param key: key whose value should be replaced
    :param replace_value: the new value
    :return: obj itself
    """
    nested = [(name, value) for name, value in obj.items() if isinstance(value, dict)]
    for name, value in nested:
        obj[name] = replace_item(value, key, replace_value)

    if key in obj:
        obj[key] = replace_value
    return obj
def check_connections():
    """
    Query the status of the configured Sonarr/Radarr refiners and write the results to the debug log.

    Results are collected first and logged at the end with the log level temporarily set to DEBUG;
    afterwards the level configured in Prefs["log_level"] is restored.

    :return: None
    """
    # debug drone
    Log.Debug("Checking connections ...")
    log_buffer = []

    try:
        from subliminal_patch.refiners.drone import SonarrClient, RadarrClient
        log_buffer.append(["----- Connections -----"])
        for key, cls in [("sonarr", SonarrClient), ("radarr", RadarrClient)]:
            if key in config.refiner_settings:
                cname = key.capitalize()
                try:
                    status = cls(**config.refiner_settings[key]).status(timeout=5)
                except HTTPError as e:
                    if e.response.status_code == 401:
                        log_buffer.append(("%s: NOT WORKING - BAD API KEY", cname))
                    else:
                        log_buffer.append(("%s: NOT WORKING - %s", cname, traceback.format_exc()))
                except Exception:
                    log_buffer.append(("%s: NOT WORKING - %s", cname, traceback.format_exc()))
                else:
                    if status and status["version"]:
                        log_buffer.append(("%s: OK - %s", cname, status["version"]))
                    else:
                        # the format string has two placeholders; log the unusable status as the second value
                        log_buffer.append(("%s: NOT WORKING - %s", cname, status))
    except Exception:
        log_buffer.append(("Something went really wrong when evaluating Sonarr/Radarr: %s", traceback.format_exc()))
    finally:
        Core.log.setLevel(logging.DEBUG)
        try:
            for entry in log_buffer:
                Log.Debug(*entry)
        finally:
            # restore the configured level even if writing an entry fails
            Core.log.setLevel(logging.getLevelName(Prefs["log_level"]))
@route(PREFIX + '/ValidatePrefs', enforce_route=True)
def ValidatePrefs():
    """
    Apply changed preferences and dump the effective configuration to the debug log.

    Steps: toggle console logging, re-initialize config, persist the channel/pin state in Dict
    (restarting the plugin if either changed), set up the scheduler tasks, then log the config values,
    log path accessibility, selected prefs and environment, and finally apply the configured log level.

    :return: None
    """
    Core.log.setLevel(logging.DEBUG)

    if Prefs["log_console"]:
        Core.log.addHandler(logger.console_handler)
        Log.Debug("Logging to console from now on")
    else:
        Core.log.removeHandler(logger.console_handler)
        Log.Debug("Stop logging to console")

    # cache the channel state
    update_dict = False
    restart = False

    # reset pin
    Dict["pin_correct_time"] = None

    config.initialize()
    if "channel_enabled" not in Dict:
        update_dict = True

    elif Dict["channel_enabled"] != config.enable_channel:
        Log.Debug("Interface features %s, restarting plugin", "enabled" if config.enable_channel else "disabled")
        update_dict = True
        restart = True

    if "plugin_pin_mode2" not in Dict:
        update_dict = True

    elif Dict["plugin_pin_mode2"] != Prefs["plugin_pin_mode2"]:
        update_dict = True
        restart = True

    if update_dict:
        Dict["channel_enabled"] = config.enable_channel
        Dict["plugin_pin_mode2"] = Prefs["plugin_pin_mode2"]
        Dict.Save()

    if restart:
        scheduler.stop()
        DispatchRestart()
        return

    scheduler.setup_tasks()
    scheduler.clear_task_data("MissingSubtitles")
    set_refresh_menu_state(None)

    Log.Debug("Validate Prefs called.")

    # SZ config debug
    Log.Debug("--- SZ Config-Debug ---")
    for attr in [
            "version", "app_support_path", "data_path", "data_items_path", "enable_agent",
            "enable_channel", "permissions_ok", "missing_permissions", "fs_encoding",
            "subtitle_destination_folder", "include", "include_exclude_paths", "include_exclude_sz_files",
            "new_style_cache", "dbm_supported", "lang_list", "providers", "normal_subs", "forced_only", "forced_also",
            "plex_transcoder", "refiner_settings", "unrar", "adv_cfg_path", "use_custom_dns",
            "has_anticaptcha", "anticaptcha_cls"]:

        value = getattr(config, attr)
        if isinstance(value, dict):
            # mask api keys on a copy so the live config stays untouched
            d = replace_item(copy.deepcopy(value), "api_key", "xxxxxxxxxxxxxxxxxxxxxxxxx")
            Log.Debug("config.%s: %s", attr, d)
            continue

        if attr in ("api_key",):
            value = "xxxxxxxxxxxxxxxxxxxxxxxxx"

        Log.Debug("config.%s: %s", attr, value)

    for attr in ["plugin_log_path", "server_log_path"]:
        value = getattr(config, attr)

        # an unset path is reported as not accessible instead of reusing a stale/unbound value
        access = False
        if value:
            access = os.access(value, os.R_OK)
            if Core.runtime.os == "Windows":
                # on Windows the result of os.access is double-checked by actually reading a byte
                try:
                    with open(value, "r") as f:
                        f.read(1)
                except Exception:
                    access = False

        Log.Debug("config.%s: %s (accessible: %s)", attr, value, access)

    for attr in [
            "subtitles.save.filesystem", ]:
        Log.Debug("Pref.%s: %s", attr, Prefs[attr])

    if "sonarr" in config.refiner_settings or "radarr" in config.refiner_settings:
        Thread.Create(check_connections)

    # fixme: check existance of and os access of logs
    Log.Debug("----- Environment -----")
    Log.Debug("Platform: %s", Core.runtime.platform)
    Log.Debug("OS: %s", Core.runtime.os)
    Log.Debug("Python: %s", platform.python_version())
    for key, value in os.environ.iteritems():
        if key.startswith("PLEX") or key.startswith("SZ_"):
            # never write token values to the log
            if "TOKEN" in key:
                outval = "xxxxxxxxxxxxxxxxxxx"

            else:
                outval = value
            Log.Debug("%s: %s", key, outval)
    Log.Debug("Locale: %s", locale.getdefaultlocale())
    Log.Debug("-----------------------")

    Log.Debug("Setting log-level to %s", Prefs["log_level"])
    logger.register_logging_handler(DEPENDENCY_MODULE_NAMES, level=Prefs["log_level"])
    Core.log.setLevel(logging.getLevelName(Prefs["log_level"]))

    # NOTE(review): opaque, hard-coded environment entry; its consumer is not visible in this file - confirm
    # whether an embedded value like this should live in source control
    os.environ['U1pfT01EQl9LRVk'] = '789CF30DAC2C8B0AF433F5C9AD34290A712DF30D7135F12D0FB3E502006FDE081E'

    return
-294
View File
@@ -1,294 +0,0 @@
# coding=utf-8
import traceback
import types
import datetime
import subprocess
import os
import operator
from func import enable_channel_wrapper, route_wrapper, register_route_function
from subzero.lib.io import get_viable_encoding
from subzero.language import Language
from support.i18n import is_localized_string, _
from support.items import get_kind, get_item_thumb, get_item, get_item_kind_from_item, refresh_item
from support.helpers import get_video_display_title, pad_title, display_language, quote_args, is_stream_forced, \
get_title_for_video_metadata, mswindows
from support.history import get_history
from support.ignore import get_decision_list
from support.lib import get_intent
from support.config import config
from subzero.constants import ICON_SUB, ICON
from support.plex_media import get_part, get_plex_metadata
from support.scheduler import scheduler
from support.scanning import scan_videos
from support.storage import save_subtitles
from subliminal_patch.subtitle import ModifiedSubtitle
# thumb used wherever an item has no thumb of its own
default_thumb = R(ICON_SUB)
# development builds use a different icon file
main_icon = ICON if not config.is_development else "icon-dev.jpg"

# expose route_wrapper under the name route, which the menu modules import from here
# noinspection PyUnboundLocalVariable
route = route_wrapper

# wrap handler with enable_channel_wrapper
# NOTE(review): handler is not defined or imported in this file; presumably provided by the plugin runtime
# noinspection PyUnboundLocalVariable
handler = enable_channel_wrapper(handler)
def add_incl_excl_options(oc, kind, callback_menu=None, title=None, rating_key=None, add_kind=True):
    """
    Add the include/ignore toggle entry for an item to oc.

    Nothing is added if kind cannot be mapped to a key of the decision list.

    :param oc: oc to add our options to
    :param kind: movie, show, episode ... - gets translated to the ignore key (sections, series, items)
    :param callback_menu: menu to inject
    :param title: display title of the item
    :param rating_key: rating key of the item
    :param add_kind: whether the verbose kind is inserted into the entry title
    :return: None
    """
    ref_list = get_decision_list()

    # try to translate kind to the ignore key
    use_kind = kind if kind in ref_list else ref_list.translate_key(kind)
    if not use_kind or use_kind not in ref_list:
        return

    in_list = rating_key in ref_list[use_kind]
    include = ref_list.store == "include"

    if include and in_list:
        t = u"Disable Sub-Zero for %(kind)s \"%(title)s\""
    elif include:
        t = u"Enable Sub-Zero for %(kind)s \"%(title)s\""
    elif in_list:
        t = u"Un-ignore %(kind)s \"%(title)s\""
    else:
        t = u"Ignore %(kind)s \"%(title)s\""

    toggle_key = Callback(callback_menu, kind=use_kind, sure=False, todo="not_set", rating_key=str(rating_key),
                          title=title)
    oc.add(DirectoryObject(
        key=toggle_key,
        title=_(t,
                kind=ref_list.verbose(kind) if add_kind else "",
                title=unicode(title))
    ))
def dig_tree(oc, items, menu_callback, menu_determination_callback=None, force_rating_key=None, fill_args=None,
             pass_kwargs=None, thumb=default_thumb):
    """
    Add one DirectoryObject per entry of items to oc.

    :param oc: container to add the entries to
    :param items: iterable of (kind, title, key, dig_deeper, item) tuples
    :param menu_callback: menu function every entry points to; if falsy, menu_determination_callback decides
    :param menu_determination_callback: called as (kind, item, pass_kwargs=pass_kwargs) to pick the menu function
    :param force_rating_key: rating key to use for every entry instead of the entry's own key
    :param fill_args: mapping of item attribute name -> callback argument name; attributes missing on the
        item are skipped
    :param pass_kwargs: keyword arguments added to every callback; they override fill_args results
    :param thumb: fallback thumb for entries whose item has none
    :return: oc
    """
    for kind, title, key, dig_deeper, item in items:
        # keep the fallback untouched: rebinding thumb here would leak one item's thumb to later items
        item_thumb = get_item_thumb(item) or thumb

        add_kwargs = {}
        if fill_args:
            add_kwargs = dict((name, getattr(item, k)) for k, name in fill_args.items() if item and hasattr(item, k))
        if pass_kwargs:
            add_kwargs.update(pass_kwargs)

        # force details view for show/season
        summary = " " if kind in ("show", "season") else None

        oc.add(DirectoryObject(
            key=Callback(menu_callback or menu_determination_callback(kind, item, pass_kwargs=pass_kwargs), title=title,
                         rating_key=force_rating_key or key, **add_kwargs),
            title=pad_title(title) if kind in ("show", "season") else title, thumb=item_thumb, summary=summary
        ))
    return oc
def set_refresh_menu_state(state_or_media, media_type="movies"):
    """
    Store the refresh state shown in the menus in Dict["current_refresh_state"].

    A falsy value moves the current state to Dict["last_refresh_state"] and clears it; a (localized)
    string is stored as-is; anything else is treated as a media object for which a
    "Refreshing ..." / "Force-refreshing ..." text is built.

    :param state_or_media: string, None, or Media argument from Agent.update()
    :param media_type: movies or series
    :return: None
    """
    def remember(state):
        Dict["current_refresh_state"] = state
        Dict.Save()

    if not state_or_media:
        # store it in last state and remove the current
        Dict["last_refresh_state"] = Dict["current_refresh_state"]
        remember(None)
        return

    if isinstance(state_or_media, types.StringTypes) or is_localized_string(state_or_media):
        remember(unicode(state_or_media))
        return

    media = state_or_media
    media_id = media.id
    title = None
    if media_type != "series":
        title = get_video_display_title(_("movie"), media.title)
    else:
        # id and title end up being those of the last episode iterated
        for season in media.seasons:
            for episode in media.seasons[season].episodes:
                ep = media.seasons[season].episodes[episode]
                media_id = ep.id
                title = get_video_display_title(_("show"), ep.title, parent_title=media.title, season=int(season),
                                                episode=int(episode))

    force_refresh = get_intent().get("force", media_id)
    if force_refresh:
        t = u"Force-refreshing %(title)s"
    else:
        t = u"Refreshing %(title)s"

    remember(unicode(_(t,
                       title=unicode(title))))
def get_item_task_data(task_name, rating_key, language):
    """
    Look up the data a scheduler task stored for an item and language.

    :param task_name: name of the scheduler task
    :param rating_key: key of the item inside the task data
    :param language: key of the language inside the item's data
    :return: the stored value, or None if the task has no data or the item/language is missing
    """
    task_data = scheduler.get_task_data(task_name)
    if not task_data:
        return None
    return task_data.get(rating_key, {}).get(language)
def debounce(func):
    """
    Mark func as debounced by setting func.debounce = True.

    The function is returned unwrapped; the marker is presumably evaluated elsewhere to prevent func
    from being called twice with the same arguments.

    :param func: the function to mark
    :return: func itself
    """
    setattr(func, "debounce", True)
    return func
def extract_embedded_sub(**kwargs):
    """
    Extract one embedded subtitle stream with the configured transcoder and save it as a subtitle.

    :param kwargs: rating_key, part_id, stream_index and language are required. Optional: with_mods
        (default False), refresh (default True), set_current (default True), plex_item and part (looked
        up if not given), scanned_videos (scanned if not given) and extract_mode (default "a").
    :return: True if a matching stream produced output and went through the save step (the result
        of saving is not reflected), otherwise False
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs.pop("part_id")
    stream_index = kwargs.pop("stream_index")
    with_mods = kwargs.pop("with_mods", False)
    language = Language.fromietf(kwargs.pop("language"))
    refresh = kwargs.pop("refresh", True)
    set_current = kwargs.pop("set_current", True)

    # only look the item and part up when the caller didn't supply them; as a dict.pop() default the
    # lookup would be performed on every call
    if "plex_item" in kwargs:
        plex_item = kwargs.pop("plex_item")
    else:
        plex_item = get_item(rating_key)
    item_type = get_item_kind_from_item(plex_item)
    if "part" in kwargs:
        part = kwargs.pop("part")
    else:
        part = get_part(plex_item, part_id)

    scanned_videos = kwargs.pop("scanned_videos", None)
    extract_mode = kwargs.pop("extract_mode", "a")

    any_successful = False

    if part:
        if not scanned_videos:
            metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
            scanned_videos = scan_videos([metadata], ignore_all=True, skip_hashing=True)

        for stream in part.streams:
            # subtitle stream
            if str(stream.index) == stream_index:
                is_forced = is_stream_forced(stream)
                bn = os.path.basename(part.file)

                set_refresh_menu_state(_(u"Extracting subtitle %(stream_index)s of %(filename)s",
                                         stream_index=stream_index,
                                         filename=bn))
                Log.Info(u"Extracting stream %s (%s) of %s", stream_index, str(language), bn)

                # mov_text streams are written as srt, everything else in its own codec
                out_codec = stream.codec if stream.codec != "mov_text" else "srt"

                args = [
                    config.plex_transcoder, "-i", part.file, "-map", "0:%s" % stream_index, "-f", out_codec, "-"
                ]

                # NOTE(review): the command runs through the shell; it relies on quote_args() to quote part.file
                cmdline = quote_args(args)
                Log.Debug(u"Calling: %s", cmdline)
                if mswindows:
                    Log.Debug("MSWindows: Fixing encoding")
                    cmdline = cmdline.encode("mbcs")

                output = None
                try:
                    output = subprocess.check_output(cmdline, stderr=subprocess.PIPE, shell=True)
                except Exception:
                    # best effort: a failed extraction is logged and the stream skipped
                    Log.Error("Extraction failed: %s", traceback.format_exc())

                if output:
                    subtitle = ModifiedSubtitle(language, mods=config.default_mods if with_mods else None)
                    subtitle.content = output
                    subtitle.provider_name = "embedded"
                    subtitle.id = "stream_%s" % stream_index
                    subtitle.score = 0
                    subtitle.set_encoding("utf-8")

                    # fixme: speedup video; only video.name is needed
                    video = scanned_videos.keys()[0]
                    save_successful = save_subtitles(scanned_videos, {video: [subtitle]}, mode="m",
                                                     set_current=set_current)
                    set_refresh_menu_state(None)

                    if save_successful and refresh:
                        refresh_item(rating_key)

                    # add item to history
                    item_title = get_title_for_video_metadata(video.plexapi_metadata,
                                                              add_section_title=False, add_episode_title=True)

                    history = get_history()
                    try:
                        history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
                                    thumb=video.plexapi_metadata["super_thumb"],
                                    subtitle=subtitle, mode=extract_mode)
                    finally:
                        # destroy the history storage even if adding the entry fails
                        history.destroy()

                    any_successful = True

    return any_successful
class SZObjectContainer(ObjectContainer):
    """
    ObjectContainer that honors the pin lock: creating one may set config.locked, and while the menu
    is locked add() drops everything except entries pointing to a "/pin" menu.
    """

    def __init__(self, *args, **kwargs):
        # skip_pin_lock is consumed here and not passed on to ObjectContainer
        skip_pin_lock = kwargs.pop("skip_pin_lock", False)
        super(SZObjectContainer, self).__init__(*args, **kwargs)

        locking_enabled = config.lock_menu or config.lock_advanced_menu
        if not locking_enabled:
            return
        if config.pin_correct or skip_pin_lock:
            return
        config.locked = True

    def add(self, *args, **kwargs):
        entry = args[0]
        # callback path of the entry without its query string
        current_menu_target = entry.key.split("?")[0]
        is_pin_menu = current_menu_target.endswith("/pin")

        # disable self.add if we're in lockdown
        in_lockdown = config.locked and config.lock_menu
        if in_lockdown and not is_pin_menu:
            return

        return super(SZObjectContainer, self).add(*args, **kwargs)
# keep a reference to the ObjectContainer this module started with, then rebind the name so that
# everything below (and every module importing ObjectContainer from here) gets the pin-aware subclass
OriginalObjectContainer = ObjectContainer
ObjectContainer = SZObjectContainer
class SubFolderObjectContainer(ObjectContainer):
    """
    Container for sub menus: starts out with a "<< Back to home" entry whose summary shows the
    current and the last refresh state.
    """

    def __init__(self, *args, **kwargs):
        super(SubFolderObjectContainer, self).__init__(*args, **kwargs)

        from interface.menu import fatality
        from support.helpers import pad_title, timestamp

        home_key = Callback(fatality, force_title=" ", randomize=timestamp())
        home_title = pad_title(_("<< Back to home"))

        # a missing or empty state falls back to "Idle" / "None"
        if "current_refresh_state" in Dict:
            current_state = Dict["current_refresh_state"] or _("Idle")
        else:
            current_state = _("Idle")

        if "last_refresh_state" in Dict:
            last_state = Dict["last_refresh_state"] or _("None")
        else:
            last_state = _("None")

        self.add(DirectoryObject(
            key=home_key,
            title=home_title,
            summary=_("Current state: %s; Last state: %s", current_state, last_state)
        ))
# base class for ZipObject: the first base of Redirect's _object_class attribute
# NOTE(review): the getattr() calls presumably work around restricted attribute access in the plugin sandbox - confirm
ObjectClass = getattr(getattr(Redirect, "_object_class"), "__bases__")[0]
class ZipObject(ObjectClass):
    """
    Response object that delivers zip data as an attachment named after the current date and time.
    """

    def __init__(self, data):
        ObjectClass.__init__(self, "")
        self.zipdata = data
        self.SetHeader("Content-Type", "application/zip")

    def Content(self):
        # the file name is generated whenever the content is requested
        filename = datetime.datetime.now().strftime("Logs_%y%m%d_%H-%M-%S.zip")
        disposition = 'attachment; filename="' + filename + '"'
        self.SetHeader("Content-Disposition", disposition)
        return self.zipdata
-32
View File
@@ -1,32 +0,0 @@
# coding=utf-8
from subzero.constants import PREFIX
from menu_helpers import debounce, set_refresh_menu_state, route
from support.items import refresh_item
from support.helpers import timestamp
from support.i18n import _
@route(PREFIX + '/item/refresh/{rating_key}/force', force=True)
@route(PREFIX + '/item/refresh/{rating_key}')
@debounce
def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=False, refresh_kind=None,
                previous_rating_key=None, timeout=8000, randomize=None, trigger=True):
    """
    Trigger a (forced) refresh of an item in a background thread and return to the main menu.

    :param rating_key: rating key of the item to refresh; required
    :param came_from: not read here
    :param item_title: title used in the refresh state text
    :param force: whether to issue a forced refresh
    :param refresh_kind: forwarded to refresh_item
    :param previous_rating_key: forwarded to refresh_item as parent_rating_key
    :param timeout: forwarded to refresh_item, converted with int()
    :param randomize: not read here
    :param trigger: if falsy nothing is refreshed and the header stays blank
    :return: the result of fatality()
    """
    assert rating_key
    from interface.main import fatality

    header = " "
    if trigger:
        t = u"Triggering refresh for %(title)s"
        if force:
            # this used to be a bare string expression, so the forced text was never shown
            t = u"Triggering forced refresh for %(title)s"
        set_refresh_menu_state(_(t,
                                 title=item_title))
        Thread.Create(refresh_item, rating_key=rating_key, force=force, refresh_kind=refresh_kind,
                      parent_rating_key=previous_rating_key, timeout=int(timeout))

        t = u"Refresh of item %(item_id)s triggered"
        if force:
            t = u"Forced refresh of item %(item_id)s triggered"
        header = _(t,
                   item_id=rating_key)
    return fatality(randomize=timestamp(), header=header, replace_parent=True)
-284
View File
@@ -1,284 +0,0 @@
# coding=utf-8
import traceback
import types
from subzero.language import Language
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, route
from subzero.modification import registry as mod_registry, SubtitleModifications
from subzero.constants import PREFIX
from support.plex_media import get_plex_metadata
from support.scanning import scan_videos
from support.helpers import timestamp, pad_title
from support.items import get_current_sub, set_mods_for_part
from support.i18n import _
# Menu listing the subtitle modifications ("mods") available for the current subtitle of a part.
# Reads rating_key, part_id, language, title, current_data and randomize from kwargs; all remaining
# kwargs are forwarded unchanged to every Callback built here.
# Returns the populated SubFolderObjectContainer.
@route(PREFIX + '/item/sub_mods/{rating_key}/{part_id}', force=bool)
def SubtitleModificationsMenu(**kwargs):
rating_key = kwargs["rating_key"]
part_id = kwargs["part_id"]
language = kwargs["language"]
lang_instance = Language.fromietf(language)
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
# drop the incoming randomize; every Callback below passes its own randomize=timestamp() next to **kwargs
kwargs.pop("randomize")
current_mods = current_sub.mods or []
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
# function-level import (presumably to avoid a circular import - TODO confirm)
from interface.item_details import SubtitleOptionsMenu
oc.add(DirectoryObject(
key=Callback(SubtitleOptionsMenu, randomize=timestamp(), **kwargs),
title=_(u"< Back to subtitle options for: %s", kwargs["title"]),
summary=unicode(kwargs["current_data"]),
thumb=default_thumb
))
# one entry per registered mod; skipped are: advanced mods, exclusive mods that are already applied,
# and mods restricted to languages that don't include the subtitle's language
for identifier, mod in mod_registry.mods.iteritems():
if mod.advanced:
continue
if mod.exclusive and identifier in current_mods:
continue
if mod.languages and lang_instance not in mod.languages:
continue
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=identifier, mode="add", randomize=timestamp(), **kwargs),
title=pad_title(_(mod.description)), summary=_(mod.long_description) or ""
))
# the FPS, shift and color mods get dedicated sub menus
fps_mod = SubtitleModifications.get_mod_class("change_FPS")
oc.add(DirectoryObject(
key=Callback(SubtitleFPSModMenu, randomize=timestamp(), **kwargs),
title=pad_title(_(fps_mod.description)), summary=_(fps_mod.long_description) or ""
))
shift_mod = SubtitleModifications.get_mod_class("shift_offset")
oc.add(DirectoryObject(
key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
title=pad_title(_(shift_mod.description)), summary=_(shift_mod.long_description) or ""
))
color_mod = SubtitleModifications.get_mod_class("color")
oc.add(DirectoryObject(
key=Callback(SubtitleColorModMenu, randomize=timestamp(), **kwargs),
title=pad_title(_(color_mod.description)), summary=_(color_mod.long_description) or ""
))
# NOTE(review): indentation is lost in this view; which of the following oc.add() calls belong to
# this `if` cannot be determined here. current_mods[-1] below requires a non-empty list.
if current_mods:
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=None, mode="remove_last", randomize=timestamp(), **kwargs),
title=pad_title(_("Remove last applied mod (%s)", current_mods[-1])),
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
))
oc.add(DirectoryObject(
key=Callback(SubtitleListMods, randomize=timestamp(), **kwargs),
title=pad_title(_("Manage applied mods")),
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods))
))
oc.add(DirectoryObject(
key=Callback(SubtitleReapplyMods, randomize=timestamp(), **kwargs),
title=pad_title(_("Reapply applied mods")),
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
))
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=None, mode="clear", randomize=timestamp(), **kwargs),
title=pad_title(_("Restore original version")),
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
))
# release the storage handle returned by get_current_sub (not reached if anything above raises)
storage.destroy()
return oc
@route(PREFIX + '/item/sub_mod_fps/{rating_key}/{part_id}', force=bool)
def SubtitleFPSModMenu(**kwargs):
    """
    Menu offering a "change_FPS" mod from each known frame rate to the frame rate of the video part.

    Reads rating_key, part_id, item_type, title and randomize from kwargs; the remaining kwargs are
    forwarded to the callbacks.
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    item_type = kwargs["item_type"]

    # every callback adds its own randomize value
    kwargs.pop("randomize")

    oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
    back_entry = DirectoryObject(
        key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
        title=_("< Back to subtitle modification menu")
    )
    oc.add(back_entry)

    # determine the frame rate of the part by scanning the video
    metadata = get_plex_metadata(rating_key, part_id, item_type)
    scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
    video, plex_part = scanned_parts.items()[0]
    target_fps = plex_part.fps
    target = float(target_fps)

    known_rates = ["23.980", "23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]
    for fps in known_rates:
        source = float(fps)
        if source == target:
            # converting to the same frame rate makes no sense
            continue

        indicator = _("subs constantly getting faster") if source > target \
            else _("subs constantly getting slower")

        mod_ident = SubtitleModifications.get_mod_signature("change_FPS", **{"from": fps, "to": target_fps})
        entry = DirectoryObject(
            key=Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs),
            title=_("%(from_fps)s fps -> %(to_fps)s fps (%(slower_or_faster_indicator)s)",
                    from_fps=fps,
                    to_fps=target_fps,
                    slower_or_faster_indicator=indicator)
        )
        oc.add(entry)

    return oc
# (unit identifier, display name) pairs offered by the shift menus, in display order
POSSIBLE_UNITS = (
    ("ms", "milliseconds"),
    ("s", "seconds"),
    ("m", "minutes"),
    ("h", "hours"),
)
# the same data as a mapping, for membership checks and lookups by unit identifier
POSSIBLE_UNITS_D = dict((unit, name) for unit, name in POSSIBLE_UNITS)
@route(PREFIX + '/item/sub_mod_shift_unit/{rating_key}/{part_id}', force=bool)
def SubtitleShiftModUnitMenu(**kwargs):
    """
    Menu to pick the time unit (one of POSSIBLE_UNITS) for a subtitle shift.

    Reads title and randomize from kwargs; the remaining kwargs are forwarded to the callbacks.
    """
    oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)

    # every callback adds its own randomize value
    kwargs.pop("randomize")

    back_key = Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs)
    oc.add(DirectoryObject(key=back_key, title=_("< Back to subtitle modifications")))

    for unit, title in POSSIBLE_UNITS:
        unit_key = Callback(SubtitleShiftModMenu, unit=unit, randomize=timestamp(), **kwargs)
        oc.add(DirectoryObject(key=unit_key, title=_("Adjust by %(time_and_unit)s", time_and_unit=title)))

    return oc
@route(PREFIX + '/item/sub_mod_shift/{rating_key}/{part_id}/{unit}', force=bool)
def SubtitleShiftModMenu(unit=None, **kwargs):
    """
    Menu offering "shift_offset" mods in the given unit.

    :param unit: one of the keys of POSSIBLE_UNITS_D
    :raises NotImplementedError: for any other unit
    """
    if unit not in POSSIBLE_UNITS_D:
        raise NotImplementedError

    # every callback adds its own randomize value
    kwargs.pop("randomize")

    oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
    oc.add(DirectoryObject(
        key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
        title=_("< Back to unit selection")
    ))

    def offsets(limit, step):
        # negative offsets first (closest to zero first), then positive ones (largest first)
        return list(range(-step, -limit - step, -step)) + list(range(limit, 0, -step))

    if unit == "h":
        rng = offsets(10, 1)
    elif unit in ("m", "s"):
        rng = offsets(15, 1)
    elif unit == "ms":
        rng = offsets(900, 100)
    else:
        rng = []

    for i in rng:
        if i == 0:
            continue

        mod_ident = SubtitleModifications.get_mod_signature("shift_offset", **{unit: i})
        oc.add(DirectoryObject(
            key=Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs),
            # explicit sign: "-5 s" / "+5 s"
            title="%s %s" % ("%+d" % i, unit)
        ))

    return oc
@route(PREFIX + '/item/sub_mod_colors/{rating_key}/{part_id}', force=bool)
def SubtitleColorModMenu(**kwargs):
    """
    Menu offering one "color" mod per color known to the color mod class.

    Reads title and randomize from kwargs; the remaining kwargs are forwarded to the callbacks.
    """
    # every callback adds its own randomize value
    kwargs.pop("randomize")

    color_mod = SubtitleModifications.get_mod_class("color")

    oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
    back_key = Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs)
    oc.add(DirectoryObject(key=back_key, title=_("< Back to subtitle modification menu")))

    for color, code in color_mod.colors.iteritems():
        mod_ident = SubtitleModifications.get_mod_signature("color", name=color)
        set_key = Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs)
        oc.add(DirectoryObject(key=set_key, title="%s (%s)" % (color, code)))

    return oc
@route(PREFIX + '/item/sub_set_mods/{rating_key}/{part_id}/{mods}/{mode}', force=bool)
@debounce
def SubtitleSetMods(mods=None, mode=None, **kwargs):
    """
    Apply a mod operation to the subtitle of a part, then show the modifications menu again.

    :param mods: a single mod identifier or a list of them; falsy values are passed on unchanged
    :param mode: passed through to set_mods_for_part
    """
    # normalize a single identifier to a list
    if mods and not isinstance(mods, types.ListType):
        mods = [mods]

    language = Language.fromietf(kwargs["language"])
    set_mods_for_part(kwargs["rating_key"], kwargs["part_id"], language, kwargs["item_type"], mods, mode=mode)

    # replace the incoming randomize with a fresh one for the menu
    kwargs.pop("randomize")
    return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
@route(PREFIX + '/item/sub_reapply_mods/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleReapplyMods(**kwargs):
    """
    Call set_mods_for_part in "add" mode with an empty mod list, then show the modifications menu again.
    """
    language = Language.fromietf(kwargs["language"])
    set_mods_for_part(kwargs["rating_key"], kwargs["part_id"], language, kwargs["item_type"], [], mode="add")

    # replace the incoming randomize with a fresh one for the menu
    kwargs.pop("randomize")
    return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
@route(PREFIX + '/item/sub_list_mods/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleListMods(**kwargs):
    """
    Menu listing the mods applied to the current subtitle; selecting one removes it.

    Reads rating_key, part_id, language, title and randomize from kwargs; the remaining kwargs are
    forwarded to the callbacks.
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)

    # every callback adds its own randomize value
    kwargs.pop("randomize")

    oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)

    oc.add(DirectoryObject(
        key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
        title=_("< Back to subtitle modifications")
    ))

    # mods may be unset; SubtitleModificationsMenu guards against that the same way
    for identifier in current_sub.mods or []:
        oc.add(DirectoryObject(
            key=Callback(SubtitleSetMods, mods=identifier, mode="remove", randomize=timestamp(), **kwargs),
            title=_("Remove: %(mod_name)s", mod_name=identifier)
        ))

    # release the storage handle returned by get_current_sub
    storage.destroy()

    return oc
-45
View File
@@ -1,45 +0,0 @@
import logging
def register_logging_handler(dependencies, level="ERROR"):
    """
    Route the log output of the given loggers to the Plex log.

    :param dependencies: iterable of logger names
    :param level: level set on each of those loggers
    """
    plex_handler = PlexLoggerHandler()
    for dependency in dependencies:
        Log.Debug("Registering LoggerHandler for dependency: %s" % dependency)
        log = logging.getLogger(dependency)
        # remove previous plex logging handlers; iterate over a snapshot because removeHandler()
        # mutates log.handlers, which would make the loop skip entries
        for handler in list(log.handlers):
            if isinstance(handler, PlexLoggerHandler):
                log.removeHandler(handler)

        log.setLevel(level)
        log.addHandler(plex_handler)
class PlexLoggerHandler(logging.StreamHandler):
    """Logging handler that forwards records to the matching method of the Plex ``Log`` object."""

    # logging level -> name of the Log method to call. logging.FATAL is an alias of
    # logging.CRITICAL, so it needs (and can have) no entry of its own.
    _LOG_METHODS = {
        logging.DEBUG: "Debug",
        logging.INFO: "Info",
        logging.WARNING: "Warn",
        logging.ERROR: "Error",
        logging.CRITICAL: "Critical",
    }

    def __init__(self, level=0):
        """
        :param level: minimum level handled by this handler (0 = logging.NOTSET)
        """
        # StreamHandler's first positional argument is the stream, not the level; set the level
        # explicitly instead of passing it along
        super(PlexLoggerHandler, self).__init__()
        self.setLevel(level)

    def getFormattedString(self, record):
        """Return "<logger name>: <message>" for the record."""
        return record.name + ": " + record.getMessage()

    def emit(self, record):
        """Pass the record to the Log method for its level; other levels are reported via Log.Error."""
        method_name = self._LOG_METHODS.get(record.levelno)
        if method_name is None:
            Log.Error("UNKNOWN LEVEL: %s", record.getMessage())
            return
        getattr(Log, method_name)(self.getFormattedString(record))
# stream handler using the framework's log formatter with a detailed record layout
_CONSOLE_LOG_FORMAT = (
    '%(asctime)-15s - %(name)-32s (%(thread)x) : '
    '%(levelname)s (%(module)s:%(lineno)d) - %(message)s'
)
console_formatter = Framework.core.LogFormatter(_CONSOLE_LOG_FORMAT)
console_handler = logging.StreamHandler()
console_handler.setFormatter(console_formatter)
-6
View File
@@ -1,6 +0,0 @@
License for parts taken out of plexinc-agents/LocalMedia.bundle
License
-------
If the software submitted to this repository accesses or calls any software provided by Plex (“Interfacing Software”), then as a condition for receiving services from Plex in response to such accesses or calls, you agree to grant and do hereby grant to Plex and its affiliates worldwide a worldwide, nonexclusive, and royalty-free right and license to use (including testing, hosting and linking to), copy, publicly perform, publicly display, reproduce in copies for distribution, and distribute the copies of any Interfacing Software made by you or with your assistance; provided, however, that you may notify Plex at legal@plex.tv if you do not wish for Plex to use, distribute, copy, publicly perform, publicly display, reproduce in copies for distribution, or distribute copies of an Interfacing Software that was created by you, and Plex will reasonable efforts to comply with such a request within a reasonable time.
-72
View File
@@ -1,72 +0,0 @@
# Package bootstrap: import every submodule and register it in sys.modules under its "support.<name>"
# key so that "from support.<name> import ..." resolves to these module objects.
# The statement order is significant (each import runs that module's top-level code); keep it as is.
import sys
# thanks, https://github.com/trakt/Plex-Trakt-Scrobbler/blob/master/Trakttv.bundle/Contents/Code/core/__init__.py
import config
sys.modules["support.config"] = config
import helpers
sys.modules["support.helpers"] = helpers
import lib
sys.modules["support.lib"] = lib
import i18n
sys.modules["support.i18n"] = i18n
# expose the translation function inside helpers by assignment (presumably because helpers cannot
# import i18n itself - TODO confirm)
helpers._ = i18n._
import plex_media
sys.modules["support.plex_media"] = plex_media
import localmedia
sys.modules["support.localmedia"] = localmedia
import subtitlehelpers
sys.modules["support.subtitlehelpers"] = subtitlehelpers
import items
sys.modules["support.items"] = items
import scheduler
sys.modules["support.scheduler"] = scheduler
import storage
sys.modules["support.storage"] = storage
import scanning
sys.modules["support.scanning"] = scanning
import missing_subtitles
sys.modules["support.missing_subtitles"] = missing_subtitles
import tasks
sys.modules["support.tasks"] = tasks
import ignore
sys.modules["support.ignore"] = ignore
import history
sys.modules["support.history"] = history
import data
sys.modules["support.data"] = data
import activities
sys.modules["support.activities"] = activities
import download
sys.modules["support.download"] = download
-132
View File
@@ -1,132 +0,0 @@
# coding=utf-8
from wraptor.decorators import throttle
from config import config
from items import get_item, get_item_kind_from_item, refresh_item
# plex_activity is optional: Activity stays None when the import fails, which PlexActivityManager.start
# checks before doing anything
Activity = None
try:
from plex_activity import Activity
except ImportError:
pass
# Reacts to Plex "playing" activity: keeps a list of recently played items in Dict and, depending on
# config.activity_mode, refreshes the playing item and/or the episode following it.
# NOTE(review): indentation is lost in this view; the nesting described in the comments below is the
# most plausible reading and should be confirmed against the real file.
class PlexActivityManager(object):
# Register on_playing for 'websocket.playing' events and start the activity sources.
# Does nothing when plex_activity is unavailable; the websocket source is only enabled when
# config.plex_token is set.
def start(self):
activity_sources_enabled = None
if not Activity:
return
if config.plex_token:
from plex import Plex
Plex.configuration.defaults.authentication(config.plex_token)
activity_sources_enabled = ["websocket"]
Activity.on('websocket.playing', self.on_playing)
if activity_sources_enabled:
Activity.start(activity_sources_enabled)
# Event handler for playing notifications, throttled via wraptor's throttle decorator.
# info is indexed with "state", "viewOffset" and "ratingKey".
@throttle(5, instance_method=True)
def on_playing(self, info):
# ignore non-playing states and anything too far in
if info["state"] != "playing" or info["viewOffset"] > 60000:
return
# don't trigger on the first hit ever
if "last_played_items" not in Dict:
Dict["last_played_items"] = []
Dict.Save()
return
rating_key = info["ratingKey"]
# only use integer based rating keys
try:
int(rating_key)
except ValueError:
return
# maintain the recently played list: most recent first, capped at config.store_recently_played_amount
if rating_key in Dict["last_played_items"] and rating_key != Dict["last_played_items"][0]:
# shift last played
Dict["last_played_items"].insert(0,
Dict["last_played_items"].pop(Dict["last_played_items"].index(rating_key)))
Dict.Save()
elif rating_key not in Dict["last_played_items"]:
# new playing; store last X recently played items
Dict["last_played_items"].insert(0, rating_key)
Dict["last_played_items"] = Dict["last_played_items"][:config.store_recently_played_amount]
Dict.Save()
if not config.react_to_activities:
return
debug_msg = "Started playing %s. Refreshing it." % rating_key
# todo: cleanup debug messages for hybrid-plus
keys_to_refresh = []
if config.activity_mode in ["refresh", "next_episode", "hybrid", "hybrid-plus"]:
# next episode or next episode and current movie
if config.activity_mode in ["next_episode", "hybrid", "hybrid-plus"]:
plex_item = get_item(rating_key)
if not plex_item:
Log.Warn("Can't determine media type of %s, skipping" % rating_key)
return
if get_item_kind_from_item(plex_item) == "episode":
next_ep = self.get_next_episode(rating_key)
# hybrid-plus additionally refreshes the playing episode itself
if config.activity_mode == "hybrid-plus":
keys_to_refresh.append(rating_key)
if next_ep:
keys_to_refresh.append(next_ep.rating_key)
debug_msg = "Started playing %s. Refreshing next episode (%s, S%02iE%02i)." % \
(rating_key, next_ep.rating_key, int(next_ep.season.index), int(next_ep.index))
# NOTE(review): presumably the non-episode branch (hybrid modes refresh the playing item) - confirm nesting
else:
if config.activity_mode in ("hybrid", "hybrid-plus"):
keys_to_refresh.append(rating_key)
elif config.activity_mode == "refresh":
keys_to_refresh.append(rating_key)
if keys_to_refresh:
Log.Debug(debug_msg)
Log.Debug("Refreshing %s", keys_to_refresh)
for key in keys_to_refresh:
refresh_item(key)
# Return the episode following the one identified by rating_key: the episode with the next index in
# the same season, otherwise the episode with index 1 of the season with the next index.
# Falls through (returning None) when the item or season can't be loaded, the item is not an episode
# or no such episode is found.
def get_next_episode(self, rating_key):
plex_item = get_item(rating_key)
if not plex_item:
return
if get_item_kind_from_item(plex_item) == "episode":
# get season
season = get_item(plex_item.season.rating_key)
if not season:
return
# determine next episode
# next episode is in the same season
if plex_item.index < season.episode_count:
# get next ep
for ep in season.children():
if ep.index == plex_item.index + 1:
return ep
# it's not, try getting the first episode of the next season
else:
# get show
# NOTE(review): unlike plex_item and season above, show is not checked before use
show = get_item(plex_item.show.rating_key)
# is there a next season?
if season.index < show.season_count:
for other_season in show.children():
if other_season.index == season.index + 1:
next_season = other_season
for ep in next_season.children():
if ep.index == 1:
return ep
# module-level instance, created at import time
activity = PlexActivityManager()
-42
View File
@@ -1,42 +0,0 @@
# coding=utf-8
def refresh_plex_token():
    """
    Sign in to plex.tv with the configured credentials and store the returned token in Dict["token"].

    Without a configured username or password, a previously stored token is removed instead.

    :return: True if a token was retrieved and stored, otherwise None
    """
    username = Prefs["plex_username"]
    password = Prefs["plex_password"]
    if not username or not password:
        if "token" in Dict:
            del Dict["token"]
            Dict.Save()
        return

    # the client identifier sent to plex.tv is generated once and then reused
    if "uuid" not in Dict:
        Dict["uuid"] = String.UUID()
        Dict.Save()
    current_uuid = Dict["uuid"]

    headers = {
        'X-Plex-Device-Name': 'Sub-Zero',
        'X-Plex-Product': 'Sub-Zero',
        'X-Plex-Version': '1.3.0',
        'X-Plex-Client-Identifier': "%s" % current_uuid,
    }
    request = HTTP.Request("https://plex.tv/users/sign_in.json", headers=headers,
                           values={'user[login]': username, 'user[password]': password}, immediate=True)

    token = None
    if request:
        try:
            data = JSON.ObjectFromString(request.content)
            token = data["user"]["authentication_token"]

            # log the response with the token masked; copy the nested dict as well so that the
            # parsed response itself stays untouched
            log_data = dict(data)
            log_data["user"] = dict(data["user"], authentication_token="xxxxxxxxxxxxxxxxxx")
            Log.Debug("Data returned from plex.tv: %s", log_data)
        except Exception as e:
            # best effort: report the failure (without the response content, which may hold the
            # token) instead of swallowing it silently
            Log.Error("Couldn't process the plex.tv sign in response: %s", type(e).__name__)

    if token:
        Dict["token"] = token
        Dict.Save()
        return True
File diff suppressed because it is too large Load Diff
-89
View File
@@ -1,89 +0,0 @@
# coding=utf-8
import traceback
def dispatch_migrate():
    """
    Run migrate(); if it fails, log the traceback and drop the legacy Dict["subs"] data.
    """
    try:
        migrate()
    except Exception:
        Log.Error("Migration failed: %s" % traceback.format_exc())
        # "subs" may not exist (e.g. when the history migration is what failed); an unguarded
        # del would raise a KeyError out of this error handler
        if "subs" in Dict:
            del Dict["subs"]
        Dict.Save()
# Two one-off migrations of data kept in the Plex Dict:
# 1. Dict["history"]["history_items"] is replayed (in reverse order) into the history storage
# 2. Dict["subs"] is converted to StoredSubtitle entries of the subtitle storage
# The migrated Dict keys are deleted afterwards.
# NOTE(review): indentation is lost in this view; confirm the nesting against the real file.
def migrate():
"""
some Dict/Data migrations here, no need for a more in-depth migration path for now
:return:
"""
# migrate subtitle history from Dict to Data
if "history" in Dict and Dict["history"].get("history_items"):
Log.Debug("Running migration for history data")
from support.history import get_history
history = get_history()
for item in reversed(Dict["history"]["history_items"]):
history.add(item.item_title, item.rating_key, item.section_title, subtitle=item.subtitle, mode=item.mode,
time=item.time)
del Dict["history"]
history.destroy()
Dict.Save()
# migrate subtitle storage from Dict to Data
if "subs" in Dict:
from support.storage import get_subtitle_storage
from subzero.subtitle_storage import StoredSubtitle
from support.plex_media import get_item
subtitle_storage = get_subtitle_storage()
# Dict["subs"] is walked as video_id -> part_id -> language -> subtitle entries
for video_id, parts in Dict["subs"].iteritems():
# items that can't be loaded (any exception, or a falsy result) are skipped
try:
item = get_item(video_id)
except:
continue
if not item:
continue
stored_subs = subtitle_storage.load_or_new(item)
stored_subs.version = 1
Log.Debug(u"Migrating %s" % video_id)
stored_any = False
for part_id, lang_dict in parts.iteritems():
part_id = str(part_id)
Log.Debug(u"Migrating %s, %s" % (video_id, part_id))
for lang, subs in lang_dict.iteritems():
lang = str(lang)
# only the entry referenced by "current" is migrated; its key unpacks to (provider_name, subtitle_id)
if "current" in subs:
current_key = subs["current"]
provider_name, subtitle_id = current_key
sub = subs.get(current_key)
if sub and sub.get("title") and sub.get("mode"): # ditch legacy data without sufficient info
stored_subs.title = sub["title"]
new_sub = StoredSubtitle(sub["score"], sub["storage"], sub["hash"], provider_name,
subtitle_id, date_added=sub["date_added"], mode=sub["mode"])
if part_id not in stored_subs.parts:
stored_subs.parts[part_id] = {}
if lang not in stored_subs.parts[part_id]:
stored_subs.parts[part_id][lang] = {}
Log.Debug(u"Migrating %s, %s, %s" % (video_id, part_id, current_key))
stored_subs.parts[part_id][lang][current_key] = new_sub
stored_subs.parts[part_id][lang]["current"] = current_key
stored_any = True
# only write storage entries that actually received a subtitle
if stored_any:
subtitle_storage.save(stored_subs)
subtitle_storage.destroy()
del Dict["subs"]
Dict.Save()
-141
View File
@@ -1,141 +0,0 @@
# coding=utf-8
import os
from subzero.language import Language
import subliminal_patch as subliminal
from support.config import config
from support.helpers import audio_streams_match_languages
from subliminal_patch import compute_score
from support.plex_media import get_blacklist_from_part_map
from subzero.video import refine_video
from support.storage import get_pack_data, store_pack_data
def get_missing_languages(video, part):
    """
    Determine which of the configured languages still lack a subtitle for ``video``.

    :param video: video object providing subtitle_languages and external_subtitle_languages
    :param part: unused here; kept for callers
    :return: set of missing languages; a falsy value (empty set or False) when nothing has to be searched
    """
    languages_list = config.get_lang_list(ordered=True)
    languages = set(languages_list)
    valid_langs_in_media = set()

    if Prefs["subtitles.when"] != "Always":
        valid_langs_in_media = audio_streams_match_languages(video, languages_list)
        languages = languages.difference(valid_langs_in_media)
        if languages:
            Log.Debug("Languages missing after taking the audio streams into account: %s" % languages)

    if valid_langs_in_media and not languages:
        Log.Debug("Skipping subtitle search for %s, audio streams are in correct language(s)",
                  video)
        return set()

    # should we treat IETF as alpha3? (ditch the country part)
    # NOTE(review): the early "return False" paths below leave the country codes stripped
    alpha3_map = {}
    if config.ietf_as_alpha3:
        for language in languages:
            if language.country:
                alpha3_map[language.alpha3] = language.country
                language.country = None

    have_languages = video.subtitle_languages.copy()
    if config.ietf_as_alpha3:
        for language in have_languages:
            if language.country:
                alpha3_map[language.alpha3] = language.country
                language.country = None

    missing_languages = (languages - have_languages)

    if config.any_language_is_enough != "Always search for all configured languages":
        # when set, forced subtitles don't count as "a subtitle exists"
        not_in_forced = "foreign" in config.any_language_is_enough

        if "External or embedded subtitle" in config.any_language_is_enough:
            langs = video.subtitle_languages
            if not_in_forced:
                langs = [l for l in video.subtitle_languages if not l.forced]
            if langs:
                Log.Debug("We have at least one subtitle for any configured language.")
                return False

        elif "External subtitle" in config.any_language_is_enough:
            # only external subtitles count here; the original fell back to all subtitle languages
            # (embedded ones included) whenever forced subtitles were not being filtered
            langs = video.external_subtitle_languages
            if not_in_forced:
                langs = [l for l in video.external_subtitle_languages if not l.forced]
            if langs:
                Log.Debug("We have at least one external subtitle for any configured language.")
                return False

    # all languages are found if we either really have subs for all languages or we only want to have exactly one
    # language and we've only found one (the case for a selected language, Prefs['subtitles.only_one'] (one found sub
    # matches any language))
    found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
    if not missing_languages or found_one_which_is_enough:
        if found_one_which_is_enough:
            Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
        else:
            Log.Debug('All languages %r exist for %s', languages, video)
        return False

    # re-add country codes to the missing languages, in case we've removed them above
    if config.ietf_as_alpha3:
        for language in languages:
            language.country = alpha3_map.get(language.alpha3, None)

    return missing_languages
def pre_download_hook(subtitle):
    """
    Before downloading a pack subtitle, attach cached pack data to it if there is any.
    """
    if not subtitle.is_pack:
        return

    # try retrieving the subtitle from a cached pack archive
    cached = get_pack_data(subtitle)
    if cached:
        subtitle.pack_data = cached
# After a download: store the pack data of a pack subtitle in the cache and reset it on the subtitle.
def post_download_hook(subtitle):
# if a new pack was downloaded, store it in the cache; providers' download method is responsible for
# setting subtitle.pack_data to None in case the cached pack data we provided was successfully used
if subtitle.is_pack and subtitle.pack_data:
# store pack data in cache
store_pack_data(subtitle, subtitle.pack_data)
# may be redundant
# NOTE(review): indentation is lost in this view; whether this reset runs only for stored packs or
# unconditionally cannot be determined here
subtitle.pack_data = None
def language_hook(provider):
    """Return the configured language list for the given provider."""
    provider_languages = config.get_lang_list(provider=provider)
    return provider_languages
def download_best_subtitles(video_part_map, min_score=0, throttle_time=None, providers=None):
    """
    Download the best subtitles for every video in ``video_part_map`` that has missing languages.

    :param video_part_map: mapping of video -> part
    :param min_score: passed through to subliminal
    :param throttle_time: passed through to subliminal
    :param providers: providers to use; defaults to config.providers
    :return: result of subliminal.download_best_subtitles, or None if there is nothing to download
    """
    hearing_impaired = Prefs['subtitles.search.hearingImpaired']
    languages = set([Language.rebuild(l) for l in config.lang_list])
    if not languages:
        return

    # union of the languages missing for any of the videos. The original kept only the result of the
    # last video of the loop, which skipped the download entirely when that video had nothing missing
    # and otherwise searched only that video's languages.
    wanted_languages = set()
    use_videos = []
    for video, part in video_part_map.iteritems():
        if not video.ignore_all:
            missing_languages = get_missing_languages(video, part)
        else:
            missing_languages = languages

        if missing_languages:
            wanted_languages.update(missing_languages)
            Log.Info(u"%s has missing languages: %s", os.path.basename(video.name), missing_languages)
            refine_video(video, refiner_settings=config.refiner_settings)
            use_videos.append(video)

    # prepare blacklist
    blacklist = get_blacklist_from_part_map(video_part_map, languages)

    if use_videos and wanted_languages:
        Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s, languages: %s" %
                  (min_score, hearing_impaired, wanted_languages))

        return subliminal.download_best_subtitles(set(use_videos), wanted_languages, min_score, hearing_impaired,
                                                  providers=providers or config.providers,
                                                  provider_configs=config.provider_settings,
                                                  pool_class=config.provider_pool,
                                                  compute_score=compute_score, throttle_time=throttle_time,
                                                  blacklist=blacklist, throttle_callback=config.provider_throttle,
                                                  pre_download_hook=pre_download_hook,
                                                  post_download_hook=post_download_hook,
                                                  language_hook=language_hook)
    Log.Debug("All languages for all requested videos exist. Doing nothing.")
-453
View File
@@ -1,453 +0,0 @@
# coding=utf-8
import os
import traceback
import types
import unicodedata
import datetime
import urllib
import time
import re
import platform
import subprocess
import sys
from collections import OrderedDict
from babelfish.exceptions import LanguageError
import chardet
from bs4 import UnicodeDammit
from subzero.language import Language, language_from_stream
from subzero.analytics import track_event
# quote_args(seq) turns an argument sequence into a single command line string, using the quoting
# rules of the current platform
mswindows = (sys.platform == "win32")
if mswindows:
# Windows: reuse subprocess' own list -> command line conversion
from subprocess import list2cmdline
quote_args = list2cmdline
else:
# POSIX
from pipes import quote
def quote_args(seq):
# shell-quote every argument and join them with spaces
return ' '.join(quote(arg) for arg in seq)
# Unicode control characters can appear in ID3v2 tags but are not legal in XML.
# The pattern has two parts:
# - control characters (U+0000-U+0008, U+000B-U+000C, U+000E-U+001F) and U+FFFE-U+FFFF
# - code units from the surrogate ranges that do not form a pair: U+D800-U+DBFF not followed by
#   U+DC00-U+DFFF, U+DC00-U+DFFF not preceded by U+D800-U+DBFF, and either kind at the end/start of
#   the string
# NOTE(review): the pair alternatives also consume the neighbouring non-surrogate character
RE_UNICODE_CONTROL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])' + \
u'|' + \
u'([%s-%s][^%s-%s])|([^%s-%s][%s-%s])|([%s-%s]$)|(^[%s-%s])' % \
(
unichr(0xd800), unichr(0xdbff), unichr(0xdc00), unichr(0xdfff),
unichr(0xd800), unichr(0xdbff), unichr(0xdc00), unichr(0xdfff),
unichr(0xd800), unichr(0xdbff), unichr(0xdc00), unichr(0xdfff)
)
def cast_bool(value):
    """Return True if the stripped string form of ``value`` is "true" or "True", else False."""
    text = str(value).strip()
    return text == "true" or text == "True"
def cast_int(value, default=None):
    """
    Convert ``value`` to int.

    :param value: anything accepted by int()
    :param default: returned when the conversion is not possible
    :return: the converted value or ``default``
    """
    try:
        return int(value)
    except (TypeError, ValueError):
        # TypeError covers None and other non-numeric objects, ValueError covers malformed strings
        return default
# Split a path that may use either backslashes or forward slashes as separator.
def split_path(str):
    """Split on backslashes if the path contains any, otherwise on forward slashes."""
    separator = '\\' if '\\' in str else '/'
    return str.split(separator)
def unicodize(s):
    """
    Best-effort conversion of ``s`` to NFC-normalized unicode with control characters removed.

    Each of the two steps is logged and skipped when it fails; the last good value is returned.
    """
    result = s

    # step 1: decode as UTF-8 and normalize
    try:
        decoded = unicode(s.decode('utf-8'))
        result = unicodedata.normalize('NFC', decoded)
    except:
        Log('Failed to unicodize: ' + repr(result))

    # step 2: strip characters matched by RE_UNICODE_CONTROL
    try:
        result = re.sub(RE_UNICODE_CONTROL, '', result)
    except:
        Log('Couldn\'t strip control characters: ' + repr(result))

    return result
def force_unicode(s):
    """
    Return ``s`` as unicode.

    Byte strings are decoded as UTF-8 first, then with the encoding detected by chardet, and finally
    handed to UnicodeDammit. Unicode input is returned unchanged.
    """
    if not isinstance(s, types.UnicodeType):
        try:
            s = s.decode("utf-8")
        except UnicodeDecodeError:
            t = chardet.detect(s)
            try:
                s = s.decode(t["encoding"])
            except (UnicodeDecodeError, LookupError, TypeError):
                # LookupError: codec unknown to Python; TypeError: no encoding detected (None)
                s = UnicodeDammit(s).unicode_markup
    return s
def clean_filename(filename):
    """
    Replace all punctuation and whitespace characters of ``filename`` with spaces.

    :return: the UTF-8 encoded result, stripped and lowercased
    """
    # the import block of this file does not import `string`, so bring it into scope here
    import string

    replaced_chars = string.punctuation + string.whitespace
    table = string.maketrans(replaced_chars, ' ' * len(replaced_chars))
    return string.translate(filename.encode('utf-8'), table).strip().lower()
def is_recent(t):
    """
    Tell whether timestamp ``t`` lies within the age configured in Prefs["scheduler.item_is_recent_age"].

    The preference is split into an amount and a timedelta keyword, in that order.
    """
    now = datetime.datetime.now()
    when = datetime.datetime.fromtimestamp(t)
    amount, unit = Prefs["scheduler.item_is_recent_age"].split()
    oldest_recent = now - datetime.timedelta(**{unit: int(amount)})
    return oldest_recent < when
# thanks, Plex-Trakt-Scrobbler
def str_pad(s, length, align='left', pad_char=' ', trim=False):
    """
    Pad (or, with ``trim``, cut) ``s`` to ``length`` characters.

    :param s: value to pad; falsy values are returned unchanged, non-strings are converted with str()
    :param length: target length
    :param align: 'left' pads on the right / keeps the start, 'right' pads on the left / keeps the end
    :param pad_char: character used for padding
    :param trim: cut strings longer than ``length``; otherwise they are returned unchanged
    :raises ValueError: for an unknown ``align`` when the string has to be padded or trimmed
    """
    if not s:
        return s

    if not isinstance(s, (str, unicode)):
        s = str(s)

    size = len(s)
    if size == length or (size > length and not trim):
        # nothing to do
        return s

    if align not in ('left', 'right'):
        raise ValueError("Unknown align type, expected either 'left' or 'right'")

    too_long = size > length
    padding = pad_char * (length - size)
    if align == 'left':
        return s[:length] if too_long else s + padding
    return s[size - length:] if too_long else padding + s
def pad_title(value, width=49):
    """Pad a title to ``width`` characters (49 by default) to force the 'details' view."""
    padded = str_pad(value, width, pad_char=' ')
    return padded
def get_plex_item_display_title(item, kind, parent=None, parent_title=None, section_title=None,
                                add_section_title=False):
    """
    Build the display title of a plex item via get_video_display_title.

    :param item: plex item
    :param kind: show or movie
    :param parent: season or None
    :param parent_title: parentTitle or None; falls back to the title of the parent's show
    :param section_title: section title or None; falls back to the title of the parent's section
    :param add_section_title: passed through to get_video_display_title
    :return: display title
    """
    if not section_title and parent:
        # use a default so that a parent without a "section" attribute doesn't raise
        section = getattr(parent, "section", None)
        section_title = section.title if section else None

    if not parent_title and parent:
        parent_title = parent.show.title

    return get_video_display_title(kind, item.title,
                                   section_title=section_title or None,
                                   parent_title=parent_title or None,
                                   season=parent.index if parent else None,
                                   episode=item.index if kind == "show" else None,
                                   add_section_title=add_section_title)
def series_num(v):
    """Return ``v`` converted to int, or None if it can't be converted."""
    try:
        return int(v)
    except (TypeError, ValueError):
        pass


def get_video_display_title(kind, title, section_title=None, parent_title=None, season=None, episode=None,
                            add_section_title=False):
    """
    Build a display title such as "Section: Show S01E02, Episode title".

    :param kind: "season"/"show" entries with a parent title use the series format, everything else the bare title
    :param title: item title
    :param section_title: section title, only used together with add_section_title
    :param parent_title: title of the show
    :param season: season number (int or numeric string) or None
    :param episode: episode number (int or numeric string) or None
    :param add_section_title: prefix the result with "<section_title>: "
    :return: display title
    """
    section_add = ""
    if add_section_title:
        section_add = ("%s: " % section_title) if section_title else ""

    if kind in ("season", "show") and parent_title:
        # format the converted numbers: numeric strings pass the check below but would make
        # "%02d" raise a TypeError if used directly
        season_num = series_num(season)
        episode_num = series_num(episode)
        title_add = (", %s" % title) if title else ""

        if season_num is not None and episode_num is not None:
            return '%s%s S%02dE%02d%s' % (section_add, parent_title, season_num, episode_num, title_add)

        elif season_num is not None:
            return '%s%s S%02d%s' % (section_add, parent_title, season_num, title_add)

        return '%s%s%s' % (section_add, parent_title, title_add)
    return "%s%s" % (section_add, title)
def get_title_for_video_metadata(metadata, add_section_title=True, add_episode_title=False):
    """
    Build the display title for a video metadata dict.

    :param metadata: dict with "series_id" and "title"; "series", "season", "episode" and "section" are optional
    :param add_section_title: passed through to get_video_display_title
    :param add_episode_title: add the episode's title if its an episode else always add title
    :return: display title
    """
    series_id = metadata["series_id"]

    # items without a series id always show their title, episodes only on request
    add_title = (add_episode_title and series_id) or not series_id
    kind = "show" if series_id else "movie"
    title = metadata["title"] if add_title else ""

    return get_video_display_title(
        kind,
        title,
        parent_title=metadata.get("series", None),
        season=metadata.get("season", None),
        episode=metadata.get("episode", None),
        section_title=metadata.get("section", None),
        add_section_title=add_section_title
    )
def get_identifier():
    """
    Return a SHA1 hash derived from Platform.MachineIdentifier, or from a new UUID if that is unavailable.
    """
    try:
        identifier = Platform.MachineIdentifier
    except:
        identifier = None

    # fall back to a random identifier
    identifier = identifier or String.UUID()

    return Hash.SHA1(identifier + "SUBZEROOOOOOOOOO")
def encode_message(base, s):
    """Return ``base`` with ``s`` appended as URL-quoted "message" query parameter."""
    quoted = urllib.quote_plus(s)
    return "%s?message=%s" % (base, quoted)
def decode_message(s):
    """Return ``s`` with its URL quoting undone (urllib.unquote_plus)."""
    message = urllib.unquote_plus(s)
    return message
def timestamp():
    """Return the current time as integer milliseconds since the epoch."""
    milliseconds = time.time() * 1000
    return int(milliseconds)
def df(d):
    """Format a datetime as "YYYY-MM-DD HH:MM:SS"; falsy values yield "legacy data"."""
    if not d:
        return "legacy data"
    return d.strftime("%Y-%m-%d %H:%M:%S")
def query_plex(url, args):
    """
    simple http query to the plex API without parsing anything too complicated

    :param url: URL to request
    :param args: dict of query arguments; the values are quoted with String.Quote
    :return: result of HTTP.Request
    """
    use_args = args.copy()
    computed_args = "&".join(["%s=%s" % (key, String.Quote(value)) for key, value in use_args.iteritems()])
    # the parentheses matter: without them the conditional expression applied to the whole
    # concatenation and an empty URL was requested whenever there were no arguments
    query = ("?%s" % computed_args) if computed_args else ""
    return HTTP.Request(url + query, immediate=True)
def check_write_permissions(path):
    """
    Check whether ``path`` is writable.

    On Windows a probe directory is created and removed inside ``path``; elsewhere os.access is
    asked for write and execute permission.

    :return: True if writable, else False
    """
    if platform.system() != "Windows":
        # os.access check
        return os.access(path, os.W_OK | os.X_OK)

    # physical access check
    probe = os.path.join(os.path.realpath(path), ".sz_perm_chk")
    try:
        if os.path.exists(probe):
            os.rmdir(probe)
        os.mkdir(probe)
        os.rmdir(probe)
    except OSError:
        return False
    return True
def get_item_hints(data):
    """
    Build the hints dict ("title", "type" and for episodes "episode_title") for a video item.

    :param data: video item dict of media_to_videos
    :return: hints dict; colons are removed from the title
    """
    hints = {"title": data["original_title"] or data["title"], "type": "movie"}

    if data["type"] == "episode":
        # for episodes the title is the series title, the item's own title is the episode title
        episode_title = data["title"]
        series_title = data["original_title"] or data["series"]
        hints["type"] = "episode"
        hints["episode_title"] = episode_title
        hints["title"] = series_title

    title = hints["title"]
    if title:
        hints["title"] = title.replace(":", "")

    return hints
# Call an external executable once per downloaded subtitle.
# exe_info unpacks to (exe, arguments); every argument is %-formatted with a dict holding the keys
# listed in `variables` (None for values that are not available).
# subtitles is iterated as video -> list of subtitles; videos is not used in this function.
# NOTE(review): indentation is lost in this view; confirm the nesting against the real file.
def notify_executable(exe_info, videos, subtitles, storage):
variables = (
"subtitle_language", "subtitle_path", "subtitle_filename", "provider", "score", "storage", "series_id",
"series", "title", "section", "filename", "path", "folder", "season_id", "type", "id", "season"
)
# environment variables removed from the child environment when their value contains "plex"
to_clean = ("PYTHONPATH", "PYTHONHOME")
exe, arguments = exe_info
for video, video_subtitles in subtitles.items():
for subtitle in video_subtitles:
lang = str(subtitle.language)
# start from a copy of the video's metadata and add the subtitle specific values
data = video.plexapi_metadata.copy()
data.update({
"subtitle_language": lang,
"provider": subtitle.provider_name,
"score": subtitle.score,
"storage": storage,
"subtitle_path": subtitle.storage_path,
"subtitle_filename": os.path.basename(subtitle.storage_path)
})
# fill missing data with None
prepared_data = dict((v, data.get(v)) for v in variables)
prepared_arguments = [arg % prepared_data for arg in arguments]
Log.Debug(u"Calling %s with arguments: %s" % (exe, prepared_arguments))
# non-Windows: prepend /usr/local/bin and /usr/bin to PATH and drop LD_LIBRARY_PATH
if not mswindows:
env_path = {"PATH": os.pathsep.join(
[
"/usr/local/bin",
"/usr/bin",
os.environ.get("PATH", "")
]
)
}
env = dict(os.environ, **env_path)
env.pop("LD_LIBRARY_PATH", None)
else:
env = dict(os.environ)
# clean out any Plex-PYTHONPATH that may bleed through the spawned process
for v in to_clean:
if v in env and "plex" in env[v].lower():
del env[v]
# the command line is built with quote_args and run through the shell, in the executable's directory
try:
proc = subprocess.Popen(quote_args([exe] + prepared_arguments), stdout=subprocess.PIPE,
stderr=subprocess.PIPE, shell=True, env=env, cwd=os.path.dirname(exe))
output, errors = proc.communicate()
# NOTE(review): only return code 1 is treated as a failure, and the return below leaves the whole
# function, so remaining subtitles are not reported
if proc.returncode == 1:
Log.Error(u"Calling %s with args %s failed: output:\n%s, error:\n%s", exe, prepared_arguments,
output, errors)
return
output = output.decode()
except:
Log.Error(u"Calling %s failed: %s", exe, traceback.format_exc())
else:
Log.Debug(u"Process output: %s", output)
def track_usage(category=None, action=None, label=None, value=None):
    """
    Report an anonymous usage event in a separate thread, at most once per 30 minutes per distinct event.

    Does nothing when the "track_usage" preference is off. The time of the last report per event is kept in
    Dict["last_tracked"]; entries older than one day are dropped on every call that gets past the throttle.

    :param category: event category
    :param action: event action
    :param label: event label
    :param value: event value
    :return: None
    """
    if not cast_bool(Prefs["track_usage"]):
        return

    if "last_tracked" not in Dict:
        Dict["last_tracked"] = OrderedDict()
        Dict.Save()

    event_key = (category, action, label, value)
    now = datetime.datetime.now()
    last_tracked = Dict["last_tracked"]

    # throttle: skip the event while its last report is less than 30 minutes old
    if event_key in last_tracked and (last_tracked[event_key] + datetime.timedelta(minutes=30)) > now:
        return

    last_tracked[event_key] = now

    # maintenance: kill day old values
    # (the loop variable must not be called `value`, that would overwrite the parameter dispatched below)
    expiry = now - datetime.timedelta(days=1)
    for key, tracked_at in list(last_tracked.items()):
        if tracked_at < expiry:
            try:
                del last_tracked[key]
            except KeyError:
                pass

    try:
        Thread.Create(dispatch_track_usage, category, action, label, value,
                      identifier=Dict["anon_id"], first_use=Dict["first_use"],
                      add=Network.PublicAddress)
    except Exception:
        Log.Debug("Something went wrong when reporting anonymous user statistics: %s", traceback.format_exc())
def dispatch_track_usage(*args, **kwargs):
    """
    Forward a usage event to track_event; intended as the thread target used by track_usage.

    :param args: event values, each passed on as str
    :param kwargs: must contain "identifier", "first_use" and "add"
    :return: None
    """
    identifier, first_use, add = [kwargs.pop(name) for name in ("identifier", "first_use", "add")]

    try:
        # the str conversion stays inside the try so that conversion errors are logged as well
        track_event(*[str(arg) for arg in args], identifier=identifier, first_use=first_use, add=add)
    except:
        Log.Debug("Something went wrong when reporting anonymous user statistics: %s", traceback.format_exc())
def get_language_from_stream(lang_code):
    """
    Turn a stream's language code into a Language.

    :param lang_code: language code of a stream; may be empty or None
    :return: a Language, or None if the code is empty, unmatched or cannot be converted
    """
    if not lang_code:
        return None

    matched = Locale.Language.Match(lang_code)
    if not matched:
        return None

    if matched != "xx":
        return Language.fromietf(matched)

    # NOTE(review): the fallback receives the matched "xx" value, not the original lang_code - confirm intent
    try:
        return language_from_stream(matched)
    except LanguageError:
        return None
def audio_streams_match_languages(video, languages):
    """
    Return the non-forced configured languages if the video's audio streams match them according to the
    "subtitles.when" preference.

    :param video: object with an ``audio_languages`` sequence
    :param languages: configured languages; entries whose ``forced`` attribute is set are ignored
    :return: set of the non-forced languages on a match, otherwise an empty set
    """
    candidates = [language for language in languages if not language.forced]
    audio = video.audio_languages
    if not (audio and candidates):
        return set()

    mode = Prefs["subtitles.when"]
    if mode == "Always":
        return set()

    if mode == "When main audio stream is not Subtitle Language (1)":
        matched = audio[0] == candidates[0]
    elif mode == "When any audio stream is not Subtitle Language (1)":
        matched = candidates[0] in audio
    elif mode == "When main audio stream is not any configured language":
        matched = audio[0] in candidates
    elif mode == "When any audio stream is not any configured language":
        matched = set(audio).intersection(set(candidates))
    else:
        matched = False

    if matched:
        return set(candidates)
    return set()
def get_language(lang_short):
    """
    Build a Language from an IETF language code.

    :param lang_short: IETF language code
    :return: result of Language.fromietf
    """
    language = Language.fromietf(lang_short)
    return language
def display_language(l):
    """
    Return the translated, lower-cased base name of a language, followed by a translated " (forced)" marker
    when the language is forced.

    :param l: object with ``basename`` and ``forced`` attributes
    :return: display string
    """
    name = _(str(l.basename).lower())
    suffix = (u" (%s)" % _("forced")) if l.forced else ""
    return name + suffix
def is_stream_forced(stream):
    """
    Tell whether a stream is forced, either by its ``forced`` attribute or by "forced" appearing in its title.

    :param stream: object that may have ``forced`` and ``title`` attributes
    :return: the truthy ``forced`` attribute, True if only the title says so, otherwise the falsy attribute value
    """
    title = getattr(stream, "title", "") or ""
    flag = getattr(stream, "forced", False)
    if flag:
        return flag

    if title and "forced" in title.strip().lower():
        return True
    return flag
class PartUnknownException(Exception):
    """Signals that a media part could not be identified."""
-4
View File
@@ -1,4 +0,0 @@
# coding=utf-8
from subzero.history_storage import SubtitleHistory
def get_history():
    """
    Create a SubtitleHistory whose size comes from the "history_size" preference.

    :return: new SubtitleHistory instance
    """
    return SubtitleHistory(Data, Thread, int(Prefs["history_size"]))
-109
View File
@@ -1,109 +0,0 @@
# coding=utf-8
import inspect
from support.config import config
# attribute fetched by name via getattr() rather than written as Data._core
# NOTE(review): presumably because the plugin sandbox rejects underscore attribute access - confirm
core = getattr(Data, "_core")


def get_localization_module():
    """
    get original localization module in order to access its base classes later on

    :return: the module that defines the class of core.localization
    """
    localization_cls = getattr(core.localization, "__class__")
    module = inspect.getmodule(localization_cls)
    return module


plex_i18n_module = get_localization_module()
def old_style_placeholders_count(s):
    """
    Count the positional printf-style placeholders in a format string.

    Recognizes optional flags, width, precision and length modifier (e.g. "%5d", "%.2f", "%x"), not only the
    bare "%s", "%d", "%r", "%f" and "%i". A literal "%%" and named placeholders such as "%(name)s" are not
    counted.

    :param s: format string
    :return: number of positional placeholders
    """
    import re

    # "%%" is consumed as a unit so that the text following it cannot be mistaken for a placeholder
    spec = r"%(?:%|[#0\-+]*\d*(?:\.\d+)?[hlL]?[diouxXeEfFgGcrs])"
    return sum(1 for found in re.findall(spec, s) if found != "%%")
def check_old_style_placeholders(k, args):
    """
    Sanity-check positional format arguments against the placeholders of a format string.

    :param k: format string (or object whose __str__ yields it)
    :param args: positional format arguments
    :return: an upper-case problem description, or None if nothing is wrong
    """
    # replace escaped %'s?
    template = k.__str__().replace("%%", "")

    if "%(" in template:
        Log.Error(u"%r defines named placeholders for formatting" % template)
        return "NEEDS NAMED ARGUMENTS"

    expected = old_style_placeholders_count(template)
    if expected:
        if not args:
            Log.Error(u"%r requires a arguments for formatting" % template)
            return "NEEDS FORMAT ARGUMENTS"
        if expected != len(args):
            Log.Error(u"%r wrong amount of arguments supplied for formatting" % template)
            return "WRONG FORMAT ARGUMENT COUNT"
    elif args:
        Log.Error(u"%r doesn't define placeholders for formatting" % template)
        return "HAS NO FORMAT ARGUMENTS"

    return None
class SmartLocalStringFormatter(plex_i18n_module.LocalStringFormatter):
    """
    this allows the use of dictionaries for string formatting, also does some sanity checking on the keys and values
    """

    def __init__(self, string1, string2, locale=None):
        """
        :param string1: the format string
        :param string2: the format arguments; a 1-tuple holding a dict is unwrapped to that dict
        :param locale: stored as-is
        """
        if isinstance(string2, tuple):
            got_mapping = len(string2) == 1 and hasattr(string2[0], "iteritems")

            if got_mapping:
                # dictionary passed
                string2 = string2[0]
                if config.debug_i18n and "%(" not in string1.__str__().replace("%%", ""):
                    Log.Error(u"%r: dictionary for non-named format string supplied" % string1.__str__())
                    string1 = "%s"
                    string2 = "NO NAMED ARGUMENTS"

            elif len(string2) >= 1 and config.debug_i18n:
                # arguments
                problem = check_old_style_placeholders(string1, string2)
                if problem:
                    string1 = "%s"
                    string2 = problem

        # the base class initializer is not called; its attributes are set by name instead
        for attr_name, attr_value in (("_string1", string1), ("_string2", string2), ("_locale", locale)):
            setattr(self, attr_name, attr_value)
def local_string_with_optional_format(key, *args, **kwargs):
    """
    Look up the localized string for ``key`` and optionally format it.

    Keyword arguments take precedence over positional ones and are used for named placeholders. If the string
    of the current locale cannot be formatted or converted, the "en" string is used instead.

    :param key: localization key
    :param args: positional format arguments
    :param kwargs: named format arguments
    :return: unicode string (or a problem marker when config.debug_i18n detects a placeholder mismatch)
    """
    def lookup(locale):
        return plex_i18n_module.LocalString(core, key, locale)

    args = (kwargs,) if kwargs else tuple(args)

    if args:
        # fixme: may not be the best idea as this evaluates the string early
        try:
            return unicode(SmartLocalStringFormatter(lookup(Locale.CurrentLocale), args))
        except (TypeError, ValueError):
            Log.Exception("Broken translation!")
            Log.Debug("EN string: %s", lookup("en"))
            Log.Debug("%s string: %r", Locale.CurrentLocale,
                      unicode(lookup(Locale.CurrentLocale)))
            return unicode(SmartLocalStringFormatter(lookup("en"), args))

    # check string instances for arguments
    if config.debug_i18n:
        problem = check_old_style_placeholders(key, args)
        if problem:
            return problem

    try:
        return unicode(lookup(Locale.CurrentLocale))
    except TypeError:
        Log.Exception("Broken translation!")
        return unicode(lookup("en"))


_ = local_string_with_optional_format
def is_localized_string(s):
    """
    :param s: any object
    :return: True if the object has a ``localize`` attribute
    """
    has_localize = hasattr(s, "localize")
    return has_localize
-75
View File
@@ -1,75 +0,0 @@
# coding=utf-8
from subzero.lib.dict import DictProxy
from config import config
class ExcludeDict(DictProxy):
    """
    DictProxy for the "ignore" store: lists of sections, series, videos and seasons plus a title lookup.
    """
    store = "ignore"

    # single item keys returned by helpers.items.getItems mapped to their parents
    translate_keys = {
        "section": "sections",
        "show": "series",
        "movie": "videos",
        "episode": "videos",
        "season": "seasons",
    }

    # getItems types mapped to their verbose names
    keys_verbose = {
        "sections": "Section",
        "series": "Series",
        "videos": "Item",
        "seasons": "Season",
    }

    key_order = ("sections", "series", "videos", "seasons")

    def __len__(self):
        """Total number of entries over all lists in key_order; resets the store if a list is missing."""
        total = 0
        try:
            for list_name in self.key_order:
                total += len(self.Dict[self.store][list_name])
        except KeyError:
            # old version
            self.Dict[self.store] = self.setup_defaults()
            return 0
        return total

    def translate_key(self, name):
        """Map a single item kind to the name of its list, or None if unknown."""
        return self.translate_keys.get(name)

    def verbose(self, name):
        """Verbose name for an item kind or a list name, or None if unknown."""
        list_name = self.translate_key(name) or name
        return self.keys_verbose.get(list_name)

    def get_title_key(self, kind, key):
        """Key under which the title of ``kind``/``key`` is stored."""
        return "%s_%s" % (kind, key)

    def add_title(self, kind, key, title):
        """Remember the title of an entry."""
        title_key = self.get_title_key(kind, key)
        self["titles"][title_key] = title

    def remove_title(self, kind, key):
        """Forget the title of an entry, if one is stored."""
        title_key = self.get_title_key(kind, key)
        if title_key not in self.titles:
            return
        del self.titles[title_key]

    def get_title(self, kind, key):
        """Return the stored title of an entry, or None."""
        title_key = self.get_title_key(kind, key)
        if title_key not in self.titles:
            return None
        return self.titles[title_key]

    def save(self):
        """Persist the global Dict."""
        Dict.Save()

    def setup_defaults(self):
        """Return a fresh, empty store layout."""
        defaults = {"sections": [], "series": [], "videos": [], "titles": {}, "seasons": []}
        return defaults
class IncludeDict(ExcludeDict):
    """Same layout and behavior as ExcludeDict, kept in the "include" store."""
    store = "include"
# module-wide list instances, both backed by the global Dict
exclude_list = ExcludeDict(Dict)
include_list = IncludeDict(Dict)


def get_decision_list():
    """
    :return: include_list if config.include is set, otherwise exclude_list
    """
    if config.include:
        return include_list
    return exclude_list
-479
View File
@@ -1,479 +0,0 @@
# coding=utf-8
import logging
import re
import traceback
import types
import os
import time
import datetime
from ignore import get_decision_list
from helpers import is_recent, get_plex_item_display_title, query_plex, PartUnknownException
from lib import Plex, get_intent
from config import config
from subliminal_patch.subtitle import ModifiedSubtitle
from subzero.modification import registry as mod_registry, SubtitleModifications
from socket import timeout
# module-level logger
logger = logging.getLogger(__name__)
# index constants; get_items_info/get_kind use them to address positions of the tuples in an item list
MI_KIND, MI_TITLE, MI_KEY, MI_DEEPER, MI_ITEM = 0, 1, 2, 3, 4
# extracts the value of the totalSize="..." attribute from a response body (used by get_section_size)
container_size_re = re.compile(ur'totalSize="(\d+)"')
def get_item(key):
    """
    Fetch the first item of the library metadata container for a rating key.

    :param key: rating key; anything int() accepts
    :return: the item, or None if the key is not numeric (including None), the request timed out, or the
             container yielded no item
    """
    try:
        item_id = int(key)
    except (TypeError, ValueError):
        # TypeError covers key=None, which int() does not report as ValueError
        return

    try:
        item_container = Plex["library"].metadata(item_id)
    except timeout:
        Log.Debug("PMS API timed out when querying information about item %d", item_id)
        return

    try:
        return list(item_container)[0]
    except Exception:
        # best effort: an empty or missing container simply yields no item
        Log.Debug("Couldn't get item %d: %s", item_id, traceback.format_exc())
        return
def get_item_kind(item):
    """
    :param item: any object
    :return: name of the object's type
    """
    item_type = type(item)
    return item_type.__name__
# item class names (as returned by get_item_kind) mapped to the kind names used by this module
PLEX_API_TYPE_MAP = {
"Show": "series",
"Season": "season",
"Episode": "episode",
"Movie": "movie",
}
def get_item_kind_from_rating_key(key):
    """
    :param key: rating key of the item to look up
    :return: kind name from PLEX_API_TYPE_MAP, or None if the item's class is not mapped
    """
    class_name = get_item_kind(get_item(key))
    return PLEX_API_TYPE_MAP.get(class_name)
def get_item_kind_from_item(item):
    """
    :param item: item object
    :return: kind name from PLEX_API_TYPE_MAP, or None if the item's class is not mapped
    """
    class_name = get_item_kind(item)
    return PLEX_API_TYPE_MAP.get(class_name)
def get_item_title(item):
    """
    Build the display title for an episode, season, movie or series.

    :param item: item object
    :return: display title, or None for any other kind of item
    """
    kind = get_item_kind_from_item(item)

    if kind == "episode":
        return get_plex_item_display_title(item, "show", parent=item.season, section_title=None,
                                           parent_title=item.show.title)

    if kind == "season":
        return get_plex_item_display_title(item, "season", parent=item.show, section_title="Season",
                                           parent_title=item.show.title)

    if kind in ("movie", "series"):
        return get_plex_item_display_title(item, kind, section_title=None)

    return None
def get_item_thumb(item):
    """
    :param item: item object
    :return: the show's thumb for an Episode, the art for a Section, otherwise the item's own thumb
    """
    kind = get_item_kind(item)
    if kind == "Section":
        return item.art

    thumb_source = item.show if kind == "Episode" else item
    return thumb_source.thumb
def get_items_info(items):
    """
    :param items: non-empty list of item tuples
    :return: (kind, deeper) of the first item
    """
    first = items[0]
    return first[MI_KIND], first[MI_DEEPER]
def get_kind(items):
    """
    :param items: non-empty list of item tuples
    :return: kind of the first item
    """
    first = items[0]
    return first[MI_KIND]
def get_section_size(key):
    """
    quick query to determine the section size

    Requests the section listing with a container size of 0 and reads the totalSize attribute from the response.

    :param key: section key; anything int() accepts
    :return: the size as int, or None if the response has no totalSize attribute
    """
    response = query_plex(
        "http://127.0.0.1:32400/library/sections/%s/all" % int(key),
        {
            "X-Plex-Container-Size": "0",
            "X-Plex-Container-Start": "0"
        }
    )
    found = container_size_re.findall(response.content)
    return int(found[0]) if found else None
def get_items(key="recently_added", base="library", value=None, flat=False, add_section_title=False):
"""
try to handle all return types plex throws at us and return a generalized item tuple
"""
# Calls the method named `key` on Plex[base], passing `value` (a list is unpacked, a single value is wrapped)
# as positional arguments, and converts every result entry into a 5-tuple of
# (kind, title, key, deeper, item) - the positions appear to correspond to the MI_* index constants.
items = []
apply_value = None
if value:
if isinstance(value, types.ListType):
apply_value = value
else:
apply_value = [value]
result = getattr(Plex[base], key)(*(apply_value or []))
for item in result:
# class name fetched by name via getattr() rather than item.__class__.__name__
cls = getattr(getattr(item, "__class__"), "__name__")
# anything with a "scanner" attribute is treated as a library section
if hasattr(item, "scanner"):
kind = "section"
elif cls == "Directory":
kind = "directory"
else:
kind = item.type
# only return items for our enabled sections
section_key = None
if kind == "section":
section_key = item.key
else:
if hasattr(item, "section_key"):
section_key = getattr(item, "section_key")
# entries without a determinable section key are not filtered
if section_key and section_key not in config.enabled_sections:
continue
if kind == "season":
# fixme: i think this case is unused now
if flat:
# return episodes
# NOTE(review): the tuple key below is the season's rating_key (item), not the child's - confirm intent
for child in item.children():
items.append(("episode", get_plex_item_display_title(child, "show", parent=item, add_section_title=add_section_title), int(item.rating_key),
False, child))
else:
# return seasons
items.append(("season", item.title, int(item.rating_key), True, item))
elif kind == "directory":
# directory keys are passed through without int() conversion
items.append(("directory", item.title, item.key, True, item))
elif kind == "section":
# the size is only determined for movie and show sections
if item.type in ['movie', 'show']:
item.size = get_section_size(item.key)
items.append(("section", item.title, int(item.key), True, item))
elif kind == "episode":
items.append(
(kind, get_plex_item_display_title(item, "show", parent=item.season, parent_title=item.show.title, section_title=item.section.title,
add_section_title=add_section_title), int(item.rating_key), False, item))
elif kind in ("movie", "artist", "photo"):
items.append((kind, get_plex_item_display_title(item, kind, section_title=item.section.title, add_section_title=add_section_title),
int(item.rating_key), False, item))
elif kind == "show":
items.append((
kind, get_plex_item_display_title(item, kind, section_title=item.section.title, add_section_title=add_section_title), int(item.rating_key), True,
item))
# entries of any other kind are silently dropped
return items
def get_recent_items():
"""
actually get the recent items, not limited like /library/recentlyAdded
:return:
"""
# Queries every wanted movie/show section sorted by addedAt (descending), limited to
# config.max_recent_items_per_library entries, and parses the raw response body with regular expressions.
# Returns a list of (added, section type, section title, rating key) tuples for items that pass is_recent().
args = {
"sort": "addedAt:desc",
"X-Plex-Container-Start": "0",
"X-Plex-Container-Size": "%s" % config.max_recent_items_per_library
}
# both patterns depend on the order in which the attributes appear in the response
episode_re = re.compile(ur'(?su)ratingKey="(?P<key>\d+)"'
ur'.+?grandparentRatingKey="(?P<parent_key>\d+)"'
ur'.+?title="(?P<title>.*?)"'
ur'.+?grandparentTitle="(?P<parent_title>.*?)"'
ur'.+?index="(?P<episode>\d+?)"'
ur'.+?parentIndex="(?P<season>\d+?)".+?addedAt="(?P<added>\d+)"'
ur'.+?<Part.+? file="(?P<filename>[^"]+?)"')
movie_re = re.compile(ur'(?su)ratingKey="(?P<key>\d+)".+?title="(?P<title>.*?)'
ur'".+?addedAt="(?P<added>\d+)"'
ur'.+?<Part.+? file="(?P<filename>[^"]+?)"')
# union of the group names of both patterns; groups a pattern doesn't define become None in `data`
available_keys = ("key", "title", "parent_key", "parent_title", "season", "episode", "added", "filename")
recent = []
ref_list = get_decision_list()
for section in Plex["library"].sections():
# skip sections of other types, disabled sections, and sections ruled out by the include/exclude list
if section.type not in ("movie", "show") \
or section.key not in config.enabled_sections \
or ((section.key not in ref_list.sections and config.include)
or (section.key in ref_list.sections and not config.include)):
Log.Debug(u"Skipping section: %s" % section.title)
continue
use_args = args.copy()
plex_item_type = "Movie"
if section.type == "show":
# NOTE(review): type "4" presumably selects episodes - confirm against the PMS API
use_args["type"] = "4"
plex_item_type = "Episode"
url = "http://127.0.0.1:32400/library/sections/%s/all" % int(section.key)
response = query_plex(url, use_args)
matcher = episode_re if section.type == "show" else movie_re
matches = [m.groupdict() for m in matcher.finditer(response.content)]
for match in matches:
data = dict((key, match[key] if key in match else None) for key in available_keys)
# series-level include/exclude decision (episodes only)
if section.type == "show" and ((data["parent_key"] not in ref_list.series and config.include) or
(data["parent_key"] in ref_list.series and not config.include)):
Log.Debug(u"Skipping series: %s" % data["parent_title"])
continue
# item-level include/exclude decision
if (data["key"] not in ref_list.videos and config.include) or \
(data["key"] in ref_list.videos and not config.include):
Log.Debug(u"Skipping item: %s" % data["title"])
continue
# file/path based include/exclude decision
if not is_physically_wanted(data["filename"], plex_item_type):
Log.Debug(u"Skipping item (physically not wanted): %s" % data["title"])
continue
if is_recent(int(data["added"])):
recent.append((int(data["added"]), section.type, section.title, data["key"]))
return recent
def get_on_deck_items():
    """
    :return: result of get_items for the "on_deck" key, with section titles added
    """
    on_deck = get_items(key="on_deck", add_section_title=True)
    return on_deck
def get_recently_added_items():
    """
    :return: result of get_items for the "recently_added" key, not flattened, with section titles added
    """
    recently_added = get_items(key="recently_added", add_section_title=True, flat=False)
    return recently_added
def get_all_items(key, base="library", value=None, flat=False):
    """
    Call get_items without adding section titles.

    :param key: name of the method to call on Plex[base]
    :param base: Plex API base
    :param value: argument(s) for the call
    :param flat: passed through to get_items
    :return: list of item tuples
    """
    all_items = get_items(key, base=base, value=value, flat=flat)
    return all_items
def is_wanted(rating_key, item=None):
    """
    check whether an item, its show/season/section is in the soft or the hard ignore list

    The decision list is either an include or an exclude list; a hit in it yields True for an include list and
    False for an exclude list. Without a hit, the file/path based check decides if it is configured.

    :param rating_key: rating key of the item
    :param item: optional item object; fetched via get_item(rating_key) if missing
    :return: True if the item is wanted
    """
    ref_list = get_decision_list()
    hit_result = ref_list.store == "include"
    list_name = "include" if hit_result else "exclude"

    # item in soft include/exclude list
    if ref_list["videos"] and rating_key in ref_list["videos"]:
        Log.Debug("Item %s is in the soft %s list" % (rating_key, list_name))
        return hit_result

    item = item or get_item(rating_key)
    kind = get_item_kind(item)
    is_episode = kind == "Episode"

    # show in soft include/exclude list
    if is_episode and ref_list["series"] and item.show.rating_key in ref_list["series"]:
        Log.Debug("Item %s's show is in the soft %s list" % (rating_key, list_name))
        return hit_result

    # season in soft include/exclude list
    if is_episode and ref_list["seasons"] and item.season.rating_key in ref_list["seasons"]:
        Log.Debug("Item %s's season is in the soft %s list" % (rating_key, list_name))
        return hit_result

    # section in soft include/exclude list
    if ref_list["sections"] and item.section.key in ref_list["sections"]:
        Log.Debug("Item %s's section is in the soft %s list" % (rating_key, list_name))
        return hit_result

    # physical/path include/exclude; only the first part of the first media is looked at
    if config.include_exclude_sz_files or config.include_exclude_paths:
        for media in item.media:
            for part in media.parts:
                return is_physically_wanted(part.file, kind)

    return not hit_result
def is_physically_wanted(fn, kind):
    """
    File/path based include/exclude decision for a media file.

    :param fn: path of the media file
    :param kind: item class name; "Episode" adds one more parent folder to the folders that are checked
    :return: True if the file is wanted
    """
    if not (config.include_exclude_sz_files or config.include_exclude_paths):
        return not config.include

    # normally check current item folder and the library
    check_paths = [".", "../"]
    if kind == "Episode":
        # series/episode, we've got a season folder here, also
        check_paths.append("../../")

    wanted_results = []
    if config.include_exclude_sz_files:
        base_dir = os.path.dirname(fn)
        for sub_path in check_paths:
            folder = os.path.normpath(os.path.join(base_dir, sub_path))
            wanted_results.append(config.is_physically_wanted(folder, fn))

    if config.include_exclude_paths:
        wanted_results.append(config.is_path_wanted(fn))

    if config.include:
        if any(wanted_results):
            return True
    elif not all(wanted_results):
        return False

    return not config.include
def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, parent_rating_key=None):
    """
    Trigger a metadata refresh for an item, or for every child of a season.

    :param rating_key: rating key of the item (or season)
    :param force: if set, a "force" intent is stored for rating_key before refreshing
    :param timeout: the time for which the intent will be valid
    :param refresh_kind: "season" refreshes each child of rating_key individually
    :param parent_rating_key: not used by this function
    :return: None
    """
    intent = get_intent()

    if force:
        Log.Debug("Setting intent for force-refresh of %s to timeout: %s", rating_key, timeout)
        intent.set("force", rating_key, timeout=timeout)

        # force Dict.Save()
        intent.store.save()

    if refresh_kind == "season":
        # season refresh, needs explicit per-episode refresh
        refresh = [item.rating_key for item in list(Plex["library/metadata"].children(int(rating_key)))]
    else:
        refresh = [rating_key]

    multiple = len(refresh) > 1
    verb = "Forced-refreshing" if force else "Refreshing"
    for key in refresh:
        Log.Info("%s item %s", verb, key)
        Plex["library/metadata"].refresh(key)
        if multiple:
            # pause between the individual refreshes
            Thread.Sleep(10.0)
def get_current_sub(rating_key, part_id, language, plex_item=None):
    """
    Load (or create) the stored subtitle info of an item and pick the subtitle for a part and language.

    The caller receives the storage and is the one to save/destroy it.

    :param rating_key: rating key of the item
    :param part_id: id of the media part
    :param language: subtitle language
    :param plex_item: optional item object; fetched via get_item(rating_key) if missing
    :return: tuple of (current subtitle, stored subtitles, subtitle storage)
    """
    from support.storage import get_subtitle_storage

    plex_item = plex_item or get_item(rating_key)
    storage = get_subtitle_storage()
    stored_subs = storage.load_or_new(plex_item)
    return stored_subs.get_any(part_id, language), stored_subs, storage
def save_stored_sub(stored_subtitle, rating_key, part_id, language, item_type, plex_item=None, storage=None,
                    stored_subs=None):
    """
    Write a stored subtitle (with its current mods applied) for a media part.

    in order for this to work, if the calling supplies stored_subs and storage, it has to trigger its saving and
    destruction explicitly

    If storage/stored_subs are not both supplied, a storage is created here and is always destroyed again,
    also on early returns and errors.

    :param stored_subtitle: stored subtitle to write
    :param rating_key: rating key of the item
    :param part_id: id of the media part
    :param language: subtitle language
    :param item_type: item type, passed on to get_plex_metadata
    :param plex_item: optional item object; fetched via get_item(rating_key) if missing
    :param storage: optional subtitle storage
    :param stored_subs: optional stored subtitle info of the item
    :return: None
    """
    from support.plex_media import get_plex_metadata
    from support.scanning import scan_videos
    from support.storage import save_subtitles, get_subtitle_storage

    plex_item = plex_item or get_item(rating_key)
    if not plex_item:
        # checked before plex_item.rating_key is needed below
        return

    stored_subs_was_provided = bool(stored_subs and storage)
    if not stored_subs_was_provided:
        storage = get_subtitle_storage()

    try:
        if not stored_subs_was_provided:
            stored_subs = storage.load(plex_item.rating_key)

        if not stored_subs:
            return

        try:
            metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
        except PartUnknownException:
            return

        scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
        video, plex_part = scanned_parts.items()[0]

        subtitle = ModifiedSubtitle(language, mods=stored_subtitle.mods)
        subtitle.content = stored_subtitle.content
        if stored_subtitle.encoding:
            # thanks plex
            setattr(subtitle, "_guessed_encoding", stored_subtitle.encoding)

            if stored_subtitle.encoding != "utf-8":
                subtitle.normalize()
                stored_subtitle.content = subtitle.content
                stored_subtitle.encoding = "utf-8"
                storage.save(stored_subs)

        subtitle.plex_media_fps = plex_part.fps
        subtitle.page_link = stored_subtitle.id
        subtitle.language = language
        subtitle.id = stored_subtitle.id

        try:
            save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
            stored_subtitle.mods = subtitle.mods
            Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
                      ", ".join(subtitle.mods) if subtitle.mods else "none")
        except Exception:
            Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())

        if subtitle.storage_path:
            stored_subtitle.last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(subtitle.storage_path))

        if not stored_subs_was_provided:
            storage.save(stored_subs)
    finally:
        # a storage created here is ours to release, whatever happened above
        if not stored_subs_was_provided:
            storage.destroy()
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
    """
    Change the subtitle modifications of the current subtitle of a part and write the result.

    The subtitle storage is released in every case, also when an error is raised.

    :param rating_key: rating key of the item
    :param part_id: id of the media part
    :param language: subtitle language
    :param item_type: item type, passed on to save_stored_sub
    :param mods: list of mod identifiers (used by the "add" and "remove" modes)
    :param mode: one of "add", "clear", "remove", "remove_last"
    :return: None
    :raises NotImplementedError: for an unregistered mod or an unknown mode
    """
    plex_item = get_item(rating_key)
    if not plex_item:
        return

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language, plex_item=plex_item)

    try:
        if mode == "add":
            for mod in mods:
                identifier, args = SubtitleModifications.parse_identifier(mod)

                # verify the registration before asking for the mod's class
                if identifier not in mod_registry.mods_available:
                    raise NotImplementedError("Mod unknown or not registered")

                mod_class = SubtitleModifications.get_mod_class(identifier)

                # clean exclusive mods
                if mod_class.exclusive and current_sub.mods:
                    for current_mod in current_sub.mods[:]:
                        if current_mod.startswith(identifier):
                            current_sub.mods.remove(current_mod)
                            Log.Info("Removing superseded mod %s" % current_mod)

                current_sub.add_mod(mod)

        elif mode == "clear":
            current_sub.add_mod(None)

        elif mode == "remove":
            for mod in mods:
                current_sub.mods.remove(mod)

        elif mode == "remove_last":
            if current_sub.mods:
                current_sub.mods.pop()

        else:
            raise NotImplementedError("Wrong mode given")

        save_stored_sub(current_sub, rating_key, part_id, language, item_type, plex_item=plex_item, storage=storage,
                        stored_subs=stored_subs)

        storage.save(stored_subs)
    finally:
        storage.destroy()
-56
View File
@@ -1,56 +0,0 @@
# coding=utf-8
import plex
from subzero.intent import TempIntent
from subzero.lib.dict import DictProxy
from subzero.lib.httpfake import PlexPyNativeResponseProxy
from subzero.constants import DEFAULT_TIMEOUT
class PlexPyNativeRequestProxy(object):
    """
    A really dumb object that tries to mimic requests.Request in an incomplete way, so that plex.Plex
    uses native plex HTTPRequests instead of the better requests.Request class.
    This allows us to operate freely on 127.0.0.1's PMS.

    To be used in conjunction with subzero.lib.httpfake.PlexPyNativeResponseProxy
    """
    url = None
    data = None
    headers = None
    method = None

    def prepare(self):
        """There is nothing to prepare; the proxy itself is the prepared request."""
        return self

    def send(self):
        """
        Issue the request via HTTP.Request.

        :return: PlexPyNativeResponseProxy with the response data and status code 200, or with no data and the
                 error's code if an Ex.HTTPError was raised
        """
        # fixme: add self.data to HTTP.Request
        status_code = 200
        try:
            data = HTTP.Request(self.url, headers=self.headers, immediate=True, method=self.method,
                                timeout=DEFAULT_TIMEOUT)
        except Ex.HTTPError as e:
            data = None
            status_code = e.code

        return PlexPyNativeResponseProxy(data, status_code, self)
# replace the request class of the plex library with the native proxy defined above
plex.request.Request = PlexPyNativeRequestProxy
# short alias under which other modules import the API entry point
Plex = plex.Plex
class IntentDictStorage(DictProxy):
    """DictProxy for the "intent" store."""
    store = "intent"

    def setup_defaults(self):
        """Return a fresh store layout with an empty "force" mapping."""
        defaults = {"force": {}}
        return defaults
def get_intent():
    """
    use this to get an intent from inside a separate thread

    :return: new TempIntent backed by an IntentDictStorage on the global Dict
    """
    intent_store = IntentDictStorage(Dict)
    return TempIntent(store=intent_store)
-207
View File
@@ -1,207 +0,0 @@
# coding=utf-8
import os
import config
import helpers
import subtitlehelpers
from config import config as sz_config
# trailing ".<tag>" filename components that find_subtitles strips from a subtitle's base name before
# comparing it with media file names (compared lower-cased)
SECONDARY_TAGS = ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom', 'hi', 'cc', 'sdh']
def find_subtitles(part, ignore_parts_cleanup=None):
# Collects the subtitle files that belong to `part` from the configured folders, optionally removes leftover
# subtitle files without an associated media file, and finally validates part.subtitles against what was found.
# :param part: media part; `part.file` and `part.subtitles` are used
# :param ignore_parts_cleanup: if truthy, the leftover-subtitle housekeeping is skipped
# :return: None; the result is written to part.subtitles
lang_sub_map = {}
ignore_parts_cleanup = ignore_parts_cleanup or []
part_filename = helpers.unicodize(part.file)
part_basename = os.path.splitext(os.path.basename(part_filename))[0]
use_filesystem = helpers.cast_bool(Prefs["subtitles.save.filesystem"])
sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
if Prefs["subtitles.save.subFolder.Custom"] else None
# a custom folder takes precedence over the selectable subfolder
use_sub_subfolder = Prefs["subtitles.save.subFolder"] != "current folder" and not sub_dir_custom
autoclean = helpers.cast_bool(Prefs["subtitles.autoclean"])
sub_subfolder = None
# without filesystem storage no folder is scanned at all
paths = [os.path.dirname(part_filename)] if use_filesystem else []
# folders that are shared between media files; these are never housekept
global_folders = []
if use_filesystem:
# Check for local subtitles subdirectory
sub_dir_base = paths[0]
sub_dir_list = []
if use_sub_subfolder:
# got selected subfolder
sub_subfolder = os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"])
sub_dir_list.append(sub_subfolder)
sub_subfolder = os.path.normpath(helpers.unicodize(sub_subfolder))
if sub_dir_custom:
# got custom subfolder
sub_dir_custom = os.path.normpath(sub_dir_custom)
if os.path.isdir(sub_dir_custom) and os.path.isabs(sub_dir_custom):
# absolute folder
sub_dir_list.append(sub_dir_custom)
global_folders.append(sub_dir_custom)
else:
# relative folder
fld = os.path.join(sub_dir_base, sub_dir_custom)
sub_dir_list.append(fld)
# only folders that actually exist are scanned
for sub_dir in sub_dir_list:
if os.path.isdir(sub_dir):
paths.append(sub_dir)
# Check for a global subtitle location
global_subtitle_folder = os.path.join(Core.app_support_path, 'Subtitles')
if os.path.exists(global_subtitle_folder):
paths.append(global_subtitle_folder)
global_folders.append(global_subtitle_folder)
# normalize all paths
paths = [os.path.normpath(helpers.unicodize(path)) for path in paths]
# We start by building a dictionary of files to their absolute paths. We also need to know
# the number of media files that are actually present, in case the found local media asset
# is limited to a single instance per media file.
#
# file_paths is keyed by the lower-cased file name, so a later folder overrides an equally named file of an
# earlier one
file_paths = {}
total_media_files = 0
media_files = []
for path in paths:
for file_path_listing in os.listdir(path.encode(sz_config.fs_encoding)):
# When using os.listdir with a unicode path, it will always return a string using the
# NFD form. However, we internally are using the form NFC and therefore need to convert
# it to allow correct regex / comparisons to be performed.
#
file_path_listing = helpers.unicodize(file_path_listing)
if os.path.isfile(os.path.join(path, file_path_listing).encode(sz_config.fs_encoding)):
file_paths[file_path_listing.lower()] = os.path.join(path, file_path_listing)
# If we've found an actual media file, we should record it.
(root, ext) = os.path.splitext(file_path_listing)
if ext.lower()[1:] in config.VIDEO_EXTS:
total_media_files += 1
# collect found media files
media_files.append(root)
# cleanup any leftover subtitle if no associated media file was found
if autoclean and ignore_parts_cleanup:
Log.Info("Skipping housekeeping of: %s", paths)
if use_filesystem and autoclean and not ignore_parts_cleanup:
for path in paths:
# only housekeep in sub_subfolder if sub_subfolder is used
if use_sub_subfolder and path != sub_subfolder and not sz_config.advanced.thorough_cleaning:
continue
# we can't housekeep the global subtitle folders as we don't know about *all* media files
# in a library; skip them
skip_path = False
for fld in global_folders:
if path.startswith(fld):
Log.Info("Skipping housekeeping of folder: %s", path)
skip_path = True
break
if skip_path:
continue
for file_path_listing in os.listdir(path.encode(sz_config.fs_encoding)):
file_path_listing = helpers.unicodize(file_path_listing)
enc_fn = os.path.join(path, file_path_listing).encode(sz_config.fs_encoding)
if os.path.isfile(enc_fn):
(root, ext) = os.path.splitext(file_path_listing)
# it's a subtitle file
if ext.lower()[1:] in config.SUBTITLE_EXTS_BASE:
# get fn without forced/default/normal tag
split_tag = root.rsplit(".", 1)
if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
root = split_tag[0]
# get associated media file name without language
sub_fn = subtitlehelpers.ENDSWITH_LANGUAGECODE_RE.sub("", root)
# subtitle basename and basename without possible language tag not found in collected
# media files? kill.
if root not in media_files and sub_fn not in media_files:
Log.Info("Removing leftover subtitle: %s", os.path.join(path, file_path_listing))
try:
os.remove(enc_fn)
except (OSError, IOError):
Log.Error("Removing failed")
Log('Looking for subtitle media in %d paths with %d media files.', len(paths), total_media_files)
Log('Paths: %s', ", ".join([helpers.unicodize(p) for p in paths]))
# NOTE(review): file_paths was built before the housekeeping above, so it may still list files that were
# just removed - confirm that the subtitle helpers cope with that
for file_path in file_paths.values():
local_filename = os.path.basename(file_path)
bn, ext = os.path.splitext(local_filename)
local_basename = helpers.unicodize(bn)
# get fn without forced/default/normal tag
split_tag = local_basename.rsplit(".", 1)
has_additional_tag = False
if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
local_basename = split_tag[0]
has_additional_tag = True
# split off possible language tag
local_basename2 = local_basename.rsplit('.', 1)[0]
filename_matches_part = local_basename == part_basename or local_basename2 == part_basename
filename_contains_part = part_basename in local_basename
# anything that isn't a subtitle file is of no interest here
if not ext.lower()[1:] in config.SUBTITLE_EXTS:
continue
# if the file is located within the global subtitle folders and its name doesn't match exactly, ignore it
if global_folders and not filename_matches_part:
skip_path = False
for fld in global_folders:
if file_path.startswith(fld):
skip_path = True
break
if skip_path:
continue
# determine whether to pick up the subtitle based on our match strictness
if not filename_matches_part:
if sz_config.ext_match_strictness == "strict" or (
sz_config.ext_match_strictness == "loose" and not filename_contains_part):
# Log.Debug("%s doesn't match %s, skipping" % (helpers.unicodize(local_filename),
# helpers.unicodize(part_basename)))
continue
subtitle_helper = subtitlehelpers.subtitle_helpers(file_path)
if subtitle_helper is not None:
local_lang_map = subtitle_helper.process_subtitles(part)
for new_language, subtitles in local_lang_map.items():
# Add the possible new language along with the located subtitles so that we can validate them
# at the end...
#
if not lang_sub_map.has_key(new_language):
lang_sub_map[new_language] = []
lang_sub_map[new_language] = lang_sub_map[new_language] + subtitles
# add known metadata subs to our sub list
if not use_filesystem:
for language, sub_list in subtitlehelpers.get_subtitles_from_metadata(part).iteritems():
if sub_list:
if language not in lang_sub_map:
lang_sub_map[language] = []
lang_sub_map[language] = lang_sub_map[language] + sub_list
# Now whack subtitles that don't exist anymore.
for language in lang_sub_map.keys():
part.subtitles[language].validate_keys(lang_sub_map[language])
# Now whack the languages that don't exist anymore.
for language in list(set(part.subtitles.keys()) - set(lang_sub_map.keys())):
part.subtitles[language].validate_keys({})
-194
View File
@@ -1,194 +0,0 @@
# coding=utf-8
import traceback
import time
import os
from babelfish import LanguageReverseError
from support.config import config, TEXT_SUBTITLE_EXTS
from support.helpers import get_plex_item_display_title, cast_bool, get_language_from_stream, is_stream_forced
from support.items import get_item
from support.lib import Plex
from support.storage import get_subtitle_storage
from subzero.video import has_external_subtitle
from subzero.language import Language
def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_title=None, internal=False, external=True, languages=()):
    """
    Determine which of the requested subtitle languages are still missing for a Plex item.

    For every media of the item the existing subtitles are collected from two places: subtitle files on disk that
    the subtitle storage knows about ("own_external") and the subtitle streams reported for each part
    ("internal"/"external"). Those are compared against ``languages``.

    :param rating_key: key of the item; passed to get_item() and the subtitle storage
    :param kind: "show" builds the display title with season/show information, anything else without
    :param added_at: passed through unchanged into the result tuple
    :param section_title: passed on to get_plex_item_display_title()
    :param internal: whether streams that have an index count as existing subtitles
    :param external: whether streams without an index count as existing subtitles
    :param languages: iterable of wanted languages; each one is rebuilt with Language.rebuild()
    :return: (added_at, item_id, item_title, item, missing) if anything is missing, otherwise None
    """
    item_id = int(rating_key)
    item = get_item(rating_key)

    if kind == "show":
        item_title = get_plex_item_display_title(item, kind, parent=item.season, section_title=section_title, parent_title=item.show.title)
    else:
        item_title = get_plex_item_display_title(item, kind, section_title=section_title)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load(rating_key)
    subtitle_storage.destroy()

    subtitle_target_dir, tdir_is_absolute = config.subtitle_sub_dir

    missing = set()
    languages_set = set([Language.rebuild(l) for l in languages])
    for media in item.media:
        # existing subtitles are accumulated over all parts of one media
        existing_subs = {"internal": [], "external": [], "own_external": [], "count": 0}
        for part in media.parts:

            # did we already download an external subtitle before?
            if subtitle_target_dir and stored_subs:
                for language in languages_set:
                    if has_external_subtitle(part.id, stored_subs, language):
                        # check the existence of the actual subtitle file

                        # get media filename without extension
                        part_basename = os.path.splitext(os.path.basename(part.file))[0]

                        # compute target directory for subtitle
                        # fixme: move to central location
                        if tdir_is_absolute:
                            possible_subtitle_path_base = subtitle_target_dir
                        else:
                            possible_subtitle_path_base = os.path.join(os.path.dirname(part.file), subtitle_target_dir)

                        possible_subtitle_path_base = os.path.realpath(possible_subtitle_path_base)

                        # folder actually exists?
                        if not os.path.isdir(possible_subtitle_path_base):
                            continue

                        # the first format for which a file exists is enough
                        found_any = False
                        for ext in config.subtitle_formats:
                            if cast_bool(Prefs['subtitles.only_one']):
                                possible_subtitle_path = os.path.join(possible_subtitle_path_base,
                                                                      u"%s.%s" % (part_basename, ext))
                            else:
                                possible_subtitle_path = os.path.join(possible_subtitle_path_base,
                                                                      u"%s.%s.%s" % (part_basename, language, ext))

                            # check for subtitle existence
                            if os.path.isfile(possible_subtitle_path):
                                found_any = True
                                Log.Debug(u"Found: %s", possible_subtitle_path)
                                break

                        if found_any:
                            existing_subs["own_external"].append(language)
                            existing_subs["count"] = existing_subs["count"] + 1

            for stream in part.streams:
                # only stream_type 3 (subtitle streams) is considered
                if stream.stream_type == 3:
                    is_forced = is_stream_forced(stream)
                    # streams with an index are filed as internal, all others as external
                    if stream.index:
                        key = "internal"
                    else:
                        key = "external"

                    if not config.exotic_ext and stream.codec.lower() not in TEXT_SUBTITLE_EXTS:
                        continue

                    # treat unknown language as lang1?
                    if not stream.language_code and config.treat_und_as_first:
                        lang = Language.rebuild(list(config.lang_list)[0])

                    # we can't parse empty language codes
                    elif not stream.language_code or not stream.codec:
                        continue

                    else:
                        # parse with internal language parser first
                        try:
                            lang = get_language_from_stream(stream.language_code)
                            if not lang:
                                if config.treat_und_as_first:
                                    lang = Language.rebuild(list(config.lang_list)[0])
                                else:
                                    continue

                        except (ValueError, LanguageReverseError):
                            continue

                    if lang:
                        # Log.Debug("Found babelfish language: %r", lang)
                        lang.forced = is_forced
                        existing_subs[key].append(lang)
                        existing_subs["count"] = existing_subs["count"] + 1

        # without any existing subtitle, every requested language is missing
        missing_from_part = set([Language.rebuild(l) for l in languages])
        if existing_subs["count"]:

            # fixme: this is actually somewhat broken with IETF, as Plex doesn't store the country portion
            # (pt instead of pt-BR) inside the database. So it might actually download pt-BR if there's a local pt-BR
            # subtitle but not our own.
            existing_flat = set((existing_subs["internal"] if internal else [])
                                + (existing_subs["external"] if external else [])
                                + existing_subs["own_external"])

            check_languages = set([Language.rebuild(l) for l in languages])
            # remembers the stripped country per alpha3 code so it can be restored on the missing languages below
            alpha3_map = {}
            if config.ietf_as_alpha3:
                for language in existing_flat:
                    if language.country:
                        alpha3_map[language.alpha3] = language.country
                        language.country = None

                for language in check_languages:
                    if language.country:
                        alpha3_map[language.alpha3] = language.country
                        language.country = None

            # compare sets of strings, not sets of different Language instances
            check_languages_str = set(str(l) for l in check_languages)
            existing_flat_str = set(str(l) for l in existing_flat)

            # NOTE(review): Prefs['subtitles.only_one'] is read without cast_bool() here, unlike in the file
            # lookup above - confirm this is intended
            if check_languages_str.issubset(existing_flat_str) or \
                    (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
                # all subs found
                #Log.Info(u"All subtitles exist for '%s'", item_title)
                continue

            missing_from_part = set(Language.fromietf(l) for l in check_languages_str - existing_flat_str)
            if config.ietf_as_alpha3:
                for language in missing_from_part:
                    language.country = alpha3_map.get(language.alpha3, None)

        if missing_from_part:
            Log.Info(u"Subs still missing for '%s' (%s: %s): %s", item_title, rating_key, media.id,
                     missing_from_part)
            missing.update(missing_from_part)

    if missing:
        # deduplicate
        missing = set(Language.fromietf(la) for la in set(str(l) for l in missing))
        return added_at, item_id, item_title, item, missing
def items_get_all_missing_subs(items, sleep_after_request=False):
    """
    Collect the missing-subtitle state of several items.

    :param items: iterable of (added_at, kind, section_title, key) tuples
    :param sleep_after_request: falsy to disable; otherwise the value is passed to time.sleep() after every item
    :return: list with the truthy results of item_discover_missing_subs(), i.e. tuples of
             (added_at, item_id, title, item, missing_languages)
    """
    missing = []
    for added_at, kind, section_title, key in items:
        try:
            state = item_discover_missing_subs(
                key,
                kind=kind,
                added_at=added_at,
                section_title=section_title,
                languages=config.lang_list.copy(),
                internal=cast_bool(Prefs["subtitles.scan.embedded"]),
                external=cast_bool(Prefs["subtitles.scan.external"])
            )
            if state:
                # (added_at, item_id, title, item, missing_languages)
                missing.append(state)
        except Exception:
            # best effort: one failing item must not abort the scan of the remaining ones; SystemExit and
            # KeyboardInterrupt are deliberately not caught so that shutdown requests still propagate
            Log.Error("Something went wrong when getting the state of item %s: %s", key, traceback.format_exc())

        if sleep_after_request:
            time.sleep(sleep_after_request)

    return missing
def refresh_item(item):
    """
    Ask Plex to refresh the metadata of the given item, unless config.no_refresh is set.

    :param item: passed unchanged to the refresh call
    """
    if config.no_refresh:
        return

    Plex["library/metadata"].refresh(item)
-368
View File
@@ -1,368 +0,0 @@
# coding=utf-8
import os
import helpers
from items import get_item
from subzero.language import Language
from lib import Plex
from support.config import TEXT_SUBTITLE_EXTS, config
def get_metadata_dict(item, part, add):
    """
    Build the basic metadata dictionary for a part and merge additional entries into it.

    :param item: object stored under "item"; its section.title is stored under "section"
    :param part: object whose file attribute supplies "path", "folder" and "filename"
    :param add: mapping merged on top of the basic entries (may override them)
    :return: the merged dictionary
    """
    folder, filename = os.path.split(part.file)
    merged = dict(
        item=item,
        section=item.section.title,
        path=part.file,
        folder=folder,
        filename=filename,
    )
    merged.update(add)
    return merged
# guid prefixes looked for in item guids; whatever follows the prefix (up to an optional "?") is used as the
# IMDB respectively TVDB id
imdb_guid_identifier = "com.plexapp.agents.imdb://"
tvdb_guid_identifier = "com.plexapp.agents.thetvdb://"
def get_plexapi_stream_info(plex_item, part_id=None):
    """
    Collect basic codec/channel/resolution information for one part of a Plex item.

    :param plex_item: item whose media/parts are searched
    :param part_id: id of the wanted part (int or string); if falsy, the first part found is used
    :return: {"stream": {...}}; the inner dict stays empty when no part matches. Otherwise it holds
             "video_codec" and "audio_channels", "audio_codec" if the media reports one, and "resolution"
             if the part has a video stream
    """
    d = {"stream": {}}
    data = d["stream"]

    # find current part
    current_part = None
    current_media = None
    for media in plex_item.media:
        for part in media.parts:
            # compare as strings on both sides (like get_part does), so an integer part_id matches as well
            if not part_id or str(part.id) == str(part_id):
                current_part = part
                current_media = media
                break

        if current_part:
            break

    if not current_part:
        return d

    data["video_codec"] = current_media.video_codec
    if current_media.audio_codec:
        data["audio_codec"] = current_media.audio_codec.upper()

        # report DCA under the name DTS
        if data["audio_codec"] == "DCA":
            data["audio_codec"] = "DTS"

    if current_media.audio_channels == 8:
        data["audio_channels"] = "7.1"

    elif current_media.audio_channels == 6:
        data["audio_channels"] = "5.1"
    else:
        data["audio_channels"] = "%s.0" % str(current_media.audio_channels)

    # iter streams
    for stream in current_part.streams:
        if stream.stream_type == 1:
            # video stream; only the first one is used
            data["resolution"] = "%s%s" % (current_media.video_resolution,
                                           "i" if stream.scan_type != "progressive" else "p")
            break

    return d
def media_to_videos(media, kind="series"):
    """
    Iterate through media and return one metadata dictionary per associated part (video).

    :param media: Show or Movie object; its id is used to fetch the matching Plex item via get_item()
    :param kind: "series" walks media.seasons/episodes/items/parts, anything else walks media.items/parts
    :return: list of dictionaries built by get_metadata_dict(), one for every part found
    """
    videos = []

    # this is a Show or a Movie object
    plex_item = get_item(media.id)
    year = plex_item.year
    original_title = plex_item.title_original

    if kind == "series":
        for season in media.seasons:
            season_object = media.seasons[season]
            for episode in media.seasons[season].episodes:
                ep = media.seasons[season].episodes[episode]

                # the text after the TVDB guid prefix (up to "?") is the episode's tvdb id; its first
                # "/"-separated element is used as the series id
                tvdb_id = None
                series_tvdb_id = None
                if tvdb_guid_identifier in ep.guid:
                    tvdb_id = ep.guid[len(tvdb_guid_identifier):].split("?")[0]
                    series_tvdb_id = tvdb_id.split("/")[0]

                # get plex item via API for additional metadata
                plex_episode = get_item(ep.id)
                stream_info = get_plexapi_stream_info(plex_episode)

                for item in media.seasons[season].episodes[episode].items:
                    for part in item.parts:
                        videos.append(
                            get_metadata_dict(plex_episode, part,
                                              dict(stream_info, **{"plex_part": part, "type": "episode",
                                                                   "title": ep.title,
                                                                   "series": media.title, "id": ep.id, "year": year,
                                                                   "series_id": media.id,
                                                                   "super_thumb": plex_item.thumb,
                                                                   "season_id": season_object.id,
                                                                   "imdb_id": None, "series_tvdb_id": series_tvdb_id,
                                                                   "tvdb_id": tvdb_id,
                                                                   "original_title": original_title,
                                                                   "episode": plex_episode.index,
                                                                   "season": plex_episode.season.index,
                                                                   "section": plex_episode.section.title
                                                                   })
                                              )
                        )
    else:
        stream_info = get_plexapi_stream_info(plex_item)
        # the text after the IMDB guid prefix (up to "?") is the imdb id
        imdb_id = None
        if imdb_guid_identifier in media.guid:
            imdb_id = media.guid[len(imdb_guid_identifier):].split("?")[0]
        for item in media.items:
            for part in item.parts:
                videos.append(
                    get_metadata_dict(plex_item, part, dict(stream_info, **{"plex_part": part, "type": "movie",
                                                                            "title": media.title, "id": media.id,
                                                                            "super_thumb": plex_item.thumb,
                                                                            "series_id": None, "year": year,
                                                                            "season_id": None, "imdb_id": imdb_id,
                                                                            "original_title": original_title,
                                                                            "series_tvdb_id": None, "tvdb_id": None,
                                                                            "section": plex_item.section.title})
                                      )
                )
    return videos
# presumably the marker file names that make Sub-Zero ignore a folder/item - not used in the visible part of
# this file; confirm against the callers
IGNORE_FN = ("subzero.ignore", ".subzero.ignore", ".nosz")
def get_stream_fps(streams):
    """
    Return the frame rate of the first video stream.

    Accepts a list of plex streams or a list of the plex api streams: the stream type is read from "type" or
    "stream_type", the frame rate from "frameRate" or "frame_rate".

    :param streams: iterable of stream objects
    :return: the frame rate of the first stream of type 1, "25.000" if there is none or it has no frame rate
    """
    fallback_fps = "25.000"
    for candidate in streams:
        kind = getattr(candidate, "type", getattr(candidate, "stream_type", None))
        if kind != 1:
            continue

        # video
        return getattr(candidate, "frameRate", getattr(candidate, "frame_rate", fallback_fps))

    return fallback_fps
def get_media_item_ids(media, kind="series"):
    """
    Return the id of the media itself followed, for series, by the ids of all of its episodes.

    :param media: object with an id and, for series, seasons holding episodes
    :param kind: only "series" adds the episode ids
    :return: list of ids
    """
    # fixme: does this work correctly for full series force-refreshes and its intents?
    collected = [media.id]
    if kind != "series":
        return collected

    for season_key in media.seasons:
        episodes = media.seasons[season_key].episodes
        collected.extend(episodes[episode_key].id for episode_key in episodes)

    return collected
def get_all_parts(plex_item):
    """
    Return the parts of all media of the given item as one flat list.

    :param plex_item: object whose media entries each carry a parts sequence
    :return: new list with every part, in media order
    """
    return [part for media_entry in plex_item.media for part in media_entry.parts]
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, skip_unknown=False):
    """
    Collect the embedded, text based subtitle streams of a part.

    :param part: part whose streams are inspected
    :param requested_language: if given, only a stream of that language is collected and the search stops at
                               the first match
    :param skip_duplicate_unknown: with config.treat_und_as_first set, only consider the first stream whose
                                   language could not be determined
    :param skip_unknown: if set, never fall back to the streams of unknown language
    :return: list of dictionaries with the keys "stream", "is_unknown", "language" and "is_forced"
    """
    streams = []
    streams_unknown = []
    has_unknown = False
    found_requested_language = False
    for stream in part.streams:
        # subtitle stream
        if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
            is_forced = helpers.is_stream_forced(stream)
            language = helpers.get_language_from_stream(stream.language_code)
            if language:
                language = Language.rebuild(language, forced=is_forced)

            is_unknown = False
            found_requested_language = requested_language and requested_language == language
            if not language and config.treat_und_as_first:
                # only consider first unknown subtitle stream
                if has_unknown and skip_duplicate_unknown:
                    continue

                # an undetermined language is treated as the first configured language
                language = Language.rebuild(list(config.lang_list)[0], forced=is_forced)
                is_unknown = True
                has_unknown = True
                streams_unknown.append({"stream": stream, "is_unknown": is_unknown, "language": language,
                                        "is_forced": is_forced})

            if not requested_language or found_requested_language:
                streams.append({"stream": stream, "is_unknown": is_unknown, "language": language,
                                "is_forced": is_forced})

                if found_requested_language:
                    break

    # found_requested_language reflects the last subtitle stream looked at; fall back to the streams of unknown
    # language if that one did not match
    if streams_unknown and not found_requested_language and not skip_unknown:
        streams = streams_unknown

    return streams
def get_part(plex_item, part_id):
    """
    Find the part with the given id among all media of the item.

    :param plex_item: object whose media entries each carry a parts sequence
    :param part_id: wanted id; compared as string, so ints and strings both work
    :return: the first matching part, None if there is none
    """
    wanted = str(part_id)
    candidates = (part for media_entry in plex_item.media for part in media_entry.parts)
    return next((part for part in candidates if str(part.id) == wanted), None)
def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
    """
    Use the Plex 3rd party API accessor to get normalized metadata for one part of a movie or episode.

    :param rating_key: key of the movie or episode; only used to fetch the item when plex_item is not given
    :param part_id: id of the part the metadata is built for
    :param item_type: "episode" adds show/season information, anything else is handled as a movie
    :param plex_item: already fetched item; fetched via get_item(rating_key) if falsy
    :return: metadata dictionary built by get_metadata_dict(), None if the item could not be fetched
    :raises helpers.PartUnknownException: if the item has no part with the given id
    """

    if not plex_item:
        plex_item = get_item(rating_key)

    if not plex_item:
        return

    # find current part
    current_part = get_part(plex_item, part_id)

    if not current_part:
        raise helpers.PartUnknownException("Part unknown")

    stream_info = get_plexapi_stream_info(plex_item, part_id)

    # get normalized metadata
    # fixme: duplicated logic of media_to_videos
    if item_type == "episode":
        # year and original title are taken from the show, not from the episode
        show = list(Plex["library"].metadata(plex_item.show.rating_key))[0]
        year = show.year
        tvdb_id = None
        series_tvdb_id = None
        original_title = show.title_original
        if tvdb_guid_identifier in plex_item.guid:
            tvdb_id = plex_item.guid[len(tvdb_guid_identifier):].split("?")[0]
            series_tvdb_id = tvdb_id.split("/")[0]

        metadata = get_metadata_dict(plex_item, current_part,
                                     dict(stream_info,
                                          **{"plex_part": current_part, "type": "episode", "title": plex_item.title,
                                             "series": plex_item.show.title, "id": plex_item.rating_key,
                                             "series_id": plex_item.show.rating_key,
                                             "season_id": plex_item.season.rating_key,
                                             "imdb_id": None,
                                             "year": year,
                                             "tvdb_id": tvdb_id,
                                             "super_thumb": plex_item.show.thumb,
                                             "series_tvdb_id": series_tvdb_id,
                                             "original_title": original_title,
                                             "season": plex_item.season.index,
                                             "episode": plex_item.index
                                             })
                                     )
    else:
        imdb_id = None
        original_title = plex_item.title_original
        if imdb_guid_identifier in plex_item.guid:
            imdb_id = plex_item.guid[len(imdb_guid_identifier):].split("?")[0]
        metadata = get_metadata_dict(plex_item, current_part,
                                     dict(stream_info, **{"plex_part": current_part, "type": "movie",
                                                          "title": plex_item.title, "id": plex_item.rating_key,
                                                          "series_id": None,
                                                          "season_id": None,
                                                          "imdb_id": imdb_id,
                                                          "year": plex_item.year,
                                                          "tvdb_id": None,
                                                          "super_thumb": plex_item.thumb,
                                                          "series_tvdb_id": None,
                                                          "original_title": original_title,
                                                          "season": None,
                                                          "episode": None,
                                                          "section": plex_item.section.title})
                                     )
    return metadata
def get_blacklist_from_part_map(video_part_map, languages):
    """
    Gather the blacklist entries stored for the given videos/parts and languages.

    :param video_part_map: mapping of scanned video -> part
    :param languages: languages to look up the blacklist for
    :return: list of (str, str) tuples made from the keys of every non-empty blacklist found
    """
    from support.storage import get_subtitle_storage
    storage = get_subtitle_storage()
    entries = []
    for video, part in video_part_map.iteritems():
        stored = storage.load_or_new(video.plexapi_metadata["item"])
        for language in languages:
            language_blacklist, _subs = stored.get_blacklist(part.id, language)
            if language_blacklist:
                entries.extend((str(a), str(b)) for a, b in language_blacklist.keys())

    storage.destroy()

    return entries
class PMSMediaProxy(object):
    """
    Proxy object for getting data from a mediatree's items "internally" via the PMS

    note: this could be useful later on: Media.TV_Show(getattr(Metadata, "_access_point"), id=XXXXXX)
    """

    def __init__(self, media_id):
        self.mediatree = Media.TreeForDatabaseID(media_id)

    def get_part(self, part_id=None):
        """
        descend along the first child of each node until a node with items is reached, then look for the part
        in the first of those items; if no part id was given, return that item's first part

        :param part_id: id of the wanted part, compared as string
        :return: the part, or None if it wasn't found
        """
        node = self.mediatree
        while True:
            if node.items:
                first_item = node.items[0]
                if not part_id:
                    if first_item.parts:
                        return first_item.parts[0]
                    return None

                for candidate in first_item.parts:
                    if str(candidate.id) == str(part_id):
                        return candidate
                return None

            if not node.children:
                return None

            node = node.children[0]

    def get_all_parts(self):
        """
        descend along the first child of each node until a node with items is reached and return all parts of
        the first of those items

        :return: list of parts, empty if no node with items was found
        """
        node = self.mediatree
        while True:
            if node.items:
                return list(node.items[0].parts)

            if not node.children:
                return []

            node = node.children[0]
-170
View File
@@ -1,170 +0,0 @@
# coding=utf-8
import traceback
import helpers
from babelfish.exceptions import LanguageError
from support.lib import Plex, get_intent
from support.plex_media import get_stream_fps
from support.storage import get_subtitle_storage
from support.config import config, TEXT_SUBTITLE_EXTS
from support.subtitlehelpers import get_subtitles_from_metadata
from subzero.video import parse_video, set_existing_languages
from subzero.language import language_from_stream, Language
def prepare_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, providers=None, skip_hashing=False):
    """
    Return a subliminal/guessit-refined parsed video for the given part.

    Besides parsing the file name, the languages of the audio streams, of the embedded subtitle streams and of
    the subtitles stored in the metadata are collected and handed to set_existing_languages().

    :param pms_video_info: dictionary holding at least "plex_part"
    :param ignore_all: force refresh: existing embedded/external subtitles are not taken into account
    :param hints: passed to parse_video()
    :param rating_key: key used to fetch the item's media and its stored subtitles
    :param providers: passed to parse_video()
    :param skip_hashing: skip hashing in parse_video(); also skipped when config.low_impact_mode is set
    :return: the parsed video, None if parse_video() raised a ValueError
    """
    # NOTE(review): the Prefs values are used without cast_bool() here - confirm that this is intended
    embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
    external_subtitles = not ignore_all and Prefs['subtitles.scan.external']

    plex_part = pms_video_info["plex_part"]

    if ignore_all:
        Log.Debug("Force refresh intended.")

    Log.Debug("Detecting streams: %s, external_subtitles=%s, embedded_subtitles=%s" % (
        plex_part.file, external_subtitles, embedded_subtitles))

    known_embedded = []
    parts = []
    for media in list(Plex["library"].metadata(rating_key))[0].media:
        parts += media.parts

    # find the part with the same id among the freshly fetched ones (the last match wins); its streams are
    # inspected below
    plexpy_part = None
    for part in parts:
        if int(part.id) == int(plex_part.id):
            plexpy_part = part

    # embedded subtitles
    # fixme: skip the whole scanning process if known_embedded == wanted languages?
    audio_languages = []
    if plexpy_part:
        for stream in plexpy_part.streams:
            # stream_type 2 is handled as audio stream
            if stream.stream_type == 2:
                lang = None
                try:
                    lang = language_from_stream(stream.language_code)
                except LanguageError:
                    Log.Debug("Couldn't detect embedded audio stream language: %s", stream.language_code)

                # treat unknown language as lang1?
                if not lang and config.treat_und_as_first:
                    lang = Language.rebuild(list(config.lang_list)[0])

                # lang may still be None here
                audio_languages.append(lang)

            # subtitle stream
            elif stream.stream_type == 3 and embedded_subtitles:
                is_forced = helpers.is_stream_forced(stream)

                # forced streams only count if forced subtitles are wanted at all
                if ((config.forced_only or config.forced_also) and is_forced) or not is_forced:
                    # embedded subtitle
                    # fixme: tap into external subtitles here instead of scanning for ourselves later?
                    if stream.codec and getattr(stream, "index", None):
                        if config.exotic_ext or stream.codec.lower() in config.text_based_formats:
                            lang = None
                            try:
                                lang = language_from_stream(stream.language_code)
                            except LanguageError:
                                Log.Debug("Couldn't detect embedded subtitle stream language: %s", stream.language_code)

                            # treat unknown language as lang1?
                            if not lang and config.treat_und_as_first:
                                lang = Language.rebuild(list(config.lang_list)[0])

                            if lang:
                                if is_forced:
                                    lang.forced = True
                                known_embedded.append(lang)
    else:
        Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)

    # metadata subtitles
    known_metadata_subs = set()
    meta_subs = get_subtitles_from_metadata(plex_part)
    for language, subList in meta_subs.iteritems():
        try:
            lang = Language.fromietf(Locale.Language.Match(language))
        except LanguageError:
            if config.treat_und_as_first:
                lang = Language.rebuild(list(config.lang_list)[0])
            else:
                continue

        if subList:
            for key in subList:
                # keys starting with "subzero_md_forced" mark forced subtitles
                if key.startswith("subzero_md_forced"):
                    lang = Language.rebuild(lang, forced=True)

                known_metadata_subs.add(lang)
                Log.Debug("Found metadata subtitle %r:%s for %s", lang, key, plex_part.file)

    Log.Debug("Known metadata subtitles: %r", known_metadata_subs)
    Log.Debug("Known embedded subtitles: %r", known_embedded)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load(rating_key)
    subtitle_storage.destroy()

    try:
        # get basic video info scan (filename)
        video = parse_video(plex_part.file, hints, skip_hashing=config.low_impact_mode or skip_hashing,
                            providers=providers)

        # set stream languages
        if audio_languages:
            video.audio_languages = audio_languages
            Log.Info("Found audio streams: %s" % ", ".join([str(l) for l in audio_languages]))

        if not ignore_all:
            set_existing_languages(video, pms_video_info, external_subtitles=external_subtitles,
                                   embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
                                   stored_subs=stored_subs, languages=config.lang_list,
                                   only_one=config.only_one, known_metadata_subs=known_metadata_subs)

        # add video fps info
        video.fps = plex_part.fps
        return video

    except ValueError:
        Log.Warn("File could not be guessed: %s: %s", plex_part.file, traceback.format_exc())
def scan_videos(videos, ignore_all=False, providers=None, skip_hashing=False):
    """
    receives a list of videos containing dictionaries returned by media_to_videos and scans each of them

    :param videos: list of video dictionaries
    :param ignore_all: treat every video as force-refreshed
    :param providers: providers to use; if falsy, they are taken from the config based on the video's type
    :param skip_hashing: passed on to prepare_video
    :return: dictionary, key=scanned video, value=plex file part; videos that couldn't be scanned are left out
    """
    scanned = {}
    for info in videos:
        plex_part = info["plex_part"]
        video_id, series_id, season_id = info["id"], info["series_id"], info["season_id"]

        force_refresh = get_intent().get("force", video_id, series_id, season_id)
        Log.Debug("Determining force-refresh (video: %s, series: %s, season: %s), result: %s"
                  % (video_id, series_id, season_id, force_refresh))

        hints = helpers.get_item_hints(info)
        plex_part.fps = get_stream_fps(plex_part.streams)

        if providers:
            use_providers = providers
        else:
            use_providers = config.get_providers(media_type="series" if info["type"] == "episode" else "movies")

        result = prepare_video(info, ignore_all=force_refresh or ignore_all, hints=hints,
                               rating_key=video_id, providers=use_providers,
                               skip_hashing=skip_hashing)
        if not result:
            continue

        # attach everything but the part itself as metadata
        part_metadata = info.copy()
        del part_metadata["plex_part"]

        result.id = video_id
        result.plexapi_metadata = part_metadata
        result.ignore_all = force_refresh
        scanned[result] = plex_part

    return scanned
-233
View File
@@ -1,233 +0,0 @@
# coding=utf-8
import datetime
import logging
import traceback
from config import config
def parse_frequency(s):
    """
    Parse a frequency setting consisting of three whitespace separated words, of which the second one is a
    number and the third one its unit.

    :param s: frequency string, "never" or None
    :return: (number, unit); (None, None) for "never" and None
    """
    if s is None or s == "never":
        return None, None

    _kind, amount, unit = s.split()
    return int(amount), unit
class DefaultScheduler(object):
    """
    Runs registered tasks periodically and on request.

    Two worker threads are started by run(): scheduler_worker starts periodic tasks whose frequency has
    elapsed, queue_worker starts the tasks that were queued with dispatch_task(). Task data and the queue are
    kept in Dict["tasks"].

    note: Dict, Prefs, Log and Thread are not defined in this module; presumably they are provided by the
    Plex plugin framework.
    """
    queue_thread = None
    scheduler_thread = None
    running = False
    registry = None

    def __init__(self):
        self.queue_thread = None
        self.scheduler_thread = None
        self.running = False
        # task classes, filled by register()
        self.registry = []

        # task name -> {"task": instance, "frequency": (number, unit)}, filled by setup_tasks()
        self.tasks = {}
        self.init_storage()

    def init_storage(self):
        """
        make sure Dict["tasks"] and its "queue" entry exist
        """
        if "tasks" not in Dict:
            Dict["tasks"] = {"queue": []}
            Dict.Save()

        if "queue" not in Dict["tasks"]:
            Dict["tasks"]["queue"] = []

    def get_task_data(self, name):
        """
        :param name: task name
        :return: the stored data of the task, None if it has none
        :raises NotImplementedError: if the task has no entry in Dict["tasks"]
        """
        if name not in Dict["tasks"]:
            raise NotImplementedError("Task missing! %s" % name)

        if "data" in Dict["tasks"][name]:
            return Dict["tasks"][name]["data"]

    def clear_task_data(self, name=None):
        """
        reset the stored data and the running flag of one task, or of all tasks (and the queue) if no name is
        given

        :param name: task name or None
        :raises NotImplementedError: if a name is given that has no entry in Dict["tasks"]
        """
        if name is None:
            # full clean
            Log.Debug("Clearing previous task data")
            if Dict["tasks"]:
                for task_name in Dict["tasks"].keys():
                    if task_name == "queue":
                        Dict["tasks"][task_name] = []
                        continue

                    Dict["tasks"][task_name]["data"] = {}
                    Dict["tasks"][task_name]["running"] = False

                Dict.Save()
            return

        if name not in Dict["tasks"]:
            raise NotImplementedError("Task missing! %s" % name)

        Dict["tasks"][name]["data"] = {}
        Dict["tasks"][name]["running"] = False
        Dict.Save()
        Log.Debug("Task data cleared: %s", name)

    def register(self, task):
        """
        :param task: task class; instantiated by setup_tasks()
        """
        self.registry.append(task)

    def setup_tasks(self):
        """
        instantiate all registered task classes and determine their frequency: the setting
        "scheduler.tasks.<name>.frequency" if it exists, the task's own frequency attribute otherwise
        """
        # discover tasks;
        self.tasks = {}
        for cls in self.registry:
            task = cls()
            try:
                task_frequency = Prefs["scheduler.tasks.%s.frequency" % task.name]
            except KeyError:
                task_frequency = getattr(task, "frequency", None)

            self.tasks[task.name] = {"task": task, "frequency": parse_frequency(task_frequency)}

    def run(self):
        """
        start the scheduler and the queue worker threads
        """
        self.running = True
        self.scheduler_thread = Thread.Create(self.scheduler_worker)
        self.queue_thread = Thread.Create(self.queue_worker)

    def stop(self):
        """
        make both workers leave their loop the next time they check the running flag
        """
        self.running = False

    def task(self, name):
        """
        :return: the task instance with the given name, None if unknown
        """
        if name not in self.tasks:
            return None
        return self.tasks[name]["task"]

    def is_task_running(self, name):
        """
        :return: the running flag of the task, None if the task is unknown
        """
        task = self.task(name)
        if task:
            return task.running

    def last_run(self, task):
        """
        :param task: task name
        :return: the task's last_run value, None if the task is unknown
        """
        if task not in self.tasks:
            return None
        return self.tasks[task]["task"].last_run

    def next_run(self, task):
        """
        :param task: task name
        :return: the next scheduled run - last run (or now, if it never ran) plus the frequency, but never
                 earlier than now; None for unknown or non-periodic tasks and tasks without frequency
        """
        if task not in self.tasks or not self.tasks[task]["task"].periodic:
            return None
        frequency_num, frequency_key = self.tasks[task]["frequency"]

        if not frequency_num:
            return None
        last = self.tasks[task]["task"].last_run
        use_date = last
        now = datetime.datetime.now()
        if not use_date:
            use_date = now
        return max(use_date + datetime.timedelta(**{frequency_key: frequency_num}), now)

    def run_task(self, name, *args, **kwargs):
        """
        prepare and run the given task; errors are logged, not raised. post_run is called and the storage is
        saved in any case.

        :param name: task name
        :param args: passed to task.prepare()
        :param kwargs: passed to task.prepare()
        :return: False if the task is already running, None otherwise
        """
        task = self.tasks[name]["task"]
        if task.running:
            Log.Debug("Scheduler: Not running %s, as it's currently running.", name)
            return False

        Log.Debug("Scheduler: Running task %s", name)
        try:
            task.prepare(*args, **kwargs)
            task.run()
        except Exception:
            Log.Error("Scheduler: Something went wrong when running %s: %s", name, traceback.format_exc())
        finally:
            try:
                task.post_run(Dict["tasks"][name]["data"])
            except Exception:
                # a failing post_run must not keep the storage from being saved below
                Log.Error("Scheduler: task.post_run failed for %s: %s", name, traceback.format_exc())
            Dict.Save()
            config.sync_cache()

    def dispatch_task(self, *args, **kwargs):
        """
        queue a run_task(*args, **kwargs) call; it gets picked up by queue_worker
        """
        if "queue" not in Dict["tasks"]:
            Dict["tasks"]["queue"] = []

        Dict["tasks"]["queue"].append((args, kwargs))

    def signal(self, name, *args, **kwargs):
        """
        send a signal to all periodic tasks that are currently running

        :param name: signal name
        :param args: passed to task.signal()
        :param kwargs: passed to task.signal()
        """
        for task_name in self.tasks.keys():
            task = self.task(task_name)

            if not task:
                Log.Error("Scheduler: Task %s not found (?!)" % task_name)
                continue

            if not task.periodic:
                continue

            if task.running:
                Log.Debug("Scheduler: Sending signal %s to task %s (%s, %s)", name, task_name, args, kwargs)
                try:
                    status = task.signal(name, *args, **kwargs)
                except NotImplementedError:
                    Log.Debug("Scheduler: Signal ignored by %s", task_name)
                    continue
                if status:
                    Log.Debug("Scheduler: Signal accepted by %s", task_name)
                else:
                    Log.Debug("Scheduler: Signal not accepted by %s", task_name)
                continue
            Log.Debug("Scheduler: Not sending signal %s to task %s, because: not running", name, task_name)

    def queue_worker(self):
        """
        worker loop: start a thread for every queued task until the scheduler is stopped
        """
        Thread.Sleep(10.0)
        while 1:
            if not self.running:
                break

            # single dispatch requested?
            if Dict["tasks"]["queue"]:
                # work queue off
                # take a copy and empty the stored queue right away
                queue = Dict["tasks"]["queue"][:]
                Dict["tasks"]["queue"] = []
                Dict.Save()
                for args, kwargs in queue:
                    Log.Debug("Queue: Dispatching single task: %s, %s", args, kwargs)
                    Thread.Create(self.run_task, True, *args, **kwargs)
                    Thread.Sleep(5.0)

            Thread.Sleep(1)

    def scheduler_worker(self):
        """
        worker loop: start a thread for every periodic task that never ran or whose frequency has elapsed,
        until the scheduler is stopped
        """
        Thread.Sleep(10.0)
        while 1:
            if not self.running:
                break

            # scheduled tasks
            for name in self.tasks.keys():
                now = datetime.datetime.now()
                info = self.tasks.get(name)
                if not info:
                    Log.Error("Scheduler: Task %s not found (?!)" % name)
                    continue

                task = info["task"]

                if name not in Dict["tasks"] or not task.periodic:
                    continue

                if task.running:
                    continue

                frequency_num, frequency_key = info["frequency"]
                if not frequency_num:
                    continue

                # run legacy SARAM once
                if name == "SearchAllRecentlyAddedMissing" and ("hasRunLSARAM" not in Dict or not Dict["hasRunLSARAM"]):
                    task = self.tasks["LegacySearchAllRecentlyAddedMissing"]["task"]
                    task.last_run = None
                    name = "LegacySearchAllRecentlyAddedMissing"
                    Dict["hasRunLSARAM"] = True
                    Dict.Save()

                if not task.last_run or (task.last_run + datetime.timedelta(**{frequency_key: frequency_num}) <= now):
                    # fixme: scheduled tasks run synchronously. is this the best idea?
                    Thread.Create(self.run_task, True, name)
                    #Thread.Sleep(5.0)
                    #self.run_task(name)
                    Thread.Sleep(5.0)

            Thread.Sleep(1)
# module level scheduler instance, created on import
scheduler = DefaultScheduler()
-255
View File
@@ -1,255 +0,0 @@
# coding=utf-8
import datetime
import os
import pprint
import copy
import traceback
import types
from subliminal_patch.core import save_subtitles as subliminal_save_subtitles
from subzero.subtitle_storage import StoredSubtitlesManager
from subzero.lib.io import FileIO
from subtitlehelpers import force_utf8
from config import config
from helpers import notify_executable, get_title_for_video_metadata, cast_bool, force_unicode
from plex_media import PMSMediaProxy
from support.items import get_item
def get_subtitle_storage():
    """
    :return: a new StoredSubtitlesManager, set up with Data, Thread and get_item
    """
    return StoredSubtitlesManager(Data, Thread, get_item)
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a", set_current=True):
    """
    stores information about downloaded subtitles in the subtitle storage

    :param scanned_video_part_map: mapping of scanned video -> part
    :param downloaded_subtitles: mapping of scanned video -> list of subtitles
    :param storage_type: passed on to the stored subtitles' add()
    :param mode: passed on to the stored subtitles' add()
    :param set_current: passed on to the stored subtitles' add()
    """
    subtitle_storage = get_subtitle_storage()
    for video, video_subtitles in downloaded_subtitles.items():
        part = scanned_video_part_map[video]
        part_id = str(part.id)
        video_id = str(video.id)
        plex_item = get_item(video_id)
        if not plex_item:
            Log.Warn("Plex item not found: %s", video_id)
            continue

        metadata = video.plexapi_metadata
        title = get_title_for_video_metadata(metadata)
        stored_subs = subtitle_storage.load(video_id)
        is_new = False
        if not stored_subs:
            is_new = True
            Log.Debug(u"Creating new subtitle storage: %s, %s", video_id, part_id)
            stored_subs = subtitle_storage.new(plex_item)

        for subtitle in video_subtitles:
            lang = str(subtitle.language)
            subtitle.normalize()
            Log.Debug(u"Adding subtitle to storage: %s, %s, %s, %s, %s" % (video_id, part_id, lang, title,
                                                                           subtitle.guess_encoding()))

            # use the modification time of the subtitle file, if the subtitle has a storage path
            last_mod = None
            if subtitle.storage_path:
                last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(subtitle.storage_path))

            ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode, last_mod=last_mod,
                                      set_current=set_current)

            if ret_val:
                Log.Debug("Subtitle stored")

            else:
                Log.Debug("Subtitle already existing in storage")

        # a newly created storage is saved even without subtitles
        if is_new or video_subtitles:
            Log.Debug("Saving subtitle storage for %s" % video_id)
            subtitle_storage.save(stored_subs)

    subtitle_storage.destroy()
def reset_storage(key):
    """
    resets the Dict[key] storage to an empty dictionary and saves the Dict, thanks to
    https://docs.google.com/document/d/1hhLjV1pI-TA5y91TiJq64BdgKwdLnFt4hWgeOqpz1NA/edit#
    We can't use the nice Plex interface for this, as it calls get multiple times before set
    #Plex[":/plugins/*/prefs"].set("com.plexapp.agents.subzero", "reset_storage", False)

    :param key: key of the Dict entry to reset
    """

    Log.Debug("resetting storage")
    Dict[key] = {}
    Dict.Save()
def log_storage(key):
    """
    Write the storage to the debug log, pretty printed.

    :param key: falsy logs the whole underlying dictionary of Dict; a key that exists in Dict logs that entry
    """
    if not key:
        everything = getattr(Dict, "_dict")
        Log.Debug(pprint.pformat(everything))

    if key in Dict:
        entry = Dict[key]
        Log.Debug(pprint.pformat(entry))
def get_target_folder(file_path):
    """
    Determine the folder subtitles for the given file get saved to, creating it if it doesn't exist.

    The custom sub folder setting wins over the selected one; a custom folder starting with "/" is used as is,
    everything else is relative to the folder of the file.

    :param file_path: path of the media file
    :return: the folder, None if no sub folder is configured (the selected one is "current folder")
    """
    if Prefs["subtitles.save.subFolder.Custom"]:
        custom_folder = Prefs["subtitles.save.subFolder.Custom"].strip()
    else:
        custom_folder = None

    if not custom_folder and Prefs["subtitles.save.subFolder"] == "current folder":
        return None

    # specific subFolder requested, create it if it doesn't exist
    parent_dir = os.path.split(file_path)[0]
    if not custom_folder:
        target = os.path.join(parent_dir, Prefs["subtitles.save.subFolder"])
    elif custom_folder.startswith("/"):
        # absolute folder
        target = custom_folder
    else:
        target = os.path.join(parent_dir, custom_folder)

    target = force_unicode(target)
    if not os.path.exists(target):
        os.makedirs(target)

    return target
def save_subtitles_to_file(subtitles, tags=None):
    """
    Save the given subtitles next to their video, or in the configured sub folder.

    :param subtitles: mapping of video (object with a name, or the path itself as string) -> list of subtitles;
                      videos without subtitles are skipped
    :param tags: passed on to the subliminal save function
    :return: True
    """
    for video, video_subtitles in subtitles.items():
        if video_subtitles:
            file_path = video if isinstance(video, types.StringTypes) else video.name
            target_folder = get_target_folder(file_path)
            subliminal_save_subtitles(file_path, video_subtitles, directory=target_folder,
                                      single=cast_bool(Prefs['subtitles.only_one']),
                                      chmod=config.chmod, path_decoder=force_unicode,
                                      debug_mods=config.debug_mods, formats=config.subtitle_formats, tags=tags)

    return True
def save_subtitles_to_metadata(videos, subtitles):
    """
    Store the given subtitles in the metadata of their media part.

    :param videos: mapping of video -> media part
    :param subtitles: mapping of video -> list of subtitles
    :return: True
    """
    for video, video_subtitles in subtitles.items():
        mediaPart = videos[video]
        for subtitle in video_subtitles:
            content = subtitle.get_modified_content(debug=config.debug_mods)

            if not isinstance(mediaPart, Framework.api.agentkit.MediaPart):
                # we're being handed a Plex.py model instance here, not an internal PMS MediaPart object.
                # get the correct one
                mp = PMSMediaProxy(video.id).get_part(mediaPart.id)
            else:
                mp = mediaPart
            pm = Proxy.Media(content, ext="srt", forced="1" if subtitle.language.forced else None)
            # forced subtitles are stored under their own key
            new_key = "subzero_md" + ("_forced" if subtitle.language.forced else "")
            lang = Locale.Language.Match(subtitle.language.alpha2)

            # NOTE(review): entries get deleted from mp.subtitles[lang] while its "_proxies" are iterated -
            # confirm that the deletion doesn't modify the dictionary being iterated
            for key, proxy in getattr(mp.subtitles[lang], "_proxies").iteritems():
                if not proxy or not len(proxy) >= 5:
                    Log.Debug("Can't parse metadata: %s" % repr(proxy))
                    continue

                if proxy[0] == "Media":
                    if not key.startswith("subzero_"):
                        if key == "subzero":
                            Log.Debug("Removing legacy metadata subtitle for %s", lang)
                            del mp.subtitles[lang][key]
                        Log.Debug("Existing metadata subtitle for %s: %s", lang, key)

            Log.Debug("Adding metadata sub for %s: %s", lang, subtitle)
            mp.subtitles[lang][new_key] = pm
    return True
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None,
                   set_current=True):
    """
    Save the downloaded subtitles to the filesystem or to the metadata, depending on the settings, and store
    information about them in the subtitle storage.

    :param set_current: save the subtitle as the current one; if not set, nothing is written and only the
                        information about the subtitles is stored
    :param scanned_video_part_map: mapping of scanned video -> part
    :param downloaded_subtitles: mapping of scanned video -> list of subtitles
    :param mode: passed on to store_subtitle_info
    :param bare_save: don't trigger anything; don't store information
    :param mods: enabled mods
    :return: whether the subtitles were saved; always False if set_current is not set
    :raises OSError: if saving to the filesystem fails and the metadata fallback is disabled
    """
    meta_fallback = False
    save_successful = False

    # big fixme: scanned_video_part_map isn't needed to the current extent. rewrite.

    if mods:
        for video, video_subtitles in downloaded_subtitles.items():
            if not video_subtitles:
                continue

            for subtitle in video_subtitles:
                Log.Info("Applying mods: %s to %s", mods, subtitle)
                subtitle.mods = mods
                subtitle.plex_media_fps = video.fps

    storage = "metadata"
    save_to_fs = cast_bool(Prefs['subtitles.save.filesystem'])
    if save_to_fs:
        storage = "filesystem"

    if set_current:
        if save_to_fs:
            try:
                Log.Debug("Using filesystem as subtitle storage")
                save_subtitles_to_file(downloaded_subtitles)
            except OSError:
                # fall back to metadata storage if configured, fail otherwise
                if cast_bool(Prefs["subtitles.save.metadata_fallback"]):
                    meta_fallback = True
                    storage = "metadata"
                else:
                    raise
            else:
                save_successful = True

        if not save_to_fs or meta_fallback:
            if meta_fallback:
                Log.Debug("Using metadata as subtitle storage, because filesystem storage failed")
            else:
                Log.Debug("Using metadata as subtitle storage")
            save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles)

        if not bare_save and save_successful and config.notify_executable:
            notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)

    if (not bare_save and save_successful) or not set_current:
        store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode, set_current=set_current)

    return save_successful
def get_pack_id(subtitle):
    """Return the pack cache identifier of a subtitle: "<provider_name>_<numeric_id>"."""
    id_parts = (subtitle.provider_name, subtitle.numeric_id)
    return "%s_%s" % id_parts
def get_pack_data(subtitle):
    """
    Load the cached archive of a subtitle from the pack cache.

    :param subtitle: subtitle whose pack id (see get_pack_id) names the cached archive
    :return: whatever FileIO.read returns for the archive, or None when there is no cached archive or it
        couldn't be read
    """
    subtitle_id = get_pack_id(subtitle)
    archive = os.path.join(config.pack_cache_dir, subtitle_id + ".archive")
    if not os.path.isfile(archive):
        return None

    Log.Info("Loading archive from pack cache: %s", subtitle_id)
    try:
        return FileIO.read(archive, 'rb')
    except Exception:
        # best effort: an unreadable cache entry is logged, the caller simply gets no data
        Log.Error("Couldn't load archive from pack cache: %s: %s", subtitle_id, traceback.format_exc())
    return None
def store_pack_data(subtitle, data):
    """
    Store the archive of a subtitle in the pack cache.

    :param subtitle: subtitle whose pack id (see get_pack_id) names the cached archive
    :param data: archive content handed to FileIO.write
    :return: None; failures are logged and not raised
    """
    subtitle_id = get_pack_id(subtitle)
    archive = os.path.join(config.pack_cache_dir, subtitle_id + ".archive")

    Log.Info("Storing archive in pack cache: %s", subtitle_id)
    try:
        FileIO.write(archive, data, 'wb')
    except Exception:
        # best effort: a failed cache write must not break the caller
        Log.Error("Couldn't store archive in pack cache: %s: %s", subtitle_id, traceback.format_exc())
-219
View File
@@ -1,219 +0,0 @@
# coding=utf-8
import re, os
import helpers
from config import config, SUBTITLE_EXTS, TEXT_SUBTITLE_EXTS
from bs4 import UnicodeDammit
class SubtitleHelper(object):
    """Base class of the subtitle helpers; it only remembers the file it was created for."""

    def __init__(self, filename):
        self.filename = filename
def subtitle_helpers(filename):
    """
    Return a helper instance for the given subtitle file.

    The VobSub helper is only considered (and then asked first) when the "subtitles.scan.exotic_ext"
    preference is enabled.

    :param filename: path of the subtitle file
    :return: instance of the first helper class that accepts the file, or None
    """
    filename = helpers.unicodize(filename)

    if helpers.cast_bool(Prefs["subtitles.scan.exotic_ext"]):
        candidates = (VobSubSubtitleHelper, DefaultSubtitleHelper)
    else:
        candidates = (DefaultSubtitleHelper,)

    for helper_class in candidates:
        if helper_class.is_helper_for(filename):
            return helper_class(filename)

    return None
#####################################################################################################################
class VobSubSubtitleHelper(SubtitleHelper):
    """Helper for VobSub subtitles, which consist of an .idx index file and a matching .sub file."""

    @classmethod
    def is_helper_for(cls, filename):
        """Return True for .idx/.sub files (any case) that have a matching .idx file next to them."""
        (base, file_extension) = os.path.splitext(filename)

        # We only support idx (and maybe sub)
        if file_extension.lower() not in ('.idx', '.sub'):
            return False

        # If we've been given a sub, we only support it if there exists a matching idx file
        return os.path.exists(base + '.idx')

    def process_subtitles(self, part):
        """
        Register every language stream found in the idx file on the given part.

        :param part: object whose ``subtitles[language][key]`` mapping receives the found subtitles
        :return: dict mapping language -> list with the idx file's basename, once per found stream
        """
        lang_sub_map = {}

        # We don't directly process the sub file, only the idx. Therefore if we are passed one of these files,
        # we simply ignore it. The comparison ignores case, just like is_helper_for does.
        (base, ext) = os.path.splitext(self.filename)
        if ext.lower() == '.sub':
            return lang_sub_map

        # If we have an idx file, we need to confirm there is an identically named sub file before we can
        # proceed.
        sub_filename = base + ".sub"
        if not os.path.exists(sub_filename):
            return lang_sub_map

        Log('Attempting to parse VobSub file: ' + self.filename)
        idx = Core.storage.load(self.filename)
        if idx.count('VobSub index file') == 0:
            Log('The idx file does not appear to be a VobSub, skipping...')
            return lang_sub_map

        languages = {}
        basename = os.path.basename(self.filename)

        # every "id: xx" line announces one stream; its position is the stream index
        for language_index, language in enumerate(re.findall('\nid: ([A-Za-z]{2})', idx)):
            Log('Found .idx subtitle file: ' + self.filename + ' language: ' + language + ' stream index: ' + str(language_index))
            languages.setdefault(language, []).append(
                Proxy.LocalFile(self.filename, index=str(language_index), format="vobsub"))
            lang_sub_map.setdefault(language, []).append(basename)

        for language, subs in languages.items():
            part.subtitles[language][basename] = subs

        return lang_sub_map
#####################################################################################################################
# everything after the last dot, without an optional "-Region" suffix
IETF_MATCH = r".+\.([^-.]+)(?:-[A-Za-z]+)?$"
ENDSWITH_LANGUAGECODE_RE = re.compile(r"\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")


def match_ietf_language(s):
    """
    Extract the language part, i.e. the text after the last dot, from a file name without extension.

    When the "subtitles.language.ietf_display" preference is enabled, IETF_MATCH is used, which leaves out an
    optional trailing "-Letters" suffix of the language part.

    :param s: file name without its extension
    :return: the extracted language part, or ``s`` itself when it doesn't contain one
    """
    if helpers.cast_bool(Prefs["subtitles.language.ietf_display"]):
        pattern = IETF_MATCH
    else:
        pattern = r".+\.([^\.]+)$"

    language_match = re.match(pattern, s)
    if language_match:
        # both patterns have exactly one (mandatory) group
        return language_match.group(1)
    return s
class DefaultSubtitleHelper(SubtitleHelper):
    """Helper for regular subtitle files, i.e. files with one of the SUBTITLE_EXTS extensions."""

    @classmethod
    def is_helper_for(cls, filename):
        """Return True when the (lowercased) extension of the file is one of SUBTITLE_EXTS."""
        (base, file_extension) = os.path.splitext(filename)
        return file_extension.lower()[1:] in SUBTITLE_EXTS

    def process_subtitles(self, part):
        """
        Detect language, codec and format of the subtitle file and register it on the given part.

        :param part: object whose ``subtitles[language][key]`` mapping receives the subtitle
        :return: dict mapping the detected language to a one-element list with the used key; empty when the
            file doesn't exist, is skipped or can't be parsed
        """
        lang_sub_map = {}

        if not os.path.exists(self.filename):
            return lang_sub_map

        basename = os.path.basename(self.filename)
        (base, ext) = os.path.splitext(self.filename)

        # Remove the initial '.' from the extension; lowercase it, because is_helper_for accepts the
        # extension regardless of case while all checks below compare against lowercase values
        ext = ext[1:].lower()

        forced = ''
        default = ''
        split_tag = base.rsplit('.', 1)
        if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'embedded-forced',
                                                           'custom']:
            base = split_tag[0]
            sub_tag = split_tag[1].lower()

            # don't do anything with 'normal', we don't need it
            if 'forced' in sub_tag:
                forced = '1'
            elif 'default' == sub_tag:
                default = '1'

        # Attempt to extract the language from the filename (e.g. Avatar (2009).eng)
        # IETF support thanks to
        # https://github.com/hpsbranco/LocalMedia.bundle/commit/4fad9aefedece78a1fa96401304351347f644369
        lang_part = match_ietf_language(base)
        if lang_part != base:
            language = Locale.Language.Match(lang_part)
        elif config.only_one:
            language = Locale.Language.Match(list(config.lang_list)[0].alpha2)
        else:
            language = Locale.Language.Match("xx")

        # skip non-SRT if wanted
        if not config.exotic_ext and ext not in TEXT_SUBTITLE_EXTS:
            return lang_sub_map

        codec = None
        sub_format = None
        if ext in ['txt', 'sub']:
            try:
                file_contents = Core.storage.load(self.filename)
                lines = [line.strip() for line in file_contents.splitlines(True)]
                # the format is recognized by the second line; files with fewer lines end up in the
                # except branch below and are skipped
                if re.match(r'^\{[0-9]+\}\{[0-9]*\}', lines[1]):
                    sub_format = 'microdvd'
                elif re.match(r'^[0-9]{1,2}:[0-9]{2}:[0-9]{2}[:=,]', lines[1]):
                    sub_format = 'txt'
                elif '[SUBTITLE]' in lines[1]:
                    sub_format = 'subviewer'
                else:
                    Log("The subtitle file does not have a known format, skipping... : " + self.filename)
                    return lang_sub_map
            except Exception:
                Log("An error occurred while attempting to parse the subtitle file, skipping... : " + self.filename)
                return lang_sub_map

        # fixme: re-add vtt once Plex Inc. fixes this line in LocalMedia.bundle
        if codec is None and ext in ['ass', 'ssa', 'smi', 'srt', 'psb']:
            codec = ext.replace('ass', 'ssa')

        if sub_format is None:
            sub_format = codec

        Log('Found subtitle file: ' + self.filename + ' language: ' + language + ' codec: ' + str(
            codec) + ' format: ' + str(sub_format) + ' default: ' + default + ' forced: ' + forced)

        # NOTE(review): the original expression `("subzero_ex" + "_forced" if forced else "") + basename`
        # only prefixes forced subtitles (operator precedence). That behavior is kept as is; confirm whether
        # non-forced subtitles were meant to get a "subzero_ex" prefix as well before changing it.
        key = ("subzero_ex_forced" if forced else "") + basename
        part.subtitles[language][key] = Proxy.LocalFile(self.filename, codec=codec, format=sub_format,
                                                        default=default, forced=forced)

        lang_sub_map[language] = [key]
        return lang_sub_map
def get_subtitles_from_metadata(part):
    """
    Collect the keys of the metadata ("Media") subtitles stored on a part.

    Only proxies with at least five elements, the type "Media" and a key starting with "subzero" count.

    :param part: object with an optional ``subtitles`` mapping of language -> object with a ``_proxies`` dict
    :return: dict mapping every language of the part to the list of matching subtitle keys
    """
    subs = {}
    if not (hasattr(part, "subtitles") and part.subtitles):
        return subs

    for language in part.subtitles:
        found = subs[language] = []
        for key, proxy in getattr(part.subtitles[language], "_proxies").items():
            if not proxy or len(proxy) < 5:
                Log.Debug("Can't parse metadata: %s", repr(proxy))
                continue

            # the first proxy element holds the proxy type; only metadata subtitles are of interest
            if proxy[0] != "Media":
                continue

            if not key.startswith("subzero"):
                continue

            # metadata subtitle
            found.append(key)
    return subs
def force_utf8(content):
    """
    Return the given content encoded as UTF-8.

    The encoding is detected with UnicodeDammit; content detected as utf-8 is returned untouched. When
    UnicodeDammit delivers no markup, the content is decoded as ASCII with replacement characters instead.
    """
    dammit = UnicodeDammit(content)
    detected = dammit.original_encoding
    if detected:
        Log.Debug("detected encoding: %s (None: most likely already successfully decoded)" % detected)
    else:
        Log.Debug("detected encoding: unicode (already decoded)")

    # easy way out - already utf-8
    if detected == "utf-8":
        return content

    markup = dammit.unicode_markup
    if not markup:
        markup = content.decode('ascii', 'replace')
    return markup.encode("utf-8")
-929
View File
@@ -1,929 +0,0 @@
# coding=utf-8
import glob
import os
import datetime
import operator
import traceback
from urllib2 import URLError
from subliminal_patch.score import compute_score
from subliminal_patch.core import download_subtitles
from subliminal import list_subtitles as list_all_subtitles, region as subliminal_cache_region
from subzero.language import Language
from subzero.video import refine_video
from missing_subtitles import items_get_all_missing_subs, refresh_item
from scheduler import scheduler
from storage import save_subtitles, get_subtitle_storage
from support.config import config
from support.items import get_recent_items, get_item, is_wanted, get_item_title
from support.helpers import track_usage, get_title_for_video_metadata, cast_bool, PartUnknownException
from support.plex_media import get_plex_metadata
from support.scanning import scan_videos
from support.i18n import _
from download import download_best_subtitles, pre_download_hook, post_download_hook, language_hook
class Task(object):
    """
    Base class of the scheduler tasks.

    The attributes named in ``stored_attributes`` are not kept on the instance: reading and writing them is
    redirected to ``Dict["tasks"][<task name>]`` (see __getattribute__ and __setattr__).
    """
    name = None
    scheduler = None
    periodic = False
    running = False
    time_start = None
    data = None

    # waiting times used by the subclasses (logged there as seconds)
    PROVIDER_SLACK = 30
    DL_PROVIDER_SLACK = 30

    stored_attributes = ("last_run", "last_run_time", "running")
    default_data = {"last_run": None, "last_run_time": None, "running": False, "data": {}}

    # task ready for being status-displayed?
    ready_for_display = False

    def __init__(self):
        self.name = self.get_class_name()
        self.ready_for_display = False
        self.time_start = None
        self.setup_defaults()
        # stored attribute: ends up in Dict, which needs the entry created by setup_defaults
        self.running = False

    def get_class_name(self):
        """Return the name of the concrete task class; it doubles as the task's name."""
        return getattr(getattr(self, "__class__"), "__name__")

    def __getattribute__(self, name):
        # object.__getattribute__ is used for the lookup list itself to avoid recursing into this method
        if name in object.__getattribute__(self, "stored_attributes"):
            return Dict["tasks"].get(self.name, {}).get(name, None)

        return object.__getattribute__(self, name)

    def __setattr__(self, name, value):
        if name in object.__getattribute__(self, "stored_attributes"):
            Dict["tasks"][self.name][name] = value
            Dict.Save()
            return

        object.__setattr__(self, name, value)

    def setup_defaults(self):
        """
        Make sure ``Dict["tasks"]`` has an entry for this task holding every key of ``default_data``.

        The defaults are deep-copied: ``default_data`` contains a nested dict ("data"), which a shallow copy
        would share between all tasks and with the class-level default itself.
        """
        import copy

        if self.name not in Dict["tasks"]:
            Dict["tasks"][self.name] = copy.deepcopy(self.default_data)
            return

        sd = Dict["tasks"][self.name]

        # forward-migration: add keys that were introduced after the entry had been stored
        for key, def_value in self.default_data.items():
            if key not in sd:
                sd[key] = copy.deepcopy(def_value)

    def signal(self, *args, **kwargs):
        raise NotImplementedError

    def prepare(self, *args, **kwargs):
        return

    def run(self):
        """Log the start of the task and remember the start time."""
        Log.Info(u"Task: running: %s", self.name)
        self.time_start = datetime.datetime.now()

    def post_run(self, data_holder):
        """Mark the task as not running and store when it ran and how long it took."""
        self.running = False
        self.last_run = datetime.datetime.now()
        if self.time_start and self.last_run:
            self.last_run_time = self.last_run - self.time_start
        self.time_start = None
        Log.Info(u"Task: ran: %s", self.name)
class SubtitleListingMixin(object):
    """Mixin for tasks that list and score the subtitles available for one part of an item."""

    def list_subtitles(self, rating_key, item_type, part_id, language, skip_wrong_fps=True, metadata=None,
                       scanned_parts=None, air_date_cutoff=None):
        """
        List the subtitles the providers offer for the given part, best score first.

        :param rating_key: key of the item, used to fetch the metadata and for logging
        :param item_type: "episode" selects the series providers and scores, anything else the movie ones
        :param part_id: id of the part; stored on every returned subtitle
        :param language: IETF language code to search for
        :param skip_wrong_fps: when False, "skip_wrong_fps" is disabled in the opensubtitles settings
        :param metadata: already fetched metadata; fetched when missing
        :param scanned_parts: already scanned parts; scanned when missing
        :param air_date_cutoff: when given, items whose year lies more than this many years back are skipped
        :return: list of subtitles, or None when the search wasn't performed (no metadata, nothing scanned,
            air date cutoff hit)
        """
        metadata = metadata or get_plex_metadata(rating_key, part_id, item_type)
        if not metadata:
            return

        media_type = "series" if item_type == "episode" else "movies"
        providers = config.get_providers(media_type=media_type)

        if not scanned_parts:
            scanned_parts = scan_videos([metadata], ignore_all=True, providers=providers)
            if not scanned_parts:
                Log.Error(u"%s: Couldn't list available subtitles for %s", self.name, rating_key)
                return

        video, plex_part = scanned_parts.items()[0]
        refine_video(video, refiner_settings=config.refiner_settings)

        if air_date_cutoff is not None:
            item_year = metadata["item"].year
            if item_year and item_year + air_date_cutoff < datetime.date.today().year:
                Log.Debug("Skipping searching for subtitles: %s, it aired over %s year(s) ago.", rating_key,
                          air_date_cutoff)
                return

        config.init_subliminal_patches()

        provider_settings = config.provider_settings
        if not skip_wrong_fps:
            provider_settings["opensubtitles"]["skip_wrong_fps"] = False

        # minimum score a subtitle needs to be listed
        if item_type != "episode":
            min_score = 60
        elif video.is_special:
            min_score = 180
        else:
            min_score = 240

        wanted_languages = {Language.fromietf(language)}
        available_subs = list_all_subtitles([video], wanted_languages,
                                            providers=providers,
                                            provider_configs=provider_settings,
                                            pool_class=config.provider_pool,
                                            throttle_callback=config.provider_throttle,
                                            language_hook=language_hook)

        use_hearing_impaired = Prefs['subtitles.search.hearingImpaired'] in ("prefer", "force HI")

        # score every subtitle
        scored = []
        for candidate in available_subs[video]:
            Log.Debug(u"%s: Starting score computation for %s", self.name, candidate)
            try:
                matches = candidate.get_matches(video)
            except AttributeError:
                Log.Error(u"%s: Match computation failed for %s: %s", self.name, candidate, traceback.format_exc())
                continue

            # flag wrong series/season/episodes
            if item_type == "episode":
                # a hash match that can't be verified tells nothing about the series
                can_verify_series = candidate.hash_verifiable or "hash" not in matches
                if can_verify_series and not {"series", "season", "episode"}.issubset(matches):
                    if "series" in matches:
                        candidate.wrong_season_ep = True
                    else:
                        candidate.wrong_series = True

            score = compute_score(matches, candidate, video, hearing_impaired=use_hearing_impaired)
            scored.append((candidate, score, matches))

        # sort subtitles by score, best first
        scored.sort(key=operator.itemgetter(1), reverse=True)

        subtitles = []
        for subtitle, score, matches in scored:
            # check score; subtitles of the wrong series are kept regardless
            if score < min_score and not subtitle.wrong_series:
                Log.Info(u'%s: Score %d is below min_score (%d)', self.name, score, min_score)
                continue

            subtitle.score = score
            subtitle.matches = matches
            subtitle.part_id = part_id
            subtitle.item_type = item_type
            subtitles.append(subtitle)
        return subtitles
class DownloadSubtitleMixin(object):
    """Mixin for tasks that download and save one specific subtitle."""

    def download_subtitle(self, subtitle, rating_key, mode="m"):
        """
        Download the given subtitle, save it and add it to the history.

        :param subtitle: subtitle to download; its ``item_type`` and ``part_id`` identify the target part
        :param rating_key: key of the item the subtitle belongs to
        :param mode: handed to save_subtitles and the history; "m" and "b" are additionally logged and
            tracked as manual / better download
        :return: True when the subtitle had content and was saved without an error, False otherwise
        """
        from interface.menu_helpers import set_refresh_menu_state

        item_type = subtitle.item_type
        part_id = subtitle.part_id
        metadata = get_plex_metadata(rating_key, part_id, item_type)
        providers = config.get_providers(media_type="series" if item_type == "episode" else "movies")
        scanned_parts = scan_videos([metadata], ignore_all=True, providers=providers)
        video, plex_part = scanned_parts.items()[0]

        pre_download_hook(subtitle)

        # downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
        download_subtitles([subtitle], providers=providers,
                           provider_configs=config.provider_settings,
                           pool_class=config.provider_pool, throttle_callback=config.provider_throttle)

        post_download_hook(subtitle)

        # may be redundant
        subtitle.pack_data = None

        download_successful = False

        if subtitle.content:
            try:
                save_subtitles(scanned_parts, {video: [subtitle]}, mode=mode, mods=config.default_mods)
                if mode == "m":
                    Log.Debug(u"%s: Manually downloaded subtitle for: %s", self.name, rating_key)
                    track_usage("Subtitle", "manual", "download", 1)
                elif mode == "b":
                    Log.Debug(u"%s: Downloaded better subtitle for: %s", self.name, rating_key)
                    track_usage("Subtitle", "better", "download", 1)
                download_successful = True
                refresh_item(rating_key)

            except Exception:
                Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s",
                          self.name, traceback.format_exc())
            finally:
                set_refresh_menu_state(None)

        if download_successful:
            # store item in history
            from support.history import get_history
            item_title = get_title_for_video_metadata(metadata, add_section_title=False)
            history = get_history()
            try:
                history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
                            thumb=video.plexapi_metadata["super_thumb"],
                            subtitle=subtitle,
                            mode=mode)
            finally:
                # release the history even when adding the entry fails
                history.destroy()

            # clear missing subtitles menu data
            if not scheduler.is_task_running("MissingSubtitles"):
                scheduler.clear_task_data("MissingSubtitles")
        else:
            set_refresh_menu_state(_(u"%(class_name)s: Subtitle download failed (%(item_id)s)",
                                     class_name=self.name,
                                     item_id=rating_key))
        return download_successful
class AvailableSubsForItem(SubtitleListingMixin, Task):
    """Task that lists the subtitles available for one part of an item in one language."""
    item_type = None
    part_id = None
    language = None
    rating_key = None

    def prepare(self, *args, **kwargs):
        """Take over item_type, part_id, language and rating_key from the keyword arguments."""
        self.item_type = kwargs.get("item_type")
        self.part_id = kwargs.get("part_id")
        self.language = kwargs.get("language")
        self.rating_key = kwargs.get("rating_key")

    def setup_defaults(self):
        super(AvailableSubsForItem, self).setup_defaults()

        # reset any previous data
        Dict["tasks"][self.name]["data"] = {}

    def run(self):
        """List the subtitles and store them in self.data ("found_none" when there are none)."""
        super(AvailableSubsForItem, self).run()
        self.running = True

        try:
            track_usage("Subtitle", "manual", "list", 1)
        except Exception:
            # usage tracking must never keep the task from running
            Log.Error("Something went wrong with track_usage: %s", traceback.format_exc())

        Log.Debug("Listing available subtitles for: %s", self.rating_key)
        subs = self.list_subtitles(self.rating_key, self.item_type, self.part_id, self.language, skip_wrong_fps=False)
        if not subs:
            self.data = "found_none"
            return

        # we can't have nasty unpicklable stuff like ZipFile, BytesIO etc in self.data
        self.data = [s.make_picklable() for s in subs]

    def post_run(self, task_data):
        """Drop the results of other items from task_data and store the result of this run."""
        super(AvailableSubsForItem, self).post_run(task_data)

        # clean old data; iterate over a snapshot of the keys, because entries are deleted while looping
        for key in list(task_data.keys()):
            if key != self.rating_key:
                del task_data[key]

        task_data.update({self.rating_key: {self.language: self.data}})
class DownloadSubtitleForItem(DownloadSubtitleMixin, Task):
    """Task that downloads one specific subtitle for an item."""
    subtitle = None
    rating_key = None

    def prepare(self, *args, **kwargs):
        """Take over the mandatory "subtitle" and "rating_key" keyword arguments."""
        self.subtitle, self.rating_key = kwargs["subtitle"], kwargs["rating_key"]

    def run(self):
        """Download the prepared subtitle; the task counts as running for the duration of the download."""
        super(DownloadSubtitleForItem, self).run()

        self.running = True
        self.download_subtitle(self.subtitle, self.rating_key)
        self.running = False
class MissingSubtitles(Task):
    """Task that collects the missing subtitles of the recent items."""
    rating_key = None
    item_type = None
    part_id = None
    language = None

    def run(self):
        """Store the missing subtitles of the recent items in self.data (empty list without recent items)."""
        super(MissingSubtitles, self).run()
        self.running = True
        self.data = []

        recent_items = get_recent_items()
        if not recent_items:
            return

        self.data = items_get_all_missing_subs(recent_items)

    def post_run(self, task_data):
        """Publish the collected data under the "missing_subtitles" key of task_data."""
        super(MissingSubtitles, self).post_run(task_data)
        task_data["missing_subtitles"] = self.data
class SearchAllRecentlyAddedMissing(Task):
# Periodic task: walks through the files returned by subtitle_storage.get_recent_files(), tries to
# download the best subtitles for every part of the corresponding items and saves them.
# items_done / items_searching / percentage are progress counters maintained while running.
periodic = True
items_done = None
items_searching = None
percentage = 0
def __init__(self):
super(SearchAllRecentlyAddedMissing, self).__init__()
self.items_done = None
self.items_searching = None
self.percentage = 0
def signal_updated_metadata(self, *args, **kwargs):
return True
# NOTE(review): unlike Task.prepare this override accepts no arguments; run() calls it without any.
def prepare(self):
self.items_done = 0
self.items_searching = 0
self.percentage = 0
self.ready_for_display = True
def run(self):
super(SearchAllRecentlyAddedMissing, self).run()
self.running = True
self.prepare()
from support.history import get_history
history = get_history()
now = datetime.datetime.now()
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
series_providers = config.get_providers(media_type="series")
movie_providers = config.get_providers(media_type="movies")
# the preference is split into a number and a unit ("days" or "weeks"); any other unit leaves
# max_search_days at 0
is_recent_str = Prefs["scheduler.item_is_recent_age"]
num, ident = is_recent_str.split()
max_search_days = 0
if ident == "days":
max_search_days = int(num)
elif ident == "weeks":
max_search_days = int(num) * 7
subtitle_storage = get_subtitle_storage()
recent_files = subtitle_storage.get_recent_files(age_days=max_search_days)
self.items_searching = len(recent_files)
download_count = 0
videos_with_downloads = 0
config.init_subliminal_patches()
Log.Info(u"%s: Searching for subtitles for %s items", self.name, self.items_searching)
# a skipped item is taken out of the total, so the percentage only reflects the searched items
def skip_item():
self.items_searching = self.items_searching - 1
self.percentage = int(self.items_done * 100 / self.items_searching) if self.items_searching > 0 else 100
# search for subtitles in viable items
try:
for fn in recent_files:
stored_subs = subtitle_storage.load(filename=fn)
if not stored_subs:
Log.Debug("Skipping item %s because storage is empty", fn)
skip_item()
continue
video_id = stored_subs.video_id
# added_date <= max_search_days?
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
Log.Debug("Skipping item %s because it's too old", video_id)
skip_item()
continue
if stored_subs.item_type == "episode":
min_score = min_score_series
providers = series_providers
else:
min_score = min_score_movies
providers = movie_providers
parts = []
plex_item = get_item(video_id)
if not plex_item:
Log.Info(u"%s: Item %s unknown, skipping", self.name, video_id)
skip_item()
continue
if not is_wanted(video_id, item=plex_item):
skip_item()
continue
# collect the parts of all media of the item
for media in plex_item.media:
parts += media.parts
downloads_per_video = 0
hit_providers = False
for part in parts:
part_id = part.id
try:
metadata = get_plex_metadata(video_id, part_id, stored_subs.item_type)
except PartUnknownException:
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
continue
if not metadata:
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
continue
Log.Debug(u"%s: Looking for missing subtitles: %s", self.name, get_item_title(plex_item))
scanned_parts = scan_videos([metadata], providers=providers)
downloaded_subtitles = download_best_subtitles(scanned_parts, min_score=min_score,
providers=providers)
# presumably download_best_subtitles returns None when no provider was queried - verify
hit_providers = downloaded_subtitles is not None
download_successful = False
if downloaded_subtitles:
downloaded_any = any(downloaded_subtitles.values())
if not downloaded_any:
continue
try:
save_subtitles(scanned_parts, downloaded_subtitles, mode="a", mods=config.default_mods)
Log.Debug(u"%s: Downloaded subtitle for item with missing subs: %s", self.name, video_id)
download_successful = True
refresh_item(video_id)
track_usage("Subtitle", "manual", "download", 1)
except:
Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s", self.name,
traceback.format_exc())
finally:
scanned_parts = None
try:
item_title = get_title_for_video_metadata(metadata, add_section_title=False)
if download_successful:
# store item in history
for video, video_subtitles in downloaded_subtitles.items():
if not video_subtitles:
continue
for subtitle in video_subtitles:
downloads_per_video += 1
history.add(item_title, video.id, section_title=metadata["section"],
thumb=video.plexapi_metadata["super_thumb"],
subtitle=subtitle,
mode="a")
downloaded_subtitles = None
except:
Log.Error(u"%s: DEBUG HIT: %s", self.name, traceback.format_exc())
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.PROVIDER_SLACK)
Thread.Sleep(self.PROVIDER_SLACK)
download_count += downloads_per_video
if downloads_per_video:
videos_with_downloads += 1
self.items_done = self.items_done + 1
self.percentage = int(self.items_done * 100 / self.items_searching) if self.items_searching > 0 else 100
stored_subs = None
if downloads_per_video:
Log.Debug(u"%s: Subtitles have been downloaded, "
u"waiting %s seconds before continuing", self.name, self.DL_PROVIDER_SLACK)
Thread.Sleep(self.DL_PROVIDER_SLACK)
else:
if hit_providers:
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.PROVIDER_SLACK)
Thread.Sleep(self.PROVIDER_SLACK)
finally:
# release storage and history no matter how the search ended
subtitle_storage.destroy()
history.destroy()
if download_count:
Log.Debug(u"%s: done. Missing subtitles found for %s/%s items (%s subs downloaded)", self.name,
videos_with_downloads, self.items_searching, download_count)
else:
Log.Debug(u"%s: done. No subtitles found for %s items", self.name, self.items_searching)
# reset the progress information after the run
def post_run(self, task_data):
super(SearchAllRecentlyAddedMissing, self).post_run(task_data)
self.ready_for_display = False
self.percentage = 0
self.items_done = None
self.items_searching = None
class LegacySearchAllRecentlyAddedMissing(Task):
    """
    Periodic task that triggers a refresh for every recent item with missing subtitles and waits for each
    item to be reported as done via the "updated_metadata" signal.
    """
    periodic = True
    frequency = "never"
    items_done = None
    items_searching = None
    items_searching_ids = None
    items_failed = None
    percentage = 0

    # seconds without an "updated_metadata" signal after which an item is considered stalled
    stall_time = 30

    def __init__(self):
        super(LegacySearchAllRecentlyAddedMissing, self).__init__()
        self.items_done = None
        self.items_searching = None
        self.items_searching_ids = None
        self.items_failed = None
        self.percentage = 0

    def signal(self, signal_name, *args, **kwargs):
        """
        Dispatch a signal to the ``signal_<signal_name>`` method.

        :return: the handler's result, or None when this task has no handler for the signal
        """
        # without the default an unknown signal would raise an AttributeError instead of being ignored
        handler = getattr(self, "signal_%s" % signal_name, None)
        return handler(*args, **kwargs) if handler else None

    def signal_updated_metadata(self, *args, **kwargs):
        """Mark the item given as first argument as done, if this task is waiting for it."""
        item_id = int(args[0])

        if self.items_searching_ids is not None and item_id in self.items_searching_ids:
            self.items_done.append(item_id)
            return True

    def prepare(self, *args, **kwargs):
        """Collect the recent items with missing subtitles and reset the progress information."""
        self.items_done = []
        recent_items = get_recent_items()
        missing = items_get_all_missing_subs(recent_items, sleep_after_request=0.2)
        wanted_ids = set(item_id for added_at, item_id, title, item, missing_languages in missing
                         if is_wanted(item_id, item=item))
        self.items_searching = missing
        self.items_searching_ids = wanted_ids
        self.items_failed = []
        self.percentage = 0
        self.ready_for_display = True

    def run(self):
        """Refresh one item after the other, retrying stalled items before giving up on them."""
        super(LegacySearchAllRecentlyAddedMissing, self).run()
        self.running = True
        missing_count = len(self.items_searching)
        items_done_count = 0

        for added_at, item_id, title, item, missing_languages in self.items_searching:
            Log.Debug(u"Task: %s, triggering refresh for %s (%s)", self.name, title, item_id)
            try:
                refresh_item(item_id)
            except URLError:
                # timeout
                pass
            search_started = datetime.datetime.now()
            tries = 1
            while 1:
                if item_id in self.items_done:
                    items_done_count += 1
                    self.percentage = int(items_done_count * 100 / missing_count) if missing_count > 0 else 100
                    Log.Debug(u"Task: %s, item %s done (%s%%, %s/%s)", self.name, item_id, self.percentage,
                              items_done_count, missing_count)
                    break

                # item considered stalled after self.stall_time seconds passed after last refresh
                if (datetime.datetime.now() - search_started).total_seconds() > self.stall_time:
                    if tries > 3:
                        self.items_failed.append(item_id)
                        Log.Debug(u"Task: %s, item stalled for %s times: %s, skipping", self.name, tries, item_id)
                        break

                    Log.Debug(u"Task: %s, item stalled for %s seconds: %s, retrying", self.name, self.stall_time,
                              item_id)
                    tries += 1
                    try:
                        refresh_item(item_id)
                    except URLError:
                        pass
                    search_started = datetime.datetime.now()
                    Thread.Sleep(1)
                Thread.Sleep(0.1)

            # we can't hammer the PMS, otherwise requests will be stalled
            Thread.Sleep(5)

        Log.Debug("Task: %s, done (%s%%, %s/%s). Failed items: %s", self.name, self.percentage,
                  items_done_count, missing_count, self.items_failed)

    def post_run(self, task_data):
        """Reset the progress information after the run."""
        super(LegacySearchAllRecentlyAddedMissing, self).post_run(task_data)
        self.ready_for_display = False
        self.percentage = 0
        self.items_done = None
        self.items_failed = None
        self.items_searching = None
        self.items_searching_ids = None
class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
# Periodic task: for every current subtitle found in the recent subtitle storage files it lists the
# available subtitles and downloads the first one that scores higher than the current one.
periodic = True
# scores at or above the cutoff are considered good enough; no better subtitle is searched then
# TV: episode, format, series, year, season, video_codec, release_group, hearing_impaired, resolution
series_cutoff = 357
# movies: format, title, release_group, year, video_codec, resolution, hearing_impaired
movies_cutoff = 117
def signal_updated_metadata(self, *args, **kwargs):
return True
def run(self):
super(FindBetterSubtitles, self).run()
self.running = True
better_found = 0
# the task refuses to run with a non-numeric or too big (> 30) max_days_after_added setting
try:
max_search_days = int(Prefs["scheduler.tasks.FindBetterSubtitles.max_days_after_added"].strip())
except ValueError:
Log.Error(u"Please only put numbers into the FindBetterSubtitles.max_days_after_added setting. Exiting")
return
else:
if max_search_days > 30:
Log.Error(u"%s: FindBetterSubtitles.max_days_after_added is too big. Max is 30 days.", self.name)
return
now = datetime.datetime.now()
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
# minimum scores a subtitle needs to replace an active "embedded" subtitle; the literals are the
# fallbacks for unset (falsy) advanced settings
min_score_extracted_series = config.advanced.find_better_as_extracted_tv_score or 352
min_score_extracted_movies = config.advanced.find_better_as_extracted_movie_score or 82
overwrite_manually_modified = cast_bool(
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_modified"])
overwrite_manually_selected = cast_bool(
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected"])
# the preference is either "don't limit" or starts with a number, which is taken as the cutoff
air_date_cutoff_pref = Prefs["scheduler.tasks.FindBetterSubtitles.air_date_cutoff"]
if air_date_cutoff_pref == "don't limit":
air_date_cutoff = None
else:
air_date_cutoff = int(air_date_cutoff_pref.split()[0])
subtitle_storage = get_subtitle_storage()
viable_item_count = 0
try:
for fn in subtitle_storage.get_recent_files(age_days=max_search_days):
stored_subs = subtitle_storage.load(filename=fn)
if not stored_subs:
continue
video_id = stored_subs.video_id
if stored_subs.item_type == "episode":
cutoff = self.series_cutoff
min_score = min_score_series
min_score_extracted = min_score_extracted_series
else:
cutoff = self.movies_cutoff
min_score = min_score_movies
min_score_extracted = min_score_extracted_movies
# don't search for better subtitles until at least 30 minutes have passed
if stored_subs.added_at + datetime.timedelta(minutes=30) > now:
Log.Debug(u"%s: Item %s too new, skipping", self.name, video_id)
continue
# added_date <= max_search_days?
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
continue
viable_item_count += 1
# parts that turn out to be unknown are collected here and removed after the loop below
ditch_parts = []
# look through all stored subtitle data
for part_id, languages in stored_subs.parts.iteritems():
part_id = str(part_id)
# all languages
for language, current_subs in languages.iteritems():
current_key = current_subs.get("current")
current = current_subs.get(current_key)
# currently got subtitle?
# fixme: check for existence
if not current:
continue
current_score = current.score
current_mode = current.mode
# late cutoff met? skip
if current_score >= cutoff:
Log.Debug(u"%s: Skipping finding better subs, "
u"cutoff met (current: %s, cutoff: %s): %s (%s)",
self.name, current_score, cutoff, stored_subs.title, video_id)
continue
# got manual subtitle but don't want to touch those?
if current_mode == "m" and not overwrite_manually_selected:
Log.Debug(u"%s: Skipping finding better subs, "
u"had manual: %s (%s)", self.name, stored_subs.title, video_id)
continue
# subtitle modifications different from default
if not overwrite_manually_modified and current.mods \
and set(current.mods).difference(set(config.default_mods)):
Log.Debug(u"%s: Skipping finding better subs, it has manual modifications: %s (%s)",
self.name, stored_subs.title, video_id)
continue
try:
subs = self.list_subtitles(video_id, stored_subs.item_type, part_id, language,
air_date_cutoff=air_date_cutoff)
except PartUnknownException:
Log.Info(u"%s: Part %s unknown/gone; ditching subtitle info", self.name, part_id)
ditch_parts.append(part_id)
continue
# list_subtitles returns None when it bailed out before querying the providers
hit_providers = subs is not None
if subs:
# subs are already sorted by score
better_downloaded = False
better_tried_download = 0
better_visited = 0
for sub in subs:
if sub.score > current_score and sub.score > min_score:
if current.provider_name == "embedded" and sub.score < min_score_extracted:
Log.Debug(u"%s: Not downloading subtitle for %s, we've got an active extracted "
u"embedded sub and the min score %s isn't met (%s).",
self.name, video_id, min_score_extracted, sub.score)
better_visited += 1
break
Log.Debug(u"%s: Better subtitle found for %s, downloading", self.name, video_id)
better_tried_download += 1
ret = self.download_subtitle(sub, video_id, mode="b")
if ret:
better_found += 1
better_downloaded = True
break
else:
Log.Debug(u"%s: Couldn't download/save subtitle. "
u"Continuing to the next one", self.name)
Log.Debug(u"%s: Waiting %s seconds before continuing",
self.name, self.DL_PROVIDER_SLACK)
Thread.Sleep(self.DL_PROVIDER_SLACK)
better_visited += 1
if better_tried_download and not better_downloaded:
Log.Debug(u"%s: Tried downloading better subtitle for %s, "
u"but every try failed.", self.name, video_id)
elif better_downloaded:
Log.Debug(u"%s: Better subtitle downloaded for %s", self.name, video_id)
if better_tried_download or better_downloaded:
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.DL_PROVIDER_SLACK)
Thread.Sleep(self.DL_PROVIDER_SLACK)
elif better_visited:
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.PROVIDER_SLACK)
Thread.Sleep(self.PROVIDER_SLACK)
subs = None
elif hit_providers:
# hit the providers but didn't try downloading? wait.
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.PROVIDER_SLACK)
Thread.Sleep(self.PROVIDER_SLACK)
# NOTE(review): part_id was converted with str() above; if stored_subs.parts is keyed by another type
# the deletion below raises a KeyError, which is silently ignored - confirm the key type
if ditch_parts:
for part_id in ditch_parts:
try:
del stored_subs.parts[part_id]
except KeyError:
pass
subtitle_storage.save(stored_subs)
ditch_parts = None
stored_subs = None
Thread.Sleep(1)
finally:
# release the storage no matter how the search ended
subtitle_storage.destroy()
if better_found:
Log.Debug(u"%s: done. Better subtitles found for %s/%s items", self.name, better_found,
viable_item_count)
else:
Log.Debug(u"%s: done. No better subtitles found for %s items", self.name, viable_item_count)
class SubtitleStorageMaintenance(Task):
    """Periodic task that removes stored subtitle information which storage.delete_missing reports as gone."""
    periodic = True
    frequency = "every 7 days"

    def run(self):
        """Run storage.delete_missing for the wanted languages and log what has been cleaned up."""
        super(SubtitleStorageMaintenance, self).run()
        self.running = True
        Log.Info(u"%s: Running subtitle storage maintenance", self.name)
        storage = get_subtitle_storage()
        try:
            wanted_languages = set(str(l) for l in config.lang_list)
            try:
                deleted_items = storage.delete_missing(wanted_languages=wanted_languages)
            except OSError:
                # retry with scandir_generic when the first attempt fails with an OSError
                deleted_items = storage.delete_missing(wanted_languages=wanted_languages,
                                                       scandir_generic=True)

            if deleted_items:
                Log.Info(u"%s: Subtitle information for %d non-existant videos have been cleaned up",
                         self.name, len(deleted_items))
                Log.Debug(u"%s: Videos: %s", self.name, deleted_items)
            else:
                Log.Info(u"%s: Nothing to do", self.name)
        finally:
            # release the storage even when the cleanup fails
            storage.destroy()
class MenuHistoryMaintenance(Task):
    """Periodic task that drops expired entries from ``Dict["menu_history"]``."""
    periodic = True
    frequency = "every 7 days"

    def run(self):
        """Delete every menu-history entry whose stored timeout lies in the past."""
        super(MenuHistoryMaintenance, self).run()
        self.running = True
        Log.Info(u"%s: Running menu history maintenance", self.name)
        now = datetime.datetime.now()
        if "menu_history" in Dict:
            # iterate over a copy so entries can be deleted while looping
            for key, timeout in Dict["menu_history"].copy().items():
                if now > timeout:
                    try:
                        del Dict["menu_history"][key]
                    except Exception:
                        # Best effort: keep going, but no longer swallow
                        # SystemExit/KeyboardInterrupt or fail silently.
                        Log.Debug(u"%s: Couldn't remove menu history entry: %s", self.name, key)
class MigrateSubtitleStorage(Task):
    """One-off (non-periodic) task that loads every stored file not yet ending in ``.json.gz``."""
    periodic = False
    frequency = None

    def run(self):
        """Load each non-``.json.gz`` storage file through ``storage.load``.

        The scan is retried with ``scandir_generic=True`` when the first
        attempt raises OSError. The storage handle is destroyed in a
        ``finally`` clause so it is released even if the migration fails.
        """
        super(MigrateSubtitleStorage, self).run()
        self.running = True
        Log.Info(u"%s: Running subtitle storage migration", self.name)
        storage = get_subtitle_storage()

        def migrate(scandir_generic=False):
            for fn in storage.get_all_files(scandir_generic=scandir_generic):
                # files already carrying the .json.gz suffix are skipped
                if fn.endswith(".json.gz"):
                    continue
                Log.Debug(u"%s: Migrating %s", self.name, fn)
                storage.load(None, fn)

        try:
            try:
                migrate()
            except OSError:
                migrate(scandir_generic=True)
        finally:
            storage.destroy()
class CacheMaintenance(Task):
    """Periodic task that deletes cache files older than their validity period."""
    periodic = True
    frequency = "every 1 days"
    main_cache_validity = 14  # days
    pack_cache_validity = 4  # days

    def run(self):
        """Remove expired files from the main cache and the archive (pack) cache."""
        super(CacheMaintenance, self).run()
        self.running = True
        Log.Info(u"%s: Running cache maintenance", self.name)
        now = datetime.datetime.now()

        def remove_expired(path, expiry):
            """Delete *path* if its mtime is more than *expiry* days before ``now``."""
            try:
                mtime = datetime.datetime.fromtimestamp(os.path.getmtime(path))
            except (IOError, OSError):
                # The file may have vanished between listing and stat;
                # don't let that abort the whole maintenance run.
                Log.Debug("Couldn't read modification time of cache file: %s", os.path.basename(path))
                return

            if mtime + datetime.timedelta(days=expiry) < now:
                try:
                    os.remove(path)
                except (IOError, OSError):
                    Log.Debug("Couldn't remove cache file: %s", os.path.basename(path))

        # main cache
        if config.new_style_cache:
            for fn in subliminal_cache_region.backend.all_filenames:
                remove_expired(fn, self.main_cache_validity)

        # archive cache
        for fn in glob.iglob(os.path.join(config.pack_cache_dir, "*.archive")):
            remove_expired(fn, self.pack_cache_validity)
# Register every task class with the scheduler, in the original order.
for task_class in (
    LegacySearchAllRecentlyAddedMissing,
    SearchAllRecentlyAddedMissing,
    AvailableSubsForItem,
    DownloadSubtitleForItem,
    MissingSubtitles,
    FindBetterSubtitles,
    SubtitleStorageMaintenance,
    MigrateSubtitleStorage,
    MenuHistoryMaintenance,
    CacheMaintenance,
):
    scheduler.register(task_class)
del task_class  # don't leave the loop variable behind in the module namespace
-917
View File
@@ -1,917 +0,0 @@
[
{
"id": "langPref1a",
"label": "Subtitle Language (1)",
"type": "enum",
"values": [
"sq",
"ar",
"be",
"bs",
"bg",
"ca",
"zh",
"cs",
"da",
"nl",
"en",
"et",
"fi",
"fr",
"de",
"el",
"he",
"hi",
"hu",
"is",
"id",
"it",
"ja",
"ko",
"lv",
"lt",
"mk",
"ms",
"no",
"fa",
"pl",
"pt",
"pt-br",
"ro",
"ru",
"sr",
"sr-cyrl",
"sr-latn",
"sk",
"sl",
"es",
"sv",
"th",
"tr",
"uk",
"vi",
"hr",
"zh-hans",
"zh-hant"
],
"default": "en"
},
{
"id": "langPref2a",
"label": "Subtitle Language (2)",
"type": "enum",
"values": [
"None",
"sq",
"ar",
"be",
"bs",
"bg",
"ca",
"zh",
"cs",
"da",
"nl",
"en",
"et",
"fi",
"fr",
"de",
"el",
"he",
"hi",
"hu",
"is",
"id",
"it",
"ja",
"ko",
"lv",
"lt",
"mk",
"ms",
"no",
"fa",
"pl",
"pt",
"pt-br",
"ro",
"ru",
"sr",
"sr-cyrl",
"sr-latn",
"sk",
"sl",
"es",
"sv",
"th",
"tr",
"uk",
"vi",
"hr",
"zh-hans",
"zh-hant"
],
"default": "None"
},
{
"id": "langPref3a",
"label": "Subtitle Language (3)",
"type": "enum",
"values": [
"None",
"sq",
"ar",
"be",
"bs",
"bg",
"ca",
"zh",
"cs",
"da",
"nl",
"en",
"et",
"fi",
"fr",
"de",
"el",
"he",
"hi",
"hu",
"is",
"id",
"it",
"ja",
"ko",
"lv",
"lt",
"mk",
"ms",
"no",
"fa",
"pl",
"pt",
"pt-br",
"ro",
"ru",
"sr",
"sr-cyrl",
"sr-latn",
"sk",
"sl",
"es",
"sv",
"th",
"tr",
"uk",
"vi",
"hr",
"zh-hans",
"zh-hant"
],
"default": "None"
},
{
"id": "langPrefCustom",
"label": "Additional Subtitle Languages (use ISO-639-1 codes; comma-separated)",
"type": "text",
"default": "None"
},
{
"id": "subtitles.when",
"label": "Download subtitles",
"type": "enum",
"values": [
"Never",
"Always",
"When main audio stream is not Subtitle Language (1)",
"When main audio stream is not any configured language",
"When any audio stream is not Subtitle Language (1)",
"When any audio stream is not any configured language"
],
"default": "Always"
},
{
"id": "subtitles.when_forced",
"label": "Download foreign/forced subtitles",
"type": "enum",
"values": [
"Never",
"Always",
"Only for Subtitle Language (1)",
"Only for Subtitle Language (2)",
"Only for Subtitle Language (3)"
],
"default": "Never"
},
{
"id": "subtitles.any_language_is_enough",
"label": "Don't search for subtitles if a subtitle in any configured language exists as",
"type": "enum",
"values": [
"External or embedded subtitle",
"External or embedded subtitle (not foreign/forced)",
"External subtitle",
"External subtitle (not foreign/forced)",
"Always search for all configured languages"
],
"default": "Always search for all configured languages"
},
{
"id": "subtitles.language.ietf_display",
"label": "Display languages with country attribute as ISO 639-1 (e.g. pt-BR = pt)",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.language.ietf_normalize",
"label": "Treat languages with country attribute as ISO 639-1 (e.g. don't download pt-BR if pt subtitle exists)",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.only_one",
"label": "Restrict to one language (skips adding \".lang.\" to the subtitle filename; only uses \"Subtitle Language (1)\")",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.language.treat_und_as_first",
"label": "Embedded streams: Treat \"Undefined\" (und) as language 1",
"type": "bool",
"default": "true"
},
{
"id": "media_rename1",
"label": "I rename my files using",
"type": "enum",
"values": [
"Sonarr/Radarr (fill api info below)",
"Filebot",
"Sonarr/Radarr/Filebot",
"Symlink to original file",
"I keep the original filenames",
"none of the above"
],
"default": "I keep the original filenames"
},
{
"id": "use_file_info_file",
"label": "Retrieve original filename from .file_info/file_info index files (see wiki)",
"type": "bool",
"default": "false"
},
{
"id": "drone_api.sonarr.url",
"label": "Sonarr URL (add URL base if configured)",
"type": "text",
"default": "http://127.0.0.1:8989"
},
{
"id": "drone_api.sonarr.api_key",
"label": "Sonarr API key",
"type": "text",
"default": ""
},
{
"id": "drone_api.radarr.url",
"label": "Radarr URL (add URL base if configured, min. version: 0.2.0.897)",
"type": "text",
"default": "http://127.0.0.1:7878"
},
{
"id": "drone_api.radarr.api_key",
"label": "Radarr API key",
"type": "text",
"default": ""
},
{
"id": "anticaptcha.service",
"label": "AntiCaptcha-Service (needs paid account; enables Addic7ed, titlovi)",
"type": "enum",
"values": [
"none",
"anti-captcha.com",
"deathbycaptcha.com"
],
"default": "none"
},
{
"id": "anticaptcha.api_key",
"label": "AntiCaptcha-Service key (anti-captcha.com: account_key; deathbycaptcha.com: username:password)",
"type": "text",
"default": ""
},
{
"id": "provider.opensubtitles.enabled",
"label": "Provider: Enable OpenSubtitles",
"type": "bool",
"default": "true"
},
{
"id": "provider.opensubtitles.username",
"label": "Opensubtitles Username",
"type": "text",
"default": ""
},
{
"id": "provider.opensubtitles.password",
"label": "Opensubtitles Password",
"type": "text",
"option": "hidden",
"default": "",
"secure": "true"
},
{
"id": "provider.opensubtitles.is_vip",
"label": "OpenSubtitles VIP? (ad-free subs, 1000 subs/day, no-cache VIP server: http://v.ht/osvip)",
"type": "bool",
"default": "false"
},
{
"id": "provider.podnapisi.enabled",
"label": "Provider: Enable Podnapisi.NET",
"type": "bool",
"default": "true"
},
{
"id": "provider.addic7ed.enabled",
"label": "Provider: Enable Addic7ed (needs AntiCaptcha)",
"type": "bool",
"default": "true"
},
{
"id": "provider.addic7ed.username",
"label": "Addic7ed Username",
"type": "text",
"default": ""
},
{
"id": "provider.addic7ed.password",
"label": "Addic7ed Password",
"type": "text",
"option": "hidden",
"default": "",
"secure": "true"
},
{
"id": "provider.addic7ed.boost_by2",
"label": "Addic7ed: boost score (if requirements met)",
"type": "enum",
"values": [
"100",
"95",
"90",
"85",
"80",
"75",
"70",
"67",
"65",
"60",
"55",
"50",
"45",
"40",
"35",
"30",
"25",
"21",
"20",
"19",
"15",
"10",
"5",
"0"
],
"default": "19"
},
{
"id": "provider.titlovi.enabled",
"label": "Provider: Enable Titlovi.com (might need AntiCaptcha)",
"type": "bool",
"default": "true"
},
{
"id": "provider.legendastv.enabled",
"label": "Provider: Enable Legendas TV (mostly pt-BR; UNRAR NEEDED)",
"type": "bool",
"default": "false"
},
{
"id": "provider.legendastv.username",
"label": "Legendas TV Username",
"type": "text",
"default": ""
},
{
"id": "provider.legendastv.password",
"label": "Legendas TV Password",
"type": "text",
"option": "hidden",
"default": "",
"secure": "true"
},
{
"id": "provider.tvsubtitles.enabled",
"label": "Provider: Enable TVsubtitles.net",
"type": "bool",
"default": "true"
},
{
"id": "provider.napiprojekt.enabled",
"label": "Provider: Enable NapiProjekt.pl (Polish)",
"type": "bool",
"default": "false"
},
{
"id": "provider.subscene.enabled",
"label": "Provider: Enable SubScene (TV shows)",
"type": "bool",
"default": "true"
},
{
"id": "provider.supersubtitles.enabled",
"label": "Provider: Enable feliratok.info (Hungarian)",
"type": "bool",
"default": "false"
},
{
"id": "provider.hosszupuska.enabled",
"label": "Provider: Enable hosszupuskasub.com (Hungarian)",
"type": "bool",
"default": "false"
},
{
"id": "provider.argenteam.enabled",
"label": "Provider: Enable aRGENTeaM (Spanish)",
"type": "bool",
"default": "false"
},
{
"id": "provider.assrt.enabled",
"label": "Provider: Enable assrt.net (Chinese)",
"type": "bool",
"default": "false"
},
{
"id": "provider.assrt.token",
"label": "Assrt API Token",
"type": "text",
"default": ""
},
{
"id": "providers.multithreading",
"label": "Search enabled providers simultaneously (multithreading)",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.embedded.autoextract",
"label": "Automatically extract and use embedded subtitles upon media addition (with configured default mods)",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.search_after_autoextract",
"label": "After automatic extraction of embedded subtitles, also immediately search for available subtitles?",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.scan.embedded",
"label": "Don't search for subtitles of a language if there are embedded subtitles inside the media file (MKV/MP4)?",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.scan.external",
"label": "Don't search for subtitles of a language if they already exist on the filesystem (metadata/filesystem)?",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.scan.filename_strictness",
"label": "How strict should these subtitles existing on the filesystem be detected?",
"type": "enum",
"values": [
"exact: media filename match",
"loose: filename contains media filename",
"any"
],
"default": "loose: filename contains media filename"
},
{
"id": "subtitles.scan.exotic_ext",
"label": "Include non-text subtitle formats (anything else than .srt/.ssa/.ass/.vtt; embedded or external) in the above?",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.search.minimumTVScore2",
"label": "Minimum score for TV (min: 240, def/sane: 337, min-ideal: 352; see http://v.ht/szscores)",
"type": "text",
"default": "337"
},
{
"id": "subtitles.search.minimumMovieScore2",
"label": "Minimum score for movies (min: 60, def/sane: 69, min-ideal: 82; see http://v.ht/szscores)",
"type": "text",
"default": "60"
},
{
"id": "subtitles.search.hearingImpaired",
"label": "Download hearing impaired subtitles.",
"type": "enum",
"values": [
"prefer",
"don't prefer",
"force HI",
"force non-HI"
],
"default": "don't prefer"
},
{
"id": "subtitles.remove_hi",
"label": "Remove Hearing Impaired tags from downloaded subtitles",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.remove_tags",
"label": "Remove style tags from downloaded subtitles (bold, italic, underline, colors, ...)",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.fix_common",
"label": "Fix common issues in subtitles",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.fix_ocr",
"label": "Fix common OCR errors in downloaded subtitles",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.fix_only_uppercase",
"label": "Fix only uppercase downloaded subtitles",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.reverse_rtl",
"label": "Reverse punctuation in RTL languages (heb, ara, fas)",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.colors",
"label": "Change colors of subtitles to",
"type": "enum",
"values": [
"don't change",
"white",
"light-grey",
"red",
"green",
"yellow",
"blue",
"magenta",
"cyan",
"black",
"dark-red",
"dark-green",
"dark-yellow",
"dark-blue",
"dark-magenta",
"dark-cyan",
"dark-grey"
],
"default": "don't change"
},
{
"id": "subtitles.save.filesystem",
"label": "Store subtitles next to media files (instead of metadata)",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.save.formats",
"label": "Subtitle formats to save (non-SRT only works if the previous option is enabled)",
"type": "enum",
"values": [
"SRT",
"VTT",
"SRT+VTT"
],
"default": "SRT"
},
{
"id": "subtitles.save.subFolder",
"label": "Subtitle Folder (\"current folder\" is the folder the current media file lives in)",
"type": "enum",
"values": [
"current folder",
"sub",
"subs",
"subtitle",
"subtitles"
],
"default": "current folder"
},
{
"id": "subtitles.save.subFolder.Custom",
"label": "Custom Subtitle folder (overrides \"Subtitle Folder\"; computes to real paths)",
"type": "text",
"default": ""
},
{
"id": "subtitles.save.metadata_fallback",
"label": "Fall back to metadata storage if filesystem storage failed",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.save.chmod",
"label": "Set subtitle file permissions to (integer, e.g.: 0775)",
"type": "text",
"default": ""
},
{
"id": "subtitles.autoclean",
"label": "Automatically delete leftover/unused (externally saved) subtitles",
"type": "bool",
"default": "false"
},
{
"id": "activity.on_playback",
"label": "On media playback: search for missing subtitles (refresh item)",
"type": "enum",
"values": [
"never",
"current media item",
"next episode (series)",
"hybrid: current item or next episode",
"hybrid-plus: current item and next episode"
],
"default": "never"
},
{
"id": "scheduler.tasks.SearchAllRecentlyAddedMissing.frequency",
"label": "Scheduler: Periodically search for recent items with missing subtitles",
"type": "enum",
"values": [
"never",
"every 6 hours",
"every 12 hours",
"every 24 hours"
],
"default": "every 6 hours"
},
{
"id": "scheduler.item_is_recent_age",
"label": "Scheduler: Item age to be considered recent",
"type": "enum",
"values": [
"1 days",
"2 days",
"3 days",
"4 days",
"1 weeks",
"2 weeks",
"3 weeks",
"4 weeks",
"5 weeks",
"6 weeks",
"12 weeks"
],
"default": "2 weeks"
},
{
"id": "scheduler.max_recent_items_per_library",
"label": "Scheduler: Recent items to consider per library",
"type": "text",
"default": "1000"
},
{
"id": "scheduler.tasks.FindBetterSubtitles.frequency",
"label": "Scheduler: Periodically search for better subtitles",
"type": "enum",
"values": [
"never",
"every 6 hours",
"every 12 hours",
"every 24 hours"
],
"default": "every 12 hours"
},
{
"id": "scheduler.tasks.FindBetterSubtitles.max_days_after_added",
"label": "Scheduler: Days to search for better subtitles (max: 30 days)",
"type": "text",
"default": "7"
},
{
"id": "scheduler.tasks.FindBetterSubtitles.air_date_cutoff",
"label": "Scheduler: Don't search for better subtitles if the item's air date is older than",
"type": "enum",
"values": [
"don't limit",
"1 year",
"2 years",
"3 years",
"4 years",
"5 years",
"6 years",
"7 years",
"8 years",
"9 years",
"10 years"
],
"default": "1 year"
},
{
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected",
"label": "Scheduler: Overwrite manually selected subtitles when better found",
"type": "bool",
"default": "true"
},
{
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_modified",
"label": "Scheduler: Overwrite subtitles with non-default subtitle modifications when better found",
"type": "bool",
"default": "false"
},
{
"id": "history_size",
"label": "History: amount of items to store historical data for",
"type": "enum",
"values": [
"50",
"100",
"150",
"250",
"500"
],
"default": "100"
},
{
"id": "subtitles.try_downloads",
"label": "How many download tries per subtitle (on timeout or error)",
"type": "enum",
"values": [
"1",
"2",
"3",
"4"
],
"default": "2"
},
{
"id": "subtitles.include_exclude_mode",
"label": "Should SZ be enabled or disabled by default? (impacts the settings below and the plugin menu)",
"type": "enum",
"values": [
"enable SZ for all items by default, use ignore mode",
"disable SZ for all items by default, use include mode"
],
"default": "enable SZ for all items by default, use ignore mode"
},
{
"id": "subtitles.include_exclude_paths",
"label": "Enable/disable Sub-Zero in the following paths (comma-separated; the setting above impacts this)",
"type": "text",
"default": ""
},
{
"id": "subtitles.include_exclude_fs",
"label": "Use \"subzero.ignore/.subzero.ignore/.nosz\" (ignore mode) or \"subzero.include/.subzero.include/.sz\" (include mode) files inside folders",
"type": "bool",
"default": "false"
},
{
"id": "plugin_mode2",
"label": "Sub-Zero mode",
"type": "enum",
"values": [
"agent + interface",
"only agent",
"only interface"
],
"default": "agent + interface"
},
{
"id": "plugin_pin",
"label": "Access PIN (any amount of numbers, 0-9)",
"type": "text",
"option": "hidden",
"default": "",
"secure": "true"
},
{
"id": "plugin_pin_valid_for",
"label": "Access PIN valid for minutes",
"type": "text",
"default": "10"
},
{
"id": "plugin_pin_mode2",
"label": "Use PIN to restrict access to (needs plugin or PMS restart)",
"type": "enum",
"values": [
"disabled",
"interface",
"advanced menu"
],
"default": "disabled"
},
{
"id": "notify_executable",
"label": "Call this executable upon successful subtitle download (see Wiki for details)",
"type": "text",
"default": ""
},
{
"id": "check_permissions",
"label": "Check for correct folder permissions of every library on plugin start",
"type": "bool",
"default": "true"
},
{
"id": "new_style_cache",
"label": "Use new style caching (for subliminal)",
"type": "bool",
"default": "true"
},
{
"id": "low_impact_mode",
"label": "Low impact mode (for remote filesystems)",
"type": "bool",
"default": "false"
},
{
"id": "pms_request_timeout",
"label": "Timeout for API requests sent to the PMS",
"type": "text",
"default": "15"
},
{
"id": "use_custom_dns2",
"label": "Use custom DNS (IPs, comma-separated, leave empty for system DNS. Default: Google/CF)",
"type": "text",
"default": "1.1.1.1, 8.8.8.8"
},
{
"id": "proxy",
"label": "HTTP proxy to use for providers (supports credentials)",
"type": "text",
"default": ""
},
{
"id": "path_to_advanced_settings",
"label": "Custom path to advanced_settings.json",
"type": "text",
"default": ""
},
{
"id": "log_level",
"label": "How verbose should the logging be?",
"type": "enum",
"values": [
"CRITICAL",
"ERROR",
"WARNING",
"INFO",
"DEBUG"
],
"default": "WARNING"
},
{
"id": "log_rotate_keep",
"label": "How many log backups to keep?",
"type": "text",
"default": "5"
},
{
"id": "log_debug_mods",
"label": "Log subtitle modification (debug)",
"type": "bool",
"default": "false"
},
{
"id": "log_console",
"label": "Log to console (for development/debugging)",
"type": "bool",
"default": "false"
},
{
"id": "track_usage",
"label": "Collect anonymous usage statistics",
"type": "bool",
"default": "true"
}
]
-53
View File
@@ -1,53 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundleIdentifier</key>
<string>com.plexapp.agents.subzero</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleShortVersionString</key>
<string>2.6.5</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>
<string>2.6.5.3074</string>
<key>PlexFrameworkVersion</key>
<string>2</string>
<key>PlexPluginClass</key>
<string>Agent</string>
<key>PlexPluginMode</key>
<string>Daemon</string>
<key>PlexPluginConsoleLogging</key>
<string>0</string>
<key>PlexPluginDevMode</key>
<string>1</string>
<key>PlexPluginCodePolicy</key>
<!-- this allows channels to access some python methods which are otherwise blocked, as well as import external code libraries, and interact with the PMS HTTP API -->
<string>Elevated</string>
<key>PlexAgentAttributionText</key>
<string>&lt;div style=&quot;white-space: pre;&quot;&gt;&lt;img src=&quot;https://raw.githubusercontent.com/pannal/Sub-Zero.bundle/master/Contents/Resources/subzero.gif&quot; /&gt;
&lt;h1&gt;Sub-Zero for Plex&lt;/h1&gt;&lt;i&gt;Subtitles done right&lt;/i&gt;
Version 2.6.5.3074 DEV
Originally based on @bramwalet's awesome &lt;a href=&quot;https://github.com/bramwalet/Subliminal.bundle&quot;&gt;Subliminal.bundle&lt;/a&gt;
If you like this, buy me a beer: &lt;a href=&quot;https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&amp;hosted_button_id=G9VKR2B8PMNKG&quot; target=&quot;_blank&quot; title=&quot;donate&quot;&gt;&lt;img src=&quot;https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif&quot; alt=&quot;donate&quot; title=&quot;donate&quot; /&gt;&lt;/a&gt;
&lt;strong&gt;Need help?&lt;/strong&gt;
Wiki: &lt;a href=&quot;http://v.ht/szwiki&quot;&gt;http://v.ht/szwiki&lt;/a&gt;
Score info: &lt;a href=&quot;http://v.ht/szscores&quot;&gt;http://v.ht/szscores&lt;/a&gt;
Plex thread: &lt;a href=&quot;https://forums.plex.tv/discussion/186575&quot;&gt;https://forums.plex.tv/discussion/186575&lt;/a&gt;
Github: &lt;a href=&quot;https://github.com/pannal/Sub-Zero.bundle&quot;&gt;https://github.com/pannal/Sub-Zero.bundle&lt;/a&gt;
3rd party licenses: &lt;a href=&quot;https://github.com/pannal/Sub-Zero.bundle/tree/master/Licenses&quot;&gt;https://github.com/pannal/Sub-Zero.bundle/tree/master/Licenses&lt;/a&gt;
panni, 2019
&lt;/div&gt;
</string>
</dict>
</plist>
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,16 +0,0 @@
# Expose default_environment/compile/interpret, with minimal stand-ins when
# the ``ast`` module cannot be imported.
try:
    # ``ast`` is imported first on purpose: when it succeeds the name is in
    # globals(), which lets the handler below tell which import failed.
    import ast
    from _markerlib.markers import default_environment, compile, interpret
except ImportError:
    if 'ast' in globals():
        # ast imported fine, so the failure came from _markerlib.markers: propagate it.
        raise

    # No ``ast`` available: define stand-ins with the same names.
    def default_environment():
        return {}

    def compile(marker):
        # Returns a function that is true only for an empty/whitespace marker;
        # the environment and override arguments are accepted but ignored.
        def marker_fn(environment=None, override=None):
            # 'empty markers are True' heuristic won't install extra deps.
            return not marker.strip()
        marker_fn.__doc__ = marker
        return marker_fn

    def interpret(marker, environment=None, override=None):
        # environment/override are not forwarded; the fallback ignores them anyway.
        return compile(marker)()
@@ -1,119 +0,0 @@
# -*- coding: utf-8 -*-
"""Interpret PEP 345 environment markers.
EXPR [in|==|!=|not in] EXPR [or|and] ...
where EXPR belongs to any of those:
python_version = '%s.%s' % (sys.version_info[0], sys.version_info[1])
python_full_version = sys.version.split()[0]
os.name = os.name
sys.platform = sys.platform
platform.version = platform.version()
platform.machine = platform.machine()
platform.python_implementation = platform.python_implementation()
a free string, like '2.6', or 'win32'
"""
__all__ = ['default_environment', 'compile', 'interpret']
import ast
import os
import platform
import sys
import weakref
_builtin_compile = compile
# Obtain python_implementation(), providing a stand-in on Jython where
# platform.python_implementation() is missing.
try:
    from platform import python_implementation
except ImportError:
    if os.name == "java":
        # Jython 2.5 has ast module, but not platform.python_implementation() function.
        def python_implementation():
            return "Jython"
    else:
        # any other interpreter lacking the function: re-raise the ImportError
        raise
# restricted set of variables
_VARS = {'sys.platform': sys.platform,
         'python_version': '%s.%s' % sys.version_info[:2],
         # FIXME parsing sys.version is not reliable, but there is no other
         # way to get e.g. 2.7.2+, and the PEP is defined with sys.version
         'python_full_version': sys.version.split(' ', 1)[0],
         'os.name': os.name,
         'platform.version': platform.version(),
         'platform.machine': platform.machine(),
         'platform.python_implementation': python_implementation(),
         'extra': None  # wheel extension
         }

# Add an underscore alias for every dotted key (e.g. 'os.name' -> 'os_name').
# list() takes a snapshot of the keys because the dict grows inside the loop.
for var in list(_VARS.keys()):
    if '.' in var:
        _VARS[var.replace('.', '_')] = _VARS[var]
def default_environment():
    """Return a fresh copy of the default PEP 345 marker variables (``_VARS``)."""
    environment = _VARS.copy()
    return environment
class ASTWhitelist(ast.NodeTransformer):
    """Validate an environment-marker AST and flatten dotted names.

    Only comparisons, boolean operations, names, one level of attribute
    access on a plain name, and string literals are accepted; anything else
    raises SyntaxError. ``a.b`` is rewritten into a single ``Name`` node
    whose identifier is the dotted string ``"a.b"``.
    """

    def __init__(self, statement):
        self.statement = statement  # for error messages

    ALLOWED = (ast.Compare, ast.BoolOp, ast.Attribute, ast.Name, ast.Load)
    # String literals: the parser emits ast.Constant on Python 3.8+, where
    # ast.Str is deprecated (and later removed); older versions emit ast.Str.
    if sys.version_info >= (3, 8):
        _CONSTANT = ast.Constant
        ALLOWED += (ast.Constant,)
    else:
        _CONSTANT = None
        ALLOWED += (ast.Str,)
    # Bool operations
    ALLOWED += (ast.And, ast.Or)
    # Comparison operations
    ALLOWED += (ast.Eq, ast.Gt, ast.GtE, ast.In, ast.Is, ast.IsNot, ast.Lt, ast.LtE, ast.NotEq, ast.NotIn)

    def _reject(self, node):
        """Raise SyntaxError pointing at *node* within the original statement."""
        # Not every node type carries a column offset; fall back to column 0.
        column = getattr(node, 'col_offset', 0)
        raise SyntaxError('Not allowed in environment markers.\n%s\n%s' %
                          (self.statement,
                           (' ' * column) + '^'))

    def visit(self, node):
        """Ensure statement only contains allowed nodes."""
        if not isinstance(node, self.ALLOWED):
            self._reject(node)
        # ast.Constant also covers numbers, None, bytes, ...; only string
        # literals were ever allowed, so keep rejecting everything else.
        if (self._CONSTANT is not None and isinstance(node, self._CONSTANT)
                and not isinstance(node.value, str)):
            self._reject(node)
        return ast.NodeTransformer.visit(self, node)

    def visit_Attribute(self, node):
        """Flatten one level of attribute access."""
        # Only ``name.attr`` can be flattened; deeper chains or attributes of
        # literals are reported as a marker error instead of failing on ``.id``.
        if not isinstance(node.value, ast.Name):
            self._reject(node)
        new_node = ast.Name("%s.%s" % (node.value.id, node.attr), node.ctx)
        return ast.copy_location(new_node, node)
def parse_marker(marker):
    """Parse *marker* as an expression and pass its tree through ASTWhitelist."""
    expression = ast.parse(marker, mode='eval')
    whitelist = ASTWhitelist(marker)
    # generic_visit (not visit) so the top-level wrapper node itself is not checked
    return whitelist.generic_visit(expression)
def compile_marker(parsed_marker):
    """Compile an already-parsed marker tree into an ``eval``-mode code object."""
    code_object = _builtin_compile(
        parsed_marker,
        '<environment marker>',
        'eval',
        dont_inherit=True,
    )
    return code_object
_cache = weakref.WeakValueDictionary()
def compile(marker):
    """Return compiled marker as a function accepting an environment dict.

    The returned function has the signature
    ``marker_fn(environment=None, override=None)``: *environment* defaults to
    ``default_environment()`` and *override*, when given, is applied on top
    of it. Neither argument is modified. Results are memoised in ``_cache``.
    """
    try:
        return _cache[marker]
    except KeyError:
        pass
    if not marker.strip():
        def marker_fn(environment=None, override=None):
            """"""
            return True
    else:
        compiled_marker = compile_marker(parse_marker(marker))

        def marker_fn(environment=None, override=None):
            """override updates environment"""
            # Work on a private copy: update() and eval() (which inserts
            # __builtins__) would otherwise mutate the caller's dict.
            if environment is None:
                environment = default_environment()
            else:
                environment = dict(environment)
            if override is not None:
                environment.update(override)
            # NOTE: eval() only ever sees code built from the whitelisted AST.
            return eval(compiled_marker, environment)
    marker_fn.__doc__ = marker
    _cache[marker] = marker_fn
    # Return the local (strong) reference rather than re-reading the weak cache.
    return marker_fn
def interpret(marker, environment=None, override=None):
    """Compile *marker* and evaluate it immediately.

    *environment* and *override* are passed straight to the compiled marker
    function; *override* is optional so existing two-argument callers keep
    working.
    """
    return compile(marker)(environment, override)
File diff suppressed because it is too large Load Diff
-85
View File
@@ -1,85 +0,0 @@
"""Generic interface to all dbm clones.
Instead of
import dbm
d = dbm.open(file, 'w', 0666)
use
import anydbm
d = anydbm.open(file, 'w')
The returned object is a dbhash, gdbm, dbm or dumbdbm object,
dependent on the type of database being opened (determined by whichdb
module) in the case of an existing dbm. If the dbm does not exist and
the create or new flag ('c' or 'n') was specified, the dbm type will
be determined by the availability of the modules (tested in the above
order).
It has the following interface (key and data are strings):
d[key] = data # store data at key (may override data at
# existing key)
data = d[key] # retrieve data at key (raise KeyError if no
# such key)
del d[key] # delete data stored at key (raises KeyError
# if no such key)
flag = key in d # true if the key exists
list = d.keys() # return a list of all existing keys (slow!)
Future versions may change the order in which implementations are
tested for existence, and add interfaces to other dbm-like
implementations.
"""
# Base exception of this module; further down the name ``error`` is rebound
# to a tuple that also contains each imported backend's ``error``.
class error(Exception):
    pass

_names = ['dbhash', 'gdbm', 'dbm', 'dumbdbm']
_errors = [error]
_defaultmod = None

# Try every backend in order; the first one that imports becomes the default
# used for new databases, and every importable backend contributes its error.
for _name in _names:
    try:
        _mod = __import__(_name)
    except ImportError:
        continue
    if not _defaultmod:
        _defaultmod = _mod
    _errors.append(_mod.error)

if not _defaultmod:
    raise ImportError, "no dbm clone found; tried %s" % _names

# From here on ``error`` is a tuple of exception classes, with this module's
# own class first.
error = tuple(_errors)
def open(file, flag='r', mode=0o666):
    """Open or create the database stored at *file*.

    *flag* is 'r' (default, read-only), 'w' (read-write, existing database),
    'c' (read-write, create if missing) or 'n' (read-write, always create a
    new database).

    'r' and 'w' fail when the database does not exist.
    """
    # guess the type of an existing database
    from whichdb import whichdb
    detected = whichdb(file)

    if detected == "":
        # db type cannot be determined
        # (``error`` is a tuple here; its first item is this module's class)
        raise error[0]("db type could not be determined")

    if detected is not None:
        backend = __import__(detected)
        return backend.open(file, flag, mode)

    # db doesn't exist
    if 'c' in flag or 'n' in flag:
        # file doesn't exist and the new
        # flag was used so use default type
        return _defaultmod.open(file, flag, mode)

    raise error[0]("need 'c' or 'n' flag to open new db")
-552
View File
@@ -1,552 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2005-2010 ActiveState Software Inc.
# Copyright (c) 2013 Eddy Petrișor
"""Utilities for determining application-specific dirs.
See <http://github.com/ActiveState/appdirs> for details and usage.
"""
# Dev Notes:
# - MSDN on where to store app data files:
# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
__version_info__ = (1, 4, 0)
__version__ = '.'.join(map(str, __version_info__))
import sys
import os
PY3 = sys.version_info[0] == 3

if PY3:
    unicode = str

# ``system`` holds a sys.platform-style string; on Jython it is derived from
# the underlying OS name instead.
if not sys.platform.startswith('java'):
    system = sys.platform
else:
    import platform
    os_name = platform.java_ver()[3][0]
    if os_name.startswith('Windows'):  # "Windows XP", "Windows 7", etc.
        system = 'win32'
    elif os_name.startswith('Mac'):  # "Mac OS X", etc.
        system = 'darwin'
    else:  # "Linux", "SunOS", "FreeBSD", etc.
        # Setting this to "linux2" is not ideal, but only Windows or Mac
        # are actually checked for and the rest of the module expects
        # *sys.platform* style strings.
        system = 'linux2'
def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
    r"""Return full path to the user-specific data dir for this application.

        "appname" is the name of application.
            If None, just the system directory is returned.
        "appauthor" (only used on Windows) is the name of the
            appauthor or distributing body for this application. Typically
            it is the owning company name. This falls back to appname. You may
            pass False to disable it.
        "version" is an optional version path element to append to the
            path. You might want to use this if you want multiple versions
            of your app to be able to run independently. If used, this
            would typically be "<major>.<minor>".
            Only applied when appname is present.
        "roaming" (boolean, default False) can be set True to use the Windows
            roaming appdata directory. That means that for users on a Windows
            network setup for roaming profiles, this user data will be
            sync'd on login. See
            <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
            for a discussion of issues.

    Typical user data directories are:
        Mac OS X:               ~/Library/Application Support/<AppName>
        Unix:                   ~/.local/share/<AppName>    # or in $XDG_DATA_HOME, if defined
        Win XP (not roaming):   C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
        Win XP (roaming):       C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
        Win 7  (not roaming):   C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
        Win 7  (roaming):       C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>

    NOTE(review): the two Win XP rows look swapped relative to the CSIDL
    constants selected below -- confirm before relying on them.

    For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
    That means, by default "~/.local/share/<AppName>". An empty
    $XDG_DATA_HOME is treated like an unset one.
    """
    if system == "win32":
        if appauthor is None:
            appauthor = appname
        const = "CSIDL_APPDATA" if roaming else "CSIDL_LOCAL_APPDATA"
        path = os.path.normpath(_get_win_folder(const))
        if appname:
            if appauthor is not False:
                path = os.path.join(path, appauthor, appname)
            else:
                path = os.path.join(path, appname)
    elif system == 'darwin':
        path = os.path.expanduser('~/Library/Application Support/')
        if appname:
            path = os.path.join(path, appname)
    else:
        # ``or`` instead of a getenv default: a variable that is set but empty
        # must fall back too, otherwise the result would be a relative path.
        path = os.getenv('XDG_DATA_HOME') or os.path.expanduser("~/.local/share")
        if appname:
            path = os.path.join(path, appname)
    # the version element is only meaningful below an application directory
    if appname and version:
        path = os.path.join(path, version)
    return path
def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
    r"""Return full path to the user-shared data dir for this application.

        "appname" is the name of application.
            If None, just the system directory is returned.
        "appauthor" (only used on Windows) is the name of the
            appauthor or distributing body for this application. Typically
            it is the owning company name. This falls back to appname. You may
            pass False to disable it.
        "version" is an optional version path element to append to the
            path. You might want to use this if you want multiple versions
            of your app to be able to run independently. If used, this
            would typically be "<major>.<minor>".
            Only applied when appname is present.
        "multipath" is an optional parameter only applicable to *nix
            which indicates that the entire list of data dirs should be
            returned. By default, the first item from XDG_DATA_DIRS is
            returned, or '/usr/local/share/<AppName>',
            if XDG_DATA_DIRS is not set

    Typical user data directories are:
        Mac OS X:   /Library/Application Support/<AppName>
        Unix:       /usr/local/share/<AppName> or /usr/share/<AppName>
        Win XP:     C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
        Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
        Win 7:      C:\ProgramData\<AppAuthor>\<AppName>   # Hidden, but writeable on Win 7.

    For Unix, this is using the $XDG_DATA_DIRS[0] default.

    WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
    """
    # NOTE: the docstring is a raw string because the Windows example paths
    # contain backslashes that are not valid escape sequences.
    if system == "win32":
        if appauthor is None:
            appauthor = appname
        path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
        if appname:
            if appauthor is not False:
                path = os.path.join(path, appauthor, appname)
            else:
                path = os.path.join(path, appname)
    elif system == 'darwin':
        path = os.path.expanduser('/Library/Application Support')
        if appname:
            path = os.path.join(path, appname)
    else:
        # XDG default for $XDG_DATA_DIRS
        # only first, if multipath is False
        path = os.getenv('XDG_DATA_DIRS',
                         os.pathsep.join(['/usr/local/share', '/usr/share']))
        pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
        if appname:
            if version:
                appname = os.path.join(appname, version)
            pathlist = [os.sep.join([x, appname]) for x in pathlist]

        if multipath:
            path = os.pathsep.join(pathlist)
        else:
            path = pathlist[0]
        # The version was already folded into every entry above, so the
        # shared version handling below must be skipped on this branch.
        return path

    if appname and version:
        path = os.path.join(path, version)
    return path
def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
    r"""Return the per-user configuration directory for this application.

        "appname" is the application name. When it is None only the
            system-level directory is returned.
        "appauthor" (Windows only) is the author or distributing body of
            the application, typically the owning company. It falls back
            to appname; pass False to leave it out of the path.
        "version" is an optional trailing path element, typically
            "<major>.<minor>", letting several versions of the app run
            side by side. It is only applied when appname is given.
        "roaming" (boolean, default False) selects the Windows roaming
            appdata directory, which is sync'd on login for users with
            roaming profiles. See
            <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
            for a discussion of issues.

    Typical user config directories are:
        Mac OS X:               same as user_data_dir
        Unix:                   ~/.config/<AppName>     # or in $XDG_CONFIG_HOME, if defined
        Win *:                  same as user_data_dir

    For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
    That means, by default "~/.config/<AppName>".
    """
    if system in ("win32", "darwin"):
        # Delegate without the version; it is appended once at the end.
        base = user_data_dir(appname, appauthor, None, roaming)
    else:
        fallback_home = os.path.expanduser("~/.config")
        base = os.getenv('XDG_CONFIG_HOME', fallback_home)
        if appname:
            base = os.path.join(base, appname)

    if not (appname and version):
        return base
    return os.path.join(base, version)
def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
    r"""Return full path to the user-shared config dir for this application.

        "appname" is the name of application.
            If None, just the system directory is returned.
        "appauthor" (only used on Windows) is the name of the
            appauthor or distributing body for this application. Typically
            it is the owning company name. This falls back to appname. You may
            pass False to disable it.
        "version" is an optional version path element to append to the
            path. You might want to use this if you want multiple versions
            of your app to be able to run independently. If used, this
            would typically be "<major>.<minor>".
            Only applied when appname is present.
        "multipath" is an optional parameter only applicable to *nix
            which indicates that the entire list of config dirs should be
            returned. By default, the first item from XDG_CONFIG_DIRS is
            returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set

    Typical site config directories are:
        Mac OS X:   same as site_data_dir
        Unix:       /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
                    $XDG_CONFIG_DIRS
        Win *:      same as site_data_dir
        Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)

    For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False

    WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
    """
    # NOTE: the docstring is a raw string because "C:\ProgramData" contains
    # a backslash sequence that is not a valid escape.
    if system in ["win32", "darwin"]:
        # site_data_dir is called without the version so that it is
        # appended exactly once, here.
        path = site_data_dir(appname, appauthor)
        if appname and version:
            path = os.path.join(path, version)
    else:
        # XDG default for $XDG_CONFIG_DIRS
        # only first, if multipath is False
        path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg')
        pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
        if appname:
            if version:
                appname = os.path.join(appname, version)
            pathlist = [os.sep.join([x, appname]) for x in pathlist]

        if multipath:
            path = os.pathsep.join(pathlist)
        else:
            path = pathlist[0]
    return path
def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
    r"""Return the per-user cache directory for this application.

        "appname" is the application name. When it is None only the
            system-level directory is returned.
        "appauthor" (Windows only) is the author or distributing body of
            the application, typically the owning company. It falls back
            to appname; pass False to leave it out of the path.
        "version" is an optional trailing path element, typically
            "<major>.<minor>", letting several versions of the app run
            side by side. It is only applied when appname is given.
        "opinion" (boolean) can be False to stop "Cache" being appended
            to the base app data dir on Windows. See discussion below.

    Typical user cache directories are:
        Mac OS X:   ~/Library/Caches/<AppName>
        Unix:       ~/.cache/<AppName> (XDG default)
        Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
        Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache

    On Windows the only suggestion in the MSDN docs is that local settings go in
    the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
    app data dir (the default returned by `user_data_dir` above). Apps typically
    put cache data somewhere *under* the given dir here. Some examples:
        ...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
        ...\Acme\SuperApp\Cache\1.0
    OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
    This can be disabled with the `opinion=False` option.
    """
    if system == "win32":
        author = appname if appauthor is None else appauthor
        cache_root = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
        if appname:
            # Collect the path pieces first, then join them in one go.
            pieces = [appname] if author is False else [author, appname]
            if opinion:
                pieces.append("Cache")
            cache_root = os.path.join(cache_root, *pieces)
    elif system == 'darwin':
        cache_root = os.path.expanduser('~/Library/Caches')
        if appname:
            cache_root = os.path.join(cache_root, appname)
    else:
        cache_root = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
        if appname:
            cache_root = os.path.join(cache_root, appname)

    if not (appname and version):
        return cache_root
    return os.path.join(cache_root, version)
def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
    r"""Return full path to the user-specific log dir for this application.

        "appname" is the name of application.
            If None, just the system directory is returned.
        "appauthor" (only used on Windows) is the name of the
            appauthor or distributing body for this application. Typically
            it is the owning company name. This falls back to appname. You may
            pass False to disable it.
        "version" is an optional version path element to append to the
            path. You might want to use this if you want multiple versions
            of your app to be able to run independently. If used, this
            would typically be "<major>.<minor>".
            Only applied when appname is present.
        "opinion" (boolean) can be False to disable the appending of
            "Logs" to the base app data dir for Windows, and "log" to the
            base cache dir for Unix. See discussion below.

    Typical user log directories are:
        Mac OS X:   ~/Library/Logs/<AppName>
        Unix:       ~/.cache/<AppName>/log  # or under $XDG_CACHE_HOME if defined
        Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
        Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs

    On Windows the only suggestion in the MSDN docs is that local settings
    go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
    examples of what some windows apps use for a logs dir.)

    OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
    value for Windows and appends "log" to the user cache dir for Unix.
    This can be disabled with the `opinion=False` option.
    """
    if system == "darwin":
        path = os.path.expanduser('~/Library/Logs')
        # Only append the app name when one was given; joining with None
        # raises TypeError, while the contract above promises the bare
        # system directory in that case.
        if appname:
            path = os.path.join(path, appname)
    elif system == "win32":
        path = user_data_dir(appname, appauthor, version)
        # The version is already part of the delegated path; clear it so the
        # shared tail below does not append it a second time.
        version = False
        if opinion:
            path = os.path.join(path, "Logs")
    else:
        path = user_cache_dir(appname, appauthor, version)
        version = False
        if opinion:
            path = os.path.join(path, "log")
    if appname and version:
        path = os.path.join(path, version)
    return path
class AppDirs(object):
    """Convenience wrapper for getting application dirs.

    The constructor arguments are stored as-is, and each property forwards
    them to the module-level function of the same name on every access.
    """

    def __init__(self, appname, appauthor=None, version=None, roaming=False,
                 multipath=False):
        self.appname, self.appauthor = appname, appauthor
        self.version = version
        self.roaming, self.multipath = roaming, multipath

    @property
    def user_data_dir(self):
        """Per-user data directory (honours ``roaming``)."""
        located = user_data_dir(self.appname, self.appauthor,
                                version=self.version, roaming=self.roaming)
        return located

    @property
    def site_data_dir(self):
        """Shared data directory (honours ``multipath``)."""
        located = site_data_dir(self.appname, self.appauthor,
                                version=self.version, multipath=self.multipath)
        return located

    @property
    def user_config_dir(self):
        """Per-user config directory (honours ``roaming``)."""
        located = user_config_dir(self.appname, self.appauthor,
                                  version=self.version, roaming=self.roaming)
        return located

    @property
    def site_config_dir(self):
        """Shared config directory (honours ``multipath``)."""
        located = site_config_dir(self.appname, self.appauthor,
                                  version=self.version, multipath=self.multipath)
        return located

    @property
    def user_cache_dir(self):
        """Per-user cache directory."""
        located = user_cache_dir(self.appname, self.appauthor,
                                 version=self.version)
        return located

    @property
    def user_log_dir(self):
        """Per-user log directory."""
        located = user_log_dir(self.appname, self.appauthor,
                               version=self.version)
        return located
#---- internal support stuff
def _get_win_folder_from_registry(csidl_name):
    """Look up a shell folder path in the current user's registry hive.

    This is a fallback technique at best. I'm not sure if using the
    registry for this guarantees us the correct answer for all CSIDL_*
    names.

    "csidl_name" must be one of "CSIDL_APPDATA", "CSIDL_COMMON_APPDATA" or
    "CSIDL_LOCAL_APPDATA"; any other value raises KeyError.
    """
    # The module is called ``winreg`` on Python 3 and ``_winreg`` on Python 2.
    try:
        import winreg as _winreg
    except ImportError:
        import _winreg

    shell_folder_name = {
        "CSIDL_APPDATA": "AppData",
        "CSIDL_COMMON_APPDATA": "Common AppData",
        "CSIDL_LOCAL_APPDATA": "Local AppData",
    }[csidl_name]

    key = _winreg.OpenKey(
        _winreg.HKEY_CURRENT_USER,
        r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
    )
    try:
        # QueryValueEx returns (value, registry type); only the value is used.
        folder, _value_type = _winreg.QueryValueEx(key, shell_folder_name)
    finally:
        # Release the key handle explicitly rather than waiting for GC.
        _winreg.CloseKey(key)
    return folder
def _get_win_folder_with_pywin32(csidl_name):
    """Resolve a CSIDL_* folder name through pywin32's SHGetFolderPath."""
    from win32com.shell import shellcon, shell
    folder = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
    # Try to make this a unicode path because SHGetFolderPath does
    # not return unicode strings when there is unicode data in the
    # path.
    try:
        folder = unicode(folder)

        # Downgrade to short path name if have highbit chars. See
        # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
        if any(ord(ch) > 255 for ch in folder):
            try:
                import win32api
                folder = win32api.GetShortPathName(folder)
            except ImportError:
                # win32api is unavailable: keep the long path as it is.
                pass
    except UnicodeError:
        pass
    return folder
def _get_win_folder_with_ctypes(csidl_name):
    """Resolve a CSIDL_* folder name by calling shell32 through ctypes.

    Only "CSIDL_APPDATA", "CSIDL_COMMON_APPDATA" and "CSIDL_LOCAL_APPDATA"
    are known; any other name raises KeyError.
    """
    import ctypes

    csidl_const = {
        "CSIDL_APPDATA": 26,
        "CSIDL_COMMON_APPDATA": 35,
        "CSIDL_LOCAL_APPDATA": 28,
    }[csidl_name]

    path_buf = ctypes.create_unicode_buffer(1024)
    ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, path_buf)

    # Downgrade to short path name if have highbit chars. See
    # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
    if any(ord(ch) > 255 for ch in path_buf):
        short_buf = ctypes.create_unicode_buffer(1024)
        if ctypes.windll.kernel32.GetShortPathNameW(path_buf.value, short_buf, 1024):
            path_buf = short_buf

    return path_buf.value
def _get_win_folder_with_jna(csidl_name):
    """Resolve a CSIDL_* folder name through JNA (selected when the
    ``com.sun.jna`` import succeeds, i.e. on a Java-hosted interpreter).
    """
    import array
    from com.sun import jna
    from com.sun.jna.platform import win32

    buf_size = win32.WinDef.MAX_PATH * 2
    buf = array.zeros('c', buf_size)
    shell = win32.Shell32.INSTANCE
    shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
    folder = jna.Native.toString(buf.tostring()).rstrip("\0")

    # Downgrade to short path name if have highbit chars. See
    # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
    has_high_char = any(ord(c) > 255 for c in folder)
    if has_high_char:
        buf = array.zeros('c', buf_size)
        kernel = win32.Kernel32.INSTANCE
        # Was misspelled ``kernal``, which raised NameError on this branch.
        if kernel.GetShortPathName(folder, buf, buf_size):
            folder = jna.Native.toString(buf.tostring()).rstrip("\0")

    return folder
# Pick the best available backend for _get_win_folder on Windows, in order
# of preference: pywin32, ctypes, JNA, and finally the registry fallback.
if system == "win32":
    try:
        import win32com.shell
    except ImportError:
        try:
            from ctypes import windll
        except ImportError:
            try:
                import com.sun.jna
            except ImportError:
                _get_win_folder = _get_win_folder_from_registry
            else:
                _get_win_folder = _get_win_folder_with_jna
        else:
            _get_win_folder = _get_win_folder_with_ctypes
    else:
        _get_win_folder = _get_win_folder_with_pywin32
#---- self test code
if __name__ == "__main__":
    # Self test: print every directory property for a few argument mixes.
    appname = "MyApp"
    appauthor = "MyCompany"

    props = ("user_data_dir", "site_data_dir",
             "user_config_dir", "site_config_dir",
             "user_cache_dir", "user_log_dir")

    # (banner, positional args, keyword args) for each AppDirs variant.
    scenarios = (
        ("-- app dirs (with optional 'version')",
         (appname, appauthor), {"version": "1.0"}),
        ("\n-- app dirs (without optional 'version')",
         (appname, appauthor), {}),
        ("\n-- app dirs (without optional 'appauthor')",
         (appname,), {}),
        ("\n-- app dirs (with disabled 'appauthor')",
         (appname,), {"appauthor": False}),
    )

    for banner, args, kwargs in scenarios:
        print(banner)
        dirs = AppDirs(*args, **kwargs)
        for prop in props:
            print("%s: %s" % (prop, getattr(dirs, prop)))
File diff suppressed because it is too large Load Diff
@@ -1,61 +0,0 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from asio.file import SEEK_ORIGIN_CURRENT
from asio.file_opener import FileOpener
from asio.open_parameters import OpenParameters
from asio.interfaces.posix import PosixInterface
from asio.interfaces.windows import WindowsInterface
import os
class ASIO(object):
    """Entry point that dispatches file operations to the platform backend."""

    # Cached backend class; filled in lazily by get_handler().
    platform_handler = None

    @classmethod
    def get_handler(cls):
        """Return the backend for the current ``os.name``, caching it.

        :raises NotImplementedError: when ``os.name`` is neither 'nt' nor 'posix'
        """
        handler = cls.platform_handler
        if handler:
            return handler

        if os.name == 'nt':
            handler = WindowsInterface
        elif os.name == 'posix':
            handler = PosixInterface
        else:
            raise NotImplementedError()

        cls.platform_handler = handler
        return handler

    @classmethod
    def open(cls, file_path, opener=True, parameters=None):
        """Open file

        :type file_path: str

        :param opener: Use FileOpener, for use with the 'with' statement
        :type opener: bool

        :param parameters: per-backend open options; a default
            OpenParameters() is used when omitted

        :rtype: asio.file.File
        """
        params = parameters or OpenParameters()

        if opener:
            return FileOpener(file_path, params)

        backend = ASIO.get_handler()
        return backend.open(
            file_path,
            parameters=params.handlers.get(backend)
        )
-92
View File
@@ -1,92 +0,0 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from io import RawIOBase
import time
DEFAULT_BUFFER_SIZE = 4096
SEEK_ORIGIN_BEGIN = 0
SEEK_ORIGIN_CURRENT = 1
SEEK_ORIGIN_END = 2
class ReadTimeoutError(Exception):
    """Error type for a read that timed out.

    NOTE(review): nothing in the visible code raises this -- confirm usage.
    """
class File(RawIOBase):
    """Raw stream that forwards every operation to a platform handler.

    Subclasses set ``platform_handler`` to the backend class that implements
    the operations for their kind of handle.
    """

    # Backend class used by get_handler(); None on the abstract base.
    platform_handler = None

    def __init__(self, *args, **kwargs):
        super(File, self).__init__(*args, **kwargs)

    def get_handler(self):
        """Return the backend for this file.

        :rtype: asio.interfaces.base.Interface
        :raises ValueError: when no ``platform_handler`` is set
        """
        if not self.platform_handler:
            raise ValueError()

        return self.platform_handler

    def get_size(self):
        """Get the current file size

        :rtype: int
        """
        return self.get_handler().get_size(self)

    def get_path(self):
        """Get the path of this file

        :rtype: str
        """
        return self.get_handler().get_path(self)

    def seek(self, offset, origin):
        """Sets a reference point of a file to the given value.

        :param offset: The point relative to origin to move
        :type offset: int

        :param origin: Reference point to seek (SEEK_ORIGIN_BEGIN, SEEK_ORIGIN_CURRENT, SEEK_ORIGIN_END)
        :type origin: int
        """
        return self.get_handler().seek(self, offset, origin)

    def read(self, n=-1):
        """Read up to n bytes from the object and return them.

        :type n: int
        :rtype: str
        """
        return self.get_handler().read(self, n)

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b and return the number of bytes read."""
        data = self.read(len(b))

        # The backend returns None when it has no data to hand back.
        if data is None:
            return None

        b[:len(data)] = data
        return len(data)

    def close(self):
        """Close the file handle.

        Returns the backend's result on the first call and None on any
        later call. The stream is marked closed so that a repeated close --
        including the implicit one issued by ``IOBase.__del__`` -- does not
        release the OS handle a second time (by then it may have been
        reused for an unrelated file).
        """
        if self.closed:
            return None

        try:
            return self.get_handler().close(self)
        finally:
            # Mark the stream closed even if the backend raised.
            super(File, self).close()

    def readable(self, *args, **kwargs):
        """Report the stream as readable."""
        return True
@@ -1,21 +0,0 @@
class FileOpener(object):
    """Context manager that opens a file on entry and closes it on exit."""

    def __init__(self, file_path, parameters=None):
        self.file_path, self.parameters = file_path, parameters
        # Set by __enter__, cleared again by __exit__.
        self.file = None

    def __enter__(self):
        """Open the file through the platform backend and return it."""
        backend = ASIO.get_handler()
        self.file = backend.open(
            self.file_path,
            self.parameters.handlers.get(backend)
        )
        return self.file

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the file if one is open; exceptions are not suppressed."""
        if self.file:
            self.file.close()
            self.file = None
@@ -1,41 +0,0 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from asio.file import DEFAULT_BUFFER_SIZE
class Interface(object):
    """Contract for platform backends.

    Every operation is a classmethod taking the file object as ``fp`` and
    raises NotImplementedError until a backend overrides it.
    """

    @classmethod
    def open(cls, file_path, parameters=None):
        """Open ``file_path`` and return the backend's file object."""
        raise NotImplementedError()

    @classmethod
    def get_size(cls, fp):
        """Return the size of ``fp``."""
        raise NotImplementedError()

    @classmethod
    def get_path(cls, fp):
        """Return the path of ``fp``."""
        raise NotImplementedError()

    @classmethod
    def seek(cls, fp, pointer, distance):
        """Move the position of ``fp``."""
        raise NotImplementedError()

    @classmethod
    def read(cls, fp, n=DEFAULT_BUFFER_SIZE):
        """Read up to ``n`` bytes from ``fp``."""
        raise NotImplementedError()

    @classmethod
    def close(cls, fp):
        """Close ``fp``."""
        raise NotImplementedError()
@@ -1,123 +0,0 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from asio.file import File, DEFAULT_BUFFER_SIZE
from asio.interfaces.base import Interface
import sys
import os
if os.name == 'posix':
import select
# fcntl is only required on darwin
if sys.platform == 'darwin':
import fcntl
F_GETPATH = 50
class PosixInterface(Interface):
    """POSIX backend built on raw file descriptors (``os.open`` and friends)."""

    @classmethod
    def open(cls, file_path, parameters=None):
        """Open ``file_path`` read-only and non-blocking.

        :type file_path: str
        :param parameters: accepted for interface compatibility; the
            'mode' and 'buffering' options are not used by this backend
        :rtype: asio.interfaces.posix.PosixFile
        """
        # The previous implementation popped unset 'mode'/'buffering' keys
        # out of the caller's dict. That raised KeyError whenever a key was
        # absent (parameters=None, or a second open with the same
        # OpenParameters) and mutated shared state, although the options
        # were never used. They are now simply ignored.
        fd = os.open(file_path, os.O_RDONLY | os.O_NONBLOCK)

        return PosixFile(fd)

    @classmethod
    def get_size(cls, fp):
        """Return the file size reported by ``fstat``.

        :type fp: asio.interfaces.posix.PosixFile
        :rtype: int
        """
        return os.fstat(fp.fd).st_size

    @classmethod
    def get_path(cls, fp):
        """Return the path behind the open descriptor.

        :type fp: asio.interfaces.posix.PosixFile
        :rtype: str
        :raises NotImplementedError: when neither /proc/self/fd nor /dev/fd exists
        """
        # readlink /dev/fd fails on darwin, so instead use fcntl F_GETPATH
        if sys.platform == 'darwin':
            return fcntl.fcntl(fp.fd, F_GETPATH, '\0' * 1024).rstrip('\0')

        # Use /proc/self/fd if available
        if os.path.lexists("/proc/self/fd/"):
            return os.readlink("/proc/self/fd/%s" % fp.fd)

        # Fallback to /dev/fd
        if os.path.lexists("/dev/fd/"):
            return os.readlink("/dev/fd/%s" % fp.fd)

        raise NotImplementedError('Environment not supported (fdescfs not mounted?)')

    @classmethod
    def seek(cls, fp, offset, origin):
        """Move the descriptor's offset and return the new position.

        :type fp: asio.interfaces.posix.PosixFile
        :type offset: int
        :type origin: int
        :rtype: int
        """
        # Return the resulting offset, as the Windows backend does.
        return os.lseek(fp.fd, offset, origin)

    @classmethod
    def read(cls, fp, n=DEFAULT_BUFFER_SIZE):
        """Read up to ``n`` bytes, waiting at most 5 seconds for data.

        :type fp: asio.interfaces.posix.PosixFile
        :type n: int
        :rtype: str
        :return: the data read, or None when the descriptor did not become
            readable within the timeout
        """
        r, w, x = select.select([fp.fd], [], [], 5)

        if r:
            return os.read(fp.fd, n)

        return None

    @classmethod
    def close(cls, fp):
        """Close the underlying descriptor.

        :type fp: asio.interfaces.posix.PosixFile
        """
        os.close(fp.fd)
class PosixFile(File):
    """File backed by a POSIX file descriptor."""

    platform_handler = PosixInterface

    def __init__(self, fd, *args, **kwargs):
        """
        :param fd: descriptor as returned by ``os.open``
        """
        super(PosixFile, self).__init__(*args, **kwargs)

        self.fd = fd

    def __str__(self):
        description = "<asio_posix.PosixFile file: %s>" % self.fd
        return description
@@ -1,201 +0,0 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from asio.file import File, DEFAULT_BUFFER_SIZE
from asio.interfaces.base import Interface
import os
NULL = 0
if os.name == 'nt':
from asio.interfaces.windows.interop import WindowsInterop
class WindowsInterface(Interface):
    """Windows backend; every call is forwarded to WindowsInterop."""

    @classmethod
    def open(cls, file_path, parameters=None):
        """Open ``file_path`` via WindowsInterop.create_file.

        Options missing from ``parameters`` default to read access, sharing
        with all other openers, and opening an existing file only.

        :type file_path: str
        :type parameters: dict
        :rtype: asio.interfaces.windows.WindowsFile
        """
        if not parameters:
            parameters = {}

        return WindowsFile(WindowsInterop.create_file(
            file_path,
            parameters.get('desired_access', WindowsInterface.GenericAccess.READ),
            parameters.get('share_mode', WindowsInterface.ShareMode.ALL),
            parameters.get('creation_disposition', WindowsInterface.CreationDisposition.OPEN_EXISTING),
            parameters.get('flags_and_attributes', NULL)
        ))

    @classmethod
    def get_size(cls, fp):
        """Return the file size.

        :type fp: asio.interfaces.windows.WindowsFile
        :rtype: int
        """
        return WindowsInterop.get_file_size(fp.handle)

    @classmethod
    def get_path(cls, fp):
        """Return the file's path, found through a file mapping.

        The mapping and its one-byte view are created on first use and kept
        on ``fp`` so that close() can release them.

        :type fp: asio.interfaces.windows.WindowsFile
        :rtype: str
        """
        if not fp.file_map:
            fp.file_map = WindowsInterop.create_file_mapping(fp.handle, WindowsInterface.Protection.READONLY)

        if not fp.map_view:
            fp.map_view = WindowsInterop.map_view_of_file(fp.file_map, WindowsInterface.FileMapAccess.READ, 1)

        file_name = WindowsInterop.get_mapped_file_name(fp.map_view)

        return file_name

    @classmethod
    def seek(cls, fp, offset, origin):
        """Move the file pointer and return the interop call's result.

        :type fp: asio.interfaces.windows.WindowsFile
        :type offset: int
        :type origin: int
        :rtype: int
        """
        return WindowsInterop.set_file_pointer(
            fp.handle,
            offset,
            origin
        )

    @classmethod
    def read(cls, fp, n=DEFAULT_BUFFER_SIZE):
        """Read up to ``n`` bytes.

        :type fp: asio.interfaces.windows.WindowsFile
        :type n: int
        :rtype: str
        """
        return WindowsInterop.read(fp.handle, n)

    @classmethod
    def read_into(cls, fp, b):
        """Read up to ``len(b)`` bytes into ``b``.

        :type fp: asio.interfaces.windows.WindowsFile
        :type b: str
        :rtype: int
        """
        return WindowsInterop.read_into(fp.handle, b)

    @classmethod
    def close(cls, fp):
        """Release the map view and mapping (if any), then the file handle.

        :type fp: asio.interfaces.windows.WindowsFile
        :rtype: bool
        """
        if fp.map_view:
            WindowsInterop.unmap_view_of_file(fp.map_view)

        if fp.file_map:
            WindowsInterop.close_handle(fp.file_map)

        return bool(WindowsInterop.close_handle(fp.handle))

    # Values for the 'desired_access' open parameter.
    class GenericAccess(object):
        READ = 0x80000000
        WRITE = 0x40000000
        EXECUTE = 0x20000000
        ALL = 0x10000000

    # Values for the 'share_mode' open parameter.
    class ShareMode(object):
        READ = 0x00000001
        WRITE = 0x00000002
        DELETE = 0x00000004
        ALL = READ | WRITE | DELETE

    # Values for the 'creation_disposition' open parameter.
    class CreationDisposition(object):
        CREATE_NEW = 1
        CREATE_ALWAYS = 2
        OPEN_EXISTING = 3
        OPEN_ALWAYS = 4
        TRUNCATE_EXISTING = 5

    # File attribute bits for the 'flags_and_attributes' open parameter.
    class Attribute(object):
        READONLY = 0x00000001
        HIDDEN = 0x00000002
        SYSTEM = 0x00000004
        DIRECTORY = 0x00000010
        ARCHIVE = 0x00000020
        DEVICE = 0x00000040
        NORMAL = 0x00000080
        TEMPORARY = 0x00000100
        SPARSE_FILE = 0x00000200
        REPARSE_POINT = 0x00000400
        COMPRESSED = 0x00000800
        OFFLINE = 0x00001000
        NOT_CONTENT_INDEXED = 0x00002000
        ENCRYPTED = 0x00004000

    # File flag bits for the 'flags_and_attributes' open parameter.
    class Flag(object):
        WRITE_THROUGH = 0x80000000
        OVERLAPPED = 0x40000000
        NO_BUFFERING = 0x20000000
        RANDOM_ACCESS = 0x10000000
        SEQUENTIAL_SCAN = 0x08000000
        DELETE_ON_CLOSE = 0x04000000
        BACKUP_SEMANTICS = 0x02000000
        POSIX_SEMANTICS = 0x01000000
        OPEN_REPARSE_POINT = 0x00200000
        OPEN_NO_RECALL = 0x00100000
        FIRST_PIPE_INSTANCE = 0x00080000

    # Protection values passed to WindowsInterop.create_file_mapping.
    class Protection(object):
        NOACCESS = 0x01
        READONLY = 0x02
        READWRITE = 0x04
        WRITECOPY = 0x08
        EXECUTE = 0x10
        # Was ``0x20,`` -- the trailing comma turned the value into a tuple.
        EXECUTE_READ = 0x20
        EXECUTE_READWRITE = 0x40
        EXECUTE_WRITECOPY = 0x80
        GUARD = 0x100
        NOCACHE = 0x200
        WRITECOMBINE = 0x400

    # Access values passed to WindowsInterop.map_view_of_file.
    class FileMapAccess(object):
        COPY = 0x0001
        WRITE = 0x0002
        READ = 0x0004
        ALL_ACCESS = 0x001f
        EXECUTE = 0x0020
class WindowsFile(File):
    """File backed by a Windows file handle."""

    platform_handler = WindowsInterface

    def __init__(self, handle, *args, **kwargs):
        super(WindowsFile, self).__init__(*args, **kwargs)

        self.handle = handle

        # Filled in lazily by WindowsInterface.get_path, released on close.
        self.file_map = self.map_view = None

    def readinto(self, b):
        """Fill ``b`` through the backend's read_into."""
        backend = self.get_handler()
        return backend.read_into(self, b)

    def __str__(self):
        description = "<asio_windows.WindowsFile file: %s>" % self.handle
        return description
@@ -1,230 +0,0 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ctypes.wintypes import *
from ctypes import *
import logging
log = logging.getLogger(__name__)
# Explicit ctypes prototypes for the two kernel32 calls used by
# WindowsInterop.create_file / read / read_into below.
CreateFileW = windll.kernel32.CreateFileW
CreateFileW.argtypes = (LPCWSTR, DWORD, DWORD, c_void_p, DWORD, DWORD, HANDLE)
CreateFileW.restype = HANDLE
ReadFile = windll.kernel32.ReadFile
ReadFile.argtypes = (HANDLE, c_void_p, DWORD, POINTER(DWORD), HANDLE)
ReadFile.restype = BOOL
# Passed wherever an optional pointer/handle argument is left unset.
NULL = 0
# Buffer length (in characters) used for path lookups below.
MAX_PATH = 260
# Default number of bytes requested by WindowsInterop.read.
DEFAULT_BUFFER_SIZE = 4096
# Security attributes are only ever passed as a null pointer here.
LPSECURITY_ATTRIBUTES = c_void_p
class WindowsInterop(object):
ri_buffer = None
@classmethod
def create_file(cls, path, desired_access, share_mode, creation_disposition, flags_and_attributes):
h = CreateFileW(
path,
desired_access,
share_mode,
NULL,
creation_disposition,
flags_and_attributes,
NULL
)
error = GetLastError()
if error != 0:
raise Exception('[WindowsASIO.open] "%s"' % FormatError(error))
return h
@classmethod
def read(cls, handle, buf_size=DEFAULT_BUFFER_SIZE):
buf = create_string_buffer(buf_size)
bytes_read = c_ulong(0)
success = ReadFile(handle, buf, buf_size, byref(bytes_read), NULL)
error = GetLastError()
if error:
log.debug('read_file - error: (%s) "%s"', error, FormatError(error))
if not success and error:
raise Exception('[WindowsInterop.read_file] (%s) "%s"' % (error, FormatError(error)))
# Return if we have a valid buffer
if success and bytes_read.value:
return buf.value
return None
@classmethod
def read_into(cls, handle, b):
if cls.ri_buffer is None or len(cls.ri_buffer) < len(b):
cls.ri_buffer = create_string_buffer(len(b))
bytes_read = c_ulong(0)
success = ReadFile(handle, cls.ri_buffer, len(b), byref(bytes_read), NULL)
bytes_read = int(bytes_read.value)
b[:bytes_read] = cls.ri_buffer[:bytes_read]
error = GetLastError()
if not success and error:
raise Exception('[WindowsInterop.read_file] (%s) "%s"' % (error, FormatError(error)))
# Return if we have a valid buffer
if success and bytes_read:
return bytes_read
return None
@classmethod
def set_file_pointer(cls, handle, distance, method):
pos_high = DWORD(NULL)
result = windll.kernel32.SetFilePointer(
handle,
c_ulong(distance),
byref(pos_high),
DWORD(method)
)
if result == -1:
raise Exception('[WindowsASIO.seek] INVALID_SET_FILE_POINTER: "%s"' % FormatError(GetLastError()))
return result
@classmethod
def get_file_size(cls, handle):
return windll.kernel32.GetFileSize(
handle,
DWORD(NULL)
)
@classmethod
def close_handle(cls, handle):
return windll.kernel32.CloseHandle(handle)
@classmethod
def create_file_mapping(cls, handle, protect, maximum_size_high=0, maximum_size_low=1):
return HANDLE(windll.kernel32.CreateFileMappingW(
handle,
LPSECURITY_ATTRIBUTES(NULL),
DWORD(protect),
DWORD(maximum_size_high),
DWORD(maximum_size_low),
LPCSTR(NULL)
))
@classmethod
def map_view_of_file(cls, map_handle, desired_access, num_bytes, file_offset_high=0, file_offset_low=0):
return HANDLE(windll.kernel32.MapViewOfFile(
map_handle,
DWORD(desired_access),
DWORD(file_offset_high),
DWORD(file_offset_low),
num_bytes
))
@classmethod
def unmap_view_of_file(cls, view_handle):
return windll.kernel32.UnmapViewOfFile(view_handle)
@classmethod
def get_mapped_file_name(cls, view_handle, translate_device_name=True):
buf = create_string_buffer(MAX_PATH + 1)
result = windll.psapi.GetMappedFileNameW(
cls.get_current_process(),
view_handle,
buf,
MAX_PATH
)
# Raise exception on error
error = GetLastError()
if result == 0:
raise Exception(FormatError(error))
# Retrieve a clean file name (skipping over NUL bytes)
file_name = cls.clean_buffer_value(buf)
# If we are not translating the device name return here
if not translate_device_name:
return file_name
drives = cls.get_logical_drive_strings()
# Find the drive matching the file_name device name
translated = False
for drive in drives:
device_name = cls.query_dos_device(drive)
if file_name.startswith(device_name):
file_name = drive + file_name[len(device_name):]
translated = True
break
if not translated:
raise Exception('Unable to translate device name')
return file_name
@classmethod
def get_logical_drive_strings(cls, buf_size=512):
    """Return the drive names reported by ``kernel32.GetLogicalDriveStringsW``.

    :param buf_size: buffer length handed to the API
    :type buf_size: int
    :return: the non-empty pieces left after dropping NUL bytes and splitting on backslashes
    :rtype: list
    :raise Exception: when the API returns 0
    """
    # The W variant is given ``buf_size`` as a length in wide characters, so
    # reserve two bytes for each of them (Windows wchar_t is 16-bit) instead
    # of ``buf_size`` bytes in total.
    buf = create_string_buffer(buf_size * 2)

    result = windll.kernel32.GetLogicalDriveStringsW(buf_size, buf)

    # Capture the error code right after the call, before anything resets it
    error = GetLastError()

    if result == 0:
        raise Exception(FormatError(error))

    drive_strings = cls.clean_buffer_value(buf)
    return [dr for dr in drive_strings.split('\\') if dr != '']
@classmethod
def query_dos_device(cls, drive, buf_size=MAX_PATH):
    """Query ``kernel32.QueryDosDeviceA`` for ``drive`` and return the cleaned buffer.

    The API return value is not inspected; the buffer content (possibly
    empty) is returned with its NUL bytes removed.

    :param drive: device name handed to the API unchanged
    :param buf_size: size of the receiving buffer
    :type buf_size: int
    :return: buffer content without NUL bytes
    """
    target = create_string_buffer(buf_size)
    windll.kernel32.QueryDosDeviceA(drive, target, buf_size)
    return cls.clean_buffer_value(target)
@classmethod
def get_current_process(cls):
    """Return the result of ``kernel32.GetCurrentProcess`` wrapped in a ``HANDLE``."""
    process = windll.kernel32.GetCurrentProcess()
    return HANDLE(process)
@classmethod
def clean_buffer_value(cls, buf):
    """Return the content of the ctypes buffer ``buf`` with every NUL byte removed.

    Works on both Python 2 and Python 3: ``buf.raw`` is normalised to a
    ``bytearray`` so iteration always yields integers.

    :param buf: object exposing its bytes through a ``raw`` attribute
    :return: the remaining bytes as a native ``str``
    :rtype: str
    """
    kept = bytearray(byte for byte in bytearray(buf.raw) if byte != 0)

    if str is bytes:
        # Python 2: the native string type is the byte string itself
        return str(kept)

    # Python 3: map each byte to the character with the same code point
    return kept.decode('latin-1')
@@ -1,47 +0,0 @@
from asio.interfaces.posix import PosixInterface
from asio.interfaces.windows import WindowsInterface
class OpenParameters(object):
    """Per-interface keyword arguments, stored in ``handlers`` keyed by interface class."""

    def __init__(self):
        self.handlers = {}

        # Seed both interfaces with their default parameters
        self.posix()
        self.windows()

    def posix(self, mode=None, buffering=None):
        """Store the parameters for :class:`PosixInterface`.

        :type mode: str
        :type buffering: int
        """
        posix_options = {
            'mode': mode,
            'buffering': buffering
        }
        self.handlers[PosixInterface] = posix_options

    def windows(self, desired_access=WindowsInterface.GenericAccess.READ,
                share_mode=WindowsInterface.ShareMode.ALL,
                creation_disposition=WindowsInterface.CreationDisposition.OPEN_EXISTING,
                flags_and_attributes=0):
        """Store the parameters for :class:`WindowsInterface`.

        :param desired_access: WindowsInterface.GenericAccess
        :type desired_access: int

        :param share_mode: WindowsInterface.ShareMode
        :type share_mode: int

        :param creation_disposition: WindowsInterface.CreationDisposition
        :type creation_disposition: int

        :param flags_and_attributes: WindowsInterface.Attribute, WindowsInterface.Flag
        :type flags_and_attributes: int
        """
        windows_options = {
            'desired_access': desired_access,
            'share_mode': share_mode,
            'creation_disposition': creation_disposition,
            'flags_and_attributes': flags_and_attributes
        }
        self.handlers[WindowsInterface] = windows_options
@@ -1,25 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
__title__ = 'babelfish'
__version__ = '0.5.5-dev'
__author__ = 'Antoine Bertin'
__license__ = 'BSD'
__copyright__ = 'Copyright 2015 the BabelFish authors'
import sys
# Base class of text strings: ``str`` on Python 3, ``basestring`` before.
# The conditional expression only evaluates the branch it selects, so
# ``basestring`` is never looked up on Python 3.
basestr = str if sys.version_info[0] >= 3 else basestring
from .converters import (LanguageConverter, LanguageReverseConverter, LanguageEquivalenceConverter, CountryConverter,
CountryReverseConverter)
from .country import country_converters, COUNTRIES, COUNTRY_MATRIX, Country
from .exceptions import Error, LanguageConvertError, LanguageReverseError, CountryConvertError, CountryReverseError
from .language import language_converters, LANGUAGES, LANGUAGE_MATRIX, Language
from .script import SCRIPTS, SCRIPT_MATRIX, Script
@@ -1,287 +0,0 @@
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
import collections
try:
    from collections.abc import Mapping, MutableMapping
except ImportError:  # Python 2
    from collections import Mapping, MutableMapping
from pkg_resources import iter_entry_points, EntryPoint
from ..exceptions import LanguageConvertError, LanguageReverseError
# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
class CaseInsensitiveDict(MutableMapping):
    """A case-insensitive ``dict``-like object.

    Implements all methods and operations of
    ``collections.abc.MutableMapping`` as well as dict's ``copy``. Also
    provides ``lower_items``.

    The base classes are taken from ``collections.abc`` (with a fallback to
    ``collections`` on Python 2) because the aliases in ``collections``
    were removed in Python 3.10.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and ``iter(instance)``,
    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
    will contain case-sensitive keys. However, querying and contains
    testing is case insensitive:

        cid = CaseInsensitiveDict()
        cid['English'] = 'eng'
        cid['ENGLISH'] == 'eng'  # True
        list(cid) == ['English']  # True

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.
    """
    def __init__(self, data=None, **kwargs):
        # Maps lowercased key -> (key as last set, value)
        self._store = dict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        if isinstance(other, Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        """Return a new instance built from the stored (key, value) pairs."""
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
class LanguageConverter(object):
    """Base class for converters that turn an alpha3 language code, together
    with an alpha2 country code and a script code, into a custom code

    .. attribute:: codes

        Set of possible custom codes

    """
    def convert(self, alpha3, country=None, script=None):
        """Return the custom code for the given language, country and script

        Subclasses must override this method; the base implementation
        always raises :class:`NotImplementedError`.

        :param string alpha3: ISO-639-3 language code
        :param country: ISO-3166 country code, if any
        :type country: string or None
        :param script: ISO-15924 script code, if any
        :type script: string or None
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.LanguageConvertError`

        """
        raise NotImplementedError
class LanguageReverseConverter(LanguageConverter):
    """A :class:`LanguageConverter` that can also map a custom code back to an
    alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code

    """
    def reverse(self, code):
        """Return the alpha3, country and script codes behind a custom code

        Subclasses must override this method; the base implementation
        always raises :class:`NotImplementedError`.

        :param string code: custom code to reverse
        :return: the corresponding alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
        :rtype: tuple
        :raise: :class:`~babelfish.exceptions.LanguageReverseError`

        """
        raise NotImplementedError
class LanguageEquivalenceConverter(LanguageReverseConverter):
    """Helper to define a :class:`LanguageReverseConverter` from nothing more
    than a dict mapping alpha3 codes to their corresponding symbols.

    The dict of equivalence must be provided as a class variable named SYMBOLS.

    Reverse conversion is case-insensitive by default; set the class variable
    CASE_SENSITIVE to ``True`` to make it case-sensitive.

    Example::

        class MyCodeConverter(babelfish.LanguageEquivalenceConverter):
            CASE_SENSITIVE = True
            SYMBOLS = {'fra': 'mycode1', 'eng': 'mycode2'}

    """
    CASE_SENSITIVE = False

    def __init__(self):
        self.codes = set()
        self.to_symbol = {}
        # A plain dict gives case-sensitive reverse lookups
        self.from_symbol = {} if self.CASE_SENSITIVE else CaseInsensitiveDict()

        for alpha3, symbol in self.SYMBOLS.items():
            self.codes.add(symbol)
            self.to_symbol[alpha3] = symbol
            # Country and script are not part of an equivalence table
            self.from_symbol[symbol] = (alpha3, None, None)

    def convert(self, alpha3, country=None, script=None):
        try:
            symbol = self.to_symbol[alpha3]
        except KeyError:
            raise LanguageConvertError(alpha3, country, script)
        return symbol

    def reverse(self, code):
        try:
            triple = self.from_symbol[code]
        except KeyError:
            raise LanguageReverseError(code)
        return triple
class CountryConverter(object):
    """Base class for converters that turn an alpha2 country code
    into a custom code

    .. attribute:: codes

        Set of possible custom codes

    """
    def convert(self, alpha2):
        """Return the custom code for an alpha2 country code

        Subclasses must override this method; the base implementation
        always raises :class:`NotImplementedError`.

        :param string alpha2: ISO-3166-1 country code
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryConvertError`

        """
        raise NotImplementedError
class CountryReverseConverter(CountryConverter):
    """A :class:`CountryConverter` that can also map a custom code back to an
    alpha2 ISO-3166-1 country code

    """
    def reverse(self, code):
        """Return the alpha2 code behind a custom code

        Subclasses must override this method; the base implementation
        always raises :class:`NotImplementedError`.

        :param string code: custom code to reverse
        :return: the corresponding alpha2 ISO-3166-1 country code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryReverseError`

        """
        raise NotImplementedError
class ConverterManager(object):
    """Dict-like manager that loads babelfish converters lazily

    Lookup order for a converter that is not loaded yet:

        * Entry point converters
        * Registered converters
        * Internal converters

    .. attribute:: entry_point

        The entry point where to look for converters

    .. attribute:: internal_converters

        Internal converters with entry point syntax

    """
    entry_point = ''
    internal_converters = []

    def __init__(self):
        #: Registered converters with entry point syntax
        self.registered_converters = []

        #: Loaded converters
        self.converters = {}

    def __getitem__(self, name):
        """Get a converter, lazy loading it if necessary"""
        if name in self.converters:
            return self.converters[name]

        # First look among the installed entry points
        for ep in iter_entry_points(self.entry_point):
            if ep.name != name:
                continue
            self.converters[ep.name] = ep.load()()
            return self.converters[ep.name]

        # Then among the declared ones, registered before internal
        declared = self.registered_converters + self.internal_converters
        for ep in (EntryPoint.parse(spec) for spec in declared):
            if ep.name != name:
                continue

            # `require` argument of ep.load() is deprecated in newer versions of setuptools
            if hasattr(ep, 'resolve'):
                factory = ep.resolve()
            elif hasattr(ep, '_load'):
                factory = ep._load()
            else:
                factory = ep.load(require=False)

            self.converters[ep.name] = factory()
            return self.converters[ep.name]

        raise KeyError(name)

    def __setitem__(self, name, converter):
        """Load a converter"""
        self.converters[name] = converter

    def __delitem__(self, name):
        """Unload a converter"""
        del self.converters[name]

    def __iter__(self):
        """Iterator over loaded converters"""
        return iter(self.converters)

    def register(self, entry_point):
        """Register a converter, placing it ahead of earlier registrations

        :param string entry_point: converter to register (entry point syntax)
        :raise: ValueError if already registered

        """
        already_known = entry_point in self.registered_converters
        if already_known:
            raise ValueError('Already registered')
        self.registered_converters.insert(0, entry_point)

    def unregister(self, entry_point):
        """Unregister a converter

        :param string entry_point: converter to unregister (entry point syntax)

        """
        self.registered_converters.remove(entry_point)

    def __contains__(self, name):
        """Tell whether a converter is currently loaded under ``name``"""
        return name in self.converters
@@ -1,17 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class Alpha2Converter(LanguageEquivalenceConverter):
    """Case-sensitive equivalence between alpha3 codes and the ``alpha2`` field of :data:`LANGUAGE_MATRIX` rows"""
    CASE_SENSITIVE = True
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        # Rows with an empty alpha2 field have no equivalent
        if not iso_language.alpha2:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha2
@@ -1,17 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class Alpha3BConverter(LanguageEquivalenceConverter):
    """Case-sensitive equivalence between alpha3 codes and the ``alpha3b`` field of :data:`LANGUAGE_MATRIX` rows"""
    CASE_SENSITIVE = True
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        # Rows with an empty alpha3b field have no equivalent
        if not iso_language.alpha3b:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha3b
@@ -1,17 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class Alpha3TConverter(LanguageEquivalenceConverter):
    """Case-sensitive equivalence between alpha3 codes and the ``alpha3t`` field of :data:`LANGUAGE_MATRIX` rows"""
    CASE_SENSITIVE = True
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        # Rows with an empty alpha3t field have no equivalent
        if not iso_language.alpha3t:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha3t
@@ -1,31 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import CountryReverseConverter, CaseInsensitiveDict
from ..country import COUNTRY_MATRIX
from ..exceptions import CountryConvertError, CountryReverseError
class CountryNameConverter(CountryReverseConverter):
    """Converter between alpha2 country codes and the names listed in :data:`COUNTRY_MATRIX`

    Reverse lookups by name go through a :class:`CaseInsensitiveDict`.
    """
    def __init__(self):
        self.codes = set()
        self.to_name = {}
        self.from_name = CaseInsensitiveDict()
        for country in COUNTRY_MATRIX:
            name, alpha2 = country.name, country.alpha2
            self.codes.add(name)
            self.to_name[alpha2] = name
            self.from_name[name] = alpha2

    def convert(self, alpha2):
        if alpha2 in self.to_name:
            return self.to_name[alpha2]
        raise CountryConvertError(alpha2)

    def reverse(self, name):
        if name in self.from_name:
            return self.from_name[name]
        raise CountryReverseError(name)
@@ -1,17 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class NameConverter(LanguageEquivalenceConverter):
    """Case-insensitive equivalence between alpha3 codes and the ``name`` field of :data:`LANGUAGE_MATRIX` rows"""
    CASE_SENSITIVE = False
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        # Rows with an empty name field have no equivalent
        if not iso_language.name:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.name
@@ -1,36 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageReverseConverter, CaseInsensitiveDict
from ..exceptions import LanguageReverseError
from ..language import language_converters
class OpenSubtitlesConverter(LanguageReverseConverter):
    """Converter for OpenSubtitles language codes

    Built on the ``alpha3b`` and ``alpha2`` converters, with a small table of
    exceptions applied on top in both directions.
    """
    def __init__(self):
        self.alpha3b_converter = language_converters['alpha3b']
        self.alpha2_converter = language_converters['alpha2']

        # Exceptions keyed by (alpha3b, country)
        self.to_opensubtitles = {
            ('por', 'BR'): 'pob',
            ('gre', None): 'ell',
            ('srp', None): 'scc',
            ('srp', 'ME'): 'mne',
        }

        # Exceptions for the reverse direction, looked up case-insensitively
        self.from_opensubtitles = CaseInsensitiveDict({
            'pob': ('por', 'BR'),
            'pb': ('por', 'BR'),
            'ell': ('ell', None),
            'scc': ('srp', None),
            'mne': ('srp', 'ME'),
        })

        special_codes = set(self.from_opensubtitles.keys())
        self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | special_codes)

    def convert(self, alpha3, country=None, script=None):
        alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
        key = (alpha3b, country)
        if key in self.to_opensubtitles:
            return self.to_opensubtitles[key]
        return alpha3b

    def reverse(self, opensubtitles):
        if opensubtitles in self.from_opensubtitles:
            return self.from_opensubtitles[opensubtitles]

        # Fall back on the generic converters, alpha3b first
        for conv in (self.alpha3b_converter, self.alpha2_converter):
            try:
                return conv.reverse(opensubtitles)
            except LanguageReverseError:
                continue

        raise LanguageReverseError(opensubtitles)
@@ -1,23 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageConverter
from ..exceptions import LanguageConvertError
from ..language import LANGUAGE_MATRIX
class ScopeConverter(LanguageConverter):
    """Converter from an alpha3 code to the full name of the ``scope`` field of its :data:`LANGUAGE_MATRIX` row"""
    FULLNAME = {'I': 'individual', 'M': 'macrolanguage', 'S': 'special'}
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.scope
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        # An unknown alpha3 raises KeyError here, before the FULLNAME lookup
        symbol = self.SYMBOLS[alpha3]
        if symbol not in self.FULLNAME:
            raise LanguageConvertError(alpha3, country, script)
        return self.FULLNAME[symbol]
@@ -1,23 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageConverter
from ..exceptions import LanguageConvertError
from ..language import LANGUAGE_MATRIX
class LanguageTypeConverter(LanguageConverter):
    """Converter from an alpha3 code to the full name of the ``type`` field of its :data:`LANGUAGE_MATRIX` row"""
    FULLNAME = {'A': 'ancient', 'C': 'constructed', 'E': 'extinct', 'H': 'historical', 'L': 'living', 'S': 'special'}
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.type
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        # An unknown alpha3 raises KeyError here, before the FULLNAME lookup
        symbol = self.SYMBOLS[alpha3]
        if symbol not in self.FULLNAME:
            raise LanguageConvertError(alpha3, country, script)
        return self.FULLNAME[symbol]
@@ -1,106 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from collections import namedtuple
from functools import partial
from pkg_resources import resource_stream # @UnresolvedImport
from .converters import ConverterManager
from . import basestr
# alpha2 code -> country name
COUNTRIES = {}
# One IsoCountry per line of the bundled data file, in file order
COUNTRY_MATRIX = []

#: The namedtuple used in the :data:`COUNTRY_MATRIX`
IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])

# Fill both tables from the bundled ``name;alpha2`` list. The stream is
# closed in a ``finally`` clause so that a malformed line does not leave it
# open.
f = resource_stream('babelfish', 'data/iso-3166-1.txt')
try:
    f.readline()  # discard the first line, which holds the column titles
    for l in f:
        iso_country = IsoCountry(*l.decode('utf-8').strip().split(';'))
        COUNTRIES[iso_country.alpha2] = iso_country.name
        COUNTRY_MATRIX.append(iso_country)
finally:
    f.close()
class CountryConverterManager(ConverterManager):
    """The :class:`~babelfish.converters.ConverterManager` used for country converters"""
    # Entry point group searched for converters
    entry_point = 'babelfish.country_converters'
    # Converters shipped with the package, in entry point syntax
    internal_converters = ['name = babelfish.converters.countryname:CountryNameConverter']
country_converters = CountryConverterManager()
class CountryMeta(type):
    """Metaclass of :class:`Country`

    Turns a class attribute lookup such as :meth:`Country.frommycode` into
    :meth:`Country.fromcode` bound to the ``mycode`` `converter`

    """
    def __getattr__(cls, name):
        # Only reached when normal lookup failed: anything that is not a
        # ``from<converter>`` name goes back to the default lookup
        if not name.startswith('from'):
            return type.__getattribute__(cls, name)
        converter_name = name[4:]
        return partial(cls.fromcode, converter=converter_name)
class Country(CountryMeta(str('CountryBase'), (object,), {})):
    """A country on Earth

    Countries are identified by their 2-letter code from the ISO-3166 standard

    :param string country: 2-letter ISO-3166 country code

    """
    def __init__(self, country):
        known = country in COUNTRIES
        if not known:
            raise ValueError('%r is not a valid country' % country)

        #: ISO-3166 2-letter country code
        self.alpha2 = country

    @classmethod
    def fromcode(cls, code, converter):
        """Build a :class:`Country` from `code`, using `converter` to
        :meth:`~babelfish.converters.CountryReverseConverter.reverse` it

        :param string code: the code to reverse
        :param string converter: name of the :class:`~babelfish.converters.CountryReverseConverter` to use
        :return: the corresponding :class:`Country` instance
        :rtype: :class:`Country`

        """
        alpha2 = country_converters[converter].reverse(code)
        return cls(alpha2)

    def __getstate__(self):
        return self.alpha2

    def __setstate__(self, state):
        self.alpha2 = state

    def __getattr__(self, name):
        # Unknown attributes are resolved through the converter of that name
        try:
            return country_converters[name].convert(self.alpha2)
        except KeyError:
            raise AttributeError(name)

    def __hash__(self):
        return hash(self.alpha2)

    def __eq__(self, other):
        # A string is compared with the alpha2 code
        if isinstance(other, basestr):
            return str(self) == other
        return isinstance(other, Country) and self.alpha2 == other.alpha2

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        return '<Country [%s]>' % self

    def __str__(self):
        return self.alpha2
@@ -1,45 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
import os.path
import tempfile
import zipfile
import requests
# Directory of this script; every data file is written next to it
DATA_DIR = os.path.dirname(__file__)


def _fetch(url):
    """Download ``url`` and return the response body as bytes.

    :param string url: address to download
    :return: the raw response body
    :raise: :class:`requests.HTTPError` on an error status, so that an error page is never saved as data
    """
    response = requests.get(url)
    response.raise_for_status()
    return response.content


def _save(content, filename):
    """Write ``content`` to ``filename`` inside :data:`DATA_DIR`.

    :param content: bytes to write
    :param string filename: name of the target file
    """
    # Binary mode: ``content`` is bytes, which a text-mode file rejects on Python 3
    with open(os.path.join(DATA_DIR, filename), 'wb') as f:
        f.write(content)


def _extract(content, member):
    """Extract ``member`` from the zip archive ``content`` into :data:`DATA_DIR`.

    :param content: bytes of the zip archive
    :param string member: name of the archive member to extract
    """
    with tempfile.TemporaryFile() as f:
        f.write(content)
        with zipfile.ZipFile(f) as z:
            z.extract(member, DATA_DIR)


# Each download completes before its target file is opened, so a failed
# request leaves the existing data file untouched.

# iso-3166-1.txt
print('Downloading ISO-3166-1 standard (ISO country codes)...')
_save(_fetch('http://www.iso.org/iso/home/standards/country_codes/country_names_and_code_elements_txt.htm').strip(),
      'iso-3166-1.txt')

# iso-639-3.tab
print('Downloading ISO-639-3 standard (ISO language codes)...')
_extract(_fetch('http://www-01.sil.org/iso639-3/iso-639-3_Code_Tables_20130531.zip'), 'iso-639-3.tab')

# iso-15924
print('Downloading ISO-15924 standard (ISO script codes)...')
_extract(_fetch('http://www.unicode.org/iso15924/iso15924.txt.zip'), 'iso15924-utf8-20131012.txt')

# opensubtitles supported languages
print('Downloading OpenSubtitles supported languages...')
_save(_fetch('http://www.opensubtitles.org/addons/export_languages.php'), 'opensubtitles_languages.txt')

print('Done!')
@@ -1,250 +0,0 @@
Country Name;ISO 3166-1-alpha-2 code
AFGHANISTAN;AF
ÅLAND ISLANDS;AX
ALBANIA;AL
ALGERIA;DZ
AMERICAN SAMOA;AS
ANDORRA;AD
ANGOLA;AO
ANGUILLA;AI
ANTARCTICA;AQ
ANTIGUA AND BARBUDA;AG
ARGENTINA;AR
ARMENIA;AM
ARUBA;AW
AUSTRALIA;AU
AUSTRIA;AT
AZERBAIJAN;AZ
BAHAMAS;BS
BAHRAIN;BH
BANGLADESH;BD
BARBADOS;BB
BELARUS;BY
BELGIUM;BE
BELIZE;BZ
BENIN;BJ
BERMUDA;BM
BHUTAN;BT
BOLIVIA, PLURINATIONAL STATE OF;BO
BONAIRE, SINT EUSTATIUS AND SABA;BQ
BOSNIA AND HERZEGOVINA;BA
BOTSWANA;BW
BOUVET ISLAND;BV
BRAZIL;BR
BRITISH INDIAN OCEAN TERRITORY;IO
BRUNEI DARUSSALAM;BN
BULGARIA;BG
BURKINA FASO;BF
BURUNDI;BI
CAMBODIA;KH
CAMEROON;CM
CANADA;CA
CAPE VERDE;CV
CAYMAN ISLANDS;KY
CENTRAL AFRICAN REPUBLIC;CF
CHAD;TD
CHILE;CL
CHINA;CN
CHRISTMAS ISLAND;CX
COCOS (KEELING) ISLANDS;CC
COLOMBIA;CO
COMOROS;KM
CONGO;CG
CONGO, THE DEMOCRATIC REPUBLIC OF THE;CD
COOK ISLANDS;CK
COSTA RICA;CR
CÔTE D'IVOIRE;CI
CROATIA;HR
CUBA;CU
CURAÇAO;CW
CYPRUS;CY
CZECH REPUBLIC;CZ
DENMARK;DK
DJIBOUTI;DJ
DOMINICA;DM
DOMINICAN REPUBLIC;DO
ECUADOR;EC
EGYPT;EG
EL SALVADOR;SV
EQUATORIAL GUINEA;GQ
ERITREA;ER
ESTONIA;EE
ETHIOPIA;ET
FALKLAND ISLANDS (MALVINAS);FK
FAROE ISLANDS;FO
FIJI;FJ
FINLAND;FI
FRANCE;FR
FRENCH GUIANA;GF
FRENCH POLYNESIA;PF
FRENCH SOUTHERN TERRITORIES;TF
GABON;GA
GAMBIA;GM
GEORGIA;GE
GERMANY;DE
GHANA;GH
GIBRALTAR;GI
GREECE;GR
GREENLAND;GL
GRENADA;GD
GUADELOUPE;GP
GUAM;GU
GUATEMALA;GT
GUERNSEY;GG
GUINEA;GN
GUINEA-BISSAU;GW
GUYANA;GY
HAITI;HT
HEARD ISLAND AND MCDONALD ISLANDS;HM
HOLY SEE (VATICAN CITY STATE);VA
HONDURAS;HN
HONG KONG;HK
HUNGARY;HU
ICELAND;IS
INDIA;IN
INDONESIA;ID
IRAN, ISLAMIC REPUBLIC OF;IR
IRAQ;IQ
IRELAND;IE
ISLE OF MAN;IM
ISRAEL;IL
ITALY;IT
JAMAICA;JM
JAPAN;JP
JERSEY;JE
JORDAN;JO
KAZAKHSTAN;KZ
KENYA;KE
KIRIBATI;KI
KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF;KP
KOREA, REPUBLIC OF;KR
KUWAIT;KW
KYRGYZSTAN;KG
LAO PEOPLE'S DEMOCRATIC REPUBLIC;LA
LATVIA;LV
LEBANON;LB
LESOTHO;LS
LIBERIA;LR
LIBYA;LY
LIECHTENSTEIN;LI
LITHUANIA;LT
LUXEMBOURG;LU
MACAO;MO
MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF;MK
MADAGASCAR;MG
MALAWI;MW
MALAYSIA;MY
MALDIVES;MV
MALI;ML
MALTA;MT
MARSHALL ISLANDS;MH
MARTINIQUE;MQ
MAURITANIA;MR
MAURITIUS;MU
MAYOTTE;YT
MEXICO;MX
MICRONESIA, FEDERATED STATES OF;FM
MOLDOVA, REPUBLIC OF;MD
MONACO;MC
MONGOLIA;MN
MONTENEGRO;ME
MONTSERRAT;MS
MOROCCO;MA
MOZAMBIQUE;MZ
MYANMAR;MM
NAMIBIA;NA
NAURU;NR
NEPAL;NP
NETHERLANDS;NL
NEW CALEDONIA;NC
NEW ZEALAND;NZ
NICARAGUA;NI
NIGER;NE
NIGERIA;NG
NIUE;NU
NORFOLK ISLAND;NF
NORTHERN MARIANA ISLANDS;MP
NORWAY;NO
OMAN;OM
PAKISTAN;PK
PALAU;PW
PALESTINE, STATE OF;PS
PANAMA;PA
PAPUA NEW GUINEA;PG
PARAGUAY;PY
PERU;PE
PHILIPPINES;PH
PITCAIRN;PN
POLAND;PL
PORTUGAL;PT
PUERTO RICO;PR
QATAR;QA
RÉUNION;RE
ROMANIA;RO
RUSSIAN FEDERATION;RU
RWANDA;RW
SAINT BARTHÉLEMY;BL
SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA;SH
SAINT KITTS AND NEVIS;KN
SAINT LUCIA;LC
SAINT MARTIN (FRENCH PART);MF
SAINT PIERRE AND MIQUELON;PM
SAINT VINCENT AND THE GRENADINES;VC
SAMOA;WS
SAN MARINO;SM
SAO TOME AND PRINCIPE;ST
SAUDI ARABIA;SA
SENEGAL;SN
SERBIA;RS
SEYCHELLES;SC
SIERRA LEONE;SL
SINGAPORE;SG
SINT MAARTEN (DUTCH PART);SX
SLOVAKIA;SK
SLOVENIA;SI
SOLOMON ISLANDS;SB
SOMALIA;SO
SOUTH AFRICA;ZA
SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS;GS
SOUTH SUDAN;SS
SPAIN;ES
SRI LANKA;LK
SUDAN;SD
SURINAME;SR
SVALBARD AND JAN MAYEN;SJ
SWAZILAND;SZ
SWEDEN;SE
SWITZERLAND;CH
SYRIAN ARAB REPUBLIC;SY
TAIWAN, PROVINCE OF CHINA;TW
TAJIKISTAN;TJ
TANZANIA, UNITED REPUBLIC OF;TZ
THAILAND;TH
TIMOR-LESTE;TL
TOGO;TG
TOKELAU;TK
TONGA;TO
TRINIDAD AND TOBAGO;TT
TUNISIA;TN
TURKEY;TR
TURKMENISTAN;TM
TURKS AND CAICOS ISLANDS;TC
TUVALU;TV
UGANDA;UG
UKRAINE;UA
UNITED ARAB EMIRATES;AE
UNITED KINGDOM;GB
UNITED STATES;US
UNITED STATES MINOR OUTLYING ISLANDS;UM
URUGUAY;UY
UZBEKISTAN;UZ
VANUATU;VU
VENEZUELA, BOLIVARIAN REPUBLIC OF;VE
VIET NAM;VN
VIRGIN ISLANDS, BRITISH;VG
VIRGIN ISLANDS, U.S.;VI
WALLIS AND FUTUNA;WF
WESTERN SAHARA;EH
YEMEN;YE
ZAMBIA;ZM
ZIMBABWE;ZW
File diff suppressed because it is too large Load Diff
@@ -1,176 +0,0 @@
#
# ISO 15924 - Codes for the representation of names of scripts
# Codes pour la représentation des noms d’écritures
# Format:
# Code;N°;English Name;Nom français;PVA;Date
#
Afak;439;Afaka;afaka;;2010-12-21
Aghb;239;Caucasian Albanian;aghbanien;;2012-10-16
Ahom;338;Ahom, Tai Ahom;âhom;;2012-11-01
Arab;160;Arabic;arabe;Arabic;2004-05-01
Armi;124;Imperial Aramaic;araméen impérial;Imperial_Aramaic;2009-06-01
Armn;230;Armenian;arménien;Armenian;2004-05-01
Avst;134;Avestan;avestique;Avestan;2009-06-01
Bali;360;Balinese;balinais;Balinese;2006-10-10
Bamu;435;Bamum;bamoum;Bamum;2009-06-01
Bass;259;Bassa Vah;bassa;;2010-03-26
Batk;365;Batak;batik;Batak;2010-07-23
Beng;325;Bengali;bengalî;Bengali;2004-05-01
Blis;550;Blissymbols;symboles Bliss;;2004-05-01
Bopo;285;Bopomofo;bopomofo;Bopomofo;2004-05-01
Brah;300;Brahmi;brahma;Brahmi;2010-07-23
Brai;570;Braille;braille;Braille;2004-05-01
Bugi;367;Buginese;bouguis;Buginese;2006-06-21
Buhd;372;Buhid;bouhide;Buhid;2004-05-01
Cakm;349;Chakma;chakma;Chakma;2012-02-06
Cans;440;Unified Canadian Aboriginal Syllabics;syllabaire autochtone canadien unifié;Canadian_Aboriginal;2004-05-29
Cari;201;Carian;carien;Carian;2007-07-02
Cham;358;Cham;cham (čam, tcham);Cham;2009-11-11
Cher;445;Cherokee;tchérokî;Cherokee;2004-05-01
Cirt;291;Cirth;cirth;;2004-05-01
Copt;204;Coptic;copte;Coptic;2006-06-21
Cprt;403;Cypriot;syllabaire chypriote;Cypriot;2004-05-01
Cyrl;220;Cyrillic;cyrillique;Cyrillic;2004-05-01
Cyrs;221;Cyrillic (Old Church Slavonic variant);cyrillique (variante slavonne);;2004-05-01
Deva;315;Devanagari (Nagari);dévanâgarî;Devanagari;2004-05-01
Dsrt;250;Deseret (Mormon);déseret (mormon);Deseret;2004-05-01
Dupl;755;Duployan shorthand, Duployan stenography;sténographie Duployé;;2010-07-18
Egyd;070;Egyptian demotic;démotique égyptien;;2004-05-01
Egyh;060;Egyptian hieratic;hiératique égyptien;;2004-05-01
Egyp;050;Egyptian hieroglyphs;hiéroglyphes égyptiens;Egyptian_Hieroglyphs;2009-06-01
Elba;226;Elbasan;elbasan;;2010-07-18
Ethi;430;Ethiopic (Geʻez);éthiopien (geʻez, guèze);Ethiopic;2004-10-25
Geor;240;Georgian (Mkhedruli);géorgien (mkhédrouli);Georgian;2004-05-29
Geok;241;Khutsuri (Asomtavruli and Nuskhuri);khoutsouri (assomtavrouli et nouskhouri);Georgian;2012-10-16
Glag;225;Glagolitic;glagolitique;Glagolitic;2006-06-21
Goth;206;Gothic;gotique;Gothic;2004-05-01
Gran;343;Grantha;grantha;;2009-11-11
Grek;200;Greek;grec;Greek;2004-05-01
Gujr;320;Gujarati;goudjarâtî (gujrâtî);Gujarati;2004-05-01
Guru;310;Gurmukhi;gourmoukhî;Gurmukhi;2004-05-01
Hang;286;Hangul (Hangŭl, Hangeul);hangûl (hangŭl, hangeul);Hangul;2004-05-29
Hani;500;Han (Hanzi, Kanji, Hanja);idéogrammes han (sinogrammes);Han;2009-02-23
Hano;371;Hanunoo (Hanunóo);hanounóo;Hanunoo;2004-05-29
Hans;501;Han (Simplified variant);idéogrammes han (variante simplifiée);;2004-05-29
Hant;502;Han (Traditional variant);idéogrammes han (variante traditionnelle);;2004-05-29
Hatr;127;Hatran;hatrénien;;2012-11-01
Hebr;125;Hebrew;hébreu;Hebrew;2004-05-01
Hira;410;Hiragana;hiragana;Hiragana;2004-05-01
Hluw;080;Anatolian Hieroglyphs (Luwian Hieroglyphs, Hittite Hieroglyphs);hiéroglyphes anatoliens (hiéroglyphes louvites, hiéroglyphes hittites);;2011-12-09
Hmng;450;Pahawh Hmong;pahawh hmong;;2004-05-01
Hrkt;412;Japanese syllabaries (alias for Hiragana + Katakana);syllabaires japonais (alias pour hiragana + katakana);Katakana_Or_Hiragana;2011-06-21
Hung;176;Old Hungarian (Hungarian Runic);runes hongroises (ancien hongrois);;2012-10-16
Inds;610;Indus (Harappan);indus;;2004-05-01
Ital;210;Old Italic (Etruscan, Oscan, etc.);ancien italique (étrusque, osque, etc.);Old_Italic;2004-05-29
Java;361;Javanese;javanais;Javanese;2009-06-01
Jpan;413;Japanese (alias for Han + Hiragana + Katakana);japonais (alias pour han + hiragana + katakana);;2006-06-21
Jurc;510;Jurchen;jurchen;;2010-12-21
Kali;357;Kayah Li;kayah li;Kayah_Li;2007-07-02
Kana;411;Katakana;katakana;Katakana;2004-05-01
Khar;305;Kharoshthi;kharochthî;Kharoshthi;2006-06-21
Khmr;355;Khmer;khmer;Khmer;2004-05-29
Khoj;322;Khojki;khojkî;;2011-06-21
Knda;345;Kannada;kannara (canara);Kannada;2004-05-29
Kore;287;Korean (alias for Hangul + Han);coréen (alias pour hangûl + han);;2007-06-13
Kpel;436;Kpelle;kpèllé;;2010-03-26
Kthi;317;Kaithi;kaithî;Kaithi;2009-06-01
Lana;351;Tai Tham (Lanna);taï tham (lanna);Tai_Tham;2009-06-01
Laoo;356;Lao;laotien;Lao;2004-05-01
Latf;217;Latin (Fraktur variant);latin (variante brisée);;2004-05-01
Latg;216;Latin (Gaelic variant);latin (variante gaélique);;2004-05-01
Latn;215;Latin;latin;Latin;2004-05-01
Lepc;335;Lepcha (Róng);lepcha (róng);Lepcha;2007-07-02
Limb;336;Limbu;limbou;Limbu;2004-05-29
Lina;400;Linear A;linéaire A;;2004-05-01
Linb;401;Linear B;linéaire B;Linear_B;2004-05-29
Lisu;399;Lisu (Fraser);lisu (Fraser);Lisu;2009-06-01
Loma;437;Loma;loma;;2010-03-26
Lyci;202;Lycian;lycien;Lycian;2007-07-02
Lydi;116;Lydian;lydien;Lydian;2007-07-02
Mahj;314;Mahajani;mahâjanî;;2012-10-16
Mand;140;Mandaic, Mandaean;mandéen;Mandaic;2010-07-23
Mani;139;Manichaean;manichéen;;2007-07-15
Maya;090;Mayan hieroglyphs;hiéroglyphes mayas;;2004-05-01
Mend;438;Mende Kikakui;mendé kikakui;;2013-10-12
Merc;101;Meroitic Cursive;cursif méroïtique;Meroitic_Cursive;2012-02-06
Mero;100;Meroitic Hieroglyphs;hiéroglyphes méroïtiques;Meroitic_Hieroglyphs;2012-02-06
Mlym;347;Malayalam;malayâlam;Malayalam;2004-05-01
Modi;323;Modi, Moḍī;modî;;2013-10-12
Moon;218;Moon (Moon code, Moon script, Moon type);écriture Moon;;2006-12-11
Mong;145;Mongolian;mongol;Mongolian;2004-05-01
Mroo;199;Mro, Mru;mro;;2010-12-21
Mtei;337;Meitei Mayek (Meithei, Meetei);meitei mayek;Meetei_Mayek;2009-06-01
Mult;323; Multani;multanî;;2012-11-01
Mymr;350;Myanmar (Burmese);birman;Myanmar;2004-05-01
Narb;106;Old North Arabian (Ancient North Arabian);nord-arabique;;2010-03-26
Nbat;159;Nabataean;nabatéen;;2010-03-26
Nkgb;420;Nakhi Geba ('Na-'Khi ²Ggŏ-¹baw, Naxi Geba);nakhi géba;;2009-02-23
Nkoo;165;NKo;nko;Nko;2006-10-10
Nshu;499;Nüshu;nüshu;;2010-12-21
Ogam;212;Ogham;ogam;Ogham;2004-05-01
Olck;261;Ol Chiki (Ol Cemet, Ol, Santali);ol tchiki;Ol_Chiki;2007-07-02
Orkh;175;Old Turkic, Orkhon Runic;orkhon;Old_Turkic;2009-06-01
Orya;327;Oriya;oriyâ;Oriya;2004-05-01
Osma;260;Osmanya;osmanais;Osmanya;2004-05-01
Palm;126;Palmyrene;palmyrénien;;2010-03-26
Pauc;263;Pau Cin Hau;paou chin haou;;2013-10-12
Perm;227;Old Permic;ancien permien;;2004-05-01
Phag;331;Phags-pa;phags pa;Phags_Pa;2006-10-10
Phli;131;Inscriptional Pahlavi;pehlevi des inscriptions;Inscriptional_Pahlavi;2009-06-01
Phlp;132;Psalter Pahlavi;pehlevi des psautiers;;2007-11-26
Phlv;133;Book Pahlavi;pehlevi des livres;;2007-07-15
Phnx;115;Phoenician;phénicien;Phoenician;2006-10-10
Plrd;282;Miao (Pollard);miao (Pollard);Miao;2012-02-06
Prti;130;Inscriptional Parthian;parthe des inscriptions;Inscriptional_Parthian;2009-06-01
Qaaa;900;Reserved for private use (start);réservé à l’usage privé (début);;2004-05-29
Qabx;949;Reserved for private use (end);réservé à l’usage privé (fin);;2004-05-29
Rjng;363;Rejang (Redjang, Kaganga);redjang (kaganga);Rejang;2009-02-23
Roro;620;Rongorongo;rongorongo;;2004-05-01
Runr;211;Runic;runique;Runic;2004-05-01
Samr;123;Samaritan;samaritain;Samaritan;2009-06-01
Sara;292;Sarati;sarati;;2004-05-29
Sarb;105;Old South Arabian;sud-arabique, himyarite;Old_South_Arabian;2009-06-01
Saur;344;Saurashtra;saurachtra;Saurashtra;2007-07-02
Sgnw;095;SignWriting;SignÉcriture, SignWriting;;2006-10-10
Shaw;281;Shavian (Shaw);shavien (Shaw);Shavian;2004-05-01
Shrd;319;Sharada, Śāradā;charada, shard;Sharada;2012-02-06
Sidd;302;Siddham, Siddhaṃ, Siddhamātṛkā;siddham;;2013-10-12
Sind;318;Khudawadi, Sindhi;khoudawadî, sindhî;;2010-12-21
Sinh;348;Sinhala;singhalais;Sinhala;2004-05-01
Sora;398;Sora Sompeng;sora sompeng;Sora_Sompeng;2012-02-06
Sund;362;Sundanese;sundanais;Sundanese;2007-07-02
Sylo;316;Syloti Nagri;sylotî nâgrî;Syloti_Nagri;2006-06-21
Syrc;135;Syriac;syriaque;Syriac;2004-05-01
Syre;138;Syriac (Estrangelo variant);syriaque (variante estranghélo);;2004-05-01
Syrj;137;Syriac (Western variant);syriaque (variante occidentale);;2004-05-01
Syrn;136;Syriac (Eastern variant);syriaque (variante orientale);;2004-05-01
Tagb;373;Tagbanwa;tagbanoua;Tagbanwa;2004-05-01
Takr;321;Takri, Ṭākrī, Ṭāṅkrī;tâkrî;Takri;2012-02-06
Tale;353;Tai Le;taï-le;Tai_Le;2004-10-25
Talu;354;New Tai Lue;nouveau taï-lue;New_Tai_Lue;2006-06-21
Taml;346;Tamil;tamoul;Tamil;2004-05-01
Tang;520;Tangut;tangoute;;2010-12-21
Tavt;359;Tai Viet;taï viêt;Tai_Viet;2009-06-01
Telu;340;Telugu;télougou;Telugu;2004-05-01
Teng;290;Tengwar;tengwar;;2004-05-01
Tfng;120;Tifinagh (Berber);tifinagh (berbère);Tifinagh;2006-06-21
Tglg;370;Tagalog (Baybayin, Alibata);tagal (baybayin, alibata);Tagalog;2009-02-23
Thaa;170;Thaana;thâna;Thaana;2004-05-01
Thai;352;Thai;thaï;Thai;2004-05-01
Tibt;330;Tibetan;tibétain;Tibetan;2004-05-01
Tirh;326;Tirhuta;tirhouta;;2011-12-09
Ugar;040;Ugaritic;ougaritique;Ugaritic;2004-05-01
Vaii;470;Vai;vaï;Vai;2007-07-02
Visp;280;Visible Speech;parole visible;;2004-05-01
Wara;262;Warang Citi (Varang Kshiti);warang citi;;2009-11-11
Wole;480;Woleai;woléaï;;2010-12-21
Xpeo;030;Old Persian;cunéiforme persépolitain;Old_Persian;2006-06-21
Xsux;020;Cuneiform, Sumero-Akkadian;cunéiforme suméro-akkadien;Cuneiform;2006-10-10
Yiii;460;Yi;yi;Yi;2004-05-01
Zinh;994;Code for inherited script;codet pour écriture héritée;Inherited;2009-02-23
Zmth;995;Mathematical notation;notation mathématique;;2007-11-26
Zsym;996;Symbols;symboles;;2007-11-26
Zxxx;997;Code for unwritten documents;codet pour les documents non écrits;;2011-06-21
Zyyy;998;Code for undetermined script;codet pour écriture indéterminée;Common;2004-05-29
Zzzz;999;Code for uncoded script;codet pour écriture non codée;Unknown;2006-10-10
@@ -1,474 +0,0 @@
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
aar aa Afar, afar 0 0
abk ab Abkhazian 0 0
ace Achinese 0 0
ach Acoli 0 0
ada Adangme 0 0
ady adyghé 0 0
afa Afro-Asiatic (Other) 0 0
afh Afrihili 0 0
afr af Afrikaans 1 0
ain Ainu 0 0
aka ak Akan 0 0
akk Akkadian 0 0
alb sq Albanian 1 1
ale Aleut 0 0
alg Algonquian languages 0 0
alt Southern Altai 0 0
amh am Amharic 0 0
ang English, Old (ca.450-1100) 0 0
apa Apache languages 0 0
ara ar Arabic 1 1
arc Aramaic 0 0
arg an Aragonese 0 0
arm hy Armenian 1 0
arn Araucanian 0 0
arp Arapaho 0 0
art Artificial (Other) 0 0
arw Arawak 0 0
asm as Assamese 0 0
ast Asturian, Bable 0 0
ath Athapascan languages 0 0
aus Australian languages 0 0
ava av Avaric 0 0
ave ae Avestan 0 0
awa Awadhi 0 0
aym ay Aymara 0 0
aze az Azerbaijani 0 0
bad Banda 0 0
bai Bamileke languages 0 0
bak ba Bashkir 0 0
bal Baluchi 0 0
bam bm Bambara 0 0
ban Balinese 0 0
baq eu Basque 1 1
bas Basa 0 0
bat Baltic (Other) 0 0
bej Beja 0 0
bel be Belarusian 0 0
bem Bemba 0 0
ben bn Bengali 1 0
ber Berber (Other) 0 0
bho Bhojpuri 0 0
bih bh Bihari 0 0
bik Bikol 0 0
bin Bini 0 0
bis bi Bislama 0 0
bla Siksika 0 0
bnt Bantu (Other) 0 0
bos bs Bosnian 1 0
bra Braj 0 0
bre br Breton 1 0
btk Batak (Indonesia) 0 0
bua Buriat 0 0
bug Buginese 0 0
bul bg Bulgarian 1 1
bur my Burmese 1 0
byn Blin 0 0
cad Caddo 0 0
cai Central American Indian (Other) 0 0
car Carib 0 0
cat ca Catalan 1 1
cau Caucasian (Other) 0 0
ceb Cebuano 0 0
cel Celtic (Other) 0 0
cha ch Chamorro 0 0
chb Chibcha 0 0
che ce Chechen 0 0
chg Chagatai 0 0
chi zh Chinese 1 1
chk Chuukese 0 0
chm Mari 0 0
chn Chinook jargon 0 0
cho Choctaw 0 0
chp Chipewyan 0 0
chr Cherokee 0 0
chu cu Church Slavic 0 0
chv cv Chuvash 0 0
chy Cheyenne 0 0
cmc Chamic languages 0 0
cop Coptic 0 0
cor kw Cornish 0 0
cos co Corsican 0 0
cpe Creoles and pidgins, English based (Other) 0 0
cpf Creoles and pidgins, French-based (Other) 0 0
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
cre cr Cree 0 0
crh Crimean Tatar 0 0
crp Creoles and pidgins (Other) 0 0
csb Kashubian 0 0
cus Cushitic (Other)' couchitiques, autres langues 0 0
cze cs Czech 1 1
dak Dakota 0 0
dan da Danish 1 1
dar Dargwa 0 0
day Dayak 0 0
del Delaware 0 0
den Slave (Athapascan) 0 0
dgr Dogrib 0 0
din Dinka 0 0
div dv Divehi 0 0
doi Dogri 0 0
dra Dravidian (Other) 0 0
dua Duala 0 0
dum Dutch, Middle (ca.1050-1350) 0 0
dut nl Dutch 1 1
dyu Dyula 0 0
dzo dz Dzongkha 0 0
efi Efik 0 0
egy Egyptian (Ancient) 0 0
eka Ekajuk 0 0
elx Elamite 0 0
eng en English 1 1
enm English, Middle (1100-1500) 0 0
epo eo Esperanto 1 0
est et Estonian 1 1
ewe ee Ewe 0 0
ewo Ewondo 0 0
fan Fang 0 0
fao fo Faroese 0 0
fat Fanti 0 0
fij fj Fijian 0 0
fil Filipino 0 0
fin fi Finnish 1 1
fiu Finno-Ugrian (Other) 0 0
fon Fon 0 0
fre fr French 1 1
frm French, Middle (ca.1400-1600) 0 0
fro French, Old (842-ca.1400) 0 0
fry fy Frisian 0 0
ful ff Fulah 0 0
fur Friulian 0 0
gaa Ga 0 0
gay Gayo 0 0
gba Gbaya 0 0
gem Germanic (Other) 0 0
geo ka Georgian 1 1
ger de German 1 1
gez Geez 0 0
gil Gilbertese 0 0
gla gd Gaelic 0 0
gle ga Irish 0 0
glg gl Galician 1 1
glv gv Manx 0 0
gmh German, Middle High (ca.1050-1500) 0 0
goh German, Old High (ca.750-1050) 0 0
gon Gondi 0 0
gor Gorontalo 0 0
got Gothic 0 0
grb Grebo 0 0
grc Greek, Ancient (to 1453) 0 0
ell el Greek 1 1
grn gn Guarani 0 0
guj gu Gujarati 0 0
gwi Gwich´in 0 0
hai Haida 0 0
hat ht Haitian 0 0
hau ha Hausa 0 0
haw Hawaiian 0 0
heb he Hebrew 1 1
her hz Herero 0 0
hil Hiligaynon 0 0
him Himachali 0 0
hin hi Hindi 1 1
hit Hittite 0 0
hmn Hmong 0 0
hmo ho Hiri Motu 0 0
hrv hr Croatian 1 1
hun hu Hungarian 1 1
hup Hupa 0 0
iba Iban 0 0
ibo ig Igbo 0 0
ice is Icelandic 1 1
ido io Ido 0 0
iii ii Sichuan Yi 0 0
ijo Ijo 0 0
iku iu Inuktitut 0 0
ile ie Interlingue 0 0
ilo Iloko 0 0
ina ia Interlingua (International Auxiliary Language Asso 0 0
inc Indic (Other) 0 0
ind id Indonesian 1 1
ine Indo-European (Other) 0 0
inh Ingush 0 0
ipk ik Inupiaq 0 0
ira Iranian (Other) 0 0
iro Iroquoian languages 0 0
ita it Italian 1 1
jav jv Javanese 0 0
jpn ja Japanese 1 1
jpr Judeo-Persian 0 0
jrb Judeo-Arabic 0 0
kaa Kara-Kalpak 0 0
kab Kabyle 0 0
kac Kachin 0 0
kal kl Kalaallisut 0 0
kam Kamba 0 0
kan kn Kannada 0 0
kar Karen 0 0
kas ks Kashmiri 0 0
kau kr Kanuri 0 0
kaw Kawi 0 0
kaz kk Kazakh 1 0
kbd Kabardian 0 0
kha Khasi 0 0
khi Khoisan (Other) 0 0
khm km Khmer 1 1
kho Khotanese 0 0
kik ki Kikuyu 0 0
kin rw Kinyarwanda 0 0
kir ky Kirghiz 0 0
kmb Kimbundu 0 0
kok Konkani 0 0
kom kv Komi 0 0
kon kg Kongo 0 0
kor ko Korean 1 1
kos Kosraean 0 0
kpe Kpelle 0 0
krc Karachay-Balkar 0 0
kro Kru 0 0
kru Kurukh 0 0
kua kj Kuanyama 0 0
kum Kumyk 0 0
kur ku Kurdish 0 0
kut Kutenai 0 0
lad Ladino 0 0
lah Lahnda 0 0
lam Lamba 0 0
lao lo Lao 0 0
lat la Latin 0 0
lav lv Latvian 1 0
lez Lezghian 0 0
lim li Limburgan 0 0
lin ln Lingala 0 0
lit lt Lithuanian 1 0
lol Mongo 0 0
loz Lozi 0 0
ltz lb Luxembourgish 1 0
lua Luba-Lulua 0 0
lub lu Luba-Katanga 0 0
lug lg Ganda 0 0
lui Luiseno 0 0
lun Lunda 0 0
luo Luo (Kenya and Tanzania) 0 0
lus lushai 0 0
mac mk Macedonian 1 1
mad Madurese 0 0
mag Magahi 0 0
mah mh Marshallese 0 0
mai Maithili 0 0
mak Makasar 0 0
mal ml Malayalam 1 0
man Mandingo 0 0
mao mi Maori 0 0
map Austronesian (Other) 0 0
mar mr Marathi 0 0
mas Masai 0 0
may ms Malay 1 1
mdf Moksha 0 0
mdr Mandar 0 0
men Mende 0 0
mga Irish, Middle (900-1200) 0 0
mic Mi'kmaq 0 0
min Minangkabau 0 0
mis Miscellaneous languages 0 0
mkh Mon-Khmer (Other) 0 0
mlg mg Malagasy 0 0
mlt mt Maltese 0 0
mnc Manchu 0 0
mni Manipuri 0 0
mno Manobo languages 0 0
moh Mohawk 0 0
mol mo Moldavian 0 0
mon mn Mongolian 1 0
mos Mossi 0 0
mwl Mirandese 0 0
mul Multiple languages 0 0
mun Munda languages 0 0
mus Creek 0 0
mwr Marwari 0 0
myn Mayan languages 0 0
myv Erzya 0 0
nah Nahuatl 0 0
nai North American Indian 0 0
nap Neapolitan 0 0
nau na Nauru 0 0
nav nv Navajo 0 0
nbl nr Ndebele, South 0 0
nde nd Ndebele, North 0 0
ndo ng Ndonga 0 0
nds Low German 0 0
nep ne Nepali 0 0
new Nepal Bhasa 0 0
nia Nias 0 0
nic Niger-Kordofanian (Other) 0 0
niu Niuean 0 0
nno nn Norwegian Nynorsk 0 0
nob nb Norwegian Bokmal 0 0
nog Nogai 0 0
non Norse, Old 0 0
nor no Norwegian 1 1
nso Northern Sotho 0 0
nub Nubian languages 0 0
nwc Classical Newari 0 0
nya ny Chichewa 0 0
nym Nyamwezi 0 0
nyn Nyankole 0 0
nyo Nyoro 0 0
nzi Nzima 0 0
oci oc Occitan 1 1
oji oj Ojibwa 0 0
ori or Oriya 0 0
orm om Oromo 0 0
osa Osage 0 0
oss os Ossetian 0 0
ota Turkish, Ottoman (1500-1928) 0 0
oto Otomian languages 0 0
paa Papuan (Other) 0 0
pag Pangasinan 0 0
pal Pahlavi 0 0
pam Pampanga 0 0
pan pa Panjabi 0 0
pap Papiamento 0 0
pau Palauan 0 0
peo Persian, Old (ca.600-400 B.C.) 0 0
per fa Persian 1 1
phi Philippine (Other) 0 0
phn Phoenician 0 0
pli pi Pali 0 0
pol pl Polish 1 1
pon Pohnpeian 0 0
por pt Portuguese 1 1
pra Prakrit languages 0 0
pro Provençal, Old (to 1500) 0 0
pus ps Pushto 0 0
que qu Quechua 0 0
raj Rajasthani 0 0
rap Rapanui 0 0
rar Rarotongan 0 0
roa Romance (Other) 0 0
roh rm Raeto-Romance 0 0
rom Romany 0 0
run rn Rundi 0 0
rup Aromanian 0 0
rus ru Russian 1 1
sad Sandawe 0 0
sag sg Sango 0 0
sah Yakut 0 0
sai South American Indian (Other) 0 0
sal Salishan languages 0 0
sam Samaritan Aramaic 0 0
san sa Sanskrit 0 0
sas Sasak 0 0
sat Santali 0 0
scc sr Serbian 1 1
scn Sicilian 0 0
sco Scots 0 0
sel Selkup 0 0
sem Semitic (Other) 0 0
sga Irish, Old (to 900) 0 0
sgn Sign Languages 0 0
shn Shan 0 0
sid Sidamo 0 0
sin si Sinhalese 1 1
sio Siouan languages 0 0
sit Sino-Tibetan (Other) 0 0
sla Slavic (Other) 0 0
slo sk Slovak 1 1
slv sl Slovenian 1 1
sma Southern Sami 0 0
sme se Northern Sami 0 0
smi Sami languages (Other) 0 0
smj Lule Sami 0 0
smn Inari Sami 0 0
smo sm Samoan 0 0
sms Skolt Sami 0 0
sna sn Shona 0 0
snd sd Sindhi 0 0
snk Soninke 0 0
sog Sogdian 0 0
som so Somali 0 0
son Songhai 0 0
sot st Sotho, Southern 0 0
spa es Spanish 1 1
srd sc Sardinian 0 0
srr Serer 0 0
ssa Nilo-Saharan (Other) 0 0
ssw ss Swati 0 0
suk Sukuma 0 0
sun su Sundanese 0 0
sus Susu 0 0
sux Sumerian 0 0
swa sw Swahili 1 0
swe sv Swedish 1 1
syr Syriac 1 0
tah ty Tahitian 0 0
tai Tai (Other) 0 0
tam ta Tamil 1 0
tat tt Tatar 0 0
tel te Telugu 1 0
tem Timne 0 0
ter Tereno 0 0
tet Tetum 0 0
tgk tg Tajik 0 0
tgl tl Tagalog 1 1
tha th Thai 1 1
tib bo Tibetan 0 0
tig Tigre 0 0
tir ti Tigrinya 0 0
tiv Tiv 0 0
tkl Tokelau 0 0
tlh Klingon 0 0
tli Tlingit 0 0
tmh Tamashek 0 0
tog Tonga (Nyasa) 0 0
ton to Tonga (Tonga Islands) 0 0
tpi Tok Pisin 0 0
tsi Tsimshian 0 0
tsn tn Tswana 0 0
tso ts Tsonga 0 0
tuk tk Turkmen 0 0
tum Tumbuka 0 0
tup Tupi languages 0 0
tur tr Turkish 1 1
tut Altaic (Other) 0 0
tvl Tuvalu 0 0
twi tw Twi 0 0
tyv Tuvinian 0 0
udm Udmurt 0 0
uga Ugaritic 0 0
uig ug Uighur 0 0
ukr uk Ukrainian 1 1
umb Umbundu 0 0
und Undetermined 0 0
urd ur Urdu 1 0
uzb uz Uzbek 0 0
vai Vai 0 0
ven ve Venda 0 0
vie vi Vietnamese 1 1
vol vo Volapük 0 0
vot Votic 0 0
wak Wakashan languages 0 0
wal Walamo 0 0
war Waray 0 0
was Washo 0 0
wel cy Welsh 0 0
wen Sorbian languages 0 0
wln wa Walloon 0 0
wol wo Wolof 0 0
xal Kalmyk 0 0
xho xh Xhosa 0 0
yao Yao 0 0
yap Yapese 0 0
yid yi Yiddish 0 0
yor yo Yoruba 0 0
ypk Yupik languages 0 0
zap Zapotec 0 0
zen Zenaga 0 0
zha za Zhuang 0 0
znd Zande 0 0
zul zu Zulu 0 0
zun Zuni 0 0
rum ro Romanian 1 1
pob pb Brazilian 1 1
mne Montenegrin 1 0
@@ -1,85 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
class Error(Exception):
    """Root of the babelfish exception hierarchy.

    Adds nothing to :class:`Exception`; it exists so that the other
    exceptions of this module share one common ancestor.
    """
class LanguageError(Error, AttributeError):
    """Common ancestor of the language-related babelfish exceptions.

    It derives from both :class:`Error` and :class:`AttributeError`, so
    handlers written for either of them also receive language errors.
    """
class LanguageConvertError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageConverter.convert` fails

    The string form is the alpha3 code followed by the country and the script,
    each one prefixed with ``-`` and only present when it is not ``None``.

    :param string alpha3: alpha3 code that failed conversion
    :param country: country code that failed conversion, if any
    :type country: string or None
    :param script: script code that failed conversion, if any
    :type script: string or None

    """
    def __init__(self, alpha3, country=None, script=None):
        self.alpha3 = alpha3
        self.country = country
        self.script = script

    def __str__(self):
        text = self.alpha3
        # optional parts are appended in a fixed order: country first, then script
        for subtag in (self.country, self.script):
            if subtag is not None:
                text = text + '-' + subtag
        return text
class LanguageReverseError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageReverseConverter.reverse` fails

    The string form of the exception is the ``repr()`` of the offending code.

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        # keep the rejected code available to handlers
        self.code = code

    def __str__(self):
        return repr(self.code)
class CountryError(Error, AttributeError):
    """Common ancestor of the country-related babelfish exceptions.

    It derives from both :class:`Error` and :class:`AttributeError`, so
    handlers written for either of them also receive country errors.
    """
class CountryConvertError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryConverter.convert` fails

    The string form of the exception is the alpha2 code itself.

    :param string alpha2: alpha2 code that failed conversion

    """
    def __init__(self, alpha2):
        # keep the rejected code available to handlers
        self.alpha2 = alpha2

    def __str__(self):
        return self.alpha2
class CountryReverseError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryReverseConverter.reverse` fails

    The string form of the exception is the ``repr()`` of the offending code.

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        # keep the rejected code available to handlers
        self.code = code

    def __str__(self):
        return repr(self.code)
@@ -1,184 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from collections import namedtuple
from functools import partial
from pkg_resources import resource_stream # @UnresolvedImport
from .converters import ConverterManager
from .country import Country
from .exceptions import LanguageConvertError
from .script import Script
from . import basestr
#: Set of the alpha3 codes read from ``data/iso-639-3.tab``
LANGUAGES = set()
#: One :class:`IsoLanguage` per data line of ``data/iso-639-3.tab``, in file order
LANGUAGE_MATRIX = []

#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])

f = resource_stream('babelfish', 'data/iso-639-3.tab')
try:
    # the first line is discarded (presumably the column header row)
    f.readline()
    for l in f:
        # NOTE(review): the line is split without being stripped, so the last
        # field (``comment``) keeps the line terminator. Left untouched here
        # because changing it would alter the published LANGUAGE_MATRIX data.
        iso_language = IsoLanguage(*l.decode('utf-8').split('\t'))
        LANGUAGES.add(iso_language.alpha3)
        LANGUAGE_MATRIX.append(iso_language)
finally:
    # release the packaged resource even when a line cannot be decoded or
    # does not have exactly the eight expected fields
    f.close()
class LanguageConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` for language converters

    Only declares the entry point group name and the converters bundled with
    babelfish; all behaviour comes from the parent class.

    """
    entry_point = 'babelfish.language_converters'
    # each entry uses the "name = module:Class" form
    internal_converters = [
        'alpha2 = babelfish.converters.alpha2:Alpha2Converter',
        'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
        'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
        'name = babelfish.converters.name:NameConverter',
        'scope = babelfish.converters.scope:ScopeConverter',
        'type = babelfish.converters.type:LanguageTypeConverter',
        'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter',
    ]


#: Module-wide manager instance used by :class:`Language` to look up converters
language_converters = LanguageConverterManager()
class LanguageMeta(type):
    """The :class:`Language` metaclass

    Class attributes that are not found the normal way and whose name starts
    with ``from`` are turned into a call to ``fromcode``: ``Language.frommycode``
    becomes ``Language.fromcode`` with ``converter='mycode'`` already bound.

    """
    def __getattr__(cls, name):
        if not name.startswith('from'):
            # not a dynamic constructor: fall back to the regular lookup
            return type.__getattribute__(cls, name)
        # everything after the 'from' prefix is the converter name
        converter_name = name[len('from'):]
        return partial(cls.fromcode, converter=converter_name)
class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
    """A human language

    A human language is composed of a language part following the ISO-639
    standard and can be country-specific when a :class:`~babelfish.country.Country`
    is specified.

    The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)

    :param string language: the language as a 3-letter ISO-639-3 code
    :param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
    :type country: string or :class:`~babelfish.country.Country` or None
    :param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
    :type script: string or :class:`~babelfish.script.Script` or None
    :param unknown: the unknown language as a three-letters ISO-639-3 code to use as fallback
    :type unknown: string or None
    :raise: ValueError if the language could not be recognized and `unknown` is ``None``,
        or if `unknown` itself is not a known language

    """
    def __init__(self, language, country=None, script=None, unknown=None):
        # substitute the fallback first, then validate whatever code is left
        if unknown is not None and language not in LANGUAGES:
            language = unknown
        if language not in LANGUAGES:
            # 1-tuple so that a tuple argument is reported instead of breaking the formatting
            raise ValueError('%r is not a valid language' % (language,))
        self.alpha3 = language
        # Country / Script instances and None are stored as given; anything
        # else is passed to the corresponding constructor
        if country is None or isinstance(country, Country):
            self.country = country
        else:
            self.country = Country(country)
        if script is None or isinstance(script, Script):
            self.script = script
        else:
            self.script = Script(script)

    @classmethod
    def fromcode(cls, code, converter):
        """Create a :class:`Language` by its `code` using `converter` to
        :meth:`~babelfish.converters.LanguageReverseConverter.reverse` it

        :param string code: the code to reverse
        :param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
        :return: the corresponding :class:`Language` instance
        :rtype: :class:`Language`

        """
        # reverse() returns the positional arguments of the constructor
        return cls(*language_converters[converter].reverse(code))

    @classmethod
    def fromietf(cls, ietf):
        """Create a :class:`Language` from an IETF language code

        The code is split on ``-``. The first subtag is the language: an alpha2
        code when it has 2 characters, an alpha3 code otherwise. Among the
        following subtags a 2-character one is a country and any other one is a
        script. The script must be the last subtag, i.e. the accepted order is
        ``language[-country][-script]``.

        :param string ietf: the ietf code
        :return: the corresponding :class:`Language` instance
        :rtype: :class:`Language`
        :raise: ValueError if a subtag is not recognized or if subtags remain after the script

        """
        subtags = ietf.split('-')
        language_subtag = subtags.pop(0).lower()
        if len(language_subtag) == 2:
            language = cls.fromalpha2(language_subtag)
        else:
            language = cls(language_subtag)
        while subtags:
            subtag = subtags.pop(0)
            if len(subtag) == 2:
                language.country = Country(subtag.upper())
            else:
                language.script = Script(subtag.capitalize())
            if language.script is not None:
                # nothing may follow the script
                if subtags:
                    raise ValueError('Wrong IETF format. Unmatched subtags: %r' % subtags)
                break
        return language

    def __getstate__(self):
        return self.alpha3, self.country, self.script

    def __setstate__(self, state):
        self.alpha3, self.country, self.script = state

    def __getattr__(self, name):
        """Resolve an attribute that normal lookup did not find through the
        language converter registered under `name`

        :raise: AttributeError if no converter is registered under `name`

        """
        # This hook reads the three core attributes below. If one of them is
        # itself missing (for instance on an object created with __new__ whose
        # state has not been set yet), reading it would call this method again
        # and recurse until the interpreter gives up, so fail fast instead.
        if name in ('alpha3', 'country', 'script'):
            raise AttributeError(name)
        alpha3 = self.alpha3
        country = self.country.alpha2 if self.country is not None else None
        script = self.script.code if self.script is not None else None
        try:
            return language_converters[name].convert(alpha3, country, script)
        except KeyError:
            raise AttributeError(name)

    def __hash__(self):
        # hash of the string form, which is also what __eq__ compares strings against
        return hash(str(self))

    def __eq__(self, other):
        if isinstance(other, basestr):
            return str(self) == other
        if not isinstance(other, Language):
            return False
        return (self.alpha3 == other.alpha3 and
                self.country == other.country and
                self.script == other.script)

    def __ne__(self, other):
        return not self == other

    def __bool__(self):
        # only the 'und' language is falsy
        return self.alpha3 != 'und'
    __nonzero__ = __bool__

    def __repr__(self):
        return '<Language [%s]>' % self

    def __str__(self):
        # the alpha2 code is used when the converter provides one, the alpha3 code otherwise
        try:
            s = self.alpha2
        except LanguageConvertError:
            s = self.alpha3
        if self.country is not None:
            s += '-' + str(self.country)
        if self.script is not None:
            s += '-' + str(self.script)
        return s
@@ -1,75 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from collections import namedtuple
from pkg_resources import resource_stream # @UnresolvedImport
from . import basestr
#: Script code to script name mapping
SCRIPTS = {}

#: List of scripts in the ISO-15924 as namedtuple of code, number, name, french_name, pva and date
SCRIPT_MATRIX = []

#: The namedtuple used in the :data:`SCRIPT_MATRIX`
IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])

f = resource_stream('babelfish', 'data/iso15924-utf8-20131012.txt')
try:
    # the first line is always discarded
    f.readline()
    for l in f:
        l = l.decode('utf-8').strip()
        # blank lines and '#' comment lines carry no record
        if not l or l.startswith('#'):
            continue
        script = IsoScript._make(l.split(';'))
        SCRIPT_MATRIX.append(script)
        SCRIPTS[script.code] = script.name
finally:
    # release the packaged resource even when a line cannot be decoded or
    # does not have exactly the six expected fields
    f.close()
class Script(object):
    """A human writing system

    A script is represented by a 4-letter code from the ISO-15924 standard.
    Instances compare equal to other instances holding the same code and to
    the code string itself, and hash like the code string.

    :param string script: 4-letter ISO-15924 script code
    :raise: ValueError if `script` is not a key of :data:`SCRIPTS`

    """
    def __init__(self, script):
        if script in SCRIPTS:
            #: ISO-15924 4-letter script code
            self.code = script
        else:
            raise ValueError('%r is not a valid script' % script)

    @property
    def name(self):
        """English name of the script"""
        return SCRIPTS[self.code]

    def __getstate__(self):
        # the code is the whole state of the instance
        return self.code

    def __setstate__(self, state):
        self.code = state

    def __hash__(self):
        return hash(self.code)

    def __eq__(self, other):
        if isinstance(other, basestr):
            # a plain string is compared against the code
            return self.code == other
        return isinstance(other, Script) and self.code == other.code

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        return '<Script [%s]>' % (self,)

    def __str__(self):
        return self.code
@@ -1,377 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
import re
import sys
import pickle
from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
from pkg_resources import resource_stream # @UnresolvedImport
from babelfish import (LANGUAGES, Language, Country, Script, language_converters, country_converters,
LanguageReverseConverter, LanguageConvertError, LanguageReverseError, CountryReverseError)
if sys.version_info[:2] <= (2, 6):
_MAX_LENGTH = 80
def safe_repr(obj, short=False):
try:
result = repr(obj)
except Exception:
result = object.__repr__(obj)
if not short or len(result) < _MAX_LENGTH:
return result
return result[:_MAX_LENGTH] + ' [truncated]...'
class _AssertRaisesContext(object):
"""A context manager used to implement TestCase.assertRaises* methods."""
def __init__(self, expected, test_case, expected_regexp=None):
self.expected = expected
self.failureException = test_case.failureException
self.expected_regexp = expected_regexp
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, tb):
if exc_type is None:
try:
exc_name = self.expected.__name__
except AttributeError:
exc_name = str(self.expected)
raise self.failureException(
"{0} not raised".format(exc_name))
if not issubclass(exc_type, self.expected):
# let unexpected exceptions pass through
return False
self.exception = exc_value # store for later retrieval
if self.expected_regexp is None:
return True
expected_regexp = self.expected_regexp
if isinstance(expected_regexp, basestring):
expected_regexp = re.compile(expected_regexp)
if not expected_regexp.search(str(exc_value)):
raise self.failureException('"%s" does not match "%s"' %
(expected_regexp.pattern, str(exc_value)))
return True
class _Py26FixTestCase(object):
def assertIsNone(self, obj, msg=None):
"""Same as self.assertTrue(obj is None), with a nicer default message."""
if obj is not None:
standardMsg = '%s is not None' % (safe_repr(obj),)
self.fail(self._formatMessage(msg, standardMsg))
def assertIsNotNone(self, obj, msg=None):
"""Included for symmetry with assertIsNone."""
if obj is None:
standardMsg = 'unexpectedly None'
self.fail(self._formatMessage(msg, standardMsg))
def assertIn(self, member, container, msg=None):
"""Just like self.assertTrue(a in b), but with a nicer default message."""
if member not in container:
standardMsg = '%s not found in %s' % (safe_repr(member),
safe_repr(container))
self.fail(self._formatMessage(msg, standardMsg))
def assertNotIn(self, member, container, msg=None):
"""Just like self.assertTrue(a not in b), but with a nicer default message."""
if member in container:
standardMsg = '%s unexpectedly found in %s' % (safe_repr(member),
safe_repr(container))
self.fail(self._formatMessage(msg, standardMsg))
def assertIs(self, expr1, expr2, msg=None):
"""Just like self.assertTrue(a is b), but with a nicer default message."""
if expr1 is not expr2:
standardMsg = '%s is not %s' % (safe_repr(expr1),
safe_repr(expr2))
self.fail(self._formatMessage(msg, standardMsg))
def assertIsNot(self, expr1, expr2, msg=None):
"""Just like self.assertTrue(a is not b), but with a nicer default message."""
if expr1 is expr2:
standardMsg = 'unexpectedly identical: %s' % (safe_repr(expr1),)
self.fail(self._formatMessage(msg, standardMsg))
else:
class _Py26FixTestCase(object):
pass
class TestScript(TestCase, _Py26FixTestCase):
    """Tests of :class:`Script`: validation, comparison, hashing and pickling."""

    def test_wrong_script(self):
        # 'Azer' is expected to be rejected by the constructor
        self.assertRaises(ValueError, Script, 'Azer')

    def test_eq(self):
        latin = Script('Latn')
        self.assertEqual(latin, Script('Latn'))

    def test_ne(self):
        cyrillic = Script('Cyrl')
        self.assertNotEqual(cyrillic, Script('Latn'))

    def test_hash(self):
        # a script hashes like its code
        hiragana = Script('Hira')
        self.assertEqual(hash(hiragana), hash('Hira'))

    def test_pickle(self):
        # a pickle round trip must give back an equal script
        restored = pickle.loads(pickle.dumps(Script('Latn')))
        self.assertEqual(restored, Script('Latn'))
class TestCountry(TestCase, _Py26FixTestCase):
    """Tests of :class:`Country`: validation, comparison, hashing, pickling and the name converter."""

    def test_wrong_country(self):
        # 'ZZ' is expected to be rejected by the constructor
        self.assertRaises(ValueError, Country, 'ZZ')

    def test_eq(self):
        united_states = Country('US')
        self.assertEqual(united_states, Country('US'))

    def test_ne(self):
        united_states = Country('US')
        self.assertNotEqual(Country('GB'), united_states)
        self.assertIsNotNone(united_states)

    def test_hash(self):
        # a country hashes like its alpha2 code
        self.assertEqual(hash(Country('US')), hash('US'))

    def test_pickle(self):
        # a pickle round trip must give back an equal country
        for code in ('GB', 'US'):
            country = Country(code)
            restored = pickle.loads(pickle.dumps(country))
            self.assertEqual(restored, country)

    def test_converter_name(self):
        united_states = Country('US')
        self.assertEqual(united_states.name, 'UNITED STATES')
        # both spellings of the reverse conversion must agree
        self.assertEqual(Country.fromname('UNITED STATES'), united_states)
        self.assertEqual(Country.fromcode('UNITED STATES', 'name'), united_states)
        self.assertRaises(CountryReverseError, lambda: Country.fromname('ZZZZZ'))
        name_converter = country_converters['name']
        self.assertEqual(len(name_converter.codes), 249)
class TestLanguage(TestCase, _Py26FixTestCase):
    """Tests of :class:`Language`: construction, the bundled converters, IETF
    parsing, comparison, hashing, pickling and converter registration."""

    def test_languages(self):
        self.assertEqual(len(LANGUAGES), 7874)

    def test_wrong_language(self):
        self.assertRaises(ValueError, lambda: Language('zzz'))

    def test_unknown_language(self):
        # an unrecognized code is replaced by the `unknown` fallback
        self.assertEqual(Language('zzzz', unknown='und'), Language('und'))

    def test_converter_alpha2(self):
        self.assertEqual(Language('eng').alpha2, 'en')
        self.assertEqual(Language.fromalpha2('en'), Language('eng'))
        self.assertEqual(Language.fromcode('en', 'alpha2'), Language('eng'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha2('zz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)
        self.assertEqual(len(language_converters['alpha2'].codes), 184)

    def test_converter_alpha3b(self):
        self.assertEqual(Language('fra').alpha3b, 'fre')
        self.assertEqual(Language.fromalpha3b('fre'), Language('fra'))
        self.assertEqual(Language.fromcode('fre', 'alpha3b'), Language('fra'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3b('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3b)
        self.assertEqual(len(language_converters['alpha3b'].codes), 418)

    def test_converter_alpha3t(self):
        self.assertEqual(Language('fra').alpha3t, 'fra')
        self.assertEqual(Language.fromalpha3t('fra'), Language('fra'))
        self.assertEqual(Language.fromcode('fra', 'alpha3t'), Language('fra'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3t('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)
        self.assertEqual(len(language_converters['alpha3t'].codes), 418)

    def test_converter_name(self):
        self.assertEqual(Language('eng').name, 'English')
        self.assertEqual(Language.fromname('English'), Language('eng'))
        self.assertEqual(Language.fromcode('English', 'name'), Language('eng'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromname('Zzzzzzzzz'))
        self.assertEqual(len(language_converters['name'].codes), 7874)

    def test_converter_scope(self):
        self.assertEqual(language_converters['scope'].codes, set(['I', 'S', 'M']))
        self.assertEqual(Language('eng').scope, 'individual')
        self.assertEqual(Language('und').scope, 'special')

    def test_converter_type(self):
        self.assertEqual(language_converters['type'].codes, set(['A', 'C', 'E', 'H', 'L', 'S']))
        self.assertEqual(Language('eng').type, 'living')
        self.assertEqual(Language('und').type, 'special')

    def test_converter_opensubtitles(self):
        self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
        self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
        self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
        self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
        self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
        # Montenegrin is not recognized as an ISO language (yet?) but for now it is
        # unofficially accepted as Serbian from Montenegro
        self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
        self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromopensubtitles('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').opensubtitles)
        self.assertEqual(len(language_converters['opensubtitles'].codes), 607)

        # test with all the LANGUAGES from the opensubtitles api
        # downloaded from: http://www.opensubtitles.org/addons/export_languages.php
        f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
        try:
            # the first line holds the column names
            f.readline()
            for l in f:
                idlang, alpha2, _, upload_enabled, web_enabled = l.decode('utf-8').strip().split('\t')
                if not int(upload_enabled) and not int(web_enabled):
                    # do not test LANGUAGES that are too esoteric / not widely available
                    continue
                self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
                if alpha2:
                    self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
        finally:
            # close the resource even when one of the assertions above fails
            f.close()

    def test_converter_opensubtitles_codes(self):
        for code in language_converters['opensubtitles'].from_opensubtitles.keys():
            self.assertIn(code, language_converters['opensubtitles'].codes)

    def test_fromietf_country_script(self):
        language = Language.fromietf('fra-FR-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertEqual(language.country, Country('FR'))
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_country_no_script(self):
        language = Language.fromietf('fra-FR')
        self.assertEqual(language.alpha3, 'fra')
        self.assertEqual(language.country, Country('FR'))
        self.assertIsNone(language.script)

    def test_fromietf_no_country_no_script(self):
        # a bare language subtag: neither a country nor a script may be set
        language = Language.fromietf('fra')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertIsNone(language.script)

    def test_fromietf_no_country_script(self):
        language = Language.fromietf('fra-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_alpha2_language(self):
        language = Language.fromietf('fr-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_wrong_language(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('xyz-FR'))

    def test_fromietf_wrong_country(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('fra-YZ'))

    def test_fromietf_wrong_script(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('fra-FR-Wxyz'))

    def test_eq(self):
        self.assertEqual(Language('eng'), Language('eng'))

    def test_ne(self):
        self.assertNotEqual(Language('fra'), Language('eng'))
        self.assertIsNotNone(Language('fra'))

    def test_nonzero(self):
        # only the undetermined language is falsy
        self.assertFalse(bool(Language('und')))
        self.assertTrue(bool(Language('eng')))

    def test_language_hasattr(self):
        self.assertTrue(hasattr(Language('fra'), 'alpha3'))
        self.assertTrue(hasattr(Language('fra'), 'alpha2'))
        # a failed conversion must look like a missing attribute
        self.assertFalse(hasattr(Language('bej'), 'alpha2'))

    def test_country_hasattr(self):
        self.assertTrue(hasattr(Country('US'), 'name'))
        self.assertTrue(hasattr(Country('FR'), 'alpha2'))
        self.assertFalse(hasattr(Country('BE'), 'none'))

    def test_country(self):
        self.assertEqual(Language('por', 'BR').country, Country('BR'))
        self.assertEqual(Language('eng', Country('US')).country, Country('US'))

    def test_eq_with_country(self):
        self.assertEqual(Language('eng', 'US'), Language('eng', Country('US')))

    def test_ne_with_country(self):
        self.assertNotEqual(Language('eng', 'US'), Language('eng', Country('GB')))

    def test_script(self):
        self.assertEqual(Language('srp', script='Latn').script, Script('Latn'))
        self.assertEqual(Language('srp', script=Script('Cyrl')).script, Script('Cyrl'))

    def test_eq_with_script(self):
        self.assertEqual(Language('srp', script='Latn'), Language('srp', script=Script('Latn')))

    def test_ne_with_script(self):
        self.assertNotEqual(Language('srp', script='Latn'), Language('srp', script=Script('Cyrl')))

    def test_eq_with_country_and_script(self):
        self.assertEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Latn')))

    def test_ne_with_country_and_script(self):
        self.assertNotEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Cyrl')))

    def test_hash(self):
        # a language hashes like its string form
        self.assertEqual(hash(Language('fra')), hash('fr'))
        self.assertEqual(hash(Language('ace')), hash('ace'))
        self.assertEqual(hash(Language('por', 'BR')), hash('pt-BR'))
        self.assertEqual(hash(Language('srp', script='Cyrl')), hash('sr-Cyrl'))
        self.assertEqual(hash(Language('eng', 'US', 'Latn')), hash('en-US-Latn'))

    def test_pickle(self):
        for lang in [Language('fra'),
                     Language('eng', 'US'),
                     Language('srp', script='Latn'),
                     Language('eng', 'US', 'Latn')]:
            self.assertEqual(pickle.loads(pickle.dumps(lang)), lang)

    def test_str(self):
        # the string form must be parseable back by fromietf
        self.assertEqual(Language.fromietf(str(Language('eng', 'US', 'Latn'))), Language('eng', 'US', 'Latn'))
        self.assertEqual(Language.fromietf(str(Language('fra', 'FR'))), Language('fra', 'FR'))
        self.assertEqual(Language.fromietf(str(Language('bel'))), Language('bel'))

    def test_register_converter(self):
        class TestConverter(LanguageReverseConverter):
            def __init__(self):
                self.to_test = {'fra': 'test1', 'eng': 'test2'}
                self.from_test = {'test1': 'fra', 'test2': 'eng'}

            def convert(self, alpha3, country=None, script=None):
                if alpha3 not in self.to_test:
                    raise LanguageConvertError(alpha3, country, script)
                return self.to_test[alpha3]

            def reverse(self, test):
                if test not in self.from_test:
                    raise LanguageReverseError(test)
                return (self.from_test[test], None)
        language = Language('fra')
        self.assertFalse(hasattr(language, 'test'))
        # once registered, the converter is reachable from existing instances too
        language_converters['test'] = TestConverter()
        self.assertTrue(hasattr(language, 'test'))
        self.assertIn('test', language_converters)
        self.assertEqual(Language('fra').test, 'test1')
        self.assertEqual(Language.fromtest('test2').alpha3, 'eng')
        del language_converters['test']
        self.assertNotIn('test', language_converters)
        self.assertRaises(KeyError, lambda: Language.fromtest('test1'))
        self.assertRaises(AttributeError, lambda: Language('fra').test)
def suite():
    """Return a TestSuite holding the Script, Country and Language test cases."""
    tests = TestSuite()
    for case in (TestScript, TestCountry, TestLanguage):
        tests.addTest(TestLoader().loadTestsFromTestCase(case))
    return tests
if __name__ == '__main__':
TextTestRunner().run(suite())
-43
View File
@@ -1,43 +0,0 @@
Behold, mortal, the origins of Beautiful Soup...
================================================
Leonard Richardson is the primary programmer.
Aaron DeVore is awesome.
Mark Pilgrim provided the encoding detection code that forms the base
of UnicodeDammit.
Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful
Soup 4 working under Python 3.
Simon Willison wrote soupselect, which was used to make Beautiful Soup
support CSS selectors.
Sam Ruby helped with a lot of edge cases.
Jonathan Ellis was awarded the prestigious Beau Potage D'Or for his
work in solving the nestable tags conundrum.
An incomplete list of people who have contributed patches to Beautiful
Soup:
Istvan Albert, Andrew Lin, Anthony Baxter, Andrew Boyko, Tony Chang,
Zephyr Fang, Fuzzy, Roman Gaufman, Yoni Gilad, Richie Hindle, Peteris
Krumins, Kent Johnson, Ben Last, Robert Leftwich, Staffan Malmgren,
Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon", Ed
Oskiewicz, Greg Phillips, Giles Radford, Arthur Rudolph, Marko
Samastur, Jouni Seppänen, Alexander Schmolck, Andy Theyers, Glyn
Webster, Paul Wright, Danny Yoo
An incomplete list of people who made suggestions or found bugs or
found ways to break Beautiful Soup:
Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel,
Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes,
Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams,
warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison,
Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed
Summers, Dennis Sutch, Chris Smith, Aaron Sweep^W Swartz, Stuart
Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de
Sousa Rocha, Yichun Wei, Per Vognsen
-27
View File
@@ -1,27 +0,0 @@
Beautiful Soup is made available under the MIT license:
Copyright (c) 2004-2015 Leonard Richardson
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Beautiful Soup incorporates code from the html5lib library, which is
also made available under the MIT license. Copyright (c) 2006-2013
James Graham and other contributors
File diff suppressed because it is too large Load Diff
-63
View File
@@ -1,63 +0,0 @@
= Introduction =
>>> from bs4 import BeautifulSoup
>>> soup = BeautifulSoup("<p>Some<b>bad<i>HTML")
>>> print soup.prettify()
<html>
<body>
<p>
Some
<b>
bad
<i>
HTML
</i>
</b>
</p>
</body>
</html>
>>> soup.find(text="bad")
u'bad'
>>> soup.i
<i>HTML</i>
>>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml")
>>> print soup.prettify()
<?xml version="1.0" encoding="utf-8"?>
<tag1>
Some
<tag2 />
bad
<tag3>
XML
</tag3>
</tag1>
= Full documentation =
The bs4/doc/ directory contains full documentation in Sphinx
format. Run "make html" in that directory to create HTML
documentation.
= Running the unit tests =
Beautiful Soup supports unit test discovery from the project root directory:
$ nosetests
$ python -m unittest discover -s bs4 # Python 2.7 and up
If you checked out the source tree, you should see a script in the
home directory called test-all-versions. This script will run the unit
tests under Python 2.7, then create a temporary Python 3 conversion of
the source and run the unit tests again under Python 3.
= Links =
Homepage: http://www.crummy.com/software/BeautifulSoup/bs4/
Documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
http://readthedocs.org/docs/beautiful-soup-4/
Discussion group: http://groups.google.com/group/beautifulsoup/
Development: https://code.launchpad.net/beautifulsoup/
Bug tracker: https://bugs.launchpad.net/beautifulsoup/
-31
View File
@@ -1,31 +0,0 @@
Additions
---------
More of the jQuery API: nextUntil?
Optimizations
-------------
The html5lib tree builder doesn't use the standard tree-building API,
which worries me and has resulted in a number of bugs.
markup_attr_map can be optimized since it's always a map now.
Upon encountering UTF-16LE data or some other uncommon serialization
of Unicode, UnicodeDammit will convert the data to Unicode, then
encode it as UTF-8. This is wasteful because it will just get decoded
back to Unicode.
CDATA
-----
The elementtree XMLParser has a strip_cdata argument that, when set to
False, should allow Beautiful Soup to preserve CDATA sections instead
of treating them as text. Except it doesn't. (This argument is also
present for HTMLParser, and also does nothing there.)
Currently, html5lib converts CDATA sections into comments. An
as-yet-unreleased version of html5lib changes the parser's handling of
CDATA sections to allow CDATA sections in tags like <svg> and
<math>. The HTML5TreeBuilder will need to be updated to create CData
objects instead of Comment objects in this situation.
-529
View File
@@ -1,529 +0,0 @@
"""Beautiful Soup
Elixir and Tonic
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
Beautiful Soup uses a pluggable XML or HTML parser to parse a
(possibly invalid) document into a tree representation. Beautiful Soup
provides methods and Pythonic idioms that make it easy to navigate,
search, and modify the parse tree.
Beautiful Soup works with Python 2.7 and up. It works better if lxml
and/or html5lib is installed.
For more than you ever wanted to know about Beautiful Soup, see the
documentation:
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.6.0"
__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
__license__ = "MIT"
__all__ = ['BeautifulSoup']
import os
import re
import traceback
import warnings
from .builder import builder_registry, ParserRejectedMarkup
from .dammit import UnicodeDammit
from .element import (
CData,
Comment,
DEFAULT_OUTPUT_ENCODING,
Declaration,
Doctype,
NavigableString,
PageElement,
ProcessingInstruction,
ResultSet,
SoupStrainer,
Tag,
)
# The very first thing we do is give a useful error if someone is
# running this code under Python 3 without converting it.
'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
class BeautifulSoup(Tag):
"""
This class defines the basic interface called by the tree builders.
These methods will be called by the parser:
reset()
feed(markup)
The tree builder may call these methods from its feed() implementation:
handle_starttag(name, attrs) # See note about return value
handle_endtag(name)
handle_data(data) # Appends to the current data node
endData(containerClass=NavigableString) # Ends the current data node
No matter how complicated the underlying parser is, you should be
able to build a tree using 'start tag' events, 'end tag' events,
'data' events, and "done with data" events.
If you encounter an empty-element tag (aka a self-closing tag,
like HTML's <br> tag), call handle_starttag and then
handle_endtag.
"""
ROOT_TAG_NAME = u'[document]'
# If the end-user gives no indication which tree builder they
# want, look for one with these features.
DEFAULT_BUILDER_FEATURES = ['html', 'fast']
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
# Template for the warning issued when the constructor had to pick a
# parser itself. It is filled in with the keys markup_type, parser,
# line_number and filename.
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP)\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
def __init__(self, markup="", features=None, builder=None,
             parse_only=None, from_encoding=None, exclude_encodings=None,
             **kwargs):
    """Set this object up as the root tag and parse the given markup.

    The Soup object is initialized as the 'root tag', and the
    provided markup (which can be a string or a file-like object)
    is fed into the underlying parser.

    :param markup: The markup to parse: a string, or any object with
        a read() method.
    :param features: A feature name, or a list of feature names, used
        to look up a tree builder when `builder` is not given.
    :param builder: A tree builder instance to use instead of looking
        one up by feature.
    :param parse_only: Stored as self.parse_only and consulted while
        the tree is being built.
    :param from_encoding: Passed to the builder's prepare_markup().
        Ignored, with a warning, when `markup` is already Unicode.
    :param exclude_encodings: Passed to the builder's prepare_markup().
    :param kwargs: Only the deprecated Beautiful Soup 3 arguments
        handled below are accepted. Each one produces a warning.
    :raise TypeError: If any other keyword argument is passed.
    :raise FeatureNotFound: If no registered tree builder has the
        requested features.
    """

    # Deprecated arguments that are no longer honoured. Each is removed
    # from kwargs after the warning, so that it does not trip the
    # unexpected-keyword check further down.
    if 'convertEntities' in kwargs:
        del kwargs['convertEntities']
        warnings.warn(
            "BS4 does not respect the convertEntities argument to the "
            "BeautifulSoup constructor. Entities are always converted "
            "to Unicode characters.")

    if 'markupMassage' in kwargs:
        del kwargs['markupMassage']
        warnings.warn(
            "BS4 does not respect the markupMassage argument to the "
            "BeautifulSoup constructor. The tree builder is responsible "
            "for any necessary markup massage.")

    if 'smartQuotesTo' in kwargs:
        del kwargs['smartQuotesTo']
        warnings.warn(
            "BS4 does not respect the smartQuotesTo argument to the "
            "BeautifulSoup constructor. Smart quotes are always converted "
            "to Unicode characters.")

    if 'selfClosingTags' in kwargs:
        del kwargs['selfClosingTags']
        warnings.warn(
            "BS4 does not respect the selfClosingTags argument to the "
            "BeautifulSoup constructor. The tree builder is responsible "
            "for understanding self-closing tags.")

    if 'isHTML' in kwargs:
        del kwargs['isHTML']
        warnings.warn(
            "BS4 does not respect the isHTML argument to the "
            "BeautifulSoup constructor. Suggest you use "
            "features='lxml' for HTML and features='lxml-xml' for "
            "XML.")

    def deprecated_argument(old_name, new_name):
        """Pop a renamed argument from kwargs, warning about the rename."""
        if old_name in kwargs:
            warnings.warn(
                'The "%s" argument to the BeautifulSoup constructor '
                'has been renamed to "%s."' % (old_name, new_name))
            value = kwargs[old_name]
            del kwargs[old_name]
            return value
        return None

    parse_only = parse_only or deprecated_argument(
        "parseOnlyThese", "parse_only")

    from_encoding = from_encoding or deprecated_argument(
        "fromEncoding", "from_encoding")

    if from_encoding and isinstance(markup, unicode):
        warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
        from_encoding = None

    if len(kwargs) > 0:
        # list(kwargs) works whether keys() returns a list or a view.
        arg = list(kwargs).pop()
        raise TypeError(
            "__init__() got an unexpected keyword argument '%s'" % arg)

    if builder is None:
        original_features = features
        if isinstance(features, basestring):
            features = [features]
        if features is None or len(features) == 0:
            features = self.DEFAULT_BUILDER_FEATURES
        builder_class = builder_registry.lookup(*features)
        if builder_class is None:
            raise FeatureNotFound(
                "Couldn't find a tree builder with the features you "
                "requested: %s. Do you need to install a parser library?"
                % ",".join(features))
        builder = builder_class()
        # Warn unless the caller asked for this builder by one of its names.
        if not (original_features == builder.NAME or
                original_features in builder.ALTERNATE_NAMES):
            if builder.is_xml:
                markup_type = "XML"
            else:
                markup_type = "HTML"

            # With limit=2 the extracted stack is [caller, this frame],
            # so entry 0 is the code that invoked the constructor.
            # Without a limit, entry 0 would be the outermost frame of
            # the whole program.
            caller = traceback.extract_stack(limit=2)[0]
            filename = caller[0]
            line_number = caller[1]
            warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
                filename=filename,
                line_number=line_number,
                parser=builder.NAME,
                markup_type=markup_type))

    self.builder = builder
    self.is_xml = builder.is_xml
    self.known_xml = self.is_xml
    self.builder.soup = self

    self.parse_only = parse_only

    if hasattr(markup, 'read'):        # It's a file-type object.
        markup = markup.read()
    elif len(markup) <= 256 and (
            (isinstance(markup, bytes) and b'<' not in markup)
            or (isinstance(markup, unicode) and u'<' not in markup)
    ):
        # Print out warnings for a couple beginner problems
        # involving passing non-markup to Beautiful Soup.
        # Beautiful Soup will still parse the input as markup,
        # just in case that's what the user really wants.
        if (isinstance(markup, unicode)
                and not os.path.supports_unicode_filenames):
            possible_filename = markup.encode("utf8")
        else:
            possible_filename = markup
        is_file = False
        try:
            is_file = os.path.exists(possible_filename)
        except Exception:
            # This is almost certainly a problem involving
            # characters not valid in filenames on this
            # system. Just let it go.
            pass
        if is_file:
            if isinstance(markup, unicode):
                markup = markup.encode("utf8")
            warnings.warn(
                '"%s" looks like a filename, not markup. You should'
                ' probably open this file and pass the filehandle into'
                ' Beautiful Soup.' % markup)
        self._check_markup_is_url(markup)

    # The builder offers one or more ways of preparing the markup. Try
    # each in turn and stop at the first one the parser accepts.
    for (self.markup, self.original_encoding, self.declared_html_encoding,
         self.contains_replacement_characters) in (
             self.builder.prepare_markup(
                 markup, from_encoding, exclude_encodings=exclude_encodings)):
        self.reset()
        try:
            self._feed()
            break
        except ParserRejectedMarkup:
            pass

    # Clear out the markup and remove the builder's circular
    # reference to this object.
    self.markup = None
    self.builder.soup = None
def __copy__(self):
    """Return a copy made by re-parsing this object's UTF-8 encoding.

    The copy has the same type and uses the same builder.
    """
    utf8_markup = self.encode('utf-8')
    duplicate = type(self)(
        utf8_markup, builder=self.builder, from_encoding='utf-8')

    # The tree was re-parsed from UTF-8, but that need not be the
    # encoding of the markup this object was built from, so carry the
    # recorded encoding over to the copy.
    duplicate.original_encoding = self.original_encoding
    return duplicate
def __getstate__(self):
    """Return a copy of the instance dict suitable for pickling.

    Frequently a tree builder can't be pickled, so the 'builder'
    entry is replaced with None unless the builder says it is
    picklable.
    """
    state = dict(self.__dict__)
    drop_builder = 'builder' in state and not self.builder.picklable
    if drop_builder:
        state['builder'] = None
    return state
@staticmethod
def _check_markup_is_url(markup):
"""
Check if markup looks like it's actually a url and raise a warning
if so. Markup can be unicode or str (py2) / bytes (py3).
"""
if isinstance(markup, bytes):
space = b' '
cant_start_with = (b"http:", b"https:")
elif isinstance(markup, unicode):
space = u' '
cant_start_with = (u"http:", u"https:")
else:
return
if any(markup.startswith(prefix) for prefix in cant_start_with):
if not space in markup:
if isinstance(markup, bytes):
decoded_markup = markup.decode('utf-8', 'replace')
else:
decoded_markup = markup
warnings.warn(
'"%s" looks like a URL. Beautiful Soup is not an'
' HTTP client. You should probably use an HTTP client like'
' requests to get the document behind the URL, and feed'
' that document to Beautiful Soup.' % decoded_markup
)
def _feed(self):
    """Run self.markup through the builder and close whatever is left open."""
    # Start from a clean builder, then hand it the markup.
    self.builder.reset()
    self.builder.feed(self.markup)

    # Flush any pending string, then pop tags until the current tag
    # is the root.
    self.endData()
    while True:
        if self.currentTag.name == self.ROOT_TAG_NAME:
            break
        self.popTag()
def reset(self):
    """Reinitialize this object as an empty root tag, ready for parsing."""
    # Re-run the Tag constructor on ourselves, named as the root tag.
    Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
    self.hidden = 1
    self.builder.reset()

    # Empty the parsing state, then make this object the only entry
    # on the tag stack.
    self.current_data = []
    self.currentTag = None
    self.tagStack = []
    self.preserve_whitespace_tag_stack = []
    self.pushTag(self)
def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
    """Create a new tag associated with this soup.

    Keyword arguments other than `namespace` and `nsprefix` are
    collected and passed to Tag as its attributes.
    """
    created = Tag(None, self.builder, name, namespace, nsprefix, attrs)
    return created
def new_string(self, s, subclass=NavigableString):
    """Create a new NavigableString associated with this soup.

    `subclass` is the class that is actually instantiated with `s`.
    """
    created = subclass(s)
    return created
def insert_before(self, successor):
    """Always raise NotImplementedError: not supported on this object."""
    message = "BeautifulSoup objects don't support insert_before()."
    raise NotImplementedError(message)
def insert_after(self, successor):
    """Always raise NotImplementedError: not supported on this object."""
    message = "BeautifulSoup objects don't support insert_after()."
    raise NotImplementedError(message)
def popTag(self):
    """Remove the top tag from the stack and return the new current tag.

    If the stack is empty after the pop, self.currentTag is left
    unchanged and returned as is.
    """
    popped = self.tagStack.pop()
    whitespace_stack = self.preserve_whitespace_tag_stack
    # Keep the whitespace-preserving stack in step with the tag stack.
    if whitespace_stack and popped == whitespace_stack[-1]:
        whitespace_stack.pop()
    if self.tagStack:
        self.currentTag = self.tagStack[-1]
    return self.currentTag
def pushTag(self, tag):
    """Make `tag` the current tag, adding it to the previous current tag."""
    enclosing = self.currentTag
    if enclosing:
        enclosing.contents.append(tag)
    self.tagStack.append(tag)
    self.currentTag = self.tagStack[-1]
    # Track tags for which the builder wants whitespace kept as is.
    preserved_names = self.builder.preserve_whitespace_tags
    if tag.name in preserved_names:
        self.preserve_whitespace_tag_stack.append(tag)
def endData(self, containerClass=NavigableString):
    """Turn the collected data into one string object and add it to the tree.

    Does nothing if no data has been collected. `containerClass` is
    the class instantiated with the joined text.
    """
    if not self.current_data:
        return

    text = u''.join(self.current_data)

    # If whitespace is not preserved, and this string contains
    # nothing but ASCII spaces, replace it with a single space
    # or newline.
    if not self.preserve_whitespace_tag_stack:
        if all(char in self.ASCII_SPACES for char in text):
            text = '\n' if '\n' in text else ' '

    # Reset the data collector.
    self.current_data = []

    # At the top level, a parse_only filter may reject this string.
    if self.parse_only and len(self.tagStack) <= 1 and (
            not self.parse_only.text
            or not self.parse_only.search(text)):
        return

    self.object_was_parsed(containerClass(text))
def object_was_parsed(self, o, parent=None, most_recent_element=None):
"""Add an object to the parse tree.

`o` is appended to the contents of `parent` (default: the current
tag) and becomes self._most_recent_element. `most_recent_element`
overrides self._most_recent_element as the element preceding `o`.

Raises ValueError if `o` cannot be found in the parent's contents
after being appended to a parent that has a next sibling.
"""
parent = parent or self.currentTag
previous_element = most_recent_element or self._most_recent_element
next_element = previous_sibling = next_sibling = None
# A Tag may already carry links; reuse them instead of None.
if isinstance(o, Tag):
next_element = o.next_element
next_sibling = o.next_sibling
previous_sibling = o.previous_sibling
if not previous_element:
previous_element = o.previous_element
# NOTE(review): setup() is defined elsewhere; presumably it stores the
# parent and the four links on o -- confirm against its definition.
o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)
self._most_recent_element = o
parent.contents.append(o)
if parent.next_sibling:
# This node is being inserted into an element that has
# already been parsed. Deal with any dangling references.
# Search the parent's contents from the end for o itself (identity,
# not equality); the else clause runs only if it was never found.
index = len(parent.contents)-1
while index >= 0:
if parent.contents[index] is o:
break
index -= 1
else:
raise ValueError(
"Error building tree: supposedly %r was inserted "
"into %r after the fact, but I don't see it!" % (
o, parent
)
)
# First child: the parent is the previous element and there is no
# previous sibling. Otherwise the preceding child fills both roles.
if index == 0:
previous_element = parent
previous_sibling = None
else:
previous_element = previous_sibling = parent.contents[index-1]
# Last child: the next element is the parent's next sibling and there
# is no next sibling. Otherwise the following child fills both roles.
if index == len(parent.contents)-1:
next_element = parent.next_sibling
next_sibling = None
else:
next_element = next_sibling = parent.contents[index+1]
# Store each link on o and point the neighbour, when there is one,
# back at o.
o.previous_element = previous_element
if previous_element:
previous_element.next_element = o
o.next_element = next_element
if next_element:
next_element.previous_element = o
o.next_sibling = next_sibling
if next_sibling:
next_sibling.previous_sibling = o
o.previous_sibling = previous_sibling
if previous_sibling:
previous_sibling.next_sibling = o
def _popToTag(self, name, nsprefix=None, inclusivePop=True):
    """Pop the tag stack up to and including the most recent
    instance of the given tag. If inclusivePop is false, pop the tag
    stack up to but *not* including the most recent instance of
    the given tag.

    Returns the result of the last popTag() call, or None if nothing
    was popped.
    """
    if name == self.ROOT_TAG_NAME:
        # The BeautifulSoup object itself can never be popped.
        return

    last_popped = None
    # Walk down from the top of the stack; index 0 is never visited.
    for position in range(len(self.tagStack) - 1, 0, -1):
        candidate = self.tagStack[position]
        found = (name == candidate.name and nsprefix == candidate.prefix)
        if found and not inclusivePop:
            break
        last_popped = self.popTag()
        if found:
            break
    return last_popped
def handle_starttag(self, name, namespace, nsprefix, attrs):
    """Push a start tag on to the stack.

    If this method returns None, the tag was rejected by the
    SoupStrainer. You should proceed as if the tag had not occurred
    in the document. For instance, if this was a self-closing tag,
    don't call handle_endtag.
    """
    # Any text collected so far belongs before this tag.
    self.endData()

    strainer = self.parse_only
    if strainer and len(self.tagStack) <= 1 and (
            strainer.text or not strainer.search_tag(name, attrs)):
        return None

    new_tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
                  self.currentTag, self._most_recent_element)
    if new_tag is None:
        return new_tag

    # Link the previously parsed element to the new tag.
    previous = self._most_recent_element
    if previous:
        previous.next_element = new_tag
    self._most_recent_element = new_tag
    self.pushTag(new_tag)
    return new_tag
def handle_endtag(self, name, nsprefix=None):
    """Flush pending data, then pop the stack through the named tag."""
    self.endData()
    self._popToTag(name, nsprefix)
def handle_data(self, data):
    """Add `data` to the list of pending data chunks."""
    pending = self.current_data
    pending.append(data)
def decode(self, pretty_print=False,
           eventual_encoding=DEFAULT_OUTPUT_ENCODING,
           formatter="minimal"):
    """Return a string or Unicode representation of this document.

    For XML documents an XML declaration is put first; it names
    `eventual_encoding` unless that is None. The rest of the output
    comes from the parent class's decode().
    """
    prefix = u''
    if self.is_xml:
        # Build the XML declaration.
        encoding_part = ''
        if eventual_encoding is not None:
            encoding_part = ' encoding="%s"' % eventual_encoding
        prefix = u'<?xml version="1.0"%s?>\n' % encoding_part

    # The parent class takes an indent level: 0 to pretty-print,
    # None otherwise.
    indent_level = 0 if pretty_print else None
    body = super(BeautifulSoup, self).decode(
        indent_level, eventual_encoding, formatter)
    return prefix + body
# Alias to make it easier to type import: 'from bs4 import _soup'
_s = BeautifulSoup
_soup = BeautifulSoup
class BeautifulStoneSoup(BeautifulSoup):
    """Deprecated interface to an XML parser.

    Behaves like BeautifulSoup with features forced to 'xml', and
    warns about the deprecation every time it is instantiated.
    """

    def __init__(self, *args, **kwargs):
        # Any 'features' value supplied by the caller is overridden.
        kwargs.update(features='xml')
        warnings.warn(
            'The BeautifulStoneSoup class is deprecated. Instead of using '
            'it, pass features="xml" into the BeautifulSoup constructor.')
        super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
class StopParsing(Exception):
    """Exception type with no behaviour beyond Exception."""
class FeatureNotFound(ValueError):
    """Raised when no tree builder offers the requested features."""
#By default, act as an HTML pretty-printer.
if __name__ == '__main__':
import sys
soup = BeautifulSoup(sys.stdin)
print soup.prettify()
@@ -1,333 +0,0 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from collections import defaultdict
import itertools
import sys
from bs4.element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
HTMLAwareEntitySubstitution,
whitespace_re
)
__all__ = [
'HTMLTreeBuilder',
'SAXTreeBuilder',
'TreeBuilder',
'TreeBuilderRegistry',
]
# Some useful features for a TreeBuilder to have.
FAST = 'fast'
PERMISSIVE = 'permissive'
STRICT = 'strict'
XML = 'xml'
HTML = 'html'
HTML_5 = 'html5'
class TreeBuilderRegistry(object):
    """Keeps track of tree builder classes and finds one by feature.

    The most recently registered builder is always tried first.
    """

    def __init__(self):
        # Feature name -> builder classes, most recently registered first.
        self.builders_for_feature = defaultdict(list)
        # Every registered builder class, most recently registered first.
        self.builders = []

    def register(self, treebuilder_class):
        """Register a treebuilder based on its advertised features."""
        for feature in treebuilder_class.features:
            self.builders_for_feature[feature].insert(0, treebuilder_class)
        self.builders.insert(0, treebuilder_class)

    def lookup(self, *features):
        """Return the best registered builder for `features`, or None.

        With no features, the most recently registered builder is
        returned. Features that no builder advertises are skipped.
        Among the builders offering every remaining feature, the one
        listed first for the first such feature wins.
        """
        if not self.builders:
            # There are no builders at all.
            return None

        if not features:
            # They didn't ask for any features. Give them the most
            # recently registered builder.
            return self.builders[0]

        ordered = None   # builders for the first feature anyone offers
        allowed = None   # builders offering every known feature so far
        for feature in features:
            providers = self.builders_for_feature.get(feature, [])
            if not providers:
                continue
            if ordered is None:
                ordered = providers
                allowed = set(providers)
            else:
                # Eliminate any candidates that don't have this feature.
                allowed = allowed.intersection(set(providers))

        if allowed is None:
            return None

        # Keep registration priority: take the first ordered candidate
        # that survived the eliminations.
        for candidate in ordered:
            if candidate in allowed:
                return candidate
        return None
# The BeautifulSoup class will take feature lists from developers and use them
# to look up builders in this registry.
builder_registry = TreeBuilderRegistry()
class TreeBuilder(object):
    """Turn a document into a Beautiful Soup object tree."""

    NAME = "[Unknown tree builder]"
    ALTERNATE_NAMES = []
    features = []

    is_xml = False
    picklable = False
    preserve_whitespace_tags = set()
    empty_element_tags = None  # A tag will be considered an empty-element
                               # tag when and only when it has no contents.

    # A value for these tag/attribute combinations is a space- or
    # comma-separated list of CDATA, rather than a single CDATA.
    cdata_list_attributes = {}

    def __init__(self):
        # The soup object currently being built, if any.
        self.soup = None

    def reset(self):
        """Do nothing; provided for subclasses to override."""
        pass

    def can_be_empty_element(self, tag_name):
        """Might a tag with this name be an empty-element tag?

        The final markup may or may not actually present this tag as
        self-closing.

        For instance: an HTMLBuilder does not consider a <p> tag to be
        an empty-element tag (it's not in
        HTMLBuilder.empty_element_tags). This means an empty <p> tag
        will be presented as "<p></p>", not "<p />".

        The default implementation has no opinion about which tags are
        empty-element tags, so a tag will be presented as an
        empty-element tag if and only if it has no contents.
        "<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
        be left alone.
        """
        if self.empty_element_tags is None:
            return True
        return tag_name in self.empty_element_tags

    def feed(self, markup):
        """Parse `markup`; must be implemented by subclasses."""
        raise NotImplementedError()

    def prepare_markup(self, markup, user_specified_encoding=None,
                       document_declared_encoding=None,
                       exclude_encodings=None):
        """Yield one (markup, original encoding, declared encoding,
        contains replacement characters) tuple per parsing strategy.

        The BeautifulSoup constructor iterates over the result and
        passes `exclude_encodings` by keyword, so this is a generator
        that accepts that argument. This default implementation yields
        the markup unchanged, once, and ignores all the encoding
        arguments.
        """
        yield (markup, None, None, False)

    def test_fragment_to_document(self, fragment):
        """Wrap an HTML fragment to make it look like a document.

        Different parsers do this differently. For instance, lxml
        introduces an empty <head> tag, and html5lib
        doesn't. Abstracting this away lets us write simple tests
        which run HTML fragments through the parser and compare the
        results against other HTML fragments.

        This method should not be used outside of tests.
        """
        return fragment

    def set_up_substitutions(self, tag):
        """Return False; this default performs no substitutions."""
        return False

    def _replace_cdata_list_attribute_values(self, tag_name, attrs):
        """Replaces class="foo bar" with class=["foo", "bar"]

        Modifies its input in place.
        """
        if not attrs:
            return attrs
        if self.cdata_list_attributes:
            # Attributes listed under '*' apply to every tag.
            universal = self.cdata_list_attributes.get('*', [])
            tag_specific = self.cdata_list_attributes.get(
                tag_name.lower(), None)
            for attr in attrs.keys():
                if attr in universal or (tag_specific and attr in tag_specific):
                    # We have a "class"-type attribute whose string
                    # value is a whitespace-separated list of
                    # values. Split it into a list.
                    value = attrs[attr]
                    if isinstance(value, basestring):
                        values = whitespace_re.split(value)
                    else:
                        # html5lib sometimes calls setAttributes twice
                        # for the same tag when rearranging the parse
                        # tree. On the second call the attribute value
                        # here is already a list.  If this happens,
                        # leave the value alone rather than trying to
                        # split it again.
                        values = value
                    attrs[attr] = values
        return attrs
class SAXTreeBuilder(TreeBuilder):
    """A Beautiful Soup treebuilder that listens for SAX events."""

    def feed(self, markup):
        """Parse `markup`; not implemented in this class."""
        raise NotImplementedError()

    def close(self):
        """Do nothing."""
        pass

    def startElement(self, name, attrs):
        """Forward a start-element event to the soup as a start tag."""
        # Keeps element [1] of every attribute key.
        # NOTE(review): this presumably expects (namespace, name) tuple
        # keys -- confirm against the SAX parser that drives this.
        attrs = dict((key[1], value) for key, value in list(attrs.items()))
        #print "Start %s, %r" % (name, attrs)
        # handle_starttag() takes (name, namespace, nsprefix, attrs).
        # Namespace information is thrown away here, so pass None for
        # both namespace arguments.
        self.soup.handle_starttag(name, None, None, attrs)

    def endElement(self, name):
        """Forward an end-element event to the soup as an end tag."""
        #print "End %s" % name
        self.soup.handle_endtag(name)

    def startElementNS(self, nsTuple, nodeName, attrs):
        """Handle a namespaced start element like a plain one."""
        # Throw away (ns, nodeName) for now.
        self.startElement(nodeName, attrs)

    def endElementNS(self, nsTuple, nodeName):
        """Handle a namespaced end element like a plain one."""
        # Throw away (ns, nodeName) for now.
        self.endElement(nodeName)
        #handler.endElementNS((ns, node.nodeName), node.nodeName)

    def startPrefixMapping(self, prefix, nodeValue):
        """Do nothing."""
        # Ignore the prefix for now.
        pass

    def endPrefixMapping(self, prefix):
        """Do nothing."""
        # Ignore the prefix for now.
        # handler.endPrefixMapping(prefix)
        pass

    def characters(self, content):
        """Forward character data to the soup."""
        self.soup.handle_data(content)

    def startDocument(self):
        """Do nothing."""
        pass

    def endDocument(self):
        """Do nothing."""
        pass
class HTMLTreeBuilder(TreeBuilder):
    """This TreeBuilder knows facts about HTML.

    Such as which tags are empty-element tags.
    """

    preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
    empty_element_tags = set([
        # These are from HTML5.
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',

        # These are from HTML4, removed in HTML5.
        'spacer', 'frame'
    ])

    # The HTML standard defines these attributes as containing a
    # space-separated list of values, not a single value. That is,
    # class="foo bar" means that the 'class' attribute has two values,
    # 'foo' and 'bar', not the single value 'foo bar'.  When we
    # encounter one of these attributes, we will parse its value into
    # a list of values if possible. Upon output, the list will be
    # converted back into a string.
    cdata_list_attributes = {
        "*" : ['class', 'accesskey', 'dropzone'],
        "a" : ['rel', 'rev'],
        "link" : ['rel', 'rev'],
        "td" : ["headers"],
        "th" : ["headers"],
        "form" : ["accept-charset"],
        "object" : ["archive"],

        # These are HTML5 specific, as are *.accesskey and *.dropzone above.
        "area" : ["rel"],
        "icon" : ["sizes"],
        "iframe" : ["sandbox"],
        "output" : ["for"],
    }

    def set_up_substitutions(self, tag):
        """Replace the encoding-bearing attribute of a <meta> tag.

        Returns True only when a "charset" attribute was replaced.
        The HTML 4 branch replaces "content" but leaves the return
        value False.
        """
        # We are only interested in <meta> tags
        if tag.name != 'meta':
            return False

        http_equiv = tag.get('http-equiv')
        content = tag.get('content')
        charset = tag.get('charset')

        # We are interested in <meta> tags that say what encoding the
        # document was originally in. This means HTML 5-style <meta>
        # tags that provide the "charset" attribute. It also means
        # HTML 4-style <meta> tags that provide the "content"
        # attribute and have "http-equiv" set to "content-type".
        #
        # In both cases we will replace the value of the appropriate
        # attribute with a standin object that can take on any
        # encoding.
        meta_encoding = None
        if charset is not None:
            # HTML 5 style:
            # <meta charset="utf8">
            meta_encoding = charset
            tag['charset'] = CharsetMetaAttributeValue(charset)

        elif (content is not None and http_equiv is not None
              and http_equiv.lower() == 'content-type'):
            # HTML 4 style:
            # <meta http-equiv="content-type" content="text/html; charset=utf8">
            tag['content'] = ContentMetaAttributeValue(content)

        return (meta_encoding is not None)
def register_treebuilders_from(module):
    """Copy TreeBuilders from the given module into this module.

    Every name in the module's __all__ that is a TreeBuilder subclass
    is set as an attribute of 'bs4.builder', appended to its __all__
    and registered with its builder_registry.
    """
    # I'm fairly sure this is not the best way to do this.
    this_module = sys.modules['bs4.builder']
    for name in module.__all__:
        candidate = getattr(module, name)
        if not issubclass(candidate, TreeBuilder):
            continue
        setattr(this_module, name, candidate)
        this_module.__all__.append(name)
        # Register the builder while we're at it.
        this_module.builder_registry.register(candidate)
class ParserRejectedMarkup(Exception):
    """Exception type with no behaviour beyond Exception."""
# Builders are registered in reverse order of priority, so that custom
# builder registrations will take precedence. In general, we want lxml
# to take precedence over html5lib, because it's faster. And we only
# want to use HTMLParser as a last resort.
from . import _htmlparser
register_treebuilders_from(_htmlparser)
try:
from . import _html5lib
register_treebuilders_from(_html5lib)
except ImportError:
# They don't have html5lib installed.
pass
try:
from . import _lxml
register_treebuilders_from(_lxml)
except ImportError:
# They don't have lxml installed.
pass
@@ -1,426 +0,0 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__all__ = [
'HTML5TreeBuilder',
]
import warnings
import re
from bs4.builder import (
PERMISSIVE,
HTML,
HTML_5,
HTMLTreeBuilder,
)
from bs4.element import (
NamespacedAttribute,
whitespace_re,
)
import html5lib
from html5lib.constants import (
namespaces,
prefixes,
)
from bs4.element import (
Comment,
Doctype,
NavigableString,
Tag,
)
# html5lib exposes its treebuilder base module as ``_base`` before
# release 0.99999999 and as ``base`` from that release on. Try the old
# name first and record which generation was found: HTML5TreeBuilder.feed
# picks the encoding keyword for parse() based on ``new_html5lib``.
try:
    # Pre-0.99999999
    from html5lib.treebuilders import _base as treebuilder_base
    new_html5lib = False
except ImportError:
    # 0.99999999 and up
    from html5lib.treebuilders import base as treebuilder_base
    new_html5lib = True
class HTML5TreeBuilder(HTMLTreeBuilder):
    """Tree builder that hands the markup to html5lib for parsing."""

    NAME = "html5lib"

    features = [NAME, PERMISSIVE, HTML_5, HTML]

    def prepare_markup(self, markup, user_specified_encoding,
                       document_declared_encoding=None, exclude_encodings=None):
        """Yield the one parsing strategy this builder offers.

        The markup is yielded untouched as
        ``(markup, None, None, False)``; the user-specified encoding is
        only remembered so that feed() can pass it to html5lib.
        """
        # Store the user-specified encoding for use later on.
        self.user_specified_encoding = user_specified_encoding

        # document_declared_encoding and exclude_encodings aren't used
        # ATM because the html5lib TreeBuilder doesn't use
        # UnicodeDammit.
        if exclude_encodings:
            warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
        yield (markup, None, None, False)

    # These methods are defined by Beautiful Soup.
    def feed(self, markup):
        """Parse ``markup`` with html5lib and record the detected encoding."""
        if self.soup.parse_only is not None:
            warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
        parser = html5lib.HTMLParser(tree=self.create_treebuilder)

        markup_is_text = isinstance(markup, unicode)
        parse_kwargs = {}
        if not markup_is_text:
            # The keyword that forces an encoding was renamed between
            # html5lib generations.
            keyword = 'override_encoding' if new_html5lib else 'encoding'
            parse_kwargs[keyword] = self.user_specified_encoding
        doc = parser.parse(markup, **parse_kwargs)

        # Set the character encoding detected by the tokenizer.
        if markup_is_text:
            # We need to special-case this because html5lib sets
            # charEncoding to UTF-8 if it gets Unicode input.
            doc.original_encoding = None
            return

        detected = parser.tokenizer.stream.charEncoding[0]
        if not isinstance(detected, basestring):
            # In 0.99999999 and up, the encoding is an html5lib
            # Encoding object. We want to use a string for compatibility
            # with other tree builders.
            detected = detected.name
        doc.original_encoding = detected

    def create_treebuilder(self, namespaceHTMLElements):
        """Create, remember and return the html5lib-facing tree builder."""
        builder = TreeBuilderForHtml5lib(namespaceHTMLElements, self.soup)
        self.underlying_builder = builder
        return builder

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'<html><head></head><body>%s</body></html>' % fragment
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
    """html5lib tree builder whose nodes wrap Beautiful Soup objects."""

    def __init__(self, namespaceHTMLElements, soup=None):
        # Without a soup to build into, start from an empty document.
        if not soup:
            from bs4 import BeautifulSoup
            soup = BeautifulSoup("", "html.parser")
        self.soup = soup
        super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)

    def documentClass(self):
        """Reset the soup and return it wrapped as the document node."""
        self.soup.reset()
        return Element(self.soup, self.soup, None)

    def insertDoctype(self, token):
        """Build a Doctype from the token's name and ids and record it."""
        doctype = Doctype.for_name_and_ids(
            token["name"], token["publicId"], token["systemId"])
        self.soup.object_was_parsed(doctype)

    def elementClass(self, name, namespace):
        """Create a new tag and return it wrapped in an Element."""
        return Element(
            self.soup.new_tag(name, namespace), self.soup, namespace)

    def commentClass(self, data):
        """Wrap ``data`` as a Comment inside a TextNode."""
        return TextNode(Comment(data), self.soup)

    def fragmentClass(self):
        """Replace the soup with a fresh one named as a document fragment."""
        from bs4 import BeautifulSoup
        fragment_soup = BeautifulSoup("", "html.parser")
        self.soup = fragment_soup
        fragment_soup.name = "[document_fragment]"
        return Element(fragment_soup, fragment_soup, None)

    def appendChild(self, node):
        # XXX This code is not covered by the BS4 tests.
        self.soup.append(node.element)

    def getDocument(self):
        return self.soup

    def getFragment(self):
        return treebuilder_base.TreeBuilder.getFragment(self).element

    def testSerializer(self, element):
        """Render ``element`` and its descendants as '|'-prefixed lines.

        Each node becomes one line; children and attributes are indented
        two spaces deeper than their parent. The lines are joined with
        newlines and returned as a single string.
        """
        from bs4 import BeautifulSoup
        rv = []
        doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')

        def serializeElement(node, indent=0):
            pad = ' ' * indent
            if isinstance(node, BeautifulSoup):
                # No special handling: the soup object falls through to
                # the checks below like any other node.
                pass
            if isinstance(node, Doctype):
                m = doctype_re.match(node)
                if not m:
                    rv.append("|%s<!DOCTYPE >" % (pad,))
                elif m.lastindex > 1:
                    publicId = m.group(2) or ""
                    systemId = m.group(3) or m.group(4) or ""
                    rv.append('|%s<!DOCTYPE %s "%s" "%s">' %
                              (pad, m.group(1), publicId, systemId))
                else:
                    rv.append("|%s<!DOCTYPE %s>" % (pad, m.group(1)))
            elif isinstance(node, Comment):
                rv.append("|%s<!-- %s -->" % (pad, node))
            elif isinstance(node, NavigableString):
                rv.append("|%s\"%s\"" % (pad, node))
            else:
                if node.namespace:
                    tag_name = "%s %s" % (prefixes[node.namespace],
                                          node.name)
                else:
                    tag_name = node.name
                rv.append("|%s<%s>" % (pad, tag_name))

                if node.attrs:
                    attributes = []
                    for attr_name, attr_value in node.attrs.items():
                        if isinstance(attr_name, NamespacedAttribute):
                            attr_name = "%s %s" % (
                                prefixes[attr_name.namespace], attr_name.name)
                        # Multi-valued attributes are stored as lists.
                        if isinstance(attr_value, list):
                            attr_value = " ".join(attr_value)
                        attributes.append((attr_name, attr_value))

                    attr_pad = ' ' * (indent + 2)
                    for attr_name, attr_value in sorted(attributes):
                        rv.append('|%s%s="%s"' % (attr_pad, attr_name, attr_value))

                for child in node.children:
                    serializeElement(child, indent + 2)

        serializeElement(element, 0)

        return "\n".join(rv)
class AttrList(object):
    """Dict-like view of a tag's attributes.

    Reads are answered from a snapshot of ``element.attrs`` taken at
    construction time. Writes go to the underlying element and are
    mirrored into the snapshot so later reads through the same AttrList
    see them.
    """

    def __init__(self, element):
        self.element = element
        # Copy, so later changes made directly to element.attrs do not
        # affect this view.
        self.attrs = dict(self.element.attrs)

    def __iter__(self):
        # Yields (name, value) pairs, not bare names.
        return iter(list(self.attrs.items()))

    def __setitem__(self, name, value):
        # If this attribute is a multi-valued attribute for this element,
        # turn its value into a list.
        list_attr = HTML5TreeBuilder.cdata_list_attributes
        if (name in list_attr['*']
            or (self.element.name in list_attr
                and name in list_attr[self.element.name])):
            # A node that is being cloned may have already undergone
            # this procedure.
            if not isinstance(value, list):
                value = whitespace_re.split(value)
        self.element[name] = value
        # Keep the snapshot in step with the element; otherwise
        # __getitem__/__contains__/__len__ would report stale data.
        self.attrs[name] = value

    def items(self):
        return list(self.attrs.items())

    def keys(self):
        return list(self.attrs.keys())

    def __len__(self):
        return len(self.attrs)

    def __getitem__(self, name):
        return self.attrs[name]

    def __contains__(self, name):
        # Direct dict membership instead of building a list of keys.
        return name in self.attrs
class Element(treebuilder_base.Node):
    """html5lib tree node wrapping a Beautiful Soup object.

    ``element`` is the wrapped Beautiful Soup object, ``soup`` the
    BeautifulSoup object being built, and ``namespace`` the node's
    namespace (None is reported as the HTML namespace by nameTuple).
    """

    def __init__(self, element, soup, namespace):
        treebuilder_base.Node.__init__(self, element.name)
        self.element = element
        self.soup = soup
        self.namespace = namespace

    def appendChild(self, node):
        """Append ``node`` as the last child of the wrapped element.

        ``node`` may be a wrapper (Element/TextNode), a bare Tag, or a
        plain string. A string appended directly after another
        NavigableString is merged into it.
        """
        string_child = child = None
        if isinstance(node, basestring):
            # Some other piece of code decided to pass in a string
            # instead of creating a TextElement object to contain the
            # string.
            string_child = child = node
        elif isinstance(node, Tag):
            # Some other piece of code decided to pass in a Tag
            # instead of creating an Element object to contain the
            # Tag.
            child = node
        elif node.element.__class__ == NavigableString:
            string_child = child = node.element
            node.parent = self
        else:
            child = node.element
            node.parent = self

        if not isinstance(child, basestring) and child.parent is not None:
            # Detach the object being appended. ``child`` is used rather
            # than ``node.element`` because ``node`` may itself be a bare
            # Tag, which has no wrapped ``element`` to extract.
            child.extract()

        if (string_child and self.element.contents
            and self.element.contents[-1].__class__ == NavigableString):
            # We are appending a string onto another string.
            # TODO This has O(n^2) performance, for input like
            # "a</a>a</a>a</a>..."
            old_element = self.element.contents[-1]
            new_element = self.soup.new_string(old_element + string_child)
            old_element.replace_with(new_element)
            self.soup._most_recent_element = new_element
        else:
            if isinstance(node, basestring):
                # Create a brand new NavigableString from this string.
                child = self.soup.new_string(node)

            # Tell Beautiful Soup to act as if it parsed this element
            # immediately after the parent's last descendant. (Or
            # immediately after the parent, if it has no children.)
            if self.element.contents:
                most_recent_element = self.element._last_descendant(False)
            elif self.element.next_element is not None:
                # Something from further ahead in the parse tree is
                # being inserted into this earlier element. This is
                # very annoying because it means an expensive search
                # for the last element in the tree.
                most_recent_element = self.soup._last_descendant()
            else:
                most_recent_element = self.element

            self.soup.object_was_parsed(
                child, parent=self.element,
                most_recent_element=most_recent_element)

    def getAttributes(self):
        """Return the attributes as an AttrList ({} for a Comment)."""
        if isinstance(self.element, Comment):
            return {}
        return AttrList(self.element)

    def setAttributes(self, attributes):
        """Copy ``attributes`` onto the wrapped element.

        Tuple keys are replaced in ``attributes`` by NamespacedAttribute
        objects before the values are assigned. None or an empty mapping
        is a no-op.
        """
        if attributes is not None and len(attributes) > 0:
            # Iterate over a copy of the items: the mapping is modified
            # inside the loop.
            for name, value in list(attributes.items()):
                if isinstance(name, tuple):
                    new_name = NamespacedAttribute(*name)
                    del attributes[name]
                    attributes[new_name] = value

            self.soup.builder._replace_cdata_list_attribute_values(
                self.name, attributes)
            for name, value in attributes.items():
                self.element[name] = value

            # The attributes may contain variables that need substitution.
            # Call set_up_substitutions manually.
            #
            # The Tag constructor called this method when the Tag was created,
            # but we just set/changed the attributes, so call it again.
            self.soup.builder.set_up_substitutions(self.element)
    attributes = property(getAttributes, setAttributes)

    def insertText(self, data, insertBefore=None):
        """Insert ``data`` as text, before ``insertBefore`` or at the end."""
        text = TextNode(self.soup.new_string(data), self.soup)
        if insertBefore:
            self.insertBefore(text, insertBefore)
        else:
            self.appendChild(text)

    def insertBefore(self, node, refNode):
        """Insert ``node`` immediately before the child ``refNode``."""
        index = self.element.index(refNode.element)
        # Only merge with a *preceding* string sibling. When index is 0
        # there is none, and contents[index-1] would wrap around to the
        # last child instead.
        if (index > 0
            and node.element.__class__ == NavigableString
            and self.element.contents
            and self.element.contents[index-1].__class__ == NavigableString):
            # (See comments in appendChild)
            old_node = self.element.contents[index-1]
            new_str = self.soup.new_string(old_node + node.element)
            old_node.replace_with(new_str)
        else:
            self.element.insert(index, node.element)
            node.parent = self

    def removeChild(self, node):
        node.element.extract()

    def reparentChildren(self, new_parent):
        """Move all of this tag's children into another tag."""
        element = self.element
        new_parent_element = new_parent.element
        # Determine what this tag's next_element will be once all the children
        # are removed.
        final_next_element = element.next_sibling

        new_parents_last_descendant = new_parent_element._last_descendant(False, False)
        if len(new_parent_element.contents) > 0:
            # The new parent already contains children. We will be
            # appending this tag's children to the end.
            new_parents_last_child = new_parent_element.contents[-1]
            new_parents_last_descendant_next_element = new_parents_last_descendant.next_element
        else:
            # The new parent contains no children.
            new_parents_last_child = None
            new_parents_last_descendant_next_element = new_parent_element.next_element

        to_append = element.contents
        if len(to_append) > 0:
            # Set the first child's previous_element and previous_sibling
            # to elements within the new parent
            first_child = to_append[0]
            if new_parents_last_descendant:
                first_child.previous_element = new_parents_last_descendant
            else:
                first_child.previous_element = new_parent_element
            first_child.previous_sibling = new_parents_last_child
            if new_parents_last_descendant:
                new_parents_last_descendant.next_element = first_child
            else:
                new_parent_element.next_element = first_child
            if new_parents_last_child:
                new_parents_last_child.next_sibling = first_child

            # Find the very last element being moved. It is now the
            # parent's last descendant. It has no .next_sibling and
            # its .next_element is whatever the previous last
            # descendant had.
            last_childs_last_descendant = to_append[-1]._last_descendant(False, True)

            last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
            if new_parents_last_descendant_next_element:
                # TODO: This code has no test coverage and I'm not sure
                # how to get html5lib to go through this path, but it's
                # just the other side of the previous line.
                new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant
            last_childs_last_descendant.next_sibling = None

        for child in to_append:
            child.parent = new_parent_element
            new_parent_element.contents.append(child)

        # Now that this element has no children, change its .next_element.
        element.contents = []
        element.next_element = final_next_element

    def cloneNode(self):
        """Return a new Element with the same name, namespace and attributes.

        Children are not copied.
        """
        tag = self.soup.new_tag(self.element.name, self.namespace)
        node = Element(tag, self.soup, self.namespace)
        for key, value in self.attributes:
            node.attributes[key] = value
        return node

    def hasContent(self):
        # Returns the contents list itself; callers rely on truthiness.
        return self.element.contents

    def getNameTuple(self):
        """Return ``(namespace, name)``, defaulting to the HTML namespace."""
        if self.namespace is None:
            return namespaces["html"], self.name
        else:
            return self.namespace, self.name

    nameTuple = property(getNameTuple)
class TextNode(Element):
    """Element variant wrapping a text-like object such as a string or Comment.

    The node is created without a name and cannot be cloned.
    """

    def __init__(self, element, soup):
        # Element.__init__ is bypassed on purpose: the node is registered
        # with a name of None instead of element.name.
        treebuilder_base.Node.__init__(self, None)
        self.soup = soup
        self.element = element

    def cloneNode(self):
        raise NotImplementedError
@@ -1,314 +0,0 @@
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__all__ = [
'HTMLParserTreeBuilder',
]
from HTMLParser import HTMLParser
try:
    from HTMLParser import HTMLParseError
except ImportError:
    # HTMLParseError is removed in Python 3.5. Since it can never be
    # thrown in 3.5, we can just define our own class as a placeholder.
    class HTMLParseError(Exception):
        pass
import sys
import warnings
# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
# argument, which we'd like to set to False. Unfortunately,
# http://bugs.python.org/issue13273 makes strict=True a better bet
# before Python 3.2.3.
#
# At the end of this file, we monkeypatch HTMLParser so that
# strict=True works well on Python 3.2.2.
#
# The flags below record which constructor keywords apply to the
# running interpreter: 'strict' on 3.2.3+ within the 3.2 series, its
# deprecation on 3.3, and 'convert_charrefs' on 3.4 and later.
major, minor, release = sys.version_info[:3]
CONSTRUCTOR_TAKES_STRICT = (major, minor) == (3, 2) and release >= 3
CONSTRUCTOR_STRICT_IS_DEPRECATED = (major, minor) == (3, 3)
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
from bs4.element import (
CData,
Comment,
Declaration,
Doctype,
ProcessingInstruction,
)
from bs4.dammit import EntitySubstitution, UnicodeDammit
from bs4.builder import (
HTML,
HTMLTreeBuilder,
STRICT,
)
HTMLPARSER = 'html.parser'
class BeautifulSoupHTMLParser(HTMLParser):
    """HTMLParser subclass that forwards parse events to ``self.soup``.

    ``soup`` is not set by the constructor; it must be assigned before
    markup is fed to the parser.
    """

    def __init__(self, *args, **kwargs):
        HTMLParser.__init__(self, *args, **kwargs)

        # Keep a list of empty-element tags that were encountered
        # without an explicit closing tag. If we encounter a closing tag
        # of this type, we'll associate it with one of those entries.
        #
        # This isn't a stack because we don't care about the
        # order. It's a list of closing tags we've already handled and
        # will ignore, assuming they ever show up.
        self.already_closed_empty_element = []

    def handle_startendtag(self, name, attrs):
        # This is only called when the markup looks like
        # <tag/>.

        # is_startend() tells handle_starttag not to close the tag
        # just because its name matches a known empty-element tag. We
        # know that this is an empty-element tag and we want to call
        # handle_endtag ourselves.
        self.handle_starttag(name, attrs, handle_empty_element=False)
        self.handle_endtag(name)

    def handle_starttag(self, name, attrs, handle_empty_element=True):
        """Open a tag on the soup, closing it at once if it is empty.

        ``attrs`` is a sequence of (key, value) pairs. When
        ``handle_empty_element`` is False the caller takes care of
        closing the tag.
        """
        # XXX namespace
        attr_dict = {}
        for key, value in attrs:
            # Change None attribute values to the empty string
            # for consistency with the other tree builders.
            if value is None:
                value = ''
            attr_dict[key] = value
        tag = self.soup.handle_starttag(name, None, None, attr_dict)
        if tag and tag.is_empty_element and handle_empty_element:
            # Unlike other parsers, html.parser doesn't send separate end tag
            # events for empty-element tags. (It's handled in
            # handle_startendtag, but only if the original markup looked like
            # <tag/>.)
            #
            # So we need to call handle_endtag() ourselves. Since we
            # know the start event is identical to the end event, we
            # don't want handle_endtag() to cross off any previous end
            # events for tags of this name.
            self.handle_endtag(name, check_already_closed=False)

            # But we might encounter an explicit closing tag for this tag
            # later on. If so, we want to ignore it.
            self.already_closed_empty_element.append(name)

    def handle_endtag(self, name, check_already_closed=True):
        """Close a tag unless its end was already handled."""
        if check_already_closed and name in self.already_closed_empty_element:
            # This is a redundant end tag for an empty-element tag.
            # We've already called handle_endtag() for it, so just
            # check it off the list.
            self.already_closed_empty_element.remove(name)
        else:
            self.soup.handle_endtag(name)

    def handle_data(self, data):
        self.soup.handle_data(data)

    def handle_charref(self, name):
        """Convert a numeric character reference into a character."""
        # XXX workaround for a bug in HTMLParser. Remove this once
        # it's fixed in all supported versions.
        # http://bugs.python.org/issue13633
        if name.startswith('x'):
            real_name = int(name.lstrip('x'), 16)
        elif name.startswith('X'):
            real_name = int(name.lstrip('X'), 16)
        else:
            real_name = int(name)

        try:
            data = unichr(real_name)
        except (ValueError, OverflowError):
            # The code point cannot be represented as a character.
            data = u"\N{REPLACEMENT CHARACTER}"

        self.handle_data(data)

    def handle_entityref(self, name):
        """Emit the character for a named entity, or the raw reference."""
        character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
        if character is not None:
            data = character
        else:
            # Unknown entity: pass the reference through as text.
            data = "&%s;" % name
        self.handle_data(data)

    def handle_comment(self, data):
        self.soup.endData()
        self.soup.handle_data(data)
        self.soup.endData(Comment)

    def handle_decl(self, data):
        """Record a doctype, stripping the leading DOCTYPE keyword."""
        self.soup.endData()
        if data.startswith("DOCTYPE "):
            data = data[len("DOCTYPE "):]
        elif data == 'DOCTYPE':
            # i.e. "<!DOCTYPE>"
            data = ''
        self.soup.handle_data(data)
        self.soup.endData(Doctype)

    def unknown_decl(self, data):
        """Record a CDATA section, or a generic Declaration otherwise."""
        if data.upper().startswith('CDATA['):
            cls = CData
            data = data[len('CDATA['):]
        else:
            cls = Declaration
        self.soup.endData()
        self.soup.handle_data(data)
        self.soup.endData(cls)

    def handle_pi(self, data):
        self.soup.endData()
        self.soup.handle_data(data)
        self.soup.endData(ProcessingInstruction)
class HTMLParserTreeBuilder(HTMLTreeBuilder):
    """Tree builder backed by the standard library's HTMLParser."""

    is_xml = False
    picklable = True
    NAME = HTMLPARSER
    features = [NAME, HTML, STRICT]

    def __init__(self, *args, **kwargs):
        # Choose the constructor keywords the running Python understands;
        # the arguments are stored and used to build a parser per feed().
        if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
            kwargs['strict'] = False
        if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
            kwargs['convert_charrefs'] = False
        self.parser_args = (args, kwargs)

    def prepare_markup(self, markup, user_specified_encoding=None,
                       document_declared_encoding=None, exclude_encodings=None):
        """
        :return: A 4-tuple (markup, original encoding, encoding
        declared within markup, whether any characters had to be
        replaced with REPLACEMENT CHARACTER).
        """
        if isinstance(markup, unicode):
            # Already text: nothing to detect or convert.
            yield (markup, None, None, False)
            return

        try_encodings = [user_specified_encoding, document_declared_encoding]
        dammit = UnicodeDammit(markup, try_encodings, is_html=True,
                               exclude_encodings=exclude_encodings)
        yield (dammit.markup, dammit.original_encoding,
               dammit.declared_html_encoding,
               dammit.contains_replacement_characters)

    def feed(self, markup):
        """Parse ``markup`` with a fresh BeautifulSoupHTMLParser.

        :raise HTMLParseError: re-raised, after a RuntimeWarning, when
        the underlying parser cannot handle the document.
        """
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            # Bare raise keeps the original traceback intact.
            raise
        parser.already_closed_empty_element = []
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3.
if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
    import re
    # Tolerant attribute matcher, used by parse_starttag() below when
    # the parser is not in strict mode.
    attrfind_tolerant = re.compile(
        r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
        r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
    HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant

    locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
(?:\s+ # whitespace before attribute name
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
(?:\s*=\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|\"[^\"]*\" # LIT-enclosed value
|[^'\">\s]+ # bare value
)
)?
)
)*
\s* # trailing whitespace
""", re.VERBOSE)
    BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend

    from html.parser import tagfind, attrfind

    def parse_starttag(self, i):
        # Replacement for the parser's parse_starttag(); installed on
        # BeautifulSoupHTMLParser below. Returns the end position of the
        # start tag, or a negative value if the tag is not yet complete.
        #
        # NOTE(review): this function is defined outside a class body,
        # so ``self.__starttag_text`` is not name-mangled here -- confirm
        # nothing depends on the mangled attribute being updated.
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and j into a tag and attrs
        attrs = []
        match = tagfind.match(rawdata, i+1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i+1:k].lower()
        while k < endpos:
            if self.strict:
                m = attrfind.match(rawdata, k)
            else:
                m = attrfind_tolerant.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                # Attribute given without a value.
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                # Strip the matching pair of surrounding quotes.
                attrvalue = attrvalue[1:-1]
            if attrvalue:
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            # Unparseable leftovers: in strict mode report an error,
            # then pass the whole tag through as character data.
            lineno, offset = self.getpos()
            if "\n" in self.__starttag_text:
                lineno = lineno + self.__starttag_text.count("\n")
                offset = len(self.__starttag_text) \
                         - self.__starttag_text.rfind("\n")
            else:
                offset = offset + len(self.__starttag_text)
            if self.strict:
                self.error("junk characters in start tag: %r"
                           % (rawdata[k:endpos][:20],))
            self.handle_data(rawdata[i:endpos])
            return endpos
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode(tag)
        return endpos

    def set_cdata_mode(self, elem):
        # Only the matching closing tag of ``elem`` (case-insensitive)
        # is treated as interesting from here on.
        self.cdata_elem = elem.lower()
        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)

    BeautifulSoupHTMLParser.parse_starttag = parse_starttag
    BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode

    # With the patch installed, HTMLParserTreeBuilder.__init__ may pass
    # the 'strict' keyword.
    CONSTRUCTOR_TAKES_STRICT = True
@@ -1,258 +0,0 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__all__ = [
'LXMLTreeBuilderForXML',
'LXMLTreeBuilder',
]
from io import BytesIO
from StringIO import StringIO
import collections
from lxml import etree
from bs4.element import (
Comment,
Doctype,
NamespacedAttribute,
ProcessingInstruction,
XMLProcessingInstruction,
)
from bs4.builder import (
FAST,
HTML,
HTMLTreeBuilder,
PERMISSIVE,
ParserRejectedMarkup,
TreeBuilder,
XML)
from bs4.dammit import EncodingDetector
LXML = 'lxml'
class LXMLTreeBuilderForXML(TreeBuilder):
    """Tree builder that parses XML with lxml.

    The builder passes itself to the lxml parser as ``target``, so the
    start/end/data/pi/doctype/comment/close methods below receive the
    parse events and forward them to ``self.soup``.
    """

    DEFAULT_PARSER_CLASS = etree.XMLParser

    is_xml = True
    processing_instruction_class = XMLProcessingInstruction

    NAME = "lxml-xml"
    ALTERNATE_NAMES = ["xml"]

    # Well, it's permissive by XML parser standards.
    features = [NAME, LXML, XML, FAST, PERMISSIVE]

    # Number of characters/bytes handed to the parser per feed() call.
    CHUNK_SIZE = 512

    # This namespace mapping is specified in the XML Namespace
    # standard.
    DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}

    def default_parser(self, encoding):
        # This can either return a parser object or a class, which
        # will be instantiated with default arguments.
        if self._default_parser is not None:
            return self._default_parser
        return etree.XMLParser(
            target=self, strip_cdata=False, recover=True, encoding=encoding)

    def parser_for(self, encoding):
        """Return a parser object to use for ``encoding``."""
        # Use the default parser.
        parser = self.default_parser(encoding)

        if callable(parser):
            # Instantiate the parser with default arguments
            parser = parser(target=self, strip_cdata=False, encoding=encoding)
        return parser

    def __init__(self, parser=None, empty_element_tags=None):
        # TODO: Issue a warning if parser is present but not a
        # callable, since that means there's no way to create new
        # parsers for different encodings.
        self._default_parser = parser
        if empty_element_tags is not None:
            self.empty_element_tags = set(empty_element_tags)
        self.soup = None
        # Stack of inverted (namespace URL -> prefix) maps; see start().
        self.nsmaps = [self.DEFAULT_NSMAPS]

    def _getNsTag(self, tag):
        # Split the namespace URL out of a fully-qualified lxml tag
        # name. Copied from lxml's src/lxml/sax.py.
        if tag[0] == '{':
            return tuple(tag[1:].split('}', 1))
        else:
            return (None, tag)

    def prepare_markup(self, markup, user_specified_encoding=None,
                       exclude_encodings=None,
                       document_declared_encoding=None):
        """
        :yield: A series of 4-tuples.
         (markup, encoding, declared encoding,
          has undergone character replacement)

        Each 4-tuple represents a strategy for parsing the document.
        """
        # Instead of using UnicodeDammit to convert the bytestring to
        # Unicode using different encodings, use EncodingDetector to
        # iterate over the encodings, and tell lxml to try to parse
        # the document as each one in turn.
        is_html = not self.is_xml
        if is_html:
            self.processing_instruction_class = ProcessingInstruction
        else:
            self.processing_instruction_class = XMLProcessingInstruction

        if isinstance(markup, unicode):
            # We were given Unicode. Maybe lxml can parse Unicode on
            # this system?
            yield markup, None, document_declared_encoding, False

            # No, apparently not. Convert the Unicode to UTF-8 and
            # tell lxml to parse it as UTF-8.
            yield (markup.encode("utf8"), "utf8",
                   document_declared_encoding, False)

        try_encodings = [user_specified_encoding, document_declared_encoding]
        detector = EncodingDetector(
            markup, try_encodings, is_html, exclude_encodings)
        for encoding in detector.encodings:
            yield (detector.markup, encoding, document_declared_encoding, False)

    def feed(self, markup):
        """Feed ``markup`` to a new parser in CHUNK_SIZE pieces.

        :raise ParserRejectedMarkup: if lxml raises UnicodeDecodeError,
        LookupError or etree.ParserError while parsing.
        """
        if isinstance(markup, bytes):
            markup = BytesIO(markup)
        elif isinstance(markup, unicode):
            markup = StringIO(markup)

        # Call feed() at least once, even if the markup is empty,
        # or the parser won't be initialized.
        data = markup.read(self.CHUNK_SIZE)
        try:
            self.parser = self.parser_for(self.soup.original_encoding)
            self.parser.feed(data)
            while len(data) != 0:
                # Now call feed() on the rest of the data, chunk by chunk.
                data = markup.read(self.CHUNK_SIZE)
                if len(data) != 0:
                    self.parser.feed(data)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            raise ParserRejectedMarkup(str(e))

    def close(self):
        # Reset the namespace stack for the next document.
        self.nsmaps = [self.DEFAULT_NSMAPS]

    def start(self, name, attrs, nsmap={}):
        """Handle a start-tag event from lxml.

        ``nsmap`` maps prefixes to the namespace URLs passed with this
        tag. The default ``{}`` is never mutated here.
        """
        # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
        attrs = dict(attrs)

        # Invert each namespace map as it comes in.
        if len(nsmap) == 0 and len(self.nsmaps) > 1:
            # There are no new namespaces for this tag, but
            # non-default namespaces are in play, so we need a
            # separate tag stack to know when they end.
            self.nsmaps.append(None)
        elif len(nsmap) > 0:
            # A new namespace mapping has come into play. This must be
            # handled even when other mappings are already active;
            # otherwise declarations made on nested tags are lost.
            inverted_nsmap = dict((value, key) for key, value in nsmap.items())
            self.nsmaps.append(inverted_nsmap)
            # Also treat the namespace mapping as a set of attributes on the
            # tag, so we can recreate it later.
            for prefix, namespace in nsmap.items():
                attribute = NamespacedAttribute(
                    "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
                attrs[attribute] = namespace

        # Namespaces are in play. Find any attributes that came in
        # from lxml with namespaces attached to their names, and
        # turn then into NamespacedAttribute objects.
        new_attrs = {}
        for attr, value in attrs.items():
            namespace, attr = self._getNsTag(attr)
            if namespace is None:
                new_attrs[attr] = value
            else:
                nsprefix = self._prefix_for_namespace(namespace)
                attr = NamespacedAttribute(nsprefix, attr, namespace)
                new_attrs[attr] = value
        attrs = new_attrs

        namespace, name = self._getNsTag(name)
        nsprefix = self._prefix_for_namespace(namespace)
        self.soup.handle_starttag(name, namespace, nsprefix, attrs)

    def _prefix_for_namespace(self, namespace):
        """Find the currently active prefix for the given namespace."""
        if namespace is None:
            return None
        # Innermost mapping wins; None entries are placeholders for tags
        # that introduced no mapping of their own.
        for inverted_nsmap in reversed(self.nsmaps):
            if inverted_nsmap is not None and namespace in inverted_nsmap:
                return inverted_nsmap[namespace]
        return None

    def end(self, name):
        """Handle an end-tag event from lxml."""
        self.soup.endData()
        # Unused, but retained: indexing fails loudly here if the tag
        # stack is unexpectedly empty.
        completed_tag = self.soup.tagStack[-1]
        namespace, name = self._getNsTag(name)
        nsprefix = self._prefix_for_namespace(namespace)
        self.soup.handle_endtag(name, nsprefix)
        if len(self.nsmaps) > 1:
            # This tag, or one of its parents, introduced a namespace
            # mapping, so pop it off the stack.
            self.nsmaps.pop()

    def pi(self, target, data):
        self.soup.endData()
        self.soup.handle_data(target + ' ' + data)
        self.soup.endData(self.processing_instruction_class)

    def data(self, content):
        self.soup.handle_data(content)

    def doctype(self, name, pubid, system):
        self.soup.endData()
        doctype = Doctype.for_name_and_ids(name, pubid, system)
        self.soup.object_was_parsed(doctype)

    def comment(self, content):
        "Handle comments as Comment objects."
        self.soup.endData()
        self.soup.handle_data(content)
        self.soup.endData(Comment)

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
    """Tree builder that parses HTML with lxml's HTMLParser."""

    NAME = LXML
    ALTERNATE_NAMES = ["lxml-html"]

    features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
    is_xml = False
    processing_instruction_class = ProcessingInstruction

    def default_parser(self, encoding):
        # Return the class itself; parser_for() instantiates it.
        return etree.HTMLParser

    def feed(self, markup):
        """Feed all of ``markup`` to a new parser in a single call.

        :raise ParserRejectedMarkup: if lxml raises UnicodeDecodeError,
        LookupError or etree.ParserError while parsing.
        """
        encoding = self.soup.original_encoding
        try:
            self.parser = self.parser_for(encoding)
            self.parser.feed(markup)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            raise ParserRejectedMarkup(str(e))

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'<html><body>%s</body></html>' % fragment
-842
View File
@@ -1,842 +0,0 @@
# -*- coding: utf-8 -*-
"""Beautiful Soup bonus library: Unicode, Dammit
This library converts a bytestream to Unicode through any means
necessary. It is heavily based on code from Mark Pilgrim's Universal
Feed Parser. It works best on XML and HTML, but it does not rewrite the
XML or HTML to reflect a new encoding; that's the tree builder's job.
"""
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__license__ = "MIT"
import codecs
from htmlentitydefs import codepoint2name
import re
import logging
import string
# Import a library to autodetect character encodings.
#
# chardet_dammit(s) returns the detector's guess at the encoding of s,
# or None when no detection library can be imported.
chardet_type = None
try:
    # First try the fast C implementation.
    #  PyPI package: cchardet
    import cchardet
except ImportError:
    try:
        # Fall back to the pure Python implementation
        #  Debian package: python-chardet
        #  PyPI package: chardet
        import chardet
    except ImportError:
        # No chardet available.
        def chardet_dammit(s):
            return None
    else:
        def chardet_dammit(s):
            return chardet.detect(s)['encoding']
else:
    def chardet_dammit(s):
        return cchardet.detect(s)['encoding']
# Available from http://cjkpython.i18n.org/.
try:
import iconv_codec
except ImportError:
pass
# Both patterns are built from .encode()d strings, so they are bytes
# patterns. Each matches case-insensitively and captures the declared
# encoding name in group 1.

# Matches an encoding="..." declaration inside a leading <?...?>.
xml_encoding_re = re.compile(
    '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
# Matches a charset=... value inside a <meta ...> tag.
html_meta_re = re.compile(
    '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
class EntitySubstitution(object):
"""Substitute XML or HTML entities for the corresponding characters."""
def _populate_class_variables():
    """Build the entity tables from ``codepoint2name``.

    Returns a 3-tuple: a character -> entity-name dict, an
    entity-name -> character dict, and a compiled character class
    matching every character in the first dict.
    """
    char_to_name = {}
    name_to_char = {}
    substitutable = []
    for codepoint, name in list(codepoint2name.items()):
        character = unichr(codepoint)
        # There's no point in turning the quotation mark (code point 34)
        # into &quot;, unless it happens within an attribute value,
        # which is handled elsewhere.
        if codepoint != 34:
            substitutable.append(character)
            char_to_name[character] = name
        # But we do want to turn &quot; into the quotation mark.
        name_to_char[name] = character
    pattern = re.compile("[%s]" % "".join(substitutable))
    return char_to_name, name_to_char, pattern
(CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()
CHARACTER_TO_XML_ENTITY = {
"'": "apos",
'"': "quot",
"&": "amp",
"<": "lt",
">": "gt",
}
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
"&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
")")
AMPERSAND_OR_BRACKET = re.compile("([<>&])")
@classmethod
def _substitute_html_entity(cls, matchobj):
entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
return "&%s;" % entity
@classmethod
def _substitute_xml_entity(cls, matchobj):
"""Used with a regular expression to substitute the
appropriate XML entity for an XML special character."""
entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
return "&%s;" % entity
@classmethod
def quoted_attribute_value(self, value):
"""Make a value into a quoted XML attribute, possibly escaping it.
Most strings will be quoted using double quotes.
Bob's Bar -> "Bob's Bar"
If a string contains double quotes, it will be quoted using
single quotes.
Welcome to "my bar" -> 'Welcome to "my bar"'
If a string contains both single and double quotes, the
double quotes will be escaped, and the string will be quoted
using double quotes.
Welcome to "Bob's Bar" -> "Welcome to &quot;Bob's bar&quot;
"""
quote_with = '"'
if '"' in value:
if "'" in value:
# The string contains both single and double
# quotes. Turn the double quotes into
# entities. We quote the double quotes rather than
# the single quotes because the entity name is
# "&quot;" whether this is HTML or XML. If we
# quoted the single quotes, we'd have to decide
# between &apos; and &squot;.
replace_with = "&quot;"
value = value.replace('"', replace_with)
else:
# There are double quotes but no single quotes.
# We can use single quotes to quote the attribute.
quote_with = "'"
return quote_with + value + quote_with
@classmethod
def substitute_xml(cls, value, make_quoted_attribute=False):
"""Substitute XML entities for special XML characters.
:param value: A string to be substituted. The less-than sign
will become &lt;, the greater-than sign will become &gt;,
and any ampersands will become &amp;. If you want ampersands
that appear to be part of an entity definition to be left
alone, use substitute_xml_containing_entities() instead.
:param make_quoted_attribute: If True, then the string will be
quoted, as befits an attribute value.
"""
# Escape angle brackets and ampersands.
value = cls.AMPERSAND_OR_BRACKET.sub(
cls._substitute_xml_entity, value)
if make_quoted_attribute:
value = cls.quoted_attribute_value(value)
return value
@classmethod
def substitute_xml_containing_entities(
cls, value, make_quoted_attribute=False):
"""Substitute XML entities for special XML characters.
:param value: A string to be substituted. The less-than sign will
become &lt;, the greater-than sign will become &gt;, and any
ampersands that are not part of an entity defition will
become &amp;.
:param make_quoted_attribute: If True, then the string will be
quoted, as befits an attribute value.
"""
# Escape angle brackets, and ampersands that aren't part of
# entities.
value = cls.BARE_AMPERSAND_OR_BRACKET.sub(
cls._substitute_xml_entity, value)
if make_quoted_attribute:
value = cls.quoted_attribute_value(value)
return value
@classmethod
def substitute_html(cls, s):
"""Replace certain Unicode characters with named HTML entities.
This differs from data.encode(encoding, 'xmlcharrefreplace')
in that the goal is to make the result more readable (to those
with ASCII displays) rather than to recover from
errors. There's absolutely nothing wrong with a UTF-8 string
containg a LATIN SMALL LETTER E WITH ACUTE, but replacing that
character with "&eacute;" will make it more readable to some
people.
"""
return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
cls._substitute_html_entity, s)
class EncodingDetector:
    """Suggests a number of possible encodings for a bytestring.

    Order of precedence:

    1. Encodings you specifically tell EncodingDetector to try first
    (the override_encodings argument to the constructor).

    2. An encoding implied by a byte-order mark at the start of the
    bytestring.

    3. An encoding declared within the bytestring itself, either in an
    XML declaration (if the bytestring is to be interpreted as an XML
    document), or in a <meta> tag (if the bytestring is to be
    interpreted as an HTML document.)

    4. An encoding detected through textual analysis by chardet,
    cchardet, or a similar external library.

    5. UTF-8.

    6. Windows-1252.
    """
    def __init__(self, markup, override_encodings=None, is_html=False,
                 exclude_encodings=None):
        """
        :param markup: The document, as a bytestring or Unicode string.
        :param override_encodings: Encoding names to suggest first.
        :param is_html: If True, a <meta> tag is also searched for a
            declared encoding.
        :param exclude_encodings: Encoding names that must never be
            suggested (compared case-insensitively).
        """
        self.override_encodings = override_encodings or []
        exclude_encodings = exclude_encodings or []
        self.exclude_encodings = set([x.lower() for x in exclude_encodings])
        self.chardet_encoding = None
        self.is_html = is_html
        self.declared_encoding = None

        # First order of business: strip a byte-order mark.
        self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup)

    def _usable(self, encoding, tried):
        """Decide whether `encoding` should be yielded.

        An encoding is usable when it is not None, not excluded, and has
        not already been tried. A usable encoding is recorded in `tried`
        (lowercased) as a side effect.
        """
        if encoding is not None:
            encoding = encoding.lower()
            if encoding in self.exclude_encodings:
                return False
            if encoding not in tried:
                tried.add(encoding)
                return True
        return False

    @property
    def encodings(self):
        """Yield a number of encodings that might work for this markup."""
        tried = set()
        for e in self.override_encodings:
            if self._usable(e, tried):
                yield e

        # Did the document originally start with a byte-order mark
        # that indicated its encoding?
        if self._usable(self.sniffed_encoding, tried):
            yield self.sniffed_encoding

        # Look within the document for an XML or HTML encoding
        # declaration.
        if self.declared_encoding is None:
            self.declared_encoding = self.find_declared_encoding(
                self.markup, self.is_html)
        if self._usable(self.declared_encoding, tried):
            yield self.declared_encoding

        # Use third-party character set detection to guess at the
        # encoding.
        if self.chardet_encoding is None:
            self.chardet_encoding = chardet_dammit(self.markup)
        if self._usable(self.chardet_encoding, tried):
            yield self.chardet_encoding

        # As a last-ditch effort, try utf-8 and windows-1252.
        for e in ('utf-8', 'windows-1252'):
            if self._usable(e, tried):
                yield e

    @classmethod
    def strip_byte_order_mark(cls, data):
        """If a byte-order mark is present, strip it and return the encoding it implies.

        :param data: A bytestring or Unicode string.
        :return: A 2-tuple (data without the byte-order mark, encoding
            name or None).
        """
        encoding = None
        if isinstance(data, unicode):
            # Unicode data cannot have a byte-order mark.
            return data, encoding
        # The UTF-16 checks must reject a following pair of null bytes:
        # FF FE 00 00 is the UTF-32LE mark, not UTF-16LE followed by a
        # null character. The comparison uses a bytes literal so that it
        # is a real comparison wherever bytes and text are distinct types.
        if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
               and (data[2:4] != b'\x00\x00'):
            encoding = 'utf-16be'
            data = data[2:]
        elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \
                 and (data[2:4] != b'\x00\x00'):
            encoding = 'utf-16le'
            data = data[2:]
        elif data[:3] == b'\xef\xbb\xbf':
            encoding = 'utf-8'
            data = data[3:]
        elif data[:4] == b'\x00\x00\xfe\xff':
            encoding = 'utf-32be'
            data = data[4:]
        elif data[:4] == b'\xff\xfe\x00\x00':
            encoding = 'utf-32le'
            data = data[4:]
        return data, encoding

    @classmethod
    def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False):
        """Given a document, tries to find its declared encoding.

        An XML encoding is declared at the beginning of the document.

        An HTML encoding is declared in a <meta> tag, hopefully near the
        beginning of the document.

        :param markup: The document, as a bytestring.
        :param is_html: If True, fall back to looking for a <meta> tag
            when no XML declaration is found.
        :param search_entire_document: If False, only the first 1024
            bytes are searched for an XML declaration and the first
            max(2048, 5% of the document) bytes for a <meta> tag.
        :return: The declared encoding, lowercased, or None.
        """
        if search_entire_document:
            xml_endpos = html_endpos = len(markup)
        else:
            xml_endpos = 1024
            html_endpos = max(2048, int(len(markup) * 0.05))

        declared_encoding = None
        declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
        if not declared_encoding_match and is_html:
            declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
        if declared_encoding_match is not None:
            declared_encoding = declared_encoding_match.groups()[0].decode(
                'ascii', 'replace')
        if declared_encoding:
            return declared_encoding.lower()
        return None
class UnicodeDammit:
    """A class for detecting the encoding of a *ML document and
    converting it to a Unicode string. If the source encoding is
    windows-1252, can replace MS smart quotes with their HTML or XML
    equivalents."""

    # This dictionary maps commonly seen values for "charset" in HTML
    # meta tags to the corresponding Python codec names. It only covers
    # values that aren't in Python's aliases and can't be determined
    # by the heuristics in find_codec.
    CHARSET_ALIASES = {"macintosh": "mac-roman",
                       "x-sjis": "shift-jis"}

    # Encodings whose bytes 0x80-0x9f are run through smart-quote
    # substitution when smart_quotes_to is set.
    ENCODINGS_WITH_SMART_QUOTES = [
        "windows-1252",
        "iso-8859-1",
        "iso-8859-2",
    ]

    def __init__(self, markup, override_encodings=None,
                 smart_quotes_to=None, is_html=False, exclude_encodings=None):
        """Try to convert `markup` to Unicode.

        On return, `unicode_markup` holds the converted document (or
        None if no encoding worked) and `original_encoding` holds the
        name of the encoding that worked (or None).

        :param markup: The document, as a bytestring or Unicode string.
        :param override_encodings: Encoding names to try first.
        :param smart_quotes_to: None to leave Microsoft smart quotes
            alone, or 'ascii', 'xml' or 'html' to replace them.
        :param is_html: Whether the document should be treated as HTML
            when looking for a declared encoding.
        :param exclude_encodings: Encoding names never to try.
        """
        self.smart_quotes_to = smart_quotes_to
        self.tried_encodings = []
        self.contains_replacement_characters = False
        self.is_html = is_html
        self.log = logging.getLogger(__name__)
        # EncodingDetector treats None the same as an empty list for
        # both encoding lists.
        self.detector = EncodingDetector(
            markup, override_encodings, is_html, exclude_encodings)

        # Short-circuit if the data is in Unicode to begin with.
        if isinstance(markup, unicode) or markup == '':
            self.markup = markup
            self.unicode_markup = unicode(markup)
            self.original_encoding = None
            return

        # The encoding detector may have stripped a byte-order mark.
        # Use the stripped markup from this point on.
        self.markup = self.detector.markup

        u = None
        for encoding in self.detector.encodings:
            u = self._convert_from(encoding)
            if u is not None:
                break

        if not u:
            # None of the encodings worked. As an absolute last resort,
            # try them again with character replacement.

            for encoding in self.detector.encodings:
                if encoding != "ascii":
                    u = self._convert_from(encoding, "replace")
                if u is not None:
                    self.log.warning(
                            "Some characters could not be decoded, and were "
                            "replaced with REPLACEMENT CHARACTER."
                    )
                    self.contains_replacement_characters = True
                    break

        # If none of that worked, we could at this point force it to
        # ASCII, but that would destroy so much data that I think
        # giving up is better.
        self.unicode_markup = u
        if not u:
            self.original_encoding = None

    def _sub_ms_char(self, match):
        """Changes a MS smart quote character to an XML or HTML
        entity, or an ASCII character.

        :param match: A regex match whose group 1 is a single byte in
            the range 0x80-0x9f.
        :return: The replacement, as a bytestring.
        """
        orig = match.group(1)
        if self.smart_quotes_to == 'ascii':
            sub = self.MS_CHARS_TO_ASCII.get(orig).encode()
        else:
            sub = self.MS_CHARS.get(orig)
            if isinstance(sub, tuple):
                # (HTML entity name, XML hexadecimal code point)
                if self.smart_quotes_to == 'xml':
                    sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
                else:
                    sub = '&'.encode() + sub[0].encode() + ';'.encode()
            else:
                # A plain replacement string for bytes that have no
                # character assigned.
                sub = sub.encode()
        return sub

    def _convert_from(self, proposed, errors="strict"):
        """Try to decode self.markup using the proposed encoding.

        :param proposed: An encoding name, possibly an alias.
        :param errors: The codec error-handling scheme.
        :return: The decoded markup, or None if the encoding is unknown,
            has already been tried with this error scheme, or fails.
            On success self.markup and self.original_encoding are
            updated.
        """
        proposed = self.find_codec(proposed)
        if not proposed or (proposed, errors) in self.tried_encodings:
            return None
        self.tried_encodings.append((proposed, errors))
        markup = self.markup
        # Convert smart quotes to HTML if coming from an encoding
        # that might have them.
        if (self.smart_quotes_to is not None
            and proposed in self.ENCODINGS_WITH_SMART_QUOTES):
            smart_quotes_re = b"([\x80-\x9f])"
            smart_quotes_compiled = re.compile(smart_quotes_re)
            markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)

        try:
            u = self._to_unicode(markup, proposed, errors)
            self.markup = u
            self.original_encoding = proposed
        except Exception:
            # Any failure simply means this encoding is not the one;
            # the caller moves on to the next candidate.
            return None
        return self.markup

    def _to_unicode(self, data, encoding, errors="strict"):
        '''Given a string and its encoding, decodes the string into Unicode.
        %encoding is a string recognized by encodings.aliases'''
        return unicode(data, encoding, errors)

    @property
    def declared_html_encoding(self):
        """The encoding declared in the document, if it was processed
        as HTML; otherwise None."""
        if not self.is_html:
            return None
        return self.detector.declared_encoding

    def find_codec(self, charset):
        """Map a charset name to a lowercased codec name.

        Tries the alias table, then the name with hyphens removed, then
        with hyphens turned into underscores, and finally falls back to
        the name itself. Returns None for an empty charset.
        """
        value = (self._codec(self.CHARSET_ALIASES.get(charset, charset))
               or (charset and self._codec(charset.replace("-", "")))
               or (charset and self._codec(charset.replace("-", "_")))
               or (charset and charset.lower())
               or charset
                )
        if value:
            return value.lower()
        return None

    def _codec(self, charset):
        """Return `charset` if Python has a codec by that name, else
        None. A false `charset` is returned unchanged."""
        if not charset:
            return charset
        codec = None
        try:
            codecs.lookup(charset)
            codec = charset
        except (LookupError, ValueError):
            pass
        return codec


    # A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities.
    # Tuples are (HTML entity name, XML hexadecimal code point); plain
    # strings are literal replacements for bytes with no character.
    MS_CHARS = {b'\x80': ('euro', '20AC'),
                b'\x81': ' ',
                b'\x82': ('sbquo', '201A'),
                b'\x83': ('fnof', '192'),
                b'\x84': ('bdquo', '201E'),
                b'\x85': ('hellip', '2026'),
                b'\x86': ('dagger', '2020'),
                b'\x87': ('Dagger', '2021'),
                b'\x88': ('circ', '2C6'),
                b'\x89': ('permil', '2030'),
                b'\x8A': ('Scaron', '160'),
                b'\x8B': ('lsaquo', '2039'),
                b'\x8C': ('OElig', '152'),
                b'\x8D': '?',
                b'\x8E': ('#x17D', '17D'),
                b'\x8F': '?',
                b'\x90': '?',
                b'\x91': ('lsquo', '2018'),
                b'\x92': ('rsquo', '2019'),
                b'\x93': ('ldquo', '201C'),
                b'\x94': ('rdquo', '201D'),
                b'\x95': ('bull', '2022'),
                b'\x96': ('ndash', '2013'),
                b'\x97': ('mdash', '2014'),
                b'\x98': ('tilde', '2DC'),
                b'\x99': ('trade', '2122'),
                b'\x9a': ('scaron', '161'),
                b'\x9b': ('rsaquo', '203A'),
                b'\x9c': ('oelig', '153'),
                b'\x9d': '?',
                b'\x9e': ('#x17E', '17E'),
                # LATIN CAPITAL LETTER Y WITH DIAERESIS is U+0178.
                b'\x9f': ('Yuml', '178'),}

    # A parochial partial mapping of ISO-Latin-1 to ASCII. Contains
    # horrors like stripping diacritical marks to turn á into a, but also
    # contains non-horrors like turning “ into ".
    # Every value is a plain string, because _sub_ms_char encodes it.
    MS_CHARS_TO_ASCII = {
        b'\x80' : 'EUR',
        b'\x81' : ' ',
        b'\x82' : ',',
        b'\x83' : 'f',
        b'\x84' : ',,',
        b'\x85' : '...',
        b'\x86' : '+',
        b'\x87' : '++',
        b'\x88' : '^',
        b'\x89' : '%',
        b'\x8a' : 'S',
        b'\x8b' : '<',
        b'\x8c' : 'OE',
        b'\x8d' : '?',
        b'\x8e' : 'Z',
        b'\x8f' : '?',
        b'\x90' : '?',
        b'\x91' : "'",
        b'\x92' : "'",
        b'\x93' : '"',
        b'\x94' : '"',
        b'\x95' : '*',
        b'\x96' : '-',
        b'\x97' : '--',
        b'\x98' : '~',
        b'\x99' : '(TM)',
        b'\x9a' : 's',
        b'\x9b' : '>',
        b'\x9c' : 'oe',
        b'\x9d' : '?',
        b'\x9e' : 'z',
        b'\x9f' : 'Y',
        b'\xa0' : ' ',
        b'\xa1' : '!',
        b'\xa2' : 'c',
        b'\xa3' : 'GBP',
        b'\xa4' : '$', #This approximation is especially parochial--this is the
                       #generic currency symbol.
        b'\xa5' : 'YEN',
        b'\xa6' : '|',
        b'\xa7' : 'S',
        b'\xa8' : '..',
        b'\xa9' : '',
        b'\xaa' : '(th)',
        b'\xab' : '<<',
        b'\xac' : '!',
        b'\xad' : ' ',
        b'\xae' : '(R)',
        b'\xaf' : '-',
        b'\xb0' : 'o',
        b'\xb1' : '+-',
        b'\xb2' : '2',
        b'\xb3' : '3',
        b'\xb4' : "'",
        b'\xb5' : 'u',
        b'\xb6' : 'P',
        b'\xb7' : '*',
        b'\xb8' : ',',
        b'\xb9' : '1',
        b'\xba' : '(th)',
        b'\xbb' : '>>',
        b'\xbc' : '1/4',
        b'\xbd' : '1/2',
        b'\xbe' : '3/4',
        b'\xbf' : '?',
        b'\xc0' : 'A',
        b'\xc1' : 'A',
        b'\xc2' : 'A',
        b'\xc3' : 'A',
        b'\xc4' : 'A',
        b'\xc5' : 'A',
        b'\xc6' : 'AE',
        b'\xc7' : 'C',
        b'\xc8' : 'E',
        b'\xc9' : 'E',
        b'\xca' : 'E',
        b'\xcb' : 'E',
        b'\xcc' : 'I',
        b'\xcd' : 'I',
        b'\xce' : 'I',
        b'\xcf' : 'I',
        b'\xd0' : 'D',
        b'\xd1' : 'N',
        b'\xd2' : 'O',
        b'\xd3' : 'O',
        b'\xd4' : 'O',
        b'\xd5' : 'O',
        b'\xd6' : 'O',
        b'\xd7' : '*',
        b'\xd8' : 'O',
        b'\xd9' : 'U',
        b'\xda' : 'U',
        b'\xdb' : 'U',
        b'\xdc' : 'U',
        b'\xdd' : 'Y',
        b'\xde' : 'b',
        b'\xdf' : 'B',
        b'\xe0' : 'a',
        b'\xe1' : 'a',
        b'\xe2' : 'a',
        b'\xe3' : 'a',
        b'\xe4' : 'a',
        b'\xe5' : 'a',
        b'\xe6' : 'ae',
        b'\xe7' : 'c',
        b'\xe8' : 'e',
        b'\xe9' : 'e',
        b'\xea' : 'e',
        b'\xeb' : 'e',
        b'\xec' : 'i',
        b'\xed' : 'i',
        b'\xee' : 'i',
        b'\xef' : 'i',
        b'\xf0' : 'o',
        b'\xf1' : 'n',
        b'\xf2' : 'o',
        b'\xf3' : 'o',
        b'\xf4' : 'o',
        b'\xf5' : 'o',
        b'\xf6' : 'o',
        b'\xf7' : '/',
        b'\xf8' : 'o',
        b'\xf9' : 'u',
        b'\xfa' : 'u',
        b'\xfb' : 'u',
        b'\xfc' : 'u',
        b'\xfd' : 'y',
        b'\xfe' : 'b',
        b'\xff' : 'y',
        }

    # A map used when removing rogue Windows-1252/ISO-8859-1
    # characters in otherwise UTF-8 documents.
    #
    # Keys are Windows-1252 byte values; values are the UTF-8 encoding
    # of the same character.
    #
    # Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in
    # Windows-1252.
    WINDOWS_1252_TO_UTF8 = {
        0x80 : b'\xe2\x82\xac', # €
        0x82 : b'\xe2\x80\x9a', # ‚
        0x83 : b'\xc6\x92',     # ƒ
        0x84 : b'\xe2\x80\x9e', # „
        0x85 : b'\xe2\x80\xa6', # …
        0x86 : b'\xe2\x80\xa0', # †
        0x87 : b'\xe2\x80\xa1', # ‡
        0x88 : b'\xcb\x86',     # ˆ
        0x89 : b'\xe2\x80\xb0', # ‰
        0x8a : b'\xc5\xa0',     # Š
        0x8b : b'\xe2\x80\xb9', # ‹
        0x8c : b'\xc5\x92',     # Œ
        0x8e : b'\xc5\xbd',     # Ž
        0x91 : b'\xe2\x80\x98', # ‘
        0x92 : b'\xe2\x80\x99', # ’
        0x93 : b'\xe2\x80\x9c', # “
        0x94 : b'\xe2\x80\x9d', # ”
        0x95 : b'\xe2\x80\xa2', # •
        0x96 : b'\xe2\x80\x93', # –
        0x97 : b'\xe2\x80\x94', # —
        0x98 : b'\xcb\x9c',     # ˜
        0x99 : b'\xe2\x84\xa2', # ™
        0x9a : b'\xc5\xa1',     # š
        0x9b : b'\xe2\x80\xba', # ›
        0x9c : b'\xc5\x93',     # œ
        0x9e : b'\xc5\xbe',     # ž
        0x9f : b'\xc5\xb8',     # Ÿ
        0xa0 : b'\xc2\xa0',     # (no-break space)
        0xa1 : b'\xc2\xa1',     # ¡
        0xa2 : b'\xc2\xa2',     # ¢
        0xa3 : b'\xc2\xa3',     # £
        0xa4 : b'\xc2\xa4',     # ¤
        0xa5 : b'\xc2\xa5',     # ¥
        0xa6 : b'\xc2\xa6',     # ¦
        0xa7 : b'\xc2\xa7',     # §
        0xa8 : b'\xc2\xa8',     # ¨
        0xa9 : b'\xc2\xa9',     # ©
        0xaa : b'\xc2\xaa',     # ª
        0xab : b'\xc2\xab',     # «
        0xac : b'\xc2\xac',     # ¬
        0xad : b'\xc2\xad',     # (soft hyphen)
        0xae : b'\xc2\xae',     # ®
        0xaf : b'\xc2\xaf',     # ¯
        0xb0 : b'\xc2\xb0',     # °
        0xb1 : b'\xc2\xb1',     # ±
        0xb2 : b'\xc2\xb2',     # ²
        0xb3 : b'\xc2\xb3',     # ³
        0xb4 : b'\xc2\xb4',     # ´
        0xb5 : b'\xc2\xb5',     # µ
        0xb6 : b'\xc2\xb6',     # ¶
        0xb7 : b'\xc2\xb7',     # ·
        0xb8 : b'\xc2\xb8',     # ¸
        0xb9 : b'\xc2\xb9',     # ¹
        0xba : b'\xc2\xba',     # º
        0xbb : b'\xc2\xbb',     # »
        0xbc : b'\xc2\xbc',     # ¼
        0xbd : b'\xc2\xbd',     # ½
        0xbe : b'\xc2\xbe',     # ¾
        0xbf : b'\xc2\xbf',     # ¿
        0xc0 : b'\xc3\x80',     # À
        0xc1 : b'\xc3\x81',     # Á
        0xc2 : b'\xc3\x82',     # Â
        0xc3 : b'\xc3\x83',     # Ã
        0xc4 : b'\xc3\x84',     # Ä
        0xc5 : b'\xc3\x85',     # Å
        0xc6 : b'\xc3\x86',     # Æ
        0xc7 : b'\xc3\x87',     # Ç
        0xc8 : b'\xc3\x88',     # È
        0xc9 : b'\xc3\x89',     # É
        0xca : b'\xc3\x8a',     # Ê
        0xcb : b'\xc3\x8b',     # Ë
        0xcc : b'\xc3\x8c',     # Ì
        0xcd : b'\xc3\x8d',     # Í
        0xce : b'\xc3\x8e',     # Î
        0xcf : b'\xc3\x8f',     # Ï
        0xd0 : b'\xc3\x90',     # Ð
        0xd1 : b'\xc3\x91',     # Ñ
        0xd2 : b'\xc3\x92',     # Ò
        0xd3 : b'\xc3\x93',     # Ó
        0xd4 : b'\xc3\x94',     # Ô
        0xd5 : b'\xc3\x95',     # Õ
        0xd6 : b'\xc3\x96',     # Ö
        0xd7 : b'\xc3\x97',     # ×
        0xd8 : b'\xc3\x98',     # Ø
        0xd9 : b'\xc3\x99',     # Ù
        0xda : b'\xc3\x9a',     # Ú
        0xdb : b'\xc3\x9b',     # Û
        0xdc : b'\xc3\x9c',     # Ü
        0xdd : b'\xc3\x9d',     # Ý
        0xde : b'\xc3\x9e',     # Þ
        0xdf : b'\xc3\x9f',     # ß
        0xe0 : b'\xc3\xa0',     # à
        0xe1 : b'\xc3\xa1',     # á
        0xe2 : b'\xc3\xa2',     # â
        0xe3 : b'\xc3\xa3',     # ã
        0xe4 : b'\xc3\xa4',     # ä
        0xe5 : b'\xc3\xa5',     # å
        0xe6 : b'\xc3\xa6',     # æ
        0xe7 : b'\xc3\xa7',     # ç
        0xe8 : b'\xc3\xa8',     # è
        0xe9 : b'\xc3\xa9',     # é
        0xea : b'\xc3\xaa',     # ê
        0xeb : b'\xc3\xab',     # ë
        0xec : b'\xc3\xac',     # ì
        0xed : b'\xc3\xad',     # í
        0xee : b'\xc3\xae',     # î
        0xef : b'\xc3\xaf',     # ï
        0xf0 : b'\xc3\xb0',     # ð
        0xf1 : b'\xc3\xb1',     # ñ
        0xf2 : b'\xc3\xb2',     # ò
        0xf3 : b'\xc3\xb3',     # ó
        0xf4 : b'\xc3\xb4',     # ô
        0xf5 : b'\xc3\xb5',     # õ
        0xf6 : b'\xc3\xb6',     # ö
        0xf7 : b'\xc3\xb7',     # ÷
        0xf8 : b'\xc3\xb8',     # ø
        0xf9 : b'\xc3\xb9',     # ù
        0xfa : b'\xc3\xba',     # ú
        0xfb : b'\xc3\xbb',     # û
        0xfc : b'\xc3\xbc',     # ü
        0xfd : b'\xc3\xbd',     # ý
        0xfe : b'\xc3\xbe',     # þ
        }

    # (first lead byte, last lead byte, total length in bytes)
    MULTIBYTE_MARKERS_AND_SIZES = [
        (0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF
        (0xe0, 0xef, 3), # 3-byte characters start with E0-EF
        (0xf0, 0xf4, 4), # 4-byte characters start with F0-F4
        ]

    FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0]
    LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1]

    @classmethod
    def detwingle(cls, in_bytes, main_encoding="utf8",
                  embedded_encoding="windows-1252"):
        """Fix characters from one encoding embedded in some other encoding.

        Currently the only situation supported is Windows-1252
        embedded in UTF-8.

        The input must be a bytestring. If you've already converted
        the document to Unicode, you're too late.

        The output is a bytestring in which `embedded_encoding`
        characters have been converted to their `main_encoding`
        equivalents.

        :param in_bytes: The document, as a bytestring.
        :param main_encoding: Must be 'utf8' or 'utf-8' (any case).
        :param embedded_encoding: Must be 'windows-1252' or
            'windows_1252' (any case).
        :raise NotImplementedError: For any other encoding pair.
        """
        if embedded_encoding.replace('_', '-').lower() != 'windows-1252':
            raise NotImplementedError(
                "Windows-1252 and ISO-8859-1 are the only currently supported "
                "embedded encodings.")

        if main_encoding.lower() not in ('utf8', 'utf-8'):
            raise NotImplementedError(
                "UTF-8 is the only currently supported main encoding.")

        byte_chunks = []

        chunk_start = 0
        pos = 0
        while pos < len(in_bytes):
            byte = in_bytes[pos]
            if not isinstance(byte, int):
                # Python 2.x
                byte = ord(byte)
            if (byte >= cls.FIRST_MULTIBYTE_MARKER
                and byte <= cls.LAST_MULTIBYTE_MARKER):
                # This is the start of a UTF-8 multibyte character. Skip
                # to the end.
                for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES:
                    if byte >= start and byte <= end:
                        pos += size
                        break
            elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8:
                # We found a Windows-1252 character!
                # Save the string up to this point as a chunk.
                byte_chunks.append(in_bytes[chunk_start:pos])

                # Now translate the Windows-1252 character into UTF-8
                # and add it as another, one-byte chunk.
                byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte])
                pos += 1
                chunk_start = pos
            else:
                # Go on to the next character.
                pos += 1
        if chunk_start == 0:
            # The string is unchanged.
            return in_bytes
        else:
            # Store the final chunk.
            byte_chunks.append(in_bytes[chunk_start:])
        return b''.join(byte_chunks)
-219
View File
@@ -1,219 +0,0 @@
"""Diagnostic functions, mainly for use when doing tech support."""
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__license__ = "MIT"
import cProfile
from StringIO import StringIO
from HTMLParser import HTMLParser
import bs4
from bs4 import BeautifulSoup, __version__
from bs4.builder import builder_registry
import os
import pstats
import random
import tempfile
import time
import traceback
import sys
import cProfile
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Reports which parsers are available, then parses the markup with
    each available parser and prints the result.

    :param data: Markup as a string, an object with a read() method,
        or the name of an existing file to read the markup from. A
        string that looks like a URL is rejected with an explanation.
    """
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Iterate over a copy: removing from the list being iterated would
    # skip the entry that follows each missing parser.
    for name in list(basic_parsers):
        for builder in builder_registry.builders:
            if name in builder.features:
                break
        else:
            # No registered builder offers this feature.
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help." %
                name)

    if 'lxml' in basic_parsers:
        # lxml can also parse XML, so try that mode too.
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
        except ImportError:
            print(
                "lxml is not installed or couldn't be imported.")


    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print(
                "html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        with open(data) as fp:
            data = fp.read()
    elif data.startswith("http:") or data.startswith("https:"):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    print("")

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        success = False
        try:
            soup = BeautifulSoup(data, parser)
            success = True
        except Exception:
            # Report the failure and carry on with the next parser.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
def lxml_trace(data, html=True, **kwargs):
    """Show the raw event stream lxml produces for a document.

    Nothing from Beautiful Soup takes part in the parse, so the output
    shows what lxml does on its own. Each event is printed on one line
    as the event name, the element's tag and the element's text.

    :param data: The markup to parse.
    :param html: Passed to etree.iterparse() as its `html` argument.
    :param kwargs: Any further keyword arguments for etree.iterparse().
    """
    from lxml import etree
    source = StringIO(data)
    event_stream = etree.iterparse(source, html=html, **kwargs)
    for event, element in event_stream:
        line = "%s, %4s, %s" % (event, element.tag, element.text)
        print(line)
class AnnouncingParser(HTMLParser):
    """An HTMLParser that does nothing except report each parse event.

    Every handler formats a one-line description of its event and hands
    it to _p(), which prints it.
    """

    def _p(self, s):
        """Emit one line of output."""
        print(s)

    def handle_starttag(self, name, attrs):
        """Report an opening tag; its attributes are not shown."""
        announcement = "%s START" % name
        self._p(announcement)

    def handle_endtag(self, name):
        """Report a closing tag."""
        announcement = "%s END" % name
        self._p(announcement)

    def handle_data(self, data):
        """Report a run of text."""
        announcement = "%s DATA" % data
        self._p(announcement)

    def handle_charref(self, name):
        """Report a numeric character reference."""
        announcement = "%s CHARREF" % name
        self._p(announcement)

    def handle_entityref(self, name):
        """Report a named entity reference."""
        announcement = "%s ENTITYREF" % name
        self._p(announcement)

    def handle_comment(self, data):
        """Report a comment."""
        announcement = "%s COMMENT" % data
        self._p(announcement)

    def handle_decl(self, data):
        """Report a declaration such as a doctype."""
        announcement = "%s DECL" % data
        self._p(announcement)

    def unknown_decl(self, data):
        """Report a declaration the parser did not recognize."""
        announcement = "%s UNKNOWN-DECL" % data
        self._p(announcement)

    def handle_pi(self, data):
        """Report a processing instruction."""
        announcement = "%s PI" % data
        self._p(announcement)
def htmlparser_trace(data):
    """Show the raw event stream HTMLParser produces for a document.

    The markup is fed to an AnnouncingParser, which prints every parse
    event; nothing from Beautiful Soup takes part in the parse.

    :param data: The markup to parse.
    """
    AnnouncingParser().feed(data)
# Letter pools used to build pronounceable nonsense words.
_vowels = "aeiou"
_consonants = "bcdfghjklmnpqrstvwxyz"

def rword(length=5):
    "Build a random pronounceable string: consonants at even positions, vowels at odd ones."
    pools = (_consonants, _vowels)
    return ''.join(random.choice(pools[position % 2])
                   for position in range(length))
def rsentence(length=4):
    "Build a random sentence-like string of `length` space-separated words, each 4 to 9 letters long."
    words = []
    for _ in range(length):
        word_length = random.randint(4,9)
        words.append(rword(word_length))
    return " ".join(words)
def rdoc(num_elements=1000):
    """Build a random, invalid HTML document.

    For each of `num_elements` rounds a number from 0 to 3 is drawn:
    0 adds an opening tag, 1 adds a random sentence, 2 adds a closing
    tag, and 3 adds nothing. The pieces are joined with newlines and
    wrapped in <html>...</html>.
    """
    tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
    pieces = []
    for _ in range(num_elements):
        roll = random.randint(0,3)
        if roll == 0:
            # Open a randomly chosen tag.
            pieces.append("<%s>" % random.choice(tag_names))
        elif roll == 1:
            # Add some text.
            pieces.append(rsentence(random.randint(1,4)))
        elif roll == 2:
            # Close a randomly chosen tag, whether or not it is open.
            pieces.append("</%s>" % random.choice(tag_names))
    return "<html>" + "\n".join(pieces) + "</html>"
def benchmark_parsers(num_elements=100000):
    """Time each parser on the same randomly generated document.

    The document is parsed through Beautiful Soup with each parser in
    turn, then with lxml and html5lib directly, and the elapsed time of
    every run is printed.

    :param num_elements: Passed to rdoc() to size the document.
    """
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))

    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        try:
            started = time.time()
            soup = BeautifulSoup(data, parser)
            finished = time.time()
        except Exception:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("BS4+%s parsed the markup in %.2fs." % (parser, finished-started))

    # Baseline: lxml without Beautiful Soup.
    from lxml import etree
    started = time.time()
    etree.HTML(data)
    finished = time.time()
    print("Raw lxml parsed the markup in %.2fs." % (finished-started))

    # Baseline: html5lib without Beautiful Soup.
    import html5lib
    parser = html5lib.HTMLParser()
    started = time.time()
    parser.parse(data)
    finished = time.time()
    print("Raw html5lib parsed the markup in %.2fs." % (finished-started))
def profile(num_elements=100000, parser="lxml"):
    """Profile Beautiful Soup parsing a randomly generated document.

    Runs the parse under cProfile and prints up to 50 entries, sorted
    by cumulative time, restricted to functions matching
    '_html5lib|bs4'.

    :param num_elements: Passed to rdoc() to size the document.
    :param parser: The parser name given to the BeautifulSoup constructor.
    """
    data = rdoc(num_elements)
    namespace = dict(bs4=bs4, data=data, parser=parser)
    # The temporary file only carries the profile data from cProfile to
    # pstats; the with-block closes (and so removes) it when we're done.
    with tempfile.NamedTemporaryFile() as filehandle:
        filename = filehandle.name
        cProfile.runctx(
            'bs4.BeautifulSoup(data, parser)', namespace, namespace, filename)

        stats = pstats.Stats(filename)
        # stats.strip_dirs()
        stats.sort_stats("cumulative")
        stats.print_stats('_html5lib|bs4', 50)
# When run as a script, diagnose whatever markup arrives on standard input.
if __name__ == '__main__':
    diagnose(sys.stdin.read())
File diff suppressed because it is too large Load Diff
-770
View File
@@ -1,770 +0,0 @@
"""Helper classes for tests."""
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__license__ = "MIT"
import pickle
import copy
import functools
import unittest
from unittest import TestCase
from bs4 import BeautifulSoup
from bs4.element import (
CharsetMetaAttributeValue,
Comment,
ContentMetaAttributeValue,
Doctype,
SoupStrainer,
)
from bs4.builder import HTMLParserTreeBuilder
default_builder = HTMLParserTreeBuilder
class SoupTest(unittest.TestCase):
    """Base class for test cases that parse markup with a tree builder."""

    @property
    def default_builder(self):
        """A fresh instance of the module-level default builder class."""
        return default_builder()

    def soup(self, markup, **kwargs):
        """Parse `markup` into a BeautifulSoup object.

        A `builder` keyword argument overrides the default builder; all
        other keyword arguments go to the BeautifulSoup constructor.
        """
        tree_builder = kwargs.pop('builder', self.default_builder)
        return BeautifulSoup(markup, builder=tree_builder, **kwargs)

    def document_for(self, markup):
        """Wrap an HTML fragment the way the default builder would.

        What the wrapping looks like is up to the builder.
        """
        return self.default_builder.test_fragment_to_document(markup)

    def assertSoupEquals(self, to_parse, compare_parsed_to=None):
        """Assert that parsing `to_parse` yields the expected document.

        The expected document is built from `compare_parsed_to`, or
        from `to_parse` itself when that argument is None.
        """
        parsed = BeautifulSoup(to_parse, builder=self.default_builder)
        expected_source = (
            to_parse if compare_parsed_to is None else compare_parsed_to)
        self.assertEqual(parsed.decode(), self.document_for(expected_source))

    def assertConnectedness(self, element):
        """Assert that the next_element / previous_element links are
        consistent between each pair of consecutive descendants of the
        given element.
        """
        previous = None
        for current in element.descendants:
            if previous:
                self.assertEqual(current, previous.next_element)
                self.assertEqual(previous, current.previous_element)
            previous = current
class HTMLTreeBuilderSmokeTest(object):
"""A basic test of a treebuilder's competence.
Any HTML treebuilder, present or future, should be able to pass
these tests. With invalid markup, there's room for interpretation,
and different parsers can handle it differently. But with the
markup in these tests, there's not much room for interpretation.
"""
def test_empty_element_tags(self):
"""Verify that all HTML4 and HTML5 empty element (aka void element) tags
are handled correctly.
"""
for name in [
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
'spacer', 'frame'
]:
soup = self.soup("")
new_tag = soup.new_tag(name)
self.assertEqual(True, new_tag.is_empty_element)
def test_pickle_and_unpickle_identity(self):
# Pickling a tree, then unpickling it, yields a tree identical
# to the original.
tree = self.soup("<a><b>foo</a>")
dumped = pickle.dumps(tree, 2)
loaded = pickle.loads(dumped)
self.assertEqual(loaded.__class__, BeautifulSoup)
self.assertEqual(loaded.decode(), tree.decode())
def assertDoctypeHandled(self, doctype_fragment):
"""Assert that a given doctype string is handled correctly."""
doctype_str, soup = self._document_with_doctype(doctype_fragment)
# Make sure a Doctype object was created.
doctype = soup.contents[0]
self.assertEqual(doctype.__class__, Doctype)
self.assertEqual(doctype, doctype_fragment)
self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
# Make sure that the doctype was correctly associated with the
# parse tree and that the rest of the document parsed.
self.assertEqual(soup.p.contents[0], 'foo')
def _document_with_doctype(self, doctype_fragment):
"""Generate and parse a document with the given doctype."""
doctype = '<!DOCTYPE %s>' % doctype_fragment
markup = doctype + '\n<p>foo</p>'
soup = self.soup(markup)
return doctype, soup
def test_normal_doctypes(self):
"""Make sure normal, everyday HTML doctypes are handled correctly."""
self.assertDoctypeHandled("html")
self.assertDoctypeHandled(
'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
def test_empty_doctype(self):
soup = self.soup("<!DOCTYPE>")
doctype = soup.contents[0]
self.assertEqual("", doctype.strip())
def test_public_doctype_with_url(self):
doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
self.assertDoctypeHandled(doctype)
def test_system_doctype(self):
self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
def test_namespaced_system_doctype(self):
# We can handle a namespaced doctype with a system ID.
self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
def test_namespaced_public_doctype(self):
# Test a namespaced doctype with a public id.
self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
def test_real_xhtml_document(self):
"""A real XHTML document should come out more or less the same as it went in."""
markup = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>"""
soup = self.soup(markup)
self.assertEqual(
soup.encode("utf-8").replace(b"\n", b""),
markup.replace(b"\n", b""))
def test_processing_instruction(self):
# We test both Unicode and bytestring to verify that
# process_markup correctly sets processing_instruction_class
# even when the markup is already Unicode and there is no
# need to process anything.
markup = u"""<?PITarget PIContent?>"""
soup = self.soup(markup)
self.assertEqual(markup, soup.decode())
markup = b"""<?PITarget PIContent?>"""
soup = self.soup(markup)
self.assertEqual(markup, soup.encode("utf8"))
def test_deepcopy(self):
"""Make sure you can copy the tree builder.
This is important because the builder is part of a
BeautifulSoup object, and we want to be able to copy that.
"""
copy.deepcopy(self.default_builder)
def test_p_tag_is_never_empty_element(self):
"""A <p> tag is never designated as an empty-element tag.
Even if the markup shows it as an empty-element tag, it
shouldn't be presented that way.
"""
soup = self.soup("<p/>")
self.assertFalse(soup.p.is_empty_element)
self.assertEqual(str(soup.p), "<p></p>")
def test_unclosed_tags_get_closed(self):
"""A tag that's not closed by the end of the document should be closed.
This applies to all tags except empty-element tags.
"""
self.assertSoupEquals("<p>", "<p></p>")
self.assertSoupEquals("<b>", "<b></b>")
self.assertSoupEquals("<br>", "<br/>")
def test_br_is_always_empty_element_tag(self):
"""A <br> tag is designated as an empty-element tag.
Some parsers treat <br></br> as one <br/> tag, some parsers as
two tags, but it should always be an empty-element tag.
"""
soup = self.soup("<br></br>")
self.assertTrue(soup.br.is_empty_element)
self.assertEqual(str(soup.br), "<br/>")
def test_nested_formatting_elements(self):
    """An <em> nested directly inside another <em> is left unchanged."""
    markup = "<em><em></em></em>"
    self.assertSoupEquals(markup)
def test_double_head(self):
    """A <script> between </head> and <body> keeps its attributes."""
    markup = '''<!DOCTYPE html>
<html>
<head>
<title>Ordinary HEAD element test</title>
</head>
<script type="text/javascript">
alert("Help!");
</script>
<body>
Hello, world!
</body>
</html>
'''
    script = self.soup(markup).find('script')
    self.assertEqual("text/javascript", script['type'])
def test_comment(self):
    """Comments become Comment objects linked into the parse tree."""
    markup = "<p>foo<!--foobar-->baz</p>"
    self.assertSoupEquals(markup)

    tree = self.soup(markup)
    comment_node = tree.find(text="foobar")
    self.assertEqual(comment_node.__class__, Comment)

    # The comment sits between its two neighbouring strings.
    text_before = tree.find(text="foo")
    self.assertEqual(comment_node, text_before.next_element)
    text_after = tree.find(text="baz")
    self.assertEqual(comment_node, text_after.previous_element)
def test_preserved_whitespace_in_pre_and_textarea(self):
    """Whitespace inside <pre> and <textarea> survives prettify().

    Preserving it takes priority over prettifying the markup.
    """
    pre_markup = "<pre> </pre>"
    textarea_markup = "<textarea> woo\nwoo </textarea>"
    for markup in (pre_markup, textarea_markup):
        self.assertSoupEquals(markup)

    pre_tag = self.soup(pre_markup).pre
    self.assertEqual(pre_tag.prettify(), pre_markup)

    textarea_tag = self.soup(textarea_markup).textarea
    self.assertEqual(textarea_tag.prettify(), textarea_markup)

    # An empty <textarea> must not gain any whitespace either.
    empty_tag = self.soup("<textarea></textarea>").textarea
    self.assertEqual(empty_tag.prettify(), "<textarea></textarea>")
def test_nested_inline_elements(self):
    """Inline elements can be nested indefinitely.

    Checks one, two and three levels of inline nesting; each markup
    must come out of the parser unchanged.
    """
    b_tag = "<b>Inside a B tag</b>"
    self.assertSoupEquals(b_tag)

    nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
    self.assertSoupEquals(nested_b_tag)

    double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
    # Assert the doubly nested markup itself; re-checking
    # nested_b_tag here would leave this case untested.
    self.assertSoupEquals(double_nested_b_tag)
def test_nested_block_level_elements(self):
    """Block elements can be nested inside one another."""
    markup = '<blockquote><p><b>Foo</b></p></blockquote>'
    outer = self.soup(markup).blockquote
    # The <b> is reachable through the <p> and directly from the
    # <blockquote>.
    self.assertEqual(outer.p.b.string, 'Foo')
    self.assertEqual(outer.b.string, 'Foo')
def test_correctly_nested_tables(self):
    """One table can be nested inside a cell of another."""
    unclosed_nested = ('<table id="1">'
                       '<tr>'
                       "<td>Here's another table:"
                       '<table id="2">'
                       '<tr><td>foo</td></tr>'
                       '</table></td>')
    closed_nested = (
        '<table id="1"><tr><td>Here\'s another table:'
        '<table id="2"><tr><td>foo</td></tr></table>'
        '</td></tr></table>')
    self.assertSoupEquals(unclosed_nested, closed_nested)

    # A table that already has explicit sections is left as-is.
    sectioned = (
        "<table><thead><tr><td>Foo</td></tr></thead>"
        "<tbody><tr><td>Bar</td></tr></tbody>"
        "<tfoot><tr><td>Baz</td></tr></tfoot></table>")
    self.assertSoupEquals(sectioned)
def test_deeply_nested_multivalued_attribute(self):
    """A nested tag's multivalued attribute is a one-item list."""
    # html5lib can set the attributes of the same tag many times as it
    # rearranges the tree, which has caused problems with multivalued
    # attributes.
    tree = self.soup('<table><div><div class="css"></div></div></table>')
    inner_div = tree.div.div
    self.assertEqual(["css"], inner_div['class'])
def test_multivalued_attribute_on_html(self):
    """A multivalued attribute on <html> is split into a list."""
    # html5lib uses a different API to set the attributes of the
    # <html> tag, which has caused problems with multivalued
    # attributes.
    html_tag = self.soup('<html class="a b"></html>').html
    self.assertEqual(["a", "b"], html_tag['class'])
def test_angle_brackets_in_attribute_values_are_escaped(self):
    """Angle brackets in an attribute value are output as entities."""
    raw = '<a b="<a>"></a>'
    escaped = '<a b="&lt;a&gt;"></a>'
    self.assertSoupEquals(raw, escaped)
def test_entities_in_attributes_converted_to_unicode(self):
    """Entity spellings in an attribute all become the same character."""
    expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
    spellings = (
        '<p id="pi&#241;ata"></p>',
        '<p id="pi&#xf1;ata"></p>',
        '<p id="pi&#Xf1;ata"></p>',
        '<p id="pi&ntilde;ata"></p>',
    )
    for markup in spellings:
        self.assertSoupEquals(markup, expect)
def test_entities_in_text_converted_to_unicode(self):
    """Entity spellings in text all become the same character."""
    expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
    spellings = (
        "<p>pi&#241;ata</p>",
        "<p>pi&#xf1;ata</p>",
        "<p>pi&#Xf1;ata</p>",
        "<p>pi&ntilde;ata</p>",
    )
    for markup in spellings:
        self.assertSoupEquals(markup, expect)
def test_quot_entity_converted_to_quotation_mark(self):
    """``&quot;`` in text is output as a literal double quote."""
    with_entities = "<p>I said &quot;good day!&quot;</p>"
    with_quotes = '<p>I said "good day!"</p>'
    self.assertSoupEquals(with_entities, with_quotes)
def test_out_of_range_entity(self):
    """Out-of-range numeric entities become U+FFFD."""
    expect = u"\N{REPLACEMENT CHARACTER}"
    out_of_range = (
        "&#10000000000000;",
        "&#x10000000000000;",
        "&#1000000000;",
    )
    for entity in out_of_range:
        self.assertSoupEquals(entity, expect)
def test_multipart_strings(self):
    """Guard against a recurrence of an html5lib treebuilder bug."""
    tree = self.soup("<html><h2>\nfoo</h2><p></p></html>")
    # The element after the <h2>'s string must be the following <p>.
    following = tree.h2.string.next_element
    self.assertEqual("p", following.name)
    self.assertEqual("p", tree.p.name)
    self.assertConnectedness(tree)
def test_empty_element_tags(self):
    """Empty-element tags are output the same way however written."""
    normalized = "<br/><br/><br/>"
    for markup in ('<br/><br/><br/>', '<br /><br /><br />'):
        self.assertSoupEquals(markup, normalized)
def test_head_tag_between_head_and_body(self):
    """Guard against a recurrence of an html5lib treebuilder bug."""
    markup = """<html><head></head>
<link></link>
<body>foo</body>
</html>
"""
    tree = self.soup(markup)
    # A <link> between </head> and <body> must not cost us the <body>.
    self.assertNotEqual(None, tree.html.body)
    self.assertConnectedness(tree)
def test_multiple_copies_of_a_tag(self):
    """Guard against a recurrence of an html5lib treebuilder bug."""
    markup = """<!DOCTYPE html>
<html>
<body>
<article id="a" >
<div><a href="1"></div>
<footer>
<a href="2"></a>
</footer>
</article>
</body>
</html>
"""
    article = self.soup(markup).article
    self.assertConnectedness(article)
def test_basic_namespaces(self):
    """Namespaced markup is parsed without errors or data loss.

    Parsers need not *understand* namespaces, but they must not choke
    on them.
    """
    markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>'
    soup = self.soup(markup)
    self.assertEqual(markup, soup.encode())

    # Every namespace declaration is still present on <html>.
    html = soup.html
    self.assertEqual('http://www.w3.org/1999/xhtml', html['xmlns'])
    self.assertEqual(
        'http://www.w3.org/1998/Math/MathML', html['xmlns:mathml'])
    self.assertEqual(
        'http://www.w3.org/2000/svg', html['xmlns:svg'])
def test_multivalued_attribute_value_becomes_list(self):
    """A space-separated ``class`` value is split into a list."""
    anchor = self.soup(b'<a class="foo bar">').a
    self.assertEqual(['foo', 'bar'], anchor['class'])
#
# Generally speaking, tests below this point are more tests of
# Beautiful Soup than tests of the tree builders. But parsers are
# weird, so we run these tests separately for every tree builder
# to detect any differences between them.
#
def test_can_parse_unicode_document(self):
    """Unicode input parses even if its declared encoding can't hold it."""
    # The document is already Unicode, and it contains a character
    # that cannot be represented in the encoding named in its <meta>
    # tag.
    markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
    body = self.soup(markup).body
    self.assertEqual(u'Sacr\xe9 bleu!', body.string)
def test_soupstrainer(self):
    """Parsers can restrict the tree with a SoupStrainer."""
    only_bold = SoupStrainer("b")
    markup = "A <b>bold</b> <meta/> <i>statement</i>"
    strained = self.soup(markup, parse_only=only_bold)
    self.assertEqual(strained.decode(), "<b>bold</b>")
def test_single_quote_attribute_values_become_double_quotes(self):
    """Single-quoted attribute values are output with double quotes."""
    single_quoted = "<foo attr='bar'></foo>"
    double_quoted = '<foo attr="bar"></foo>'
    self.assertSoupEquals(single_quoted, double_quoted)
def test_attribute_values_with_nested_quotes_are_left_alone(self):
    """A single-quoted value containing double quotes is unchanged."""
    markup = """<foo attr='bar "brawls" happen'>a</foo>"""
    self.assertSoupEquals(markup)
def test_attribute_values_with_double_nested_quotes_get_quoted(self):
    """A value with both quote kinds has its double quotes escaped."""
    markup = """<foo attr='bar "brawls" happen'>a</foo>"""
    tree = self.soup(markup)
    # Swap in a value that contains both quote characters.
    tree.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
    rendered = tree.foo.decode()
    self.assertSoupEquals(
        rendered,
        """<foo attr="Brawls happen at &quot;Bob\'s Bar&quot;">a</foo>""")
def test_ampersand_in_attribute_value_gets_escaped(self):
    """A bare ``&`` in an attribute value is output as ``&amp;``."""
    cases = (
        ('<this is="really messed up & stuff"></this>',
         '<this is="really messed up &amp; stuff"></this>'),
        ('<a href="http://example.org?a=1&b=2;3">foo</a>',
         '<a href="http://example.org?a=1&amp;b=2;3">foo</a>'),
    )
    for raw, escaped in cases:
        self.assertSoupEquals(raw, escaped)
def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
    """An already-escaped ``&amp;`` is not escaped a second time."""
    markup = '<a href="http://example.org?a=1&amp;b=2;3"></a>'
    self.assertSoupEquals(markup)
def test_entities_in_strings_converted_during_parsing(self):
    """XML and HTML entities become Unicode characters when parsed."""
    with_entities = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
    converted = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
    self.assertSoupEquals(with_entities, converted)
def test_smart_quotes_converted_on_the_way_in(self):
    """Microsoft smart quotes become Unicode quotation marks."""
    paragraph = self.soup(b"<p>\x91Foo\x92</p>").p
    self.assertEqual(
        paragraph.string,
        u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
def test_non_breaking_spaces_converted_on_the_way_in(self):
    """``&nbsp;`` entities are parsed into NO-BREAK SPACE characters."""
    anchor = self.soup("<a>&nbsp;&nbsp;</a>").a
    self.assertEqual(anchor.string, u"\N{NO-BREAK SPACE}" * 2)
def test_entities_converted_on_the_way_out(self):
    """Encoding re-escapes angle brackets but keeps the accented letter."""
    markup = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
    expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
    paragraph = self.soup(markup).p
    self.assertEqual(paragraph.encode("utf-8"), expected)
def test_real_iso_latin_document(self):
    """Smoke test: parse an ISO-Latin-1 document and emit UTF-8."""
    # The document as Unicode. It claims to be ISO-Latin-1 because
    # that is the encoding it is about to be converted to.
    unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
    iso_latin_html = unicode_html.encode("iso-8859-1")

    # Parse the ISO-Latin-1 bytes and re-encode the tree as UTF-8.
    result = self.soup(iso_latin_html).encode("utf-8")

    # The output should match the Unicode original, except that the
    # META tag names UTF-8 and the whole thing is UTF-8 bytes.
    with_utf8_meta = unicode_html.replace("ISO-Latin-1", "utf-8")
    expected = with_utf8_meta.encode("utf-8")

    self.assertEqual(result, expected)
def test_real_shift_jis_document(self):
    """Smoke test: a Shift-JIS document parses and re-encodes."""
    shift_jis_html = (
        b'<html><head></head><body><pre>'
        b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
        b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
        b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
        b'</pre></body></html>')
    unicode_html = shift_jis_html.decode("shift-jis")
    tree = self.soup(unicode_html)

    # Encoding the tree must match encoding the Unicode source
    # directly, for each target encoding.
    for target in ("utf-8", "euc_jp"):
        self.assertEqual(tree.encode(target), unicode_html.encode(target))
def test_real_hebrew_document(self):
    """An ISO-8859-8 (Hebrew) document can be converted to UTF-8.

    The markup is parsed with ``from_encoding="iso8859-8"``; the test
    then checks the detected encoding and the UTF-8 output.
    """
    hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
    soup = self.soup(
        hebrew_document, from_encoding="iso8859-8")
    # Some tree builders call it iso8859-8, others call it iso-8859-8.
    # That's not a difference we really care about.
    # A TestCase assertion is used rather than a bare ``assert`` so
    # the check still runs under ``python -O``.
    self.assertTrue(
        soup.original_encoding in ('iso8859-8', 'iso-8859-8'))
    self.assertEqual(
        soup.encode('utf-8'),
        hebrew_document.decode("iso8859-8").encode("utf-8"))
def test_meta_tag_reflects_current_encoding(self):
    """A Content-type <meta> charset follows the output encoding."""
    # A <meta> tag declaring the document to be Shift-JIS...
    meta_tag = ('<meta content="text/html; charset=x-sjis" '
                'http-equiv="Content-type"/>')

    # ...and a document that incorporates it.
    shift_jis_html = (
        '<html><head>\n%s\n'
        '<meta http-equiv="Content-language" content="ja"/>'
        '</head><body>Shift-JIS markup goes here.') % meta_tag
    tree = self.soup(shift_jis_html)

    # After parsing, the charset appears unaffected.
    content = tree.find('meta', {'http-equiv': 'Content-type'})['content']
    self.assertEqual('text/html; charset=x-sjis', content)

    # But the value is really a ContentMetaAttributeValue...
    self.assertTrue(isinstance(content, ContentMetaAttributeValue))

    # ...which takes on a value reflecting the encoding it is
    # asked to produce.
    self.assertEqual('text/html; charset=utf8', content.encode("utf8"))

    # For the rest of the story, see TestSubstitutions in
    # test_tree.py.
def test_html5_style_meta_tag_reflects_current_encoding(self):
    """An HTML5-style <meta charset> follows the output encoding."""
    # A <meta> tag declaring the document to be Shift-JIS...
    meta_tag = ('<meta id="encoding" charset="x-sjis" />')

    # ...and a document that incorporates it.
    shift_jis_html = (
        '<html><head>\n%s\n'
        '<meta http-equiv="Content-language" content="ja"/>'
        '</head><body>Shift-JIS markup goes here.') % meta_tag
    tree = self.soup(shift_jis_html)

    # After parsing, the charset appears unaffected.
    charset = tree.find('meta', id="encoding")['charset']
    self.assertEqual('x-sjis', charset)

    # But the value is really a CharsetMetaAttributeValue...
    self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))

    # ...which takes on a value reflecting the encoding it is
    # asked to produce.
    self.assertEqual('utf8', charset.encode("utf8"))
def test_tag_with_no_attributes_can_have_attributes_added(self):
    """An attribute can be set on a tag parsed without attributes."""
    anchor = self.soup("<a>text</a>").a
    anchor['foo'] = 'bar'
    self.assertEqual('<a foo="bar">text</a>', anchor.decode())
class XMLTreeBuilderSmokeTest(object):
    """Smoke tests for a tree builder that parses XML.

    This is a mixin: it calls ``self.soup``, ``self.assertSoupEquals``
    and the ``assert*`` methods, so it must be combined with a test
    case class that provides them.
    """

    def test_pickle_and_unpickle_identity(self):
        """Pickling then unpickling a tree yields an identical tree."""
        tree = self.soup("<a><b>foo</a>")
        dumped = pickle.dumps(tree, 2)
        loaded = pickle.loads(dumped)
        self.assertEqual(loaded.__class__, BeautifulSoup)
        self.assertEqual(loaded.decode(), tree.decode())

    def test_docstring_generated(self):
        """An XML declaration is added when a document is encoded."""
        soup = self.soup("<root/>")
        self.assertEqual(
            soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')

    def test_xml_declaration(self):
        """An existing XML declaration round-trips unchanged."""
        markup = b"""<?xml version="1.0" encoding="utf8"?>\n<foo/>"""
        soup = self.soup(markup)
        self.assertEqual(markup, soup.encode("utf8"))

    def test_processing_instruction(self):
        """A processing instruction round-trips unchanged."""
        markup = b"""<?xml version="1.0" encoding="utf8"?>\n<?PITarget PIContent?>"""
        soup = self.soup(markup)
        self.assertEqual(markup, soup.encode("utf8"))

    def test_real_xhtml_document(self):
        """A real XHTML document should come out *exactly* the same as it went in."""
        markup = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>"""
        soup = self.soup(markup)
        self.assertEqual(
            soup.encode("utf-8"), markup)

    def test_formatter_processes_script_tag_for_xml_documents(self):
        """<script> contents are entity-escaped in XML output."""
        doc = """
<script type="text/javascript">
</script>
"""
        soup = BeautifulSoup(doc, "lxml-xml")
        # lxml would have stripped this while parsing, but we can add
        # it later.
        soup.script.string = 'console.log("< < hey > > ");'
        encoded = soup.encode()
        self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)

    def test_can_parse_unicode_document(self):
        """Unicode input parses even if it declares another encoding."""
        markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
        soup = self.soup(markup)
        self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)

    def test_popping_namespaced_tag(self):
        """Prefixed sibling tags are closed correctly and round-trip."""
        markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
        soup = self.soup(markup)
        self.assertEqual(
            unicode(soup.rss), markup)

    def test_docstring_includes_correct_encoding(self):
        """The generated XML declaration names the output encoding."""
        soup = self.soup("<root/>")
        self.assertEqual(
            soup.encode("latin1"),
            b'<?xml version="1.0" encoding="latin1"?>\n<root/>')

    def test_large_xml_document(self):
        """A large XML document should come out the same as it went in."""
        markup = (b'<?xml version="1.0" encoding="utf-8"?>\n<root>'
                  + b'0' * (2**12)
                  + b'</root>')
        soup = self.soup(markup)
        self.assertEqual(soup.encode("utf-8"), markup)

    def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
        """In XML any tag without content is output as an empty element."""
        self.assertSoupEquals("<p>", "<p/>")
        self.assertSoupEquals("<p>foo</p>")

    def test_namespaces_are_preserved(self):
        """Namespace declarations remain available as attributes."""
        markup = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
        soup = self.soup(markup)
        root = soup.root
        self.assertEqual("http://example.com/", root['xmlns:a'])
        self.assertEqual("http://example.net/", root['xmlns:b'])

    def test_closing_namespaced_tag(self):
        """A prefixed child tag round-trips unchanged."""
        markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
        soup = self.soup(markup)
        self.assertEqual(unicode(soup.p), markup)

    def test_namespaced_attributes(self):
        """A prefixed attribute round-trips unchanged."""
        markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
        soup = self.soup(markup)
        self.assertEqual(unicode(soup.foo), markup)

    def test_namespaced_attributes_xml_namespace(self):
        """An ``xml:``-prefixed attribute round-trips unchanged."""
        markup = '<foo xml:lang="fr">bar</foo>'
        soup = self.soup(markup)
        self.assertEqual(unicode(soup.foo), markup)

    def test_find_by_prefixed_name(self):
        """Tags can be found by bare name or by prefixed name."""
        # The <Document> start tag is closed with ">" so the markup is
        # well-formed and the test does not rely on parser recovery.
        doc = """<?xml version="1.0" encoding="utf-8"?>
<Document xmlns="http://example.com/ns0"
xmlns:ns1="http://example.com/ns1"
xmlns:ns2="http://example.com/ns2">
<ns1:tag>foo</ns1:tag>
<ns1:tag>bar</ns1:tag>
<ns2:tag key="value">baz</ns2:tag>
</Document>
"""
        soup = self.soup(doc)

        # There are three <tag> tags.
        self.assertEqual(3, len(soup.find_all('tag')))

        # But two of them are ns1:tag and one of them is ns2:tag.
        self.assertEqual(2, len(soup.find_all('ns1:tag')))
        self.assertEqual(1, len(soup.find_all('ns2:tag')))

        self.assertEqual(1, len(soup.find_all('ns2:tag', key='value')))
        self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag'])))

    def test_copy_tag_preserves_namespace(self):
        """Copying a tag keeps its namespace prefix."""
        xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://example.com/ns0"/>"""

        soup = self.soup(xml)
        tag = soup.document
        duplicate = copy.copy(tag)

        # The two tags have the same namespace prefix.
        self.assertEqual(tag.prefix, duplicate.prefix)
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
    """Smoke test for a tree builder that supports HTML5."""

    def test_real_xhtml_document(self):
        """Disable the inherited XHTML round-trip test."""
        # XHTML is not HTML5, so HTML5 parsers are not required to
        # handle XHTML documents in any particular way.
        pass

    def test_html_tags_have_namespace(self):
        """Ordinary HTML tags are placed in the XHTML namespace."""
        anchor = self.soup("<a>").a
        self.assertEqual("http://www.w3.org/1999/xhtml", anchor.namespace)

    def test_svg_tags_have_namespace(self):
        """<svg> and its children are placed in the SVG namespace."""
        tree = self.soup('<svg><circle/></svg>')
        svg_ns = "http://www.w3.org/2000/svg"
        self.assertEqual(svg_ns, tree.svg.namespace)
        self.assertEqual(svg_ns, tree.circle.namespace)

    def test_mathml_tags_have_namespace(self):
        """<math> and its children are placed in the MathML namespace."""
        tree = self.soup('<math><msqrt>5</msqrt></math>')
        mathml_ns = 'http://www.w3.org/1998/Math/MathML'
        self.assertEqual(mathml_ns, tree.math.namespace)
        self.assertEqual(mathml_ns, tree.msqrt.namespace)

    def test_xml_declaration_becomes_comment(self):
        """A leading XML declaration is parsed as a Comment."""
        markup = '<?xml version="1.0" encoding="utf-8"?><html></html>'
        tree = self.soup(markup)
        first = tree.contents[0]
        self.assertTrue(isinstance(first, Comment))
        self.assertEqual(first, '?xml version="1.0" encoding="utf-8"?')
        # The <html> tag follows immediately after the comment.
        self.assertEqual("html", tree.contents[0].next_element.name)
def skipIf(condition, reason):
    """Return a decorator that disables an item when *condition* holds.

    :param condition: When true, the decorated item is replaced by a
        callable that accepts any arguments and returns None. When
        false, the decorated item is returned unchanged.
    :param reason: Accepted but not used by this implementation.
    :return: A decorator taking the item to (maybe) disable.
    """
    def nothing(test, *args, **kwargs):
        return None

    def decorator(test_item):
        return nothing if condition else test_item

    return decorator
@@ -1 +0,0 @@
"The beautifulsoup tests."
@@ -1,147 +0,0 @@
"""Tests of the builder registry."""
import unittest
import warnings
from bs4 import BeautifulSoup
from bs4.builder import (
builder_registry as registry,
HTMLParserTreeBuilder,
TreeBuilderRegistry,
)
try:
from bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError:
HTML5LIB_PRESENT = False
try:
from bs4.builder import (
LXMLTreeBuilderForXML,
LXMLTreeBuilder,
)
LXML_PRESENT = True
except ImportError:
LXML_PRESENT = False
class BuiltInRegistryTest(unittest.TestCase):
    """Test the built-in registry with the default builders registered."""

    def test_combination(self):
        """Looking up a pair of features finds the expected builder."""
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('fast', 'html'),
                             LXMLTreeBuilder)
            self.assertEqual(registry.lookup('permissive', 'xml'),
                             LXMLTreeBuilderForXML)

        self.assertEqual(registry.lookup('strict', 'html'),
                         HTMLParserTreeBuilder)

        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib', 'html'),
                             HTML5TreeBuilder)

    def test_lookup_by_markup_type(self):
        """Looking up a bare markup type depends on what is installed."""
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
            return

        # Without lxml nothing handles XML, and HTML falls to
        # html5lib if present, otherwise to html.parser.
        self.assertEqual(registry.lookup('xml'), None)
        if HTML5LIB_PRESENT:
            expected_html = HTML5TreeBuilder
        else:
            expected_html = HTMLParserTreeBuilder
        self.assertEqual(registry.lookup('html'), expected_html)

    def test_named_library(self):
        """Builders can be looked up by the name of their library."""
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('lxml', 'xml'),
                             LXMLTreeBuilderForXML)
            self.assertEqual(registry.lookup('lxml', 'html'),
                             LXMLTreeBuilder)

        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib'),
                             HTML5TreeBuilder)

        self.assertEqual(registry.lookup('html.parser'),
                         HTMLParserTreeBuilder)

    def test_beautifulsoup_constructor_does_lookup(self):
        """The BeautifulSoup constructor resolves ``features``."""
        with warnings.catch_warnings(record=True) as w:
            # These calls warn about not explicitly specifying a
            # parser; the recorded warnings are ignored.

            # ``features`` may be a single string...
            BeautifulSoup("", features="html")
            # ...or a list of strings.
            BeautifulSoup("", features=["html", "fast"])

        # A feature that no builder provides raises ValueError.
        self.assertRaises(ValueError, BeautifulSoup,
                          "", features="no-such-feature")
class RegistryTest(unittest.TestCase):
    """Test the TreeBuilderRegistry class in general."""

    def setUp(self):
        """Give every test a fresh, empty registry."""
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list):
        """Create and register a dummy builder class.

        :param feature_list: Feature names the new class advertises.
        :return: The newly registered class.
        """
        class_name = 'Builder_' + '_'.join(feature_list)
        attributes = {'features' : feature_list}
        cls = type(class_name, (object,), attributes)
        self.registry.register(cls)
        return cls

    def test_register_with_no_features(self):
        """A feature-less builder is found only by a feature-less lookup."""
        builder = self.builder_for_features()

        # It advertises no features, so no feature lookup finds it.
        self.assertEqual(self.registry.lookup('foo'), None)

        # A lookup with no features does find it, as it happens to be
        # the only registered builder.
        self.assertEqual(self.registry.lookup(), builder)

    def test_register_with_features_makes_lookup_succeed(self):
        """A builder is found through each feature it advertises."""
        builder = self.builder_for_features('foo', 'bar')
        for feature in ('foo', 'bar'):
            self.assertEqual(self.registry.lookup(feature), builder)

    def test_lookup_fails_when_no_builder_implements_feature(self):
        """Looking up an unadvertised feature returns None."""
        self.builder_for_features('foo', 'bar')
        self.assertEqual(self.registry.lookup('baz'), None)

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        """A feature-less lookup returns the latest registration."""
        self.builder_for_features('foo')
        newest = self.builder_for_features('bar')
        self.assertEqual(self.registry.lookup(), newest)

    def test_lookup_fails_when_no_tree_builders_registered(self):
        """An empty registry returns None."""
        self.assertEqual(self.registry.lookup(), None)

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        """Of the builders with every feature, the latest one wins."""
        # Registration order matters, so it is kept exactly.
        self.builder_for_features('foo')
        self.builder_for_features('bar')
        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
        self.builder_for_features('bar')
        self.builder_for_features('foo')

        # There are two builders featuring 'foo' and 'bar', but
        # the one that also features 'quux' was registered later.
        self.assertEqual(self.registry.lookup('foo', 'bar'),
                         has_both_late)

        # There is only one builder featuring 'foo', 'bar', and 'baz'.
        self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
                         has_both_early)

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        """No builder is returned unless one has every feature."""
        self.builder_for_features('foo', 'bar')
        self.builder_for_features('foo', 'baz')
        self.assertEqual(self.registry.lookup('bar', 'baz'), None)
@@ -1,36 +0,0 @@
"Test harness for doctests."
# pylint: disable-msg=E0611,W0142
# On Python 2 this makes classes defined in this module new-style.
__metaclass__ = type
# ``additional_tests`` is commented out further down in this module,
# so it must not be listed here: a name in ``__all__`` that the module
# does not define makes ``from <module> import *`` raise
# AttributeError. Re-add it if the function is restored.
__all__ = []
import atexit
import doctest
import os
#from pkg_resources import (
# resource_filename, resource_exists, resource_listdir, cleanup_resources)
import unittest
# Option flags for the doctest runner: allow "..." wildcards, ignore
# whitespace differences, and report failures as ndiff output.
DOCTEST_FLAGS = doctest.ELLIPSIS
DOCTEST_FLAGS |= doctest.NORMALIZE_WHITESPACE
DOCTEST_FLAGS |= doctest.REPORT_NDIFF
# def additional_tests():
# "Run the doc tests (README.txt and docs/*, if any exist)"
# doctest_files = [
# os.path.abspath(resource_filename('bs4', 'README.txt'))]
# if resource_exists('bs4', 'docs'):
# for name in resource_listdir('bs4', 'docs'):
# if name.endswith('.txt'):
# doctest_files.append(
# os.path.abspath(
# resource_filename('bs4', 'docs/%s' % name)))
# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
# atexit.register(cleanup_resources)
# return unittest.TestSuite((
# doctest.DocFileSuite(*doctest_files, **kwargs)))
@@ -1,130 +0,0 @@
"""Tests to ensure that the html5lib tree builder generates good trees."""
import warnings
# Probe for the html5lib tree builder; HTML5LIB_PRESENT records
# whether the import succeeded.
try:
    from bs4.builder import HTML5TreeBuilder
    HTML5LIB_PRESENT = True
except ImportError:
    # The exception object is not needed, so it is not bound to a
    # name. This form is valid on both Python 2 and Python 3.
    HTML5LIB_PRESENT = False
from bs4.element import SoupStrainer
from bs4.testing import (
HTML5TreeBuilderSmokeTest,
SoupTest,
skipIf,
)
@skipIf(
    not HTML5LIB_PRESENT,
    "html5lib seems not to be present, not testing its tree builder.")
class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
    """See ``HTML5TreeBuilderSmokeTest``.

    Runs the shared HTML5 smoke tests against ``HTML5TreeBuilder`` and
    adds tests for html5lib-specific tree rearrangement. Checks use
    ``self.assert*`` rather than bare ``assert`` so they still run
    under ``python -O``.
    """

    @property
    def default_builder(self):
        """A fresh ``HTML5TreeBuilder`` instance."""
        return HTML5TreeBuilder()

    def test_soupstrainer(self):
        """parse_only is ignored, with a warning, by this builder."""
        # The html5lib tree builder does not support SoupStrainers.
        strainer = SoupStrainer("b")
        markup = "<p>A <b>bold</b> statement.</p>"
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup(markup, parse_only=strainer)
        self.assertEqual(
            soup.decode(), self.document_for(markup))

        self.assertTrue(
            "the html5lib tree builder doesn't support parse_only" in
            str(w[0].message))

    def test_correctly_nested_tables(self):
        """html5lib inserts <tbody> tags where other parsers don't."""
        markup = ('<table id="1">'
                  '<tr>'
                  "<td>Here's another table:"
                  '<table id="2">'
                  '<tr><td>foo</td></tr>'
                  '</table></td>')

        self.assertSoupEquals(
            markup,
            '<table id="1"><tbody><tr><td>Here\'s another table:'
            '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
            '</td></tr></tbody></table>')

        self.assertSoupEquals(
            "<table><thead><tr><td>Foo</td></tr></thead>"
            "<tbody><tr><td>Bar</td></tr></tbody>"
            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")

    def test_xml_declaration_followed_by_doctype(self):
        """An XML declaration before the doctype leaves the tree intact."""
        markup = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<p>foo</p>
</body>
</html>'''
        soup = self.soup(markup)
        # Verify that we can reach the <p> tag; this means the tree is connected.
        self.assertEqual(b"<p>foo</p>", soup.p.encode())

    def test_reparented_markup(self):
        """An <em> left open across paragraphs is reparented into each."""
        markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
        soup = self.soup(markup)
        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
        self.assertEqual(2, len(soup.find_all('p')))

    def test_reparented_markup_ends_with_whitespace(self):
        """Reparenting also works when the markup ends with a newline."""
        markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
        soup = self.soup(markup)
        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
        self.assertEqual(2, len(soup.find_all('p')))

    def test_reparented_markup_containing_identical_whitespace_nodes(self):
        """Verify that we keep the two whitespace nodes in this
        document distinct when reparenting the adjacent <tbody> tags.
        """
        markup = '<table> <tbody><tbody><ims></tbody> </table>'
        soup = self.soup(markup)
        space1, space2 = soup.find_all(string=' ')
        tbody1, tbody2 = soup.find_all('tbody')
        self.assertTrue(space1.next_element is tbody1)
        self.assertTrue(tbody2.next_element is space2)

    def test_reparented_markup_containing_children(self):
        """Strings stay linked when a tag moves under a cloned <a>."""
        markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>'
        soup = self.soup(markup)
        noscript = soup.noscript
        self.assertEqual("target", noscript.next_element)
        target = soup.find(string='target')

        # The 'aftermath' string was duplicated; we want the second one.
        final_aftermath = soup.find_all(string='aftermath')[-1]

        # The <noscript> tag was moved beneath a copy of the <a> tag,
        # but the 'target' string within is still connected to the
        # (second) 'aftermath' string.
        self.assertEqual(final_aftermath, target.next_element)
        self.assertEqual(target, final_aftermath.previous_element)

    def test_processing_instruction(self):
        """Processing instructions become comments."""
        markup = b"""<?PITarget PIContent?>"""
        soup = self.soup(markup)
        self.assertTrue(str(soup).startswith("<!--?PITarget PIContent?-->"))

    def test_cloned_multivalue_node(self):
        """A cloned tag equals its source but is a distinct object."""
        markup = b"""<a class="my_class"><p></a>"""
        soup = self.soup(markup)
        a1, a2 = soup.find_all('a')
        self.assertEqual(a1, a2)
        self.assertTrue(a1 is not a2)

    def test_foster_parenting(self):
        """Text after a stray </tbody> is moved in front of the table."""
        markup = b"""<table><td></tbody>A"""
        soup = self.soup(markup)
        self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
@@ -1,34 +0,0 @@
"""Tests to ensure that the html.parser tree builder generates good
trees."""
from pdb import set_trace
import pickle
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from bs4.builder import HTMLParserTreeBuilder
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """Run the HTML smoke tests against ``HTMLParserTreeBuilder``."""

    @property
    def default_builder(self):
        """A fresh ``HTMLParserTreeBuilder`` instance."""
        builder = HTMLParserTreeBuilder()
        return builder

    def test_namespaced_system_doctype(self):
        """Disabled: html.parser can't handle namespaced doctypes."""
        pass

    def test_namespaced_public_doctype(self):
        """Disabled: html.parser can't handle namespaced doctypes."""
        pass

    def test_builder_is_pickled(self):
        """Unlike most tree builders, HTMLParserTreeBuilder is restored
        when a pickled tree is loaded.
        """
        original = self.soup("<a><b>foo</a>")
        restored = pickle.loads(pickle.dumps(original, 2))
        self.assertTrue(
            isinstance(restored.builder, type(original.builder)))

    def test_redundant_empty_element_closing_tags(self):
        """Redundant closing tags for <br> are dropped."""
        cases = (
            ('<br></br><br></br><br></br>', "<br/><br/><br/>"),
            ('</br></br></br>', ""),
        )
        for markup, rendered in cases:
            self.assertSoupEquals(markup, rendered)
@@ -1,76 +0,0 @@
"""Tests to ensure that the lxml tree builder generates good trees."""
import re
import warnings
# Probe for lxml. LXML_PRESENT records whether the import succeeded,
# and LXML_VERSION holds lxml's version tuple, or (0,) without lxml.
try:
    import lxml.etree
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError:
    # The exception object is not needed, so it is not bound to a
    # name. This form is valid on both Python 2 and Python 3.
    LXML_PRESENT = False
    LXML_VERSION = (0,)
if LXML_PRESENT:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
from bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
)
from bs4.element import Comment, Doctype, SoupStrainer
from bs4.testing import skipIf
from bs4.tests import test_htmlparser
from bs4.testing import (
HTMLTreeBuilderSmokeTest,
XMLTreeBuilderSmokeTest,
SoupTest,
skipIf,
)
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """See ``HTMLTreeBuilderSmokeTest``."""

    @property
    def default_builder(self):
        """A fresh ``LXMLTreeBuilder`` instance."""
        builder = LXMLTreeBuilder()
        return builder

    def test_out_of_range_entity(self):
        """Out-of-range numeric entities are expected to be dropped."""
        out_of_range = (
            "<p>foo&#10000000000000;bar</p>",
            "<p>foo&#x10000000000000;bar</p>",
            "<p>foo&#1000000000;bar</p>",
        )
        for markup in out_of_range:
            self.assertSoupEquals(markup, "<p>foobar</p>")

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.
    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        """An empty doctype parses to an empty string."""
        doctype = self.soup("<!DOCTYPE>").contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        """The deprecated BeautifulStoneSoup parses as XML and warns."""
        # Make sure the deprecated class uses an xml builder if one
        # is installed.
        with warnings.catch_warnings(record=True) as caught:
            soup = BeautifulStoneSoup("<b />")
        self.assertEqual(u"<b/>", unicode(soup.b))
        first_message = str(caught[0].message)
        self.assertTrue("BeautifulStoneSoup class is deprecated" in first_message)
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """Run the XML tree-builder smoke tests with lxml's XML builder.

    See ``XMLTreeBuilderSmokeTest`` for the inherited tests.
    """

    @property
    def default_builder(self):
        """Return a new ``LXMLTreeBuilderForXML`` on every access."""
        xml_builder = LXMLTreeBuilderForXML()
        return xml_builder
@@ -1,501 +0,0 @@
# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""
from pdb import set_trace
import logging
import unittest
import sys
import tempfile
from bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
)
from bs4.element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
SoupStrainer,
NamespacedAttribute,
)
import bs4.dammit
from bs4.dammit import (
EntitySubstitution,
UnicodeDammit,
EncodingDetector,
)
from bs4.testing import (
SoupTest,
skipIf,
)
import warnings
# LXML_PRESENT records whether the lxml-backed tree builders could be
# imported from bs4.builder.
try:
    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
    LXML_PRESENT = True
except ImportError:
    # ``except ImportError, e`` only parses on Python 2 and the bound
    # exception was never used; this spelling works on Python 2 and 3.
    LXML_PRESENT = False

# True only on Python 3.0 and 3.1; used below to skip a test with a
# "Bad HTMLParser detected" message.
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
class TestConstructor(SoupTest):
    """Tests of how markup passed to the constructor is handled."""

    def test_short_unicode_input(self):
        """A very short Unicode document keeps its non-ASCII text."""
        markup = u"<h1>éé</h1>"
        heading = self.soup(markup).h1
        self.assertEqual(u"éé", heading.string)

    def test_embedded_null(self):
        """A NUL character inside the text is preserved."""
        markup = u"<h1>foo\0bar</h1>"
        heading = self.soup(markup).h1
        self.assertEqual(u"foo\0bar", heading.string)

    def test_exclude_encodings(self):
        """With UTF-8 excluded, UTF-8 bytes are reported as windows-1252."""
        encoded = u"Räksmörgås".encode("utf-8")
        parsed = self.soup(encoded, exclude_encodings=["utf-8"])
        self.assertEqual("windows-1252", parsed.original_encoding)
class TestWarnings(SoupTest):
    """Tests of the warnings raised for parser choice and renamed arguments.

    NOTE(review): a second ``class TestWarnings`` appears later in this
    module. The later definition rebinds the module-level name, so a
    runner that collects test cases by module attribute will presumably
    not pick up this class -- confirm, then rename or merge one of them.
    """

    def _assert_no_parser_specified(self, s, is_there=True):
        """Assert that ``s`` begins like the no-parser-specified warning.

        Only the first 80 characters of
        ``BeautifulSoup.NO_PARSER_SPECIFIED_WARNING`` are compared.
        ``is_there`` is accepted but is not consulted.
        """
        v = s.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:80])
        self.assertTrue(v)

    # The helper was previously defined under this name while every caller
    # used ``_assert_no_parser_specified``; keep the old name as an alias.
    _no_parser_specified = _assert_no_parser_specified

    def test_warning_if_no_parser_specified(self):
        """Building a soup without naming a parser should warn."""
        with warnings.catch_warnings(record=True) as w:
            self.soup("<a><b></b></a>")
        msg = str(w[0].message)
        self._assert_no_parser_specified(msg)

    def test_warning_if_parser_specified_too_vague(self):
        """Naming only a markup type ("html") should produce the same warning."""
        with warnings.catch_warnings(record=True) as w:
            self.soup("<a><b></b></a>", "html")
        msg = str(w[0].message)
        self._assert_no_parser_specified(msg)

    def test_no_warning_if_explicit_parser_specified(self):
        """Naming a concrete parser should record no warnings at all."""
        with warnings.catch_warnings(record=True) as w:
            self.soup("<a><b></b></a>", "html.parser")
        self.assertEqual([], w)

    def test_parseOnlyThese_renamed_to_parse_only(self):
        """The old ``parseOnlyThese`` keyword still works but warns."""
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
        msg = str(w[0].message)
        self.assertTrue("parseOnlyThese" in msg)
        self.assertTrue("parse_only" in msg)
        self.assertEqual(b"<b></b>", soup.encode())

    def test_fromEncoding_renamed_to_from_encoding(self):
        """The old ``fromEncoding`` keyword still works but warns."""
        with warnings.catch_warnings(record=True) as w:
            utf8 = b"\xc3\xa9"
            soup = self.soup(utf8, fromEncoding="utf8")
        msg = str(w[0].message)
        self.assertTrue("fromEncoding" in msg)
        self.assertTrue("from_encoding" in msg)
        self.assertEqual("utf8", soup.original_encoding)

    def test_unrecognized_keyword_argument(self):
        """An unknown keyword argument is rejected with TypeError."""
        self.assertRaises(
            TypeError, self.soup, "<a>", no_such_argument=True)
class TestWarnings(SoupTest):
    """Tests of the warnings raised when markup looks like a filename or URL.

    NOTE(review): an earlier class in this module is also named
    ``TestWarnings``; this later definition is the one left bound to
    that name.
    """

    def _url_warning_issued(self, caught):
        """Return True if any recorded warning mentions "looks like a URL"."""
        # Be aware this isn't the only warning that can be raised during
        # execution..
        return any("looks like a URL" in str(w.message) for w in caught)

    def test_disk_file_warning(self):
        """Markup naming an existing file warns; a missing file does not."""
        temp_file = tempfile.NamedTemporaryFile()
        path = temp_file.name
        try:
            with warnings.catch_warnings(record=True) as caught:
                self.soup(path)
            first_message = str(caught[0].message)
            self.assertTrue("looks like a filename" in first_message)
        finally:
            temp_file.close()

        # The file no longer exists, so Beautiful Soup will no longer issue the warning.
        with warnings.catch_warnings(record=True) as caught:
            self.soup(path)
        self.assertEqual(0, len(caught))

    def test_url_warning_with_bytes_url(self):
        """A bytestring consisting only of a URL triggers the URL warning."""
        with warnings.catch_warnings(record=True) as warning_list:
            self.soup(b"http://www.crummybytes.com/")
        self.assertTrue(self._url_warning_issued(warning_list))

    def test_url_warning_with_unicode_url(self):
        """A Unicode string consisting only of a URL triggers the URL warning."""
        with warnings.catch_warnings(record=True) as warning_list:
            # note - this url must differ from the bytes one otherwise
            # python's warnings system swallows the second warning
            self.soup(u"http://www.crummyunicode.com/")
        self.assertTrue(self._url_warning_issued(warning_list))

    def test_url_warning_with_bytes_and_space(self):
        """Bytes with a URL followed by other text do not trigger the warning."""
        with warnings.catch_warnings(record=True) as warning_list:
            self.soup(b"http://www.crummybytes.com/ is great")
        self.assertFalse(self._url_warning_issued(warning_list))

    def test_url_warning_with_unicode_and_space(self):
        """Unicode with a URL followed by other text does not trigger the warning."""
        with warnings.catch_warnings(record=True) as warning_list:
            self.soup(u"http://www.crummyuncode.com/ is great")
        self.assertFalse(self._url_warning_issued(warning_list))
class TestSelectiveParsing(SoupTest):
    """Tests of parsing only part of a document."""

    def test_parse_with_soupstrainer(self):
        """Only <b> tags and their contents survive a ``SoupStrainer("b")``."""
        only_b_tags = SoupStrainer("b")
        document = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
        strained = self.soup(document, parse_only=only_b_tags)
        self.assertEqual(strained.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")
class TestEntitySubstitution(unittest.TestCase):
    """Standalone tests of the EntitySubstitution class."""

    def setUp(self):
        # The class itself (not an instance) is what the tests call into.
        self.sub = EntitySubstitution

    def test_simple_html_substitution(self):
        # Unicode characters corresponding to named HTML entities
        # are substituted, and no others.
        text = u"foo\u2200\N{SNOWMAN}\u00f5bar"
        substituted = self.sub.substitute_html(text)
        self.assertEqual(substituted,
                         u"foo&forall;\N{SNOWMAN}&otilde;bar")

    def test_smart_quote_substitution(self):
        # MS smart quotes are a common source of frustration, so we
        # give them a special test.
        smart_quotes = b"\x91\x92foo\x93\x94"
        converted = UnicodeDammit(smart_quotes)
        self.assertEqual(self.sub.substitute_html(converted.markup),
                         "&lsquo;&rsquo;foo&ldquo;&rdquo;")

    def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
        text = 'Welcome to "my bar"'
        unquoted = self.sub.substitute_xml(text, False)
        self.assertEqual(unquoted, text)

    def test_xml_attribute_quoting_normally_uses_double_quotes(self):
        cases = (
            ("Welcome", '"Welcome"'),
            ("Bob's Bar", '"Bob\'s Bar"'),
        )
        for value, expected in cases:
            self.assertEqual(self.sub.substitute_xml(value, True), expected)

    def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
        text = 'Welcome to "my bar"'
        quoted = self.sub.substitute_xml(text, True)
        self.assertEqual(quoted, "'Welcome to \"my bar\"'")

    def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
        text = 'Welcome to "Bob\'s Bar"'
        quoted = self.sub.substitute_xml(text, True)
        self.assertEqual(quoted, '"Welcome to &quot;Bob\'s Bar&quot;"')

    def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
        text = 'Welcome to "Bob\'s Bar"'
        self.assertEqual(self.sub.substitute_xml(text), text)

    def test_xml_quoting_handles_angle_brackets(self):
        escaped = self.sub.substitute_xml("foo<bar>")
        self.assertEqual(escaped, "foo&lt;bar&gt;")

    def test_xml_quoting_handles_ampersands(self):
        escaped = self.sub.substitute_xml("AT&T")
        self.assertEqual(escaped, "AT&amp;T")

    def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self):
        escaped = self.sub.substitute_xml("&Aacute;T&T")
        self.assertEqual(escaped, "&amp;Aacute;T&amp;T")

    def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self):
        escaped = self.sub.substitute_xml_containing_entities("&Aacute;T&T")
        self.assertEqual(escaped, "&Aacute;T&amp;T")

    def test_quotes_not_html_substituted(self):
        """There's no need to do this except inside attribute values."""
        quoted_text = 'Bob\'s "bar"'
        self.assertEqual(self.sub.substitute_html(quoted_text), quoted_text)
class TestEncodingConversion(SoupTest):
    """Tests of decoding from, and encoding to, various encodings."""

    def setUp(self):
        super(TestEncodingConversion, self).setUp()
        self.unicode_data = u'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
        self.utf8_data = self.unicode_data.encode("utf-8")
        # Spell out the encoded form so the fixture is visible at a glance.
        expected_bytes = b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>'
        self.assertEqual(self.utf8_data, expected_bytes)

    def test_ascii_in_unicode_out(self):
        # ASCII input is converted to Unicode. The original_encoding
        # attribute is set to 'utf-8', a superset of ASCII.
        saved_chardet = bs4.dammit.chardet_dammit
        logging.disable(logging.WARNING)
        try:
            # Stand-in detector that never reports an encoding. The
            # parameter name is kept as in the original stub.
            def noop(str):
                return None
            # Disable chardet, which will realize that the ASCII is ASCII.
            bs4.dammit.chardet_dammit = noop
            ascii_markup = b"<foo>a</foo>"
            soup_from_ascii = self.soup(ascii_markup)
            decoded = soup_from_ascii.decode()
            self.assertTrue(isinstance(decoded, unicode))
            self.assertEqual(decoded, self.document_for(ascii_markup.decode()))
            detected = soup_from_ascii.original_encoding.lower()
            self.assertEqual(detected, "utf-8")
        finally:
            # Undo both the logging and the chardet overrides.
            logging.disable(logging.NOTSET)
            bs4.dammit.chardet_dammit = saved_chardet

    def test_unicode_in_unicode_out(self):
        # Unicode input is left alone. The original_encoding attribute
        # is not set.
        parsed = self.soup(self.unicode_data)
        self.assertEqual(parsed.decode(), self.unicode_data)
        self.assertEqual(parsed.foo.string, u'Sacr\xe9 bleu!')
        self.assertEqual(parsed.original_encoding, None)

    def test_utf8_in_unicode_out(self):
        # UTF-8 input is converted to Unicode.
        parsed = self.soup(self.utf8_data)
        self.assertEqual(parsed.decode(), self.unicode_data)
        self.assertEqual(parsed.foo.string, u'Sacr\xe9 bleu!')

    def test_utf8_out(self):
        # The internal data structures can be encoded as UTF-8.
        parsed = self.soup(self.unicode_data)
        encoded = parsed.encode('utf-8')
        self.assertEqual(encoded, self.utf8_data)

    @skipIf(
        PYTHON_3_PRE_3_2,
        "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
    def test_attribute_name_containing_unicode_characters(self):
        markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
        div = self.soup(markup).div
        self.assertEqual(div.encode("utf8"), markup.encode("utf8"))
class TestUnicodeDammit(unittest.TestCase):
    """Standalone tests of UnicodeDammit."""

    # The four Windows-1252 smart-quote bytes wrapped in a tag; shared by
    # the smart_quotes_to tests below.
    SMART_QUOTE_MARKUP = b"<foo>\x91\x92\x93\x94</foo>"

    def test_unicode_input(self):
        text = u"I'm already Unicode! \N{SNOWMAN}"
        converted = UnicodeDammit(text)
        self.assertEqual(converted.unicode_markup, text)

    def test_smart_quotes_to_unicode(self):
        converted = UnicodeDammit(self.SMART_QUOTE_MARKUP)
        self.assertEqual(
            converted.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")

    def test_smart_quotes_to_xml_entities(self):
        converted = UnicodeDammit(self.SMART_QUOTE_MARKUP, smart_quotes_to="xml")
        self.assertEqual(
            converted.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")

    def test_smart_quotes_to_html_entities(self):
        converted = UnicodeDammit(self.SMART_QUOTE_MARKUP, smart_quotes_to="html")
        self.assertEqual(
            converted.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")

    def test_smart_quotes_to_ascii(self):
        converted = UnicodeDammit(self.SMART_QUOTE_MARKUP, smart_quotes_to="ascii")
        self.assertEqual(
            converted.unicode_markup, """<foo>''""</foo>""")

    def test_detect_utf8(self):
        utf8_bytes = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
        converted = UnicodeDammit(utf8_bytes)
        self.assertEqual(converted.original_encoding.lower(), 'utf-8')
        self.assertEqual(converted.unicode_markup, u'Sacr\xe9 bleu! \N{SNOWMAN}')

    def test_convert_hebrew(self):
        hebrew_bytes = b"\xed\xe5\xec\xf9"
        converted = UnicodeDammit(hebrew_bytes, ["iso-8859-8"])
        self.assertEqual(converted.original_encoding.lower(), 'iso-8859-8')
        self.assertEqual(converted.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')

    def test_dont_see_smart_quotes_where_there_are_none(self):
        utf8_bytes = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
        converted = UnicodeDammit(utf8_bytes)
        self.assertEqual(converted.original_encoding.lower(), 'utf-8')
        self.assertEqual(converted.unicode_markup.encode("utf-8"), utf8_bytes)

    def test_ignore_inappropriate_codecs(self):
        utf8_bytes = u"Räksmörgås".encode("utf-8")
        converted = UnicodeDammit(utf8_bytes, ["iso-8859-8"])
        self.assertEqual(converted.original_encoding.lower(), 'utf-8')

    def test_ignore_invalid_codecs(self):
        utf8_bytes = u"Räksmörgås".encode("utf-8")
        bogus_names = ['.utf8', '...', 'utF---16.!']
        for bogus in bogus_names:
            converted = UnicodeDammit(utf8_bytes, [bogus])
            self.assertEqual(converted.original_encoding.lower(), 'utf-8')

    def test_exclude_encodings(self):
        # This is UTF-8.
        utf8_bytes = u"Räksmörgås".encode("utf-8")

        # But if we exclude UTF-8 from consideration, the guess is
        # Windows-1252.
        without_utf8 = UnicodeDammit(utf8_bytes, exclude_encodings=["utf-8"])
        self.assertEqual(without_utf8.original_encoding.lower(), 'windows-1252')

        # And if we exclude that, there is no valid guess at all.
        without_both = UnicodeDammit(
            utf8_bytes, exclude_encodings=["utf-8", "windows-1252"])
        self.assertEqual(without_both.original_encoding, None)

    def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self):
        detector = EncodingDetector(
            b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
        candidates = list(detector.encodings)
        assert u'utf-\N{REPLACEMENT CHARACTER}' in candidates

    def test_detect_html5_style_meta_tag(self):
        # The same charset declaration with double quotes, single quotes,
        # no quotes, and no quotes directly before the self-closing slash.
        variants = (
            b'<html><meta charset="euc-jp" /></html>',
            b"<html><meta charset='euc-jp' /></html>",
            b"<html><meta charset=euc-jp /></html>",
            b"<html><meta charset=euc-jp/></html>",
        )
        for markup in variants:
            converted = UnicodeDammit(markup, is_html=True)
            self.assertEqual(
                "euc-jp", converted.original_encoding)

    def test_last_ditch_entity_replacement(self):
        # This is a UTF-8 document that contains bytestrings
        # completely incompatible with UTF-8 (ie. encoded with some other
        # encoding).
        #
        # Since there is no consistent encoding for the document,
        # Unicode, Dammit will eventually encode the document as UTF-8
        # and encode the incompatible characters as REPLACEMENT
        # CHARACTER.
        #
        # If chardet is installed, it will detect that the document
        # can be converted into ISO-8859-1 without errors. This happens
        # to be the wrong encoding, but it is a consistent encoding, so the
        # code we're testing here won't run.
        #
        # So we temporarily disable chardet if it's present.
        doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
        saved_chardet = bs4.dammit.chardet_dammit
        logging.disable(logging.WARNING)
        try:
            # Stand-in detector that never reports an encoding. The
            # parameter name is kept as in the original stub.
            def noop(str):
                return None
            bs4.dammit.chardet_dammit = noop
            converted = UnicodeDammit(doc)
            self.assertEqual(True, converted.contains_replacement_characters)
            self.assertTrue(u"\ufffd" in converted.unicode_markup)

            parsed = BeautifulSoup(doc, "html.parser")
            self.assertTrue(parsed.contains_replacement_characters)
        finally:
            # Undo both the logging and the chardet overrides.
            logging.disable(logging.NOTSET)
            bs4.dammit.chardet_dammit = saved_chardet

    def test_byte_order_mark_removed(self):
        # A document written in UTF-16LE will have its byte order marker stripped.
        utf16_bytes = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
        converted = UnicodeDammit(utf16_bytes)
        self.assertEqual(u"<a>áé</a>", converted.unicode_markup)
        self.assertEqual("utf-16le", converted.original_encoding)

    def test_detwingle(self):
        # A UTF-8 fragment: three snowmen.
        snowmen = (u"\N{SNOWMAN}" * 3).encode("utf8")

        # A Windows-1252 fragment wrapped in smart quotes.
        greeting = (
            u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
            u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")

        # Glue them together into a document with mixed encodings.
        mixed = snowmen + greeting + snowmen

        # The document can't be turned into UTF-8:
        self.assertRaises(UnicodeDecodeError, mixed.decode, "utf8")

        # After detwingle() the whole document decodes as UTF-8.
        repaired = UnicodeDammit.detwingle(mixed)
        self.assertEqual(
            u"☃☃☃“Hi, I like Windows!”☃☃☃", repaired.decode("utf8"))

    def test_detwingle_ignores_multibyte_characters(self):
        # Each of these strings has a UTF-8 representation ending
        # in \x93. \x93 is a smart quote if interpreted as
        # Windows-1252. But our code knows to skip over multibyte
        # UTF-8 characters, so they'll survive the process unscathed.
        tricky_strings = (
            u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
            u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
            # NOTE(review): this literal is four separate code points
            # (U+00F0 U+0090 U+0090 U+0093), not one four-byte character;
            # presumably u"\U00010413" was intended -- confirm.
            u"\xf0\x90\x90\x93",
        )
        for tricky in tricky_strings:
            encoded = tricky.encode("utf8")
            self.assertTrue(encoded.endswith(b'\x93'))
            detwingled = UnicodeDammit.detwingle(encoded)
            self.assertEqual(detwingled, encoded)
class TestNamedspacedAttribute(SoupTest):
    """Tests of how NamespacedAttribute compares to strings and to itself."""

    def test_name_may_be_none(self):
        prefix_only = NamespacedAttribute("xmlns", None)
        self.assertEqual(prefix_only, "xmlns")

    def test_attribute_is_equivalent_to_colon_separated_string(self):
        attribute = NamespacedAttribute("a", "b")
        self.assertEqual("a:b", attribute)

    def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
        reference = NamespacedAttribute("a", "b", "c")
        identical = NamespacedAttribute("a", "b", "c")
        self.assertEqual(reference, identical)

        # The actual namespace is not considered.
        no_namespace = NamespacedAttribute("a", "b", None)
        self.assertEqual(reference, no_namespace)

        # But name and prefix are important.
        other_name = NamespacedAttribute("a", "z", "c")
        self.assertNotEqual(reference, other_name)

        other_prefix = NamespacedAttribute("z", "b", "c")
        self.assertNotEqual(reference, other_prefix)
class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):
    """Tests of the attribute values that swap in a charset when encoded."""

    # This method previously shared the name
    # ``test_content_meta_attribute_value`` with the method below, so the
    # later definition replaced it in the class body and it never ran.
    def test_charset_meta_attribute_value(self):
        """A charset value compares as its text but encodes as the target."""
        value = CharsetMetaAttributeValue("euc-jp")
        self.assertEqual("euc-jp", value)
        self.assertEqual("euc-jp", value.original_value)
        self.assertEqual("utf8", value.encode("utf8"))

    def test_content_meta_attribute_value(self):
        """A content value has its charset part replaced when encoded."""
        value = ContentMetaAttributeValue("text/html; charset=euc-jp")
        self.assertEqual("text/html; charset=euc-jp", value)
        self.assertEqual("text/html; charset=euc-jp", value.original_value)
        self.assertEqual("text/html; charset=utf8", value.encode("utf8"))
File diff suppressed because it is too large Load Diff
@@ -1,3 +0,0 @@
from .core import where
__version__ = "2019.03.09"

Some files were not shown because too many files have changed in this diff Show More