release 1.3.33.522

Merge branch '#151_permission_check_windows' into 1.3-bugfixes
Merge branch '#149_treat_one_as_found' into 1.3-bugfixes
2016-05-05 04:46:49 +02:00 · 2016-05-05 04:18:40 +02:00 · 2016-05-05 04:18:34 +02:00 · 2016-05-05 04:18:27 +02:00 · 2016-05-05 04:09:48 +02:00 · 2016-05-05 03:41:27 +02:00
74 changed files with 3923 additions and 445 deletions
@@ -1,3 +1,63 @@
+1.3.31.513
+
+- core: add option to only download one language again (and skip the addition of .lang to the subtitle filename) (default: off); fixes #126 
+- core: add option to always encode saved subtitles to UTF-8 (default: on); fixes #128
+- core: add fallback encoding detection using bs4.UnicodeDammit; hopefully fixes #101
+- core: update libraries: chardet, beautifulsoup, six
+- menu/core: check Plex libraries for permission problems on plugin start and report them in the channel menu (option, default: on); fixes #143
+- menu: while a manual refresh takes place, add a refresh button to the top of the SZ menu for convenience
+- menu: move the "add/remove X to ignore list" menu item to the bottom of the list on item detail 
+
+
+1.3.27.491
+
+- menu/core: make Sub-Zero channel menu optional (setting: "Enable Sub-Zero channel (disabling doesn't affect the subtitle features)?")
+- OpenSubtitles: detect and match video/subtitle FPS (framerate) to reduce out of sync subtitle matches
+- core: internal fixes; add _markerlib library (rare)
+- core: don't score tvshow episode title matches, should improve episode subtitle matches quite a bit (and reduce out of sync subtitles)
+- OpenSubtitles: make tag/exact filename matches optional (setting: "I keep the exact (release-) filename of my media files")
+- menu: unicode video title errors fixed
+- TVSubtitles: correctly match certain show IDs (such as "Series Name (US)")
+- core: don't break subtitle evaluation on crashed guessing
+
+
+1.3.23.459
+
+- core: slight code cleanup and fixes
+- core: add physical (filesystem) ignore mode (create files named `subzero.ignore`, `.subzero.ignore`, `.nosz` to ignore specific files/seasons/series/libraries)
+- core: fix guessit hinting of tv series with rare folder layout (e.g. series_name/a/S01E01.mkv)
+- core: remove "format" necessity from (opensubtitles) hash-validation
+- OpenSubtitles: dramatically improve matching: add tag (exact filename) matching and treat it just like hash matches
+- core: ignore embedded forced subtitles (fixes #106)
+- docs: update
+- settings: clarify
+
+
+1.3.20.422  
+- tvsubtitles: show matching was partially broken
+- addic7ed: better show matching
+- core: correctly skip subtitles stored in filesystem if metadata storage was selected (Local Media Assets agent may still pick them up)  
+- core: fix local API access (switch from HTTPS to HTTP)
+- core: fix handling of library names and media paths with non-ascii chars in them  
+- core: fix bundle version to correctly display current bundle version
+- core: skip downloading multi-CD subtitle
+- settings: clarify
+
+
+1.3.20.403
+- core: handle & and - ("and" and dash) in names
+- core: fixed handling of internal metadata subtitles
+- re-upped the minimum tv score to 85 (may be even higher in the future)
+- opensubtitles: possibly significantly better movie matching (now also query for movie title, instead of only querying for video hash)
+
+
+1.3.20.396
+- core: fix logging handlers (when saving log_level settings loggers got duplicated)
+- core: better movie matching by only hinting the filename and the last subdirectory to guessit (instead of the full path)
+- core: don't fail on wrong detection/scanning of media file
+- lower minimum tv series score from 85 to 67 (removed title; composed of: series=44 + season=11 + episode=11 + hearing_impaired=1)
+
+
 1.3.19.379
 - core: new recent items implementation (used in "Items with missing subtitles"), now really picking up everything instead of using Plex's recently_added API endpoint
 - core: be more strict about title matching - a matched title doesn't automatically mean season and episode are correct, too
@@ -14,21 +14,24 @@ for key, value in getattr(module, "__builtins__").iteritems():
        globals()[key] = value

 import logger
+
 sys.modules["logger"] = logger

+from subzero import intent
 import subliminal
 import subliminal_patch
 import support

 import interface
+
 sys.modules["interface"] = interface

 from subzero.constants import OS_PLEX_USERAGENT, PERSONAL_MEDIA_IDENTIFIER
 from subzero import intent
 from interface.menu import *
-from support.subtitlehelpers import getSubtitlesFromMetadata
+from support.subtitlehelpers import getSubtitlesFromMetadata, force_utf8
 from support.storage import storeSubtitleInfo
-from support.config import config
+from support.config import config, IGNORE_FN


 def Start():
@@ -46,6 +49,12 @@ def Start():
        Log.Error(lib_unaccessible_error)
        return

+    if not config.permissions_ok:
+        Log.Error("Insufficient permissions on library folders:")
+        for title, path in config.missing_permissions:
+            Log.Error("Insufficient permissions on library %s, folder: %s" % (title, path))
+        return
+
    scheduler.run()


@@ -57,36 +66,91 @@ def initSubliminalPatches():
    subliminal_patch.patch_providers.addic7ed.USE_BOOST = bool(Prefs['provider.addic7ed.boost'])


-def scanTvMedia(media):
-    videos = {}
-    for season in media.seasons:
-        for episode in media.seasons[season].episodes:
-            ep = media.seasons[season].episodes[episode]
-            force_refresh = intent.get("force", ep.id)
-            for item in media.seasons[season].episodes[episode].items:
-                for part in item.parts:
-                    scanned_video = scanVideo(part, ignore_all=force_refresh,
-                                              hints={"type": "episode", "expected_series": [media.title], "expected_title": [ep.title]})
-                    if not scanned_video:
-                        continue
-
-                    scanned_video.id = media.seasons[season].episodes[episode].id
-                    videos[scanned_video] = part
-    return videos
+def flattenToParts(media, kind="series"):
+    """
+    Iterates through media and returns the associated parts (videos) as a flat list.
+
+    :param media: media object; walked via media.seasons[..].episodes[..].items[..].parts when kind is "series",
+                  otherwise via media.items[..].parts
+    :param kind: "series" selects the season/episode walk; any other value is handled as a movie
+    :return: list of dicts with the keys "video" (the part), "type" ("episode" or "movie"), "title" and "id";
+             episode entries additionally carry "series" (the title of media)
+    """
+    parts = []
+    if kind == "series":
+        for season in media.seasons:
+            for episode in media.seasons[season].episodes:
+                ep = media.seasons[season].episodes[episode]
+                for item in media.seasons[season].episodes[episode].items:
+                    for part in item.parts:
+                        # episode entries use the episode's own title and id, plus the title of media as "series"
+                        parts.append({"video": part, "type": "episode", "title": ep.title, "series": media.title, "id": ep.id})
+    else:
+        for item in media.items:
+            for part in item.parts:
+                # movie entries take title and id directly from media; there is no "series" key
+                parts.append({"video": part, "type": "movie", "title": media.title, "id": media.id})
+    return parts


-def scanMovieMedia(media):
-    videos = {}
-    force_refresh = intent.get("force", media.id)
-    for item in media.items:
-        for part in item.parts:
-            scanned_video = scanVideo(part, ignore_all=force_refresh, hints={"type": "movie", "expected_title": [media.title]})
-            if not scanned_video:
-                continue
+def parseMediaToParts(media, kind="series"):
+    """
+    Returns the list of parts to be used later on, leaving out parts that are ignored via the filesystem.
+
+    A part is left out if a file named like one of the entries of IGNORE_FN (e.g. "subzero.ignore") exists in
+    the folder of the video file, in its parent folder or, for kind "series" only, in the folder above that.
+    The filtering only happens if the preference "subtitles.ignore_fs" is set.
+
+    :param media: media object, passed on to flattenToParts
+    :param kind: "series" or anything else (handled as movie); passed on to flattenToParts
+    :return: list of part dicts as built by flattenToParts, without the ignored ones
+    """
+    parts = flattenToParts(media, kind=kind)
+    if not Prefs["subtitles.ignore_fs"]:
+        return parts

-            scanned_video.id = media.id
-            videos[scanned_video] = part
-    return videos
+    use_parts = []
+    # folders to look into, relative to the folder of the video file
+    check_ignore_paths = [".", "../"]
+    if kind == "series":
+        # series get one additional level checked
+        check_ignore_paths.append("../../")
+
+    for part in parts:
+        base_folder, fn = os.path.split(part["video"].file)
+
+        ignore = False
+        for rel_path in check_ignore_paths:
+            fld = os.path.abspath(os.path.join(base_folder, rel_path))
+            for ifn in IGNORE_FN:
+                if os.path.isfile(os.path.join(fld, ifn)):
+                    Log.Info(u'Ignoring "%s" because "%s" exists in "%s"', fn, ifn, fld)
+                    ignore = True
+                    break
+            # the first ignore file found is enough, don't check the remaining folders
+            if ignore:
+                break
+
+        if not ignore:
+            use_parts.append(part)
+    return use_parts
+
+
+def getFPS(streams):
+    """
+    Returns the frame rate of the first video stream in streams.
+
+    :param streams: iterable of stream objects exposing the attributes type and frameRate
+    :return: the frameRate attribute of the first stream whose type equals 1; the string "25.000" if no such
+             stream exists
+    """
+    for stream in streams:
+        # video
+        if stream.type == 1:
+            return stream.frameRate
+    # no video stream found, fall back to a fixed default (a string, not a number)
+    return "25.000"
+
+
+def scanParts(parts, kind="series"):
+    """
+    Receives a list of part dictionaries as returned by flattenToParts and scans each of them using scanVideo.
+
+    Side effect: sets the attribute fps on every part["video"] (value taken from getFPS) before scanning it.
+
+    :param parts: list of dicts providing the keys "video", "title" and "id" (and "series" if kind is "series")
+    :param kind: "series" builds episode hints (including "expected_series"); anything else builds movie hints
+    :return: dictionary mapping each scanned video (the truthy result of scanVideo, with its id attribute set to
+             part["id"]) to the corresponding part["video"]; parts for which scanVideo returns nothing are skipped
+    """
+    ret = {}
+    for part in parts:
+        # a stored "force" intent for this id is handed to scanVideo as ignore_all
+        force_refresh = intent.get("force", part["id"])
+        hints = {"expected_title": [part["title"]]}
+        hints.update({"type": "episode", "expected_series": [part["series"]]} if kind == "series" else {"type": "movie"})
+        part["video"].fps = getFPS(part["video"].streams)
+        scanned_video = scanVideo(part["video"], ignore_all=force_refresh, hints=hints)
+        if not scanned_video:
+            continue
+
+        scanned_video.id = part["id"]
+        ret[scanned_video] = part["video"]
+    return ret


 def getItemIDs(media, kind="series"):
@@ -111,7 +175,8 @@ def scanVideo(part, ignore_all=False, hints=None):
    Log.Debug("Scanning video: %s, subtitles=%s, embedded_subtitles=%s" % (part.file, external_subtitles, embedded_subtitles))

    try:
-        return subliminal.video.scan_video(part.file, subtitles=external_subtitles, embedded_subtitles=embedded_subtitles, hints=hints or {})
+        return subliminal.video.scan_video(part.file, subtitles=external_subtitles, embedded_subtitles=embedded_subtitles, hints=hints or {},
+                                           video_fps=part.fps)

    except ValueError:
        Log.Warn("File could not be guessed by subliminal")
@@ -133,8 +198,16 @@ def downloadBestSubtitles(video_part_map, min_score=0):
                    video.subtitle_languages.add(language)
                    Log.Debug("Found metadata subtitle %s for %s", language, video)

-        if not (languages - video.subtitle_languages):
-            Log.Debug('All languages %r exist for %s', languages, video)
+        missing_subs = (languages - video.subtitle_languages)
+
+        # all languages are found if we either really have subs for all languages or we only want to have exactly one language
+        # and we've only found one (the case for a selected language, Prefs['subtitles.only_one'] (one found sub matches any language))
+        found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
+        if not missing_subs or found_one_which_is_enough:
+            if found_one_which_is_enough:
+                Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
+            else:
+                Log.Debug('All languages %r exist for %s', languages, video)
            continue
        missing_languages = True
        break
@@ -181,14 +254,16 @@ def saveSubtitlesToFile(subtitles):
                fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
            if not os.path.exists(fld):
                os.makedirs(fld)
-        subliminal.api.save_subtitles(video, video_subtitles, directory=fld)
+        subliminal.api.save_subtitles(video, video_subtitles, directory=fld, single=Prefs['subtitles.only_one'],
+                                      encode_with=force_utf8 if Prefs['subtitles.enforce_encoding'] else None)


 def saveSubtitlesToMetadata(videos, subtitles):
    for video, video_subtitles in subtitles.items():
        mediaPart = videos[video]
        for subtitle in video_subtitles:
-            mediaPart.subtitles[Locale.Language.Match(subtitle.language.alpha2)][subtitle.page_link] = Proxy.Media(subtitle.content, ext="srt")
+            content = force_utf8(subtitle.text) if Prefs['subtitles.enforce_encoding'] else subtitle.content
+            mediaPart.subtitles[Locale.Language.Match(subtitle.language.alpha2)][subtitle.page_link] = Proxy.Media(content, ext="srt")


 def updateLocalMedia(metadata, media, media_type="movies"):
@@ -216,13 +291,15 @@ def updateLocalMedia(metadata, media, media_type="movies"):

 class SubZeroAgent(object):
    agent_type = None
+    agent_type_verbose = None
    languages = [Locale.Language.English]
    primary_provider = False
+    score_prefs_key = None

    def __init__(self, *args, **kwargs):
        super(SubZeroAgent, self).__init__(*args, **kwargs)
        self.agent_type = "movies" if isinstance(self, Agent.Movies) else "series"
-        self.name = "Sub-Zero Subtitles (%s, %s)" % ("Movies" if self.agent_type == "movies" else "TV", config.getVersion())
+        self.name = "Sub-Zero Subtitles (%s, %s)" % (self.agent_type_verbose, config.getVersion())

    def search(self, results, media, lang):
        Log.Debug("Sub-Zero %s, %s search" % (config.version, self.agent_type))
@@ -231,16 +308,23 @@ class SubZeroAgent(object):
    def update(self, metadata, media, lang):
        Log.Debug("Sub-Zero %s, %s update called" % (config.version, self.agent_type))

+        if not media:
+            Log.Error("Called with empty media, something is really wrong with your setup!")
+            return
+
        set_refresh_menu_state(media, media_type=self.agent_type)

        item_ids = []
        try:
            initSubliminalPatches()
-            videos, subtitles = getattr(self, "update_%s" % self.agent_type)(metadata, media, lang)
+            parts = parseMediaToParts(media, kind=self.agent_type)
+            use_score = Prefs[self.score_prefs_key]
+            scanned_parts = scanParts(parts, kind=self.agent_type)
+            subtitles = downloadBestSubtitles(scanned_parts, min_score=int(use_score))
            item_ids = getItemIDs(media, kind=self.agent_type)

            if subtitles:
-                saveSubtitles(videos, subtitles)
+                saveSubtitles(scanned_parts, subtitles)

            updateLocalMedia(metadata, media, media_type=self.agent_type)

@@ -254,21 +338,17 @@ class SubZeroAgent(object):

                # resolve existing intent for that id
                intent.resolve("force", item_id)
-
-    def update_movies(self, metadata, media, lang):
-        videos = scanMovieMedia(media)
-        subtitles = downloadBestSubtitles(videos, min_score=int(Prefs["subtitles.search.minimumMovieScore"]))
-        return videos, subtitles
-
-    def update_series(self, metadata, media, lang):
-        videos = scanTvMedia(media)
-        subtitles = downloadBestSubtitles(videos, min_score=int(Prefs["subtitles.search.minimumTVScore"]))
-        return videos, subtitles
+            Dict.Save()


 class SubZeroSubtitlesAgentMovies(SubZeroAgent, Agent.Movies):
-    contributes_to = ['com.plexapp.agents.imdb', 'com.plexapp.agents.xbmcnfo', 'com.plexapp.agents.themoviedb']
+    contributes_to = ['com.plexapp.agents.imdb', 'com.plexapp.agents.xbmcnfo', 'com.plexapp.agents.themoviedb', 'com.plexapp.agents.hama']
+    score_prefs_key = "subtitles.search.minimumMovieScore"
+    agent_type_verbose = "Movies"


 class SubZeroSubtitlesAgentTvShows(SubZeroAgent, Agent.TV_Shows):
-    contributes_to = ['com.plexapp.agents.thetvdb', 'com.plexapp.agents.thetvdbdvdorder', 'com.plexapp.agents.xbmcnfotv']
+    contributes_to = ['com.plexapp.agents.thetvdb', 'com.plexapp.agents.thetvdbdvdorder', 'com.plexapp.agents.xbmcnfotv',
+                      'com.plexapp.agents.hama']
+    score_prefs_key = "subtitles.search.minimumTVScore"
+    agent_type_verbose = "TV"
@@ -1,23 +1,29 @@
 # coding=utf-8
 import logging
+
 import logger
+from menu_helpers import add_ignore_options, dig_tree, set_refresh_menu_state, should_display_ignore, enable_channel_wrapper
 from subzero.constants import TITLE, ART, ICON, PREFIX, PLUGIN_IDENTIFIER, DEPENDENCY_MODULE_NAMES
+from support.auth import refresh_plex_token
+from support.background import scheduler
 from support.config import config
 from support.helpers import pad_title, timestamp
-from support.auth import refresh_plex_token
 from support.ignore import ignore_list
+from support.items import getOnDeckItems, refreshItem, getAllItems
+from support.items import getRecentItems, get_items_info
+from support.lib import Plex, lib_unaccessible_error
 from support.missing_subtitles import getAllMissing
 from support.storage import resetStorage, logStorage
-from support.items import getRecentItems, MI_DEEPER, MI_KIND, get_items_info
-from support.items import getOnDeckItems, refreshItem, getAllItems
-from support.background import scheduler
-from support.lib import Plex, lib_unaccessible_error
-from menu_helpers import add_ignore_options, dig_tree, set_refresh_menu_state, should_display_ignore

 # init GUI
 ObjectContainer.art = R(ART)
 ObjectContainer.no_cache = True

+# noinspection PyUnboundLocalVariable
+route = enable_channel_wrapper(route)
+# noinspection PyUnboundLocalVariable
+handler = enable_channel_wrapper(handler)
+

@handler(PREFIX, TITLE, art=ART, thumb=ICON)
@route(PREFIX)
@@ -37,7 +43,26 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
        ))
        return oc

+    if not config.permissions_ok:
+        for title, path in config.missing_permissions:
+            oc.add(DirectoryObject(
+                key=Callback(fatality, randomize=timestamp()),
+                title=pad_title("Insufficient permissions"),
+                summary="Insufficient permissions on library %s, folder: %s" % (title, path)
+            ))
+        return oc
+
    if not only_refresh:
+        if Dict["current_refresh_state"]:
+            oc.add(DirectoryObject(
+                key=Callback(fatality, force_title=" ", randomize=timestamp()),
+                title=pad_title("Working ... refresh here"),
+                summary="Current state: %s; Last state: %s" % (
+                    (Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
+                    (Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
+                )
+            ))
+
        oc.add(DirectoryObject(
            key=Callback(OnDeckMenu),
            title=pad_title("On Deck items"),
@@ -195,6 +220,7 @@ def SectionMenu(rating_key, title=None, base_title=None, section_title=None, ign
    items = getAllItems(key="all", value=rating_key, base="library/sections")

    kind, deeper = get_items_info(items)
+    title = unicode(title)

    section_title = title
    title = base_title + " > " + title
@@ -258,11 +284,11 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
    if display_items:
        items = getAllItems(key="children", value=rating_key, base="library/metadata")
        kind, deeper = get_items_info(items)
+        dig_tree(oc, items, MetadataMenu,
+                 pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": kind, "previous_rating_key": rating_key})
        # we don't know exactly where we are here, only add ignore option to series
        if should_display_ignore(items, previous=previous_item_type):
            add_ignore_options(oc, "series", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
-        dig_tree(oc, items, MetadataMenu,
-                 pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": kind, "previous_rating_key": rating_key})
    else:
        return RefreshItemMenu(rating_key=rating_key, title=title, item_title=item_title)

@@ -283,17 +309,17 @@ def IgnoreListMenu():
 def RefreshItemMenu(rating_key, title=None, base_title=None, item_title=None, came_from="/recent"):
    title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)
    oc = ObjectContainer(title2=title, replace_parent=True)
-    add_ignore_options(oc, "videos", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
    oc.add(DirectoryObject(
        key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title),
-        title="Refresh: %s" % item_title,
+        title=u"Refresh: %s" % item_title,
        summary="Refreshes the item, possibly picking up new subtitles on disk"
    ))
    oc.add(DirectoryObject(
        key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True),
-        title="Force-Refresh: %s" % item_title,
+        title=u"Force-Refresh: %s" % item_title,
        summary="Issues a forced refresh, ignoring known subtitles and searching for new ones"
    ))
+    add_ignore_options(oc, "videos", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)

    return oc

@@ -301,9 +327,9 @@ def RefreshItemMenu(rating_key, title=None, base_title=None, item_title=None, ca
@route(PREFIX + '/item/{rating_key}')
 def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=False):
    assert rating_key
-    set_refresh_menu_state("Triggering %sRefresh for %s" % ("Force-" if force else "", item_title))
+    set_refresh_menu_state(u"Triggering %sRefresh for %s" % ("Force-" if force else "", item_title))
    Thread.Create(refreshItem, rating_key=rating_key, force=force)
-    return fatality(randomize=timestamp(), header="%s of item %s triggered" % ("Refresh" if not force else "Forced-refresh", rating_key),
+    return fatality(randomize=timestamp(), header=u"%s of item %s triggered" % ("Refresh" if not force else "Forced-refresh", rating_key),
                    replace_parent=True)


@@ -353,10 +379,29 @@ def AdvancedMenu(randomize=None, header=None, message=None):
    return oc


-@route(PREFIX + '/ValidatePrefs')
+@route(PREFIX + '/ValidatePrefs', enforce_route=True)
 def ValidatePrefs():
    Core.log.setLevel(logging.DEBUG)
    Log.Debug("Validate Prefs called.")
+
+    # cache the channel state
+    update_dict = False
+    restart = False
+    if "channel_enabled" not in Dict:
+        update_dict = True
+
+    elif Dict["channel_enabled"] != Prefs["enable_channel"]:
+        Log.Debug("Channel features %s, restarting plugin", "enabled" if Prefs["enable_channel"] else "disabled")
+        update_dict = True
+        restart = True
+
+    if update_dict:
+        Dict["channel_enabled"] = Prefs["enable_channel"]
+        Dict.Save()
+
+    if restart:
+        DispatchRestart()
+
    config.initialize()
    scheduler.setup_tasks()
    set_refresh_menu_state(None)
@@ -375,10 +420,14 @@ def ValidatePrefs():
    return


+def DispatchRestart():
+    """
+    Schedules Restart to be called through Thread.CreateTimer instead of calling it directly.
+    """
+    # NOTE(review): 1.0 is presumably a delay in seconds - confirm against the Thread.CreateTimer API
+    Thread.CreateTimer(1.0, Restart)
+
+
@route(PREFIX + '/advanced/restart/trigger')
 def TriggerRestart(randomize=None):
    set_refresh_menu_state("Restarting the plugin")
-    Thread.CreateTimer(1.0, Restart)
+    DispatchRestart()
    return fatality(header="Restart triggered, please wait about 5 seconds", force_title=" ", only_refresh=True, replace_parent=True,
                    no_history=True)

@@ -36,7 +36,7 @@ def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None

    oc.add(DirectoryObject(
        key=Callback(callback_menu, kind=use_kind, rating_key=rating_key, title=title),
-        title="%s %s \"%s\" %s the ignore list" % (
+        title=u"%s %s \"%s\" %s the ignore list" % (
            "Remove" if in_list else "Add", ignore_list.verbose(kind) if add_kind else "", unicode(title), "from" if in_list else "to")
    )
    )
@@ -88,4 +88,28 @@ def set_refresh_menu_state(state_or_media, media_type="movies"):
        title = format_video("movie", media.title)
    force_refresh = intent.get("force", media_id)

-    Dict["current_refresh_state"] = "%sRefreshing %s" % ("Force-" if force_refresh else "", title)
+    Dict["current_refresh_state"] = u"%sRefreshing %s" % ("Force-" if force_refresh else "", unicode(title))
+
+
+def enable_channel_wrapper(func):
+    """
+    Wraps a decorator factory :func: (route or handler) so that it is only applied if the channel is enabled.
+
+    The returned callable takes the same arguments as :func:. When it is called it removes the keyword
+    "enforce_route" from the keyword arguments and then either
+    - calls :func: with the remaining arguments, if the preference "enable_channel" is set or enforce_route
+      was passed as a truthy value, or
+    - returns a replacement decorator that hands back the decorated function unchanged.
+
+    :param func: original wrapper
+    :return: callable to be used in place of the original wrapper
+    """
+    def noop(*args, **kwargs):
+        # accepts and ignores the arguments meant for the original wrapper
+        def inner(*a, **k):
+            """
+            :param a: args; the first one is the function that was to be wrapped
+            :param k: kwargs (unused)
+            :return: originally to-be-wrapped function
+            """
+            return a[0]
+
+        return inner
+
+    def wrap(*args, **kwargs):
+        # "enforce_route" is consumed here and never reaches the original wrapper
+        enforce_route = kwargs.pop("enforce_route", None)
+        # the preference is read at the time the wrapper is called
+        return (func if Prefs["enable_channel"] or enforce_route else noop)(*args, **kwargs)
+
+    return wrap
@@ -3,11 +3,12 @@
 import os
 import re
 import inspect
+
 from babelfish import Language
 from subzero.lib.io import FileIO
 from subzero.constants import PLUGIN_NAME
-from auth import refresh_plex_token
 from lib import configure_plex, Plex
+from helpers import check_write_permissions

 SUBTITLE_EXTS = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'txt', 'psb']
 VIDEO_EXTS = ['3g2', '3gp', 'asf', 'asx', 'avc', 'avi', 'avs', 'bivx', 'bup', 'divx', 'dv', 'dvr-ms', 'evo', 'fli', 'flv',
@@ -15,6 +16,8 @@ VIDEO_EXTS = ['3g2', '3gp', 'asf', 'asx', 'avc', 'avi', 'avs', 'bivx', 'bup', 'd
              'pva', 'qt', 'rm', 'rmvb', 'sdp', 'svq3', 'strm', 'ts', 'ty', 'vdr', 'viv', 'vob', 'vp3', 'wmv', 'wpl', 'wtv', 'xsp', 'xvid',
              'webm']

+IGNORE_FN = ("subzero.ignore", ".subzero.ignore", ".nosz")
+
 VERSION_RE = re.compile(ur'CFBundleVersion.+?<string>([0-9\.]+)</string>', re.DOTALL)


@@ -33,6 +36,8 @@ class Config(object):
    providerSettings = None
    max_recent_items_per_library = 200
    plex_api_working = False
+    permissions_ok = False
+    missing_permissions = None

    initialized = False

@@ -47,10 +52,42 @@ class Config(object):
        self.initialized = True
        configure_plex()
        self.plex_api_working = self.checkPlexAPI()
+        self.missing_permissions = []
+        self.permissions_ok = self.checkPermissions()

    def checkPlexAPI(self):
        return bool(Plex["library"].sections())

+    def checkPermissions(self):
+        """
+        Checks the locations of all Plex library sections using check_write_permissions.
+
+        Every location that fails the check is appended to self.missing_permissions as a tuple of
+        (section title, location path). If the preference "subtitles.ignore_fs" is set, locations that directly
+        contain a file named like one of the entries of IGNORE_FN are not checked.
+
+        :return: True if the check is skipped (preference "subtitles.save.filesystem" or "check_permissions" not
+                 set) or if every checked location passed; False if at least one location failed; None if
+                 self.plex_api_working is not set
+        """
+        if not Prefs["subtitles.save.filesystem"] or not Prefs["check_permissions"]:
+            return True
+
+        if not self.plex_api_working:
+            # bare return: the caller receives None, which is falsy
+            return
+
+        use_ignore_fs = Prefs["subtitles.ignore_fs"]
+        sections = Plex["library"].sections()
+        all_permissions_ok = True
+        for section in list(sections):
+            title = section.title
+            for location in section:
+                if use_ignore_fs:
+                    ignore = False
+                    # check whether we've got an ignore file inside the section path
+                    for ifn in IGNORE_FN:
+                        if os.path.isfile(os.path.join(location.path, ifn)):
+                            ignore = True
+                    if ignore:
+                        continue
+
+                # section not ignored, check for write permissions
+                if not check_write_permissions(location.path):
+                    # not enough permissions
+                    self.missing_permissions.append((title, location.path))
+                    all_permissions_ok = False
+
+        return all_permissions_ok
+
    def getVersion(self):
        curDir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
        info_file_path = os.path.abspath(os.path.join(curDir, "..", "..", "Info.plist"))
@@ -67,6 +104,9 @@ class Config(object):
        l = {Language.fromietf(Prefs["langPref1"])}
        langCustom = Prefs["langPrefCustom"].strip()

+        if Prefs['subtitles.only_one']:
+            return l
+
        if Prefs["langPref2"] != "None":
            l.update({Language.fromietf(Prefs["langPref2"])})

@@ -110,6 +150,7 @@ class Config(object):
                                          },
                             'opensubtitles': {'username': Prefs['provider.opensubtitles.username'],
                                               'password': Prefs['provider.opensubtitles.password'],
+                                               'use_tag_search': Prefs['provider.opensubtitles.use_tags']
                                               },
                             }

@@ -1,10 +1,11 @@
 # coding=utf-8
-
+import os
 import unicodedata
 import datetime
 import urllib
 import time
 import re
+import platform

 # Unicode control characters can appear in ID3v2 tags but are not legal in XML.

@@ -141,3 +142,21 @@ def query_plex(url, args):

    return HTTP.Request(url + ("?%s" % computed_args) if computed_args else "", immediate=True)

+
+def check_write_permissions(path):
+    """
+    Checks whether path can be written to.
+
+    If platform.system() reports "Windows", a directory named ".sz_perm_chk" is created inside the real path of
+    :path: and removed again; on every other platform os.access is asked for write and execute permission.
+
+    :param path: folder to check
+    :return: Windows: True if the test directory could be created and removed, False if an OSError occurred;
+             other platforms: the result of os.access(path, os.W_OK | os.X_OK)
+    """
+    if platform.system() == "Windows":
+        # physical access check
+        # NOTE(review): presumably done because os.access is not trusted on Windows - confirm
+        check_path = os.path.join(os.path.realpath(path), ".sz_perm_chk")
+        try:
+            # remove a test directory that already exists before creating it again
+            if os.path.exists(check_path):
+                os.rmdir(check_path)
+            os.mkdir(check_path)
+            os.rmdir(check_path)
+            return True
+        except OSError:
+            # any failure falls through to the final "return False"
+            pass
+
+    else:
+        # os.access check
+        return os.access(path, os.W_OK | os.X_OK)
+    return False
@@ -31,7 +31,7 @@ def getSectionSize(key):
    :return:
    """
    size = None
-    url = "https://127.0.0.1:32400/library/sections/%s/all" % int(key)
+    url = "http://127.0.0.1:32400/library/sections/%s/all" % int(key)
    use_args = {
        "X-Plex-Container-Size": "0",
        "X-Plex-Container-Start": "0"
@@ -136,7 +136,7 @@ def getRecentItems():
        if section.type == "show":
            use_args["type"] = "4"

-        url = "https://127.0.0.1:32400/library/sections/%s/all" % int(section.key)
+        url = "http://127.0.0.1:32400/library/sections/%s/all" % int(section.key)
        response = query_plex(url, use_args)

        matcher = episode_re if section.type == "show" else movie_re
@@ -12,36 +12,39 @@ def findSubtitles(part):
    lang_sub_map = {}
    part_filename = helpers.unicodize(part.file)
    part_basename = os.path.splitext(os.path.basename(part_filename))[0]
-    paths = [os.path.dirname(part_filename)]
+    use_filesystem = bool(Prefs["subtitles.save.filesystem"])
+    paths = [os.path.dirname(part_filename)] if use_filesystem else []

-    # Check for local subtitles subdirectory
-    sub_dirs_default = ["sub", "subs", "subtitle", "subtitles"]
-    sub_dir_base = paths[0]
+    global_subtitle_folder = None

-    sub_dir_list = []
+    if use_filesystem:
+        # Check for local subtitles subdirectory
+        sub_dir_base = paths[0]

-    if Prefs["subtitles.save.subFolder"] != "current folder":
-        # got selected subfolder
-        sub_dir_list.append(os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"]))
+        sub_dir_list = []

-    sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() if bool(Prefs["subtitles.save.subFolder.Custom"]) else None
-    if sub_dir_custom:
-        # got custom subfolder
-        if sub_dir_custom.startswith("/"):
-            # absolute folder
-            sub_dir_list.append(sub_dir_custom)
-        else:
-            # relative folder
-            sub_dir_list.append(os.path.join(sub_dir_base, sub_dir_custom))
+        if Prefs["subtitles.save.subFolder"] != "current folder":
+            # got selected subfolder
+            sub_dir_list.append(os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"]))

-    for sub_dir in sub_dir_list:
-        if os.path.isdir(sub_dir):
-            paths.append(sub_dir)
+        sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() if bool(Prefs["subtitles.save.subFolder.Custom"]) else None
+        if sub_dir_custom:
+            # got custom subfolder
+            if sub_dir_custom.startswith("/"):
+                # absolute folder
+                sub_dir_list.append(sub_dir_custom)
+            else:
+                # relative folder
+                sub_dir_list.append(os.path.join(sub_dir_base, sub_dir_custom))

-    # Check for a global subtitle location
-    global_subtitle_folder = os.path.join(Core.app_support_path, 'Subtitles')
-    if os.path.exists(global_subtitle_folder):
-        paths.append(global_subtitle_folder)
+        for sub_dir in sub_dir_list:
+            if os.path.isdir(sub_dir):
+                paths.append(sub_dir)
+
+        # Check for a global subtitle location
+        global_subtitle_folder = os.path.join(Core.app_support_path, 'Subtitles')
+        if os.path.exists(global_subtitle_folder):
+            paths.append(global_subtitle_folder)

    # We start by building a dictionary of files to their absolute paths. We also need to know
    # the number of media files that are actually present, in case the found local media asset
@@ -78,7 +81,7 @@ def findSubtitles(part):
        # If the file is located within the global subtitle folder and its name doesn't match exactly
        # then we should simply ignore it.
        #
-        if file_path.count(global_subtitle_folder) and not filename_matches_part:
+        if global_subtitle_folder and file_path.count(global_subtitle_folder) and not filename_matches_part:
            continue

        # If we have more than one media file within the folder and located filename doesn't match
@@ -100,10 +103,10 @@ def findSubtitles(part):
                lang_sub_map[new_language] = lang_sub_map[new_language] + subtitles

    # add known metadata subs to our sub list
-    if not Prefs['subtitles.save.filesystem']:
+    if not use_filesystem:
        for language, sub_list in subtitlehelpers.getSubtitlesFromMetadata(part).iteritems():
            if sub_list:
-                if not language in lang_sub_map:
+                if language not in lang_sub_map:
                    lang_sub_map[language] = []
                lang_sub_map[language] = lang_sub_map[language] + sub_list

@@ -35,7 +35,7 @@ def itemDiscoverMissing(rating_key, kind="show", added_at=None, section_title=No
    if existing_subs["count"]:
        existing_flat = (existing_subs["internal"] if internal else []) + (existing_subs["external"] if external else [])
        languages_set = set(languages)
-        if languages_set.issubset(existing_flat):
+        if languages_set.issubset(existing_flat) or (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
            # all subs found
            Log.Info(u"All subtitles exist for '%s'", item_title)
            return
@@ -4,6 +4,8 @@ import re, unicodedata, os
 import config
 import helpers

+from bs4 import UnicodeDammit
+

 class SubtitleHelper(object):
    def __init__(self, filename):
@@ -140,13 +142,26 @@ def getSubtitlesFromMetadata(part):
    for language in part.subtitles:
        subs[language] = []
        for key, proxy in getattr(part.subtitles[language], "_proxies").iteritems():
-            try:
-                p_type, p_value, p_sort, p_index, p_codec, p_format = proxy
-            except ValueError:
-                Log.Error("Couldn't parse subtitle info, got proxy %s" % proxy)
+            if not proxy or not len(proxy) >= 5:
+                Log.Debug("Can't parse metadata: %s" % repr(proxy))
                continue

+            p_type = proxy[0]
+
            if p_type == "Media":
                # metadata subtitle
+                Log.Debug(u"Found metadata subtitle: %s, %s" % (language, repr(proxy)))
                subs[language].append(key)
    return subs
+
+
+def force_utf8(content):
+    a = UnicodeDammit(content)
+
+    Log.Debug("detected encoding: %s (None: most likely already successfully decoded)" % a.original_encoding)
+
+    # easy way out - already utf-8
+    if a.original_encoding and a.original_encoding == "utf-8":
+        return content
+
+    return (a.unicode_markup if a.unicode_markup else content.decode('ascii', 'replace')).encode("utf-8")
@@ -1,4 +1,10 @@
 [
+  {
+    "id": "enable_channel",
+    "label": "Enable Sub-Zero channel (disabling doesn't affect the subtitle features)?",
+    "type": "bool",
+    "default": "true"
+  },
  {
    "id": "subtitles.try_downloads",
    "label": "How many download tries per subtitle (on timeout or error)",
@@ -29,7 +35,7 @@
    "id": "provider.addic7ed.username",
    "label": "Addic7ed Username",
    "type": "text",
-    "default": "Username"
+    "default": ""
  },
  {
    "id": "provider.addic7ed.password",
@@ -226,6 +232,18 @@
    "type": "text",
    "default": "None"
  },
+  {
+    "id": "subtitles.only_one",
+    "label": "Restrict to one language (skips adding \".lang.\" to the subtitle filename; only uses \"Subtitle Language (1)\")",
+    "type": "bool",
+    "default": "false"
+  },
+  {
+    "id": "subtitles.enforce_encoding",
+    "label": "Normalize subtitle encoding to UTF-8",
+    "type": "bool",
+    "default": "true"
+  },
  {
    "id": "provider.opensubtitles.enabled",
    "label": "Provider: Enable OpenSubtitles",
@@ -252,7 +270,7 @@
  },
  {
    "id": "provider.addic7ed.boost",
-    "label": "Addic7ed: boost over hash score if requirements met (prefer over other providers)",
+    "label": "Addic7ed: prefer over other providers (if requirements met)",
    "type": "bool",
    "default": "false"
  },
@@ -262,15 +280,21 @@
    "type": "bool",
    "default": "true"
  },
+  {
+    "id": "provider.opensubtitles.use_tags",
+    "label": "I keep the exact (release-) filename of my media files",
+    "type": "bool",
+    "default": "true"
+  },
  {
    "id": "subtitles.scan.embedded",
-    "label": "Scan: include subtitles embedded in the media file (and don't download seperate ones)",
+    "label": "Scan: include embedded subtitles (in the media file (MKV/MP4), don't download if existing)",
    "type": "bool",
    "default": "false"
  },
  {
    "id": "subtitles.scan.external",
-    "label": "Scan: include external subtitles (and don't download new ones)",
+    "label": "Scan: include external subtitles (metadata/filesystem, don't download if existing)",
    "type": "bool",
    "default": "true"
  },
@@ -302,7 +326,7 @@
      "5",
      "0"
    ],
-    "default": "67"
+    "default": "85"
  },
  {
    "id": "subtitles.search.minimumMovieScore",
@@ -377,6 +401,12 @@
    "type": "bool",
    "default": "true"
  },
+  {
+    "id": "subtitles.ignore_fs",
+    "label": "Ignore folders (with \"subzero.ignore/.subzero.ignore/.nosz\" files in them)",
+    "type": "bool",
+    "default": "false"
+  },
  {
    "id": "scheduler.tasks.searchAllRecentlyAddedMissing",
    "label": "Scheduler: Periodically search for recent items with missing subtitles",
@@ -415,6 +445,12 @@
    "type": "text",
    "default": "200"
  },
+  {
+    "id": "check_permissions",
+    "label": "Check for correct folder permissions of every library on plugin start",
+    "type": "bool",
+    "default": "true"
+  },
  {
    "id": "log_level",
    "label": "How verbose should the logging be?",
@@ -9,11 +9,11 @@
        <key>CFBundleInfoDictionaryVersion</key>
        <string>6.0</string>
        <key>CFBundleShortVersionString</key>
-        <string>1.3.6</string>
+        <string>1.3.31</string>
        <key>CFBundleSignature</key>
        <string>????</string>
        <key>CFBundleVersion</key>
-        <string>1.3.20.396</string>
+        <string>1.3.33.522</string>
        <key>PlexFrameworkVersion</key>
        <string>2</string>
        <key>PlexPluginClass</key>
@@ -32,7 +32,7 @@

 &lt;h1&gt;Sub-Zero for Plex&lt;/h1&gt;&lt;i&gt;Subtitles done right&lt;/i&gt;

-Version 1.3.20.396
+Version 1.3.33.522

 Originally based on @bramwalet's awesome &lt;a href=&quot;https://github.com/bramwalet/Subliminal.bundle&quot;&gt;Subliminal.bundle&lt;/a&gt;

@@ -42,7 +42,7 @@ If you like this, buy me a beer: &lt;a href=&quot;https://www.paypal.com/cgi-bin
 Plex thread: &lt;a href=&quot;https://forums.plex.tv/discussion/186575&quot;>https://forums.plex.tv/discussion/186575&lt;/a&gt;
 Github: &lt;a href=&quot;https://github.com/pannal/Sub-Zero.bundle&quot;&gt;https://github.com/pannal/Sub-Zero&lt;/a&gt;

-panni, 2015
+panni, 2016
 &lt;/div&gt;
 	</string>
    </dict>
@@ -0,0 +1,16 @@
+try:
+    import ast
+    from _markerlib.markers import default_environment, compile, interpret
+except ImportError:
+    if 'ast' in globals():
+        raise
+    def default_environment():
+        return {}
+    def compile(marker):
+        def marker_fn(environment=None, override=None):
+            # 'empty markers are True' heuristic won't install extra deps.
+            return not marker.strip()
+        marker_fn.__doc__ = marker
+        return marker_fn
+    def interpret(marker, environment=None, override=None):
+        return compile(marker)()
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+"""Interpret PEP 345 environment markers.
+
+EXPR [in|==|!=|not in] EXPR [or|and] ...
+
+where EXPR belongs to any of those:
+
+    python_version = '%s.%s' % (sys.version_info[0], sys.version_info[1])
+    python_full_version = sys.version.split()[0]
+    os.name = os.name
+    sys.platform = sys.platform
+    platform.version = platform.version()
+    platform.machine = platform.machine()
+    platform.python_implementation = platform.python_implementation()
+    a free string, like '2.6', or 'win32'
+"""
+
+__all__ = ['default_environment', 'compile', 'interpret']
+
+import ast
+import os
+import platform
+import sys
+import weakref
+
+_builtin_compile = compile
+
+try:
+    from platform import python_implementation
+except ImportError:
+    if os.name == "java":
+        # Jython 2.5 has ast module, but not platform.python_implementation() function.
+        def python_implementation():
+            return "Jython"
+    else:
+        raise
+
+
+# restricted set of variables
+_VARS = {'sys.platform': sys.platform,
+         'python_version': '%s.%s' % sys.version_info[:2],
+         # FIXME parsing sys.version is not reliable, but there is no other
+         # way to get e.g. 2.7.2+, and the PEP is defined with sys.version
+         'python_full_version': sys.version.split(' ', 1)[0],
+         'os.name': os.name,
+         'platform.version': platform.version(),
+         'platform.machine': platform.machine(),
+         'platform.python_implementation': python_implementation(),
+         'extra': None # wheel extension
+        }
+
+for var in list(_VARS.keys()):
+    if '.' in var:
+        _VARS[var.replace('.', '_')] = _VARS[var]
+
+def default_environment():
+    """Return copy of default PEP 345 globals dictionary."""
+    return dict(_VARS)
+
+class ASTWhitelist(ast.NodeTransformer):
+    def __init__(self, statement):
+        self.statement = statement # for error messages
+
+    ALLOWED = (ast.Compare, ast.BoolOp, ast.Attribute, ast.Name, ast.Load, ast.Str)
+    # Bool operations
+    ALLOWED += (ast.And, ast.Or)
+    # Comparison operations
+    ALLOWED += (ast.Eq, ast.Gt, ast.GtE, ast.In, ast.Is, ast.IsNot, ast.Lt, ast.LtE, ast.NotEq, ast.NotIn)
+
+    def visit(self, node):
+        """Ensure statement only contains allowed nodes."""
+        if not isinstance(node, self.ALLOWED):
+            raise SyntaxError('Not allowed in environment markers.\n%s\n%s' %
+                               (self.statement,
+                               (' ' * node.col_offset) + '^'))
+        return ast.NodeTransformer.visit(self, node)
+
+    def visit_Attribute(self, node):
+        """Flatten one level of attribute access."""
+        new_node = ast.Name("%s.%s" % (node.value.id, node.attr), node.ctx)
+        return ast.copy_location(new_node, node)
+
+def parse_marker(marker):
+    tree = ast.parse(marker, mode='eval')
+    new_tree = ASTWhitelist(marker).generic_visit(tree)
+    return new_tree
+
+def compile_marker(parsed_marker):
+    return _builtin_compile(parsed_marker, '<environment marker>', 'eval',
+                   dont_inherit=True)
+
+_cache = weakref.WeakValueDictionary()
+
+def compile(marker):
+    """Return compiled marker as a function accepting an environment dict."""
+    try:
+        return _cache[marker]
+    except KeyError:
+        pass
+    if not marker.strip():
+        def marker_fn(environment=None, override=None):
+            """"""
+            return True
+    else:
+        compiled_marker = compile_marker(parse_marker(marker))
+        def marker_fn(environment=None, override=None):
+            """override updates environment"""
+            if override is None:
+                override = {}
+            if environment is None:
+                environment = default_environment()
+            environment.update(override)
+            return eval(compiled_marker, environment)
+    marker_fn.__doc__ = marker
+    _cache[marker] = marker_fn
+    return _cache[marker]
+
+def interpret(marker, environment=None):
+    return compile(marker)(environment)
@@ -1,6 +1,6 @@
 Beautiful Soup is made available under the MIT license:

- Copyright (c) 2004-2012 Leonard Richardson
+ Copyright (c) 2004-2015 Leonard Richardson

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
@@ -20,7 +20,8 @@ Beautiful Soup is made available under the MIT license:
 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE, DAMMIT.
+ SOFTWARE.

 Beautiful Soup incorporates code from the html5lib library, which is
-also made available under the MIT license.
+also made available under the MIT license. Copyright (c) 2006-2013
+James Graham and other contributors
@@ -1,3 +1,127 @@
+= 4.4.1 (20150928) =
+
+* Fixed a bug that deranged the tree when part of it was
+  removed. Thanks to Eric Weiser for the patch and John Wiseman for a
+  test. [bug=1481520]
+
+* Fixed a parse bug with the html5lib tree-builder. Thanks to Roel
+  Kramer for the patch. [bug=1483781]
+
+* Improved the implementation of CSS selector grouping. Thanks to
+  Orangain for the patch. [bug=1484543]
+
+* Fixed the test_detect_utf8 test so that it works when chardet is
+  installed. [bug=1471359]
+
+* Corrected the output of Declaration objects. [bug=1477847]
+
+
+= 4.4.0 (20150703) =
+
+Especially important changes:
+
+* Added a warning when you instantiate a BeautifulSoup object without
+  explicitly naming a parser. [bug=1398866]
+
+* __repr__ now returns an ASCII bytestring in Python 2, and a Unicode
+  string in Python 3, instead of a UTF8-encoded bytestring in both
+  versions. In Python 3, __str__ now returns a Unicode string instead
+  of a bytestring. [bug=1420131]
+
+* The `text` argument to the find_* methods is now called `string`,
+  which is more accurate. `text` still works, but `string` is the
+  argument described in the documentation. `text` may eventually
+  change its meaning, but not for a very long time. [bug=1366856]
+
+* Changed the way soup objects work under copy.copy(). Copying a
+  NavigableString or a Tag will give you a new NavigableString that's
+  equal to the old one but not connected to the parse tree. Patch by
+  Martijn Pieters. [bug=1307490]
+
+* Started using a standard MIT license. [bug=1294662]
+
+* Added a Chinese translation of the documentation by Delong .w.
+
+New features:
+
+* Introduced the select_one() method, which uses a CSS selector but
+  only returns the first match, instead of a list of
+  matches. [bug=1349367]
+
+* You can now create a Tag object without specifying a
+  TreeBuilder. Patch by Martijn Pieters. [bug=1307471]
+
+* You can now create a NavigableString or a subclass just by invoking
+  the constructor. [bug=1294315]
+
+* Added an `exclude_encodings` argument to UnicodeDammit and to the
+  Beautiful Soup constructor, which lets you prohibit the detection of
+  an encoding that you know is wrong. [bug=1469408]
+
+* The select() method now supports selector grouping. Patch by
+  Francisco Canas [bug=1191917]
+
+Bug fixes:
+
+* Fixed yet another problem that caused the html5lib tree builder to
+  create a disconnected parse tree. [bug=1237763]
+
+* Force object_was_parsed() to keep the tree intact even when an element
+  from later in the document is moved into place. [bug=1430633]
+
+* Fixed yet another bug that caused a disconnected tree when html5lib
+  copied an element from one part of the tree to another. [bug=1270611]
+
+* Fixed a bug where Element.extract() could create an infinite loop in
+  the remaining tree.
+
+* The select() method can now find tags whose names contain
+  dashes. Patch by Francisco Canas. [bug=1276211]
+
+* The select() method can now find tags with attributes whose names
+  contain dashes. Patch by Marek Kapolka. [bug=1304007]
+
+* Improved the lxml tree builder's handling of processing
+  instructions. [bug=1294645]
+
+* Restored the helpful syntax error that happens when you try to
+  import the Python 2 edition of Beautiful Soup under Python
+  3. [bug=1213387]
+
+* In Python 3.4 and above, set the new convert_charrefs argument to
+  the html.parser constructor to avoid a warning and future
+  failures. Patch by Stefano Revera. [bug=1375721]
+
+* The warning when you pass in a filename or URL as markup will now be
+  displayed correctly even if the filename or URL is a Unicode
+  string. [bug=1268888]
+
+* If the initial <html> tag contains a CDATA list attribute such as
+  'class', the html5lib tree builder will now turn its value into a
+  list, as it would with any other tag. [bug=1296481]
+
+* Fixed an import error in Python 3.5 caused by the removal of the
+  HTMLParseError class. [bug=1420063]
+
+* Improved docstring for encode_contents() and
+  decode_contents(). [bug=1441543]
+
+* Fixed a crash in Unicode, Dammit's encoding detector when the name
+  of the encoding itself contained invalid bytes. [bug=1360913]
+
+* Improved the exception raised when you call .unwrap() or
+  .replace_with() on an element that's not attached to a tree.
+
+* Raise a NotImplementedError whenever an unsupported CSS pseudoclass
+  is used in select(). Previously some cases did not result in a
+  NotImplementedError.
+
+* It's now possible to pickle a BeautifulSoup object no matter which
+  tree builder was used to create it. However, the only tree builder
+  that survives the pickling process is the HTMLParserTreeBuilder
+  ('html.parser'). If you unpickle a BeautifulSoup object created with
+  some other tree builder, soup.builder will be None. [bug=1231545]
+
 = 4.3.2 (20131002) =

 * Fixed a bug in which short Unicode input was improperly encoded to
@@ -0,0 +1,31 @@
+Additions
+---------
+
+More of the jQuery API: nextUntil?
+
+Optimizations
+-------------
+
+The html5lib tree builder doesn't use the standard tree-building API,
+which worries me and has resulted in a number of bugs.
+
+markup_attr_map can be optimized since it's always a map now.
+
+Upon encountering UTF-16LE data or some other uncommon serialization
+of Unicode, UnicodeDammit will convert the data to Unicode, then
+encode it as UTF-8. This is wasteful because it will just get decoded
+back to Unicode.
+
+CDATA
+-----
+
+The elementtree XMLParser has a strip_cdata argument that, when set to
+False, should allow Beautiful Soup to preserve CDATA sections instead
+of treating them as text. Except it doesn't. (This argument is also
+present for HTMLParser, and also does nothing there.)
+
+Currently, html5lib converts CDATA sections into comments. An
+as-yet-unreleased version of html5lib changes the parser's handling of
+CDATA sections to allow CDATA sections in tags like <svg> and
+<math>. The HTML5TreeBuilder will need to be updated to create CData
+objects instead of Comment objects in this situation.
@@ -17,8 +17,8 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """

 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.3.2"
-__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson"
+__version__ = "4.4.1"
+__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson"
 __license__ = "MIT"

 __all__ = ['BeautifulSoup']
@@ -45,7 +45,7 @@ from .element import (

 # The very first thing we do is give a useful error if someone is
 # running this code under Python 3 without converting it.
-syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
+'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'

 class BeautifulSoup(Tag):
    """
@@ -77,8 +77,11 @@ class BeautifulSoup(Tag):

    ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'

+    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
+
    def __init__(self, markup="", features=None, builder=None,
-                 parse_only=None, from_encoding=None, **kwargs):
+                 parse_only=None, from_encoding=None, exclude_encodings=None,
+                 **kwargs):
        """The Soup object is initialized as the 'root tag', and the
        provided markup (which can be a string or a file-like object)
        is fed into the underlying parser."""
@@ -114,9 +117,9 @@ class BeautifulSoup(Tag):
            del kwargs['isHTML']
            warnings.warn(
                "BS4 does not respect the isHTML argument to the "
-                "BeautifulSoup constructor. You can pass in features='html' "
-                "or features='xml' to get a builder capable of handling "
-                "one or the other.")
+                "BeautifulSoup constructor. Suggest you use "
+                "features='lxml' for HTML and features='lxml-xml' for "
+                "XML.")

        def deprecated_argument(old_name, new_name):
            if old_name in kwargs:
@@ -140,6 +143,7 @@ class BeautifulSoup(Tag):
                "__init__() got an unexpected keyword argument '%s'" % arg)

        if builder is None:
+            original_features = features
            if isinstance(features, basestring):
                features = [features]
            if features is None or len(features) == 0:
@@ -151,6 +155,16 @@ class BeautifulSoup(Tag):
                    "requested: %s. Do you need to install a parser library?"
                    % ",".join(features))
            builder = builder_class()
+            if not (original_features == builder.NAME or
+                    original_features in builder.ALTERNATE_NAMES):
+                if builder.is_xml:
+                    markup_type = "XML"
+                else:
+                    markup_type = "HTML"
+                warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
+                    parser=builder.NAME,
+                    markup_type=markup_type))
+
        self.builder = builder
        self.is_xml = builder.is_xml
        self.builder.soup = self
@@ -178,6 +192,8 @@ class BeautifulSoup(Tag):
                # system. Just let it go.
                pass
            if is_file:
+                if isinstance(markup, unicode):
+                    markup = markup.encode("utf8")
                warnings.warn(
                    '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
            if markup[:5] == "http:" or markup[:6] == "https:":
@@ -185,12 +201,15 @@ class BeautifulSoup(Tag):
                # Python 3 otherwise.
                if ((isinstance(markup, bytes) and not b' ' in markup)
                    or (isinstance(markup, unicode) and not u' ' in markup)):
+                    if isinstance(markup, unicode):
+                        markup = markup.encode("utf8")
                    warnings.warn(
                        '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)

        for (self.markup, self.original_encoding, self.declared_html_encoding,
         self.contains_replacement_characters) in (
-            self.builder.prepare_markup(markup, from_encoding)):
+             self.builder.prepare_markup(
+                 markup, from_encoding, exclude_encodings=exclude_encodings)):
            self.reset()
            try:
                self._feed()
@@ -203,6 +222,16 @@ class BeautifulSoup(Tag):
        self.markup = None
        self.builder.soup = None

+    def __copy__(self):
+        return type(self)(self.encode(), builder=self.builder)
+
+    def __getstate__(self):
+        # Frequently a tree builder can't be pickled.
+        d = dict(self.__dict__)
+        if 'builder' in d and not self.builder.picklable:
+            del d['builder']
+        return d
+
    def _feed(self):
        # Convert the document to Unicode.
        self.builder.reset()
@@ -229,9 +258,7 @@ class BeautifulSoup(Tag):

    def new_string(self, s, subclass=NavigableString):
        """Create a new NavigableString associated with this soup."""
-        navigable = subclass(s)
-        navigable.setup()
-        return navigable
+        return subclass(s)

    def insert_before(self, successor):
        raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
@@ -290,14 +317,49 @@ class BeautifulSoup(Tag):
    def object_was_parsed(self, o, parent=None, most_recent_element=None):
        """Add an object to the parse tree."""
        parent = parent or self.currentTag
-        most_recent_element = most_recent_element or self._most_recent_element
-        o.setup(parent, most_recent_element)
+        previous_element = most_recent_element or self._most_recent_element
+
+        next_element = previous_sibling = next_sibling = None
+        if isinstance(o, Tag):
+            next_element = o.next_element
+            next_sibling = o.next_sibling
+            previous_sibling = o.previous_sibling
+            if not previous_element:
+                previous_element = o.previous_element
+
+        o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)

-        if most_recent_element is not None:
-            most_recent_element.next_element = o
        self._most_recent_element = o
        parent.contents.append(o)

+        if parent.next_sibling:
+            # This node is being inserted into an element that has
+            # already been parsed. Deal with any dangling references.
+            index = parent.contents.index(o)
+            if index == 0:
+                previous_element = parent
+                previous_sibling = None
+            else:
+                previous_element = previous_sibling = parent.contents[index-1]
+            if index == len(parent.contents)-1:
+                next_element = parent.next_sibling
+                next_sibling = None
+            else:
+                next_element = next_sibling = parent.contents[index+1]
+
+            o.previous_element = previous_element
+            if previous_element:
+                previous_element.next_element = o
+            o.next_element = next_element
+            if next_element:
+                next_element.previous_element = o
+            o.next_sibling = next_sibling
+            if next_sibling:
+                next_sibling.previous_sibling = o
+            o.previous_sibling = previous_sibling
+            if previous_sibling:
+                previous_sibling.next_sibling = o
+
    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
        """Pops the tag stack up to and including the most recent
        instance of the given tag. If inclusivePop is false, pops the tag
@@ -80,9 +80,12 @@ builder_registry = TreeBuilderRegistry()
 class TreeBuilder(object):
    """Turn a document into a Beautiful Soup object tree."""

+    NAME = "[Unknown tree builder]"
+    ALTERNATE_NAMES = []
    features = []

    is_xml = False
+    picklable = False
    preserve_whitespace_tags = set()
    empty_element_tags = None # A tag will be considered an empty-element
                              # tag when and only when it has no contents.
@@ -2,6 +2,7 @@ __all__ = [
    'HTML5TreeBuilder',
    ]

+from pdb import set_trace
 import warnings
 from bs4.builder import (
    PERMISSIVE,
@@ -9,7 +10,10 @@ from bs4.builder import (
    HTML_5,
    HTMLTreeBuilder,
    )
-from bs4.element import NamespacedAttribute
+from bs4.element import (
+    NamespacedAttribute,
+    whitespace_re,
+)
 import html5lib
 from html5lib.constants import namespaces
 from bs4.element import (
@@ -22,11 +26,20 @@ from bs4.element import (
 class HTML5TreeBuilder(HTMLTreeBuilder):
    """Use html5lib to build a tree."""

-    features = ['html5lib', PERMISSIVE, HTML_5, HTML]
+    NAME = "html5lib"

-    def prepare_markup(self, markup, user_specified_encoding):
+    features = [NAME, PERMISSIVE, HTML_5, HTML]
+
+    def prepare_markup(self, markup, user_specified_encoding,
+                       document_declared_encoding=None, exclude_encodings=None):
        # Store the user-specified encoding for use later on.
        self.user_specified_encoding = user_specified_encoding
+
+        # document_declared_encoding and exclude_encodings aren't used
+        # ATM because the html5lib TreeBuilder doesn't use
+        # UnicodeDammit.
+        if exclude_encodings:
+            warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
        yield (markup, None, None, False)

    # These methods are defined by Beautiful Soup.
@@ -101,7 +114,16 @@ class AttrList(object):
    def __iter__(self):
        return list(self.attrs.items()).__iter__()
    def __setitem__(self, name, value):
-        "set attr", name, value
+        # If this attribute is a multi-valued attribute for this element,
+        # turn its value into a list.
+        list_attr = HTML5TreeBuilder.cdata_list_attributes
+        if (name in list_attr['*']
+            or (self.element.name in list_attr
+                and name in list_attr[self.element.name])):
+            # A node that is being cloned may have already undergone
+            # this procedure.
+            if not isinstance(value, list):
+                value = whitespace_re.split(value)
        self.element[name] = value
    def items(self):
        return list(self.attrs.items())
@@ -161,6 +183,12 @@ class Element(html5lib.treebuilders._base.Node):
            # immediately after the parent, if it has no children.)
            if self.element.contents:
                most_recent_element = self.element._last_descendant(False)
+            elif self.element.next_element is not None:
+                # Something from further ahead in the parse tree is
+                # being inserted into this earlier element. This is
+                # very annoying because it means an expensive search
+                # for the last element in the tree.
+                most_recent_element = self.soup._last_descendant()
            else:
                most_recent_element = self.element

@@ -172,6 +200,7 @@ class Element(html5lib.treebuilders._base.Node):
        return AttrList(self.element)

    def setAttributes(self, attributes):
+
        if attributes is not None and len(attributes) > 0:

            converted_attributes = []
@@ -218,6 +247,9 @@ class Element(html5lib.treebuilders._base.Node):

    def reparentChildren(self, new_parent):
        """Move all of this tag's children into another tag."""
+        # print "MOVE", self.element.contents
+        # print "FROM", self.element
+        # print "TO", new_parent.element
        element = self.element
        new_parent_element = new_parent.element
        # Determine what this tag's next_element will be once all the children
@@ -236,17 +268,28 @@ class Element(html5lib.treebuilders._base.Node):
            new_parents_last_descendant_next_element = new_parent_element.next_element

        to_append = element.contents
-        append_after = new_parent.element.contents
+        append_after = new_parent_element.contents
        if len(to_append) > 0:
            # Set the first child's previous_element and previous_sibling
            # to elements within the new parent
            first_child = to_append[0]
-            first_child.previous_element = new_parents_last_descendant
+            if new_parents_last_descendant:
+                first_child.previous_element = new_parents_last_descendant
+            else:
+                first_child.previous_element = new_parent_element
            first_child.previous_sibling = new_parents_last_child
+            if new_parents_last_descendant:
+                new_parents_last_descendant.next_element = first_child
+            else:
+                new_parent_element.next_element = first_child
+            if new_parents_last_child:
+                new_parents_last_child.next_sibling = first_child

            # Fix the last child's next_element and next_sibling
            last_child = to_append[-1]
            last_child.next_element = new_parents_last_descendant_next_element
+            if new_parents_last_descendant_next_element:
+                new_parents_last_descendant_next_element.previous_element = last_child
            last_child.next_sibling = None

        for child in to_append:
@@ -257,6 +300,10 @@ class Element(html5lib.treebuilders._base.Node):
        element.contents = []
        element.next_element = final_next_element

+        # print "DONE WITH MOVE"
+        # print "FROM", self.element
+        # print "TO", new_parent_element
+
    def cloneNode(self):
        tag = self.soup.new_tag(self.element.name, self.namespace)
        node = Element(tag, self.soup, self.namespace)
@@ -4,10 +4,16 @@ __all__ = [
    'HTMLParserTreeBuilder',
    ]

-from HTMLParser import (
-    HTMLParser,
-    HTMLParseError,
-    )
+from HTMLParser import HTMLParser
+
+try:
+    from HTMLParser import HTMLParseError
+except ImportError, e:
+    # HTMLParseError is removed in Python 3.5. Since it can never be
+    # thrown in 3.5, we can just define our own class as a placeholder.
+    class HTMLParseError(Exception):
+        pass
+
 import sys
 import warnings

@@ -19,10 +25,10 @@ import warnings
 # At the end of this file, we monkeypatch HTMLParser so that
 # strict=True works well on Python 3.2.2.
 major, minor, release = sys.version_info[:3]
-CONSTRUCTOR_TAKES_STRICT = (
-    major > 3
-    or (major == 3 and minor > 2)
-    or (major == 3 and minor == 2 and release >= 3))
+CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
+CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
+CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
+

 from bs4.element import (
    CData,
@@ -63,7 +69,8 @@ class BeautifulSoupHTMLParser(HTMLParser):

    def handle_charref(self, name):
        # XXX workaround for a bug in HTMLParser. Remove this once
-        # it's fixed.
+        # it's fixed in all supported versions.
+        # http://bugs.python.org/issue13633
        if name.startswith('x'):
            real_name = int(name.lstrip('x'), 16)
        elif name.startswith('X'):
@@ -113,14 +120,6 @@ class BeautifulSoupHTMLParser(HTMLParser):

    def handle_pi(self, data):
        self.soup.endData()
-        if data.endswith("?") and data.lower().startswith("xml"):
-            # "An XHTML processing instruction using the trailing '?'
-            # will cause the '?' to be included in data." - HTMLParser
-            # docs.
-            #
-            # Strip the question mark so we don't end up with two
-            # question marks.
-            data = data[:-1]
        self.soup.handle_data(data)
        self.soup.endData(ProcessingInstruction)

@@ -128,15 +127,19 @@ class BeautifulSoupHTMLParser(HTMLParser):
 class HTMLParserTreeBuilder(HTMLTreeBuilder):

    is_xml = False
-    features = [HTML, STRICT, HTMLPARSER]
+    picklable = True
+    NAME = HTMLPARSER
+    features = [NAME, HTML, STRICT]

    def __init__(self, *args, **kwargs):
-        if CONSTRUCTOR_TAKES_STRICT:
+        if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
            kwargs['strict'] = False
+        if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
+            kwargs['convert_charrefs'] = False
        self.parser_args = (args, kwargs)

    def prepare_markup(self, markup, user_specified_encoding=None,
-                       document_declared_encoding=None):
+                       document_declared_encoding=None, exclude_encodings=None):
        """
        :return: A 4-tuple (markup, original encoding, encoding
        declared within markup, whether any characters had to be
@@ -147,7 +150,8 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
            return

        try_encodings = [user_specified_encoding, document_declared_encoding]
-        dammit = UnicodeDammit(markup, try_encodings, is_html=True)
+        dammit = UnicodeDammit(markup, try_encodings, is_html=True,
+                               exclude_encodings=exclude_encodings)
        yield (dammit.markup, dammit.original_encoding,
               dammit.declared_html_encoding,
               dammit.contains_replacement_characters)
@@ -7,7 +7,12 @@ from io import BytesIO
 from StringIO import StringIO
 import collections
 from lxml import etree
-from bs4.element import Comment, Doctype, NamespacedAttribute
+from bs4.element import (
+    Comment,
+    Doctype,
+    NamespacedAttribute,
+    ProcessingInstruction,
+)
 from bs4.builder import (
    FAST,
    HTML,
@@ -25,8 +30,11 @@ class LXMLTreeBuilderForXML(TreeBuilder):

    is_xml = True

+    NAME = "lxml-xml"
+    ALTERNATE_NAMES = ["xml"]
+
    # Well, it's permissive by XML parser standards.
-    features = [LXML, XML, FAST, PERMISSIVE]
+    features = [NAME, LXML, XML, FAST, PERMISSIVE]

    CHUNK_SIZE = 512

@@ -70,6 +78,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
            return (None, tag)

    def prepare_markup(self, markup, user_specified_encoding=None,
+                       exclude_encodings=None,
                       document_declared_encoding=None):
        """
        :yield: A series of 4-tuples.
@@ -95,7 +104,8 @@ class LXMLTreeBuilderForXML(TreeBuilder):
        # the document as each one in turn.
        is_html = not self.is_xml
        try_encodings = [user_specified_encoding, document_declared_encoding]
-        detector = EncodingDetector(markup, try_encodings, is_html)
+        detector = EncodingDetector(
+            markup, try_encodings, is_html, exclude_encodings)
        for encoding in detector.encodings:
            yield (detector.markup, encoding, document_declared_encoding, False)

@@ -189,7 +199,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
            self.nsmaps.pop()

    def pi(self, target, data):
-        pass
+        self.soup.endData()
+        self.soup.handle_data(target + ' ' + data)
+        self.soup.endData(ProcessingInstruction)

    def data(self, content):
        self.soup.handle_data(content)
@@ -212,7 +224,10 @@ class LXMLTreeBuilderForXML(TreeBuilder):

 class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):

-    features = [LXML, HTML, FAST, PERMISSIVE]
+    NAME = LXML
+    ALTERNATE_NAMES = ["lxml-html"]
+
+    features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
    is_xml = False

    def default_parser(self, encoding):
@@ -3,10 +3,12 @@

 This library converts a bytestream to Unicode through any means
 necessary. It is heavily based on code from Mark Pilgrim's Universal
-Feed Parser. It works best on XML and XML, but it does not rewrite the
+Feed Parser. It works best on XML and HTML, but it does not rewrite the
 XML or HTML to reflect a new encoding; that's the tree builder's job.
 """
+__license__ = "MIT"

+from pdb import set_trace
 import codecs
 from htmlentitydefs import codepoint2name
 import re
@@ -212,8 +214,11 @@ class EncodingDetector:

    5. Windows-1252.
    """
-    def __init__(self, markup, override_encodings=None, is_html=False):
+    def __init__(self, markup, override_encodings=None, is_html=False,
+                 exclude_encodings=None):
        self.override_encodings = override_encodings or []
+        exclude_encodings = exclude_encodings or []
+        self.exclude_encodings = set([x.lower() for x in exclude_encodings])
        self.chardet_encoding = None
        self.is_html = is_html
        self.declared_encoding = None
@@ -224,6 +229,8 @@ class EncodingDetector:
    def _usable(self, encoding, tried):
        if encoding is not None:
            encoding = encoding.lower()
+            if encoding in self.exclude_encodings:
+                return False
            if encoding not in tried:
                tried.add(encoding)
                return True
@@ -266,6 +273,9 @@ class EncodingDetector:
    def strip_byte_order_mark(cls, data):
        """If a byte-order mark is present, strip it and return the encoding it implies."""
        encoding = None
+        if isinstance(data, unicode):
+            # Unicode data cannot have a byte-order mark.
+            return data, encoding
        if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
               and (data[2:4] != '\x00\x00'):
            encoding = 'utf-16be'
@@ -306,7 +316,7 @@ class EncodingDetector:
            declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
        if declared_encoding_match is not None:
            declared_encoding = declared_encoding_match.groups()[0].decode(
-                'ascii')
+                'ascii', 'replace')
        if declared_encoding:
            return declared_encoding.lower()
        return None
@@ -331,13 +341,14 @@ class UnicodeDammit:
        ]

    def __init__(self, markup, override_encodings=[],
-                 smart_quotes_to=None, is_html=False):
+                 smart_quotes_to=None, is_html=False, exclude_encodings=[]):
        self.smart_quotes_to = smart_quotes_to
        self.tried_encodings = []
        self.contains_replacement_characters = False
        self.is_html = is_html

-        self.detector = EncodingDetector(markup, override_encodings, is_html)
+        self.detector = EncodingDetector(
+            markup, override_encodings, is_html, exclude_encodings)

        # Short-circuit if the data is in Unicode to begin with.
        if isinstance(markup, unicode) or markup == '':
@@ -1,4 +1,7 @@
 """Diagnostic functions, mainly for use when doing tech support."""
+
+__license__ = "MIT"
+
 import cProfile
 from StringIO import StringIO
 from HTMLParser import HTMLParser
@@ -33,12 +36,21 @@ def diagnose(data):

    if 'lxml' in basic_parsers:
        basic_parsers.append(["lxml", "xml"])
-        from lxml import etree
-        print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
+        try:
+            from lxml import etree
+            print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
+        except ImportError, e:
+            print (
+                "lxml is not installed or couldn't be imported.")
+

    if 'html5lib' in basic_parsers:
-        import html5lib
-        print "Found html5lib version %s" % html5lib.__version__
+        try:
+            import html5lib
+            print "Found html5lib version %s" % html5lib.__version__
+        except ImportError, e:
+            print (
+                "html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
@@ -1,3 +1,6 @@
+__license__ = "MIT"
+
+from pdb import set_trace
 import collections
 import re
 import sys
@@ -185,24 +188,40 @@ class PageElement(object):
            return self.HTML_FORMATTERS.get(
                name, HTMLAwareEntitySubstitution.substitute_xml)

-    def setup(self, parent=None, previous_element=None):
+    def setup(self, parent=None, previous_element=None, next_element=None,
+              previous_sibling=None, next_sibling=None):
        """Sets up the initial relations between this element and
        other elements."""
        self.parent = parent
+
        self.previous_element = previous_element
        if previous_element is not None:
            self.previous_element.next_element = self
-        self.next_element = None
-        self.previous_sibling = None
-        self.next_sibling = None
-        if self.parent is not None and self.parent.contents:
-            self.previous_sibling = self.parent.contents[-1]
+
+        self.next_element = next_element
+        if self.next_element:
+            self.next_element.previous_element = self
+
+        self.next_sibling = next_sibling
+        if self.next_sibling:
+            self.next_sibling.previous_sibling = self
+
+        if (not previous_sibling
+            and self.parent is not None and self.parent.contents):
+            previous_sibling = self.parent.contents[-1]
+
+        self.previous_sibling = previous_sibling
+        if previous_sibling:
            self.previous_sibling.next_sibling = self

    nextSibling = _alias("next_sibling")  # BS3
    previousSibling = _alias("previous_sibling")  # BS3

    def replace_with(self, replace_with):
+        if not self.parent:
+            raise ValueError(
+                "Cannot replace one element with another when the"
+                "element to be replaced is not part of a tree.")
        if replace_with is self:
            return
        if replace_with is self.parent:
@@ -216,6 +235,10 @@ class PageElement(object):

    def unwrap(self):
        my_parent = self.parent
+        if not self.parent:
+            raise ValueError(
+                "Cannot replace an element with its contents when that"
+                "element is not part of a tree.")
        my_index = self.parent.index(self)
        self.extract()
        for child in reversed(self.contents[:]):
@@ -240,17 +263,20 @@ class PageElement(object):
        last_child = self._last_descendant()
        next_element = last_child.next_element

-        if self.previous_element is not None:
+        if (self.previous_element is not None and
+            self.previous_element is not next_element):
            self.previous_element.next_element = next_element
-        if next_element is not None:
+        if next_element is not None and next_element is not self.previous_element:
            next_element.previous_element = self.previous_element
        self.previous_element = None
        last_child.next_element = None

        self.parent = None
-        if self.previous_sibling is not None:
+        if (self.previous_sibling is not None
+            and self.previous_sibling is not self.next_sibling):
            self.previous_sibling.next_sibling = self.next_sibling
-        if self.next_sibling is not None:
+        if (self.next_sibling is not None
+            and self.next_sibling is not self.previous_sibling):
            self.next_sibling.previous_sibling = self.previous_sibling
        self.previous_sibling = self.next_sibling = None
        return self
@@ -263,13 +289,15 @@ class PageElement(object):
            last_child = self
            while isinstance(last_child, Tag) and last_child.contents:
                last_child = last_child.contents[-1]
-        if not accept_self and last_child == self:
+        if not accept_self and last_child is self:
            last_child = None
        return last_child
    # BS3: Not part of the API!
    _lastRecursiveChild = _last_descendant

    def insert(self, position, new_child):
+        if new_child is None:
+            raise ValueError("Cannot insert None into a tag.")
        if new_child is self:
            raise ValueError("Cannot insert a tag into itself.")
        if (isinstance(new_child, basestring)
@@ -478,6 +506,10 @@ class PageElement(object):
    def _find_all(self, name, attrs, text, limit, generator, **kwargs):
        "Iterates over a generator looking for things that match."

+        if text is None and 'string' in kwargs:
+            text = kwargs['string']
+            del kwargs['string']
+
        if isinstance(name, SoupStrainer):
            strainer = name
        else:
@@ -548,17 +580,17 @@ class PageElement(object):

    # Methods for supporting CSS selectors.

-    tag_name_re = re.compile('^[a-z0-9]+$')
+    tag_name_re = re.compile('^[a-zA-Z0-9][-.a-zA-Z0-9:_]*$')

-    # /^(\w+)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
-    #   \---/  \---/\-------------/    \-------/
-    #     |      |         |               |
-    #     |      |         |           The value
-    #     |      |    ~,|,^,$,* or =
-    #     |   Attribute
+    # /^([a-zA-Z0-9][-.a-zA-Z0-9:_]*)\[([\w-]+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
+    #   \---------------------------/  \------/\-------------/    \-------/
+    #     |                              |            |               |
+    #     |                              |            |           The value
+    #     |                              |       ~,|,^,$,* or =
+    #     |                           Attribute
    #    Tag
    attribselect_re = re.compile(
-        r'^(?P<tag>\w+)?\[(?P<attribute>\w+)(?P<operator>[=~\|\^\$\*]?)' +
+        r'^(?P<tag>[a-zA-Z0-9][-.a-zA-Z0-9:_]*)?\[(?P<attribute>[\w-]+)(?P<operator>[=~\|\^\$\*]?)' +
        r'=?"?(?P<value>[^\]"]*)"?\]$'
        )

@@ -654,11 +686,17 @@ class NavigableString(unicode, PageElement):
        how to handle non-ASCII characters.
        """
        if isinstance(value, unicode):
-            return unicode.__new__(cls, value)
-        return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+            u = unicode.__new__(cls, value)
+        else:
+            u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+        u.setup()
+        return u

    def __copy__(self):
-        return self
+        """A copy of a NavigableString has the same contents and class
+        as the original, but it is not connected to the parse tree.
+        """
+        return type(self)(self)

    def __getnewargs__(self):
        return (unicode(self),)
@@ -707,7 +745,7 @@ class CData(PreformattedString):
 class ProcessingInstruction(PreformattedString):

    PREFIX = u'<?'
-    SUFFIX = u'?>'
+    SUFFIX = u'>'

 class Comment(PreformattedString):

@@ -716,8 +754,8 @@ class Comment(PreformattedString):


 class Declaration(PreformattedString):
-    PREFIX = u'<!'
-    SUFFIX = u'!>'
+    PREFIX = u'<?'
+    SUFFIX = u'?>'


 class Doctype(PreformattedString):
@@ -759,9 +797,12 @@ class Tag(PageElement):
        self.prefix = prefix
        if attrs is None:
            attrs = {}
-        elif attrs and builder.cdata_list_attributes:
-            attrs = builder._replace_cdata_list_attribute_values(
-                self.name, attrs)
+        elif attrs:
+            if builder is not None and builder.cdata_list_attributes:
+                attrs = builder._replace_cdata_list_attribute_values(
+                    self.name, attrs)
+            else:
+                attrs = dict(attrs)
        else:
            attrs = dict(attrs)
        self.attrs = attrs
@@ -778,6 +819,18 @@ class Tag(PageElement):

    parserClass = _alias("parser_class")  # BS3

+    def __copy__(self):
+        """A copy of a Tag is a new Tag, unconnected to the parse tree.
+        Its contents are a copy of the old Tag's contents.
+        """
+        clone = type(self)(None, self.builder, self.name, self.namespace,
+                           self.nsprefix, self.attrs)
+        for attr in ('can_be_empty_element', 'hidden'):
+            setattr(clone, attr, getattr(self, attr))
+        for child in self.contents:
+            clone.append(child.__copy__())
+        return clone
+
    @property
    def is_empty_element(self):
        """Is this tag an empty-element tag? (aka a self-closing tag)
@@ -971,15 +1024,25 @@ class Tag(PageElement):
        as defined in __eq__."""
        return not self == other

-    def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+    def __repr__(self, encoding="unicode-escape"):
        """Renders this tag as a string."""
-        return self.encode(encoding)
+        if PY3K:
+            # "The return value must be a string object", i.e. Unicode
+            return self.decode()
+        else:
+            # "The return value must be a string object", i.e. a bytestring.
+            # By convention, the return value of __repr__ should also be
+            # an ASCII string.
+            return self.encode(encoding)

    def __unicode__(self):
        return self.decode()

    def __str__(self):
-        return self.encode()
+        if PY3K:
+            return self.decode()
+        else:
+            return self.encode()

    if PY3K:
        __str__ = __repr__ = __unicode__
@@ -1103,12 +1166,18 @@ class Tag(PageElement):
                       formatter="minimal"):
        """Renders the contents of this tag as a Unicode string.

+        :param indent_level: Each line of the rendering will be
+           indented this many spaces.
+
        :param eventual_encoding: The tag is destined to be
           encoded into this encoding. This method is _not_
           responsible for performing that encoding. This information
           is passed in so that it can be substituted in if the
           document contains a <META> tag that mentions the document's
           encoding.
+
+        :param formatter: The output formatter responsible for converting
+           entities to Unicode characters.
        """
        # First off, turn a string formatter into a function. This
        # will stop the lookup from happening over and over again.
@@ -1137,7 +1206,17 @@ class Tag(PageElement):
    def encode_contents(
        self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
        formatter="minimal"):
-        """Renders the contents of this tag as a bytestring."""
+        """Renders the contents of this tag as a bytestring.
+
+        :param indent_level: Each line of the rendering will be
+           indented this many spaces.
+
+        :param encoding: The bytestring will be in this encoding.
+
+        :param formatter: The output formatter responsible for converting
+           entities to Unicode characters.
+        """
+
        contents = self.decode_contents(indent_level, encoding, formatter)
        return contents.encode(encoding)

@@ -1201,26 +1280,57 @@ class Tag(PageElement):

    _selector_combinators = ['>', '+', '~']
    _select_debug = False
-    def select(self, selector, _candidate_generator=None):
+    def select_one(self, selector):
        """Perform a CSS selection operation on the current element."""
+        value = self.select(selector, limit=1)
+        if value:
+            return value[0]
+        return None
+
+    def select(self, selector, _candidate_generator=None, limit=None):
+        """Perform a CSS selection operation on the current element."""
+
+        # Handle grouping selectors if ',' exists, ie: p,a
+        if ',' in selector:
+            context = []
+            for partial_selector in selector.split(','):
+                partial_selector = partial_selector.strip()
+                if partial_selector == '':
+                    raise ValueError('Invalid group selection syntax: %s' % selector)
+                candidates = self.select(partial_selector, limit=limit)
+                for candidate in candidates:
+                    if candidate not in context:
+                        context.append(candidate)
+
+                if limit and len(context) >= limit:
+                    break
+            return context
+
        tokens = selector.split()
        current_context = [self]

        if tokens[-1] in self._selector_combinators:
            raise ValueError(
                'Final combinator "%s" is missing an argument.' % tokens[-1])
+
        if self._select_debug:
            print 'Running CSS selector "%s"' % selector
+
        for index, token in enumerate(tokens):
-            if self._select_debug:
-                print ' Considering token "%s"' % token
-            recursive_candidate_generator = None
-            tag_name = None
+            new_context = []
+            new_context_ids = set([])
+
            if tokens[index-1] in self._selector_combinators:
                # This token was consumed by the previous combinator. Skip it.
                if self._select_debug:
                    print '  Token was consumed by the previous combinator.'
                continue
+
+            if self._select_debug:
+                print ' Considering token "%s"' % token
+            recursive_candidate_generator = None
+            tag_name = None
+
            # Each operation corresponds to a checker function, a rule
            # for determining whether a candidate matches the
            # selector. Candidates are generated by the active
@@ -1256,35 +1366,38 @@ class Tag(PageElement):
                        "A pseudo-class must be prefixed with a tag name.")
                pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
                found = []
-                if pseudo_attributes is not None:
+                if pseudo_attributes is None:
+                    pseudo_type = pseudo
+                    pseudo_value = None
+                else:
                    pseudo_type, pseudo_value = pseudo_attributes.groups()
-                    if pseudo_type == 'nth-of-type':
-                        try:
-                            pseudo_value = int(pseudo_value)
-                        except:
-                            raise NotImplementedError(
-                                'Only numeric values are currently supported for the nth-of-type pseudo-class.')
-                        if pseudo_value < 1:
-                            raise ValueError(
-                                'nth-of-type pseudo-class value must be at least 1.')
-                        class Counter(object):
-                            def __init__(self, destination):
-                                self.count = 0
-                                self.destination = destination
-
-                            def nth_child_of_type(self, tag):
-                                self.count += 1
-                                if self.count == self.destination:
-                                    return True
-                                if self.count > self.destination:
-                                    # Stop the generator that's sending us
-                                    # these things.
-                                    raise StopIteration()
-                                return False
-                        checker = Counter(pseudo_value).nth_child_of_type
-                    else:
+                if pseudo_type == 'nth-of-type':
+                    try:
+                        pseudo_value = int(pseudo_value)
+                    except:
                        raise NotImplementedError(
-                            'Only the following pseudo-classes are implemented: nth-of-type.')
+                            'Only numeric values are currently supported for the nth-of-type pseudo-class.')
+                    if pseudo_value < 1:
+                        raise ValueError(
+                            'nth-of-type pseudo-class value must be at least 1.')
+                    class Counter(object):
+                        def __init__(self, destination):
+                            self.count = 0
+                            self.destination = destination
+
+                        def nth_child_of_type(self, tag):
+                            self.count += 1
+                            if self.count == self.destination:
+                                return True
+                            if self.count > self.destination:
+                                # Stop the generator that's sending us
+                                # these things.
+                                raise StopIteration()
+                            return False
+                    checker = Counter(pseudo_value).nth_child_of_type
+                else:
+                    raise NotImplementedError(
+                        'Only the following pseudo-classes are implemented: nth-of-type.')

            elif token == '*':
                # Star selector -- matches everything
@@ -1311,7 +1424,6 @@ class Tag(PageElement):
            else:
                raise ValueError(
                    'Unsupported or invalid CSS selector: "%s"' % token)
-
            if recursive_candidate_generator:
                # This happens when the selector looks like  "> foo".
                #
@@ -1361,8 +1473,7 @@ class Tag(PageElement):
            else:
                _use_candidate_generator = _candidate_generator

-            new_context = []
-            new_context_ids = set([])
+            count = 0
            for tag in current_context:
                if self._select_debug:
                    print "    Running candidate generator on %s %s" % (
@@ -1387,9 +1498,12 @@ class Tag(PageElement):
                            # don't include it in the context more than once.
                            new_context.append(candidate)
                            new_context_ids.add(id(candidate))
+                            if limit and len(new_context) >= limit:
+                                break
                    elif self._select_debug:
                        print "     FAILURE %s %s" % (candidate.name, repr(candidate.attrs))

+
            current_context = new_context

        if self._select_debug:
@@ -1,5 +1,8 @@
 """Helper classes for tests."""

+__license__ = "MIT"
+
+import pickle
 import copy
 import functools
 import unittest
@@ -43,6 +46,16 @@ class SoupTest(unittest.TestCase):

        self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))

+    def assertConnectedness(self, element):
+        """Ensure that next_element and previous_element are properly
+        set for all descendants of the given element.
+        """
+        earlier = None
+        for e in element.descendants:
+            if earlier:
+                self.assertEqual(e, earlier.next_element)
+                self.assertEqual(earlier, e.previous_element)
+            earlier = e

 class HTMLTreeBuilderSmokeTest(object):

@@ -54,6 +67,15 @@ class HTMLTreeBuilderSmokeTest(object):
    markup in these tests, there's not much room for interpretation.
    """

+    def test_pickle_and_unpickle_identity(self):
+        # Pickling a tree, then unpickling it, yields a tree identical
+        # to the original.
+        tree = self.soup("<a><b>foo</a>")
+        dumped = pickle.dumps(tree, 2)
+        loaded = pickle.loads(dumped)
+        self.assertEqual(loaded.__class__, BeautifulSoup)
+        self.assertEqual(loaded.decode(), tree.decode())
+
    def assertDoctypeHandled(self, doctype_fragment):
        """Assert that a given doctype string is handled correctly."""
        doctype_str, soup = self._document_with_doctype(doctype_fragment)
@@ -114,6 +136,11 @@ class HTMLTreeBuilderSmokeTest(object):
            soup.encode("utf-8").replace(b"\n", b""),
            markup.replace(b"\n", b""))

+    def test_processing_instruction(self):
+        markup = b"""<?PITarget PIContent?>"""
+        soup = self.soup(markup)
+        self.assertEqual(markup, soup.encode("utf8"))
+
    def test_deepcopy(self):
        """Make sure you can copy the tree builder.

@@ -155,6 +182,23 @@ class HTMLTreeBuilderSmokeTest(object):
    def test_nested_formatting_elements(self):
        self.assertSoupEquals("<em><em></em></em>")

+    def test_double_head(self):
+        html = '''<!DOCTYPE html>
+<html>
+<head>
+<title>Ordinary HEAD element test</title>
+</head>
+<script type="text/javascript">
+alert("Help!");
+</script>
+<body>
+Hello, world!
+</body>
+</html>
+'''
+        soup = self.soup(html)
+        self.assertEqual("text/javascript", soup.find('script')['type'])
+
    def test_comment(self):
        # Comments are represented as Comment objects.
        markup = "<p>foo<!--foobar-->baz</p>"
@@ -221,6 +265,14 @@ class HTMLTreeBuilderSmokeTest(object):
        soup = self.soup(markup)
        self.assertEqual(["css"], soup.div.div['class'])

+    def test_multivalued_attribute_on_html(self):
+        # html5lib uses a different API to set the attributes of the
+        # <html> tag. This has caused problems with multivalued
+        # attributes.
+        markup = '<html class="a b"></html>'
+        soup = self.soup(markup)
+        self.assertEqual(["a", "b"], soup.html['class'])
+
    def test_angle_brackets_in_attribute_values_are_escaped(self):
        self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')

@@ -253,6 +305,35 @@ class HTMLTreeBuilderSmokeTest(object):
        soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
        self.assertEqual("p", soup.h2.string.next_element.name)
        self.assertEqual("p", soup.p.name)
+        self.assertConnectedness(soup)
+
+    def test_head_tag_between_head_and_body(self):
+        "Prevent recurrence of a bug in the html5lib treebuilder."
+        content = """<html><head></head>
+  <link></link>
+  <body>foo</body>
+</html>
+"""
+        soup = self.soup(content)
+        self.assertNotEqual(None, soup.html.body)
+        self.assertConnectedness(soup)
+
+    def test_multiple_copies_of_a_tag(self):
+        "Prevent recurrence of a bug in the html5lib treebuilder."
+        content = """<!DOCTYPE html>
+<html>
+ <body>
+   <article id="a" >
+   <div><a href="1"></div>
+   <footer>
+     <a href="2"></a>
+   </footer>
+  </article>
+  </body>
+</html>
+"""
+        soup = self.soup(content)
+        self.assertConnectedness(soup.article)

    def test_basic_namespaces(self):
        """Parsers don't need to *understand* namespaces, but at the
@@ -463,11 +544,25 @@ class HTMLTreeBuilderSmokeTest(object):

 class XMLTreeBuilderSmokeTest(object):

+    def test_pickle_and_unpickle_identity(self):
+        # Pickling a tree, then unpickling it, yields a tree identical
+        # to the original.
+        tree = self.soup("<a><b>foo</a>")
+        dumped = pickle.dumps(tree, 2)
+        loaded = pickle.loads(dumped)
+        self.assertEqual(loaded.__class__, BeautifulSoup)
+        self.assertEqual(loaded.decode(), tree.decode())
+
    def test_docstring_generated(self):
        soup = self.soup("<root/>")
        self.assertEqual(
            soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')

+    def test_xml_declaration(self):
+        markup = b"""<?xml version="1.0" encoding="utf8"?>\n<foo/>"""
+        soup = self.soup(markup)
+        self.assertEqual(markup, soup.encode("utf8"))
+
    def test_real_xhtml_document(self):
        """A real XHTML document should come out *exactly* the same as it went in."""
        markup = b"""<?xml version="1.0" encoding="utf-8"?>
@@ -485,7 +580,7 @@ class XMLTreeBuilderSmokeTest(object):
  <script type="text/javascript">
  </script>
 """
-        soup = BeautifulSoup(doc, "xml")
+        soup = BeautifulSoup(doc, "lxml-xml")
        # lxml would have stripped this while parsing, but we can add
        # it later.
        soup.script.string = 'console.log("< < hey > > ");'
@@ -1,6 +1,7 @@
 """Tests of the builder registry."""

 import unittest
+import warnings

 from bs4 import BeautifulSoup
 from bs4.builder import (
@@ -67,10 +68,15 @@ class BuiltInRegistryTest(unittest.TestCase):
                          HTMLParserTreeBuilder)

    def test_beautifulsoup_constructor_does_lookup(self):
-        # You can pass in a string.
-        BeautifulSoup("", features="html")
-        # Or a list of strings.
-        BeautifulSoup("", features=["html", "fast"])
+
+        with warnings.catch_warnings(record=True) as w:
+            # This will create a warning about not explicitly
+            # specifying a parser, but we'll ignore it.
+
+            # You can pass in a string.
+            BeautifulSoup("", features="html")
+            # Or a list of strings.
+            BeautifulSoup("", features=["html", "fast"])

        # You'll get an exception if BS can't find an appropriate
        # builder.
@@ -83,3 +83,16 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
        soup = self.soup(markup)
        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
        self.assertEqual(2, len(soup.find_all('p')))
+
+    def test_processing_instruction(self):
+        """Processing instructions become comments."""
+        markup = b"""<?PITarget PIContent?>"""
+        soup = self.soup(markup)
+        assert str(soup).startswith("<!--?PITarget PIContent?-->")
+
+    def test_cloned_multivalue_node(self):
+        markup = b"""<a class="my_class"><p></a>"""
+        soup = self.soup(markup)
+        a1, a2 = soup.find_all('a')
+        self.assertEqual(a1, a2)
+        assert a1 is not a2
@@ -1,6 +1,8 @@
 """Tests to ensure that the html.parser tree builder generates good
 trees."""

+from pdb import set_trace
+import pickle
 from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
 from bs4.builder import HTMLParserTreeBuilder

@@ -17,3 +19,14 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    def test_namespaced_public_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        pass
+
+    def test_builder_is_pickled(self):
+        """Unlike most tree builders, HTMLParserTreeBuilder will
+        be restored after pickling.
+        """
+        tree = self.soup("<a><b>foo</a>")
+        dumped = pickle.dumps(tree, 2)
+        loaded = pickle.loads(dumped)
+        self.assertTrue(isinstance(loaded.builder, type(tree.builder)))
+
+
@@ -65,21 +65,6 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
        self.assertEqual(u"<b/>", unicode(soup.b))
        self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))

-    def test_real_xhtml_document(self):
-        """lxml strips the XML definition from an XHTML doc, which is fine."""
-        markup = b"""<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head><title>Hello.</title></head>
-<body>Goodbye.</body>
-</html>"""
-        soup = self.soup(markup)
-        self.assertEqual(
-            soup.encode("utf-8").replace(b"\n", b''),
-            markup.replace(b'\n', b'').replace(
-                b'<?xml version="1.0" encoding="utf-8"?>', b''))
-
-
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 """Tests of Beautiful Soup as a whole."""

+from pdb import set_trace
 import logging
 import unittest
 import sys
@@ -20,6 +21,7 @@ import bs4.dammit
 from bs4.dammit import (
    EntitySubstitution,
    UnicodeDammit,
+    EncodingDetector,
 )
 from bs4.testing import (
    SoupTest,
@@ -48,8 +50,34 @@ class TestConstructor(SoupTest):
        soup = self.soup(data)
        self.assertEqual(u"foo\0bar", soup.h1.string)

+    def test_exclude_encodings(self):
+        utf8_data = u"Räksmörgås".encode("utf-8")
+        soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
+        self.assertEqual("windows-1252", soup.original_encoding)

-class TestDeprecatedConstructorArguments(SoupTest):
+
+class TestWarnings(SoupTest):
+
+    def _assert_no_parser_specified(self, s, is_there=True):
+        # Only the first 80 characters of the warning text are compared.
+        self.assertTrue(s.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:80]))
+
+    def test_warning_if_no_parser_specified(self):
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup("<a><b></b></a>")
+        msg = str(w[0].message)
+        self._assert_no_parser_specified(msg)
+
+    def test_warning_if_parser_specified_too_vague(self):
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup("<a><b></b></a>", "html")
+        msg = str(w[0].message)
+        self._assert_no_parser_specified(msg)
+
+    def test_no_warning_if_explicit_parser_specified(self):
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup("<a><b></b></a>", "html.parser")
+        self.assertEqual([], w)  # an explicit parser must record no warnings

    def test_parseOnlyThese_renamed_to_parse_only(self):
        with warnings.catch_warnings(record=True) as w:
@@ -271,10 +299,11 @@ class TestUnicodeDammit(unittest.TestCase):
            dammit.unicode_markup, """<foo>''""</foo>""")

    def test_detect_utf8(self):
-        utf8 = b"\xc3\xa9"
+        utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
        dammit = UnicodeDammit(utf8)
-        self.assertEqual(dammit.unicode_markup, u'\xe9')
        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
+        self.assertEqual(dammit.unicode_markup, u'Sacr\xe9 bleu! \N{SNOWMAN}')
+

    def test_convert_hebrew(self):
        hebrew = b"\xed\xe5\xec\xf9"
@@ -299,6 +328,26 @@ class TestUnicodeDammit(unittest.TestCase):
            dammit = UnicodeDammit(utf8_data, [bad_encoding])
            self.assertEqual(dammit.original_encoding.lower(), 'utf-8')

+    def test_exclude_encodings(self):
+        # This is UTF-8.
+        utf8_data = u"Räksmörgås".encode("utf-8")
+
+        # But if we exclude UTF-8 from consideration, the guess is
+        # Windows-1252.
+        dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"])
+        self.assertEqual(dammit.original_encoding.lower(), 'windows-1252')
+
+        # And if we exclude that, there is no valid guess at all.
+        dammit = UnicodeDammit(
+            utf8_data, exclude_encodings=["utf-8", "windows-1252"])
+        self.assertEqual(dammit.original_encoding, None)
+
+    def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self):
+        detected = EncodingDetector(
+            b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
+        encodings = list(detected.encodings)
+        assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings
+
    def test_detect_html5_style_meta_tag(self):

        for data in (
@@ -9,6 +9,7 @@ same markup, but all Beautiful Soup trees can be traversed with the
 methods tested here.
 """

+from pdb import set_trace
 import copy
 import pickle
 import re
@@ -19,8 +20,10 @@ from bs4.builder import (
    HTMLParserTreeBuilder,
 )
 from bs4.element import (
+    PY3K,
    CData,
    Comment,
+    Declaration,
    Doctype,
    NavigableString,
    SoupStrainer,
@@ -68,7 +71,13 @@ class TestFind(TreeTest):

    def test_unicode_text_find(self):
        soup = self.soup(u'<h1>Räksmörgås</h1>')
-        self.assertEqual(soup.find(text=u'Räksmörgås'), u'Räksmörgås')
+        self.assertEqual(soup.find(string=u'Räksmörgås'), u'Räksmörgås')
+
+    def test_unicode_attribute_find(self):
+        soup = self.soup(u'<h1 id="Räksmörgås">here it is</h1>')
+        str(soup)
+        self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text)
+

    def test_find_everything(self):
        """Test an optimization that finds all tags."""
@@ -87,6 +96,7 @@ class TestFindAll(TreeTest):
        """You can search the tree for text nodes."""
        soup = self.soup("<html>Foo<b>bar</b>\xbb</html>")
        # Exact match.
+        self.assertEqual(soup.find_all(string="bar"), [u"bar"])
        self.assertEqual(soup.find_all(text="bar"), [u"bar"])
        # Match any of a number of strings.
        self.assertEqual(
@@ -688,7 +698,7 @@ class TestTagCreation(SoupTest):

    def test_tag_inherits_self_closing_rules_from_builder(self):
        if XML_BUILDER_PRESENT:
-            xml_soup = BeautifulSoup("", "xml")
+            xml_soup = BeautifulSoup("", "lxml-xml")
            xml_br = xml_soup.new_tag("br")
            xml_p = xml_soup.new_tag("p")

@@ -697,7 +707,7 @@ class TestTagCreation(SoupTest):
            self.assertEqual(b"<br/>", xml_br.encode())
            self.assertEqual(b"<p/>", xml_p.encode())

-        html_soup = BeautifulSoup("", "html")
+        html_soup = BeautifulSoup("", "html.parser")
        html_br = html_soup.new_tag("br")
        html_p = html_soup.new_tag("p")

@@ -773,6 +783,14 @@ class TestTreeModification(SoupTest):
        new_a = a.unwrap()
        self.assertEqual(a, new_a)

+    def test_replace_with_and_unwrap_give_useful_exception_when_tag_has_no_parent(self):
+        soup = self.soup("<a><b>Foo</b></a><c>Bar</c>")
+        a = soup.a
+        a.extract()
+        self.assertEqual(None, a.parent)
+        self.assertRaises(ValueError, a.unwrap)
+        self.assertRaises(ValueError, a.replace_with, soup.c)
+
    def test_replace_tag_with_itself(self):
        text = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>"
        soup = self.soup(text)
@@ -1067,6 +1085,31 @@ class TestTreeModification(SoupTest):
        self.assertEqual(foo_2, soup.a.string)
        self.assertEqual(bar_2, soup.b.string)

+    def test_extract_multiples_of_same_tag(self):
+        soup = self.soup("""
+<html>
+<head>
+<script>foo</script>
+</head>
+<body>
+ <script>bar</script>
+ <a></a>
+</body>
+<script>baz</script>
+</html>""")
+        [soup.script.extract() for i in soup.find_all("script")]
+        self.assertEqual("<body>\n\n<a></a>\n</body>", unicode(soup.body))
+
+
+    def test_extract_works_when_element_is_surrounded_by_identical_strings(self):
+        soup = self.soup(
+ '<html>\n'
+ '<body>hi</body>\n'
+ '</html>')
+        soup.find('body').extract()
+        self.assertEqual(None, soup.find('body'))
+
+
    def test_clear(self):
        """Tag.clear()"""
        soup = self.soup("<p><a>String <em>Italicized</em></a> and another</p>")
@@ -1293,6 +1336,51 @@ class TestPersistence(SoupTest):
        loaded = pickle.loads(dumped)
        self.assertEqual(loaded.decode(), soup.decode())

+    def test_copy_navigablestring_is_not_attached_to_tree(self):
+        html = u"<b>Foo<a></a></b><b>Bar</b>"
+        soup = self.soup(html)
+        s1 = soup.find(string="Foo")
+        s2 = copy.copy(s1)
+        self.assertEqual(s1, s2)
+        self.assertEqual(None, s2.parent)
+        self.assertEqual(None, s2.next_element)
+        self.assertNotEqual(None, s1.next_sibling)
+        self.assertEqual(None, s2.next_sibling)
+        self.assertEqual(None, s2.previous_element)
+
+    def test_copy_navigablestring_subclass_has_same_type(self):
+        html = u"<b><!--Foo--></b>"
+        soup = self.soup(html)
+        s1 = soup.string
+        s2 = copy.copy(s1)
+        self.assertEqual(s1, s2)
+        self.assertTrue(isinstance(s2, Comment))
+
+    def test_copy_entire_soup(self):
+        html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
+        soup = self.soup(html)
+        soup_copy = copy.copy(soup)
+        self.assertEqual(soup, soup_copy)
+
+    def test_copy_tag_copies_contents(self):
+        html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
+        soup = self.soup(html)
+        div = soup.div
+        div_copy = copy.copy(div)
+
+        # The two tags look the same, and evaluate to equal.
+        self.assertEqual(unicode(div), unicode(div_copy))
+        self.assertEqual(div, div_copy)
+
+        # But they're not the same object.
+        self.assertFalse(div is div_copy)
+
+        # And they don't have the same relation to the parse tree. The
+        # copy is not associated with a parse tree at all.
+        self.assertEqual(None, div_copy.parent)
+        self.assertEqual(None, div_copy.previous_element)
+        self.assertEqual(None, div_copy.find(string='Bar').next_element)
+        self.assertNotEqual(None, div.find(string='Bar').next_element)

 class TestSubstitutions(SoupTest):

@@ -1366,7 +1454,7 @@ class TestSubstitutions(SoupTest):
   console.log("< < hey > > ");
  </script>
 """
-        encoded = BeautifulSoup(doc).encode()
+        encoded = BeautifulSoup(doc, 'html.parser').encode()
        self.assertTrue(b"< < hey > >" in encoded)

    def test_formatter_skips_style_tag_for_html_documents(self):
@@ -1375,7 +1463,7 @@ class TestSubstitutions(SoupTest):
   console.log("< < hey > > ");
  </style>
 """
-        encoded = BeautifulSoup(doc).encode()
+        encoded = BeautifulSoup(doc, 'html.parser').encode()
        self.assertTrue(b"< < hey > >" in encoded)

    def test_prettify_leaves_preformatted_text_alone(self):
@@ -1387,7 +1475,7 @@ class TestSubstitutions(SoupTest):
            soup.div.prettify())

    def test_prettify_accepts_formatter(self):
-        soup = BeautifulSoup("<html><body>foo</body></html>")
+        soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
        pretty = soup.prettify(formatter = lambda x: x.upper())
        self.assertTrue("FOO" in pretty)

@@ -1484,6 +1572,14 @@ class TestEncoding(SoupTest):
        self.assertEqual(
            u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())

+    def test_repr(self):
+        html = u"<b>\N{SNOWMAN}</b>"
+        soup = self.soup(html)
+        if PY3K:
+            self.assertEqual(html, repr(soup))
+        else:
+            self.assertEqual(b'<b>\\u2603</b>', repr(soup))
+
 class TestNavigableStringSubclasses(SoupTest):

    def test_cdata(self):
@@ -1522,6 +1618,9 @@ class TestNavigableStringSubclasses(SoupTest):
        soup.insert(1, doctype)
        self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n")

+    def test_declaration(self):
+        d = Declaration("foo")
+        self.assertEqual("<?foo?>", d.output_ready())

 class TestSoupSelector(TreeTest):

@@ -1534,7 +1633,7 @@ class TestSoupSelector(TreeTest):
 <link rel="stylesheet" href="blah.css" type="text/css" id="l1">
 </head>
 <body>
-
+<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
 <div id="main" class="fancy">
 <div id="inner">
 <h1 id="header1">An H1</h1>
@@ -1552,8 +1651,18 @@ class TestSoupSelector(TreeTest):
 <a href="#" id="s2a1">span2a1</a>
 </span>
 <span class="span3"></span>
+<custom-dashed-tag class="dashed" id="dash2"/>
+<div data-tag="dashedvalue" id="data1"/>
 </span>
 </div>
+<x id="xid">
+<z id="zida"/>
+<z id="zidab"/>
+<z id="zidac"/>
+</x>
+<y id="yid">
+<z id="zidb"/>
+</y>
 <p lang="en" id="lang-en">English</p>
 <p lang="en-gb" id="lang-en-gb">English UK</p>
 <p lang="en-us" id="lang-en-us">English US</p>
@@ -1565,7 +1674,7 @@ class TestSoupSelector(TreeTest):
 """

    def setUp(self):
-        self.soup = BeautifulSoup(self.HTML)
+        self.soup = BeautifulSoup(self.HTML, 'html.parser')

    def assertSelects(self, selector, expected_ids):
        el_ids = [el['id'] for el in self.soup.select(selector)]
@@ -1591,17 +1700,25 @@ class TestSoupSelector(TreeTest):

    def test_one_tag_many(self):
        els = self.soup.select('div')
-        self.assertEqual(len(els), 3)
+        self.assertEqual(len(els), 4)
        for div in els:
            self.assertEqual(div.name, 'div')

+        el = self.soup.select_one('div')
+        self.assertEqual('main', el['id'])
+
+    def test_select_one_returns_none_if_no_match(self):
+        match = self.soup.select_one('nonexistenttag')
+        self.assertEqual(None, match)
+
+
    def test_tag_in_tag_one(self):
        els = self.soup.select('div div')
-        self.assertSelects('div div', ['inner'])
+        self.assertSelects('div div', ['inner', 'data1'])

    def test_tag_in_tag_many(self):
        for selector in ('html div', 'html body div', 'body div'):
-            self.assertSelects(selector, ['main', 'inner', 'footer'])
+            self.assertSelects(selector, ['data1', 'main', 'inner', 'footer'])

    def test_tag_no_match(self):
        self.assertEqual(len(self.soup.select('del')), 0)
@@ -1609,6 +1726,20 @@ class TestSoupSelector(TreeTest):
    def test_invalid_tag(self):
        self.assertRaises(ValueError, self.soup.select, 'tag%t')

+    def test_select_dashed_tag_ids(self):
+        self.assertSelects('custom-dashed-tag', ['dash1', 'dash2'])
+
+    def test_select_dashed_by_id(self):
+        dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]')
+        self.assertEqual(dashed[0].name, 'custom-dashed-tag')
+        self.assertEqual(dashed[0]['id'], 'dash2')
+
+    def test_dashed_tag_text(self):
+        self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, u'Hello there.')
+
+    def test_select_dashed_matches_find_all(self):
+        self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag'))
+
    def test_header_tags(self):
        self.assertSelectMultiple(
            ('h1', ['header1']),
@@ -1709,6 +1840,7 @@ class TestSoupSelector(TreeTest):
            ('[id^="m"]', ['me', 'main']),
            ('div[id^="m"]', ['main']),
            ('a[id^="m"]', ['me']),
+            ('div[data-tag^="dashed"]', ['data1'])
        )

    def test_attribute_endswith(self):
@@ -1716,8 +1848,8 @@ class TestSoupSelector(TreeTest):
            ('[href$=".css"]', ['l1']),
            ('link[href$=".css"]', ['l1']),
            ('link[id$="1"]', ['l1']),
-            ('[id$="1"]', ['l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1']),
-            ('div[id$="1"]', []),
+            ('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
+            ('div[id$="1"]', ['data1']),
            ('[id$="noending"]', []),
        )

@@ -1730,7 +1862,6 @@ class TestSoupSelector(TreeTest):
            ('[rel*="notstyle"]', []),
            ('link[rel*="notstyle"]', []),
            ('link[href*="bla"]', ['l1']),
-            ('a[href*="http://"]', ['bob', 'me']),
            ('[href*="http://"]', ['bob', 'me']),
            ('[id*="p"]', ['pmulti', 'p1']),
            ('div[id*="m"]', ['main']),
@@ -1739,8 +1870,8 @@ class TestSoupSelector(TreeTest):
            ('[href*=".css"]', ['l1']),
            ('link[href*=".css"]', ['l1']),
            ('link[id*="1"]', ['l1']),
-            ('[id*="1"]', ['l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1']),
-            ('div[id*="1"]', []),
+            ('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
+            ('div[id*="1"]', ['data1']),
            ('[id*="noending"]', []),
            # New for this test
            ('[href*="."]', ['bob', 'me', 'l1']),
@@ -1748,6 +1879,7 @@ class TestSoupSelector(TreeTest):
            ('link[href*="."]', ['l1']),
            ('div[id*="n"]', ['main', 'inner']),
            ('div[id*="nn"]', ['inner']),
+            ('div[data-tag*="edval"]', ['data1'])
        )

    def test_attribute_exact_or_hypen(self):
@@ -1767,8 +1899,17 @@ class TestSoupSelector(TreeTest):
            ('p[class]', ['p1', 'pmulti']),
            ('[blah]', []),
            ('p[blah]', []),
+            ('div[data-tag]', ['data1'])
        )

+    def test_unsupported_pseudoclass(self):
+        self.assertRaises(
+            NotImplementedError, self.soup.select, "a:no-such-pseudoclass")
+
+        self.assertRaises(
+            NotImplementedError, self.soup.select, "a:nth-of-type(a)")
+
+
    def test_nth_of_type(self):
        # Try to select first paragraph
        els = self.soup.select('div#inner p:nth-of-type(1)')
@@ -1803,7 +1944,7 @@ class TestSoupSelector(TreeTest):
        selected = inner.select("div")
        # The <div id="inner"> tag was selected. The <div id="footer">
        # tag was not.
-        self.assertSelectsIDs(selected, ['inner'])
+        self.assertSelectsIDs(selected, ['inner', 'data1'])

    def test_overspecified_child_id(self):
        self.assertSelects(".fancy #inner", ['inner'])
@@ -1827,3 +1968,44 @@ class TestSoupSelector(TreeTest):

    def test_sibling_combinator_wont_select_same_tag_twice(self):
        self.assertSelects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr'])
+
+    # Test the selector grouping operator (the comma)
+    def test_multiple_select(self):
+        self.assertSelects('x, y', ['xid', 'yid'])
+
+    def test_multiple_select_with_no_space(self):
+        self.assertSelects('x,y', ['xid', 'yid'])
+
+    def test_multiple_select_with_more_space(self):
+        self.assertSelects('x,    y', ['xid', 'yid'])
+
+    def test_multiple_select_duplicated(self):
+        self.assertSelects('x, x', ['xid'])
+
+    def test_multiple_select_sibling(self):
+        self.assertSelects('x, y ~ p[lang=fr]', ['xid', 'lang-fr'])
+
+    def test_multiple_select_tag_and_direct_descendant(self):
+        self.assertSelects('x, y > z', ['xid', 'zidb'])
+
+    def test_multiple_select_direct_descendant_and_tags(self):
+        self.assertSelects('div > x, y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])
+
+    def test_multiple_select_indirect_descendant(self):
+        self.assertSelects('div x,y,  z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])
+
+    def test_invalid_multiple_select(self):
+        self.assertRaises(ValueError, self.soup.select, ',x, y')
+        self.assertRaises(ValueError, self.soup.select, 'x,,y')
+
+    def test_multiple_select_attrs(self):
+        self.assertSelects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb'])
+
+    def test_multiple_select_ids(self):
+        self.assertSelects('x, y > z[id=zida], z[id=zidab], z[id=zidb]', ['xid', 'zidb', 'zidab'])
+
+    def test_multiple_select_nested(self):
+        self.assertSelects('body > div > x, y > z', ['xid', 'zidb'])
+
+
+
@@ -4,7 +4,7 @@ Chardet: The Universal Character Encoding Detector
 Detects
 - ASCII, UTF-8, UTF-16 (2 variants), UTF-32 (4 variants)
 - Big5, GB2312, EUC-TW, HZ-GB-2312, ISO-2022-CN (Traditional and Simplified Chinese)
- - EUC-JP, SHIFT_JIS, ISO-2022-JP (Japanese)
+ - EUC-JP, SHIFT_JIS, CP932, ISO-2022-JP (Japanese)
 - EUC-KR, ISO-2022-KR (Korean)
 - KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, windows-1251 (Cyrillic)
 - ISO-8859-2, windows-1250 (Hungarian)
@@ -16,6 +16,14 @@ Detects

 Requires Python 2.6 or later

+Installation
+------------
+
+Install from `PyPI <https://pypi.python.org/pypi/chardet>`_::
+
+    pip install chardet
+
+
 Command-line Tool
 -----------------

@@ -31,7 +39,7 @@ About

 This is a continuation of Mark Pilgrim's excellent chardet. Previously, two
 versions needed to be maintained: one that supported python 2.x and one that
-supported python 3.x.  We've recently merged with `Ian Corduscano <https://github.com/sigmavirus24>`_'s
+supported python 3.x.  We've recently merged with `Ian Cordasco <https://github.com/sigmavirus24>`_'s
 `charade <https://github.com/sigmavirus24/charade>`_ fork, so now we have one
 coherent version that works for Python 2.6+.

@@ -15,7 +15,7 @@
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################

-__version__ = "2.2.1"
+__version__ = "2.3.0"
 from sys import version_info


@@ -12,34 +12,68 @@ Example::
 If no paths are provided, it takes its input from stdin.

 """
-from io import open
-from sys import argv, stdin

+from __future__ import absolute_import, print_function, unicode_literals
+
+import argparse
+import sys
+from io import open
+
+from chardet import __version__
 from chardet.universaldetector import UniversalDetector


-def description_of(file, name='stdin'):
-    """Return a string describing the probable encoding of a file."""
+def description_of(lines, name='stdin'):
+    """
+    Return a string describing the probable encoding of a file or
+    list of strings.
+
+    :param lines: The lines to get the encoding of.
+    :type lines: Iterable of bytes
+    :param name: Name of file or collection of lines
+    :type name: str
+    """
    u = UniversalDetector()
-    for line in file:
+    for line in lines:
        u.feed(line)
    u.close()
    result = u.result
    if result['encoding']:
-        return '%s: %s with confidence %s' % (name,
-                                              result['encoding'],
-                                              result['confidence'])
+        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
+                                                     result['confidence'])
    else:
-        return '%s: no result' % name
+        return '{0}: no result'.format(name)


-def main():
-    if len(argv) <= 1:
-        print(description_of(stdin))
-    else:
-        for path in argv[1:]:
-            with open(path, 'rb') as f:
-                print(description_of(f, path))
+def main(argv=None):
+    '''
+    Handles command line arguments and gets things started.
+
+    :param argv: List of arguments, as if specified on the command-line.
+                 If None, ``sys.argv[1:]`` is used instead.
+    :type argv: list of str
+    '''
+    # Get command line arguments
+    parser = argparse.ArgumentParser(
+        description="Takes one or more file paths and reports their detected \
+                     encodings",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        conflict_handler='resolve')
+    parser.add_argument('input',
+                        help='File whose encoding we would like to determine.',
+                        type=argparse.FileType('rb'), nargs='*',
+                        default=[sys.stdin])
+    parser.add_argument('--version', action='version',
+                        version='%(prog)s {0}'.format(__version__))
+    args = parser.parse_args(argv)
+
+    for f in args.input:
+        if f.isatty():
+            print("You are running chardetect interactively. Press " +
+                  "CTRL-D twice at the start of a blank line to signal the " +
+                  "end of your input. If you want help, run chardetect " +
+                  "--help\n", file=sys.stderr)
+        print(description_of(f, f.name))


 if __name__ == '__main__':
@@ -177,6 +177,12 @@ class JapaneseContextAnalysis:
        return -1, 1

 class SJISContextAnalysis(JapaneseContextAnalysis):
+    def __init__(self):
+        self.charset_name = "SHIFT_JIS"
+
+    def get_charset_name(self):
+        return self.charset_name
+
    def get_order(self, aBuf):
        if not aBuf:
            return -1, 1
@@ -184,6 +190,8 @@ class SJISContextAnalysis(JapaneseContextAnalysis):
        first_char = wrap_ord(aBuf[0])
        if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
            charLen = 2
+            if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
+                self.charset_name = "CP932"
        else:
            charLen = 1

@@ -129,11 +129,11 @@ class Latin1Prober(CharSetProber):
        if total < 0.01:
            confidence = 0.0
        else:
-            confidence = ((self._mFreqCounter[3] / total)
-                          - (self._mFreqCounter[1] * 20.0 / total))
+            confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0)
+                          / total)
        if confidence < 0.0:
            confidence = 0.0
        # lower the confidence of latin1 so that other more accurate
        # detector can take priority.
-        confidence = confidence * 0.5
+        confidence = confidence * 0.73
        return confidence
@@ -353,7 +353,7 @@ SJIS_cls = (
    2,2,2,2,2,2,2,2,  # 68 - 6f
    2,2,2,2,2,2,2,2,  # 70 - 77
    2,2,2,2,2,2,2,1,  # 78 - 7f
-    3,3,3,3,3,3,3,3,  # 80 - 87
+    3,3,3,3,3,2,2,3,  # 80 - 87
    3,3,3,3,3,3,3,3,  # 88 - 8f
    3,3,3,3,3,3,3,3,  # 90 - 97
    3,3,3,3,3,3,3,3,  # 98 - 9f
@@ -369,9 +369,8 @@ SJIS_cls = (
    2,2,2,2,2,2,2,2,  # d8 - df
    3,3,3,3,3,3,3,3,  # e0 - e7
    3,3,3,3,3,4,4,4,  # e8 - ef
-    4,4,4,4,4,4,4,4,  # f0 - f7
-    4,4,4,4,4,0,0,0   # f8 - ff
-)
+    3,3,3,3,3,3,3,3,  # f0 - f7
+    3,3,3,3,3,0,0,0)  # f8 - ff


 SJIS_st = (
@@ -571,5 +570,3 @@ UTF8SMModel = {'classTable': UTF8_cls,
               'stateTable': UTF8_st,
               'charLenTable': UTF8CharLenTable,
               'name': 'UTF-8'}
-
-# flake8: noqa
@@ -47,7 +47,7 @@ class SJISProber(MultiByteCharSetProber):
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
-        return "SHIFT_JIS"
+        return self._mContextAnalyzer.get_charset_name()

    def feed(self, aBuf):
        aLen = len(aBuf)
@@ -71,9 +71,9 @@ class UniversalDetector:

        if not self._mGotData:
            # If the data starts with BOM, we know it is UTF
-            if aBuf[:3] == codecs.BOM:
+            if aBuf[:3] == codecs.BOM_UTF8:
                # EF BB BF  UTF-8 with BOM
-                self.result = {'encoding': "UTF-8", 'confidence': 1.0}
+                self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
            elif aBuf[:4] == codecs.BOM_UTF32_LE:
                # FF FE 00 00  UTF-32, little-endian BOM
                self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
@@ -71,6 +71,18 @@ def findMissingSubtitles(list_item, _type="episode", internal=False, external=Tr


 def run():
+    token = sys.argv[1]
+    Plex.configuration.defaults.authentication(token)
+    sections = Plex["library"].sections()
+    #section = list(sections)[0]
+    #print section.title, section.path, dir(section), list(section._children)[0].path
+    #return
+    for container in sections:
+        print container.title
+        for location in container:
+            print location.path
+
+    return
    itemCount = 0
    dry_run = "--dry-run" in sys.argv
    with Plex.configuration.authentication("asdfasdfasdf"):
@@ -0,0 +1,12 @@
+from .ssafile import SSAFile
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+from . import time, formats, cli
+from .exceptions import *
+from .common import Color, VERSION
+
+#: Alias for :meth:`SSAFile.load()`.
+load = SSAFile.load
+
+#: Alias for :meth:`pysubs2.time.make_time()`.
+make_time = time.make_time
@@ -0,0 +1,7 @@
+import sys
+from .cli import Pysubs2CLI
+
+if __name__ == "__main__":
+    cli = Pysubs2CLI()
+    rv = cli(sys.argv[1:])
+    sys.exit(rv)
@@ -0,0 +1,165 @@
+from __future__ import unicode_literals, print_function
+import argparse
+import codecs
+import os
+import re
+import os.path as op
+import io
+from io import open
+import sys
+from textwrap import dedent
+from .formats import get_file_extension
+from .time import make_time
+from .ssafile import SSAFile
+from .common import PY3, VERSION
+
+
+def positive_float(s):
+    """
+    Convert *s* to a float and accept it only when it is greater than zero.
+
+    Used as an argparse ``type=`` callable. Any value for which ``value > 0``
+    does not hold (zero, negative numbers, NaN) is rejected with
+    :exc:`argparse.ArgumentTypeError`; a string that ``float()`` cannot parse
+    propagates the resulting :exc:`ValueError`.
+    """
+    value = float(s)
+    if value > 0:
+        return value
+    raise argparse.ArgumentTypeError("%r is not a positive number" % s)
+
+def character_encoding(s):
+    """
+    Accept *s* only if it names a codec known to Python.
+
+    Used as an argparse ``type=`` callable.
+
+    Returns:
+        *s* unchanged when ``codecs.lookup(s)`` succeeds.
+
+    Raises:
+        argparse.ArgumentTypeError: *s* is not a known character encoding.
+    """
+    try:
+        codecs.lookup(s)
+    except LookupError:
+        # ArgumentTypeError is what argparse expects from a ``type=`` callable;
+        # its message is shown to the user in the usage error.
+        raise argparse.ArgumentTypeError("%r is not a known character encoding" % s)
+    return s
+
+def time(s):
+    """
+    Turn a time specification such as ``'1m30s'`` or ``'0.5s'`` into a ``make_time()`` result.
+
+    Every ``<number><unit>`` pair found in *s* (unit is one of ``ms``, ``m``,
+    ``s``, ``h``) becomes a keyword argument for :func:`make_time`; when a unit
+    occurs more than once, the last occurrence wins. A unit that is not
+    preceded by a parseable number makes ``float()`` raise :exc:`ValueError`.
+    """
+    pairs = re.findall(r"(\d*\.?\d*)(ms|m|s|h)", s)
+    units = {unit: float(amount) for amount, unit in pairs}
+    return make_time(**units)
+
+
+def change_ext(path, ext):
+    """Return *path* with the extension split off by ``os.path.splitext`` replaced by *ext*."""
+    stem = op.splitext(path)[0]
+    return stem + ext
+
+
+class Pysubs2CLI(object):
+    """
+    Command-line interface of pysubs2.
+
+    Instances are callable: ``Pysubs2CLI()(argv)`` parses *argv*, processes the
+    given files (or standard input when no files are given) and returns a
+    process exit status.
+    """
+    def __init__(self):
+        """Build the argument parser and keep it as ``self.parser``."""
+        parser = self.parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
+                                                       prog="pysubs2",
+                                                       description=dedent("""
+                                                       The pysubs2 CLI for processing subtitle files.
+                                                       https://github.com/tkarabela/pysubs2
+                                                       """),
+                                                       epilog=dedent("""
+                                                       usage examples:
+                                                         python -m pysubs2 --to srt *.ass
+                                                         python -m pysubs2 --to microdvd --fps 23.976 *.ass
+                                                         python -m pysubs2 --shift 0.3s *.srt
+                                                         python -m pysubs2 --shift 0.3s <my_file.srt >retimed_file.srt
+                                                         python -m pysubs2 --shift-back 0.3s --output-dir retimed *.srt
+                                                         python -m pysubs2 --transform-framerate 25 23.976 *.srt"""))
+
+        parser.add_argument("files", nargs="*", metavar="FILE",
+                            help="Input subtitle files. Can be in SubStation Alpha (*.ass, *.ssa), SubRip (*.srt) or "
+                                 "MicroDVD (*.sub) formats. When no files are specified, pysubs2 will work as a pipe, "
+                                 "reading from standard input and writing to standard output.")
+
+        parser.add_argument("-v", "--version", action="version", version="pysubs2 %s" % VERSION)
+
+        parser.add_argument("-f", "--from", choices=["ass", "ssa", "srt", "microdvd", "json"], dest="input_format",
+                            help="By default, subtitle format is detected from the file. This option can be used to "
+                                 "skip autodetection and force specific format. Generally, it should never be needed.")
+        parser.add_argument("-t", "--to", choices=["ass", "ssa", "srt", "microdvd", "json"], dest="output_format",
+                            help="Convert subtitle files to given format. By default, each file is saved in its "
+                                 "original format.")
+        parser.add_argument("--input-enc", metavar="ENCODING", default="iso-8859-1", type=character_encoding,
+                            help="Character encoding for input files. By default, ISO-8859-1 is used for both "
+                                 "input and output, which should generally work (for 8-bit encodings).")
+        parser.add_argument("--output-enc", metavar="ENCODING", type=character_encoding,
+                            help="Character encoding for output files. By default, it is the same as input encoding. "
+                                 "If you wish to convert between encodings, make sure --input-enc is set correctly! "
+                                 "Otherwise, your output files will probably be corrupted. It's a good idea to "
+                                 "back up your files or use the -o option.")
+        parser.add_argument("--fps", metavar="FPS", type=positive_float,
+                            help="This argument specifies framerate for MicroDVD files. By default, framerate "
+                                 "is detected from the file. Use this when framerate specification is missing "
+                                 "or to force different framerate.")
+        parser.add_argument("-o", "--output-dir", metavar="DIR",
+                            help="Use this to save all files to given directory. By default, every file is saved to its parent directory, "
+                                 "ie. unless it's being saved in different subtitle format (and thus with different file extension), "
+                                 "it overwrites the original file.")
+
+        # The three retiming operations cannot be combined in a single run.
+        group = parser.add_mutually_exclusive_group()
+
+        group.add_argument("--shift", metavar="TIME", type=time,
+                           help="Delay all subtitles by given time amount. Time is specified like this: '1m30s', '0.5s', ...")
+        group.add_argument("--shift-back", metavar="TIME", type=time,
+                           help="The opposite of --shift (subtitles will appear sooner).")
+        group.add_argument("--transform-framerate", nargs=2, metavar=("FPS1", "FPS2"), type=positive_float,
+                           help="Multiply all timestamps by FPS1/FPS2 ratio.")
+
+    def __call__(self, argv):
+        """
+        Run the CLI with the given argument list (without the program name).
+
+        Returns:
+            int: The value returned by :meth:`main` -- 0 when no input was
+            skipped, 1 otherwise.
+
+        On Ctrl-C the process is terminated through ``sys.exit()`` with a
+        short message instead of a traceback.
+        """
+        try:
+            # Hand main()'s status back to the caller so it can be used as
+            # the process exit code.
+            return self.main(argv)
+        except KeyboardInterrupt:
+            # sys.exit() rather than the ``exit`` helper, which is only
+            # available when the site module has been loaded.
+            sys.exit("\nAborted by user.")
+
+    def main(self, argv):
+        """
+        Parse *argv* and convert/retime the requested subtitles.
+
+        With file arguments, each existing regular file is loaded, processed
+        and written back (to ``--output-dir`` if given, with a new extension
+        if ``--to`` is given); paths that do not exist or are not files are
+        reported and counted as errors. Without file arguments, subtitles are
+        read from standard input and written to standard output.
+
+        Returns:
+            int: 0 if no input was skipped, 1 otherwise.
+        """
+        args = self.parser.parse_args(argv)
+        errors = 0
+
+        if args.output_dir and not op.exists(args.output_dir):
+            os.makedirs(args.output_dir)
+
+        # Output encoding falls back to the input encoding.
+        if args.output_enc is None:
+            args.output_enc = args.input_enc
+
+        if args.files:
+            for path in args.files:
+                if not op.exists(path):
+                    print("Skipping", path, "(does not exist)")
+                    errors += 1
+                elif not op.isfile(path):
+                    print("Skipping", path, "(not a file)")
+                    errors += 1
+                else:
+                    with open(path, encoding=args.input_enc) as infile:
+                        subs = SSAFile.from_file(infile, args.input_format, args.fps)
+
+                    self.process(subs, args)
+
+                    if args.output_format is None:
+                        # Same format as the input: the file is written back under its own name.
+                        outpath = path
+                        output_format = subs.format
+                    else:
+                        ext = get_file_extension(args.output_format)
+                        outpath = change_ext(path, ext)
+                        output_format = args.output_format
+
+                    if args.output_dir is not None:
+                        # Keep only the file name and place it in the output directory.
+                        _, filename = op.split(outpath)
+                        outpath = op.join(args.output_dir, filename)
+
+                    with open(outpath, "w", encoding=args.output_enc) as outfile:
+                        subs.to_file(outfile, output_format, args.fps)
+        else:
+            # Pipe mode: wrap the standard streams so they use the requested encodings.
+            if PY3:
+                infile = io.TextIOWrapper(sys.stdin.buffer, args.input_enc)
+                outfile = io.TextIOWrapper(sys.stdout.buffer, args.output_enc)
+            else:
+                infile = io.TextIOWrapper(sys.stdin, args.input_enc)
+                outfile = io.TextIOWrapper(sys.stdout, args.output_enc)
+
+            subs = SSAFile.from_file(infile, args.input_format, args.fps)
+            self.process(subs, args)
+            output_format = args.output_format or subs.format
+            subs.to_file(outfile, output_format, args.fps)
+
+        return (0 if errors == 0 else 1)
+
+    @staticmethod
+    def process(subs, args):
+        """
+        Apply the retiming option selected on the command line to *subs*.
+
+        At most one of ``--shift``, ``--shift-back`` and
+        ``--transform-framerate`` is applied; with none of them set, *subs*
+        is left untouched.
+        """
+        if args.shift is not None:
+            subs.shift(ms=args.shift)
+        elif args.shift_back is not None:
+            subs.shift(ms=-args.shift_back)
+        elif args.transform_framerate is not None:
+            in_fps, out_fps = args.transform_framerate
+            subs.transform_framerate(in_fps, out_fps)
@@ -0,0 +1,28 @@
+from collections import namedtuple
+import sys
+
+_Color = namedtuple("Color", "r g b a")
+
+class Color(_Color):
+    """
+    ``(r, g, b, a)`` namedtuple describing an 8-bit RGB color plus alpha.
+
+    Every channel must be a member of ``range(256)``.
+    """
+    def __new__(cls, r, g, b, a=0):
+        """Validate all four channels, then build the tuple; *a* defaults to 0."""
+        allowed = range(256)
+        if any(channel not in allowed for channel in (r, g, b, a)):
+            raise ValueError("Color channels must have values 0-255")
+
+        return _Color.__new__(cls, r, g, b, a)
+
+#: Version of the pysubs2 library.
+VERSION = "0.2.1"
+
+
+PY3 = sys.version_info.major == 3
+
+if PY3:
+    text_type = str
+else:
+    text_type = unicode
@@ -0,0 +1,14 @@
+class Pysubs2Error(Exception):
+    """Base class for pysubs2 exceptions."""
+
+class UnknownFPSError(Pysubs2Error):
+    """Framerate was not specified and couldn't be inferred otherwise."""
+
+class UnknownFileExtensionError(Pysubs2Error):
+    """File extension does not pertain to any known subtitle format."""
+
+class UnknownFormatIdentifierError(Pysubs2Error):
+    """Unknown subtitle format identifier (ie. string like ``"srt"``)."""
+
+class FormatAutodetectionError(Pysubs2Error):
+    """Subtitle format is ambiguous or unknown."""
@@ -0,0 +1,76 @@
+class FormatBase(object):
+    """
+    Base class for subtitle format implementations.
+
+    How to implement a new subtitle format:
+
+    1. Create a subclass of FormatBase and override the methods you want to support.
+    2. Decide on a format identifier, like the ``"srt"`` or ``"microdvd"`` already used in the library.
+    3. Add your identifier and class to :data:`pysubs2.formats.FORMAT_IDENTIFIER_TO_FORMAT_CLASS`.
+    4. (optional) Add your file extension and class to :data:`pysubs2.formats.FILE_EXTENSION_TO_FORMAT_IDENTIFIER`.
+
+    After finishing these steps, you can call :meth:`SSAFile.load()` and :meth:`SSAFile.save()` with your
+    format, including autodetection from content and file extension (if you provided these).
+
+    """
+    @classmethod
+    def from_file(cls, subs, fp, format_, **kwargs):
+        """
+        Load subtitle file into an empty SSAFile.
+
+        If the parser autodetects framerate, set it as ``subs.fps``.
+
+        Arguments:
+            subs (SSAFile): An empty :class:`SSAFile`.
+            fp (file object): Text file object, the subtitle file.
+            format_ (str): Format identifier. Used when one format class
+                implements multiple formats (see :class:`SubstationFormat`).
+            kwargs: Extra options, e.g. `fps`.
+
+        Returns:
+            None
+
+        Raises:
+            pysubs2.exceptions.UnknownFPSError: Framerate was not provided and cannot
+                be detected.
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError("Parsing is not supported for this format")
+
+    @classmethod
+    def to_file(cls, subs, fp, format_, **kwargs):
+        """
+        Write SSAFile into a file.
+
+        If you need framerate and it is not passed in keyword arguments,
+        use ``subs.fps``.
+
+        Arguments:
+            subs (SSAFile): Subtitle file to write.
+            fp (file object): Text file object used as output.
+            format_ (str): Format identifier of desired output format.
+                Used when one format class implements multiple formats
+                (see :class:`SubstationFormat`).
+            kwargs: Extra options, e.g. `fps`.
+
+        Returns:
+            None
+
+        Raises:
+            pysubs2.exceptions.UnknownFPSError: Framerate was not provided and
+                ``subs.fps is None``.
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError("Writing is not supported for this format")
+
+    @classmethod
+    def guess_format(cls, text):
+        """
+        Return format identifier of recognized format, or None.
+
+        The base implementation recognizes nothing and always returns None.
+
+        Arguments:
+            text (str): Content of subtitle file. When the file is long,
+                this may be only its first few thousand characters.
+
+        Returns:
+            format identifier (e.g. ``"srt"``) or None (unknown format)
+        """
+        return None
@@ -0,0 +1,64 @@
+from .formatbase import FormatBase
+from .microdvd import MicroDVDFormat
+from .subrip import SubripFormat
+from .jsonformat import JSONFormat
+from .substation import SubstationFormat
+from .exceptions import *
+
+#: Dict mapping file extensions to format identifiers.
+FILE_EXTENSION_TO_FORMAT_IDENTIFIER = {
+    ".srt": "srt",
+    ".ass": "ass",
+    ".ssa": "ssa",
+    ".sub": "microdvd",
+    ".json": "json"
+}
+
+#: Dict mapping format identifiers to implementations (FormatBase subclasses).
+FORMAT_IDENTIFIER_TO_FORMAT_CLASS = {
+    "srt": SubripFormat,
+    "ass": SubstationFormat,
+    "ssa": SubstationFormat,
+    "microdvd": MicroDVDFormat,
+    "json": JSONFormat
+}
+
+def get_format_class(format_):
+    """
+    Look up the format class (a FormatBase subclass) registered for *format_*.
+
+    Raises:
+        UnknownFormatIdentifierError: *format_* is not a registered identifier.
+    """
+    try:
+        impl = FORMAT_IDENTIFIER_TO_FORMAT_CLASS[format_]
+    except KeyError:
+        raise UnknownFormatIdentifierError(format_)
+    else:
+        return impl
+
+def get_format_identifier(ext):
+    """
+    Look up the format identifier registered for the file extension *ext*.
+
+    Raises:
+        UnknownFileExtensionError: *ext* is not a registered file extension.
+    """
+    try:
+        identifier = FILE_EXTENSION_TO_FORMAT_IDENTIFIER[ext]
+    except KeyError:
+        raise UnknownFileExtensionError(ext)
+    else:
+        return identifier
+
+def get_file_extension(format_):
+    """
+    Return a file extension registered for the format identifier *format_*.
+
+    Raises:
+        UnknownFormatIdentifierError: *format_* is not a registered identifier.
+        RuntimeError: *format_* is registered but no file extension maps to it.
+    """
+    if format_ not in FORMAT_IDENTIFIER_TO_FORMAT_CLASS:
+        raise UnknownFormatIdentifierError(format_)
+
+    matching = [extension
+                for extension, identifier in FILE_EXTENSION_TO_FORMAT_IDENTIFIER.items()
+                if identifier == format_]
+    if matching:
+        return matching[0]
+
+    raise RuntimeError("No file extension for format %r" % format_)
+
+def autodetect_format(content):
+    """
+    Ask every registered format class to recognize *content*.
+
+    Returns:
+        The single format identifier that was guessed.
+
+    Raises:
+        FormatAutodetectionError: No format, or more than one distinct
+            format, recognized the content.
+    """
+    guesses = (impl.guess_format(content)
+               for impl in FORMAT_IDENTIFIER_TO_FORMAT_CLASS.values())
+    formats = {guess for guess in guesses if guess is not None}
+
+    if not formats:
+        raise FormatAutodetectionError("No suitable formats")
+    if len(formats) > 1:
+        raise FormatAutodetectionError("Multiple suitable formats (%r)" % formats)
+    return formats.pop()
@@ -0,0 +1,46 @@
+from __future__ import unicode_literals, print_function
+
+import json
+from .common import Color, PY3
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+from .formatbase import FormatBase
+
+
+class JSONFormat(FormatBase):
+    """Subtitle format storing ``info``, ``styles`` and ``events`` in one JSON object."""
+    @classmethod
+    def guess_format(cls, text):
+        """Return ``"json"`` when *text* starts with ``{"``; otherwise return None."""
+        if text.startswith("{\""):
+            return "json"
+
+    @classmethod
+    def from_file(cls, subs, fp, format_, **kwargs):
+        """
+        Replace the info, styles and events of *subs* with the JSON document read from *fp*.
+
+        Style fields whose name contains ``"color"`` are rebuilt as
+        :class:`Color` instances; all other style fields are set as-is.
+        """
+        data = json.load(fp)
+
+        subs.info.clear()
+        subs.info.update(data["info"])
+
+        subs.styles.clear()
+        for name, fields in data["styles"].items():
+            subs.styles[name] = sty = SSAStyle()
+            for k, v in fields.items():
+                if "color" in k:
+                    # Colors are stored as plain JSON arrays; turn them back into Color tuples.
+                    setattr(sty, k, Color(*v))
+                else:
+                    setattr(sty, k, v)
+
+        subs.events = [SSAEvent(**fields) for fields in data["events"]]
+
+    @classmethod
+    def to_file(cls, subs, fp, format_, **kwargs):
+        """Serialize the info, styles and events of *subs* as one JSON object written to *fp*."""
+        data = {
+            "info": dict(**subs.info),
+            "styles": {name: sty.as_dict() for name, sty in subs.styles.items()},
+            "events": [ev.as_dict() for ev in subs.events]
+        }
+
+        if PY3:
+            json.dump(data, fp)
+        else:
+            # Serialize to a string first and coerce it to unicode before
+            # writing. Only ``data`` is passed: a second positional argument
+            # to json.dumps() would be taken as the ``skipkeys`` flag.
+            text = json.dumps(data)
+            fp.write(unicode(text))
@@ -0,0 +1,103 @@
+from __future__ import unicode_literals, print_function
+
+from functools import partial
+import re
+from .common import text_type
+from .exceptions import UnknownFPSError
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+from .formatbase import FormatBase
+from .substation import parse_tags
+from .time import ms_to_frames, frames_to_ms
+
+#: Matches a MicroDVD line.
+MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)")
+
+
+class MicroDVDFormat(FormatBase):
+    """Reader/writer for MicroDVD subtitles, whose lines have the form ``{start_frame}{end_frame}text``."""
+    @classmethod
+    def guess_format(cls, text):
+        """Return ``"microdvd"`` when any line of *text* matches ``MICRODVD_LINE``; otherwise return None."""
+        if any(map(MICRODVD_LINE.match, text.splitlines())):
+            return "microdvd"
+
+    @classmethod
+    def from_file(cls, subs, fp, format_, fps=None, **kwargs):
+        """
+        Append one event to *subs* for every MicroDVD line read from *fp*.
+
+        Lines that do not match ``MICRODVD_LINE`` are ignored. When *fps* is
+        None, the text of the first matching line must be a number: it is
+        taken as the framerate, stored in ``subs.fps``, and the line itself
+        is not turned into an event.
+
+        Raises:
+            UnknownFPSError: *fps* was not given and the first matching line
+                does not contain a number.
+        """
+        # The helpers do not depend on the current line, so they are defined
+        # once instead of on every loop iteration.
+        def style_replacer(match):
+            tags = [c for c in "biu" if c in match.group(0)]
+            return "{%s}" % "".join(r"\%s1" % c for c in tags)
+
+        def prepare_text(text):
+            # MicroDVD separates lines with "|"; SubStation uses the \N tag.
+            text = text.replace("|", r"\N")
+
+            # Translate MicroDVD control codes into SubStation override tags.
+            text = re.sub(r"\{[Yy]:[^}]+\}", style_replacer, text)
+            text = re.sub(r"\{[Ff]:([^}]+)\}", r"{\\fn\1}", text)
+            text = re.sub(r"\{[Ss]:([^}]+)\}", r"{\\fs\1}", text)
+            text = re.sub(r"\{P:(\d+),(\d+)\}", r"{\\pos(\1,\2)}", text)
+
+            return text.strip()
+
+        for line in fp:
+            match = MICRODVD_LINE.match(line)
+            if not match:
+                continue
+
+            fstart, fend, text = match.groups()
+            fstart, fend = map(int, (fstart, fend))
+
+            if fps is None:
+                # We don't know the framerate, but it is customary to include
+                # it as text of the first subtitle. In that case, we skip
+                # this auxiliary subtitle and proceed with reading.
+                try:
+                    fps = float(text)
+                    subs.fps = fps
+                    continue
+                except ValueError:
+                    raise UnknownFPSError("Framerate was not specified and "
+                                          "cannot be read from "
+                                          "the MicroDVD file.")
+
+            start, end = map(partial(frames_to_ms, fps=fps), (fstart, fend))
+
+            ev = SSAEvent(start=start, end=end, text=prepare_text(text))
+            subs.append(ev)
+
+    @classmethod
+    def to_file(cls, subs, fp, format_, fps=None, write_fps_declaration=True, **kwargs):
+        """
+        Write the non-comment events of *subs* to *fp* as MicroDVD lines.
+
+        *fps* falls back to ``subs.fps``. When *write_fps_declaration* is
+        true, a first line carrying the framerate as its text is written as
+        well. Negative frame numbers are clamped to 0.
+
+        Raises:
+            UnknownFPSError: Neither *fps* nor ``subs.fps`` is set.
+        """
+        if fps is None:
+            fps = subs.fps
+
+        if fps is None:
+            raise UnknownFPSError("Framerate must be specified when writing MicroDVD.")
+        to_frames = partial(ms_to_frames, fps=fps)
+
+        def is_entirely_italic(line):
+            style = subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE)
+            for fragment, sty in parse_tags(line.text, style, subs.styles):
+                fragment = fragment.replace(r"\h", " ")
+                fragment = fragment.replace(r"\n", "\n")
+                fragment = fragment.replace(r"\N", "\n")
+                if not sty.italic and fragment and not fragment.isspace():
+                    return False
+            return True
+
+        # insert an artificial first line telling the framerate
+        if write_fps_declaration:
+            subs.insert(0, SSAEvent(start=0, end=0, text=text_type(fps)))
+
+        try:
+            for line in (ev for ev in subs if not ev.is_comment):
+                text = "|".join(line.plaintext.splitlines())
+                if is_entirely_italic(line):
+                    text = "{Y:i}" + text
+
+                start, end = map(to_frames, (line.start, line.end))
+
+                # XXX warn on underflow?
+                if start < 0: start = 0
+                if end < 0: end = 0
+
+                print("{%d}{%d}%s" % (start, end, text), file=fp)
+        finally:
+            # remove the artificial framerate-telling line, even when writing
+            # failed, so the caller's subtitles are never left modified
+            if write_fps_declaration:
+                subs.pop(0)
@@ -0,0 +1,153 @@
+from __future__ import unicode_literals
+import re
+from .time import ms_to_str, make_time
+from .common import PY3
+
+
+class SSAEvent(object):
+    """
+    A SubStation Event, ie. one subtitle.
+
+    In SubStation, each subtitle consists of multiple "fields" like Start, End and Text.
+    These are exposed as attributes (note that they are lowercase; see :attr:`SSAEvent.FIELDS` for a list).
+    Additionally, there are some convenience properties like :attr:`SSAEvent.plaintext` or :attr:`SSAEvent.duration`.
+
+    This class defines an ordering with respect to (start, end) timestamps.
+    The ``==`` and ``!=`` operators follow the same rule and look at the
+    timestamps only; use :meth:`SSAEvent.equals` to compare all fields.
+
+    .. tip :: Use :func:`pysubs2.make_time()` to get times in milliseconds.
+
+    Example::
+
+        >>> ev = SSAEvent(start=make_time(s=1), end=make_time(s=2.5), text="Hello World!")
+
+    """
+    #: Matches one ``{...}`` override block in subtitle text.
+    OVERRIDE_SEQUENCE = re.compile(r"{[^}]*}")
+
+    #: All fields in SSAEvent.
+    FIELDS = frozenset([
+        "start", "end", "text", "marked", "layer", "style",
+        "name", "marginl", "marginr", "marginv", "effect", "type"
+    ])
+
+    def __init__(self, **fields):
+        """
+        Create an event with default field values, overridden by *fields*.
+
+        Raises:
+            ValueError: A keyword is not one of :attr:`SSAEvent.FIELDS`.
+        """
+        self.start = 0 #: Subtitle start time (in milliseconds)
+        self.end = 10000 #: Subtitle end time (in milliseconds)
+        self.text = "" #: Text of subtitle (with SubStation override tags)
+        self.marked = False #: (SSA only)
+        self.layer = 0 #: Layer number, 0 is the lowest layer (ASS only)
+        self.style = "Default" #: Style name
+        self.name = "" #: Actor name
+        self.marginl = 0 #: Left margin
+        self.marginr = 0 #: Right margin
+        self.marginv = 0 #: Vertical margin
+        self.effect = "" #: Line effect
+        self.type = "Dialogue" #: Line type (Dialogue/Comment)
+
+        # Only known field names may be set; anything else is rejected.
+        for k, v in fields.items():
+            if k in self.FIELDS:
+                setattr(self, k, v)
+            else:
+                raise ValueError("SSAEvent has no field named %r" % k)
+
+    @property
+    def duration(self):
+        """
+        Subtitle duration in milliseconds (read/write property).
+
+        Writing to this property adjusts :attr:`SSAEvent.end`.
+        Setting negative durations raises :exc:`ValueError`.
+        """
+        return self.end - self.start
+
+    @duration.setter
+    def duration(self, ms):
+        if ms >= 0:
+            self.end = self.start + ms
+        else:
+            raise ValueError("Subtitle duration cannot be negative")
+
+    @property
+    def is_comment(self):
+        """
+        When true, the subtitle is a comment, ie. not visible (read/write property).
+
+        Setting this property is equivalent to changing
+        :attr:`SSAEvent.type` to ``"Dialogue"`` or ``"Comment"``.
+        """
+        return self.type == "Comment"
+
+    @is_comment.setter
+    def is_comment(self, value):
+        if value:
+            self.type = "Comment"
+        else:
+            self.type = "Dialogue"
+
+    @property
+    def plaintext(self):
+        """
+        Subtitle text as multi-line string with no tags (read/write property).
+
+        Writing to this property replaces :attr:`SSAEvent.text` with given plain
+        text. Newlines are converted to ``\\N`` tags.
+        """
+        text = self.text
+        # Drop every {...} override block, then expand the \h, \n and \N escapes.
+        text = self.OVERRIDE_SEQUENCE.sub("", text)
+        text = text.replace(r"\h", " ")
+        text = text.replace(r"\n", "\n")
+        text = text.replace(r"\N", "\n")
+        return text
+
+    @plaintext.setter
+    def plaintext(self, text):
+        self.text = text.replace("\n", r"\N")
+
+    def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None):
+        """
+        Shift start and end times.
+
+        See :meth:`SSAFile.shift()` for full description.
+
+        """
+        delta = make_time(h=h, m=m, s=s, ms=ms, frames=frames, fps=fps)
+        self.start += delta
+        self.end += delta
+
+    def copy(self):
+        """Return a copy of the SSAEvent."""
+        return SSAEvent(**self.as_dict())
+
+    def as_dict(self):
+        """Return a dict mapping every name in :attr:`SSAEvent.FIELDS` to its current value."""
+        return {field: getattr(self, field) for field in self.FIELDS}
+
+    def equals(self, other):
+        """Field-based equality for SSAEvents."""
+        if isinstance(other, SSAEvent):
+            return self.as_dict() == other.as_dict()
+        else:
+            raise TypeError("Cannot compare to non-SSAEvent object")
+
+    def __eq__(self, other):
+        # Timing-based equality: events are equal when both their start and
+        # end times match; text and all other fields are ignored here (see
+        # equals() for a comparison of every field).
+        return self.start == other.start and self.end == other.end
+
+    def __ne__(self, other):
+        # Negation of __eq__: true when the start or the end time differs.
+        return self.start != other.start or self.end != other.end
+
+    # The ordering operators below compare (start, end) tuples.
+    def __lt__(self, other):
+        return (self.start, self.end) < (other.start, other.end)
+
+    def __le__(self, other):
+        return (self.start, self.end) <= (other.start, other.end)
+
+    def __gt__(self, other):
+        return (self.start, self.end) > (other.start, other.end)
+
+    def __ge__(self, other):
+        return (self.start, self.end) >= (other.start, other.end)
+
+    def __repr__(self):
+        s = "<SSAEvent type={self.type} start={start} end={end} text='{self.text}'>".format(
+                self=self, start=ms_to_str(self.start), end=ms_to_str(self.end))
+        # On Python 2 the result is returned as UTF-8 encoded bytes.
+        if not PY3: s = s.encode("utf-8")
+        return s
@@ -0,0 +1,419 @@
+from __future__ import print_function, unicode_literals, division
+from collections import MutableSequence, OrderedDict
+import io
+from io import open
+from itertools import starmap, chain
+import os.path
+import logging
+from .formats import autodetect_format, get_format_class, get_format_identifier
+from .substation import is_valid_field_content
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+from .time import make_time, ms_to_str
+from .common import PY3
+
+
+class SSAFile(MutableSequence):
+    """
+    Subtitle file in SubStation Alpha format.
+
+    This class has a list-like interface which exposes :attr:`SSAFile.events`,
+    list of subtitles in the file::
+
+        subs = SSAFile.load("subtitles.srt")
+
+        for line in subs:
+            print(line.text)
+
+        subs.insert(0, SSAEvent(start=0, end=make_time(s=2.5), text="New first subtitle"))
+
+        del subs[0]
+
+    """
+
+    DEFAULT_INFO = OrderedDict([
+        ("WrapStyle", "0"),
+        ("ScaledBorderAndShadow", "yes"),
+        ("Collisions", "Normal")])
+
+    def __init__(self):
+        """Create an empty subtitle file with the default style and default script info."""
+        #: List of :class:`SSAEvent` instances, ie. individual subtitles.
+        self.events = []
+        #: Dict of :class:`SSAStyle` instances.
+        self.styles = OrderedDict()
+        self.styles["Default"] = SSAStyle.DEFAULT_STYLE.copy()
+        #: Dict with script metadata, ie. ``[Script Info]``.
+        self.info = self.DEFAULT_INFO.copy()
+        #: Dict with Aegisub project, ie. ``[Aegisub Project Garbage]``.
+        self.aegisub_project = OrderedDict()
+        #: Framerate used when reading the file, if applicable.
+        self.fps = None
+        #: Format of source subtitle file, if applicable, eg. ``"srt"``.
+        self.format = None
+
+    # ------------------------------------------------------------------------
+    # I/O methods
+    # ------------------------------------------------------------------------
+
+    @classmethod
+    def load(cls, path, encoding="utf-8", format_=None, fps=None, **kwargs):
+        """
+        Read a subtitle file from disk and parse it into a new instance.
+
+        The file is opened in text mode with the given encoding and handed
+        over to :meth:`SSAFile.from_file()`.
+
+        Arguments:
+            path (str): Path to subtitle file.
+            encoding (str): Character encoding of input file
+                (UTF-8 unless specified otherwise).
+            format_ (str): Optional format identifier (eg. `"srt"`, `"ass"`)
+                forcing a specific parser. When omitted, the format is
+                autodetected from file contents; this argument should be
+                rarely needed.
+            fps (float): Framerate for frame-based formats (MicroDVD);
+                other formats ignore it. The framerate might be detected
+                from the file, in which case it need not be given here
+                (when given, it overrides autodetection).
+            kwargs: Extra options for the parser.
+
+        Returns:
+            SSAFile
+
+        Raises:
+            IOError
+            UnicodeDecodeError
+            pysubs2.exceptions.UnknownFPSError
+            pysubs2.exceptions.UnknownFormatIdentifierError
+            pysubs2.exceptions.FormatAutodetectionError
+
+        Note:
+            The autodetected format and/or framerate are available afterwards
+            as :attr:`SSAFile.format` and :attr:`SSAFile.fps`.
+
+        Example:
+            >>> subs1 = pysubs2.load("subrip-subtitles.srt")
+            >>> subs2 = pysubs2.load("microdvd-subtitles.sub", fps=23.976)
+
+        """
+        with open(path, encoding=encoding) as stream:
+            subs = cls.from_file(stream, format_, fps=fps, **kwargs)
+        return subs
+
+    @classmethod
+    def from_string(cls, string, format_=None, fps=None, **kwargs):
+        """
+        Parse subtitles held in a string.
+
+        The string is wrapped in an in-memory text stream and passed to
+        :meth:`SSAFile.from_file()`. See :meth:`SSAFile.load()` for full
+        description of the remaining arguments.
+
+        Arguments:
+            string (str): Subtitle file in a string. Note that the string
+                must be Unicode (in Python 2).
+
+        Returns:
+            SSAFile
+
+        Example:
+            >>> text = '''
+            ... 1
+            ... 00:00:00,000 --> 00:00:05,000
+            ... An example SubRip file.
+            ... '''
+            >>> subs = SSAFile.from_string(text)
+
+        """
+        return cls.from_file(io.StringIO(string), format_, fps=fps, **kwargs)
+
+    @classmethod
+    def from_file(cls, fp, format_=None, fps=None, **kwargs):
+        """
+        Parse subtitles from an open text-mode file object.
+
+        See :meth:`SSAFile.load()` for full description.
+
+        Note:
+            This is a low-level method. Usually, one of :meth:`SSAFile.load()`
+            or :meth:`SSAFile.from_string()` is preferable.
+
+        Arguments:
+            fp (file object): A file object, ie. :class:`io.TextIOBase` instance.
+                Note that the file must be opened in text mode (as opposed to binary).
+
+        Returns:
+            SSAFile
+
+        """
+        if format_ is None:
+            # The format has to be sniffed from the content before the real
+            # parser can run. The stream may be a pipe that cannot be read
+            # twice, so everything is buffered and replayed from memory;
+            # only the first 10000 characters are used for detection.
+            content = fp.read()
+            format_ = autodetect_format(content[:10000])
+            fp = io.StringIO(content)
+
+        parser = get_format_class(format_)
+        subs = cls() # an empty subtitle file
+        subs.format, subs.fps = format_, fps
+        parser.from_file(subs, fp, format_, fps=fps, **kwargs)
+        return subs
+
+    def save(self, path, encoding="utf-8", format_=None, fps=None, **kwargs):
+        """
+        Write the subtitles to a file on disk.
+
+        The file is opened for writing in text mode with the given encoding
+        and filled by :meth:`SSAFile.to_file()`.
+
+        Arguments:
+            path (str): Path to subtitle file.
+            encoding (str): Character encoding of output file.
+                Defaults to UTF-8, which should be fine for most purposes.
+            format_ (str): Optional, specifies desired subtitle format
+                (eg. `"srt"`, `"ass"`). When omitted, the format is looked up
+                from the lowercased file extension, so this argument is
+                rarely needed.
+            fps (float): Framerate for frame-based formats (MicroDVD),
+                for other formats this argument is ignored. When omitted,
+                :attr:`SSAFile.fps` value is used (ie. the framerate used
+                for loading the file, if any). When the :class:`SSAFile`
+                wasn't loaded from MicroDVD, or if you wish save it with
+                different framerate, use this argument. See also
+                :meth:`SSAFile.transform_framerate()` for fixing bad
+                frame-based to time-based conversions.
+            kwargs: Extra options for the writer.
+
+        Raises:
+            IOError
+            UnicodeEncodeError
+            pysubs2.exceptions.UnknownFPSError
+            pysubs2.exceptions.UnknownFormatIdentifierError
+            pysubs2.exceptions.UnknownFileExtensionError
+
+        """
+        if format_ is None:
+            _, extension = os.path.splitext(path)
+            format_ = get_format_identifier(extension.lower())
+
+        with open(path, "w", encoding=encoding) as stream:
+            self.to_file(stream, format_, fps=fps, **kwargs)
+
+    def to_string(self, format_, fps=None, **kwargs):
+        """
+        Serialize the subtitles into a string.
+
+        Writes into an in-memory text buffer via :meth:`SSAFile.to_file()`
+        and returns the buffer contents.
+        See :meth:`SSAFile.save()` for full description.
+
+        Returns:
+            str
+
+        """
+        buffer = io.StringIO()
+        self.to_file(buffer, format_, fps=fps, **kwargs)
+        return buffer.getvalue()
+
+    def to_file(self, fp, format_, fps=None, **kwargs):
+        """
+        Write the subtitles to an open text-mode file object.
+
+        The writer class is looked up from ``format_`` and does the work.
+        See :meth:`SSAFile.save()` for full description.
+
+        Note:
+            This is a low-level method. Usually, one of :meth:`SSAFile.save()`
+            or :meth:`SSAFile.to_string()` is preferable.
+
+        Arguments:
+            fp (file object): A file object, ie. :class:`io.TextIOBase` instance.
+                Note that the file must be opened in text mode (as opposed to binary).
+
+        """
+        get_format_class(format_).to_file(self, fp, format_, fps=fps, **kwargs)
+
+    # ------------------------------------------------------------------------
+    # Retiming subtitles
+    # ------------------------------------------------------------------------
+
+    def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None):
+        """
+        Move every subtitle in time by the same offset.
+
+        The offset is either time-based (the default) or frame-based. For
+        a frame-based shift give both frames and fps; h, m, s, ms are then
+        ignored.
+
+        Arguments:
+            h, m, s, ms: Integer or float values, may be positive or negative.
+            frames (int): When specified, must be an integer number of frames.
+                May be positive or negative. fps must be also specified.
+            fps (float): When specified, must be a positive number.
+
+        Raises:
+            ValueError: Invalid fps or missing number of frames.
+
+        """
+        offset = make_time(h=h, m=m, s=s, ms=ms, frames=frames, fps=fps)
+        for event in self:
+            event.start, event.end = event.start + offset, event.end + offset
+
+    def transform_framerate(self, in_fps, out_fps):
+        """
+        Multiply all timestamps by in_fps/out_fps, rounding to whole milliseconds.
+
+        Can be used to fix files converted from frame-based to time-based
+        with wrongly assumed framerate.
+
+        Arguments:
+            in_fps (float)
+            out_fps (float)
+
+        Raises:
+            ValueError: Non-positive framerate given.
+
+        """
+        if in_fps <= 0 or out_fps <= 0:
+            message = "Framerates must be positive, cannot transform %f -> %f" % (in_fps, out_fps)
+            raise ValueError(message)
+
+        factor = in_fps / out_fps
+        for event in self:
+            event.start, event.end = [int(round(t * factor)) for t in (event.start, event.end)]
+
+    # ------------------------------------------------------------------------
+    # Working with styles
+    # ------------------------------------------------------------------------
+
+    def rename_style(self, old_name, new_name):
+        """
+        Give a style a new name and update the events that use it.
+
+        Arguments:
+            old_name (str): Style to be renamed.
+            new_name (str): New name for the style (must be unused).
+
+        Raises:
+            KeyError: No style named old_name.
+            ValueError: new_name is not a legal name (cannot use commas)
+                or new_name is taken.
+
+        """
+        styles = self.styles
+        if old_name not in styles:
+            raise KeyError("Style %r not found" % old_name)
+        if new_name in styles:
+            raise ValueError("There is already a style called %r" % new_name)
+        if not is_valid_field_content(new_name):
+            raise ValueError("%r is not a valid name" % new_name)
+
+        # Re-insert the style object under its new key.
+        styles[new_name] = styles.pop(old_name)
+
+        # XXX also handle \r override tag
+        for event in self:
+            if event.style == old_name:
+                event.style = new_name
+
+    def import_styles(self, subs, overwrite=True):
+        """
+        Copy style entries from another SSAFile into this one.
+
+        The style objects themselves are not copied; both files refer to
+        the same instances afterwards.
+
+        Arguments:
+            subs (SSAFile): Subtitle file imported from.
+            overwrite (bool): On name conflict, use style from the other file
+                (default: True).
+
+        Raises:
+            TypeError: subs is not an SSAFile.
+
+        """
+        if not isinstance(subs, SSAFile):
+            raise TypeError("Must supply an SSAFile.")
+
+        own_styles = self.styles
+        for name, style in subs.styles.items():
+            if overwrite or name not in own_styles:
+                own_styles[name] = style
+
+    # ------------------------------------------------------------------------
+    # Helper methods
+    # ------------------------------------------------------------------------
+
+    def equals(self, other):
+        """
+        Equality of two SSAFiles.
+
+        Compares :attr:`SSAFile.info`, :attr:`SSAFile.styles` and
+        :attr:`SSAFile.events`, in this order, and stops at the first
+        difference. Order of entries in OrderedDicts does not matter.
+        "ScriptType" key in info is considered an implementation detail and
+        thus ignored.
+
+        Useful mostly in unit tests. Differences are logged at DEBUG level.
+
+        Raises:
+            TypeError: other is not an SSAFile.
+
+        """
+        if not isinstance(other, SSAFile):
+            raise TypeError("Cannot compare to non-SSAFile object")
+
+        info_keys = set(chain(self.info.keys(), other.info.keys())) - {"ScriptType"}
+        for key in info_keys:
+            mine, theirs = self.info.get(key), other.info.get(key)
+            if mine is None:
+                logging.debug("%r missing in self.info", key)
+                return False
+            if theirs is None:
+                logging.debug("%r missing in other.info", key)
+                return False
+            if mine != theirs:
+                logging.debug("info %r differs (self=%r, other=%r)", key, mine, theirs)
+                return False
+
+        style_names = set(chain(self.styles.keys(), other.styles.keys()))
+        for key in style_names:
+            mine, theirs = self.styles.get(key), other.styles.get(key)
+            if mine is None:
+                logging.debug("%r missing in self.styles", key)
+                return False
+            if theirs is None:
+                logging.debug("%r missing in other.styles", key)
+                return False
+            if mine != theirs:
+                for field in mine.FIELDS:
+                    if getattr(mine, field) != getattr(theirs, field):
+                        logging.debug("difference in field %r", field)
+                logging.debug("style %r differs (self=%r, other=%r)", key, mine.as_dict(), theirs.as_dict())
+                return False
+
+        if len(self) != len(other):
+            logging.debug("different # of subtitles (self=%d, other=%d)", len(self), len(other))
+            return False
+
+        for index, (mine, theirs) in enumerate(zip(self.events, other.events)):
+            if mine.equals(theirs):
+                continue
+            for field in mine.FIELDS:
+                if getattr(mine, field) != getattr(theirs, field):
+                    logging.debug("difference in field %r", field)
+            logging.debug("event %d differs (self=%r, other=%r)", index, mine.as_dict(), theirs.as_dict())
+            return False
+
+        return True
+
+    def __repr__(self):
+        """Return a summary with event and style counts (UTF-8 encoded on Python 2)."""
+        style_count = len(self.styles)
+        if not self.events:
+            text = "<SSAFile with 0 events and %d styles>" % style_count
+        else:
+            # The last timestamp is the largest end time of any event.
+            latest_end = max(ev.end for ev in self)
+            text = "<SSAFile with %d events and %d styles, last timestamp %s>" % \
+                    (len(self), style_count, ms_to_str(latest_end))
+
+        return text if PY3 else text.encode("utf-8")
+
+    # ------------------------------------------------------------------------
+    # MutableSequence implementation + sort()
+    # ------------------------------------------------------------------------
+
+    def sort(self):
+        """Sort subtitles in-place, using the ordering defined by the events themselves."""
+        events = self.events
+        events.sort()
+
+    def __getitem__(self, item):
+        """Return the entry (or slice) of :attr:`events` selected by item."""
+        events = self.events
+        return events[item]
+
+    def __setitem__(self, key, value):
+        """Replace the event at key; anything but an SSAEvent is rejected with TypeError."""
+        if not isinstance(value, SSAEvent):
+            raise TypeError("SSAFile.events must contain only SSAEvent objects")
+        self.events[key] = value
+
+    def __delitem__(self, key):
+        """Remove the entry (or slice) of :attr:`events` selected by key."""
+        events = self.events
+        del events[key]
+
+    def __len__(self):
+        """Return the number of entries in :attr:`events`."""
+        events = self.events
+        return len(events)
+
+    def insert(self, index, value):
+        """Insert an event before index; anything but an SSAEvent is rejected with TypeError."""
+        if not isinstance(value, SSAEvent):
+            raise TypeError("SSAFile.events must contain only SSAEvent objects")
+        self.events.insert(index, value)
@@ -0,0 +1,86 @@
+from __future__ import unicode_literals
+from .common import Color, PY3
+
+
+class SSAStyle(object):
+    """
+    A SubStation Style.
+
+    In SubStation, each subtitle (:class:`SSAEvent`) is associated with a style which defines its font, color, etc.
+    Like a subtitle event, a style also consists of "fields"; see :attr:`SSAStyle.FIELDS` for a list
+    (note the spelling, which is different from SubStation proper).
+
+    Subtitles and styles are connected via an :class:`SSAFile` they belong to. :attr:`SSAEvent.style` is a string
+    which is (or should be) a key in the :attr:`SSAFile.styles` dict. Note that style name is stored separately;
+    a given :class:`SSAStyle` instance has no particular name itself.
+
+    This class defines equality (equality of all fields).
+
+    """
+    DEFAULT_STYLE = None
+
+    #: All fields in SSAStyle.
+    FIELDS = frozenset([
+        "fontname", "fontsize", "primarycolor", "secondarycolor",
+        "tertiarycolor", "outlinecolor", "backcolor",
+        "bold", "italic", "underline", "strikeout",
+        "scalex", "scaley", "spacing", "angle", "borderstyle",
+        "outline", "shadow", "alignment",
+        "marginl", "marginr", "marginv", "alphalevel", "encoding"
+    ])
+
+    def __init__(self, **fields):
+        """
+        Create a style with default field values, then apply the overrides
+        passed as keyword arguments.
+
+        Raises:
+            ValueError: A keyword is not listed in :attr:`SSAStyle.FIELDS`.
+
+        """
+        self.fontname = "Arial" #: Font name
+        self.fontsize = 20.0 #: Font size (in pixels)
+        self.primarycolor = Color(255, 255, 255, 0) #: Primary color (:class:`pysubs2.Color` instance)
+        self.secondarycolor = Color(255, 0, 0, 0) #: Secondary color (:class:`pysubs2.Color` instance)
+        self.tertiarycolor = Color(0, 0, 0, 0) #: Tertiary color (:class:`pysubs2.Color` instance)
+        self.outlinecolor = Color(0, 0, 0, 0) #: Outline color (:class:`pysubs2.Color` instance)
+        self.backcolor = Color(0, 0, 0, 0) #: Back, ie. shadow color (:class:`pysubs2.Color` instance)
+        self.bold = False #: Bold
+        self.italic = False #: Italic
+        self.underline = False #: Underline (ASS only)
+        self.strikeout = False #: Strikeout (ASS only)
+        self.scalex = 100.0 #: Horizontal scaling (ASS only)
+        self.scaley = 100.0 #: Vertical scaling (ASS only)
+        self.spacing = 0.0 #: Letter spacing (ASS only)
+        self.angle = 0.0 #: Rotation (ASS only)
+        self.borderstyle = 1 #: Border style
+        self.outline = 2.0 #: Outline width (in pixels)
+        self.shadow = 2.0 #: Shadow depth (in pixels)
+        self.alignment = 2 #: Numpad-style alignment, eg. 7 is "top left" (that is, ASS alignment semantics)
+        self.marginl = 10 #: Left margin (in pixels)
+        self.marginr = 10 #: Right margin (in pixels)
+        self.marginv = 10 #: Vertical margin (in pixels)
+        self.alphalevel = 0 #: Old, unused SSA-only field
+        self.encoding = 1 #: Charset
+
+        # Apply caller-supplied overrides, rejecting unknown field names.
+        for name, value in fields.items():
+            if name not in self.FIELDS:
+                raise ValueError("SSAStyle has no field named %r" % name)
+            setattr(self, name, value)
+
+    def copy(self):
+        """Return a new SSAStyle built from this style's current field values."""
+        fields = self.as_dict()
+        return SSAStyle(**fields)
+
+    def as_dict(self):
+        """Return a dict mapping every name in ``FIELDS`` to its current value."""
+        return dict((name, getattr(self, name)) for name in self.FIELDS)
+
+    def __eq__(self, other):
+        """
+        Field-based equality: True when every field in ``FIELDS`` is equal.
+
+        Returns ``NotImplemented`` for non-SSAStyle operands, so that eg.
+        ``style == None`` is False instead of raising AttributeError.
+
+        """
+        if not isinstance(other, SSAStyle):
+            return NotImplemented
+        return self.as_dict() == other.as_dict()
+
+    def __ne__(self, other):
+        """Negation of ``==`` (spelled out because Python 2 does not derive it)."""
+        return not self == other
+
+    def __repr__(self):
+        """Return a summary with font size, bold/italic flags and font name (UTF-8 encoded on Python 2)."""
+        parts = ["<SSAStyle ", "%rpx " % self.fontsize]
+        if self.bold:
+            parts.append("bold ")
+        if self.italic:
+            parts.append("italic ")
+        parts.append("'%s'>" % self.fontname)
+        text = "".join(parts)
+        return text if PY3 else text.encode("utf-8")
+
+
+# The default style is itself an SSAStyle instance, so it can only be created
+# once the class exists; this replaces the None placeholder set in the class body.
+SSAStyle.DEFAULT_STYLE = SSAStyle()
@@ -0,0 +1,88 @@
+from __future__ import print_function, unicode_literals
+
+import re
+from .formatbase import FormatBase
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+from .substation import parse_tags
+from .time import ms_to_times, make_time, TIMESTAMP, timestamp_to_ms
+
+#: Largest timestamp allowed in SubRip, ie. 99:59:59,999.
+MAX_REPRESENTABLE_TIME = make_time(h=100) - 1
+
+def ms_to_timestamp(ms):
+    """
+    Convert ms to 'HH:MM:SS,mmm'.
+
+    Values below zero or above MAX_REPRESENTABLE_TIME are clamped to
+    the nearest representable bound.
+
+    """
+    # XXX throw on overflow/underflow?
+    if ms < 0:
+        ms = 0
+    elif ms > MAX_REPRESENTABLE_TIME:
+        ms = MAX_REPRESENTABLE_TIME
+    hours, minutes, seconds, millis = ms_to_times(ms)
+    return "%02d:%02d:%02d,%03d" % (hours, minutes, seconds, millis)
+
+
+class SubripFormat(FormatBase):
+    @classmethod
+    def guess_format(cls, text):
+        """
+        Return "srt" when text looks like SubRip, None otherwise.
+
+        Text counts as SubRip when some line contains exactly two
+        timestamps, unless it carries SubStation section headings.
+
+        """
+        if "[Script Info]" in text or "[V4+ Styles]" in text:
+            # disambiguation vs. SSA/ASS
+            return None
+
+        if any(len(TIMESTAMP.findall(row)) == 2 for row in text.splitlines()):
+            return "srt"
+        return None
+
+    @classmethod
+    def from_file(cls, subs, fp, format_, **kwargs):
+        """
+        Parse SubRip text from fp and store the result in subs.events.
+
+        A line containing exactly two timestamps starts a new subtitle; all
+        lines up to the next such line form its text. Lines before the first
+        timestamp line are dropped. Italic, strikeout and underline HTML tags
+        are converted to SubStation override tags, any other HTML-like tag is
+        removed and line breaks become SubStation hard newlines.
+
+        Arguments:
+            subs (SSAFile): File object to be filled; its events list
+                is replaced.
+            fp (file object): Text-mode file object to read from.
+            format_ (str): Not used by this parser.
+            kwargs: Not used by this parser.
+
+        """
+        timestamps = [] # (start, end)
+        following_lines = [] # contains lists of lines following each timestamp
+
+        for line in fp:
+            stamps = TIMESTAMP.findall(line)
+            if len(stamps) == 2: # timestamp line
+                start, end = map(timestamp_to_ms, stamps)
+                timestamps.append((start, end))
+                following_lines.append([])
+            elif timestamps:
+                following_lines[-1].append(line)
+
+        def prepare_text(lines):
+            # Backslashes in the replacement templates are doubled: re.sub()
+            # interprets escapes in the template and Python 3.7+ raises
+            # re.error on unknown ones such as \i, \s, \u or \N.
+            s = "".join(lines).strip()
+            s = re.sub(r"\n+ *\d+ *$", "", s) # strip number of next subtitle
+            s = re.sub(r"< *i *>", r"{\\i1}", s)
+            s = re.sub(r"< */ *i *>", r"{\\i0}", s)
+            s = re.sub(r"< *s *>", r"{\\s1}", s)
+            s = re.sub(r"< */ *s *>", r"{\\s0}", s)
+            s = re.sub(r"< *u *>", "{\\\\u1}", s) # not r" for Python 2.7 compat, triggers unicodeescape
+            s = re.sub(r"< */ *u *>", "{\\\\u0}", s)
+            s = re.sub(r"< */? *[a-zA-Z][^>]*>", "", s) # strip other HTML tags
+            s = s.replace("\n", r"\N") # convert newlines, no regex template involved
+            return s
+
+        subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines))
+                       for (start, end), lines in zip(timestamps, following_lines)]
+
+    @classmethod
+    def to_file(cls, subs, fp, format_, **kwargs):
+        """
+        Write subs to fp as SubRip.
+
+        Comment events are skipped; the remaining events are numbered from 1.
+        Italic, underline and strikeout styling computed by ``parse_tags()``
+        is rendered as HTML tags; format_ and kwargs are not used.
+
+        """
+        def prepare_text(text, style):
+            pieces = []
+            for fragment, sty in parse_tags(text, style, subs.styles):
+                # Hard space and both newline overrides become plain characters.
+                for override, plain in ((r"\h", " "), (r"\n", "\n"), (r"\N", "\n")):
+                    fragment = fragment.replace(override, plain)
+                if sty.italic:
+                    fragment = "<i>%s</i>" % fragment
+                if sty.underline:
+                    fragment = "<u>%s</u>" % fragment
+                if sty.strikeout:
+                    fragment = "<s>%s</s>" % fragment
+                pieces.append(fragment)
+
+            # Runs of newlines are collapsed into a single one.
+            return re.sub("\n+", "\n", "".join(pieces).strip())
+
+        number = 0
+        for event in subs:
+            if event.is_comment:
+                continue
+            number += 1
+
+            start = ms_to_timestamp(event.start)
+            end = ms_to_timestamp(event.end)
+            text = prepare_text(event.text, subs.styles.get(event.style, SSAStyle.DEFAULT_STYLE))
+
+            print("%d" % number, file=fp) # Python 2.7 compat
+            print(start, "-->", end, file=fp)
+            print(text, end="\n\n", file=fp)
@@ -0,0 +1,255 @@
+from __future__ import print_function, division, unicode_literals
+import re
+from numbers import Number
+from .formatbase import FormatBase
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+from .common import text_type, Color
+from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP
+
+#: SSA alignment codes, stored at the position of the matching ASS code minus one.
+SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7)
+
+def ass_to_ssa_alignment(i):
+    """Return the SSA alignment code stored for ASS alignment code i."""
+    position = i - 1
+    return SSA_ALIGNMENT[position]
+
+def ssa_to_ass_alignment(i):
+    """Return the ASS alignment code for SSA alignment code i (ValueError when unknown)."""
+    return 1 + SSA_ALIGNMENT.index(i)
+
+SECTION_HEADING = re.compile(r"^.{,3}\[[^\]]+\]") # allow for UTF-8 BOM, which is 3 bytes
+
+# "Format:" line written at the top of the styles section, per format identifier.
+STYLE_FORMAT_LINE = {
+    "ass": "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic,"
+           " Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment,"
+           " MarginL, MarginR, MarginV, Encoding",
+    "ssa": "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, TertiaryColour, BackColour, Bold, Italic,"
+           " BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, AlphaLevel, Encoding"
+}
+
+# SSAStyle attribute names in the column order of the format line above.
+# The leading "Name" column is absent: the style name is read and written
+# separately from the style fields.
+STYLE_FIELDS = {
+    "ass": ["fontname", "fontsize", "primarycolor", "secondarycolor", "outlinecolor", "backcolor", "bold", "italic",
+            "underline", "strikeout", "scalex", "scaley", "spacing", "angle", "borderstyle", "outline", "shadow",
+            "alignment", "marginl", "marginr", "marginv", "encoding"],
+    "ssa": ["fontname", "fontsize", "primarycolor", "secondarycolor", "tertiarycolor", "backcolor", "bold", "italic",
+            "borderstyle", "outline", "shadow", "alignment", "marginl", "marginr", "marginv", "alphalevel", "encoding"]
+}
+
+# "Format:" line written at the top of the events section, per format identifier.
+EVENT_FORMAT_LINE = {
+    "ass": "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
+    "ssa": "Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"
+}
+
+# SSAEvent attribute names in the column order of the format line above.
+EVENT_FIELDS = {
+    "ass": ["layer", "start", "end", "style", "name", "marginl", "marginr", "marginv", "effect", "text"],
+    "ssa": ["marked", "start", "end", "style", "name", "marginl", "marginr", "marginv", "effect", "text"]
+}
+
+#: Largest timestamp allowed in SubStation, ie. 9:59:59.99.
+MAX_REPRESENTABLE_TIME = make_time(h=10) - 10
+
+def ms_to_timestamp(ms):
+    """
+    Convert ms to 'H:MM:SS.cc'.
+
+    Values below zero or above MAX_REPRESENTABLE_TIME are clamped to the
+    nearest representable bound; milliseconds are truncated to centiseconds.
+
+    """
+    # XXX throw on overflow/underflow?
+    if ms < 0:
+        ms = 0
+    elif ms > MAX_REPRESENTABLE_TIME:
+        ms = MAX_REPRESENTABLE_TIME
+    hours, minutes, seconds, millis = ms_to_times(ms)
+    centis = millis // 10
+    return "%01d:%02d:%02d.%02d" % (hours, minutes, seconds, centis)
+
+def color_to_ass_rgba(c):
+    """Format a color as ``&H`` followed by eight hex digits in alpha, blue, green, red order."""
+    packed = c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)
+    return "&H%08X" % packed
+
+def color_to_ssa_rgb(c):
+    """Format a color as a decimal number packing blue, green, red (alpha is dropped)."""
+    packed = c.r | (c.g << 8) | (c.b << 16)
+    return "%d" % packed
+
+def ass_rgba_to_color(s):
+    """Parse an ASS color string: the first two characters are skipped, the rest is read as hex."""
+    value = int(s[2:], base=16)
+    # Channels are stored from the lowest byte up: red, green, blue, alpha.
+    red, green, blue, alpha = [(value >> shift) & 0xff for shift in (0, 8, 16, 24)]
+    return Color(red, green, blue, alpha)
+
+def ssa_rgb_to_color(s):
+    """Parse an SSA color given as a decimal number string (no alpha channel)."""
+    value = int(s)
+    # Channels are stored from the lowest byte up: red, green, blue.
+    red, green, blue = [(value >> shift) & 0xff for shift in (0, 8, 16)]
+    return Color(red, green, blue)
+
+def is_valid_field_content(s):
+    """
+    Tell whether string s can be stored in a SubStation field.
+
+    Fields are written in CSV-like manner, so a string containing
+    a comma or a newline is rejected.
+
+    """
+    return not ("\n" in s or "," in s)
+
+
+def parse_tags(text, style=SSAStyle.DEFAULT_STYLE, styles=None):
+    """
+    Split text into fragments with computed SSAStyles.
+
+    Returns list of tuples (fragment, style), where fragment is a part of text
+    between two brace-delimited override sequences, and style is the computed
+    styling of the fragment, ie. the original style modified by all override
+    sequences before the fragment.
+
+    Newline and non-breakable space overrides are left as-is.
+
+    Supported override tags:
+
+    - i, b, u, s
+    - r (with or without style name)
+
+    Arguments:
+        text (str): Event text, possibly with override sequences.
+        style (SSAStyle): Style of the line the text belongs to.
+        styles (dict): Optional mapping of style names to SSAStyles, used to
+            resolve the "r" tag with a style name. Unknown names are ignored.
+            Defaults to no named styles.
+
+    """
+    if styles is None:
+        # Avoid a mutable default argument shared between calls.
+        styles = {}
+
+    fragments = SSAEvent.OVERRIDE_SEQUENCE.split(text)
+    if len(fragments) == 1:
+        return [(text, style)]
+
+    def apply_overrides(all_overrides):
+        s = style.copy()
+        for tag in re.findall(r"\\[ibus][10]|\\r[a-zA-Z_0-9 ]*", all_overrides):
+            if tag == r"\r":
+                s = style.copy() # reset to original line style
+            elif tag.startswith(r"\r"):
+                name = tag[2:]
+                if name in styles:
+                    s = styles[name].copy() # reset to named style
+            else:
+                if "i" in tag: s.italic = "1" in tag
+                elif "b" in tag: s.bold = "1" in tag
+                elif "u" in tag: s.underline = "1" in tag
+                elif "s" in tag: s.strikeout = "1" in tag
+        return s
+
+    overrides = SSAEvent.OVERRIDE_SEQUENCE.findall(text)
+    # The i-th fragment is styled by all override sequences preceding it.
+    overrides_prefix_sum = ["".join(overrides[:i]) for i in range(len(overrides) + 1)]
+    computed_styles = map(apply_overrides, overrides_prefix_sum)
+    return list(zip(fragments, computed_styles))
+
+
+# Default header notice of SubstationFormat.to_file(); each line of it is
+# written as a ";" comment line right after the [Script Info] heading.
+NOTICE = "Script generated by pysubs2\nhttps://pypi.python.org/pypi/pysubs2"
+
+class SubstationFormat(FormatBase):
+    @classmethod
+    def guess_format(cls, text):
+        """
+        Return "ass" or "ssa" depending on which styles heading occurs in
+        text (the V4+ one is checked first), None when neither does.
+
+        """
+        for marker, identifier in (("V4+ Styles", "ass"), ("V4 Styles", "ssa")):
+            if marker in text:
+                return identifier
+        return None
+
+    @classmethod
+    def from_file(cls, subs, fp, format_, **kwargs):
+        """
+        Parse SubStation text from fp into subs.
+
+        Script info, Aegisub project data and styles of subs are cleared
+        first; parsed events are appended to subs.events. Lines that are not
+        recognized (eg. "Format:" lines) are skipped.
+
+        Arguments:
+            subs (SSAFile): File object to be filled.
+            fp (file object): Text-mode file object to read from.
+            format_ (str): "ass" or "ssa"; selects field layout as well as
+                color and alignment conventions.
+            kwargs: Not used by this parser.
+
+        """
+
+        def string_to_field(f, v):
+            # Convert raw string v to the Python value of field f.
+            if f in {"start", "end"}:
+                return timestamp_to_ms(TIMESTAMP.match(v).groups())
+            elif "color" in f:
+                if format_ == "ass":
+                    return ass_rgba_to_color(v)
+                else:
+                    return ssa_rgb_to_color(v)
+            elif f in {"bold", "underline", "italic", "strikeout"}:
+                return v == "-1"
+            elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
+                return int(v)
+            elif f in {"fontsize", "scalex", "scaley", "spacing", "angle", "outline", "shadow"}:
+                return float(v)
+            elif f == "marked":
+                return v.endswith("1")
+            elif f == "alignment":
+                i = int(v)
+                if format_ == "ass":
+                    return i
+                else:
+                    return ssa_to_ass_alignment(i)
+            else:
+                return v
+
+        subs.info.clear()
+        subs.aegisub_project.clear()
+        subs.styles.clear()
+
+        inside_info_section = False
+        inside_aegisub_section = False
+
+        for line in fp:
+            line = line.strip()
+
+            if SECTION_HEADING.match(line):
+                inside_info_section = "Info" in line
+                inside_aegisub_section = "Aegisub" in line
+            elif inside_info_section or inside_aegisub_section:
+                if line.startswith(";"): continue # skip comments
+                try:
+                    k, v = line.split(": ", 1)
+                    if inside_info_section:
+                        subs.info[k] = v
+                    elif inside_aegisub_section:
+                        subs.aegisub_project[k] = v
+                except ValueError:
+                    pass # best effort: lines without "key: value" are ignored
+            elif line.startswith("Style:"):
+                # Split at the first colon only. Requiring ": " would crash on
+                # "Style:Name,..." and could split at a later ": " in the line.
+                _, rest = line.split(":", 1)
+                buf = rest.strip().split(",")
+                name, raw_fields = buf[0], buf[1:] # splat workaround for Python 2.7
+                field_dict = {f: string_to_field(f, v) for f, v in zip(STYLE_FIELDS[format_], raw_fields)}
+                sty = SSAStyle(**field_dict)
+                subs.styles[name] = sty
+            elif line.startswith("Dialogue:") or line.startswith("Comment:"):
+                # Same as for styles: the space after the colon is optional and
+                # the text field may itself contain ": ".
+                ev_type, rest = line.split(":", 1)
+                # Text is the last field and may contain commas, hence the split limit.
+                raw_fields = rest.strip().split(",", len(EVENT_FIELDS[format_])-1)
+                field_dict = {f: string_to_field(f, v) for f, v in zip(EVENT_FIELDS[format_], raw_fields)}
+                field_dict["type"] = ev_type
+                ev = SSAEvent(**field_dict)
+                subs.events.append(ev)
+
+
+
+    @classmethod
+    def to_file(cls, subs, fp, format_, header_notice=NOTICE, **kwargs):
+        """
+        Write subs to fp as SubStation ("ass" or anything else meaning SSA).
+
+        Sections are written in this order: script info (preceded by
+        header_notice as ";" comment lines), Aegisub project data (only when
+        present), styles and events.
+
+        Note that this sets the "ScriptType" key in subs.info.
+
+        Raises:
+            TypeError: A field holds a value of unsupported type.
+
+        """
+        is_ass = format_ == "ass"
+
+        def field_to_string(f, v):
+            # Serialize value v of field f; bool is tested before Number on
+            # purpose, since booleans are numbers too.
+            if f in {"start", "end"}:
+                return ms_to_timestamp(v)
+            if f == "marked":
+                return "Marked=%d" % v
+            if f == "alignment" and format_ == "ssa":
+                return text_type(ass_to_ssa_alignment(v))
+            if isinstance(v, bool):
+                return "-1" if v else "0"
+            if isinstance(v, (text_type, Number)):
+                return text_type(v)
+            if isinstance(v, Color):
+                return color_to_ass_rgba(v) if is_ass else color_to_ssa_rgb(v)
+            raise TypeError("Unexpected type when writing a SubStation field")
+
+        print("[Script Info]", file=fp)
+        for notice_line in header_notice.splitlines(False):
+            print(";", notice_line, file=fp)
+
+        subs.info["ScriptType"] = "v4.00+" if is_ass else "v4.00"
+        for key, value in subs.info.items():
+            print(key, value, sep=": ", file=fp)
+
+        if subs.aegisub_project:
+            print("\n[Aegisub Project Garbage]", file=fp)
+            for key, value in subs.aegisub_project.items():
+                print(key, value, sep=": ", file=fp)
+
+        print("\n[V4+ Styles]" if is_ass else "\n[V4 Styles]", file=fp)
+        print(STYLE_FORMAT_LINE[format_], file=fp)
+        style_fields = STYLE_FIELDS[format_]
+        for name, sty in subs.styles.items():
+            columns = [field_to_string(f, getattr(sty, f)) for f in style_fields]
+            print("Style: %s" % name, *columns, sep=",", file=fp)
+
+        print("\n[Events]", file=fp)
+        print(EVENT_FORMAT_LINE[format_], file=fp)
+        event_fields = EVENT_FIELDS[format_]
+        for ev in subs.events:
+            columns = [field_to_string(f, getattr(ev, f)) for f in event_fields]
+            print(ev.type, end=": ", file=fp)
+            print(*columns, sep=",", file=fp)
@@ -0,0 +1,147 @@
+from __future__ import division
+
+from collections import namedtuple
+import re
+
+
+#: Pattern that matches both SubStation and SubRip timestamps.
+TIMESTAMP = re.compile(r"(\d{1,2}):(\d{2}):(\d{2})[.,](\d{2,3})")
+
+Times = namedtuple("Times", ["h", "m", "s", "ms"])
+
+def make_time(h=0, m=0, s=0, ms=0, frames=None, fps=None):
+    """
+    Build a millisecond count from clock components or from a frame count.
+
+    Without ``frames``/``fps`` the result is ``times_to_ms(h, m, s, ms)``;
+    with both of them it is ``frames_to_ms(frames, fps)``.
+
+    Raises:
+        ValueError: Only one of frames/fps was given, or fps is invalid.
+
+    Example:
+        >>> make_time(s=1.5)
+        1500
+        >>> make_time(frames=50, fps=25)
+        2000
+
+    """
+    given = (frames is not None, fps is not None)
+    if given == (True, True):
+        return frames_to_ms(frames, fps)
+    if given == (False, False):
+        return times_to_ms(h, m, s, ms)
+    raise ValueError("Both fps and frames must be specified")
+
+def timestamp_to_ms(groups):
+    """
+    Convert the four groups of a :data:`pysubs2.time.TIMESTAMP` match to milliseconds.
+
+    The fraction is scaled by its number of digits, so ``.42`` counts as 420 ms.
+
+    Example:
+        >>> timestamp_to_ms(TIMESTAMP.match("0:00:00.42").groups())
+        420
+    """
+    hours, minutes, seconds, fraction = [int(g) for g in groups]
+    total = fraction * 10 ** (3 - len(groups[-1]))
+    for value, factor in ((seconds, 1000), (minutes, 60000), (hours, 3600000)):
+        total += value * factor
+    return total
+
+def times_to_ms(h=0, m=0, s=0, ms=0):
+    """
+    Sum hours, minutes, seconds and milliseconds into whole milliseconds.
+
+    Each argument may be int or float, positive or negative, and does not
+    have to be normalized (``s=120`` is okay).
+
+    Returns:
+        Number of milliseconds (rounded to int).
+
+    """
+    # accumulated left to right, smallest unit first
+    total = ms + s * 1000 + m * 60000 + h * 3600000
+    rounded = round(total)
+    return int(rounded)
+
+def frames_to_ms(frames, fps):
+    """
+    Express a duration given in frames as milliseconds.
+
+    Arguments:
+        frames: Number of frames (should be int).
+        fps: Framerate (must be a positive number, eg. 23.976).
+
+    Returns:
+        Number of milliseconds (rounded to int).
+
+    Raises:
+        ValueError: fps was negative or zero.
+
+    """
+    if fps <= 0:
+        raise ValueError("Framerate must be positive number (%f)." % fps)
+    ms_per_frame = 1000 / fps
+    return int(round(frames * ms_per_frame))
+
+def ms_to_frames(ms, fps):
+    """
+    Express a duration given in milliseconds as a number of frames.
+
+    Arguments:
+        ms: Number of milliseconds (may be int, float or other numeric class).
+        fps: Framerate (must be a positive number, eg. 23.976).
+
+    Returns:
+        Number of frames (int).
+
+    Raises:
+        ValueError: fps was negative or zero.
+
+    """
+    if fps <= 0:
+        raise ValueError("Framerate must be positive number (%f)." % fps)
+    seconds = ms / 1000
+    return int(round(seconds * fps))
+
+def ms_to_times(ms):
+    """
+    Split milliseconds into a normalized (h, m, s, ms) tuple.
+
+    Arguments:
+        ms: Number of milliseconds (may be int, float or other numeric class).
+            Should be non-negative.
+
+    Returns:
+        Named tuple (h, m, s, ms) of ints.
+        Invariants: ``ms in range(1000) and s in range(60) and m in range(60)``
+
+    """
+    whole = int(round(ms))
+    total_seconds, millis = divmod(whole, 1000)
+    total_minutes, seconds = divmod(total_seconds, 60)
+    hours, minutes = divmod(total_minutes, 60)
+    return Times(hours, minutes, seconds, millis)
+
+def ms_to_str(ms, fractions=False):
+    """
+    Format milliseconds as [-]H:MM:SS[.mmm]
+
+    Works for huge and/or negative times. With ``fractions=True`` the output
+    for non-negative times is matched by :data:`pysubs2.time.TIMESTAMP`.
+
+    Arguments:
+        ms: Number of milliseconds (int, float or other numeric class).
+        fractions: Whether to print up to millisecond precision.
+
+    Returns:
+        str
+
+    """
+    sign = "-" if ms < 0 else ""
+    h, m, s, frac = ms_to_times(abs(ms))
+    text = sign + "{:01d}:{:02d}:{:02d}".format(h, m, s)
+    if fractions:
+        text += ".{:03d}".format(frac)
+    return text
@@ -1,6 +1,6 @@
 """Utilities for writing code that runs on Python 2 and 3"""

-# Copyright (c) 2010-2014 Benjamin Peterson
+# Copyright (c) 2010-2015 Benjamin Peterson
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -20,17 +20,22 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

+from __future__ import absolute_import
+
+import functools
+import itertools
 import operator
 import sys
 import types

 __author__ = "Benjamin Peterson <benjamin@python.org>"
-__version__ = "1.5.2"
+__version__ = "1.10.0"


 # Useful for very coarse version differentiation.
 PY2 = sys.version_info[0] == 2
 PY3 = sys.version_info[0] == 3
+PY34 = sys.version_info[0:2] >= (3, 4)

 if PY3:
    string_types = str,
@@ -53,6 +58,7 @@ else:
    else:
        # It's possible to have sizeof(long) != sizeof(Py_ssize_t).
        class X(object):
+
            def __len__(self):
                return 1 << 31
        try:
@@ -84,9 +90,13 @@ class _LazyDescr(object):

    def __get__(self, obj, tp):
        result = self._resolve()
-        setattr(obj, self.name, result) # Invokes __set__.
-        # This is a bit ugly, but it avoids running this again.
-        delattr(obj.__class__, self.name)
+        setattr(obj, self.name, result)  # Invokes __set__.
+        try:
+            # This is a bit ugly, but it avoids running this again by
+            # removing this descriptor.
+            delattr(obj.__class__, self.name)
+        except AttributeError:
+            pass
        return result


@@ -105,14 +115,6 @@ class MovedModule(_LazyDescr):
        return _import_module(self.mod)

    def __getattr__(self, attr):
-        # Hack around the Django autoreloader. The reloader tries to get
-        # __file__ or __name__ of every module in sys.modules. This doesn't work
-        # well if this MovedModule is for an module that is unavailable on this
-        # machine (like winreg on Unix systems). Thus, we pretend __file__ and
-        # __name__ don't exist if the module hasn't been loaded yet. See issues
-        # #51 and #53.
-        if attr in ("__file__", "__name__") and self.mod not in sys.modules:
-            raise AttributeError
        _module = self._resolve()
        value = getattr(_module, attr)
        setattr(self, attr, value)
@@ -159,9 +161,100 @@ class MovedAttribute(_LazyDescr):
         return getattr(module, self.attr)


+class _SixMetaPathImporter(object):
+
+    """
+    A meta path importer to import six.moves and its submodules.
+
+    This class implements a PEP302 finder and loader. It should be compatible
+    with Python 2.5 and all existing versions of Python3
+
+    The PEP 451 hooks (find_spec, create_module, exec_module) are provided as
+    well, because Python 3.12 and later no longer call find_module on meta
+    path finders.
+    """
+
+    def __init__(self, six_module_name):
+        self.name = six_module_name
+        self.known_modules = {}
+
+    def _add_module(self, mod, *fullnames):
+        """Register mod under every given name, prefixed with the six name."""
+        for fullname in fullnames:
+            self.known_modules[self.name + "." + fullname] = mod
+
+    def _get_module(self, fullname):
+        """Return the module registered under the six-relative fullname."""
+        return self.known_modules[self.name + "." + fullname]
+
+    def find_module(self, fullname, path=None):
+        """PEP 302 finder: return self for a known module, otherwise None."""
+        if fullname in self.known_modules:
+            return self
+        return None
+
+    def find_spec(self, fullname, path, target=None):
+        """PEP 451 finder: return a spec for a known module, otherwise None."""
+        if fullname in self.known_modules:
+            # imported lazily: this hook is only invoked on Python 3.4+
+            from importlib.util import spec_from_loader
+            return spec_from_loader(fullname, self)
+        return None
+
+    def __get_module(self, fullname):
+        """Return the module registered as fullname or raise ImportError."""
+        try:
+            return self.known_modules[fullname]
+        except KeyError:
+            raise ImportError("This loader does not know module " + fullname)
+
+    def load_module(self, fullname):
+        """PEP 302 loader: resolve fullname and publish it in sys.modules."""
+        try:
+            # in case of a reload
+            return sys.modules[fullname]
+        except KeyError:
+            pass
+        mod = self.__get_module(fullname)
+        if isinstance(mod, MovedModule):
+            mod = mod._resolve()
+        else:
+            mod.__loader__ = self
+        sys.modules[fullname] = mod
+        return mod
+
+    def is_package(self, fullname):
+        """
+        Return true, if the named module is a package.
+
+        We need this method to get correct spec objects with
+        Python 3.4 (see PEP451)
+        """
+        return hasattr(self.__get_module(fullname), "__path__")
+
+    def get_code(self, fullname):
+        """Return None
+
+        Required, if is_package is implemented"""
+        self.__get_module(fullname)  # eventually raises ImportError
+        return None
+    get_source = get_code  # same as get_code
+
+    def create_module(self, spec):
+        """PEP 451 loader: hand back the module built by load_module."""
+        return self.load_module(spec.name)
+
+    def exec_module(self, module):
+        """PEP 451 loader: nothing left to execute after create_module."""
+        pass
+
+_importer = _SixMetaPathImporter(__name__)
+

 class _MovedItems(_LazyModule):
+
    """Lazy loading of moved objects"""
+    __path__ = []  # mark as package


 _moved_attributes = [
@@ -169,26 +237,33 @@ _moved_attributes = [
    MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"),
    MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"),
    MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"),
+    MovedAttribute("intern", "__builtin__", "sys"),
    MovedAttribute("map", "itertools", "builtins", "imap", "map"),
+    MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"),
+    MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"),
    MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"),
-    MovedAttribute("reload_module", "__builtin__", "imp", "reload"),
+    MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"),
    MovedAttribute("reduce", "__builtin__", "functools"),
+    MovedAttribute("shlex_quote", "pipes", "shlex", "quote"),
    MovedAttribute("StringIO", "StringIO", "io"),
+    MovedAttribute("UserDict", "UserDict", "collections"),
+    MovedAttribute("UserList", "UserList", "collections"),
    MovedAttribute("UserString", "UserString", "collections"),
    MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"),
    MovedAttribute("zip", "itertools", "builtins", "izip", "zip"),
    MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"),
-
    MovedModule("builtins", "__builtin__"),
    MovedModule("configparser", "ConfigParser"),
    MovedModule("copyreg", "copy_reg"),
    MovedModule("dbm_gnu", "gdbm", "dbm.gnu"),
+    MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"),
    MovedModule("http_cookiejar", "cookielib", "http.cookiejar"),
    MovedModule("http_cookies", "Cookie", "http.cookies"),
    MovedModule("html_entities", "htmlentitydefs", "html.entities"),
    MovedModule("html_parser", "HTMLParser", "html.parser"),
    MovedModule("http_client", "httplib", "http.client"),
    MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"),
+    MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"),
    MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"),
    MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"),
    MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"),
@@ -222,25 +297,34 @@ _moved_attributes = [
    MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"),
    MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"),
    MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"),
-    MovedModule("winreg", "_winreg"),
+    MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"),
 ]
+# Add windows specific modules.
+if sys.platform == "win32":
+    _moved_attributes += [
+        MovedModule("winreg", "_winreg"),
+    ]
+
 for attr in _moved_attributes:
    setattr(_MovedItems, attr.name, attr)
    if isinstance(attr, MovedModule):
-        sys.modules[__name__ + ".moves." + attr.name] = attr
+        _importer._add_module(attr, "moves." + attr.name)
 del attr

 _MovedItems._moved_attributes = _moved_attributes

-moves = sys.modules[__name__ + ".moves"] = _MovedItems(__name__ + ".moves")
+moves = _MovedItems(__name__ + ".moves")
+_importer._add_module(moves, "moves")


 class Module_six_moves_urllib_parse(_LazyModule):
+
    """Lazy loading of moved objects in six.moves.urllib_parse"""


 _urllib_parse_moved_attributes = [
    MovedAttribute("ParseResult", "urlparse", "urllib.parse"),
+    MovedAttribute("SplitResult", "urlparse", "urllib.parse"),
    MovedAttribute("parse_qs", "urlparse", "urllib.parse"),
    MovedAttribute("parse_qsl", "urlparse", "urllib.parse"),
    MovedAttribute("urldefrag", "urlparse", "urllib.parse"),
@@ -254,6 +338,14 @@ _urllib_parse_moved_attributes = [
    MovedAttribute("unquote", "urllib", "urllib.parse"),
    MovedAttribute("unquote_plus", "urllib", "urllib.parse"),
    MovedAttribute("urlencode", "urllib", "urllib.parse"),
+    MovedAttribute("splitquery", "urllib", "urllib.parse"),
+    MovedAttribute("splittag", "urllib", "urllib.parse"),
+    MovedAttribute("splituser", "urllib", "urllib.parse"),
+    MovedAttribute("uses_fragment", "urlparse", "urllib.parse"),
+    MovedAttribute("uses_netloc", "urlparse", "urllib.parse"),
+    MovedAttribute("uses_params", "urlparse", "urllib.parse"),
+    MovedAttribute("uses_query", "urlparse", "urllib.parse"),
+    MovedAttribute("uses_relative", "urlparse", "urllib.parse"),
 ]
 for attr in _urllib_parse_moved_attributes:
    setattr(Module_six_moves_urllib_parse, attr.name, attr)
@@ -261,10 +353,12 @@ del attr

 Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes

-sys.modules[__name__ + ".moves.urllib_parse"] = sys.modules[__name__ + ".moves.urllib.parse"] = Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse")
+_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"),
+                      "moves.urllib_parse", "moves.urllib.parse")


 class Module_six_moves_urllib_error(_LazyModule):
+
    """Lazy loading of moved objects in six.moves.urllib_error"""


@@ -279,10 +373,12 @@ del attr

 Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes

-sys.modules[__name__ + ".moves.urllib_error"] = sys.modules[__name__ + ".moves.urllib.error"] = Module_six_moves_urllib_error(__name__ + ".moves.urllib.error")
+_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"),
+                      "moves.urllib_error", "moves.urllib.error")


 class Module_six_moves_urllib_request(_LazyModule):
+
    """Lazy loading of moved objects in six.moves.urllib_request"""


@@ -327,10 +423,12 @@ del attr

 Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes

-sys.modules[__name__ + ".moves.urllib_request"] = sys.modules[__name__ + ".moves.urllib.request"] = Module_six_moves_urllib_request(__name__ + ".moves.urllib.request")
+_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"),
+                      "moves.urllib_request", "moves.urllib.request")


 class Module_six_moves_urllib_response(_LazyModule):
+
    """Lazy loading of moved objects in six.moves.urllib_response"""


@@ -346,10 +444,12 @@ del attr

 Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes

-sys.modules[__name__ + ".moves.urllib_response"] = sys.modules[__name__ + ".moves.urllib.response"] = Module_six_moves_urllib_response(__name__ + ".moves.urllib.response")
+_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"),
+                      "moves.urllib_response", "moves.urllib.response")


 class Module_six_moves_urllib_robotparser(_LazyModule):
+
    """Lazy loading of moved objects in six.moves.urllib_robotparser"""


@@ -362,22 +462,25 @@ del attr

 Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes

-sys.modules[__name__ + ".moves.urllib_robotparser"] = sys.modules[__name__ + ".moves.urllib.robotparser"] = Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser")
+_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"),
+                      "moves.urllib_robotparser", "moves.urllib.robotparser")


 class Module_six_moves_urllib(types.ModuleType):
+
    """Create a six.moves.urllib namespace that resembles the Python 3 namespace"""
-    parse = sys.modules[__name__ + ".moves.urllib_parse"]
-    error = sys.modules[__name__ + ".moves.urllib_error"]
-    request = sys.modules[__name__ + ".moves.urllib_request"]
-    response = sys.modules[__name__ + ".moves.urllib_response"]
-    robotparser = sys.modules[__name__ + ".moves.urllib_robotparser"]
+    __path__ = []  # mark as package
+    parse = _importer._get_module("moves.urllib_parse")
+    error = _importer._get_module("moves.urllib_error")
+    request = _importer._get_module("moves.urllib_request")
+    response = _importer._get_module("moves.urllib_response")
+    robotparser = _importer._get_module("moves.urllib_robotparser")

    def __dir__(self):
        return ['parse', 'error', 'request', 'response', 'robotparser']

-
-sys.modules[__name__ + ".moves.urllib"] = Module_six_moves_urllib(__name__ + ".moves.urllib")
+_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"),
+                      "moves.urllib")


 def add_move(move):
@@ -404,11 +507,6 @@ if PY3:
    _func_code = "__code__"
    _func_defaults = "__defaults__"
    _func_globals = "__globals__"
-
-    _iterkeys = "keys"
-    _itervalues = "values"
-    _iteritems = "items"
-    _iterlists = "lists"
 else:
    _meth_func = "im_func"
    _meth_self = "im_self"
@@ -418,11 +516,6 @@ else:
    _func_defaults = "func_defaults"
    _func_globals = "func_globals"

-    _iterkeys = "iterkeys"
-    _itervalues = "itervalues"
-    _iteritems = "iteritems"
-    _iterlists = "iterlists"
-

 try:
    advance_iterator = next
@@ -445,6 +538,9 @@ if PY3:

    create_bound_method = types.MethodType

+    def create_unbound_method(func, cls):
+        return func
+
    Iterator = object
 else:
    def get_unbound_function(unbound):
@@ -453,6 +549,9 @@ else:
    def create_bound_method(func, obj):
        return types.MethodType(func, obj, obj.__class__)

+    def create_unbound_method(func, cls):
+        return types.MethodType(func, None, cls)
+
    class Iterator(object):

        def next(self):
@@ -471,66 +570,117 @@ get_function_defaults = operator.attrgetter(_func_defaults)
 get_function_globals = operator.attrgetter(_func_globals)


-def iterkeys(d, **kw):
-    """Return an iterator over the keys of a dictionary."""
-    return iter(getattr(d, _iterkeys)(**kw))
+if PY3:
+    def iterkeys(d, **kw):
+        return iter(d.keys(**kw))

-def itervalues(d, **kw):
-    """Return an iterator over the values of a dictionary."""
-    return iter(getattr(d, _itervalues)(**kw))
+    def itervalues(d, **kw):
+        return iter(d.values(**kw))

-def iteritems(d, **kw):
-    """Return an iterator over the (key, value) pairs of a dictionary."""
-    return iter(getattr(d, _iteritems)(**kw))
+    def iteritems(d, **kw):
+        return iter(d.items(**kw))

-def iterlists(d, **kw):
-    """Return an iterator over the (key, [values]) pairs of a dictionary."""
-    return iter(getattr(d, _iterlists)(**kw))
+    def iterlists(d, **kw):
+        return iter(d.lists(**kw))
+
+    viewkeys = operator.methodcaller("keys")
+
+    viewvalues = operator.methodcaller("values")
+
+    viewitems = operator.methodcaller("items")
+else:
+    def iterkeys(d, **kw):
+        return d.iterkeys(**kw)
+
+    def itervalues(d, **kw):
+        return d.itervalues(**kw)
+
+    def iteritems(d, **kw):
+        return d.iteritems(**kw)
+
+    def iterlists(d, **kw):
+        return d.iterlists(**kw)
+
+    viewkeys = operator.methodcaller("viewkeys")
+
+    viewvalues = operator.methodcaller("viewvalues")
+
+    viewitems = operator.methodcaller("viewitems")
+
+_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.")
+_add_doc(itervalues, "Return an iterator over the values of a dictionary.")
+_add_doc(iteritems,
+         "Return an iterator over the (key, value) pairs of a dictionary.")
+_add_doc(iterlists,
+         "Return an iterator over the (key, [values]) pairs of a dictionary.")


 if PY3:
    def b(s):
        return s.encode("latin-1")
+
    def u(s):
        return s
    unichr = chr
-    if sys.version_info[1] <= 1:
-        def int2byte(i):
-            return bytes((i,))
-    else:
-        # This is about 2x faster than the implementation above on 3.2+
-        int2byte = operator.methodcaller("to_bytes", 1, "big")
+    import struct
+    int2byte = struct.Struct(">B").pack
+    del struct
    byte2int = operator.itemgetter(0)
    indexbytes = operator.getitem
    iterbytes = iter
    import io
    StringIO = io.StringIO
    BytesIO = io.BytesIO
+    _assertCountEqual = "assertCountEqual"
+    if sys.version_info[1] <= 1:
+        _assertRaisesRegex = "assertRaisesRegexp"
+        _assertRegex = "assertRegexpMatches"
+    else:
+        _assertRaisesRegex = "assertRaisesRegex"
+        _assertRegex = "assertRegex"
 else:
    def b(s):
        return s
    # Workaround for standalone backslash
+
    def u(s):
        return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape")
    unichr = unichr
    int2byte = chr
+
    def byte2int(bs):
        return ord(bs[0])
+
    def indexbytes(buf, i):
        return ord(buf[i])
-    def iterbytes(buf):
-        return (ord(byte) for byte in buf)
+    iterbytes = functools.partial(itertools.imap, ord)
    import StringIO
    StringIO = BytesIO = StringIO.StringIO
+    _assertCountEqual = "assertItemsEqual"
+    _assertRaisesRegex = "assertRaisesRegexp"
+    _assertRegex = "assertRegexpMatches"
 _add_doc(b, """Byte literal""")
 _add_doc(u, """Text literal""")


+def assertCountEqual(self, *args, **kwargs):
+    return getattr(self, _assertCountEqual)(*args, **kwargs)
+
+
+def assertRaisesRegex(self, *args, **kwargs):
+    return getattr(self, _assertRaisesRegex)(*args, **kwargs)
+
+
+def assertRegex(self, *args, **kwargs):
+    return getattr(self, _assertRegex)(*args, **kwargs)
+
+
 if PY3:
    exec_ = getattr(moves.builtins, "exec")

-
    def reraise(tp, value, tb=None):
+        if value is None:
+            value = tp()
        if value.__traceback__ is not tb:
            raise value.with_traceback(tb)
        raise value
@@ -548,12 +698,26 @@ else:
            _locs_ = _globs_
        exec("""exec _code_ in _globs_, _locs_""")

-
    exec_("""def reraise(tp, value, tb=None):
    raise tp, value, tb
 """)


+if sys.version_info[:2] == (3, 2):
+    exec_("""def raise_from(value, from_value):
+    if from_value is None:
+        raise value
+    raise value from from_value
+""")
+elif sys.version_info[:2] > (3, 2):
+    exec_("""def raise_from(value, from_value):
+    raise value from from_value
+""")
+else:
+    def raise_from(value, from_value):
+        raise value
+
+
 print_ = getattr(moves.builtins, "print", None)
 if print_ is None:
    def print_(*args, **kwargs):
@@ -561,13 +725,14 @@ if print_ is None:
        fp = kwargs.pop("file", sys.stdout)
        if fp is None:
            return
+
        def write(data):
            if not isinstance(data, basestring):
                data = str(data)
            # If the file has an encoding, encode unicode with it.
            if (isinstance(fp, file) and
-                isinstance(data, unicode) and
-                fp.encoding is not None):
+                    isinstance(data, unicode) and
+                    fp.encoding is not None):
                errors = getattr(fp, "errors", None)
                if errors is None:
                    errors = "strict"
@@ -608,25 +773,96 @@ if print_ is None:
                write(sep)
            write(arg)
        write(end)
+if sys.version_info[:2] < (3, 3):
+    _print = print_  # the print implementation that is being wrapped
+
+    def print_(*args, **kwargs):
+        should_flush = kwargs.pop("flush", False)
+        stream = kwargs.get("file", sys.stdout)
+        _print(*args, **kwargs)
+        if stream is not None and should_flush:
+            stream.flush()

 _add_doc(reraise, """Reraise an exception.""")

+if sys.version_info[0:2] >= (3, 4):
+    wraps = functools.wraps
+else:
+    def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS,
+              updated=functools.WRAPPER_UPDATES):
+        def decorate(f):
+            decorated = functools.wraps(wrapped, assigned, updated)(f)
+            decorated.__wrapped__ = wrapped  # always point at ``wrapped``
+            return decorated
+        return decorate
+

 def with_metaclass(meta, *bases):
    """Create a base class with a metaclass."""
-    return meta("NewBase", bases, {})
+    # This requires a bit of explanation: the basic idea is to make a dummy
+    # metaclass for one level of class instantiation that replaces itself with
+    # the actual metaclass.
+    class metaclass(meta):
+
+        def __new__(cls, name, this_bases, d):
+            return meta(name, bases, d)
+    return type.__new__(metaclass, 'temporary_class', (), {})
+

 def add_metaclass(metaclass):
    """Class decorator for creating a class with a metaclass."""
    def wrapper(cls):
        orig_vars = cls.__dict__.copy()
-        orig_vars.pop('__dict__', None)
-        orig_vars.pop('__weakref__', None)
        slots = orig_vars.get('__slots__')
        if slots is not None:
            if isinstance(slots, str):
                slots = [slots]
            for slots_var in slots:
                orig_vars.pop(slots_var)
+        orig_vars.pop('__dict__', None)
+        orig_vars.pop('__weakref__', None)
        return metaclass(cls.__name__, cls.__bases__, orig_vars)
    return wrapper
+
+
+def python_2_unicode_compatible(klass):
+    """
+    Class decorator that adds ``__unicode__`` and a UTF-8 encoding ``__str__``
+    under Python 2; under Python 3 the class is returned unchanged.
+    The class itself has to define a ``__str__`` method that returns text.
+    """
+    if not PY2:
+        return klass
+    if '__str__' not in klass.__dict__:
+        message = ("@python_2_unicode_compatible cannot be applied "
+                   "to %s because it doesn't define __str__()." %
+                   klass.__name__)
+        raise ValueError(message)
+    klass.__unicode__ = klass.__str__
+    klass.__str__ = lambda self: self.__unicode__().encode('utf-8')
+    return klass
+
+
+# Complete the moves implementation.
+# This code is at the end of this module to speed up module loading.
+# Turn this module into a package.
+__path__ = []  # required for PEP 302 and PEP 451
+__package__ = __name__  # see PEP 366 @ReservedAssignment
+if globals().get("__spec__") is not None:
+    __spec__.submodule_search_locations = []  # PEP 451 @UndefinedVariable
+# Remove other six meta path importers, since they cause problems. This can
+# happen if six is removed from sys.modules and then reloaded. (Setuptools does
+# this for some reason.)
+if sys.meta_path:
+    for i, importer in enumerate(sys.meta_path):
+        # Here's some real nastiness: Another "instance" of the six module might
+        # be floating around. Therefore, we can't use isinstance() to check for
+        # the six meta path importer, since the other six instance will have
+        # inserted an importer with different class.
+        if (type(importer).__name__ == "_SixMetaPathImporter" and
+                importer.name == __name__):
+            del sys.meta_path[i]
+            break
+    del i, importer
+# Finally, add the importer to the meta path import hook.
+sys.meta_path.append(_importer)
@@ -17,6 +17,7 @@ logger = logging.getLogger(__name__)

 series_year_re = re.compile('^(?P<series>[ \w]+)(?: \((?P<year>\d{4})\))?$')

+
 class Addic7edSubtitle(Subtitle):
    provider_name = 'addic7ed'

@@ -7,6 +7,7 @@ import chardet
 from guessit.matchtree import MatchTree
 from guessit.plugins.transformers import get_transformer
 import pysrt
+import pysubs2

 from .video import Episode, Movie

@@ -69,11 +70,22 @@ class Subtitle(object):
         if not self.text:
             return False

+        # text that pysrt parses without raising is accepted as it is
         try:
             pysrt.from_string(self.text, error_handling=pysrt.ERROR_RAISE)
         except pysrt.Error as e:
             if e.args[0] < 80:
                 return False
+        else:
+            return True
+
+        # anything else: parse with pysubs2 and store the SRT rendition as content
+        try:
+            subs = pysubs2.SSAFile.from_string(self.text)
+            self.content = subs.to_string("srt")
+        except Exception:
+            logger.exception("Couldn't convert subtitle %s to .srt format", self)
+            return False

         return True

@@ -2,6 +2,11 @@

 import subliminal
 import babelfish
+
+# patch subliminal's subtitle encoding detection
+from .patch_subtitle import PatchedSubtitle
+subliminal.subtitle.Subtitle = PatchedSubtitle
+
 from .patch_provider_pool import PatchedProviderPool
 from .patch_video import patched_search_external_subtitles, scan_video
 from .patch_providers import addic7ed, podnapisi, tvsubtitles, opensubtitles
@@ -42,3 +47,5 @@ subliminal.video.search_external_subtitles = patched_search_external_subtitles
 subliminal.video.scan_video = scan_video

 subliminal.video.Episode.scores["boost"] = 40
+
+subliminal.video.Episode.scores["title"] = 0
@@ -1,13 +1,14 @@
 # coding=utf-8
 import os
 import logging
+from bs4 import UnicodeDammit
 from subliminal.api import get_subtitle_path, io
 from subzero.lib.io import getViableEncoding

 logger = logging.getLogger(__name__)


-def save_subtitles(video, subtitles, single=False, directory=None, encoding=None):
+def save_subtitles(video, subtitles, single=False, directory=None, encoding=None, encode_with=None):
    """Save subtitles on filesystem.

    Subtitles are saved in the order of the list. If a subtitle with a language has already been saved, other subtitles
@@ -45,20 +46,38 @@ def save_subtitles(video, subtitles, single=False, directory=None, encoding=None
         if directory is not None:
             subtitle_path = os.path.join(directory, os.path.split(subtitle_path)[1])

-        subtitle_path = subtitle_path.encode(getViableEncoding())
+        # force unicode
+        subtitle_path = UnicodeDammit(subtitle_path).unicode_markup

         # save content as is or in the specified encoding
         logger.info('Saving %r to %r', subtitle, subtitle_path)
-        if encoding is None:
+        has_encoder = callable(encode_with)
+
+        if has_encoder:
+            logger.info('Using encoder %s', getattr(encode_with, '__name__', encode_with))
+
+        # save normalized subtitle if encoder or no encoding is given
+        if has_encoder or encoding is None:
+            content = encode_with(subtitle.text) if has_encoder else subtitle.content
             with io.open(subtitle_path, 'wb') as f:
-                f.write(subtitle.content)
-        else:
+                f.write(content)
+
+            # this subtitle is on disk now: record it before leaving the iteration,
+            # otherwise it would be missing from the returned list
+            saved_subtitles.append(subtitle)
+            if single:
+                break
+            continue
+
+        # save subtitle if encoding given
+        if encoding is not None:
             with io.open(subtitle_path, 'w', encoding=encoding) as f:
                 f.write(subtitle.text)
+
         saved_subtitles.append(subtitle)

         # check single
         if single:
             break

-    return saved_subtitles
+    return saved_subtitles
@@ -8,6 +8,7 @@ import operator
 import time
 from babelfish.exceptions import LanguageReverseError
 from pkg_resources import EntryPoint, iter_entry_points
+from subliminal import ProviderError
 from subliminal.api import ProviderPool
 from subliminal_patch.patch_subtitle import compute_score

@@ -213,6 +214,9 @@ class PatchedProviderPool(ProviderPool):
                self[subtitle.provider_name].download_subtitle(subtitle)
            except (requests.Timeout, socket.timeout):
                logger.error('Provider %r timed out', subtitle.provider_name)
+            except ProviderError:
+                logger.error('Unexpected error in provider %r, Traceback: %s', subtitle.provider_name, traceback.format_exc())
+                break
            except:
                logger.exception('Unexpected error in provider %r, Traceback: %s', subtitle.provider_name, traceback.format_exc())
            else:
@@ -258,7 +262,12 @@ class PatchedProviderPool(ProviderPool):
        unsorted_subtitles = []
        for s in subtitles:
            logger.debug("Starting score computation for %s", s)
-            matches = s.get_matches(video, hearing_impaired=use_hearing_impaired)
+            try:
+                matches = s.get_matches(video, hearing_impaired=use_hearing_impaired)
+            except AttributeError:
+                logger.error("Match computation failed for %s: %s", s, traceback.format_exc())
+                continue
+
            unsorted_subtitles.append((s, compute_score(matches, video, scores=scores), matches))
        scored_subtitles = sorted(unsorted_subtitles, key=operator.itemgetter(1), reverse=True)

@@ -281,7 +290,7 @@ class PatchedProviderPool(ProviderPool):
                continue

            # bail out if hearing_impaired was wrong
-            if not "hearing_impaired" in matches and hearing_impaired in ("force HI", "force non-HI"):
+            if "hearing_impaired" not in matches and hearing_impaired in ("force HI", "force non-HI"):
                logger.debug('Skipping subtitle: %r with score %d because hearing-impaired set to %s', subtitle, score, hearing_impaired)
                continue

@@ -10,6 +10,7 @@ from .mixins import PunctuationMixin
 logger = logging.getLogger(__name__)

 series_year_re = re.compile('^(?P<series>.+?)(?: \((?P<year>\d{4})\))?$')
+remove_brackets_re = re.compile("^(.+?)( \([^\d]+\))$")

 USE_BOOST = False

@@ -106,6 +107,15 @@ class PatchedAddic7edProvider(PunctuationMixin, Addic7edProvider):
            logger.debug('Getting show id')
            show_id = show_ids.get(series_clean)

+            if not show_id:
+                # show not found, try to match it without modifiers (mostly country codes such as US/UK)
+                match = remove_brackets_re.match(series_clean)
+                if match:
+                    matched = match.group(1)
+                    show_id = show_ids.get(matched)
+                    if show_id:
+                        logger.debug("show '%s' matched to '%s': %s", series, matched, show_id)
+
        # search as last resort
        if not show_id:
            logger.warning('Series not found in show ids, attempting search')
@@ -13,7 +13,7 @@ class PunctuationMixin(object):
    """

    def clean_punctuation(self, s):
-        return s.replace(".", "").replace(":", "").replace("'", "")
+        # strips ".", ":", "'", "&" and "-" from s; every other character is left untouched
+        return s.replace(".", "").replace(":", "").replace("'", "").replace("&", "").replace("-", "")

    def clean_whitespace(self, s):
+        # removes every match of clean_whitespace_re (defined outside this hunk) from s
        return clean_whitespace_re.sub("", s)
@@ -1,20 +1,58 @@
 # coding=utf-8

 import logging
+import os

+from babelfish import Language
 from subliminal.exceptions import ConfigurationError
-from subliminal.providers.opensubtitles import OpenSubtitlesProvider, checked, get_version, __version__
+from subliminal.providers.opensubtitles import OpenSubtitlesProvider, checked, get_version, __version__, OpenSubtitlesSubtitle, Episode

 logger = logging.getLogger(__name__)


+class PatchedOpenSubtitlesSubtitle(OpenSubtitlesSubtitle):
+    def __init__(self, language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind, hash, movie_name,
+                 movie_release_name, movie_year, movie_imdb_id, series_season, series_episode, query_parameters, fps):
+        super(PatchedOpenSubtitlesSubtitle, self).__init__(language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind, hash,
+                                                           movie_name,
+                                                           movie_release_name, movie_year, movie_imdb_id, series_season, series_episode)
+        self.query_parameters = query_parameters or {}
+        self.fps = fps
+
+    def get_matches(self, video, hearing_impaired=False):
+        matches = super(PatchedOpenSubtitlesSubtitle, self).get_matches(video, hearing_impaired=hearing_impaired)
+
+        sub_fps = None
+        try:
+            sub_fps = float(self.fps)
+        except ValueError:
+            pass
+
+        # video has fps info, sub also, and sub's fps is greater than 0
+        if video.fps and sub_fps and (video.fps != self.fps):
+            logger.debug("Wrong FPS (expected: %s, got: %s, lowering score massively)", video.fps, self.fps)
+            # fixme: may be too harsh
+            return set()
+
+        # matched by tag?
+        if self.matched_by == "tag":
+            # treat a tag match equally to a hash match
+            logger.debug("Subtitle matched by tag, treating it as a hash-match. Tag: '%s'", self.query_parameters.get("tag", None))
+            matches.add("hash")
+        return matches
+
+
 class PatchedOpenSubtitlesProvider(OpenSubtitlesProvider):
-    def __init__(self, username=None, password=None):
+    def __init__(self, username=None, password=None, use_tag_search=False):
+        """Store the credentials and the tag search switch, then initialize the base provider.
+
+        :param username: account name; must be given together with `password`.
+        :param password: account password; must be given together with `username`.
+        :param use_tag_search: stored on the instance; :meth:`list_subtitles` hands it to :meth:`query`.
+        :raise: :class:`ConfigurationError` if only one of `username` and `password` is given.
+
+        """
        if username is not None and password is None or username is None and password is not None:
            raise ConfigurationError('Username and password must be specified')

        self.username = username or ''
        self.password = password or ''
+        self.use_tag_search = use_tag_search
+
+        if use_tag_search:
+            logger.info("Using tag/exact filename search")

        super(PatchedOpenSubtitlesProvider, self).__init__()

@@ -23,3 +61,81 @@ class PatchedOpenSubtitlesProvider(OpenSubtitlesProvider):
        response = checked(self.server.LogIn(self.username, self.password, 'eng', 'subliminal v%s' % get_version(__version__)))
        self.token = response['token']
        logger.debug('Logged in with token %r', self.token)
+
+    def list_subtitles(self, video, languages):
+        """
+        :param video:
+        :param languages:
+        :return:
+
+         patch: query movies even if hash is known; add tag parameter
+        """
+        season = episode = None
+        if isinstance(video, Episode):
+            query = video.series
+            season = video.season
+            episode = video.episode
+        # elif ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
+        #    query = video.name.split(os.sep)[-1]
+        else:
+            query = video.title
+
+        return self.query(languages, hash=video.hashes.get('opensubtitles'), size=video.size, imdb_id=video.imdb_id,
+                          query=query, season=season, episode=episode, tag=os.path.basename(video.name), use_tag_search=self.use_tag_search)
+
+    def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None, tag=None, use_tag_search=False):
+        # fill the search criteria
+        criteria = []
+        if hash and size:
+            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
+        if use_tag_search and tag:
+            criteria.append({'tag': tag})
+        if imdb_id:
+            criteria.append({'imdbid': imdb_id})
+        if query and season and episode:
+            criteria.append({'query': query, 'season': season, 'episode': episode})
+        elif query:
+            criteria.append({'query': query})
+        if not criteria:
+            raise ValueError('Not enough information')
+
+        # add the language
+        for criterion in criteria:
+            criterion['sublanguageid'] = ','.join(sorted(l.opensubtitles for l in languages))
+
+        # query the server
+        logger.info('Searching subtitles %r', criteria)
+        response = checked(self.server.SearchSubtitles(self.token, criteria))
+        subtitles = []
+
+        # exit if no data
+        if not response['data']:
+            logger.info('No subtitles found')
+            return subtitles
+
+        # loop over subtitle items
+        for subtitle_item in response['data']:
+            # read the item
+            language = Language.fromopensubtitles(subtitle_item['SubLanguageID'])
+            hearing_impaired = bool(int(subtitle_item['SubHearingImpaired']))
+            page_link = subtitle_item['SubtitlesLink']
+            subtitle_id = int(subtitle_item['IDSubtitleFile'])
+            matched_by = subtitle_item['MatchedBy']
+            movie_kind = subtitle_item['MovieKind']
+            hash = subtitle_item['MovieHash']
+            movie_name = subtitle_item['MovieName']
+            movie_release_name = subtitle_item['MovieReleaseName']
+            movie_year = int(subtitle_item['MovieYear']) if subtitle_item['MovieYear'] else None
+            movie_imdb_id = int(subtitle_item['IDMovieImdb'])
+            movie_fps = subtitle_item.get('MovieFPS')
+            series_season = int(subtitle_item['SeriesSeason']) if subtitle_item['SeriesSeason'] else None
+            series_episode = int(subtitle_item['SeriesEpisode']) if subtitle_item['SeriesEpisode'] else None
+            query_parameters = subtitle_item.get("QueryParameters")
+
+            subtitle = PatchedOpenSubtitlesSubtitle(language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind,
+                                                    hash, movie_name, movie_release_name, movie_year, movie_imdb_id,
+                                                    series_season, series_episode, query_parameters, fps=movie_fps)
+            logger.debug('Found subtitle %r', subtitle)
+            subtitles.append(subtitle)
+
+        return subtitles
@@ -1,13 +1,18 @@
 # coding=utf-8

+import re
 import logging
 from subliminal.providers import ParserBeautifulSoup
 from subliminal.cache import SHOW_EXPIRATION_TIME, region
-from subliminal.providers.tvsubtitles import TVsubtitlesProvider, link_re
+from subliminal.providers.tvsubtitles import TVsubtitlesProvider
 from .mixins import PunctuationMixin

 logger = logging.getLogger(__name__)

+# clean_punctuation actually removes the dash in YYYY-YYYY year range
+# fixme: clean_punctuation is stupid
+link_re = re.compile('^(?P<series>.+)(?: \(?\d{4}\)?| \((?:US|UK)\))? \((?P<first_year>\d{4})\d{4}\)$')
+

 class PatchedTVsubtitlesProvider(PunctuationMixin, TVsubtitlesProvider):
    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
@@ -1,7 +1,11 @@
 # coding=utf-8

 import logging
+
+import chardet
+from bs4 import UnicodeDammit
 from subliminal.video import Episode, Movie
+from subliminal import Subtitle

 logger = logging.getLogger(__name__)

@@ -29,8 +33,8 @@ def compute_score(matches, video, scores=None):

    is_episode = isinstance(video, Episode)

-    episode_hash_valid_if = {"series", "season", "episode", "format"}
-    movie_hash_valid_if = {"title", "format", "video_codec"}
+    episode_hash_valid_if = {"series", "season", "episode"}
+    movie_hash_valid_if = {"title", "video_codec"}

    # remove equivalent match combinations
    if 'hash' in final_matches:
@@ -58,3 +62,68 @@ def compute_score(matches, video, scores=None):
    logger.info('Computed score %d', score)

    return score
+
+
+class PatchedSubtitle(Subtitle):
+    def guess_encoding(self):
+        """Guess encoding using the language, falling back on chardet.
+
+        :return: the guessed encoding.
+        :rtype: str
+
+        """
+        logger.info('Guessing encoding for language %s', self.language)
+
+        # always try utf-8 first
+        encodings = ['utf-8']
+
+        # add language-specific encodings
+        if self.language.alpha3 == 'zho':
+            encodings.extend(['gb18030', 'big5'])
+        elif self.language.alpha3 == 'jpn':
+            encodings.append('shift-jis')
+        elif self.language.alpha3 == 'ara':
+            encodings.append('windows-1256')
+        elif self.language.alpha3 == 'heb':
+            encodings.append('windows-1255')
+        elif self.language.alpha3 == 'tur':
+            encodings.extend(['iso-8859-9', 'windows-1254'])
+        elif self.language.alpha3 == 'pol':
+            # Eastern European Group 1
+            encodings.extend(['windows-1250'])
+        elif self.language.alpha3 == 'bul':
+            # Eastern European Group 2
+            encodings.extend(['windows-1251'])
+        else:
+            # Western European (windows-1252)
+            encodings.append('latin-1')
+
+        # try to decode
+        logger.debug('Trying encodings %r', encodings)
+        for encoding in encodings:
+            try:
+                self.content.decode(encoding)
+            except UnicodeDecodeError:
+                pass
+            else:
+                logger.info('Guessed encoding %s', encoding)
+                return encoding
+
+        logger.warning('Could not guess encoding from language')
+
+        # fallback on chardet
+        encoding = chardet.detect(self.content)['encoding']
+        logger.info('Chardet found encoding %s', encoding)
+
+        if not encoding:
+            # fallback on bs4
+            logger.info('Falling back to bs4 detection')
+            a = UnicodeDammit(self.content)
+
+            Log.Debug("bs4 detected encoding: %s" % a.original_encoding)
+
+            if a.original_encoding:
+                return a.original_encoding
+            raise ValueError(u"Couldn't guess the proper encoding for %s" % self)
+
+        return encoding
@@ -67,7 +67,7 @@ def patched_search_external_subtitles(path):
    return subtitles


-def scan_video(path, subtitles=True, embedded_subtitles=True, hints=None, dont_use_actual_file=False):
+def scan_video(path, subtitles=True, embedded_subtitles=True, hints=None, video_fps=None, dont_use_actual_file=False):
    """Scan a video and its subtitle languages from a video `path`.
    :param dont_use_actual_file: guess on filename, but don't use the actual file itself
    :param str path: existing path to the video.
@@ -79,6 +79,8 @@ def scan_video(path, subtitles=True, embedded_subtitles=True, hints=None, dont_u

    # patch: suggest video type to guessit beforehand
    """
+    hints = hints or {}
+
    # check for non-existing path
    if not dont_use_actual_file and not os.path.exists(path):
        raise ValueError('Path does not exist')
@@ -88,13 +90,16 @@ def scan_video(path, subtitles=True, embedded_subtitles=True, hints=None, dont_u
        raise ValueError('%s is not a valid video extension' % os.path.splitext(path)[1])

    dirpath, filename = os.path.split(path)
+
+    # hint guessit the filename itself and its 2 parent directories if we're an episode (most likely Series name/Season/filename), else only one
+    guess_from = os.path.join(*os.path.normpath(path).split(os.path.sep)[-3 if hints.get("type") == "episode" else -2:])
    hints = hints or {}
-    logger.info('Scanning video (hints: %s) %r in %r', hints, filename, dirpath)
-    guess_from = os.path.join(os.path.split(dirpath)[-1], filename)
+    logger.info('Scanning video (hints: %s) %r', hints, guess_from)

    # guess
    try:
        video = Video.fromguess(path, guess_file_info(guess_from, options=hints))
+        video.fps = video_fps

        if dont_use_actual_file:
            return video
@@ -114,6 +119,7 @@ def scan_video(path, subtitles=True, embedded_subtitles=True, hints=None, dont_u
            video.subtitle_languages |= set(patched_search_external_subtitles(path).values())
    except Exception:
        logger.error("Something went wrong when running guessit: %s", traceback.format_exc())
+        return

    # video metadata with enzyme
    try:
@@ -167,6 +173,9 @@ def scan_video(path, subtitles=True, embedded_subtitles=True, hints=None, dont_u
                if embedded_subtitles:
                    embedded_subtitle_languages = set()
                    for st in mkv.subtitle_tracks:
+                        if st.forced:
+                            logger.debug("Ignoring forced subtitle track %r", st)
+                            continue
                        if st.language:
                            try:
                                embedded_subtitle_languages.add(Language.fromalpha3b(st.language))
@@ -11,4 +11,4 @@ PREFIX = "/video/%s" % PLUGIN_IDENTIFIER_SHORT

 TITLE = "%s Subtitles" % PLUGIN_NAME
 ART      = 'art-default.jpg'
-ICON     = 'icon-default.png'
+ICON     = 'icon-default.jpg'
@@ -1,6 +1,9 @@
 # coding=utf-8

 import datetime
+import threading
+
+lock = threading.Lock()


 class TempIntent(dict):
@@ -9,7 +12,8 @@ class TempIntent(dict):

    def __init__(self, timeout=1000):
+        # timeout: default lifetime in milliseconds that set() applies when it is called without its own timeout
        self.timeout = timeout
-        self.store = {}
+        with lock:
+            self.store = {}

    def __getattr__(self, name):
        if name in self:
@@ -23,47 +27,51 @@ class TempIntent(dict):
            del self[name]

    def get(self, kind, key):
-        if kind in self["store"]:
-            now = datetime.datetime.now()
-            hit = False
-            for known_key in self["store"][kind].keys():
-                # may need locking, for now just play it safe
-                ends = self["store"][kind].get(known_key, None)
-                if not ends:
-                    continue
-
-                timed_out = False
-                if now > ends:
-                    timed_out = True
-
-                if known_key == key and not timed_out:
-                    hit = True
-
-                if timed_out:
-                    try:
-                        del self["store"][kind][key]
-                    except:
+        with lock:
+            if kind in self["store"]:
+                now = datetime.datetime.now()
+                hit = False
+                for known_key in self["store"][kind].keys():
+                    # may need locking, for now just play it safe
+                    ends = self["store"][kind].get(known_key, None)
+                    if not ends:
                        continue

-            if hit:
-                return True
-        return False
+                    timed_out = False
+                    if now > ends:
+                        timed_out = True
+
+                    if known_key == key and not timed_out:
+                        hit = True
+
+                    if timed_out:
+                        try:
+                            del self["store"][kind][key]
+                        except:
+                            continue
+
+                if hit:
+                    return True
+            return False

    def resolve(self, kind, key):
-        if kind in self["store"] and key in self["store"][kind]:
-            del self["store"][kind][key]
-            return True
-        return False
+        # removes the intent stored for key under kind; returns True if there was one, else False
+        with lock:
+            if kind in self["store"] and key in self["store"][kind]:
+                del self["store"][kind][key]
+                return True
+            return False

    def set(self, kind, key, timeout=None):
-        if kind not in self["store"]:
-            self["store"][kind] = {}
-        self["store"][kind][key] = datetime.datetime.now() + datetime.timedelta(milliseconds=timeout or self.timeout)
+        # stores key under kind with an end time of now + timeout milliseconds (self.timeout if timeout is falsy)
+        with lock:
+            if kind not in self["store"]:
+                self["store"][kind] = {}
+            self["store"][kind][key] = datetime.datetime.now() + datetime.timedelta(milliseconds=timeout or self.timeout)

    def has(self, kind, key):
-        if kind not in self["store"]:
-            return False
-        return key in self["store"][kind]
+        # True if key is stored under kind; unlike get() the stored end time is not checked here
+        with lock:
+            if kind not in self["store"]:
+                return False
+            return key in self["store"][kind]


 intent = TempIntent()
@@ -1,4 +1,4 @@
-Sub-Zero for Plex, 1.3.20.396
+Sub-Zero for Plex, 1.3.33.522
 =================

 ![logo](https://raw.githubusercontent.com/pannal/Sub-Zero.bundle/master/Contents/Resources/subzero.gif)
@@ -10,13 +10,16 @@ Plex forum thread: https://forums.plex.tv/discussion/186575

 If you like this, buy me a beer: [![Donate](https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=G9VKR2B8PMNKG)

-### Installation
+### Automatic Installation
+* Sub-Zero is now listed in the official Plex Channel Directory. You can install it from there.
+
+### Manual/Development/Testing Installation
 * go to ```Library/Application Support/Plex Media Server/Plug-ins/```
-* ```rm -r Sub-Zero.bundle```
+* ```rm -r Sub-Zero.bundle``` (remove the folder)
 * get the release you want from *https://github.com/pannal/Sub-Zero.bundle/releases/*
 * unzip the release
+* edit `Contents/Info.plist` and set the value of `<key>PlexPluginDevMode</key>` to `<string>1</string>` to keep automatic updates from replacing your manual installation with the stable release
 * restart your plex media server!!!
-* more indepth: see [article](https://support.plex.tv/hc/en-us/articles/201187656-How-do-I-manually-install-a-channel-) on Plex website. 

 ### Usage
 Use the following agent order:
@@ -25,33 +28,40 @@ Use the following agent order:
 2. Local Media Assets
 3. anything else

+##### Recommended steps
+Create an account and provide your credentials (in the plugin configuration) for:
+
+* [Addic7ed](http://www.addic7ed.com/newaccount.php)
+* [Opensubtitles](http://www.opensubtitles.org/en/newuser)
+* [Plex](https://plex.tv/users/sign_up)
+
 ### Attention on the initial refresh
-When you first use this plugin, and do a refresh on all of your media, you are most likely
-to be shut out by some or all of the subtitle providers depending on your libraries' size.
+When you first use this plugin and run a refresh on all of your media, you may be
+blacklisted for excessive usage by some or all of the subtitle providers, depending on your library's size.
 This will result in a bunch of errors in the log files as well as missing subtitles.

-Just be patient, after a day most of those providers will allow you access again and you can
+Just be patient, after a day most of those providers will allow you to access them again and you can
 refresh the remaining items. If you use the default settings, this will also skip the items
-it has already downloaded all the wanted languages for.
+it has already downloaded all the wanted languages for. Also, as subtitles will be missing, the scheduler should pick up
+the items with missing subtitles automatically.

 ### Encountered a bug?
 * be sure to post your logs: 
-  * set your log_level to DEBUG in the settings
+  * set your log_level to DEBUG in Sub-Zero's settings
  * get ```Library/Application Support/Plex Media Server/Logs/PMS Plugin Logs/com.plexapp.agents.subzero.log```; there may be multiple logs (com.plexapp.agents.subzero.log.*) depending on the amount of Videos you're refreshing
-* **Remember: before you open a bug-ticket please double-check, that you've deleted the Sub-Zero.bundle folder BEFORE every update** (to avoid .pyc leftovers)
+* **Remember: If you're using the manual installation, before you open a bug ticket, please double-check that you've deleted the Sub-Zero.bundle folder BEFORE every update** (to avoid .pyc leftovers)

 ## Changelog

-1.3.20.396
+1.3.33.522

- core: fix logging handlers (when saving log_level settings loggers got duplicated)
- core: better movie matching by only hinting the filename and the last subdirectory to guessit (instead of the full path)
- core: don't fail on wrong detection/scanning of media file
- lower minimum tv series score from 85 to 67 (removed title; composed of: series=44 + season=11 + episode=11 + hearing_impaired=1)
+- core: fix library permission detection on windows; fixes #151
+- core: "Restrict to one language" now behaves like it should (one found subtitle of any language is treated as sufficient); fixes #149
+- core: add support for other subtitle formats such as ssa/ass/microdvd, convert to srt; fixes #138
+- core: hopefully more consistent force-refresh handling (intent); fixes #118

 [older changes](CHANGELOG.md)

-
 Description
 ------------

@@ -69,13 +79,19 @@ Configuration
 -------------
 Several options are provided in the preferences of this agent. 

+* Enable Sub-Zero channel (disabling doesn't affect the subtitle features)?: Show or hide the Sub-Zero channel from your PMS
+* How many download tries per subtitle (on timeout or error): How often should we retry a failed subtitle download? (default: on)
 * Addic7ed username/password: Provide your addic7ed username here, otherwise the provider won't work. Please make sure your account is activated, before using the agent.
 * Plex.tv username/password: Generally recommended to be provided; needed if you use Plex Home to make the API work (the whole channel menu depends on it)
+* Opensubtitles username/password: Generally recommended to be provided (not necessarily needed, but avoids errors)
 * Subtitle language (1)/(2)/(3): Your preferred languages to download subtitles for. 
 * Additional Subtitle Languages: Additional languages to download; comma-separated; use [ISO-639-1 codes](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes))
+* Restrict to one language (skips adding ".lang." to the subtitle filename; only uses "Subtitle Language (1)"): default: off
+* Normalize subtitle encoding to UTF-8: default: on
 * Provider: Enable ...: Enable/disable this provider. Affects both movies and series. 
-* Addic7ed: boost over hash score if requirements met: if an Addic7ed subtitle matches the video's series, season, episode, year, and format (e.g. WEB-DL), boost its score, possibly over OpenSubtitles/TheSubDB direct hash match
-* Scan: Include embedded subtitles: When enabled, subliminal finds embedded subtitles that are already present within the media file. 
+* Addic7ed: (TV only) boost over hash score if requirements met: if an Addic7ed subtitle matches the video's series, season, episode, year, boost its score, possibly over OpenSubtitles/TheSubDB direct hash match. Recommended for higher quality subtitle results. 
+* I keep the exact (release-) filename of my media files: If you don't rename your media files automatically or manually and keep the original release's file names, enabling this option may help finding suitable subtitles for your media. Otherwise: disable this. 
+* Scan: Include embedded subtitles: When enabled, subliminal finds embedded subtitles (ignoring forced) that are already present within the media file. 
 * Scan: Include external subtitles: When enabled, subliminal finds subtitles located near the media file on the filesystem.
 * Minimum score for download: When configured, what is the minimum score for subtitles to download them? Lower scored subtitles are not downloaded.
 * Download hearing impaired subtitles: 
@@ -87,10 +103,12 @@ Several options are provided in the preferences of this agent.
 * Subtitle folder: (default: current media file's folder) See Store as metadata or on filesystem
 * Custom Subtitle folder: See Store as metadata or on filesystem 
 * Treat IETF language tags as ISO 639-1: Treats subtitle files with IETF language identifiers, such as pt-BR, as their ISO 639-1 counterpart. Thus "pt-BR" will be shown as "Portuguese" instead of "Unknown"
+* Ignore folders (...): If a folder contains one of the files named `subzero.ignore`, `.subzero.ignore`, `.nosz`, don't process it. This applies to sections/libraries, movies, series, seasons, episodes 
 * Scheduler: 
  * Periodically search for recent items with missing subtitles: self-explanatory, executes the task "Search for missing subtitles" from the channel menu regularly. Configure how often it should do that. For the average library 6 hours minimum is recommended, to not hammer the providers too heavily
  * Item age to be considered recent: The "Search for missing subtitles"-task only considers those items in the recently-added list, that are at most this old
  * Recent items to consider per library: How many items to consider for every section/library you have - used in "Search for missing subtitles"-task and "Items with missing subtitles"-menu. Change at your own risk!
+* Check for correct folder permissions of every library on plugin start: if enabled, SZ checks for necessary permissions of your library folders and warns about them in the plugin channel
 * How verbose should the logging be?: Controls how much info we write into the log files (default: only warnings)
 * Log to console (for development/debugging): You know when you need it

@@ -134,6 +152,17 @@ The setting 'Subtitle folder' configures in which folder (current folder or othe

 **When a subfolder (either custom or predefined) is used, the automatic scheduled refresh of Plex won't pick up your subtitles, only a manual refresh will!**

+
+BETA: Physically Ignoring Media
+-------------------------
+Sometimes subtitles aren't needed or wanted for parts of your library.
+
+When you create a file named `subzero.ignore`, `.subzero.ignore`, or `.nosz` in any of your library's folders, be it
+the section itself, a TV show, a movie, or even a season, Sub-Zero will skip processing the contents of that folder.
+ 
+BETA notes: The scheduler task for missing subtitles may still trigger refresh actions on those items,
+but the refresh handler itself will skip them.
+
 License
 -------
 The Unlicense
Author	SHA1	Message	Date
panni	8a1b615fe9	release 1.3.33.522	2016-05-05 04:46:49 +02:00
panni	3f3bb2d830	Merge branch '#151_permission_check_windows' into 1.3-bugfixes	2016-05-05 04:18:40 +02:00
panni	ae4871f6dd	Merge branch '#149_treat_one_as_found' into 1.3-bugfixes	2016-05-05 04:18:34 +02:00
panni	f46da7b12f	Merge branch '#138_support_other_formats' into 1.3-bugfixes	2016-05-05 04:18:27 +02:00
panni	b3f5bdd58d	use locking on intents; fixes #118	2016-05-05 04:09:48 +02:00
panni	ca8ecd297b	try to handle other subtitle formats and return .srt; fixes #138	2016-05-05 03:41:27 +02:00
panni	d954d25a73	fix #149 ; if we've got a subtitle for a file and we only want one (without language suffix), treat any subtitle as a found one	2016-05-05 03:01:22 +02:00
panni	bda261b495	check for correct library permissions on windows; fixes #151	2016-05-05 02:37:46 +02:00
panni	af3142546e	update readme	2016-04-23 00:09:10 +02:00
panni	05b9a400fd	update readme/changelog/info to 1.3.31.513	2016-04-22 23:36:21 +02:00
panni	5a0f6969d9	finally call dict.save() at the end, always	2016-04-22 23:13:20 +02:00
panni	7ea0f3f73b	update six to 1.10.0	2016-04-22 22:48:17 +02:00
panni	a383682147	Merge remote-tracking branch 'origin/check_permissions' into intermediate_release	2016-04-22 22:41:09 +02:00
panni	7dd414bc8f	resolve #143 check permissions on plugin start	2016-04-21 14:02:26 +02:00
pannal	32fca9dadb	Update README.md	2016-04-19 22:51:01 +02:00
pannal	dd75eacebf	update installation instructions	2016-03-02 10:04:31 +01:00
pannal	ca42e7e7f1	Merge pull request #135 from plexinc-agents/master Remove default 'Username' value from 'Addic7ed Username'; update logo	2016-03-01 10:21:19 +01:00
sander1	3430702d51	Update logo (make it 512x512px and jpeg).	2016-03-01 00:43:16 +01:00
sander1	653a9087c4	Remove default 'Userame' value from 'Addic7ed Username'.	2016-03-01 00:42:39 +01:00
panni	05889e7554	move ignore list to the bottom	2016-02-27 03:51:42 +01:00
panni	7fca0cd201	add top menu item for refreshing the current state	2016-02-27 03:47:49 +01:00
panni	7321c9095e	fix #101 patch earlier	2016-02-23 18:36:38 +01:00
panni	bbb83d9cad	fix #101 better encoding detection with bs4 fallback	2016-02-23 18:32:29 +01:00
panni	275023c844	fix #128 actually use subliminal's subtitle.text if applicable (cherry picked from commit 101da21)	2016-02-23 18:01:01 +01:00
panni	35c6aee5dd	fix #128 add utf-8 enforcing	2016-02-21 05:59:29 +01:00
panni	25686e981f	updated beautifulsoup to 4.4.1	2016-02-21 04:53:07 +01:00
panni	ad8022666f	re-add chardet license	2016-02-21 04:40:38 +01:00
panni	68f246cda5	update chardet to 2.3.0	2016-02-21 04:39:34 +01:00
panni	cc977fce35	fix #126 ; re-add single language setting	2016-02-21 04:23:21 +01:00
panni	d4f7e2712e	update configuration docs again	2016-01-31 04:35:19 +01:00
panni	9a89b01741	update configuration docs	2016-01-31 04:33:22 +01:00
panni	009938bc06	bump to 1.3.27.491	2016-01-31 04:28:21 +01:00
panni	487e933c25	catch guessit/transfo AttributeError in download_best_subtitles; fixes #120	2016-01-31 04:01:38 +01:00
panni	539f621c0b	again menu unicode fixes	2016-01-31 03:57:20 +01:00
panni	bfe9860c92	TVSubtitles: remove greediness off link_re series match - correctly detect "Series Name (country)"; fixes #121	2016-01-31 03:52:13 +01:00
panni	5b5645e042	more menu unicode fixes	2016-01-31 03:49:04 +01:00
panni	b9053d1dfd	import intent early	2016-01-31 03:33:38 +01:00
panni	a7084ecd88	fix item refresh menu unicode errors	2016-01-31 03:27:44 +01:00
panni	b3b301332c	make tag/exact filename search optional; fixes #123	2016-01-31 03:15:07 +01:00
panni	eb43778718	Merge branch 'title_match' into develop	2016-01-31 02:56:04 +01:00
panni	d6ed4e6b0b	OpenSubtitles: handle "0.000" subtitle fps	2016-01-31 02:39:39 +01:00
panni	e38d696ac9	score episode title as zero	2016-01-30 05:57:05 +01:00
panni	07c3a48657	treat unspecified fps as no given fps #119	2016-01-24 07:34:03 +01:00
panni	ebede7a297	add markerlib; fix #115	2016-01-24 07:27:41 +01:00
panni	59ab5e16cc	revert: treat 23.976 fps like 23.98 #119	2016-01-24 07:11:04 +01:00
panni	889399fc04	treat 23.976 fps like 23.98 #119	2016-01-24 07:04:54 +01:00
panni	69eda1420b	broken debug log	2016-01-24 06:51:31 +01:00
panni	063920a2a5	detect and match FPS; maybe fix #119	2016-01-24 06:41:18 +01:00
panni	1623ee858f	move enable_channel function to menu_helpers; fix #111	2016-01-10 04:11:16 +01:00
panni	1edd13b229	rename channel enable setting #111	2016-01-10 04:10:24 +01:00
panni	d0b6fbb7b4	wrap @route and @handler and add global channel disabling #111	2016-01-10 03:39:26 +01:00
panni	4fc21a29e3	rename SubZeroAgent.agent_type_short to agent_type_verbose	2016-01-04 03:03:41 +01:00
panni	3e6d03eea1	core: simplify tv/movie agent detection	2016-01-04 02:57:24 +01:00
panni	b950485f6c	bump 2016	2016-01-03 03:32:25 +01:00
panni	3007c0d57f	messed up the versioning. 1.3.23.459 release	2016-01-03 03:16:30 +01:00
panni	5a2b30432c	Merge branch 'master' into develop	2016-01-03 03:13:07 +01:00
panni	cfb66db035	1.3.20.459 release	2016-01-03 03:11:23 +01:00
panni	1eec18b76d	Merge branch 'master' into develop	2016-01-03 02:59:06 +01:00
panni	1d2bfe2195	1.3.20.422 release	2016-01-03 02:58:12 +01:00
panni	f4a13b2e7a	Merge branch 'master' into 1.3-stable	2016-01-03 02:55:43 +01:00
panni	b29667b9f6	1.3.20.422 release	2016-01-03 02:55:01 +01:00
pannal	dcd21aab1c	Merge pull request #108 from pannal/opensubtitles-smarty Opensubtitles: Implement tag matching	2016-01-02 22:23:47 +01:00
panni	bfbfcd2d8b	OpenSubtitles: QueryParameters seems optional	2016-01-01 19:44:34 +01:00
panni	bb72181359	OpenSubtitles: move tag above imdb_id	2016-01-01 05:46:21 +01:00
panni	2d0b9ab9f1	OpenSubtitles: fix QueryParameters usage	2016-01-01 05:27:28 +01:00
panni	291f462955	OpenSubtitles: os.path.basename on video.name	2015-12-31 06:06:51 +01:00
panni	74d6de9c78	prefs: rename label for physical ignore	2015-12-31 04:57:45 +01:00
panni	9f99390145	readme: add documentation for physical ignore	2015-12-31 04:57:25 +01:00
panni	8cdf12bafd	readme: clarify scan: include embedded subtitles	2015-12-31 04:34:36 +01:00
panni	2b5442a2a8	readme: add plex signup link; minor corrections	2015-12-31 04:31:02 +01:00
panni	ebb9f42771	readme: more detailed recommended-steps docs	2015-12-31 04:26:08 +01:00
panni	c75e2b778f	readme: add registration links to OS and addic7ed #105	2015-12-31 04:23:16 +01:00
panni	1f6d198bf5	add opensubtitles configuration details; add recommended section to usage	2015-12-31 04:19:50 +01:00
panni	30bbfc37fc	don't treat embedded forced subtitles as found embedded subtitles; fixes #106	2015-12-31 04:07:44 +01:00
panni	5a693ae673	pep8	2015-12-31 03:50:07 +01:00
panni	38325f84ac	OpenSubitles: list_subtitles: provide tag parameter to query	2015-12-31 03:48:14 +01:00
panni	0eebd164ec	OpenSubitles: store QueryParameters for debug logging	2015-12-31 03:33:33 +01:00
panni	f60b730411	OpenSubitles: treat a tag match like a hash match	2015-12-31 03:16:28 +01:00
panni	16db1db748	remove "format" from the hash validation for now	2015-12-31 03:16:01 +01:00
panni	818cf4bc33	don't fail on empty hint (most likely command line debugging)	2015-12-31 03:15:37 +01:00
panni	789b7ba9aa	Merge remote-tracking branch 'origin/master' into opensubtitles-smarty	2015-12-31 02:40:58 +01:00
pannal	fdf389f62c	Merge pull request #107 from infernix/master Define sub_dir_* only if use_filesystem is true	2015-12-29 17:00:28 +01:00
Gerben Meijer	8ae8433463	Define sub_dir_* only if use_filesystem is true	2015-12-29 14:09:23 +01:00
pannal	71464cd5bf	Merge pull request #104 from pannal/deeper_guessit_hinting fix video referenced before assignment; hint guessit two parent folde…	2015-12-28 16:44:05 +01:00
panni	44ca3b9e34	fix video referenced before assignment; hint guessit two parent folders of an episode and one of a movie	2015-12-14 19:53:08 +01:00
pannal	2dd24f02c6	MediaTree has no len	2015-12-06 15:04:18 +01:00
panni	a6e6bc810a	error if media not given	2015-12-06 06:28:13 +01:00
panni	9d00a82343	move IGNORE_FN	2015-12-06 06:24:59 +01:00
panni	1246c53c77	Merge remote-tracking branch 'origin/master'	2015-12-06 06:22:45 +01:00
panni	8fc10c873e	move flattenToParts	2015-12-06 06:22:12 +01:00
panni	b48aac638f	Merge remote-tracking branch 'origin/master' into 1.3-stable	2015-12-06 06:20:28 +01:00
panni	e427565fcf	add filesystem ignore mode; fixes #87	2015-12-06 06:18:35 +01:00
panni	0e028b3ffe	flatten the agents even more	2015-12-06 05:33:28 +01:00
panni	c81e3a7def	generify scanTvMedia and scanMovieMedia to scanParts	2015-12-06 05:23:55 +01:00
pannal	669c9b4fb7	Update README.md	2015-12-05 15:17:36 +01:00
panni	5f015c3d69	1.3.20.422	2015-12-05 04:50:57 +01:00
panni	faa46a7e4d	update settings descriptions	2015-12-05 04:32:50 +01:00
panni	70d2a225f3	do not retry on generic providererror	2015-12-05 04:22:02 +01:00
panni	1521a77281	catch ProviderError; fixes #60	2015-12-05 04:20:50 +01:00
panni	516551714b	tvsubtitles: re-re-fix dashes in series name matching; stupid; fixes #93	2015-12-05 04:13:18 +01:00
panni	e794122b7f	addic7ed: match show ids with language modifier to non-modifier (US/UK...); fixes #90	2015-12-05 03:50:32 +01:00
panni	67282d1ebd	reuse use_filesystem instead of accessing prefs again	2015-12-05 03:18:00 +01:00
panni	2c5975cf26	Merge remote-tracking branch 'origin/master'	2015-12-05 03:17:18 +01:00
panni	dc142281f5	really skip filesystem if only metadata is wanted; fixes #94	2015-12-05 03:16:42 +01:00
pannal	5a445fc5bd	Merge pull request #96 from Erliz/master Add hama in to supported agents	2015-12-04 01:32:24 +01:00
pannal	7ff2f97ac3	Merge pull request #92 from pannal/unicode_test fix unicode problems	2015-12-01 01:21:32 +01:00
pannal	d47492188e	unicodize title parameter in SectionMenu	2015-11-30 19:46:25 +01:00
panni	263d3e7546	use http by default, not https, for local API queries	2015-11-29 04:05:18 +01:00
panni	ce31bf63e9	newline	2015-11-29 03:57:25 +01:00
panni	3c030dd6c3	use UnicodeDammit for path	2015-11-29 03:07:09 +01:00
panni	147c3dfe9d	Merge remote-tracking branch 'origin/master'	2015-11-28 01:36:29 +01:00
panni	c2e820f851	encoding test	2015-11-28 01:35:17 +01:00
pannal	f53f5f1870	CFBundleShortVersionString 1.3.20	2015-11-27 15:00:44 +01:00
panni	c20ecaa616	Merge remote-tracking branch 'origin/1.3-stable' into 1.3-stable	2015-11-27 02:03:24 +01:00
panni	2cc270708a	1.3.20.403	2015-11-27 02:01:49 +01:00
panni	ddf7d4fc96	add debug logging for found metadata subtitles	2015-11-27 01:58:25 +01:00
panni	1e73b530ed	leftover import	2015-11-27 01:43:29 +01:00
panni	5c4a1275fb	Merge branch 'master' into opensubtitles-smarty Conflicts: Contents/Libraries/Shared/subliminal_patch/patch_providers/opensubtitles.py	2015-11-27 00:08:13 +01:00
panni	d55a809493	don't use unneeded subtitle metadata proxy info	2015-11-26 22:30:02 +01:00
Stanislav Vetlovskiy	50ecf71879	Add hama in to supported agents	2015-11-26 22:29:00 +03:00
panni	af7434e35d	opensubtitles, movies: use query even if hash, size or imdb_id are known	2015-11-26 19:50:47 +01:00
panni	ad7239c5d8	set default score to 85 again	2015-11-26 10:34:03 +01:00
panni	f90efceac3	catch logging error on unexpected metadata storage	2015-11-26 10:31:46 +01:00
panni	c6f70dccca	punctuation fixes for & and dash	2015-11-23 15:36:22 +01:00
pannal	eca358e73a	Merge pull request #85 from pannal/master new stable	2015-11-22 03:48:43 +01:00
panni	014f34d813	add tag to possible opensubtitles query	2015-11-20 00:50:36 +01:00