Update README.md

fixed typo
Merge branch 'deep_scan_subs'
2015-09-23 14:40:42 +02:00 · 2015-09-23 14:31:55 +02:00 · 2015-09-23 14:29:21 +02:00 · 2015-09-23 14:26:21 +02:00 · 2015-09-23 13:36:18 +02:00 · 2015-09-23 12:46:07 +02:00
36 changed files with 1559 additions and 382 deletions
@@ -19,6 +19,8 @@ def Start():
    # configured cache to be in memory as per https://github.com/Diaoul/subliminal/issues/303
    subliminal.region.configure('dogpile.cache.memory')

+    
+
 def ValidatePrefs():
    Log.Debug("Validate Prefs called.")
    return 
@@ -33,6 +35,18 @@ def getLangList():
        
    return langList

+def getSubtitleDestinationFolder():
+    """
+    Return the subtitle folder configured in the preferences, or None.
+
+    None is returned when "subtitles.save.filesystem" is disabled, or when no
+    custom folder is set and "subtitles.save.subFolder" is "current folder".
+    A non-empty "subtitles.save.subFolder.Custom" value (stripped of
+    surrounding whitespace) takes precedence over "subtitles.save.subFolder".
+    The value is returned as configured; it is not resolved against any
+    media path here.
+    """
+    # saving to the filesystem is disabled: there is no destination folder
+    if not Prefs["subtitles.save.filesystem"]:
+	return
+
+    # an unset custom folder becomes None so the "or" below falls through
+    fld_custom = Prefs["subtitles.save.subFolder.Custom"].strip() if bool(Prefs["subtitles.save.subFolder.Custom"]) else None
+    # "current folder" means "no sub-folder", which is reported as None
+    return fld_custom or (Prefs["subtitles.save.subFolder"] if Prefs["subtitles.save.subFolder"] != "current folder" else None)
+
+def initSubliminalPatches():
+    """
+    Push the configured subtitle destination folder into subliminal_patch.
+
+    Sets subliminal_patch.patch_video.CUSTOM_PATHS to a one-element list
+    holding the folder returned by getSubtitleDestinationFolder(), or to an
+    empty list when no folder is configured.
+    """
+    # configure custom subtitle destination folders for scanning pre-existing subs
+    dest_folder = getSubtitleDestinationFolder()
+    # always assign a list (possibly empty) so the patched module never sees None
+    subliminal_patch.patch_video.CUSTOM_PATHS = [dest_folder] if dest_folder else []
+
 def getProviders():
    providers = {'opensubtitles' : Prefs['provider.opensubtitles.enabled'],
                 'thesubdb' : Prefs['provider.thesubdb.enabled'],
@@ -48,6 +62,7 @@ def getProviderSettings():
 				      'use_random_agents': Prefs['provider.addic7ed.use_random_agents'],
                                      },
                         }
+
    return provider_settings

 def scanTvMedia(media):
@@ -93,11 +108,15 @@ def saveSubtitles(videos, subtitles):
        Log.Debug("Saving subtitles as metadata")
        saveSubtitlesToMetadata(videos, subtitles)

+
+
 def saveSubtitlesToFile(subtitles):
    fld_custom = Prefs["subtitles.save.subFolder.Custom"].strip() if bool(Prefs["subtitles.save.subFolder.Custom"]) else None
-    if Prefs["subtitles.save.subFolder"] != "current folder" or fld_custom:
-        # specific subFolder requested, create it if it doesn't exist
-        for video, video_subtitles in subtitles.items():
+    
+    for video, video_subtitles in subtitles.items():
+	fld = None
+	if fld_custom or Prefs["subtitles.save.subFolder"] != "current folder":
+    	    # specific subFolder requested, create it if it doesn't exist
            fld_base = os.path.split(video.name)[0]
            if fld_custom:
                if fld_custom.startswith("/"):
@@ -109,10 +128,7 @@ def saveSubtitlesToFile(subtitles):
                fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
            if not os.path.exists(fld):
                os.makedirs(fld)
-            subliminal.api.save_subtitles(video, video_subtitles, directory=fld)
-    
-    else:
-        subliminal.api.save_subtitles(subtitles)
+        subliminal.api.save_subtitles(video, video_subtitles, directory=fld)

 def saveSubtitlesToMetadata(videos, subtitles):
    for video, video_subtitles in subtitles.items():
@@ -132,6 +148,7 @@ class SubliminalSubtitlesAgentMovies(Agent.Movies):

    def update(self, metadata, media, lang):
        Log.Debug("MOVIE UPDATE CALLED")
+	initSubliminalPatches()
        videos = scanMovieMedia(media)
        subtitles = downloadBestSubtitles(videos.keys())
        saveSubtitles(videos, subtitles)
@@ -149,6 +166,7 @@ class SubliminalSubtitlesAgentTvShows(Agent.TV_Shows):

    def update(self, metadata, media, lang):
        Log.Debug("TvUpdate. Lang %s" % lang)
+	initSubliminalPatches()
        videos = scanTvMedia(media)
        subtitles = downloadBestSubtitles(videos.keys())
        saveSubtitles(videos, subtitles)
@@ -110,7 +110,7 @@
        },
 		{
                "id": "subtitles.save.subFolder.Custom",
-                "label": "Custom Subtitle folder (computes to real paths; use for example \"bla\" as a subfolder of the current media file folder - can use real paths aswell)",
+                "label": "Custom Subtitle folder (overrides \"Subtitle Folder\"; computes to real paths; use for example \"bla\" as a subfolder of the current media file folder or an absolute path)",
                "type": "text",
                "default": ""
        },
@@ -89,10 +89,14 @@ from guessit.guess import Guess, smart_merge
 from guessit.language import Language
 from guessit.matcher import IterativeMatcher
 from guessit.textutils import clean_default, is_camel, from_camel
+from copy import deepcopy
 import babelfish
 import os.path
 import logging
-from copy import deepcopy
+from guessit.options import get_opts
+import shlex
+# Needed for guessit.plugins.transformers.reload() to be called.
+from guessit.plugins import transformers

 log = logging.getLogger(__name__)

@@ -117,7 +121,7 @@ def _build_filename_mtree(filename, options=None, **kwargs):
    mtree = IterativeMatcher(filename, options=options, **kwargs)
    second_pass_options = mtree.second_pass_options
    if second_pass_options:
-        log.debug("Running 2nd pass")
+        log.debug('Running 2nd pass with options: %s' % second_pass_options)
        merged_options = dict(options)
        merged_options.update(second_pass_options)
        mtree = IterativeMatcher(filename, options=merged_options, **kwargs)
@@ -271,8 +275,16 @@ def guess_file_info(filename, info=None, options=None, **kwargs):
    """
    info = info or 'filename'
    options = options or {}
+
+    if isinstance(options, base_text_type):
+        args = shlex.split(options)
+        options = vars(get_opts().parse_args(args))
    if default_options:
-        merged_options = deepcopy(default_options)
+        if isinstance(default_options, base_text_type):
+            default_args = shlex.split(default_options)
+            merged_options = vars(get_opts().parse_args(default_args))
+        else:
+            merged_options = deepcopy(default_options)
        merged_options.update(options)
        options = merged_options

@@ -181,16 +181,16 @@ def submit_bug(filename, options):
        opts = dict((k, v) for k, v in options.__dict__.items()
                    if v and k != 'submit_bug')

-        r = requests.post('http://localhost:5000/bugs', {'filename': filename,
+        r = requests.post('http://guessit.io/bugs', {'filename': filename,
                                                         'version': __version__,
                                                         'options': str(opts)})
        if r.status_code == 200:
            print('Successfully submitted file: %s' % r.text)
        else:
-            print('Could not submit bug at the moment, please try again later.')
+            print('Could not submit bug at the moment, please try again later: %s %s' % (r.status_code, r.reason))

    except RequestException as e:
-        print('Could not submit bug at the moment, please try again later.')
+        print('Could not submit bug at the moment, please try again later: %s' % e)


 def main(args=None, setup_logging=True):
@@ -17,4 +17,4 @@
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-__version__ = '0.10.4.dev0'
+__version__ = '0.11.0'
@@ -135,8 +135,14 @@ class SameKeyValidator(object):
        self.validator_function = validator_function

    def validate(self, prop, string, node, match, entry_start, entry_end):
+        path_nodes = [path_node for path_node in node.ancestors if path_node.category == 'path']
+        if path_nodes:
+            path_node = path_nodes[0]
+        else:
+            path_node = node.root
+
        for key in prop.keys:
-            for same_value_leaf in node.root.leaves_containing(key):
+            for same_value_leaf in path_node.leaves_containing(key):
                ret = self.validator_function(same_value_leaf, key, prop, string, node, match, entry_start, entry_end)
                if ret is not None:
                    return ret
@@ -144,6 +150,9 @@ class SameKeyValidator(object):


 class OnlyOneValidator(SameKeyValidator):
+    """
+    Check that there's only one occurrence of key for current directory
+    """
    def __init__(self):
        super(OnlyOneValidator, self).__init__(lambda same_value_leaf, key, prop, string, node, match, entry_start, entry_end: False)

@@ -153,12 +162,16 @@ class DefaultValidator(object):
    def validate(self, prop, string, node, match, entry_start, entry_end):
        span = _get_span(prop, match)
        span = _trim_span(span, string[span[0]:span[1]])
+        return DefaultValidator.validate_string(string, span, entry_start, entry_end)
+
+    @staticmethod
+    def validate_string(string, span, entry_start=None, entry_end=None):
        start, end = span

        sep_start = start <= 0 or string[start - 1] in sep
        sep_end = end >= len(string) or string[end] in sep
-        start_by_other = start in entry_end
-        end_by_other = end in entry_start
+        start_by_other = start in entry_end if entry_end else False
+        end_by_other = end in entry_start if entry_start else False
        if (sep_start or start_by_other) and (sep_end or end_by_other):
            return True
        return False
@@ -235,6 +248,13 @@ class NeighborValidator(DefaultValidator):

        return False

+class FullMatchValidator(DefaultValidator):
+    """Make sure the node matches fully"""
+    def validate(self, prop, string, node, match, entry_start, entry_end):
+        at_start, at_end = _get_positions(prop, string, node, match, entry_start, entry_end)
+
+        return at_start and at_end
+

 class LeavesValidator(DefaultValidator):
    def __init__(self, lambdas=None, previous_lambdas=None, next_lambdas=None, both_side=False, default_=True):
@@ -290,7 +310,7 @@ class LeavesValidator(DefaultValidator):

 class _Property:
    """Represents a property configuration."""
-    def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None, disabler=None, confidence_lambda=None):
+    def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None, disabler=None, confidence_lambda=None, remove_duplicates=False):
        """
        :param keys: Keys of the property (format, screenSize, ...)
        :type keys: string
@@ -309,6 +329,8 @@ class _Property:
        :type validator: :class:`DefaultValidator`
        :param formatter: Formater to use
        :type formatter: function
+        :param remove_duplicates: Keep only the last match if multiple values are found
+        :type remove_duplicates: bool
        """
        if isinstance(keys, list):
            self.keys = keys
@@ -335,6 +357,7 @@ class _Property:
        self.validator = validator
        self.formatter = formatter
        self.disabler = disabler
+        self.remove_duplicates = remove_duplicates

    def disabled(self, options):
        if self.disabler:
@@ -479,7 +502,8 @@ class PropertiesContainer(object):
                        entries.append((prop, match))
                else:
                    matches = list(prop.compiled.finditer(string))
-                    duplicate_matches[prop] = matches
+                    if prop.remove_duplicates:
+                        duplicate_matches[prop] = matches
                    for match in matches:
                        entries.append((prop, match))

@@ -490,6 +514,9 @@ class PropertiesContainer(object):
                if computed_confidence is not None:
                    prop.confidence = computed_confidence

+        entries.sort(key=lambda entry: -entry[0].confidence)
+        # entries are now sorted from most confident to least confident
+
        if validate:
            # compute entries start and ends
            for prop, match in entries:
@@ -531,7 +558,7 @@ class PropertiesContainer(object):
                        del entry_end[end]

        for prop, prop_duplicate_matches in duplicate_matches.items():
-            # Keeping the last valid match.
+            # Keeping the last valid match only.
            # Needed for the.100.109.hdtv-lol.mp4
            for duplicate_match in prop_duplicate_matches[:-1]:
                entries.remove((prop, duplicate_match))
@@ -561,8 +588,8 @@ class PropertiesContainer(object):
                        for prop, match in key_entries:
                            start, end = _get_span(prop, match)
                            if not best_prop or \
-                            best_prop.confidence < best_prop.confidence or \
-                            best_prop.confidence == best_prop.confidence and \
+                            best_prop.confidence < prop.confidence or \
+                            best_prop.confidence == prop.confidence and \
                            best_match.span()[1] - best_match.span()[0] < match.span()[1] - match.span()[0]:
                                best_prop, best_match = prop, match

@@ -287,10 +287,10 @@ def choose_int(g1, g2):
    if v1 == v2:
        return v1, 1 - (1 - c1) * (1 - c2)
    else:
-        if c1 > c2:
-            return v1, c1 - c2
+        if c1 >= c2:
+            return v1, c1 - c2 / 2
        else:
-            return v2, c2 - c1
+            return v2, c2 - c1 / 2


 def choose_string(g1, g2):
@@ -308,7 +308,7 @@ def choose_string(g1, g2):
    prepended to it.

    >>> s(choose_string(('Hello', 0.75), ('World', 0.5)))
-    ('Hello', 0.25)
+    ('Hello', 0.5)

    >>> s(choose_string(('Hello', 0.5), ('hello', 0.5)))
    ('Hello', 0.75)
@@ -354,10 +354,10 @@ def choose_string(g1, g2):

    # in case of conflict, return the one with highest confidence
    else:
-        if c1 > c2:
-            return v1, c1 - c2
+        if c1 >= c2:
+            return v1, c1 - c2 / 2
        else:
-            return v2, c2 - c1
+            return v2, c2 - c1 / 2


 def _merge_similar_guesses_nocheck(guesses, prop, choose):
@@ -474,8 +474,8 @@ def merge_all(guesses, append=None):

    # delete very unlikely values
    for p in list(result.keys()):
-        if result.confidence(p) < 0.05:
-            del result[p]
+       if result.confidence(p) < 0.05:
+           del result[p]

    # make sure our appendable properties contain unique values
    for prop in append:
@@ -509,7 +509,7 @@ def smart_merge(guesses):
    for string_part in ('title', 'series', 'container', 'format',
                        'releaseGroup', 'website', 'audioCodec',
                        'videoCodec', 'screenSize', 'episodeFormat',
-                        'audioChannels', 'idNumber'):
+                        'audioChannels', 'idNumber', 'container'):
        merge_similar_guesses(guesses, string_part, choose_string)

    # 2- merge the rest, potentially discarding information not properly
@@ -173,8 +173,9 @@ LNG_COMMON_WORDS = frozenset([
    'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
    'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
    'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
-    'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb', 'bt',
-    'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice', 'ay', 'at',
+    'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb',
+    'bt', 'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice',
+    'ay', 'at', 'star', 'so',
    # french words
    'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
    'mal', 'est', 'vol', 'or', 'mon', 'se', 'je', 'tu', 'me',
@@ -185,7 +186,7 @@ LNG_COMMON_WORDS = frozenset([
    'la', 'el', 'del', 'por', 'mar', 'al',
    # other
    'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
-    'vi', 'ben', 'da', 'lt', 'ch',
+    'vi', 'ben', 'da', 'lt', 'ch', 'sr', 'ps', 'cx',
    # new from babelfish
    'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
    'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
@@ -197,7 +198,7 @@ LNG_COMMON_WORDS = frozenset([
    'bs',  # Bosnian
    'kz',
    # countries
-    'gt', 'lt',
+    'gt', 'lt', 'im',
    # part/pt
    'pt'
    ])
@@ -206,9 +207,11 @@ LNG_COMMON_WORDS_STRICT = frozenset(['brazil'])


 subtitle_prefixes = ['sub', 'subs', 'st', 'vost', 'subforced', 'fansub', 'hardsub']
-subtitle_suffixes = ['subforced', 'fansub', 'hardsub']
+subtitle_suffixes = ['subforced', 'fansub', 'hardsub', 'sub', 'subs']
 lang_prefixes = ['true']

+all_lang_prefixes_suffixes = subtitle_prefixes + subtitle_suffixes + lang_prefixes
+

 def find_possible_languages(string, allowed_languages=None):
    """Find possible languages in the string
@@ -239,7 +242,7 @@ def find_possible_languages(string, allowed_languages=None):
        for prefix in lang_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]
-        if lang_word not in common_words:
+        if lang_word not in common_words and word.lower() not in common_words:
            try:
                lang = Language.fromguessit(lang_word)
                if allowed_languages:
@@ -215,94 +215,100 @@ def log_found_guess(guess, logger=None):
                              (k, v, guess.raw(k), guess.confidence(k)))


-def _get_split_spans(node, span):
-    partition_spans = node.get_partition_spans(span)
-    for to_remove_span in partition_spans:
-        if to_remove_span[0] == span[0] and to_remove_span[1] in [span[1], span[1] + 1]:
-            partition_spans.remove(to_remove_span)
-            break
-    return partition_spans
-
-
 class GuessFinder(object):
    def __init__(self, guess_func, confidence=None, logger=None, options=None):
        self.guess_func = guess_func
        self.confidence = confidence
        self.logger = logger or log
-        self.options = options
+        self.options = options or {}

    def process_nodes(self, nodes):
        for node in nodes:
            self.process_node(node)

-    def process_node(self, node, iterative=True, partial_span=None):
+    def process_node(self, node, iterative=True, partial_span=None, skip_nodes=True):
+        if skip_nodes and not isinstance(skip_nodes, list):
+            skip_nodes = self.options.get('skip_nodes')
+        elif not isinstance(skip_nodes, list):
+            skip_nodes = []
+
        if partial_span:
            value = node.value[partial_span[0]:partial_span[1]]
        else:
            value = node.value
        string = ' %s ' % value  # add sentinels

-        if not self.options:
-            matcher_result = self.guess_func(string, node)
+        matcher_result = self.guess_func(string, node, self.options)
+        if not matcher_result:
+            return
+
+        if not isinstance(matcher_result, Guess):
+            result, span = matcher_result
        else:
-            matcher_result = self.guess_func(string, node, self.options)
+            result, span = matcher_result, matcher_result.metadata().span
+            #log.error('span2 %s' % (span,))

-        if matcher_result:
-            if not isinstance(matcher_result, Guess):
-                result, span = matcher_result
-            else:
-                result, span = matcher_result, matcher_result.metadata().span
+        if not result:
+            return

-            if result:
-                # readjust span to compensate for sentinels
-                span = (span[0] - 1, span[1] - 1)
+        if span[1] == len(string):
+            # somehow, the sentinel got included in the span. Remove it
+            span = (span[0], span[1] - 1)

-                # readjust span to compensate for partial_span
-                if partial_span:
-                    span = (span[0] + partial_span[0], span[1] + partial_span[0])
+        # readjust span to compensate for sentinels
+        span = (span[0] - 1, span[1] - 1)

-                partition_spans = None
-                if self.options and 'skip_nodes' in self.options:
-                    skip_nodes = self.options.get('skip_nodes')
-                    for skip_node in skip_nodes:
-                        if skip_node.parent.node_idx == node.node_idx[:len(skip_node.parent.node_idx)] and\
-                            skip_node.span == span or\
-                                skip_node.span == (span[0] + skip_node.offset, span[1] + skip_node.offset):
-                            if partition_spans is None:
-                                partition_spans = _get_split_spans(node, skip_node.span)
-                            else:
-                                new_partition_spans = []
-                                for partition_span in partition_spans:
-                                    tmp_node = MatchTree(value, span=partition_span, parent=node)
-                                    tmp_partitions_spans = _get_split_spans(tmp_node, skip_node.span)
-                                    new_partition_spans.extend(tmp_partitions_spans)
-                                partition_spans.extend(new_partition_spans)
+        # readjust span to compensate for partial_span
+        if partial_span:
+            span = (span[0] + partial_span[0], span[1] + partial_span[0])

-                if not partition_spans:
-                    # restore sentinels compensation
+        if skip_nodes:
+            skip_nodes = [skip_node for skip_node in self.options.get('skip_nodes') if skip_node.parent.span[0] == node.span[0] or skip_node.parent.span[1] == node.span[1]]
+            # if we guessed a node that we need to skip, recurse down the tree and ignore that node
+            indices = set()
+            skip_nodes_spans = []
+            next_skip_nodes = []
+            for skip_node in skip_nodes:
+                skip_for_next = False
+                skip_nodes_spans.append(skip_node.span)
+                if node.offset <= skip_node.span[0] <= node.span[1]:
+                    indices.add(skip_node.span[0] - node.offset)
+                    skip_for_next = True
+                if node.offset <= skip_node.span[1] <= node.span[1]:
+                    indices.add(skip_node.span[1] - node.offset)
+                    skip_for_next = True
+                if not skip_for_next:
+                    next_skip_nodes.append(skip_node)
+            if indices:
+                partition_spans = [s for s in node.get_partition_spans(indices) if s not in skip_nodes_spans]
+                for partition_span in partition_spans:
+                    relative_span = (partition_span[0] - node.offset, partition_span[1] - node.offset)
+                    self.process_node(node, partial_span=relative_span, skip_nodes=next_skip_nodes)
+                return

-                    if isinstance(result, Guess):
-                        guess = result
-                    else:
-                        guess = Guess(result, confidence=self.confidence, input=string, span=span)
+        # restore sentinels compensation
+        if isinstance(result, Guess):
+            guess = result
+        else:
+            no_sentinel_string =string[1:-1]
+            guess = Guess(result, confidence=self.confidence, input=no_sentinel_string, span=span)
+
+        if not iterative:
+            found_guess(node, guess, logger=self.logger)
+        else:
+            absolute_span = (span[0] + node.offset, span[1] + node.offset)
+            node.partition(span)
+            found_child = None
+
+            for child in node.children:
+                if child.span == absolute_span:
+                    # if we have a match on one of our children, mark it as such...
+                    found_guess(child, guess, logger=self.logger)
+                    found_child = child
+                    break
+
+            # ...and only then recurse on the other children
+            for child in node.children:
+                if child is not found_child:
+                    self.process_node(child)

-                    if not iterative:
-                        found_guess(node, guess, logger=self.logger)
-                    else:
-                        absolute_span = (span[0] + node.offset, span[1] + node.offset)
-                        node.partition(span)
-                        if node.is_leaf():
-                            found_guess(node, guess, logger=self.logger)
-                        else:
-                            found_child = None
-                            for child in node.children:
-                                if child.span == absolute_span:
-                                    found_guess(child, guess, logger=self.logger)
-                                    found_child = child
-                                    break
-                            for child in node.children:
-                                if child is not found_child:
-                                    self.process_node(child)
-                else:
-                    for partition_span in partition_spans:
-                        self.process_node(node, partial_span=partition_span)
@@ -27,9 +27,7 @@ import guessit  # @UnusedImport needed for doctests
 from guessit import UnicodeMixin, base_text_type
 from guessit.textutils import clean_default, str_fill
 from guessit.patterns import group_delimiters
-from guessit.guess import (smart_merge,
-                           Guess)
-
+from guessit.guess import smart_merge, Guess

 log = logging.getLogger(__name__)

@@ -75,7 +73,7 @@ class BaseMatchTree(UnicodeMixin):
    (as shown by the ``f``'s on the last-but-one line).
    """

-    def __init__(self, string='', span=None, parent=None, clean_function=None):
+    def __init__(self, string='', span=None, parent=None, clean_function=None, category=None):
        self.string = string
        self.span = span or (0, len(string))
        self.parent = parent
@@ -83,6 +81,7 @@ class BaseMatchTree(UnicodeMixin):
        self.guess = Guess()
        self._clean_value = None
        self._clean_function = clean_function or clean_default
+        self.category = category

    @property
    def value(self):
@@ -116,6 +115,32 @@ class BaseMatchTree(UnicodeMixin):

        return result

+    @property
+    def raw(self):
+        """
+        Map every key found in this node's guesses to its raw value.
+
+        The guesses come from ``self.guesses``. When several guesses share
+        a key, the value from the guess visited last overwrites the
+        earlier ones.
+
+        :return: a dict of key -> ``guess.raw(key)``
+        """
+        result = {}
+        for guess in self.guesses:
+            for k in guess.keys():
+                # later guesses overwrite earlier ones for the same key
+                result[k] = guess.raw(k)
+        return result
+
+    @property
+    def guesses(self):
+        """
+        List all guesses, including children ones.
+
+        :return: list of Guess objects
+        """
+
+        result = []
+
+        if self.guess:
+            result.append(self.guess)
+
+        for c in self.children:
+            result.extend(c.guesses)
+
+        return result
+
    @property
    def root(self):
        """Return the root node of the tree."""
@@ -124,6 +149,23 @@ class BaseMatchTree(UnicodeMixin):

        return self.parent.root

+    @property
+    def ancestors(self):
+        """
+        Retrieve this node followed by all of its ancestors, up to the root node.
+
+        :return: a list of MatchTree objects
+        """
+        ret = [self]
+
+        if not self.parent:
+            return ret
+
+        parent_ancestors = self.parent.ancestors
+        ret.extend(parent_ancestors)
+
+        return ret
+
    @property
    def depth(self):
        """Return the depth of this node."""
@@ -136,17 +178,30 @@ class BaseMatchTree(UnicodeMixin):
        """Return whether this node is a leaf or not."""
        return self.children == []

-    def add_child(self, span):
-        """Add a new child node to this node with the given span."""
-        child = MatchTree(self.string, span=span, parent=self, clean_function=self._clean_function)
+    def add_child(self, span, category=None):
+        """Add a new child node to this node with the given span.
+
+        :param span: span of the new MatchTree
+        :param category: category of the new MatchTree
+        :return: A new MatchTree instance having self as a parent
+        """
+        child = MatchTree(self.string, span=span, parent=self, clean_function=self._clean_function, category=category)
        self.children.append(child)
        return child

    def get_partition_spans(self, indices):
        """Return the list of absolute spans for the regions of the original
        string defined by splitting this node at the given indices (relative
-        to this node)"""
+        to this node)
+
+        :param indices: indices of the partition spans
+        :return: a list of (start, end) tuples holding the absolute spans
+        """
        indices = sorted(indices)
+        if indices[-1] > len(self.value):
+            log.error('Filename: {}'.format(self.string))
+            log.error('Invalid call to get_partitions_spans, indices are too high: {}, len({}) == {:d}'
+                      .format(indices, self.value, len(self.value)))
        if indices[0] != 0:
            indices.insert(0, 0)
        if indices[-1] != len(self.value):
@@ -155,23 +210,33 @@ class BaseMatchTree(UnicodeMixin):
        spans = []
        for start, end in zip(indices[:-1], indices[1:]):
            spans.append((self.offset + start,
-                     self.offset + end))
+                          self.offset + end))
+
        return spans

-    def partition(self, indices):
+    def partition(self, indices, category=None):
        """Partition this node by splitting it at the given indices,
-        relative to this node."""
-        for partition_span in self.get_partition_spans(indices):
-            self.add_child(span=partition_span)
+        relative to this node.

-    def split_on_components(self, components):
+        :param indices: indices of the partition spans
+        :param category: category of the new MatchTree
+        :return: a list of created MatchTree instances
+        """
+        created = []
+        for partition_span in self.get_partition_spans(indices):
+            created.append(self.add_child(span=partition_span, category=category))
+        return created
+
+    def split_on_components(self, components, category=None):
        offset = 0
+        created = []
        for c in components:
            start = self.value.find(c, offset)
            end = start + len(c)
-            self.add_child(span=(self.offset + start,
-                                 self.offset + end))
+            created.append(self.add_child(span=(self.offset + start,
+                                       self.offset + end), category=category))
            offset = end
+        return created

    def nodes_at_depth(self, depth):
        """Return all the nodes at a given depth in the tree"""
@@ -208,7 +273,7 @@ class BaseMatchTree(UnicodeMixin):
            raise ValueError('Non-existent node index: %s' % (idx,))

    def nodes(self):
-        """Return all the nodes and subnodes in this tree."""
+        """Return a generator of all nodes and subnodes in this tree."""
        yield self
        for child in self.children:
            for node in child.nodes():
@@ -220,7 +285,6 @@ class BaseMatchTree(UnicodeMixin):
            yield self
        else:
            for child in self.children:
-                # pylint: disable=W0212
                for leaf in child.leaves():
                    yield leaf

@@ -29,4 +29,4 @@ info_exts = ['nfo']
 video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
              'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
              'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
-              'iso']
+              'iso', 'vob']
@@ -0,0 +1,80 @@
+import re
+from guessit.patterns import sep, build_or_pattern
+from guessit.patterns.numeral import parse_numeral
+
+# Words/symbols accepted between the two bounds of a range (see list_parser).
+# NOTE(review): entries are handed to build_or_pattern without escape=True, so
+# they appear to be used as regex fragments rather than literals - confirm.
+range_separators = ['-', 'to', 'a']
+# Words/symbols accepted between independently listed values.
+discrete_separators = ['&', 'and', 'et']
+excluded_separators = ['.']  # Dot cannot serve as a discrete_separator
+
+# Build one extra discrete separator from the generic `sep` pattern text by
+# removing every range separator and every excluded separator from it.
+discrete_sep = sep
+for range_separator in range_separators:
+    discrete_sep = discrete_sep.replace(range_separator, '')
+for excluded_separator in excluded_separators:
+    discrete_sep = discrete_sep.replace(excluded_separator, '')
+discrete_separators.append(discrete_sep)
+# Range separators first, then the discrete ones (including the derived one).
+all_separators = list(range_separators)
+all_separators.extend(discrete_separators)
+
+# Case-insensitive compiled alternations of the three separator lists above.
+range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
+discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE)
+all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
+
+
+def list_parser(value, property_list_name, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
+    """
+    Parse a textual list of numerals made of discrete values and ranges.
+
+    The string is first split on the discrete separators, range separators
+    that ended up isolated from their bounds are glued back, and every range
+    is expanded into the integers it spans. Duplicates are dropped and the
+    order of first appearance is kept.
+
+    :param value: string to parse
+    :param property_list_name: key under which the full list is returned
+    :param discrete_separators_re: compiled pattern splitting independent elements
+    :param range_separators_re: compiled pattern splitting the bounds of a range
+    :param allow_discrete: when False, any value lower than the last kept value
+        is discarded, so the resulting list never decreases
+    :param fill_gaps: when True, the result is replaced by every integer from
+        its minimum to its maximum (inclusive)
+    :return: None when nothing was parsed, the value itself when a single value
+        remains, otherwise ``{None: first_value, property_list_name: values}``
+    """
+    discrete_elements = [x.strip() for x in discrete_separators_re.split(value) if x != '']
+
+    # Glue back range separators that the discrete split isolated from their
+    # bounds, so each element is either a single value or a complete range.
+    proper_discrete_elements = []
+    i = 0
+    while i < len(discrete_elements):
+        if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i+1]):
+            proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i+1] + discrete_elements[i+2])
+            i += 3
+        else:
+            element = discrete_elements[i]
+            match = range_separators_re.search(element)
+            if match and match.start() == 0 and proper_discrete_elements:
+                # Leading separator: extend the element emitted last. Its index
+                # is not i - 1 once an earlier merge consumed several elements,
+                # and there is nothing to extend when no element was emitted yet.
+                proper_discrete_elements[-1] += element
+            elif match and match.end() == len(element) and i + 1 < len(discrete_elements):
+                # Trailing separator: the next element is the upper bound.
+                proper_discrete_elements.append(element + discrete_elements[i + 1])
+            else:
+                proper_discrete_elements.append(element)
+            i += 1
+
+    ret = []
+
+    for discrete_element in proper_discrete_elements:
+        range_values = [x.strip() for x in range_separators_re.split(discrete_element) if x != '']
+        if len(range_values) > 1:
+            # Expand every consecutive pair of bounds into the integers it spans.
+            for lower, upper in zip(range_values, range_values[1:]):
+                start_range_ep = parse_numeral(lower)
+                end_range_ep = parse_numeral(upper)
+                for range_ep in range(start_range_ep, end_range_ep + 1):
+                    if range_ep not in ret:
+                        ret.append(range_ep)
+        else:
+            discrete_value = parse_numeral(discrete_element)
+            if discrete_value not in ret:
+                ret.append(discrete_value)
+
+    if len(ret) > 1:
+        if not allow_discrete:
+            # Keep only the values that do not go below the last kept one.
+            valid_ret = [ret[0]]
+            for candidate in ret[1:]:
+                if not candidate < valid_ret[-1]:
+                    valid_ret.append(candidate)
+            ret = valid_ret
+        if fill_gaps:
+            ret = list(range(min(ret), max(ret) + 1))
+        if len(ret) > 1:
+            return {None: ret[0], property_list_name: ret}
+    if len(ret) > 0:
+        return ret[0]
+    return None
@@ -19,11 +19,14 @@
 #

 from __future__ import absolute_import, division, print_function, unicode_literals
+from functools import wraps

 import logging
 import sys
 import os

+log = logging.getLogger(__name__)
+
 GREEN_FONT = "\x1B[0;32m"
 YELLOW_FONT = "\x1B[0;33m"
 BLUE_FONT = "\x1B[0;34m"
@@ -87,3 +90,27 @@ def setup_logging(colored=True, with_time=False, with_thread=False, filename=Non
            ch.setFormatter(SimpleFormatter(with_time, with_thread))

    logging.getLogger().addHandler(ch)
+
+
+def trace_func_call(f):
+    """
+    Decorator logging each call of the wrapped callable at debug level.
+
+    The logged message has the form ``Calling name(args, key=value)``;
+    positional arguments are rendered with repr().
+
+    :param f: callable to wrap
+    :return: wrapper forwarding all arguments to f and returning its result
+    """
+    # __qualname__ only exists on Python 3.3+ functions. Falling back to
+    # __name__ keeps the decorator usable on Python 2 and on callables lacking
+    # the attribute; every argument (including self) is then logged.
+    # NOTE: nested functions also have a dotted __qualname__, so their first
+    # positional argument is left out of the message as well.
+    is_method = (f.__name__ != getattr(f, '__qualname__', f.__name__))
+
+    @wraps(f)
+    def wrapper(*args, **kwargs):
+        # method is still not bound, we need to get around it
+        no_self_args = args[1:] if is_method else args
+
+        args_str = ', '.join(repr(arg) for arg in no_self_args)
+        kwargs_str = ', '.join('{}={}'.format(k, v) for k, v in kwargs.items())
+        call_str = ', '.join(part for part in (args_str, kwargs_str) if part)
+
+        # Lazy %-style arguments: the message is only built if debug is enabled.
+        log.debug('Calling %s(%s)', f.__name__, call_str)
+        return f(*args, **kwargs)
+
+    return wrapper
@@ -525,3 +525,29 @@
  screenSize: 720p
  season: 5
  series: Game of Thrones
+
+? Parks and Recreation - [04x12] - Ad Campaign.avi
+: type: episode
+  series: Parks and Recreation
+  season: 4
+  episodeNumber: 12
+  title: Ad Campaign
+
+? Star Trek Into Darkness (2013)/star.trek.into.darkness.2013.720p.web-dl.h264-publichd.mkv
+: type: movie
+  title: Star Trek Into Darkness
+  year: 2013
+  screenSize: 720p
+  format: WEB-DL
+  videoCodec: h264
+  releaseGroup: PublicHD
+
+? /var/medias/series/The Originals/Season 02/The.Originals.S02E15.720p.HDTV.X264-DIMENSION.mkv
+: type: episode
+  series: The Originals
+  season: 2
+  episodeNumber: 15
+  screenSize: 720p
+  format: HDTV
+  videoCodec: h264
+  releaseGroup: DIMENSION
@@ -282,12 +282,6 @@
  episodeNumber: 1
  title: The Impossible Astronaut

-? Parks and Recreation - [04x12] - Ad Campaign.avi
-: series: Parks and Recreation
-  season: 4
-  episodeNumber: 12
-  title: Ad Campaign
-
 ? The Sopranos - [05x07] - In Camelot.mp4
 : series: The Sopranos
  season: 5
@@ -635,7 +629,7 @@
  format: HDTV
  releaseGroup: lol

-? 03-Criminal.Minds.5x03.Reckoner.ENG.-.sub.FR.HDTV.XviD-STi.[tvu.org.ru].avi
+? Criminal.Minds.5x03.Reckoner.ENG.-.sub.FR.HDTV.XviD-STi.[tvu.org.ru].avi
 : series: Criminal Minds
  language: English
  subtitleLanguage: French
@@ -1186,3 +1180,684 @@
  videoCodec: h264
  releaseGroup: BS
  format: WEB-DL
+
+? How to Make It in America - S02E06 - I'm Sorry, Who's Yosi?.mkv
+: series: How to Make It in America
+  season: 2
+  episodeNumber: 6
+  title: I'm Sorry, Who's Yosi?
+
+? 24.S05E07.FRENCH.DVDRip.XviD-FiXi0N.avi
+: episodeNumber: 7
+  format: DVD
+  language: fr
+  season: 5
+  series: '24'
+  videoCodec: XviD
+  releaseGroup: FiXi0N
+
+? 12.Monkeys.S01E12.FRENCH.BDRip.x264-VENUE.mkv
+: episodeNumber: 12
+  format: BluRay
+  language: fr
+  releaseGroup: VENUE
+  season: 1
+  series: 12 Monkeys
+  videoCodec: h264
+
+? The.Daily.Show.2015.07.01.Kirsten.Gillibrand.Extended.720p.CC.WEBRip.AAC2.0.x264-BTW.mkv
+: audioChannels: '2.0'
+  audioCodec: AAC
+  date: 2015-07-01
+  format: WEBRip
+  other: CC
+  releaseGroup: BTW
+  screenSize: 720p
+  series: The Daily Show
+  title: Kirsten Gillibrand Extended
+  videoCodec: h264
+
+? The.Daily.Show.2015.07.02.Sarah.Vowell.CC.WEBRip.AAC2.0.x264-BTW.mkv
+: audioChannels: '2.0'
+  audioCodec: AAC
+  date: 2015-07-02
+  format: WEBRip
+  other: CC
+  releaseGroup: BTW
+  series: The Daily Show
+  title: Sarah Vowell
+  videoCodec: h264
+
+? 90.Day.Fiance.S02E07.I.Have.To.Tell.You.Something.720p.HDTV.x264-W4F
+: options: -n
+  episodeNumber: 7
+  format: HDTV
+  screenSize: 720p
+  season: 2
+  series: 90 Day Fiance
+  title: I Have To Tell You Something
+
+? Doctor.Who.2005.S04E06.FRENCH.LD.DVDRip.XviD-TRACKS.avi
+: episodeNumber: 6
+  format: DVD
+  language: fr
+  releaseGroup: TRACKS
+  season: 4
+  series: Doctor Who
+  other: LD
+  videoCodec: XviD
+  year: 2005
+
+? Astro.Le.Petit.Robot.S01E01+02.FRENCH.DVDRiP.X264.INT-BOOLZ.mkv
+: episodeNumber: 1
+  episodeList: [1, 2]
+  format: DVD
+  language: fr
+  releaseGroup: INT-BOOLZ
+  season: 1
+  series: Astro Le Petit Robot
+  videoCodec: h264
+
+? Annika.Bengtzon.2012.E01.Le.Testament.De.Nobel.FRENCH.DVDRiP.XViD-STVFRV.avi
+: episodeNumber: 1
+  format: DVD
+  language: fr
+  releaseGroup: STVFRV
+  series: Annika Bengtzon
+  title: Le Testament De Nobel
+  videoCodec: XviD
+  year: 2012
+
+? Dead.Set.02.FRENCH.LD.DVDRip.XviD-EPZ.avi
+: episodeNumber: 2
+  format: DVD
+  language: fr
+  other: LD
+  releaseGroup: EPZ
+  series: Dead Set
+  videoCodec: XviD
+
+? Phineas and Ferb S01E00 & S01E01 & S01E02
+: options: -n
+  episodeList:
+  - 0
+  - 1
+  - 2
+  episodeNumber: 0
+  season: 1
+  series: Phineas and Ferb
+
+? Show.Name.S01E02.S01E03.HDTV.XViD.Etc-Group
+: options: -n
+  episodeList:
+  - 2
+  - 3
+  episodeNumber: 2
+  format: HDTV
+  releaseGroup: Etc-Group
+  season: 1
+  series: Show Name
+  videoCodec: XviD
+
+? Show Name - S01E02 - S01E03 - S01E04 - Ep Name
+: options: -n
+  episodeList:
+  - 2
+  - 3
+  - 4
+  episodeNumber: 2
+  season: 1
+  series: Show Name
+  title: Ep Name
+
+? Show.Name.1x02.1x03.HDTV.XViD.Etc-Group
+: options: -n
+  episodeList:
+  - 2
+  - 3
+  episodeNumber: 2
+  format: HDTV
+  releaseGroup: Etc-Group
+  season: 1
+  series: Show Name
+  videoCodec: XviD
+
+? Show Name - 1x02 - 1x03 - 1x04 - Ep Name
+: options: -n
+  episodeList:
+  - 2
+  - 3
+  - 4
+  episodeNumber: 2
+  season: 1
+  series: Show Name
+  title: Ep Name
+
+? Show.Name.S01E02.HDTV.XViD.Etc-Group
+: options: -n
+  episodeNumber: 2
+  format: HDTV
+  releaseGroup: Etc-Group
+  season: 1
+  series: Show Name
+  videoCodec: XviD
+
+? Show Name - S01E02 - My Ep Name
+: options: -n
+  episodeNumber: 2
+  season: 1
+  series: Show Name
+  title: My Ep Name
+
+? Show Name - S01.E03 - My Ep Name
+: options: -n
+  episodeNumber: 3
+  season: 1
+  series: Show Name
+  title: My Ep Name
+
+? Show.Name.S01E02E03.HDTV.XViD.Etc-Group
+: options: -n
+  episodeList:
+  - 2
+  - 3
+  episodeNumber: 2
+  format: HDTV
+  releaseGroup: Etc-Group
+  season: 1
+  series: Show Name
+  videoCodec: XviD
+
+? Show Name - S01E02-03 - My Ep Name
+: options: -n
+  episodeList:
+  - 2
+  - 3
+  episodeNumber: 2
+  season: 1
+  series: Show Name
+  title: My Ep Name
+
+? Show.Name.S01.E02.E03
+: options: -n
+  episodeList:
+  - 2
+  - 3
+  episodeNumber: 2
+  season: 1
+  series: Show Name
+
+? Show_Name.1x02.HDTV_XViD_Etc-Group
+: options: -n
+  episodeNumber: 2
+  format: HDTV
+  releaseGroup: Etc-Group
+  season: 1
+  series: Show Name
+  videoCodec: XviD
+
+? Show Name - 1x02 - My Ep Name
+: options: -n
+  episodeNumber: 2
+  season: 1
+  series: Show Name
+  title: My Ep Name
+
+? Show_Name.1x02x03x04.HDTV_XViD_Etc-Group
+: options: -n
+  episodeList:
+  - 2
+  - 3
+  - 4
+  episodeNumber: 2
+  format: HDTV
+  releaseGroup: Etc-Group
+  season: 1
+  series: Show Name
+  videoCodec: XviD
+
+? Show Name - 1x02-03-04 - My Ep Name
+: options: -n
+  episodeList:
+  - 2
+  - 3
+  - 4
+  episodeNumber: 2
+  season: 1
+  series: Show Name
+  title: My Ep Name
+
+? Show.Name.100.Event.2010.11.23.HDTV.XViD.Etc-Group
+: options: -n
+  date: 2010-11-23
+  episodeNumber: 100
+  format: HDTV
+  releaseGroup: Etc-Group
+  series: Show Name
+  title: Event
+  videoCodec: XviD
+
+? Show.Name.2010.11.23.HDTV.XViD.Etc-Group
+: options: -n
+  date: 2010-11-23
+  format: HDTV
+  releaseGroup: Etc-Group
+  series: Show Name
+
+? Show Name - 2010-11-23 - Ep Name
+: options: -n
+  date: 2010-11-23
+  series: Show Name
+  title: Ep Name
+
+? Show Name Season 1 Episode 2 Ep Name
+: options: -n
+  episodeNumber: 2
+  season: 1
+  series: Show Name
+  title: Ep Name
+
+? Show.Name.S01.HDTV.XViD.Etc-Group
+: options: -n
+  format: HDTV
+  releaseGroup: Etc-Group
+  season: 1
+  series: Show Name
+  videoCodec: XviD
+
+? Show.Name.E02-03
+: options: -n
+  episodeNumber: 2
+  episodeList:
+  - 2
+  - 3
+  series: Show Name
+
+? Show.Name.E02.2010
+: options: -n
+  episodeNumber: 2
+  year: 2010
+  series: Show Name
+
+? Show.Name.E23.Test
+: options: -n
+  episodeNumber: 23
+  series: Show Name
+  title: Test
+
+? Show.Name.Part.3.HDTV.XViD.Etc-Group
+: options: -n -t episode
+  part: 3
+  series: Show Name
+  format: HDTV
+  videoCodec: XviD
+  releaseGroup: Etc-Group
+
+? Show.Name.Part.1.and.Part.2.Blah-Group
+: options: -n -t episode
+  part: 1
+  partList:
+  - 1
+  - 2
+  series: Show Name
+
+? Show Name - 01 - Ep Name
+: options: -n
+  episodeNumber: 1
+  series: Show Name
+  title: Ep Name
+
+? 01 - Ep Name
+: options: -n
+  episodeNumber: 1
+  series: Ep Name
+
+? Show.Name.102.HDTV.XViD.Etc-Group
+: options: -n
+  episodeNumber: 2
+  format: HDTV
+  releaseGroup: Etc-Group
+  season: 1
+  series: Show Name
+  videoCodec: XviD
+
+? '[HorribleSubs] Maria the Virgin Witch - 01 [720p].mkv'
+: episodeNumber: 1
+  releaseGroup: HorribleSubs
+  screenSize: 720p
+  series: Maria the Virgin Witch
+
+? '[ISLAND]One_Piece_679_[VOSTFR]_[V1]_[8bit]_[720p]_[EB7838FC].mp4'
+: options: -E
+  crc32: EB7838FC
+  episodeNumber: 679
+  releaseGroup: ISLAND
+  screenSize: 720p
+  series: One Piece
+  subtitleLanguage: fr
+  videoProfile: 8bit
+  version: 1
+
+
+? '[ISLAND]One_Piece_679_[VOSTFR]_[8bit]_[720p]_[EB7838FC].mp4'
+: options: -E
+  crc32: EB7838FC
+  episodeNumber: 679
+  releaseGroup: ISLAND
+  screenSize: 720p
+  series: One Piece
+  subtitleLanguage: fr
+  videoProfile: 8bit
+
+? '[Kaerizaki-Fansub]_One_Piece_679_[VOSTFR][HD_1280x720].mp4'
+: options: -E
+  episodeNumber: 679
+  other: HD
+  releaseGroup: Kaerizaki-Fansub
+  screenSize: 720p
+  series: One Piece
+  subtitleLanguage: fr
+
+? '[Kaerizaki-Fansub]_One_Piece_679_[VOSTFR][FANSUB][HD_1280x720].mp4'
+: options: -E
+  episodeNumber: 679
+  other:
+  - Fansub
+  - HD
+  releaseGroup: Kaerizaki-Fansub
+  screenSize: 720p
+  series: One Piece
+  subtitleLanguage: fr
+
+? '[Kaerizaki-Fansub]_One_Piece_681_[VOSTFR][HD_1280x720]_V2.mp4'
+: options: -E
+  episodeNumber: 681
+  other: HD
+  releaseGroup: Kaerizaki-Fansub
+  screenSize: 720p
+  series: One Piece
+  subtitleLanguage: fr
+  version: 2
+
+? '[Kaerizaki-Fansub] High School DxD New 04 VOSTFR HD (1280x720) V2.mp4'
+: options: -E
+  episodeNumber: 4
+  other: HD
+  releaseGroup: Kaerizaki-Fansub
+  screenSize: 720p
+  series: High School DxD New
+  subtitleLanguage: fr
+  version: 2
+
+? '[Kaerizaki-Fansub] One Piece 603 VOSTFR PS VITA (960x544) V2.mp4'
+: options: -E
+  episodeNumber: 603
+  releaseGroup: Kaerizaki-Fansub
+  screenSize: 960x544
+  series: One Piece
+  subtitleLanguage: fr
+  version: 2
+
+? '[Group Name] Show Name.13'
+: options: -n
+  episodeNumber: 13
+  releaseGroup: Group Name
+  series: Show Name
+
+? '[Group Name] Show Name - 13'
+: options: -n
+  episodeNumber: 13
+  releaseGroup: Group Name
+  series: Show Name
+
+? '[Group Name] Show Name 13'
+: options: -n
+  episodeNumber: 13
+  releaseGroup: Group Name
+  series: Show Name
+
+# [Group Name] Show Name.13-14
+# [Group Name] Show Name - 13-14
+# Show Name 13-14
+
+? '[Stratos-Subs]_Infinite_Stratos_-_12_(1280x720_H.264_AAC)_[379759DB]'
+: options: -n
+  audioCodec: AAC
+  crc32: 379759DB
+  episodeNumber: 12
+  releaseGroup: Stratos-Subs
+  screenSize: 720p
+  series: Infinite Stratos
+  videoCodec: h264
+
+# [ShinBunBu-Subs] Bleach - 02-03 (CX 1280x720 x264 AAC)
+
+? '[SGKK] Bleach 312v1 [720p/MKV]'
+: options: -n
+  episodeNumber: 312
+  releaseGroup: SGKK
+  screenSize: 720p
+  series: Bleach
+  version: 1
+
+? '[Ayako]_Infinite_Stratos_-_IS_-_07_[H264][720p][EB7838FC]'
+: options: -n
+  crc32: EB7838FC
+  episodeNumber: 7
+  releaseGroup: Ayako
+  screenSize: 720p
+  series: Infinite Stratos
+  videoCodec: h264
+
+? '[Ayako] Infinite Stratos - IS - 07v2 [H264][720p][44419534]'
+: options: -n
+  crc32: '44419534'
+  episodeNumber: 7
+  releaseGroup: Ayako
+  screenSize: 720p
+  series: Infinite Stratos
+  videoCodec: h264
+  version: 2
+
+? '[Ayako-Shikkaku] Oniichan no Koto Nanka Zenzen Suki Janain Dakara ne - 10 [LQ][h264][720p] [8853B21C]'
+: options: -n
+  crc32: 8853B21C
+  episodeNumber: 10
+  releaseGroup: Ayako-Shikkaku
+  screenSize: 720p
+  series: Oniichan no Koto Nanka Zenzen Suki Janain Dakara ne
+  videoCodec: h264
+
+# Add support for absolute episodes
+? Bleach - s16e03-04 - 313-314
+: options: -n
+  episodeList:
+  - 3
+  - 4
+  episodeNumber: 3
+  season: 16
+  series: Bleach
+
+? Bleach.s16e03-04.313-314
+: options: -n
+  episodeList:
+  - 3
+  - 4
+  episodeNumber: 3
+  season: 16
+  series: Bleach
+
+? Bleach.s16e03-04.313-314
+: options: -n
+  episodeList:
+  - 3
+  - 4
+  episodeNumber: 3
+  season: 16
+  series: Bleach
+
+? Bleach - 313-314
+: options: -En
+  episodeList:
+  - 313
+  - 314
+  episodeNumber: 313
+  series: Bleach
+
+? Bleach - s16e03-04 - 313-314
+: options: -n
+  episodeList:
+  - 3
+  - 4
+  episodeNumber: 3
+  season: 16
+  series: Bleach
+
+? Bleach.s16e03-04.313-314
+: options: -n
+  episodeList:
+  - 3
+  - 4
+  episodeNumber: 3
+  season: 16
+  series: Bleach
+
+
+? Bleach s16e03e04 313-314
+: options: -n
+  episodeList:
+  - 3
+  - 4
+  episodeNumber: 3
+  season: 16
+  series: Bleach
+
+? '[ShinBunBu-Subs] Bleach - 02-03 (CX 1280x720 x264 AAC)'
+: audioCodec: AAC
+  episodeList:
+  - 2
+  - 3
+  episodeNumber: 2
+  releaseGroup: ShinBunBu-Subs
+  screenSize: 720p
+  series: Bleach
+  videoCodec: h264
+
+? 003. Show Name - Ep Name.ext
+: episodeNumber: 3
+  series: Show Name
+  title: Ep Name
+
+? 003-004. Show Name - Ep Name.ext
+: episodeList:
+  - 3
+  - 4
+  episodeNumber: 3
+  series: Show Name
+  title: Ep Name
+
+? One Piece - 102
+: options: -n -t episode
+  episodeNumber: 2
+  season: 1
+  series: One Piece
+
+? "[ACX]_Wolf's_Spirit_001.mkv"
+: episodeNumber: 1
+  releaseGroup: ACX
+  series: "Wolf's Spirit"
+
+? Project.Runway.S14E00.and.S14E01.(Eng.Subs).SDTV.x264-[2Maverick].mp4
+: episodeList:
+  - 0
+  - 1
+  episodeNumber: 0
+  format: TV
+  releaseGroup: 2Maverick
+  season: 14
+  series: Project Runway
+  subtitleLanguage: en
+  videoCodec: h264
+
+? '[Hatsuyuki-Kaitou]_Fairy_Tail_2_-_16-20_[720p][10bit].torrent'
+: episodeList:
+  - 16
+  - 17
+  - 18
+  - 19
+  - 20
+  episodeNumber: 16
+  releaseGroup: Hatsuyuki-Kaitou
+  screenSize: 720p
+  series: Fairy Tail 2
+  videoProfile: 10bit
+
+? '[Hatsuyuki-Kaitou]_Fairy_Tail_2_-_16-20_(191-195)_[720p][10bit].torrent'
+: options: -E
+  episodeList:
+  - 16
+  - 17
+  - 18
+  - 19
+  - 20
+  episodeNumber: 16
+  releaseGroup: Hatsuyuki-Kaitou
+  screenSize: 720p
+  series: Fairy Tail 2
+
+? "Looney Tunes 1940x01 Porky's Last Stand.mkv"
+: episodeNumber: 1
+  season: 1940
+  series: Looney Tunes
+  title: Porky's Last Stand
+  year: 1940
+
+? The.Good.Wife.S06E01.E10.720p.WEB-DL.DD5.1.H.264-CtrlHD/The.Good.Wife.S06E09.Trust.Issues.720p.WEB-DL.DD5.1.H.264-CtrlHD.mkv
+: audioChannels: '5.1'
+  audioCodec: DolbyDigital
+  episodeList:
+  - 1
+  - 2
+  - 3
+  - 4
+  - 5
+  - 6
+  - 7
+  - 8
+  - 9
+  - 10
+  episodeNumber: 9
+  format: WEB-DL
+  releaseGroup: CtrlHD
+  screenSize: 720p
+  season: 6
+  series: The Good Wife
+  title: Trust Issues
+  videoCodec: h264
+
+? Fear the Walking Dead - 01x02 - So Close, Yet So Far.REPACK-KILLERS.French.C.updated.Addic7ed.com.mkv
+: episodeNumber: 2
+  language: fr
+  other: Proper
+  properCount: 1
+  season: 1
+  series: Fear the Walking Dead
+  title: So Close, Yet So Far
+
+? Fear the Walking Dead - 01x02 - En Close, Yet En Far.REPACK-KILLERS.French.C.updated.Addic7ed.com.mkv
+: episodeNumber: 2
+  language: fr
+  other: Proper
+  properCount: 1
+  season: 1
+  series: Fear the Walking Dead
+  title: En Close, Yet En Far
+
+? /av/unsorted/The.Daily.Show.2015.07.22.Jake.Gyllenhaal.720p.HDTV.x264-BATV.mkv
+: date: 2015-07-22
+  format: HDTV
+  releaseGroup: BATV
+  screenSize: 720p
+  series: The Daily Show
+  title: Jake Gyllenhaal
+  videoCodec: h264
@@ -22,7 +22,6 @@ from __future__ import absolute_import, division, print_function, unicode_litera

 from collections import defaultdict
 from unittest import TestCase, TestLoader
-import shlex
 import logging
 import os
 import sys
@@ -86,10 +85,6 @@ class TestGuessit(TestCase):

            options = required_fields.pop('options') if 'options' in required_fields else None

-            if options:
-                args = shlex.split(options)
-                options = get_opts().parse_args(args)
-                options = vars(options)
            try:
                found = guess_func(filename, options)
            except Exception as e:
@@ -606,7 +606,9 @@
 ? Yves.Saint.Laurent.2013.FRENCH.DVDSCR.MD.XviD-ViVARiUM.avi
 : format: DVD
  language: French
-  other: Screener
+  other:
+      - MD
+      - Screener
  releaseGroup: ViVARiUM
  title: Yves Saint Laurent
  videoCodec: XviD
@@ -759,3 +761,19 @@
  screenSize: 1080p
  title: transformers 2
  videoCodec: h265
+
+? 1.Angry.Man.1957.mkv
+: title: 1 Angry Man
+  year: 1957
+
+? 12.Angry.Men.1957.mkv
+: title: 12 Angry Men
+  year: 1957
+
+? 123.Angry.Men.1957.mkv
+: title: 123 Angry Men
+  year: 1957
+
+? "Looney Tunes 1444x866 Porky's Last Stand.mkv"
+: screenSize: 1444x866
+  title: Looney Tunes
@@ -31,10 +31,12 @@ keywords = yaml.load("""
 ? Xvid PROPER
 : videoCodec: Xvid
  other: PROPER
+  properCount: 1

 ? PROPER-Xvid
 : videoCodec: Xvid
  other: PROPER
+  properCount: 1

 """)

@@ -19,6 +19,7 @@
 #

 from __future__ import absolute_import, division, print_function, unicode_literals
+from guessit.containers import DefaultValidator

 from guessit.plugins.transformers import Transformer
 from guessit.matcher import GuessFinder
@@ -41,10 +42,9 @@ class GuessDate(Transformer):
    @staticmethod
    def guess_date(string, node=None, options=None):
        date, span = search_date(string, options.get('date_year_first') if options else False, options.get('date_day_first') if options else False)
-        if date:
+        if date and span and DefaultValidator.validate_string(string, span): # ensure we have a separator before and after date
            return {'date': date}, span
-        else:
-            return None, None
+        return None, None

    def process(self, mtree, options=None):
        GuessFinder(self.guess_date, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
@@ -24,6 +24,8 @@ from guessit.plugins.transformers import Transformer, get_transformer
 from guessit.textutils import reorder_title

 from guessit.matcher import found_property
+from guessit.patterns.list import all_separators
+from guessit.language import all_lang_prefixes_suffixes


 class GuessEpisodeInfoFromPosition(Transformer):
@@ -33,39 +35,49 @@ class GuessEpisodeInfoFromPosition(Transformer):
    def supported_properties(self):
        return ['title', 'series']

-    def match_from_epnum_position(self, mtree, node, options):
-        epnum_idx = node.node_idx
+    @staticmethod
+    def excluded_word(*values):
+        """
+        Tell whether at least one of the given nodes holds an excluded word.
+
+        :param values: objects exposing a ``clean_value`` string
+        :return: True when the lowercased clean_value of any of them is found in
+            all_separators + all_lang_prefixes_suffixes, False otherwise
+        """
+        return any(candidate.clean_value.lower() in (all_separators + all_lang_prefixes_suffixes)
+                   for candidate in values)
+
+    def match_from_epnum_position(self, path_node, ep_node, options):
+        epnum_idx = ep_node.node_idx

        # a few helper functions to be able to filter using high-level semantics
        def before_epnum_in_same_pathgroup():
-            return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
+            return [leaf for leaf in path_node.unidentified_leaves(lambda x: len(x.clean_value) > 1)
                    if (leaf.node_idx[0] == epnum_idx[0] and
-                    leaf.node_idx[1:] < epnum_idx[1:])]
+                    leaf.node_idx[1:] < epnum_idx[1:] and
+                    not GuessEpisodeInfoFromPosition.excluded_word(leaf))]

        def after_epnum_in_same_pathgroup():
-            return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
+            return [leaf for leaf in path_node.unidentified_leaves(lambda x: len(x.clean_value) > 1)
                    if (leaf.node_idx[0] == epnum_idx[0] and
-                    leaf.node_idx[1:] > epnum_idx[1:])]
+                    leaf.node_idx[1:] > epnum_idx[1:] and
+                    not GuessEpisodeInfoFromPosition.excluded_word(leaf))]

        def after_epnum_in_same_explicitgroup():
-            return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
+            return [leaf for leaf in path_node.unidentified_leaves(lambda x: len(x.clean_value) > 1)
                    if (leaf.node_idx[:2] == epnum_idx[:2] and
-                    leaf.node_idx[2:] > epnum_idx[2:])]
+                    leaf.node_idx[2:] > epnum_idx[2:] and
+                    not GuessEpisodeInfoFromPosition.excluded_word(leaf))]

        # epnumber is the first group and there are only 2 after it in same
        # path group
        # -> series title - episode title
-        title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
+        title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(after_epnum_in_same_pathgroup(), options)

-        if ('title' not in mtree.info and  # no title
-                'series' in mtree.info and # series present
+        if ('title' not in path_node.info and  # no title
+                'series' in path_node.info and # series present
                before_epnum_in_same_pathgroup() == [] and  # no groups before
                len(title_candidates) == 1):  # only 1 group after

            found_property(title_candidates[0], 'title', confidence=0.4)
            return

-        if ('title' not in mtree.info and  # no title
+        if ('title' not in path_node.info and  # no title
                before_epnum_in_same_pathgroup() == [] and  # no groups before
                len(title_candidates) == 2):  # only 2 groups after

@@ -77,17 +89,17 @@ class GuessEpisodeInfoFromPosition(Transformer):
        # probably the series name
        series_candidates = before_epnum_in_same_pathgroup()
        if len(series_candidates) >= 1:
-            found_property(series_candidates[0], 'series', confidence=0.7)
+                found_property(series_candidates[0], 'series', confidence=0.7)

        # only 1 group after (in the same path group) and it's probably the
        # episode title.
-        title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
+        title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(after_epnum_in_same_pathgroup(), options)
        if len(title_candidates) == 1:
            found_property(title_candidates[0], 'title', confidence=0.5)
            return
        else:
            # try in the same explicit group, with lower confidence
-            title_candidates = self._filter_candidates(after_epnum_in_same_explicitgroup(), options)
+            title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(after_epnum_in_same_explicitgroup(), options)
            if len(title_candidates) == 1:
                found_property(title_candidates[0], 'title', confidence=0.4)
                return
@@ -96,7 +108,7 @@ class GuessEpisodeInfoFromPosition(Transformer):
                return

        # get the one with the longest value
-        title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
+        title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(after_epnum_in_same_pathgroup(), options)
        if title_candidates:
            maxidx = -1
            maxv = -1
@@ -104,7 +116,8 @@ class GuessEpisodeInfoFromPosition(Transformer):
                if len(c.clean_value) > maxv:
                    maxidx = i
                    maxv = len(c.clean_value)
-            found_property(title_candidates[maxidx], 'title', confidence=0.3)
+            if maxidx > -1:
+                found_property(title_candidates[maxidx], 'title', confidence=0.3)

    def should_process(self, mtree, options=None):
        options = options or {}
@@ -114,9 +127,9 @@ class GuessEpisodeInfoFromPosition(Transformer):
    def _filter_candidates(candidates, options):
        episode_details_transformer = get_transformer('guess_episode_details')
        if episode_details_transformer:
-            return [n for n in candidates if not episode_details_transformer.container.find_properties(n.value, n, options, re_match=True)]
-        else:
-            return candidates
+            candidates = [n for n in candidates if not episode_details_transformer.container.find_properties(n.value, n, options, re_match=True)]
+        candidates = list(filter(lambda n: not GuessEpisodeInfoFromPosition.excluded_word(n), candidates))
+        return candidates

    def process(self, mtree, options=None):
        """
@@ -128,15 +141,26 @@ class GuessEpisodeInfoFromPosition(Transformer):
        if not eps:
            eps = [node for node in mtree.leaves() if 'date' in node.guess]

+        eps = sorted(eps, key=lambda ep: -ep.guess.confidence())
        if eps:
-            self.match_from_epnum_position(mtree, eps[0], options)
+            performed_path_nodes = []
+            for ep_node in eps:
+                # Perform only first episode node for each path node
+                path_node = [node for node in ep_node.ancestors if node.category == 'path']
+                if len(path_node) > 0:
+                    path_node = path_node[0]
+                else:
+                    path_node = ep_node.root
+                if path_node not in performed_path_nodes:
+                    self.match_from_epnum_position(path_node, ep_node, options)
+                    performed_path_nodes.append(path_node)

        else:
            # if we don't have the episode number, but at least 2 groups in the
            # basename, then it's probably series - eptitle
-            basename = mtree.node_at((-2,))
+            basename = list(filter(lambda x: x.category == 'path', mtree.nodes()))[-2]

-            title_candidates = self._filter_candidates(basename.unidentified_leaves(), options)
+            title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(basename.unidentified_leaves(), options)

            if len(title_candidates) >= 2 and 'series' not in mtree.info:
                found_property(title_candidates[0], 'series', confidence=0.4)
@@ -147,12 +171,13 @@ class GuessEpisodeInfoFromPosition(Transformer):

        # if we only have 1 remaining valid group in the folder containing the
        # file, then it's likely that it is the series name
+        path_nodes = list(filter(lambda x: x.category == 'path', mtree.nodes()))
        try:
-            series_candidates = list(mtree.node_at((-3,)).unidentified_leaves())
-        except ValueError:
+            series_candidates = list(path_nodes[-3].unidentified_leaves())
+        except IndexError:
            series_candidates = []

-        if len(series_candidates) == 1:
+        if len(series_candidates) == 1 and not GuessEpisodeInfoFromPosition.excluded_word(series_candidates[0]):
            found_property(series_candidates[0], 'series', confidence=0.3)

        # if there's a path group that only contains the season info, then the
@@ -163,7 +188,7 @@ class GuessEpisodeInfoFromPosition(Transformer):
        if eps:
            previous = [node for node in mtree.unidentified_leaves()
                        if node.node_idx[0] == eps[0].node_idx[0] - 1]
-            if len(previous) == 1:
+            if len(previous) == 1 and not GuessEpisodeInfoFromPosition.excluded_word(previous[0]):
                found_property(previous[0], 'series', confidence=0.5)

        # If we have found title without any serie name, replace it by the serie name.
@@ -21,6 +21,7 @@
 from __future__ import absolute_import, division, print_function, unicode_literals

 import re
+from guessit.patterns.list import list_parser, all_separators_re

 from guessit.plugins.transformers import Transformer
 from guessit.matcher import GuessFinder
@@ -34,9 +35,8 @@ class GuessEpisodesRexps(Transformer):
    def __init__(self):
        Transformer.__init__(self, 20)

-        range_separators = ['-', 'to', 'a']
-        discrete_separators = ['&', 'and', 'et']
        of_separators = ['of', 'sur', '/', '\\']
+        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']
@@ -44,85 +44,14 @@ class GuessEpisodesRexps(Transformer):
        season_markers = ['s']
        episode_markers = ['e', 'ep']

-        discrete_sep = sep
-        for range_separator in range_separators:
-            discrete_sep = discrete_sep.replace(range_separator, '')
-        discrete_separators.append(discrete_sep)
-        all_separators = list(range_separators)
-        all_separators.extend(discrete_separators)
-
        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

-        range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
-        discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE)
-        all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
-        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)
-
        season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

-        def list_parser(value, property_list_name, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
-            discrete_elements = filter(lambda x: x != '', discrete_separators_re.split(value))
-            discrete_elements = [x.strip() for x in discrete_elements]
-
-            proper_discrete_elements = []
-            i = 0
-            while i < len(discrete_elements):
-                if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i+1]):
-                    proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i+1] + discrete_elements[i+2])
-                    i += 3
-                else:
-                    match = range_separators_re.search(discrete_elements[i])
-                    if match and match.start() == 0:
-                        proper_discrete_elements[i - 1] += discrete_elements[i]
-                    elif match and match.end() == len(discrete_elements[i]):
-                        proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i + 1])
-                    else:
-                        proper_discrete_elements.append(discrete_elements[i])
-                    i += 1
-
-            discrete_elements = proper_discrete_elements
-
-            ret = []
-
-            for discrete_element in discrete_elements:
-                range_values = filter(lambda x: x != '', range_separators_re.split(discrete_element))
-                range_values = [x.strip() for x in range_values]
-                if len(range_values) > 1:
-                    for x in range(0, len(range_values) - 1):
-                        start_range_ep = parse_numeral(range_values[x])
-                        end_range_ep = parse_numeral(range_values[x+1])
-                        for range_ep in range(start_range_ep, end_range_ep + 1):
-                            if range_ep not in ret:
-                                ret.append(range_ep)
-                else:
-                    discrete_value = parse_numeral(discrete_element)
-                    if discrete_value not in ret:
-                        ret.append(discrete_value)
-
-            if len(ret) > 1:
-                if not allow_discrete:
-                    valid_ret = list()
-                    # replace discrete elements by ranges
-                    valid_ret.append(ret[0])
-                    for i in range(0, len(ret) - 1):
-                        previous = valid_ret[len(valid_ret) - 1]
-                        if ret[i+1] < previous:
-                            pass
-                        else:
-                            valid_ret.append(ret[i+1])
-                    ret = valid_ret
-                if fill_gaps:
-                    ret = list(range(min(ret), max(ret) + 1))
-                if len(ret) > 1:
-                    return {None: ret[0], property_list_name: ret}
-            if len(ret) > 0:
-                return ret[0]
-            return None
-
        def episode_parser_x(value):
            return list_parser(value, 'episodeList', discrete_separators_re=re.compile('x', re.IGNORECASE))

@@ -138,34 +67,40 @@ class GuessEpisodesRexps(Transformer):
        class ResolutionCollisionValidator(object):
            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
-                return len(match.group(2)) < 3 # limit
+                # Invalidate only when both season and episode are 100 or more.
+                try:
+                    season_value = season_parser(match.group(2))
+                    episode_value = episode_parser_x(match.group(3))
+                    return season_value < 100 or episode_value < 100
+                except:
+                    # This may occur for 1xAll or patterns like this.
+                    return True

        self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?' + season_words_re.pattern + '?)', confidence=1.0, formatter=parse_numeral)
        self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + season_words_re.pattern + '?)' + sep, confidence=1.0, formatter={None: parse_numeral, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))

        self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser}, validator=NoValidator())
-        # self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
+        self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser, 'season': season_parser}, validator=NoValidator())
+
        self.container.register_property(None, sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(None, r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'season': season_parser}, validator=NoValidator())

        self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.6, formatter=parse_numeral)
+        self.container.register_property('version', sep + r'(V\d+)' + sep, confidence=0.6, formatter=parse_numeral, validator=NoValidator())
        self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?)', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.7, formatter=parse_numeral)

-
        self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + episode_words_re.pattern + '?)', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})

-        self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')'  + sep + '?v(?P<version>\d+))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
-        self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')'  + sep + '?v(?P<version>\d+))', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
+        self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
+        self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})

-
-        self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
-        self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
-        self.container.register_property('episodeNumber', r'^ ?0(\d{1,2})' + sep, confidence=0.4, formatter=parse_numeral)
-        self.container.register_property('episodeNumber', sep + r'(\d{2}) ?$', confidence=0.4, formatter=parse_numeral)
-        self.container.register_property('episodeNumber', sep + r'0(\d{1,2}) ?$', confidence=0.4, formatter=parse_numeral)
+        self.container.register_property('episodeNumber', r'^' + sep + '+(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser)
+        self.container.register_property('episodeNumber', r'^' + sep + '+0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '0\d{1,2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser)
+        self.container.register_property('episodeNumber', sep + r'(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'\d{2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser)
+        self.container.register_property('episodeNumber', sep + r'0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser)

        self.container.register_property(None, r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
@@ -186,7 +121,29 @@ class GuessEpisodesRexps(Transformer):

    def guess_episodes_rexps(self, string, node=None, options=None):
        found = self.container.find_properties(string, node, options)
-        return self.container.as_guess(found, string)
+        guess = self.container.as_guess(found, string)
+        if guess and node:
+            if 'season' in guess and 'episodeNumber' in guess:
+                # If two guesses both contain season and episodeNumber in the same group, create an episodeList
+                for existing_guess in node.group_node().guesses:
+                    if 'season' in existing_guess and 'episodeNumber' in existing_guess:
+                        if 'episodeList' not in existing_guess:
+                            existing_guess['episodeList'] = [existing_guess['episodeNumber']]
+                        existing_guess['episodeList'].append(guess['episodeNumber'])
+                        existing_guess['episodeList'].sort()
+                        if existing_guess['episodeNumber'] > guess['episodeNumber']:
+                            existing_guess.set_confidence('episodeNumber', 0)
+                        else:
+                            guess.set_confidence('episodeNumber', 0)
+                        guess['episodeList'] = list(existing_guess['episodeList'])
+            elif 'episodeNumber' in guess:
+                # If two guesses contain only episodeNumber in the same group, drop from the new guess every key already present in the existing one.
+                for existing_guess in node.group_node().guesses:
+                    if 'episodeNumber' in existing_guess:
+                        for k, v in existing_guess.items():
+                            if k in guess:
+                                del guess[k]
+        return guess

    def should_process(self, mtree, options=None):
        return mtree.guess.get('type', '').startswith('episode')
@@ -156,6 +156,13 @@ class GuessFiletype(Transformer):

            weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
            if weak_episode_transformer:
+                found = weak_episode_transformer.container.find_properties(filename, mtree, options, 'episodeNumber')
+                guess = weak_episode_transformer.container.as_guess(found, filename)
+                if guess and (guess.raw('episodeNumber')[0] == '0' or guess['episodeNumber'] >= 10):
+                    self.log.debug('Found characteristic property of episodes: %s"', guess)
+                    upgrade_episode()
+                    return filetype_container[0], other
+
                found = properties_transformer.container.find_properties(filename, mtree, options, 'crc32')
                guess = properties_transformer.container.as_guess(found, filename)
                if guess:
@@ -217,7 +224,8 @@ class GuessFiletype(Transformer):
        if mime is not None:
            filetype_info.update({'mimetype': mime}, confidence=1.0)

-        node_ext = mtree.node_at((-1,))
+        # Retrieve the last node of category path (extension node)
+        node_ext = list(filter(lambda x: x.category == 'path', mtree.nodes()))[-1]
        found_guess(node_ext, filetype_info)

        if mtree.guess.get('type') in [None, 'unknown']:
@@ -226,12 +234,21 @@ class GuessFiletype(Transformer):
            else:
                raise TransformerException(__name__, 'Unknown file type')

-    def post_process(self, mtree, options=None):
-        # now look whether there are some specific hints for episode vs movie
-        # If we have a date and no year, this is a TV Show.
-        if 'date' in mtree.info and 'year' not in mtree.info and mtree.info.get('type') != 'episode':
-            mtree.guess['type'] = 'episode'
-            for type_leaves in mtree.leaves_containing('type'):
-                type_leaves.guess['type'] = 'episode'
-            for title_leaves in mtree.leaves_containing('title'):
-                title_leaves.guess.rename('title', 'series')
+    def second_pass_options(self, mtree, options=None):
+        if 'type' not in options or not options['type']:
+            if mtree.info.get('type') != 'episode':
+                # now look whether there are some specific hints for episode vs movie
+                # If we have a date and no year, this is a TV Show.
+                if 'date' in mtree.info and 'year' not in mtree.info:
+                    return {'type': 'episode'}
+
+            if mtree.info.get('type') != 'movie':
+                # If we have a year, no season but raw episodeNumber is a number not starting with '0', this is a movie.
+                if 'year' in mtree.info and 'episodeNumber' in mtree.info and not 'season' in mtree.info:
+                    try:
+                        int(mtree.raw['episodeNumber'])
+                        return {'type': 'movie'}
+                    except ValueError:
+                        pass
+
+
@@ -43,6 +43,12 @@ class GuessLanguage(Transformer):
        allowed_languages = None
        if options and 'allowed_languages' in options:
            allowed_languages = options.get('allowed_languages')
+
+        directory = list(filter(lambda x: x.category == 'path', node.ancestors))[0]
+        if len(directory.clean_value) <= 3:
+            # skip if we have a language code as directory
+            return None
+
        guess = search_language(string, allowed_languages)
        return guess

@@ -68,8 +74,10 @@ class GuessLanguage(Transformer):
        title_ends = {}

        for unidentified_node in mtree.unidentified_leaves():
-            unidentified_starts[unidentified_node.span[0]] = unidentified_node
-            unidentified_ends[unidentified_node.span[1]] = unidentified_node
+            if len(unidentified_node.clean_value) > 1:
+                # only consider unidentified leaves that have some meaningful content
+                unidentified_starts[unidentified_node.span[0]] = unidentified_node
+                unidentified_ends[unidentified_node.span[1]] = unidentified_node

        for property_node in mtree.leaves_containing('year'):
            property_starts[property_node.span[0]] = property_node
@@ -79,19 +87,20 @@ class GuessLanguage(Transformer):
            title_starts[title_node.span[0]] = title_node
            title_ends[title_node.span[1]] = title_node

-        return node.span[0] in title_ends.keys() and (node.span[1] in unidentified_starts.keys() or node.span[1] + 1 in property_starts.keys()) or\
-            node.span[1] in title_starts.keys() and (node.span[0] == node.group_node().span[0] or node.span[0] in unidentified_ends.keys() or node.span[0] in property_ends.keys())
+        return (node.span[0] in title_ends.keys() and (node.span[1] in unidentified_starts.keys() or
+                                                 node.span[1] + 1 in property_starts.keys()) or
+                node.span[1] in title_starts.keys() and (node.span[0] == node.group_node().span[0] or
+                                                         node.span[0] in unidentified_ends.keys() or
+                                                         node.span[0] in property_ends.keys()))

    def second_pass_options(self, mtree, options=None):
        m = mtree.matched()
-        to_skip_language_nodes = []
+        to_skip_langs = set()

        for lang_key in ('language', 'subtitleLanguage'):
-            langs = {}
            lang_nodes = set(mtree.leaves_containing(lang_key))

            for lang_node in lang_nodes:
-                lang = lang_node.guess.get(lang_key, None)
                if self._skip_language_on_second_pass(mtree, lang_node):
                    # Language probably split the title. Add to skip for 2nd pass.

@@ -99,38 +108,19 @@ class GuessLanguage(Transformer):
                    # the extension, then it is likely a subtitle language
                    parts = mtree.clean_string(lang_node.root.value).split()
                    if m.get('type') in ['moviesubtitle', 'episodesubtitle']:
-                        if lang_node.value in parts and \
-                                (parts.index(lang_node.value) == len(parts) - 2):
+                        if (lang_node.value in parts and parts.index(lang_node.value) == len(parts) - 2):
                            continue
-                    to_skip_language_nodes.append(lang_node)
-                elif lang not in langs:
-                    langs[lang] = lang_node
-                else:
-                    # The same language was found. Keep the more confident one,
-                    # and add others to skip for 2nd pass.
-                    existing_lang_node = langs[lang]
-                    to_skip = None
-                    if (existing_lang_node.guess.confidence('language') >=
-                        lang_node.guess.confidence('language')):
-                        # lang_node is to remove
-                        to_skip = lang_node
-                    else:
-                        # existing_lang_node is to remove
-                        langs[lang] = lang_node
-                        to_skip = existing_lang_node
-                    to_skip_language_nodes.append(to_skip)

-        if to_skip_language_nodes:
+                    to_skip_langs.add(lang_node.value)
+
+        if to_skip_langs:
            # Also skip same value nodes
-            skipped_values = [skip_node.value for skip_node in to_skip_language_nodes]
+            lang_nodes = (set(mtree.leaves_containing('language')) |
+                          set(mtree.leaves_containing('subtitleLanguage')))

-            for lang_key in ('language', 'subtitleLanguage'):
-                lang_nodes = set(mtree.leaves_containing(lang_key))
+            to_skip = [node for node in lang_nodes if node.value in to_skip_langs]
+            return {'skip_nodes': to_skip}

-                for lang_node in lang_nodes:
-                    if lang_node not in to_skip_language_nodes and lang_node.value in skipped_values:
-                        to_skip_language_nodes.append(lang_node)
-            return {'skip_nodes': to_skip_language_nodes}
        return None

    def should_process(self, mtree, options=None):
@@ -149,6 +139,8 @@ class GuessLanguage(Transformer):

    def post_process(self, mtree, options=None):
        # 1- try to promote language to subtitle language where it makes sense
+        prefixes = []
+
        for node in mtree.nodes():
            if 'language' not in node.guess:
                continue
@@ -157,7 +149,8 @@ class GuessLanguage(Transformer):
            #   the group is the last group of the filename, it is probably the
            #   language of the subtitle
            #   (eg: 'xxx.english.srt')
-            if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
+            ext_node = list(filter(lambda x: x.category == 'path', mtree.nodes()))[-1]
+            if (ext_node.value.lower() in subtitle_exts and
                    node == list(mtree.leaves())[-2]):
                self.promote_subtitle(node)

@@ -171,11 +164,7 @@ class GuessLanguage(Transformer):
            for sub_prefix in subtitle_prefixes:
                if (sub_prefix in find_words(group_str) and
                        0 <= group_str.find(sub_prefix) < (node.span[0] - explicit_group.span[0])):
-                    self.promote_subtitle(node)
-
-            for sub_suffix in subtitle_suffixes:
-                if (sub_suffix in find_words(group_str) and
-                        (node.span[0] - explicit_group.span[0]) < group_str.find(sub_suffix)):
+                    prefixes.append((explicit_group, sub_prefix))
                    self.promote_subtitle(node)

            # - if a language is in an explicit group just preceded by "st",
@@ -187,3 +176,21 @@ class GuessLanguage(Transformer):
                    self.promote_subtitle(node)
            except IndexError:
                pass
+
+        for node in mtree.nodes():
+            if 'language' not in node.guess:
+                continue
+
+            explicit_group = mtree.node_at(node.node_idx[:2])
+            group_str = explicit_group.value.lower()
+
+            for sub_suffix in subtitle_suffixes:
+                if (sub_suffix in find_words(group_str) and
+                            (node.span[0] - explicit_group.span[0]) < group_str.find(sub_suffix)):
+                    is_a_prefix = False
+                    for prefix in prefixes:
+                        if prefix[0] == explicit_group and group_str.find(prefix[1]) == group_str.find(sub_suffix):
+                            is_a_prefix = True
+                            break
+                    if not is_a_prefix:
+                        self.promote_subtitle(node)
@@ -23,6 +23,8 @@ from __future__ import absolute_import, division, print_function, unicode_litera
 from guessit.plugins.transformers import Transformer
 from guessit.matcher import found_property
 from guessit import u
+from guessit.patterns.list import all_separators
+from guessit.language import all_lang_prefixes_suffixes


 class GuessMovieTitleFromPosition(Transformer):
@@ -36,6 +38,13 @@ class GuessMovieTitleFromPosition(Transformer):
        options = options or {}
        return not options.get('skip_title') and not mtree.guess.get('type', '').startswith('episode')

+    @staticmethod
+    def excluded_word(*values):
+        for value in values:
+            if value.clean_value.lower() in all_separators + all_lang_prefixes_suffixes:
+                return True
+        return False
+
    def process(self, mtree, options=None):
        """
        try to identify the remaining unknown groups by looking at their
@@ -44,14 +53,16 @@ class GuessMovieTitleFromPosition(Transformer):
        if 'title' in mtree.info:
            return

-        basename = mtree.node_at((-2,))
+        path_nodes = list(filter(lambda x: x.category == 'path', mtree.nodes()))
+
+        basename = path_nodes[-2]
        all_valid = lambda leaf: len(leaf.clean_value) > 0
        basename_leftover = list(basename.unidentified_leaves(valid=all_valid))

        try:
-            folder = mtree.node_at((-3,))
+            folder = path_nodes[-3]
            folder_leftover = list(folder.unidentified_leaves())
-        except ValueError:
+        except IndexError:
            folder = None
            folder_leftover = []

@@ -61,7 +72,9 @@ class GuessMovieTitleFromPosition(Transformer):
        # specific cases:
        # if we find the same group both in the folder name and the filename,
        # it's a good candidate for title
-        if folder_leftover and basename_leftover and folder_leftover[0].clean_value == basename_leftover[0].clean_value:
+        if (folder_leftover and basename_leftover and
+                        folder_leftover[0].clean_value == basename_leftover[0].clean_value and
+                        not GuessMovieTitleFromPosition.excluded_word(folder_leftover[0])):
            found_property(folder_leftover[0], 'title', confidence=0.8)
            return

@@ -89,7 +102,8 @@ class GuessMovieTitleFromPosition(Transformer):
                if (series.clean_value != title.clean_value and
                            series.clean_value != film_number.clean_value and
                            basename_leaves.index(film_number) == 0 and
-                            basename_leaves.index(title) == 1):
+                            basename_leaves.index(title) == 1 and
+                            not GuessMovieTitleFromPosition.excluded_word(title, series)):

                    found_property(title, 'title', confidence=0.6)
                    found_property(series, 'filmSeries', confidence=0.6)
@@ -103,8 +117,9 @@ class GuessMovieTitleFromPosition(Transformer):
                if groups_before:
                    try:
                        node = next(groups_before)
-                        found_property(node, 'title', confidence=0.8)
-                        return
+                        if not GuessMovieTitleFromPosition.excluded_word(node):
+                            found_property(node, 'title', confidence=0.8)
+                            return
                    except StopIteration:
                        pass

@@ -125,8 +140,10 @@ class GuessMovieTitleFromPosition(Transformer):
                # if they're all in the same group, take leftover info from there
                leftover = mtree.node_at((group_idx,)).unidentified_leaves()
                try:
-                    found_property(next(leftover), 'title', confidence=0.7)
-                    return
+                    node = next(leftover)
+                    if not GuessMovieTitleFromPosition.excluded_word(node):
+                        found_property(node, 'title', confidence=0.7)
+                        return
                except StopIteration:
                    pass

@@ -138,7 +155,8 @@ class GuessMovieTitleFromPosition(Transformer):
            # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
            # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi  <-- TODO: gets caught here?
            if (basename_leftover[0].clean_value.count(' ') == 0 and
-                    folder_leftover and folder_leftover[0].clean_value.count(' ') >= 2):
+                    folder_leftover and folder_leftover[0].clean_value.count(' ') >= 2 and
+                    not GuessMovieTitleFromPosition.excluded_word(folder_leftover[0])):

                found_property(folder_leftover[0], 'title', confidence=0.7)
                return
@@ -148,26 +166,28 @@ class GuessMovieTitleFromPosition(Transformer):
            # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
            if basename_leftover[0].is_explicit():
                for basename_leftover_elt in basename_leftover:
-                    if not basename_leftover_elt.is_explicit():
+                    if not basename_leftover_elt.is_explicit() and not GuessMovieTitleFromPosition.excluded_word(basename_leftover_elt):
                        found_property(basename_leftover_elt, 'title', confidence=0.8)
                        return

            # if all else fails, take the first remaining unidentified group in the
            # basename as title
-            found_property(basename_leftover[0], 'title', confidence=0.6)
-            return
+            if not GuessMovieTitleFromPosition.excluded_word(basename_leftover[0]):
+                found_property(basename_leftover[0], 'title', confidence=0.6)
+                return

        # if there are no leftover groups in the basename, look in the folder name
-        if folder_leftover:
+        if folder_leftover and not GuessMovieTitleFromPosition.excluded_word(folder_leftover[0]):
            found_property(folder_leftover[0], 'title', confidence=0.5)
            return

        # if nothing worked, look if we have a very small group at the beginning
        # of the basename
-        basename = mtree.node_at((-2,))
        basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
        try:
-            found_property(next(basename_leftover), 'title', confidence=0.4)
-            return
+            node = next(basename_leftover)
+            if not GuessMovieTitleFromPosition.excluded_word(node):
+                found_property(node, 'title', confidence=0.4)
+                return
        except StopIteration:
            pass
@@ -22,7 +22,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera

 import re

-from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer, ChainedValidator, DefaultValidator, OnlyOneValidator, LeftValidator, NeighborValidator
+from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer, ChainedValidator, DefaultValidator, OnlyOneValidator, LeftValidator, NeighborValidator, FullMatchValidator
 from guessit.patterns import sep, build_or_pattern
 from guessit.patterns.extension import subtitle_exts, video_exts, info_exts
 from guessit.patterns.numeral import numeral, parse_numeral
@@ -61,7 +61,6 @@ class GuessProperties(Transformer):
            for canonical_form, quality in quality_dict.items():
                self.qualities.register_quality(propname, canonical_form, quality)

-        register_property('container', {'mp4': ['MP4']})

        # http://en.wikipedia.org/wiki/Pirated_movie_release_types
        register_property('format', {'VHS': ['VHS', 'VHS-Rip'],
@@ -74,11 +73,11 @@ class GuessProperties(Transformer):
                                     'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
                                     'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
                                     'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'],
-                                     'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP'],
+                                     'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP', 'HD-RIP'],
                                     'VOD': ['VOD', 'VOD-Rip'],
                                     'WEBRip': ['WEB-Rip'],
                                     'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'],
-                                     'HD-DVD': ['HD-(?:DVD)?-Rip', 'HD-DVD'],
+                                     'HD-DVD': ['HD-DVD-Rip', 'HD-DVD'],
                                     'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
                                     })

@@ -112,32 +111,13 @@ class GuessProperties(Transformer):
                                         },
                          validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()))

-        class ResolutionValidator(object):
-            """Make sure our match is surrounded by separators, or by another entry"""
-            @staticmethod
-            def validate(prop, string, node, match, entry_start, entry_end):
-                """
-                span = _get_span(prop, match)
-                span = _trim_span(span, string[span[0]:span[1]])
-                start, end = span
-
-                sep_start = start <= 0 or string[start - 1] in sep
-                sep_end = end >= len(string) or string[end] in sep
-                start_by_other = start in entry_end
-                end_by_other = end in entry_start
-                if (sep_start or start_by_other) and (sep_end or end_by_other):
-                    return True
-                return False
-                """
-                return True
-
        _digits_re = re.compile('\d+')

        def resolution_formatter(value):
            digits = _digits_re.findall(value)
            return 'x'.join(digits)

-        self.container.register_property('screenSize', '\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter, validator=ChainedValidator(DefaultValidator(), ResolutionValidator()))
+        self.container.register_property('screenSize', '\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter)

        register_quality('screenSize', {'360p': -300,
                                        '368p': -200,
@@ -239,8 +219,8 @@ class GuessProperties(Transformer):

        self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}', enhance=False, canonical_from_pattern=False)

-        weak_episode_words = ['pt', 'part']
-        self.container.register_property(None, '(' + build_or_pattern(weak_episode_words) + sep + '?(?P<part>' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)
+        part_words = ['pt', 'part']
+        self.container.register_property(None, '(' + build_or_pattern(part_words) + sep + '?(?P<part>' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)

        register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
                                    'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
@@ -249,13 +229,15 @@ class GuessProperties(Transformer):
                                    'Netflix': ['Netflix', 'NF']
                                    })

-        self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=NeighborValidator())
+        self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
        self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper')
-        self.container.register_property('other', 'Fansub', canonical_form='Fansub')
-        self.container.register_property('other', 'Fastsub', canonical_form='Fastsub')
+        self.container.register_property('other', 'Fansub', canonical_form='Fansub', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
+        self.container.register_property('other', 'Fastsub', canonical_form='Fastsub', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
        self.container.register_property('other', '(?:Seasons?' + sep + '?)?Complete', canonical_form='Complete')
        self.container.register_property('other', 'R5', 'RC', canonical_form='R5')
        self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair')
+        self.container.register_property('other', 'CC')  # Close Caption
+        self.container.register_property('other', 'LD', 'MD')  # Line/Mic Dubbed

        self.container.register_canonical_properties('other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ',
                                                     'DDC',
@@ -271,10 +253,29 @@ class GuessProperties(Transformer):

    def guess_properties(self, string, node=None, options=None):
        found = self.container.find_properties(string, node, options)
-        return self.container.as_guess(found, string)
+        guess = self.container.as_guess(found, string)
+
+        if guess and node:
+            if 'part' in guess:
+                # If another guess in the same group already has a part, merge both parts into a sorted partList
+                for existing_guess in node.group_node().guesses:
+                    if 'part' in existing_guess:
+                        if 'partList' not in existing_guess:
+                            existing_guess['partList'] = [existing_guess['part']]
+                        existing_guess['partList'].append(guess['part'])
+                        existing_guess['partList'].sort()
+                        if existing_guess['part'] > guess['part']:
+                            existing_guess.set_confidence('part', 0)  # the lower part keeps its confidence
+                        else:
+                            guess.set_confidence('part', 0)
+                        guess['partList'] = list(existing_guess['partList'])  # copy so both guesses do not share one list
+
+        return guess

    def supported_properties(self):
-        return self.container.get_supported_properties()
+        supported_properties = list(self.container.get_supported_properties())
+        supported_properties.append('partList')  # guess_properties can add 'partList' to a guess
+        return supported_properties

    def process(self, mtree, options=None):
        GuessFinder(self.guess_properties, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
@@ -93,8 +93,12 @@ class GuessReleaseGroup(Transformer):
                return False
            if self.re_sep.match(val[-1]):
                val = val[:len(val)-1]
+            if not val:
+                return False
            if self.re_sep.match(val[0]):
                val = val[1:]
+            if not val:
+                return False
            guess['releaseGroup'] = val
            forbidden = False
            for forbidden_lambda in self._forbidden_groupname_lambda:
@@ -21,6 +21,7 @@
 from __future__ import absolute_import, division, print_function, unicode_literals

 import re
+from guessit.patterns.list import list_parser, all_separators_re

 from guessit.plugins.transformers import Transformer

@@ -38,11 +39,14 @@ class GuessWeakEpisodesRexps(Transformer):
        of_separators = ['of', 'sur', '/', '\\']
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

-        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
+        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False, remove_duplicates=True)

        episode_words = ['episodes?']

-        def _formater(episode_number):
+        def episode_list_parser(value):
+            return list_parser(value, 'episodeList')
+
+        def season_episode_parser(episode_number):
            epnum = parse_numeral(episode_number)
            if not valid_year(epnum):
                if epnum > 100:
@@ -55,24 +59,46 @@ class GuessWeakEpisodesRexps(Transformer):
                else:
                    return epnum

-        self.container.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=_formater, disabler=lambda options: options.get('episode_prefer_number') if options else False)
-        self.container.register_property(['episodeNumber', 'season'], '[0-9]{4}', confidence=0.6, formatter=_formater)
-        self.container.register_property('episodeNumber', '[^0-9](\d{1,3})', confidence=0.6, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
+        self.container.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=season_episode_parser, disabler=lambda options: options.get('episode_prefer_number') if options else False)
+        self.container.register_property(['episodeNumber', 'season'], '[0-9]{4}', confidence=0.6, formatter=season_episode_parser)
        self.container.register_property(None, '(' + build_or_pattern(episode_words) + sep + '?(?P<episodeNumber>' + numeral + '))[^0-9]', confidence=0.4, formatter=parse_numeral)
        self.container.register_property(None, r'(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral +')', confidence=0.6, formatter=parse_numeral)
-        self.container.register_property('episodeNumber', r'^' + sep + '?(\d{1,3})' + sep, confidence=0.4, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
-        self.container.register_property('episodeNumber', sep + r'(\d{1,3})' + sep + '?$', confidence=0.4, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
+        self.container.register_property('episodeNumber', '[^0-9](\d{2,3}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2,3}' + ')*)', confidence=0.4, formatter=episode_list_parser, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
+        self.container.register_property('episodeNumber', r'^' + sep + '?(\d{2,3}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2,3}' + ')*)' + sep, confidence=0.4, formatter=episode_list_parser, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
+        self.container.register_property('episodeNumber', sep + r'(\d{2,3}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2,3}' + ')*)' + sep + '?$', confidence=0.4, formatter=episode_list_parser, disabler=lambda options: not options.get('episode_prefer_number') if options else True)

    def supported_properties(self):
        return self.container.get_supported_properties()

    def guess_weak_episodes_rexps(self, string, node=None, options=None):
-        if node and 'episodeNumber' in node.root.info:
-            return None
-
        properties = self.container.find_properties(string, node, options)
        guess = self.container.as_guess(properties, string)

+        if node and guess:
+            if 'episodeNumber' in guess and 'season' in guess:
+                existing_guesses = list(filter(lambda x: 'season' in x and 'episodeNumber' in x, node.group_node().guesses))
+                if existing_guesses:
+                    return None  # the group already holds a season + episodeNumber guess: drop this one
+            elif 'episodeNumber' in guess:
+                # If this guess only has episodeNumber and another guess in the same group has both season and
+                # episodeNumber, discard this guess and keep the other one.
+                safe_guesses = list(filter(lambda x: 'season' in x and 'episodeNumber' in x, node.group_node().guesses))
+                if safe_guesses:
+                    return None
+                else:
+                    # Otherwise merge with the group's other episodeNumber-only guesses into a sorted episodeList.
+                    existing_guesses = list(filter(lambda x: 'season' not in x and 'episodeNumber' in x, node.group_node().guesses))
+                    for existing_guess in existing_guesses:
+                        if 'episodeList' not in existing_guess:
+                            existing_guess['episodeList'] = [existing_guess['episodeNumber']]
+                        existing_guess['episodeList'].append(guess['episodeNumber'])
+                        existing_guess['episodeList'].sort()
+                        if existing_guess['episodeNumber'] > guess['episodeNumber']:
+                            existing_guess.set_confidence('episodeNumber', 0)  # the lower episodeNumber keeps its confidence
+                        else:
+                            guess.set_confidence('episodeNumber', 0)
+                        guess['episodeList'] = list(existing_guess['episodeList'])  # copy, not a shared list
+
        return guess

    def should_process(self, mtree, options=None):
@@ -42,8 +42,13 @@ class GuessYear(Transformer):

    def second_pass_options(self, mtree, options=None):
        year_nodes = list(mtree.leaves_containing('year'))
-        if len(year_nodes) > 1:
-            return {'skip_nodes': year_nodes[:len(year_nodes) - 1]}
+        # the year of the last node returned by leaves_containing('year') is the candidate;
+        # ask for every year node whose value differs from that candidate to be skipped
+        if year_nodes:
+            year_candidate = year_nodes[-1].guess['year']
+            year_nodes = [year for year in year_nodes if year.guess['year'] != year_candidate]
+            if year_nodes:
+                return {'skip_nodes': year_nodes}
        return None

    def process(self, mtree, options=None):
@@ -37,7 +37,7 @@ class SplitExplicitGroups(Transformer):
        :return: return the string split into explicit groups, that is, those either
        between parenthese, square brackets or curly braces, and those separated
        by a dash."""
-        for c in mtree.children:
+        for c in mtree.unidentified_leaves():
            groups = find_first_level_groups(c.value, group_delimiters[0])
            for delimiters in group_delimiters:
                flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
@@ -47,4 +47,24 @@ class SplitExplicitGroups(Transformer):
            # patterns, such as dates, etc...
            # groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])

-            c.split_on_components(groups)
+            c.split_on_components(groups, category='explicit')
+
+    def post_process(self, mtree, options=None):
+        """
+        Decrease confidence for properties found in explicit groups.
+
+        :param mtree: tree whose nodes() are filtered for category 'explicit' nodes that report is_explicit()
+        :param options: mapping; nothing is done when 'name_only' is truthy (must not be None: .get is called on it)
+        :return: None; the confidences of the matching nodes' guesses are halved via alter_confidence
+        """
+        if not options.get('name_only'):
+            explicit_nodes = [node for node in mtree.nodes() if node.category == 'explicit' and node.is_explicit()]
+
+            for explicit_node in explicit_nodes:
+                self.alter_confidence(explicit_node, 0.5)
+
+    def alter_confidence(self, node, factor):  # scale the confidence of every property of every guess on node
+        for guess in node.guesses:
+            for k in guess.keys():
+                confidence = guess.confidence(k)
+                guess.set_confidence(k, confidence * factor)
@@ -45,4 +45,4 @@ class SplitOnDash(Transformer):
                match = pattern.search(node.value, span[1])

            if indices:
-                node.partition(indices)
+                node.partition(indices, category='dash')
@@ -41,6 +41,32 @@ class SplitPathComponents(Transformer):
            components += list(splitext(basename))
            components[-1] = components[-1][1:]  # remove the '.' from the extension

-            mtree.split_on_components(components)
+            mtree.split_on_components(components, category='path')
        else:
-            mtree.split_on_components([mtree.value, ''])
+            mtree.split_on_components([mtree.value, ''], category='path')
+
+    def post_process(self, mtree, options=None):
+        """
+        Decrease confidence for properties found in directories, filename should always have priority.
+
+        :param mtree: tree whose nodes() are filtered for category 'path' nodes
+        :param options: mapping; nothing is done when 'name_only' is truthy (must not be None: .get is called on it)
+        :return: None; guess confidences are scaled via alter_confidence
+        """
+        if not options.get('name_only'):
+            path_nodes = [node for node in mtree.nodes() if node.category == 'path']
+
+            for path_node in path_nodes[:-2]:  # every path node except the last two
+                self.alter_confidence(path_node, 0.3)
+
+            try:
+                last_directory_node = path_nodes[-2]  # second-to-last path node gets a milder penalty; the last is untouched
+                self.alter_confidence(last_directory_node, 0.6)
+            except IndexError:  # fewer than two path nodes
+                pass
+
+    def alter_confidence(self, node, factor):  # scale the confidence of every property of every guess on node
+        for guess in node.guesses:
+            for k in guess.keys():
+                confidence = guess.confidence(k)
+                guess.set_confidence(k, confidence * factor)
@@ -249,9 +249,9 @@ def search_external_subtitles(path):
    subtitles = {}
    for p in os.listdir(dirpath):
        # skip badly encoded filenames
-        #if isinstance(p, bytes):  # pragma: no cover
-        #    logger.error('Skipping badly encoded filename %r in %r', p.decode('utf-8', errors='replace'), dirpath)
-        #    continue
+        if isinstance(p, bytes):  # pragma: no cover
+            logger.error('Skipping badly encoded filename %r in %r', p.decode('utf-8', errors='replace'), dirpath)
+            continue

        # keep only valid subtitle filenames
        if not p.startswith(fileroot) or not p.endswith(SUBTITLE_EXTENSIONS):
@@ -2,6 +2,7 @@

 from .patch_provider_pool import PatchedProviderPool
 from .patch_providers import PatchedAddic7edProvider
+from .patch_video import patched_search_external_subtitles
 import subliminal
 import babelfish

@@ -14,3 +15,6 @@ subliminal.providers.addic7ed.Addic7edProvider = PatchedAddic7edProvider
 # add language converters
 babelfish.language_converters.register('addic7ed = subliminal_patch.patch_language:PatchedAddic7edConverter')
 babelfish.language_converters.register('tvsubtitles = subliminal.converters.tvsubtitles:TVsubtitlesConverter')
+
+# patch subliminal's external subtitles search algorithm
+subliminal.video.search_external_subtitles = patched_search_external_subtitles
@@ -2,7 +2,7 @@

 import logging
 from random import randint
-from subliminal.providers.addic7ed import Addic7edProvider
+from subliminal.providers.addic7ed import Addic7edProvider, Addic7edSubtitle, ParserBeautifulSoup, series_year_re, Language

 logger = logging.getLogger(__name__)

@@ -22,3 +22,50 @@ class PatchedAddic7edProvider(Addic7edProvider):
        	'User-Agent': AGENT_LIST[randint(0, len(AGENT_LIST)-1)],
        	'Referer': self.server_url,
    	    }
+    
+    def query(self, series, season, year=None, country=None):  # list the completed subtitles on a show's season page
+        # resolve the show id; without one there is nothing to query
+        show_id = self.get_show_id(series, year, country)
+        if show_id is None:
+            logger.error('No show id found for %r (%r)', series, {'year': year, 'country': country})
+            return []
+
+        # get the page of the season of the show
+        logger.info('Getting the page of show id %d, season %d', show_id, season)
+        r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10)
+        r.raise_for_status()
+        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
+
+        # when the page has a header, take the series name and year from it instead of the arguments
+        header = soup.select('#header font')
+        if header:
+            match = series_year_re.match(header[0].text.strip()[:-10])  # NOTE(review): match may be None if the header text does not fit series_year_re - confirm
+            series = match.group('series')
+            year = int(match.group('year')) if match.group('year') else None
+
+        subtitles = []
+        for row in soup.select('tr.epeven'):  # loop over subtitle rows
+            cells = row('td')
+
+            # ignore incomplete subtitles
+            status = cells[5].text
+            if status != 'Completed':
+                logger.debug('Ignoring subtitle with status %s', status)
+                continue
+
+            # read the item
+            language = Language.fromaddic7ed(cells[3].text)
+            hearing_impaired = bool(cells[6].text)
+            page_link = self.server_url + cells[2].a['href'][1:]
+            season = int(cells[0].text)  # rebinds the season argument with the row's own value
+            episode = int(cells[1].text)
+            title = cells[2].text
+            version = cells[4].text
+            download_link = cells[9].a['href'][1:]
+
+            subtitle = Addic7edSubtitle(language, hearing_impaired, page_link, series, season, episode, title, year,
+                                        version, download_link)
+            logger.debug('Found subtitle %r', subtitle)
+            subtitles.append(subtitle)
+
+        return subtitles
@@ -0,0 +1,61 @@
+# coding=utf-8
+
+import os
+import logging
+from subliminal.video import SUBTITLE_EXTENSIONS, Language
+
+logger = logging.getLogger(__name__)
+
+# may be absolute or relative paths; set to selected options
+CUSTOM_PATHS = []
+
+def _search_external_subtitles(path):  # map each matching subtitle filename in path's directory to its Language
+    dirpath, filename = os.path.split(path)
+    dirpath = dirpath or '.'  # path without a directory part: list the current directory
+    fileroot, fileext = os.path.splitext(filename)
+    subtitles = {}
+    for p in os.listdir(dirpath):
+        # keep only names that start with the video's root name and end with one of SUBTITLE_EXTENSIONS
+        if not p.startswith(fileroot) or not p.endswith(SUBTITLE_EXTENSIONS):
+            continue
+
+        # language code: text between the root name and the subtitle extension, minus the video extension, '_' -> '-', first char dropped
+        language_code = p[len(fileroot):-len(os.path.splitext(p)[1])].replace(fileext, '').replace('_', '-')[1:]
+
+        # default language is undefined; kept when there is no code or it cannot be parsed
+        language = Language('und')
+
+        # attempt to parse as an IETF tag; a ValueError is logged and the default is kept
+        if language_code:
+            try:
+                language = Language.fromietf(language_code)
+            except ValueError:
+                logger.error('Cannot parse language code %r', language_code)
+
+        subtitles[p] = language
+
+    logger.debug('Found subtitles %r', subtitles)
+
+    return subtitles    
+
+def patched_search_external_subtitles(path):
+    """
+    run _search_external_subtitles on the video's own folder and on every entry of CUSTOM_PATHS, merging the results
+    # todo: cleanup and merge with _search_external_subtitles
+    """
+    video_path, video_filename = os.path.split(path)
+    subtitles = {}
+    for folder_or_subfolder in [video_path] + CUSTOM_PATHS:  # NOTE(review): a relative video_path is itself joined onto video_path below - confirm callers pass absolute paths
+	# relative folders are joined onto the video's folder, absolute ones are used as-is
+	try:
+	    abspath = unicode(os.path.abspath(os.path.join(*[video_path if not os.path.isabs(folder_or_subfolder) else "", folder_or_subfolder, video_filename])))
+	except Exception, e:
+	    logger.error("skipping path %s because of %s", repr(folder_or_subfolder), e)
+	    continue
+	logger.debug("external subs: scanning path %s", abspath)
+
+	if os.path.isdir(os.path.dirname(abspath)):
+	    subtitles.update(_search_external_subtitles(abspath))  # keyed by filename: a later folder overwrites a same-named earlier entry
+    logger.debug("external subs: found %s", subtitles)
+    return subtitles
+
@@ -1,16 +1,20 @@
 pannal's fork:

- ~~increased score of addic7ed subtitles a bit~~ (not existing currently)
- **support for newest Subliminal (1.0.1) and guessit (0.10.1)**
- **plugin now also works with com.plexapp.agents.thetvdbdvdorder**
- guessit's release-group detection bug fixed (*not the correct way, though. has already been fixed in guessit itself, need to merge*)
- providers fixed for subliminal 1.0.1 (at least addic7ed)
- support for addic7ed languages: French (Canadian)
- support for additional languages: pt-br (Portuguese (Brasil)), fa (Persian (Farsi))
- support for three (two optional) subtitle languages
+#### beta5
+- fix storing subtitles beside the actual video file rather than in a subfolder (fixes #14)
+- "custom folder" setting now always used if given (properly overrides "subtitle folder" setting)
+- also scan (custom) given subtitle folders for existing subtitles instead of redownloading them on every refresh (fixes #9, #2)

-bugs:
- skip existing subtitles (not in video's path - e.g. subFolder given) currently broken
+#### beta4
+- ~~increased score of addic7ed subtitles a bit~~ (not existing currently)
+- **support for newest Subliminal ([1.0.1](27a6e51cd36ffb2910cd9a7add6d797a2c6469b7)) and guessit ([0.11.0](2814f57e8999dcc31575619f076c0c1a63ce78f2))**
+- **plugin now also [works with com.plexapp.agents.thetvdbdvdorder](924470d2c0db3a71529278bce4b7247eaf2f85b8)**
+- providers fixed for subliminal 1.0.1 ([at least addic7ed](131504e7eed8b3400c457fbe49beea3b115bc916))
+- providers [don't simply fail and get excluded on non-detected language](1a779020792e0201ad689eefbf5a126155e89c97)
+- support for addic7ed languages: [French (Canadian)](b11a051c233fd72033f0c3b5a8c1965260e7e19f)
+- support for additional languages: [pt-br (Portuguese (Brasil)), fa (Persian (Farsi))](131504e7eed8b3400c457fbe49beea3b115bc916)
+- support for [three (two optional) subtitle languages](e543c927cf49c264eaece36640c99d67a99c7da2)
+- optionally use [random user agent for addic7ed provider](83ace14faf75fbd75313f0ceda9b78161895fbcf) (should not be needed)

 Subliminal.bundle
 =================
Author	SHA1	Message	Date
pannal	4da63a8fd7	Update README.md	2015-09-23 14:40:42 +02:00
panni	fa27789608	fixed typo	2015-09-23 14:31:55 +02:00
panni	f9e9f35157	Merge branch 'deep_scan_subs' Conflicts: Contents/Code/__init__.py	2015-09-23 14:29:21 +02:00
panni	4a6604f0ab	custom folder now takes precedence; also scan subfolders for existing subtitles if configured; update custom folder settings description; remove direct subliminal.video patch and move it to subliminal_patch.patch_video	2015-09-23 14:26:21 +02:00
panni	971d1221da	don't die on missing header; maybe fixes #13	2015-09-23 13:36:18 +02:00
panni	ba69885477	fix saving subs to video folder without custom_path given; should fix #14	2015-09-23 12:46:07 +02:00
panni	8e23098037	add basic functionality to scan custom (sub-) folders for subtitles	2015-09-19 04:35:48 +02:00
pannal	8da7bf029c	Update README.md	2015-09-18 03:48:34 +02:00
pannal	e16e58cbfa	Update README.md	2015-09-18 03:29:34 +02:00
pannal	abb7cd3bfa	Update README.md	2015-09-18 03:19:04 +02:00
pannal	bfa06f3989	Update README.md	2015-09-18 03:16:37 +02:00
pannal	c63529939d	Merge pull request #11 from pannal/guessit-0.11.0 update guessit to 0.11.0	2015-09-18 03:16:20 +02:00
panni	2814f57e89	update guessit to 0.11.0	2015-09-18 03:14:21 +02:00
panni	70476883c6	Merge branch 'master' of github.com:pannal/Subliminal.bundle	2015-09-18 03:11:20 +02:00
panni	b5ed209453	Revert "update guessit to 0.11.0" This reverts commit `be7687f15d`.	2015-09-18 03:10:58 +02:00
panni	be7687f15d	update guessit to 0.11.0	2015-09-18 03:08:55 +02:00
pannal	b7fb8e1e76	Update README.md	2015-09-18 02:56:40 +02:00