Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4da63a8fd7 | |||
| fa27789608 | |||
| f9e9f35157 | |||
| 4a6604f0ab | |||
| 971d1221da | |||
| ba69885477 | |||
| 8e23098037 | |||
| 8da7bf029c | |||
| e16e58cbfa | |||
| abb7cd3bfa | |||
| bfa06f3989 | |||
| c63529939d | |||
| 2814f57e89 | |||
| 70476883c6 | |||
| b5ed209453 | |||
| be7687f15d | |||
| b7fb8e1e76 |
@@ -19,6 +19,8 @@ def Start():
|
||||
# configured cache to be in memory as per https://github.com/Diaoul/subliminal/issues/303
|
||||
subliminal.region.configure('dogpile.cache.memory')
|
||||
|
||||
|
||||
|
||||
def ValidatePrefs():
|
||||
Log.Debug("Validate Prefs called.")
|
||||
return
|
||||
@@ -33,6 +35,18 @@ def getLangList():
|
||||
|
||||
return langList
|
||||
|
||||
def getSubtitleDestinationFolder():
|
||||
if not Prefs["subtitles.save.filesystem"]:
|
||||
return
|
||||
|
||||
fld_custom = Prefs["subtitles.save.subFolder.Custom"].strip() if bool(Prefs["subtitles.save.subFolder.Custom"]) else None
|
||||
return fld_custom or (Prefs["subtitles.save.subFolder"] if Prefs["subtitles.save.subFolder"] != "current folder" else None)
|
||||
|
||||
def initSubliminalPatches():
|
||||
# configure custom subtitle destination folders for scanning pre-existing subs
|
||||
dest_folder = getSubtitleDestinationFolder()
|
||||
subliminal_patch.patch_video.CUSTOM_PATHS = [dest_folder] if dest_folder else []
|
||||
|
||||
def getProviders():
|
||||
providers = {'opensubtitles' : Prefs['provider.opensubtitles.enabled'],
|
||||
'thesubdb' : Prefs['provider.thesubdb.enabled'],
|
||||
@@ -48,6 +62,7 @@ def getProviderSettings():
|
||||
'use_random_agents': Prefs['provider.addic7ed.use_random_agents'],
|
||||
},
|
||||
}
|
||||
|
||||
return provider_settings
|
||||
|
||||
def scanTvMedia(media):
|
||||
@@ -93,11 +108,15 @@ def saveSubtitles(videos, subtitles):
|
||||
Log.Debug("Saving subtitles as metadata")
|
||||
saveSubtitlesToMetadata(videos, subtitles)
|
||||
|
||||
|
||||
|
||||
def saveSubtitlesToFile(subtitles):
|
||||
fld_custom = Prefs["subtitles.save.subFolder.Custom"].strip() if bool(Prefs["subtitles.save.subFolder.Custom"]) else None
|
||||
if Prefs["subtitles.save.subFolder"] != "current folder" or fld_custom:
|
||||
# specific subFolder requested, create it if it doesn't exist
|
||||
for video, video_subtitles in subtitles.items():
|
||||
|
||||
for video, video_subtitles in subtitles.items():
|
||||
fld = None
|
||||
if fld_custom or Prefs["subtitles.save.subFolder"] != "current folder":
|
||||
# specific subFolder requested, create it if it doesn't exist
|
||||
fld_base = os.path.split(video.name)[0]
|
||||
if fld_custom:
|
||||
if fld_custom.startswith("/"):
|
||||
@@ -109,10 +128,7 @@ def saveSubtitlesToFile(subtitles):
|
||||
fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
|
||||
if not os.path.exists(fld):
|
||||
os.makedirs(fld)
|
||||
subliminal.api.save_subtitles(video, video_subtitles, directory=fld)
|
||||
|
||||
else:
|
||||
subliminal.api.save_subtitles(subtitles)
|
||||
subliminal.api.save_subtitles(video, video_subtitles, directory=fld)
|
||||
|
||||
def saveSubtitlesToMetadata(videos, subtitles):
|
||||
for video, video_subtitles in subtitles.items():
|
||||
@@ -132,6 +148,7 @@ class SubliminalSubtitlesAgentMovies(Agent.Movies):
|
||||
|
||||
def update(self, metadata, media, lang):
|
||||
Log.Debug("MOVIE UPDATE CALLED")
|
||||
initSubliminalPatches()
|
||||
videos = scanMovieMedia(media)
|
||||
subtitles = downloadBestSubtitles(videos.keys())
|
||||
saveSubtitles(videos, subtitles)
|
||||
@@ -149,6 +166,7 @@ class SubliminalSubtitlesAgentTvShows(Agent.TV_Shows):
|
||||
|
||||
def update(self, metadata, media, lang):
|
||||
Log.Debug("TvUpdate. Lang %s" % lang)
|
||||
initSubliminalPatches()
|
||||
videos = scanTvMedia(media)
|
||||
subtitles = downloadBestSubtitles(videos.keys())
|
||||
saveSubtitles(videos, subtitles)
|
||||
|
||||
@@ -110,7 +110,7 @@
|
||||
},
|
||||
{
|
||||
"id": "subtitles.save.subFolder.Custom",
|
||||
"label": "Custom Subtitle folder (computes to real paths; use for example \"bla\" as a subfolder of the current media file folder - can use real paths aswell)",
|
||||
"label": "Custom Subtitle folder (overrides \"Subtitle Folder\"; computes to real paths; use for example \"bla\" as a subfolder of the current media file folder or an absolute path)",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
|
||||
@@ -89,10 +89,14 @@ from guessit.guess import Guess, smart_merge
|
||||
from guessit.language import Language
|
||||
from guessit.matcher import IterativeMatcher
|
||||
from guessit.textutils import clean_default, is_camel, from_camel
|
||||
from copy import deepcopy
|
||||
import babelfish
|
||||
import os.path
|
||||
import logging
|
||||
from copy import deepcopy
|
||||
from guessit.options import get_opts
|
||||
import shlex
|
||||
# Needed for guessit.plugins.transformers.reload() to be called.
|
||||
from guessit.plugins import transformers
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -117,7 +121,7 @@ def _build_filename_mtree(filename, options=None, **kwargs):
|
||||
mtree = IterativeMatcher(filename, options=options, **kwargs)
|
||||
second_pass_options = mtree.second_pass_options
|
||||
if second_pass_options:
|
||||
log.debug("Running 2nd pass")
|
||||
log.debug('Running 2nd pass with options: %s' % second_pass_options)
|
||||
merged_options = dict(options)
|
||||
merged_options.update(second_pass_options)
|
||||
mtree = IterativeMatcher(filename, options=merged_options, **kwargs)
|
||||
@@ -271,8 +275,16 @@ def guess_file_info(filename, info=None, options=None, **kwargs):
|
||||
"""
|
||||
info = info or 'filename'
|
||||
options = options or {}
|
||||
|
||||
if isinstance(options, base_text_type):
|
||||
args = shlex.split(options)
|
||||
options = vars(get_opts().parse_args(args))
|
||||
if default_options:
|
||||
merged_options = deepcopy(default_options)
|
||||
if isinstance(default_options, base_text_type):
|
||||
default_args = shlex.split(default_options)
|
||||
merged_options = vars(get_opts().parse_args(default_args))
|
||||
else:
|
||||
merged_options = deepcopy(default_options)
|
||||
merged_options.update(options)
|
||||
options = merged_options
|
||||
|
||||
|
||||
@@ -181,16 +181,16 @@ def submit_bug(filename, options):
|
||||
opts = dict((k, v) for k, v in options.__dict__.items()
|
||||
if v and k != 'submit_bug')
|
||||
|
||||
r = requests.post('http://localhost:5000/bugs', {'filename': filename,
|
||||
r = requests.post('http://guessit.io/bugs', {'filename': filename,
|
||||
'version': __version__,
|
||||
'options': str(opts)})
|
||||
if r.status_code == 200:
|
||||
print('Successfully submitted file: %s' % r.text)
|
||||
else:
|
||||
print('Could not submit bug at the moment, please try again later.')
|
||||
print('Could not submit bug at the moment, please try again later: %s %s' % (r.status_code, r.reason))
|
||||
|
||||
except RequestException as e:
|
||||
print('Could not submit bug at the moment, please try again later.')
|
||||
print('Could not submit bug at the moment, please try again later: %s' % e)
|
||||
|
||||
|
||||
def main(args=None, setup_logging=True):
|
||||
|
||||
@@ -17,4 +17,4 @@
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
__version__ = '0.10.4.dev0'
|
||||
__version__ = '0.11.0'
|
||||
|
||||
@@ -135,8 +135,14 @@ class SameKeyValidator(object):
|
||||
self.validator_function = validator_function
|
||||
|
||||
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||
path_nodes = [path_node for path_node in node.ancestors if path_node.category == 'path']
|
||||
if path_nodes:
|
||||
path_node = path_nodes[0]
|
||||
else:
|
||||
path_node = node.root
|
||||
|
||||
for key in prop.keys:
|
||||
for same_value_leaf in node.root.leaves_containing(key):
|
||||
for same_value_leaf in path_node.leaves_containing(key):
|
||||
ret = self.validator_function(same_value_leaf, key, prop, string, node, match, entry_start, entry_end)
|
||||
if ret is not None:
|
||||
return ret
|
||||
@@ -144,6 +150,9 @@ class SameKeyValidator(object):
|
||||
|
||||
|
||||
class OnlyOneValidator(SameKeyValidator):
|
||||
"""
|
||||
Check that there's only one occurence of key for current directory
|
||||
"""
|
||||
def __init__(self):
|
||||
super(OnlyOneValidator, self).__init__(lambda same_value_leaf, key, prop, string, node, match, entry_start, entry_end: False)
|
||||
|
||||
@@ -153,12 +162,16 @@ class DefaultValidator(object):
|
||||
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||
span = _get_span(prop, match)
|
||||
span = _trim_span(span, string[span[0]:span[1]])
|
||||
return DefaultValidator.validate_string(string, span, entry_start, entry_end)
|
||||
|
||||
@staticmethod
|
||||
def validate_string(string, span, entry_start=None, entry_end=None):
|
||||
start, end = span
|
||||
|
||||
sep_start = start <= 0 or string[start - 1] in sep
|
||||
sep_end = end >= len(string) or string[end] in sep
|
||||
start_by_other = start in entry_end
|
||||
end_by_other = end in entry_start
|
||||
start_by_other = start in entry_end if entry_end else False
|
||||
end_by_other = end in entry_start if entry_start else False
|
||||
if (sep_start or start_by_other) and (sep_end or end_by_other):
|
||||
return True
|
||||
return False
|
||||
@@ -235,6 +248,13 @@ class NeighborValidator(DefaultValidator):
|
||||
|
||||
return False
|
||||
|
||||
class FullMatchValidator(DefaultValidator):
|
||||
"""Make sure the node match fully"""
|
||||
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||
at_start, at_end = _get_positions(prop, string, node, match, entry_start, entry_end)
|
||||
|
||||
return at_start and at_end
|
||||
|
||||
|
||||
class LeavesValidator(DefaultValidator):
|
||||
def __init__(self, lambdas=None, previous_lambdas=None, next_lambdas=None, both_side=False, default_=True):
|
||||
@@ -290,7 +310,7 @@ class LeavesValidator(DefaultValidator):
|
||||
|
||||
class _Property:
|
||||
"""Represents a property configuration."""
|
||||
def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None, disabler=None, confidence_lambda=None):
|
||||
def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None, disabler=None, confidence_lambda=None, remove_duplicates=False):
|
||||
"""
|
||||
:param keys: Keys of the property (format, screenSize, ...)
|
||||
:type keys: string
|
||||
@@ -309,6 +329,8 @@ class _Property:
|
||||
:type validator: :class:`DefaultValidator`
|
||||
:param formatter: Formater to use
|
||||
:type formatter: function
|
||||
:param remove_duplicates: Keep only the last match if multiple values are found
|
||||
:type remove_duplicates: bool
|
||||
"""
|
||||
if isinstance(keys, list):
|
||||
self.keys = keys
|
||||
@@ -335,6 +357,7 @@ class _Property:
|
||||
self.validator = validator
|
||||
self.formatter = formatter
|
||||
self.disabler = disabler
|
||||
self.remove_duplicates = remove_duplicates
|
||||
|
||||
def disabled(self, options):
|
||||
if self.disabler:
|
||||
@@ -479,7 +502,8 @@ class PropertiesContainer(object):
|
||||
entries.append((prop, match))
|
||||
else:
|
||||
matches = list(prop.compiled.finditer(string))
|
||||
duplicate_matches[prop] = matches
|
||||
if prop.remove_duplicates:
|
||||
duplicate_matches[prop] = matches
|
||||
for match in matches:
|
||||
entries.append((prop, match))
|
||||
|
||||
@@ -490,6 +514,9 @@ class PropertiesContainer(object):
|
||||
if computed_confidence is not None:
|
||||
prop.confidence = computed_confidence
|
||||
|
||||
entries.sort(key=lambda entry: -entry[0].confidence)
|
||||
# sort entries, from most confident to less confident
|
||||
|
||||
if validate:
|
||||
# compute entries start and ends
|
||||
for prop, match in entries:
|
||||
@@ -531,7 +558,7 @@ class PropertiesContainer(object):
|
||||
del entry_end[end]
|
||||
|
||||
for prop, prop_duplicate_matches in duplicate_matches.items():
|
||||
# Keeping the last valid match.
|
||||
# Keeping the last valid match only.
|
||||
# Needed for the.100.109.hdtv-lol.mp4
|
||||
for duplicate_match in prop_duplicate_matches[:-1]:
|
||||
entries.remove((prop, duplicate_match))
|
||||
@@ -561,8 +588,8 @@ class PropertiesContainer(object):
|
||||
for prop, match in key_entries:
|
||||
start, end = _get_span(prop, match)
|
||||
if not best_prop or \
|
||||
best_prop.confidence < best_prop.confidence or \
|
||||
best_prop.confidence == best_prop.confidence and \
|
||||
best_prop.confidence < prop.confidence or \
|
||||
best_prop.confidence == prop.confidence and \
|
||||
best_match.span()[1] - best_match.span()[0] < match.span()[1] - match.span()[0]:
|
||||
best_prop, best_match = prop, match
|
||||
|
||||
|
||||
@@ -287,10 +287,10 @@ def choose_int(g1, g2):
|
||||
if v1 == v2:
|
||||
return v1, 1 - (1 - c1) * (1 - c2)
|
||||
else:
|
||||
if c1 > c2:
|
||||
return v1, c1 - c2
|
||||
if c1 >= c2:
|
||||
return v1, c1 - c2 / 2
|
||||
else:
|
||||
return v2, c2 - c1
|
||||
return v2, c2 - c1 / 2
|
||||
|
||||
|
||||
def choose_string(g1, g2):
|
||||
@@ -308,7 +308,7 @@ def choose_string(g1, g2):
|
||||
prepended to it.
|
||||
|
||||
>>> s(choose_string(('Hello', 0.75), ('World', 0.5)))
|
||||
('Hello', 0.25)
|
||||
('Hello', 0.5)
|
||||
|
||||
>>> s(choose_string(('Hello', 0.5), ('hello', 0.5)))
|
||||
('Hello', 0.75)
|
||||
@@ -354,10 +354,10 @@ def choose_string(g1, g2):
|
||||
|
||||
# in case of conflict, return the one with highest confidence
|
||||
else:
|
||||
if c1 > c2:
|
||||
return v1, c1 - c2
|
||||
if c1 >= c2:
|
||||
return v1, c1 - c2 / 2
|
||||
else:
|
||||
return v2, c2 - c1
|
||||
return v2, c2 - c1 / 2
|
||||
|
||||
|
||||
def _merge_similar_guesses_nocheck(guesses, prop, choose):
|
||||
@@ -474,8 +474,8 @@ def merge_all(guesses, append=None):
|
||||
|
||||
# delete very unlikely values
|
||||
for p in list(result.keys()):
|
||||
if result.confidence(p) < 0.05:
|
||||
del result[p]
|
||||
if result.confidence(p) < 0.05:
|
||||
del result[p]
|
||||
|
||||
# make sure our appendable properties contain unique values
|
||||
for prop in append:
|
||||
@@ -509,7 +509,7 @@ def smart_merge(guesses):
|
||||
for string_part in ('title', 'series', 'container', 'format',
|
||||
'releaseGroup', 'website', 'audioCodec',
|
||||
'videoCodec', 'screenSize', 'episodeFormat',
|
||||
'audioChannels', 'idNumber'):
|
||||
'audioChannels', 'idNumber', 'container'):
|
||||
merge_similar_guesses(guesses, string_part, choose_string)
|
||||
|
||||
# 2- merge the rest, potentially discarding information not properly
|
||||
|
||||
@@ -173,8 +173,9 @@ LNG_COMMON_WORDS = frozenset([
|
||||
'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
|
||||
'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
|
||||
'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
|
||||
'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb', 'bt',
|
||||
'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice', 'ay', 'at',
|
||||
'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb',
|
||||
'bt', 'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice',
|
||||
'ay', 'at', 'star', 'so',
|
||||
# french words
|
||||
'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
|
||||
'mal', 'est', 'vol', 'or', 'mon', 'se', 'je', 'tu', 'me',
|
||||
@@ -185,7 +186,7 @@ LNG_COMMON_WORDS = frozenset([
|
||||
'la', 'el', 'del', 'por', 'mar', 'al',
|
||||
# other
|
||||
'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
|
||||
'vi', 'ben', 'da', 'lt', 'ch',
|
||||
'vi', 'ben', 'da', 'lt', 'ch', 'sr', 'ps', 'cx',
|
||||
# new from babelfish
|
||||
'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
|
||||
'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
|
||||
@@ -197,7 +198,7 @@ LNG_COMMON_WORDS = frozenset([
|
||||
'bs', # Bosnian
|
||||
'kz',
|
||||
# countries
|
||||
'gt', 'lt',
|
||||
'gt', 'lt', 'im',
|
||||
# part/pt
|
||||
'pt'
|
||||
])
|
||||
@@ -206,9 +207,11 @@ LNG_COMMON_WORDS_STRICT = frozenset(['brazil'])
|
||||
|
||||
|
||||
subtitle_prefixes = ['sub', 'subs', 'st', 'vost', 'subforced', 'fansub', 'hardsub']
|
||||
subtitle_suffixes = ['subforced', 'fansub', 'hardsub']
|
||||
subtitle_suffixes = ['subforced', 'fansub', 'hardsub', 'sub', 'subs']
|
||||
lang_prefixes = ['true']
|
||||
|
||||
all_lang_prefixes_suffixes = subtitle_prefixes + subtitle_suffixes + lang_prefixes
|
||||
|
||||
|
||||
def find_possible_languages(string, allowed_languages=None):
|
||||
"""Find possible languages in the string
|
||||
@@ -239,7 +242,7 @@ def find_possible_languages(string, allowed_languages=None):
|
||||
for prefix in lang_prefixes:
|
||||
if lang_word.startswith(prefix):
|
||||
lang_word = lang_word[len(prefix):]
|
||||
if lang_word not in common_words:
|
||||
if lang_word not in common_words and word.lower() not in common_words:
|
||||
try:
|
||||
lang = Language.fromguessit(lang_word)
|
||||
if allowed_languages:
|
||||
|
||||
@@ -215,94 +215,100 @@ def log_found_guess(guess, logger=None):
|
||||
(k, v, guess.raw(k), guess.confidence(k)))
|
||||
|
||||
|
||||
def _get_split_spans(node, span):
|
||||
partition_spans = node.get_partition_spans(span)
|
||||
for to_remove_span in partition_spans:
|
||||
if to_remove_span[0] == span[0] and to_remove_span[1] in [span[1], span[1] + 1]:
|
||||
partition_spans.remove(to_remove_span)
|
||||
break
|
||||
return partition_spans
|
||||
|
||||
|
||||
class GuessFinder(object):
|
||||
def __init__(self, guess_func, confidence=None, logger=None, options=None):
|
||||
self.guess_func = guess_func
|
||||
self.confidence = confidence
|
||||
self.logger = logger or log
|
||||
self.options = options
|
||||
self.options = options or {}
|
||||
|
||||
def process_nodes(self, nodes):
|
||||
for node in nodes:
|
||||
self.process_node(node)
|
||||
|
||||
def process_node(self, node, iterative=True, partial_span=None):
|
||||
def process_node(self, node, iterative=True, partial_span=None, skip_nodes=True):
|
||||
if skip_nodes and not isinstance(skip_nodes, list):
|
||||
skip_nodes = self.options.get('skip_nodes')
|
||||
elif not isinstance(skip_nodes, list):
|
||||
skip_nodes = []
|
||||
|
||||
if partial_span:
|
||||
value = node.value[partial_span[0]:partial_span[1]]
|
||||
else:
|
||||
value = node.value
|
||||
string = ' %s ' % value # add sentinels
|
||||
|
||||
if not self.options:
|
||||
matcher_result = self.guess_func(string, node)
|
||||
matcher_result = self.guess_func(string, node, self.options)
|
||||
if not matcher_result:
|
||||
return
|
||||
|
||||
if not isinstance(matcher_result, Guess):
|
||||
result, span = matcher_result
|
||||
else:
|
||||
matcher_result = self.guess_func(string, node, self.options)
|
||||
result, span = matcher_result, matcher_result.metadata().span
|
||||
#log.error('span2 %s' % (span,))
|
||||
|
||||
if matcher_result:
|
||||
if not isinstance(matcher_result, Guess):
|
||||
result, span = matcher_result
|
||||
else:
|
||||
result, span = matcher_result, matcher_result.metadata().span
|
||||
if not result:
|
||||
return
|
||||
|
||||
if result:
|
||||
# readjust span to compensate for sentinels
|
||||
span = (span[0] - 1, span[1] - 1)
|
||||
if span[1] == len(string):
|
||||
# somehow, the sentinel got included in the span. Remove it
|
||||
span = (span[0], span[1] - 1)
|
||||
|
||||
# readjust span to compensate for partial_span
|
||||
if partial_span:
|
||||
span = (span[0] + partial_span[0], span[1] + partial_span[0])
|
||||
# readjust span to compensate for sentinels
|
||||
span = (span[0] - 1, span[1] - 1)
|
||||
|
||||
partition_spans = None
|
||||
if self.options and 'skip_nodes' in self.options:
|
||||
skip_nodes = self.options.get('skip_nodes')
|
||||
for skip_node in skip_nodes:
|
||||
if skip_node.parent.node_idx == node.node_idx[:len(skip_node.parent.node_idx)] and\
|
||||
skip_node.span == span or\
|
||||
skip_node.span == (span[0] + skip_node.offset, span[1] + skip_node.offset):
|
||||
if partition_spans is None:
|
||||
partition_spans = _get_split_spans(node, skip_node.span)
|
||||
else:
|
||||
new_partition_spans = []
|
||||
for partition_span in partition_spans:
|
||||
tmp_node = MatchTree(value, span=partition_span, parent=node)
|
||||
tmp_partitions_spans = _get_split_spans(tmp_node, skip_node.span)
|
||||
new_partition_spans.extend(tmp_partitions_spans)
|
||||
partition_spans.extend(new_partition_spans)
|
||||
# readjust span to compensate for partial_span
|
||||
if partial_span:
|
||||
span = (span[0] + partial_span[0], span[1] + partial_span[0])
|
||||
|
||||
if not partition_spans:
|
||||
# restore sentinels compensation
|
||||
if skip_nodes:
|
||||
skip_nodes = [skip_node for skip_node in self.options.get('skip_nodes') if skip_node.parent.span[0] == node.span[0] or skip_node.parent.span[1] == node.span[1]]
|
||||
# if we guessed a node that we need to skip, recurse down the tree and ignore that node
|
||||
indices = set()
|
||||
skip_nodes_spans = []
|
||||
next_skip_nodes = []
|
||||
for skip_node in skip_nodes:
|
||||
skip_for_next = False
|
||||
skip_nodes_spans.append(skip_node.span)
|
||||
if node.offset <= skip_node.span[0] <= node.span[1]:
|
||||
indices.add(skip_node.span[0] - node.offset)
|
||||
skip_for_next = True
|
||||
if node.offset <= skip_node.span[1] <= node.span[1]:
|
||||
indices.add(skip_node.span[1] - node.offset)
|
||||
skip_for_next = True
|
||||
if not skip_for_next:
|
||||
next_skip_nodes.append(skip_node)
|
||||
if indices:
|
||||
partition_spans = [s for s in node.get_partition_spans(indices) if s not in skip_nodes_spans]
|
||||
for partition_span in partition_spans:
|
||||
relative_span = (partition_span[0] - node.offset, partition_span[1] - node.offset)
|
||||
self.process_node(node, partial_span=relative_span, skip_nodes=next_skip_nodes)
|
||||
return
|
||||
|
||||
if isinstance(result, Guess):
|
||||
guess = result
|
||||
else:
|
||||
guess = Guess(result, confidence=self.confidence, input=string, span=span)
|
||||
# restore sentinels compensation
|
||||
if isinstance(result, Guess):
|
||||
guess = result
|
||||
else:
|
||||
no_sentinel_string =string[1:-1]
|
||||
guess = Guess(result, confidence=self.confidence, input=no_sentinel_string, span=span)
|
||||
|
||||
if not iterative:
|
||||
found_guess(node, guess, logger=self.logger)
|
||||
else:
|
||||
absolute_span = (span[0] + node.offset, span[1] + node.offset)
|
||||
node.partition(span)
|
||||
found_child = None
|
||||
|
||||
for child in node.children:
|
||||
if child.span == absolute_span:
|
||||
# if we have a match on one of our children, mark it as such...
|
||||
found_guess(child, guess, logger=self.logger)
|
||||
found_child = child
|
||||
break
|
||||
|
||||
# ...and only then recurse on the other children
|
||||
for child in node.children:
|
||||
if child is not found_child:
|
||||
self.process_node(child)
|
||||
|
||||
if not iterative:
|
||||
found_guess(node, guess, logger=self.logger)
|
||||
else:
|
||||
absolute_span = (span[0] + node.offset, span[1] + node.offset)
|
||||
node.partition(span)
|
||||
if node.is_leaf():
|
||||
found_guess(node, guess, logger=self.logger)
|
||||
else:
|
||||
found_child = None
|
||||
for child in node.children:
|
||||
if child.span == absolute_span:
|
||||
found_guess(child, guess, logger=self.logger)
|
||||
found_child = child
|
||||
break
|
||||
for child in node.children:
|
||||
if child is not found_child:
|
||||
self.process_node(child)
|
||||
else:
|
||||
for partition_span in partition_spans:
|
||||
self.process_node(node, partial_span=partition_span)
|
||||
|
||||
@@ -27,9 +27,7 @@ import guessit # @UnusedImport needed for doctests
|
||||
from guessit import UnicodeMixin, base_text_type
|
||||
from guessit.textutils import clean_default, str_fill
|
||||
from guessit.patterns import group_delimiters
|
||||
from guessit.guess import (smart_merge,
|
||||
Guess)
|
||||
|
||||
from guessit.guess import smart_merge, Guess
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -75,7 +73,7 @@ class BaseMatchTree(UnicodeMixin):
|
||||
(as shown by the ``f``'s on the last-but-one line).
|
||||
"""
|
||||
|
||||
def __init__(self, string='', span=None, parent=None, clean_function=None):
|
||||
def __init__(self, string='', span=None, parent=None, clean_function=None, category=None):
|
||||
self.string = string
|
||||
self.span = span or (0, len(string))
|
||||
self.parent = parent
|
||||
@@ -83,6 +81,7 @@ class BaseMatchTree(UnicodeMixin):
|
||||
self.guess = Guess()
|
||||
self._clean_value = None
|
||||
self._clean_function = clean_function or clean_default
|
||||
self.category = category
|
||||
|
||||
@property
|
||||
def value(self):
|
||||
@@ -116,6 +115,32 @@ class BaseMatchTree(UnicodeMixin):
|
||||
|
||||
return result
|
||||
|
||||
@property
|
||||
def raw(self):
|
||||
result = {}
|
||||
for guess in self.guesses:
|
||||
for k in guess.keys():
|
||||
result[k] = guess.raw(k)
|
||||
return result
|
||||
|
||||
@property
|
||||
def guesses(self):
|
||||
"""
|
||||
List all guesses, including children ones.
|
||||
|
||||
:return: list of guesses objects
|
||||
"""
|
||||
|
||||
result = []
|
||||
|
||||
if self.guess:
|
||||
result.append(self.guess)
|
||||
|
||||
for c in self.children:
|
||||
result.extend(c.guesses)
|
||||
|
||||
return result
|
||||
|
||||
@property
|
||||
def root(self):
|
||||
"""Return the root node of the tree."""
|
||||
@@ -124,6 +149,23 @@ class BaseMatchTree(UnicodeMixin):
|
||||
|
||||
return self.parent.root
|
||||
|
||||
@property
|
||||
def ancestors(self):
|
||||
"""
|
||||
Retrieve all ancestors, from this node to root node.
|
||||
|
||||
:return: a list of MatchTree objects
|
||||
"""
|
||||
ret = [self]
|
||||
|
||||
if not self.parent:
|
||||
return ret
|
||||
|
||||
parent_ancestors = self.parent.ancestors
|
||||
ret.extend(parent_ancestors)
|
||||
|
||||
return ret
|
||||
|
||||
@property
|
||||
def depth(self):
|
||||
"""Return the depth of this node."""
|
||||
@@ -136,17 +178,30 @@ class BaseMatchTree(UnicodeMixin):
|
||||
"""Return whether this node is a leaf or not."""
|
||||
return self.children == []
|
||||
|
||||
def add_child(self, span):
|
||||
"""Add a new child node to this node with the given span."""
|
||||
child = MatchTree(self.string, span=span, parent=self, clean_function=self._clean_function)
|
||||
def add_child(self, span, category=None):
|
||||
"""Add a new child node to this node with the given span.
|
||||
|
||||
:param span: span of the new MatchTree
|
||||
:param category: category of the new MatchTree
|
||||
:return: A new MatchTree instance having self as a parent
|
||||
"""
|
||||
child = MatchTree(self.string, span=span, parent=self, clean_function=self._clean_function, category=category)
|
||||
self.children.append(child)
|
||||
return child
|
||||
|
||||
def get_partition_spans(self, indices):
|
||||
"""Return the list of absolute spans for the regions of the original
|
||||
string defined by splitting this node at the given indices (relative
|
||||
to this node)"""
|
||||
to this node)
|
||||
|
||||
:param indices: indices of the partition spans
|
||||
:return: a list of tuple of the spans
|
||||
"""
|
||||
indices = sorted(indices)
|
||||
if indices[-1] > len(self.value):
|
||||
log.error('Filename: {}'.format(self.string))
|
||||
log.error('Invalid call to get_partitions_spans, indices are too high: {}, len({}) == {:d}'
|
||||
.format(indices, self.value, len(self.value)))
|
||||
if indices[0] != 0:
|
||||
indices.insert(0, 0)
|
||||
if indices[-1] != len(self.value):
|
||||
@@ -155,23 +210,33 @@ class BaseMatchTree(UnicodeMixin):
|
||||
spans = []
|
||||
for start, end in zip(indices[:-1], indices[1:]):
|
||||
spans.append((self.offset + start,
|
||||
self.offset + end))
|
||||
self.offset + end))
|
||||
|
||||
return spans
|
||||
|
||||
def partition(self, indices):
|
||||
def partition(self, indices, category=None):
|
||||
"""Partition this node by splitting it at the given indices,
|
||||
relative to this node."""
|
||||
for partition_span in self.get_partition_spans(indices):
|
||||
self.add_child(span=partition_span)
|
||||
relative to this node.
|
||||
|
||||
def split_on_components(self, components):
|
||||
:param indices: indices of the partition spans
|
||||
:param category: category of the new MatchTree
|
||||
:return: a list of created MatchTree instances
|
||||
"""
|
||||
created = []
|
||||
for partition_span in self.get_partition_spans(indices):
|
||||
created.append(self.add_child(span=partition_span, category=category))
|
||||
return created
|
||||
|
||||
def split_on_components(self, components, category=None):
|
||||
offset = 0
|
||||
created = []
|
||||
for c in components:
|
||||
start = self.value.find(c, offset)
|
||||
end = start + len(c)
|
||||
self.add_child(span=(self.offset + start,
|
||||
self.offset + end))
|
||||
created.append(self.add_child(span=(self.offset + start,
|
||||
self.offset + end), category=category))
|
||||
offset = end
|
||||
return created
|
||||
|
||||
def nodes_at_depth(self, depth):
|
||||
"""Return all the nodes at a given depth in the tree"""
|
||||
@@ -208,7 +273,7 @@ class BaseMatchTree(UnicodeMixin):
|
||||
raise ValueError('Non-existent node index: %s' % (idx,))
|
||||
|
||||
def nodes(self):
|
||||
"""Return all the nodes and subnodes in this tree."""
|
||||
"""Return a generator of all nodes and subnodes in this tree."""
|
||||
yield self
|
||||
for child in self.children:
|
||||
for node in child.nodes():
|
||||
@@ -220,7 +285,6 @@ class BaseMatchTree(UnicodeMixin):
|
||||
yield self
|
||||
else:
|
||||
for child in self.children:
|
||||
# pylint: disable=W0212
|
||||
for leaf in child.leaves():
|
||||
yield leaf
|
||||
|
||||
|
||||
@@ -29,4 +29,4 @@ info_exts = ['nfo']
|
||||
video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
|
||||
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
|
||||
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
|
||||
'iso']
|
||||
'iso', 'vob']
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
import re
|
||||
from guessit.patterns import sep, build_or_pattern
|
||||
from guessit.patterns.numeral import parse_numeral
|
||||
|
||||
range_separators = ['-', 'to', 'a']
|
||||
discrete_separators = ['&', 'and', 'et']
|
||||
excluded_separators = ['.'] # Dot cannot serve as a discrete_separator
|
||||
|
||||
discrete_sep = sep
|
||||
for range_separator in range_separators:
|
||||
discrete_sep = discrete_sep.replace(range_separator, '')
|
||||
for excluded_separator in excluded_separators:
|
||||
discrete_sep = discrete_sep.replace(excluded_separator, '')
|
||||
discrete_separators.append(discrete_sep)
|
||||
all_separators = list(range_separators)
|
||||
all_separators.extend(discrete_separators)
|
||||
|
||||
range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
|
||||
discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE)
|
||||
all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
|
||||
|
||||
|
||||
def list_parser(value, property_list_name, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
    """Parse a string holding one numeral, or a list/range of numerals.

    The string is first cut on ``discrete_separators_re``. Chunks that belong
    to the same range expression are glued back together, every range is
    expanded, and each numeral is converted with ``parse_numeral``.

    :param value: string to parse
    :param property_list_name: key under which the complete list of values is
        returned when more than one value is found
    :param discrete_separators_re: compiled regexp splitting standalone values
    :param range_separators_re: compiled regexp splitting the bounds of a range
    :param allow_discrete: when false (and several values were found), a value
        lower than the last kept value is dropped
    :param fill_gaps: when true (and several values were found), the result is
        replaced by every integer between its minimum and its maximum
    :return: ``None`` when nothing was parsed, the single value when only one
        was found, else ``{None: first_value, property_list_name: values}``
    """
    # Empty chunks are discarded *before* stripping, as whitespace-only chunks
    # must keep their position in the list.
    discrete_elements = [x.strip() for x in discrete_separators_re.split(value) if x != '']

    # The split above may have separated a range separator from its bounds
    # ("1", "-", "3" / "1", "-3" / "1-", "3"): rebuild the range expressions.
    merged_elements = []
    count = len(discrete_elements)
    i = 0
    while i < count:
        element = discrete_elements[i]

        # NOTE(review): match() only anchors at the start, so a following
        # element that merely *begins* with a range separator is also glued
        # here -- confirm this is intended.
        if i < count - 2 and range_separators_re.match(discrete_elements[i + 1]):
            merged_elements.append(element + discrete_elements[i + 1] + discrete_elements[i + 2])
            i += 3
            continue

        match = range_separators_re.search(element)
        if match and match.start() == 0:
            # Leading range separator: this chunk continues the previous
            # *merged* element. Its index differs from ``i - 1`` as soon as
            # an earlier merge happened, hence ``[-1]``.
            if merged_elements:
                merged_elements[-1] += element
            else:
                # Nothing to attach to: keep the chunk untouched.
                merged_elements.append(element)
        elif match and match.end() == len(element) and i + 1 < count:
            # Trailing range separator: the next chunk is the upper bound.
            # The next chunk is still visited on its own afterwards; the
            # duplicate is removed when values are collected below.
            merged_elements.append(element + discrete_elements[i + 1])
        else:
            merged_elements.append(element)
        i += 1

    # Expand every element into numerals, keeping first-seen order and
    # skipping duplicates.
    ret = []
    for discrete_element in merged_elements:
        range_values = [x.strip() for x in range_separators_re.split(discrete_element) if x != '']
        if len(range_values) > 1:
            # Consecutive bounds are expanded pairwise: "1-3-5" -> 1..3, 3..5
            for start_text, end_text in zip(range_values, range_values[1:]):
                start_range_ep = parse_numeral(start_text)
                end_range_ep = parse_numeral(end_text)
                for range_ep in range(start_range_ep, end_range_ep + 1):
                    if range_ep not in ret:
                        ret.append(range_ep)
        else:
            discrete_value = parse_numeral(discrete_element)
            if discrete_value not in ret:
                ret.append(discrete_value)

    if len(ret) > 1:
        if not allow_discrete:
            # replace discrete elements by ranges: drop any value lower than
            # the last value that was kept
            valid_ret = [ret[0]]
            for candidate in ret[1:]:
                if not candidate < valid_ret[-1]:
                    valid_ret.append(candidate)
            ret = valid_ret
        if fill_gaps:
            ret = list(range(min(ret), max(ret) + 1))
        if len(ret) > 1:
            return {None: ret[0], property_list_name: ret}
    if ret:
        return ret[0]
    return None
|
||||
@@ -19,11 +19,14 @@
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
from functools import wraps
|
||||
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
GREEN_FONT = "\x1B[0;32m"
|
||||
YELLOW_FONT = "\x1B[0;33m"
|
||||
BLUE_FONT = "\x1B[0;34m"
|
||||
@@ -87,3 +90,27 @@ def setup_logging(colored=True, with_time=False, with_thread=False, filename=Non
|
||||
ch.setFormatter(SimpleFormatter(with_time, with_thread))
|
||||
|
||||
logging.getLogger().addHandler(ch)
|
||||
|
||||
|
||||
def trace_func_call(f):
    """Decorator logging every call of ``f`` on the module logger.

    The message, emitted at debug level, has the form ``Calling name(args)``
    where positional arguments are shown with ``repr`` and keyword arguments
    as ``key=value``. For methods, the first positional argument (``self`` or
    ``cls``) is left out of the message.

    :param f: function or (not yet bound) method to trace
    :return: wrapper calling ``f`` with the same arguments and returning its
        result
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        # Formatting arguments can be costly: only do it when it will be logged.
        if log.isEnabledFor(logging.DEBUG):
            # The method is still not bound when decorated, so it is detected
            # through its qualified name. __qualname__ only exists on
            # Python 3.3+; without it nothing can be detected and every
            # argument is shown.
            qualname = getattr(f, '__qualname__', f.__name__)
            owner = qualname.rpartition('.')[0]
            # A function nested in another function has an owner ending with
            # '<locals>': it is not a method and has no `self` to hide.
            # NOTE(review): a staticmethod also has a class as owner, so its
            # first argument is hidden too.
            is_method = bool(owner) and not owner.endswith('<locals>')
            shown_args = args[1:] if is_method else args

            parts = [repr(arg) for arg in shown_args]
            parts.extend('{}={}'.format(k, v) for k, v in kwargs.items())
            log.debug('Calling %s(%s)', f.__name__, ', '.join(parts))

        return f(*args, **kwargs)

    return wrapper
|
||||
|
||||
@@ -525,3 +525,29 @@
|
||||
screenSize: 720p
|
||||
season: 5
|
||||
series: Game of Thrones
|
||||
|
||||
? Parks and Recreation - [04x12] - Ad Campaign.avi
|
||||
: type: episode
|
||||
series: Parks and Recreation
|
||||
season: 4
|
||||
episodeNumber: 12
|
||||
title: Ad Campaign
|
||||
|
||||
? Star Trek Into Darkness (2013)/star.trek.into.darkness.2013.720p.web-dl.h264-publichd.mkv
|
||||
: type: movie
|
||||
title: Star Trek Into Darkness
|
||||
year: 2013
|
||||
screenSize: 720p
|
||||
format: WEB-DL
|
||||
videoCodec: h264
|
||||
releaseGroup: PublicHD
|
||||
|
||||
? /var/medias/series/The Originals/Season 02/The.Originals.S02E15.720p.HDTV.X264-DIMENSION.mkv
|
||||
: type: episode
|
||||
series: The Originals
|
||||
season: 2
|
||||
episodeNumber: 15
|
||||
screenSize: 720p
|
||||
format: HDTV
|
||||
videoCodec: h264
|
||||
releaseGroup: DIMENSION
|
||||
|
||||
@@ -282,12 +282,6 @@
|
||||
episodeNumber: 1
|
||||
title: The Impossible Astronaut
|
||||
|
||||
? Parks and Recreation - [04x12] - Ad Campaign.avi
|
||||
: series: Parks and Recreation
|
||||
season: 4
|
||||
episodeNumber: 12
|
||||
title: Ad Campaign
|
||||
|
||||
? The Sopranos - [05x07] - In Camelot.mp4
|
||||
: series: The Sopranos
|
||||
season: 5
|
||||
@@ -635,7 +629,7 @@
|
||||
format: HDTV
|
||||
releaseGroup: lol
|
||||
|
||||
? 03-Criminal.Minds.5x03.Reckoner.ENG.-.sub.FR.HDTV.XviD-STi.[tvu.org.ru].avi
|
||||
? Criminal.Minds.5x03.Reckoner.ENG.-.sub.FR.HDTV.XviD-STi.[tvu.org.ru].avi
|
||||
: series: Criminal Minds
|
||||
language: English
|
||||
subtitleLanguage: French
|
||||
@@ -1186,3 +1180,684 @@
|
||||
videoCodec: h264
|
||||
releaseGroup: BS
|
||||
format: WEB-DL
|
||||
|
||||
? How to Make It in America - S02E06 - I'm Sorry, Who's Yosi?.mkv
|
||||
: series: How to Make It in America
|
||||
season: 2
|
||||
episodeNumber: 6
|
||||
title: I'm Sorry, Who's Yosi?
|
||||
|
||||
? 24.S05E07.FRENCH.DVDRip.XviD-FiXi0N.avi
|
||||
: episodeNumber: 7
|
||||
format: DVD
|
||||
language: fr
|
||||
season: 5
|
||||
series: '24'
|
||||
videoCodec: XviD
|
||||
releaseGroup: FiXi0N
|
||||
|
||||
? 12.Monkeys.S01E12.FRENCH.BDRip.x264-VENUE.mkv
|
||||
: episodeNumber: 12
|
||||
format: BluRay
|
||||
language: fr
|
||||
releaseGroup: VENUE
|
||||
season: 1
|
||||
series: 12 Monkeys
|
||||
videoCodec: h264
|
||||
|
||||
? The.Daily.Show.2015.07.01.Kirsten.Gillibrand.Extended.720p.CC.WEBRip.AAC2.0.x264-BTW.mkv
|
||||
: audioChannels: '2.0'
|
||||
audioCodec: AAC
|
||||
date: 2015-07-01
|
||||
format: WEBRip
|
||||
other: CC
|
||||
releaseGroup: BTW
|
||||
screenSize: 720p
|
||||
series: The Daily Show
|
||||
title: Kirsten Gillibrand Extended
|
||||
videoCodec: h264
|
||||
|
||||
? The.Daily.Show.2015.07.02.Sarah.Vowell.CC.WEBRip.AAC2.0.x264-BTW.mkv
|
||||
: audioChannels: '2.0'
|
||||
audioCodec: AAC
|
||||
date: 2015-07-02
|
||||
format: WEBRip
|
||||
other: CC
|
||||
releaseGroup: BTW
|
||||
series: The Daily Show
|
||||
title: Sarah Vowell
|
||||
videoCodec: h264
|
||||
|
||||
? 90.Day.Fiance.S02E07.I.Have.To.Tell.You.Something.720p.HDTV.x264-W4F
|
||||
: options: -n
|
||||
episodeNumber: 7
|
||||
format: HDTV
|
||||
screenSize: 720p
|
||||
season: 2
|
||||
series: 90 Day Fiance
|
||||
title: I Have To Tell You Something
|
||||
|
||||
? Doctor.Who.2005.S04E06.FRENCH.LD.DVDRip.XviD-TRACKS.avi
|
||||
: episodeNumber: 6
|
||||
format: DVD
|
||||
language: fr
|
||||
releaseGroup: TRACKS
|
||||
season: 4
|
||||
series: Doctor Who
|
||||
other: LD
|
||||
videoCodec: XviD
|
||||
year: 2005
|
||||
|
||||
? Astro.Le.Petit.Robot.S01E01+02.FRENCH.DVDRiP.X264.INT-BOOLZ.mkv
|
||||
: episodeNumber: 1
|
||||
episodeList: [1, 2]
|
||||
format: DVD
|
||||
language: fr
|
||||
releaseGroup: INT-BOOLZ
|
||||
season: 1
|
||||
series: Astro Le Petit Robot
|
||||
videoCodec: h264
|
||||
|
||||
? Annika.Bengtzon.2012.E01.Le.Testament.De.Nobel.FRENCH.DVDRiP.XViD-STVFRV.avi
|
||||
: episodeNumber: 1
|
||||
format: DVD
|
||||
language: fr
|
||||
releaseGroup: STVFRV
|
||||
series: Annika Bengtzon
|
||||
title: Le Testament De Nobel
|
||||
videoCodec: XviD
|
||||
year: 2012
|
||||
|
||||
? Dead.Set.02.FRENCH.LD.DVDRip.XviD-EPZ.avi
|
||||
: episodeNumber: 2
|
||||
format: DVD
|
||||
language: fr
|
||||
other: LD
|
||||
releaseGroup: EPZ
|
||||
series: Dead Set
|
||||
videoCodec: XviD
|
||||
|
||||
? Phineas and Ferb S01E00 & S01E01 & S01E02
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 0
|
||||
- 1
|
||||
- 2
|
||||
episodeNumber: 0
|
||||
season: 1
|
||||
series: Phineas and Ferb
|
||||
|
||||
? Show.Name.S01E02.S01E03.HDTV.XViD.Etc-Group
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 2
|
||||
- 3
|
||||
episodeNumber: 2
|
||||
format: HDTV
|
||||
releaseGroup: Etc-Group
|
||||
season: 1
|
||||
series: Show Name
|
||||
videoCodec: XviD
|
||||
|
||||
? Show Name - S01E02 - S01E03 - S01E04 - Ep Name
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
episodeNumber: 2
|
||||
season: 1
|
||||
series: Show Name
|
||||
title: Ep Name
|
||||
|
||||
? Show.Name.1x02.1x03.HDTV.XViD.Etc-Group
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 2
|
||||
- 3
|
||||
episodeNumber: 2
|
||||
format: HDTV
|
||||
releaseGroup: Etc-Group
|
||||
season: 1
|
||||
series: Show Name
|
||||
videoCodec: XviD
|
||||
|
||||
? Show Name - 1x02 - 1x03 - 1x04 - Ep Name
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
episodeNumber: 2
|
||||
season: 1
|
||||
series: Show Name
|
||||
title: Ep Name
|
||||
|
||||
? Show.Name.S01E02.HDTV.XViD.Etc-Group
|
||||
: options: -n
|
||||
episodeNumber: 2
|
||||
format: HDTV
|
||||
releaseGroup: Etc-Group
|
||||
season: 1
|
||||
series: Show Name
|
||||
videoCodec: XviD
|
||||
|
||||
? Show Name - S01E02 - My Ep Name
|
||||
: options: -n
|
||||
episodeNumber: 2
|
||||
season: 1
|
||||
series: Show Name
|
||||
title: My Ep Name
|
||||
|
||||
? Show Name - S01.E03 - My Ep Name
|
||||
: options: -n
|
||||
episodeNumber: 3
|
||||
season: 1
|
||||
series: Show Name
|
||||
title: My Ep Name
|
||||
|
||||
? Show.Name.S01E02E03.HDTV.XViD.Etc-Group
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 2
|
||||
- 3
|
||||
episodeNumber: 2
|
||||
format: HDTV
|
||||
releaseGroup: Etc-Group
|
||||
season: 1
|
||||
series: Show Name
|
||||
videoCodec: XviD
|
||||
|
||||
? Show Name - S01E02-03 - My Ep Name
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 2
|
||||
- 3
|
||||
episodeNumber: 2
|
||||
season: 1
|
||||
series: Show Name
|
||||
title: My Ep Name
|
||||
|
||||
? Show.Name.S01.E02.E03
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 2
|
||||
- 3
|
||||
episodeNumber: 2
|
||||
season: 1
|
||||
series: Show Name
|
||||
|
||||
? Show_Name.1x02.HDTV_XViD_Etc-Group
|
||||
: options: -n
|
||||
episodeNumber: 2
|
||||
format: HDTV
|
||||
releaseGroup: Etc-Group
|
||||
season: 1
|
||||
series: Show Name
|
||||
videoCodec: XviD
|
||||
|
||||
? Show Name - 1x02 - My Ep Name
|
||||
: options: -n
|
||||
episodeNumber: 2
|
||||
season: 1
|
||||
series: Show Name
|
||||
title: My Ep Name
|
||||
|
||||
? Show_Name.1x02x03x04.HDTV_XViD_Etc-Group
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
episodeNumber: 2
|
||||
format: HDTV
|
||||
releaseGroup: Etc-Group
|
||||
season: 1
|
||||
series: Show Name
|
||||
videoCodec: XviD
|
||||
|
||||
? Show Name - 1x02-03-04 - My Ep Name
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
episodeNumber: 2
|
||||
season: 1
|
||||
series: Show Name
|
||||
title: My Ep Name
|
||||
|
||||
? Show.Name.100.Event.2010.11.23.HDTV.XViD.Etc-Group
|
||||
: options: -n
|
||||
date: 2010-11-23
|
||||
episodeNumber: 100
|
||||
format: HDTV
|
||||
releaseGroup: Etc-Group
|
||||
series: Show Name
|
||||
title: Event
|
||||
videoCodec: XviD
|
||||
|
||||
? Show.Name.2010.11.23.HDTV.XViD.Etc-Group
|
||||
: options: -n
|
||||
date: 2010-11-23
|
||||
format: HDTV
|
||||
releaseGroup: Etc-Group
|
||||
series: Show Name
|
||||
|
||||
? Show Name - 2010-11-23 - Ep Name
|
||||
: options: -n
|
||||
date: 2010-11-23
|
||||
series: Show Name
|
||||
title: Ep Name
|
||||
|
||||
? Show Name Season 1 Episode 2 Ep Name
|
||||
: options: -n
|
||||
episodeNumber: 2
|
||||
season: 1
|
||||
series: Show Name
|
||||
title: Ep Name
|
||||
|
||||
? Show.Name.S01.HDTV.XViD.Etc-Group
|
||||
: options: -n
|
||||
format: HDTV
|
||||
releaseGroup: Etc-Group
|
||||
season: 1
|
||||
series: Show Name
|
||||
videoCodec: XviD
|
||||
|
||||
? Show.Name.E02-03
|
||||
: options: -n
|
||||
episodeNumber: 2
|
||||
episodeList:
|
||||
- 2
|
||||
- 3
|
||||
series: Show Name
|
||||
|
||||
? Show.Name.E02.2010
|
||||
: options: -n
|
||||
episodeNumber: 2
|
||||
year: 2010
|
||||
series: Show Name
|
||||
|
||||
? Show.Name.E23.Test
|
||||
: options: -n
|
||||
episodeNumber: 23
|
||||
series: Show Name
|
||||
title: Test
|
||||
|
||||
? Show.Name.Part.3.HDTV.XViD.Etc-Group
|
||||
: options: -n -t episode
|
||||
part: 3
|
||||
series: Show Name
|
||||
format: HDTV
|
||||
videoCodec: XviD
|
||||
releaseGroup: Etc-Group
|
||||
|
||||
? Show.Name.Part.1.and.Part.2.Blah-Group
|
||||
: options: -n -t episode
|
||||
part: 1
|
||||
partList:
|
||||
- 1
|
||||
- 2
|
||||
series: Show Name
|
||||
|
||||
? Show Name - 01 - Ep Name
|
||||
: options: -n
|
||||
episodeNumber: 1
|
||||
series: Show Name
|
||||
title: Ep Name
|
||||
|
||||
? 01 - Ep Name
|
||||
: options: -n
|
||||
episodeNumber: 1
|
||||
series: Ep Name
|
||||
|
||||
? Show.Name.102.HDTV.XViD.Etc-Group
|
||||
: options: -n
|
||||
episodeNumber: 2
|
||||
format: HDTV
|
||||
releaseGroup: Etc-Group
|
||||
season: 1
|
||||
series: Show Name
|
||||
videoCodec: XviD
|
||||
|
||||
? '[HorribleSubs] Maria the Virgin Witch - 01 [720p].mkv'
|
||||
: episodeNumber: 1
|
||||
releaseGroup: HorribleSubs
|
||||
screenSize: 720p
|
||||
series: Maria the Virgin Witch
|
||||
|
||||
? '[ISLAND]One_Piece_679_[VOSTFR]_[V1]_[8bit]_[720p]_[EB7838FC].mp4'
|
||||
: options: -E
|
||||
crc32: EB7838FC
|
||||
episodeNumber: 679
|
||||
releaseGroup: ISLAND
|
||||
screenSize: 720p
|
||||
series: One Piece
|
||||
subtitleLanguage: fr
|
||||
videoProfile: 8bit
|
||||
version: 1
|
||||
|
||||
|
||||
? '[ISLAND]One_Piece_679_[VOSTFR]_[8bit]_[720p]_[EB7838FC].mp4'
|
||||
: options: -E
|
||||
crc32: EB7838FC
|
||||
episodeNumber: 679
|
||||
releaseGroup: ISLAND
|
||||
screenSize: 720p
|
||||
series: One Piece
|
||||
subtitleLanguage: fr
|
||||
videoProfile: 8bit
|
||||
|
||||
? '[Kaerizaki-Fansub]_One_Piece_679_[VOSTFR][HD_1280x720].mp4'
|
||||
: options: -E
|
||||
episodeNumber: 679
|
||||
other: HD
|
||||
releaseGroup: Kaerizaki-Fansub
|
||||
screenSize: 720p
|
||||
series: One Piece
|
||||
subtitleLanguage: fr
|
||||
|
||||
? '[Kaerizaki-Fansub]_One_Piece_679_[VOSTFR][FANSUB][HD_1280x720].mp4'
|
||||
: options: -E
|
||||
episodeNumber: 679
|
||||
other:
|
||||
- Fansub
|
||||
- HD
|
||||
releaseGroup: Kaerizaki-Fansub
|
||||
screenSize: 720p
|
||||
series: One Piece
|
||||
subtitleLanguage: fr
|
||||
|
||||
? '[Kaerizaki-Fansub]_One_Piece_681_[VOSTFR][HD_1280x720]_V2.mp4'
|
||||
: options: -E
|
||||
episodeNumber: 681
|
||||
other: HD
|
||||
releaseGroup: Kaerizaki-Fansub
|
||||
screenSize: 720p
|
||||
series: One Piece
|
||||
subtitleLanguage: fr
|
||||
version: 2
|
||||
|
||||
? '[Kaerizaki-Fansub] High School DxD New 04 VOSTFR HD (1280x720) V2.mp4'
|
||||
: options: -E
|
||||
episodeNumber: 4
|
||||
other: HD
|
||||
releaseGroup: Kaerizaki-Fansub
|
||||
screenSize: 720p
|
||||
series: High School DxD New
|
||||
subtitleLanguage: fr
|
||||
version: 2
|
||||
|
||||
? '[Kaerizaki-Fansub] One Piece 603 VOSTFR PS VITA (960x544) V2.mp4'
|
||||
: options: -E
|
||||
episodeNumber: 603
|
||||
releaseGroup: Kaerizaki-Fansub
|
||||
screenSize: 960x544
|
||||
series: One Piece
|
||||
subtitleLanguage: fr
|
||||
version: 2
|
||||
|
||||
? '[Group Name] Show Name.13'
|
||||
: options: -n
|
||||
episodeNumber: 13
|
||||
releaseGroup: Group Name
|
||||
series: Show Name
|
||||
|
||||
? '[Group Name] Show Name - 13'
|
||||
: options: -n
|
||||
episodeNumber: 13
|
||||
releaseGroup: Group Name
|
||||
series: Show Name
|
||||
|
||||
? '[Group Name] Show Name 13'
|
||||
: options: -n
|
||||
episodeNumber: 13
|
||||
releaseGroup: Group Name
|
||||
series: Show Name
|
||||
|
||||
# [Group Name] Show Name.13-14
|
||||
# [Group Name] Show Name - 13-14
|
||||
# Show Name 13-14
|
||||
|
||||
? '[Stratos-Subs]_Infinite_Stratos_-_12_(1280x720_H.264_AAC)_[379759DB]'
|
||||
: options: -n
|
||||
audioCodec: AAC
|
||||
crc32: 379759DB
|
||||
episodeNumber: 12
|
||||
releaseGroup: Stratos-Subs
|
||||
screenSize: 720p
|
||||
series: Infinite Stratos
|
||||
videoCodec: h264
|
||||
|
||||
# [ShinBunBu-Subs] Bleach - 02-03 (CX 1280x720 x264 AAC)
|
||||
|
||||
? '[SGKK] Bleach 312v1 [720p/MKV]'
|
||||
: options: -n
|
||||
episodeNumber: 312
|
||||
releaseGroup: SGKK
|
||||
screenSize: 720p
|
||||
series: Bleach
|
||||
version: 1
|
||||
|
||||
? '[Ayako]_Infinite_Stratos_-_IS_-_07_[H264][720p][EB7838FC]'
|
||||
: options: -n
|
||||
crc32: EB7838FC
|
||||
episodeNumber: 7
|
||||
releaseGroup: Ayako
|
||||
screenSize: 720p
|
||||
series: Infinite Stratos
|
||||
videoCodec: h264
|
||||
|
||||
? '[Ayako] Infinite Stratos - IS - 07v2 [H264][720p][44419534]'
|
||||
: options: -n
|
||||
crc32: '44419534'
|
||||
episodeNumber: 7
|
||||
releaseGroup: Ayako
|
||||
screenSize: 720p
|
||||
series: Infinite Stratos
|
||||
videoCodec: h264
|
||||
version: 2
|
||||
|
||||
? '[Ayako-Shikkaku] Oniichan no Koto Nanka Zenzen Suki Janain Dakara ne - 10 [LQ][h264][720p] [8853B21C]'
|
||||
: options: -n
|
||||
crc32: 8853B21C
|
||||
episodeNumber: 10
|
||||
releaseGroup: Ayako-Shikkaku
|
||||
screenSize: 720p
|
||||
series: Oniichan no Koto Nanka Zenzen Suki Janain Dakara ne
|
||||
videoCodec: h264
|
||||
|
||||
# Add support for absolute episodes
|
||||
? Bleach - s16e03-04 - 313-314
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 3
|
||||
- 4
|
||||
episodeNumber: 3
|
||||
season: 16
|
||||
series: Bleach
|
||||
|
||||
? Bleach.s16e03-04.313-314
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 3
|
||||
- 4
|
||||
episodeNumber: 3
|
||||
season: 16
|
||||
series: Bleach
|
||||
|
||||
? Bleach.s16e03-04.313-314
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 3
|
||||
- 4
|
||||
episodeNumber: 3
|
||||
season: 16
|
||||
series: Bleach
|
||||
|
||||
? Bleach - 313-314
|
||||
: options: -En
|
||||
episodeList:
|
||||
- 313
|
||||
- 314
|
||||
episodeNumber: 313
|
||||
series: Bleach
|
||||
|
||||
? Bleach - s16e03-04 - 313-314
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 3
|
||||
- 4
|
||||
episodeNumber: 3
|
||||
season: 16
|
||||
series: Bleach
|
||||
|
||||
? Bleach.s16e03-04.313-314
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 3
|
||||
- 4
|
||||
episodeNumber: 3
|
||||
season: 16
|
||||
series: Bleach
|
||||
|
||||
|
||||
? Bleach s16e03e04 313-314
|
||||
: options: -n
|
||||
episodeList:
|
||||
- 3
|
||||
- 4
|
||||
episodeNumber: 3
|
||||
season: 16
|
||||
series: Bleach
|
||||
|
||||
? '[ShinBunBu-Subs] Bleach - 02-03 (CX 1280x720 x264 AAC)'
|
||||
: audioCodec: AAC
|
||||
episodeList:
|
||||
- 2
|
||||
- 3
|
||||
episodeNumber: 2
|
||||
releaseGroup: ShinBunBu-Subs
|
||||
screenSize: 720p
|
||||
series: Bleach
|
||||
videoCodec: h264
|
||||
|
||||
? 003. Show Name - Ep Name.ext
|
||||
: episodeNumber: 3
|
||||
series: Show Name
|
||||
title: Ep Name
|
||||
|
||||
? 003-004. Show Name - Ep Name.ext
|
||||
: episodeList:
|
||||
- 3
|
||||
- 4
|
||||
episodeNumber: 3
|
||||
series: Show Name
|
||||
title: Ep Name
|
||||
|
||||
? One Piece - 102
|
||||
: options: -n -t episode
|
||||
episodeNumber: 2
|
||||
season: 1
|
||||
series: One Piece
|
||||
|
||||
? "[ACX]_Wolf's_Spirit_001.mkv"
|
||||
: episodeNumber: 1
|
||||
releaseGroup: ACX
|
||||
series: "Wolf's Spirit"
|
||||
|
||||
? Project.Runway.S14E00.and.S14E01.(Eng.Subs).SDTV.x264-[2Maverick].mp4
|
||||
: episodeList:
|
||||
- 0
|
||||
- 1
|
||||
episodeNumber: 0
|
||||
format: TV
|
||||
releaseGroup: 2Maverick
|
||||
season: 14
|
||||
series: Project Runway
|
||||
subtitleLanguage: en
|
||||
videoCodec: h264
|
||||
|
||||
? '[Hatsuyuki-Kaitou]_Fairy_Tail_2_-_16-20_[720p][10bit].torrent'
|
||||
: episodeList:
|
||||
- 16
|
||||
- 17
|
||||
- 18
|
||||
- 19
|
||||
- 20
|
||||
episodeNumber: 16
|
||||
releaseGroup: Hatsuyuki-Kaitou
|
||||
screenSize: 720p
|
||||
series: Fairy Tail 2
|
||||
videoProfile: 10bit
|
||||
|
||||
? '[Hatsuyuki-Kaitou]_Fairy_Tail_2_-_16-20_(191-195)_[720p][10bit].torrent'
|
||||
: options: -E
|
||||
episodeList:
|
||||
- 16
|
||||
- 17
|
||||
- 18
|
||||
- 19
|
||||
- 20
|
||||
episodeNumber: 16
|
||||
releaseGroup: Hatsuyuki-Kaitou
|
||||
screenSize: 720p
|
||||
series: Fairy Tail 2
|
||||
|
||||
? "Looney Tunes 1940x01 Porky's Last Stand.mkv"
|
||||
: episodeNumber: 1
|
||||
season: 1940
|
||||
series: Looney Tunes
|
||||
title: Porky's Last Stand
|
||||
year: 1940
|
||||
|
||||
? The.Good.Wife.S06E01.E10.720p.WEB-DL.DD5.1.H.264-CtrlHD/The.Good.Wife.S06E09.Trust.Issues.720p.WEB-DL.DD5.1.H.264-CtrlHD.mkv
|
||||
: audioChannels: '5.1'
|
||||
audioCodec: DolbyDigital
|
||||
episodeList:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
- 5
|
||||
- 6
|
||||
- 7
|
||||
- 8
|
||||
- 9
|
||||
- 10
|
||||
episodeNumber: 9
|
||||
format: WEB-DL
|
||||
releaseGroup: CtrlHD
|
||||
screenSize: 720p
|
||||
season: 6
|
||||
series: The Good Wife
|
||||
title: Trust Issues
|
||||
videoCodec: h264
|
||||
|
||||
? Fear the Walking Dead - 01x02 - So Close, Yet So Far.REPACK-KILLERS.French.C.updated.Addic7ed.com.mkv
|
||||
: episodeNumber: 2
|
||||
language: fr
|
||||
other: Proper
|
||||
properCount: 1
|
||||
season: 1
|
||||
series: Fear the Walking Dead
|
||||
title: So Close, Yet So Far
|
||||
|
||||
? Fear the Walking Dead - 01x02 - En Close, Yet En Far.REPACK-KILLERS.French.C.updated.Addic7ed.com.mkv
|
||||
: episodeNumber: 2
|
||||
language: fr
|
||||
other: Proper
|
||||
properCount: 1
|
||||
season: 1
|
||||
series: Fear the Walking Dead
|
||||
title: En Close, Yet En Far
|
||||
|
||||
? /av/unsorted/The.Daily.Show.2015.07.22.Jake.Gyllenhaal.720p.HDTV.x264-BATV.mkv
|
||||
: date: 2015-07-22
|
||||
format: HDTV
|
||||
releaseGroup: BATV
|
||||
screenSize: 720p
|
||||
series: The Daily Show
|
||||
title: Jake Gyllenhaal
|
||||
videoCodec: h264
|
||||
|
||||
@@ -22,7 +22,6 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
||||
|
||||
from collections import defaultdict
|
||||
from unittest import TestCase, TestLoader
|
||||
import shlex
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
@@ -86,10 +85,6 @@ class TestGuessit(TestCase):
|
||||
|
||||
options = required_fields.pop('options') if 'options' in required_fields else None
|
||||
|
||||
if options:
|
||||
args = shlex.split(options)
|
||||
options = get_opts().parse_args(args)
|
||||
options = vars(options)
|
||||
try:
|
||||
found = guess_func(filename, options)
|
||||
except Exception as e:
|
||||
|
||||
@@ -606,7 +606,9 @@
|
||||
? Yves.Saint.Laurent.2013.FRENCH.DVDSCR.MD.XviD-ViVARiUM.avi
|
||||
: format: DVD
|
||||
language: French
|
||||
other: Screener
|
||||
other:
|
||||
- MD
|
||||
- Screener
|
||||
releaseGroup: ViVARiUM
|
||||
title: Yves Saint Laurent
|
||||
videoCodec: XviD
|
||||
@@ -759,3 +761,19 @@
|
||||
screenSize: 1080p
|
||||
title: transformers 2
|
||||
videoCodec: h265
|
||||
|
||||
? 1.Angry.Man.1957.mkv
|
||||
: title: 1 Angry Man
|
||||
year: 1957
|
||||
|
||||
? 12.Angry.Men.1957.mkv
|
||||
: title: 12 Angry Men
|
||||
year: 1957
|
||||
|
||||
? 123.Angry.Men.1957.mkv
|
||||
: title: 123 Angry Men
|
||||
year: 1957
|
||||
|
||||
? "Looney Tunes 1444x866 Porky's Last Stand.mkv"
|
||||
: screenSize: 1444x866
|
||||
title: Looney Tunes
|
||||
|
||||
@@ -31,10 +31,12 @@ keywords = yaml.load("""
|
||||
? Xvid PROPER
|
||||
: videoCodec: Xvid
|
||||
other: PROPER
|
||||
properCount: 1
|
||||
|
||||
? PROPER-Xvid
|
||||
: videoCodec: Xvid
|
||||
other: PROPER
|
||||
properCount: 1
|
||||
|
||||
""")
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
from guessit.containers import DefaultValidator
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder
|
||||
@@ -41,10 +42,9 @@ class GuessDate(Transformer):
|
||||
@staticmethod
|
||||
def guess_date(string, node=None, options=None):
|
||||
date, span = search_date(string, options.get('date_year_first') if options else False, options.get('date_day_first') if options else False)
|
||||
if date:
|
||||
if date and span and DefaultValidator.validate_string(string, span): # ensure we have a separator before and after date
|
||||
return {'date': date}, span
|
||||
else:
|
||||
return None, None
|
||||
return None, None
|
||||
|
||||
def process(self, mtree, options=None):
|
||||
GuessFinder(self.guess_date, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||
|
||||
@@ -24,6 +24,8 @@ from guessit.plugins.transformers import Transformer, get_transformer
|
||||
from guessit.textutils import reorder_title
|
||||
|
||||
from guessit.matcher import found_property
|
||||
from guessit.patterns.list import all_separators
|
||||
from guessit.language import all_lang_prefixes_suffixes
|
||||
|
||||
|
||||
class GuessEpisodeInfoFromPosition(Transformer):
|
||||
@@ -33,39 +35,49 @@ class GuessEpisodeInfoFromPosition(Transformer):
|
||||
def supported_properties(self):
|
||||
return ['title', 'series']
|
||||
|
||||
def match_from_epnum_position(self, mtree, node, options):
|
||||
epnum_idx = node.node_idx
|
||||
@staticmethod
def excluded_word(*values):
    """Tell whether at least one of the given nodes holds an excluded word.

    A node is excluded when its lower-cased ``clean_value`` is found in
    ``all_separators`` or in ``all_lang_prefixes_suffixes``.

    :param values: nodes to check (objects exposing ``clean_value``)
    :return: True as soon as one node is excluded, False otherwise
    """
    return any(value.clean_value.lower() in (all_separators + all_lang_prefixes_suffixes)
               for value in values)
|
||||
|
||||
def match_from_epnum_position(self, path_node, ep_node, options):
|
||||
epnum_idx = ep_node.node_idx
|
||||
|
||||
# a few helper functions to be able to filter using high-level semantics
|
||||
def before_epnum_in_same_pathgroup():
|
||||
return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
|
||||
return [leaf for leaf in path_node.unidentified_leaves(lambda x: len(x.clean_value) > 1)
|
||||
if (leaf.node_idx[0] == epnum_idx[0] and
|
||||
leaf.node_idx[1:] < epnum_idx[1:])]
|
||||
leaf.node_idx[1:] < epnum_idx[1:] and
|
||||
not GuessEpisodeInfoFromPosition.excluded_word(leaf))]
|
||||
|
||||
def after_epnum_in_same_pathgroup():
|
||||
return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
|
||||
return [leaf for leaf in path_node.unidentified_leaves(lambda x: len(x.clean_value) > 1)
|
||||
if (leaf.node_idx[0] == epnum_idx[0] and
|
||||
leaf.node_idx[1:] > epnum_idx[1:])]
|
||||
leaf.node_idx[1:] > epnum_idx[1:] and
|
||||
not GuessEpisodeInfoFromPosition.excluded_word(leaf))]
|
||||
|
||||
def after_epnum_in_same_explicitgroup():
|
||||
return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
|
||||
return [leaf for leaf in path_node.unidentified_leaves(lambda x: len(x.clean_value) > 1)
|
||||
if (leaf.node_idx[:2] == epnum_idx[:2] and
|
||||
leaf.node_idx[2:] > epnum_idx[2:])]
|
||||
leaf.node_idx[2:] > epnum_idx[2:] and
|
||||
not GuessEpisodeInfoFromPosition.excluded_word(leaf))]
|
||||
|
||||
# epnumber is the first group and there are only 2 after it in same
|
||||
# path group
|
||||
# -> series title - episode title
|
||||
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
|
||||
title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(after_epnum_in_same_pathgroup(), options)
|
||||
|
||||
if ('title' not in mtree.info and # no title
|
||||
'series' in mtree.info and # series present
|
||||
if ('title' not in path_node.info and # no title
|
||||
'series' in path_node.info and # series present
|
||||
before_epnum_in_same_pathgroup() == [] and # no groups before
|
||||
len(title_candidates) == 1): # only 1 group after
|
||||
|
||||
found_property(title_candidates[0], 'title', confidence=0.4)
|
||||
return
|
||||
|
||||
if ('title' not in mtree.info and # no title
|
||||
if ('title' not in path_node.info and # no title
|
||||
before_epnum_in_same_pathgroup() == [] and # no groups before
|
||||
len(title_candidates) == 2): # only 2 groups after
|
||||
|
||||
@@ -77,17 +89,17 @@ class GuessEpisodeInfoFromPosition(Transformer):
|
||||
# probably the series name
|
||||
series_candidates = before_epnum_in_same_pathgroup()
|
||||
if len(series_candidates) >= 1:
|
||||
found_property(series_candidates[0], 'series', confidence=0.7)
|
||||
found_property(series_candidates[0], 'series', confidence=0.7)
|
||||
|
||||
# only 1 group after (in the same path group) and it's probably the
|
||||
# episode title.
|
||||
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
|
||||
title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(after_epnum_in_same_pathgroup(), options)
|
||||
if len(title_candidates) == 1:
|
||||
found_property(title_candidates[0], 'title', confidence=0.5)
|
||||
return
|
||||
else:
|
||||
# try in the same explicit group, with lower confidence
|
||||
title_candidates = self._filter_candidates(after_epnum_in_same_explicitgroup(), options)
|
||||
title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(after_epnum_in_same_explicitgroup(), options)
|
||||
if len(title_candidates) == 1:
|
||||
found_property(title_candidates[0], 'title', confidence=0.4)
|
||||
return
|
||||
@@ -96,7 +108,7 @@ class GuessEpisodeInfoFromPosition(Transformer):
|
||||
return
|
||||
|
||||
# get the one with the longest value
|
||||
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
|
||||
title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(after_epnum_in_same_pathgroup(), options)
|
||||
if title_candidates:
|
||||
maxidx = -1
|
||||
maxv = -1
|
||||
@@ -104,7 +116,8 @@ class GuessEpisodeInfoFromPosition(Transformer):
|
||||
if len(c.clean_value) > maxv:
|
||||
maxidx = i
|
||||
maxv = len(c.clean_value)
|
||||
found_property(title_candidates[maxidx], 'title', confidence=0.3)
|
||||
if maxidx > -1:
|
||||
found_property(title_candidates[maxidx], 'title', confidence=0.3)
|
||||
|
||||
def should_process(self, mtree, options=None):
|
||||
options = options or {}
|
||||
@@ -114,9 +127,9 @@ class GuessEpisodeInfoFromPosition(Transformer):
|
||||
def _filter_candidates(candidates, options):
|
||||
episode_details_transformer = get_transformer('guess_episode_details')
|
||||
if episode_details_transformer:
|
||||
return [n for n in candidates if not episode_details_transformer.container.find_properties(n.value, n, options, re_match=True)]
|
||||
else:
|
||||
return candidates
|
||||
candidates = [n for n in candidates if not episode_details_transformer.container.find_properties(n.value, n, options, re_match=True)]
|
||||
candidates = list(filter(lambda n: not GuessEpisodeInfoFromPosition.excluded_word(n), candidates))
|
||||
return candidates
|
||||
|
||||
def process(self, mtree, options=None):
|
||||
"""
|
||||
@@ -128,15 +141,26 @@ class GuessEpisodeInfoFromPosition(Transformer):
|
||||
if not eps:
|
||||
eps = [node for node in mtree.leaves() if 'date' in node.guess]
|
||||
|
||||
eps = sorted(eps, key=lambda ep: -ep.guess.confidence())
|
||||
if eps:
|
||||
self.match_from_epnum_position(mtree, eps[0], options)
|
||||
performed_path_nodes = []
|
||||
for ep_node in eps:
|
||||
# Perform only first episode node for each path node
|
||||
path_node = [node for node in ep_node.ancestors if node.category == 'path']
|
||||
if len(path_node) > 0:
|
||||
path_node = path_node[0]
|
||||
else:
|
||||
path_node = ep_node.root
|
||||
if path_node not in performed_path_nodes:
|
||||
self.match_from_epnum_position(path_node, ep_node, options)
|
||||
performed_path_nodes.append(path_node)
|
||||
|
||||
else:
|
||||
# if we don't have the episode number, but at least 2 groups in the
|
||||
# basename, then it's probably series - eptitle
|
||||
basename = mtree.node_at((-2,))
|
||||
basename = list(filter(lambda x: x.category == 'path', mtree.nodes()))[-2]
|
||||
|
||||
title_candidates = self._filter_candidates(basename.unidentified_leaves(), options)
|
||||
title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(basename.unidentified_leaves(), options)
|
||||
|
||||
if len(title_candidates) >= 2 and 'series' not in mtree.info:
|
||||
found_property(title_candidates[0], 'series', confidence=0.4)
|
||||
@@ -147,12 +171,13 @@ class GuessEpisodeInfoFromPosition(Transformer):
|
||||
|
||||
# if we only have 1 remaining valid group in the folder containing the
|
||||
# file, then it's likely that it is the series name
|
||||
path_nodes = list(filter(lambda x: x.category == 'path', mtree.nodes()))
|
||||
try:
|
||||
series_candidates = list(mtree.node_at((-3,)).unidentified_leaves())
|
||||
except ValueError:
|
||||
series_candidates = list(path_nodes[-3].unidentified_leaves())
|
||||
except IndexError:
|
||||
series_candidates = []
|
||||
|
||||
if len(series_candidates) == 1:
|
||||
if len(series_candidates) == 1 and not GuessEpisodeInfoFromPosition.excluded_word(series_candidates[0]):
|
||||
found_property(series_candidates[0], 'series', confidence=0.3)
|
||||
|
||||
# if there's a path group that only contains the season info, then the
|
||||
@@ -163,7 +188,7 @@ class GuessEpisodeInfoFromPosition(Transformer):
|
||||
if eps:
|
||||
previous = [node for node in mtree.unidentified_leaves()
|
||||
if node.node_idx[0] == eps[0].node_idx[0] - 1]
|
||||
if len(previous) == 1:
|
||||
if len(previous) == 1 and not GuessEpisodeInfoFromPosition.excluded_word(previous[0]):
|
||||
found_property(previous[0], 'series', confidence=0.5)
|
||||
|
||||
# If we have found title without any serie name, replace it by the serie name.
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import re
|
||||
from guessit.patterns.list import list_parser, all_separators_re
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder
|
||||
@@ -34,9 +35,8 @@ class GuessEpisodesRexps(Transformer):
|
||||
def __init__(self):
|
||||
Transformer.__init__(self, 20)
|
||||
|
||||
range_separators = ['-', 'to', 'a']
|
||||
discrete_separators = ['&', 'and', 'et']
|
||||
of_separators = ['of', 'sur', '/', '\\']
|
||||
of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)
|
||||
|
||||
season_words = ['seasons?', 'saisons?', 'series?']
|
||||
episode_words = ['episodes?']
|
||||
@@ -44,85 +44,14 @@ class GuessEpisodesRexps(Transformer):
|
||||
season_markers = ['s']
|
||||
episode_markers = ['e', 'ep']
|
||||
|
||||
discrete_sep = sep
|
||||
for range_separator in range_separators:
|
||||
discrete_sep = discrete_sep.replace(range_separator, '')
|
||||
discrete_separators.append(discrete_sep)
|
||||
all_separators = list(range_separators)
|
||||
all_separators.extend(discrete_separators)
|
||||
|
||||
self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
|
||||
|
||||
range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
|
||||
discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE)
|
||||
all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
|
||||
of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)
|
||||
|
||||
season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
|
||||
episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)
|
||||
|
||||
season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
|
||||
episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)
|
||||
|
||||
def list_parser(value, property_list_name, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
|
||||
discrete_elements = filter(lambda x: x != '', discrete_separators_re.split(value))
|
||||
discrete_elements = [x.strip() for x in discrete_elements]
|
||||
|
||||
proper_discrete_elements = []
|
||||
i = 0
|
||||
while i < len(discrete_elements):
|
||||
if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i+1]):
|
||||
proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i+1] + discrete_elements[i+2])
|
||||
i += 3
|
||||
else:
|
||||
match = range_separators_re.search(discrete_elements[i])
|
||||
if match and match.start() == 0:
|
||||
proper_discrete_elements[i - 1] += discrete_elements[i]
|
||||
elif match and match.end() == len(discrete_elements[i]):
|
||||
proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i + 1])
|
||||
else:
|
||||
proper_discrete_elements.append(discrete_elements[i])
|
||||
i += 1
|
||||
|
||||
discrete_elements = proper_discrete_elements
|
||||
|
||||
ret = []
|
||||
|
||||
for discrete_element in discrete_elements:
|
||||
range_values = filter(lambda x: x != '', range_separators_re.split(discrete_element))
|
||||
range_values = [x.strip() for x in range_values]
|
||||
if len(range_values) > 1:
|
||||
for x in range(0, len(range_values) - 1):
|
||||
start_range_ep = parse_numeral(range_values[x])
|
||||
end_range_ep = parse_numeral(range_values[x+1])
|
||||
for range_ep in range(start_range_ep, end_range_ep + 1):
|
||||
if range_ep not in ret:
|
||||
ret.append(range_ep)
|
||||
else:
|
||||
discrete_value = parse_numeral(discrete_element)
|
||||
if discrete_value not in ret:
|
||||
ret.append(discrete_value)
|
||||
|
||||
if len(ret) > 1:
|
||||
if not allow_discrete:
|
||||
valid_ret = list()
|
||||
# replace discrete elements by ranges
|
||||
valid_ret.append(ret[0])
|
||||
for i in range(0, len(ret) - 1):
|
||||
previous = valid_ret[len(valid_ret) - 1]
|
||||
if ret[i+1] < previous:
|
||||
pass
|
||||
else:
|
||||
valid_ret.append(ret[i+1])
|
||||
ret = valid_ret
|
||||
if fill_gaps:
|
||||
ret = list(range(min(ret), max(ret) + 1))
|
||||
if len(ret) > 1:
|
||||
return {None: ret[0], property_list_name: ret}
|
||||
if len(ret) > 0:
|
||||
return ret[0]
|
||||
return None
|
||||
|
||||
def episode_parser_x(value):
|
||||
return list_parser(value, 'episodeList', discrete_separators_re=re.compile('x', re.IGNORECASE))
|
||||
|
||||
@@ -138,34 +67,40 @@ class GuessEpisodesRexps(Transformer):
|
||||
class ResolutionCollisionValidator(object):
|
||||
@staticmethod
|
||||
def validate(prop, string, node, match, entry_start, entry_end):
|
||||
return len(match.group(2)) < 3 # limit
|
||||
# Invalidate when season or episode is more than 100.
|
||||
try:
|
||||
season_value = season_parser(match.group(2))
|
||||
episode_value = episode_parser_x(match.group(3))
|
||||
return season_value < 100 or episode_value < 100
|
||||
except:
|
||||
# This may occur for 1xAll or patterns like this.
|
||||
return True
|
||||
|
||||
self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?' + season_words_re.pattern + '?)', confidence=1.0, formatter=parse_numeral)
|
||||
self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + season_words_re.pattern + '?)' + sep, confidence=1.0, formatter={None: parse_numeral, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))
|
||||
|
||||
self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser}, validator=NoValidator())
|
||||
# self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
|
||||
self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser, 'season': season_parser}, validator=NoValidator())
|
||||
|
||||
self.container.register_property(None, sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
|
||||
self.container.register_property(None, r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
|
||||
self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'season': season_parser}, validator=NoValidator())
|
||||
|
||||
self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.6, formatter=parse_numeral)
|
||||
self.container.register_property('version', sep + r'(V\d+)' + sep, confidence=0.6, formatter=parse_numeral, validator=NoValidator())
|
||||
self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?)', confidence=0.7, formatter=parse_numeral)
|
||||
self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.7, formatter=parse_numeral)
|
||||
|
||||
|
||||
self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||
self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + episode_words_re.pattern + '?)', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||
|
||||
self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||
self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||
self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||
self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||
|
||||
|
||||
self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
|
||||
self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
|
||||
self.container.register_property('episodeNumber', r'^ ?0(\d{1,2})' + sep, confidence=0.4, formatter=parse_numeral)
|
||||
self.container.register_property('episodeNumber', sep + r'(\d{2}) ?$', confidence=0.4, formatter=parse_numeral)
|
||||
self.container.register_property('episodeNumber', sep + r'0(\d{1,2}) ?$', confidence=0.4, formatter=parse_numeral)
|
||||
self.container.register_property('episodeNumber', r'^' + sep + '+(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser)
|
||||
self.container.register_property('episodeNumber', r'^' + sep + '+0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '0\d{1,2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser)
|
||||
self.container.register_property('episodeNumber', sep + r'(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'\d{2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser)
|
||||
self.container.register_property('episodeNumber', sep + r'0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser)
|
||||
|
||||
self.container.register_property(None, r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)', confidence=0.7, formatter=parse_numeral)
|
||||
self.container.register_property(None, r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
|
||||
@@ -186,7 +121,29 @@ class GuessEpisodesRexps(Transformer):
|
||||
|
||||
def guess_episodes_rexps(self, string, node=None, options=None):
|
||||
found = self.container.find_properties(string, node, options)
|
||||
return self.container.as_guess(found, string)
|
||||
guess = self.container.as_guess(found, string)
|
||||
if guess and node:
|
||||
if 'season' in guess and 'episodeNumber' in guess:
|
||||
# If two guesses contains both season and episodeNumber in same group, create an episodeList
|
||||
for existing_guess in node.group_node().guesses:
|
||||
if 'season' in existing_guess and 'episodeNumber' in existing_guess:
|
||||
if 'episodeList' not in existing_guess:
|
||||
existing_guess['episodeList'] = [existing_guess['episodeNumber']]
|
||||
existing_guess['episodeList'].append(guess['episodeNumber'])
|
||||
existing_guess['episodeList'].sort()
|
||||
if existing_guess['episodeNumber'] > guess['episodeNumber']:
|
||||
existing_guess.set_confidence('episodeNumber', 0)
|
||||
else:
|
||||
guess.set_confidence('episodeNumber', 0)
|
||||
guess['episodeList'] = list(existing_guess['episodeList'])
|
||||
elif 'episodeNumber' in guess:
|
||||
# If two guesses contains only episodeNumber in same group, remove the existing one.
|
||||
for existing_guess in node.group_node().guesses:
|
||||
if 'episodeNumber' in existing_guess:
|
||||
for k, v in existing_guess.items():
|
||||
if k in guess:
|
||||
del guess[k]
|
||||
return guess
|
||||
|
||||
def should_process(self, mtree, options=None):
|
||||
return mtree.guess.get('type', '').startswith('episode')
|
||||
|
||||
@@ -156,6 +156,13 @@ class GuessFiletype(Transformer):
|
||||
|
||||
weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
|
||||
if weak_episode_transformer:
|
||||
found = weak_episode_transformer.container.find_properties(filename, mtree, options, 'episodeNumber')
|
||||
guess = weak_episode_transformer.container.as_guess(found, filename)
|
||||
if guess and (guess.raw('episodeNumber')[0] == '0' or guess['episodeNumber'] >= 10):
|
||||
self.log.debug('Found characteristic property of episodes: %s"', guess)
|
||||
upgrade_episode()
|
||||
return filetype_container[0], other
|
||||
|
||||
found = properties_transformer.container.find_properties(filename, mtree, options, 'crc32')
|
||||
guess = properties_transformer.container.as_guess(found, filename)
|
||||
if guess:
|
||||
@@ -217,7 +224,8 @@ class GuessFiletype(Transformer):
|
||||
if mime is not None:
|
||||
filetype_info.update({'mimetype': mime}, confidence=1.0)
|
||||
|
||||
node_ext = mtree.node_at((-1,))
|
||||
# Retrieve the last node of category path (extension node)
|
||||
node_ext = list(filter(lambda x: x.category == 'path', mtree.nodes()))[-1]
|
||||
found_guess(node_ext, filetype_info)
|
||||
|
||||
if mtree.guess.get('type') in [None, 'unknown']:
|
||||
@@ -226,12 +234,21 @@ class GuessFiletype(Transformer):
|
||||
else:
|
||||
raise TransformerException(__name__, 'Unknown file type')
|
||||
|
||||
def post_process(self, mtree, options=None):
|
||||
# now look whether there are some specific hints for episode vs movie
|
||||
# If we have a date and no year, this is a TV Show.
|
||||
if 'date' in mtree.info and 'year' not in mtree.info and mtree.info.get('type') != 'episode':
|
||||
mtree.guess['type'] = 'episode'
|
||||
for type_leaves in mtree.leaves_containing('type'):
|
||||
type_leaves.guess['type'] = 'episode'
|
||||
for title_leaves in mtree.leaves_containing('title'):
|
||||
title_leaves.guess.rename('title', 'series')
|
||||
def second_pass_options(self, mtree, options=None):
|
||||
if 'type' not in options or not options['type']:
|
||||
if mtree.info.get('type') != 'episode':
|
||||
# now look whether there are some specific hints for episode vs movie
|
||||
# If we have a date and no year, this is a TV Show.
|
||||
if 'date' in mtree.info and 'year' not in mtree.info:
|
||||
return {'type': 'episode'}
|
||||
|
||||
if mtree.info.get('type') != 'movie':
|
||||
# If we have a year, no season but raw episodeNumber is a number not starting with '0', this is a movie.
|
||||
if 'year' in mtree.info and 'episodeNumber' in mtree.info and not 'season' in mtree.info:
|
||||
try:
|
||||
int(mtree.raw['episodeNumber'])
|
||||
return {'type': 'movie'}
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
@@ -43,6 +43,12 @@ class GuessLanguage(Transformer):
|
||||
allowed_languages = None
|
||||
if options and 'allowed_languages' in options:
|
||||
allowed_languages = options.get('allowed_languages')
|
||||
|
||||
directory = list(filter(lambda x: x.category == 'path', node.ancestors))[0]
|
||||
if len(directory.clean_value) <= 3:
|
||||
# skip if we have a langage code as directory
|
||||
return None
|
||||
|
||||
guess = search_language(string, allowed_languages)
|
||||
return guess
|
||||
|
||||
@@ -68,8 +74,10 @@ class GuessLanguage(Transformer):
|
||||
title_ends = {}
|
||||
|
||||
for unidentified_node in mtree.unidentified_leaves():
|
||||
unidentified_starts[unidentified_node.span[0]] = unidentified_node
|
||||
unidentified_ends[unidentified_node.span[1]] = unidentified_node
|
||||
if len(unidentified_node.clean_value) > 1:
|
||||
# only consider unidentified leaves that have some meaningful content
|
||||
unidentified_starts[unidentified_node.span[0]] = unidentified_node
|
||||
unidentified_ends[unidentified_node.span[1]] = unidentified_node
|
||||
|
||||
for property_node in mtree.leaves_containing('year'):
|
||||
property_starts[property_node.span[0]] = property_node
|
||||
@@ -79,19 +87,20 @@ class GuessLanguage(Transformer):
|
||||
title_starts[title_node.span[0]] = title_node
|
||||
title_ends[title_node.span[1]] = title_node
|
||||
|
||||
return node.span[0] in title_ends.keys() and (node.span[1] in unidentified_starts.keys() or node.span[1] + 1 in property_starts.keys()) or\
|
||||
node.span[1] in title_starts.keys() and (node.span[0] == node.group_node().span[0] or node.span[0] in unidentified_ends.keys() or node.span[0] in property_ends.keys())
|
||||
return (node.span[0] in title_ends.keys() and (node.span[1] in unidentified_starts.keys() or
|
||||
node.span[1] + 1 in property_starts.keys()) or
|
||||
node.span[1] in title_starts.keys() and (node.span[0] == node.group_node().span[0] or
|
||||
node.span[0] in unidentified_ends.keys() or
|
||||
node.span[0] in property_ends.keys()))
|
||||
|
||||
def second_pass_options(self, mtree, options=None):
|
||||
m = mtree.matched()
|
||||
to_skip_language_nodes = []
|
||||
to_skip_langs = set()
|
||||
|
||||
for lang_key in ('language', 'subtitleLanguage'):
|
||||
langs = {}
|
||||
lang_nodes = set(mtree.leaves_containing(lang_key))
|
||||
|
||||
for lang_node in lang_nodes:
|
||||
lang = lang_node.guess.get(lang_key, None)
|
||||
if self._skip_language_on_second_pass(mtree, lang_node):
|
||||
# Language probably split the title. Add to skip for 2nd pass.
|
||||
|
||||
@@ -99,38 +108,19 @@ class GuessLanguage(Transformer):
|
||||
# the extension, then it is likely a subtitle language
|
||||
parts = mtree.clean_string(lang_node.root.value).split()
|
||||
if m.get('type') in ['moviesubtitle', 'episodesubtitle']:
|
||||
if lang_node.value in parts and \
|
||||
(parts.index(lang_node.value) == len(parts) - 2):
|
||||
if (lang_node.value in parts and parts.index(lang_node.value) == len(parts) - 2):
|
||||
continue
|
||||
to_skip_language_nodes.append(lang_node)
|
||||
elif lang not in langs:
|
||||
langs[lang] = lang_node
|
||||
else:
|
||||
# The same language was found. Keep the more confident one,
|
||||
# and add others to skip for 2nd pass.
|
||||
existing_lang_node = langs[lang]
|
||||
to_skip = None
|
||||
if (existing_lang_node.guess.confidence('language') >=
|
||||
lang_node.guess.confidence('language')):
|
||||
# lang_node is to remove
|
||||
to_skip = lang_node
|
||||
else:
|
||||
# existing_lang_node is to remove
|
||||
langs[lang] = lang_node
|
||||
to_skip = existing_lang_node
|
||||
to_skip_language_nodes.append(to_skip)
|
||||
|
||||
if to_skip_language_nodes:
|
||||
to_skip_langs.add(lang_node.value)
|
||||
|
||||
if to_skip_langs:
|
||||
# Also skip same value nodes
|
||||
skipped_values = [skip_node.value for skip_node in to_skip_language_nodes]
|
||||
lang_nodes = (set(mtree.leaves_containing('language')) |
|
||||
set(mtree.leaves_containing('subtitleLanguage')))
|
||||
|
||||
for lang_key in ('language', 'subtitleLanguage'):
|
||||
lang_nodes = set(mtree.leaves_containing(lang_key))
|
||||
to_skip = [node for node in lang_nodes if node.value in to_skip_langs]
|
||||
return {'skip_nodes': to_skip}
|
||||
|
||||
for lang_node in lang_nodes:
|
||||
if lang_node not in to_skip_language_nodes and lang_node.value in skipped_values:
|
||||
to_skip_language_nodes.append(lang_node)
|
||||
return {'skip_nodes': to_skip_language_nodes}
|
||||
return None
|
||||
|
||||
def should_process(self, mtree, options=None):
|
||||
@@ -149,6 +139,8 @@ class GuessLanguage(Transformer):
|
||||
|
||||
def post_process(self, mtree, options=None):
|
||||
# 1- try to promote language to subtitle language where it makes sense
|
||||
prefixes = []
|
||||
|
||||
for node in mtree.nodes():
|
||||
if 'language' not in node.guess:
|
||||
continue
|
||||
@@ -157,7 +149,8 @@ class GuessLanguage(Transformer):
|
||||
# the group is the last group of the filename, it is probably the
|
||||
# language of the subtitle
|
||||
# (eg: 'xxx.english.srt')
|
||||
if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
|
||||
ext_node = list(filter(lambda x: x.category == 'path', mtree.nodes()))[-1]
|
||||
if (ext_node.value.lower() in subtitle_exts and
|
||||
node == list(mtree.leaves())[-2]):
|
||||
self.promote_subtitle(node)
|
||||
|
||||
@@ -171,11 +164,7 @@ class GuessLanguage(Transformer):
|
||||
for sub_prefix in subtitle_prefixes:
|
||||
if (sub_prefix in find_words(group_str) and
|
||||
0 <= group_str.find(sub_prefix) < (node.span[0] - explicit_group.span[0])):
|
||||
self.promote_subtitle(node)
|
||||
|
||||
for sub_suffix in subtitle_suffixes:
|
||||
if (sub_suffix in find_words(group_str) and
|
||||
(node.span[0] - explicit_group.span[0]) < group_str.find(sub_suffix)):
|
||||
prefixes.append((explicit_group, sub_prefix))
|
||||
self.promote_subtitle(node)
|
||||
|
||||
# - if a language is in an explicit group just preceded by "st",
|
||||
@@ -187,3 +176,21 @@ class GuessLanguage(Transformer):
|
||||
self.promote_subtitle(node)
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
for node in mtree.nodes():
|
||||
if 'language' not in node.guess:
|
||||
continue
|
||||
|
||||
explicit_group = mtree.node_at(node.node_idx[:2])
|
||||
group_str = explicit_group.value.lower()
|
||||
|
||||
for sub_suffix in subtitle_suffixes:
|
||||
if (sub_suffix in find_words(group_str) and
|
||||
(node.span[0] - explicit_group.span[0]) < group_str.find(sub_suffix)):
|
||||
is_a_prefix = False
|
||||
for prefix in prefixes:
|
||||
if prefix[0] == explicit_group and group_str.find(prefix[1]) == group_str.find(sub_suffix):
|
||||
is_a_prefix = True
|
||||
break
|
||||
if not is_a_prefix:
|
||||
self.promote_subtitle(node)
|
||||
|
||||
@@ -23,6 +23,8 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import found_property
|
||||
from guessit import u
|
||||
from guessit.patterns.list import all_separators
|
||||
from guessit.language import all_lang_prefixes_suffixes
|
||||
|
||||
|
||||
class GuessMovieTitleFromPosition(Transformer):
|
||||
@@ -36,6 +38,13 @@ class GuessMovieTitleFromPosition(Transformer):
|
||||
options = options or {}
|
||||
return not options.get('skip_title') and not mtree.guess.get('type', '').startswith('episode')
|
||||
|
||||
@staticmethod
|
||||
def excluded_word(*values):
|
||||
for value in values:
|
||||
if value.clean_value.lower() in all_separators + all_lang_prefixes_suffixes:
|
||||
return True
|
||||
return False
|
||||
|
||||
def process(self, mtree, options=None):
|
||||
"""
|
||||
try to identify the remaining unknown groups by looking at their
|
||||
@@ -44,14 +53,16 @@ class GuessMovieTitleFromPosition(Transformer):
|
||||
if 'title' in mtree.info:
|
||||
return
|
||||
|
||||
basename = mtree.node_at((-2,))
|
||||
path_nodes = list(filter(lambda x: x.category == 'path', mtree.nodes()))
|
||||
|
||||
basename = path_nodes[-2]
|
||||
all_valid = lambda leaf: len(leaf.clean_value) > 0
|
||||
basename_leftover = list(basename.unidentified_leaves(valid=all_valid))
|
||||
|
||||
try:
|
||||
folder = mtree.node_at((-3,))
|
||||
folder = path_nodes[-3]
|
||||
folder_leftover = list(folder.unidentified_leaves())
|
||||
except ValueError:
|
||||
except IndexError:
|
||||
folder = None
|
||||
folder_leftover = []
|
||||
|
||||
@@ -61,7 +72,9 @@ class GuessMovieTitleFromPosition(Transformer):
|
||||
# specific cases:
|
||||
# if we find the same group both in the folder name and the filename,
|
||||
# it's a good candidate for title
|
||||
if folder_leftover and basename_leftover and folder_leftover[0].clean_value == basename_leftover[0].clean_value:
|
||||
if (folder_leftover and basename_leftover and
|
||||
folder_leftover[0].clean_value == basename_leftover[0].clean_value and
|
||||
not GuessMovieTitleFromPosition.excluded_word(folder_leftover[0])):
|
||||
found_property(folder_leftover[0], 'title', confidence=0.8)
|
||||
return
|
||||
|
||||
@@ -89,7 +102,8 @@ class GuessMovieTitleFromPosition(Transformer):
|
||||
if (series.clean_value != title.clean_value and
|
||||
series.clean_value != film_number.clean_value and
|
||||
basename_leaves.index(film_number) == 0 and
|
||||
basename_leaves.index(title) == 1):
|
||||
basename_leaves.index(title) == 1 and
|
||||
not GuessMovieTitleFromPosition.excluded_word(title, series)):
|
||||
|
||||
found_property(title, 'title', confidence=0.6)
|
||||
found_property(series, 'filmSeries', confidence=0.6)
|
||||
@@ -103,8 +117,9 @@ class GuessMovieTitleFromPosition(Transformer):
|
||||
if groups_before:
|
||||
try:
|
||||
node = next(groups_before)
|
||||
found_property(node, 'title', confidence=0.8)
|
||||
return
|
||||
if not GuessMovieTitleFromPosition.excluded_word(node):
|
||||
found_property(node, 'title', confidence=0.8)
|
||||
return
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
@@ -125,8 +140,10 @@ class GuessMovieTitleFromPosition(Transformer):
|
||||
# if they're all in the same group, take leftover info from there
|
||||
leftover = mtree.node_at((group_idx,)).unidentified_leaves()
|
||||
try:
|
||||
found_property(next(leftover), 'title', confidence=0.7)
|
||||
return
|
||||
node = next(leftover)
|
||||
if not GuessMovieTitleFromPosition.excluded_word(node):
|
||||
found_property(node, 'title', confidence=0.7)
|
||||
return
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
@@ -138,7 +155,8 @@ class GuessMovieTitleFromPosition(Transformer):
|
||||
# ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
|
||||
# ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
|
||||
if (basename_leftover[0].clean_value.count(' ') == 0 and
|
||||
folder_leftover and folder_leftover[0].clean_value.count(' ') >= 2):
|
||||
folder_leftover and folder_leftover[0].clean_value.count(' ') >= 2 and
|
||||
not GuessMovieTitleFromPosition.excluded_word(folder_leftover[0])):
|
||||
|
||||
found_property(folder_leftover[0], 'title', confidence=0.7)
|
||||
return
|
||||
@@ -148,26 +166,28 @@ class GuessMovieTitleFromPosition(Transformer):
|
||||
# ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
|
||||
if basename_leftover[0].is_explicit():
|
||||
for basename_leftover_elt in basename_leftover:
|
||||
if not basename_leftover_elt.is_explicit():
|
||||
if not basename_leftover_elt.is_explicit() and not GuessMovieTitleFromPosition.excluded_word(basename_leftover_elt):
|
||||
found_property(basename_leftover_elt, 'title', confidence=0.8)
|
||||
return
|
||||
|
||||
# if all else fails, take the first remaining unidentified group in the
|
||||
# basename as title
|
||||
found_property(basename_leftover[0], 'title', confidence=0.6)
|
||||
return
|
||||
if not GuessMovieTitleFromPosition.excluded_word(basename_leftover[0]):
|
||||
found_property(basename_leftover[0], 'title', confidence=0.6)
|
||||
return
|
||||
|
||||
# if there are no leftover groups in the basename, look in the folder name
|
||||
if folder_leftover:
|
||||
if folder_leftover and not GuessMovieTitleFromPosition.excluded_word(folder_leftover[0]):
|
||||
found_property(folder_leftover[0], 'title', confidence=0.5)
|
||||
return
|
||||
|
||||
# if nothing worked, look if we have a very small group at the beginning
|
||||
# of the basename
|
||||
basename = mtree.node_at((-2,))
|
||||
basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
|
||||
try:
|
||||
found_property(next(basename_leftover), 'title', confidence=0.4)
|
||||
return
|
||||
node = next(basename_leftover)
|
||||
if not GuessMovieTitleFromPosition.excluded_word(node):
|
||||
found_property(node, 'title', confidence=0.4)
|
||||
return
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
@@ -22,7 +22,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
||||
|
||||
import re
|
||||
|
||||
from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer, ChainedValidator, DefaultValidator, OnlyOneValidator, LeftValidator, NeighborValidator
|
||||
from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer, ChainedValidator, DefaultValidator, OnlyOneValidator, LeftValidator, NeighborValidator, FullMatchValidator
|
||||
from guessit.patterns import sep, build_or_pattern
|
||||
from guessit.patterns.extension import subtitle_exts, video_exts, info_exts
|
||||
from guessit.patterns.numeral import numeral, parse_numeral
|
||||
@@ -61,7 +61,6 @@ class GuessProperties(Transformer):
|
||||
for canonical_form, quality in quality_dict.items():
|
||||
self.qualities.register_quality(propname, canonical_form, quality)
|
||||
|
||||
register_property('container', {'mp4': ['MP4']})
|
||||
|
||||
# http://en.wikipedia.org/wiki/Pirated_movie_release_types
|
||||
register_property('format', {'VHS': ['VHS', 'VHS-Rip'],
|
||||
@@ -74,11 +73,11 @@ class GuessProperties(Transformer):
|
||||
'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
|
||||
'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
|
||||
'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'],
|
||||
'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP'],
|
||||
'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP', 'HD-RIP'],
|
||||
'VOD': ['VOD', 'VOD-Rip'],
|
||||
'WEBRip': ['WEB-Rip'],
|
||||
'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'],
|
||||
'HD-DVD': ['HD-(?:DVD)?-Rip', 'HD-DVD'],
|
||||
'HD-DVD': ['HD-DVD-Rip', 'HD-DVD'],
|
||||
'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
|
||||
})
|
||||
|
||||
@@ -112,32 +111,13 @@ class GuessProperties(Transformer):
|
||||
},
|
||||
validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()))
|
||||
|
||||
class ResolutionValidator(object):
|
||||
"""Make sure our match is surrounded by separators, or by another entry"""
|
||||
@staticmethod
|
||||
def validate(prop, string, node, match, entry_start, entry_end):
|
||||
"""
|
||||
span = _get_span(prop, match)
|
||||
span = _trim_span(span, string[span[0]:span[1]])
|
||||
start, end = span
|
||||
|
||||
sep_start = start <= 0 or string[start - 1] in sep
|
||||
sep_end = end >= len(string) or string[end] in sep
|
||||
start_by_other = start in entry_end
|
||||
end_by_other = end in entry_start
|
||||
if (sep_start or start_by_other) and (sep_end or end_by_other):
|
||||
return True
|
||||
return False
|
||||
"""
|
||||
return True
|
||||
|
||||
_digits_re = re.compile('\d+')
|
||||
|
||||
def resolution_formatter(value):
|
||||
digits = _digits_re.findall(value)
|
||||
return 'x'.join(digits)
|
||||
|
||||
self.container.register_property('screenSize', '\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter, validator=ChainedValidator(DefaultValidator(), ResolutionValidator()))
|
||||
self.container.register_property('screenSize', '\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter)
|
||||
|
||||
register_quality('screenSize', {'360p': -300,
|
||||
'368p': -200,
|
||||
@@ -239,8 +219,8 @@ class GuessProperties(Transformer):
|
||||
|
||||
self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}', enhance=False, canonical_from_pattern=False)
|
||||
|
||||
weak_episode_words = ['pt', 'part']
|
||||
self.container.register_property(None, '(' + build_or_pattern(weak_episode_words) + sep + '?(?P<part>' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)
|
||||
part_words = ['pt', 'part']
|
||||
self.container.register_property(None, '(' + build_or_pattern(part_words) + sep + '?(?P<part>' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)
|
||||
|
||||
register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
|
||||
'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
|
||||
@@ -249,13 +229,15 @@ class GuessProperties(Transformer):
|
||||
'Netflix': ['Netflix', 'NF']
|
||||
})
|
||||
|
||||
self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=NeighborValidator())
|
||||
self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
|
||||
self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper')
|
||||
self.container.register_property('other', 'Fansub', canonical_form='Fansub')
|
||||
self.container.register_property('other', 'Fastsub', canonical_form='Fastsub')
|
||||
self.container.register_property('other', 'Fansub', canonical_form='Fansub', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
|
||||
self.container.register_property('other', 'Fastsub', canonical_form='Fastsub', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
|
||||
self.container.register_property('other', '(?:Seasons?' + sep + '?)?Complete', canonical_form='Complete')
|
||||
self.container.register_property('other', 'R5', 'RC', canonical_form='R5')
|
||||
self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair')
|
||||
self.container.register_property('other', 'CC') # Close Caption
|
||||
self.container.register_property('other', 'LD', 'MD') # Line/Mic Dubbed
|
||||
|
||||
self.container.register_canonical_properties('other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ',
|
||||
'DDC',
|
||||
@@ -271,10 +253,29 @@ class GuessProperties(Transformer):
|
||||
|
||||
def guess_properties(self, string, node=None, options=None):
|
||||
found = self.container.find_properties(string, node, options)
|
||||
return self.container.as_guess(found, string)
|
||||
guess = self.container.as_guess(found, string)
|
||||
|
||||
if guess and node:
|
||||
if 'part' in guess:
|
||||
# If two guesses contains both part in same group, create an partList
|
||||
for existing_guess in node.group_node().guesses:
|
||||
if 'part' in existing_guess:
|
||||
if 'partList' not in existing_guess:
|
||||
existing_guess['partList'] = [existing_guess['part']]
|
||||
existing_guess['partList'].append(guess['part'])
|
||||
existing_guess['partList'].sort()
|
||||
if existing_guess['part'] > guess['part']:
|
||||
existing_guess.set_confidence('part', 0)
|
||||
else:
|
||||
guess.set_confidence('part', 0)
|
||||
guess['partList'] = list(existing_guess['partList'])
|
||||
|
||||
return guess
|
||||
|
||||
def supported_properties(self):
|
||||
return self.container.get_supported_properties()
|
||||
supported_properties = list(self.container.get_supported_properties())
|
||||
supported_properties.append('partList')
|
||||
return supported_properties
|
||||
|
||||
def process(self, mtree, options=None):
|
||||
GuessFinder(self.guess_properties, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||
|
||||
@@ -93,8 +93,12 @@ class GuessReleaseGroup(Transformer):
|
||||
return False
|
||||
if self.re_sep.match(val[-1]):
|
||||
val = val[:len(val)-1]
|
||||
if not val:
|
||||
return False
|
||||
if self.re_sep.match(val[0]):
|
||||
val = val[1:]
|
||||
if not val:
|
||||
return False
|
||||
guess['releaseGroup'] = val
|
||||
forbidden = False
|
||||
for forbidden_lambda in self._forbidden_groupname_lambda:
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import re
|
||||
from guessit.patterns.list import list_parser, all_separators_re
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
|
||||
@@ -38,11 +39,14 @@ class GuessWeakEpisodesRexps(Transformer):
|
||||
of_separators = ['of', 'sur', '/', '\\']
|
||||
of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)
|
||||
|
||||
self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
|
||||
self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False, remove_duplicates=True)
|
||||
|
||||
episode_words = ['episodes?']
|
||||
|
||||
def _formater(episode_number):
|
||||
def episode_list_parser(value):
|
||||
return list_parser(value, 'episodeList')
|
||||
|
||||
def season_episode_parser(episode_number):
|
||||
epnum = parse_numeral(episode_number)
|
||||
if not valid_year(epnum):
|
||||
if epnum > 100:
|
||||
@@ -55,24 +59,46 @@ class GuessWeakEpisodesRexps(Transformer):
|
||||
else:
|
||||
return epnum
|
||||
|
||||
self.container.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=_formater, disabler=lambda options: options.get('episode_prefer_number') if options else False)
|
||||
self.container.register_property(['episodeNumber', 'season'], '[0-9]{4}', confidence=0.6, formatter=_formater)
|
||||
self.container.register_property('episodeNumber', '[^0-9](\d{1,3})', confidence=0.6, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
|
||||
self.container.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=season_episode_parser, disabler=lambda options: options.get('episode_prefer_number') if options else False)
|
||||
self.container.register_property(['episodeNumber', 'season'], '[0-9]{4}', confidence=0.6, formatter=season_episode_parser)
|
||||
self.container.register_property(None, '(' + build_or_pattern(episode_words) + sep + '?(?P<episodeNumber>' + numeral + '))[^0-9]', confidence=0.4, formatter=parse_numeral)
|
||||
self.container.register_property(None, r'(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral +')', confidence=0.6, formatter=parse_numeral)
|
||||
self.container.register_property('episodeNumber', r'^' + sep + '?(\d{1,3})' + sep, confidence=0.4, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
|
||||
self.container.register_property('episodeNumber', sep + r'(\d{1,3})' + sep + '?$', confidence=0.4, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
|
||||
self.container.register_property('episodeNumber', '[^0-9](\d{2,3}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2,3}' + ')*)', confidence=0.4, formatter=episode_list_parser, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
|
||||
self.container.register_property('episodeNumber', r'^' + sep + '?(\d{2,3}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2,3}' + ')*)' + sep, confidence=0.4, formatter=episode_list_parser, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
|
||||
self.container.register_property('episodeNumber', sep + r'(\d{2,3}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2,3}' + ')*)' + sep + '?$', confidence=0.4, formatter=episode_list_parser, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
|
||||
|
||||
def supported_properties(self):
|
||||
return self.container.get_supported_properties()
|
||||
|
||||
def guess_weak_episodes_rexps(self, string, node=None, options=None):
|
||||
if node and 'episodeNumber' in node.root.info:
|
||||
return None
|
||||
|
||||
properties = self.container.find_properties(string, node, options)
|
||||
guess = self.container.as_guess(properties, string)
|
||||
|
||||
if node and guess:
|
||||
if 'episodeNumber' in guess and 'season' in guess:
|
||||
existing_guesses = list(filter(lambda x: 'season' in x and 'episodeNumber' in x, node.group_node().guesses))
|
||||
if existing_guesses:
|
||||
return None
|
||||
elif 'episodeNumber' in guess:
|
||||
# If we only have episodeNumber in the guess, and another node contains both season and episodeNumber
|
||||
# keep only the second.
|
||||
safe_guesses = list(filter(lambda x: 'season' in x and 'episodeNumber' in x, node.group_node().guesses))
|
||||
if safe_guesses:
|
||||
return None
|
||||
else:
|
||||
# If we have other nodes containing episodeNumber, create an episodeList.
|
||||
existing_guesses = list(filter(lambda x: 'season' not in x and 'episodeNumber' in x, node.group_node().guesses))
|
||||
for existing_guess in existing_guesses:
|
||||
if 'episodeList' not in existing_guess:
|
||||
existing_guess['episodeList'] = [existing_guess['episodeNumber']]
|
||||
existing_guess['episodeList'].append(guess['episodeNumber'])
|
||||
existing_guess['episodeList'].sort()
|
||||
if existing_guess['episodeNumber'] > guess['episodeNumber']:
|
||||
existing_guess.set_confidence('episodeNumber', 0)
|
||||
else:
|
||||
guess.set_confidence('episodeNumber', 0)
|
||||
guess['episodeList'] = list(existing_guess['episodeList'])
|
||||
|
||||
return guess
|
||||
|
||||
def should_process(self, mtree, options=None):
|
||||
|
||||
@@ -42,8 +42,13 @@ class GuessYear(Transformer):
|
||||
|
||||
def second_pass_options(self, mtree, options=None):
|
||||
year_nodes = list(mtree.leaves_containing('year'))
|
||||
if len(year_nodes) > 1:
|
||||
return {'skip_nodes': year_nodes[:len(year_nodes) - 1]}
|
||||
# if we found a year, let's try by ignoring all instances of that year
|
||||
# as a candidate, let's take the one that appears last in the filename
|
||||
if year_nodes:
|
||||
year_candidate = year_nodes[-1].guess['year']
|
||||
year_nodes = [year for year in year_nodes if year.guess['year'] != year_candidate]
|
||||
if year_nodes:
|
||||
return {'skip_nodes': year_nodes}
|
||||
return None
|
||||
|
||||
def process(self, mtree, options=None):
|
||||
|
||||
@@ -37,7 +37,7 @@ class SplitExplicitGroups(Transformer):
|
||||
:return: return the string split into explicit groups, that is, those either
|
||||
between parenthese, square brackets or curly braces, and those separated
|
||||
by a dash."""
|
||||
for c in mtree.children:
|
||||
for c in mtree.unidentified_leaves():
|
||||
groups = find_first_level_groups(c.value, group_delimiters[0])
|
||||
for delimiters in group_delimiters:
|
||||
flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
|
||||
@@ -47,4 +47,24 @@ class SplitExplicitGroups(Transformer):
|
||||
# patterns, such as dates, etc...
|
||||
# groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])
|
||||
|
||||
c.split_on_components(groups)
|
||||
c.split_on_components(groups, category='explicit')
|
||||
|
||||
def post_process(self, mtree, options=None):
    """
    Decrease confidence for properties found in explicit groups.

    :param mtree: match tree; its ``nodes()`` are inspected
    :param options: optional dict of options; when its ``'name_only'`` entry
        is truthy nothing is altered. May be None.
    :return: None
    """
    # options defaults to None, so it must be guarded before calling .get()
    if options and options.get('name_only'):
        return

    # collect first, then alter, so the tree is fully walked before any change
    explicit_nodes = [node for node in mtree.nodes()
                      if node.category == 'explicit' and node.is_explicit()]

    for explicit_node in explicit_nodes:
        self.alter_confidence(explicit_node, 0.5)
|
||||
|
||||
def alter_confidence(self, node, factor):
    """
    Multiply the confidence of every property of every guess on a node.

    :param node: object exposing a ``guesses`` iterable
    :param factor: multiplier applied to each property's current confidence
    :return: None
    """
    for current_guess in node.guesses:
        for prop_name in current_guess.keys():
            scaled = current_guess.confidence(prop_name) * factor
            current_guess.set_confidence(prop_name, scaled)
|
||||
|
||||
@@ -45,4 +45,4 @@ class SplitOnDash(Transformer):
|
||||
match = pattern.search(node.value, span[1])
|
||||
|
||||
if indices:
|
||||
node.partition(indices)
|
||||
node.partition(indices, category='dash')
|
||||
|
||||
@@ -41,6 +41,32 @@ class SplitPathComponents(Transformer):
|
||||
components += list(splitext(basename))
|
||||
components[-1] = components[-1][1:] # remove the '.' from the extension
|
||||
|
||||
mtree.split_on_components(components)
|
||||
mtree.split_on_components(components, category='path')
|
||||
else:
|
||||
mtree.split_on_components([mtree.value, ''])
|
||||
mtree.split_on_components([mtree.value, ''], category='path')
|
||||
|
||||
def post_process(self, mtree, options=None):
    """
    Decrease confidence for properties found in directories, filename should always have priority.

    :param mtree: match tree; its ``nodes()`` with category ``'path'`` are altered
    :param options: optional dict of options; when its ``'name_only'`` entry
        is truthy nothing is altered. May be None.
    :return: None
    """
    # options defaults to None, so it must be guarded before calling .get()
    if options and options.get('name_only'):
        return

    path_nodes = [node for node in mtree.nodes() if node.category == 'path']

    # everything before the last two path nodes gets the strongest penalty
    for path_node in path_nodes[:-2]:
        self.alter_confidence(path_node, 0.3)

    # the second-to-last path node gets a milder penalty; an explicit length
    # check is used instead of catching IndexError so that errors raised
    # inside alter_confidence are not silently swallowed
    if len(path_nodes) >= 2:
        last_directory_node = path_nodes[-2]
        self.alter_confidence(last_directory_node, 0.6)
|
||||
|
||||
def alter_confidence(self, node, factor):
    """
    Scale the confidence of each property in each guess attached to a node.

    :param node: object exposing a ``guesses`` iterable
    :param factor: multiplier applied to each property's current confidence
    :return: None
    """
    for node_guess in node.guesses:
        for key in node_guess.keys():
            previous = node_guess.confidence(key)
            node_guess.set_confidence(key, previous * factor)
|
||||
|
||||
@@ -249,9 +249,9 @@ def search_external_subtitles(path):
|
||||
subtitles = {}
|
||||
for p in os.listdir(dirpath):
|
||||
# skip badly encoded filenames
|
||||
#if isinstance(p, bytes): # pragma: no cover
|
||||
# logger.error('Skipping badly encoded filename %r in %r', p.decode('utf-8', errors='replace'), dirpath)
|
||||
# continue
|
||||
if isinstance(p, bytes): # pragma: no cover
|
||||
logger.error('Skipping badly encoded filename %r in %r', p.decode('utf-8', errors='replace'), dirpath)
|
||||
continue
|
||||
|
||||
# keep only valid subtitle filenames
|
||||
if not p.startswith(fileroot) or not p.endswith(SUBTITLE_EXTENSIONS):
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from .patch_provider_pool import PatchedProviderPool
|
||||
from .patch_providers import PatchedAddic7edProvider
|
||||
from .patch_video import patched_search_external_subtitles
|
||||
import subliminal
|
||||
import babelfish
|
||||
|
||||
@@ -14,3 +15,6 @@ subliminal.providers.addic7ed.Addic7edProvider = PatchedAddic7edProvider
|
||||
# add language converters
|
||||
babelfish.language_converters.register('addic7ed = subliminal_patch.patch_language:PatchedAddic7edConverter')
|
||||
babelfish.language_converters.register('tvsubtitles = subliminal.converters.tvsubtitles:TVsubtitlesConverter')
|
||||
|
||||
# patch subliminal's external subtitles search algorithm
|
||||
subliminal.video.search_external_subtitles = patched_search_external_subtitles
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import logging
|
||||
from random import randint
|
||||
from subliminal.providers.addic7ed import Addic7edProvider
|
||||
from subliminal.providers.addic7ed import Addic7edProvider, Addic7edSubtitle, ParserBeautifulSoup, series_year_re, Language
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -22,3 +22,50 @@ class PatchedAddic7edProvider(Addic7edProvider):
|
||||
'User-Agent': AGENT_LIST[randint(0, len(AGENT_LIST)-1)],
|
||||
'Referer': self.server_url,
|
||||
}
|
||||
|
||||
def query(self, series, season, year=None, country=None):
    """
    Fetch the season page of a show and return its completed subtitles.

    :param series: series name, used to look up the show id
    :param season: season number requested from the show page
    :param year: optional year passed to the show id lookup
    :param country: optional country passed to the show id lookup
    :return: list of Addic7edSubtitle; empty when no show id is found
    :raises: whatever ``r.raise_for_status()`` raises on an HTTP error
    """
    # get the show id
    show_id = self.get_show_id(series, year, country)
    if show_id is None:
        logger.error('No show id found for %r (%r)', series, {'year': year, 'country': country})
        return []

    # get the page of the season of the show
    logger.info('Getting the page of show id %d, season %d', show_id, season)
    r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10)
    r.raise_for_status()
    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # prefer the series name and year shown in the page header when it can be parsed
    header = soup.select('#header font')
    if header:
        # the last 10 characters of the header text are dropped before matching
        # NOTE(review): presumably a trailing " Subtitles" suffix - confirm
        match = series_year_re.match(header[0].text.strip()[:-10])
        if match:
            series = match.group('series')
            year = int(match.group('year')) if match.group('year') else None
        else:
            # an unparsable header must not abort the query; keep the caller's values
            logger.debug('Could not parse series header %r, keeping %r (%r)', header[0].text, series, year)

    # loop over subtitle rows
    subtitles = []
    for row in soup.select('tr.epeven'):
        cells = row('td')

        # ignore incomplete subtitles
        status = cells[5].text
        if status != 'Completed':
            logger.debug('Ignoring subtitle with status %s', status)
            continue

        # read the item; separate local names avoid rebinding the ``season`` parameter
        language = Language.fromaddic7ed(cells[3].text)
        hearing_impaired = bool(cells[6].text)
        page_link = self.server_url + cells[2].a['href'][1:]
        row_season = int(cells[0].text)
        row_episode = int(cells[1].text)
        title = cells[2].text
        version = cells[4].text
        download_link = cells[9].a['href'][1:]

        subtitle = Addic7edSubtitle(language, hearing_impaired, page_link, series, row_season, row_episode,
                                    title, year, version, download_link)
        logger.debug('Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
# coding=utf-8
|
||||
|
||||
import os
|
||||
import logging
|
||||
from subliminal.video import SUBTITLE_EXTENSIONS, Language
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# may be absolute or relative paths; set to selected options
|
||||
CUSTOM_PATHS = []
|
||||
|
||||
def _search_external_subtitles(path):
    """
    List the subtitle files sitting in the directory of ``path`` that belong to it.

    A directory entry is kept when it starts with the file root of ``path`` and
    ends with one of SUBTITLE_EXTENSIONS. Whatever sits between the file root
    and the subtitle extension is parsed as an IETF language code.

    :param path: path of the video; only its directory and file name are used
    :return: dict mapping subtitle file name to Language (``und`` when no
        code is present or the code cannot be parsed)
    """
    dirpath, filename = os.path.split(path)
    if not dirpath:
        dirpath = '.'
    fileroot, fileext = os.path.splitext(filename)

    subtitles = {}
    for entry in os.listdir(dirpath):
        # keep only valid subtitle filenames
        if not (entry.startswith(fileroot) and entry.endswith(SUBTITLE_EXTENSIONS)):
            continue

        # extract the potential language code: strip the file root, the subtitle
        # extension, the video extension and finally the leading separator
        subtitle_ext = os.path.splitext(entry)[1]
        middle = entry[len(fileroot):-len(subtitle_ext)]
        language_code = middle.replace(fileext, '').replace('_', '-')[1:]

        # default language is undefined
        language = Language('und')

        # attempt to parse
        if language_code:
            try:
                language = Language.fromietf(language_code)
            except ValueError:
                logger.error('Cannot parse language code %r', language_code)

        subtitles[entry] = language

    logger.debug('Found subtitles %r', subtitles)

    return subtitles
|
||||
|
||||
def patched_search_external_subtitles(path):
    """
    Search the video's own folder and every folder in CUSTOM_PATHS for subtitles.

    Wraps _search_external_subtitles so several folders are scanned for one
    given video. Entries of CUSTOM_PATHS may be absolute, or relative to the
    video's folder.

    :param path: path of the video
    :return: dict merged from the results of every scanned folder
    """
    video_path, video_filename = os.path.split(path)
    subtitles = {}
    for position, folder_or_subfolder in enumerate([video_path] + CUSTOM_PATHS):
        try:
            # the first entry is the video's own folder and is used as-is; joining it
            # onto itself would turn a relative "foo" into "foo/foo"
            if position == 0 or os.path.isabs(folder_or_subfolder):
                base_folder = folder_or_subfolder
            else:
                base_folder = os.path.join(video_path, folder_or_subfolder)
            abspath = unicode(os.path.abspath(os.path.join(base_folder, video_filename)))
        except Exception as e:
            # best effort: a folder that cannot be turned into a path is skipped, not fatal
            logger.error("skipping path %s because of %s", repr(folder_or_subfolder), e)
            continue
        logger.debug("external subs: scanning path %s", abspath)

        # only scan folders that actually exist
        if os.path.isdir(os.path.dirname(abspath)):
            subtitles.update(_search_external_subtitles(abspath))
    logger.debug("external subs: found %s", subtitles)
    return subtitles
|
||||
|
||||
@@ -1,16 +1,20 @@
|
||||
pannal's fork:
|
||||
|
||||
- ~~increased score of addic7ed subtitles a bit~~ (not existing currently)
|
||||
- **support for newest Subliminal (1.0.1) and guessit (0.10.1)**
|
||||
- **plugin now also works with com.plexapp.agents.thetvdbdvdorder**
|
||||
- guessit's release-group detection bug fixed (*not the correct way, though. has already been fixed in guessit itself, need to merge*)
|
||||
- providers fixed for subliminal 1.0.1 (at least addic7ed)
|
||||
- support for addic7ed languages: French (Canadian)
|
||||
- support for additional languages: pt-br (Portuguese (Brasil)), fa (Persian (Farsi))
|
||||
- support for three (two optional) subtitle languages
|
||||
#### beta5
|
||||
- fix storing subtitles beside the actual video file, not subfolder (fixes #14)
|
||||
- "custom folder" setting now always used if given (properly overrides "subtitle folder" setting)
|
||||
- also scan (custom) given subtitle folders for existing subtitles instead of redownloading them on every refresh (fixes #9, #2)
|
||||
|
||||
bugs:
|
||||
- skip existing subtitles (not in video's path - e.g. subFolder given) currently broken
|
||||
beta4
|
||||
- ~~increased score of addic7ed subtitles a bit~~ (not existing currently)
|
||||
- **support for newest Subliminal ([1.0.1](27a6e51cd36ffb2910cd9a7add6d797a2c6469b7)) and guessit ([0.11.0](2814f57e8999dcc31575619f076c0c1a63ce78f2))**
|
||||
- **plugin now also [works with com.plexapp.agents.thetvdbdvdorder](924470d2c0db3a71529278bce4b7247eaf2f85b8)**
|
||||
- providers fixed for subliminal 1.0.1 ([at least addic7ed](131504e7eed8b3400c457fbe49beea3b115bc916))
|
||||
- providers [don't simply fail and get excluded on non-detected language](1a779020792e0201ad689eefbf5a126155e89c97)
|
||||
- support for addic7ed languages: [French (Canadian)](b11a051c233fd72033f0c3b5a8c1965260e7e19f)
|
||||
- support for additional languages: [pt-br (Portuguese (Brazil)), fa (Persian (Farsi))](131504e7eed8b3400c457fbe49beea3b115bc916)
|
||||
- support for [three (two optional) subtitle languages](e543c927cf49c264eaece36640c99d67a99c7da2)
|
||||
- optionally use [random user agent for addic7ed provider](83ace14faf75fbd75313f0ceda9b78161895fbcf) (should not be needed)
|
||||
|
||||
Subliminal.bundle
|
||||
=================
|
||||
|
||||
Reference in New Issue
Block a user