Compare commits

..

4 Commits

Author SHA1 Message Date
Antoine Bertin 60610e2032 Merge branch 'develop'
Conflicts:
	requirements.txt
	setup.py
	subliminal/infos.py
2013-10-29 12:43:20 +01:00
Antoine Bertin 277b046b41 Fix requirements for enzyme 0.3 2013-05-19 15:44:49 +02:00
Antoine Bertin c823eda245 Update NEWS 2013-01-17 21:09:28 +01:00
Antoine Bertin 6340de0ddb Fix requirements due to requests 1.0 2013-01-17 20:49:41 +01:00
17 changed files with 181 additions and 429 deletions
+12 -10
View File
@@ -1,16 +1,6 @@
Changelog
=========
0.7.1
-----
**release date:** 2013-11-06
* Improve CLI
* Add login support for Addic7ed
* Remove lxml dependency
* Many fixes
0.7.0
-----
**release date:** 2013-10-29
@@ -29,6 +19,18 @@ Changelog
* Drop a few providers
* And much more...
0.6.4
-----
**release date:** 2013-05-19
* Fix requirements due to enzyme 0.3
0.6.3
-----
**release date:** 2013-01-17
* Fix requirements due to requests 1.0
0.6.2
-----
**release date:** 2012-09-15
-1
View File
@@ -1,4 +1,3 @@
sympy>=0.7.3
sphinx>=1.1.3
sphinxcontrib-programoutput>=0.8
Sphinx-PyPI-upload>=0.2.1
+1
View File
@@ -4,6 +4,7 @@ Video
.. autodata:: VIDEO_EXTENSIONS
.. autodata:: SUBTITLE_EXTENSIONS
.. autodata:: LANGUAGE_EXTENSIONS
.. autoclass:: Video
:members:
.. autoclass:: Episode
+4 -3
View File
@@ -1,9 +1,10 @@
beautifulsoup4>=4.3.2
guessit>=0.6.1
requests>=2.0.1
enzyme>=0.4.0
requests>=2.0.0
enzyme>=0.3.1
html5lib>=0.99
dogpile.cache>=0.5.1
babelfish>=0.2.1
babelfish>=0.1.5
lxml>=3.2.3
charade>=1.0.3
pysrt>=0.5.0
+1 -1
View File
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
setup(name='subliminal',
version='0.7.1',
version='0.7.0',
license='MIT',
description='Subtitles, faster than your thoughts',
long_description=open('README.rst').read() + '\n\n' + open('HISTORY.rst').read(),
+1 -1
View File
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
__title__ = 'subliminal'
__version__ = '0.7.1'
__version__ = '0.7.0'
__author__ = 'Antoine Bertin'
__license__ = 'MIT'
__copyright__ = 'Copyright 2013 Antoine Bertin'
+29 -55
View File
@@ -4,7 +4,6 @@ import collections
import io
import logging
import operator
import babelfish
import pkg_resources
from .exceptions import ProviderNotAvailable, InvalidSubtitle
from .subtitle import get_subtitle_path
@@ -34,18 +33,17 @@ def list_subtitles(videos, languages, providers=None, provider_configs=None):
provider_configs = provider_configs or {}
subtitles = collections.defaultdict(list)
# filter videos
videos = [v for v in videos if v.subtitle_languages & languages < languages]
videos = [v for v in videos if v.subtitle_languages != languages]
if not videos:
logger.info('No video to download subtitles for with languages %r', languages)
return subtitles
subtitle_languages = set.intersection(*[v.subtitle_languages for v in videos])
for provider_entry_point in pkg_resources.iter_entry_points(PROVIDERS_ENTRY_POINT):
# filter and initialize provider
if providers is not None and provider_entry_point.name not in providers:
logger.debug('Skipping provider %r: not in the list', provider_entry_point.name)
continue
Provider = provider_entry_point.load()
provider_languages = Provider.languages & languages - subtitle_languages
provider_languages = Provider.languages & languages
if not provider_languages:
logger.debug('Skipping provider %r: no language to search for', provider_entry_point.name)
continue
@@ -55,28 +53,20 @@ def list_subtitles(videos, languages, providers=None, provider_configs=None):
continue
# list subtitles with the provider
try:
with Provider(**provider_configs.get(provider_entry_point.name, {})) as provider:
for provider_video in provider_videos:
provider_video_languages = provider_languages - provider_video.subtitle_languages
if not provider_video_languages:
logger.debug('Skipping provider %r: no language to search for for video %r',
provider_entry_point.name, provider_video)
continue
logger.info('Listing subtitles with provider %r for video %r with languages %r',
provider_entry_point.name, provider_video, provider_video_languages)
try:
provider_subtitles = provider.list_subtitles(provider_video, provider_video_languages)
except ProviderNotAvailable:
logger.warning('Provider %r is not available, discarding it', provider_entry_point.name)
break
except:
logger.exception('Unexpected error in provider %r', provider_entry_point.name)
continue
logger.info('Found %d subtitles', len(provider_subtitles))
subtitles[provider_video].extend(provider_subtitles)
except ProviderNotAvailable:
logger.warning('Provider %r is not available, discarding it', provider_entry_point.name)
with Provider(**provider_configs.get(provider_entry_point.name, {})) as provider:
for provider_video in provider_videos:
logger.info('Listing subtitles with provider %r for video %r with languages %r',
provider_entry_point.name, provider_video, provider_languages)
try:
provider_subtitles = provider.list_subtitles(provider_video, provider_languages)
except ProviderNotAvailable:
logger.warning('Provider %r is not available, discarding it', provider_entry_point.name)
break
except:
logger.exception('Unexpected error in provider %r', provider_entry_point.name)
continue
logger.info('Found %d subtitles', len(provider_subtitles))
subtitles[provider_video].extend(provider_subtitles)
return subtitles
@@ -134,19 +124,14 @@ def download_subtitles(subtitles, provider_configs=None, single=False):
except:
logger.exception('Unexpected error in provider %r', subtitle.provider_name)
continue
with io.open(subtitle_path, 'w', encoding='utf-8') as f:
with io.open(subtitle_path, 'w') as f:
f.write(subtitle_text)
downloaded_languages.add(subtitle.language)
if single or downloaded_languages == languages:
break
finally: # terminate providers
for (provider_name, provider) in initialized_providers.items():
try:
provider.terminate()
except ProviderNotAvailable:
logger.warning('Provider %r is not available, unable to terminate', provider_name)
except:
logger.exception('Unexpected error in provider %r', provider_name)
for provider in initialized_providers.values():
provider.terminate()
def download_best_subtitles(videos, languages, providers=None, provider_configs=None, single=False, min_score=0,
@@ -170,20 +155,18 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs=
discarded_providers = set()
downloaded_subtitles = collections.defaultdict(list)
# filter videos
videos = [v for v in videos if v.subtitle_languages & languages < languages
and (not single or babelfish.Language('und') not in v.subtitle_languages)]
videos = [v for v in videos if v.subtitle_languages != languages]
if not videos:
logger.info('No video to download subtitles for with languages %r', languages)
return downloaded_subtitles
# filter and initialize providers
subtitle_languages = set.intersection(*[v.subtitle_languages for v in videos])
initialized_providers = {}
for provider_entry_point in pkg_resources.iter_entry_points(PROVIDERS_ENTRY_POINT):
if providers is not None and provider_entry_point.name not in providers:
logger.debug('Skipping provider %r: not in the list', provider_entry_point.name)
continue
Provider = provider_entry_point.load()
if not Provider.languages & languages - subtitle_languages:
if not Provider.languages & languages:
logger.debug('Skipping provider %r: no language to search for', provider_entry_point.name)
continue
if not [v for v in videos if Provider.check(v)]:
@@ -205,15 +188,11 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs=
if provider_name in discarded_providers:
logger.debug('Skipping discarded provider %r', provider_name)
continue
provider_video_languages = provider.languages & languages - video.subtitle_languages
if not provider_video_languages:
logger.debug('Skipping provider %r: no language to search for for video %r', provider_name,
video)
continue
provider_languages = provider.languages & languages
logger.info('Listing subtitles with provider %r for video %r with languages %r',
provider_name, video, provider_video_languages)
provider_name, video, provider_languages)
try:
provider_subtitles = provider.list_subtitles(video, provider_video_languages)
provider_subtitles = provider.list_subtitles(video, provider_languages)
except ProviderNotAvailable:
logger.warning('Provider %r is not available, discarding it', provider_name)
discarded_providers.add(provider_name)
@@ -225,7 +204,7 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs=
subtitles.extend(provider_subtitles)
# find the best subtitles and download them
downloaded_languages = video.subtitle_languages.copy()
downloaded_languages = set()
for subtitle, score in sorted([(s, s.compute_score(video)) for s in subtitles],
key=operator.itemgetter(1), reverse=True):
# filter
@@ -259,18 +238,13 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs=
except:
logger.exception('Unexpected error in provider %r', subtitle.provider_name)
continue
with io.open(subtitle_path, 'w', encoding='utf-8') as f:
with io.open(subtitle_path, 'w') as f:
f.write(subtitle_text)
downloaded_languages.add(subtitle.language)
if single or downloaded_languages >= languages:
if single or downloaded_languages == languages:
logger.debug('All languages downloaded')
break
finally: # terminate providers
for (provider_name, provider) in initialized_providers.items():
try:
provider.terminate()
except ProviderNotAvailable:
logger.warning('Provider %r is not available, unable to terminate', provider_name)
except:
logger.exception('Unexpected error in provider %r', provider_name)
for provider in initialized_providers.values():
provider.terminate()
return downloaded_subtitles
+31 -106
View File
@@ -11,71 +11,36 @@ import guessit
import pkg_resources
from subliminal import (__version__, PROVIDERS_ENTRY_POINT, cache_region, Video, Episode, Movie, scan_videos,
download_best_subtitles)
try:
import colorlog
except ImportError:
colorlog = None
DEFAULT_CACHE_FILE = os.path.join('~', '.config', 'subliminal.cache.dbm')
def subliminal_parser():
parser = argparse.ArgumentParser(description='Subtitles, faster than your thoughts')
parser.add_argument('-l', '--languages', nargs='+', metavar='LANGUAGE', help='wanted languages as alpha2 code (ISO-639-1)')
parser.add_argument('-p', '--providers', nargs='+', metavar='PROVIDER', help='providers to use from %s (default: all)' % ', '.join(ep.name for ep in pkg_resources.iter_entry_points(PROVIDERS_ENTRY_POINT)))
parser.add_argument('-m', '--min-score', type=int, help='minimum score for subtitles. 0-%d for episodes, 0-%d for movies' % (Episode.scores['hash'], Movie.scores['hash']))
parser.add_argument('-s', '--single', action='store_true', help='download without language code in subtitle\'s filename i.e. .srt only')
parser.add_argument('-f', '--force', action='store_true', help='overwrite existing subtitles')
parser.add_argument('-c', '--cache-file', default=DEFAULT_CACHE_FILE, help='cache file (default: %(default)s)')
parser.add_argument('-a', '--age', help='download subtitles for videos newer than AGE e.g. 12h, 1w2d')
parser.add_argument('--hearing-impaired', action='store_true', help='download hearing impaired subtitles')
group_verbosity = parser.add_mutually_exclusive_group()
group_verbosity.add_argument('-q', '--quiet', action='store_true', help='disable output')
group_verbosity.add_argument('-v', '--verbose', action='store_true', help='verbose output')
parser.add_argument('--version', action='version', version=__version__)
parser.add_argument('paths', nargs='+', metavar='PATH', help='path to video file or folder')
return parser
def subliminal():
parser = argparse.ArgumentParser(prog='subliminal', description='Subtitles, faster than your thoughts',
epilog='Suggestions and bug reports are greatly appreciated: '
'https://github.com/Diaoul/subliminal/issues', add_help=False)
# required arguments
required_arguments_group = parser.add_argument_group('required arguments')
required_arguments_group.add_argument('paths', nargs='+', metavar='PATH', help='path to video file or folder')
required_arguments_group.add_argument('-l', '--languages', nargs='+', required=True, metavar='LANGUAGE',
help='wanted languages as alpha2 code (ISO-639-1)')
# configuration
configuration_group = parser.add_argument_group('configuration')
configuration_group.add_argument('-s', '--single', action='store_true',
help='download without language code in subtitle\'s filename i.e. .srt only')
configuration_group.add_argument('-c', '--cache-file', default=DEFAULT_CACHE_FILE,
help='cache file (default: %(default)s)')
# filtering
filtering_group = parser.add_argument_group('filtering')
providers = [ep.name for ep in pkg_resources.iter_entry_points(PROVIDERS_ENTRY_POINT)]
filtering_group.add_argument('-p', '--providers', nargs='+', metavar='PROVIDER',
help='providers to use (%s)' % ', '.join(providers))
filtering_group.add_argument('-m', '--min-score', type=int,
help='minimum score for subtitles (0-%d for episodes, 0-%d for movies)'
% (Episode.scores['hash'], Movie.scores['hash']))
filtering_group.add_argument('-a', '--age', help='download subtitles for videos newer than AGE e.g. 12h, 1w2d')
filtering_group.add_argument('-h', '--hearing-impaired', action='store_true',
help='download hearing impaired subtitles')
filtering_group.add_argument('-f', '--force', action='store_true',
help='force subtitle download for videos with existing subtitles')
# addic7ed
addic7ed_group = parser.add_argument_group('addic7ed')
addic7ed_group.add_argument('--addic7ed-username', metavar='USERNAME', help='username for addic7ed provider')
addic7ed_group.add_argument('--addic7ed-password', metavar='PASSWORD', help='password for addic7ed provider')
# output
output_group = parser.add_argument_group('output')
output_exclusive_group = output_group.add_mutually_exclusive_group()
output_exclusive_group.add_argument('-q', '--quiet', action='store_true', help='disable output')
output_exclusive_group.add_argument('-v', '--verbose', action='store_true', help='verbose output')
output_group.add_argument('--color', action='store_true', help='add color to console output (requires colorlog)')
# troubleshooting
troubleshooting_group = parser.add_argument_group('troubleshooting')
troubleshooting_group.add_argument('--debug', action='store_true', help='debug output')
troubleshooting_group.add_argument('--version', action='version', version=__version__)
troubleshooting_group.add_argument('--help', action='help', help='show this help message and exit')
# parse args
parser = subliminal_parser()
args = parser.parse_args()
# parse paths
try:
args.paths = [os.path.abspath(os.path.expanduser(p.decode('utf-8'))) for p in args.paths]
args.paths = [p.decode('utf-8') for p in args.paths]
except UnicodeDecodeError:
parser.error('argument paths: encodings is not utf-8: %r' % args.paths)
@@ -90,70 +55,30 @@ def subliminal():
match = re.match(r'^(?:(?P<weeks>\d+?)w)?(?:(?P<days>\d+?)d)?(?:(?P<hours>\d+?)h)?$', args.age)
if not match:
parser.error('argument -a/--age: invalid age: %r' % args.age)
args.age = datetime.timedelta(**{k: int(v) for k, v in match.groupdict(0).items()})
args.age = datetime.timedelta(**match.groupdict())
# parse cache-file
args.cache_file = os.path.abspath(os.path.expanduser(args.cache_file))
if not os.path.exists(os.path.split(args.cache_file)[0]):
parser.error('argument -c/--cache-file: directory %r for cache file does not exist'
% os.path.split(args.cache_file)[0])
# parse provider configs
provider_configs = {}
if (args.addic7ed_username is not None and args.addic7ed_password is None
or args.addic7ed_username is None and args.addic7ed_password is not None):
parser.error('argument --addic7ed-username/--addic7ed-password: both arguments are required or none')
if args.addic7ed_username is not None and args.addic7ed_password is not None:
provider_configs['addic7ed'] = {'username': args.addic7ed_username, 'password': args.addic7ed_password}
# parse color
if args.color and colorlog is None:
parser.error('argument --color: colorlog required')
# setup output
if args.debug:
handler = logging.StreamHandler()
if args.color:
handler.setFormatter(colorlog.ColoredFormatter('%(log_color)s%(levelname)-8s%(reset)s [%(blue)s%(name)s-%(funcName)s:%(lineno)d%(reset)s] %(message)s',
log_colors=dict(colorlog.default_log_colors.items() + [('DEBUG', 'cyan')])))
else:
handler.setFormatter(logging.Formatter('%(levelname)-8s [%(name)s-%(funcName)s:%(lineno)d] %(message)s'))
logging.getLogger().addHandler(handler)
logging.getLogger().setLevel(logging.DEBUG)
elif args.verbose:
handler = logging.StreamHandler()
if args.color:
handler.setFormatter(colorlog.ColoredFormatter('%(log_color)s%(levelname)-8s%(reset)s [%(blue)s%(name)s%(reset)s] %(message)s'))
else:
handler.setFormatter(logging.Formatter('%(levelname)-8s [%(name)s] %(message)s'))
logging.getLogger('subliminal').addHandler(handler)
logging.getLogger('subliminal').setLevel(logging.INFO)
# setup verbosity
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
elif not args.quiet:
handler = logging.StreamHandler()
if args.color:
handler.setFormatter(colorlog.ColoredFormatter('[%(log_color)s%(levelname)s%(reset)s] %(message)s'))
else:
handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
logging.getLogger('subliminal.api').addHandler(handler)
logging.getLogger('subliminal.api').setLevel(logging.INFO)
logging.basicConfig(level=logging.WARN)
# configure cache
cache_region.configure('dogpile.cache.dbm', arguments={'filename': args.cache_file})
cache_region.configure('dogpile.cache.dbm', arguments={'filename': os.path.expanduser(args.cache_file)})
# scan videos
videos = scan_videos([p for p in args.paths if os.path.exists(p)], subtitles=not args.force,
embedded_subtitles=not args.force, age=args.age)
videos = scan_videos([p for p in args.paths if os.path.exists(p)], subtitles=not args.force, age=args.age)
# guess videos
videos.extend([Video.fromguess(os.path.split(p)[1], guessit.guess_file_info(p, 'autodetect')) for p in args.paths
if not os.path.exists(p)])
# download best subtitles
subtitles = download_best_subtitles(videos, args.languages, providers=args.providers,
provider_configs=provider_configs, single=args.single,
min_score=args.min_score, hearing_impaired=args.hearing_impaired)
subtitles = download_best_subtitles(videos, args.languages, providers=args.providers, provider_configs=None,
single=args.single, min_score=args.min_score,
hearing_impaired=args.hearing_impaired)
# result output
# output result
if not subtitles:
if not args.quiet:
sys.stderr.write('No subtitles downloaded\n')
+3 -34
View File
@@ -8,7 +8,7 @@ import requests
from . import Provider
from .. import __version__
from ..cache import region
from ..exceptions import ProviderConfigurationError, ProviderNotAvailable, InvalidSubtitle
from ..exceptions import InvalidSubtitle, ProviderNotAvailable
from ..subtitle import Subtitle, is_valid_subtitle
from ..video import Episode
@@ -58,43 +58,14 @@ class Addic7edProvider(Provider):
'fin', 'fra', 'glg', 'heb', 'hrv', 'hun', 'hye', 'ind', 'ita', 'jpn', 'kor', 'mkd', 'msa',
'nld', 'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'tha',
'tur', 'ukr', 'vie', 'zho']}
video_types = (Episode,)
videos = (Episode,)
server = 'http://www.addic7ed.com'
def __init__(self, username=None, password=None):
if username is not None and password is None or username is None and password is not None:
raise ProviderConfigurationError('Username and password must be specified')
self.username = username
self.password = password
self.logged_in = False
def initialize(self):
self.session = requests.Session()
self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__}
# login
if self.username is not None and self.password is not None:
logger.debug('Logging in')
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
try:
r = self.session.post(self.server + '/dologin.php', data, timeout=10, allow_redirects=False)
except requests.Timeout:
raise ProviderNotAvailable('Timeout after 10 seconds')
if r.status_code == 302:
logger.info('Logged in')
self.logged_in = True
else:
logger.error('Failed to login')
def terminate(self):
# logout
if self.logged_in:
try:
r = self.session.get(self.server + '/logout.php', timeout=10)
logger.info('Logged out')
except requests.Timeout:
raise ProviderNotAvailable('Timeout after 10 seconds')
if r.status_code != 200:
raise ProviderNotAvailable('Request failed with status code %d' % r.status_code)
self.session.close()
def get(self, url, params=None):
@@ -183,9 +154,7 @@ class Addic7edProvider(Provider):
raise ProviderNotAvailable('Timeout after 10 seconds')
if r.status_code != 200:
raise ProviderNotAvailable('Request failed with status code %d' % r.status_code)
if r.headers['Content-Type'] == 'text/html':
raise ProviderNotAvailable('Download limit exceeded')
subtitle_text = r.content.decode(charade.detect(r.content)['encoding'], 'replace')
subtitle_text = r.content.decode(charade.detect(r.content)['encoding'])
if not is_valid_subtitle(subtitle_text):
raise InvalidSubtitle
return subtitle_text
+12 -12
View File
@@ -3,10 +3,10 @@ from __future__ import unicode_literals
import logging
import urllib
import babelfish
import bs4
import charade
import guessit
import requests
import xml.etree.ElementTree
from . import Provider
from .. import __version__
from ..cache import region
@@ -65,7 +65,7 @@ class BierDopjeProvider(Provider):
:param string url: API part of the URL to reach without the leading slash
:param \*\*params: format specs for the `url`
:return: the response
:rtype: :class:`xml.etree.ElementTree.Element`
:rtype: :class:`bs4.BeautifulSoup`
:raise: :class:`~subliminal.exceptions.ProviderNotAvailable`
"""
@@ -77,7 +77,7 @@ class BierDopjeProvider(Provider):
raise ProviderNotAvailable('Too Many Requests')
elif r.status_code != 200:
raise ProviderError('Request failed with status code %d' % r.status_code)
return xml.etree.ElementTree.fromstring(r.content)
return bs4.BeautifulSoup(r.content, ['xml'])
@region.cache_on_arguments()
def find_show_id(self, series):
@@ -89,11 +89,11 @@ class BierDopjeProvider(Provider):
"""
logger.debug('Searching for series %r', series)
root = self.get('FindShowByName/{series}', series=urllib.quote(series))
if root.find('response/status').text == 'false':
soup = self.get('FindShowByName/{series}', series=urllib.quote(series))
if soup.status.contents[0] == 'false':
logger.info('Series %r not found', series)
return None
return int(root.find('response/results/result[1]/showid').text)
return int(soup.showid.contents[0])
def query(self, language, season, episode, tvdb_id=None, series=None):
params = {'language': language.alpha2, 'season': season, 'episode': episode}
@@ -109,13 +109,13 @@ class BierDopjeProvider(Provider):
else:
raise ValueError('Missing parameter tvdb_id or series')
logger.debug('Searching subtitles %r', params)
root = self.get('GetAllSubsFor/{showid}/{season}/{episode}/{language}/{istvdbid}', **params)
if root.find('response/status').text == 'false':
soup = self.get('GetAllSubsFor/{showid}/{season}/{episode}/{language}/{istvdbid}', **params)
if soup.status.contents[0] == 'false':
logger.debug('No subtitle found')
return []
logger.debug('Found subtitles %r', root.find('response/results'))
return [BierDopjeSubtitle(language, season, episode, tvdb_id, series, result.find('filename').text,
result.find('downloadlink').text) for result in root.find('response/results')]
logger.debug('Found subtitles %r', soup.results('result'))
return [BierDopjeSubtitle(language, season, episode, tvdb_id, series, result.filename.contents[0],
result.downloadlink.contents[0]) for result in soup.results('result')]
def list_subtitles(self, video, languages):
subtitles = []
@@ -132,7 +132,7 @@ class BierDopjeProvider(Provider):
raise ProviderNotAvailable('Too Many Requests')
elif r.status_code != 200:
raise ProviderError('Request failed with status code %d' % r.status_code)
subtitle_text = r.content.decode(charade.detect(r.content)['encoding'], 'replace')
subtitle_text = r.content.decode(charade.detect(r.content)['encoding'])
if not is_valid_subtitle(subtitle_text):
raise InvalidSubtitle
return subtitle_text
+1 -1
View File
@@ -153,7 +153,7 @@ class OpenSubtitlesProvider(Provider):
if not response['data']:
raise ProviderError('Nothing to download')
subtitle_bytes = zlib.decompress(base64.b64decode(response['data'][0]['data']), 47)
subtitle_text = subtitle_bytes.decode(charade.detect(subtitle_bytes)['encoding'], 'replace')
subtitle_text = subtitle_bytes.decode(charade.detect(subtitle_bytes)['encoding'])
if not is_valid_subtitle(subtitle_text):
raise InvalidSubtitle
return subtitle_text
+3 -1
View File
@@ -8,6 +8,7 @@ from . import Provider
from .. import __version__
from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError
from ..subtitle import Subtitle, is_valid_subtitle
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
@@ -30,6 +31,7 @@ class TheSubDBSubtitle(Subtitle):
class TheSubDBProvider(Provider):
languages = {babelfish.Language.fromalpha2(l) for l in ['en', 'es', 'fr', 'it', 'nl', 'pl', 'pt', 'ro', 'sv', 'tr']}
video_types = (Episode, Movie)
required_hash = 'thesubdb'
def initialize(self):
@@ -75,7 +77,7 @@ class TheSubDBProvider(Provider):
r = self.get(params)
if r.status_code != 200:
raise ProviderError('Request failed with status code %d' % r.status_code)
subtitle_text = r.content.decode(charade.detect(r.content)['encoding'], 'replace')
subtitle_text = r.content.decode(charade.detect(r.content)['encoding'])
if not is_valid_subtitle(subtitle_text):
raise InvalidSubtitle
return subtitle_text
+2 -2
View File
@@ -59,7 +59,7 @@ class TVsubtitlesProvider(Provider):
languages = {babelfish.Language('por', 'BR')} | {babelfish.Language(l)
for l in ['ara', 'bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'fin', 'fra', 'hun', 'ita', 'jpn', 'kor',
'nld', 'pol', 'por', 'ron', 'rus', 'spa', 'swe', 'tur', 'ukr', 'zho']}
video_types = (Episode,)
videos = (Episode,)
server = 'http://www.tvsubtitles.net'
episode_id_re = re.compile('^episode-(\d+)\.html$')
subtitle_re = re.compile('^\/subtitle-(\d+)\.html$')
@@ -160,7 +160,7 @@ class TVsubtitlesProvider(Provider):
if len(zf.namelist()) > 1:
raise ProviderError('More than one file to unzip')
subtitle_bytes = zf.read(zf.namelist()[0])
subtitle_text = subtitle_bytes.decode(charade.detect(subtitle_bytes)['encoding'], 'replace')
subtitle_text = subtitle_bytes.decode(charade.detect(subtitle_bytes)['encoding'])
if not is_valid_subtitle(subtitle_text):
raise InvalidSubtitle
return subtitle_text
+1 -1
View File
@@ -90,7 +90,7 @@ def get_subtitle_path(video_path, language=None):
if language is not None:
try:
return subtitle_path + '.%s.%s' % (language.alpha2, 'srt')
except babelfish.ConvertError:
except babelfish.NoConversionError:
return subtitle_path + '.%s.%s' % (language.alpha3, 'srt')
return subtitle_path + '.srt'
+13 -25
View File
@@ -93,16 +93,6 @@ class Addic7edProviderTestCase(ProviderTestCase):
class BierDopjeProviderTestCase(ProviderTestCase):
provider_name = 'bierdopje'
def test_find_show_id(self):
with self.Provider() as provider:
show_id = provider.find_show_id('The Big Bang')
self.assertTrue(show_id == 9203)
def test_find_show_id_error(self):
with self.Provider() as provider:
show_id = provider.find_show_id('the big how i met your mother')
self.assertTrue(show_id is None)
def test_query_episode_0(self):
video = EPISODES[0]
language = Language('eng')
@@ -229,11 +219,10 @@ class OpenSubtitlesProviderTestCase(ProviderTestCase):
def test_query_movie_0_hash(self):
video = MOVIES[0]
languages = {Language('eng')}
matches = {frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'imdb_id']),
frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'release_group', 'imdb_id']),
frozenset(['year', 'video_codec', 'imdb_id', 'hash', 'title']),
frozenset(['year', 'resolution', 'imdb_id', 'hash', 'title']),
frozenset(['year', 'imdb_id', 'hash', 'title'])}
matches = {frozenset(['imdb_id', 'title', 'hash', 'year']),
frozenset(['imdb_id', 'hash', 'title', 'year', 'video_codec', 'resolution']),
frozenset(['imdb_id', 'video_codec', 'hash', 'title', 'year']),
frozenset(['imdb_id', 'hash', 'title', 'year', 'video_codec', 'resolution', 'release_group'])}
with self.Provider() as provider:
subtitles = provider.query(languages, hash=video.hashes['opensubtitles'], size=video.size)
self.assertTrue({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles} == matches)
@@ -254,16 +243,15 @@ class OpenSubtitlesProviderTestCase(ProviderTestCase):
def test_list_subtitles(self):
video = MOVIES[0]
languages = {Language('eng'), Language('fra')}
matches = {frozenset(['title', 'video_codec', 'year', 'resolution', 'release_group', 'imdb_id']),
frozenset(['imdb_id', 'year', 'title']),
frozenset(['year', 'video_codec', 'imdb_id', 'resolution', 'title']),
frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'release_group', 'imdb_id']),
frozenset(['year', 'video_codec', 'imdb_id', 'hash', 'title']),
frozenset(['year', 'resolution', 'imdb_id', 'hash', 'title']),
frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'imdb_id']),
frozenset(['year', 'imdb_id', 'hash', 'title']),
frozenset(['video_codec', 'imdb_id', 'year', 'title']),
frozenset(['year', 'imdb_id', 'resolution', 'title'])}
matches = {frozenset(['imdb_id', 'title', 'hash', 'year']),
frozenset(['imdb_id', 'resolution', 'title', 'year']),
frozenset(['imdb_id', 'title', 'year']),
frozenset(['imdb_id', 'video_codec', 'title', 'year']),
frozenset(['imdb_id', 'resolution', 'title', 'video_codec', 'year']),
frozenset(['imdb_id', 'hash', 'title', 'year', 'video_codec', 'resolution', 'release_group']),
frozenset(['imdb_id', 'video_codec', 'hash', 'title', 'year']),
frozenset(['imdb_id', 'title', 'year', 'video_codec', 'resolution', 'release_group']),
frozenset(['imdb_id', 'hash', 'title', 'year', 'video_codec', 'resolution'])}
with self.Provider() as provider:
subtitles = provider.list_subtitles(video, languages)
self.assertTrue({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles} == matches)
+1 -64
View File
@@ -5,7 +5,7 @@ import os
import shutil
from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
from babelfish import Language
from subliminal import list_subtitles, download_subtitles, download_best_subtitles, scan_video, scan_videos
from subliminal import list_subtitles, download_subtitles, download_best_subtitles
from subliminal.tests.common import MOVIES, EPISODES
@@ -99,72 +99,9 @@ class ApiTestCase(TestCase):
self.assertTrue(subtitles[videos[0]][0].hearing_impaired == True)
class VideoTestCase(TestCase):
    """Tests for :func:`scan_video` run against empty placeholder files

    ``setUp`` creates ``TEST_DIR`` and fills it with one empty file per entry of
    ``MOVIES`` and ``EPISODES``; ``tearDown`` removes the whole directory again.

    """
    @staticmethod
    def _video_path(video):
        """Return the path of the placeholder file standing in for `video`"""
        return os.path.join(TEST_DIR, os.path.split(video.name)[1])

    @staticmethod
    def _touch_subtitle(video, suffix):
        """Create an empty subtitle file next to `video`'s placeholder

        :param video: reference video whose file name (without extension) is reused
        :param string suffix: text appended to the base name, e.g. ``'.en.srt'``

        """
        basename = os.path.splitext(os.path.split(video.name)[1])[0]
        open(os.path.join(TEST_DIR, basename) + suffix, 'w').close()

    def setUp(self):
        os.mkdir(TEST_DIR)
        for video in MOVIES + EPISODES:
            open(self._video_path(video), 'w').close()

    def tearDown(self):
        shutil.rmtree(TEST_DIR)

    def test_scan_video_movie(self):
        video = MOVIES[0]
        scanned_video = scan_video(self._video_path(video))
        self.assertEqual(scanned_video.name, self._video_path(video))
        # titles are compared case-insensitively
        self.assertEqual(scanned_video.title.lower(), video.title.lower())
        self.assertEqual(scanned_video.year, video.year)
        self.assertEqual(scanned_video.video_codec, video.video_codec)
        self.assertEqual(scanned_video.resolution, video.resolution)
        self.assertEqual(scanned_video.release_group, video.release_group)
        self.assertEqual(scanned_video.subtitle_languages, set())
        self.assertEqual(scanned_video.hashes, {})
        self.assertIsNone(scanned_video.audio_codec)
        self.assertIsNone(scanned_video.imdb_id)
        self.assertEqual(scanned_video.size, 0)

    def test_scan_video_episode(self):
        video = EPISODES[0]
        scanned_video = scan_video(self._video_path(video))
        self.assertEqual(scanned_video.name, self._video_path(video))
        self.assertEqual(scanned_video.series, video.series)
        self.assertEqual(scanned_video.season, video.season)
        self.assertEqual(scanned_video.episode, video.episode)
        self.assertEqual(scanned_video.video_codec, video.video_codec)
        self.assertEqual(scanned_video.resolution, video.resolution)
        self.assertEqual(scanned_video.release_group, video.release_group)
        self.assertEqual(scanned_video.subtitle_languages, set())
        self.assertEqual(scanned_video.hashes, {})
        self.assertIsNone(scanned_video.title)
        self.assertIsNone(scanned_video.tvdb_id)
        self.assertIsNone(scanned_video.imdb_id)
        self.assertIsNone(scanned_video.audio_codec)
        self.assertEqual(scanned_video.size, 0)

    def test_scan_video_subtitle_language_und(self):
        video = EPISODES[0]
        # a subtitle without a language extension is expected as undetermined
        self._touch_subtitle(video, '.srt')
        scanned_video = scan_video(self._video_path(video))
        self.assertEqual(scanned_video.subtitle_languages, {Language('und')})

    def test_scan_video_subtitles_language_eng(self):
        video = EPISODES[0]
        self._touch_subtitle(video, '.en.srt')
        scanned_video = scan_video(self._video_path(video))
        self.assertEqual(scanned_video.subtitle_languages, {Language('eng')})

    def test_scan_video_subtitles_languages(self):
        video = EPISODES[0]
        for suffix in ('.en.srt', '.fr.srt', '.srt'):
            self._touch_subtitle(video, suffix)
        scanned_video = scan_video(self._video_path(video))
        self.assertEqual(scanned_video.subtitle_languages, {Language('eng'), Language('fra'), Language('und')})
def suite():
    """Assemble the tests of this module into a single suite

    :return: suite holding the tests of :class:`ApiTestCase` and :class:`VideoTestCase`
    :rtype: :class:`unittest.TestSuite`

    """
    tests = TestSuite()
    for test_case in (ApiTestCase, VideoTestCase):
        tests.addTest(TestLoader().loadTestsFromTestCase(test_case))
    return tests
+66 -112
View File
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import datetime
import hashlib
import logging
import os
@@ -24,6 +23,9 @@ VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.
#: Subtitle extensions
SUBTITLE_EXTENSIONS = ('.srt', '.sub', '.smi', '.txt', '.ssa', '.ass', '.mpl')
#: Language extensions
LANGUAGE_EXTENSIONS = tuple('.' + c for c in babelfish.CONVERTERS['alpha2'].codes)
class Video(object):
"""Base class for videos
@@ -156,15 +158,10 @@ def scan_subtitle_languages(path):
:rtype: set
"""
language_extensions = tuple('.' + c for c in babelfish.CONVERTERS['alpha2'].codes)
dirpath, filename = os.path.split(path)
subtitles = set()
for p in os.listdir(dirpath):
if not isinstance(p, bytes) and p.startswith(os.path.splitext(filename)[0]) and p.endswith(SUBTITLE_EXTENSIONS):
if os.path.splitext(p)[0].endswith(language_extensions):
subtitles.add(babelfish.Language.fromalpha2(os.path.splitext(p)[0][-2:]))
else:
subtitles.add(babelfish.Language('und'))
subtitles = {babelfish.Language.fromalpha2(os.path.splitext(p)[0][-2:]) for p in os.listdir(dirpath)
if not isinstance(p, bytes) and p.startswith(os.path.splitext(filename)[0])
and os.path.splitext(p)[0].endswith(LANGUAGE_EXTENSIONS)}
logger.debug('Found subtitles %r', subtitles)
return subtitles
@@ -172,7 +169,7 @@ def scan_subtitle_languages(path):
def scan_video(path, subtitles=True, embedded_subtitles=True):
"""Scan a video and its subtitle languages from a video `path`
:param string path: absolute path to the video
:param string path: path to the video
:param bool subtitles: scan for subtitles with the same name
:param bool embedded_subtitles: scan for embedded subtitles
:return: the scanned video
@@ -182,80 +179,63 @@ def scan_video(path, subtitles=True, embedded_subtitles=True):
"""
dirpath, filename = os.path.split(path)
logger.info('Scanning video %r in %r', filename, dirpath)
video = Video.fromguess(path, guessit.guess_file_info(path, 'autodetect'))
video = Video.fromguess(path, guessit.guess_file_info(filename, 'autodetect'))
# mkv container
if filename.endswith('.mkv'):
with open(path, 'rb') as f:
mkv = enzyme.MKV(f)
video_track = mkv.video_tracks[0]
audio_track = mkv.audio_tracks[0]
# resolution
if video_track.height in (480, 720, 1080):
if video_track.interlaced:
video.resolution = '%di' % video_track.height
logger.debug('Found resolution %s with enzyme', video.resolution)
else:
video.resolution = '%dp' % video_track.height
logger.debug('Found resolution %s with enzyme', video.resolution)
# video codec
if video_track.codec_id == 'V_MPEG4/ISO/AVC':
video.video_codec = 'h264'
logger.debug('Found video_codec %s with enzyme', video.video_codec)
elif video_track.codec_id == 'V_MPEG4/ISO/SP':
video.video_codec = 'DivX'
logger.debug('Found video_codec %s with enzyme', video.video_codec)
elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
video.video_codec = 'XviD'
logger.debug('Found video_codec %s with enzyme', video.video_codec)
# audio codec
if audio_track.codec_id == 'A_AC3':
video.audio_codec = 'AC3'
logger.debug('Found audio_codec %s with enzyme', video.audio_codec)
elif audio_track.codec_id == 'A_DTS':
video.audio_codec = 'DTS'
logger.debug('Found audio_codec %s with enzyme', video.audio_codec)
elif audio_track.codec_id == 'A_AAC':
video.audio_codec = 'AAC'
logger.debug('Found audio_codec %s with enzyme', video.audio_codec)
# embedded subtitles
if embedded_subtitles:
embedded_subtitle_languages = {babelfish.Language.fromalpha3b(st.language) for st in
mkv.subtitle_tracks if st.language != 'und'}
if embedded_subtitle_languages:
logger.debug('Found embedded subtitle %r with enzyme', embedded_subtitle_languages)
video.subtitle_languages |= embedded_subtitle_languages
video.size = os.path.getsize(path)
if video.size > 10485760:
logger.debug('Size is %d', video.size)
video.hashes['opensubtitles'] = hash_opensubtitles(path)
video.hashes['thesubdb'] = hash_thesubdb(path)
logger.debug('Computed hashes %r', video.hashes)
else:
logger.warning('Size is lower than 10MB: hashes not computed')
logger.debug('Size is %d', video.size)
video.hashes['opensubtitles'] = hash_opensubtitles(path)
video.hashes['thesubdb'] = hash_thesubdb(path)
logger.debug('Computed hashes %r', video.hashes)
# add subtitles
if subtitles:
video.subtitle_languages |= scan_subtitle_languages(path)
# enzyme
try:
if filename.endswith('.mkv'):
with open(path, 'rb') as f:
mkv = enzyme.MKV(f)
if mkv.video_tracks:
video_track = mkv.video_tracks[0]
# resolution
if video_track.height in (480, 720, 1080):
if video_track.interlaced:
video.resolution = '%di' % video_track.height
logger.debug('Found resolution %s with enzyme', video.resolution)
else:
video.resolution = '%dp' % video_track.height
logger.debug('Found resolution %s with enzyme', video.resolution)
# video codec
if video_track.codec_id == 'V_MPEG4/ISO/AVC':
video.video_codec = 'h264'
logger.debug('Found video_codec %s with enzyme', video.video_codec)
elif video_track.codec_id == 'V_MPEG4/ISO/SP':
video.video_codec = 'DivX'
logger.debug('Found video_codec %s with enzyme', video.video_codec)
elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
video.video_codec = 'XviD'
logger.debug('Found video_codec %s with enzyme', video.video_codec)
else:
logger.warning('MKV has no video track')
if mkv.audio_tracks:
audio_track = mkv.audio_tracks[0]
# audio codec
if audio_track.codec_id == 'A_AC3':
video.audio_codec = 'AC3'
logger.debug('Found audio_codec %s with enzyme', video.audio_codec)
elif audio_track.codec_id == 'A_DTS':
video.audio_codec = 'DTS'
logger.debug('Found audio_codec %s with enzyme', video.audio_codec)
elif audio_track.codec_id == 'A_AAC':
video.audio_codec = 'AAC'
logger.debug('Found audio_codec %s with enzyme', video.audio_codec)
else:
logger.warning('MKV has no audio track')
if mkv.subtitle_tracks:
# embedded subtitles
if embedded_subtitles:
embedded_subtitle_languages = set()
for st in mkv.subtitle_tracks:
try:
embedded_subtitle_languages.add(babelfish.Language.fromalpha3b(st.language or 'und'))
except babelfish.Error:
logger.error('Embedded subtitle language %r is not a valid language', st.language)
logger.debug('Found embedded subtitle %r with enzyme', embedded_subtitle_languages)
video.subtitle_languages |= embedded_subtitle_languages
else:
logger.info('MKV has no subtitle track')
except enzyme.Error:
logger.error('Parsing video metadata with enzyme failed')
return video
def scan_videos(paths, subtitles=True, embedded_subtitles=True, age=None):
"""Scan `paths` for videos and their subtitle languages
:params paths: absolute paths to scan for videos
:params paths: paths to scan for videos
:type paths: list of string
:param bool subtitles: scan for subtitles with the same name
:param bool embedded_subtitles: scan for embedded subtitles
@@ -268,57 +248,31 @@ def scan_videos(paths, subtitles=True, embedded_subtitles=True, age=None):
videos = []
# scan files
for filepath in [p for p in paths if os.path.isfile(p)]:
if age and datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(filepath)) > age:
logger.info('Skipping video %r: older than %r', filepath, age)
continue
try:
videos.append(scan_video(filepath, subtitles, embedded_subtitles))
videos.append(scan_video(filepath, subtitles))
except ValueError as e:
logger.error('Skipping video: %s', e)
logger.info('Skipping video: %s', e)
continue
# scan directories
for path in [p for p in paths if os.path.isdir(p)]:
logger.info('Scanning directory %r', path)
for dirpath, dirnames, filenames in os.walk(path):
# skip badly encoded directories
for dirpath, _, filenames in os.walk(path):
# skip badly encoded directories and files
if isinstance(dirpath, bytes):
logger.error('Skipping badly encoded directory %r', dirpath.decode('utf-8', errors='replace'))
continue
# skip badly encoded and hidden sub directories
for dirname in list(dirnames):
if isinstance(dirname, bytes):
logger.error('Skipping badly encoded dirname %r in %r', dirname.decode('utf-8', errors='replace'),
dirpath)
dirnames.remove(dirname)
elif dirname.startswith('.'):
logger.debug('Skipping hidden dirname %r in %r', dirname, dirpath)
dirnames.remove(dirname)
# scan for videos
safe_filenames = []
for filename in filenames:
# skip badly encoded files
if isinstance(filename, bytes):
logger.error('Skipping badly encoded filename %r in %r', filename.decode('utf-8', errors='replace'),
dirpath)
continue
# filter videos
if not filename.endswith(VIDEO_EXTENSIONS):
continue
# skip hidden files
if filename.startswith('.'):
logger.debug('Skipping hidden filename %r in %r', filename, dirpath)
continue
filepath = os.path.join(dirpath, filename)
# skip links
if os.path.islink(filepath):
logger.debug('Skipping link %r in %r', filename, dirpath)
continue
if age and datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(filepath)) > age:
logger.info('Skipping video %r: older than %r', filepath, age)
logger.error('Skipping badly encoded filename %r', filename.decode('utf-8', errors='replace'))
continue
safe_filenames.append(filename)
# scan for videos
for video_filename in [f for f in safe_filenames if f.endswith(VIDEO_EXTENSIONS)]:
try:
video = scan_video(filepath, subtitles, embedded_subtitles)
video = scan_video(os.path.join(dirpath, video_filename), subtitles=subtitles)
except ValueError as e:
logger.error('Skipping video: %s', e)
logger.info('Skipping video: %s', e)
continue
videos.append(video)
return videos