Compare commits
188 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| bed93bf928 | |||
| 7697ceffef | |||
| 81dd24a9bd | |||
| 729d7d97c4 | |||
| c7a4b3c0a4 | |||
| 3da044ada9 | |||
| 44bbc93dae | |||
| 54341a0afc | |||
| 599eab3e5b | |||
| 9f9c875234 | |||
| 74c0ed80c5 | |||
| 5ecb7aea5e | |||
| 829eacc4d6 | |||
| f7b3f924b4 | |||
| e247bc0e59 | |||
| 4158416183 | |||
| cf1181f2af | |||
| a2d1335403 | |||
| 520cbb5189 | |||
| e8eeadb094 | |||
| 92a2336dba | |||
| cbc75c8b85 | |||
| 563973163e | |||
| e147a7a0ca | |||
| b494dc7bec | |||
| 9ce4b02610 | |||
| d0ff69d224 | |||
| cde09e0f56 | |||
| 84409395d1 | |||
| e4e6bcfad2 | |||
| 2103215e41 | |||
| d086569f09 | |||
| 28064767ea | |||
| e996e4d4b6 | |||
| 422100f9fc | |||
| c9a7ffd778 | |||
| db009abf79 | |||
| c1cc7c98ef | |||
| a08b00d5c4 | |||
| 16a22ab7b2 | |||
| da32ee2504 | |||
| 54eaa9e695 | |||
| 28c1481a48 | |||
| cac340ad43 | |||
| d6994d9a60 | |||
| 90372ad30d | |||
| 24fc22dbe6 | |||
| 7b7adac774 | |||
| 7f0ff6ae2f | |||
| 1b3e58b326 | |||
| dc47fc60b8 | |||
| 6c588964a7 | |||
| f65b24094a | |||
| 6b807be0e6 | |||
| a794eb8310 | |||
| 8290c8a371 | |||
| 475152a7eb | |||
| 4e75e20ede | |||
| d36823c7ca | |||
| 2a6b387112 | |||
| a83822bff9 | |||
| 8e7538f6e6 | |||
| 9cdb26f7cc | |||
| 9659c913c4 | |||
| c9506cb95e | |||
| 43e6ce3997 | |||
| dfd12edcb3 | |||
| 154a8072f6 | |||
| 904abaf26b | |||
| bea18a27ba | |||
| 2d998eab50 | |||
| a25a67572b | |||
| 1bdf6f9969 | |||
| 0b32892fa8 | |||
| fea5b8a716 | |||
| 90b3707409 | |||
| 1c0224fbe7 | |||
| 626fcd1140 | |||
| b01c84b14c | |||
| 412492b4d1 | |||
| 9a6f7a4316 | |||
| 660f887923 | |||
| fe9c67ed91 | |||
| d3bbd05e4f | |||
| 34585129aa | |||
| 955cd4c173 | |||
| 4da63a8fd7 | |||
| fa27789608 | |||
| f9e9f35157 | |||
| 4a6604f0ab | |||
| 971d1221da | |||
| ba69885477 | |||
| 8e23098037 | |||
| 8da7bf029c | |||
| e16e58cbfa | |||
| abb7cd3bfa | |||
| bfa06f3989 | |||
| c63529939d | |||
| 2814f57e89 | |||
| 70476883c6 | |||
| b5ed209453 | |||
| be7687f15d | |||
| b7fb8e1e76 | |||
| 1a03720a7d | |||
| cb4099109a | |||
| 131504e7ee | |||
| b0c7b480d6 | |||
| e543c927cf | |||
| 897b602d71 | |||
| d94421dcf3 | |||
| e371b99dca | |||
| 49d10e5ff7 | |||
| d959f5b826 | |||
| 709f5cb605 | |||
| b11a051c23 | |||
| 1a77902079 | |||
| 481dc2f3b4 | |||
| 732aa91889 | |||
| 0df4c55548 | |||
| 7c72ed41fb | |||
| 83ace14faf | |||
| 9b1c3538b3 | |||
| 27a6e51cd3 | |||
| 86fad21cf0 | |||
| 5d081c3d65 | |||
| ca74c0af0a | |||
| 002ec90b09 | |||
| 6f42199100 | |||
| 87bb2493d1 | |||
| 716a66e9fa | |||
| 88cc95239a | |||
| 924470d2c0 | |||
| 45d5200b89 | |||
| 8f82554927 | |||
| 423688c352 | |||
| 8207223002 | |||
| 3b3fdb34e3 | |||
| ecce1fca9c | |||
| 2011100251 | |||
| 14e42e57ea | |||
| 834f18f3d5 | |||
| cf9a916e95 | |||
| a8a26ec642 | |||
| e6c398589c | |||
| c5332644f1 | |||
| bd0c134ae0 | |||
| d3282648fd | |||
| 1156817c71 | |||
| e1af48bbc2 | |||
| d6dd8379ab | |||
| bc73e559d1 | |||
| a6d8c9d5fc | |||
| a5d8a8b1d8 | |||
| ae28116c59 | |||
| b03403cf72 | |||
| f5736fcd3b | |||
| 4193c245a5 | |||
| c649d5b5fd | |||
| 1fa70995a3 | |||
| 3afee79415 | |||
| 5e43c1936e | |||
| fcae524771 | |||
| 098da50e23 | |||
| 9b3544bff7 | |||
| 063cae161b | |||
| 825e073e08 | |||
| 77098e1dc3 | |||
| e2210b7624 | |||
| ffa9051d69 | |||
| bad0dbfc71 | |||
| 53b938b83d | |||
| e6c0e5fe7a | |||
| d8513e910d | |||
| c2d984a908 | |||
| 15b7f134be | |||
| 3463718195 | |||
| 9cf1b759d7 | |||
| e8b9d6dd1f | |||
| 66280ded50 | |||
| 55860e5f18 | |||
| d7bc17a485 | |||
| be3a291cbc | |||
| 4696bfe364 | |||
| a81533d2cf | |||
| fc9a8dcf48 | |||
| 5e6d53fe63 | |||
| 0c68e8cf47 | |||
| d41a0cdda4 |
@@ -1,8 +0,0 @@
|
||||
[report]
|
||||
exclude_lines =
|
||||
pragma: no cover
|
||||
raise NotImplementedError
|
||||
def __repr__
|
||||
if __name__ == .__main__.:
|
||||
omit =
|
||||
subliminal/cli.py
|
||||
+11
-20
@@ -1,7 +1,6 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
@@ -9,12 +8,11 @@ __pycache__/
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
bin/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
@@ -24,12 +22,6 @@ var/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
@@ -38,27 +30,26 @@ pip-delete-this-directory.txt
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*,cover
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Mr Developer
|
||||
.mr.developer.cfg
|
||||
.project
|
||||
.pydevproject
|
||||
.settings
|
||||
|
||||
# Rope
|
||||
.ropeproject
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
*.pot
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Pycharm
|
||||
.idea
|
||||
|
||||
# Subliminal
|
||||
tests/data/mkv/
|
||||
|
||||
-49
@@ -1,49 +0,0 @@
|
||||
sudo: false
|
||||
|
||||
language: python
|
||||
|
||||
python:
|
||||
- "2.7"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
|
||||
env:
|
||||
- PARSER=native
|
||||
- PARSER=lxml
|
||||
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- unrar
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- python: "3.5"
|
||||
env:
|
||||
- PARSER=native
|
||||
- VCR_RECORD_MODE=all
|
||||
- PYTEST_ADDOPTS="-m integration"
|
||||
allow_failures:
|
||||
- python: "3.5"
|
||||
env:
|
||||
- PARSER=native
|
||||
- VCR_RECORD_MODE=all
|
||||
- PYTEST_ADDOPTS="-m integration"
|
||||
|
||||
cache:
|
||||
directories:
|
||||
- $HOME/.cache/pip
|
||||
- tests/data/mkv
|
||||
|
||||
before_cache:
|
||||
- rm -f $HOME/.cache/pip/log/debug.log
|
||||
|
||||
install:
|
||||
- pip install -e .[test]
|
||||
- if [ $PARSER = "lxml" ]; then pip install lxml; fi
|
||||
- pip install coveralls
|
||||
|
||||
script: python setup.py test --addopts "--cov subliminal --verbose $PYTEST_ADDOPTS"
|
||||
|
||||
after_success: coveralls
|
||||
@@ -1,19 +0,0 @@
|
||||
Contributing
|
||||
============
|
||||
|
||||
Issues
|
||||
------
|
||||
Issues are intended for bug reports and feature requests. For any bug report please make sure to include the complete
|
||||
stack trace and DEBUG level logs as well as steps to reproduce.
|
||||
|
||||
If you use the CLI, you can create a debug log file with `subliminal --debug [...] 2> debug.log`.
|
||||
|
||||
Pull Requests
|
||||
-------------
|
||||
You can contribute code and documentation with pull requests. Any code contribution must be unit tested and the pull
|
||||
request opened against the *develop* branch.
|
||||
|
||||
Translations
|
||||
------------
|
||||
Contribution to translations can be made on [subliminal's transifex page](https://www.transifex.com/subliminal/subliminal/)
|
||||
Subliminal is configured to work with [transifex-client](http://docs.transifex.com/client/)
|
||||
-272
@@ -1,272 +0,0 @@
|
||||
Changelog
|
||||
---------
|
||||
|
||||
2.0.3
|
||||
^^^^^
|
||||
**release date:** 2016-06-10
|
||||
|
||||
* Fix clearing cache in CLI
|
||||
|
||||
|
||||
2.0.2
|
||||
^^^^^
|
||||
**release date:** 2016-06-06
|
||||
|
||||
* Fix for dogpile.cache>=0.6.0
|
||||
* Fix missing sphinx_rtd_theme dependency
|
||||
|
||||
|
||||
2.0.1
|
||||
^^^^^
|
||||
**release date:** 2016-06-06
|
||||
|
||||
* Fix beautifulsoup4 minimal requirement
|
||||
|
||||
|
||||
2.0.0
|
||||
^^^^^
|
||||
**release date:** 2016-06-04
|
||||
|
||||
* Add refiners to enrich videos with information from metadata, tvdb and omdb
|
||||
* Add asynchronous provider search for faster searches
|
||||
* Add registrable managers so subliminal can run without install
|
||||
* Add archive support
|
||||
* Add the ability to customize scoring logic
|
||||
* Add an age argument to scan_videos for faster scanning
|
||||
* Add legendas.tv provider
|
||||
* Add shooter.cn provider
|
||||
* Improve matching and scoring
|
||||
* Improve documentation
|
||||
* Split nautilus integration into its own project
|
||||
|
||||
|
||||
1.1.1
|
||||
^^^^^
|
||||
**release date:** 2016-01-03
|
||||
|
||||
* Fix scanning videos on bad MKV files
|
||||
|
||||
|
||||
1.1
|
||||
^^^
|
||||
**release date:** 2015-12-29
|
||||
|
||||
* Fix library usage example in README
|
||||
* Fix for series name with special characters in addic7ed provider
|
||||
* Fix id property in thesubdb provider
|
||||
* Improve matching on titles
|
||||
* Add support for nautilus context menu with translations
|
||||
* Add support for searching subtitles in a separate directory
|
||||
* Add subscenter provider
|
||||
* Add support for python 3.5
|
||||
|
||||
|
||||
1.0.1
|
||||
^^^^^
|
||||
**release date:** 2015-07-23
|
||||
|
||||
* Fix unicode issues in CLI (python 2 only)
|
||||
* Fix score scaling in CLI (python 2 only)
|
||||
* Improve error handling in CLI
|
||||
* Color collect report in CLI
|
||||
|
||||
|
||||
1.0
|
||||
^^^
|
||||
**release date:** 2015-07-22
|
||||
|
||||
* Many changes and fixes
|
||||
* New test suite
|
||||
* New documentation
|
||||
* New CLI
|
||||
* Added support for SubsCenter
|
||||
|
||||
|
||||
0.7.5
|
||||
^^^^^
|
||||
**release date:** 2015-03-04
|
||||
|
||||
* Update requirements
|
||||
* Remove BierDopje provider
|
||||
* Add pre-guessed video optional argument in scan_video
|
||||
* Improve hearing impaired support
|
||||
* Fix TVSubtitles and Podnapisi providers
|
||||
|
||||
|
||||
0.7.4
|
||||
^^^^^
|
||||
**release date:** 2014-01-27
|
||||
|
||||
* Fix requirements for guessit and babelfish
|
||||
|
||||
|
||||
0.7.3
|
||||
^^^^^
|
||||
**release date:** 2013-11-22
|
||||
|
||||
* Fix windows compatibility
|
||||
* Improve subtitle validation
|
||||
* Improve embedded subtitle languages detection
|
||||
* Improve unittests
|
||||
|
||||
|
||||
0.7.2
|
||||
^^^^^
|
||||
**release date:** 2013-11-10
|
||||
|
||||
* Fix TVSubtitles for ambiguous series
|
||||
* Add a CACHE_VERSION to force cache reloading on version change
|
||||
* Set CLI default cache expiration time to 30 days
|
||||
* Add podnapisi provider
|
||||
* Support script for languages e.g. Latn, Cyrl
|
||||
* Improve logging levels
|
||||
* Fix subtitle validation in some rare cases
|
||||
|
||||
|
||||
0.7.1
|
||||
^^^^^
|
||||
**release date:** 2013-11-06
|
||||
|
||||
* Improve CLI
|
||||
* Add login support for Addic7ed
|
||||
* Remove lxml dependency
|
||||
* Many fixes
|
||||
|
||||
|
||||
0.7.0
|
||||
^^^^^
|
||||
**release date:** 2013-10-29
|
||||
|
||||
**WARNING:** Complete rewrite of subliminal with backward incompatible changes
|
||||
|
||||
* Use enzyme to parse metadata of videos
|
||||
* Use babelfish to handle languages
|
||||
* Use dogpile.cache for caching
|
||||
* Use charade to detect subtitle encoding
|
||||
* Use pysrt for subtitle validation
|
||||
* Use entry points for subtitle providers
|
||||
* New subtitle score computation
|
||||
* Hearing impaired subtitles support
|
||||
* Drop async support
|
||||
* Drop a few providers
|
||||
* And much more...
|
||||
|
||||
|
||||
0.6.4
|
||||
^^^^^
|
||||
**release date:** 2013-05-19
|
||||
|
||||
* Fix requirements due to enzyme 0.3
|
||||
|
||||
|
||||
0.6.3
|
||||
^^^^^
|
||||
**release date:** 2013-01-17
|
||||
|
||||
* Fix requirements due to requests 1.0
|
||||
|
||||
|
||||
0.6.2
|
||||
^^^^^
|
||||
**release date:** 2012-09-15
|
||||
|
||||
* Fix BierDopje
|
||||
* Fix Addic7ed
|
||||
* Fix SubsWiki
|
||||
* Fix missing enzyme import
|
||||
* Add Catalan and Galician languages to Addic7ed
|
||||
* Add possible services in help message of the CLI
|
||||
* Allow existing filenames to be passed without the ./ prefix
|
||||
|
||||
|
||||
0.6.1
|
||||
^^^^^
|
||||
**release date:** 2012-06-24
|
||||
|
||||
* Fix subtitle release name in BierDopje
|
||||
* Fix subtitles being downloaded multiple times
|
||||
* Add Chinese support to TvSubtitles
|
||||
* Fix encoding issues
|
||||
* Fix single download subtitles without the force option
|
||||
* Add Spanish (Latin America) exception to Addic7ed
|
||||
* Fix group_by_video when a list entry has None as subtitles
|
||||
* Add support for Galician language in Subtitulos
|
||||
* Add an integrity check after subtitles download for Addic7ed
|
||||
* Add error handling for if not strict in Language
|
||||
* Fix TheSubDB hash method to return None if the file is too small
|
||||
* Fix guessit.Language in Video.scan
|
||||
* Fix language detection of subtitles
|
||||
|
||||
|
||||
0.6.0
|
||||
^^^^^
|
||||
**release date:** 2012-06-16
|
||||
|
||||
**WARNING:** Backward incompatible changes
|
||||
|
||||
* Fix --workers option in CLI
|
||||
* Use a dedicated module for languages
|
||||
* Use beautifulsoup4
|
||||
* Improve return types
|
||||
* Add scan_filter option
|
||||
* Add --age option in CLI
|
||||
* Add TvSubtitles service
|
||||
* Add Addic7ed service
|
||||
|
||||
|
||||
0.5.1
|
||||
^^^^^
|
||||
**release date:** 2012-03-25
|
||||
|
||||
* Improve error handling of enzyme parsing
|
||||
|
||||
|
||||
0.5
|
||||
^^^
|
||||
**release date:** 2012-03-25
|
||||
**WARNING:** Backward incompatible changes
|
||||
|
||||
* Use more unicode
|
||||
* New list_subtitles and download_subtitles methods
|
||||
* New Pool object for asynchronous work
|
||||
* Improve sort algorithm
|
||||
* Better error handling
|
||||
* Make sorting customizable
|
||||
* Remove class Subliminal
|
||||
* Remove permissions handling
|
||||
|
||||
|
||||
0.4
|
||||
^^^
|
||||
**release date:** 2011-11-11
|
||||
|
||||
* Many fixes
|
||||
* Better error handling
|
||||
|
||||
|
||||
0.3
|
||||
^^^
|
||||
**release date:** 2011-08-18
|
||||
|
||||
* Fix a bug when series is not guessed by guessit
|
||||
* Fix dependencies failure when installing package
|
||||
* Fix encoding issues with logging
|
||||
* Add a script to ease subtitles download
|
||||
* Add possibility to choose mode of created files
|
||||
* Add more checks before adjusting permissions
|
||||
|
||||
|
||||
0.2
|
||||
^^^
|
||||
**release date:** 2011-07-11
|
||||
|
||||
* Fix plugin configuration
|
||||
* Fix some encoding issues
|
||||
* Remove extra logging
|
||||
|
||||
|
||||
0.1
|
||||
^^^
|
||||
**release date:** *private release*
|
||||
|
||||
* Initial release
|
||||
@@ -1,20 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 Antoine Bertin
|
||||
Copyright (c) 2014 Bram Walet
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
@@ -0,0 +1,444 @@
|
||||
#local media assets agent
|
||||
import os, string, hashlib, base64, re, plistlib, unicodedata
|
||||
import config
|
||||
import helpers
|
||||
import localmedia
|
||||
import audiohelpers
|
||||
import videohelpers
|
||||
|
||||
from mutagen import File
|
||||
from mutagen.mp4 import MP4
|
||||
from mutagen.id3 import ID3
|
||||
from mutagen.flac import FLAC
|
||||
from mutagen.flac import Picture
|
||||
from mutagen.oggvorbis import OggVorbis
|
||||
|
||||
PERSONAL_MEDIA_IDENTIFIER = "com.plexapp.agents.none"
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
@expose
def ReadTags(f):
    """Return the tags of the media file *f* as a plain dict.

    The file is opened with mutagen's ``File(f, easy=True)`` and the result
    is converted with ``dict()``.  If either step raises, the failure is
    logged and an empty dict is returned instead of propagating the error.
    """
    try:
        tags = File(f, easy=True)
        return dict(tags)
    except Exception:
        Log('Error reading tags from file: %s' % f)
    return {}
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
class localMediaMovie(Agent.Movies):
    """Secondary movie agent that contributes assets found next to the media.

    It never acts as the primary provider; it contributes to the agents
    listed in ``contributes_to``.
    """

    name = 'Local Media Assets Extended (Movies)'
    languages = [Locale.Language.NoLanguage]
    primary_provider = False
    persist_stored_files = False
    contributes_to = ['com.plexapp.agents.imdb', 'com.plexapp.agents.none']

    def search(self, results, media, lang):
        """Append a single placeholder result (id ``'null'``, score 100)."""
        placeholder = MetadataSearchResult(id='null', score=100)
        results.Append(placeholder)

    def update(self, metadata, media, lang):
        """Populate *metadata* from local assets, subtitles and video helpers.

        Failures of ``localmedia.findAssets`` are logged and do not abort
        the rest of the update.
        """
        # Clear out the title to ensure stale data doesn't clobber other
        # agents' contributions.
        metadata.title = None

        first_item_parts = media.items[0].parts
        part = first_item_parts[0]
        movie_dir = os.path.dirname(part.file)

        # Look for local media in the directory of the first part.
        try:
            localmedia.findAssets(metadata, media.title, [movie_dir], 'movie', first_item_parts)
        except Exception as err:
            Log('Error finding media for movie %s: %s' % (media.title, str(err)))

        # Look for subtitles for every part of every item.
        for item in media.items:
            for part in item.parts:
                localmedia.findSubtitles(part)

        # If there is an appropriate VideoHelper, use it.
        # NOTE(review): the original nesting was not recoverable from this
        # view; this is assumed to run once after the subtitle loops, using
        # the last part visited -- confirm against the real file.
        video_helper = videohelpers.VideoHelpers(part.file)
        if video_helper:
            video_helper.process_metadata(metadata)
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
def FindUniqueSubdirs(dirs):
    """Return *dirs* plus each entry's parent and grandparent directory.

    Args:
        dirs: iterable of directory paths (iterating a dict yields its keys).

    Returns:
        A dict mapping every collected path to ``True``.  The empty path,
        produced when a relative path runs out of parents, is never included.
    """
    final_dirs = {}
    for directory in dirs:
        final_dirs[directory] = True
        try:
            parent = os.path.split(directory)[0]
            final_dirs[parent] = True
            final_dirs[os.path.split(parent)[0]] = True
        except (AttributeError, TypeError):
            # Best effort: an entry that is not a path string is kept as-is
            # but contributes no ancestors.
            pass

    # os.path.split() yields '' once a relative path has no parent left.
    final_dirs.pop('', None)
    return final_dirs
|
||||
|
||||
class localMediaTV(Agent.TV_Shows):
    """Secondary TV agent that contributes assets found next to the media.

    It never acts as the primary provider; it contributes to the agents
    listed in ``contributes_to``.
    """

    name = 'Local Media Assets Extended (TV)'
    languages = [Locale.Language.NoLanguage]
    primary_provider = False
    persist_stored_files = False
    contributes_to = ['com.plexapp.agents.thetvdb', 'com.plexapp.agents.thetvdbdvdorder', 'com.plexapp.agents.none']

    def search(self, results, media, lang):
        """Append a single placeholder result (id ``'null'``, score 100)."""
        placeholder = MetadataSearchResult(id='null', score=100)
        results.Append(placeholder)

    def update(self, metadata, media, lang):
        """Populate show, season and episode metadata from local files.

        Episode assets are searched in each episode's own directory; show
        and season assets are searched in those directories plus the
        ancestors returned by ``FindUniqueSubdirs``.  Asset-lookup failures
        are logged and do not abort the update.
        """
        # Clear out the title to ensure stale data doesn't clobber other
        # agents' contributions.
        metadata.title = None

        # Look for episode media and collect the directories seen.
        episode_dirs = {}
        for s in media.seasons:
            Log('Creating season %s', s)
            metadata.seasons[s].index = int(s)
            for e in media.seasons[s].episodes:
                # Touch the episode metadata first so that it exists, then
                # find sidecar media.
                episodeMetadata = metadata.seasons[s].episodes[e]
                episodeMedia = media.seasons[s].episodes[e].items[0]
                episode_dir = os.path.dirname(episodeMedia.parts[0].file)
                episode_dirs[episode_dir] = True

                try:
                    localmedia.findAssets(episodeMetadata, media.title, [episode_dir], 'episode', episodeMedia.parts)
                except Exception as err:
                    Log('Error finding media for episode: %s' % str(err))

        # Figure out the directories we should be looking in.
        try:
            dirs = FindUniqueSubdirs(episode_dirs)
        except:
            dirs = []

        # Look for show images.
        Log("Looking for show media for %s.", metadata.title)
        try:
            localmedia.findAssets(metadata, media.title, dirs, 'show')
        except:
            Log("Error finding show media.")

        # Look for season images.
        for s in metadata.seasons:
            Log('Looking for season media for %s season %s.', metadata.title, s)
            try:
                localmedia.findAssets(metadata.seasons[s], media.title, dirs, 'season')
            except:
                Log("Error finding season media for season %s" % s)

        # Look for subtitles for each episode.
        for s in media.seasons:
            # Date-based seasons (index >= 1900) are ignored for now unless
            # this is personal media, otherwise they collide with S/E
            # folders/XML and PMS prefers date-based.
            if not (int(s) < 1900 or metadata.guid.startswith(PERSONAL_MEDIA_IDENTIFIER)):
                continue

            for e in media.seasons[s].episodes:
                for i in media.seasons[s].episodes[e].items:
                    for part in i.parts:
                        # Look for subtitles.
                        localmedia.findSubtitles(part)

                        # If there is an appropriate VideoHelper, use it.
                        # NOTE(review): the original nesting was not
                        # recoverable from this view; this is assumed to run
                        # once per part -- confirm against the real file.
                        video_helper = videohelpers.VideoHelpers(part.file)
                        if video_helper:
                            video_helper.process_metadata(metadata, episode = metadata.seasons[s].episodes[e])
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
class localMediaArtistCommon(object):
    """Shared attributes and ``update`` logic for the artist agents."""

    name = 'Local Media Assets Extended (Artists)'
    languages = [Locale.Language.NoLanguage]
    primary_provider = False
    persist_stored_files = False

    def update(self, metadata, media, lang):
        """Attach local video extras and the artist sort title to *metadata*.

        Nothing beyond clearing the title happens when ``shouldFindExtras()``
        is falsy.
        """
        # Clear out the title to ensure stale data doesn't clobber other
        # agents' contributions.
        metadata.title = None

        if not shouldFindExtras():
            return

        extra_type_map = getExtraTypeMap()
        artist_file_dirs = []
        artist_extras = {}

        # First look for track extras; the sort title is read from the first
        # track only.
        sort_title_checked = False
        for album in media.children:
            for track in album.children:
                part = helpers.unicodize(track.items[0].parts[0].file)
                findTrackExtra(part, extra_type_map, artist_extras)
                artist_file_dirs.append(os.path.dirname(part))

                # Look for artist sort field.
                if sort_title_checked:
                    continue
                sort_title_checked = True

                audio_helper = audiohelpers.AudioHelpers(part)
                if audio_helper and hasattr(audio_helper, 'get_artist_sort_title'):
                    artist_sort_title = audio_helper.get_artist_sort_title()
                    if artist_sort_title and hasattr(metadata, 'title_sort'):
                        metadata.title_sort = artist_sort_title

        # Now go through this artist's directories looking for additional extras.
        for artist_file_dir in set(artist_file_dirs):
            findArtistExtras(helpers.unicodize(artist_file_dir), extra_type_map, artist_extras, media.title)

        # Add extras ordered by their type's sort order, then by title.
        ordered_extras = sorted(artist_extras.values(), key = lambda v: (getExtraSortOrder()[type(v)], v.title))
        for extra in ordered_extras:
            metadata.extras.add(extra)
|
||||
|
||||
|
||||
class localMediaArtistLegacy(localMediaArtistCommon, Agent.Artist):
    """Artist agent variant whose ``search`` takes ``(results, media, lang)``."""

    contributes_to = [
        'com.plexapp.agents.discogs',
        'com.plexapp.agents.lastfm',
        'com.plexapp.agents.plexmusic',
        'com.plexapp.agents.none',
    ]

    def search(self, results, media, lang):
        """Append a single placeholder result named after ``media.artist``."""
        placeholder = MetadataSearchResult(id='null', name=media.artist, score=100)
        results.Append(placeholder)
|
||||
|
||||
|
||||
class localMediaArtistModern(localMediaArtistCommon, Agent.Artist):
    """Artist agent variant declaring ``version = 2``.

    Its ``search`` and ``update`` take the extra ``hints``/``manual`` and
    ``child_guid`` arguments.
    """

    version = 2
    contributes_to = ['com.plexapp.agents.plexmusic']

    def search(self, results, tree, hints, lang='en', manual=False):
        """Add a single placeholder artist result built from ``hints.artist``."""
        placeholder = SearchResult(id='null', type='artist', parentName=hints.artist, score=100)
        results.add(placeholder)

    def update(self, metadata, media, lang='en', child_guid=None):
        """Delegate to the shared artist update; *child_guid* is not used."""
        shared = super(localMediaArtistModern, self)
        shared.update(metadata, media, lang)
|
||||
|
||||
|
||||
class localMediaAlbum(Agent.Album):
    """Secondary album agent; all of the work is done by ``updateAlbum``.

    It never acts as the primary provider; it contributes to the agents
    listed in ``contributes_to``.
    """

    name = 'Local Media Assets Extended (Albums)'
    languages = [Locale.Language.NoLanguage]
    primary_provider = False
    persist_stored_files = False
    contributes_to = ['com.plexapp.agents.discogs', 'com.plexapp.agents.lastfm', 'com.plexapp.agents.plexmusic', 'com.plexapp.agents.none']

    def search(self, results, media, lang):
        """Append a single placeholder result (id ``'null'``, score 100)."""
        results.Append(MetadataSearchResult(id = 'null', score = 100))

    def update(self, metadata, media, lang):
        """Run ``updateAlbum``, enabling extras only when they are supported."""
        find_extras = shouldFindExtras()
        # The extra-type map is only needed when extras are looked up.
        extra_type_map = getExtraTypeMap() if find_extras else None
        # artist_extras is a filename -> extra mapping everywhere else in
        # this module, so pass a dict rather than a list.
        updateAlbum(metadata, media, lang, find_extras, artist_extras={}, extra_type_map=extra_type_map)
|
||||
|
||||
|
||||
def updateAlbum(metadata, media, lang, find_extras=False, artist_extras=None, extra_type_map=None):
    """Fill album *metadata* with local posters, sort titles and track extras.

    Args:
        metadata: album metadata object to update.
        media: album media object; its ``tracks`` are walked part by part.
        lang: unused here.
        find_extras: when true, look for a video extra for every track.
        artist_extras: accepted for call compatibility; not used here.
        extra_type_map: passed to ``findTrackExtra`` when *find_extras* is set.

    Posters that were not found during this run are dropped at the end via
    ``metadata.posters.validate_keys``.
    """
    # Clear out the title to ensure stale data doesn't clobber other agents'
    # contributions.
    metadata.title = None

    valid_posters = []
    for track in media.tracks:
        for item in media.tracks[track].items:
            for part in item.parts:
                filename = helpers.unicodize(part.file)
                path = os.path.dirname(filename)
                file_root = os.path.splitext(filename)[0]

                # Lower-cased name -> real name, for case-insensitive lookup.
                path_files = {}
                for entry in os.listdir(path):
                    path_files[entry.lower()] = entry

                # Look for posters named after the configured poster names,
                # the track file itself, or the containing directory.
                poster_files = config.POSTER_FILES + [os.path.basename(file_root), helpers.splitPath(path)[-1]]
                for ext in config.ART_EXTS:
                    for name in poster_files:
                        candidate = (name + '.' + ext).lower()
                        if candidate not in path_files:
                            continue

                        data = Core.storage.load(os.path.join(path, path_files[candidate]))
                        poster_name = hashlib.md5(data).hexdigest()
                        valid_posters.append(poster_name)

                        if poster_name not in metadata.posters:
                            metadata.posters[poster_name] = Proxy.Media(data)
                            Log('Local asset image added: ' + candidate + ', for file: ' + filename)
                        else:
                            Log('Skipping local poster since its already added')

                # If there is an appropriate AudioHelper, use it.
                audio_helper = audiohelpers.AudioHelpers(part.file)
                if audio_helper is not None:
                    try:
                        valid_posters = valid_posters + audio_helper.process_metadata(metadata)

                        # Album sort title.
                        if hasattr(audio_helper, 'get_album_sort_title'):
                            album_sort_title = audio_helper.get_album_sort_title()
                            if album_sort_title and hasattr(metadata, 'title_sort'):
                                metadata.title_sort = album_sort_title

                        # Track sort title.
                        if hasattr(audio_helper, 'get_track_sort_title'):
                            track_sort_title = audio_helper.get_track_sort_title()
                            track_key = media.tracks[track].guid or track
                            if track_sort_title and hasattr(metadata.tracks[track_key], 'title_sort'):
                                metadata.tracks[track_key].title_sort = track_sort_title
                    except Exception as err:
                        # Still best effort, but leave a trace instead of
                        # hiding the failure completely.
                        Log('Error processing audio metadata for %s: %r' % (filename, err))

                # Look for a video extra for this track.
                if find_extras:
                    track_video = findTrackExtra(helpers.unicodize(part.file), extra_type_map)
                    if track_video is not None:
                        track_key = media.tracks[track].guid or track
                        metadata.tracks[track_key].extras.add(track_video)

    metadata.posters.validate_keys(valid_posters)
|
||||
|
||||
def findTrackExtra(file_path, extra_type_map, artist_extras=None):
    """Find video extras stored next to a track and return the best one.

    Candidate videos live in the track's directory, have an extension listed
    in ``config.VIDEO_EXTS`` and a name starting with the track's file name
    (case-insensitive), following the convention
    "track file name - pretty name (optional) - type (optional).ext".

    Args:
        file_path: path of the track file.
        extra_type_map: maps a normalised type suffix to an extra class.
        artist_extras: optional dict that receives every video found, keyed
            by file name.  A fresh dict is used when omitted, so results no
            longer leak between calls.

    Returns:
        The first music video (regular or lyric) by type sort order and
        title, or ``None`` when there is none.
    """
    if artist_extras is None:
        artist_extras = {}

    file_name = os.path.basename(file_path)
    file_root, file_ext = os.path.splitext(file_name)
    track_dir = os.path.dirname(file_path)
    root_lower = file_root.lower()

    track_videos = []
    for video in [f for f in os.listdir(track_dir)
                  if os.path.splitext(f)[1][1:].lower() in config.VIDEO_EXTS
                  and helpers.unicodize(f).lower().startswith(root_lower)]:

        video_file = os.path.splitext(video)[0]
        name_components = video_file.split('-')

        # A trailing "- type" component selects the extra class and is
        # removed from the title; otherwise it is a plain music video.
        extra_type = MusicVideoObject
        if len(name_components) > 1:
            type_component = re.sub(r'[ ._]+', '', name_components[-1].lower())
            if type_component in extra_type_map:
                extra_type = extra_type_map[type_component]
                name_components.pop(-1)

        # Use the video file name for the title unless we have a prettier one.
        pretty_title = '-'.join(name_components).strip()
        if len(pretty_title) > len(file_root):
            pretty_title = pretty_title.replace(file_root, '')
            if pretty_title.startswith(file_ext):
                pretty_title = pretty_title[len(file_ext):]
            # NOTE(review): the original nesting of this clean-up was not
            # recoverable from this view; it is assumed to apply only when a
            # prettier title exists -- confirm against the real file.
            pretty_title = re.sub(r'^[- ]+', '', pretty_title)

        video_path = os.path.join(track_dir, video)
        track_video = extra_type(title=pretty_title, file=video_path)
        artist_extras[video] = track_video

        if extra_type in [MusicVideoObject, LyricMusicVideoObject]:
            Log('Found video %s for track: %s from file: %s' % (pretty_title, file_name, video_path))
            track_videos.append(track_video)
        else:
            Log('Skipping track video %s (only regular music videos allowed on tracks)' % video)

    if not track_videos:
        return None

    track_videos = sorted(track_videos, key = lambda v: (getExtraSortOrder()[type(v)], v.title))
    return track_videos[0]
|
||||
|
||||
|
||||
def findArtistExtras(path, extra_type_map, artist_extras, artist_name):
    """Collect artist-level video extras into *artist_extras*.

    Videos are taken from *path* and, when the ``music_video_path``
    preference is set and exists, from files there whose normalised name
    starts with the normalised artist name and from the directory whose
    normalised name equals it.  Entries are keyed by file name; names
    already present in *artist_extras* are left untouched.
    """
    # Look for other videos in this directory.
    candidates = [f for f in os.listdir(path)
                  if os.path.splitext(f)[1][1:].lower() in config.VIDEO_EXTS
                  and f not in artist_extras]
    for video in candidates:
        if video in artist_extras:
            continue
        Log('Found artist video: %s' % video)
        extra = parseArtistExtra(os.path.join(path, video), extra_type_map, artist_name)
        if extra is not None:
            artist_extras[video] = extra

    # Look for artist videos in the custom path if present.
    artist_name = normalizeArtist(artist_name)
    music_video_path = Prefs['music_video_path']
    if music_video_path is None or len(music_video_path) == 0:
        return

    if not os.path.exists(music_video_path):
        Log('The specified local music video path doesn\'t exist: %s' % music_video_path)
        return

    local_files = [f for f in os.listdir(music_video_path)
                   if (os.path.splitext(f)[1][1:].lower() in config.VIDEO_EXTS or os.path.isdir(os.path.join(music_video_path, f)))
                   and normalizeArtist(os.path.basename(f)).startswith(artist_name)
                   and f not in artist_extras]

    for local_file in local_files:
        local_path = os.path.join(music_video_path, local_file)

        # Go ahead and add files directly in the specific path matching the
        # "artist - title - type (optional).ext" convention.
        if os.path.isfile(local_path) and local_file not in artist_extras:
            Log('Found artist video: %s' % local_file)
            extra = parseArtistExtra(local_path, extra_type_map, artist_name)
            if extra is not None:
                artist_extras[local_file] = extra

        # Also add all the videos in the "local video root/artist" directory
        # if we found one.
        elif os.path.isdir(local_path) and normalizeArtist(os.path.basename(local_file)) == artist_name:
            dir_videos = [f for f in os.listdir(local_path)
                          if os.path.splitext(f)[1][1:].lower() in config.VIDEO_EXTS
                          and f not in artist_extras]
            for artist_dir_file in dir_videos:
                if artist_dir_file in artist_extras:
                    continue
                Log('Found artist video: %s' % artist_dir_file)
                extra = parseArtistExtra(os.path.join(music_video_path, local_file, artist_dir_file), extra_type_map, artist_name)
                if extra is not None:
                    artist_extras[artist_dir_file] = extra
|
||||
|
||||
|
||||
def parseArtistExtra(path, extra_type_map, artist_name):
    """Build an extra object for a local artist video from its file name.

    The file name (without extension) is split on '-'. A trailing component
    that is a key of `extra_type_map` selects the extra type and is removed;
    a leading component that normalizes to `artist_name` is removed as well.
    Whatever remains becomes the title.

    Args:
        path: Path of the video file.
        extra_type_map: Mapping of lower-case type keyword to extra class.
        artist_name: Artist name, already passed through normalizeArtist().

    Returns:
        An instance of the selected extra class, or None when the file is a
        concert and the server version check fails.
    """
    video_file = os.path.splitext(os.path.basename(path))[0]
    name_components = video_file.split('-')

    # Set the type and whack the type component from the name if we found one.
    extra_type = MusicVideoObject
    if len(name_components) > 1:
        type_key = name_components[-1].lower().strip()
        if type_key in extra_type_map:
            extra_type = extra_type_map[type_key]
            name_components.pop(-1)

    # Only return concerts if we're new enough.
    if extra_type in [ConcertVideoObject] and not Util.VersionAtLeast(Platform.ServerVersion, 0, 9, 12, 2):
        Log('Found concert, but skipping, not new enough server.')
        return None

    # Whack the artist name if it's the first component and we have more than one.
    if len(name_components) > 1 and normalizeArtist(name_components[0]) == artist_name:
        name_components.pop(0)

    # Components keep the spaces that surrounded the dashes in
    # "artist - title - type", so trim the ends of the rebuilt title.
    title = '-'.join(name_components).strip()
    return extra_type(title=title, file=helpers.unicodize(path))
|
||||
|
||||
|
||||
def normalizeArtist(artist_name):
    """Return a comparison key for an artist name.

    Characters whose Unicode category starts with 'P' are dropped, spaces are
    removed and the result is lower-cased. If nothing is left, or anything
    goes wrong, the name is returned as it was passed in.
    """
    try:
        u_artist_name = helpers.unicodize(artist_name)
        kept = [ch for ch in u_artist_name
                if not unicodedata.category(ch).startswith('P')]
        ret = ''.join(kept).replace(' ', '').lower()
        if len(ret) > 0:
            return ret
        return artist_name
    except Exception as e:
        Log('Error normalizing artist: %s' % e)
        return artist_name
|
||||
|
||||
|
||||
def shouldFindExtras():
    """Determine whether we should look for video extras.

    Returns True only when ConcertVideoObject can be instantiated and the
    server version check for 0.9.12.0 passes; otherwise logs the reason and
    returns False.
    """
    try:
        # A NameError raised anywhere in here is reported as a framework
        # that is too old.
        ConcertVideoObject()
        if Util.VersionAtLeast(Platform.ServerVersion, 0, 9, 12, 0):
            return True
        Log('Not adding extras: Server v0.9.12.0+ required')
        return False
    except NameError:
        Log('Not adding extras: Framework v2.6.2+ required')
        return False
|
||||
|
||||
|
||||
def getExtraTypeMap():
    """Return the mapping of file-name type keyword to extra class."""
    return dict(
        video=MusicVideoObject,
        live=LiveMusicVideoObject,
        lyrics=LyricMusicVideoObject,
        behindthescenes=BehindTheScenesObject,
        interview=InterviewObject,
        concert=ConcertVideoObject,
    )
|
||||
|
||||
def getExtraSortOrder():
    """Return a mapping of extra class to its sort rank (0 sorts first)."""
    ranked_types = [
        MusicVideoObject,
        LyricMusicVideoObject,
        ConcertVideoObject,
        LiveMusicVideoObject,
        BehindTheScenesObject,
        InterviewObject,
    ]
    return dict((extra_type, rank) for rank, extra_type in enumerate(ranked_types))
|
||||
@@ -0,0 +1,286 @@
|
||||
import os
|
||||
import helpers
|
||||
|
||||
from mutagen import File as MFile
|
||||
from mutagen.flac import Picture
|
||||
|
||||
class AudioHelper(object):
    """Base class for the per-format audio tag helpers; stores the file name."""

    def __init__(self, filename):
        # Path of the audio file the helper reads its tags from.
        self.filename = filename
|
||||
|
||||
def AudioHelpers(filename):
    """Return the tag helper matching the file's tag type, or None.

    The file is opened with MFile to find out which tag class it produces;
    the first helper class whose is_helper_for() accepts that class name is
    instantiated with the (unicodized) file name. None is returned when the
    file cannot be read, is not recognised, or no helper accepts it.
    """
    filename = helpers.unicodize(filename)
    try:
        tag = MFile(filename, None, True)
    except Exception as e:
        Log('Error getting file details for %s: %s' % (filename, e))
        return None

    if tag is None:
        return None

    tag_type = type(tag).__name__
    for helper_cls in [ID3AudioHelper, MP4AudioHelper, FLACAudioHelper, OGGAudioHelper]:
        if helper_cls.is_helper_for(tag_type):
            return helper_cls(filename)
    return None
|
||||
|
||||
|
||||
def parse_genres(genre):
    """Split a genre string into a list of genres.

    ';' is used as the separator when the string contains one; otherwise the
    string is split on '/'. Entries are returned untrimmed.
    """
    separator = ';' if ';' in genre else '/'
    return genre.split(separator)
|
||||
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
class ID3AudioHelper(AudioHelper):
    """Tag helper for files whose tags are ID3 frames."""

    @classmethod
    def is_helper_for(cls, tagType):
        """Return True when `tagType` names a tag class that uses ID3 tags."""
        # All of these file types use ID3 tags like MP3
        return tagType in ('EasyID3', 'EasyMP3', 'EasyTrueAudio', 'ID3', 'MP3', 'TrueAudio', 'AIFF')

    def _current_tags(self):
        """Return the tags from the last read, loading them if none are cached.

        `self.tags` is only assigned by get_artist_sort_title() and
        process_metadata(), so it may be missing (or None) when one of the
        other getters is called first.
        """
        tags = getattr(self, 'tags', None)
        if tags is None:
            self.tags = tags = MFile(self.filename)
        return tags

    def get_album_sort_title(self):
        """Return the 'TSOA' frame, or None when absent or unreadable."""
        try:
            return self._current_tags().get('TSOA')
        except Exception as e:
            Log('Exception reading TSOA (album sort title): ' + str(e))
            return None

    def get_track_sort_title(self):
        """Return the 'TSOT' frame, or None when absent or unreadable."""
        try:
            return self._current_tags().get('TSOT')
        except Exception as e:
            Log('Exception reading TSOT (track sort title): ' + str(e))
            return None

    def get_artist_sort_title(self):
        """Return the 'TSO2' frame if set, else 'TSOP'; None on any failure.

        Re-reads the file and refreshes `self.tags` on every call.
        """
        try:
            self.tags = tags = MFile(self.filename)
            return tags.get('TSO2') or tags.get('TSOP')
        except Exception:
            # Best effort: an unreadable file simply has no sort title.
            return None

    def process_metadata(self, metadata):
        """Copy release year, genres and embedded pictures into `metadata`.

        Returns:
            The list of poster keys (MD5 hex digests of the picture data)
            found in the file, or None when the tags cannot be read at all.
        """
        Log('Reading ID3 tags from: ' + self.filename)
        try:
            self.tags = tags = MFile(self.filename)
            Log('Found tags: ' + str(tags.keys()))
        except Exception:
            Log('An error occurred while attempting to read ID3 tags from ' + self.filename)
            return

        # Release Date
        try:
            year = tags.get('TDRC')
            if year is not None and len(year.text) > 0:
                metadata.originally_available_at = Datetime.ParseDate('01-01-' + str(year.text[0])).date()
        except Exception as e:
            Log('Exception reading TDRC (year): ' + str(e))

        # Genres
        try:
            genres = tags.get('TCON')
            if genres is not None and len(genres.text) > 0:
                metadata.genres.clear()
                for genre in genres.text:
                    for sub_genre in parse_genres(genre):
                        metadata.genres.add(sub_genre.strip())
        except Exception as e:
            Log('Exception reading TCON (genre): ' + str(e))

        # Posters
        valid_posters = []
        mime_to_ext = {
            'image/jpeg': 'jpg',
            'image/jpg': 'jpg',
            'image/png': 'png',
            'image/gif': 'gif',
        }
        try:
            for frame in [f for f in tags if f.startswith('APIC:')]:
                picture = tags[frame]
                ext = mime_to_ext.get(picture.mime, '')

                # The digest of the image bytes doubles as the poster key.
                poster_name = hashlib.md5(picture.data).hexdigest()
                valid_posters.append(poster_name)
                if poster_name not in metadata.posters:
                    Log('Adding embedded APIC art: ' + poster_name)
                    metadata.posters[poster_name] = Proxy.Media(picture.data, ext=ext)
        except Exception as e:
            Log('Exception adding posters: ' + str(e))

        return valid_posters
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
class MP4AudioHelper(AudioHelper):
    """Tag helper for files whose tags are read as MP4 / EasyMP4."""

    @classmethod
    def is_helper_for(cls, tagType):
        """Return True when `tagType` is 'MP4' or 'EasyMP4'."""
        return tagType in ['MP4', 'EasyMP4']

    def _first_easy_value(self, key):
        """Return the first value stored under `key` in the easy tags, or None.

        Any failure (unreadable file, missing key, empty value list) yields
        None; the catch-all is kept deliberately broad.
        """
        try:
            easy_tags = MFile(self.filename, easy=True)
            return easy_tags.get(key)[0]
        except:
            return None

    def get_track_sort_title(self):
        """Return the 'titlesort' value, or None."""
        return self._first_easy_value('titlesort')  # 'sonm'

    def get_album_sort_title(self):
        """Return the 'albumsort' value, or None."""
        return self._first_easy_value('albumsort')  # 'soal'

    def get_artist_sort_title(self):
        """Return the 'artistsort' value, or None."""
        return self._first_easy_value('artistsort')  # 'soar'

    def process_metadata(self, metadata):
        """Copy genres, release date and cover art into `metadata`.

        Returns:
            The list of poster keys (MD5 hex digests of the cover data) found
            in the file, or None when the file cannot be parsed.
        """
        Log('Reading MP4 tags from: ' + self.filename)
        try:
            tags = MFile(self.filename)
            Log('Found tags: ' + str(tags.keys()))
        except:
            Log('An error occurred while attempting to parse the MP4 file: ' + self.filename)
            return

        # Genres
        try:
            genre_values = tags.get('\xa9gen')
            if genre_values is not None and len(genre_values) > 0:
                metadata.genres.clear()
                for genre_value in genre_values:
                    for piece in parse_genres(genre_value):
                        metadata.genres.add(piece.strip())
        except Exception as e:
            Log('Exception reading \xa9gen (genre): ' + str(e))

        # Release Date
        try:
            day_values = tags.get('\xa9day')
            if day_values is not None and len(day_values) > 0:
                # Only the part before a 'T' is parsed.
                date_text = day_values[0].split('T')[0]
                metadata.originally_available_at = Datetime.ParseDate(date_text)
        except Exception as e:
            Log('Exception reading \xa9day (release date)' + str(e))

        # Posters
        valid_posters = []
        try:
            cover_values = tags.get('covr')
            if cover_values is not None and len(cover_values) > 0:
                for cover_data in cover_values:
                    digest = hashlib.md5(cover_data).hexdigest()
                    valid_posters.append(digest)
                    if digest not in metadata.posters:
                        Log('Adding embedded cover art: ' + digest)
                        metadata.posters[digest] = Proxy.Media(cover_data)
        except Exception as e:
            Log('Exception adding posters: ' + str(e))

        return valid_posters
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
class FLACAudioHelper(AudioHelper):
    """Tag helper for files whose tags are read as FLAC."""

    @classmethod
    def is_helper_for(cls, tagType):
        """Return True when `tagType` is 'FLAC'."""
        return tagType in ['FLAC']

    def process_metadata(self, metadata):
        """Copy genres, release date and embedded pictures into `metadata`.

        Returns:
            The list of poster keys (MD5 hex digests of the picture data)
            found in the file, or None when the file cannot be parsed.
        """
        Log('Reading FLAC tags from: ' + self.filename)
        try:
            tags = MFile(self.filename)
            Log('Found tags: ' + str(tags.keys()))
        except:
            Log('An error occurred while attempting to parse the FLAC file: ' + self.filename)
            return

        # Genres
        try:
            genre_values = tags.get('genre')
            if genre_values is not None and len(genre_values) > 0:
                metadata.genres.clear()
                for genre_value in genre_values:
                    for piece in parse_genres(genre_value):
                        metadata.genres.add(piece.strip())
        except Exception as e:
            Log('Exception reading genre: ' + str(e))

        # Release Date
        try:
            date_values = tags.get('date')
            if date_values is not None and len(date_values) > 0:
                metadata.originally_available_at = Datetime.ParseDate(date_values[0])
        except Exception as e:
            Log('Exception reading release date' + str(e))

        # Posters
        valid_posters = []
        try:
            pictures = tags.pictures
            if pictures is not None and len(pictures) > 0:
                for picture in pictures:
                    digest = hashlib.md5(picture.data).hexdigest()
                    valid_posters.append(digest)
                    if digest not in metadata.posters:
                        Log('Adding embedded cover art: ' + digest)
                        metadata.posters[digest] = Proxy.Media(picture.data)
        except Exception as e:
            Log('Exception adding posters: ' + str(e))

        return valid_posters
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
class OGGAudioHelper(AudioHelper):
    """Tag helper for files whose tags are read as OggVorbis."""

    @classmethod
    def is_helper_for(cls, tagType):
        """Return True when `tagType` is 'OggVorbis'."""
        return tagType in ['OggVorbis']

    def process_metadata(self, metadata):
        """Copy genres, release date and embedded pictures into `metadata`.

        Returns:
            The list of poster keys (MD5 hex digests of the picture data)
            found in the file, or None when the file cannot be parsed.
        """
        Log('Reading OGG tags from: ' + self.filename)
        try:
            tags = MFile(self.filename)
            Log('Found tags: ' + str(tags.keys()))
        except:
            Log('An error occured while attempting to parse the OGG file: ' + self.filename)
            return

        # Genres
        try:
            genre_values = tags.get('genre')
            if genre_values is not None and len(genre_values) > 0:
                metadata.genres.clear()
                for genre_value in genre_values:
                    for piece in parse_genres(genre_value):
                        metadata.genres.add(piece.strip())
        except Exception as e:
            Log('Exception reading genre: ' + str(e))

        # Release Date
        try:
            date_values = tags.get('date')
            if date_values is not None and len(date_values) > 0:
                metadata.originally_available_at = Datetime.ParseDate(date_values[0])
        except Exception as e:
            Log('Exception reading release date' + str(e))

        # Posters
        valid_posters = []
        try:
            encoded_pictures = tags.get('metadata_block_picture')
            if encoded_pictures is not None and len(encoded_pictures) > 0:
                for encoded in encoded_pictures:
                    # Each entry is base64 text that decodes to a picture block.
                    picture = Picture(base64.standard_b64decode(encoded))
                    digest = hashlib.md5(picture.data).hexdigest()
                    valid_posters.append(digest)
                    if digest not in metadata.posters:
                        Log('Adding embedded cover art: ' + digest)
                        metadata.posters[digest] = Proxy.Media(picture.data)
        except Exception as e:
            Log('Exception adding posters: ' + str(e))

        return valid_posters
|
||||
@@ -0,0 +1,11 @@
|
||||
|
||||
# File extensions, lower-case and without the leading dot, grouped by the kind
# of file they identify. Callers compare against these with `in`.
IMAGE_EXTS = ['jpg', 'png', 'jpeg', 'tbn']
ART_EXTS = ['jpg', 'jpeg', 'png', 'tbn']
AUDIO_EXTS = ['mp3']
SUBTITLE_EXTS = [
    'utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa',
    'aqt', 'jss', 'ass', 'idx', 'sub', 'txt', 'psb',
]
VIDEO_EXTS = [
    '3g2', '3gp', 'asf', 'asx', 'avc', 'avi', 'avs', 'bivx', 'bup', 'divx',
    'dv', 'dvr-ms', 'evo', 'fli', 'flv', 'm2t', 'm2ts', 'm2v', 'm4v', 'mkv',
    'mov', 'mp4', 'mpeg', 'mpg', 'mts', 'nsv', 'nuv', 'ogm', 'ogv', 'tp',
    'pva', 'qt', 'rm', 'rmvb', 'sdp', 'svq3', 'strm', 'ts', 'ty', 'vdr',
    'viv', 'vob', 'vp3', 'wmv', 'wpl', 'wtv', 'xsp', 'xvid', 'webm',
]

# Base file names (without extension) used for poster and background images.
POSTER_FILES = ['poster', 'default', 'cover', 'movie', 'folder']
ART_FILES = ['fanart', 'art', 'background', 'backdrop']
|
||||
@@ -0,0 +1,34 @@
|
||||
import unicodedata
|
||||
|
||||
# Unicode control characters can appear in ID3v2 tags but are not legal in XML.
# The first alternative matches those characters (plus U+FFFE/U+FFFF). The
# remaining alternatives match surrogate code units that do not form a
# high/low pair: a high surrogate followed by a non-low one, a low surrogate
# preceded by a non-high one, a high surrogate at the end, or a low surrogate
# at the start.
RE_UNICODE_CONTROL = u'|'.join([
    u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])',
    u'([%s-%s][^%s-%s])|([^%s-%s][%s-%s])|([%s-%s]$)|(^[%s-%s])' % (
        # (high start, high end, low start, low end), once per pair of classes.
        (unichr(0xd800), unichr(0xdbff), unichr(0xdc00), unichr(0xdfff)) * 3
    ),
])
|
||||
|
||||
# A platform independent way to split paths which might come in with different separators.
|
||||
def splitPath(str):
    """Split a path into its components.

    A path containing any backslash is split on backslashes only; every
    other path is split on forward slashes.
    """
    separator = '\\' if '\\' in str else '/'
    return str.split(separator)
|
||||
|
||||
def unicodize(s):
    """Return `s` as NFC-normalized unicode with control characters removed.

    Byte strings are decoded as UTF-8. Values that are already unicode are
    normalized directly; calling .decode() on them would first encode them
    implicitly and can fail for non-ASCII text. Each step is best-effort:
    when it fails the problem is logged and the value from the previous step
    is carried forward.

    Args:
        s: A byte string or unicode string (typically a file name or path).

    Returns:
        The cleaned string, or the input unchanged if neither step succeeded.
    """
    filename = s
    try:
        text = s if isinstance(s, unicode) else s.decode('utf-8')
        filename = unicodedata.normalize('NFC', text)
    except Exception:
        # %r keeps the log call itself from failing on None or undecodable bytes.
        Log('Failed to unicodize: %r' % (filename,))
    try:
        filename = re.sub(RE_UNICODE_CONTROL, '', filename)
    except Exception:
        Log("Couldn't strip control characters: %r" % (filename,))
    return filename
|
||||
|
||||
def cleanFilename(filename):
    """Return `filename` lower-cased, with punctuation and whitespace as spaces.

    The name is encoded as UTF-8, every punctuation or whitespace character
    is replaced by a single space, and the result is stripped.
    """
    separators = string.punctuation + string.whitespace
    to_spaces = string.maketrans(separators, ' ' * len(separators))
    return string.translate(filename.encode('utf-8'), to_spaces).strip().lower()
|
||||
@@ -0,0 +1,338 @@
|
||||
import os, unicodedata
|
||||
import config
|
||||
import helpers
|
||||
import subtitlehelpers
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
def findAssets(metadata, media_title, paths, type, parts=[]):
    """Attach local artwork, themes and (for movies) video extras to `metadata`.

    The directories in `paths` are listed to build a map of lower-cased file
    name to full path and to count the distinct media files present. For
    movies with exactly one media file, extras are collected from named
    sub-directories and from files following the "-extra" naming convention.
    Finally, image/audio files matching the patterns for `type` are loaded
    into the corresponding metadata container, and entries no longer found
    are dropped through validate_keys().

    Args:
        metadata: Object providing the containers used for `type` (`posters`,
            `banners`, `art`, `themes`, `thumbs`, `extras`) and, for seasons,
            `index`.
        media_title: Title used in logging and as the title of trailers named
            'trailer' / 'movie-trailer*'.
        paths: Directories to scan.
        type: One of 'season', 'show', 'episode' or 'movie'.
        parts: Media parts; the first part's file provides the root file name
            and the remaining parts are treated as stacked parts. The default
            list is never mutated.
    """
    ignore_samples = [r'[-\._ ]sample', r'sample[-\._ ]']
    ignore_trailers = [r'-trailer\.']

    # Do a quick check to make sure we've got the extra types available in this
    # framework version, and that the server is new enough to support them.
    try:
        InterviewObject()
        if Util.VersionAtLeast(Platform.ServerVersion, 0, 9, 9, 13):
            find_extras = True
        else:
            find_extras = False
            Log('Not adding extras: Server v0.9.9.13+ required')
    except NameError:
        Log('Not adding extras: Framework v2.5.0+ required')
        find_extras = False

    if find_extras:
        extra_type_map = {'trailer': TrailerObject,
                          'deleted': DeletedSceneObject,
                          'behindthescenes': BehindTheScenesObject,
                          'interview': InterviewObject,
                          'scene': SceneOrSampleObject}

    # We start by building a dictionary of files to their absolute paths. We
    # also need to know the number of media files that are actually present,
    # in case the found local media asset is limited to a single instance per
    # media file.
    path_files = {}
    multi_parts = []
    total_media_files = 0
    root_file = getRootFile(helpers.unicodize(parts[0].file)) if parts else None
    stacked_part_files = [p.file for p in parts[1:]]
    for path in paths:
        path = helpers.unicodize(path)
        for file_path in sorted(os.listdir(path)):

            # When using os.listdir with a unicode path, it will always return
            # a string using the NFD form. However, we internally are using
            # the form NFC and therefore need to convert it to allow correct
            # regex / comparisons to be performed.
            file_path = helpers.unicodize(file_path)
            full_path = os.path.join(path, file_path)

            if os.path.isfile(full_path):
                path_files[file_path.lower()] = full_path

            # Only count real and distinct (not stacked) video files.
            (root, ext) = os.path.splitext(file_path)
            should_count = True

            # Check for valid video file extension.
            if ext.lower()[1:] not in config.VIDEO_EXTS:
                should_count = False

            # Don't count sample files if they're smaller than 300MB.
            if should_count:
                for rx in ignore_samples:
                    if re.search(rx, full_path, re.IGNORECASE) and os.path.getsize(full_path) < 300 * 1024 * 1024:
                        Log("%s looks like a sample, won't contribute to total media file count." % file_path)
                        should_count = False

            # Don't count trailer files.
            if should_count:
                for rx in ignore_trailers:
                    if re.search(rx, full_path, re.IGNORECASE):
                        Log("%s looks like a trailer, won't contribute to total media file count." % file_path)
                        should_count = False

            # Don't count dot files.
            if should_count:
                if root.lower().startswith('.'):
                    Log("%s won't contribute to total media file count." % file_path)
                    should_count = False

            # Don't count multi-part files (stack everything up to and including the year).
            if should_count:
                year = re.search(r'([\(\[\.\-])([1-2][0-9]{3})([\.\-\)\]_,+])', file_path)
                if year:
                    multi_part = file_path[0:year.end()]
                    if multi_part in multi_parts:
                        should_count = False
                        Log("%s looks like part of a multi-version set, won't contribute to total media file count." % file_path)
                    else:
                        multi_parts.append(multi_part)

            # Don't count stacked parts.
            if should_count:
                if full_path in stacked_part_files:
                    should_count = False
                    Log("%s looks like a stacked part, won't contribute to total media file count." % file_path)

            # Don't count things that follow the "-extra" naming convention.
            if should_count and find_extras:
                for extra_key in extra_type_map.keys():
                    if root.endswith('-' + extra_key):
                        Log("%s looks like a %s extra, won't contribute to total media file count." % (file_path, extra_key))
                        should_count = False

            # Don't count things that follow specific trailer naming conventions.
            if should_count:
                if root == 'trailer' or root.startswith('movie-trailer'):
                    Log("%s looks like a trailer, won't contribute to total media file count." % (file_path))
                    should_count = False

            if should_count:
                total_media_files += 1

    if find_extras and type == 'movie':
        extras = []
        re_strip = Regex(r'[\W ]+')

        # NOTE: `path` is whatever the scan loop above left behind, i.e. the
        # last entry of `paths`; only that directory is searched for extras.
        if total_media_files != 1:
            Log('Found %d media files in this directory, skipping local extras search: %s' % (total_media_files, path))
        else:

            # Look for extras in named directories.
            Log('Looking for local extras in path: ' + path)
            for root, dirs, files in os.walk(path):
                for d in dirs:
                    for key in extra_type_map.keys():
                        if re_strip.sub('', d.lower()).startswith(key):
                            for f in os.listdir(os.path.join(root, d)):
                                (fn, ext) = os.path.splitext(f)
                                # Extension compared case-insensitively, as in the counting pass.
                                if not fn.startswith('.') and ext[1:].lower() in config.VIDEO_EXTS:

                                    # On Windows, os.walk() likes to prepend the "extended-length path
                                    # prefix" to root. This causes issues later on when this path is
                                    # converted to the file:// URL for serialization and later
                                    # consumption by PMS, so clean it up here.
                                    root = re.sub(r'^\\\\\?\\', '', root)

                                    Log('Found %s extra: %s' % (key, f))
                                    extras.append({'type': key, 'title': helpers.unicodize(fn), 'file': os.path.join(root, d, f)})

            # Look for filenames following the "-extra" convention and a couple of other special cases.
            for f in os.listdir(path):
                (fn, ext) = os.path.splitext(f)
                is_video = ext[1:].lower() in config.VIDEO_EXTS

                # Files named exactly 'trailer' or starting with 'movie-trailer'.
                if (fn == 'trailer' or fn.startswith('movie-trailer')) and is_video:
                    Log('Found trailer extra, renaming with title: ' + media_title)
                    # These are always trailers; the type must not depend on a
                    # loop variable left over from elsewhere in the function.
                    extras.append({'type': 'trailer', 'title': media_title, 'file': os.path.join(path, f)})

                # Files following the "-extra" convention.
                else:
                    for key in extra_type_map.keys():
                        if not fn.startswith('.') and fn.endswith('-' + key) and is_video:
                            Log('Found %s extra: %s' % (key, f))
                            title = ' '.join(fn.split('-')[:-1])
                            extras.append({'type': key, 'title': helpers.unicodize(title), 'file': os.path.join(path, f)})

            # Make sure extras are sorted alphabetically and by type. The
            # second sort is stable, so titles stay ordered within a type.
            type_order = ['trailer', 'behindthescenes', 'interview', 'deleted', 'scene', 'sample']
            extras.sort(key=lambda e: e['title'])
            extras.sort(key=lambda e: type_order.index(e['type']))

            for extra in extras:
                metadata.extras.add(extra_type_map[extra['type']](title=extra['title'], file=extra['file']))

            Log('Added %d extras' % len(metadata.extras))

    Log('Looking for %s media (%s) in %d paths (root file: %s) with %d media files.', type, media_title, len(paths), root_file, total_media_files)
    Log('Paths: %s', ", ".join([helpers.unicodize(p) for p in paths]))

    # Figure out what regexs to use. Each entry is
    # [file name pattern, target container, allowed extensions, limited],
    # where a limited pattern only applies when there is a single media file
    # (or the file name starts with the root file name).
    search_tuples = []
    if type == 'season':
        search_tuples += [['season-?0?%s[-a-z]?(-poster)?' % metadata.index, metadata.posters, config.IMAGE_EXTS, False]]
        search_tuples += [['season-?0?%s-banner[-a-z]?' % metadata.index, metadata.banners, config.IMAGE_EXTS, False]]
        if int(metadata.index) == 0:  # Season zero, also look for Frodo-compliant 'specials' artwork.
            search_tuples += [['season-specials-poster', metadata.posters, config.IMAGE_EXTS, False]]
            search_tuples += [['season-specials-banner', metadata.banners, config.IMAGE_EXTS, False]]
    elif type == 'show':
        search_tuples += [['(show|poster|folder)-?[0-9]?', metadata.posters, config.IMAGE_EXTS, False]]
        search_tuples += [['banner-?[0-9]?', metadata.banners, config.IMAGE_EXTS, False]]
        search_tuples += [['(fanart|art|background|backdrop)-?[0-9]?', metadata.art, config.IMAGE_EXTS, False]]
        search_tuples += [['theme-?[0-9]?', metadata.themes, config.AUDIO_EXTS, False]]
    elif type == 'episode':
        search_tuples += [[re.escape(root_file) + '(-|-thumb)?[0-9]?', metadata.thumbs, config.IMAGE_EXTS, False]]
    elif type == 'movie':
        search_tuples += [['(poster|default|cover|movie|folder|' + re.escape(root_file) + ')-?[0-9]?', metadata.posters, config.IMAGE_EXTS, True]]
        search_tuples += [['(fanart|art|background|backdrop|' + re.escape(root_file) + '-fanart' + ')-?[0-9]?', metadata.art, config.IMAGE_EXTS, True]]

    for (pattern, media_list, extensions, limited) in search_tuples:
        valid_keys = []

        sort_index = 1
        file_path_keys = sorted(path_files.keys(), key=lambda x: os.path.splitext(x)[0])
        for file_path in file_path_keys:
            for ext in extensions:
                if re.match('%s.%s' % (pattern, ext), file_path, re.IGNORECASE):

                    # Use a pattern if it's unlimited, or if there's only one media file.
                    if (limited and total_media_files == 1) or (not limited) or (file_path.find(root_file.lower()) == 0):

                        # Read data and hash it.
                        data = Core.storage.load(path_files[file_path])
                        media_hash = hashlib.md5(data).hexdigest()

                        # See if we need to add it.
                        valid_keys.append(media_hash)
                        if media_hash not in media_list:
                            media_list[media_hash] = Proxy.Media(data, sort_order=sort_index)
                            sort_index = sort_index + 1
                            Log(' Local asset added: %s (%s)', path_files[file_path], media_hash)
                    else:
                        Log('Skipping file %s because there are %d media files.', file_path, total_media_files)

        Log('Found %d valid things for pattern %s (ext: %s)', len(valid_keys), pattern, str(extensions))
        media_list.validate_keys(valid_keys)
|
||||
|
||||
def getRootFile(filename):
    """Return the base name of `filename` without its directory or extension.

    Only the last extension is removed, so 'movie.part1.avi' yields
    'movie.part1'. The directory part never influences the result.
    """
    basename = os.path.basename(filename)
    (root_file, ext) = os.path.splitext(basename)
    return root_file
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
def findSubtitles(part):
    """Register local subtitle files for `part` and drop the ones that vanished.

    Subtitles are looked for next to the part's file, in the configured
    sub-directories, and in the global 'Subtitles' folder under
    Core.app_support_path. Matching files are handed to the subtitle helper
    returned by subtitlehelpers.SubtitleHelpers(); afterwards
    `part.subtitles` is validated against what was found.

    Args:
        part: Media part; `part.file` is read and `part.subtitles` is updated.
    """
    lang_sub_map = {}
    part_filename = helpers.unicodize(part.file)
    part_basename = os.path.splitext(os.path.basename(part_filename))[0]
    paths = [os.path.dirname(part_filename)]

    # Check for local subtitles subdirectory
    sub_dirs_default = ["sub", "subs", "subtitle", "subtitles"]
    sub_dir_base = paths[0]

    sub_dir_list = []
    if Prefs["scanAll"]:
        # not only use the subtitle sub-folders we know, but also search for capitalized versions of them
        for sub_dir in sub_dirs_default + [s.capitalize() for s in sub_dirs_default]:
            sub_dir_list.append(os.path.join(sub_dir_base, sub_dir))
    elif Prefs["subFolder"] != "current folder":
        # got selected subfolder
        sub_dir_list.append(os.path.join(sub_dir_base, Prefs["subFolder"]))

    # NOTE(review): the custom folder is treated as independent of "scanAll";
    # confirm it was not meant to apply only when "scanAll" is off.
    sub_dir_custom = Prefs["subFolderCustom"].strip() if bool(Prefs["subFolderCustom"]) else None
    if sub_dir_custom:
        # got custom subfolder
        if sub_dir_custom.startswith("/"):
            # absolute folder
            sub_dir_list.append(sub_dir_custom)
        else:
            # relative folder
            sub_dir_list.append(os.path.join(sub_dir_base, sub_dir_custom))

    paths.extend(sub_dir for sub_dir in sub_dir_list if os.path.isdir(sub_dir))

    # Check for a global subtitle location. It is listed below, so it has to
    # be a directory, not merely an existing path.
    global_subtitle_folder = os.path.join(Core.app_support_path, 'Subtitles')
    if os.path.isdir(global_subtitle_folder):
        paths.append(global_subtitle_folder)

    # We start by building a dictionary of files to their absolute paths. We
    # also need to know the number of media files that are actually present,
    # in case the found local media asset is limited to a single instance per
    # media file.
    file_paths = {}
    total_media_files = 0
    for path in paths:
        path = helpers.unicodize(path)
        for file_path_listing in os.listdir(path):

            # When using os.listdir with a unicode path, it will always return
            # a string using the NFD form. However, we internally are using
            # the form NFC and therefore need to convert it to allow correct
            # regex / comparisons to be performed.
            file_path_listing = helpers.unicodize(file_path_listing)
            listing_full_path = os.path.join(path, file_path_listing)
            if os.path.isfile(listing_full_path):
                file_paths[file_path_listing.lower()] = listing_full_path

            # If we've found an actual media file, we should record it.
            (root, ext) = os.path.splitext(file_path_listing)
            if ext.lower()[1:] in config.VIDEO_EXTS:
                total_media_files += 1

    Log('Looking for subtitle media in %d paths with %d media files.', len(paths), total_media_files)
    Log('Paths: %s', ", ".join([helpers.unicodize(p) for p in paths]))

    for file_path in file_paths.values():

        # A subtitle matches the part when its name, with the extension and
        # optionally one more dotted suffix removed, equals the part's base name.
        local_basename = helpers.unicodize(os.path.splitext(os.path.basename(file_path))[0])
        local_basename2 = local_basename.rsplit('.', 1)[0]
        filename_matches_part = local_basename == part_basename or local_basename2 == part_basename

        # If the file is located within the global subtitle folder and its
        # name doesn't match exactly then we should simply ignore it.
        if file_path.count(global_subtitle_folder) and not filename_matches_part:
            continue

        # If we have more than one media file within the folder and located
        # filename doesn't match exactly then we should simply ignore it.
        if total_media_files > 1 and not filename_matches_part:
            continue

        subtitle_helper = subtitlehelpers.SubtitleHelpers(file_path)
        if subtitle_helper is not None:
            local_lang_map = subtitle_helper.process_subtitles(part)
            for new_language, subtitles in local_lang_map.items():

                # Add the possible new language along with the located
                # subtitles so that we can validate them at the end...
                lang_sub_map[new_language] = lang_sub_map.get(new_language, []) + subtitles

    # Now whack subtitles that don't exist anymore.
    for language in lang_sub_map.keys():
        part.subtitles[language].validate_keys(lang_sub_map[language])

    # Now whack the languages that don't exist anymore.
    for language in list(set(part.subtitles.keys()) - set(lang_sub_map.keys())):
        part.subtitles[language].validate_keys({})
|
||||
@@ -0,0 +1,127 @@
|
||||
import re, unicodedata
|
||||
import config
|
||||
import helpers
|
||||
|
||||
class SubtitleHelper(object):
    """Base class for the subtitle helpers; stores the subtitle file name."""

    def __init__(self, filename):
        # Path of the subtitle file the helper works on.
        self.filename = filename
|
||||
|
||||
def SubtitleHelpers(filename):
    """Return the first subtitle helper that accepts `filename`, or None.

    VobSubSubtitleHelper is tried before DefaultSubtitleHelper; the helper is
    constructed with the unicodized file name.
    """
    filename = helpers.unicodize(filename)
    candidates = [VobSubSubtitleHelper, DefaultSubtitleHelper]
    for helper_cls in candidates:
        if helper_cls.is_helper_for(filename):
            return helper_cls(filename)
    return None
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
class VobSubSubtitleHelper(SubtitleHelper):
    """Subtitle helper for VobSub .idx/.sub file pairs."""

    @classmethod
    def is_helper_for(cls, filename):
        """Return True for an .idx or .sub file that has a matching .idx file."""
        (file, file_extension) = os.path.splitext(filename)

        # We only support idx (and maybe sub)
        if file_extension.lower() not in ['.idx', '.sub']:
            return False

        # If we've been given a sub, we only support it if there exists a matching idx file
        return os.path.exists(file + '.idx')

    def process_subtitles(self, part):
        """Register every language stream listed in the .idx file on `part`.

        Returns:
            A dict mapping each language code found to a list holding this
            file's base name once per stream; empty when the file is the .sub
            half, has no .sub companion, or is not a VobSub index.
        """
        lang_sub_map = {}

        # We don't directly process the sub file, only the idx. Therefore if
        # we are passed one of these files, we simply ignore it. The check is
        # case-insensitive to agree with is_helper_for().
        (file, ext) = os.path.splitext(self.filename)
        if ext.lower() == '.sub':
            return lang_sub_map

        # If we have an idx file, we need to confirm there is an identically
        # named sub file before we can proceed.
        sub_filename = file + ".sub"
        if not os.path.exists(sub_filename):
            return lang_sub_map

        Log('Attempting to parse VobSub file: ' + self.filename)
        idx = Core.storage.load(self.filename)
        if idx.count('VobSub index file') == 0:
            Log('The idx file does not appear to be a VobSub, skipping...')
            return lang_sub_map

        languages = {}
        language_index = 0
        basename = os.path.basename(self.filename)
        for language in re.findall('\nid: ([A-Za-z]{2})', idx):
            Log('Found .idx subtitle file: ' + self.filename + ' language: ' + language + ' stream index: ' + str(language_index))

            # The stream index counts every "id:" entry in file order,
            # regardless of language.
            stream = Proxy.LocalFile(self.filename, index=str(language_index), format="vobsub")
            languages.setdefault(language, []).append(stream)
            language_index += 1

            lang_sub_map.setdefault(language, []).append(basename)

        for language, subs in languages.items():
            part.subtitles[language][basename] = subs

        return lang_sub_map
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
class DefaultSubtitleHelper(SubtitleHelper):
    """Helper for stand-alone subtitle files whose extension is listed in
    ``config.SUBTITLE_EXTS``."""

    @classmethod
    def is_helper_for(cls, filename):
        """Return True when the file's extension (lower-cased, without the
        leading dot) appears in ``config.SUBTITLE_EXTS``."""
        (_, file_extension) = os.path.splitext(filename)
        return file_extension.lower()[1:] in config.SUBTITLE_EXTS

    def process_subtitles(self, part):
        """Attach this subtitle file to ``part`` under its detected language.

        Args:
            part: object whose ``subtitles[language][basename]`` entry is set
                to a ``Proxy.LocalFile`` for this file.

        Returns:
            ``{language: [basename]}`` on success, or an empty dict when a
            ``.txt``/``.sub`` file could not be read or has no recognised
            format.
        """
        lang_sub_map = {}

        basename = os.path.basename(self.filename)
        (stem, ext) = os.path.splitext(self.filename)

        # Remove the initial '.' from the extension and normalise its case:
        # is_helper_for() accepts extensions case-insensitively, so the
        # comparisons below must do the same or "Movie.SRT" ends up without a
        # codec or format.
        ext = ext[1:].lower()

        # Attempt to extract the language from the filename (e.g. Avatar (2009).eng)
        language = ""
        language_match = re.match(r".+\.([^\.]+)$", stem)
        if language_match:
            language = language_match.group(1)
        language = Locale.Language.Match(language)

        codec = None
        sub_format = None
        if ext in ['txt', 'sub']:
            # These extensions are shared by several text formats; sniff the
            # second line of the file to tell them apart.
            try:
                file_contents = Core.storage.load(self.filename)
                lines = [line.strip() for line in file_contents.splitlines(True)]
                if re.match(r'^\{[0-9]+\}\{[0-9]*\}', lines[1]):
                    sub_format = 'microdvd'
                elif re.match(r'^[0-9]{1,2}:[0-9]{2}:[0-9]{2}[:=,]', lines[1]):
                    sub_format = 'txt'
                elif '[SUBTITLE]' in lines[1]:
                    sub_format = 'subviewer'
                else:
                    Log("The subtitle file does not have a known format, skipping... : " + self.filename)
                    return lang_sub_map
            except Exception:
                # Unreadable file or fewer than two lines: skip it rather than
                # abort the whole scan.
                Log("An error occurred while attempting to parse the subtitle file, skipping... : " + self.filename)
                return lang_sub_map

        if codec is None and ext in ['ass', 'ssa', 'smi', 'srt', 'psb']:
            # 'ass' files are reported with the 'ssa' codec name.
            codec = ext.replace('ass', 'ssa')

        if sub_format is None:
            sub_format = codec

        Log('Found subtitle file: ' + self.filename + ' language: ' + language + ' codec: ' + str(codec) + ' format: ' + str(sub_format))
        part.subtitles[language][basename] = Proxy.LocalFile(self.filename, codec=codec, format=sub_format)

        lang_sub_map[language] = [basename]
        return lang_sub_map
|
||||
@@ -0,0 +1,170 @@
|
||||
import os
|
||||
import helpers
|
||||
|
||||
from mutagen import File
|
||||
from mutagen.mp4 import MP4
|
||||
|
||||
class VideoHelper(object):
    """Base class for per-container video metadata helpers.

    Stores the path of the video file the helper operates on.
    """

    def __init__(self, filename):
        """Remember ``filename`` for later use by the helper."""
        self.filename = filename
|
||||
|
||||
def VideoHelpers(filename):
    """Return a helper instance for ``filename``, or None if none applies.

    The filename is first passed through ``helpers.unicodize``; the extension
    of its basename is then offered to each known helper class in turn and
    the first class that accepts it is instantiated with the full filename.
    """
    filename = helpers.unicodize(filename)
    extension = os.path.splitext(os.path.basename(filename))[1]

    for helper_cls in (MP4VideoHelper,):
        if helper_cls.is_helper_for(extension):
            return helper_cls(filename)
    return None
|
||||
|
||||
#####################################################################################################################
|
||||
|
||||
class MP4VideoHelper(VideoHelper):
    """Copies tags embedded in MP4-family files onto metadata objects."""

    @classmethod
    def is_helper_for(cls, file_extension):
        """Return True for ``.mp4``, ``.m4v`` and ``.mov`` (any letter case).

        ``file_extension`` is expected to include the leading dot.
        """
        return file_extension.lower() in ['.mp4', '.m4v', '.mov']

    def process_metadata(self, metadata, episode=None):
        """Read the file's tags and apply them to ``metadata`` or ``episode``.

        Args:
            metadata: metadata object; it is the target when ``episode`` is
                None and always receives the genres.
            episode: optional episode object; when given it is the target for
                everything except genres, and cover art is stored in its
                ``thumbs`` instead of ``posters``.

        Every field is applied on a best-effort basis: a missing or malformed
        tag leaves the corresponding attribute untouched.
        """
        if episode is None:
            item = metadata
        else:
            item = episode

        Log('Reading MP4 tags')
        try:
            tags = File(self.filename, options=[MP4])
        except Exception as e:
            Log('An error occurred while attempting to parse the MP4 file: ' + self.filename)
            Log(str(e))
            return
        if tags is None:
            Log('Not reading tags from %s because it doesn\'t look like an MP4 file.' % self.filename)
            return

        # Coverart
        try:
            picture = Proxy.Media(str(tags["covr"][0]))

            # If we're dealing with an actual episode, it uses thumbs rather than posters.
            if episode is not None:
                item.thumbs['atom_coverart'] = picture
            else:
                item.posters['atom_coverart'] = picture
        except Exception:
            pass  # no usable cover art

        # Title
        try:
            item.title = tags["\xa9nam"][0]
        except Exception:
            pass

        # Sort Title
        try:
            item.title_sort = tags["sonm"][0]
        except Exception:
            pass

        # Summary (long or short)
        try:
            try:
                summary = tags["ldes"][0]
            except Exception:
                summary = tags["desc"][0]
            item.summary = summary
        except Exception:
            pass

        # Genres (always stored on ``metadata``, even for episodes)
        try:
            if "\xa9gen" in tags:
                genres = tags["\xa9gen"][0]
            else:
                genres = tags["gnre"][0]
            if len(genres) > 0:
                # Accept ':' or ',' as the list separator, falling back to '/'.
                if ':' in genres:
                    genre_list = genres.split(':')
                elif ',' in genres:
                    genre_list = genres.split(',')
                else:
                    genre_list = genres.split('/')
                metadata.genres.clear()
                for genre in genre_list:
                    metadata.genres.add(genre.strip())
        except Exception:
            pass

        # Release Date & Year
        try:
            releaseDate = tags["\xa9day"][0]
            releaseDate = releaseDate.split('T')[0]  # drop any time component
            parsedDate = Datetime.ParseDate(releaseDate)
            item.originally_available_at = parsedDate.date()
            item.year = parsedDate.year
        except Exception:
            pass

        # Content Rating (second '|'-separated field of iTunEXTC)
        try:
            rating = tags["----:com.apple.iTunes:iTunEXTC"][0].split('|')[1]
            if len(rating) > 0:
                item.content_rating = rating
        except Exception:
            pass

        # Look for iTunes-style metadata, use regular tags otherwise.
        # plistlib is imported here because the module-level imports do not
        # provide it; without the import the lookup below would always fail
        # with a NameError that the handler silently swallows.
        try:
            import plistlib
            pl = plistlib.readPlistFromString(str(tags["----:com.apple.iTunes:iTunMOVI"][0]))
        except Exception:
            pl = None

        # Directors
        try:
            if pl and 'directors' in pl and pl['directors']:
                item.directors.clear()
                for director in pl['directors']:
                    item.directors.add(director['name'])
        except Exception:
            pass

        # Writers
        try:
            if pl and 'screenwriters' in pl and pl['screenwriters']:
                item.writers.clear()
                for writer in pl['screenwriters']:
                    item.writers.add(writer['name'])
        except Exception:
            pass

        # Cast (from the plist when present, else the comma-separated artist tag)
        try:
            if pl and 'cast' in pl and pl['cast']:
                item.roles.clear()
                for actor in pl['cast']:
                    role = item.roles.new()
                    role.actor = actor['name']
            else:
                artists = tags["\xa9ART"][0]
                if len(artists) > 0:
                    artist_list = artists.split(',')
                    item.roles.clear()
                    for artist in artist_list:
                        role = item.roles.new()
                        role.actor = artist.strip()
        except Exception:
            pass

        # Studio (from the plist when present, else the copyright tag)
        try:
            if pl and 'studio' in pl and pl['studio']:
                item.studio = pl['studio']
            else:
                try:
                    copyright = tags["cprt"][0]
                    if len(copyright) > 0:
                        item.studio = copyright
                except Exception:
                    pass
        except Exception:
            pass

        # Collection ('/'-separated album tag)
        try:
            albums = tags["\xa9alb"][0]
            if len(albums) > 0:
                album_list = albums.split('/')
                item.collections.clear()
                for album in album_list:
                    item.collections.add(album.strip())
        except Exception:
            pass
|
||||
@@ -0,0 +1,27 @@
|
||||
[
|
||||
{
|
||||
"id": "scanAll",
|
||||
"label": "Scan for subtitles in all default folders (sub, subs, subtitle, subtitles) + custom if specified",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subFolder",
|
||||
"label": "Subtitle Folder (\"current folder\" is the folder the current media file lives in)",
|
||||
"type": "enum",
|
||||
"values": ["current folder", "sub", "subs", "subtitle", "subtitles"],
|
||||
"default": "current folder"
|
||||
},
|
||||
{
|
||||
"id": "subFolderCustom",
|
||||
"label": "Custom Subtitle folder (computes to real paths; use for example \"bla\" as a subfolder of the current media file folder - can use real paths as well)",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "music_video_path",
|
||||
"label": "Local music video path",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,14 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>CFBundleIdentifier</key>
|
||||
<string>com.plexapp.agents.localmediaextended</string>
|
||||
<key>PlexFrameworkVersion</key>
|
||||
<string>2</string>
|
||||
<key>PlexPluginClass</key>
|
||||
<string>Agent</string>
|
||||
<key>PlexPluginCodePolicy</key>
|
||||
<string>Elevated</string>
|
||||
</dict>
|
||||
</plist>
|
||||
+315
@@ -0,0 +1,315 @@
|
||||
# ID3.py version 1.0
|
||||
|
||||
# Module for manipulating ID3 informational tags in MP3 audio files
|
||||
# $Id: ID3.py,v 1.1 2002/09/10 21:04:52 elan Exp $
|
||||
|
||||
# Written 2 May 1999 by Ben Gertzfield <che@debian.org>
|
||||
# This work is released under the GNU GPL, version 2 or later.
|
||||
|
||||
# Modified 10 June 1999 by Arne Zellentin <arne@unix-ag.org> to
|
||||
# fix bug with overwriting last 128 bytes of a file without an
|
||||
# ID3 tag
|
||||
|
||||
# Patches from Jim Speth <speth@end.com> and someone whose email
|
||||
# I've forgotten at the moment (huge apologies, I didn't save the
|
||||
# entire mail, just the patch!) for so-called ID3 v1.1 support,
|
||||
# which makes the last two bytes of the comment field signify a
|
||||
# track number. If the first byte is null but the second byte
|
||||
# is not, the second byte is assumed to signify a track number.
|
||||
|
||||
# Also thanks to Jim for the simple function to remove nulls and
|
||||
# whitespace from the ends of ID3 tags. I'd like to add a boolean
|
||||
# flag defaulting to false to the ID3() constructor signifying whether
|
||||
# or not to remove whitespace, just in case old code depended on the
|
||||
# old behavior for some reason, but that'd make any code that wanted
|
||||
# to use the stripping behavior not work with old ID3.py. Bleh.
|
||||
|
||||
# This is the first thing I've ever written in Python, so bear with
|
||||
# me if it looks terrible. In a few years I'll probably look back at
|
||||
# this and laugh and laugh..
|
||||
|
||||
# Constructor:
|
||||
#
|
||||
# ID3(filename)
|
||||
# Opens filename and tries to parse its ID3 header. If the ID3 header
|
||||
# is invalid or the file access failed, raises InvalidTagError.
|
||||
#
|
||||
# When object is deconstructed, if any of the class data (below) have
|
||||
# been changed, opens the file again read-write and writes out the
|
||||
# new header. If the header is to be deleted, truncates the last
|
||||
# 128 bytes of the file.
|
||||
#
|
||||
# Note that if ID3 cannot write the tag out to the file upon
|
||||
# deconstruction, InvalidTagError will be raised and ignored
|
||||
# (as we are in __del__, and exceptions just give warnings when
|
||||
# raised in __del__.)
|
||||
|
||||
# Class Data of Interest:
|
||||
#
|
||||
# Note that all ID3 fields, unless otherwise specified, are a maximum of
|
||||
# 30 characters in length. If a field is set to a string longer than
|
||||
# the maximum, it will be truncated when it's written to disk.
|
||||
#
|
||||
# ID3.title
|
||||
# Title of the song.
|
||||
# ID3.artist
|
||||
# Artist/creator of the song.
|
||||
# ID3.album
|
||||
# Title of the album the song is from.
|
||||
# ID3.year
|
||||
# Year the song was released. Maximum of 4 characters (Y10K bug!)
|
||||
# ID3.genre
|
||||
# Genre of the song. Integer value from 0 to 255. Genre specification
|
||||
# comes from (sorry) WinAMP. http://mp3.musichall.cz/id3master/faq.htm
|
||||
# has a list of current genres; I spell-checked this list against
|
||||
# WinAMP's by running strings(1) on the file Winamp/Plugins/in_mp3.dll
|
||||
# and made a few corrections.
|
||||
# ID3.comment
|
||||
# Comment about the song.
|
||||
# ID3.track
|
||||
# Track number of the song. None if undefined.
|
||||
#
|
||||
# ID3.genres
|
||||
# List of all genres. ID3.genre above is used to index into this
|
||||
# list. ID3.genres is current as of WinAMP 1.92.
|
||||
|
||||
# Methods of Interest:
|
||||
#
|
||||
# write()
|
||||
# If the class data above have changed, opens the file given
|
||||
# to the constructor read-write and writes out the new header.
|
||||
# If the header is flagged for deletion (see delete() below)
|
||||
# truncates the last 128 bytes of the file to remove the header.
|
||||
#
|
||||
# NOTE: write() is called from ID3's deconstructor, so it's technically
|
||||
# unnecessary to call it. However, write() can raise an InvalidTagError,
|
||||
# which can't be caught during deconstruction, so generally it's
|
||||
# nicer to call it when writing is desired.
|
||||
#
|
||||
# delete()
|
||||
# Flags the ID3 tag for deletion upon destruction of the object
|
||||
#
|
||||
# find_genre(genre_string)
|
||||
# Searches for the numerical value of the given genre string in the
|
||||
# ID3.genres table. The search is performed case-insensitively. Returns
|
||||
# an integer from 0 to len(ID3.genres) - 1, or -1 if no genre matches.
|
||||
#
|
||||
|
||||
import string
|
||||
import re
|
||||
|
||||
def lengthen(string, num_spaces):
    """Return ``string`` cut or NUL-padded to exactly ``num_spaces`` characters.

    Longer input is truncated; shorter input is right-padded with ``'\\0'``.
    """
    return string[:num_spaces].ljust(num_spaces, '\0')
|
||||
|
||||
# We would normally use string.rstrip(), but that doesn't remove \0 characters.
|
||||
def strip_padding(s):
    """Clean a raw ID3v1 field and return it as UTF-8.

    For byte strings (the normal case: fields read from a file opened in
    binary mode) the value is:

    1. cut at the first NUL byte, discarding everything after it;
    2. stripped of trailing ASCII whitespace;
    3. has the Windows-1252 right single quote (byte 0x92) replaced by ``'``;
    4. transcoded from ISO-8859-1 to UTF-8.

    The 0x92 replacement is done *before* transcoding. Doing it afterwards
    would also hit the 0x92 continuation byte inside multi-byte UTF-8
    sequences and corrupt characters such as U+00D2.

    Text (unicode) input gets the same cleaning but is returned as text
    without transcoding.
    """
    if isinstance(s, bytes):
        raw = s.split(b"\0", 1)[0].rstrip(b" \t\n\r\x0b\x0c")
        raw = raw.replace(b"\x92", b"'")
        return raw.decode('iso-8859-1').encode('utf-8')

    text = s.split(u"\0", 1)[0].rstrip(u" \t\n\r\x0b\x0c")
    return text.replace(u"\x92", u"'")
|
||||
|
||||
class InvalidTagError(Exception):
    """Raised when an ID3 tag cannot be read from or written to a file.

    Derives from ``Exception`` so that it can be caught by generic handlers
    and raised on interpreters that reject non-exception classes. The message
    remains available as ``.msg`` and through ``str()``.
    """

    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
|
||||
|
||||
class ID3:
    """Reader/writer for the 128-byte ID3v1 / ID3v1.1 tag at the end of a file.

    Assigning to ``title``, ``artist``, ``album``, ``year``, ``comment``,
    ``track`` or ``genre`` marks the tag as modified; ``write()`` (also called
    from ``__del__``) then stores it back to the file. The code relies on
    Python 2 byte-string semantics for file contents.
    """

    genres = [
        "Blues", "Classic Rock", "Country", "Dance", "Disco", "Funk",
        "Grunge", "Hip-Hop", "Jazz", "Metal", "New Age", "Oldies", "Other",
        "Pop", "R&B", "Rap", "Reggae", "Rock", "Techno", "Industrial",
        "Alternative", "Ska", "Death Metal", "Pranks", "Soundtrack",
        "Euro-Techno", "Ambient", "Trip-Hop", "Vocal", "Jazz+Funk", "Fusion",
        "Trance", "Classical", "Instrumental", "Acid", "House", "Game",
        "Sound Clip", "Gospel", "Noise", "Alt. Rock", "Bass", "Soul",
        "Punk", "Space", "Meditative", "Instrum. Pop", "Instrum. Rock",
        "Ethnic", "Gothic", "Darkwave", "Techno-Indust.", "Electronic",
        "Pop-Folk", "Eurodance", "Dream", "Southern Rock", "Comedy",
        "Cult", "Gangsta", "Top 40", "Christian Rap", "Pop/Funk", "Jungle",
        "Native American", "Cabaret", "New Wave", "Psychadelic", "Rave",
        "Showtunes", "Trailer", "Lo-Fi", "Tribal", "Acid Punk", "Acid Jazz",
        "Polka", "Retro", "Musical", "Rock & Roll", "Hard Rock", "Folk",
        "Folk/Rock", "National Folk", "Swing", "Fusion", "Bebob", "Latin",
        "Revival", "Celtic", "Bluegrass", "Avantgarde", "Gothic Rock",
        "Progress. Rock", "Psychadel. Rock", "Symphonic Rock", "Slow Rock",
        "Big Band", "Chorus", "Easy Listening", "Acoustic", "Humour",
        "Speech", "Chanson", "Opera", "Chamber Music", "Sonata", "Symphony",
        "Booty Bass", "Primus", "Porn Groove", "Satire", "Slow Jam",
        "Club", "Tango", "Samba", "Folklore", "Ballad", "Power Ballad",
        "Rhythmic Soul", "Freestyle", "Duet", "Punk Rock", "Drum Solo",
        "A Capella", "Euro-House", "Dance Hall", "Goa", "Drum & Bass",
        "Club-House", "Hardcore", "Terror", "Indie", "BritPop", "Negerpunk",
        "Polsk Punk", "Beat", "Christian Gangsta Rap", "Heavy Metal",
        "Black Metal", "Crossover", "Contemporary Christian", "Christian Rock",
        "Merengue", "Salsa", "Thrash Metal", "Anime", "Jpop", "Synthpop"
    ]

    def __init__(self, filename):
        """Open ``filename`` and load its ID3v1 tag, if it has one.

        Raises:
            InvalidTagError: the file cannot be opened, is shorter than 128
                bytes, or an I/O error occurs while reading the tag.
        """
        self.filename = filename
        self.delete_tag = 0
        self.zero()
        # zero() went through __setattr__ and flagged the tag as modified and
        # present; reset the bookkeeping before reading the real state.
        self.modified = 0
        self.has_tag = 0
        self.had_tag = 0

        try:
            self.file = open(filename, 'rb')
        except IOError as msg:
            self.modified = 0
            raise InvalidTagError("Can't open %s: %s" % (filename, msg))

        # The handle is always closed, including when no tag is found or an
        # error is raised part-way through.
        try:
            try:
                self.file.seek(-128, 2)
            except IOError as msg:
                self.modified = 0
                raise InvalidTagError("Can't open %s: %s" % (filename, msg))

            try:
                if self.file.read(3) == 'TAG':
                    self.has_tag = 1
                    self.had_tag = 1
                    self.title = self.file.read(30)
                    self.artist = self.file.read(30)
                    self.album = self.file.read(30)
                    self.year = self.file.read(4)
                    self.comment = self.file.read(30)

                    # ID3v1.1: a NUL followed by a non-NUL byte at the end of
                    # the comment field carries the track number.
                    if ord(self.comment[-2]) == 0 and ord(self.comment[-1]) != 0:
                        self.track = ord(self.comment[-1])
                        self.comment = self.comment[:-2]
                    else:
                        self.track = None

                    self.genre = ord(self.file.read(1))

                    self.title = strip_padding(self.title)
                    self.artist = strip_padding(self.artist)
                    self.album = strip_padding(self.album)
                    self.year = strip_padding(self.year)
                    self.comment = strip_padding(self.comment)
            except IOError as msg:
                self.modified = 0
                raise InvalidTagError("Invalid ID3 tag in %s: %s" % (filename, msg))
        finally:
            self.file.close()

        # Loading the fields is not a modification.
        self.modified = 0

    def delete(self):
        """Clear all fields and flag the tag for removal on the next write()."""
        self.zero()
        self.delete_tag = 1
        self.has_tag = 0

    def zero(self):
        """Reset every tag field to its empty value."""
        self.title = ''
        self.artist = ''
        self.album = ''
        self.year = ''
        self.comment = ''
        self.track = None
        self.genre = 0

    def find_genre(self, genre_to_find):
        """Return the index of ``genre_to_find`` in ``genres`` (case-insensitive),
        or -1 when it is not listed."""
        wanted = genre_to_find.lower()
        for index, genre in enumerate(self.genres):
            if genre.lower() == wanted:
                return index
        return -1

    def write(self):
        """Store pending changes in the file; do nothing if unmodified.

        Truncates the existing tag when it was flagged for deletion, otherwise
        overwrites the existing tag or appends a new one.

        Raises:
            InvalidTagError: the file cannot be opened or written.
        """
        if not self.modified:
            return

        try:
            self.file = open(self.filename, 'rb+')
            try:
                if self.had_tag:
                    self.file.seek(-128, 2)
                else:
                    self.file.seek(0, 2)  # a new tag is appended at the end

                if self.delete_tag and self.had_tag:
                    self.file.truncate()
                    self.had_tag = 0
                elif self.has_tag:
                    self.file.write('TAG')
                    self.file.write(lengthen(self.title, 30))
                    self.file.write(lengthen(self.artist, 30))
                    self.file.write(lengthen(self.album, 30))
                    self.file.write(lengthen(self.year, 4))

                    comment = lengthen(self.comment, 30)

                    # Track numbers that do not fit in one byte are dropped.
                    if self.track is not None and not (0 <= self.track <= 255):
                        self.track = None

                    if self.track is not None:
                        comment = comment[:-2] + "\0" + chr(self.track)

                    self.file.write(comment)

                    # Out-of-range genres are stored as 255.
                    if self.genre is None or not (0 <= self.genre <= 255):
                        self.genre = 255
                    self.file.write(chr(self.genre))
                    self.had_tag = 1
            finally:
                self.file.close()
        except IOError as msg:
            raise InvalidTagError("Cannot write modified ID3 tag to %s: %s" % (self.filename, msg))
        else:
            self.modified = 0

    def __del__(self):
        self.write()

    def __str__(self):
        """Return a multi-line, human-readable summary of the tag."""
        if self.has_tag:
            # Index 0 ("Blues") is a valid genre, so the lower bound is inclusive.
            if self.genre is not None and 0 <= self.genre < len(self.genres):
                genre = self.genres[self.genre]
            else:
                genre = 'Unknown'

            if self.track is not None:
                track = str(self.track)
            else:
                track = 'Unknown'

            return "File : %s\nTitle : %-30.30s Artist: %-30.30s\nAlbum : %-30.30s Track : %s Year: %-4.4s\nComment: %-30.30s Genre : %s (%i)" % (self.filename, self.title, self.artist, self.album, track, self.year, self.comment, genre, self.genre)
        else:
            return "%s: No ID3 tag." % self.filename

    # intercept setting of attributes to set self.modified
    def __setattr__(self, name, value):
        if name in ['title', 'artist', 'album', 'year', 'comment',
                    'track', 'genre']:
            self.__dict__['modified'] = 1
            self.__dict__['has_tag'] = 1
        self.__dict__[name] = value
|
||||
|
||||
import sys
|
||||
if __name__ == '__main__':
    # Command-line use: print artist, album and title of the file named by
    # the first argument.
    id3 = ID3(sys.argv[1])
    print("%s %s %s" % (id3.artist, id3.album, id3.title))
|
||||
@@ -0,0 +1,267 @@
|
||||
#!/usr/bin/env python
|
||||
# id3v2.py Version 0.1 (work still in progress)
|
||||
# $Header: /mnt/haven/Source/Cleaners/ID3v2.py,v 1.4 2004/01/23 08:02:28 elan Exp $
|
||||
#
|
||||
# This takes a list of mp3 filenames and spits out an alternative
|
||||
# filename based on the files id3v2 tag.
|
||||
# There is another script id3.py which uses the v1 tag.
|
||||
# I'll integrate it in when I get the time. Shouldnt be too hard
|
||||
# but I'm often stupidly optimistic like this.
|
||||
#
|
||||
# This script only reads id3v2 tags.
|
||||
# I suppose next enhancement might be to write id3v2 tags.
|
||||
# and maybe some interaction with CDDB.
|
||||
# Hmmmm, might need to next write some code to calculate the CDDB id.
|
||||
#
|
||||
# Would be nice if u sent me any enhancements/suggestions.
|
||||
# mailto:calcium@altavista.net
|
||||
# http://www.ozemail.com.au/~calcium
|
||||
#
|
||||
# -----------------
|
||||
# TODO
|
||||
# -----------------
|
||||
# Handle extended headers properly
|
||||
# Ability to create id3v2 tags.
|
||||
# Make it more robust.
|
||||
#
|
||||
# -----------------
|
||||
# The documentation.
|
||||
# -----------------
|
||||
# I doubt this will be of much use to anyone apart from curiousity value.
|
||||
# I guess if u want to enhance it to handle additional tags, u'll need
|
||||
# to write a function called "processXXXX" where XXXX is the frameId.
|
||||
#
|
||||
# I also suspect u'll need to have the id3v2 spec to make sense of
|
||||
# some of the code.
|
||||
# See http://www.id3.org
|
||||
# See http://www.python.org
|
||||
# See http://www.jython.org
|
||||
# That's it.
|
||||
#
|
||||
# Ciao,
|
||||
# Chai in Melbourne, Australia.
|
||||
#
|
||||
|
||||
import sys
|
||||
import string
|
||||
import struct
|
||||
from UnicodeHelper import fixEncoding
|
||||
|
||||
_encodings = ['iso8859-1', 'utf-16', 'utf-16be', 'utf-8']
|
||||
|
||||
#
|
||||
# This gets the id3v2 tag from the file specified.
|
||||
#
|
||||
class ID3v2:
    """Read-only parser for the ID3v2 tag at the start of a file.

    After construction ``artist``, ``album``, ``title``, ``year``, ``track``,
    ``disk`` and ``TPE2`` hold whatever the tag provided, and ``ok`` is 1 if
    at least one frame was handled. A frame is handled by the method named
    ``process<FRAMEID>``; frames without such a method are skipped.
    """

    def __init__(self, filename, language=None):
        """Parse ``filename``.

        Args:
            filename: path of the file to read.
            language: passed to ``fixEncoding`` when decoding text frames.
        """
        self.artist = ''
        self.album = ''
        self.title = ''
        self.year = ''
        self.filename = filename
        self.ok = 0
        self.track = None
        self.TPE2 = None
        self.disk = None  # None unless album has multiple disks
        self.language = language

        # Defaults so that getVersion()/getFlags() work on files without a tag.
        self.version = None
        self.flags = None

        f = open(self.filename, 'rb')
        try:
            self._read_tag(f, filename)
        finally:
            # Close on every path, including the early returns in _read_tag.
            f.close()

    def _read_tag(self, f, filename):
        """Parse the tag header and all frames from the open file ``f``."""
        # The header
        self.header = f.read(3)
        if self.header != b"ID3":
            return

        # The version is the next 2 bytes
        self.version = struct.unpack('>bb', f.read(2))[0]

        # The flags. See the id3 v2 spec for details. They are only reported,
        # not interpreted.
        self.flags = f.read(1)
        if ord(self.flags) != 0:
            print("Hey! There is an extended header present in %s" % filename)

        # The id3 tag size. It excludes the 10-byte tag header, so the tag
        # ends at size + 10.
        b1, b2, b3, b4 = struct.unpack('>bbbb', f.read(4))
        id3Size = self.syncSafeInt(b1, b2, b3, b4)
        tag_end = id3Size + 10

        # Reading in the id3 frames.
        # Assume that the id3 size specified in the header is correct.
        while f.tell() < tag_end:
            if self.version == 2:
                # v2.2: 3-character id and a 3-byte unsigned big-endian size.
                self.frameId = f.read(3)
                size = struct.unpack('>BBB', f.read(3))
                self.frameSize = [size[0] * 256 * 256 + size[1] * 256 + size[2]]
            else:
                self.frameId = f.read(4)
                self.frameSize = struct.unpack('>l', f.read(4))

            # In case the id3 size is wrong (or padding is reached), stop.
            if self.frameSize[0] == 0:
                break

            if self.version > 2:
                # read the frame header flags
                self.frameFlags = f.read(2)
            else:
                self.frameFlags = 0

            blkSize = self.frameSize[0]
            if blkSize < 0:
                break

            if blkSize > 1000000:
                print("Too many bytes (%d) in '%s', aborting read" % (blkSize, filename))
                return

            try:
                self.data = f.read(blkSize)
            except Exception:
                print("Error reading %d bytes in %s." % (blkSize, filename))
                break

            # Dispatch to process<FRAMEID>() by attribute lookup. The frame id
            # comes straight from the file, so it is never executed as code.
            frame_name = self.frameId
            if not isinstance(frame_name, str):
                frame_name = frame_name.decode('latin-1')
            frame_name = frame_name.replace(' ', '')

            if not frame_name.isalnum():
                # Not a plausible frame id: the tag is corrupt.
                print("Warning: strange ID3v2 tag in %s" % filename)
                print("process" + frame_name)
                break

            handler = getattr(self, "process" + frame_name, None)
            if handler is None:
                # No handler implemented for this frame type.
                continue

            try:
                handler(self.frameId, self.frameFlags, self.data)
                self.ok = 1
            except AttributeError:
                continue
            except Exception:
                print("Warning: strange ID3v2 tag in %s" % filename)
                print("process" + frame_name)
                break

    #
    # Gets the filename
    #
    def getFilename(self):
        return self.filename

    #
    # A guess as to whether file interrogation succeeded
    #
    def isOK(self):
        return self.ok

    #
    # Gets the version (None when the file has no ID3v2 tag)
    #
    def getVersion(self):
        return self.version

    #
    # Gets the flags (None when the file has no ID3v2 tag)
    #
    def getFlags(self):
        return self.flags

    #
    # Sets the album name
    #
    def processTALB(self, theString, theFlags, theValue):
        self.album = fixEncoding(theValue, self.language)

    def processTAL(self, theString, theFlags, theValue):
        self.processTALB(theString, theFlags, theValue)

    def getAlbum(self):
        return self.album

    #
    # Sets the artist name
    #
    def processTPE1(self, theString, theFlags, theValue):
        self.artist = fixEncoding(theValue, self.language)

    def processTP1(self, s, f, v):
        self.processTPE1(s, f, v)

    #
    # Sets the TPE2
    #
    def processTPE2(self, theString, theFlags, theValue):
        self.TPE2 = fixEncoding(theValue, self.language)

    #
    # Sets the disk; "1/1" is treated as a single-disk album and ignored.
    #
    def processTPOS(self, theString, theFlags, theValue):
        TPOS = fixEncoding(theValue, self.language)
        try:
            if TPOS == '1/1':
                return
            else:
                sp = TPOS.split('/')
                self.disk = int(sp[0])
        except Exception:
            pass  # unparsable disk number: leave self.disk unchanged

    def getArtist(self):
        return self.artist

    #
    # Sets the year.
    #
    def processTYER(self, theString, theFlags, theValue):
        self.year = fixEncoding(theValue, self.language)

    def processTYE(self, s, f, v):
        self.processTYER(s, f, v)

    #
    # Sets the track; for "n/total" only n is kept.
    #
    def processTRCK(self, s, f, v):
        track = fixEncoding(v, self.language)
        slash = track.find('/')
        if slash != -1:
            track = track[0:slash]
        self.track = int(track)

    def processTRK(self, s, f, v):
        self.processTRCK(s, f, v)

    #
    # Sets the title track name
    #
    def processTIT2(self, theString, theFlags, theValue):
        self.title = fixEncoding(theValue, self.language)

    def processTT2(self, theString, theFlags, theValue):
        self.processTIT2(theString, theFlags, theValue)

    def getSong(self):
        return self.title

    def syncSafeInt(self, b1, b2, b3, b4):
        """Combine four bytes (most significant first) carrying 7 bits each."""
        return ((b4 & 0xff)
                + ((b3 & 0xff) << 7)
                + ((b2 & 0xff) << 14)
                + ((b1 & 0xff) << 21))
|
||||
|
||||
import sys
|
||||
if __name__ == '__main__':
    # Command-line use: print artist, album and title of the file named by
    # the first argument.
    id3 = ID3v2(sys.argv[1])
    print("%s %s %s" % (id3.artist, id3.album, id3.title))
|
||||
@@ -0,0 +1,19 @@
|
||||
import string
|
||||
|
||||
# Codec names indexed by the encoding byte that starts an ID3v2 text frame.
_encodings = ['iso8859-1', 'utf-16', 'utf-16be', 'utf-8']


def fixEncoding(theString, language=None):
    """Decode the payload of an ID3v2 text frame and return it as UTF-8.

    The first byte selects the codec from ``_encodings``. If it is not a
    valid index the payload is assumed to have no encoding byte and is
    returned as-is. Leading and trailing NULs are removed from the result.

    Args:
        theString: raw frame payload (byte string).
        language: when ``'ko'``, payloads marked as encoding 0 are decoded as
            cp949 instead of ISO-8859-1.

    Returns:
        The UTF-8 encoded text; an empty payload is returned unchanged.
    """
    if not theString:
        # Nothing to decode; avoids indexing into an empty payload.
        return theString

    encoding = ord(theString[:1])
    if 0 <= encoding < len(_encodings):
        # If we're dealing with a particular language, we might want to try another code page.
        if encoding == 0 and language == 'ko':
            value = theString[1:].decode('cp949').encode('utf-8')
        else:
            value = theString[1:].decode(_encodings[encoding]).encode("utf-8")
    else:
        value = theString

    if value:
        value = value.strip(b'\0')

    return value
|
||||
@@ -0,0 +1,11 @@
|
||||
Metadata-Version: 1.0
|
||||
Name: mp4file
|
||||
Version: 0.2
|
||||
Summary: Library for rudimentary parsing of mp4 atoms, especially metadata atoms.
|
||||
Home-page: UNKNOWN
|
||||
Author: Bill Napier
|
||||
Author-email: napier@pobox.com
|
||||
License: PSF
|
||||
Description: UNKNOWN
|
||||
Keywords: mp4 atom quicktime
|
||||
Platform: UNKNOWN
|
||||
@@ -0,0 +1,232 @@
|
||||
'''
|
||||
Created on Dec 6, 2009
|
||||
|
||||
@author: napier
|
||||
'''
|
||||
#import logging
|
||||
import os
|
||||
import struct
|
||||
|
||||
from atomsearch import find_path, findall_path
|
||||
|
||||
#log = logging.getLogger("mp4file")
|
||||
|
||||
class EndOFFile(Exception):
    """Raised by the read helpers when the file ends before the requested
    number of bytes could be read."""

    def __init__(self):
        Exception.__init__(self)
|
||||
|
||||
def _read_unpacked(file, fmt, width):
    """Read ``width`` bytes from ``file`` and unpack them with ``fmt``.

    Raises EndOFFile when fewer than ``width`` bytes are available.
    """
    data = file.read(width)
    if data is None or len(data) != width:
        raise EndOFFile()
    return struct.unpack(fmt, data)[0]


def read64(file):
    """Read a big-endian unsigned 64-bit integer; raise EndOFFile on short read."""
    return _read_unpacked(file, ">Q", 8)


def read32(file):
    """Read a big-endian unsigned 32-bit integer; raise EndOFFile on short read."""
    return _read_unpacked(file, ">I", 4)


def read16(file):
    """Read a big-endian unsigned 16-bit integer; raise EndOFFile on short read."""
    return _read_unpacked(file, ">H", 2)


def read8(file):
    """Read an unsigned 8-bit integer; raise EndOFFile on short read."""
    return _read_unpacked(file, ">B", 1)
|
||||
|
||||
def type_to_str(data):
    """Turn a 32-bit atom type code into its four-character string.

    The most significant byte becomes the first character.
    """
    return ''.join('%c' % ((data >> shift) & 0xff) for shift in (24, 16, 8, 0))
|
||||
|
||||
def parse_atom(file):
    """Parse the atom that starts at the current position of ``file``.

    Reads the 32-bit size and the type; a size of 1 means the real size
    follows as a 64-bit value. Returns the object built by ``create_atom``,
    or None if the file ends while the atom is being read.
    """
    try:
        start = file.tell()
        atom_size = read32(file)
        fourcc = type_to_str(read32(file))
        if atom_size == 1:
            atom_size = read64(file)
        return create_atom(atom_size, fourcc, start, file)
    except EndOFFile:
        return None
|
||||
|
||||
# Maps atom type codes to the attribute-friendly names used for the atoms.
# The copyright atom is spelled 'cprt'; the historical 'cptr' entry is kept
# so that existing lookups keep working.
ATOM_TYPE_MAP = {
    '\xa9too': 'encoder',
    '\xa9nam': 'title',
    '\xa9alb': 'album',
    '\xa9ART': 'artist',
    '\xa9art': 'artist',
    '\xa9cmt': 'comment',
    '\xa9gen': 'genre',
    'gnre': 'genre',
    '\xa9day': 'year',
    'trkn': 'tracknum',
    'disk': 'disknum',
    '\xa9wrt': 'composer',
    'tmpo': 'bpm',
    'cptr': 'copyright',
    'cprt': 'copyright',
    'cpil': 'compilation',
    'covr': 'coverart',
    'rtng': 'rating',
    '\xa9grp': 'grouping',
    'pcst': 'podcast',
    'catg': 'category',
    'keyw': 'keyword',
    'purl': 'podcasturl',
    'egid': 'episodeguid',
    'desc': 'description',
    'ldes': 'long_description',
    '\xa9lyr': 'lyrics',
    'tvnn': 'tvnetwork',
    'tvsh': 'tvshow',
    'tven': 'tvepisodenum',
    'tvsn': 'tvseason',
    'tves': 'tvepisode',
    'purd': 'purcahsedate',
    'pgap': 'gapless',
}
|
||||
|
||||
# There are a lot of atom's with children. No need to create
|
||||
# special classes for all of them
|
||||
ATOM_WITH_CHILDREN = [ 'stik', 'moov', 'trak',
|
||||
'udta', 'ilst', '\xa9too',
|
||||
'\xa9nam', '\xa9alb', '\xa9ART', '\xa9art',
|
||||
'\xa9cmt', '\xa9gen', 'gnre',
|
||||
'\xa9day', 'trkn', 'disk',
|
||||
'\xa9wrt', 'tmpo', 'cptr',
|
||||
'cpil', 'covr', 'rtng',
|
||||
'\xa9grp', 'pcst', 'catg',
|
||||
'keyw', 'purl', 'egid',
|
||||
'desc', 'ldes', '\xa9lyr', 'tvnn',
|
||||
'tvsh', 'tven', 'tvsn',
|
||||
'tves', 'purd', 'pgap',
|
||||
]
|
||||
|
||||
def create_atom(size, type, offset, file):
    """Build the most specific atom object available for *type*.

    size   -- atom size as read from the header
    type   -- four-character type code as read from the header
    offset -- file offset at which the atom starts
    file   -- file object positioned just after the atom header

    The lookup name is ATOM_TYPE_MAP[type] when present, otherwise the
    lower-cased type.  Types listed in ATOM_WITH_CHILDREN become
    AtomWithChildren; otherwise a module-level Atom subclass with the lookup
    name is used if one exists, and the generic Atom is the fallback.
    """
    # Possibly remap atom types that aren't valid python variable names.
    clz = ATOM_TYPE_MAP.get(type, type.lower())
    if type in ATOM_WITH_CHILDREN:
        return AtomWithChildren(size, type, clz, offset, file)

    # The type code comes straight from the file being parsed, so it must not
    # be passed to eval().  Resolve it with a plain namespace lookup and only
    # accept real Atom subclasses.
    handler = globals().get(clz)
    try:
        # issubclass() raises TypeError when handler is not a class at all.
        if issubclass(handler, Atom):
            return handler(size, type, clz, offset, file)
    except (NameError, TypeError):
        # Same fallback the eval()-based code had for these errors.
        pass
    # Not defined, use generic Atom
    return Atom(size, type, clz, offset, file)
|
||||
|
||||
def parse_atoms(file, maxFileOffset):
    """Parse consecutive atoms from *file* until *maxFileOffset* is reached.

    Parsing also stops early when parse_atom() returns nothing or an atom
    reports a size of 0.  After each atom the file is repositioned to the
    atom's end (offset + size).  Returns the list of parsed atoms.
    """
    found = []
    while file.tell() < maxFileOffset:
        current = parse_atom(file)
        if current and current.size != 0:
            found.append(current)
            # Seek to the end of the atom
            file.seek(current.offset + current.size, os.SEEK_SET)
        else:
            break
    return found
|
||||
|
||||
class Atom(object):
    """Generic atom: header fields plus child atoms and parsed attributes."""

    def __init__(self, size, type, name, offset, file):
        """Store the header values; children and attributes start out empty."""
        self.size = size
        self.type = type
        self.name = name
        self.offset = offset
        self.file = file
        # Always defined so that atoms which were never adopted through
        # _set_children() (e.g. the root) can be inspected without an
        # AttributeError.
        self.parent = None
        self.children = []
        self.attrs = {}

    def _set_attr(self, key, value):
        """Record a parsed attribute under *key*."""
        self.attrs[key] = value

    def _set_children(self, children):
        """Adopt *children*: set their parent to self and store the list."""
        # Tell the children who their parents are
        for child in children:
            child.parent = self
        self.children = children

    def get_attribute(self, key):
        """Return the attribute stored under *key*; raises KeyError if absent."""
        return self.attrs[key]

    def get_atoms(self):
        """Return the list of direct child atoms."""
        return self.children

    def find(self, path):
        """Return the result of find_path() for *path* relative to this atom."""
        return find_path(self, path)

    def findall(self, path):
        """Return the result of findall_path() for *path* relative to this atom."""
        return findall_path(self, path)
|
||||
|
||||
class AtomWithChildren(Atom):
    """Atom whose payload is itself a sequence of atoms."""

    def __init__(self, size, type, name, offset, file):
        """Initialise the atom, then parse children up to offset + size."""
        super(AtomWithChildren, self).__init__(size, type, name, offset, file)
        end_of_atom = offset + size
        self._set_children(parse_atoms(file, end_of_atom))
|
||||
|
||||
class ftyp(Atom):
    """Atom exposing 'major_version' and 'minor_version' attributes."""

    def __init__(self, size, type, name, offset, file):
        """Read two 32-bit fields following the header into attributes."""
        super(ftyp, self).__init__(size, type, name, offset, file)
        # First field is rendered as a four-character string, second is kept
        # as the raw integer.
        major = type_to_str(read32(file))
        self._set_attr('major_version', major)
        minor = read32(file)
        self._set_attr('minor_version', minor)
|
||||
|
||||
class meta(Atom):
    """Container atom that carries one extra 32-bit field before its children."""

    def __init__(self, size, type, name, offset, file):
        """Skip the extra field, then parse children up to offset + size."""
        super(meta, self).__init__(size, type, name, offset, file)
        # meta has an extra null after the atom header. consume it here
        read32(file)
        end_of_atom = offset + size
        self._set_children(parse_atoms(file, end_of_atom))
|
||||
|
||||
class data(Atom):
    """Atom holding a typed value, exposed as the 'data' attribute.

    After construction self.type is the numeric payload type (the low 24
    bits of the first 32-bit field), not the four-character atom type.
    """

    def __init__(self, size, type, name, offset, file):
        """Read the payload type and decode the value accordingly.

        Type 1 is decoded as UTF-8 text, types 21 and 0 as a 32-bit integer,
        types 13 and 14 as raw bytes and type 22 via read8().  Any other type
        is reported on stdout and leaves the 'data' attribute unset.
        """
        Atom.__init__(self, size, type, name, offset, file)

        # Mask off the version field
        self.type = read32(file) & 0xFFFFFF

        if self.type == 1:
            self._set_attr("data", self.parse_string())
        elif self.type in (21, 0):
            # Another random null padding
            read32(self.file)
            value = read32(self.file)

            # If this looks big-endian, swap it; I would assume there's an
            # atom or something that indicates this, but I can't find it.
            if (value & 0xff000000) != 0 and (value & 0xff) == 0:
                value = (value & 0xff000000) >> 24

            self._set_attr("data", value)
        elif self.type in (13, 14):
            # Another random null padding
            read32(self.file)
            # 16 = the bytes already consumed before the payload.
            self._set_attr("data", self.file.read(self.size - 16))
        elif self.type == 22:
            # uint8.
            read32(self.file)
            self._set_attr("data", read8(self.file))
        else:
            # Single pre-formatted argument so the output is identical
            # whether print is a statement (Python 2) or a function.
            print("UNKNOWN TYPE %s" % self.type)

    def parse_string(self):
        """Return the remaining payload decoded as UTF-8, ignoring bad bytes."""
        # consume extra null?
        read32(self.file)
        howMuch = self.size - 16
        # bytes.decode() exists on every Python version, unlike unicode().
        return self.file.read(howMuch).decode("utf-8", "ignore")
|
||||
@@ -0,0 +1,59 @@
|
||||
'''
|
||||
Created on Dec 26, 2009
|
||||
|
||||
@author: napier
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
def path_compare(path, pattern):
    """Return True when *path* matches *pattern* in full.

    Pattern syntax:
      *   matches one or more characters other than '/'
      //  matches any run of characters (including '/')
    Everything else is matched literally.  The whole path has to match, so
    a pattern ending in 'tvepisode' does not match 'tvepisodenum'.
    """
    # Handle the simple case
    if '*' not in pattern and '//' not in pattern:
        return path == pattern
    # Convert pattern into regexp.  Literal pieces are escaped so that
    # characters such as '.' are not treated as regex metacharacters.
    regexp = '.*'.join(
        '[^/]+'.join(re.escape(literal) for literal in chunk.split('*'))
        for chunk in pattern.split('//'))
    # \Z anchors the end; re.match() already anchors the start.
    return re.match(regexp + r'\Z', path) is not None
|
||||
|
||||
def find_path(atom, findpath):
    """Return the first descendant of *atom* matching *findpath*, or None.

    '.' returns *atom* itself.  A path that does not start with '.' is
    treated as relative, i.e. './' is prepended.  Children are searched in
    order through find_path_helper().
    """
    if findpath == '.':
        return atom
    # startswith() instead of findpath[0] so an empty path is handled as
    # "nothing matches" rather than raising IndexError.
    if not findpath.startswith('.'):
        findpath = './' + findpath
    for child in atom.children:
        res = find_path_helper(child, findpath, '.', '.')
        if res:
            return res
    return None
|
||||
|
||||
def find_path_helper(atom, findpath, typepath, namepath, all=False):
    """Recursive worker for find_path() and findall_path().

    *typepath* and *namepath* are the paths of the parent built from atom
    types and atom names respectively; this atom's own type/name is appended
    before comparing against *findpath* with path_compare().

    With *all* false the first matching atom (or None) is returned; with
    *all* true a list of every match is returned.  The children of a
    matching atom are not searched.
    """
    typepath = typepath + '/' + str(atom.type)
    namepath = namepath + '/' + atom.name

    # An atom matches when either its type path or its name path matches.
    if path_compare(typepath, findpath) or path_compare(namepath, findpath):
        return [atom] if all else atom

    if not all:
        for child in atom.children:
            hit = find_path_helper(child, findpath, typepath, namepath, all)
            if hit:
                return hit
        return None

    collected = []
    for child in atom.children:
        collected += find_path_helper(child, findpath, typepath, namepath, all)
    return collected
|
||||
|
||||
def findall_path(atom, findpath):
    """Return a list of every descendant of *atom* matching *findpath*.

    '.' yields a one-element list containing *atom* itself, so the return
    value is always a list.  A path that does not start with '.' is treated
    as relative, i.e. './' is prepended.
    """
    if findpath == '.':
        # Wrap in a list so callers can always iterate the result.
        return [atom]
    # startswith() instead of findpath[0] so an empty path yields no
    # matches rather than raising IndexError.
    if not findpath.startswith('.'):
        findpath = './' + findpath
    all_res = []
    for child in atom.children:
        all_res += find_path_helper(child, findpath, '.', '.', True)
    return all_res
|
||||
@@ -0,0 +1,54 @@
|
||||
'''
|
||||
Created on Dec 26, 2009
|
||||
|
||||
@author: napier
|
||||
'''
|
||||
from atomsearch import find_path, findall_path
|
||||
import unittest
|
||||
|
||||
|
||||
class FakeAtom(object):
    """Minimal atom stand-in: just a name, a type and a list of children."""

    def __init__(self, name, type):
        self.children = []
        self.type = type
        self.name = name

    def get_atoms(self):
        """Return the list of child atoms."""
        return self.children
|
||||
|
||||
class Test(unittest.TestCase):
    """Exercises find_path() and findall_path() against a small fake tree."""

    def setUp(self):
        # root -> child1 -> two grandchildren of type 'gc'
        #      -> child2
        self.mp4 = FakeAtom('root', 'root')
        self.mp4.children = [FakeAtom('child1', 'child1'),
                             FakeAtom('child2', 'child2')]
        child1 = self.mp4.children[0]
        child1.children = [FakeAtom('grandchild', 'gc'),
                           FakeAtom('granchild', 'gc')]

    # assertEqual/assertNotEqual are used throughout: the assertEquals and
    # assertNotEquals aliases are deprecated and no longer exist in recent
    # Python releases.

    def testFindSelf(self):
        root = find_path(self.mp4, '.')
        self.assertEqual(root.type, 'root')

    def testFindStar(self):
        gc1 = find_path(self.mp4, '*/gc')
        self.assertNotEqual(None, gc1)

    def testFindGc1(self):
        # The same atom must be reachable by type path, by name path, and
        # by a path without the leading './'.
        gc1 = find_path(self.mp4, './child1/gc')
        self.assertNotEqual(None, gc1)
        gc2 = find_path(self.mp4, './child1/grandchild')
        self.assertNotEqual(None, gc2)
        gc3 = find_path(self.mp4, 'child1/grandchild')
        self.assertNotEqual(None, gc3)
        self.assertEqual(gc1.type, gc2.type)
        self.assertEqual(gc2.type, gc3.type)

    def testFindall(self):
        res = findall_path(self.mp4, './child1/gc')
        self.assertEqual(2, len(res))
        all = findall_path(self.mp4, './/gc')
        self.assertEqual(2, len(all))
|
||||
|
||||
if __name__ == "__main__":
|
||||
#import sys;sys.argv = ['', 'Test.testName']
|
||||
unittest.main()
|
||||
@@ -0,0 +1,23 @@
|
||||
'''
|
||||
Created on Dec 6, 2009
|
||||
|
||||
@author: napier
|
||||
'''
|
||||
from atom import parse_atoms, AtomWithChildren
|
||||
#import logging
|
||||
import os
|
||||
|
||||
#log = logging.getLogger("mp4file")
|
||||
|
||||
def getFileSize(file):
    """Return the size of *file* in bytes.

    The file is left positioned at offset 0 afterwards, regardless of where
    it was before the call.
    """
    file.seek(0, os.SEEK_END)
    size = file.tell()
    # Rewind to the very beginning.
    file.seek(0, os.SEEK_SET)
    return size
|
||||
|
||||
class Mp4File(AtomWithChildren):
    """Root of the atom tree for one file on disk.

    The top-level atoms are available both as ``children`` (from the base
    class) and as ``atoms``.
    """

    def __init__(self, filename):
        """Open *filename* in binary mode and parse all of its atoms.

        The file object is stored on the atoms and is intentionally left
        open after a successful parse; it is closed only if parsing fails.
        """
        file = open(filename, "rb")
        try:
            # getFileSize() leaves the file at offset 0, which is where the
            # base class starts parsing.  The file is parsed only once.
            AtomWithChildren.__init__(self, getFileSize(file),
                                      '', '', 0, file)
        except Exception:
            file.close()
            raise
        # Same atoms as self.children, kept under the historical name.
        self.atoms = self.children
|
||||
@@ -0,0 +1,278 @@
|
||||
# mutagen aims to be an all purpose media tagging library
|
||||
# Copyright (C) 2005 Michael Urman
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
|
||||
"""Mutagen aims to be an all purpose multimedia tagging library.
|
||||
|
||||
::
|
||||
|
||||
import mutagen.[format]
|
||||
metadata = mutagen.[format].Open(filename)
|
||||
|
||||
`metadata` acts like a dictionary of tags in the file. Tags are generally a
|
||||
list of string-like values, but may have additional methods available
|
||||
depending on tag or format. They may also be entirely different objects
|
||||
for certain keys, again depending on format.
|
||||
"""
|
||||
|
||||
version = (1, 24)
|
||||
"""Version tuple."""
|
||||
|
||||
version_string = ".".join(map(str, version))
|
||||
"""Version string."""
|
||||
|
||||
|
||||
import warnings
|
||||
|
||||
import mutagen._util
|
||||
|
||||
|
||||
class Metadata(object):
    """An abstract dict-like object.

    Metadata is the base class for many of the tag objects in Mutagen.
    Subclasses are expected to provide load(), save() and delete().
    """

    def __init__(self, *args, **kwargs):
        # Constructing without any arguments creates an empty object;
        # otherwise everything is forwarded to load().
        if not (args or kwargs):
            return
        self.load(*args, **kwargs)

    def load(self, *args, **kwargs):
        """Not implemented in the base class."""
        raise NotImplementedError

    def save(self, filename=None):
        """Save changes to a file."""
        raise NotImplementedError

    def delete(self, filename=None):
        """Remove tags from a file."""
        raise NotImplementedError
|
||||
|
||||
|
||||
class FileType(mutagen._util.DictMixin):
    """An abstract object wrapping tags and audio stream information.

    Attributes:

    * info -- stream information (length, bitrate, sample rate)
    * tags -- metadata tags, if any

    Each file format has different potential tags and stream
    information.

    FileTypes implement an interface very similar to Metadata; the
    dict interface, save, load, and delete calls on a FileType call
    the appropriate methods on its tag data.
    """

    info = None
    tags = None
    filename = None
    _mimes = ["application/octet-stream"]

    def __init__(self, filename=None, *args, **kwargs):
        if filename is not None:
            self.load(filename, *args, **kwargs)
            return
        warnings.warn("FileType constructor requires a filename",
                      DeprecationWarning)

    def load(self, filename, *args, **kwargs):
        """Not implemented in the base class."""
        raise NotImplementedError

    def __getitem__(self, key):
        """Look up a metadata tag key.

        If the file has no tags at all, a KeyError is raised.
        """
        if self.tags is not None:
            return self.tags[key]
        raise KeyError(key)

    def __setitem__(self, key, value):
        """Set a metadata tag.

        If the file has no tags, an appropriate format is added (but
        not written until save is called).
        """
        if self.tags is None:
            self.add_tags()
        self.tags[key] = value

    def __delitem__(self, key):
        """Delete a metadata tag key.

        If the file has no tags at all, a KeyError is raised.
        """
        if self.tags is not None:
            del self.tags[key]
            return
        raise KeyError(key)

    def keys(self):
        """Return a list of keys in the metadata tag.

        If the file has no tags at all, an empty list is returned.
        """
        return [] if self.tags is None else self.tags.keys()

    def delete(self, filename=None):
        """Remove tags from a file.

        Does nothing (and returns None) when there are no tags.
        """
        if self.tags is None:
            return None
        if filename is not None:
            warnings.warn(
                "delete(filename=...) is deprecated, reload the file",
                DeprecationWarning)
        else:
            filename = self.filename
        return self.tags.delete(filename)

    def save(self, filename=None, **kwargs):
        """Save metadata tags.

        Raises ValueError when there are no tags to save.
        """
        # The deprecation warning is issued before the tags check.
        if filename is not None:
            warnings.warn(
                "save(filename=...) is deprecated, reload the file",
                DeprecationWarning)
        else:
            filename = self.filename
        if self.tags is None:
            raise ValueError("no tags in file")
        return self.tags.save(filename, **kwargs)

    def pprint(self):
        """Print stream information and comment key=value pairs."""
        stream = "%s (%s)" % (self.info.pprint(), self.mime[0])
        try:
            tags = self.tags.pprint()
        except AttributeError:
            # e.g. self.tags is None
            return stream
        if tags:
            return stream + "\n" + tags
        return stream

    def add_tags(self):
        """Adds new tags to the file.

        Raises if tags already exist.
        """
        raise NotImplementedError

    @property
    def mime(self):
        """A list of mime types"""
        # Walk the MRO so more specific classes come first; keep only the
        # first occurrence of each mime type.
        collected = []
        for klass in type(self).__mro__:
            for entry in getattr(klass, '_mimes', []):
                if entry not in collected:
                    collected.append(entry)
        return collected

    @staticmethod
    def score(filename, fileobj, header):
        """Not implemented in the base class."""
        raise NotImplementedError
|
||||
|
||||
|
||||
class StreamInfo(object):
    """Abstract stream information object.

    Provides attributes for length, bitrate, sample rate etc.

    See the implementations for details.
    """

    def pprint(self):
        """Print stream information"""
        raise NotImplementedError
|
||||
|
||||
|
||||
def File(filename, options=None, easy=False):
    """Guess the type of the file and try to open it.

    The file type is decided by several things, such as the first 128
    bytes (which usually contains a file type identifier), the
    filename extension, and the presence of existing tags.

    If no appropriate type could be found, None is returned.

    :param options: Sequence of :class:`FileType` implementations, defaults to
                    all included ones.

    :param easy: If the easy wrappers should be returned if available.
                 For example :class:`EasyMP3 <mp3.EasyMP3>` instead
                 of :class:`MP3 <mp3.MP3>`.
    """

    if options is None:
        from mutagen.asf import ASF
        from mutagen.apev2 import APEv2File
        from mutagen.flac import FLAC
        if easy:
            from mutagen.easyid3 import EasyID3FileType as ID3FileType
        else:
            from mutagen.id3 import ID3FileType
        if easy:
            from mutagen.mp3 import EasyMP3 as MP3
        else:
            from mutagen.mp3 import MP3
        from mutagen.oggflac import OggFLAC
        from mutagen.oggspeex import OggSpeex
        from mutagen.oggtheora import OggTheora
        from mutagen.oggvorbis import OggVorbis
        from mutagen.oggopus import OggOpus
        if easy:
            from mutagen.trueaudio import EasyTrueAudio as TrueAudio
        else:
            from mutagen.trueaudio import TrueAudio
        from mutagen.wavpack import WavPack
        if easy:
            from mutagen.easymp4 import EasyMP4 as MP4
        else:
            from mutagen.mp4 import MP4
        from mutagen.musepack import Musepack
        from mutagen.monkeysaudio import MonkeysAudio
        from mutagen.optimfrog import OptimFROG
        from mutagen.aiff import AIFF
        options = [MP3, TrueAudio, OggTheora, OggSpeex, OggVorbis, OggFLAC,
                   FLAC, AIFF, APEv2File, MP4, ID3FileType, WavPack,
                   Musepack, MonkeysAudio, OptimFROG, ASF, OggOpus]

    if not options:
        return None

    with open(filename, "rb") as fileobj:
        header = fileobj.read(128)
        # Sort by name after score. Otherwise import order affects
        # Kind sort order, which affects treatment of things with
        # equals scores.
        keyed = [(Kind.score(filename, fileobj, header), Kind.__name__)
                 for Kind in options]

    # The best candidate is the last one after an ascending sort.
    ranked = sorted(zip(keyed, options))
    (score, name), Kind = ranked[-1]
    return Kind(filename) if score > 0 else None
|
||||
@@ -0,0 +1,82 @@
|
||||
# Copyright 2013 Christoph Reiter
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
PY2 = sys.version_info[0] == 2
PY3 = not PY2

# Each branch defines the same set of names so that the rest of the package
# can be written once for both major versions.
if PY2:
    from StringIO import StringIO
    BytesIO = StringIO
    from cStringIO import StringIO as cBytesIO

    long_ = long
    integer_types = (int, long)
    string_types = (str, unicode)
    text_type = unicode

    xrange = xrange
    cmp = cmp
    chr_ = chr

    def endswith(text, end):
        return text.endswith(end)

    iteritems = lambda d: d.iteritems()
    itervalues = lambda d: d.itervalues()
    iterkeys = lambda d: d.iterkeys()

    iterbytes = lambda b: iter(b)

    # The three-argument raise is a syntax error on Python 3, so it has to
    # be hidden inside a string.
    exec("def reraise(tp, value, tb):\n raise tp, value, tb")

    def swap_to_string(cls):
        """Class decorator: expose __str__ as __unicode__ and __bytes__ as
        __str__, where those methods exist."""
        if hasattr(cls, '__str__'):
            cls.__unicode__ = cls.__str__

        if hasattr(cls, '__bytes__'):
            cls.__str__ = cls.__bytes__

        return cls

elif PY3:
    from io import StringIO
    StringIO = StringIO
    from io import BytesIO
    cBytesIO = BytesIO

    long_ = int
    integer_types = (int,)
    string_types = (str,)
    text_type = str

    xrange = range
    cmp = lambda a, b: (a > b) - (a < b)
    chr_ = lambda x: bytes([x])

    def endswith(text, end):
        # useful for paths which can be both, str and bytes
        if isinstance(text, str):
            if not isinstance(end, str):
                end = end.decode("ascii")
        else:
            if not isinstance(end, bytes):
                end = end.encode("ascii")
        return text.endswith(end)

    iteritems = lambda d: iter(d.items())
    itervalues = lambda d: iter(d.values())
    iterkeys = lambda d: iter(d.keys())

    iterbytes = lambda b: (bytes([v]) for v in b)

    def reraise(tp, value, tb):
        """Raise an exception of type *tp* with traceback *tb*.

        Mirrors what ``raise tp, value, tb`` does in the PY2 branch: an
        existing instance of *tp* is re-raised as-is instead of being
        wrapped in a second exception, None means "no arguments", a tuple
        is unpacked into the constructor, and anything else becomes the
        single constructor argument.
        """
        if isinstance(value, tp):
            exc = value
        elif value is None:
            exc = tp()
        elif isinstance(value, tuple):
            exc = tp(*value)
        else:
            exc = tp(value)
        raise exc.with_traceback(tb)

    def swap_to_string(cls):
        """Class decorator: nothing to swap on Python 3."""
        return cls
|
||||
@@ -0,0 +1,197 @@
|
||||
"""Constants used by Mutagen."""
|
||||
|
||||
# The position of a name in this list is its numeric genre code, so the
# order must never change.  Ten entries per group; the trailing comment gives
# the index of the first entry in each group.
GENRES = [
    u"Blues", u"Classic Rock", u"Country", u"Dance", u"Disco",
    u"Funk", u"Grunge", u"Hip-Hop", u"Jazz", u"Metal",  # 0
    u"New Age", u"Oldies", u"Other", u"Pop", u"R&B",
    u"Rap", u"Reggae", u"Rock", u"Techno", u"Industrial",  # 10
    u"Alternative", u"Ska", u"Death Metal", u"Pranks", u"Soundtrack",
    u"Euro-Techno", u"Ambient", u"Trip-Hop", u"Vocal", u"Jazz+Funk",  # 20
    u"Fusion", u"Trance", u"Classical", u"Instrumental", u"Acid",
    u"House", u"Game", u"Sound Clip", u"Gospel", u"Noise",  # 30
    u"Alt. Rock", u"Bass", u"Soul", u"Punk", u"Space",
    u"Meditative", u"Instrumental Pop", u"Instrumental Rock", u"Ethnic",
    u"Gothic",  # 40
    u"Darkwave", u"Techno-Industrial", u"Electronic", u"Pop-Folk",
    u"Eurodance", u"Dream", u"Southern Rock", u"Comedy", u"Cult",
    u"Gangsta Rap",  # 50
    u"Top 40", u"Christian Rap", u"Pop/Funk", u"Jungle",
    u"Native American", u"Cabaret", u"New Wave", u"Psychedelic", u"Rave",
    u"Showtunes",  # 60
    u"Trailer", u"Lo-Fi", u"Tribal", u"Acid Punk", u"Acid Jazz",
    u"Polka", u"Retro", u"Musical", u"Rock & Roll", u"Hard Rock",  # 70
    u"Folk", u"Folk-Rock", u"National Folk", u"Swing", u"Fast-Fusion",
    u"Bebop", u"Latin", u"Revival", u"Celtic", u"Bluegrass",  # 80
    u"Avantgarde", u"Gothic Rock", u"Progressive Rock",
    u"Psychedelic Rock", u"Symphonic Rock", u"Slow Rock", u"Big Band",
    u"Chorus", u"Easy Listening", u"Acoustic",  # 90
    u"Humour", u"Speech", u"Chanson", u"Opera", u"Chamber Music",
    u"Sonata", u"Symphony", u"Booty Bass", u"Primus", u"Porn Groove",  # 100
    u"Satire", u"Slow Jam", u"Club", u"Tango", u"Samba",
    u"Folklore", u"Ballad", u"Power Ballad", u"Rhythmic Soul",
    u"Freestyle",  # 110
    u"Duet", u"Punk Rock", u"Drum Solo", u"A Cappella", u"Euro-House",
    u"Dance Hall", u"Goa", u"Drum & Bass", u"Club-House", u"Hardcore",  # 120
    u"Terror", u"Indie", u"BritPop", u"Afro-Punk", u"Polsk Punk",
    u"Beat", u"Christian Gangsta Rap", u"Heavy Metal", u"Black Metal",
    u"Crossover",  # 130
    u"Contemporary Christian", u"Christian Rock", u"Merengue", u"Salsa",
    u"Thrash Metal", u"Anime", u"JPop", u"Synthpop", u"Abstract",
    u"Art Rock",  # 140
    u"Baroque", u"Bhangra", u"Big Beat", u"Breakbeat", u"Chillout",
    u"Downtempo", u"Dub", u"EBM", u"Eclectic", u"Electro",  # 150
    u"Electroclash", u"Emo", u"Experimental", u"Garage", u"Global",
    u"IDM", u"Illbient", u"Industro-Goth", u"Jam Band", u"Krautrock",  # 160
    u"Leftfield", u"Lounge", u"Math Rock", u"New Romantic", u"Nu-Breakz",
    u"Post-Punk", u"Post-Rock", u"Psytrance", u"Shoegaze",
    u"Space Rock",  # 170
    u"Trop Rock", u"World Music", u"Neoclassical", u"Audiobook",
    u"Audio Theatre", u"Neue Deutsche Welle", u"Podcast", u"Indie Rock",
    u"G-Funk", u"Dubstep",  # 180
    u"Garage Rock", u"Psybient",  # 190
]
"""The ID3v1 genre list."""
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,496 @@
|
||||
# Copyright (C) 2005 Michael Urman
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
import struct
|
||||
from struct import unpack, pack
|
||||
from warnings import warn
|
||||
|
||||
from ._compat import text_type, chr_, PY3, swap_to_string, string_types
|
||||
from mutagen._id3util import ID3JunkFrameError, ID3Warning, BitPaddedInt
|
||||
from mutagen._util import total_ordering, decode_terminated
|
||||
|
||||
|
||||
class Spec(object):
    """Base class for a named field specification.

    Instances deliberately refuse to be hashed.
    """

    def __init__(self, name):
        self.name = name

    def __hash__(self):
        raise TypeError("Spec objects are unhashable")

    def _validate23(self, frame, value, **kwargs):
        """Return a possibly modified value which, if written,
        results in valid id3v2.3 data.

        The base implementation returns *value* unchanged.
        """
        return value
|
||||
|
||||
|
||||
class ByteSpec(Spec):
    """Spec for a single byte stored as an integer."""

    def read(self, frame, data):
        """Return (first byte as int, remaining data)."""
        first = bytearray(data)[0]
        rest = data[1:]
        return first, rest

    def write(self, frame, value):
        """Return *value* converted with chr_()."""
        return chr_(value)

    def validate(self, frame, value):
        """Return *value* unchanged; None is accepted as-is."""
        if value is not None:
            # Conversion is attempted only for its side effect of raising
            # on a value chr_() cannot handle.
            chr_(value)
        return value
|
||||
|
||||
|
||||
class IntegerSpec(Spec):
    """Spec for an integer that occupies all of the remaining data."""

    def read(self, frame, data):
        """Return (integer decoded from all of *data*, empty remainder).

        The remainder is a bytes object, like the remainders returned by
        the other specs; on Python 2 b'' and '' are the same thing.
        """
        return int(BitPaddedInt(data, bits=8)), b''

    def write(self, frame, value):
        """Return *value* encoded via BitPaddedInt.to_str (bits=8, width=-1)."""
        return BitPaddedInt.to_str(value, bits=8, width=-1)

    def validate(self, frame, value):
        """Return *value* unchanged."""
        return value
|
||||
|
||||
|
||||
class SizedIntegerSpec(Spec):
    """Spec for an integer stored in a fixed number of bytes."""

    def __init__(self, name, size):
        self.name = name
        self.__sz = size

    def read(self, frame, data):
        """Return (integer from the first *size* bytes, remaining data)."""
        width = self.__sz
        head, tail = data[:width], data[width:]
        return int(BitPaddedInt(head, bits=8)), tail

    def write(self, frame, value):
        """Return *value* encoded via BitPaddedInt.to_str with the fixed width."""
        return BitPaddedInt.to_str(value, bits=8, width=self.__sz)

    def validate(self, frame, value):
        """Return *value* unchanged."""
        return value
|
||||
|
||||
|
||||
class EncodingSpec(ByteSpec):
    """Spec for the text-encoding byte."""

    def read(self, frame, data):
        """Return (encoding, remaining data).

        A first byte of 16 or more is not taken as an encoding: encoding 0
        is returned and the byte is put back in front of the data.
        """
        enc, data = super(EncodingSpec, self).read(frame, data)
        if enc >= 16:
            return 0, chr_(enc) + data
        return enc, data

    def validate(self, frame, value):
        """Return *value* if it is None or in 0..3, else raise ValueError."""
        if value is None:
            return None
        if not 0 <= value <= 3:
            raise ValueError('Invalid Encoding: %r' % value)
        return value

    def _validate23(self, frame, value, **kwargs):
        # only 0, 1 are valid in v2.3, default to utf-16
        return min(1, value)
|
||||
|
||||
|
||||
class StringSpec(Spec):
    """Spec for a fixed-length byte string."""

    def __init__(self, name, length):
        super(StringSpec, self).__init__(name)
        self.len = length

    def read(self, frame, data):
        """Return (first *len* bytes, remaining data)."""
        size = self.len
        return data[:size], data[size:]

    def write(self, frame, value):
        """Return exactly *len* bytes: *value* NUL-padded or truncated.

        None is written as all NUL bytes.
        """
        padding = b'\x00' * self.len
        if value is None:
            return padding
        return (bytes(value) + padding)[:self.len]

    def validate(self, frame, value):
        """Return *value* as bytes if it has exactly *len* bytes.

        None is passed through; non-bytes values are ASCII-encoded first.
        Raises ValueError on a length mismatch.
        """
        if value is None:
            return None

        if not isinstance(value, bytes):
            value = value.encode("ascii")

        if len(value) != self.len:
            raise ValueError(
                'Invalid StringSpec[%d] data: %r' % (self.len, value))
        return value
|
||||
|
||||
|
||||
class BinaryDataSpec(Spec):
    """Spec for opaque binary data that occupies the rest of the data."""

    def read(self, frame, data):
        """Return (all of *data*, empty remainder)."""
        return data, b''

    def write(self, frame, value):
        """Return *value* as bytes; None is written as empty data.

        Non-bytes values are converted to text and ASCII-encoded.
        """
        if value is None:
            return b""
        if isinstance(value, bytes):
            return value
        return text_type(value).encode("ascii")

    def validate(self, frame, value):
        """Return *value* as bytes, or None when no value is given.

        None is passed through (as write() and the other specs do) instead
        of being turned into the literal bytes b'None'.  Non-bytes values
        are converted to text and ASCII-encoded.
        """
        if value is None:
            return None
        if isinstance(value, bytes):
            return value
        return text_type(value).encode("ascii")
|
||||
|
||||
|
||||
class EncodedTextSpec(Spec):
    """Spec for text stored in the encoding selected by frame.encoding."""

    # Okay, seriously. This is private and defined explicitly and
    # completely by the ID3 specification. You can't just add
    # encodings here however you want.
    # Each entry is (codec name, terminator bytes); the index is the value
    # of frame.encoding.
    _encodings = (
        ('latin1', b'\x00'),
        ('utf16', b'\x00\x00'),
        ('utf_16_be', b'\x00\x00'),
        ('utf8', b'\x00')
    )

    def read(self, frame, data):
        """Return (decoded text, remaining data).

        Raises ID3JunkFrameError when the data cannot be decoded even
        after the fallbacks below.
        """
        codec, terminator = self._encodings[frame.encoding]
        try:
            # allow missing termination
            return decode_terminated(data, codec, strict=False)
        except ValueError:
            # utf-16 termination with missing BOM, or single NULL
            term_len = len(terminator)
            if not data[:term_len].strip(b"\x00"):
                return u"", data[term_len:]

            # utf-16 data with single NULL, see issue 169
            try:
                return decode_terminated(data + b"\x00", codec)
            except ValueError:
                raise ID3JunkFrameError

    def write(self, frame, value):
        """Return *value* encoded with the frame's codec plus its terminator."""
        codec, terminator = self._encodings[frame.encoding]
        return value.encode(codec) + terminator

    def validate(self, frame, value):
        """Return *value* converted to text_type."""
        return text_type(value)
|
||||
|
||||
|
||||
class MultiSpec(Spec):
    """Spec for a repeated sequence of one or more sub-specs.

    With a single sub-spec the value is a flat list of items; with several
    sub-specs it is a list of records (one list per repetition).
    """

    def __init__(self, name, *specs, **kw):
        super(MultiSpec, self).__init__(name)
        self.specs = specs
        # Optional separator used by validate() to split a plain string.
        self.sep = kw.get('sep')

    def read(self, frame, data):
        """Consume *data* completely; return (list of values, rest)."""
        single = len(self.specs) == 1
        values = []
        while data:
            record = []
            for spec in self.specs:
                value, data = spec.read(frame, data)
                record.append(value)
            values.append(record[0] if single else record)
        return values, data

    def write(self, frame, value):
        """Return the concatenation of every item written by its sub-spec."""
        if len(self.specs) == 1:
            only = self.specs[0]
            chunks = [only.write(frame, v) for v in value]
        else:
            chunks = [s.write(frame, v)
                      for record in value
                      for v, s in zip(record, self.specs)]
        return b''.join(chunks)

    def validate(self, frame, value):
        """Return the validated list form of *value*.

        None becomes an empty list; a string is split on the separator when
        one was configured.  Anything that is not a list by then raises
        ValueError.
        """
        if value is None:
            return []
        if self.sep and isinstance(value, string_types):
            value = value.split(self.sep)
        if not isinstance(value, list):
            raise ValueError('Invalid MultiSpec data: %r' % value)
        if len(self.specs) == 1:
            only = self.specs[0]
            return [only.validate(frame, v) for v in value]
        return [
            [s.validate(frame, v) for (v, s) in zip(val, self.specs)]
            for val in value]

    def _validate23(self, frame, value, **kwargs):
        """Return *value* adjusted so that it is valid for id3v2.3.

        For a single text sub-spec, and when a 'sep' keyword is given, the
        items are joined into one value.
        """
        if len(self.specs) != 1:
            return [[s._validate23(frame, v, **kwargs)
                     for (v, s) in zip(val, self.specs)]
                    for val in value]

        spec = self.specs[0]

        # Merge single text spec multispecs only.
        # (TimeStampSpec being the exception, but it's not a valid v2.3 frame)
        is_plain_text = (isinstance(spec, EncodedTextSpec) and
                         not isinstance(spec, TimeStampSpec))
        if not is_plain_text:
            return value

        value = [spec._validate23(frame, v, **kwargs) for v in value]
        sep = kwargs.get("sep")
        if sep is not None:
            return [spec.validate(frame, kwargs["sep"].join(value))]
        return value
|
||||
|
||||
|
||||
class EncodedNumericTextSpec(EncodedTextSpec):
    """EncodedTextSpec subclass that adds no behaviour of its own."""
|
||||
|
||||
|
||||
class EncodedNumericPartTextSpec(EncodedTextSpec):
    """EncodedTextSpec subclass that adds no behaviour of its own."""
|
||||
|
||||
|
||||
class Latin1TextSpec(EncodedTextSpec):
    """Text spec that always uses latin1, whatever the frame encoding is."""

    def read(self, frame, data):
        """Return (text before the first NUL, data after it).

        Without a NUL byte all of *data* is text and the remainder is empty.
        """
        text, _, remainder = data.partition(b'\x00')
        return text.decode('latin1'), remainder

    def write(self, data, value):
        """Return *value* latin1-encoded and NUL-terminated."""
        encoded = value.encode('latin1')
        return encoded + b'\x00'

    def validate(self, frame, value):
        """Return *value* converted to text_type."""
        return text_type(value)
|
||||
|
||||
|
||||
@swap_to_string
@total_ordering
class ID3TimeStamp(object):
    """A time stamp in ID3v2 format.

    This is a restricted form of the ISO 8601 standard; time stamps
    take the form of:
        YYYY-MM-DD HH:MM:SS
    Or some partial form (YYYY-MM-DD HH, YYYY, etc.).

    The 'text' attribute contains the raw text data of the time stamp.
    """

    import re

    def __init__(self, text):
        """Create a time stamp from text or from another ID3TimeStamp.

        On Python 3 anything that is not text raises TypeError; on
        Python 2 byte strings are decoded as UTF-8.
        """
        if isinstance(text, ID3TimeStamp):
            text = text.text
        elif not isinstance(text, text_type):
            if PY3:
                raise TypeError("not a str")
            text = text.decode("utf-8")

        # Goes through the property setter, which parses the fields.
        self.text = text

    __formats = ['%04d'] + ['%02d'] * 5
    # Separator written after each field; the final one is a placeholder
    # that get_text() always strips off.
    __seps = ['-', '-', ' ', ':', ':', 'x']

    def get_text(self):
        """Return the stamp formatted up to the first missing field."""
        parts = [self.year, self.month, self.day,
                 self.hour, self.minute, self.second]
        pieces = []
        for i, part in enumerate(parts):
            if part is None:
                break
            pieces.append(self.__formats[i] % part + self.__seps[i])
        # Drop the separator that follows the last field.
        return u''.join(pieces)[:-1]

    # Raw string: '\s' in a normal string literal is an invalid escape
    # sequence and triggers a warning on newer Python versions.
    def set_text(self, text, splitre=re.compile(r'[-T:/.]|\s+')):
        """Parse *text* into year, month, day, hour, minute and second.

        Fields that are missing or not numeric are set to None.
        """
        # The padding guarantees at least six pieces after splitting.
        fields = splitre.split(text + ':::::')[:6]
        names = ('year', 'month', 'day', 'hour', 'minute', 'second')
        for name, raw in zip(names, fields):
            try:
                v = int(raw)
            except ValueError:
                v = None
            setattr(self, name, v)

    text = property(get_text, set_text, doc="ID3v2.4 date and time.")

    def __str__(self):
        return self.text

    def __bytes__(self):
        return self.text.encode("utf-8")

    def __repr__(self):
        return repr(self.text)

    def __eq__(self, other):
        return self.text == other.text

    def __lt__(self, other):
        return self.text < other.text

    __hash__ = object.__hash__

    def encode(self, *args):
        """Return the text encoded with the given str.encode() arguments."""
        return self.text.encode(*args)
|
||||
|
||||
|
||||
class TimeStampSpec(EncodedTextSpec):
    """Encoded-text spec whose values are ID3TimeStamp objects."""

    def read(self, frame, data):
        """Read text via the parent spec and return (stamp, remaining data)."""
        text, remainder = super(TimeStampSpec, self).read(frame, data)
        stamp = self.validate(frame, text)
        return stamp, remainder

    def write(self, frame, data):
        """Write the stamp's text with every space replaced by 'T'."""
        parent = super(TimeStampSpec, self)
        return parent.write(frame, data.text.replace(' ', 'T'))

    def validate(self, frame, value):
        """Wrap ``value`` in an ID3TimeStamp; a TypeError becomes ValueError."""
        try:
            stamp = ID3TimeStamp(value)
        except TypeError:
            raise ValueError("Invalid ID3TimeStamp: %r" % value)
        return stamp
|
||||
|
||||
|
||||
class ChannelSpec(ByteSpec):
    """Byte spec exposing named integer constants (0 through 8)."""

    OTHER = 0
    MASTER = 1
    FRONTRIGHT = 2
    FRONTLEFT = 3
    BACKRIGHT = 4
    BACKLEFT = 5
    FRONTCENTRE = 6
    BACKCENTRE = 7
    SUBWOOFER = 8
|
||||
|
||||
|
||||
class VolumeAdjustmentSpec(Spec):
    """Signed 16-bit big-endian value stored in units of 1/512."""

    def read(self, frame, data):
        """Return (stored value / 512.0, data after the two bytes read)."""
        (raw,) = unpack('>h', data[0:2])
        return raw/512.0, data[2:]

    def write(self, frame, value):
        """Pack ``value * 512`` (rounded); struct.error if it does not fit."""
        scaled = int(round(value * 512))
        # pack only fails in 2.7, do it manually in 2.6
        if scaled < -32768 or scaled > 32767:
            raise struct.error
        return pack('>h', scaled)

    def validate(self, frame, value):
        """Return ``value``; ValueError when it cannot be written."""
        if value is None:
            return value
        try:
            self.write(frame, value)
        except struct.error:
            raise ValueError("out of range")
        return value
|
||||
|
||||
|
||||
class VolumePeakSpec(Spec):
    """Peak volume stored as a bit count byte followed by up to 4 bytes."""

    def read(self, frame, data):
        """Return (peak as a float, data remaining after the peak field).

        Raises ID3JunkFrameError when ``data`` is empty or shorter than the
        bit count in its first byte requires.
        """
        # http://bugs.xmms.org/attachment.cgi?id=113&action=view
        # Indexing a bytearray yields ints on Python 2 and 3 alike, whereas
        # ord(data[0]) fails on Python 3 because data[0] is already an int.
        raw = bytearray(data)
        if not raw:
            raise ID3JunkFrameError
        bits = raw[0]
        nbytes = min(4, (bits + 7) >> 3)
        # not enough frame data
        if nbytes + 1 > len(raw):
            raise ID3JunkFrameError
        shift = ((8 - (bits & 7)) & 7) + (4 - nbytes) * 8
        peak = 0
        for octet in raw[1:nbytes + 1]:
            peak = peak * 256 + octet
        peak *= 2 ** shift
        return (float(peak) / (2**31-1)), data[1+nbytes:]

    def write(self, frame, value):
        """Pack ``value * 32768`` (rounded) as a 16-bit peak field."""
        number = int(round(value * 32768))
        # pack only fails in 2.7, do it manually in 2.6
        if not 0 <= number <= 65535:
            raise struct.error
        # always write as 16 bits for sanity.
        return b"\x10" + pack('>H', number)

    def validate(self, frame, value):
        """Return ``value``; ValueError when it cannot be written."""
        if value is not None:
            try:
                self.write(frame, value)
            except struct.error:
                raise ValueError("out of range")
        return value
|
||||
|
||||
|
||||
class SynchronizedTextSpec(EncodedTextSpec):
    """A sequence of (text, time) pairs.

    Each entry is a terminated string in the frame's encoding followed by
    a 4-byte big-endian unsigned integer.
    """

    def read(self, frame, data):
        """Consume all of ``data`` and return (list of (text, time), b"").

        Raises ID3JunkFrameError when a string is not terminated or fewer
        than four bytes follow it.
        """
        texts = []
        encoding, term = self._encodings[frame.encoding]
        while data:
            try:
                value, data = decode_terminated(data, encoding)
            except ValueError:
                raise ID3JunkFrameError

            if len(data) < 4:
                raise ID3JunkFrameError
            time, = struct.unpack(">I", data[:4])

            texts.append((value, time))
            data = data[4:]
        # The remainder must be bytes like the input, not a text string.
        return texts, b""

    def write(self, frame, value):
        """Serialise (text, time) pairs in the frame's encoding."""
        data = []
        encoding, term = self._encodings[frame.encoding]
        for text, time in value:
            text = text.encode(encoding) + term
            data.append(text + struct.pack(">I", time))
        return b"".join(data)

    def validate(self, frame, value):
        """Return ``value`` unchanged."""
        return value
|
||||
|
||||
|
||||
class KeyEventSpec(Spec):
    """List of 5-byte records: a signed byte and a big-endian uint32."""

    def read(self, frame, data):
        """Unpack every complete record; return (events, leftover bytes)."""
        events = []
        remaining = data
        while len(remaining) >= 5:
            record, remaining = remaining[:5], remaining[5:]
            events.append(struct.unpack(">bI", record))
        return events, remaining

    def write(self, frame, value):
        """Pack each event tuple and concatenate the results."""
        packed = [struct.pack(">bI", *event) for event in value]
        return b"".join(packed)

    def validate(self, frame, value):
        """Return ``value`` unchanged."""
        return value
|
||||
|
||||
|
||||
class VolumeAdjustmentsSpec(Spec):
    """Sorted list of (frequency, adjustment) pairs.

    On disk each pair is an unsigned 16-bit frequency in units of 1/2 and a
    signed 16-bit adjustment in units of 1/512, both big-endian.
    """

    # Not to be confused with VolumeAdjustmentSpec.
    def read(self, frame, data):
        """Return (sorted list of (freq, adj) floats, leftover bytes).

        A frequency that occurs more than once keeps its last adjustment.
        """
        adjustments = {}
        while len(data) >= 4:
            freq, adj = struct.unpack(">Hh", data[:4])
            data = data[4:]
            freq /= 2.0
            adj /= 512.0
            adjustments[freq] = adj
        # sorted() works on Python 3 too, where items() is a view with no
        # sort() method.
        return sorted(adjustments.items()), data

    def write(self, frame, value):
        """Sort ``value`` in place and pack every (freq, adj) pair."""
        # In-place sort kept on purpose: callers see their list ordered.
        value.sort()
        return b"".join([struct.pack(">Hh", int(freq * 2), int(adj * 512))
                         for (freq, adj) in value])

    def validate(self, frame, value):
        """Return ``value`` unchanged."""
        return value
|
||||
|
||||
|
||||
class ASPIIndexSpec(Spec):
    """``frame.N`` big-endian unsigned integers of ``frame.b`` bits each."""

    def read(self, frame, data):
        """Return (list of indexes, leftover bytes).

        For a ``frame.b`` other than 8 or 16 a warning is issued and an
        empty list is returned together with the untouched data.
        """
        if frame.b == 16:
            code, width = "H", 2
        elif frame.b == 8:
            code, width = "B", 1
        else:
            warn("invalid bit count in ASPI (%d)" % frame.b, ID3Warning)
            return [], data

        raw = data[:frame.N * width]
        rest = data[frame.N * width:]
        values = struct.unpack(">" + code * frame.N, raw)
        return list(values), rest

    def write(self, frame, values):
        """Pack ``values``; ValueError unless ``frame.b`` is 8 or 16."""
        if frame.b == 16:
            code = "H"
        elif frame.b == 8:
            code = "B"
        else:
            raise ValueError("frame.b must be 8 or 16")
        layout = ">" + code * frame.N
        return struct.pack(layout, *values)

    def validate(self, frame, values):
        """Return ``values`` unchanged."""
        return values
|
||||
@@ -0,0 +1,178 @@
|
||||
# Copyright (C) 2005 Michael Urman
|
||||
# 2013 Christoph Reiter
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from ._compat import long_, integer_types
|
||||
|
||||
|
||||
class error(Exception):
    """Base class of the ID3 exceptions defined below."""


class ID3NoHeaderError(error, ValueError):
    """ID3 error that is also a ValueError."""


class ID3BadUnsynchData(error, ValueError):
    """ID3 error that is also a ValueError."""


class ID3BadCompressedData(error, ValueError):
    """ID3 error that is also a ValueError."""


class ID3TagError(error, ValueError):
    """ID3 error that is also a ValueError."""


class ID3UnsupportedVersionError(error, NotImplementedError):
    """ID3 error that is also a NotImplementedError."""


class ID3EncryptionUnsupportedError(error, NotImplementedError):
    """ID3 error that is also a NotImplementedError."""


class ID3JunkFrameError(error, ValueError):
    """ID3 error that is also a ValueError."""


class ID3Warning(error, UserWarning):
    """ID3 error that is also a UserWarning."""
|
||||
|
||||
|
||||
class unsynch(object):
    """Encode and decode byte strings so 0xFF is never directly followed
    by 0x00 or by a byte >= 0xE0 in the encoded form's payload."""

    @staticmethod
    def decode(value):
        """Undo encode(): drop the 0x00 that follows each 0xFF.

        Raises ValueError when 0xFF is followed by a byte >= 0xE0 or when
        the input ends right after 0xFF.
        """
        result = bytearray()
        after_ff = False
        for byte in bytearray(value):
            if after_ff:
                if byte >= 0xE0:
                    raise ValueError('invalid sync-safe string')
                if byte != 0x00:
                    result.append(byte)
                after_ff = False
            else:
                result.append(byte)
                after_ff = byte == 0xFF
        if after_ff:
            raise ValueError('string ended unsafe')
        return bytes(result)

    @staticmethod
    def encode(value):
        """Insert 0x00 after each 0xFF that precedes 0x00, a byte >= 0xE0,
        or the end of the input."""
        result = bytearray()
        after_ff = False
        for byte in bytearray(value):
            if after_ff and (byte == 0x00 or byte >= 0xE0):
                result.append(0x00)
            result.append(byte)
            after_ff = byte == 0xFF
        if after_ff:
            result.append(0x00)
        return bytes(result)
|
||||
|
||||
|
||||
class _BitPaddedMixin(object):
    """Helpers for integers stored with fewer than 8 payload bits per byte."""

    def as_str(self, width=4, minwidth=4):
        """Serialise this value using its own ``bits`` and ``bigendian``."""
        return self.to_str(self, self.bits, self.bigendian, width, minwidth)

    @staticmethod
    def to_str(value, bits=7, bigendian=True, width=4, minwidth=4):
        """Serialise ``value`` using ``bits`` payload bits per byte.

        With ``width`` other than -1 the result has exactly ``width`` bytes;
        with ``width == -1`` it grows as needed and is padded to at least
        ``minwidth`` bytes.

        Raises ValueError when the value is negative or does not fit.
        """
        # A negative value never shifts down to zero, so the width == -1
        # loop below would never end; reject it up front.
        if value < 0:
            raise ValueError('Value must not be negative (%r)' % (value,))

        mask = (1 << bits) - 1

        if width != -1:
            index = 0
            bytes_ = bytearray(width)
            try:
                while value:
                    bytes_[index] = value & mask
                    value >>= bits
                    index += 1
            except IndexError:
                raise ValueError('Value too wide (>%d bytes)' % width)
        else:
            # PCNT and POPM use growing integers
            # of at least 4 bytes (=minwidth) as counters.
            bytes_ = bytearray()
            append = bytes_.append
            while value:
                append(value & mask)
                value >>= bits
            bytes_ = bytes_.ljust(minwidth, b"\x00")

        # Bytes were produced least significant first.
        if bigendian:
            bytes_.reverse()
        return bytes(bytes_)

    @staticmethod
    def has_valid_padding(value, bits=7):
        """Whether the padding bits are all zero"""

        assert bits <= 8

        # Mask selecting the top (8 - bits) bits of a byte.
        mask = (((1 << (8 - bits)) - 1) << bits)

        if isinstance(value, integer_types):
            while value:
                if value & mask:
                    return False
                value >>= 8
        elif isinstance(value, bytes):
            for byte in bytearray(value):
                if byte & mask:
                    return False
        else:
            raise TypeError

        return True
|
||||
|
||||
|
||||
class BitPaddedInt(int, _BitPaddedMixin):
    """Integer decoded from bytes that carry only ``bits`` payload bits."""

    def __new__(cls, value, bits=7, bigendian=True):
        """Decode ``value`` (an integer or bytes) into a plain number.

        The low ``bits`` bits of every byte are kept and concatenated. The
        result remembers ``bits`` and ``bigendian`` as attributes.

        Raises ValueError for a negative integer and TypeError for any
        other input type.
        """

        mask = (1 << (bits)) - 1
        numeric_value = 0
        shift = 0

        if isinstance(value, integer_types):
            # A negative value never shifts down to zero, so the loop
            # below would not terminate.
            if value < 0:
                raise ValueError("value must not be negative")
            while value:
                numeric_value += (value & mask) << shift
                value >>= 8
                shift += bits
        elif isinstance(value, bytes):
            # Process the least significant byte first.
            if bigendian:
                value = reversed(value)
            for byte in bytearray(value):
                numeric_value += (byte & mask) << shift
                shift += bits
        else:
            raise TypeError

        if isinstance(numeric_value, int):
            self = int.__new__(BitPaddedInt, numeric_value)
        else:
            self = long_.__new__(BitPaddedLong, numeric_value)

        self.bits = bits
        self.bigendian = bigendian
        return self
|
||||
|
||||
|
||||
class BitPaddedLong(long_, _BitPaddedMixin):
    """``long_`` counterpart of BitPaddedInt; adds nothing of its own."""
|
||||
@@ -0,0 +1,422 @@
|
||||
# Copyright 2006 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Utility classes for Mutagen.
|
||||
|
||||
You should not rely on the interfaces here being stable. They are
|
||||
intended for internal use in Mutagen only.
|
||||
"""
|
||||
|
||||
import struct
|
||||
import codecs
|
||||
|
||||
from fnmatch import fnmatchcase
|
||||
|
||||
from ._compat import chr_, text_type, PY2, iteritems, iterbytes
|
||||
|
||||
|
||||
def total_ordering(cls):
    """Class decorator deriving the other comparisons from ``==`` and ``<``.

    Installs ``__le__``, ``__gt__``, ``__ge__`` and ``__ne__`` on ``cls``
    (overwriting any existing ones) and returns the same class.
    """
    assert hasattr(cls, "__eq__")
    assert hasattr(cls, "__lt__")

    def less_or_equal(self, other):
        return self == other or self < other

    def greater(self, other):
        if self == other:
            return False
        return not self < other

    def greater_or_equal(self, other):
        return not self < other

    def not_equal(self, other):
        return not self.__eq__(other)

    cls.__le__ = less_or_equal
    cls.__gt__ = greater
    cls.__ge__ = greater_or_equal
    cls.__ne__ = not_equal

    return cls
|
||||
|
||||
|
||||
@total_ordering
class DictMixin(object):
    """Implement the dict API using keys() and __*item__ methods.

    Similar to UserDict.DictMixin, this takes a class that defines
    __getitem__, __setitem__, __delitem__, and keys(), and turns it
    into a full dict-like object.

    UserDict.DictMixin is not suitable for this purpose because it's
    an old-style class.

    This class is not optimized for very large dictionaries; many
    functions have linear memory requirements. I recommend you
    override some of these functions if speed is required.
    """

    def __iter__(self):
        return iter(self.keys())

    def __has_key(self, key):
        """Return True when ``self[key]`` does not raise KeyError."""
        try:
            self[key]
        except KeyError:
            return False
        else:
            return True

    if PY2:
        has_key = __has_key

    __contains__ = __has_key

    iterkeys = lambda self: iter(self.keys())

    def values(self):
        return [self[k] for k in self.keys()]

    itervalues = lambda self: iter(self.values())

    def items(self):
        return list(zip(self.keys(), self.values()))

    iteritems = lambda s: iter(s.items())

    def clear(self):
        # Copy the keys first: deleting while iterating keys() is unsafe.
        for key in list(self.keys()):
            self.__delitem__(key)

    def pop(self, key, *args):
        """Remove ``key`` and return its value.

        An optional single extra argument is returned instead of raising
        KeyError when the key is missing.
        """
        if len(args) > 1:
            raise TypeError("pop takes at most two arguments")
        try:
            value = self[key]
        except KeyError:
            if args:
                return args[0]
            else:
                raise
        del(self[key])
        return value

    def popitem(self):
        """Remove and return one (key, value) pair; KeyError when empty."""
        for key in self.keys():
            break
        else:
            raise KeyError("dictionary is empty")
        return key, self.pop(key)

    def update(self, other=None, **kwargs):
        """Set items from ``other`` and then from keyword arguments.

        ``other`` may be an object with an items() method or an iterable
        of (key, value) pairs. Keyword arguments are applied as well, even
        when ``other`` is given, as ``dict.update`` does.
        """
        if other is not None:
            # Only the items() lookup is guarded, so an AttributeError
            # raised while storing an item is not mistaken for "no items()".
            try:
                pairs = other.items()
            except AttributeError:
                pairs = other
            for key, value in pairs:
                self[key] = value

        for key, value in kwargs.items():
            self[key] = value

    def setdefault(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            self[key] = default
            return default

    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default

    def __repr__(self):
        return repr(dict(self.items()))

    def __eq__(self, other):
        return dict(self.items()) == other

    def __lt__(self, other):
        return dict(self.items()) < other

    __hash__ = object.__hash__

    def __len__(self):
        return len(self.keys())
|
||||
|
||||
|
||||
class DictProxy(DictMixin):
    """DictMixin backed by a private plain dict."""

    def __init__(self, *args, **kwargs):
        # Create the backing store before handing over to the parent.
        self.__dict = {}
        super(DictProxy, self).__init__(*args, **kwargs)

    def __getitem__(self, key):
        backing = self.__dict
        return backing[key]

    def __setitem__(self, key, value):
        backing = self.__dict
        backing[key] = value

    def __delitem__(self, key):
        backing = self.__dict
        del backing[key]

    def keys(self):
        backing = self.__dict
        return backing.keys()
|
||||
|
||||
|
||||
class cdata(object):
    """C character buffer to Python numeric type conversions.

    Every attribute is a static one-argument helper wrapping struct:
    ``<type>_<le|be>(data)`` unpacks one value from a buffer and
    ``to_<type>_<le|be>(value)`` packs one value into bytes. Both pass
    struct.error through, which is also exposed as ``cdata.error``.
    """

    # Expose struct.error as cdata.error for callers to catch.
    from struct import error
    error = error

    # 16-bit signed / unsigned
    short_le = staticmethod(lambda data: struct.unpack('<h', data)[0])
    ushort_le = staticmethod(lambda data: struct.unpack('<H', data)[0])

    short_be = staticmethod(lambda data: struct.unpack('>h', data)[0])
    ushort_be = staticmethod(lambda data: struct.unpack('>H', data)[0])

    # 32-bit signed / unsigned
    int_le = staticmethod(lambda data: struct.unpack('<i', data)[0])
    uint_le = staticmethod(lambda data: struct.unpack('<I', data)[0])

    int_be = staticmethod(lambda data: struct.unpack('>i', data)[0])
    uint_be = staticmethod(lambda data: struct.unpack('>I', data)[0])

    # 64-bit signed / unsigned
    longlong_le = staticmethod(lambda data: struct.unpack('<q', data)[0])
    ulonglong_le = staticmethod(lambda data: struct.unpack('<Q', data)[0])

    longlong_be = staticmethod(lambda data: struct.unpack('>q', data)[0])
    ulonglong_be = staticmethod(lambda data: struct.unpack('>Q', data)[0])

    # Packing counterparts of the readers above.
    to_short_le = staticmethod(lambda data: struct.pack('<h', data))
    to_ushort_le = staticmethod(lambda data: struct.pack('<H', data))

    to_short_be = staticmethod(lambda data: struct.pack('>h', data))
    to_ushort_be = staticmethod(lambda data: struct.pack('>H', data))

    to_int_le = staticmethod(lambda data: struct.pack('<i', data))
    to_uint_le = staticmethod(lambda data: struct.pack('<I', data))

    to_int_be = staticmethod(lambda data: struct.pack('>i', data))
    to_uint_be = staticmethod(lambda data: struct.pack('>I', data))

    to_longlong_le = staticmethod(lambda data: struct.pack('<q', data))
    to_ulonglong_le = staticmethod(lambda data: struct.pack('<Q', data))

    to_longlong_be = staticmethod(lambda data: struct.pack('>q', data))
    to_ulonglong_be = staticmethod(lambda data: struct.pack('>Q', data))

    # Table with one entry per value 0..255: bit i of the index becomes
    # bit 7-i of the entry, i.e. the bit order is reversed.
    # NOTE(review): assumes chr_ turns an int into a single byte - confirm.
    bitswap = b''.join([chr_(sum([((val >> i) & 1) << (7-i)
                                  for i in range(8)]))
                        for val in range(256)])

    # Remove the comprehension variables if they ended up in the class
    # namespace (presumably the case on Python 2); otherwise do nothing.
    try:
        del(i)
        del(val)
    except NameError:
        pass

    # True when bit n (counting from the least significant) of value is set.
    test_bit = staticmethod(lambda value, n: bool((value >> n) & 1))
|
||||
|
||||
|
||||
def lock(fileobj):
    """Try to take an exclusive lock on a file object.

    Returns False when fcntl cannot be imported or when the locking call
    raises IOError, and True when the lock was taken. The call blocks
    until the lock is available. Any other exception propagates.
    """
    try:
        import fcntl
    except ImportError:
        return False

    try:
        fcntl.lockf(fileobj, fcntl.LOCK_EX)
    except IOError:
        # FIXME: There's possibly a lot of complicated
        # logic that needs to go here in case the IOError
        # is EACCES or EAGAIN.
        return False
    return True
|
||||
|
||||
|
||||
def unlock(fileobj):
    """Release the lock held on a file object.

    Only call this after lock() returned true for the same object.
    """
    # Errors are deliberately not caught: a failure here means a
    # mismatched lock/unlock pair, which must not go unnoticed.
    from fcntl import LOCK_UN, lockf
    lockf(fileobj, LOCK_UN)
|
||||
|
||||
|
||||
def insert_bytes(fobj, size, offset, BUFFER_SIZE=2**16):
    """Open a gap of ``size`` bytes at ``offset``, growing the file.

    Everything from ``offset`` to the end of the file is shifted ``size``
    bytes towards the end. fobj must be an open file object, open rb+ or
    equivalent. mmap is tried first; if that fails the data is moved in
    chunks of at most BUFFER_SIZE bytes, which is significantly slower.
    """
    assert 0 < size
    assert 0 <= offset
    locked = False

    fobj.seek(0, 2)
    old_size = fobj.tell()
    remaining = old_size - offset

    # Grow the file first so that the mapping below covers the new length.
    fobj.write(b'\x00' * size)
    fobj.flush()
    try:
        try:
            import mmap
            mapped = mmap.mmap(fobj.fileno(), old_size + size)
            try:
                mapped.move(offset + size, offset, remaining)
            finally:
                mapped.close()
        except (ValueError, EnvironmentError, ImportError):
            # handle broken mmap scenarios
            locked = lock(fobj)
            fobj.truncate(old_size)

            fobj.seek(0, 2)
            to_pad = size
            # Pad in bounded chunks so that a gap of several megabytes
            # does not require one enormous string.
            while to_pad:
                chunk_len = min(BUFFER_SIZE, to_pad)
                fobj.write(b"\x00" * chunk_len)
                to_pad -= chunk_len

            fobj.seek(old_size, 0)
            while remaining:
                # Invariant: fobj is positioned at the end of the data
                # that still has to move, which is `remaining` bytes long.
                step = min(BUFFER_SIZE, remaining)
                # Step back over the chunk handled in this iteration.
                fobj.seek(-step, 1)
                chunk_start = fobj.tell()
                # Reading brings the position back to the end of the chunk.
                chunk = fobj.read(step)
                # Go to the chunk's destination, `size` bytes further on.
                fobj.seek(-step + size, 1)
                fobj.write(chunk)
                # Return to the end of the data that has not moved yet.
                fobj.seek(chunk_start)
                remaining -= step

            fobj.flush()
    finally:
        if locked:
            unlock(fobj)
|
||||
|
||||
|
||||
def delete_bytes(fobj, size, offset, BUFFER_SIZE=2**16):
    """Remove ``size`` bytes starting at ``offset``, shrinking the file.

    The data after the removed range is shifted ``size`` bytes towards
    the start and the file is truncated. fobj must be an open file
    object, open rb+ or equivalent. mmap is tried first; if that fails
    the data is moved in chunks of at most BUFFER_SIZE bytes, which is
    significantly slower.
    """
    locked = False
    assert 0 < size
    assert 0 <= offset

    fobj.seek(0, 2)
    old_size = fobj.tell()
    tail_len = old_size - offset - size
    assert 0 <= tail_len
    try:
        # Nothing to move when the removed range reaches the end of file.
        if tail_len > 0:
            fobj.flush()
            try:
                import mmap
                mapped = mmap.mmap(fobj.fileno(), old_size)
                try:
                    mapped.move(offset, offset + size, tail_len)
                finally:
                    mapped.close()
            except (ValueError, EnvironmentError, ImportError):
                # handle broken mmap scenarios
                locked = lock(fobj)
                fobj.seek(offset + size)
                chunk = fobj.read(BUFFER_SIZE)
                while chunk:
                    # Write the chunk `size` bytes before where it was read.
                    fobj.seek(offset)
                    fobj.write(chunk)
                    offset += len(chunk)
                    fobj.seek(offset + size)
                    chunk = fobj.read(BUFFER_SIZE)
        fobj.truncate(old_size - size)
        fobj.flush()
    finally:
        if locked:
            unlock(fobj)
|
||||
|
||||
|
||||
def utf8(data):
    """Return ``data`` as valid UTF-8 encoded bytes.

    Bytes are decoded as UTF-8 with undecodable parts replaced and then
    re-encoded; text is simply encoded. Anything else raises TypeError.
    """
    if isinstance(data, bytes):
        cleaned = data.decode("utf-8", "replace")
        return cleaned.encode("utf-8")

    if isinstance(data, text_type):
        return data.encode("utf-8")

    raise TypeError("only unicode/str types can be converted to UTF-8")
|
||||
|
||||
|
||||
def dict_match(d, key, default=None):
    """Look ``key`` up in ``d``, treating the dict's keys as patterns.

    An exact hit wins. Otherwise the value of the first key that, used as
    a case-sensitive fnmatch pattern, matches ``key`` is returned, and
    ``default`` when nothing matches.
    """
    try:
        return d[key]
    except KeyError:
        pass

    for pattern, value in iteritems(d):
        if fnmatchcase(key, pattern):
            return value
    return default
|
||||
|
||||
|
||||
def decode_terminated(data, encoding, strict=True):
    """Decode ``data`` up to the first NULL terminator.

    Returns a tuple of the decoded text before the terminator and all
    the data after it.

    Raises UnicodeError when the data can't be decoded and LookupError
    when the encoding is unknown. When there is no terminator, ValueError
    is raised if ``strict`` is true (even if the data decodes fine);
    otherwise the decoded text and empty bytes are returned.
    """

    codec_info = codecs.lookup(encoding)

    # Use the codec's canonical name so aliases compare equal below.
    encoding = codec_info.name

    # fast path: the terminator is a single zero byte in these encodings
    if encoding in ("utf-8", "iso8859-1"):
        head, sep, tail = data.partition(b"\x00")
        if sep:
            return head.decode(encoding), tail
        # Decode before complaining so that UnicodeError takes
        # precedence, like in the slow path.
        decoded = head.decode(encoding)
        if strict:
            raise ValueError("not null terminated")
        return decoded, b""

    # slow path: feed the decoder byte by byte until it emits a NULL
    decoder = codec_info.incrementaldecoder()
    pieces = []
    for pos, byte in enumerate(iterbytes(data)):
        char = decoder.decode(byte)
        if char == u"\x00":
            return u"".join(pieces), data[pos + 1:]
        pieces.append(char)

    # No terminator found: make sure the decoder is finished.
    pieces.append(decoder.decode(b"", True))
    if strict:
        raise ValueError("not null terminated")
    return u"".join(pieces), b""
|
||||
@@ -0,0 +1,317 @@
|
||||
# Vorbis comment support for Mutagen
|
||||
# Copyright 2005-2006 Joe Wreschnig
|
||||
# 2013 Christoph Reiter
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Read and write Vorbis comment data.
|
||||
|
||||
Vorbis comments are freeform key/value pairs; keys are
|
||||
case-insensitive ASCII and values are Unicode strings. A key may have
|
||||
multiple values.
|
||||
|
||||
The specification is at http://www.xiph.org/vorbis/doc/v-comment.html.
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
import mutagen
|
||||
from ._compat import reraise, BytesIO, text_type, xrange, PY3, PY2
|
||||
from mutagen._util import DictMixin, cdata
|
||||
|
||||
|
||||
def is_valid_key(key):
    """Return true if a string is a valid Vorbis comment key.

    A valid key is non-empty and consists only of characters between
    0x20 (space) and 0x7D ('}'), with '=' excluded.

    Takes str/unicode in Python 2, unicode in Python 3; bytes raise
    ValueError when ``PY3`` is true.
    """

    if PY3 and isinstance(key, bytes):
        raise ValueError

    for char in key:
        out_of_range = char < " " or char > "}"
        if out_of_range or char == "=":
            return False
    return bool(key)
|
||||
|
||||
|
||||
istag = is_valid_key
|
||||
|
||||
|
||||
class error(IOError):
    """Base class of the Vorbis comment errors; an IOError subclass."""


class VorbisUnsetFrameError(error):
    """Vorbis comment error subclass."""


class VorbisEncodingError(error):
    """Vorbis comment error subclass."""
|
||||
|
||||
|
||||
class VComment(mutagen.Metadata, list):
    """A Vorbis comment parser, accessor, and renderer.

    All comment ordering is preserved. A VComment is a list of
    key/value pairs, and so any Python list method can be used on it.

    Vorbis comments are always wrapped in something like an Ogg Vorbis
    bitstream or a FLAC metadata block, so this loads string data or a
    file-like object, not a filename.

    Attributes:

    * vendor -- the stream 'vendor' (i.e. writer); default 'Mutagen'
    """

    vendor = u"Mutagen " + mutagen.version_string

    def __init__(self, data=None, *args, **kwargs):
        """Optionally parse ``data`` (bytes or an object with read()).

        Extra arguments are forwarded to load(). Raises TypeError for any
        other kind of ``data``.
        """
        # Collect the args to pass to load, this lets child classes
        # override just load and get equivalent magic for the
        # constructor.
        if data is not None:
            if isinstance(data, bytes):
                data = BytesIO(data)
            elif not hasattr(data, 'read'):
                raise TypeError("VComment requires bytes or a file-like")
            self.load(data, *args, **kwargs)

    def load(self, fileobj, errors='replace', framing=True):
        """Parse a Vorbis comment from a file-like object.

        Keyword arguments:

        * errors:
            'strict', 'replace', or 'ignore'. This affects Unicode decoding
            and how other malformed content is interpreted.
        * framing -- if true, fail if a framing bit is not present

        Framing bits are required by the Vorbis comment specification,
        but are not used in FLAC Vorbis comment blocks.
        """

        try:
            vendor_length = cdata.uint_le(fileobj.read(4))
            self.vendor = fileobj.read(vendor_length).decode('utf-8', errors)
            count = cdata.uint_le(fileobj.read(4))
            for i in xrange(count):
                length = cdata.uint_le(fileobj.read(4))
                try:
                    string = fileobj.read(length).decode('utf-8', errors)
                except (OverflowError, MemoryError):
                    raise error("cannot read %d bytes, too large" % length)
                try:
                    tag, value = string.split('=', 1)
                except ValueError as err:
                    # The comment has no '=' separator.
                    if errors == "ignore":
                        continue
                    elif errors == "replace":
                        tag, value = u"unknown%d" % i, string
                    else:
                        reraise(VorbisEncodingError, err, sys.exc_info()[2])
                try:
                    tag = tag.encode('ascii', errors)
                except UnicodeEncodeError:
                    raise VorbisEncodingError("invalid tag name %r" % tag)
                else:
                    # string keys in py3k
                    if PY3:
                        tag = tag.decode("ascii")
                    # Comments whose key is not valid are dropped.
                    if is_valid_key(tag):
                        self.append((tag, value))
            if framing and not ord(fileobj.read(1)) & 0x01:
                raise VorbisUnsetFrameError("framing bit was unset")
        except (cdata.error, TypeError):
            # Short reads surface as struct errors, or as a TypeError
            # from ord() on an empty read.
            raise error("file is not a valid Vorbis comment")

    def validate(self):
        """Validate keys and values.

        Check to make sure every key used is a valid Vorbis key, and
        that every value used is a valid Unicode or UTF-8 string. If
        any invalid keys or values are found, a ValueError is raised.

        In Python 3 all keys and values have to be a string.
        """

        # be stricter in Python 3
        if PY3:
            if not isinstance(self.vendor, text_type):
                raise ValueError
            for key, value in self:
                if not isinstance(key, text_type):
                    raise ValueError
                if not isinstance(value, text_type):
                    raise ValueError

        if not isinstance(self.vendor, text_type):
            try:
                self.vendor.decode('utf-8')
            except UnicodeDecodeError:
                raise ValueError

        for key, value in self:
            # "except Exception" instead of a bare except, so that
            # KeyboardInterrupt and SystemExit are not turned into
            # ValueError.
            try:
                if not is_valid_key(key):
                    raise ValueError
            except Exception:
                raise ValueError("%r is not a valid key" % key)

            if not isinstance(value, text_type):
                # A non-text value must be valid UTF-8, so decode it, as
                # is done for the vendor above. Calling encode() on a byte
                # string would reject valid non-ASCII UTF-8.
                try:
                    value.decode("utf-8")
                except Exception:
                    raise ValueError("%r is not a valid value" % value)
        else:
            return True

    def clear(self):
        """Clear all keys from the comment."""

        for i in list(self):
            self.remove(i)

    def write(self, framing=True):
        """Return a string representation of the data.

        Validation is always performed, so calling this function on
        invalid data may raise a ValueError.

        Keyword arguments:

        * framing -- if true, append a framing bit (see load)
        """

        self.validate()

        def _encode(value):
            # Bytes are written as they are; text is encoded as UTF-8.
            if not isinstance(value, bytes):
                return value.encode('utf-8')
            return value

        f = BytesIO()
        vendor = _encode(self.vendor)
        f.write(cdata.to_uint_le(len(vendor)))
        f.write(vendor)
        f.write(cdata.to_uint_le(len(self)))
        for tag, value in self:
            tag = _encode(tag)
            value = _encode(value)
            comment = tag + b"=" + value
            f.write(cdata.to_uint_le(len(comment)))
            f.write(comment)
        if framing:
            f.write(b"\x01")
        return f.getvalue()

    def pprint(self):
        """Return all comments as text, one ``key=value`` per line."""

        def _decode(value):
            if not isinstance(value, text_type):
                return value.decode('utf-8', 'replace')
            return value

        tags = [u"%s=%s" % (_decode(k), _decode(v)) for k, v in self]
        return u"\n".join(tags)
|
||||
|
||||
|
||||
class VCommentDict(VComment, DictMixin):
|
||||
"""A VComment that looks like a dictionary.
|
||||
|
||||
This object differs from a dictionary in two ways. First,
|
||||
len(comment) will still return the number of values, not the
|
||||
number of keys. Secondly, iterating through the object will
|
||||
iterate over (key, value) pairs, not keys. Since a key may have
|
||||
multiple values, the same value may appear multiple times while
|
||||
iterating.
|
||||
|
||||
Since Vorbis comment keys are case-insensitive, all keys are
|
||||
normalized to lowercase ASCII.
|
||||
"""
|
||||
|
||||
def __getitem__(self, key):
|
||||
"""A list of values for the key.
|
||||
|
||||
This is a copy, so comment['title'].append('a title') will not
|
||||
work.
|
||||
"""
|
||||
|
||||
if not is_valid_key(key):
|
||||
raise ValueError
|
||||
|
||||
key = key.lower()
|
||||
|
||||
values = [value for (k, value) in self if k.lower() == key]
|
||||
if not values:
|
||||
raise KeyError(key)
|
||||
else:
|
||||
return values
|
||||
|
||||
def __delitem__(self, key):
|
||||
"""Delete all values associated with the key."""
|
||||
|
||||
if not is_valid_key(key):
|
||||
raise ValueError
|
||||
|
||||
key = key.lower()
|
||||
to_delete = [x for x in self if x[0].lower() == key]
|
||||
if not to_delete:
|
||||
raise KeyError(key)
|
||||
else:
|
||||
for item in to_delete:
|
||||
self.remove(item)
|
||||
|
||||
def __contains__(self, key):
|
||||
"""Return true if the key has any values."""
|
||||
|
||||
if not is_valid_key(key):
|
||||
raise ValueError
|
||||
|
||||
key = key.lower()
|
||||
for k, value in self:
|
||||
if k.lower() == key:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def __setitem__(self, key, values):
|
||||
"""Set a key's value or values.
|
||||
|
||||
Setting a value overwrites all old ones. The value may be a
|
||||
list of Unicode or UTF-8 strings, or a single Unicode or UTF-8
|
||||
string.
|
||||
"""
|
||||
|
||||
if not is_valid_key(key):
|
||||
raise ValueError
|
||||
|
||||
if not isinstance(values, list):
|
||||
values = [values]
|
||||
try:
|
||||
del(self[key])
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
if PY2:
|
||||
key = key.encode('ascii')
|
||||
|
||||
for value in values:
|
||||
self.append((key, value))
|
||||
|
||||
def keys(self):
    """Return the distinct lowercased keys of the comment as a list."""
    return list({name.lower() for name, _ in self})
|
||||
|
||||
def as_dict(self):
    """Return a plain dict mapping each key to a copy of its value list."""
    return {name: self[name] for name in self.keys()}
|
||||
@@ -0,0 +1,311 @@
|
||||
# AIFF audio stream header information and ID3 tag support for Mutagen.
|
||||
# Copyright 2014 Evan Purkhiser <evanpurkhiser@gmail.com>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""AIFF audio stream information and tags."""
|
||||
|
||||
import struct
|
||||
from struct import pack
|
||||
|
||||
from ._compat import endswith
|
||||
from mutagen import StreamInfo, FileType
|
||||
|
||||
from mutagen.id3 import ID3
|
||||
from mutagen._id3util import error as ID3Error
|
||||
from mutagen._util import insert_bytes, delete_bytes
|
||||
|
||||
__all__ = ["AIFF", "Open", "delete"]
|
||||
|
||||
|
||||
class error(RuntimeError):
    """Base class for the errors raised by this module."""


class InvalidChunk(error, IOError):
    """An IFF chunk header could not be read or is not valid."""
|
||||
|
||||
|
||||
# based on stdlib's aifc
_HUGE_VAL = 1.79769313486231e+308


def read_float(s):  # 10 bytes
    """Decode a 10-byte big-endian extended-precision float.

    ``s`` is unpacked as a signed 16-bit exponent word followed by two
    unsigned 32-bit mantissa halves. An all-zero value decodes to 0.0 and
    an exponent of 0x7FFF decodes to ``_HUGE_VAL``; both carry the sign.
    """
    exponent, high, low = struct.unpack('>hLL', s)

    # The sign lives in the top bit of the exponent word, which the signed
    # unpack surfaces as a negative number.
    if exponent < 0:
        sign = -1
        exponent += 0x8000
    else:
        sign = 1

    if exponent == 0 and high == 0 and low == 0:
        return sign * 0.0
    if exponent == 0x7FFF:
        return sign * _HUGE_VAL

    mantissa = high * 0x100000000 + low
    return sign * (mantissa * pow(2.0, exponent - 16383 - 63))
|
||||
|
||||
|
||||
class IFFChunk(object):
    """Representation of a single IFF chunk.

    The chunk header is read from ``fileobj`` at its current position.
    After construction the following attributes are set:

    * offset -- position of the chunk header in the file
    * id -- the 4-byte chunk id exactly as read from the file
    * data_size -- size of the chunk data, excluding the header
    * size -- size of the chunk including its header
    * data_offset -- position of the first data byte
    * data -- None until read() is called
    """

    # Chunk headers are 8 bytes long (4 for ID and 4 for the size)
    HEADER_SIZE = 8

    def __init__(self, fileobj, parent_chunk=None):
        """Parse a chunk header at the current position of ``fileobj``.

        ``parent_chunk`` is the enclosing chunk, if any; its size is kept
        in step when this chunk is resized or deleted.

        Raises InvalidChunk if fewer than 8 bytes are available or the
        chunk id consists only of NUL bytes.
        """
        self.__fileobj = fileobj
        self.parent_chunk = parent_chunk
        self.offset = fileobj.tell()

        header = fileobj.read(self.HEADER_SIZE)
        if len(header) < self.HEADER_SIZE:
            raise InvalidChunk()

        # The size field is unsigned: resize() writes it with '>I', so it
        # must be read the same way or sizes >= 2**31 come back negative.
        self.id, self.data_size = struct.unpack('>4sI', header)
        if self.id == b'\x00' * 4:
            raise InvalidChunk()

        self.size = self.HEADER_SIZE + self.data_size
        self.data_offset = fileobj.tell()
        self.data = None

    def read(self):
        """Read the chunk's data into ``self.data``"""
        self.__fileobj.seek(self.data_offset)
        self.data = self.__fileobj.read(self.data_size)

    def delete(self):
        """Remove the chunk from the file and shrink the parent chunk"""
        delete_bytes(self.__fileobj, self.size, self.offset)
        if self.parent_chunk is not None:
            self.parent_chunk.resize(self.parent_chunk.data_size - self.size)

    def resize(self, data_size):
        """Write ``data_size`` into the chunk header and update the sizes.

        Only the size field is rewritten; no bytes are inserted into or
        removed from the file. The parent chunk, if any, is resized by the
        same difference.
        """
        self.__fileobj.seek(self.offset + 4)
        self.__fileobj.write(struct.pack('>I', data_size))
        if self.parent_chunk is not None:
            size_diff = self.data_size - data_size
            self.parent_chunk.resize(self.parent_chunk.data_size - size_diff)
        self.data_size = data_size
        self.size = data_size + self.HEADER_SIZE
|
||||
|
||||
|
||||
class IFFFile(object):
    """Representation of an IFF file.

    All chunks following the FORM header are indexed by their
    whitespace-stripped id, stored as a native ``str`` so that lookups
    such as ``iff['COMM']`` work on both Python 2 and Python 3.
    """

    def __init__(self, fileobj):
        """Index the chunks of ``fileobj``.

        Raises InvalidChunk if the leading FORM chunk header cannot be
        read.
        """
        self.__fileobj = fileobj
        self.__chunks = {}

        # AIFF Files always start with the FORM chunk which contains a 4 byte
        # ID before the start of other chunks
        fileobj.seek(0)
        self.__chunks['FORM'] = IFFChunk(fileobj)

        # Skip past the 4 byte FORM id
        fileobj.seek(IFFChunk.HEADER_SIZE + 4)

        # Where the next chunk can be located. We need to keep track of this
        # since the size indicated in the FORM header may not match up with the
        # offset determined from the size of the last chunk in the file
        self.__next_offset = fileobj.tell()

        # Load all of the chunks
        while True:
            try:
                chunk = IFFChunk(fileobj, self['FORM'])
            except InvalidChunk:
                break

            # Calculate the location of the next chunk,
            # considering the pad byte
            next_offset = chunk.offset + chunk.size
            next_offset += next_offset % 2

            # A chunk that does not move the scan forward would be read
            # again forever; treat it as the end of the usable data.
            if next_offset <= chunk.offset:
                break

            self.__chunks[self._native(chunk.id.strip())] = chunk
            self.__next_offset = next_offset
            fileobj.seek(self.__next_offset)

    @staticmethod
    def _native(id_):
        """Return a chunk id as native ``str`` (decodes bytes on Python 3)"""
        if isinstance(id_, bytes) and not isinstance(id_, str):
            return id_.decode('latin-1')
        return id_

    def __contains__(self, id_):
        """Check if the IFF file contains a specific chunk"""
        return self._native(id_) in self.__chunks

    def __getitem__(self, id_):
        """Get a chunk from the IFF file; raises KeyError if it is missing"""
        try:
            return self.__chunks[self._native(id_)]
        except KeyError:
            raise KeyError(
                "%r has no %r chunk" % (self.__fileobj.name, id_))

    def __delitem__(self, id_):
        """Remove a chunk from the IFF file; raises KeyError if missing"""
        self.__chunks.pop(self._native(id_)).delete()

    def insert_chunk(self, id_):
        """Insert a new, empty chunk at the end of the IFF file"""
        # struct's '4s' needs bytes on Python 3, while callers pass text.
        raw_id = id_.ljust(4)
        if not isinstance(raw_id, bytes):
            raw_id = raw_id.encode('ascii')

        self.__fileobj.seek(self.__next_offset)
        self.__fileobj.write(pack('>4sI', raw_id, 0))
        self.__fileobj.seek(self.__next_offset)
        chunk = IFFChunk(self.__fileobj, self['FORM'])
        self['FORM'].resize(self['FORM'].data_size + chunk.size)

        self.__chunks[self._native(id_)] = chunk
        self.__next_offset = chunk.offset + chunk.size
|
||||
|
||||
|
||||
class AIFFInfo(StreamInfo):
    """AIFF audio stream information.

    Information is parsed from the COMM chunk of the AIFF file

    Useful attributes:

    * length -- audio length, in seconds
    * bitrate -- audio bitrate, in bits per second
    * channels -- The number of audio channels
    * sample_rate -- audio sample rate, in Hz
    * sample_size -- The audio sample size
    """

    length = 0
    bitrate = 0
    channels = 0
    sample_rate = 0

    def __init__(self, fileobj):
        """Parse the COMM chunk of ``fileobj``.

        Raises ``error`` if the file has no COMM chunk or the chunk holds
        fewer than the 18 bytes needed.
        """
        iff = IFFFile(fileobj)
        try:
            common_chunk = iff['COMM']
        except KeyError as e:
            raise error(str(e))

        common_chunk.read()
        if len(common_chunk.data) < 18:
            # Report a truncated chunk as a format error, not struct.error.
            raise error("COMM chunk is too short")

        info = struct.unpack('>hLh10s', common_chunk.data[:18])
        channels, frame_count, sample_size, sample_rate = info

        self.sample_rate = int(read_float(sample_rate))
        self.sample_size = sample_size
        self.channels = channels
        self.bitrate = channels * sample_size * self.sample_rate
        # A zero (or bogus negative) sample rate leaves the class default
        # length of 0 in place instead of dividing by zero.
        if self.sample_rate > 0:
            self.length = frame_count / float(self.sample_rate)

    def pprint(self):
        """Return a one-line, human-readable summary of the stream"""
        return "%d channel AIFF @ %d bps, %s Hz, %.2f seconds" % (
            self.channels, self.bitrate, self.sample_rate, self.length)
|
||||
|
||||
|
||||
class _IFFID3(ID3):
    """An AIFF file with ID3v2 tags"""

    def _load_header(self):
        """Seek to the data of the 'ID3' chunk, then parse the ID3 header.

        Raises ID3Error if the file has no usable 'ID3' chunk.
        """
        try:
            self._fileobj.seek(IFFFile(self._fileobj)['ID3'].data_offset)
        except (InvalidChunk, KeyError):
            raise ID3Error()
        super(_IFFID3, self)._load_header()

    def save(self, filename=None, v2_version=4, v23_sep='/'):
        """Save ID3v2 data to the AIFF file.

        If ``filename`` is None the file the tag was loaded from is used.
        An 'ID3' chunk is appended when the file does not have one yet.
        """
        framedata = self._prepare_framedata(v2_version, v23_sep)
        framesize = len(framedata)

        if filename is None:
            filename = self.filename

        # Unlike the parent ID3.save method, we won't save to a blank file
        # since we would have to construct a empty AIFF file
        fileobj = open(filename, 'rb+')
        try:
            # Parsed inside the try block so the handle is closed even when
            # the file turns out not to be a valid IFF file.
            iff_file = IFFFile(fileobj)

            if 'ID3' not in iff_file:
                iff_file.insert_chunk('ID3')

            chunk = iff_file['ID3']
            fileobj.seek(chunk.data_offset)

            header = fileobj.read(10)
            header = self._prepare_id3_header(header, framesize, v2_version)
            header, new_size, _ = header

            data = header + framedata + (b'\x00' * (new_size - framesize))

            # Include ID3 header size in 'new_size' calculation
            new_size += 10

            # Expand the chunk if necessary, including pad byte. The tag is
            # written into the chunk's data area, so compare against
            # data_size: chunk.size also counts the 8 byte chunk header and
            # would let the write run into whatever follows the chunk.
            if new_size > chunk.data_size:
                insert_at = chunk.data_offset + chunk.data_size
                insert_size = new_size - chunk.data_size + new_size % 2
                insert_bytes(fileobj, insert_size, insert_at)
                chunk.resize(new_size)

            fileobj.seek(chunk.data_offset)
            fileobj.write(data)
        finally:
            fileobj.close()

    def delete(self, filename=None):
        """Completely removes the ID3 chunk from the AIFF file"""
        if filename is None:
            filename = self.filename
        delete(filename)
        self.clear()
|
||||
|
||||
|
||||
def delete(filename):
    """Completely removes the ID3 chunk from the AIFF file.

    A file without an ID3 chunk is left untouched.
    """
    with open(filename, "rb+") as handle:
        try:
            iff = IFFFile(handle)
            del iff['ID3']
        except KeyError:
            # No ID3 chunk present: nothing to remove.
            pass
|
||||
|
||||
|
||||
class AIFF(FileType):
    """An AIFF audio file.

    :ivar info: :class:`AIFFInfo`
    :ivar tags: :class:`ID3`
    """

    _mimes = ["audio/aiff", "audio/x-aiff"]

    @staticmethod
    def score(filename, fileobj, header):
        """Rate how likely the file is AIFF.

        Two points for a header starting with ``FORM`` plus one point for
        a matching ``.aif``/``.aiff``/``.aifc`` extension.
        """
        filename = filename.lower()

        return (header.startswith(b"FORM") * 2 + endswith(filename, b".aif") +
                endswith(filename, b".aiff") + endswith(filename, b".aifc"))

    def add_tags(self):
        """Add an empty ID3 tag to the file.

        Raises ``error`` if the file already has a tag.
        """
        if self.tags is None:
            self.tags = _IFFID3()
        else:
            raise error("an ID3 tag already exists")

    def load(self, filename, **kwargs):
        """Load stream and tag information from a file.

        ``tags`` is set to None when no ID3 tag can be loaded.
        """
        self.filename = filename

        try:
            self.tags = _IFFID3(filename, **kwargs)
        except ID3Error:
            self.tags = None

        # A with-block closes the handle and, unlike try/finally around
        # open(), cannot hit an unbound name when open() itself fails.
        with open(filename, "rb") as fileobj:
            self.info = AIFFInfo(fileobj)


Open = AIFF
|
||||
@@ -0,0 +1,627 @@
|
||||
# An APEv2 tag reader
|
||||
#
|
||||
# Copyright 2005 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""APEv2 reading and writing.
|
||||
|
||||
The APEv2 format is most commonly used with Musepack files, but is
|
||||
also the format of choice for WavPack and other formats. Some MP3s
|
||||
also have APEv2 tags, but this can cause problems with many MP3
|
||||
decoders and taggers.
|
||||
|
||||
APEv2 tags, like Vorbis comments, are freeform key=value pairs. APEv2
|
||||
keys can be any ASCII string with characters from 0x20 to 0x7E,
|
||||
between 2 and 255 characters long. Keys are case-sensitive, but
|
||||
readers are recommended to be case insensitive, and it is forbidden to
|
||||
have multiple keys which differ only in case. Keys are usually stored
|
||||
title-cased (e.g. 'Artist' rather than 'artist').
|
||||
|
||||
APEv2 values are slightly more structured than Vorbis comments; values
|
||||
are flagged as one of text, binary, or an external reference (usually
|
||||
a URI).
|
||||
|
||||
Based off the format specification found at
|
||||
http://wiki.hydrogenaudio.org/index.php?title=APEv2_specification.
|
||||
"""
|
||||
|
||||
__all__ = ["APEv2", "APEv2File", "Open", "delete"]
|
||||
|
||||
import sys
|
||||
import struct
|
||||
|
||||
from ._compat import cBytesIO, PY3, text_type, PY2, reraise, swap_to_string
|
||||
from mutagen import Metadata, FileType, StreamInfo
|
||||
from mutagen._util import DictMixin, cdata, delete_bytes, total_ordering
|
||||
|
||||
|
||||
def is_valid_apev2_key(key):
    """Return True if ``key`` is acceptable as an APEv2 item key.

    A key must be 2 to 255 characters long, use only characters from
    ' ' to '~', and must not be one of "OggS", "TAG", "ID3" or "MP+".

    On Python 3 a key that is not ``str`` raises TypeError.
    """
    if PY3 and not isinstance(key, text_type):
        raise TypeError("Keys have to be str")

    if not 2 <= len(key) <= 255:
        return False
    if min(key) < ' ' or max(key) > '~':
        return False
    return key not in ("OggS", "TAG", "ID3", "MP+")
|
||||
|
||||
# There are three different kinds of APE tag values.
# "0: Item contains text information coded in UTF-8
#  1: Item contains binary information
#  2: Item is a locator of external stored information [e.g. URL]
#  3: reserved"
TEXT = 0
BINARY = 1
EXTERNAL = 2

# Bits of the 32-bit flags word in a tag header/footer.
HAS_HEADER = 0x80000000
HAS_NO_FOOTER = 0x40000000
IS_HEADER = 0x20000000
|
||||
|
||||
|
||||
class error(IOError):
    """Base class for the APEv2 errors defined in this module."""


class APENoHeaderError(error, ValueError):
    """No APE tag could be found."""


class APEUnsupportedVersionError(error, ValueError):
    """The tag version is not supported (presumably; not raised here)."""


class APEBadItemError(error, ValueError):
    """A tag item is malformed."""
|
||||
|
||||
|
||||
class _APEv2Data(object):
    """Locate an APEv2 tag in a file object and record where it lives.

    All positions are byte offsets into the file. Attributes keep their
    class-level defaults when no tag is found.
    """

    # Store offsets of the important parts of the file.
    start = None
    header = None
    data = None
    footer = None
    end = None
    # Footer or header; seek here and read 32 to get version/size/items/flags
    metadata = None
    # Actual tag data
    tag = None

    version = None
    size = None
    items = None
    flags = 0

    # The tag is at the start rather than the end. A tag at both
    # the start and end of the file (i.e. the tag is the whole file)
    # is not considered to be at the start.
    is_at_start = False

    def __init__(self, fileobj):
        """Scan ``fileobj`` and, if a tag is found, read its raw data."""
        self.__find_metadata(fileobj)

        # Prefer whichever of header/footer sits later in the file.
        found = [pos for pos in (self.header, self.footer) if pos is not None]
        self.metadata = max(found) if found else None
        if self.metadata is None:
            return

        self.__fill_missing(fileobj)
        self.__fix_brokenness(fileobj)
        if self.data is not None:
            fileobj.seek(self.data)
            self.tag = fileobj.read(self.size)

    @staticmethod
    def __rewind_to_magic(fileobj):
        """Read 8 bytes; if they are the APE preamble, step back onto it.

        Returns the offset of the preamble, or None (with the file left
        positioned after the 8 bytes read) when it does not match.
        """
        if fileobj.read(8) != b"APETAGEX":
            return None
        fileobj.seek(-8, 1)
        return fileobj.tell()

    def __find_metadata(self, fileobj):
        """Try to find a header or footer and record its offset."""

        # Check for a simple footer.
        try:
            fileobj.seek(-32, 2)
        except IOError:
            fileobj.seek(0, 2)
            return
        position = self.__rewind_to_magic(fileobj)
        if position is not None:
            self.footer = self.metadata = position
            return

        # Check for an APEv2 tag followed by an ID3v1 tag at the end.
        try:
            fileobj.seek(-128, 2)
            if fileobj.read(3) == b"TAG":

                fileobj.seek(-35, 1)  # "TAG" + header length
                position = self.__rewind_to_magic(fileobj)
                if position is not None:
                    self.footer = position
                    return

                # ID3v1 tag at the end, maybe preceded by Lyrics3v2.
                # (http://www.id3.org/lyrics3200.html)
                # (header length - "APETAGEX") - "LYRICS200"
                fileobj.seek(15, 1)
                if fileobj.read(9) == b'LYRICS200':
                    fileobj.seek(-15, 1)  # "LYRICS200" + size tag
                    try:
                        offset = int(fileobj.read(6))
                    except ValueError:
                        raise IOError

                    fileobj.seek(-32 - offset - 6, 1)
                    position = self.__rewind_to_magic(fileobj)
                    if position is not None:
                        self.footer = position
                        return

        except IOError:
            pass

        # Check for a tag at the start.
        fileobj.seek(0, 0)
        if fileobj.read(8) == b"APETAGEX":
            self.is_at_start = True
            self.header = 0

    def __fill_missing(self, fileobj):
        """Read version/size/items/flags and derive the other offsets."""
        fileobj.seek(self.metadata + 8)
        self.version = fileobj.read(4)
        self.size = cdata.uint_le(fileobj.read(4))
        self.items = cdata.uint_le(fileobj.read(4))
        self.flags = cdata.uint_le(fileobj.read(4))

        if self.header is not None:
            self.data = self.header + 32
            # If we're reading the header, the size is the header
            # offset + the size, which includes the footer.
            self.end = self.data + self.size
            fileobj.seek(self.end - 32, 0)
            if fileobj.read(8) == b"APETAGEX":
                self.footer = self.end - 32
        elif self.footer is not None:
            self.end = self.footer + 32
            self.data = self.end - self.size
            self.header = (
                self.data - 32 if self.flags & HAS_HEADER else self.data)
        else:
            raise APENoHeaderError("No APE tag found")

        # exclude the footer from size
        if self.footer is not None:
            self.size -= 32

    def __fix_brokenness(self, fileobj):
        """Fix broken tags written with PyMusepack; sets ``self.start``."""
        start = self.header if self.header is not None else self.data
        fileobj.seek(start)

        while start > 0:
            # Clean up broken writing from pre-Mutagen PyMusepack.
            # It didn't remove the first 24 bytes of header.
            try:
                fileobj.seek(-24, 1)
            except IOError:
                break
            if fileobj.read(8) != b"APETAGEX":
                break
            fileobj.seek(-8, 1)
            start = fileobj.tell()
        self.start = start
|
||||
|
||||
|
||||
class _CIDictProxy(DictMixin):
    """Mapping with case-insensitive keys that remembers their spelling.

    Internally all names are stored as lowercase, but the case they were
    set with is remembered and used when saving. This is roughly in line
    with the standard, which says that keys are case-sensitive but two
    keys differing only in case are not allowed, and recommends
    case-insensitive implementations.
    """

    def __init__(self, *args, **kwargs):
        # lowercased key -> key as last set / lowercased key -> value
        self.__casemap = {}
        self.__dict = {}
        super(_CIDictProxy, self).__init__(*args, **kwargs)

    def __getitem__(self, key):
        folded = key.lower()
        return self.__dict[folded]

    def __setitem__(self, key, value):
        folded = key.lower()
        self.__casemap[folded] = key
        self.__dict[folded] = value

    def __delitem__(self, key):
        folded = key.lower()
        del self.__casemap[folded]
        del self.__dict[folded]

    def keys(self):
        """Return the keys spelled the way they were last set."""
        spelled = []
        for folded in self.__dict.keys():
            spelled.append(self.__casemap.get(folded, folded))
        return spelled
|
||||
|
||||
|
||||
class APEv2(_CIDictProxy, Metadata):
    """A file with an APEv2 tag.

    ID3v1 tags are silently ignored and overwritten.
    """

    filename = None

    def pprint(self):
        """Return tag key=value pairs in a human-readable format."""

        items = sorted(self.items())
        return u"\n".join([u"%s=%s" % (k, v.pprint()) for k, v in items])

    def load(self, filename):
        """Load tags from a filename.

        Raises APENoHeaderError if the file has no APE tag data.
        """

        self.filename = filename
        fileobj = open(filename, "rb")
        try:
            data = _APEv2Data(fileobj)
        finally:
            fileobj.close()
        if data.tag:
            self.clear()
            self.__parse_tag(data.tag, data.items)
        else:
            raise APENoHeaderError("No APE tag found")

    def __parse_tag(self, tag, count):
        """Parse up to ``count`` items out of the raw tag bytes ``tag``.

        Raises APEBadItemError for a reserved value type or, on Python 3,
        a key that is not ASCII.
        """
        fileobj = cBytesIO(tag)

        # ``count`` comes straight from the file; count down instead of
        # building range(count), which is a real list on Python 2.
        remaining = count
        while remaining > 0:
            remaining -= 1
            size_data = fileobj.read(4)
            # someone writes wrong item counts
            if not size_data:
                break
            size = cdata.uint_le(size_data)
            flags = cdata.uint_le(fileobj.read(4))

            # Bits 1 and 2 hold the value kind, 0-3
            # Bit 0 is read/write flag, ignored
            kind = (flags & 6) >> 1
            if kind == 3:
                raise APEBadItemError("value type must be 0, 1, or 2")

            # The key is NUL terminated; read it one byte at a time.
            key = value = fileobj.read(1)
            while key[-1:] != b'\x00' and value:
                value = fileobj.read(1)
                key += value
            if key[-1:] == b"\x00":
                key = key[:-1]
            if PY3:
                try:
                    key = key.decode("ascii")
                except UnicodeError as err:
                    reraise(APEBadItemError, err, sys.exc_info()[2])
            value = fileobj.read(size)

            if kind == TEXT:
                value = APETextValue(value, kind)
            elif kind == BINARY:
                value = APEBinaryValue(value, kind)
            elif kind == EXTERNAL:
                value = APEExtValue(value, kind)

            self[key] = value

    def __getitem__(self, key):
        """Return the value for ``key``; KeyError if invalid or missing."""
        if not is_valid_apev2_key(key):
            raise KeyError("%r is not a valid APEv2 key" % key)
        if PY2:
            key = key.encode('ascii')

        return super(APEv2, self).__getitem__(key)

    def __delitem__(self, key):
        """Delete the value for ``key``; KeyError if invalid or missing."""
        if not is_valid_apev2_key(key):
            raise KeyError("%r is not a valid APEv2 key" % key)
        if PY2:
            key = key.encode('ascii')

        super(APEv2, self).__delitem__(key)

    def __setitem__(self, key, value):
        """'Magic' value setter.

        This function tries to guess at what kind of value you want to
        store. If you pass in a valid UTF-8 or Unicode string, it
        treats it as a text value. If you pass in a list, it treats it
        as a list of string/Unicode values. If you pass in a string
        that is not valid UTF-8, it assumes it is a binary value.

        Python 3: all bytes will be assumed to be a byte value, even
        if they are valid utf-8.

        If you need to force a specific type of value (e.g. binary
        data that also happens to be valid UTF-8, or an external
        reference), use the APEValue factory and set the value to the
        result of that::

            from mutagen.apev2 import APEValue, EXTERNAL
            tag['Website'] = APEValue('http://example.org', EXTERNAL)
        """

        if not is_valid_apev2_key(key):
            raise KeyError("%r is not a valid APEv2 key" % key)

        if PY2:
            key = key.encode('ascii')

        if not isinstance(value, _APEValue):
            # let's guess at the content if we're not already a value...
            if isinstance(value, text_type):
                # unicode? we've got to be text.
                value = APEValue(value, TEXT)
            elif isinstance(value, list):
                items = []
                for v in value:
                    if not isinstance(v, text_type):
                        if PY3:
                            raise TypeError("item in list not str")
                        v = v.decode("utf-8")
                    items.append(v)

                # list? text.
                value = APEValue(u"\0".join(items), TEXT)
            else:
                if PY3:
                    value = APEValue(value, BINARY)
                else:
                    try:
                        value.decode("utf-8")
                    except UnicodeError:
                        # invalid UTF8 text, probably binary
                        value = APEValue(value, BINARY)
                    else:
                        # valid UTF8, probably text
                        value = APEValue(value, TEXT)

        super(APEv2, self).__setitem__(key, value)

    def save(self, filename=None):
        """Save changes to a file.

        If no filename is given, the one most recently loaded is used.

        Tags are always written at the end of the file, and include
        a header and a footer.
        """

        filename = filename or self.filename
        try:
            fileobj = open(filename, "r+b")
        except IOError:
            fileobj = open(filename, "w+b")

        # try/finally so the handle is released even if locating or
        # writing the tag fails part-way through.
        try:
            data = _APEv2Data(fileobj)

            if data.is_at_start:
                delete_bytes(fileobj, data.end - data.start, data.start)
            elif data.start is not None:
                fileobj.seek(data.start)
                # Delete an ID3v1 tag if present, too.
                fileobj.truncate()
            fileobj.seek(0, 2)

            # "APE tags items should be sorted ascending by size... This is
            # not a MUST, but STRONGLY recommended. Actually the items should
            # be sorted by importance/byte, but this is not feasible."
            tags = [v._internal(k) for k, v in self.items()]
            tags.sort(key=len)
            num_tags = len(tags)
            tags = b"".join(tags)

            header = bytearray(b"APETAGEX")
            # version, tag size, item count, flags
            header += struct.pack("<4I", 2000, len(tags) + 32, num_tags,
                                  HAS_HEADER | IS_HEADER)
            header += b"\0" * 8
            fileobj.write(header)

            fileobj.write(tags)

            footer = bytearray(b"APETAGEX")
            footer += struct.pack("<4I", 2000, len(tags) + 32, num_tags,
                                  HAS_HEADER)
            footer += b"\0" * 8

            fileobj.write(footer)
        finally:
            fileobj.close()

    def delete(self, filename=None):
        """Remove tags from a file."""

        filename = filename or self.filename
        fileobj = open(filename, "r+b")
        try:
            data = _APEv2Data(fileobj)
            if data.start is not None and data.size is not None:
                delete_bytes(fileobj, data.end - data.start, data.start)
        finally:
            fileobj.close()
        self.clear()


Open = APEv2
|
||||
|
||||
|
||||
def delete(filename):
    """Remove tags from a file.

    A file without an APE tag is left untouched.
    """
    try:
        tag = APEv2(filename)
        tag.delete()
    except APENoHeaderError:
        # No tag to remove.
        pass
|
||||
|
||||
|
||||
def APEValue(value, kind):
    """APEv2 tag value factory.

    Use this if you need to specify the value's type manually. Binary
    and text data are automatically detected by APEv2.__setitem__.

    For TEXT and EXTERNAL kinds a text value is encoded to UTF-8 first;
    on Python 3 any other type raises TypeError. An unknown ``kind``
    raises ValueError.
    """
    if kind in (TEXT, EXTERNAL):
        if isinstance(value, text_type):
            value = value.encode("utf-8")
        elif PY3:
            # stricter with py3
            raise TypeError("str only for text/external values")

    if kind == TEXT:
        return APETextValue(value, kind)
    if kind == BINARY:
        return APEBinaryValue(value, kind)
    if kind == EXTERNAL:
        return APEExtValue(value, kind)
    raise ValueError("kind must be TEXT, BINARY, or EXTERNAL")
|
||||
|
||||
|
||||
@swap_to_string
@total_ordering
class _APEValue(object):
    """Base class of APEv2 values: raw bytes plus a value kind."""

    def __init__(self, value, kind):
        """Store ``value`` and ``kind``; TypeError if value is not bytes."""
        if not isinstance(value, bytes):
            raise TypeError("value not bytes")
        self.kind = kind
        self.value = value

    def __len__(self):
        return len(self.value)

    def __bytes__(self):
        return self.value

    def __eq__(self, other):
        raw = bytes(self)
        return raw == other

    def __lt__(self, other):
        raw = bytes(self)
        return raw < other

    # Packed format for an item:
    # 4B: Value length
    # 4B: Value type
    # Key name
    # 1B: Null
    # Key value
    def _internal(self, key):
        """Return the packed item for this value stored under ``key``."""
        raw_key = key if isinstance(key, bytes) else key.encode("utf-8")
        prefix = struct.pack("<2I", len(self.value), self.kind << 1)
        packed = bytearray(prefix)
        packed += raw_key
        packed += b"\0"
        packed += self.value
        return bytes(packed)

    def __repr__(self):
        cls_name = type(self).__name__
        return "%s(%r, %d)" % (cls_name, self.value, self.kind)
|
||||
|
||||
|
||||
@swap_to_string
@total_ordering
class _APEUtf8Value(_APEValue):
    """Value whose bytes are UTF-8 text; compares as decoded text."""

    def __str__(self):
        return self.value.decode("utf-8")

    def __eq__(self, other):
        decoded = text_type(self)
        return decoded == other

    def __lt__(self, other):
        decoded = text_type(self)
        return decoded < other
|
||||
|
||||
|
||||
class APETextValue(_APEUtf8Value):
    """An APEv2 text value.

    Text values are Unicode/UTF-8 strings. They can be accessed like
    strings (with a null separating the values), or arrays of strings.
    """

    def __iter__(self):
        """Iterate over the strings of the value (not the characters)"""
        parts = text_type(self).split(u"\0")
        return iter(parts)

    def __getitem__(self, index):
        parts = text_type(self).split(u"\0")
        return parts[index]

    def __len__(self):
        # One more string than there are NUL separators.
        return 1 + self.value.count(b"\0")

    __hash__ = _APEValue.__hash__

    def __setitem__(self, index, value):
        """Replace the string at ``index``.

        On Python 3 a non-str ``value`` raises TypeError; otherwise a
        non-text value is decoded as UTF-8.
        """
        if not isinstance(value, text_type):
            if PY3:
                raise TypeError("value not str")
            value = value.decode("utf-8")

        parts = list(self)
        parts[index] = value
        joined = u"\0".join(parts)
        self.value = joined.encode("utf-8")

    def pprint(self):
        return u" / ".join(self)
|
||||
|
||||
|
||||
class APEBinaryValue(_APEValue):
    """An APEv2 binary value."""

    def pprint(self):
        """Return a placeholder stating the size of the binary data."""
        byte_count = len(self)
        return u"[%d bytes]" % byte_count
|
||||
|
||||
|
||||
class APEExtValue(_APEUtf8Value):
    """An APEv2 external value.

    External values are usually URI or IRI strings.
    """

    def pprint(self):
        """Return the decoded value prefixed with ``[External]``."""
        target = text_type(self)
        return u"[External] %s" % target
|
||||
|
||||
|
||||
class APEv2File(FileType):
    """A file of unknown audio format that carries an APEv2 tag."""

    class _Info(StreamInfo):
        """Placeholder stream information; nothing is read from the file."""

        length = 0
        bitrate = 0

        def __init__(self, fileobj):
            pass

        @staticmethod
        def pprint():
            return u"Unknown format with APEv2 tag."

    def load(self, filename):
        """Load stream info and tags; ``tags`` is None if there is no tag."""
        self.filename = filename
        # The handle is only needed while _Info is constructed; close it
        # afterwards instead of leaving it to the garbage collector.
        with open(filename, "rb") as fileobj:
            self.info = self._Info(fileobj)
        try:
            self.tags = APEv2(filename)
        except error:
            self.tags = None

    def add_tags(self):
        """Add an empty APEv2 tag; ValueError if tags already exist."""
        if self.tags is None:
            self.tags = APEv2()
        else:
            raise ValueError("%r already has tags: %r" % (self, self.tags))

    @staticmethod
    def score(filename, fileobj, header):
        """Rate how likely the file carries an APEv2 tag.

        One point if ``APETAGEX`` occurs in the last 160 bytes (or in the
        whole file if it is shorter), minus one if the header starts with
        ``ID3``.
        """
        try:
            fileobj.seek(-160, 2)
        except IOError:
            fileobj.seek(0)
        footer = fileobj.read()
        return ((b"APETAGEX" in footer) - header.startswith(b"ID3"))
|
||||
@@ -0,0 +1,751 @@
|
||||
# Copyright 2006-2007 Lukas Lalinsky
|
||||
# Copyright 2005-2006 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Read and write ASF (Windows Media Audio) files."""
|
||||
|
||||
__all__ = ["ASF", "Open"]
|
||||
|
||||
import struct
|
||||
from mutagen import FileType, Metadata, StreamInfo
|
||||
from mutagen._util import insert_bytes, delete_bytes, DictMixin, total_ordering
|
||||
from ._compat import swap_to_string, text_type, PY2, string_types
|
||||
|
||||
|
||||
class error(IOError):
    """Base class for the ASF errors defined in this module."""


class ASFError(error):
    """General ASF error."""


class ASFHeaderError(error):
    """ASF header error."""
|
||||
|
||||
|
||||
class ASFInfo(StreamInfo):
    """ASF stream information."""

    def __init__(self):
        # Everything starts zeroed.
        self.length = 0.0
        self.sample_rate = 0
        self.bitrate = 0
        self.channels = 0

    def pprint(self):
        """Return a one-line, human-readable summary of the stream."""
        fields = (self.bitrate, self.sample_rate, self.channels, self.length)
        return ("Windows Media Audio %d bps, %s Hz, %d channels, "
                "%.2f seconds" % fields)
|
||||
|
||||
|
||||
class ASFTags(list, DictMixin, Metadata):
    """Dictionary containing ASF attributes.

    The data is kept as a list of (key, value) pairs, so a key may occur
    several times; the mapping methods operate on all pairs of a key.
    """

    def pprint(self):
        """Return all attributes as ``key=value`` lines."""
        return "\n".join(["%s=%s" % (k, v) for k, v in self])

    def __getitem__(self, key):
        """A list of values for the key.

        This is a copy, so comment['title'].append('a title') will not
        work.

        Raises KeyError if the key has no values.
        """
        values = [value for (k, value) in self if k == key]
        if not values:
            raise KeyError(key)
        return values

    def __delitem__(self, key):
        """Delete all values associated with the key.

        Raises KeyError if the key has no values.
        """
        to_delete = [pair for pair in self if pair[0] == key]
        if not to_delete:
            raise KeyError(key)
        for pair in to_delete:
            self.remove(pair)

    def __contains__(self, key):
        """Return true if the key has any values."""
        return any(k == key for k, value in self)

    def __setitem__(self, key, values):
        """Set a key's value or values.

        Setting a value overwrites all old ones. The value may be a
        list of Unicode or UTF-8 strings, or a single Unicode or UTF-8
        string.

        Values for keys in ``_standard_attribute_names`` are converted to
        text. Other plain str/bool/int values are wrapped in the matching
        ASF attribute class; anything else is stored unchanged.
        """
        if not isinstance(values, list):
            values = [values]
        try:
            del(self[key])
        except KeyError:
            pass
        for value in values:
            if key in _standard_attribute_names:
                value = text_type(value)
            elif not isinstance(value, ASFBaseAttribute):
                if isinstance(value, string_types):
                    if PY2 or isinstance(value, text_type):
                        value = ASFUnicodeAttribute(value)
                elif isinstance(value, bool):
                    value = ASFBoolAttribute(value)
                elif isinstance(value, int):
                    value = ASFDWordAttribute(value)
                # ``long`` only exists on Python 2; without the PY2 guard
                # this test raises NameError on Python 3.
                elif PY2 and isinstance(value, long):
                    value = ASFQWordAttribute(value)
            self.append((key, value))

    def keys(self):
        """Return all keys in the comment."""
        # An empty tag list is returned as-is; otherwise a set of keys.
        return self and set(next(iter(zip(*self))))

    def as_dict(self):
        """Return a copy of the comment data in a real dict."""
        d = {}
        for key, value in self:
            d.setdefault(key, []).append(value)
        return d
|
||||
|
||||
|
||||
class ASFBaseAttribute(object):
    """Common base for ASF attribute values.

    Subclasses set TYPE and supply ``parse``, ``_render`` and
    ``data_size``.
    """

    TYPE = None

    def __init__(self, value=None, data=None, language=None,
                 stream=None, **kwargs):
        """Store *value*, or the result of parsing *data* when it is truthy.

        Extra keyword arguments are forwarded to ``parse``.
        """
        self.language = language
        self.stream = stream
        self.value = self.parse(data, **kwargs) if data else value

    def data_size(self):
        """Size of the rendered value; must be provided by subclasses."""
        raise NotImplementedError

    def __repr__(self):
        pieces = ["%s(%r" % (type(self).__name__, self.value)]
        if self.language:
            pieces.append(", language=%d" % self.language)
        if self.stream:
            pieces.append(", stream=%d" % self.stream)
        pieces.append(")")
        return "".join(pieces)

    def render(self, name):
        """Serialize as: name length, name, type, value length, value."""
        encoded_name = name.encode("utf-16-le") + b"\x00\x00"
        payload = self._render()
        prefix = struct.pack("<H", len(encoded_name))
        descriptor = struct.pack("<HH", self.TYPE, len(payload))
        return prefix + encoded_name + descriptor + payload

    def render_m(self, name):
        """Serialize with a 12-byte header whose first field is zero."""
        encoded_name = name.encode("utf-16-le") + b"\x00\x00"
        # Type 2 values are rendered in their non-dword form here.
        if self.TYPE == 2:
            payload = self._render(dword=False)
        else:
            payload = self._render()
        header = struct.pack("<HHHHI", 0, self.stream or 0,
                             len(encoded_name), self.TYPE, len(payload))
        return header + encoded_name + payload

    def render_ml(self, name):
        """Serialize like ``render_m`` but with the language in the header."""
        encoded_name = name.encode("utf-16-le") + b"\x00\x00"
        # Type 2 values are rendered in their non-dword form here.
        if self.TYPE == 2:
            payload = self._render(dword=False)
        else:
            payload = self._render()
        header = struct.pack("<HHHHI", self.language or 0, self.stream or 0,
                             len(encoded_name), self.TYPE, len(payload))
        return header + encoded_name + payload
|
||||
|
||||
|
||||
@swap_to_string
@total_ordering
class ASFUnicodeAttribute(ASFBaseAttribute):
    """Text attribute stored as UTF-16-LE (type code 0x0000)."""

    TYPE = 0x0000

    def parse(self, data):
        """Decode UTF-16-LE bytes and strip NUL characters from both ends."""
        text = data.decode("utf-16-le")
        return text.strip("\x00")

    def _render(self):
        """Return the value as UTF-16-LE followed by a two-byte NUL."""
        encoded = self.value.encode("utf-16-le")
        return encoded + b"\x00\x00"

    def data_size(self):
        """Length in bytes of the rendered value, terminator included."""
        return len(self._render())

    def __bytes__(self):
        return self.value.encode("utf-16-le")

    def __str__(self):
        return self.value

    # Comparisons use the text form of this attribute.
    def __eq__(self, other):
        return text_type(self) == other

    def __lt__(self, other):
        return text_type(self) < other

    # Defining __eq__ would otherwise drop the inherited hash.
    __hash__ = ASFBaseAttribute.__hash__
|
||||
|
||||
|
||||
@swap_to_string
@total_ordering
class ASFByteArrayAttribute(ASFBaseAttribute):
    """Raw binary attribute (type code 0x0001)."""

    TYPE = 0x0001

    def parse(self, data):
        """Return *data* unchanged; it must already be bytes."""
        assert isinstance(data, bytes)
        return data

    def _render(self):
        """Return the stored bytes unchanged."""
        assert isinstance(self.value, bytes)
        return self.value

    def data_size(self):
        """Number of bytes held."""
        return len(self.value)

    def __bytes__(self):
        # Returns a short description rather than the raw payload.
        return "[binary data (%s bytes)]" % len(self.value)

    def __eq__(self, other):
        return self.value == other

    def __lt__(self, other):
        return self.value < other

    # Defining __eq__ would otherwise drop the inherited hash.
    __hash__ = ASFBaseAttribute.__hash__
|
||||
|
||||
|
||||
@swap_to_string
@total_ordering
class ASFBoolAttribute(ASFBaseAttribute):
    """Boolean attribute (type code 0x0002)."""

    TYPE = 0x0002

    def parse(self, data, dword=True):
        """Return True when the unpacked integer equals 1.

        The integer is read as 32 bits when *dword* is true, else 16 bits.
        """
        layout = "<I" if dword else "<H"
        return struct.unpack(layout, data)[0] == 1

    def _render(self, dword=True):
        """Pack the value as a 32-bit (or, if not *dword*, 16-bit) integer."""
        layout = "<I" if dword else "<H"
        return struct.pack(layout, int(self.value))

    def data_size(self):
        return 4

    def __bool__(self):
        return bool(self.value)

    def __bytes__(self):
        return self.value

    # Comparisons use the truth value of the stored value.
    def __eq__(self, other):
        return bool(self.value) == other

    def __lt__(self, other):
        return bool(self.value) < other

    # Defining __eq__ would otherwise drop the inherited hash.
    __hash__ = ASFBaseAttribute.__hash__
|
||||
|
||||
|
||||
@swap_to_string
@total_ordering
class ASFDWordAttribute(ASFBaseAttribute):
    """32-bit unsigned integer attribute (type code 0x0003)."""

    TYPE = 0x0003

    def parse(self, data):
        """Unpack a little-endian unsigned 32-bit integer."""
        (number,) = struct.unpack("<L", data)
        return number

    def _render(self):
        """Pack the value as a little-endian unsigned 32-bit integer."""
        return struct.pack("<L", self.value)

    def data_size(self):
        return 4

    def __int__(self):
        return self.value

    def __bytes__(self):
        return self.value

    # Comparisons use the integer form of the stored value.
    def __eq__(self, other):
        return int(self.value) == other

    def __lt__(self, other):
        return int(self.value) < other

    # Defining __eq__ would otherwise drop the inherited hash.
    __hash__ = ASFBaseAttribute.__hash__
|
||||
|
||||
|
||||
@swap_to_string
@total_ordering
class ASFQWordAttribute(ASFBaseAttribute):
    """64-bit unsigned integer attribute (type code 0x0004)."""

    TYPE = 0x0004

    def parse(self, data):
        """Unpack a little-endian unsigned 64-bit integer."""
        (number,) = struct.unpack("<Q", data)
        return number

    def _render(self):
        """Pack the value as a little-endian unsigned 64-bit integer."""
        return struct.pack("<Q", self.value)

    def data_size(self):
        return 8

    def __int__(self):
        return self.value

    def __bytes__(self):
        return self.value

    # Comparisons use the integer form of the stored value.
    def __eq__(self, other):
        return int(self.value) == other

    def __lt__(self, other):
        return int(self.value) < other

    # Defining __eq__ would otherwise drop the inherited hash.
    __hash__ = ASFBaseAttribute.__hash__
|
||||
|
||||
|
||||
@swap_to_string
@total_ordering
class ASFWordAttribute(ASFBaseAttribute):
    """16-bit unsigned integer attribute (type code 0x0005)."""

    TYPE = 0x0005

    def parse(self, data):
        """Unpack a little-endian unsigned 16-bit integer."""
        (number,) = struct.unpack("<H", data)
        return number

    def _render(self):
        """Pack the value as a little-endian unsigned 16-bit integer."""
        return struct.pack("<H", self.value)

    def data_size(self):
        return 2

    def __int__(self):
        return self.value

    def __bytes__(self):
        return self.value

    # Comparisons use the integer form of the stored value.
    def __eq__(self, other):
        return int(self.value) == other

    def __lt__(self, other):
        return int(self.value) < other

    # Defining __eq__ would otherwise drop the inherited hash.
    __hash__ = ASFBaseAttribute.__hash__
|
||||
|
||||
|
||||
@swap_to_string
@total_ordering
class ASFGUIDAttribute(ASFBaseAttribute):
    """GUID attribute held as raw bytes (type code 0x0006)."""

    TYPE = 0x0006

    def parse(self, data):
        """Return *data* unchanged; it must already be bytes."""
        assert isinstance(data, bytes)
        return data

    def _render(self):
        """Return the stored bytes unchanged."""
        assert isinstance(self.value, bytes)
        return self.value

    def data_size(self):
        """Number of bytes held."""
        return len(self.value)

    def __bytes__(self):
        return self.value

    def __eq__(self, other):
        return self.value == other

    def __lt__(self, other):
        return self.value < other

    # Defining __eq__ would otherwise drop the inherited hash.
    __hash__ = ASFBaseAttribute.__hash__
|
||||
|
||||
|
||||
# Short module-level names for the type code of each attribute class.
(UNICODE, BYTEARRAY, BOOL, DWORD, QWORD, WORD, GUID) = (
    ASFUnicodeAttribute.TYPE,
    ASFByteArrayAttribute.TYPE,
    ASFBoolAttribute.TYPE,
    ASFDWordAttribute.TYPE,
    ASFQWordAttribute.TYPE,
    ASFWordAttribute.TYPE,
    ASFGUIDAttribute.TYPE,
)
|
||||
|
||||
|
||||
def ASFValue(value, kind, **kwargs):
    """Build the attribute object whose type code equals *kind*.

    Extra keyword arguments are passed to the attribute constructor.

    Raises ValueError if no attribute class is registered for *kind*.
    """
    candidates = [cls for code, cls in _attribute_types.items()
                  if kind == code]
    if not candidates:
        raise ValueError("Unknown value type")
    return candidates[0](value=value, **kwargs)
|
||||
|
||||
|
||||
# Maps each numeric type code to the class implementing it.
_attribute_types = dict(
    (attribute_class.TYPE, attribute_class)
    for attribute_class in (
        ASFUnicodeAttribute,
        ASFByteArrayAttribute,
        ASFBoolAttribute,
        ASFDWordAttribute,
        ASFQWordAttribute,
        ASFWordAttribute,
        ASFGUIDAttribute,
    )
)

# Names handled by the content description object, in its field order.
_standard_attribute_names = [
    "Title", "Author", "Copyright", "Description", "Rating",
]
|
||||
|
||||
|
||||
class BaseObject(object):
    """Base ASF object: keeps its payload and writes it back unchanged."""

    GUID = None

    def parse(self, asf, data, fileobj, size):
        """Remember the raw payload bytes."""
        self.data = data

    def render(self, asf):
        """Return GUID + 64-bit total size + payload.

        The size counts the payload plus 24 bytes of GUID and size field.
        """
        total_size = len(self.data) + 24
        return self.GUID + struct.pack("<Q", total_size) + self.data
|
||||
|
||||
|
||||
class UnknownObject(BaseObject):
    """ASF object with an unrecognised GUID."""

    def __init__(self, guid):
        """Remember *guid* (bytes) as this instance's GUID."""
        assert isinstance(guid, bytes)
        self.GUID = guid
|
||||
|
||||
|
||||
class HeaderObject(object):
    """Holds the GUID that identifies the ASF header."""

    GUID = b"\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C"
|
||||
|
||||
|
||||
class ContentDescriptionObject(BaseObject):
    """Content description: the five standard text attributes."""

    GUID = b"\x33\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C"

    def parse(self, asf, data, fileobj, size):
        """Read the five text fields and store the non-empty ones as tags.

        The payload starts with five 16-bit byte lengths, followed by the
        UTF-16-LE texts in the same order.
        """
        super(ContentDescriptionObject, self).parse(asf, data, fileobj, size)
        asf.content_description_obj = self
        lengths = struct.unpack("<HHHHH", data[:10])
        texts = []
        pos = 10
        for length in lengths:
            end = pos + length
            if length > 0:
                texts.append(data[pos:end].decode("utf-16-le").strip("\x00"))
            else:
                texts.append(None)
            pos = end
        # Pair names with texts in field order so tags are always added in
        # the same order, using the same name list that render() uses.
        for key, value in zip(_standard_attribute_names, texts):
            if value is not None:
                asf.tags[key] = value

    def render(self, asf):
        """Serialize the first value of each standard attribute."""
        def render_text(name):
            value = asf.tags.get(name, [])
            if value:
                # text_type() lets attribute objects be rendered as well
                # as plain text values.
                return text_type(value[0]).encode("utf-16-le") + b"\x00\x00"
            else:
                return b""
        texts = list(map(render_text, _standard_attribute_names))
        data = struct.pack("<HHHHH", *map(len, texts)) + b"".join(texts)
        return self.GUID + struct.pack("<Q", 24 + len(data)) + data
|
||||
|
||||
|
||||
class ExtendedContentDescriptionObject(BaseObject):
    """Extended content description: a list of named, typed attributes."""

    GUID = b"\x40\xA4\xD0\xD2\x07\xE3\xD2\x11\x97\xF0\x00\xA0\xC9\x5E\xA8\x50"

    def parse(self, asf, data, fileobj, size):
        """Read every attribute and append it to ``asf.tags``."""
        super(ExtendedContentDescriptionObject, self).parse(
            asf, data, fileobj, size)
        asf.extended_content_description_obj = self
        (count,) = struct.unpack("<H", data[0:2])
        pos = 2
        for _ in range(count):
            # 16-bit name length, then the UTF-16-LE name.
            (name_length,) = struct.unpack("<H", data[pos:pos + 2])
            pos += 2
            name_end = pos + name_length
            name = data[pos:name_end].decode("utf-16-le").strip("\x00")
            pos = name_end
            # 16-bit value type and 16-bit value length, then the value.
            value_type, value_length = struct.unpack(
                "<HH", data[pos:pos + 4])
            pos += 4
            value_end = pos + value_length
            value = data[pos:value_end]
            pos = value_end
            attr = _attribute_types[value_type](data=value)
            asf.tags.append((name, attr))

    def render(self, asf):
        """Serialize ``asf.to_extended_content_description``."""
        attrs = asf.to_extended_content_description.items()
        body = b"".join([attr.render(name) for (name, attr) in attrs])
        # 26 = 16-byte GUID + 8-byte size + 2-byte attribute count.
        header = struct.pack("<QH", 26 + len(body), len(attrs))
        return self.GUID + header + body
|
||||
|
||||
|
||||
class FilePropertiesObject(BaseObject):
    """File properties; supplies the stream length."""

    GUID = b"\xA1\xDC\xAB\x8C\x47\xA9\xCF\x11\x8E\xE4\x00\xC0\x0C\x20\x53\x65"

    def parse(self, asf, data, fileobj, size):
        """Set ``asf.info.length`` from the fields at payload bytes 40-63."""
        super(FilePropertiesObject, self).parse(asf, data, fileobj, size)
        length, _, preroll = struct.unpack("<QQQ", data[40:64])
        # NOTE(review): the divisors suggest length is in 100 ns units and
        # preroll in milliseconds -- confirm against the format definition.
        asf.info.length = length / 10000000.0 - preroll / 1000.0
|
||||
|
||||
|
||||
class StreamPropertiesObject(BaseObject):
    """Stream properties; supplies channels, sample rate and bitrate."""

    GUID = b"\x91\x07\xDC\xB7\xB7\xA9\xCF\x11\x8E\xE6\x00\xC0\x0C\x20\x53\x65"

    def parse(self, asf, data, fileobj, size):
        """Fill ``asf.info`` from the fields at payload bytes 56-65."""
        super(StreamPropertiesObject, self).parse(asf, data, fileobj, size)
        channels, sample_rate, bitrate = struct.unpack("<HII", data[56:66])
        info = asf.info
        info.channels = channels
        info.sample_rate = sample_rate
        # The stored figure is multiplied by 8 before being kept.
        info.bitrate = bitrate * 8
|
||||
|
||||
|
||||
class HeaderExtensionObject(BaseObject):
    """Header extension: a container for further ASF objects."""

    GUID = b"\xb5\x03\xbf_.\xa9\xcf\x11\x8e\xe3\x00\xc0\x0c Se"

    def __init__(self):
        # A freshly created (never parsed) extension must already have a
        # child list, because callers append to `objects` directly.
        self.objects = []

    def parse(self, asf, data, fileobj, size):
        """Parse the child objects contained in the extension payload.

        Raises ASFHeaderError if a child declares a size smaller than its
        own 24-byte header.
        """
        super(HeaderExtensionObject, self).parse(asf, data, fileobj, size)
        asf.header_extension_obj = self
        datasize, = struct.unpack("<I", data[18:22])
        datapos = 0
        self.objects = []
        while datapos < datasize:
            start = 22 + datapos
            guid, size = struct.unpack("<16sQ", data[start:start + 24])
            if size < 24:
                # A size of 0 would never advance datapos and loop forever.
                raise ASFHeaderError("invalid size in header extension")
            if guid in _object_types:
                obj = _object_types[guid]()
            else:
                obj = UnknownObject(guid)
            obj.parse(asf, data[start + 24:start + size], fileobj, size)
            self.objects.append(obj)
            datapos += size

    def render(self, asf):
        """Serialize the extension header followed by all child objects."""
        data = b"".join([obj.render(asf) for obj in self.objects])
        # 24 (GUID + size) + 16 + 6 bytes of fixed fields + children.
        return (self.GUID + struct.pack("<Q", 24 + 16 + 6 + len(data)) +
                b"\x11\xD2\xD3\xAB\xBA\xA9\xcf\x11" +
                b"\x8E\xE6\x00\xC0\x0C\x20\x53\x65" +
                b"\x06\x00" + struct.pack("<I", len(data)) + data)
|
||||
|
||||
|
||||
class MetadataObject(BaseObject):
    """Metadata description: attributes that carry a stream number."""

    GUID = b"\xea\xcb\xf8\xc5\xaf[wH\x84g\xaa\x8cD\xfaL\xca"

    def parse(self, asf, data, fileobj, size):
        """Read every attribute record and append it to ``asf.tags``."""
        super(MetadataObject, self).parse(asf, data, fileobj, size)
        asf.metadata_obj = self
        (count,) = struct.unpack("<H", data[0:2])
        pos = 2
        for _ in range(count):
            # Fixed 12-byte record header, then the name, then the value.
            (reserved, stream, name_length, value_type,
             value_length) = struct.unpack("<HHHHI", data[pos:pos + 12])
            pos += 12
            name_end = pos + name_length
            name = data[pos:name_end].decode("utf-16-le").strip("\x00")
            value_end = name_end + value_length
            value = data[name_end:value_end]
            pos = value_end
            args = {'data': value, 'stream': stream}
            if value_type == 2:
                # Type 2 values are parsed in their non-dword form here.
                args['dword'] = False
            attr = _attribute_types[value_type](**args)
            asf.tags.append((name, attr))

    def render(self, asf):
        """Serialize ``asf.to_metadata``."""
        attrs = asf.to_metadata.items()
        body = b"".join([attr.render_m(name) for (name, attr) in attrs])
        # 26 = 16-byte GUID + 8-byte size + 2-byte attribute count.
        header = struct.pack("<QH", 26 + len(body), len(attrs))
        return self.GUID + header + body
|
||||
|
||||
|
||||
class MetadataLibraryObject(BaseObject):
    """Metadata library: attributes with language and stream numbers."""

    GUID = b"\x94\x1c#D\x98\x94\xd1I\xa1A\x1d\x13NEpT"

    def parse(self, asf, data, fileobj, size):
        """Read every attribute record and append it to ``asf.tags``."""
        super(MetadataLibraryObject, self).parse(asf, data, fileobj, size)
        asf.metadata_library_obj = self
        (count,) = struct.unpack("<H", data[0:2])
        pos = 2
        for _ in range(count):
            # Fixed 12-byte record header, then the name, then the value.
            (language, stream, name_length, value_type,
             value_length) = struct.unpack("<HHHHI", data[pos:pos + 12])
            pos += 12
            name_end = pos + name_length
            name = data[pos:name_end].decode("utf-16-le").strip("\x00")
            value_end = name_end + value_length
            value = data[name_end:value_end]
            pos = value_end
            args = {'data': value, 'language': language, 'stream': stream}
            if value_type == 2:
                # Type 2 values are parsed in their non-dword form here.
                args['dword'] = False
            attr = _attribute_types[value_type](**args)
            asf.tags.append((name, attr))

    def render(self, asf):
        """Serialize the ``(name, attr)`` pairs in ``asf.to_metadata_library``."""
        attrs = asf.to_metadata_library
        body = b"".join([attr.render_ml(name) for (name, attr) in attrs])
        # 26 = 16-byte GUID + 8-byte size + 2-byte attribute count.
        header = struct.pack("<QH", 26 + len(body), len(attrs))
        return self.GUID + header + body
|
||||
|
||||
|
||||
# Maps each known object GUID to the class that parses and renders it.
_object_types = dict(
    (object_class.GUID, object_class)
    for object_class in (
        ExtendedContentDescriptionObject,
        ContentDescriptionObject,
        FilePropertiesObject,
        StreamPropertiesObject,
        HeaderExtensionObject,
        MetadataLibraryObject,
        MetadataObject,
    )
)
|
||||
|
||||
|
||||
class ASF(FileType):
    """An ASF file, probably containing WMA or WMV."""

    _mimes = ["audio/x-ms-wma", "audio/x-ms-wmv", "video/x-ms-asf",
              "audio/x-wma", "video/x-wmv"]

    def load(self, filename):
        """Read the header objects, stream info and tags from *filename*."""
        self.filename = filename
        with open(filename, "rb") as fileobj:
            self.size = 0
            self.size1 = 0
            self.size2 = 0
            self.offset1 = 0
            self.offset2 = 0
            self.num_objects = 0
            self.info = ASFInfo()
            self.tags = ASFTags()
            self.__read_file(fileobj)

    def save(self):
        """Render the header from the current tags and write it in place."""
        # Move attributes to the right objects
        self.to_extended_content_description = {}
        self.to_metadata = {}
        self.to_metadata_library = []
        for name, value in self.tags:
            if name in _standard_attribute_names:
                continue
            # Values over 0xFFFF bytes and GUIDs go to the metadata library.
            library_only = (value.data_size() > 0xFFFF or value.TYPE == GUID)
            if (value.language is None and value.stream is None and
                    name not in self.to_extended_content_description and
                    not library_only):
                self.to_extended_content_description[name] = value
            elif (value.language is None and value.stream is not None and
                    name not in self.to_metadata and not library_only):
                self.to_metadata[name] = value
            else:
                self.to_metadata_library.append((name, value))

        # Add missing objects
        if not self.content_description_obj:
            self.content_description_obj = \
                ContentDescriptionObject()
            self.objects.append(self.content_description_obj)
        if not self.extended_content_description_obj:
            self.extended_content_description_obj = \
                ExtendedContentDescriptionObject()
            self.objects.append(self.extended_content_description_obj)
        if not self.header_extension_obj:
            self.header_extension_obj = \
                HeaderExtensionObject()
            self.objects.append(self.header_extension_obj)
        if not self.metadata_obj:
            self.metadata_obj = \
                MetadataObject()
            self.header_extension_obj.objects.append(self.metadata_obj)
        if not self.metadata_library_obj:
            self.metadata_library_obj = \
                MetadataLibraryObject()
            self.header_extension_obj.objects.append(self.metadata_library_obj)

        # Render the header
        data = b"".join([obj.render(self) for obj in self.objects])
        data = (HeaderObject.GUID +
                struct.pack("<QL", len(data) + 30, len(self.objects)) +
                b"\x01\x02" + data)

        size = len(data)
        with open(self.filename, "rb+") as fileobj:
            # Resize the file so the new header fits exactly, then
            # overwrite the old header.
            if size > self.size:
                insert_bytes(fileobj, size - self.size, self.size)
            if size < self.size:
                delete_bytes(fileobj, self.size - size, 0)
            fileobj.seek(0)
            fileobj.write(data)

        self.size = size
        self.num_objects = len(self.objects)

    def __read_file(self, fileobj):
        """Check the 30-byte file header and parse every header object.

        Raises ASFHeaderError if the file does not start with the ASF
        header GUID.
        """
        header = fileobj.read(30)
        if len(header) != 30 or header[:16] != HeaderObject.GUID:
            raise ASFHeaderError("Not an ASF file.")

        self.extended_content_description_obj = None
        self.content_description_obj = None
        self.header_extension_obj = None
        self.metadata_obj = None
        self.metadata_library_obj = None

        self.size, self.num_objects = struct.unpack("<QL", header[16:28])
        self.objects = []
        for i in range(self.num_objects):
            self.__read_object(fileobj)

    def __read_object(self, fileobj):
        """Read one header object from *fileobj* and append it to objects.

        Raises ASFHeaderError if the object header is truncated or its
        declared size is smaller than the 24-byte header itself.
        """
        header = fileobj.read(24)
        if len(header) != 24:
            raise ASFHeaderError("Truncated ASF object header.")
        guid, size = struct.unpack("<16sQ", header)
        if size < 24:
            # read() with a negative count would read the rest of the file.
            raise ASFHeaderError("Invalid ASF object size.")
        if guid in _object_types:
            obj = _object_types[guid]()
        else:
            obj = UnknownObject(guid)
        data = fileobj.read(size - 24)
        obj.parse(self, data, fileobj, size)
        self.objects.append(obj)

    @staticmethod
    def score(filename, fileobj, header):
        """Return 2 if *header* starts with the ASF header GUID, else 0."""
        return header.startswith(HeaderObject.GUID) * 2
|
||||
|
||||
# Module-level alias for the ASF class.
Open = ASF
|
||||
@@ -0,0 +1,509 @@
|
||||
# Simpler (but far more limited) API for ID3 editing
|
||||
# Copyright 2006 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Easier access to ID3 tags.
|
||||
|
||||
EasyID3 is a wrapper around mutagen.id3.ID3 to make ID3 tags appear
|
||||
more like Vorbis or APEv2 tags.
|
||||
"""
|
||||
|
||||
import mutagen.id3
|
||||
|
||||
from ._compat import iteritems, text_type, PY2
|
||||
from mutagen import Metadata
|
||||
from mutagen._util import DictMixin, dict_match
|
||||
from mutagen.id3 import ID3, error, delete, ID3FileType
|
||||
|
||||
|
||||
__all__ = ['EasyID3', 'Open', 'delete']
|
||||
|
||||
|
||||
class EasyID3KeyError(KeyError, ValueError, error):
    """Error raised for a key that cannot be read, written or deleted.

    It derives from both KeyError and ValueError for API
    compatibility; prefer catching KeyError.
    """
|
||||
|
||||
|
||||
class EasyID3(DictMixin, Metadata):
    """A file with an ID3 tag.

    Like Vorbis comments, EasyID3 keys are case-insensitive ASCII
    strings. Only a subset of ID3 frames are supported by default. Use
    EasyID3.RegisterKey and its wrappers to support more.

    You can also set the GetFallback, SetFallback, and DeleteFallback
    to generic key getter/setter/deleter functions, which are called
    if no specific handler is registered for a key. Additionally,
    ListFallback can be used to supply an arbitrary list of extra
    keys. These can be set on EasyID3 or on individual instances after
    creation.

    To use an EasyID3 class with mutagen.mp3.MP3::

        from mutagen.mp3 import EasyMP3 as MP3
        MP3(filename)

    Because many of the attributes are constructed on the fly, things
    like the following will not work::

        ezid3["performer"].append("Joe")

    Instead, you must do::

        values = ezid3["performer"]
        values.append("Joe")
        ezid3["performer"] = values

    """

    # Handler tables, keyed by lower-cased key name.
    Set = {}
    Get = {}
    Delete = {}
    List = {}

    # For compatibility.
    valid_keys = Get

    GetFallback = None
    SetFallback = None
    DeleteFallback = None
    ListFallback = None

    @classmethod
    def RegisterKey(cls, key,
                    getter=None, setter=None, deleter=None, lister=None):
        """Register a new key mapping.

        A key mapping is four functions, a getter, setter, deleter,
        and lister. The key may be either a string or a glob pattern.

        The getter, deleter, and lister receive an ID3 instance and
        the requested key name. The setter also receives the desired
        value, which will be a list of strings.

        The getter, setter, and deleter are used to implement __getitem__,
        __setitem__, and __delitem__.

        The lister is used to implement keys(). It should return a
        list of keys that are actually in the ID3 instance, provided
        by its associated getter.
        """
        key = key.lower()
        if getter is not None:
            cls.Get[key] = getter
        if setter is not None:
            cls.Set[key] = setter
        if deleter is not None:
            cls.Delete[key] = deleter
        if lister is not None:
            cls.List[key] = lister

    @classmethod
    def RegisterTextKey(cls, key, frameid):
        """Register a text key.

        If the key you need to register is a simple one-to-one mapping
        of ID3 frame name to EasyID3 key, then you can use this
        function::

            EasyID3.RegisterTextKey("title", "TIT2")
        """
        def getter(id3, key):
            return list(id3[frameid])

        def setter(id3, key, value):
            try:
                frame = id3[frameid]
            except KeyError:
                id3.add(mutagen.id3.Frames[frameid](encoding=3, text=value))
            else:
                frame.encoding = 3
                frame.text = value

        def deleter(id3, key):
            del(id3[frameid])

        cls.RegisterKey(key, getter, setter, deleter)

    @classmethod
    def RegisterTXXXKey(cls, key, desc):
        """Register a user-defined text frame key.

        Some ID3 tags are stored in TXXX frames, which allow a
        freeform 'description' which acts as a subkey,
        e.g. TXXX:BARCODE.::

            EasyID3.RegisterTXXXKey('barcode', 'BARCODE').
        """
        frameid = "TXXX:" + desc

        def getter(id3, key):
            return list(id3[frameid])

        def setter(id3, key, value):
            try:
                frame = id3[frameid]
            except KeyError:
                enc = 0
                # Store 8859-1 if we can, per MusicBrainz spec.
                for v in value:
                    if v and max(v) > u'\x7f':
                        enc = 3
                id3.add(mutagen.id3.TXXX(encoding=enc, text=value, desc=desc))
            else:
                frame.text = value

        def deleter(id3, key):
            del(id3[frameid])

        cls.RegisterKey(key, getter, setter, deleter)

    def __init__(self, filename=None):
        """Create an empty tag, loading *filename* if one is given."""
        self.__id3 = ID3()
        if filename is not None:
            self.load(filename)

    # The following attributes are forwarded to the wrapped ID3 object.
    load = property(lambda s: s.__id3.load,
                    lambda s, v: setattr(s.__id3, 'load', v))

    save = property(lambda s: s.__id3.save,
                    lambda s, v: setattr(s.__id3, 'save', v))

    delete = property(lambda s: s.__id3.delete,
                      lambda s, v: setattr(s.__id3, 'delete', v))

    filename = property(lambda s: s.__id3.filename,
                        lambda s, fn: setattr(s.__id3, 'filename', fn))

    # The setter must store the assigned value, not the instance itself.
    size = property(lambda s: s.__id3.size,
                    lambda s, fn: setattr(s.__id3, 'size', fn))

    def __getitem__(self, key):
        """Return the values for *key* via its registered getter.

        Raises EasyID3KeyError if no getter matches the key.
        """
        key = key.lower()
        func = dict_match(self.Get, key, self.GetFallback)
        if func is not None:
            return func(self.__id3, key)
        else:
            raise EasyID3KeyError("%r is not a valid key" % key)

    def __setitem__(self, key, value):
        """Store *value* for *key* via its registered setter.

        A single string is wrapped in a list first.

        Raises EasyID3KeyError if no setter matches the key.
        """
        key = key.lower()
        if PY2:
            if isinstance(value, basestring):
                value = [value]
        else:
            if isinstance(value, text_type):
                value = [value]
        func = dict_match(self.Set, key, self.SetFallback)
        if func is not None:
            return func(self.__id3, key, value)
        else:
            raise EasyID3KeyError("%r is not a valid key" % key)

    def __delitem__(self, key):
        """Delete *key* via its registered deleter.

        Raises EasyID3KeyError if no deleter matches the key.
        """
        key = key.lower()
        func = dict_match(self.Delete, key, self.DeleteFallback)
        if func is not None:
            return func(self.__id3, key)
        else:
            raise EasyID3KeyError("%r is not a valid key" % key)

    def keys(self):
        """Return the keys currently present in the tag."""
        keys = []
        for key in self.Get.keys():
            if key in self.List:
                # The lister reports the concrete keys for this entry.
                keys.extend(self.List[key](self.__id3, key))
            elif key in self:
                keys.append(key)
        if self.ListFallback is not None:
            keys.extend(self.ListFallback(self.__id3, ""))
        return keys

    def pprint(self):
        """Print tag key=value pairs."""
        strings = []
        for key in sorted(self.keys()):
            values = self[key]
            for value in values:
                strings.append("%s=%s" % (key, value))
        return "\n".join(strings)
|
||||
|
||||
|
||||
# Module-level alias for the EasyID3 class.
Open = EasyID3
|
||||
|
||||
|
||||
def genre_get(id3, key):
    """Return the ``genres`` of the TCON frame; KeyError if it is absent."""
    frame = id3["TCON"]
    return frame.genres
|
||||
|
||||
|
||||
def genre_set(id3, key, value):
    """Store *value* as the genres, adding a TCON frame if none exists."""
    try:
        frame = id3["TCON"]
    except KeyError:
        id3.add(mutagen.id3.TCON(encoding=3, text=value))
        return
    # Update the existing frame in place.
    frame.encoding = 3
    frame.genres = value
|
||||
|
||||
|
||||
def genre_delete(id3, key):
    """Remove the TCON frame; KeyError if it is absent."""
    del id3["TCON"]
|
||||
|
||||
|
||||
def date_get(id3, key):
    """Return the ``text`` of every entry in the TDRC frame's text list."""
    stamps = id3["TDRC"].text
    return [stamp.text for stamp in stamps]
|
||||
|
||||
|
||||
def date_set(id3, key, value):
    """Add a new TDRC frame holding *value*."""
    frame = mutagen.id3.TDRC(encoding=3, text=value)
    id3.add(frame)
|
||||
|
||||
|
||||
def date_delete(id3, key):
    """Remove the TDRC frame; KeyError if it is absent."""
    del id3["TDRC"]
|
||||
|
||||
|
||||
def performer_get(id3, key):
    """Return the people listed in TMCL under the role named in *key*.

    The role is the part of *key* after the first colon.

    Raises KeyError if there is no TMCL frame or nobody has that role.
    """
    wanted_role = key.split(":", 1)[1]
    try:
        mcl = id3["TMCL"]
    except KeyError:
        raise KeyError(key)
    people = [person for role, person in mcl.people if role == wanted_role]
    if not people:
        raise KeyError(key)
    return people
|
||||
|
||||
|
||||
def performer_set(id3, key, value):
    """Replace the people stored in TMCL for the role named in *key*.

    Entries for other roles are kept; a TMCL frame is added if missing.
    """
    wanted_role = key.split(":", 1)[1]
    try:
        mcl = id3["TMCL"]
    except KeyError:
        mcl = mutagen.id3.TMCL(encoding=3, people=[])
        id3.add(mcl)
    mcl.encoding = 3
    others = [p for p in mcl.people if p[0] != wanted_role]
    mcl.people = others + [(wanted_role, v) for v in value]
|
||||
|
||||
|
||||
def performer_delete(id3, key):
    """Remove every TMCL entry for the role named in *key*.

    The TMCL frame itself is deleted when no entries remain.

    Raises KeyError if there is no TMCL frame or nobody has that role.
    """
    wanted_role = key.split(":", 1)[1]
    try:
        mcl = id3["TMCL"]
    except KeyError:
        raise KeyError(key)
    remaining = [p for p in mcl.people if p[0] != wanted_role]
    if remaining == mcl.people:
        # Nothing was filtered out, so the role was not present.
        raise KeyError(key)
    if remaining:
        mcl.people = remaining
    else:
        del id3["TMCL"]
|
||||
|
||||
|
||||
def performer_list(id3, key):
    """Return one ``performer:<role>`` key per distinct role in TMCL.

    Returns an empty list if there is no TMCL frame.
    """
    try:
        mcl = id3["TMCL"]
    except KeyError:
        return []
    roles = set("performer:" + p[0] for p in mcl.people)
    return list(roles)
|
||||
|
||||
|
||||
def musicbrainz_trackid_get(id3, key):
    """Return the MusicBrainz UFID frame's data as a one-item text list."""
    frame = id3["UFID:http://musicbrainz.org"]
    return [frame.data.decode('ascii')]
|
||||
|
||||
|
||||
def musicbrainz_trackid_set(id3, key, value):
    """Store the single track ID in *value* in the MusicBrainz UFID frame.

    Raises ValueError unless *value* holds exactly one item.
    """
    if len(value) != 1:
        raise ValueError("only one track ID may be set per song")
    encoded = value[0].encode('ascii')
    try:
        frame = id3["UFID:http://musicbrainz.org"]
    except KeyError:
        frame = mutagen.id3.UFID(owner="http://musicbrainz.org", data=encoded)
        id3.add(frame)
        return
    frame.data = encoded
|
||||
|
||||
|
||||
def musicbrainz_trackid_delete(id3, key):
    """Remove the MusicBrainz UFID frame; KeyError if it is absent."""
    del id3["UFID:http://musicbrainz.org"]
|
||||
|
||||
|
||||
def website_get(id3, key):
    """Return the URL of every WOAR frame.

    Raises EasyID3KeyError if there are no WOAR frames.
    """
    urls = [frame.url for frame in id3.getall("WOAR")]
    if not urls:
        raise EasyID3KeyError(key)
    return urls
|
||||
|
||||
|
||||
def website_set(id3, key, value):
    """Replace all WOAR frames with one frame per URL in *value*."""
    id3.delall("WOAR")
    for url in value:
        frame = mutagen.id3.WOAR(url=url)
        id3.add(frame)
|
||||
|
||||
|
||||
def website_delete(id3, key):
    """Remove every WOAR frame."""
    id3.delall("WOAR")
|
||||
|
||||
|
||||
def gain_get(id3, key):
    """Return the gain of the matching RVA2 frame as ``['<gain> dB']``.

    The frame description is *key* without its first 11 and last 5
    characters.

    Raises EasyID3KeyError if the frame is missing.
    """
    frame_id = "RVA2:" + key[11:-5]
    try:
        frame = id3[frame_id]
    except KeyError:
        raise EasyID3KeyError(key)
    return [u"%+f dB" % frame.gain]
|
||||
|
||||
|
||||
def gain_set(id3, key, value):
    """Store the gain in *value* in the matching RVA2 frame.

    *value* must hold exactly one string whose first whitespace-separated
    token is a number (e.g. ``"-3.5 dB"``). The frame description is
    *key* without its first 11 and last 5 characters; the frame is
    created if missing.

    Raises ValueError if *value* does not hold exactly one item or the
    number cannot be parsed.
    """
    if len(value) != 1:
        # Format the message here; passing `value` as a second exception
        # argument would leave the %r placeholder unfilled.
        raise ValueError(
            "there must be exactly one gain value, not %r." % (value,))
    gain = float(value[0].split()[0])
    desc = key[11:-5]
    try:
        frame = id3["RVA2:" + desc]
    except KeyError:
        frame = mutagen.id3.RVA2(desc=desc, gain=0, peak=0, channel=1)
        id3.add(frame)
    frame.gain = gain
|
||||
|
||||
|
||||
def gain_delete(id3, key):
    """Clear the gain of the matching RVA2 frame.

    If the frame has a non-zero peak it is kept with its gain set to 0.0;
    otherwise the frame is deleted. A missing frame is ignored.
    """
    frame_id = "RVA2:" + key[11:-5]
    try:
        frame = id3[frame_id]
    except KeyError:
        return
    if frame.peak:
        frame.gain = 0.0
    else:
        del id3[frame_id]
|
||||
|
||||
|
||||
def peak_get(id3, key):
    """Return the peak of the matching RVA2 frame as a one-item text list.

    The frame description is *key* without its first 11 and last 5
    characters.

    Raises EasyID3KeyError if the frame is missing.
    """
    frame_id = "RVA2:" + key[11:-5]
    try:
        frame = id3[frame_id]
    except KeyError:
        raise EasyID3KeyError(key)
    return [u"%f" % frame.peak]
|
||||
|
||||
|
||||
def peak_set(id3, key, value):
    """Set the peak of the RVA2 frame named by ``key``.

    ``value`` must contain exactly one item, parsed as a float that must
    satisfy ``0 <= peak < 2``. The frame description is ``key`` with its
    first 11 and last 5 characters removed. A missing frame is created
    with zero gain and peak before the peak is assigned.

    Raises ValueError if ``value`` does not hold exactly one item, cannot
    be parsed, or is out of range.
    """
    if len(value) != 1:
        # Interpolate the offending value into the message; wrapping it in
        # a tuple keeps tuple-valued input from being unpacked by "%".
        raise ValueError(
            "there must be exactly one peak value, not %r." % (value,))
    peak = float(value[0])
    if peak >= 2 or peak < 0:
        raise ValueError("peak must be => 0 and < 2.")
    desc = key[11:-5]
    try:
        frame = id3["RVA2:" + desc]
    except KeyError:
        frame = mutagen.id3.RVA2(desc=desc, gain=0, peak=0, channel=1)
        id3.add(frame)
    frame.peak = peak
|
||||
|
||||
|
||||
def peak_delete(id3, key):
    """Clear the peak stored in the RVA2 frame named by ``key``.

    If the frame still has a truthy gain, only the peak is reset to 0.0;
    otherwise the whole frame is deleted. A missing frame is ignored.
    """
    frame_key = "RVA2:" + key[11:-5]
    try:
        rva2 = id3[frame_key]
    except KeyError:
        return
    if rva2.gain:
        # The frame is shared with the gain key, so keep it around.
        rva2.peak = 0.0
        return
    del id3[frame_key]
|
||||
|
||||
|
||||
def peakgain_list(id3, key):
    """List the replaygain gain and peak key names for every RVA2 frame.

    Each frame contributes ``replaygain_<desc>_gain`` followed by
    ``replaygain_<desc>_peak``, in the order ``id3.getall`` yields frames.
    """
    return [
        template % rva2.desc
        for rva2 in id3.getall("RVA2")
        for template in ("replaygain_%s_gain", "replaygain_%s_peak")
    ]
|
||||
|
||||
# Text frames registered through RegisterTextKey (frame ID -> EasyID3 key).
for frameid, key in iteritems({
    "TALB": "album",
    "TBPM": "bpm",
    "TCMP": "compilation",  # iTunes extension
    "TCOM": "composer",
    "TCOP": "copyright",
    "TENC": "encodedby",
    "TEXT": "lyricist",
    "TLEN": "length",
    "TMED": "media",
    "TMOO": "mood",
    "TIT2": "title",
    "TIT3": "version",
    "TPE1": "artist",
    "TPE2": "performer",
    "TPE3": "conductor",
    "TPE4": "arranger",
    "TPOS": "discnumber",
    "TPUB": "organization",
    "TRCK": "tracknumber",
    "TOLY": "author",
    "TSO2": "albumartistsort",  # iTunes extension
    "TSOA": "albumsort",
    "TSOC": "composersort",  # iTunes extension
    "TSOP": "artistsort",
    "TSOT": "titlesort",
    "TSRC": "isrc",
    "TSST": "discsubtitle",
}):
    EasyID3.RegisterTextKey(key, frameid)

# Keys backed by dedicated getter/setter/deleter (and optional lister)
# functions. Each key is registered exactly once.
EasyID3.RegisterKey("genre", genre_get, genre_set, genre_delete)
EasyID3.RegisterKey("date", date_get, date_set, date_delete)
EasyID3.RegisterKey(
    "performer:*", performer_get, performer_set, performer_delete,
    performer_list)
EasyID3.RegisterKey("musicbrainz_trackid", musicbrainz_trackid_get,
                    musicbrainz_trackid_set, musicbrainz_trackid_delete)
EasyID3.RegisterKey("website", website_get, website_set, website_delete)
EasyID3.RegisterKey(
    "replaygain_*_gain", gain_get, gain_set, gain_delete, peakgain_list)
EasyID3.RegisterKey("replaygain_*_peak", peak_get, peak_set, peak_delete)

# At various times, information for this came from
# http://musicbrainz.org/docs/specs/metadata_tags.html
# http://bugs.musicbrainz.org/ticket/1383
# http://musicbrainz.org/doc/MusicBrainzTag
for desc, key in iteritems({
    u"MusicBrainz Artist Id": "musicbrainz_artistid",
    u"MusicBrainz Album Id": "musicbrainz_albumid",
    u"MusicBrainz Album Artist Id": "musicbrainz_albumartistid",
    u"MusicBrainz TRM Id": "musicbrainz_trmid",
    u"MusicIP PUID": "musicip_puid",
    u"MusicMagic Fingerprint": "musicip_fingerprint",
    u"MusicBrainz Album Status": "musicbrainz_albumstatus",
    u"MusicBrainz Album Type": "musicbrainz_albumtype",
    u"MusicBrainz Album Release Country": "releasecountry",
    u"MusicBrainz Disc Id": "musicbrainz_discid",
    u"ASIN": "asin",
    u"ALBUMARTISTSORT": "albumartistsort",
    u"BARCODE": "barcode",
}):
    EasyID3.RegisterTXXXKey(key, desc)
|
||||
|
||||
|
||||
class EasyID3FileType(ID3FileType):
    """Like ID3FileType, but uses EasyID3 for tags."""

    # Overrides the ID3 attribute with the EasyID3 class.
    # NOTE(review): presumably ID3FileType instantiates this attribute when
    # it loads tags -- confirm against ID3FileType.
    ID3 = EasyID3
|
||||
@@ -0,0 +1,274 @@
|
||||
# Copyright 2009 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from mutagen import Metadata
|
||||
from mutagen._util import DictMixin, dict_match, utf8
|
||||
from mutagen.mp4 import MP4, MP4Tags, error, delete
|
||||
from ._compat import PY2, text_type
|
||||
|
||||
|
||||
__all__ = ["EasyMP4Tags", "EasyMP4", "delete", "error"]
|
||||
|
||||
|
||||
class EasyMP4KeyError(error, KeyError, ValueError):
    """Raised by EasyMP4Tags item access when no handler matches the key.

    Derives from the MP4 ``error`` as well as KeyError and ValueError, so
    an ``except`` clause for any of those catches it.
    """
|
||||
|
||||
|
||||
class EasyMP4Tags(DictMixin, Metadata):
    """A file with MPEG-4 iTunes metadata.

    Like Vorbis comments, EasyMP4Tags keys are case-insensitive ASCII
    strings, and values are a list of Unicode strings (and these lists
    are always of length 0 or 1).

    If you need access to the full MP4 metadata feature set, you should use
    MP4, not EasyMP4.
    """

    # Class-level registries mapping a lower-cased key (or glob pattern) to
    # the callable handling it. They are filled by the Register* class
    # methods and are shared by every instance.
    Set = {}
    Get = {}
    Delete = {}
    List = {}

    def __init__(self, *args, **kwargs):
        """Wrap a new MP4Tags built from the given arguments.

        The wrapped object's load, save and delete methods are exposed
        directly as attributes of this instance.
        """
        self.__mp4 = MP4Tags(*args, **kwargs)
        self.load = self.__mp4.load
        self.save = self.__mp4.save
        self.delete = self.__mp4.delete

    # Reads and writes are forwarded to the wrapped MP4Tags' filename.
    filename = property(lambda s: s.__mp4.filename,
                        lambda s, fn: setattr(s.__mp4, 'filename', fn))

    @classmethod
    def RegisterKey(cls, key,
                    getter=None, setter=None, deleter=None, lister=None):
        """Register a new key mapping.

        A key mapping is four functions, a getter, setter, deleter,
        and lister. The key may be either a string or a glob pattern.

        The getter, deleter, and lister receive an MP4Tags instance
        and the requested key name. The setter also receives the
        desired value, which will be a list of strings.

        The getter, setter, and deleter are used to implement __getitem__,
        __setitem__, and __delitem__.

        The lister is used to implement keys(). It should return a
        list of keys that are actually in the MP4 instance, provided
        by its associated getter.

        The key is lower-cased before it is stored. Any function left as
        None is skipped, so an earlier registration for it stays in place.
        """
        key = key.lower()
        if getter is not None:
            cls.Get[key] = getter
        if setter is not None:
            cls.Set[key] = setter
        if deleter is not None:
            cls.Delete[key] = deleter
        if lister is not None:
            cls.List[key] = lister

    @classmethod
    def RegisterTextKey(cls, key, atomid):
        """Register a text key.

        If the key you need to register is a simple one-to-one mapping
        of MP4 atom name to EasyMP4Tags key, then you can use this
        function::

            EasyMP4Tags.RegisterTextKey("artist", "\xa9ART")
        """
        # Each accessor forwards straight to the atom in the wrapped tags;
        # the requested key name itself is not used.
        def getter(tags, key):
            return tags[atomid]

        def setter(tags, key, value):
            tags[atomid] = value

        def deleter(tags, key):
            del(tags[atomid])

        cls.RegisterKey(key, getter, setter, deleter)

    @classmethod
    def RegisterIntKey(cls, key, atomid, min_value=0, max_value=2**16-1):
        """Register a scalar integer key.

        The getter returns the atom's values converted with text_type.
        The setter converts each given value with int() and clamps it to
        the range [min_value, max_value] before storing the list.
        """

        def getter(tags, key):
            return list(map(text_type, tags[atomid]))

        def setter(tags, key, value):
            clamp = lambda x: int(min(max(min_value, x), max_value))
            tags[atomid] = list(map(clamp, map(int, value)))

        def deleter(tags, key):
            del(tags[atomid])

        cls.RegisterKey(key, getter, setter, deleter)

    @classmethod
    def RegisterIntPairKey(cls, key, atomid, min_value=0, max_value=2**16-1):
        """Register a key whose atom stores (number, total) integer pairs.

        The getter renders each pair as u"number/total", or just the
        number when total is falsy. The setter accepts "number/total" or
        a bare "number" string per value and clamps both parts to the
        range [min_value, max_value]; a missing total is stored as
        min_value.
        """
        def getter(tags, key):
            ret = []
            for (track, total) in tags[atomid]:
                if total:
                    ret.append(u"%d/%d" % (track, total))
                else:
                    ret.append(text_type(track))
            return ret

        def setter(tags, key, value):
            clamp = lambda x: int(min(max(min_value, x), max_value))
            data = []
            for v in value:
                try:
                    tracks, total = v.split("/")
                    tracks = clamp(int(tracks))
                    total = clamp(int(total))
                except (ValueError, TypeError):
                    # Not a "number/total" pair: treat the whole value as
                    # the number. A value that still fails int() raises.
                    tracks = clamp(int(v))
                    total = min_value
                data.append((tracks, total))
            tags[atomid] = data

        def deleter(tags, key):
            del(tags[atomid])

        cls.RegisterKey(key, getter, setter, deleter)

    @classmethod
    def RegisterFreeformKey(cls, key, name, mean=b"com.apple.iTunes"):
        """Register a freeform text key.

        If the key you need to register is a simple one-to-one mapping
        of MP4 freeform atom (----) and name to EasyMP4Tags key, then
        you can use this function::

            EasyMP4Tags.RegisterFreeformKey(
                "musicbrainz_artistid", b"MusicBrainz Artist Id")

        Both name and mean are concatenated with byte-string literals to
        form the atom ID, so they must be byte strings themselves.
        """
        atomid = b"----:" + mean + b":" + name

        def getter(tags, key):
            # Stored values are bytes; undecodable sequences are replaced.
            return [s.decode("utf-8", "replace") for s in tags[atomid]]

        def setter(tags, key, value):
            tags[atomid] = [utf8(v) for v in value]

        def deleter(tags, key):
            del(tags[atomid])

        cls.RegisterKey(key, getter, setter, deleter)

    def __getitem__(self, key):
        """Return the value for key using the matching registered getter.

        Raises EasyMP4KeyError if no getter matches the lower-cased key.
        """
        key = key.lower()
        func = dict_match(self.Get, key)
        if func is not None:
            return func(self.__mp4, key)
        else:
            raise EasyMP4KeyError("%r is not a valid key" % key)

    def __setitem__(self, key, value):
        """Store value for key using the matching registered setter.

        A single string value is wrapped in a one-element list first.
        Raises EasyMP4KeyError if no setter matches the lower-cased key.
        """
        key = key.lower()

        # Python 2 wraps any basestring; Python 3 wraps only text_type.
        if PY2:
            if isinstance(value, basestring):
                value = [value]
        else:
            if isinstance(value, text_type):
                value = [value]

        func = dict_match(self.Set, key)
        if func is not None:
            return func(self.__mp4, key, value)
        else:
            raise EasyMP4KeyError("%r is not a valid key" % key)

    def __delitem__(self, key):
        """Delete key using the matching registered deleter.

        Raises EasyMP4KeyError if no deleter matches the lower-cased key.
        """
        key = key.lower()
        func = dict_match(self.Delete, key)
        if func is not None:
            return func(self.__mp4, key)
        else:
            raise EasyMP4KeyError("%r is not a valid key" % key)

    def keys(self):
        """Return the registered keys that are present in the tags.

        A key with a lister contributes whatever its lister returns; any
        other key is included when ``key in self`` is true.
        """
        keys = []
        for key in self.Get.keys():
            if key in self.List:
                keys.extend(self.List[key](self.__mp4, key))
            elif key in self:
                keys.append(key)
        return keys

    def pprint(self):
        """Return tag key=value pairs, one per line, sorted by key."""
        strings = []
        for key in sorted(self.keys()):
            values = self[key]
            for value in values:
                strings.append("%s=%s" % (key, value))
        return "\n".join(strings)
|
||||
|
||||
# Text atoms registered through RegisterTextKey (atom ID -> key).
for atomid, key in {
    b'\xa9nam': 'title',
    b'\xa9alb': 'album',
    b'\xa9ART': 'artist',
    b'aART': 'albumartist',
    b'\xa9day': 'date',
    b'\xa9cmt': 'comment',
    b'desc': 'description',
    b'\xa9grp': 'grouping',
    b'\xa9gen': 'genre',
    b'cprt': 'copyright',
    b'soal': 'albumsort',
    b'soaa': 'albumartistsort',
    b'soar': 'artistsort',
    b'sonm': 'titlesort',
    b'soco': 'composersort',
}.items():
    EasyMP4Tags.RegisterTextKey(key, atomid)

# Freeform atoms registered through RegisterFreeformKey (name -> key),
# using its default "mean" value.
for name, key in {
    b'MusicBrainz Artist Id': 'musicbrainz_artistid',
    b'MusicBrainz Track Id': 'musicbrainz_trackid',
    b'MusicBrainz Album Id': 'musicbrainz_albumid',
    b'MusicBrainz Album Artist Id': 'musicbrainz_albumartistid',
    b'MusicIP PUID': 'musicip_puid',
    b'MusicBrainz Album Status': 'musicbrainz_albumstatus',
    b'MusicBrainz Album Type': 'musicbrainz_albumtype',
    b'MusicBrainz Release Country': 'releasecountry',
}.items():
    EasyMP4Tags.RegisterFreeformKey(key, name)

# Single-integer atoms registered through RegisterIntKey (atom ID -> key).
for name, key in {
    b"tmpo": "bpm",
}.items():
    EasyMP4Tags.RegisterIntKey(key, name)

# Integer-pair atoms registered through RegisterIntPairKey (atom ID -> key).
for name, key in {
    b"trkn": "tracknumber",
    b"disk": "discnumber",
}.items():
    EasyMP4Tags.RegisterIntPairKey(key, name)
|
||||
|
||||
|
||||
class EasyMP4(MP4):
    """Like :class:`MP4 <mutagen.mp4.MP4>`,
    but uses :class:`EasyMP4Tags` for tags.

    :ivar info: :class:`MP4Info <mutagen.mp4.MP4Info>`
    :ivar tags: :class:`EasyMP4Tags`
    """

    # Overrides the MP4Tags attribute with the EasyMP4Tags class.
    # NOTE(review): presumably MP4 instantiates this attribute for its
    # tags -- confirm against mutagen.mp4.MP4.
    MP4Tags = EasyMP4Tags

    # Aliases of the EasyMP4Tags registries and registration helpers, so
    # they can be reached through EasyMP4 as well. These are the same
    # objects, not copies.
    Get = EasyMP4Tags.Get
    Set = EasyMP4Tags.Set
    Delete = EasyMP4Tags.Delete
    List = EasyMP4Tags.List
    RegisterTextKey = EasyMP4Tags.RegisterTextKey
    RegisterKey = EasyMP4Tags.RegisterKey
|
||||
@@ -0,0 +1,839 @@
|
||||
# FLAC comment support for Mutagen
|
||||
# Copyright 2005 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Read and write FLAC Vorbis comments and stream information.
|
||||
|
||||
Read more about FLAC at http://flac.sourceforge.net.
|
||||
|
||||
FLAC supports arbitrary metadata blocks. The two most interesting ones
|
||||
are the FLAC stream information block, and the Vorbis comment block;
|
||||
these are also the only ones Mutagen can currently read.
|
||||
|
||||
This module does not handle Ogg FLAC files.
|
||||
|
||||
Based off documentation available at
|
||||
http://flac.sourceforge.net/format.html
|
||||
"""
|
||||
|
||||
__all__ = ["FLAC", "Open", "delete"]
|
||||
|
||||
import struct
|
||||
from ._vorbis import VCommentDict
|
||||
import mutagen
|
||||
|
||||
from ._compat import cBytesIO, endswith, chr_
|
||||
from mutagen._util import insert_bytes
|
||||
from mutagen.id3 import BitPaddedInt
|
||||
import sys
|
||||
if sys.version_info >= (2, 6):
|
||||
from functools import reduce
|
||||
|
||||
|
||||
class error(IOError):
    """Base error raised by this module; a subclass of IOError."""
|
||||
|
||||
|
||||
class FLACNoHeaderError(error):
    """Specialised ``error`` subclass.

    NOTE(review): by its name, presumably raised when a file lacks a FLAC
    header -- confirm at the raise site.
    """
|
||||
|
||||
|
||||
class FLACVorbisError(ValueError, error):
    """An ``error`` that is also a ValueError.

    NOTE(review): by its name, presumably raised for Vorbis comment
    problems in a FLAC file -- confirm at the raise site.
    """
|
||||
|
||||
|
||||
def to_int_be(string):
    """Convert an arbitrarily-long byte string to an integer, reading the
    bytes in big-endian order. An empty string gives 0."""
    total = 0
    for octet in bytearray(string):
        # Shift what we have up by one byte and append the next one.
        total = (total << 8) + octet
    return total
|
||||
|
||||
|
||||
class StrictFileObject(object):
    """File-like wrapper whose read() raises ``error`` on a short read.

    The close, tell, seek, write and name attributes of the wrapped
    object are copied onto the wrapper when the wrapped object has them.
    """

    def __init__(self, fileobj):
        self._fileobj = fileobj
        for attr in ("close", "tell", "seek", "write", "name"):
            if not hasattr(fileobj, attr):
                continue
            setattr(self, attr, getattr(fileobj, attr))

    def read(self, size=-1):
        """Read ``size`` bytes, or everything when ``size`` is negative.

        Raises ``error`` if a non-negative ``size`` was requested and a
        different number of bytes came back.
        """
        chunk = self._fileobj.read(size)
        if size < 0 or len(chunk) == size:
            return chunk
        raise error("file said %d bytes, read %d bytes" % (
            size, len(chunk)))

    def tryread(self, *args):
        """Read from the wrapped object without any length check."""
        return self._fileobj.read(*args)
|
||||
|
||||
|
||||
class MetadataBlock(object):
    """A generic block of FLAC metadata.

    This class is used as an ancestor for more specific blocks, and
    also as a container for data blobs of unknown blocks.

    Attributes:

    * data -- raw binary data for this block
    """

    _distrust_size = False

    def __init__(self, data):
        """Parse the given data string or file-like as a metadata block.
        The metadata header should not be included.

        Passing None skips loading altogether. Raises TypeError if data
        is neither a byte string nor an object with a read attribute.
        """
        if data is not None:
            if not isinstance(data, StrictFileObject):
                if isinstance(data, bytes):
                    data = cBytesIO(data)
                elif not hasattr(data, 'read'):
                    raise TypeError(
                        "StreamInfo requires string data or a file-like")
                data = StrictFileObject(data)
            self.load(data)

    def load(self, data):
        """Store everything readable from data as the raw block payload."""
        self.data = data.read()

    def write(self):
        """Return the raw block payload."""
        return self.data

    @staticmethod
    def writeblocks(blocks):
        """Render metadata blocks as a byte string.

        Each block is written as a one-byte code, a three-byte big-endian
        length and the block's payload. The code of the final block has
        bit 128 set.

        Raises ``error`` if a payload does not fit the three-byte length.
        """
        data = []
        codes = [[block.code, block.write()] for block in blocks]
        codes[-1][0] |= 128
        for code, datum in codes:
            byte = chr_(code)
            # The length field is 24 bits wide, so 2**24 - 1 is the largest
            # size it can hold; 2**24 itself would be truncated to zero.
            if len(datum) >= 2**24:
                raise error("block is too long to write")
            length = struct.pack(">I", len(datum))[-3:]
            data.append(byte + length + datum)
        return b"".join(data)

    @staticmethod
    def group_padding(blocks):
        """Consolidate FLAC padding metadata blocks.

        The overall size of the rendered blocks does not change, so
        this adds several bytes of padding for each merged block.

        The list is modified in place: every Padding block is removed and
        a single merged Padding block is appended.
        """

        paddings = [b for b in blocks if isinstance(b, Padding)]
        for p in paddings:
            blocks.remove(p)
        # total padding size is the sum of padding sizes plus 4 bytes
        # per removed header.
        size = sum([padding.length for padding in paddings])
        padding = Padding()
        padding.length = size + 4 * (len(paddings) - 1)
        blocks.append(padding)
|
||||
|
||||
|
||||
class StreamInfo(MetadataBlock, mutagen.StreamInfo):
    """FLAC stream information.

    This contains information about the audio data in the FLAC file.
    Unlike most stream information objects in Mutagen, changes to this
    one will be rewritten to the file when it is saved. Unless you are
    actually changing the audio stream itself, don't change any
    attributes of this block.

    Attributes:

    * min_blocksize -- minimum audio block size
    * max_blocksize -- maximum audio block size
    * sample_rate -- audio sample rate in Hz
    * channels -- audio channels (1 for mono, 2 for stereo)
    * bits_per_sample -- bits per sample
    * total_samples -- total samples in file
    * length -- audio length in seconds
    """

    code = 0

    def __eq__(self, other):
        """Compare block sizes, sample rate, channels, bits per sample and
        total samples; objects lacking those attributes compare unequal."""
        try:
            return (self.min_blocksize == other.min_blocksize and
                    self.max_blocksize == other.max_blocksize and
                    self.sample_rate == other.sample_rate and
                    self.channels == other.channels and
                    self.bits_per_sample == other.bits_per_sample and
                    self.total_samples == other.total_samples)
        except (AttributeError, TypeError):
            # Same exceptions the other metadata blocks treat as "not
            # equal"; anything else is a real error and must propagate.
            return False

    __hash__ = MetadataBlock.__hash__

    def load(self, data):
        """Parse the 34-byte stream information payload from data.

        Raises ``error`` if the decoded sample rate is 0.
        """
        self.min_blocksize = int(to_int_be(data.read(2)))
        self.max_blocksize = int(to_int_be(data.read(2)))
        self.min_framesize = int(to_int_be(data.read(3)))
        self.max_framesize = int(to_int_be(data.read(3)))
        # first 16 bits of sample rate
        sample_first = to_int_be(data.read(2))
        # last 4 bits of sample rate, 3 of channels, first 1 of bits/sample
        sample_channels_bps = to_int_be(data.read(1))
        # last 4 of bits/sample, 36 of total samples
        bps_total = to_int_be(data.read(5))

        sample_tail = sample_channels_bps >> 4
        self.sample_rate = int((sample_first << 4) + sample_tail)
        if not self.sample_rate:
            raise error("A sample rate value of 0 is invalid")
        # Channels and bits per sample are stored minus one.
        self.channels = int(((sample_channels_bps >> 1) & 7) + 1)
        bps_tail = bps_total >> 36
        bps_head = (sample_channels_bps & 1) << 4
        self.bits_per_sample = int(bps_head + bps_tail + 1)
        self.total_samples = bps_total & 0xFFFFFFFFF
        self.length = self.total_samples / float(self.sample_rate)

        self.md5_signature = to_int_be(data.read(16))

    def write(self):
        """Return the payload rebuilt from the current attribute values."""
        f = cBytesIO()
        f.write(struct.pack(">I", self.min_blocksize)[-2:])
        f.write(struct.pack(">I", self.max_blocksize)[-2:])
        f.write(struct.pack(">I", self.min_framesize)[-3:])
        f.write(struct.pack(">I", self.max_framesize)[-3:])

        # first 16 bits of sample rate
        f.write(struct.pack(">I", self.sample_rate >> 4)[-2:])
        # 4 bits sample, 3 channel, 1 bps
        byte = (self.sample_rate & 0xF) << 4
        byte += ((self.channels - 1) & 7) << 1
        byte += ((self.bits_per_sample - 1) >> 4) & 1
        f.write(chr_(byte))
        # 4 bits of bps, 4 of sample count
        byte = ((self.bits_per_sample - 1) & 0xF) << 4
        byte += (self.total_samples >> 32) & 0xF
        f.write(chr_(byte))
        # last 32 of sample count
        f.write(struct.pack(">I", self.total_samples & 0xFFFFFFFF))
        # MD5 signature
        sig = self.md5_signature
        f.write(struct.pack(
            ">4I", (sig >> 96) & 0xFFFFFFFF, (sig >> 64) & 0xFFFFFFFF,
            (sig >> 32) & 0xFFFFFFFF, sig & 0xFFFFFFFF))
        return f.getvalue()

    def pprint(self):
        """Return a one-line summary with the length and sample rate."""
        return "FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate)
|
||||
|
||||
|
||||
class SeekPoint(tuple):
    """A single seek point in a FLAC file.

    Placeholder seek points have first_sample of 0xFFFFFFFFFFFFFFFF,
    and byte_offset and num_samples undefined. Seek points must be
    sorted in ascending order by first_sample number. Seek points must
    be unique by first_sample number, except for placeholder
    points. Placeholder points must occur last in the table and there
    may be any number of them.

    Attributes:

    * first_sample -- sample number of first sample in the target frame
    * byte_offset -- offset from first frame to target frame
    * num_samples -- number of samples in target frame
    """

    def __new__(cls, first_sample, byte_offset, num_samples):
        # super() takes the defining class first and the actual class
        # second; with the arguments swapped, any subclass fails with
        # TypeError on construction.
        return super(SeekPoint, cls).__new__(
            cls, (first_sample, byte_offset, num_samples))

    first_sample = property(lambda self: self[0])
    byte_offset = property(lambda self: self[1])
    num_samples = property(lambda self: self[2])
|
||||
|
||||
|
||||
class SeekTable(MetadataBlock):
    """Read and write FLAC seek tables.

    Attributes:

    * seekpoints -- list of SeekPoint objects
    """

    # One seek point on disk: two unsigned 64-bit integers followed by an
    # unsigned 16-bit integer, all big-endian.
    __SEEKPOINT_FORMAT = '>QQH'
    __SEEKPOINT_SIZE = struct.calcsize(__SEEKPOINT_FORMAT)

    code = 3

    def __init__(self, data):
        self.seekpoints = []
        super(SeekTable, self).__init__(data)

    def __eq__(self, other):
        """Two tables are equal when their seekpoints lists are equal."""
        try:
            same = (self.seekpoints == other.seekpoints)
        except (AttributeError, TypeError):
            return False
        return same

    __hash__ = MetadataBlock.__hash__

    def load(self, data):
        """Read seek points from data until a short read ends the table."""
        self.seekpoints = []
        while True:
            raw = data.tryread(self.__SEEKPOINT_SIZE)
            if len(raw) != self.__SEEKPOINT_SIZE:
                break
            fields = struct.unpack(self.__SEEKPOINT_FORMAT, raw)
            self.seekpoints.append(SeekPoint(*fields))

    def write(self):
        """Return the packed seek points, in list order."""
        out = cBytesIO()
        for point in self.seekpoints:
            out.write(struct.pack(
                self.__SEEKPOINT_FORMAT,
                point.first_sample, point.byte_offset, point.num_samples))
        return out.getvalue()

    def __repr__(self):
        cls_name = type(self).__name__
        return "<%s seekpoints=%r>" % (cls_name, self.seekpoints)
|
||||
|
||||
|
||||
class VCFLACDict(VCommentDict):
    """Read and write FLAC Vorbis comments.

    FLACs don't use the framing bit at the end of the comment block.
    So this extends VCommentDict to not use the framing bit.
    """

    # Code that MetadataBlock.writeblocks puts in this block's header.
    code = 4
    _distrust_size = True

    def load(self, data, errors='replace', framing=False):
        """Load comments via VCommentDict.load; framing defaults to False."""
        super(VCFLACDict, self).load(data, errors=errors, framing=framing)

    def write(self, framing=False):
        """Render comments via VCommentDict.write; framing defaults to
        False."""
        return super(VCFLACDict, self).write(framing=framing)
|
||||
|
||||
|
||||
class CueSheetTrackIndex(tuple):
    """Index for a track in a cuesheet.

    For CD-DA, an index_number of 0 corresponds to the track
    pre-gap. The first index in a track must have a number of 0 or 1,
    and subsequently, index_numbers must increase by 1. Index_numbers
    must be unique within a track. And index_offset must be evenly
    divisible by 588 samples.

    Attributes:

    * index_number -- index point number
    * index_offset -- offset in samples from track start
    """

    def __new__(cls, index_number, index_offset):
        # super() takes the defining class first and the actual class
        # second; with the arguments swapped, any subclass fails with
        # TypeError on construction.
        return super(CueSheetTrackIndex, cls).__new__(
            cls, (index_number, index_offset))

    index_number = property(lambda self: self[0])
    index_offset = property(lambda self: self[1])
|
||||
|
||||
|
||||
class CueSheetTrack(object):
    """A track in a cuesheet.

    For CD-DA, track_numbers must be 1-99, or 170 for the
    lead-out. Track_numbers must be unique within a cue sheet. There
    must be at least one index in every track except the lead-out track
    which must have none.

    Attributes:

    * track_number -- track number
    * start_offset -- track offset in samples from start of FLAC stream
    * isrc -- ISRC code
    * type -- 0 for audio, 1 for digital data
    * pre_emphasis -- true if the track is recorded with pre-emphasis
    * indexes -- list of CueSheetTrackIndex objects
    """

    def __init__(self, track_number, start_offset, isrc='', type_=0,
                 pre_emphasis=False):
        self.track_number = track_number
        self.start_offset = start_offset
        self.isrc = isrc
        self.type = type_
        self.pre_emphasis = pre_emphasis
        # Always starts empty; callers append CueSheetTrackIndex objects.
        self.indexes = []

    def __eq__(self, other):
        """Compare all six attributes; objects lacking them are unequal."""
        try:
            return (self.track_number == other.track_number and
                    self.start_offset == other.start_offset and
                    self.isrc == other.isrc and
                    self.type == other.type and
                    self.pre_emphasis == other.pre_emphasis and
                    self.indexes == other.indexes)
        except (AttributeError, TypeError):
            return False

    __hash__ = object.__hash__

    def __repr__(self):
        # The closing delimiter is a plain ">", matching the opening "<".
        return ("<%s number=%r, offset=%d, isrc=%r, type=%r, "
                "pre_emphasis=%r, indexes=%r>") % (
            type(self).__name__, self.track_number, self.start_offset,
            self.isrc, self.type, self.pre_emphasis, self.indexes)
|
||||
|
||||
|
||||
class CueSheet(MetadataBlock):
    """Read and write FLAC embedded cue sheets.

    Number of tracks should be from 1 to 100. There should always be
    exactly one lead-out track and that track must be the last track
    in the cue sheet.

    Attributes:

    * media_catalog_number -- media catalog number in ASCII
    * lead_in_samples -- number of lead-in samples
    * compact_disc -- true if the cuesheet corresponds to a compact disc
    * tracks -- list of CueSheetTrack objects
    * lead_out -- lead-out as CueSheetTrack or None if lead-out was not found
    """

    # Big-endian struct layouts of the cue sheet header, a track entry and
    # a track index entry; "x" codes are padding bytes.
    __CUESHEET_FORMAT = '>128sQB258xB'
    __CUESHEET_SIZE = struct.calcsize(__CUESHEET_FORMAT)
    __CUESHEET_TRACK_FORMAT = '>QB12sB13xB'
    __CUESHEET_TRACK_SIZE = struct.calcsize(__CUESHEET_TRACK_FORMAT)
    __CUESHEET_TRACKINDEX_FORMAT = '>QB3x'
    __CUESHEET_TRACKINDEX_SIZE = struct.calcsize(__CUESHEET_TRACKINDEX_FORMAT)

    code = 5

    media_catalog_number = b''
    lead_in_samples = 88200
    compact_disc = True

    def __init__(self, data):
        self.tracks = []
        super(CueSheet, self).__init__(data)

    def __eq__(self, other):
        """Compare catalog number, lead-in, disc flag and tracks; objects
        lacking those attributes compare unequal."""
        try:
            return (self.media_catalog_number == other.media_catalog_number and
                    self.lead_in_samples == other.lead_in_samples and
                    self.compact_disc == other.compact_disc and
                    self.tracks == other.tracks)
        except (AttributeError, TypeError):
            return False

    __hash__ = MetadataBlock.__hash__

    @staticmethod
    def _as_bytes(value):
        """Return value ready for a struct "s" field, ASCII-encoding text."""
        if isinstance(value, type(u'')):
            return value.encode('ascii')
        return value

    def load(self, data):
        """Parse the cue sheet header, then every track and its indexes."""
        header = data.read(self.__CUESHEET_SIZE)
        media_catalog_number, lead_in_samples, flags, num_tracks = \
            struct.unpack(self.__CUESHEET_FORMAT, header)
        # Fixed-width text fields are NUL padded on disk.
        self.media_catalog_number = media_catalog_number.rstrip(b'\0')
        self.lead_in_samples = lead_in_samples
        self.compact_disc = bool(flags & 0x80)
        self.tracks = []
        for i in range(num_tracks):
            track = data.read(self.__CUESHEET_TRACK_SIZE)
            start_offset, track_number, isrc_padded, flags, num_indexes = \
                struct.unpack(self.__CUESHEET_TRACK_FORMAT, track)
            isrc = isrc_padded.rstrip(b'\0')
            type_ = (flags & 0x80) >> 7
            pre_emphasis = bool(flags & 0x40)
            val = CueSheetTrack(
                track_number, start_offset, isrc, type_, pre_emphasis)
            for j in range(num_indexes):
                index = data.read(self.__CUESHEET_TRACKINDEX_SIZE)
                index_offset, index_number = struct.unpack(
                    self.__CUESHEET_TRACKINDEX_FORMAT, index)
                val.indexes.append(
                    CueSheetTrackIndex(index_number, index_offset))
            self.tracks.append(val)

    def write(self):
        """Return the cue sheet payload rebuilt from the attributes.

        Text values of media_catalog_number and track isrc are encoded as
        ASCII first, because struct "s" fields only accept byte strings
        on Python 3; this covers CueSheetTrack's default ``isrc=''``.
        """
        f = cBytesIO()
        flags = 0
        if self.compact_disc:
            flags |= 0x80
        packed = struct.pack(
            self.__CUESHEET_FORMAT,
            self._as_bytes(self.media_catalog_number),
            self.lead_in_samples, flags, len(self.tracks))
        f.write(packed)
        for track in self.tracks:
            track_flags = 0
            track_flags |= (track.type & 1) << 7
            if track.pre_emphasis:
                track_flags |= 0x40
            track_packed = struct.pack(
                self.__CUESHEET_TRACK_FORMAT, track.start_offset,
                track.track_number, self._as_bytes(track.isrc), track_flags,
                len(track.indexes))
            f.write(track_packed)
            for index in track.indexes:
                index_packed = struct.pack(
                    self.__CUESHEET_TRACKINDEX_FORMAT,
                    index.index_offset, index.index_number)
                f.write(index_packed)
        return f.getvalue()

    def __repr__(self):
        return ("<%s media_catalog_number=%r, lead_in=%r, compact_disc=%r, "
                "tracks=%r>") % (
            type(self).__name__, self.media_catalog_number,
            self.lead_in_samples, self.compact_disc, self.tracks)
|
||||
|
||||
|
||||
class Picture(MetadataBlock):
|
||||
"""Read and write FLAC embed pictures.
|
||||
|
||||
Attributes:
|
||||
|
||||
* type -- picture type (same as types for ID3 APIC frames)
|
||||
* mime -- MIME type of the picture
|
||||
* desc -- picture's description
|
||||
* width -- width in pixels
|
||||
* height -- height in pixels
|
||||
* depth -- color depth in bits-per-pixel
|
||||
* colors -- number of colors for indexed palettes (like GIF),
|
||||
0 for non-indexed
|
||||
* data -- picture data
|
||||
"""
|
||||
|
||||
code = 6
|
||||
_distrust_size = True
|
||||
|
||||
def __init__(self, data=None):
|
||||
self.type = 0
|
||||
self.mime = u''
|
||||
self.desc = u''
|
||||
self.width = 0
|
||||
self.height = 0
|
||||
self.depth = 0
|
||||
self.colors = 0
|
||||
self.data = b''
|
||||
super(Picture, self).__init__(data)
|
||||
|
||||
def __eq__(self, other):
|
||||
try:
|
||||
return (self.type == other.type and
|
||||
self.mime == other.mime and
|
||||
self.desc == other.desc and
|
||||
self.width == other.width and
|
||||
self.height == other.height and
|
||||
self.depth == other.depth and
|
||||
self.colors == other.colors and
|
||||
self.data == other.data)
|
||||
except (AttributeError, TypeError):
|
||||
return False
|
||||
|
||||
__hash__ = MetadataBlock.__hash__
|
||||
|
||||
def load(self, data):
|
||||
self.type, length = struct.unpack('>2I', data.read(8))
|
||||
self.mime = data.read(length).decode('UTF-8', 'replace')
|
||||
length, = struct.unpack('>I', data.read(4))
|
||||
self.desc = data.read(length).decode('UTF-8', 'replace')
|
||||
(self.width, self.height, self.depth,
|
||||
self.colors, length) = struct.unpack('>5I', data.read(20))
|
||||
self.data = data.read(length)
|
||||
|
||||
def write(self):
|
||||
f = cBytesIO()
|
||||
mime = self.mime.encode('UTF-8')
|
||||
f.write(struct.pack('>2I', self.type, len(mime)))
|
||||
f.write(mime)
|
||||
desc = self.desc.encode('UTF-8')
|
||||
f.write(struct.pack('>I', len(desc)))
|
||||
f.write(desc)
|
||||
f.write(struct.pack('>5I', self.width, self.height, self.depth,
|
||||
self.colors, len(self.data)))
|
||||
f.write(self.data)
|
||||
return f.getvalue()
|
||||
|
||||
def __repr__(self):
    """Describe the picture by class name, MIME type and payload size."""
    class_name = type(self).__name__
    payload_size = len(self.data)
    return "<%s '%s' (%d bytes)>" % (class_name, self.mime, payload_size)
|
||||
|
||||
|
||||
class Padding(MetadataBlock):
    """Empty padding space for metadata blocks.

    Padding lets comments be edited without rewriting the entire FLAC
    file.  It should occur at the end, and no more than one padding
    block should be in any FLAC file.  Mutagen handles this with
    MetadataBlock.group_padding.
    """

    code = 1

    def __init__(self, data=b""):
        """Create a padding block from `data` (empty by default)."""
        super(Padding, self).__init__(data)

    def load(self, data):
        """Store the number of bytes left in the file-like object `data`."""
        remaining = data.read()
        self.length = len(remaining)

    def write(self):
        """Return `length` zero bytes, or raise `error` if that fails."""
        try:
            zeros = b"\x00" * self.length
        except (OverflowError, ValueError, MemoryError):
            # On some 64 bit platforms this won't generate a MemoryError
            # or OverflowError since you might have enough RAM, but it
            # still generates a ValueError. On other 64 bit platforms,
            # this will still succeed for extremely large values.
            # Those should never happen in the real world, and if they
            # do, writeblocks will catch it.
            raise error("cannot write %d bytes" % self.length)
        return zeros

    def __eq__(self, other):
        """Two padding blocks are equal when their lengths match."""
        if not isinstance(other, Padding):
            return False
        return self.length == other.length

    __hash__ = MetadataBlock.__hash__

    def __repr__(self):
        """Describe the block by class name and padding size."""
        class_name = type(self).__name__
        return "<%s (%d bytes)>" % (class_name, self.length)
|
||||
|
||||
|
||||
class FLAC(mutagen.FileType):
    """A FLAC audio file.

    Attributes:

    * info -- stream information (length, bitrate, sample rate)
    * tags -- metadata tags, if any
    * cuesheet -- CueSheet object, if any
    * seektable -- SeekTable object, if any
    * pictures -- list of embedded pictures
    """

    _mimes = ["audio/x-flac", "application/x-flac"]

    METADATA_BLOCKS = [StreamInfo, Padding, None, SeekTable, VCFLACDict,
                       CueSheet, Picture]
    """Known metadata block types, indexed by ID."""

    @staticmethod
    def score(filename, fileobj, header):
        """Rate how likely the file is FLAC.

        One point for a header starting with "fLaC" plus three points
        for a ".flac" file name extension.
        """
        return (header.startswith(b"fLaC") +
                endswith(filename.lower(), ".flac") * 3)

    def __read_metadata_block(self, fileobj):
        """Read one metadata block from `fileobj` and record it.

        Returns True when more metadata blocks follow, False after the
        block flagged as last.  Raises FLACVorbisError or error when a
        second Vorbis comment, CueSheet or SeekTable block is found.
        """
        byte = ord(fileobj.read(1))
        size = to_int_be(fileobj.read(3))
        # Low seven bits: block type.  High bit: last-block flag.
        code = byte & 0x7F
        last_block = bool(byte & 0x80)

        try:
            block_type = self.METADATA_BLOCKS[code] or MetadataBlock
        except IndexError:
            block_type = MetadataBlock

        if block_type._distrust_size:
            # Some software writes a broken metadata block length
            # for Vorbis comment blocks, and the FLAC reference
            # implementation can parse them (mostly by accident),
            # so we have to too. Instead of parsing the size
            # given, parse an actual Vorbis comment, leaving
            # fileobj in the right position.
            # http://code.google.com/p/mutagen/issues/detail?id=52
            # ..same for the Picture block:
            # http://code.google.com/p/mutagen/issues/detail?id=106
            block = block_type(fileobj)
        else:
            data = fileobj.read(size)
            block = block_type(data)
        block.code = code

        if block.code == VCFLACDict.code:
            if self.tags is None:
                self.tags = block
            else:
                raise FLACVorbisError("> 1 Vorbis comment block found")
        elif block.code == CueSheet.code:
            if self.cuesheet is None:
                self.cuesheet = block
            else:
                raise error("> 1 CueSheet block found")
        elif block.code == SeekTable.code:
            if self.seektable is None:
                self.seektable = block
            else:
                raise error("> 1 SeekTable block found")
        self.metadata_blocks.append(block)
        return not last_block

    def add_tags(self):
        """Add a Vorbis comment block to the file.

        Raises FLACVorbisError when a Vorbis comment already exists.
        """
        if self.tags is None:
            self.tags = VCFLACDict()
            self.metadata_blocks.append(self.tags)
        else:
            raise FLACVorbisError("a Vorbis comment already exists")

    add_vorbiscomment = add_tags

    def delete(self, filename=None):
        """Remove Vorbis comments from a file.

        If no filename is given, the one most recently loaded is used.
        Nothing is written when there is no Vorbis comment block.
        """
        if filename is None:
            filename = self.filename
        for s in list(self.metadata_blocks):
            if isinstance(s, VCFLACDict):
                self.metadata_blocks.remove(s)
                self.tags = None
                # Pass the resolved name on; a bare save() would always
                # write to the most recently loaded file instead.
                self.save(filename)
                break

    vc = property(lambda s: s.tags, doc="Alias for tags; don't use this.")

    def load(self, filename):
        """Load file information from a filename.

        Resets the block list, tags, cuesheet and seektable, then reads
        every metadata block.  Raises FLACNoHeaderError when the first
        block has no `length` attribute or no block was read.
        """

        self.metadata_blocks = []
        self.tags = None
        self.cuesheet = None
        self.seektable = None
        self.filename = filename
        fileobj = StrictFileObject(open(filename, "rb"))
        try:
            self.__check_header(fileobj)
            while self.__read_metadata_block(fileobj):
                pass
        finally:
            fileobj.close()

        try:
            self.metadata_blocks[0].length
        except (AttributeError, IndexError):
            raise FLACNoHeaderError("Stream info block not found")

    @property
    def info(self):
        """The first metadata block (the stream information)."""
        return self.metadata_blocks[0]

    def add_picture(self, picture):
        """Add a new picture to the file."""
        self.metadata_blocks.append(picture)

    def clear_pictures(self):
        """Delete all pictures from the file."""

        blocks = [b for b in self.metadata_blocks if b.code != Picture.code]
        self.metadata_blocks = blocks

    @property
    def pictures(self):
        """List of embedded pictures"""

        return [b for b in self.metadata_blocks if b.code == Picture.code]

    def save(self, filename=None, deleteid3=False):
        """Save metadata blocks to a file.

        If no filename is given, the one most recently loaded is used.

        * deleteid3 -- when true, the space of an ID3v2 tag in front of
                       the FLAC header is reclaimed and a trailing
                       128 byte ID3v1 tag is truncated
        """

        if filename is None:
            filename = self.filename

        with open(filename, 'rb+') as f:
            # Ensure we've got padding at the end, and only at the end.
            # If adding makes it too large, we'll scale it down later.
            self.metadata_blocks.append(Padding(b'\x00' * 1020))
            MetadataBlock.group_padding(self.metadata_blocks)

            header = self.__check_header(f)
            # "fLaC" and maybe ID3
            available = self.__find_audio_offset(f) - header
            data = MetadataBlock.writeblocks(self.metadata_blocks)

            # Delete ID3v2
            if deleteid3 and header > 4:
                available += header - 4
                header = 4

            if len(data) > available:
                # If we have too much data, see if we can reduce padding.
                padding = self.metadata_blocks[-1]
                newlength = padding.length - (len(data) - available)
                if newlength > 0:
                    padding.length = newlength
                    data = MetadataBlock.writeblocks(self.metadata_blocks)
                    assert len(data) == available

            elif len(data) < available:
                # If we have too little data, increase padding.
                self.metadata_blocks[-1].length += (available - len(data))
                data = MetadataBlock.writeblocks(self.metadata_blocks)
                assert len(data) == available

            if len(data) != available:
                # We couldn't reduce the padding enough.
                diff = (len(data) - available)
                insert_bytes(f, diff, header)

            f.seek(header - 4)
            f.write(b"fLaC" + data)

            # Delete ID3v1
            if deleteid3:
                try:
                    f.seek(-128, 2)
                except IOError:
                    pass
                else:
                    if f.read(3) == b"TAG":
                        f.seek(-128, 2)
                        f.truncate()

    def __find_audio_offset(self, fileobj):
        """Skip every metadata block and return the position after the last."""
        byte = 0x00
        while not (byte & 0x80):
            byte = ord(fileobj.read(1))
            size = to_int_be(fileobj.read(3))
            try:
                block_type = self.METADATA_BLOCKS[byte & 0x7F]
            except IndexError:
                block_type = None

            if block_type and block_type._distrust_size:
                # See comments in read_metadata_block; the size can't
                # be trusted for Vorbis comment blocks and Picture block
                block_type(fileobj)
            else:
                fileobj.read(size)
        return fileobj.tell()

    def __check_header(self, fileobj):
        """Return the offset just past the "fLaC" marker.

        That is 4 for a plain FLAC file, or more when an ID3 tag comes
        first.  Raises FLACNoHeaderError when no marker is found.
        """
        size = 4
        header = fileobj.read(4)
        if header != b"fLaC":
            size = None
            if header[:3] == b"ID3":
                # 10 byte ID3 header + tag size + the 4 byte marker.
                size = 14 + BitPaddedInt(fileobj.read(6)[2:])
                fileobj.seek(size - 4)
                if fileobj.read(4) != b"fLaC":
                    size = None
        if size is None:
            raise FLACNoHeaderError(
                "%r is not a valid FLAC file" % fileobj.name)
        return size
|
||||
|
||||
|
||||
Open = FLAC
|
||||
|
||||
|
||||
def delete(filename):
    """Remove tags from a file."""
    flac_file = FLAC(filename)
    flac_file.delete()
|
||||
@@ -0,0 +1,937 @@
|
||||
# id3 support for mutagen
|
||||
# Copyright (C) 2005 Michael Urman
|
||||
# 2006 Lukas Lalinsky
|
||||
# 2013 Christoph Reiter
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""ID3v2 reading and writing.
|
||||
|
||||
This is based off of the following references:
|
||||
|
||||
* http://id3.org/id3v2.4.0-structure
|
||||
* http://id3.org/id3v2.4.0-frames
|
||||
* http://id3.org/id3v2.3.0
|
||||
* http://id3.org/id3v2-00
|
||||
* http://id3.org/ID3v1
|
||||
|
||||
Its largest deviation from the above (versions 2.3 and 2.2) is that it
|
||||
will not interpret the / characters as a separator, and will almost
|
||||
always accept null separators to generate multi-valued text frames.
|
||||
|
||||
Because ID3 frame structure differs between frame types, each frame is
|
||||
implemented as a different class (e.g. TIT2 as mutagen.id3.TIT2). Each
|
||||
frame's documentation contains a list of its attributes.
|
||||
|
||||
Since this file's documentation is a little unwieldy, you are probably
|
||||
interested in the :class:`ID3` class to start with.
|
||||
"""
|
||||
|
||||
__all__ = ['ID3', 'ID3FileType', 'Frames', 'Open', 'delete']
|
||||
|
||||
import struct
|
||||
|
||||
from struct import unpack, pack, error as StructError
|
||||
|
||||
import mutagen
|
||||
from mutagen._util import insert_bytes, delete_bytes, DictProxy
|
||||
from ._compat import reraise, chr_
|
||||
|
||||
from mutagen._id3util import *
|
||||
from mutagen._id3frames import *
|
||||
from mutagen._id3specs import *
|
||||
|
||||
|
||||
class ID3(DictProxy, mutagen.Metadata):
|
||||
"""A file with an ID3v2 tag.
|
||||
|
||||
Attributes:
|
||||
|
||||
* version -- ID3 tag version as a tuple
|
||||
* unknown_frames -- raw frame data of any unknown frames found
|
||||
* size -- the total size of the ID3 tag, including the header
|
||||
"""
|
||||
|
||||
PEDANTIC = True
|
||||
version = (2, 4, 0)
|
||||
|
||||
filename = None
|
||||
size = 0
|
||||
__flags = 0
|
||||
__readbytes = 0
|
||||
__crc = None
|
||||
__unknown_version = None
|
||||
|
||||
_V24 = (2, 4, 0)
|
||||
_V23 = (2, 3, 0)
|
||||
_V22 = (2, 2, 0)
|
||||
_V11 = (1, 1)
|
||||
|
||||
def __init__(self, *args, **kwargs):
    """Start with no unknown frames, then run the parent initialisers.

    Positional and keyword arguments are passed through untouched.
    """
    self.unknown_frames = list()
    super(ID3, self).__init__(*args, **kwargs)
|
||||
|
||||
def __fullread(self, size):
    """Read exactly `size` bytes from the open file and count them.

    Raises ValueError for a negative size and EOFError when the request
    exceeds the known file size or the file yields fewer bytes.
    """
    try:
        if size < 0:
            raise ValueError('Requested bytes (%s) less than zero' % size)
        limit = self.__filesize
    except AttributeError:
        # No file size recorded: skip the up-front bounds check.
        pass
    else:
        if size > limit:
            raise EOFError('Requested %#x of %#x (%s)' % (
                int(size), int(limit), self.filename))
    chunk = self._fileobj.read(size)
    if len(chunk) != size:
        raise EOFError
    self.__readbytes += size
    return chunk
|
||||
|
||||
def load(self, filename, known_frames=None, translate=True, v2_version=4):
    """Load tags from a filename.

    Keyword arguments:

    * filename -- filename to load tag data from
    * known_frames -- dict mapping frame IDs to Frame objects
    * translate -- Update all tags to ID3v2.3/4 internally. If you
                   intend to save, this must be true or you have to
                   call update_to_v23() / update_to_v24() manually.
    * v2_version -- if update_to_v23 or update_to_v24 get called (3 or 4)

    When no usable ID3v2 header is found, the last 128 bytes are tried
    as an ID3v1 tag; if that fails too, the original header error is
    raised again.

    Example of loading a custom frame::

        my_frames = dict(mutagen.id3.Frames)
        class XMYF(Frame): ...
        my_frames["XMYF"] = XMYF
        mutagen.id3.ID3(filename, known_frames=my_frames)
    """

    if v2_version not in (3, 4):
        raise ValueError("Only 3 and 4 possible for v2_version")

    from os.path import getsize

    self.filename = filename
    self.__known_frames = known_frames
    self._fileobj = open(filename, 'rb')
    self.__filesize = getsize(filename)
    try:
        try:
            self._load_header()
        except EOFError:
            self.size = 0
            raise ID3NoHeaderError("%s: too small (%d bytes)" % (
                filename, self.__filesize))
        except (ID3NoHeaderError, ID3UnsupportedVersionError) as err:
            self.size = 0
            import sys
            stack = sys.exc_info()[2]
            try:
                self._fileobj.seek(-128, 2)
            except EnvironmentError:
                # Re-raise with type, value and traceback so neither the
                # message nor the original stack is lost.
                reraise(type(err), err, stack)
            else:
                frames = ParseID3v1(self._fileobj.read(128))
                if frames is not None:
                    self.version = self._V11
                    for v in frames.values():
                        self.add(v)
                else:
                    reraise(type(err), err, stack)
        else:
            frames = self.__known_frames
            if frames is None:
                if self._V23 <= self.version:
                    frames = Frames
                elif self._V22 <= self.version:
                    frames = Frames_2_2
            # self.size includes the 10 byte header already consumed.
            data = self.__fullread(self.size - 10)
            for frame in self.__read_frames(data, frames=frames):
                if isinstance(frame, Frame):
                    self.add(frame)
                else:
                    # Raw bytes of a frame that could not be parsed.
                    self.unknown_frames.append(frame)
            self.__unknown_version = self.version
    finally:
        self._fileobj.close()
        del self._fileobj
        del self.__filesize
        if translate:
            if v2_version == 3:
                self.update_to_v23()
            else:
                self.update_to_v24()
|
||||
|
||||
def getall(self, key):
    """Return all frames with a given name (the list may be empty).

    An exact key match yields a one-element list.  Otherwise every
    frame whose key starts with ``key + ":"`` is returned::

        id3.getall('TIT2') == [id3['TIT2']]
        id3.getall('TTTT') == []
        id3.getall('TXXX') == [TXXX(desc='woo', text='bar'),
                               TXXX(desc='baz', text='quuuux'), ...]

    Because the frame's HashKey is colon-separated, lookups such as
    ``getall('COMM:MusicMatch')`` or ``getall('TXXX:QuodLibet:')``
    work as well.
    """
    if key in self:
        return [self[key]]

    prefix = key + ":"
    matches = []
    for hash_key, frame in self.items():
        if hash_key.startswith(prefix):
            matches.append(frame)
    return matches
|
||||
|
||||
def delall(self, key):
    """Delete all tags of a given kind; see getall.

    An exact key match removes just that frame; otherwise every frame
    whose key starts with ``key + ":"`` is removed.
    """
    if key in self:
        del self[key]
    else:
        prefix = key + ":"
        # Collect the matching keys first: deleting while iterating a
        # live key view raises RuntimeError on Python 3.
        doomed = [k for k in self.keys() if k.startswith(prefix)]
        for k in doomed:
            del self[k]
|
||||
|
||||
def setall(self, key, values):
    """Replace the frames of kind `key` with the frames in `values`.

    Existing frames are removed with delall(); each new frame is then
    stored under its own HashKey.
    """
    self.delall(key)
    for frame in values:
        hash_key = frame.HashKey
        self[hash_key] = frame
|
||||
|
||||
def pprint(self):
    """Return tags in a human-readable format.

    "Human-readable" is used loosely here. The format is intended
    to mirror that used for Vorbis or APEv2 output, e.g.

        ``TIT2=My Title``

    However, ID3 frames can have multiple keys:

        ``POPM=user@example.org=3 128/255``

    The lines are sorted and joined with newlines.
    """
    lines = sorted(map(Frame.pprint, self.values()))
    return "\n".join(lines)
|
||||
|
||||
def loaded_frame(self, tag):
    """Deprecated; use the add method."""
    frame_class = type(tag)
    # A three-letter class name marks a 2.2 frame; rebuild it through
    # its base class to get the 2.3/2.4 equivalent.
    if len(frame_class.__name__) == 3:
        tag = frame_class.__base__(tag)
    self[tag.HashKey] = tag
|
||||
|
||||
# add = loaded_frame (and vice versa) break applications that
|
||||
# expect to be able to override loaded_frame (e.g. Quod Libet),
|
||||
# as does making loaded_frame call add.
|
||||
def add(self, frame):
    """Add a frame to the tag."""
    # Delegates to loaded_frame so subclasses overriding it keep working.
    outcome = self.loaded_frame(frame)
    return outcome
|
||||
|
||||
def _load_header(self):
    """Read and validate the ID3v2 header and any extended header.

    Stores the header flags, the total tag size (body size plus the
    10 byte header) and the version tuple on the instance.

    Raises ID3NoHeaderError when the data does not start with "ID3",
    ID3UnsupportedVersionError for a major version other than 2, 3
    or 4, and ValueError for PEDANTIC validation failures.
    """
    fn = self.filename
    data = self.__fullread(10)
    id3, vmaj, vrev, flags, size = unpack('>3sBBB4s', data)
    # These are assigned before validation, so they are set even when
    # one of the checks below raises.
    self.__flags = flags
    self.size = BitPaddedInt(size) + 10
    self.version = (2, vmaj, vrev)

    if id3 != b'ID3':
        raise ID3NoHeaderError("%r doesn't start with an ID3 tag" % fn)
    if vmaj not in [2, 3, 4]:
        raise ID3UnsupportedVersionError("%r ID3v2.%d not supported"
                                         % (fn, vmaj))

    if self.PEDANTIC:
        if not BitPaddedInt.has_valid_padding(size):
            raise ValueError("Header size not synchsafe")

        # Reject flag bits outside the accepted mask for this version.
        if self._V24 <= self.version and (flags & 0x0f):
            raise ValueError("%r has invalid flags %#02x" % (fn, flags))
        elif self._V23 <= self.version < self._V24 and (flags & 0x1f):
            raise ValueError("%r has invalid flags %#02x" % (fn, flags))

    if self.f_extended:
        extsize = self.__fullread(4)
        if extsize in Frames:
            # Some tagger sets the extended header flag but
            # doesn't write an extended header; in this case, the
            # ID3 data follows immediately. Since no extended
            # header is going to be long enough to actually match
            # a frame, and if it's *not* a frame we're going to be
            # completely lost anyway, this seems to be the most
            # correct check.
            # http://code.google.com/p/quodlibet/issues/detail?id=126
            # Clear the extended flag and un-read the four bytes.
            self.__flags ^= 0x40
            self.__extsize = 0
            self._fileobj.seek(-4, 1)
            self.__readbytes -= 4
        elif self.version >= self._V24:
            # "Where the 'Extended header size' is the size of the whole
            # extended header, stored as a 32 bit synchsafe integer."
            self.__extsize = BitPaddedInt(extsize) - 4
            if self.PEDANTIC:
                if not BitPaddedInt.has_valid_padding(extsize):
                    raise ValueError("Extended header size not synchsafe")
        else:
            # "Where the 'Extended header size', currently 6 or 10 bytes,
            # excludes itself."
            self.__extsize = unpack('>L', extsize)[0]
        if self.__extsize:
            self.__extdata = self.__fullread(self.__extsize)
        else:
            self.__extdata = b""
|
||||
|
||||
def __determine_bpi(self, data, frames, EMPTY=b"\x00" * 10):
    """Pick the callable used to decode frame sizes in `data`.

    Returns `int` for versions below 2.4.  Otherwise the data is
    walked twice, once decoding each size as a BitPaddedInt and once
    as a plain integer, counting how many frame names are found in
    `frames` each way.  `int` is returned when it finds more known
    frames, or on a tie when the offsets favour it; BitPaddedInt is
    returned in every other case.

    * data -- raw frame data following the tag header
    * frames -- container of known frame names
    * EMPTY -- ten zero bytes, treated as the start of padding
    """
    if self.version < self._V24:
        return int
    # have to special case whether to use bitpaddedints here
    # spec says to use them, but iTunes has it wrong

    # count number of tags found as BitPaddedInt and how far past
    o = 0
    asbpi = 0
    while o < len(data) - 10:
        part = data[o:o + 10]
        if part == EMPTY:
            bpioff = -((len(data) - o) % 10)
            break
        name, size, flags = unpack('>4sLH', part)
        size = BitPaddedInt(size)
        o += 10 + size
        if name in frames:
            asbpi += 1
    else:
        # Loop ended without hitting padding: record the offset of the
        # walk relative to the end of the data.
        bpioff = o - len(data)

    # count number of tags found as int and how far past
    o = 0
    asint = 0
    while o < len(data) - 10:
        part = data[o:o + 10]
        if part == EMPTY:
            intoff = -((len(data) - o) % 10)
            break
        name, size, flags = unpack('>4sLH', part)
        o += 10 + size
        if name in frames:
            asint += 1
    else:
        # Same bookkeeping as above for the plain-integer walk.
        intoff = o - len(data)

    # if more tags as int, or equal and bpi is past and int is not
    if asint > asbpi or (asint == asbpi and (bpioff >= 1 and intoff <= 1)):
        return int
    return BitPaddedInt
|
||||
|
||||
def __read_frames(self, data, frames):
    """Yield the frames found in the raw tag body `data`.

    Each item is either the object produced by __load_framedata or the
    raw bytes (header plus payload) of a frame that is unknown but has
    a valid ID, or whose loading raised NotImplementedError.  Empty
    frames and frames raising ID3JunkFrameError are dropped.

    * data -- tag body without the tag header
    * frames -- mapping of frame name to frame class
    """
    if self.version < self._V24 and self.f_unsynch:
        try:
            data = unsynch.decode(data)
        except ValueError:
            # Leave the data as it is when it cannot be decoded.
            pass

    if self._V23 <= self.version:
        decode_size = self.__determine_bpi(data, frames)
        while data:
            head = data[:10]
            try:
                name, raw_size, flags = unpack('>4sLH', head)
            except struct.error:
                return  # not enough header
            if name.strip(b'\x00') == b'':
                return
            length = decode_size(raw_size)
            end = 10 + length
            payload = data[10:end]
            data = data[end:]
            if length == 0:
                continue  # drop empty frames
            try:
                frame_class = frames[name]
            except KeyError:
                if is_valid_frame_id(name):
                    yield head + payload
                continue
            try:
                loaded = self.__load_framedata(frame_class, flags, payload)
            except NotImplementedError:
                yield head + payload
            except ID3JunkFrameError:
                pass
            else:
                yield loaded

    elif self._V22 <= self.version:
        while data:
            head = data[0:6]
            try:
                name, raw_size = unpack('>3s3s', head)
            except struct.error:
                return  # not enough header
            # Widen the three size bytes to four for unpacking.
            length = struct.unpack('>L', b'\x00'+raw_size)[0]
            if name.strip(b'\x00') == b'':
                return
            end = 6 + length
            payload = data[6:end]
            data = data[end:]
            if length == 0:
                continue  # drop empty frames
            try:
                frame_class = frames[name]
            except KeyError:
                if is_valid_frame_id(name):
                    yield head + payload
                continue
            try:
                loaded = self.__load_framedata(frame_class, 0, payload)
            except NotImplementedError:
                yield head + payload
            except ID3JunkFrameError:
                pass
            else:
                yield loaded
|
||||
|
||||
def __load_framedata(self, tag, flags, framedata):
    """Build a frame by calling `tag.fromData` with this tag as context."""
    loaded = tag.fromData(self, flags, framedata)
    return loaded
|
||||
|
||||
# Read-only views of the tag header flags: each property reports
# whether one bit of the flags byte stored by _load_header is set.
f_unsynch = property(lambda s: bool(s.__flags & 0x80))
# _load_header reads an extended header when this bit is set.
f_extended = property(lambda s: bool(s.__flags & 0x40))
f_experimental = property(lambda s: bool(s.__flags & 0x20))
f_footer = property(lambda s: bool(s.__flags & 0x10))
|
||||
|
||||
#f_crc = property(lambda s: bool(s.__extflags & 0x8000))
|
||||
|
||||
def _prepare_framedata(self, v2_version, v23_sep):
    """Return all frames serialised and concatenated as bytes.

    * v2_version -- 3 or 4; anything else raises ValueError
    * v23_sep -- separator handed to __save_frame for each frame

    Unknown frames are appended only when they were loaded from, or
    upgraded to, the version being written.
    """
    if v2_version == 3:
        version = self._V23
    elif v2_version == 4:
        version = self._V24
    else:
        raise ValueError("Only 3 or 4 allowed for v2_version")

    # Sort frames by 'importance'
    order = ["TIT2", "TPE1", "TRCK", "TALB", "TPOS", "TDRC", "TCON"]
    order = dict(zip(order, range(len(order))))
    last = len(order)
    # sorted() works whether items() returns a list or a view; calling
    # .sort() on the result would fail for a view.
    frames = sorted(self.items(),
                    key=lambda a: (order.get(a[0][:4], last), a[0]))

    framedata = [self.__save_frame(frame, version=version, v23_sep=v23_sep)
                 for (key, frame) in frames]

    # only write unknown frames if they were loaded from the version
    # we are saving with or upgraded to it
    if self.__unknown_version == version:
        framedata.extend([data for data in self.unknown_frames
                          if len(data) > 10])

    return b''.join(framedata)
|
||||
|
||||
def _prepare_id3_header(self, original_header, framesize, v2_version):
    """Build the tag header to write and work out the tag sizes.

    * original_header -- the first bytes currently in the file
    * framesize -- length of the serialised frame data
    * v2_version -- major version number stored in the new header

    Returns a tuple ``(header, outsize, insize)``: the new 10 byte
    header, the body size it announces and the body size of the tag
    already in the file (-10 when the file has no ID3 tag).
    """
    try:
        fields = unpack('>3sBBB4s', original_header)
    except struct.error:
        magic, old_size = b'', 0
    else:
        magic, old_size = fields[0], fields[-1]
    old_size = BitPaddedInt(old_size)
    if magic != b'ID3':
        old_size = -10

    # Reuse the existing space when it is large enough; otherwise round
    # the needed space up to the next multiple of 1024.
    new_size = (old_size if old_size >= framesize
                else (framesize + 1023) & ~0x3FF)

    size_field = BitPaddedInt.to_str(new_size, width=4)
    new_header = pack('>3sBBB4s', b'ID3', v2_version, 0, 0, size_field)

    return (new_header, new_size, old_size)
|
||||
|
||||
def save(self, filename=None, v1=1, v2_version=4, v23_sep='/'):
    """Save changes to a file.

    If no filename is given, the one most recently loaded is used.
    When there are no frames to write, the tag is deleted from the
    file instead (a missing file is ignored in that case).

    Keyword arguments:
    v1 -- if 0, ID3v1 tags will be removed
          if 1, ID3v1 tags will be updated but not added
          if 2, ID3v1 tags will be created and/or updated
    v2_version -- version of ID3v2 tags (3 or 4).

    By default Mutagen saves ID3v2.4 tags. If you want to save ID3v2.3
    tags, you must call method update_to_v23 before saving the file.

    v23_sep -- the separator used to join multiple text values
               if v2_version == 3. Defaults to '/' but if it's None
               will be the ID3v2.4 null separator.

    The lack of a way to update only an ID3v1 tag is intentional.
    """

    framedata = self._prepare_framedata(v2_version, v23_sep)
    framesize = len(framedata)

    if not framedata:
        try:
            self.delete(filename)
        except EnvironmentError as err:
            from errno import ENOENT
            if err.errno != ENOENT:
                raise
        return

    if filename is None:
        filename = self.filename
    try:
        f = open(filename, 'rb+')
    except IOError as err:
        from errno import ENOENT
        if err.errno != ENOENT:
            raise
        # Create the file and close that handle right away, then reopen
        # it for reading and writing.
        open(filename, 'ab').close()
        f = open(filename, 'rb+')
    try:
        idata = f.read(10)

        header = self._prepare_id3_header(idata, framesize, v2_version)
        header, outsize, insize = header

        # Pad the frame data with zero bytes up to the announced size.
        data = header + framedata + (b'\x00' * (outsize - framesize))

        if (insize < outsize):
            insert_bytes(f, outsize-insize, insize+10)
        f.seek(0)
        f.write(data)

        try:
            f.seek(-128, 2)
        except IOError as err:
            # If the file is too small, that's OK - it just means
            # we're certain it doesn't have a v1 tag.
            from errno import EINVAL
            if err.errno != EINVAL:
                # If we failed to seek for some other reason, bail out.
                raise
            # Since we're sure this isn't a v1 tag, don't read it.
            f.seek(0, 2)

        data = f.read(128)
        try:
            idx = data.index(b"TAG")
        except ValueError:
            offset = 0
            has_v1 = False
        else:
            offset = idx - len(data)
            has_v1 = True

        f.seek(offset, 2)
        if v1 == 1 and has_v1 or v1 == 2:
            f.write(MakeID3v1(self))
        else:
            f.truncate()

    finally:
        f.close()
|
||||
|
||||
def delete(self, filename=None, delete_v1=True, delete_v2=True):
    """Remove tags from a file and clear the frames held in memory.

    If no filename is given, the one most recently loaded is used.

    Keyword arguments:

    * delete_v1 -- delete any ID3v1 tag
    * delete_v2 -- delete any ID3v2 tag
    """
    target = self.filename if filename is None else filename
    # This resolves to the module-level delete(), not to this method.
    delete(target, delete_v1, delete_v2)
    self.clear()
|
||||
|
||||
def __save_frame(self, frame, name=None, version=_V24, v23_sep=None):
    """Serialise one frame, header included, and return the bytes.

    * frame -- the frame to write
    * name -- header name to use instead of the frame's class name
    * version -- _V23 or _V24; anything else raises ValueError
    * v23_sep -- separator passed to the frame's v2.3 conversion

    With PEDANTIC set, a text frame whose string form is empty is
    written as nothing at all.
    """
    header_flags = 0
    if self.PEDANTIC and isinstance(frame, TextFrame):
        if not str(frame):
            return b''

    if version == self._V23:
        payload = frame._get_v23_frame(sep=v23_sep)._writeData()
    else:
        payload = frame._writeData()

    # Compressing large frames is deliberately not done: it causes
    # iTunes and other programs to fail to find these frames, which
    # usually includes e.g. APIC.

    # Size bytes carry 7 bits each for 2.4 and 8 bits each for 2.3.
    if version == self._V24:
        bits_per_byte = 7
    elif version == self._V23:
        bits_per_byte = 8
    else:
        raise ValueError

    size_field = BitPaddedInt.to_str(len(payload), width=4,
                                     bits=bits_per_byte)
    class_name = type(frame).__name__.encode("ascii")
    frame_header = pack('>4s4sH', name or class_name, size_field,
                        header_flags)
    return frame_header + payload
|
||||
|
||||
def __update_common(self):
    """Updates done by both v23 and v24 update"""

    if "TCON" in self:
        # Get rid of "(xx)Foobr" format.
        genre_frame = self["TCON"]
        genre_frame.genres = genre_frame.genres

    if self.version >= self._V23:
        return

    # ID3v2.2 PIC frames are slightly different.
    mime_by_code = {"PNG": "image/png", "JPG": "image/jpeg"}
    old_pictures = self.getall("APIC")
    self.delall("APIC")
    for old in old_pictures:
        self.add(APIC(
            encoding=old.encoding,
            mime=mime_by_code.get(old.mime, old.mime),
            type=old.type, desc=old.desc, data=old.data))

    # ID3v2.2 LNK frames are just way too different to upgrade.
    self.delall("LINK")
|
||||
|
||||
def update_to_v24(self):
    """Convert older tags into an ID3v2.4 tag.

    This updates old ID3v2 frames to ID3v2.4 ones (e.g. TYER to
    TDRC). If you intend to save tags, you must call this function
    at some point; it is called by default when loading the tag.
    """

    self.__update_common()

    if self.__unknown_version == (2, 3, 0):
        # convert unknown 2.3 frames (flags/size) to 2.4
        upgraded = []
        for raw in self.unknown_frames:
            try:
                name, size, flags = unpack('>4sLH', raw[:10])
                binary = BinaryFrame.fromData(self, flags, raw[10:])
            except (struct.error, error):
                continue
            upgraded.append(self.__save_frame(binary, name=name))
        self.unknown_frames[:] = upgraded
        self.__unknown_version = (2, 4, 0)

    # TDAT, TYER, and TIME have been turned into TDRC.
    try:
        if str(self.get("TYER", "")).strip("\x00"):
            stamp = str(self.pop("TYER"))
            if str(self.get("TDAT", "")).strip("\x00"):
                day_month = str(self.pop("TDAT"))
                stamp = "%s-%s-%s" % (stamp, day_month[2:], day_month[:2])
                if str(self.get("TIME", "")).strip("\x00"):
                    clock = str(self.pop("TIME"))
                    stamp += "T%s:%s:00" % (clock[:2], clock[2:])
            if "TDRC" not in self:
                self.add(TDRC(encoding=0, text=stamp))
    except UnicodeDecodeError:
        # Old ID3 tags have *lots* of Unicode problems, so if TYER
        # is bad, just chuck the frames.
        pass

    # TORY can be the first part of a TDOR.
    if "TORY" in self:
        original_year = self.pop("TORY")
        if "TDOR" not in self:
            try:
                self.add(TDOR(encoding=0, text=str(original_year)))
            except UnicodeDecodeError:
                pass

    # IPLS is now TIPL.
    if "IPLS" in self:
        involved = self.pop("IPLS")
        if "TIPL" not in self:
            self.add(TIPL(encoding=involved.encoding,
                          people=involved.people))

    # These can't be trivially translated to any ID3v2.4 tags, or
    # should have been removed already.
    obsolete_keys = ("RVAD", "EQUA", "TRDA", "TSIZ", "TDAT", "TIME", "CRM")
    for obsolete in obsolete_keys:
        if obsolete in self:
            del self[obsolete]
|
||||
|
||||
def update_to_v23(self):
    """Convert older (and newer) tags into an ID3v2.3 tag.

    This updates incompatible ID3v2 frames to ID3v2.3 ones. If you
    intend to save tags as ID3v2.3, you must call this function
    at some point.

    If you want to go off spec and include some v2.4 frames
    in v2.3, remove them before calling this and add them back afterwards.
    """

    self.__update_common()

    # we could downgrade unknown v2.4 frames here, but given that
    # the main reason to save v2.3 is compatibility and this
    # might increase the chance of some parser breaking.. better not

    # TMCL, TIPL -> IPLS
    if "TIPL" in self or "TMCL" in self:
        people = []
        if "TIPL" in self:
            f = self.pop("TIPL")
            people.extend(f.people)
        if "TMCL" in self:
            f = self.pop("TMCL")
            people.extend(f.people)
        # An existing IPLS wins; the popped v2.4 frames are dropped then.
        if "IPLS" not in self:
            self.add(IPLS(encoding=f.encoding, people=people))

    # TDOR -> TORY
    if "TDOR" in self:
        f = self.pop("TDOR")
        if f.text:
            d = f.text[0]
            if d.year and "TORY" not in self:
                self.add(TORY(encoding=f.encoding, text="%04d" % d.year))

    # TDRC -> TYER, TDAT, TIME
    if "TDRC" in self:
        f = self.pop("TDRC")
        if f.text:
            d = f.text[0]
            if d.year and "TYER" not in self:
                self.add(TYER(encoding=f.encoding, text="%04d" % d.year))
            if d.month and d.day and "TDAT" not in self:
                self.add(TDAT(encoding=f.encoding,
                              text="%02d%02d" % (d.day, d.month)))
            # Compare against None rather than truthiness: hour 0 and
            # minute 0 are valid values, and a plain truth test silently
            # dropped times such as 00:30 or 12:00.
            # NOTE(review): assumes the timestamp object leaves missing
            # components as None -- confirm against ID3TimeStamp.
            if (d.hour is not None and d.minute is not None and
                    "TIME" not in self):
                self.add(TIME(encoding=f.encoding,
                              text="%02d%02d" % (d.hour, d.minute)))

    # New frames added in v2.4
    v24_frames = [
        'ASPI', 'EQU2', 'RVA2', 'SEEK', 'SIGN', 'TDEN', 'TDOR',
        'TDRC', 'TDRL', 'TDTG', 'TIPL', 'TMCL', 'TMOO', 'TPRO',
        'TSOA', 'TSOP', 'TSOT', 'TSST',
    ]

    for key in v24_frames:
        if key in self:
            del(self[key])
|
||||
|
||||
|
||||
def delete(filename, delete_v1=True, delete_v2=True):
    """Remove tags from a file.

    Keyword arguments:

    * delete_v1 -- delete any ID3v1 tag
    * delete_v2 -- delete any ID3v2 tag

    The file is modified in place and is always closed again, even when
    one of the seek/truncate/delete operations raises.
    """

    with open(filename, 'rb+') as f:
        if delete_v1:
            try:
                f.seek(-128, 2)
            except IOError:
                # The file is too short to hold a 128 byte trailer.
                pass
            else:
                if f.read(3) == b"TAG":
                    f.seek(-128, 2)
                    f.truncate()

        # technically an insize=0 tag is invalid, but we delete it anyway
        # (primarily because we used to write it)
        if delete_v2:
            f.seek(0, 0)
            idata = f.read(10)
            try:
                # 3 byte magic, 3 skipped bytes (version + flags), 4 byte size
                id3, insize = unpack('>3sxxx4s', idata)
            except struct.error:
                # Fewer than 10 bytes were read, so there is no ID3v2
                # header. Return instead of pushing a -1 sentinel through
                # BitPaddedInt, which only accepts real size fields.
                return
            if id3 == b'ID3':
                insize = BitPaddedInt(insize)
                if insize >= 0:
                    delete_bytes(f, insize + 10, 0)
|
||||
|
||||
|
||||
# support open(filename) as interface
|
||||
Open = ID3
|
||||
|
||||
|
||||
# ID3v1.1 support.
|
||||
def ParseID3v1(string):
    """Parse an ID3v1 tag, returning a dict of ID3v2.4 frames.

    `string` is a byte string containing the tag; everything before the
    first b"TAG" marker is ignored. The result maps frame IDs ("TIT2",
    "TPE1", ...) to frame objects and only contains the fields that are
    set. None is returned when no well-formed tag is found.
    """

    try:
        string = string[string.index(b"TAG"):]
    except ValueError:
        return None
    if 128 < len(string) or len(string) < 124:
        return None

    # Issue #69 - Previous versions of Mutagen, when encountering
    # out-of-spec TDRC and TYER frames of less than four characters,
    # wrote only the characters available - e.g. "1" or "" - into the
    # year field. To parse those, reduce the size of the year field.
    # Amazingly, "0s" works as a struct format string.
    unpack_fmt = "3s30s30s30s%ds29sBB" % (len(string) - 124)

    try:
        tag, title, artist, album, year, comment, track, genre = unpack(
            unpack_fmt, string)
    except StructError:
        return None

    if tag != b"TAG":
        return None

    def fix(field):
        # Keep the text before the first NUL, without surrounding blanks.
        return field.split(b"\x00")[0].strip().decode('latin1')

    title, artist, album, year, comment = map(
        fix, [title, artist, album, year, comment])

    frames = {}
    if title:
        frames["TIT2"] = TIT2(encoding=0, text=title)
    if artist:
        frames["TPE1"] = TPE1(encoding=0, text=[artist])
    if album:
        frames["TALB"] = TALB(encoding=0, text=album)
    if year:
        frames["TDRC"] = TDRC(encoding=0, text=year)
    if comment:
        frames["COMM"] = COMM(
            encoding=0, lang="eng", desc="ID3v1 Comment", text=comment)
    # Don't read a track number if it looks like the comment was
    # padded with spaces instead of nulls (thanks, WinAmp).
    # A one-byte slice is compared because indexing a bytes object
    # yields an int on Python 3, which never equals b'\x00'.
    if track and (track != 32 or string[-3:-2] == b'\x00'):
        frames["TRCK"] = TRCK(encoding=0, text=str(track))
    if genre != 255:
        frames["TCON"] = TCON(encoding=0, text=str(genre))
    return frames
|
||||
|
||||
|
||||
def MakeID3v1(id3):
    """Build an ID3v1.1 tag byte string from a dict of ID3v2.4 frames.

    Missing frames produce empty (NUL filled) fields, a missing or
    unrepresentable track becomes 0 and an unknown genre becomes 255.
    """

    def latin1_field(frame_id, limit, width):
        # First text value of the frame, latin-1 encoded, cut to `limit`
        # bytes and NUL padded to `width` bytes; all NULs if absent.
        raw = b""
        if frame_id in id3:
            raw = id3[frame_id].text[0].encode('latin1', 'replace')[:limit]
        return raw + (b"\x00" * (width - len(raw)))

    title = latin1_field("TIT2", 30, 30)
    artist = latin1_field("TPE1", 30, 30)
    album = latin1_field("TALB", 30, 30)
    # Only 28 comment bytes are kept, so the 29th byte is always NUL.
    comment = latin1_field("COMM", 28, 29)

    track = b"\x00"
    if "TRCK" in id3:
        try:
            track = chr_(+id3["TRCK"])
        except ValueError:
            # Keep the "no track" byte.
            pass

    genre_byte = b"\xff"
    if "TCON" in id3:
        try:
            first_genre = id3["TCON"].genres[0]
        except IndexError:
            pass
        else:
            if first_genre in TCON.GENRES:
                genre_byte = chr_(TCON.GENRES.index(first_genre))

    if "TDRC" in id3:
        year = bytes(id3["TDRC"])
    elif "TYER" in id3:
        year = bytes(id3["TYER"])
    else:
        year = b""
    year = (year + b"\x00\x00\x00\x00")[:4]

    return (b"TAG" + title + artist + album + year + comment +
            track + genre_byte)
|
||||
|
||||
|
||||
class ID3FileType(mutagen.FileType):
    """An unknown type of file with ID3 tags."""

    # Tag class used by load() and add_tags() unless the caller
    # supplies another one.
    ID3 = ID3

    class _Info(mutagen.StreamInfo):
        """Placeholder stream info: nothing is read from the file."""

        length = 0

        def __init__(self, fileobj, offset):
            pass

        @staticmethod
        def pprint():
            return "Unknown format with ID3 tag"

    @staticmethod
    def score(filename, fileobj, header):
        """Return True when `header` starts with the ID3v2 magic."""
        return header.startswith(b"ID3")

    def add_tags(self, ID3=None):
        """Add an empty ID3 tag to the file.

        A custom tag reader may be used instead of the default
        mutagen.id3.ID3 object, e.g. an EasyID3 reader.

        Raises `error` if the file already has a tag.
        """
        if ID3 is None:
            ID3 = self.ID3
        if self.tags is None:
            self.ID3 = ID3
            self.tags = ID3()
        else:
            raise error("an ID3 tag already exists")

    def load(self, filename, ID3=None, **kwargs):
        """Load stream and tag information from a file.

        A custom tag reader may be used instead of the default
        mutagen.id3.ID3 object, e.g. an EasyID3 reader.

        Extra keyword arguments are passed on to the tag reader.
        """

        if ID3 is None:
            ID3 = self.ID3
        else:
            # If this was initialized with EasyID3, remember that for
            # when tags are auto-instantiated in add_tags.
            self.ID3 = ID3
        self.filename = filename
        try:
            self.tags = ID3(filename, **kwargs)
        except error:
            self.tags = None

        # No tags, or a tag object without a `size` attribute: let the
        # stream info class work without an offset.
        offset = getattr(self.tags, "size", None)

        # Open inside a `with` so a failing open() surfaces its own error.
        # The old try/finally referenced an unbound `fileobj` in that case.
        with open(filename, "rb") as fileobj:
            self.info = self._Info(fileobj, offset)
|
||||
@@ -0,0 +1,544 @@
|
||||
# Copyright 2006 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
import sys
|
||||
|
||||
if sys.version_info[0] != 2:
|
||||
raise ImportError("No longer available with Python 3, use mutagen.mp4")
|
||||
|
||||
"""Read and write MPEG-4 audio files with iTunes metadata.
|
||||
|
||||
This module will read MPEG-4 audio information and metadata,
|
||||
as found in Apple's M4A (aka MP4, M4B, M4P) files.
|
||||
|
||||
There is no official specification for this format. The source code
|
||||
for TagLib, FAAD, and various MPEG specifications at
|
||||
http://developer.apple.com/documentation/QuickTime/QTFF/,
|
||||
http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt,
|
||||
and http://wiki.multimedia.cx/index.php?title=Apple_QuickTime were all
|
||||
consulted.
|
||||
|
||||
This module does not support 64 bit atom sizes, and so will not
|
||||
work on metadata over 4GB.
|
||||
"""
|
||||
|
||||
import struct
|
||||
import sys
|
||||
|
||||
from cStringIO import StringIO
|
||||
|
||||
from ._compat import reraise
|
||||
from mutagen import FileType, Metadata, StreamInfo
|
||||
from mutagen._constants import GENRES
|
||||
from mutagen._util import cdata, insert_bytes, delete_bytes, DictProxy
|
||||
|
||||
|
||||
class error(IOError):
    """Base class of the exceptions defined by this module."""


class M4AMetadataError(error):
    """Raised when the iTunes metadata cannot be located or parsed."""


class M4AStreamInfoError(error):
    """Raised when the audio stream information cannot be read."""


class M4AMetadataValueError(ValueError, M4AMetadataError):
    """Raised when a tag value cannot be rendered into its atom."""
|
||||
|
||||
|
||||
import warnings
|
||||
warnings.warn(
|
||||
"mutagen.m4a is deprecated; use mutagen.mp4 instead.", DeprecationWarning)
|
||||
|
||||
|
||||
# This is not an exhaustive list of container atoms, but just the
|
||||
# ones this module needs to peek inside.
|
||||
_CONTAINERS = ["moov", "udta", "trak", "mdia", "meta", "ilst",
|
||||
"stbl", "minf", "stsd"]
|
||||
_SKIP_SIZE = {"meta": 4}
|
||||
|
||||
__all__ = ['M4A', 'Open', 'delete', 'M4ACover']
|
||||
|
||||
|
||||
class M4ACover(str):
    """Cover artwork: the raw image data as a str, tagged with its format.

    Attributes:
    imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG)
    """

    FORMAT_JPEG = 0x0D
    FORMAT_PNG = 0x0E

    def __new__(cls, data, imageformat=None):
        cover = str.__new__(cls, data)
        # JPEG is assumed when the caller does not say otherwise.
        cover.imageformat = (
            M4ACover.FORMAT_JPEG if imageformat is None else imageformat)
        # Only provide a `format` attribute where str does not already
        # have one of its own.
        if not hasattr(cover, "format"):
            cover.format = cover.imageformat
        return cover
|
||||
|
||||
|
||||
class Atom(object):
    """One atom of an MPEG-4 file, with its children if it is a container.

    Attributes:
    children -- list of child atoms (or None for non-container atoms)
    length -- length of this atom, including length and name
    name -- four byte name of the atom, as a str
    offset -- location in the constructor-given fileobj of this atom

    This structure should only be used internally by Mutagen.
    """

    children = None

    def __init__(self, fileobj):
        self.offset = fileobj.tell()
        header = fileobj.read(8)
        self.length, self.name = struct.unpack(">I4s", header)
        if self.length == 1:
            raise error("64 bit atom sizes are not supported")
        if self.length < 8:
            # Too small to even hold its own header: leave it childless
            # and do not seek anywhere.
            return

        end = self.offset + self.length
        if self.name not in _CONTAINERS:
            # Opaque atom: skip straight over its payload.
            fileobj.seek(end, 0)
            return

        self.children = []
        # Some containers carry extra bytes before their first child.
        fileobj.seek(_SKIP_SIZE.get(self.name, 0), 1)
        while fileobj.tell() < end:
            self.children.append(Atom(fileobj))

    @staticmethod
    def render(name, data):
        """Render raw atom data."""
        # this raises OverflowError if Py_ssize_t can't handle the atom data
        total = len(data) + 8
        if total > 0xFFFFFFFF:
            # Size field of 1 announces a 64 bit size after the name.
            header = struct.pack(">I4sQ", 1, name, total + 8)
        else:
            header = struct.pack(">I4s", total, name)
        return header + data

    def __getitem__(self, remaining):
        """Look up a child atom, potentially recursively.

        e.g. atom['udta', 'meta'] => <Atom name='meta' ...>
        """
        if not remaining:
            return self
        if self.children is None:
            raise KeyError("%r is not a container" % self.name)
        wanted = remaining[0]
        for child in self.children:
            if child.name == wanted:
                return child[remaining[1:]]
        raise KeyError("%r not found" % remaining[0])

    def __repr__(self):
        klass = self.__class__.__name__
        if self.children is None:
            return "<%s name=%r length=%r offset=%r>" % (
                klass, self.name, self.length, self.offset)

        # Children are listed one per line, indented by one space per level.
        lines = []
        for child in self.children:
            for line in repr(child).splitlines():
                lines.append(" " + line)
        return "<%s name=%r length=%r offset=%r\n%s>" % (
            klass, self.name, self.length, self.offset, "\n".join(lines))
|
||||
|
||||
|
||||
class Atoms(object):
    """The top-level atoms of a file.

    Attributes:
    atoms -- a list of top-level atoms as Atom objects

    This structure should only be used internally by Mutagen.
    """

    def __init__(self, fileobj):
        self.atoms = []
        # Measure the file, then parse atoms from the start until the
        # read position reaches the end.
        fileobj.seek(0, 2)
        end = fileobj.tell()
        fileobj.seek(0)
        while fileobj.tell() < end:
            self.atoms.append(Atom(fileobj))

    def path(self, *names):
        """Look up and return the complete path of an atom.

        For example, atoms.path('moov', 'udta', 'meta') will return a
        list of three atoms, corresponding to the moov, udta, and meta
        atoms.
        """
        found = []
        node = self
        for name in names:
            # Descend one level at a time, remembering every atom passed.
            node = node[(name,)]
            found.append(node)
        return found

    def __getitem__(self, names):
        """Look up a child atom.

        'names' may be a list of atoms (['moov', 'udta']) or a string
        specifying the complete path ('moov.udta').
        """
        if isinstance(names, basestring):
            names = names.split(".")
        first = names[0]
        for child in self.atoms:
            if child.name == first:
                return child[names[1:]]
        raise KeyError("%s not found" % names[0])

    def __repr__(self):
        return "\n".join([repr(child) for child in self.atoms])
|
||||
|
||||
|
||||
class M4ATags(DictProxy, Metadata):
    """Dictionary containing Apple iTunes metadata list key/values.

    Keys are four byte identifiers, except for freeform ('----')
    keys. Values are usually unicode strings, but some atoms have a
    special structure:
        cpil -- boolean
        trkn, disk -- tuple of 16 bit ints (current, total)
        tmpo -- 16 bit int
        covr -- an M4ACover object (which is a tagged str)
        gnre -- not supported. Use '\\xa9gen' instead.

    The freeform '----' frames use a key in the format '----:mean:name'
    where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique
    identifier for this frame. The value is a str, but is probably
    text that can be decoded as UTF-8.

    M4A tag data cannot exist outside of the structure of an M4A file,
    so this class should not be manually instantiated.

    Unknown non-text tags are removed.
    """

    def load(self, atoms, fileobj):
        """Fill the dictionary from the children of moov.udta.meta.ilst.

        Raises M4AMetadataError when the file has no such atom.
        """
        try:
            ilst = atoms["moov.udta.meta.ilst"]
        except KeyError as key:
            raise M4AMetadataError(key)
        for atom in ilst.children:
            # Read the payload that follows the 8 byte atom header.
            fileobj.seek(atom.offset + 8)
            data = fileobj.read(atom.length - 8)
            # Atoms without a dedicated parser are treated as text.
            parse = self.__atoms.get(atom.name, (M4ATags.__parse_text,))[0]
            parse(self, atom, data)

    @staticmethod
    def __key_sort(item1, item2):
        # cmp-style comparison of two (key, value) items.
        (key1, v1) = item1
        (key2, v2) = item2
        # iTunes always writes the tags in order of "relevance", try
        # to copy it as closely as possible.
        order = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb",
                 "\xa9gen", "gnre", "trkn", "disk",
                 "\xa9day", "cpil", "tmpo", "\xa9too",
                 "----", "covr", "\xa9lyr"]
        order = dict(zip(order, range(len(order))))
        last = len(order)
        # If there's no key-based way to distinguish, order by length.
        # If there's still no way, go by string comparison on the
        # values, so we at least have something deterministic.
        return (cmp(order.get(key1[:4], last), order.get(key2[:4], last)) or
                cmp(len(v1), len(v2)) or cmp(v1, v2))

    def save(self, filename):
        """Save the metadata to the given filename."""
        values = []
        items = self.items()
        items.sort(self.__key_sort)
        for key, value in items:
            # Keys without a dedicated renderer are written as text.
            render = self.__atoms.get(
                key[:4], (None, M4ATags.__render_text))[1]
            values.append(render(self, key, value))
        data = Atom.render("ilst", "".join(values))

        # Find the old atoms.
        fileobj = open(filename, "rb+")
        try:
            atoms = Atoms(fileobj)

            moov = atoms["moov"]

            if moov != atoms.atoms[-1]:
                # "Free" the old moov block. Something in the mdat
                # block is not happy when its offset changes and it
                # won't play back. So, rather than try to figure that
                # out, just move the moov atom to the end of the file.
                offset = self.__move_moov(fileobj, moov)
            else:
                offset = 0

            try:
                path = atoms.path("moov", "udta", "meta", "ilst")
            except KeyError:
                self.__save_new(fileobj, atoms, data, offset)
            else:
                self.__save_existing(fileobj, atoms, path, data, offset)
        finally:
            fileobj.close()

    def __move_moov(self, fileobj, moov):
        # Copy the moov atom to the end of the file, overwrite its old
        # location with a 'free' atom of the same length, and return the
        # distance it moved.
        fileobj.seek(moov.offset)
        data = fileobj.read(moov.length)
        fileobj.seek(moov.offset)
        free = Atom.render("free", "\x00" * (moov.length - 8))
        fileobj.write(free)
        fileobj.seek(0, 2)
        # Figure out how far we have to shift all our successive
        # seek calls, relative to what the atoms say.
        old_end = fileobj.tell()
        fileobj.write(data)
        return old_end - moov.offset

    def __save_new(self, fileobj, atoms, ilst, offset):
        # No ilst yet: build a meta atom (hdlr + ilst) and insert it
        # directly after the udta header, then grow moov and udta.
        hdlr = Atom.render("hdlr", "\x00" * 8 + "mdirappl" + "\x00" * 9)
        meta = Atom.render("meta", "\x00\x00\x00\x00" + hdlr + ilst)
        moov, udta = atoms.path("moov", "udta")
        insert_bytes(fileobj, len(meta), udta.offset + offset + 8)
        fileobj.seek(udta.offset + offset + 8)
        fileobj.write(meta)
        self.__update_parents(fileobj, [moov, udta], len(meta), offset)

    def __save_existing(self, fileobj, atoms, path, data, offset):
        # Replace the old ilst atom.
        ilst = path.pop()
        delta = len(data) - ilst.length
        fileobj.seek(ilst.offset + offset)
        # Grow or shrink the file so the new atom fits exactly.
        if delta > 0:
            insert_bytes(fileobj, delta, ilst.offset + offset)
        elif delta < 0:
            delete_bytes(fileobj, -delta, ilst.offset + offset)
        fileobj.seek(ilst.offset + offset)
        fileobj.write(data)
        self.__update_parents(fileobj, path, delta, offset)

    def __update_parents(self, fileobj, path, delta, offset):
        # Update all parent atoms with the new size.
        for atom in path:
            fileobj.seek(atom.offset + offset)
            size = cdata.uint_be(fileobj.read(4)) + delta
            fileobj.seek(atom.offset + offset)
            fileobj.write(cdata.to_uint_be(size))

    def __render_data(self, key, flags, data):
        # Wrap `data` in a 'data' atom (flags word plus a zero word)
        # nested inside an atom named `key`.
        data = struct.pack(">2I", flags, 0) + data
        return Atom.render(key, Atom.render("data", data))

    def __parse_freeform(self, atom, data):
        # The payload is read as three length-prefixed parts in a row:
        # mean, name and the value.
        try:
            fileobj = StringIO(data)
            mean_length = cdata.uint_be(fileobj.read(4))
            # skip over 8 bytes of atom name, flags
            mean = fileobj.read(mean_length - 4)[8:]
            name_length = cdata.uint_be(fileobj.read(4))
            name = fileobj.read(name_length - 4)[8:]
            value_length = cdata.uint_be(fileobj.read(4))
            # Name, flags, and reserved bytes
            value = fileobj.read(value_length - 4)[12:]
        except struct.error:
            # Some ---- atoms have no data atom, I have no clue why
            # they actually end up in the file.
            pass
        else:
            self["%s:%s:%s" % (atom.name, mean, name)] = value

    def __render_freeform(self, key, value):
        # Inverse of __parse_freeform: key is '----:mean:name'.
        dummy, mean, name = key.split(":", 2)
        mean = struct.pack(">I4sI", len(mean) + 12, "mean", 0) + mean
        name = struct.pack(">I4sI", len(name) + 12, "name", 0) + name
        value = struct.pack(">I4s2I", len(value) + 16, "data", 0x1, 0) + value
        final = mean + name + value
        return Atom.render("----", final)

    def __parse_pair(self, atom, data):
        # Two unsigned 16 bit ints at bytes 18..21 of the payload.
        self[atom.name] = struct.unpack(">2H", data[18:22])

    def __render_pair(self, key, value):
        # Pair written with a trailing zero short after the total.
        track, total = value
        if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
            data = struct.pack(">4H", 0, track, total, 0)
            return self.__render_data(key, 0, data)
        else:
            raise M4AMetadataValueError("invalid numeric pair %r" % (value,))

    def __render_pair_no_trailing(self, key, value):
        # Same as __render_pair, minus the trailing zero short.
        track, total = value
        if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
            data = struct.pack(">3H", 0, track, total)
            return self.__render_data(key, 0, data)
        else:
            raise M4AMetadataValueError("invalid numeric pair %r" % (value,))

    def __parse_genre(self, atom, data):
        # Translate to a freeform genre.
        genre = cdata.short_be(data[16:18])
        # The stored number is the GENRES index plus one. Check the range
        # explicitly: 0 or a negative number would otherwise become a
        # negative list index and silently select a genre from the end
        # of the table instead of being ignored.
        if "\xa9gen" not in self and 1 <= genre <= len(GENRES):
            self["\xa9gen"] = GENRES[genre - 1]

    def __parse_tempo(self, atom, data):
        self[atom.name] = cdata.short_be(data[16:18])

    def __render_tempo(self, key, value):
        if 0 <= value < 1 << 16:
            return self.__render_data(key, 0x15, cdata.to_ushort_be(value))
        else:
            raise M4AMetadataValueError("invalid short integer %r" % value)

    def __parse_compilation(self, atom, data):
        try:
            self[atom.name] = bool(ord(data[16:17]))
        except TypeError:
            # The payload was too short to contain the flag byte.
            self[atom.name] = False

    def __render_compilation(self, key, value):
        return self.__render_data(key, 0x15, chr(bool(value)))

    def __parse_cover(self, atom, data):
        # Only the first 'data' child is read; its flags word carries
        # the image format.
        length, name, imageformat = struct.unpack(">I4sI", data[:12])
        if name != "data":
            raise M4AMetadataError(
                "unexpected atom %r inside 'covr'" % name)
        if imageformat not in (M4ACover.FORMAT_JPEG, M4ACover.FORMAT_PNG):
            imageformat = M4ACover.FORMAT_JPEG
        self[atom.name] = M4ACover(data[16:length], imageformat)

    def __render_cover(self, key, value):
        try:
            imageformat = value.imageformat
        except AttributeError:
            # A plain str was assigned; assume JPEG.
            imageformat = M4ACover.FORMAT_JPEG
        data = Atom.render("data", struct.pack(">2I", imageformat, 0) + value)
        return Atom.render(key, data)

    def __parse_text(self, atom, data):
        flags = cdata.uint_be(data[8:12])
        # Only payloads flagged as text (1) are kept; anything else is
        # dropped.
        if flags == 1:
            self[atom.name] = data[16:].decode('utf-8', 'replace')

    def __render_text(self, key, value):
        return self.__render_data(key, 0x1, value.encode('utf-8'))

    def delete(self, filename):
        """Remove all tags and write the now empty list to `filename`."""
        self.clear()
        self.save(filename)

    # Atom name -> (parse function, render function).
    __atoms = {
        "----": (__parse_freeform, __render_freeform),
        "trkn": (__parse_pair, __render_pair),
        "disk": (__parse_pair, __render_pair_no_trailing),
        "gnre": (__parse_genre, None),
        "tmpo": (__parse_tempo, __render_tempo),
        "cpil": (__parse_compilation, __render_compilation),
        "covr": (__parse_cover, __render_cover),
    }

    def pprint(self):
        """Return the tags as 'key=value' lines."""
        values = []
        for key, value in self.iteritems():
            key = key.decode('latin1')
            try:
                values.append("%s=%s" % (key, value))
            except UnicodeDecodeError:
                # Binary values are summarized by their length.
                values.append("%s=[%d bytes of data]" % (key, len(value)))
        return "\n".join(values)
|
||||
|
||||
|
||||
class M4AInfo(StreamInfo):
    """Stream information read from the first track of an MPEG-4 file.

    Attributes:
    bitrate -- bitrate in bits per second, as an int
    length -- file length in seconds, as a float
    """

    bitrate = 0

    def __init__(self, atoms, fileobj):
        handler = atoms["moov.trak.mdia.hdlr"]
        fileobj.seek(handler.offset)
        handler_data = fileobj.read(handler.length)
        if "soun" not in handler_data:
            raise M4AStreamInfoError("track has no audio data")

        mdhd = atoms["moov.trak.mdia.mdhd"]
        fileobj.seek(mdhd.offset)
        header = fileobj.read(mdhd.length)
        # Byte 8 selects the layout: 0 means two 32 bit fields at offset
        # 20, anything else a 32 bit and a 64 bit field at offset 28.
        if ord(header[8]) == 0:
            start, fmt = 20, ">2I"
        else:
            start, fmt = 28, ">IQ"
        stop = start + struct.calcsize(fmt)
        timescale, duration = struct.unpack(fmt, header[start:stop])
        self.length = float(duration) / timescale

        try:
            stsd = atoms["moov.trak.mdia.minf.stbl.stsd"]
            fileobj.seek(stsd.offset)
            stsd_data = fileobj.read(stsd.length)
            self.bitrate = cdata.uint_be(stsd_data[-17:-13])
        except (ValueError, KeyError):
            # Bitrate values are optional.
            pass

    def pprint(self):
        summary = (self.length, self.bitrate)
        return "MPEG-4 audio, %.2f seconds, %d bps" % summary
|
||||
|
||||
|
||||
class M4A(FileType):
    """An MPEG-4 audio file, probably containing AAC.

    If more than one track is present in the file, the first is used.
    Only audio ('soun') tracks will be read.
    """

    _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"]

    def load(self, filename):
        self.filename = filename
        with open(filename, "rb") as fileobj:
            atoms = Atoms(fileobj)

            # Any failure while reading stream info is re-raised as
            # M4AStreamInfoError, keeping the original traceback.
            try:
                self.info = M4AInfo(atoms, fileobj)
            except StandardError as err:
                traceback = sys.exc_info()[2]
                reraise(M4AStreamInfoError, err, traceback)

            # A file without metadata simply has no tags; any other
            # failure is re-raised as M4AMetadataError.
            try:
                self.tags = M4ATags(atoms, fileobj)
            except M4AMetadataError:
                self.tags = None
            except StandardError as err:
                traceback = sys.exc_info()[2]
                reraise(M4AMetadataError, err, traceback)

    def add_tags(self):
        self.tags = M4ATags()

    @staticmethod
    def score(filename, fileobj, header):
        # One point for each marker found in the header.
        has_ftyp = "ftyp" in header
        has_mp4 = "mp4" in header
        return has_ftyp + has_mp4
|
||||
|
||||
|
||||
Open = M4A
|
||||
|
||||
|
||||
def delete(filename):
    """Remove tags from a file."""

    audio = M4A(filename)
    audio.delete()
|
||||
@@ -0,0 +1,86 @@
|
||||
# A Monkey's Audio (APE) reader/tagger
|
||||
#
|
||||
# Copyright 2006 Lukas Lalinsky <lalinsky@gmail.com>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Monkey's Audio streams with APEv2 tags.
|
||||
|
||||
Monkey's Audio is a very efficient lossless audio compressor developed
|
||||
by Matt Ashland.
|
||||
|
||||
For more information, see http://www.monkeysaudio.com/.
|
||||
"""
|
||||
|
||||
__all__ = ["MonkeysAudio", "Open", "delete"]
|
||||
|
||||
import struct
|
||||
|
||||
from ._compat import endswith
|
||||
from mutagen import StreamInfo
|
||||
from mutagen.apev2 import APEv2File, error, delete
|
||||
from mutagen._util import cdata
|
||||
|
||||
|
||||
class MonkeysAudioHeaderError(error):
    """Raised when a file does not start with a Monkey's Audio header."""
|
||||
|
||||
|
||||
class MonkeysAudioInfo(StreamInfo):
    """Monkey's Audio stream information.

    Attributes:

    * channels -- number of audio channels
    * length -- file length in seconds, as a float
    * sample_rate -- audio sampling rate in Hz
    * bits_per_sample -- bits per sample (0 when the value is not read
      from the header, which is the case for streams older than 3.98)
    * version -- Monkey's Audio stream version, as a float (eg: 3.99)
    """

    def __init__(self, fileobj):
        """Parse the 76 byte header at the current position of fileobj.

        Raises MonkeysAudioHeaderError if fewer than 76 bytes are
        available or they do not start with b"MAC ".
        """
        header = fileobj.read(76)
        if len(header) != 76 or not header.startswith(b"MAC "):
            raise MonkeysAudioHeaderError("not a Monkey's Audio file")
        self.version = cdata.ushort_le(header[4:6])
        if self.version >= 3980:
            (blocks_per_frame, final_frame_blocks, total_frames,
             self.bits_per_sample, self.channels,
             self.sample_rate) = struct.unpack("<IIIHHI", header[56:76])
        else:
            compression_level = cdata.ushort_le(header[6:8])
            self.channels, self.sample_rate = struct.unpack(
                "<HI", header[10:16])
            total_frames, final_frame_blocks = struct.unpack(
                "<II", header[24:32])
            # The frame size is not stored here; it follows from the
            # version and, for 3.80-3.89, the compression level.
            if self.version >= 3950:
                blocks_per_frame = 73728 * 4
            elif self.version >= 3900 or (self.version >= 3800 and
                                          compression_level == 4):
                blocks_per_frame = 73728
            else:
                blocks_per_frame = 9216
            # This branch reads no bits-per-sample value. Set the
            # documented attribute anyway so it exists for every stream.
            self.bits_per_sample = 0
        self.version /= 1000.0
        self.length = 0.0
        if self.sample_rate != 0 and total_frames > 0:
            # Every frame but the last one is full.
            total_blocks = ((total_frames - 1) * blocks_per_frame +
                            final_frame_blocks)
            self.length = float(total_blocks) / self.sample_rate

    def pprint(self):
        return "Monkey's Audio %.2f, %.2f seconds, %d Hz" % (
            self.version, self.length, self.sample_rate)
|
||||
|
||||
|
||||
class MonkeysAudio(APEv2File):
    """A Monkey's Audio file carrying APEv2 tags."""

    _Info = MonkeysAudioInfo
    _mimes = ["audio/ape", "audio/x-ape"]

    @staticmethod
    def score(filename, fileobj, header):
        # One point for the magic bytes, one for the file extension.
        magic_matches = header.startswith(b"MAC ")
        suffix_matches = endswith(filename.lower(), ".ape")
        return magic_matches + suffix_matches
|
||||
|
||||
|
||||
Open = MonkeysAudio
|
||||
@@ -0,0 +1,283 @@
|
||||
# MP3 stream header information support for Mutagen.
|
||||
# Copyright 2006 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""MPEG audio stream information and tags."""
|
||||
|
||||
import os
|
||||
import struct
|
||||
|
||||
from ._compat import endswith
|
||||
from mutagen import StreamInfo
|
||||
from mutagen.id3 import ID3FileType, BitPaddedInt, delete
|
||||
|
||||
__all__ = ["MP3", "Open", "delete", "MP3"]
|
||||
|
||||
|
||||
class error(RuntimeError):
    """Base class of the exceptions defined by this module."""


class HeaderNotFoundError(error, IOError):
    """An `error` that is also an IOError, named for a missing header."""


class InvalidMPEGHeader(error, IOError):
    """An `error` that is also an IOError, named for an invalid header."""
|
||||
|
||||
|
||||
# Mode values.
|
||||
STEREO, JOINTSTEREO, DUALCHANNEL, MONO = range(4)
|
||||
|
||||
|
||||
class MPEGInfo(StreamInfo):
    """MPEG audio stream information

    Parse information about an MPEG audio file. This also reads the
    Xing and VBRI VBR header formats.

    This code was implemented based on the format documentation at
    http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm.

    Useful attributes:

    * length -- audio length, in seconds
    * bitrate -- audio bitrate, in bits per second
    * sketchy -- if true, the file may not be valid MPEG audio

    Useless attributes:

    * version -- MPEG version (1, 2, 2.5)
    * layer -- 1, 2, or 3
    * mode -- One of STEREO, JOINTSTEREO, DUALCHANNEL, or MONO (0-3)
    * protected -- whether or not the file is "protected"
    * padding -- whether or not audio frames are padded
    * sample_rate -- audio sample rate, in Hz
    """

    # Map (version, layer) tuples to bitrates (in kbps, indexed by the
    # 4-bit bitrate field of the frame header).
    __BITRATE = {
        (1, 1): range(0, 480, 32),
        (1, 2): [0, 32, 48, 56, 64, 80, 96, 112, 128,
                 160, 192, 224, 256, 320, 384],
        (1, 3): [0, 32, 40, 48, 56, 64, 80, 96, 112,
                 128, 160, 192, 224, 256, 320],
        (2, 1): [0, 32, 48, 56, 64, 80, 96, 112, 128,
                 144, 160, 176, 192, 224, 256],
        (2, 2): [0, 8, 16, 24, 32, 40, 48, 56, 64,
                 80, 96, 112, 128, 144, 160],
    }

    # Version 2 layer 3 shares the layer 2 table, and version 2.5 reuses
    # all of the version 2 tables.
    __BITRATE[(2, 3)] = __BITRATE[(2, 2)]
    for i in range(1, 4):
        __BITRATE[(2.5, i)] = __BITRATE[(2, i)]

    # Map version to sample rates (indexed by the 2-bit sample rate field).
    __RATES = {
        1: [44100, 48000, 32000],
        2: [22050, 24000, 16000],
        2.5: [11025, 12000, 8000]
    }

    sketchy = False

    def __init__(self, fileobj, offset=None):
        """Parse MPEG stream information from a file-like object.

        If an offset argument is given, it is used to start looking
        for stream information and Xing headers; otherwise, ID3v2 tags
        will be skipped automatically. A correct offset can make
        loading files significantly faster.

        Raises HeaderNotFoundError if not even a single frame header
        can be found at the starting offset.
        """

        try:
            size = os.path.getsize(fileobj.name)
        except (IOError, OSError, AttributeError):
            # No usable name on the file object: measure by seeking.
            fileobj.seek(0, 2)
            size = fileobj.tell()

        # If we don't get an offset, try to skip an ID3v2 tag.
        if offset is None:
            fileobj.seek(0, 0)
            idata = fileobj.read(10)
            try:
                id3, insize = struct.unpack('>3sxxx4s', idata)
            except struct.error:
                id3, insize = '', 0
            insize = BitPaddedInt(insize)
            if id3 == b'ID3' and insize > 0:
                offset = insize + 10
            else:
                offset = 0

        # Try to find two valid headers (meaning, very likely MPEG data)
        # at the given offset, 30% through the file, 60% through the file,
        # and 90% through the file.
        for i in [offset, 0.3 * size, 0.6 * size, 0.9 * size]:
            try:
                self.__try(fileobj, int(i), size - offset)
            except error:
                pass
            else:
                break
        # If we can't find any two consecutive frames, try to find just
        # one frame back at the original offset given.
        else:
            self.__try(fileobj, offset, size - offset, False)
            self.sketchy = True

    def __try(self, fileobj, offset, real_size, check_second=True):
        """Sync to a frame at/after offset and fill in the attributes.

        real_size is the number of bytes assumed to hold audio; it is
        used for the bitrate-based length estimate. When check_second
        is true a second frame sync must follow the first frame.

        Raises HeaderNotFoundError when syncing fails.
        """
        # This is going to be one really long function; bear with it,
        # because there's not really a sane point to cut it up.
        fileobj.seek(offset, 0)

        # We "know" we have an MPEG file if we find two frames that look like
        # valid MPEG data. If we can't find them in 32k of reads, something
        # is horribly wrong (the longest frame can only be about 4k). This
        # is assuming the offset didn't lie.
        data = fileobj.read(32768)

        frame_1 = data.find(b"\xff")
        while 0 <= frame_1 <= len(data) - 4:
            frame_data = struct.unpack(">I", data[frame_1:frame_1 + 4])[0]
            if (frame_data >> 16) & 0xE0 != 0xE0:
                frame_1 = data.find(b"\xff", frame_1 + 2)
            else:
                version = (frame_data >> 19) & 0x3
                layer = (frame_data >> 17) & 0x3
                protection = (frame_data >> 16) & 0x1
                bitrate = (frame_data >> 12) & 0xF
                sample_rate = (frame_data >> 10) & 0x3
                padding = (frame_data >> 9) & 0x1
                #private = (frame_data >> 8) & 0x1
                self.mode = (frame_data >> 6) & 0x3
                #mode_extension = (frame_data >> 4) & 0x3
                #copyright = (frame_data >> 3) & 0x1
                #original = (frame_data >> 2) & 0x1
                #emphasis = (frame_data >> 0) & 0x3
                # Reject reserved/free-format field values and keep looking.
                if (version == 1 or layer == 0 or sample_rate == 0x3 or
                        bitrate == 0 or bitrate == 0xF):
                    frame_1 = data.find(b"\xff", frame_1 + 2)
                else:
                    break
        else:
            raise HeaderNotFoundError("can't sync to an MPEG frame")

        # There is a serious problem here, which is that many flags
        # in an MPEG header are backwards.
        self.version = [2.5, None, 2, 1][version]
        self.layer = 4 - layer
        self.protected = not protection
        self.padding = bool(padding)

        self.bitrate = self.__BITRATE[(self.version, self.layer)][bitrate]
        self.bitrate *= 1000
        self.sample_rate = self.__RATES[self.version][sample_rate]

        # Frame lengths are whole byte counts, so use floor division; with
        # true division (Python 3) the layer 1 result would be scaled by 4
        # *before* truncation and point at the wrong second-frame offset.
        if self.layer == 1:
            frame_length = (
                12 * self.bitrate // self.sample_rate + padding) * 4
            frame_size = 384
        elif self.version >= 2 and self.layer == 3:
            frame_length = 72 * self.bitrate // self.sample_rate + padding
            frame_size = 576
        else:
            frame_length = 144 * self.bitrate // self.sample_rate + padding
            frame_size = 1152

        if check_second:
            possible = int(frame_1 + frame_length)
            if possible > len(data) + 4:
                raise HeaderNotFoundError("can't sync to second MPEG frame")
            try:
                frame_data = struct.unpack(
                    ">H", data[possible:possible + 2])[0]
            except struct.error:
                raise HeaderNotFoundError("can't sync to second MPEG frame")
            if frame_data & 0xFFE0 != 0xFFE0:
                raise HeaderNotFoundError("can't sync to second MPEG frame")

        self.length = 8 * real_size / float(self.bitrate)

        # Try to find/parse the Xing header, which trumps the above length
        # and bitrate calculation.
        fileobj.seek(offset, 0)
        data = fileobj.read(32768)
        try:
            xing = data[:-4].index(b"Xing")
        except ValueError:
            # Try to find/parse the VBRI header, which trumps the above length
            # calculation.
            try:
                vbri = data[:-24].index(b"VBRI")
            except ValueError:
                pass
            else:
                # If a VBRI header was found, this is definitely MPEG audio.
                self.sketchy = False
                vbri_version = struct.unpack('>H', data[vbri + 4:vbri + 6])[0]
                if vbri_version == 1:
                    frame_count = struct.unpack(
                        '>I', data[vbri + 14:vbri + 18])[0]
                    samples = float(frame_size * frame_count)
                    self.length = (samples / self.sample_rate) or self.length
        else:
            # If a Xing header was found, this is definitely MPEG audio.
            self.sketchy = False
            flags = struct.unpack('>I', data[xing + 4:xing + 8])[0]
            if flags & 0x1:
                frame_count = struct.unpack('>I', data[xing + 8:xing + 12])[0]
                samples = float(frame_size * frame_count)
                self.length = (samples / self.sample_rate) or self.length
            # A zero length would make the division below blow up; keep the
            # header bitrate in that case.
            if flags & 0x2 and self.length:
                byte_count = struct.unpack(
                    '>I', data[xing + 12:xing + 16])[0]
                self.bitrate = int((byte_count * 8) // self.length)

    def pprint(self):
        """Return a one-line human readable summary of the stream."""
        s = "MPEG %s layer %d, %d bps, %s Hz, %.2f seconds" % (
            self.version, self.layer, self.bitrate, self.sample_rate,
            self.length)
        if self.sketchy:
            s += " (sketchy)"
        return s
|
||||
|
||||
|
||||
class MP3(ID3FileType):
    """An MPEG audio file; in practice most often MPEG-1 Layer 3.

    :ivar info: :class:`MPEGInfo`
    :ivar tags: :class:`ID3 <mutagen.id3.ID3>`
    """

    _Info = MPEGInfo

    _mimes = ["audio/mpeg", "audio/mpg", "audio/x-mpeg"]

    @property
    def mime(self):
        """Layer-specific mime types followed by the inherited ones."""
        layer = self.info.layer
        layer_mimes = ["audio/mp%d" % layer, "audio/x-mp%d" % layer]
        return layer_mimes + super(MP3, self).mime

    @staticmethod
    def score(filename, fileobj, header):
        """Rate how likely the file is MPEG audio.

        An ID3 header counts for two points, each matching file
        extension for one.
        """
        filename = filename.lower()

        points = header.startswith(b"ID3") * 2
        for extension in (b".mp3", b".mp2", b".mpg", b".mpeg"):
            points += endswith(filename, extension)
        return points
|
||||
|
||||
|
||||
# Module-level alias: `Open` is another name for the MP3 class.
Open = MP3
|
||||
|
||||
|
||||
class EasyMP3(MP3):
    """MP3 variant whose tags are handled by EasyID3.

    :ivar info: :class:`MPEGInfo`
    :ivar tags: :class:`EasyID3 <mutagen.easyid3.EasyID3>`
    """

    # Imported inside the class body so that only the ``ID3`` class
    # attribute ends up bound to EasyID3.
    from mutagen.easyid3 import EasyID3
    ID3 = EasyID3
    del EasyID3
|
||||
@@ -0,0 +1,837 @@
|
||||
# Copyright 2006 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Read and write MPEG-4 audio files with iTunes metadata.
|
||||
|
||||
This module will read MPEG-4 audio information and metadata,
|
||||
as found in Apple's MP4 (aka M4A, M4B, M4P) files.
|
||||
|
||||
There is no official specification for this format. The source code
|
||||
for TagLib, FAAD, and various MPEG specifications at
|
||||
|
||||
* http://developer.apple.com/documentation/QuickTime/QTFF/
|
||||
* http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt
|
||||
* http://standards.iso.org/ittf/PubliclyAvailableStandards/\
|
||||
c041828_ISO_IEC_14496-12_2005(E).zip
|
||||
* http://wiki.multimedia.cx/index.php?title=Apple_QuickTime
|
||||
|
||||
were all consulted.
|
||||
"""
|
||||
|
||||
import struct
|
||||
import sys
|
||||
|
||||
from mutagen import FileType, Metadata, StreamInfo
|
||||
from mutagen._constants import GENRES
|
||||
from mutagen._util import cdata, insert_bytes, DictProxy, utf8
|
||||
from mutagen._compat import reraise, PY2, string_types, text_type, chr_
|
||||
|
||||
|
||||
class error(IOError):
    """Base class of the MP4 exceptions defined in this module."""
|
||||
|
||||
|
||||
class MP4MetadataError(error):
    """Raised for malformed atoms or metadata that cannot be parsed."""
|
||||
|
||||
|
||||
class MP4StreamInfoError(error):
    """Raised when stream information cannot be read (no audio track)."""
|
||||
|
||||
|
||||
class MP4MetadataValueError(ValueError, MP4MetadataError):
    """Raised when a tag key or value cannot be rendered into an atom."""
|
||||
|
||||
|
||||
# Container atoms whose children this module needs to look at; the list
# is deliberately limited to those and is not exhaustive.
_CONTAINERS = [
    b"moov", b"udta", b"trak", b"mdia", b"meta",
    b"ilst", b"stbl", b"minf", b"moof", b"traf",
]
# Number of extra bytes to skip after a container's header before its
# first child atom starts.
_SKIP_SIZE = {b"meta": 4}
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = ['MP4', 'Open', 'delete', 'MP4Cover', 'MP4FreeForm']
|
||||
|
||||
|
||||
class MP4Cover(bytes):
    """Cover artwork: the raw image bytes tagged with an image format.

    Attributes:

    * imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG)
    """

    FORMAT_JPEG, FORMAT_PNG = 0x0D, 0x0E

    def __new__(cls, data, *args, **kwargs):
        # Only the payload goes to bytes; the format is set in __init__.
        return bytes.__new__(cls, data)

    def __init__(self, data, imageformat=FORMAT_JPEG):
        self.imageformat = imageformat
        # Also expose the value as ``format``, unless the underlying
        # type already has an attribute of that name.
        if not hasattr(self, "format"):
            self.format = imageformat
|
||||
|
||||
|
||||
class MP4FreeForm(bytes):
    """A freeform tag value: raw bytes tagged with a data format.

    Attributes:

    * dataformat -- format of the data (either FORMAT_TEXT or FORMAT_DATA)
    """

    FORMAT_DATA, FORMAT_TEXT = 0x0, 0x1

    def __new__(cls, data, *args, **kwargs):
        # Only the payload goes to bytes; the format is set in __init__.
        return bytes.__new__(cls, data)

    def __init__(self, data, dataformat=FORMAT_TEXT):
        self.dataformat = dataformat
|
||||
|
||||
|
||||
class Atom(object):
    """A single atom parsed from an MP4 file.

    Attributes:
    children -- list child atoms (or None for non-container atoms)
    length -- length of this atom, including length and name
    name -- four byte name of the atom, as a str
    offset -- location in the constructor-given fileobj of this atom

    This structure should only be used internally by Mutagen.
    """

    children = None

    def __init__(self, fileobj, level=0):
        """Parse the atom at the current position of fileobj.

        level is the nesting depth (0 for top-level atoms). Known
        container atoms are parsed recursively; for any other atom the
        file position is moved past its end.
        """
        self.offset = fileobj.tell()
        self.length, self.name = struct.unpack(">I4s", fileobj.read(8))
        if self.length == 1:
            # A length of 1 means the real size follows as 64 bit value.
            self.length, = struct.unpack(">Q", fileobj.read(8))
            if self.length < 16:
                raise MP4MetadataError(
                    "64 bit atom length can only be 16 and higher")
        elif self.length == 0:
            if level != 0:
                raise MP4MetadataError(
                    "only a top-level atom can have zero length")
            # Zero length is reserved for the last atom and means that
            # it runs up to the end of the file.
            fileobj.seek(0, 2)
            self.length = fileobj.tell() - self.offset
            fileobj.seek(self.offset + 8, 0)
        elif self.length < 8:
            raise MP4MetadataError(
                "atom length can only be 0, 1 or 8 and higher")

        end = self.offset + self.length
        if self.name not in _CONTAINERS:
            fileobj.seek(end, 0)
            return

        self.children = []
        fileobj.seek(_SKIP_SIZE.get(self.name, 0), 1)
        while fileobj.tell() < end:
            self.children.append(Atom(fileobj, level + 1))

    @staticmethod
    def render(name, data):
        """Return data wrapped in an atom header with the given name."""
        # this raises OverflowError if Py_ssize_t can't handle the atom data
        size = len(data) + 8
        if size > 0xFFFFFFFF:
            # Too big for 32 bits: write the marker value 1 followed by
            # a 64 bit size (whose header is 8 bytes longer).
            return struct.pack(">I4sQ", 1, name, size + 8) + data
        return struct.pack(">I4s", size, name) + data

    def findall(self, name, recursive=False):
        """Yield child atoms called name; descend if recursive is set."""
        if self.children is None:
            return
        for child in self.children:
            if child.name == name:
                yield child
            if recursive:
                for descendant in child.findall(name, True):
                    yield descendant

    def __getitem__(self, remaining):
        """Look up a child atom, potentially recursively.

        e.g. atom['udta', 'meta'] => <Atom name='meta' ...>

        An empty path returns the atom itself; KeyError is raised for
        a missing child or when this atom is not a container.
        """
        if not remaining:
            return self
        if self.children is None:
            raise KeyError("%r is not a container" % self.name)
        for child in self.children:
            if child.name == remaining[0]:
                return child[remaining[1:]]
        raise KeyError("%r not found" % remaining[0])

    def __repr__(self):
        klass = self.__class__.__name__
        if self.children is None:
            return "<%s name=%r length=%r offset=%r>" % (
                klass, self.name, self.length, self.offset)

        # Containers list their children below, indented by one space.
        lines = []
        for child in self.children:
            for line in repr(child).splitlines():
                lines.append(" " + line)
        return "<%s name=%r length=%r offset=%r\n%s>" % (
            klass, self.name, self.length, self.offset, "\n".join(lines))
|
||||
|
||||
|
||||
class Atoms(object):
    """The top-level atoms of a file.

    Attributes:
    atoms -- a list of top-level atoms as Atom objects

    This structure should only be used internally by Mutagen.
    """

    def __init__(self, fileobj):
        """Parse atoms from the start of fileobj up to its end."""
        self.atoms = []
        fileobj.seek(0, 2)
        end = fileobj.tell()
        fileobj.seek(0)
        # Stop once fewer than 8 bytes (one atom header) are left.
        while end - fileobj.tell() >= 8:
            self.atoms.append(Atom(fileobj))

    def path(self, *names):
        """Look up and return the complete path of an atom.

        For example, atoms.path('moov', 'udta', 'meta') will return a
        list of three atoms, corresponding to the moov, udta, and meta
        atoms.
        """

        found = []
        node = self
        for name in names:
            node = node[name, ]
            found.append(node)
        return found

    def __contains__(self, names):
        try:
            self[names]
        except KeyError:
            return False
        else:
            return True

    def __getitem__(self, names):
        """Look up a child atom.

        'names' may be a list of atoms (['moov', 'udta']) or a string
        specifying the complete path ('moov.udta').

        Raises KeyError if the path does not exist.
        """

        path_type = basestring if PY2 else bytes
        if isinstance(names, path_type):
            names = names.split(b".")

        for child in self.atoms:
            if child.name == names[0]:
                return child[names[1:]]
        raise KeyError("%s not found" % names[0])

    def __repr__(self):
        return "\n".join([repr(child) for child in self.atoms])
|
||||
|
||||
|
||||
class MP4Tags(DictProxy, Metadata):
    r"""Dictionary containing Apple iTunes metadata list key/values.

    Keys are four byte identifiers, except for freeform ('----')
    keys. Values are usually unicode strings, but some atoms have a
    special structure:

    Text values (multiple values per key are supported):

    * '\\xa9nam' -- track title
    * '\\xa9alb' -- album
    * '\\xa9ART' -- artist
    * 'aART' -- album artist
    * '\\xa9wrt' -- composer
    * '\\xa9day' -- year
    * '\\xa9cmt' -- comment
    * 'desc' -- description (usually used in podcasts)
    * 'purd' -- purchase date
    * '\\xa9grp' -- grouping
    * '\\xa9gen' -- genre
    * '\\xa9lyr' -- lyrics
    * 'purl' -- podcast URL
    * 'egid' -- podcast episode GUID
    * 'catg' -- podcast category
    * 'keyw' -- podcast keywords
    * '\\xa9too' -- encoded by
    * 'cprt' -- copyright
    * 'soal' -- album sort order
    * 'soaa' -- album artist sort order
    * 'soar' -- artist sort order
    * 'sonm' -- title sort order
    * 'soco' -- composer sort order
    * 'sosn' -- show sort order
    * 'tvsh' -- show name

    Boolean values:

    * 'cpil' -- part of a compilation
    * 'pgap' -- part of a gapless album
    * 'pcst' -- podcast (iTunes reads this only on import)

    Tuples of ints (multiple values per key are supported):

    * 'trkn' -- track number, total tracks
    * 'disk' -- disc number, total discs

    Others:

    * 'tmpo' -- tempo/BPM, 16 bit int
    * 'covr' -- cover artwork, list of MP4Cover objects (which are
      tagged strs)
    * 'gnre' -- ID3v1 genre. Not supported, use '\\xa9gen' instead.

    The freeform '----' frames use a key in the format '----:mean:name'
    where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique
    identifier for this frame. The value is a str, but is probably
    text that can be decoded as UTF-8. Multiple values per key are
    supported.

    MP4 tag data cannot exist outside of the structure of an MP4 file,
    so this class should not be manually instantiated.

    Unknown non-text tags are removed.
    """

    def load(self, atoms, fileobj):
        """Fill the dictionary from the moov.udta.meta.ilst atom.

        Raises MP4MetadataError if the ilst atom is missing, if an
        atom's data cannot be read completely, or if a known atom
        fails to parse.
        """
        try:
            ilst = atoms[b"moov.udta.meta.ilst"]
        except KeyError as key:
            raise MP4MetadataError(key)
        for atom in ilst.children:
            fileobj.seek(atom.offset + 8)
            data = fileobj.read(atom.length - 8)
            if len(data) != atom.length - 8:
                raise MP4MetadataError("Not enough data")

            if atom.name in self.__atoms:
                info = self.__atoms[atom.name]
                info[0](self, atom, data, *info[2:])
            else:
                # unknown atom, try as text and skip if it fails
                # FIXME: keep them somehow
                try:
                    self.__parse_text(atom, data)
                except MP4MetadataError:
                    continue

    @classmethod
    def _can_load(cls, atoms):
        """Tell whether atoms contains an ilst atom to load from."""
        return b"moov.udta.meta.ilst" in atoms

    @staticmethod
    def __key_sort(item):
        """Sort key for (key, value) items, used when saving."""
        (key, v) = item
        # iTunes always writes the tags in order of "relevance", try
        # to copy it as closely as possible.
        order = [b"\xa9nam", b"\xa9ART", b"\xa9wrt", b"\xa9alb",
                 b"\xa9gen", b"gnre", b"trkn", b"disk",
                 b"\xa9day", b"cpil", b"pgap", b"pcst", b"tmpo",
                 b"\xa9too", b"----", b"covr", b"\xa9lyr"]
        order = dict(zip(order, range(len(order))))
        last = len(order)
        # If there's no key-based way to distinguish, order by length.
        # If there's still no way, go by string comparison on the
        # values, so we at least have something deterministic.
        return (order.get(key[:4], last), len(repr(v)), repr(v))

    def save(self, filename):
        """Save the metadata to the given filename.

        Raises MP4MetadataValueError if a key or value cannot be
        rendered.
        """

        values = []
        # sorted() instead of .sort(): items() is not guaranteed to be a
        # list (on Python 3 a mapping may return a view).
        items = sorted(self.items(), key=self.__key_sort)
        for key, value in items:

            if not PY2 and not isinstance(key, bytes):
                raise MP4MetadataValueError("keys have to be bytes")

            # Atoms without a dedicated renderer are written as text.
            info = self.__atoms.get(key[:4], (None, type(self).__render_text))
            try:
                values.append(info[1](self, key, value, *info[2:]))
            except (TypeError, ValueError) as s:
                reraise(MP4MetadataValueError, s, sys.exc_info()[2])
        data = Atom.render(b"ilst", b"".join(values))

        # Find the old atoms.
        fileobj = open(filename, "rb+")
        try:
            atoms = Atoms(fileobj)
            try:
                path = atoms.path(b"moov", b"udta", b"meta", b"ilst")
            except KeyError:
                self.__save_new(fileobj, atoms, data)
            else:
                self.__save_existing(fileobj, atoms, path, data)
        finally:
            fileobj.close()

    def __pad_ilst(self, data, length=None):
        """Return a 'free' atom carrying length zero bytes of padding.

        Without an explicit length, the payload is sized to round
        len(data) up to the next multiple of 1024.
        """
        if length is None:
            length = ((len(data) + 1023) & ~1023) - len(data)
        return Atom.render(b"free", b"\x00" * length)

    def __save_new(self, fileobj, atoms, ilst):
        """Insert a new meta atom (and udta, if missing) into moov."""
        hdlr = Atom.render(b"hdlr", b"\x00" * 8 + b"mdirappl" + b"\x00" * 9)
        meta = Atom.render(
            b"meta", b"\x00\x00\x00\x00" + hdlr + ilst + self.__pad_ilst(ilst))
        try:
            path = atoms.path(b"moov", b"udta")
        except KeyError:
            # moov.udta not found -- create one
            path = atoms.path(b"moov")
            meta = Atom.render(b"udta", meta)
        offset = path[-1].offset + 8
        insert_bytes(fileobj, len(meta), offset)
        fileobj.seek(offset)
        fileobj.write(meta)
        self.__update_parents(fileobj, path, len(meta))
        self.__update_offsets(fileobj, atoms, len(meta), offset)

    def __save_existing(self, fileobj, atoms, path, data):
        """Overwrite the existing ilst atom, reusing adjacent padding."""
        # Replace the old ilst atom.
        ilst = path.pop()
        offset = ilst.offset
        length = ilst.length

        # Check for padding "free" atoms
        meta = path[-1]
        index = meta.children.index(ilst)
        # Only look backwards when ilst is not the first child: index - 1
        # would otherwise be -1 and silently pick the *last* child.
        if index > 0:
            prev = meta.children[index - 1]
            if prev.name == b"free":
                offset = prev.offset
                length += prev.length
        if index + 1 < len(meta.children):
            following = meta.children[index + 1]
            if following.name == b"free":
                length += following.length

        delta = len(data) - length
        if delta > 0 or (delta < 0 and delta > -8):
            # Either the data grew, or the leftover space is too small
            # to hold a free atom header: pad and resize the file.
            data += self.__pad_ilst(data)
            delta = len(data) - length
            insert_bytes(fileobj, delta, offset)
        elif delta < 0:
            # Fill the remaining space exactly; the file size is unchanged.
            data += self.__pad_ilst(data, -delta - 8)
            delta = 0

        fileobj.seek(offset)
        fileobj.write(data)
        self.__update_parents(fileobj, path, delta)
        self.__update_offsets(fileobj, atoms, delta, offset)

    def __update_parents(self, fileobj, path, delta):
        """Update all parent atoms with the new size."""
        for atom in path:
            fileobj.seek(atom.offset)
            size = cdata.uint_be(fileobj.read(4))
            if size == 1:  # 64bit
                # skip name (4B) and read size (8B)
                size = cdata.ulonglong_be(fileobj.read(12)[4:])
                fileobj.seek(atom.offset + 8)
                fileobj.write(cdata.to_ulonglong_be(size + delta))
            else:  # 32bit
                fileobj.seek(atom.offset)
                fileobj.write(cdata.to_uint_be(size + delta))

    def __update_offset_table(self, fileobj, fmt, atom, delta, offset):
        """Update offset table in the specified atom."""
        if atom.offset > offset:
            atom.offset += delta
        fileobj.seek(atom.offset + 12)
        data = fileobj.read(atom.length - 12)
        # The first four bytes hold the number of table entries.
        fmt = fmt % cdata.uint_be(data[:4])
        offsets = struct.unpack(fmt, data[4:])
        # Only entries pointing past the modified position move.
        offsets = [o + (0, delta)[offset < o] for o in offsets]
        fileobj.seek(atom.offset + 16)
        fileobj.write(struct.pack(fmt, *offsets))

    def __update_tfhd(self, fileobj, atom, delta, offset):
        """Shift the 64 bit offset stored in a tfhd atom, if present."""
        if atom.offset > offset:
            atom.offset += delta
        fileobj.seek(atom.offset + 9)
        data = fileobj.read(atom.length - 9)
        flags = cdata.uint_be(b"\x00" + data[:3])
        # The offset field is only read when the lowest flag bit is set.
        if flags & 1:
            o = cdata.ulonglong_be(data[7:15])
            if o > offset:
                o += delta
            fileobj.seek(atom.offset + 16)
            fileobj.write(cdata.to_ulonglong_be(o))

    def __update_offsets(self, fileobj, atoms, delta, offset):
        """Update offset tables in all 'stco' and 'co64' atoms."""
        if delta == 0:
            return
        moov = atoms[b"moov"]
        for atom in moov.findall(b'stco', True):
            self.__update_offset_table(fileobj, ">%dI", atom, delta, offset)
        for atom in moov.findall(b'co64', True):
            self.__update_offset_table(fileobj, ">%dQ", atom, delta, offset)
        try:
            for atom in atoms[b"moof"].findall(b'tfhd', True):
                self.__update_tfhd(fileobj, atom, delta, offset)
        except KeyError:
            pass

    def __parse_data(self, atom, data):
        """Yield (flags, payload) for each 'data' atom inside data.

        Raises MP4MetadataError on an unexpected or malformed atom.
        """
        pos = 0
        while pos < atom.length - 8:
            length, name, flags = struct.unpack(">I4sI", data[pos:pos+12])
            if name != b"data":
                raise MP4MetadataError(
                    "unexpected atom %r inside %r" % (name, atom.name))
            # An atom is at least its 8 byte header; a smaller (e.g. zero)
            # length would keep this loop from ever advancing.
            if length < 8:
                raise MP4MetadataError(
                    "invalid data atom length %r inside %r" % (
                        length, atom.name))
            yield flags, data[pos+16:pos+length]
            pos += length

    def __render_data(self, key, flags, value):
        """Render each item of value as a 'data' atom inside atom key."""
        return Atom.render(key, b"".join([
            Atom.render(b"data", struct.pack(">2I", flags, 0) + data)
            for data in value]))

    def __parse_freeform(self, atom, data):
        """Parse a '----' atom into a '----:mean:name' entry."""
        length = cdata.uint_be(data[:4])
        mean = data[12:length]
        pos = length
        length = cdata.uint_be(data[pos:pos+4])
        name = data[pos+12:pos+length]
        pos += length
        value = []
        while pos < atom.length - 8:
            length, atom_name = struct.unpack(">I4s", data[pos:pos+8])
            if atom_name != b"data":
                raise MP4MetadataError(
                    "unexpected atom %r inside %r" % (atom_name, atom.name))
            # Guard against lengths that would never advance the loop.
            if length < 8:
                raise MP4MetadataError(
                    "invalid data atom length %r inside %r" % (
                        length, atom.name))

            version = ord(data[pos+8:pos+8+1])
            if version != 0:
                raise MP4MetadataError("Unsupported version: %r" % version)

            flags = struct.unpack(">I", b"\x00" + data[pos+9:pos+12])[0]
            value.append(MP4FreeForm(data[pos+16:pos+length],
                                     dataformat=flags))
            pos += length
        if value:
            self[atom.name + b":" + mean + b":" + name] = value

    def __render_freeform(self, key, value):
        """Render a '----:mean:name' entry as a '----' atom."""
        dummy, mean, name = key.split(b":", 2)
        mean = struct.pack(">I4sI", len(mean) + 12, b"mean", 0) + mean
        name = struct.pack(">I4sI", len(name) + 12, b"name", 0) + name
        if isinstance(value, bytes):
            value = [value]
        parts = []
        for v in value:
            # Plain bytes are written as text; MP4FreeForm carries its
            # own format flag.
            flags = MP4FreeForm.FORMAT_TEXT
            if isinstance(v, MP4FreeForm):
                flags = v.dataformat
            parts.append(
                struct.pack(">I4s2I", len(v) + 16, b"data", flags, 0))
            parts.append(v)
        return Atom.render(b"----", mean + name + b"".join(parts))

    def __parse_pair(self, atom, data):
        """Parse an atom holding pairs of 16 bit integers."""
        self[atom.name] = [struct.unpack(">2H", d[2:6]) for
                           flags, d in self.__parse_data(atom, data)]

    def __render_pair(self, key, value):
        """Render integer pairs, each followed by two zero bytes."""
        data = []
        for (track, total) in value:
            if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
                data.append(struct.pack(">4H", 0, track, total, 0))
            else:
                raise MP4MetadataValueError(
                    "invalid numeric pair %r" % ((track, total),))
        return self.__render_data(key, 0, data)

    def __render_pair_no_trailing(self, key, value):
        """Render integer pairs without the trailing two zero bytes."""
        data = []
        for (track, total) in value:
            if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
                data.append(struct.pack(">3H", 0, track, total))
            else:
                raise MP4MetadataValueError(
                    "invalid numeric pair %r" % ((track, total),))
        return self.__render_data(key, 0, data)

    def __parse_genre(self, atom, data):
        """Translate a numeric 'gnre' atom into a text genre entry."""
        # Translate to a freeform genre.
        genre = cdata.short_be(data[16:18])
        # Ids are looked up 1-based; anything below 1 would turn into a
        # negative list index and yield an unrelated genre, so skip it.
        if b"\xa9gen" not in self and genre >= 1:
            try:
                self[b"\xa9gen"] = [GENRES[genre - 1]]
            except IndexError:
                pass

    def __parse_tempo(self, atom, data):
        """Parse an atom holding unsigned 16 bit integers."""
        self[atom.name] = [cdata.ushort_be(value[1]) for
                           value in self.__parse_data(atom, data)]

    def __render_tempo(self, key, value):
        """Render a list of unsigned 16 bit integers."""
        try:
            if len(value) == 0:
                return self.__render_data(key, 0x15, b"")

            if min(value) < 0 or max(value) >= 2**16:
                raise MP4MetadataValueError(
                    "invalid 16 bit integers: %r" % value)
        except TypeError:
            raise MP4MetadataValueError(
                "tmpo must be a list of 16 bit integers")

        values = list(map(cdata.to_ushort_be, value))
        return self.__render_data(key, 0x15, values)

    def __parse_bool(self, atom, data):
        """Parse a boolean atom; missing payload counts as False."""
        try:
            self[atom.name] = bool(ord(data[16:17]))
        except TypeError:
            self[atom.name] = False

    def __render_bool(self, key, value):
        """Render a boolean as a single byte."""
        return self.__render_data(key, 0x15, [chr_(bool(value))])

    def __parse_cover(self, atom, data):
        """Parse a 'covr' atom into a list of MP4Cover objects."""
        self[atom.name] = []
        pos = 0
        while pos < atom.length - 8:
            length, name, imageformat = struct.unpack(">I4sI",
                                                      data[pos:pos+12])
            # Guard against lengths that would never advance the loop.
            if length < 8:
                raise MP4MetadataError(
                    "invalid atom length %r inside 'covr'" % length)
            if name != b"data":
                if name == b"name":
                    pos += length
                    continue
                raise MP4MetadataError(
                    "unexpected atom %r inside 'covr'" % name)
            # Unknown formats are treated as JPEG.
            if imageformat not in (MP4Cover.FORMAT_JPEG, MP4Cover.FORMAT_PNG):
                imageformat = MP4Cover.FORMAT_JPEG
            cover = MP4Cover(data[pos+16:pos+length], imageformat)
            self[atom.name].append(cover)
            pos += length

    def __render_cover(self, key, value):
        """Render covers; values without imageformat are tagged JPEG."""
        atom_data = []
        for cover in value:
            try:
                imageformat = cover.imageformat
            except AttributeError:
                imageformat = MP4Cover.FORMAT_JPEG
            atom_data.append(Atom.render(
                b"data", struct.pack(">2I", imageformat, 0) + cover))
        return Atom.render(key, b"".join(atom_data))

    def __parse_text(self, atom, data, expected_flags=1):
        """Parse UTF-8 text values whose flags equal expected_flags."""
        value = [text.decode('utf-8', 'replace') for flags, text
                 in self.__parse_data(atom, data)
                 if flags == expected_flags]
        if value:
            self[atom.name] = value

    def __render_text(self, key, value, flags=1):
        """Render one string or a list of strings as UTF-8 text."""
        if isinstance(value, string_types):
            value = [value]
        return self.__render_data(
            key, flags, [utf8(v) for v in value])

    def delete(self, filename):
        """Remove the metadata from the given filename."""

        self.clear()
        self.save(filename)

    # Maps atom names to (parse function, render function, extra args...).
    __atoms = {
        b"----": (__parse_freeform, __render_freeform),
        b"trkn": (__parse_pair, __render_pair),
        b"disk": (__parse_pair, __render_pair_no_trailing),
        b"gnre": (__parse_genre, None),
        b"tmpo": (__parse_tempo, __render_tempo),
        b"cpil": (__parse_bool, __render_bool),
        b"pgap": (__parse_bool, __render_bool),
        b"pcst": (__parse_bool, __render_bool),
        b"covr": (__parse_cover, __render_cover),
        b"purl": (__parse_text, __render_text, 0),
        b"egid": (__parse_text, __render_text, 0),
    }

    # the text atoms we know about which should make loading fail if parsing
    # any of them fails
    for name in [b"\xa9nam", b"\xa9alb", b"\xa9ART", b"aART", b"\xa9wrt",
                 b"\xa9day", b"\xa9cmt", b"desc", b"purd", b"\xa9grp",
                 b"\xa9gen", b"\xa9lyr", b"catg", b"keyw", b"\xa9too",
                 b"cprt", b"soal", b"soaa", b"soar", b"sonm", b"soco",
                 b"sosn", b"tvsh"]:
        __atoms[name] = (__parse_text, __render_text)

    def pprint(self):
        """Return the tags as 'key=value' lines."""
        values = []
        # items() works on both Python 2 and 3, unlike iteritems().
        for key, value in self.items():
            key = key.decode('latin1', "replace")
            if key == "covr":
                values.append("%s=%s" % (key, ", ".join(
                    ["[%d bytes of data]" % len(data) for data in value])))
            elif isinstance(value, list):
                values.append("%s=%s" %
                              (key, " / ".join(map(text_type, value))))
            else:
                values.append("%s=%s" % (key, value))
        return "\n".join(values)
|
||||
|
||||
|
||||
class MP4Info(StreamInfo):
    """Stream information of an MPEG-4 audio track.

    Attributes:

    * bitrate -- bitrate in bits per second, as an int
    * length -- file length in seconds, as a float
    * channels -- number of audio channels
    * sample_rate -- audio sampling rate in Hz
    * bits_per_sample -- bits per sample
    """

    bitrate = 0
    channels = 0
    sample_rate = 0
    bits_per_sample = 0

    def __init__(self, atoms, fileobj):
        """Read the stream information of the first sound track.

        Raises MP4StreamInfoError if no track has a 'soun' handler.
        """
        # Pick the first track whose handler type is sound.
        for trak in list(atoms[b"moov"].findall(b"trak")):
            hdlr = trak[b"mdia", b"hdlr"]
            fileobj.seek(hdlr.offset)
            data = fileobj.read(hdlr.length)
            if data[16:20] == b"soun":
                break
        else:
            raise MP4StreamInfoError("track has no audio data")

        mdhd = trak[b"mdia", b"mdhd"]
        fileobj.seek(mdhd.offset)
        data = fileobj.read(mdhd.length)
        # The version byte decides where the two fields are located and
        # how wide the second one is.
        if ord(data[8:9]) == 0:
            offset, fmt = 20, ">2I"
        else:
            offset, fmt = 28, ">IQ"
        end = offset + struct.calcsize(fmt)
        unit, length = struct.unpack(fmt, data[offset:end])
        self.length = float(length) / unit

        try:
            stsd = trak[b"mdia", b"minf", b"stbl", b"stsd"]
            fileobj.seek(stsd.offset)
            data = fileobj.read(stsd.length)
            if data[20:24] != b"mp4a":
                return
            # Read as in the original code; the value is not used further.
            length = cdata.uint_be(data[16:20])
            (self.channels, self.bits_per_sample, _,
             self.sample_rate) = struct.unpack(">3HI", data[40:50])
            # ES descriptor type
            if not (data[56:60] == b"esds" and ord(data[64:65]) == 0x03):
                return
            pos = 65
            # skip extended descriptor type tag, length, ES ID
            # and stream priority
            if data[pos:pos+3] == b"\x80\x80\x80":
                pos += 3
            pos += 4
            # decoder config descriptor type
            if ord(data[pos:pos+1]) != 0x04:
                return
            pos += 1
            # skip extended descriptor type tag, length,
            # object type ID, stream type, buffer size
            # and maximum bitrate
            if data[pos:pos+3] == b"\x80\x80\x80":
                pos += 3
            pos += 10
            # average bitrate
            self.bitrate = cdata.uint_be(data[pos:pos+4])
        except (ValueError, KeyError):
            # stsd atoms are optional
            pass

    def pprint(self):
        """Return a one-line human readable summary of the stream."""
        return "MPEG-4 audio, %.2f seconds, %d bps" % (
            self.length, self.bitrate)
|
||||
|
||||
|
||||
class MP4(FileType):
    """MPEG-4 audio file (most likely holding AAC).

    When the file holds several tracks, the first audio ('soun') track
    is the one that is read; other track types are not considered.

    :ivar info: :class:`MP4Info`
    :ivar tags: :class:`MP4Tags`
    """

    MP4Tags = MP4Tags

    _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"]

    def load(self, filename):
        """Parse `filename`, filling in ``self.info`` and ``self.tags``.

        Errors deriving from ``error`` propagate unchanged; any other
        exception is re-raised as MP4StreamInfoError (stream info) or
        MP4MetadataError (tags).  ``self.tags`` is None when the file has
        no loadable tag atom.
        """
        self.filename = filename
        with open(filename, "rb") as fileobj:
            atoms = Atoms(fileobj)

            # A valid MP4 file always begins with an ftyp atom.
            top_level = atoms.atoms
            if not top_level or top_level[0].name != b"ftyp":
                raise error("Not a MP4 file")

            try:
                self.info = MP4Info(atoms, fileobj)
            except error:
                raise
            except Exception as err:
                reraise(MP4StreamInfoError, err, sys.exc_info()[2])

            if MP4Tags._can_load(atoms):
                try:
                    self.tags = self.MP4Tags(atoms, fileobj)
                except error:
                    raise
                except Exception as err:
                    reraise(MP4MetadataError, err, sys.exc_info()[2])
            else:
                self.tags = None

    def add_tags(self):
        """Attach an empty tag object; raise ``error`` if one exists."""
        if self.tags is not None:
            raise error("an MP4 tag already exists")
        self.tags = self.MP4Tags()

    @staticmethod
    def score(filename, fileobj, header):
        """Score 0-2: one point each for b"ftyp" and b"mp4" in `header`."""
        has_ftyp = b"ftyp" in header
        has_mp4 = b"mp4" in header
        return has_ftyp + has_mp4
|
||||
|
||||
|
||||
# Module-level alias for this module's file type class.
Open = MP4
|
||||
|
||||
|
||||
def delete(filename):
    """Remove tags from the file at `filename`."""

    mp4_file = MP4(filename)
    mp4_file.delete()
|
||||
@@ -0,0 +1,260 @@
|
||||
# A Musepack reader/tagger
|
||||
#
|
||||
# Copyright 2006 Lukas Lalinsky <lalinsky@gmail.com>
|
||||
# Copyright 2012 Christoph Reiter <christoph.reiter@gmx.at>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Musepack audio streams with APEv2 tags.
|
||||
|
||||
Musepack is an audio format originally based on the MPEG-1 Layer-2
|
||||
algorithms. Stream versions 4 through 8 are supported.
|
||||
|
||||
For more information, see http://www.musepack.net/.
|
||||
"""
|
||||
|
||||
__all__ = ["Musepack", "Open", "delete"]
|
||||
|
||||
import struct
|
||||
|
||||
from ._compat import endswith
|
||||
from mutagen import StreamInfo
|
||||
from mutagen.apev2 import APEv2File, error, delete
|
||||
from mutagen.id3 import BitPaddedInt
|
||||
from mutagen._util import cdata
|
||||
from ._compat import xrange
|
||||
|
||||
|
||||
class MusepackHeaderError(error):
    """Raised when a Musepack stream header cannot be parsed."""
|
||||
|
||||
|
||||
# Sample rates in Hz; MusepackInfo indexes this table with the sample-rate
# bits it extracts from the stream header.
RATES = [44100, 48000, 37800, 32000]
|
||||
|
||||
|
||||
def _parse_sv8_int(fileobj, limit=9):
|
||||
"""Reads (max limit) bytes from fileobj until the MSB is zero.
|
||||
All 7 LSB will be merged to a big endian uint.
|
||||
|
||||
Raises ValueError in case not MSB is zero, or EOFError in
|
||||
case the file ended before limit is reached.
|
||||
|
||||
Returns (parsed number, number of bytes read)
|
||||
"""
|
||||
|
||||
num = 0
|
||||
for i in xrange(limit):
|
||||
c = fileobj.read(1)
|
||||
if len(c) != 1:
|
||||
raise EOFError
|
||||
num = (num << 7) | (ord(c) & 0x7F)
|
||||
if not ord(c) & 0x80:
|
||||
return num, i + 1
|
||||
if limit > 0:
|
||||
raise ValueError
|
||||
return 0, 0
|
||||
|
||||
|
||||
def _calc_sv8_gain(gain):
|
||||
# 64.82 taken from mpcdec
|
||||
return 64.82 - gain / 256.0
|
||||
|
||||
|
||||
def _calc_sv8_peak(peak):
|
||||
return (10 ** (peak / (256.0 * 20.0)) / 65535.0)
|
||||
|
||||
|
||||
class MusepackInfo(StreamInfo):
    """Musepack stream information.

    Attributes:

    * channels -- number of audio channels
    * length -- file length in seconds, as a float
    * sample_rate -- audio sampling rate in Hz
    * bitrate -- audio bitrate, in bits per second
    * version -- Musepack stream version

    Optional Attributes:

    * title_gain, title_peak -- Replay Gain and peak data for this song
    * album_gain, album_peak -- Replay Gain and peak data for this album

    These attributes are only available in stream version 7/8. The
    gains are a float, +/- some dB. The peaks are a percentage [0..1] of
    the maximum amplitude. This means to get a number comparable to
    VorbisGain, you must multiply the peak by 2.
    """

    def __init__(self, fileobj):
        """Parse the stream header found at the current file position.

        A leading ID3v2 tag is skipped.  Raises MusepackHeaderError if
        the data is not a recognisable Musepack stream.
        """
        header = fileobj.read(4)
        if len(header) != 4:
            raise MusepackHeaderError("not a Musepack file")

        # Skip ID3v2 tags
        if header[:3] == b"ID3":
            header = fileobj.read(6)
            if len(header) != 6:
                raise MusepackHeaderError("not a Musepack file")
            size = 10 + BitPaddedInt(header[2:6])
            fileobj.seek(size)
            header = fileobj.read(4)
            if len(header) != 4:
                raise MusepackHeaderError("not a Musepack file")

        if header.startswith(b"MPCK"):
            self.__parse_sv8(fileobj)
        else:
            self.__parse_sv467(fileobj)

        # No bitrate in the header: estimate it from file size and length.
        if not self.bitrate and self.length != 0:
            fileobj.seek(0, 2)
            self.bitrate = int(round(fileobj.tell() * 8 / self.length))

    def __parse_sv8(self, fileobj):
        """Walk SV8 packets until both the SH and RG packets were read."""
        #SV8 http://trac.musepack.net/trac/wiki/SV8Specification

        key_size = 2
        mandatory_packets = [b"SH", b"RG"]

        def check_frame_key(key):
            # Keys are exactly two bytes in the range 'AA'..'ZZ'.
            if len(key) != key_size or not b'AA' <= key <= b'ZZ':
                raise MusepackHeaderError("Invalid frame key.")

        frame_type = fileobj.read(key_size)
        check_frame_key(frame_type)

        while frame_type not in (b"AP", b"SE") and mandatory_packets:
            try:
                frame_size, slen = _parse_sv8_int(fileobj)
            except (EOFError, ValueError):
                raise MusepackHeaderError("Invalid packet size.")
            # frame_size counts the key and the size field themselves.
            data_size = frame_size - key_size - slen
            if data_size < 0:
                # A negative size would seek backwards / read to EOF.
                raise MusepackHeaderError("Invalid packet size.")

            if frame_type == b"SH":
                # Tolerate a repeated packet instead of leaking ValueError.
                if frame_type in mandatory_packets:
                    mandatory_packets.remove(frame_type)
                self.__parse_stream_header(fileobj, data_size)
            elif frame_type == b"RG":
                if frame_type in mandatory_packets:
                    mandatory_packets.remove(frame_type)
                self.__parse_replaygain_packet(fileobj, data_size)
            else:
                fileobj.seek(data_size, 1)

            frame_type = fileobj.read(key_size)
            check_frame_key(frame_type)

        if mandatory_packets:
            raise MusepackHeaderError("Missing mandatory packets: %s." %
                                      ", ".join(map(repr, mandatory_packets)))

        self.length = float(self.samples) / self.sample_rate
        self.bitrate = 0

    def __parse_stream_header(self, fileobj, data_size):
        """Parse an SH packet body of `data_size` bytes."""
        # Skip the first 4 bytes of the packet body.
        fileobj.seek(4, 1)
        try:
            self.version = ord(fileobj.read(1))
        except TypeError:
            raise MusepackHeaderError("SH packet ended unexpectedly.")
        try:
            samples, l1 = _parse_sv8_int(fileobj)
            samples_skip, l2 = _parse_sv8_int(fileobj)
        except (EOFError, ValueError):
            raise MusepackHeaderError(
                "SH packet: Invalid sample counts.")
        left_size = data_size - 5 - l1 - l2
        if left_size != 2:
            raise MusepackHeaderError("Invalid SH packet size.")
        data = fileobj.read(left_size)
        if len(data) != left_size:
            raise MusepackHeaderError("SH packet ended unexpectedly.")
        # The top three bits can encode 0..7 but only 4 rates are known.
        try:
            self.sample_rate = RATES[ord(data[-2:-1]) >> 5]
        except IndexError:
            raise MusepackHeaderError("SH packet: Invalid sample rate.")
        self.channels = (ord(data[-1:]) >> 4) + 1
        self.samples = samples - samples_skip

    def __parse_replaygain_packet(self, fileobj, data_size):
        """Parse an RG packet body; zero fields leave attributes unset."""
        # Validate the size before reading anything from the file.
        if data_size != 9:
            raise MusepackHeaderError("Invalid RG packet size.")
        data = fileobj.read(data_size)
        if len(data) != data_size:
            raise MusepackHeaderError("RG packet ended unexpectedly.")
        title_gain = cdata.short_be(data[1:3])
        title_peak = cdata.short_be(data[3:5])
        album_gain = cdata.short_be(data[5:7])
        album_peak = cdata.short_be(data[7:9])
        if title_gain:
            self.title_gain = _calc_sv8_gain(title_gain)
        if title_peak:
            self.title_peak = _calc_sv8_peak(title_peak)
        if album_gain:
            self.album_gain = _calc_sv8_gain(album_gain)
        if album_peak:
            self.album_peak = _calc_sv8_peak(album_peak)

    def __parse_sv467(self, fileobj):
        """Parse the 32-byte header used by stream versions 4 to 7."""
        # Rewind over the 4 bytes already consumed by __init__.
        fileobj.seek(-4, 1)
        header = fileobj.read(32)
        if len(header) != 32:
            raise MusepackHeaderError("not a Musepack file")

        # SV7
        if header.startswith(b"MP+"):
            self.version = ord(header[3:4]) & 0xF
            if self.version < 7:
                raise MusepackHeaderError("not a Musepack file")
            frames = cdata.uint_le(header[4:8])
            flags = cdata.uint_le(header[8:12])

            self.title_peak, self.title_gain = struct.unpack(
                "<Hh", header[12:16])
            self.album_peak, self.album_gain = struct.unpack(
                "<Hh", header[16:20])
            self.title_gain /= 100.0
            self.album_gain /= 100.0
            self.title_peak /= 65535.0
            self.album_peak /= 65535.0

            self.sample_rate = RATES[(flags >> 16) & 0x0003]
            self.bitrate = 0
        # SV4-SV6
        else:
            header_dword = cdata.uint_le(header[0:4])
            self.version = (header_dword >> 11) & 0x03FF
            if self.version < 4 or self.version > 6:
                raise MusepackHeaderError("not a Musepack file")
            self.bitrate = (header_dword >> 23) & 0x01FF
            self.sample_rate = 44100
            if self.version >= 5:
                frames = cdata.uint_le(header[4:8])
            else:
                frames = cdata.ushort_le(header[6:8])
            if self.version < 6:
                frames -= 1
        self.channels = 2
        self.length = float(frames * 1152 - 576) / self.sample_rate

    def pprint(self):
        """Return a one-line summary, including gains when present."""
        rg_data = []
        if hasattr(self, "title_gain"):
            rg_data.append("%+0.2f (title)" % self.title_gain)
        if hasattr(self, "album_gain"):
            rg_data.append("%+0.2f (album)" % self.album_gain)
        gain_text = ", Gain: " + ", ".join(rg_data) if rg_data else ""

        return "Musepack SV%d, %.2f seconds, %d Hz, %d bps%s" % (
            self.version, self.length, self.sample_rate, self.bitrate,
            gain_text)
|
||||
|
||||
|
||||
class Musepack(APEv2File):
    """Musepack file type, built on the APEv2 file base class."""

    _Info = MusepackInfo
    _mimes = ["audio/x-musepack", "audio/x-mpc"]

    @staticmethod
    def score(filename, fileobj, header):
        """Sum of matches: SV7 magic, SV8 magic and a ".mpc" extension."""
        sv7_magic = header.startswith(b"MP+")
        sv8_magic = header.startswith(b"MPCK")
        mpc_suffix = endswith(filename.lower(), b".mpc")
        return sv7_magic + sv8_magic + mpc_suffix
|
||||
|
||||
|
||||
# Module-level alias for this module's file type class.
Open = Musepack
|
||||
@@ -0,0 +1,506 @@
|
||||
# Copyright 2006 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Read and write Ogg bitstreams and pages.
|
||||
|
||||
This module reads and writes a subset of the Ogg bitstream format
|
||||
version 0. It does *not* read or write Ogg Vorbis files! For that,
|
||||
you should use mutagen.oggvorbis.
|
||||
|
||||
This implementation is based on the RFC 3533 standard found at
|
||||
http://www.xiph.org/ogg/doc/rfc3533.txt.
|
||||
"""
|
||||
|
||||
import struct
|
||||
import sys
|
||||
import zlib
|
||||
|
||||
from mutagen import FileType
|
||||
from mutagen._util import cdata, insert_bytes, delete_bytes
|
||||
from ._compat import cBytesIO, reraise, chr_
|
||||
|
||||
|
||||
class error(IOError):
    """Raised when an Ogg stream cannot be parsed."""
|
||||
|
||||
|
||||
class OggPage(object):
    """A single Ogg page (not necessarily a single encoded packet).

    A page is a 27 byte header (whose last byte is the number of lacing
    values), followed by the lacing values, followed by the data.

    The constructor is given a file-like object pointing to the start
    of an Ogg page. After the constructor is finished it is pointing
    to the start of the next page.

    Attributes:

    * version -- stream structure version (currently always 0)
    * position -- absolute stream position (default 0)
    * serial -- logical stream serial number (default 0)
    * sequence -- page sequence number within logical stream (default 0)
    * offset -- offset this page was read from (default None)
    * complete -- if the last packet on this page is complete (default True)
    * packets -- list of raw packet data (default [])

    Note that if 'complete' is false, the next page's 'continued'
    property must be true (so set both when constructing pages).

    If a file-like object is supplied to the constructor, the above
    attributes will be filled in based on it.
    """

    version = 0
    __type_flags = 0
    position = 0
    serial = 0
    sequence = 0
    offset = None
    complete = True

    def __init__(self, fileobj=None):
        """Read one page from fileobj, or build an empty page if None.

        Raises EOFError if nothing at all can be read, and ``error`` for
        a truncated or invalid page.
        """
        self.packets = []

        if fileobj is None:
            return

        self.offset = fileobj.tell()

        header = fileobj.read(27)
        if len(header) == 0:
            raise EOFError

        try:
            (oggs, self.version, self.__type_flags, self.position,
             self.serial, self.sequence, crc, segments) = struct.unpack(
                 "<4sBBqIIiB", header)
        except struct.error:
            raise error("unable to read full header; got %r" % header)

        if oggs != b"OggS":
            raise error("read %r, expected %r, at 0x%x" % (
                oggs, b"OggS", fileobj.tell() - 27))

        if self.version != 0:
            raise error("version %r unsupported" % self.version)

        # Sum lacing values into packet lengths; a value below 255 ends
        # the current packet.
        total = 0
        lacings = []
        lacing_bytes = fileobj.read(segments)
        if len(lacing_bytes) != segments:
            raise error("unable to read %r lacing bytes" % segments)
        for c in bytearray(lacing_bytes):
            total += c
            if c < 255:
                lacings.append(total)
                total = 0
        if total:
            # The last packet was not terminated on this page.
            lacings.append(total)
            self.complete = False

        self.packets = [fileobj.read(l) for l in lacings]
        if [len(p) for p in self.packets] != lacings:
            raise error("unable to read full data")

    def __eq__(self, other):
        """Two Ogg pages are the same if they write the same data."""
        try:
            return (self.write() == other.write())
        except AttributeError:
            return False

    __hash__ = object.__hash__

    def __repr__(self):
        attrs = ['version', 'position', 'serial', 'sequence', 'offset',
                 'complete', 'continued', 'first', 'last']
        values = ["%s=%r" % (attr, getattr(self, attr)) for attr in attrs]
        return "<%s %s, %d bytes in %d packets>" % (
            type(self).__name__, " ".join(values), sum(map(len, self.packets)),
            len(self.packets))

    def write(self):
        """Return a string encoding of the page header and data.

        A ValueError is raised if the data is too big to fit in a
        single page.
        """

        # Header with a zero CRC placeholder; the CRC is patched in below.
        data = [
            struct.pack("<4sBBqIIi", b"OggS", self.version, self.__type_flags,
                        self.position, self.serial, self.sequence, 0)
        ]

        lacing_data = []
        for datum in self.packets:
            quot, rem = divmod(len(datum), 255)
            lacing_data.append(b"\xff" * quot + chr_(rem))
        lacing_data = b"".join(lacing_data)
        if not self.complete and lacing_data.endswith(b"\x00"):
            lacing_data = lacing_data[:-1]
        data.append(chr_(len(lacing_data)))
        data.append(lacing_data)
        data.extend(self.packets)
        data = b"".join(data)

        # Python's CRC is swapped relative to Ogg's needs.
        # crc32 returns uint prior to py2.6 on some platforms, so force uint
        crc = (~zlib.crc32(data.translate(cdata.bitswap), -1)) & 0xffffffff
        # Although we're using to_uint_be, this actually makes the CRC
        # a proper le integer, since Python's CRC is byteswapped.
        crc = cdata.to_uint_be(crc).translate(cdata.bitswap)
        data = data[:22] + crc + data[26:]
        return data

    @property
    def size(self):
        """Total frame size."""

        size = 27  # Initial header size
        # Remainder of the last packet; stays None for a page without
        # packets so the check below cannot hit an unbound name.
        last_rem = None
        for datum in self.packets:
            quot, last_rem = divmod(len(datum), 255)
            size += quot + 1
        if not self.complete and last_rem == 0:
            # Packet contains a multiple of 255 bytes and is not
            # terminated, so we don't have a \x00 at the end.
            size -= 1
        size += sum(map(len, self.packets))
        return size

    def __set_flag(self, bit, val):
        mask = 1 << bit
        if val:
            self.__type_flags |= mask
        else:
            self.__type_flags &= ~mask

    continued = property(
        lambda self: cdata.test_bit(self.__type_flags, 0),
        lambda self, v: self.__set_flag(0, v),
        doc="The first packet is continued from the previous page.")

    first = property(
        lambda self: cdata.test_bit(self.__type_flags, 1),
        lambda self, v: self.__set_flag(1, v),
        doc="This is the first page of a logical bitstream.")

    last = property(
        lambda self: cdata.test_bit(self.__type_flags, 2),
        lambda self, v: self.__set_flag(2, v),
        doc="This is the last page of a logical bitstream.")

    @classmethod
    def renumber(klass, fileobj, serial, start):
        """Renumber pages belonging to a specified logical stream.

        fileobj must be opened with mode r+b or w+b.

        Starting at page number 'start', renumber all pages belonging
        to logical stream 'serial'. Other pages will be ignored.

        fileobj must point to the start of a valid Ogg page; any
        occurring after it and part of the specified logical stream
        will be numbered. No adjustment will be made to the data in
        the pages nor the granule position; only the page number, and
        so also the CRC.

        If an error occurs (e.g. non-Ogg data is found), fileobj will
        be left pointing to the place in the stream the error occurred,
        but the invalid data will be left intact (since this function
        does not change the total file size).
        """

        number = start
        while True:
            try:
                page = OggPage(fileobj)
            except EOFError:
                break
            else:
                if page.serial != serial:
                    # Wrong stream, skip this page.
                    continue
                # Changing the number can't change the page size,
                # so seeking back based on the current size is safe.
                fileobj.seek(-page.size, 1)
            page.sequence = number
            fileobj.write(page.write())
            fileobj.seek(page.offset + page.size, 0)
            number += 1

    @classmethod
    def to_packets(klass, pages, strict=False):
        """Construct a list of packet data from a list of Ogg pages.

        If strict is true, the first page must start a new packet,
        and the last page must end the last packet.

        Raises ValueError on a strictness violation or when the pages
        do not share one serial with consecutive sequence numbers.
        """

        serial = pages[0].serial
        sequence = pages[0].sequence
        packets = []

        if strict:
            if pages[0].continued:
                raise ValueError("first packet is continued")
            if not pages[-1].complete:
                raise ValueError("last packet does not complete")
        elif pages and pages[0].continued:
            # Placeholder for the part of the packet we never saw.
            packets.append([b""])

        for page in pages:
            if serial != page.serial:
                raise ValueError("invalid serial number in %r" % page)
            elif sequence != page.sequence:
                raise ValueError("bad sequence number in %r" % page)
            else:
                sequence += 1

            if page.continued:
                packets[-1].append(page.packets[0])
            else:
                packets.append([page.packets[0]])
            packets.extend([[p] for p in page.packets[1:]])

        return [b"".join(p) for p in packets]

    @classmethod
    def from_packets(klass, packets, sequence=0,
                     default_size=4096, wiggle_room=2048):
        """Construct a list of Ogg pages from a list of packet data.

        The algorithm will generate pages of approximately
        default_size in size (rounded down to the nearest multiple of
        255). However, it will also allow pages to increase to
        approximately default_size + wiggle_room if allowing the
        wiggle room would finish a packet (only one packet will be
        finished in this way per page; if the next packet would fit
        into the wiggle room, it still starts on a new page).

        This method reduces packet fragmentation when packet sizes are
        slightly larger than the default page size, while still
        ensuring most pages are of the average size.

        Pages are numbered started at 'sequence'; other information is
        uninitialized.
        """

        chunk_size = (default_size // 255) * 255

        pages = []

        page = OggPage()
        page.sequence = sequence

        for packet in packets:
            page.packets.append(b"")
            while packet:
                data, packet = packet[:chunk_size], packet[chunk_size:]
                if page.size < default_size and len(page.packets) < 255:
                    page.packets[-1] += data
                else:
                    # If we've put any packet data into this page yet,
                    # we need to mark it incomplete. However, we can
                    # also have just started this packet on an already
                    # full page, in which case, just start the new
                    # page with this packet.
                    if page.packets[-1]:
                        page.complete = False
                        if len(page.packets) == 1:
                            page.position = -1
                    else:
                        page.packets.pop(-1)
                    pages.append(page)
                    page = OggPage()
                    page.continued = not pages[-1].complete
                    page.sequence = pages[-1].sequence + 1
                    page.packets.append(data)

                if len(packet) < wiggle_room:
                    page.packets[-1] += packet
                    packet = b""

        if page.packets:
            pages.append(page)

        return pages

    @classmethod
    def replace(klass, fileobj, old_pages, new_pages):
        """Replace old_pages with new_pages within fileobj.

        old_pages must have come from reading fileobj originally.
        new_pages are assumed to have the 'same' data as old_pages,
        and so the serial and sequence numbers will be copied, as will
        the flags for the first and last pages.

        fileobj will be resized and pages renumbered as necessary. As
        such, it must be opened r+b or w+b.

        The old_pages list itself is left in its original order.
        """

        # Number the new pages starting from the first old page.
        first = old_pages[0].sequence
        for page, seq in zip(new_pages, range(first, first + len(new_pages))):
            page.sequence = seq
            page.serial = old_pages[0].serial

        new_pages[0].first = old_pages[0].first
        new_pages[0].last = old_pages[0].last
        new_pages[0].continued = old_pages[0].continued

        new_pages[-1].first = old_pages[-1].first
        new_pages[-1].last = old_pages[-1].last
        new_pages[-1].complete = old_pages[-1].complete
        if not new_pages[-1].complete and len(new_pages[-1].packets) == 1:
            new_pages[-1].position = -1

        new_data = b"".join(map(klass.write, new_pages))

        # Make room in the file for the new data.
        delta = len(new_data)
        fileobj.seek(old_pages[0].offset, 0)
        insert_bytes(fileobj, delta, old_pages[0].offset)
        fileobj.seek(old_pages[0].offset, 0)
        fileobj.write(new_data)
        new_data_end = old_pages[0].offset + delta

        # Go through the old pages and delete them. Since we shifted
        # the data down the file, we need to adjust their offsets. We
        # also need to go backwards, so we don't adjust the deltas of
        # the other pages. Iterate a reversed view rather than
        # reversing the caller's list in place.
        for old_page in reversed(old_pages):
            adj_offset = old_page.offset + delta
            delete_bytes(fileobj, old_page.size, adj_offset)

        # Finally, if there's any discrepancy in length, we need to
        # renumber the pages for the logical stream.
        if len(old_pages) != len(new_pages):
            fileobj.seek(new_data_end, 0)
            serial = new_pages[-1].serial
            sequence = new_pages[-1].sequence + 1
            klass.renumber(fileobj, serial, sequence)

    @classmethod
    def find_last(klass, fileobj, serial):
        """Find the last page of the stream 'serial'.

        If the file is not multiplexed this function is fast. If it is,
        it must read the whole stream.

        This finds the last page in the actual file object, or the last
        page in the stream (with eos set), whichever comes first.

        May return None when no page of the stream could be found.
        """

        # For non-muxed streams, look at the last page.
        try:
            fileobj.seek(-256*256, 2)
        except IOError:
            # The file is less than 64k in length.
            fileobj.seek(0)
        data = fileobj.read()
        try:
            index = data.rindex(b"OggS")
        except ValueError:
            raise error("unable to find final Ogg header")
        stringobj = cBytesIO(data[index:])
        best_page = None
        try:
            page = OggPage(stringobj)
        except error:
            pass
        else:
            if page.serial == serial:
                if page.last:
                    return page
                else:
                    best_page = page
            else:
                best_page = None

        # The stream is muxed, so use the slow way.
        fileobj.seek(0)
        try:
            page = OggPage(fileobj)
            while not page.last:
                page = OggPage(fileobj)
                while page.serial != serial:
                    page = OggPage(fileobj)
                best_page = page
            return page
        except error:
            return best_page
        except EOFError:
            return best_page
|
||||
|
||||
|
||||
class OggFileType(FileType):
    """A generic Ogg file.

    The ``_Info``, ``_Tags`` and ``_Error`` attributes are called by the
    methods below; they default to None here, so concrete file types are
    expected to provide them.
    """

    _Info = None
    _Tags = None
    _Error = None
    _mimes = ["application/ogg", "application/x-ogg"]

    def load(self, filename):
        """Load stream info and tags from the file at `filename`.

        Ogg ``error`` exceptions are re-raised as ``self._Error``; running
        out of pages is reported as ``self._Error`` as well.
        """

        self.filename = filename
        with open(filename, "rb") as fileobj:
            try:
                self.info = self._Info(fileobj)
                self.tags = self._Tags(fileobj, self.info)
                self.info._post_tags(fileobj)
            except error as err:
                reraise(self._Error, err, sys.exc_info()[2])
            except EOFError:
                raise self._Error("no appropriate stream found")

    def delete(self, filename=None):
        """Clear the tags and write the emptied tags to the file.

        Without a filename, the most recently loaded one is used.
        """

        target = self.filename if filename is None else filename

        self.tags.clear()
        with open(target, "rb+") as fileobj:
            try:
                self.tags._inject(fileobj)
            except error as err:
                reraise(self._Error, err, sys.exc_info()[2])
            except EOFError:
                raise self._Error("no appropriate stream found")

    def save(self, filename=None):
        """Write the current tags to the file.

        Without a filename, the most recently loaded one is used.
        """

        target = self.filename if filename is None else filename

        with open(target, "rb+") as fileobj:
            try:
                self.tags._inject(fileobj)
            except error as err:
                reraise(self._Error, err, sys.exc_info()[2])
            except EOFError:
                raise self._Error("no appropriate stream found")
|
||||
@@ -0,0 +1,148 @@
|
||||
# Ogg FLAC support.
|
||||
#
|
||||
# Copyright 2006 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Read and write Ogg FLAC comments.
|
||||
|
||||
This module handles FLAC files wrapped in an Ogg bitstream. The first
|
||||
FLAC stream found is used. For 'naked' FLACs, see mutagen.flac.
|
||||
|
||||
This module is based off the specification at
|
||||
http://flac.sourceforge.net/ogg_mapping.html.
|
||||
"""
|
||||
|
||||
__all__ = ["OggFLAC", "Open", "delete"]
|
||||
|
||||
import struct
|
||||
|
||||
from ._compat import cBytesIO
|
||||
|
||||
from mutagen import flac
|
||||
from mutagen.flac import VCFLACDict, StrictFileObject
|
||||
from mutagen.ogg import OggPage, OggFileType, error as OggError
|
||||
|
||||
|
||||
class error(OggError):
    """Base error for Ogg FLAC handling; derives from the Ogg error."""
|
||||
|
||||
|
||||
class OggFLACHeaderError(error):
    """Raised when the Ogg FLAC header is invalid or unsupported."""
|
||||
|
||||
|
||||
class OggFLACStreamInfo(flac.StreamInfo):
    """Ogg FLAC general header and stream info.

    This encompasses the Ogg wrapper for the FLAC STREAMINFO metadata
    block, as well as the Ogg codec setup that precedes it.

    Attributes (in addition to StreamInfo's):

    * packets -- number of metadata packets
    * serial -- Ogg logical stream serial number
    """

    packets = 0
    serial = 0

    def load(self, data):
        """Locate the first Ogg FLAC header page and parse it.

        Raises OggFLACHeaderError for a bad FLAC marker or an unknown
        mapping version.
        """
        # Ogg expects file objects that don't raise on read
        if isinstance(data, StrictFileObject):
            data = data._fileobj

        # Skip pages until one starts with the Ogg FLAC signature; pages
        # that carry no packets at all cannot match and are skipped too.
        page = OggPage(data)
        while not (page.packets and
                   page.packets[0].startswith(b"\x7FFLAC")):
            page = OggPage(data)
        major, minor, self.packets, marker = struct.unpack(
            ">BBH4s", page.packets[0][5:13])
        if marker != b"fLaC":
            raise OggFLACHeaderError("invalid FLAC marker (%r)" % marker)
        elif (major, minor) != (1, 0):
            raise OggFLACHeaderError(
                "unknown mapping version: %d.%d" % (major, minor))
        self.serial = page.serial

        # Skip over the block header.
        stringobj = StrictFileObject(cBytesIO(page.packets[0][17:]))
        super(OggFLACStreamInfo, self).load(stringobj)

    def _post_tags(self, fileobj):
        """Derive the length from the last page if it is still unset.

        Raises OggFLACHeaderError if no last page can be found.
        """
        if self.length:
            return
        page = OggPage.find_last(fileobj, self.serial)
        if page is None:
            # find_last may come back empty-handed; fail with a parser
            # error rather than an AttributeError on None.
            raise OggFLACHeaderError("unable to find the last page")
        self.length = page.position / float(self.sample_rate)

    def pprint(self):
        """Return the FLAC summary prefixed with "Ogg "."""
        return u"Ogg " + super(OggFLACStreamInfo, self).pprint()
|
||||
|
||||
|
||||
class OggFLACVComment(VCFLACDict):
    """FLAC Vorbis comment block carried inside an Ogg bitstream."""

    def load(self, data, info, errors='replace'):
        """Read the comment packet of the stream described by `info`.

        Pages are consumed from `data` until the first packet of the
        stream with `info.serial` is complete. The first four bytes of
        that packet are dropped before the rest is passed to the parent
        loader together with `errors`.
        """

        # data should be pointing at the start of an Ogg page, after
        # the first FLAC page.
        pages = []
        complete = False
        while not complete:
            page = OggPage(data)
            if page.serial == info.serial:
                pages.append(page)
                complete = page.complete or (len(page.packets) > 1)
        comment = cBytesIO(OggPage.to_packets(pages)[0][4:])
        super(OggFLACVComment, self).load(comment, errors=errors)

    def _inject(self, fileobj):
        """Write tag data into the FLAC Vorbis comment packet/page."""

        # Ogg FLAC has no convenient data marker like Vorbis, but the
        # second packet - and second page - must be the comment data.
        fileobj.seek(0)
        page = OggPage(fileobj)
        # Pages without any packet are skipped; indexing packets[0] on
        # them would raise IndexError.
        while not page.packets or \
                not page.packets[0].startswith(b"\x7FFLAC"):
            page = OggPage(fileobj)

        first_page = page
        while not (page.sequence == 1 and page.serial == first_page.serial):
            page = OggPage(fileobj)

        # Collect every page of this stream that holds part of the
        # comment packet.
        old_pages = [page]
        while not (old_pages[-1].complete or len(old_pages[-1].packets) > 1):
            page = OggPage(fileobj)
            if page.serial == first_page.serial:
                old_pages.append(page)

        packets = OggPage.to_packets(old_pages, strict=False)

        # Set the new comment block: keep the first byte of the old
        # packet, then a 3-byte big-endian length, then the new payload.
        data = self.write()
        data = packets[0][:1] + struct.pack(">I", len(data))[-3:] + data
        packets[0] = data

        new_pages = OggPage.from_packets(packets, old_pages[0].sequence)
        OggPage.replace(fileobj, old_pages, new_pages)
|
||||
|
||||
|
||||
class OggFLAC(OggFileType):
    """An Ogg FLAC file."""

    _Info = OggFLACStreamInfo
    _Tags = OggFLACVComment
    _Error = OggFLACHeaderError
    _mimes = ["audio/x-oggflac"]

    @staticmethod
    def score(filename, fileobj, header):
        """Rate `header` as Ogg FLAC.

        The result is zero unless `header` starts with the Ogg capture
        pattern; otherwise it counts which of the two FLAC markers
        appear in `header`.
        """

        is_ogg = header.startswith(b"OggS")
        marker_hits = (b"FLAC" in header) + (b"fLaC" in header)
        return is_ogg * marker_hits
|
||||
|
||||
|
||||
Open = OggFLAC
|
||||
|
||||
|
||||
def delete(filename):
    """Remove tags from the Ogg FLAC file at `filename`."""

    audio = OggFLAC(filename)
    audio.delete()
|
||||
@@ -0,0 +1,126 @@
|
||||
# Copyright 2012, 2013 Christoph Reiter
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Read and write Ogg Opus comments.
|
||||
|
||||
This module handles Opus files wrapped in an Ogg bitstream. The
|
||||
first Opus stream found is used.
|
||||
|
||||
Based on http://tools.ietf.org/html/draft-terriberry-oggopus-01
|
||||
"""
|
||||
|
||||
__all__ = ["OggOpus", "Open", "delete"]
|
||||
|
||||
import struct
|
||||
|
||||
from mutagen import StreamInfo
|
||||
from mutagen._vorbis import VCommentDict
|
||||
from mutagen.ogg import OggPage, OggFileType, error as OggError
|
||||
|
||||
|
||||
class error(OggError):
    """Base exception of the Ogg Opus module; a kind of generic Ogg error."""
|
||||
|
||||
|
||||
class OggOpusHeaderError(error):
    """Raised when the Opus identification header cannot be accepted."""
|
||||
|
||||
|
||||
class OggOpusInfo(StreamInfo):
    """Ogg Opus stream information.

    Attributes:

    * length - file length in seconds, as a float
    * channels - number of channels
    """

    length = 0

    def __init__(self, fileobj):
        """Read pages from `fileobj` until the OpusHead packet is found.

        Raises OggOpusHeaderError if the page holding the header does
        not start a stream, the header is truncated, or the major
        version is not 0.
        """

        page = OggPage(fileobj)
        # Pages without any packet are skipped; indexing packets[0] on
        # them would raise IndexError.
        while not page.packets or \
                not page.packets[0].startswith(b"OpusHead"):
            page = OggPage(fileobj)

        self.serial = page.serial

        if not page.first:
            raise OggOpusHeaderError(
                "page has ID header, but doesn't start a stream")

        try:
            fields = struct.unpack("<BBHIhB", page.packets[0][8:19])
        except struct.error:
            # Fewer than the 11 bytes the format string needs.
            raise OggOpusHeaderError("truncated OpusHead packet")

        # Only the first three fields are used here.
        version, self.channels, pre_skip = fields[:3]
        self.__pre_skip = pre_skip

        # only the higher 4 bits change on incompatible changes
        major = version >> 4
        if major != 0:
            raise OggOpusHeaderError("version %r unsupported" % major)

    def _post_tags(self, fileobj):
        """Compute `length` from the last page of the stream.

        The pre-skip sample count is subtracted from the final position
        and the result divided by 48000. A position smaller than the
        pre-skip yields a length of zero rather than a negative one.
        """

        page = OggPage.find_last(fileobj, self.serial)
        samples = max(0, page.position - self.__pre_skip)
        self.length = samples / float(48000)

    def pprint(self):
        """Return a one-line description including the length."""

        return u"Ogg Opus, %.2f seconds" % (self.length)
|
||||
|
||||
|
||||
class OggOpusVComment(VCommentDict):
    """Opus comments embedded in an Ogg bitstream."""

    def __get_comment_pages(self, fileobj, info):
        """Return the pages of `info`'s stream that hold the OpusTags packet.

        Reading starts at the current position of `fileobj`.
        """

        # find the first tags page with the right serial
        page = OggPage(fileobj)
        # Pages without any packet are skipped; indexing packets[0] on
        # them would raise IndexError.
        while info.serial != page.serial or not page.packets or \
                not page.packets[0].startswith(b"OpusTags"):
            page = OggPage(fileobj)

        # get all comment pages
        pages = [page]
        while not (pages[-1].complete or len(pages[-1].packets) > 1):
            page = OggPage(fileobj)
            if page.serial == pages[0].serial:
                pages.append(page)

        return pages

    def __init__(self, fileobj, info):
        """Load the comments of the stream described by `info`."""

        pages = self.__get_comment_pages(fileobj, info)
        data = OggPage.to_packets(pages)[0][8:]  # Strip OpusTags
        super(OggOpusVComment, self).__init__(data, framing=False)

    def _inject(self, fileobj):
        """Write tag data into the Opus comment packet/page."""

        fileobj.seek(0)
        info = OggOpusInfo(fileobj)
        old_pages = self.__get_comment_pages(fileobj, info)

        packets = OggPage.to_packets(old_pages)
        packets[0] = b"OpusTags" + self.write(framing=False)
        new_pages = OggPage.from_packets(packets, old_pages[0].sequence)
        OggPage.replace(fileobj, old_pages, new_pages)
|
||||
|
||||
|
||||
class OggOpus(OggFileType):
    """An Ogg Opus file."""

    _Info = OggOpusInfo
    _Tags = OggOpusVComment
    _Error = OggOpusHeaderError
    _mimes = ["audio/ogg", "audio/ogg; codecs=opus"]

    @staticmethod
    def score(filename, fileobj, header):
        """Rate `header` as Ogg Opus.

        Non-zero only when `header` starts with the Ogg capture pattern
        and contains the OpusHead marker.
        """

        is_ogg = header.startswith(b"OggS")
        has_opus_head = b"OpusHead" in header
        return is_ogg * has_opus_head
|
||||
|
||||
|
||||
Open = OggOpus
|
||||
|
||||
|
||||
def delete(filename):
    """Remove tags from the Ogg Opus file at `filename`."""

    audio = OggOpus(filename)
    audio.delete()
|
||||
@@ -0,0 +1,138 @@
|
||||
# Ogg Speex support.
|
||||
#
|
||||
# Copyright 2006 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Read and write Ogg Speex comments.
|
||||
|
||||
This module handles Speex files wrapped in an Ogg bitstream. The
|
||||
first Speex stream found is used.
|
||||
|
||||
Read more about Ogg Speex at http://www.speex.org/. This module is
|
||||
based on the specification at http://www.speex.org/manual2/node7.html
|
||||
and clarifications after personal communication with Jean-Marc,
|
||||
http://lists.xiph.org/pipermail/speex-dev/2006-July/004676.html.
|
||||
"""
|
||||
|
||||
__all__ = ["OggSpeex", "Open", "delete"]
|
||||
|
||||
from mutagen import StreamInfo
|
||||
from mutagen._vorbis import VCommentDict
|
||||
from mutagen.ogg import OggPage, OggFileType, error as OggError
|
||||
from mutagen._util import cdata
|
||||
|
||||
|
||||
class error(OggError):
    """Base exception of the Ogg Speex module; a kind of generic Ogg error."""
|
||||
|
||||
|
||||
class OggSpeexHeaderError(error):
    """Raised when the Speex identification header cannot be accepted."""
|
||||
|
||||
|
||||
class OggSpeexInfo(StreamInfo):
    """Ogg Speex stream information.

    Attributes:

    * bitrate - nominal bitrate in bits per second
    * channels - number of channels
    * length - file length in seconds, as a float

    The reference encoder does not set the bitrate; in this case,
    the bitrate will be 0.
    """

    length = 0

    def __init__(self, fileobj):
        """Read pages from `fileobj` until the Speex header packet is found.

        Raises OggSpeexHeaderError if the page holding the header does
        not start a stream, or if the header declares a sample rate of
        zero (the length could not be computed from it).
        """

        page = OggPage(fileobj)
        # Pages without any packet are skipped; indexing packets[0] on
        # them would raise IndexError.
        while not page.packets or \
                not page.packets[0].startswith(b"Speex "):
            page = OggPage(fileobj)
        if not page.first:
            raise OggSpeexHeaderError(
                "page has ID header, but doesn't start a stream")

        header = page.packets[0]
        self.sample_rate = cdata.uint_le(header[36:40])
        self.channels = cdata.uint_le(header[48:52])
        # Negative bitrates are reported as 0.
        self.bitrate = max(0, cdata.int_le(header[52:56]))
        self.serial = page.serial

        if self.sample_rate == 0:
            # _post_tags divides by the sample rate.
            raise OggSpeexHeaderError("sample rate can't be zero")

    def _post_tags(self, fileobj):
        """Compute `length` from the position of the stream's last page."""

        page = OggPage.find_last(fileobj, self.serial)
        self.length = page.position / float(self.sample_rate)

    def pprint(self):
        """Return a one-line description including the length."""

        return u"Ogg Speex, %.2f seconds" % self.length
|
||||
|
||||
|
||||
class OggSpeexVComment(VCommentDict):
    """Speex comments embedded in an Ogg bitstream."""

    def __init__(self, fileobj, info):
        """Load the comment packet of the stream described by `info`.

        Pages are read from `fileobj` until the first packet of the
        stream with `info.serial` is complete.
        """

        pages = []
        complete = False
        while not complete:
            page = OggPage(fileobj)
            if page.serial == info.serial:
                pages.append(page)
                complete = page.complete or (len(page.packets) > 1)
        data = OggPage.to_packets(pages)[0] + b"\x01"
        super(OggSpeexVComment, self).__init__(data, framing=False)

    def _inject(self, fileobj):
        """Write tag data into the Speex comment packet/page."""

        fileobj.seek(0)

        # Find the first header page, with the stream info.
        # Use it to get the serial number. Pages without any packet are
        # skipped; indexing packets[0] on them would raise IndexError.
        page = OggPage(fileobj)
        while not page.packets or \
                not page.packets[0].startswith(b"Speex "):
            page = OggPage(fileobj)

        # Look for the next page with that serial number, it'll start
        # the comment packet.
        serial = page.serial
        page = OggPage(fileobj)
        while page.serial != serial:
            page = OggPage(fileobj)

        # Then find all the pages with the comment packet.
        old_pages = [page]
        while not (old_pages[-1].complete or len(old_pages[-1].packets) > 1):
            page = OggPage(fileobj)
            if page.serial == old_pages[0].serial:
                old_pages.append(page)

        packets = OggPage.to_packets(old_pages, strict=False)

        # Set the new comment packet.
        packets[0] = self.write(framing=False)

        new_pages = OggPage.from_packets(packets, old_pages[0].sequence)
        OggPage.replace(fileobj, old_pages, new_pages)
|
||||
|
||||
|
||||
class OggSpeex(OggFileType):
    """An Ogg Speex file."""

    _Info = OggSpeexInfo
    _Tags = OggSpeexVComment
    _Error = OggSpeexHeaderError
    _mimes = ["audio/x-speex"]

    @staticmethod
    def score(filename, fileobj, header):
        """Rate `header` as Ogg Speex.

        Non-zero only when `header` starts with the Ogg capture pattern
        and contains the Speex marker.
        """

        is_ogg = header.startswith(b"OggS")
        has_speex = b"Speex " in header
        return is_ogg * has_speex
|
||||
|
||||
|
||||
Open = OggSpeex
|
||||
|
||||
|
||||
def delete(filename):
    """Remove tags from the Ogg Speex file at `filename`."""

    audio = OggSpeex(filename)
    audio.delete()
|
||||
@@ -0,0 +1,131 @@
|
||||
# Ogg Theora support.
|
||||
#
|
||||
# Copyright 2006 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Read and write Ogg Theora comments.
|
||||
|
||||
This module handles Theora files wrapped in an Ogg bitstream. The
|
||||
first Theora stream found is used.
|
||||
|
||||
Based on the specification at http://theora.org/doc/Theora_I_spec.pdf.
|
||||
"""
|
||||
|
||||
__all__ = ["OggTheora", "Open", "delete"]
|
||||
|
||||
import struct
|
||||
|
||||
from mutagen import StreamInfo
|
||||
from mutagen._vorbis import VCommentDict
|
||||
from mutagen._util import cdata
|
||||
from mutagen.ogg import OggPage, OggFileType, error as OggError
|
||||
|
||||
|
||||
class error(OggError):
    """Base exception of the Ogg Theora module; a kind of generic Ogg error."""
|
||||
|
||||
|
||||
class OggTheoraHeaderError(error):
    """Raised when the Theora identification header cannot be accepted."""
|
||||
|
||||
|
||||
class OggTheoraInfo(StreamInfo):
    """Ogg Theora stream information.

    Attributes:

    * length - file length in seconds, as a float
    * fps - video frames per second, as a float
    """

    length = 0

    def __init__(self, fileobj):
        """Read pages from `fileobj` until the Theora ID header is found.

        Raises OggTheoraHeaderError if the page holding the header does
        not start a stream, the version is not 3.2, or the frame rate
        numerator or denominator is zero.
        """

        page = OggPage(fileobj)
        # Pages without any packet are skipped; indexing packets[0] on
        # them would raise IndexError.
        while not page.packets or \
                not page.packets[0].startswith(b"\x80theora"):
            page = OggPage(fileobj)
        if not page.first:
            raise OggTheoraHeaderError(
                "page has ID header, but doesn't start a stream")
        data = page.packets[0]
        vmaj, vmin = struct.unpack("2B", data[7:9])
        if (vmaj, vmin) != (3, 2):
            raise OggTheoraHeaderError(
                "found Theora version %d.%d != 3.2" % (vmaj, vmin))
        fps_num, fps_den = struct.unpack(">2I", data[22:30])
        # A zero denominator breaks the division below; a zero numerator
        # gives fps == 0, which breaks the division in _post_tags.
        if not fps_den or not fps_num:
            raise OggTheoraHeaderError("fps_num or fps_den equals zero")
        self.fps = fps_num / float(fps_den)
        # 3-byte big-endian value, padded to 4 bytes for decoding.
        self.bitrate = cdata.uint_be(b"\x00" + data[37:40])
        self.granule_shift = (cdata.ushort_be(data[40:42]) >> 5) & 0x1F
        self.serial = page.serial

    def _post_tags(self, fileobj):
        """Compute `length` from the position of the stream's last page.

        The position is split at `granule_shift` bits; the sum of both
        halves is taken as the frame count and divided by `fps`.
        """

        page = OggPage.find_last(fileobj, self.serial)
        position = page.position
        mask = (1 << self.granule_shift) - 1
        frames = (position >> self.granule_shift) + (position & mask)
        self.length = frames / float(self.fps)

    def pprint(self):
        """Return a one-line description with length and bitrate."""

        return "Ogg Theora, %.2f seconds, %d bps" % (self.length, self.bitrate)
|
||||
|
||||
|
||||
class OggTheoraCommentDict(VCommentDict):
    """Theora comments embedded in an Ogg bitstream."""

    def __init__(self, fileobj, info):
        """Load the comment packet of the stream described by `info`.

        Pages are read from `fileobj` until the first packet of the
        stream with `info.serial` is complete; its first seven bytes
        are dropped before parsing.
        """

        pages = []
        complete = False
        while not complete:
            page = OggPage(fileobj)
            if page.serial == info.serial:
                pages.append(page)
                complete = page.complete or (len(page.packets) > 1)
        data = OggPage.to_packets(pages)[0][7:]
        super(OggTheoraCommentDict, self).__init__(data + b"\x01")

    def _inject(self, fileobj):
        """Write tag data into the Theora comment packet/page."""

        fileobj.seek(0)
        page = OggPage(fileobj)
        # Pages without any packet are skipped; indexing packets[0] on
        # them would raise IndexError.
        while not page.packets or \
                not page.packets[0].startswith(b"\x81theora"):
            page = OggPage(fileobj)

        # Collect every page of this stream that holds part of the
        # comment packet.
        old_pages = [page]
        while not (old_pages[-1].complete or len(old_pages[-1].packets) > 1):
            page = OggPage(fileobj)
            if page.serial == old_pages[0].serial:
                old_pages.append(page)

        packets = OggPage.to_packets(old_pages, strict=False)

        packets[0] = b"\x81theora" + self.write(framing=False)

        new_pages = OggPage.from_packets(packets, old_pages[0].sequence)
        OggPage.replace(fileobj, old_pages, new_pages)
|
||||
|
||||
|
||||
class OggTheora(OggFileType):
    """An Ogg Theora file."""

    _Info = OggTheoraInfo
    _Tags = OggTheoraCommentDict
    _Error = OggTheoraHeaderError
    _mimes = ["video/x-theora"]

    @staticmethod
    def score(filename, fileobj, header):
        """Rate `header` as Ogg Theora.

        The result is zero unless `header` starts with the Ogg capture
        pattern; otherwise it counts which of the two Theora header
        markers appear in `header`.
        """

        is_ogg = header.startswith(b"OggS")
        marker_hits = (b"\x80theora" in header) + (b"\x81theora" in header)
        return is_ogg * marker_hits
|
||||
|
||||
|
||||
Open = OggTheora
|
||||
|
||||
|
||||
def delete(filename):
    """Remove tags from the Ogg Theora file at `filename`."""

    video = OggTheora(filename)
    video.delete()
|
||||
@@ -0,0 +1,138 @@
|
||||
# Ogg Vorbis support.
|
||||
#
|
||||
# Copyright 2006 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Read and write Ogg Vorbis comments.
|
||||
|
||||
This module handles Vorbis files wrapped in an Ogg bitstream. The
|
||||
first Vorbis stream found is used.
|
||||
|
||||
Read more about Ogg Vorbis at http://vorbis.com/. This module is based
|
||||
on the specification at http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html.
|
||||
"""
|
||||
|
||||
__all__ = ["OggVorbis", "Open", "delete"]
|
||||
|
||||
import struct
|
||||
|
||||
from mutagen import StreamInfo
|
||||
from mutagen._vorbis import VCommentDict
|
||||
from mutagen.ogg import OggPage, OggFileType, error as OggError
|
||||
|
||||
|
||||
class error(OggError):
    """Base exception of the Ogg Vorbis module; a kind of generic Ogg error."""
|
||||
|
||||
|
||||
class OggVorbisHeaderError(error):
    """Raised when the Vorbis identification header cannot be accepted."""
|
||||
|
||||
|
||||
class OggVorbisInfo(StreamInfo):
    """Ogg Vorbis stream information.

    Attributes:

    * length - file length in seconds, as a float
    * bitrate - nominal ('average') bitrate in bits per second, as an int
    """

    length = 0

    def __init__(self, fileobj):
        """Read pages from `fileobj` until the Vorbis ID header is found.

        Raises OggVorbisHeaderError if the page holding the header does
        not start a stream, or if the header declares a sample rate of
        zero (the length could not be computed from it).
        """

        page = OggPage(fileobj)
        # Pages without any packet are skipped; indexing packets[0] on
        # them would raise IndexError.
        while not page.packets or \
                not page.packets[0].startswith(b"\x01vorbis"):
            page = OggPage(fileobj)
        if not page.first:
            raise OggVorbisHeaderError(
                "page has ID header, but doesn't start a stream")
        # The sample rate is an unsigned 32-bit field; the three
        # bitrate fields are signed.
        (self.channels, self.sample_rate, max_bitrate, nominal_bitrate,
         min_bitrate) = struct.unpack("<BI3i", page.packets[0][11:28])
        if self.sample_rate == 0:
            # _post_tags divides by the sample rate.
            raise OggVorbisHeaderError("sample rate can't be zero")
        self.serial = page.serial

        # Negative bitrates are treated as "not set".
        max_bitrate = max(0, max_bitrate)
        min_bitrate = max(0, min_bitrate)
        nominal_bitrate = max(0, nominal_bitrate)

        if nominal_bitrate == 0:
            self.bitrate = (max_bitrate + min_bitrate) // 2
        elif max_bitrate and max_bitrate < nominal_bitrate:
            # If the max bitrate is less than the nominal, we know
            # the nominal is wrong.
            self.bitrate = max_bitrate
        elif min_bitrate > nominal_bitrate:
            self.bitrate = min_bitrate
        else:
            self.bitrate = nominal_bitrate

    def _post_tags(self, fileobj):
        """Compute `length` from the position of the stream's last page."""

        page = OggPage.find_last(fileobj, self.serial)
        self.length = page.position / float(self.sample_rate)

    def pprint(self):
        """Return a one-line description with length and bitrate."""

        return u"Ogg Vorbis, %.2f seconds, %d bps" % (self.length, self.bitrate)
|
||||
|
||||
|
||||
class OggVCommentDict(VCommentDict):
    """Vorbis comments embedded in an Ogg bitstream."""

    def __init__(self, fileobj, info):
        """Load the comment packet of the stream described by `info`.

        Pages are read from `fileobj` until the first packet of the
        stream with `info.serial` is complete.
        """

        pages = []
        complete = False
        while not complete:
            page = OggPage(fileobj)
            if page.serial == info.serial:
                pages.append(page)
                complete = page.complete or (len(page.packets) > 1)
        data = OggPage.to_packets(pages)[0][7:]  # Strip off "\x03vorbis".
        super(OggVCommentDict, self).__init__(data)

    def _inject(self, fileobj):
        """Write tag data into the Vorbis comment packet/page."""

        # Find the old pages in the file; we'll need to remove them,
        # plus grab any stray setup packet data out of them.
        fileobj.seek(0)
        page = OggPage(fileobj)
        # Pages without any packet are skipped; indexing packets[0] on
        # them would raise IndexError.
        while not page.packets or \
                not page.packets[0].startswith(b"\x03vorbis"):
            page = OggPage(fileobj)

        old_pages = [page]
        while not (old_pages[-1].complete or len(old_pages[-1].packets) > 1):
            page = OggPage(fileobj)
            if page.serial == old_pages[0].serial:
                old_pages.append(page)

        packets = OggPage.to_packets(old_pages, strict=False)

        # Set the new comment packet.
        packets[0] = b"\x03vorbis" + self.write()

        new_pages = OggPage.from_packets(packets, old_pages[0].sequence)
        OggPage.replace(fileobj, old_pages, new_pages)
|
||||
|
||||
|
||||
class OggVorbis(OggFileType):
    """An Ogg Vorbis file."""

    _Info = OggVorbisInfo
    _Tags = OggVCommentDict
    _Error = OggVorbisHeaderError
    _mimes = ["audio/vorbis", "audio/x-vorbis"]

    @staticmethod
    def score(filename, fileobj, header):
        """Rate `header` as Ogg Vorbis.

        Non-zero only when `header` starts with the Ogg capture pattern
        and contains the Vorbis identification marker.
        """

        is_ogg = header.startswith(b"OggS")
        has_vorbis_id = b"\x01vorbis" in header
        return is_ogg * has_vorbis_id
|
||||
|
||||
|
||||
Open = OggVorbis
|
||||
|
||||
|
||||
def delete(filename):
    """Remove tags from the Ogg Vorbis file at `filename`."""

    audio = OggVorbis(filename)
    audio.delete()
|
||||
@@ -0,0 +1,74 @@
|
||||
# OptimFROG reader/tagger
|
||||
#
|
||||
# Copyright 2006 Lukas Lalinsky <lalinsky@gmail.com>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""OptimFROG audio streams with APEv2 tags.
|
||||
|
||||
OptimFROG is a lossless audio compression program. Its main goal is to
|
||||
reduce at maximum the size of audio files, while permitting bit
|
||||
identical restoration for all input. It is similar with the ZIP
|
||||
compression, but it is highly specialized to compress audio data.
|
||||
|
||||
Only versions 4.5 and higher are supported.
|
||||
|
||||
For more information, see http://www.losslessaudio.org/
|
||||
"""
|
||||
|
||||
__all__ = ["OptimFROG", "Open", "delete"]
|
||||
|
||||
import struct
|
||||
|
||||
from ._compat import endswith
|
||||
from mutagen import StreamInfo
|
||||
from mutagen.apev2 import APEv2File, error, delete
|
||||
|
||||
|
||||
class OptimFROGHeaderError(error):
    """Raised when a file does not begin with a usable OptimFROG header."""
|
||||
|
||||
|
||||
class OptimFROGInfo(StreamInfo):
    """OptimFROG stream information.

    Attributes:

    * channels - number of audio channels
    * length - file length in seconds, as a float
    * sample_rate - audio sampling rate in Hz
    """

    def __init__(self, fileobj):
        """Parse the 76-byte header at the current position of `fileobj`.

        Raises OptimFROGHeaderError when fewer than 76 bytes are
        available, the magic is missing, or the size field that follows
        the magic is neither 12 nor 15.
        """

        header = fileobj.read(76)
        acceptable = (
            len(header) == 76 and header.startswith(b"OFR ") and
            struct.unpack("<I", header[4:8])[0] in [12, 15])
        if not acceptable:
            raise OptimFROGHeaderError("not an OptimFROG file")

        (samples_low, samples_high, _sample_type, stored_channels,
         rate) = struct.unpack("<IHBBI", header[8:20])
        # The sample count is split into a 32-bit low and 16-bit high part.
        total_samples = samples_low + (samples_high << 32)
        # The header stores the channel count minus one.
        self.channels = stored_channels + 1
        self.sample_rate = rate

        if rate:
            self.length = float(total_samples) / (self.channels * rate)
        else:
            self.length = 0.0

    def pprint(self):
        """Return a one-line description with length and sample rate."""

        details = (self.length, self.sample_rate)
        return "OptimFROG, %.2f seconds, %d Hz" % details
|
||||
|
||||
|
||||
class OptimFROG(APEv2File):
    """An OptimFROG file, using OptimFROGInfo for its stream information."""

    _Info = OptimFROGInfo

    @staticmethod
    def score(filename, fileobj, header):
        """Rate the file as OptimFROG.

        Adds up a match on the header magic and matches of the
        lower-cased `filename` against the two known extensions.
        """

        lowered = filename.lower()

        points = header.startswith(b"OFR")
        points = points + endswith(lowered, b".ofr")
        points = points + endswith(lowered, b".ofs")
        return points
|
||||
|
||||
Open = OptimFROG
|
||||
@@ -0,0 +1,83 @@
|
||||
# True Audio support for Mutagen
|
||||
# Copyright 2006 Joe Wreschnig
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""True Audio audio stream information and tags.
|
||||
|
||||
True Audio is a lossless format designed for real-time encoding and
|
||||
decoding. This module is based on the documentation at
|
||||
http://www.true-audio.com/TTA_Lossless_Audio_Codec\_-_Format_Description
|
||||
|
||||
True Audio files use ID3 tags.
|
||||
"""
|
||||
|
||||
__all__ = ["TrueAudio", "Open", "delete", "EasyTrueAudio"]
|
||||
|
||||
from ._compat import endswith
|
||||
from mutagen import StreamInfo
|
||||
from mutagen.id3 import ID3FileType, delete
|
||||
from mutagen._util import cdata
|
||||
|
||||
|
||||
class error(RuntimeError):
    """Base exception of the True Audio module."""
|
||||
|
||||
|
||||
class TrueAudioHeaderError(error, IOError):
    """Raised when no TTA header is found; also catchable as IOError."""
|
||||
|
||||
|
||||
class TrueAudioInfo(StreamInfo):
    """True Audio stream information.

    Attributes:

    * length - audio length, in seconds
    * sample_rate - audio sample rate, in Hz
    """

    def __init__(self, fileobj, offset):
        """Parse the 18-byte TTA header at `offset` in `fileobj`.

        A false `offset` (None or 0) means the start of the file.

        Raises TrueAudioHeaderError when fewer than 18 bytes are
        available or the magic is missing.
        """

        fileobj.seek(offset or 0)
        header = fileobj.read(18)
        if len(header) != 18 or not header.startswith(b"TTA"):
            raise TrueAudioHeaderError("TTA header not found")
        self.sample_rate = cdata.int_le(header[10:14])
        samples = cdata.uint_le(header[14:18])
        # A header with a zero sample rate gets a zero length instead
        # of failing with ZeroDivisionError.
        self.length = 0.0
        if self.sample_rate != 0:
            self.length = float(samples) / self.sample_rate

    def pprint(self):
        """Return a one-line description with length and sample rate."""

        return "True Audio, %.2f seconds, %d Hz." % (
            self.length, self.sample_rate)
|
||||
|
||||
|
||||
class TrueAudio(ID3FileType):
    """A True Audio file.

    :ivar info: :class:`TrueAudioInfo`
    :ivar tags: :class:`ID3 <mutagen.id3.ID3>`
    """

    _Info = TrueAudioInfo
    _mimes = ["audio/x-tta"]

    @staticmethod
    def score(filename, fileobj, header):
        """Rate the file as True Audio.

        One point each for a header starting with the ID3 or the TTA
        magic, two points for a lower-cased `filename` with the .tta
        extension.
        """

        magic_points = header.startswith(b"ID3") + header.startswith(b"TTA")
        extension_points = endswith(filename.lower(), b".tta") * 2
        return magic_points + extension_points
|
||||
|
||||
|
||||
Open = TrueAudio
|
||||
|
||||
|
||||
class EasyTrueAudio(TrueAudio):
    """Like TrueAudio, but uses EasyID3 for tags.

    :ivar info: :class:`TrueAudioInfo`
    :ivar tags: :class:`EasyID3 <mutagen.easyid3.EasyID3>`
    """

    # Imported inside the class body so that the name ID3 becomes a
    # class attribute bound to EasyID3.
    from mutagen.easyid3 import EasyID3 as ID3
    ID3 = ID3
|
||||
@@ -0,0 +1,124 @@
|
||||
# A WavPack reader/tagger
|
||||
#
|
||||
# Copyright 2006 Joe Wreschnig
|
||||
# 2014 Christoph Reiter
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""WavPack reading and writing.
|
||||
|
||||
WavPack is a lossless format that uses APEv2 tags. Read
|
||||
|
||||
* http://www.wavpack.com/
|
||||
* http://www.wavpack.com/file_format.txt
|
||||
|
||||
for more information.
|
||||
"""
|
||||
|
||||
__all__ = ["WavPack", "Open", "delete"]
|
||||
|
||||
from mutagen import StreamInfo
|
||||
from mutagen.apev2 import APEv2File, error, delete
|
||||
from mutagen._util import cdata
|
||||
|
||||
|
||||
class WavPackHeaderError(error):
    """Raised when a WavPack block header cannot be read or recognised."""
|
||||
|
||||
RATES = [6000, 8000, 9600, 11025, 12000, 16000, 22050, 24000, 32000, 44100,
|
||||
48000, 64000, 88200, 96000, 192000]
|
||||
|
||||
|
||||
class _WavPackHeader(object):
    """Plain holder for the fields of one 32-byte WavPack block header."""

    def __init__(self, block_size, version, track_no, index_no, total_samples,
                 block_index, block_samples, flags, crc):

        self.block_size = block_size
        self.version = version
        self.track_no = track_no
        self.index_no = index_no
        self.total_samples = total_samples
        self.block_index = block_index
        self.block_samples = block_samples
        self.flags = flags
        self.crc = crc

    @classmethod
    def from_fileobj(cls, fileobj):
        """A new _WavPackHeader or raises WavPackHeaderError

        Reads 32 bytes from the current position of `fileobj`. A total
        sample count with all 32 bits set is reported as -1.
        """

        header = fileobj.read(32)
        if len(header) != 32 or not header.startswith(b"wvpk"):
            raise WavPackHeaderError("not a WavPack header: %r" % header)

        block_size = cdata.uint_le(header[4:8])
        version = cdata.ushort_le(header[8:10])
        # One-byte slices keep ord() working on both str and bytes.
        track_no = ord(header[10:11])
        index_no = ord(header[11:12])
        samples = cdata.uint_le(header[12:16])
        if samples == 2**32 - 1:
            samples = -1
        block_index = cdata.uint_le(header[16:20])
        block_samples = cdata.uint_le(header[20:24])
        flags = cdata.uint_le(header[24:28])
        crc = cdata.uint_le(header[28:32])

        # Build through cls so a subclass gets an instance of itself.
        return cls(block_size, version, track_no, index_no,
                   samples, block_index, block_samples, flags, crc)
|
||||
|
||||
|
||||
class WavPackInfo(StreamInfo):
    """WavPack stream information.

    Attributes:

    * channels - number of audio channels (1 or 2)
    * length - file length in seconds, as a float
    * sample_rate - audio sampling rate in Hz
    * version - WavPack stream version
    """

    def __init__(self, fileobj):
        """Read stream information from the block header(s) in `fileobj`.

        Raises WavPackHeaderError if the first block header is invalid
        or if its sample-rate index has no entry in RATES.
        """

        try:
            header = _WavPackHeader.from_fileobj(fileobj)
        except WavPackHeaderError:
            raise WavPackHeaderError("not a WavPack file")

        self.version = header.version
        # Flag bit 0x4 set means one channel, otherwise two; stored as
        # an int rather than a bool.
        self.channels = 1 if header.flags & 4 else 2

        # The 4-bit index can be 15, but RATES only has 15 entries.
        rate_index = (header.flags >> 23) & 0xF
        try:
            self.sample_rate = RATES[rate_index]
        except IndexError:
            raise WavPackHeaderError(
                "unsupported sample rate index: %d" % rate_index)

        if header.total_samples == -1 or header.block_index != 0:
            # TODO: we could make this faster by using the tag size
            # and search backwards for the last block, then do
            # last.block_index + last.block_samples - initial.block_index
            samples = header.block_samples
            while 1:
                # Skip the rest of the current block to reach the next
                # block header.
                fileobj.seek(header.block_size - 32 + 8, 1)
                try:
                    header = _WavPackHeader.from_fileobj(fileobj)
                except WavPackHeaderError:
                    break
                samples += header.block_samples
        else:
            samples = header.total_samples

        self.length = float(samples) / self.sample_rate

    def pprint(self):
        """Return a one-line description with length and sample rate."""

        return "WavPack, %.2f seconds, %d Hz" % (self.length, self.sample_rate)
|
||||
|
||||
|
||||
class WavPack(APEv2File):
    """A WavPack file, using WavPackInfo for its stream information."""

    _Info = WavPackInfo
    _mimes = ["audio/x-wavpack"]

    @staticmethod
    def score(filename, fileobj, header):
        """Return 2 when `header` starts with the WavPack magic, else 0."""

        has_magic = header.startswith(b"wvpk")
        return has_magic * 2
|
||||
|
||||
|
||||
Open = WavPack
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 89 KiB |
@@ -0,0 +1,4 @@
|
||||
License
|
||||
-------
|
||||
|
||||
If the software submitted to this repository accesses or calls any software provided by Plex (“Interfacing Software”), then as a condition for receiving services from Plex in response to such accesses or calls, you agree to grant and do hereby grant to Plex and its affiliates worldwide a worldwide, nonexclusive, and royalty-free right and license to use (including testing, hosting and linking to), copy, publicly perform, publicly display, reproduce in copies for distribution, and distribute the copies of any Interfacing Software made by you or with your assistance; provided, however, that you may notify Plex at legal@plex.tv if you do not wish for Plex to use, distribute, copy, publicly perform, publicly display, reproduce in copies for distribution, or distribute copies of an Interfacing Software that was created by you, and Plex will use reasonable efforts to comply with such a request within a reasonable time.
|
||||
@@ -1 +0,0 @@
|
||||
include LICENSE HISTORY.rst requirements.txt
|
||||
@@ -0,0 +1,150 @@
|
||||
Sub-Zero for Plex, 1.1-RC5.2
|
||||
=================
|
||||
|
||||

|
||||
|
||||
##### Subtitles done right
|
||||
Originally based on @bramwalet's awesome [Subliminal.bundle](https://github.com/bramwalet/Subliminal.bundle)
|
||||
|
||||
Plex forum thread: https://forums.plex.tv/discussion/186575
|
||||
|
||||
### Quick installation
|
||||
* go to ```Library/Application Support/Plex Media Server/Plug-ins/```
|
||||
* ```rm -r Sub-Zero.bundle LocalMediaExtended.bundle```
|
||||
* ```wget https://github.com/pannal/Sub-Zero/releases/download/1.1-rc5.2/Sub-Zero-1.1-rc5.2.zip```
|
||||
* ```unzip Sub-Zero-1.1-rc5.2.zip```
|
||||
* for more in-depth instructions, see ```Installation``` below
|
||||
|
||||
### Usage
|
||||
Use the following agent order:
|
||||
|
||||
1. Sub-Zero TV/Movie Subtitles
|
||||
2. Local Media Assets Extended
|
||||
3. anything else
|
||||
4. again, **DISABLE Local Media Assets**!
|
||||
|
||||
### Encountered a bug?
|
||||
* be sure to post your logs: ```Library/Application Support/Plex Media Server/Logs/PMS Plugin Logs/com.plexapp.agents.subzero.log```; there may be multiple logs (com.plexapp.agents.subzero.log.*) depending on the number of videos you're refreshing
|
||||
* **Remember: before you open a bug-ticket please double-check, that you've deleted the Sub-Zero.bundle folder BEFORE every update** (to avoid .pyc leftovers)
|
||||
|
||||
## Changelog
|
||||
RC-5.2
|
||||
- revert back to /plexinc-agents/LocalMedia.bundle/tree/dist instead of /plexinc-agents/LocalMedia.bundle/tree/master, as the current public PMS version is too old for that
|
||||
|
||||
RC-5.1
|
||||
- make hearing_impaired option more configurable and clear (see #configuration-)
|
||||
|
||||
RC-5
|
||||
- fix wrong video type matching by hinting video type to guessit
|
||||
- update to newest LocalMediaExtended.bundle (incorporated plex-inc's changes)
|
||||
- show page links for subtitles in log file instead of subtitle ID
|
||||
- add custom language setting in addition to the three hardcoded ones
|
||||
- if a subtitle doesn't match our hearing_impaired setting, ignore it
|
||||
- add an optional boost for addic7ed subtitles, if their series, season, episode, year, and format (e.g. WEB-DL) matches
|
||||
|
||||
RC-4
|
||||
- rename project to Sub-Zero
|
||||
- incorporate LocalMediaExtended.bundle
|
||||
- making this a multi-bundle plugin
|
||||
- update default scores
|
||||
- add icon
|
||||
|
||||
RC-3
|
||||
- addic7ed/tvsubtitles: punctuation fixes (correctly get show ids for series like "Mr. Poopster" now)
|
||||
- podnapisi: fix logging
|
||||
- opensubtitles: add login credentials (for VIPs)
|
||||
- add retry functionality to retry failed subtitle downloads, including configurable amount of retries until discarding of provider
|
||||
- move possibly not needed setting "Restrict to one language" to the bottom
|
||||
- more detailed logging
|
||||
- some cleanup
|
||||
|
||||
RC-2
|
||||
- fix empty custom subtitle folder creation
|
||||
- fix detection of existing embedded subtitles (switch to https://github.com/tonswieb/enzyme)
|
||||
- better logging
|
||||
- set default TV score to 15; movie score to 30
|
||||
|
||||
RC-1
|
||||
- fix subliminal's logging error on min_score not met (fixes #15)
|
||||
- separated tv and movies subtitle scores settings (fixes #16)
|
||||
- add option to save only one subtitle per video (skipping the ".lang." naming scheme plex supports) (fixes #3)
|
||||
|
||||
beta5
|
||||
- fix storing subtitles besides the actual video file, not subfolder (fixes #14)
|
||||
- "custom folder" setting now always used if given (properly overrides "subtitle folder" setting)
|
||||
- also scan (custom) given subtitle folders for existing subtitles instead of redownloading them on every refresh (fixes #9, #2)
|
||||
|
||||
beta4
|
||||
- ~~increased score of addic7ed subtitles a bit~~ (not existing currently)
|
||||
- **support for newest Subliminal ([1.0.1](27a6e51cd36ffb2910cd9a7add6d797a2c6469b7)) and guessit ([0.11.0](2814f57e8999dcc31575619f076c0c1a63ce78f2))**
|
||||
- **plugin now also [works with com.plexapp.agents.thetvdbdvdorder](924470d2c0db3a71529278bce4b7247eaf2f85b8)**
|
||||
- providers fixed for subliminal 1.0.1 ([at least addic7ed](131504e7eed8b3400c457fbe49beea3b115bc916))
|
||||
- providers [don't simply fail and get excluded on non-detected language](1a779020792e0201ad689eefbf5a126155e89c97)
|
||||
- support for addic7ed languages: [French (Canadian)](b11a051c233fd72033f0c3b5a8c1965260e7e19f)
|
||||
- support for additional languages: [pt-br (Portuguese (Brasil)), fa (Persian (Farsi))](131504e7eed8b3400c457fbe49beea3b115bc916)
|
||||
- support for [three (two optional) subtitle languages](e543c927cf49c264eaece36640c99d67a99c7da2)
|
||||
- optionally use [random user agent for addic7ed provider](83ace14faf75fbd75313f0ceda9b78161895fbcf) (should not be needed)
|
||||
|
||||
Description
|
||||
------------
|
||||
|
||||
Plex Metadata agent plugin based on Subliminal. This agent will search on the following sites for the best matching subtitles:
|
||||
- OpenSubtitles
|
||||
- TheSubDB
|
||||
- Podnapisi.NET
|
||||
- Addic7ed
|
||||
- TVsubtitles.net
|
||||
|
||||
All providers can be disabled or enabled on a per provider setting. Certain preferences change the behaviour of subliminal, for instance the minimum score of subtitles to download, or whether to download hearing impaired subtitles or not. The agent stores the subtitles as metadata, but can be configured (See Configuration) to store it next to the media files.
|
||||
|
||||
Installation
|
||||
------------
|
||||
See [article](https://support.plex.tv/hc/en-us/articles/201187656-How-do-I-manually-install-a-channel-) on Plex website.
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
Several options are provided in the preferences of this agent.
|
||||
* Addic7ed username/password: Provide your addic7ed username here, otherwise the provider won't work. Please make sure your account is activated, before using the agent.
|
||||
* Subtitle language (1)/(2)/(3): Your preferred languages to download subtitles for.
|
||||
* Additional Subtitle Languages: Additional languages to download (comma-separated; use ISO-639-1 codes)
|
||||
* Provider: Enable ...: Enable/disable this provider. Affects both movies and series.
|
||||
* Addic7ed: boost over hash score if requirements met: if an Addic7ed subtitle matches the video's series, season, episode, year, and format (e.g. WEB-DL), boost its score, possibly over OpenSubtitles/TheSubDB direct hash match
|
||||
* Scan: Include embedded subtitles: When enabled, subliminal finds embedded subtitles that are already present within the media file.
|
||||
* Scan: Include external subtitles: When enabled, subliminal finds subtitles located near the media file on the filesystem.
|
||||
* Minimum score for download: When configured, what is the minimum score for subtitles to download them? Lower scored subtitles are not downloaded.
|
||||
* Download hearing impaired subtitles:
|
||||
* "prefer": score subtitles for hearing impaired higher
|
||||
* "don't prefer": score subtitles for hearing impaired lower
|
||||
* "force HI": skip subtitles if the hearing impaired flag isn't set
|
||||
* "force non-HI": skip subtitles if the hearing impaired flag is set
|
||||
* Store subtitles next to media files (instead of metadata): See Store as metadata or on filesystem
|
||||
* Subtitle folder: See Store as metadata or on filesystem
|
||||
* Custom Subtitle folder: See Store as metadata or on filesystem
|
||||
|
||||
Store as metadata or on filesystem
|
||||
----------------------------------
|
||||
By default, Plex stores posters, fan art and subtitles as metadata in a separate folder which is not managed by the user. This is the default behaviour of this agent. However, expert users can enable 'Store subtitles next to media files'. The agent will write the subtitle files in the media folder. The setting 'Subtitle folder' configures in which folder (current folder or other subfolder) the subtitles are stored. The expert user can also supply 'Custom Subtitle folder' which can also be an absolute path.
|
||||
|
||||
Please note that you need a way to pick up external subtitles to show up in the Plex Media server. When the subtitles are stored next to your media folders, it is sufficient to enable Local Media agent and place it below the Subliminal agent in the agent priorities. When a subfolder (either custom or predefined) is used, you need [LocalMediaExtended](https://github.com/pannal/LocalMediaExtended.bundle).
|
||||
|
||||
License
|
||||
-------
|
||||
MIT
|
||||
|
||||
Libraries
|
||||
---------
|
||||
Uses the following libraries and their LICENSE:
|
||||
- [babelfish](https://pypi.python.org/pypi/babelfish/) (BSD-3-Clause)
|
||||
- [beautifulsoup4](https://pypi.python.org/pypi/beautifulsoup4/) (MIT)
|
||||
- [chardet](https://pypi.python.org/pypi/chardet/) (LGPL)
|
||||
- [dogpile.core](https://pypi.python.org/pypi/dogpile.core/) (BSD)
|
||||
- [dogpile.cache](https://pypi.python.org/pypi/dogpile.cache/) (BSD)
|
||||
- [enzyme](https://pypi.python.org/pypi/enzyme/) (Apache 2.0)
|
||||
- [guessit](https://pypi.python.org/pypi/guessit/) (LGPLv3)
|
||||
- [html5lib](https://pypi.python.org/pypi/html5lib/) (MIT)
|
||||
- [pysrt](https://pypi.python.org/pypi/pysrt/) (GPLv3)
|
||||
- [requests](https://pypi.python.org/pypi/requests/) (Apache 2.0)
|
||||
- [stevedore](https://pypi.python.org/pypi/stevedore/) (Apache)
|
||||
- [subliminal](https://pypi.python.org/pypi/subliminal/) (MIT)
|
||||
- [xdg](https://pypi.python.org/pypi/pyxdg/) (LGPLv2)
|
||||
- [setuptools](https://pypi.python.org/pypi/setuptools/) (PSF ZPL)
|
||||
-82
@@ -1,82 +0,0 @@
|
||||
Subliminal
|
||||
==========
|
||||
Subtitles, faster than your thoughts.
|
||||
|
||||
.. image:: https://img.shields.io/pypi/v/subliminal.svg
|
||||
:target: https://pypi.python.org/pypi/subliminal
|
||||
:alt: Latest Version
|
||||
|
||||
.. image:: https://travis-ci.org/Diaoul/subliminal.svg?branch=develop
|
||||
:target: https://travis-ci.org/Diaoul/subliminal
|
||||
:alt: Travis CI build status
|
||||
|
||||
.. image:: https://readthedocs.org/projects/subliminal/badge/?version=latest
|
||||
:target: https://subliminal.readthedocs.org/
|
||||
:alt: Documentation Status
|
||||
|
||||
.. image:: https://coveralls.io/repos/Diaoul/subliminal/badge.svg?branch=develop&service=github
|
||||
:target: https://coveralls.io/github/Diaoul/subliminal?branch=develop
|
||||
:alt: Code coverage
|
||||
|
||||
.. image:: https://img.shields.io/github/license/Diaoul/subliminal.svg
|
||||
:target: https://github.com/Diaoul/subliminal/blob/master/LICENSE
|
||||
:alt: License
|
||||
|
||||
.. image:: https://img.shields.io/badge/gitter-join%20chat-1dce73.svg
|
||||
:alt: Join the chat at https://gitter.im/Diaoul/subliminal
|
||||
:target: https://gitter.im/Diaoul/subliminal
|
||||
|
||||
|
||||
:Project page: https://github.com/Diaoul/subliminal
|
||||
:Documentation: https://subliminal.readthedocs.org/
|
||||
|
||||
|
||||
Usage
|
||||
-----
|
||||
CLI
|
||||
^^^
|
||||
Download English subtitles::
|
||||
|
||||
$ subliminal download -l en The.Big.Bang.Theory.S05E18.HDTV.x264-LOL.mp4
|
||||
Collecting videos [####################################] 100%
|
||||
1 video collected / 0 video ignored / 0 error
|
||||
Downloading subtitles [####################################] 100%
|
||||
Downloaded 1 subtitle
|
||||
|
||||
Library
|
||||
^^^^^^^
|
||||
Download best subtitles in French and English for videos less than two weeks old in a video folder:
|
||||
|
||||
.. code:: python
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
from babelfish import Language
|
||||
from subliminal import download_best_subtitles, region, save_subtitles, scan_videos
|
||||
|
||||
# configure the cache
|
||||
region.configure('dogpile.cache.dbm', arguments={'filename': 'cachefile.dbm'})
|
||||
|
||||
# scan for videos newer than 2 weeks and their existing subtitles in a folder
|
||||
videos = scan_videos('/video/folder', age=timedelta(weeks=2))
|
||||
|
||||
# download best subtitles
|
||||
subtitles = download_best_subtitles(videos, {Language('eng'), Language('fra')})
|
||||
|
||||
# save them to disk, next to the video
|
||||
for v in videos:
|
||||
save_subtitles(v, subtitles[v])
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
Subliminal can be installed as a regular python module by running::
|
||||
|
||||
$ [sudo] pip install subliminal
|
||||
|
||||
For a better isolation with your system you should use a dedicated virtualenv or install for your user only using
|
||||
the ``--user`` flag.
|
||||
|
||||
Nautilus/Nemo integration
|
||||
-------------------------
|
||||
See the dedicated `project page <https://github.com/Diaoul/nautilus-subliminal>`_ for more information.
|
||||
@@ -0,0 +1,211 @@
|
||||
# hdbits.org
|
||||
|
||||
import string, os, urllib, zipfile, re, copy
|
||||
from babelfish import Language
|
||||
from datetime import timedelta
|
||||
import subliminal
|
||||
import subliminal_patch
|
||||
import logger
|
||||
|
||||
OS_PLEX_USERAGENT = 'plexapp.com v9.0'
|
||||
|
||||
DEPENDENCY_MODULE_NAMES = ['subliminal', 'subliminal_patch', 'enzyme', 'guessit', 'requests']
|
||||
|
||||
def Start():
    """Set HTTP defaults, hook up dependency logging and configure the cache.

    NOTE(review): presumably invoked by the Plex framework when the plug-in
    starts -- confirm against the framework documentation.
    """
    # Cache time of zero (presumably disables HTTP response caching -- confirm).
    HTTP.CacheTime = 0
    HTTP.Headers['User-agent'] = OS_PLEX_USERAGENT
    Log.Debug("START CALLED")
    # Route the loggers of the bundled dependencies through the handler
    # installed by logger.registerLoggingHander.
    logger.registerLoggingHander(DEPENDENCY_MODULE_NAMES)
    # Configure the cache to live in memory, as per
    # https://github.com/Diaoul/subliminal/issues/303
    subliminal.region.configure('dogpile.cache.memory')
|
||||
|
||||
|
||||
|
||||
def ValidatePrefs():
    """Log that preference validation was requested; no checks are performed."""
    Log.Debug("Validate Prefs called.")
    return None
|
||||
|
||||
# Prepare a list of languages we want subs for
|
||||
def getLangList():
    """Build the set of languages subtitles should be fetched for.

    ``langPref1`` is always included. When ``subtitles.only_one`` is set it
    is the only language returned. Otherwise ``langPref2`` / ``langPref3``
    (unless they hold the literal string "None") and every parseable entry
    of the comma-separated ``langPrefCustom`` preference are added.

    Returns:
        set: ``Language`` objects, never empty.
    """
    langList = {Language.fromietf(Prefs["langPref1"])}

    if Prefs['subtitles.only_one']:
        return langList

    for pref_key in ("langPref2", "langPref3"):
        if Prefs[pref_key] != "None":
            langList.add(Language.fromietf(Prefs[pref_key]))

    # Tolerate an unset preference instead of failing on ``None.strip()``.
    langCustom = (Prefs["langPrefCustom"] or "").strip()
    if langCustom and langCustom != "None":
        for lang in langCustom.split(u","):
            lang = lang.strip()
            if not lang:
                # e.g. "en,,fr" or a trailing comma
                continue
            # Try the entry as an IETF code first, then as a language name.
            # ``except Exception`` (not a bare ``except``) so that
            # KeyboardInterrupt / SystemExit still propagate.
            try:
                real_lang = Language.fromietf(lang)
            except Exception:
                try:
                    real_lang = Language.fromname(lang)
                except Exception:
                    Log.Warn("Ignoring unknown custom subtitle language: %s" % lang)
                    continue
            langList.add(real_lang)

    return langList
|
||||
|
||||
def getSubtitleDestinationFolder():
    """Return the configured subtitle folder, or None when there is none.

    None is returned when subtitles are not saved to the filesystem, or when
    no custom folder is set and the sub-folder preference is
    "current folder". A non-empty custom folder takes precedence over the
    sub-folder preference.
    """
    if not Prefs["subtitles.save.filesystem"]:
        return None

    custom_pref = Prefs["subtitles.save.subFolder.Custom"]
    if custom_pref:
        custom_folder = custom_pref.strip()
        if custom_folder:
            return custom_folder

    sub_folder = Prefs["subtitles.save.subFolder"]
    if sub_folder == "current folder":
        return None
    return sub_folder
|
||||
|
||||
def initSubliminalPatches():
    """Push the current preferences into the ``subliminal_patch`` settings."""
    # Custom subtitle destination folders are scanned for pre-existing subs.
    destination = getSubtitleDestinationFolder()
    if destination:
        custom_paths = [destination]
    else:
        custom_paths = []
    subliminal_patch.patch_video.CUSTOM_PATHS = custom_paths

    download_tries = int(Prefs['subtitles.try_downloads'])
    subliminal_patch.patch_provider_pool.DOWNLOAD_TRIES = download_tries

    use_boost = bool(Prefs['provider.addic7ed.boost'])
    subliminal_patch.patch_providers.addic7ed.USE_BOOST = use_boost
|
||||
|
||||
def getProviders():
    """Return the names of the providers whose "enabled" preference is truthy."""
    enabled_by_name = {
        'opensubtitles': Prefs['provider.opensubtitles.enabled'],
        'thesubdb': Prefs['provider.thesubdb.enabled'],
        'podnapisi': Prefs['provider.podnapisi.enabled'],
        'addic7ed': Prefs['provider.addic7ed.enabled'],
        'tvsubtitles': Prefs['provider.tvsubtitles.enabled'],
    }
    # Iterating the dict yields provider names; ``.get`` maps each name to
    # its flag, so only names with a truthy flag pass the filter.
    return filter(enabled_by_name.get, enabled_by_name)
|
||||
|
||||
def getProviderSettings():
    """Return per-provider configuration dicts built from the preferences.

    Only ``addic7ed`` and ``opensubtitles`` carry settings; the result is
    keyed by provider name.
    """
    addic7ed_settings = {
        'username': Prefs['provider.addic7ed.username'],
        'password': Prefs['provider.addic7ed.password'],
        'use_random_agents': Prefs['provider.addic7ed.use_random_agents'],
    }
    opensubtitles_settings = {
        'username': Prefs['provider.opensubtitles.username'],
        'password': Prefs['provider.opensubtitles.password'],
    }
    return {
        'addic7ed': addic7ed_settings,
        'opensubtitles': opensubtitles_settings,
    }
|
||||
|
||||
def scanTvMedia(media):
    """Scan every media part of every episode of a show.

    Args:
        media: object exposing ``seasons`` -> ``episodes`` -> ``items`` ->
            ``parts``, as iterated below.

    Returns:
        dict: each successfully scanned video mapped to its media part.
        Parts that ``scanVideo`` could not scan are left out.
    """
    videos = {}
    for season in media.seasons:
        episodes = media.seasons[season].episodes
        for episode in episodes:
            for item in episodes[episode].items:
                for part in item.parts:
                    scannedVideo = scanVideo(part, "episode")
                    # scanVideo yields None when the file could not be
                    # guessed. A None key would later fail in
                    # downloadBestSubtitles on ``video.subtitle_languages``.
                    if scannedVideo is None:
                        continue
                    videos[scannedVideo] = part
    return videos
|
||||
|
||||
def scanMovieMedia(media):
    """Scan every media part of a movie.

    Args:
        media: object exposing ``items`` whose elements expose ``parts``.

    Returns:
        dict: each successfully scanned video mapped to its media part.
        Parts that ``scanVideo`` could not scan are left out.
    """
    videos = {}
    for item in media.items:
        for part in item.parts:
            scannedVideo = scanVideo(part, "movie")
            # scanVideo yields None when the file could not be guessed. A
            # None key would later fail in downloadBestSubtitles on
            # ``video.subtitle_languages``.
            if scannedVideo is None:
                continue
            videos[scannedVideo] = part
    return videos
|
||||
|
||||
def scanVideo(part, video_type):
    """Scan ``part.file`` with subliminal, hinting the given video type.

    The embedded/external subtitle scan flags come from the preferences.

    Returns:
        The object returned by ``subliminal.video.scan_video``, or None when
        it raises ``ValueError`` (a warning is logged in that case).
    """
    scan_embedded = Prefs['subtitles.scan.embedded']
    scan_external = Prefs['subtitles.scan.external']

    Log.Debug("Scanning video: %s, subtitles=%s, embedded_subtitles=%s" % (part.file, scan_external, scan_embedded))
    try:
        scanned = subliminal.video.scan_video(
            part.file,
            subtitles=scan_external,
            embedded_subtitles=scan_embedded,
            video_type=video_type,
        )
    except ValueError:
        Log.Warn("File could not be guessed by subliminal")
        return None
    return scanned
|
||||
|
||||
def downloadBestSubtitles(videos, min_score=0):
    """Download subtitles if at least one video lacks a wanted language.

    Args:
        videos: iterable of scanned videos exposing ``subtitle_languages``.
        min_score: minimum score passed on to subliminal.

    Returns:
        The result of ``subliminal.api.download_best_subtitles``, or None
        when no languages are configured or nothing is missing.
    """
    hearing_impaired = Prefs['subtitles.search.hearingImpaired']
    languages = getLangList()
    if not languages:
        return None

    # Stop at the first video that misses a language; the ``else`` branch
    # only runs when the loop finishes without finding one.
    for video in videos:
        if languages - video.subtitle_languages:
            break
        Log.Debug('All languages %r exist for %s', languages, video)
    else:
        Log.Debug("All languages for all requested videos exist. Doing nothing.")
        return None

    Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s" %(min_score, hearing_impaired))
    return subliminal.api.download_best_subtitles(
        videos,
        languages,
        min_score,
        hearing_impaired,
        providers=getProviders(),
        provider_configs=getProviderSettings(),
        only_one=Prefs['subtitles.only_one'],
    )
|
||||
|
||||
def saveSubtitles(videos, subtitles):
    """Store subtitles on disk or as metadata, depending on the preference.

    ``videos`` is only used by the metadata branch.
    """
    if not Prefs['subtitles.save.filesystem']:
        Log.Debug("Using metadata as subtitle storage")
        saveSubtitlesToMetadata(videos, subtitles)
        return

    Log.Debug("Using filesystem as subtitle storage")
    saveSubtitlesToFile(subtitles)
|
||||
|
||||
def saveSubtitlesToFile(subtitles):
    """Write downloaded subtitles to disk via ``subliminal.api.save_subtitles``.

    Args:
        subtitles: mapping of video -> list of subtitles; videos with an
            empty list are skipped.

    The target directory is the custom folder (used as-is when it starts
    with "/", otherwise relative to the video's folder), else the configured
    sub-folder below the video's folder, else None when the preference is
    "current folder". A missing target directory is created.
    """
    custom_pref = Prefs["subtitles.save.subFolder.Custom"]
    fld_custom = custom_pref.strip() if bool(custom_pref) else None

    for video, video_subtitles in subtitles.items():
        if not video_subtitles:
            continue

        target_dir = None
        wants_subfolder = fld_custom or Prefs["subtitles.save.subFolder"] != "current folder"
        if wants_subfolder:
            video_dir = os.path.split(video.name)[0]
            if not fld_custom:
                target_dir = os.path.join(video_dir, Prefs["subtitles.save.subFolder"])
            elif fld_custom.startswith("/"):
                # absolute folder: used as given
                target_dir = fld_custom
            else:
                target_dir = os.path.join(video_dir, fld_custom)

            # a specific folder was requested; create it if it doesn't exist
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)

        subliminal.api.save_subtitles(video, video_subtitles, directory=target_dir, single=Prefs['subtitles.only_one'])
|
||||
|
||||
def saveSubtitlesToMetadata(videos, subtitles):
    """Attach each downloaded subtitle to its media part as metadata.

    Args:
        videos: mapping of video -> media part.
        subtitles: mapping of video -> iterable of subtitles.

    Each subtitle is stored under its matched language and keyed by its
    ``page_link``.
    """
    for video, video_subtitles in subtitles.items():
        media_part = videos[video]
        for subtitle in video_subtitles:
            srt_proxy = Proxy.Media(subtitle.content, ext="srt")
            language_key = Locale.Language.Match(subtitle.language.alpha2)
            media_part.subtitles[language_key][subtitle.page_link] = srt_proxy
|
||||
|
||||
class SubZeroSubtitlesAgentMovies(Agent.Movies):
    """Secondary movie agent that fetches subtitles for scanned movie parts."""

    name = 'Sub-Zero Movie Subtitles'
    languages = [Locale.Language.English]
    primary_provider = False
    contributes_to = ['com.plexapp.agents.imdb']

    def search(self, results, media, lang):
        """Append a single placeholder result (id 'null', score 100)."""
        Log.Debug("MOVIE SEARCH CALLED")
        placeholder = MetadataSearchResult(id='null', score=100)
        results.Append(placeholder)

    def update(self, metadata, media, lang):
        """Scan the movie's parts, download missing subtitles and save them."""
        Log.Debug("MOVIE UPDATE CALLED")
        initSubliminalPatches()
        part_by_video = scanMovieMedia(media)
        scanned_videos = part_by_video.keys()
        minimum_score = int(Prefs["subtitles.search.minimumMovieScore"])
        downloaded = downloadBestSubtitles(scanned_videos, min_score=minimum_score)
        if not downloaded:
            return
        saveSubtitles(part_by_video, downloaded)
|
||||
|
||||
class SubZeroSubtitlesAgentTvShows(Agent.TV_Shows):
    """Secondary TV agent that fetches subtitles for scanned episode parts."""

    name = 'Sub-Zero TV Subtitles'
    languages = [Locale.Language.English]
    primary_provider = False
    contributes_to = ['com.plexapp.agents.thetvdb', 'com.plexapp.agents.thetvdbdvdorder']

    def search(self, results, media, lang):
        """Append a single placeholder result (id 'null', score 100)."""
        Log.Debug("TV SEARCH CALLED")
        placeholder = MetadataSearchResult(id='null', score=100)
        results.Append(placeholder)

    def update(self, metadata, media, lang):
        """Scan the show's parts, download missing subtitles and save them."""
        Log.Debug("TvUpdate. Lang %s" % lang)
        initSubliminalPatches()
        part_by_video = scanTvMedia(media)
        scanned_videos = part_by_video.keys()
        minimum_score = int(Prefs["subtitles.search.minimumTVScore"])
        downloaded = downloadBestSubtitles(scanned_videos, min_score=minimum_score)
        if not downloaded:
            return
        saveSubtitles(part_by_video, downloaded)
|
||||
@@ -0,0 +1,33 @@
|
||||
import logging
|
||||
|
||||
def registerLoggingHander(dependencies):
    """Attach one shared ``PlexLoggerHandler`` to each named logger.

    Args:
        dependencies: iterable of logger names; each logger is set to the
            'DEBUG' level and receives the same handler instance.
    """
    shared_handler = PlexLoggerHandler()
    for logger_name in dependencies:
        Log.Debug("Registering LoggerHandler for dependency: %s" % logger_name)
        dependency_logger = logging.getLogger(logger_name)
        dependency_logger.setLevel('DEBUG')
        dependency_logger.addHandler(shared_handler)
|
||||
|
||||
class PlexLoggerHandler(logging.StreamHandler):
    """Logging handler that forwards records to the ``Log`` object.

    Records are prefixed with the name of the logger that produced them and
    dispatched to the ``Log`` method matching the record's level.
    """

    def __init__(self, level=0):
        """Create the handler.

        Args:
            level: minimum level this handler accepts (default 0, i.e.
                ``logging.NOTSET``).
        """
        # StreamHandler's first positional parameter is the *stream*, not the
        # level, so the level has to be applied separately.
        super(PlexLoggerHandler, self).__init__()
        self.setLevel(level)

    def getFormattedString(self, record):
        """Return ``"<logger name>: <formatted message>"`` for ``record``."""
        return record.name + ": " + record.getMessage()

    def emit(self, record):
        """Forward ``record`` to the ``Log`` method for its level.

        Records whose level is not one of the five standard levels are
        reported through ``Log.Error`` as an unknown level.
        """
        # logging.FATAL is an alias of logging.CRITICAL, so it needs no
        # branch of its own.
        levelno = record.levelno
        if levelno == logging.DEBUG:
            Log.Debug(self.getFormattedString(record))
        elif levelno == logging.INFO:
            Log.Info(self.getFormattedString(record))
        elif levelno == logging.WARNING:
            Log.Warn(self.getFormattedString(record))
        elif levelno == logging.ERROR:
            Log.Error(self.getFormattedString(record))
        elif levelno == logging.CRITICAL:
            Log.Critical(self.getFormattedString(record))
        else:
            Log.Error("UNKNOWN LEVEL: %s", record.getMessage())
|
||||
@@ -0,0 +1,163 @@
|
||||
[
|
||||
{ "id": "subtitles.try_downloads",
|
||||
"label": "How many download tries per subtitle (on timeout or error)",
|
||||
"type": "enum",
|
||||
"values": ["1", "2", "3", "4"],
|
||||
"default": "2"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.username",
|
||||
"label": "Addic7ed Username",
|
||||
"type": "text",
|
||||
"default": "Username"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.password",
|
||||
"label": "Addic7ed Password",
|
||||
"type": "text",
|
||||
"option": "hidden",
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.username",
|
||||
"label": "Opensubtitles Username (VIP)",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.password",
|
||||
"label": "Opensubtitles Password",
|
||||
"type": "text",
|
||||
"option": "hidden",
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.use_random_agents",
|
||||
"label": "Addic7ed: Use random user agents (should not be necessary)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "langPref1",
|
||||
"label": "Subtitle Language (1)",
|
||||
"type": "enum",
|
||||
"values": ["sq","ar","be","bs","bg","ca","zh","cs","da","nl","en","et","fi","fr","de","el","he","hi","hu","is","id","it","ja","ko","lv","lt","mk","ms","no","fa","pl","pt","pt-br","ro","ru","sr","sk","sl","es","sv","th","tr","uk","vi","hr"],
|
||||
"default": "en"
|
||||
},
|
||||
{
|
||||
"id": "langPref2",
|
||||
"label": "Subtitle Language (2)",
|
||||
"type": "enum",
|
||||
"values": ["None", "sq","ar","be","bs","bg","ca","zh","cs","da","nl","en","et","fi","fr","de","el","he","hi","hu","is","id","it","ja","ko","lv","lt","mk","ms","no","fa","pl","pt","pt-br","ro","ru","sr","sk","sl","es","sv","th","tr","uk","vi","hr"],
|
||||
"default": "None"
|
||||
},
|
||||
{
|
||||
"id": "langPref3",
|
||||
"label": "Subtitle Language (3)",
|
||||
"type": "enum",
|
||||
"values": ["None", "sq","ar","be","bs","bg","ca","zh","cs","da","nl","en","et","fi","fr","de","el","he","hi","hu","is","id","it","ja","ko","lv","lt","mk","ms","no","fa","pl","pt","pt-br","ro","ru","sr","sk","sl","es","sv","th","tr","uk","vi","hr"],
|
||||
"default": "None"
|
||||
},
|
||||
{
|
||||
"id": "langPrefCustom",
|
||||
"label": "Additional Subtitle Languages (use ISO-639-1 codes; comma-separated)",
|
||||
"type": "text",
|
||||
"default": "None"
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.enabled",
|
||||
"label": "Provider: Enable OpenSubtitles",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.thesubdb.enabled",
|
||||
"label": "Provider: Enable TheSubDB",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.podnapisi.enabled",
|
||||
"label": "Provider: Enable Podnapisi.NET",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.enabled",
|
||||
"label": "Provider: Enable Addic7ed",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.boost",
|
||||
"label": "Addic7ed: boost over hash score if requirements met (prefer over other providers)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.tvsubtitles.enabled",
|
||||
"label": "Provider: Enable TVsubtitles.net",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.embedded",
|
||||
"label": "Scan: include embedded subtitles (skip if existing)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.external",
|
||||
"label": "Scan: include external subtitles (skip if existing)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.search.minimumTVScore",
|
||||
"label": "Minimum score for TV subtitles to download",
|
||||
"type": "enum",
|
||||
"values": ["100","95","90","85","80","75","70","65","60","55","50","45","40","35","30","25","20","15","10","5","0"],
|
||||
"default": "40"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.search.minimumMovieScore",
|
||||
"label": "Minimum score for movie subtitles to download",
|
||||
"type": "enum",
|
||||
"values": ["100","95","90","85","80","75","70","65","60","55","50","45","40","35","30","25","20","15","10","5","0"],
|
||||
"default": "20"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.search.hearingImpaired",
|
||||
"label": "Download hearing impaired subtitles.",
|
||||
"type": "enum",
|
||||
"values": ["prefer", "don't prefer", "force HI", "force non-HI"],
|
||||
"default": "don't prefer"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.save.filesystem",
|
||||
"label": "Store subtitles next to media files (instead of metadata)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.save.subFolder",
|
||||
"label": "Subtitle Folder (\"current folder\" is the folder the current media file lives in) - needs LocalMediaExtended agent",
|
||||
"type": "enum",
|
||||
"values": ["current folder", "sub", "subs", "subtitle", "subtitles"],
|
||||
"default": "subs"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.save.subFolder.Custom",
|
||||
"label": "Custom Subtitle folder (overrides \"Subtitle Folder\"; computes to real paths; use for example \"bla\" as a subfolder of the current media file folder or an absolute path) - needs LocalMediaExtended agent",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "subtitles.only_one",
|
||||
"label": "Restrict to one language (skips adding \".lang.\" to the subtitle filename; only uses \"Subtitle Language (1)\")",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,47 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>CFBundleDevelopmentRegion</key>
|
||||
<string>English</string>
|
||||
<key>CFBundleIdentifier</key>
|
||||
<string>com.plexapp.agents.subzero</string>
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>1.0.9</string>
|
||||
<key>CFBundleSignature</key>
|
||||
<string>????</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>1.0.9.7</string>
|
||||
<key>PlexFrameworkVersion</key>
|
||||
<string>2</string>
|
||||
<key>PlexPluginClass</key>
|
||||
<string>Agent</string>
|
||||
<key>PlexPluginMode</key>
|
||||
<string>Daemon</string>
|
||||
<key>PlexPluginConsoleLogging</key>
|
||||
<string>1</string>
|
||||
<key>PlexPluginDevMode</key>
|
||||
<string>1</string>
|
||||
<key>PlexPluginCodePolicy</key>
|
||||
<!-- this allows channels to access some python methods which are otherwise blocked, as well as import external code libraries, and interact with the PMS HTTP API -->
|
||||
<string>Elevated</string>
|
||||
<key>PlexAgentAttributionText</key>
|
||||
<string><div style="white-space: pre;"><img src="https://raw.githubusercontent.com/pannal/Sub-Zero/master/Sub-Zero.bundle/Contents/Resources/subzero.gif" />
|
||||
|
||||
<h1>Sub-Zero for Plex</h1><i>Subtitles done right</i>
|
||||
|
||||
Version 1.1-rc5.2
|
||||
|
||||
Originally based on @bramwalet's awesome <a href="https://github.com/bramwalet/Subliminal.bundle">Subliminal.bundle</a>
|
||||
|
||||
<strong>Need help?</strong>
|
||||
Plex thread: <a href="https://forums.plex.tv/discussion/186575">https://forums.plex.tv/discussion/186575</a>
|
||||
Github: <a href="https://github.com/pannal/Sub-Zero">https://github.com/pannal/Sub-Zero</a>
|
||||
|
||||
panni, 2015
|
||||
</div>
|
||||
</string>
|
||||
</dict>
|
||||
</plist>
|
||||
@@ -0,0 +1,25 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
__title__ = 'babelfish'
|
||||
__version__ = '0.5.5-dev'
|
||||
__author__ = 'Antoine Bertin'
|
||||
__license__ = 'BSD'
|
||||
__copyright__ = 'Copyright 2015 the BabelFish authors'
|
||||
|
||||
import sys
|
||||
|
||||
# Base string type of the running interpreter: ``str`` on Python 3,
# ``basestring`` on Python 2 (the name is only evaluated there).
basestr = str if sys.version_info[0] >= 3 else basestring
|
||||
|
||||
from .converters import (LanguageConverter, LanguageReverseConverter, LanguageEquivalenceConverter, CountryConverter,
|
||||
CountryReverseConverter)
|
||||
from .country import country_converters, COUNTRIES, COUNTRY_MATRIX, Country
|
||||
from .exceptions import Error, LanguageConvertError, LanguageReverseError, CountryConvertError, CountryReverseError
|
||||
from .language import language_converters, LANGUAGES, LANGUAGE_MATRIX, Language
|
||||
from .script import SCRIPTS, SCRIPT_MATRIX, Script
|
||||
@@ -0,0 +1,287 @@
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
import collections
|
||||
from pkg_resources import iter_entry_points, EntryPoint
|
||||
from ..exceptions import LanguageConvertError, LanguageReverseError
|
||||
|
||||
|
||||
# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
|
||||
try:  # Python 3.3+: the ABCs live in collections.abc (the collections aliases were removed in 3.10)
    from collections.abc import Mapping as _Mapping, MutableMapping as _MutableMapping
except ImportError:  # Python 2
    from collections import Mapping as _Mapping, MutableMapping as _MutableMapping


class CaseInsensitiveDict(_MutableMapping):
    """A case-insensitive ``dict``-like object.

    Implements all methods and operations of
    ``collections.MutableMapping`` as well as dict's ``copy``. Also
    provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and ``iter(instance)``,
    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
    will contain case-sensitive keys. However, querying and contains
    testing is case insensitive:

        cid = CaseInsensitiveDict()
        cid['English'] = 'eng'
        cid['ENGLISH'] == 'eng'  # True
        list(cid) == ['English']  # True

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.

    """
    def __init__(self, data=None, **kwargs):
        """Create the mapping, optionally filled from `data` and keyword arguments

        :param data: initial content, forwarded to ``update`` (an empty dict is used for ``None``)
        :param kwargs: additional key/value pairs forwarded to ``update``

        """
        # Maps lowercased key -> (key as last set, value)
        self._store = dict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        # Yield the keys with the case they were last set with
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        """Compare with another mapping, ignoring the case of the keys

        Returns ``NotImplemented`` when `other` is not a mapping.

        """
        if isinstance(other, _Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        """Return a new :class:`CaseInsensitiveDict` built from the stored (key, value) pairs"""
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
|
||||
|
||||
|
||||
class LanguageConverter(object):
    """Base class for converters that turn an alpha3 language code, together with an
    optional alpha2 country code and an optional script code, into a custom code

    .. attribute:: codes

        Set of possible custom codes

    """
    def convert(self, alpha3, country=None, script=None):
        """Return the custom code matching an alpha3 language code, an alpha2 country
        code and a script code

        This base implementation always raises :class:`NotImplementedError`.

        :param string alpha3: ISO-639-3 language code
        :param country: ISO-3166 country code, if any
        :type country: string or None
        :param script: ISO-15924 script code, if any
        :type script: string or None
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.LanguageConvertError`

        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class LanguageReverseConverter(LanguageConverter):
    """A :class:`LanguageConverter` that can also reverse a custom code into an alpha3
    ISO-639-3 language code, an alpha2 ISO-3166-1 country code and an ISO-15924 script code

    """
    def reverse(self, code):
        """Reverse a custom code into alpha3, country and script code

        This base implementation always raises :class:`NotImplementedError`.

        :param string code: custom code to reverse
        :return: the corresponding alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
        :rtype: tuple
        :raise: :class:`~babelfish.exceptions.LanguageReverseError`

        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class LanguageEquivalenceConverter(LanguageReverseConverter):
    """Utility base class to define a :class:`LanguageReverseConverter` from nothing more
    than a dict mapping alpha3 codes to their corresponding symbols.

    The dict of equivalence must be provided as a class variable named SYMBOLS.

    Reverse conversion is case-insensitive by default; set the class variable
    CASE_SENSITIVE to ``True`` to make it case-sensitive.

    Example::

        class MyCodeConverter(babelfish.LanguageEquivalenceConverter):
            CASE_SENSITIVE = True
            SYMBOLS = {'fra': 'mycode1', 'eng': 'mycode2'}

    """
    CASE_SENSITIVE = False

    def __init__(self):
        """Build the forward and reverse lookup tables from ``SYMBOLS``"""
        self.codes = set()
        self.to_symbol = {}
        # A plain dict gives case-sensitive reverse lookups
        self.from_symbol = {} if self.CASE_SENSITIVE else CaseInsensitiveDict()

        for language, custom in self.SYMBOLS.items():
            self.to_symbol[language] = custom
            # Equivalence converters carry neither country nor script information
            self.from_symbol[custom] = (language, None, None)
            self.codes.add(custom)

    def convert(self, alpha3, country=None, script=None):
        """Return the symbol registered for `alpha3`

        :raise: :class:`~babelfish.exceptions.LanguageConvertError` if `alpha3` has no symbol

        """
        try:
            symbol = self.to_symbol[alpha3]
        except KeyError:
            raise LanguageConvertError(alpha3, country, script)
        return symbol

    def reverse(self, code):
        """Return the ``(alpha3, None, None)`` tuple registered for `code`

        :raise: :class:`~babelfish.exceptions.LanguageReverseError` if `code` is unknown

        """
        try:
            triple = self.from_symbol[code]
        except KeyError:
            raise LanguageReverseError(code)
        return triple
|
||||
|
||||
|
||||
class CountryConverter(object):
    """Base class for converters that turn an alpha2 country code into a custom code

    .. attribute:: codes

        Set of possible custom codes

    """
    def convert(self, alpha2):
        """Return the custom code matching an alpha2 country code

        This base implementation always raises :class:`NotImplementedError`.

        :param string alpha2: ISO-3166-1 country code
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryConvertError`

        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class CountryReverseConverter(CountryConverter):
    """A :class:`CountryConverter` that can also reverse a custom code into an alpha2
    ISO-3166-1 country code

    """
    def reverse(self, code):
        """Reverse a custom code into an alpha2 code

        This base implementation always raises :class:`NotImplementedError`.

        :param string code: custom code to reverse
        :return: the corresponding alpha2 ISO-3166-1 country code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryReverseError`

        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class ConverterManager(object):
    """Dict-like manager for babelfish converters that loads them lazily

    A converter is searched in this order:

      * Entry point converters
      * Registered converters
      * Internal converters

    .. attribute:: entry_point

        The entry point where to look for converters

    .. attribute:: internal_converters

        Internal converters with entry point syntax

    """
    entry_point = ''
    internal_converters = []

    def __init__(self):
        #: Registered converters with entry point syntax
        self.registered_converters = []

        #: Loaded converters
        self.converters = {}

    def __getitem__(self, name):
        """Get a converter, lazy loading it if necessary"""
        loaded = self.converters
        if name in loaded:
            return loaded[name]

        # Converters advertised through the entry point come first
        for entry in iter_entry_points(self.entry_point):
            if entry.name != name:
                continue
            loaded[entry.name] = entry.load()()
            return loaded[entry.name]

        # Then registered converters, then internal ones; each spec is parsed only when reached
        for spec in self.registered_converters + self.internal_converters:
            entry = EntryPoint.parse(spec)
            if entry.name != name:
                continue
            # `require` argument of ep.load() is deprecated in newer versions of setuptools
            if hasattr(entry, 'resolve'):
                factory = entry.resolve()
            elif hasattr(entry, '_load'):
                factory = entry._load()
            else:
                factory = entry.load(require=False)
            loaded[entry.name] = factory()
            return loaded[entry.name]

        raise KeyError(name)

    def __setitem__(self, name, converter):
        """Load a converter"""
        self.converters[name] = converter

    def __delitem__(self, name):
        """Unload a converter"""
        del self.converters[name]

    def __iter__(self):
        """Iterator over loaded converters"""
        return iter(self.converters)

    def register(self, entry_point):
        """Register a converter

        The converter is placed in front of the previously registered ones.

        :param string entry_point: converter to register (entry point syntax)
        :raise: ValueError if already registered

        """
        already_known = entry_point in self.registered_converters
        if already_known:
            raise ValueError('Already registered')
        self.registered_converters[:0] = [entry_point]

    def unregister(self, entry_point):
        """Unregister a converter

        :param string entry_point: converter to unregister (entry point syntax)

        """
        self.registered_converters.remove(entry_point)

    def __contains__(self, name):
        """Tell whether a converter named `name` is already loaded"""
        return name in self.converters
|
||||
@@ -0,0 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class Alpha2Converter(LanguageEquivalenceConverter):
    """Equivalence converter between alpha3 codes and the ``alpha2`` field of the
    :data:`LANGUAGE_MATRIX` entries, with case-sensitive reverse lookups

    """
    CASE_SENSITIVE = True
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        # Entries without an alpha2 value are left out
        if not iso_language.alpha2:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha2
|
||||
@@ -0,0 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class Alpha3BConverter(LanguageEquivalenceConverter):
    """Equivalence converter between alpha3 codes and the ``alpha3b`` field of the
    :data:`LANGUAGE_MATRIX` entries, with case-sensitive reverse lookups

    """
    CASE_SENSITIVE = True
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        # Entries without an alpha3b value are left out
        if not iso_language.alpha3b:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha3b
|
||||
@@ -0,0 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class Alpha3TConverter(LanguageEquivalenceConverter):
    """Equivalence converter between alpha3 codes and the ``alpha3t`` field of the
    :data:`LANGUAGE_MATRIX` entries, with case-sensitive reverse lookups

    """
    CASE_SENSITIVE = True
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        # Entries without an alpha3t value are left out
        if not iso_language.alpha3t:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha3t
|
||||
@@ -0,0 +1,31 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import CountryReverseConverter, CaseInsensitiveDict
|
||||
from ..country import COUNTRY_MATRIX
|
||||
from ..exceptions import CountryConvertError, CountryReverseError
|
||||
|
||||
|
||||
class CountryNameConverter(CountryReverseConverter):
    """Converter between alpha2 country codes and the country names of :data:`COUNTRY_MATRIX`

    Name lookups in :meth:`reverse` go through a :class:`CaseInsensitiveDict`.

    """
    def __init__(self):
        """Build the code -> name and name -> code tables from :data:`COUNTRY_MATRIX`"""
        self.codes = set()
        self.to_name = {}
        self.from_name = CaseInsensitiveDict()
        for entry in COUNTRY_MATRIX:
            label, code = entry.name, entry.alpha2
            self.codes.add(label)
            self.to_name[code] = label
            self.from_name[label] = code

    def convert(self, alpha2):
        """Return the country name for `alpha2`

        :raise: :class:`~babelfish.exceptions.CountryConvertError` if `alpha2` is unknown

        """
        names = self.to_name
        if alpha2 in names:
            return names[alpha2]
        raise CountryConvertError(alpha2)

    def reverse(self, name):
        """Return the alpha2 code for the country called `name`

        :raise: :class:`~babelfish.exceptions.CountryReverseError` if `name` is unknown

        """
        codes_by_name = self.from_name
        if name in codes_by_name:
            return codes_by_name[name]
        raise CountryReverseError(name)
|
||||
@@ -0,0 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class NameConverter(LanguageEquivalenceConverter):
    """Equivalence converter between alpha3 codes and the ``name`` field of the
    :data:`LANGUAGE_MATRIX` entries, with case-insensitive reverse lookups

    """
    CASE_SENSITIVE = False
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        # Entries without a name are left out
        if not iso_language.name:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.name
|
||||
@@ -0,0 +1,36 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageReverseConverter, CaseInsensitiveDict
|
||||
from ..exceptions import LanguageReverseError
|
||||
from ..language import language_converters
|
||||
|
||||
|
||||
class OpenSubtitlesConverter(LanguageReverseConverter):
    """Converter for OpenSubtitles codes

    It relies on the ``alpha3b`` and ``alpha2`` language converters and overrides
    them with a few hard-coded special cases.

    """
    def __init__(self):
        self.alpha3b_converter = language_converters['alpha3b']
        self.alpha2_converter = language_converters['alpha2']
        # (alpha3b, country) pairs that have a dedicated code
        self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne'}
        # Dedicated codes mapped back to (alpha3, country), matched case-insensitively
        self.from_opensubtitles = CaseInsensitiveDict({'pob': ('por', 'BR'), 'pb': ('por', 'BR'), 'ell': ('ell', None),
                                                       'scc': ('srp', None), 'mne': ('srp', 'ME')})
        special_codes = {'pob', 'pb', 'scc', 'mne'}
        self.codes = self.alpha2_converter.codes | self.alpha3b_converter.codes | special_codes

    def convert(self, alpha3, country=None, script=None):
        """Return the special code for ``(alpha3b, country)`` if there is one, else the alpha3b code"""
        alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
        return self.to_opensubtitles.get((alpha3b, country), alpha3b)

    def reverse(self, opensubtitles):
        """Reverse `opensubtitles`, trying the special codes, then alpha3b, then alpha2

        :raise: :class:`~babelfish.exceptions.LanguageReverseError` if nothing matches

        """
        special = self.from_opensubtitles
        if opensubtitles in special:
            return special[opensubtitles]
        for fallback in (self.alpha3b_converter, self.alpha2_converter):
            try:
                return fallback.reverse(opensubtitles)
            except LanguageReverseError:
                continue
        raise LanguageReverseError(opensubtitles)
|
||||
@@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageConverter
|
||||
from ..exceptions import LanguageConvertError
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class ScopeConverter(LanguageConverter):
    """Converter from an alpha3 code to the full name of the language scope

    ``SYMBOLS`` maps every alpha3 code of :data:`LANGUAGE_MATRIX` to its ``scope``
    value and ``FULLNAME`` maps the scope letters to their full names.

    """
    FULLNAME = {'I': 'individual', 'M': 'macrolanguage', 'S': 'special'}
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.scope
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """Return the full scope name of `alpha3`

        :raise: :class:`~babelfish.exceptions.LanguageConvertError` if the scope has no full name

        """
        scope = self.SYMBOLS[alpha3]
        if scope not in self.FULLNAME:
            raise LanguageConvertError(alpha3, country, script)
        return self.FULLNAME[scope]
|
||||
@@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageConverter
|
||||
from ..exceptions import LanguageConvertError
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class LanguageTypeConverter(LanguageConverter):
    """Converter from an alpha3 code to the full name of the language type

    ``SYMBOLS`` maps every alpha3 code of :data:`LANGUAGE_MATRIX` to its ``type``
    value and ``FULLNAME`` maps the type letters to their full names.

    """
    FULLNAME = {'A': 'ancient', 'C': 'constructed', 'E': 'extinct', 'H': 'historical', 'L': 'living', 'S': 'special'}
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.type
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """Return the full type name of `alpha3`

        :raise: :class:`~babelfish.exceptions.LanguageConvertError` if the type has no full name

        """
        language_type = self.SYMBOLS[alpha3]
        if language_type not in self.FULLNAME:
            raise LanguageConvertError(alpha3, country, script)
        return self.FULLNAME[language_type]
|
||||
@@ -0,0 +1,104 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from .converters import ConverterManager
|
||||
from . import basestr
|
||||
|
||||
|
||||
#: Country names indexed by their alpha2 code
COUNTRIES = {}
#: Every :class:`IsoCountry` read from the data file, in file order
COUNTRY_MATRIX = []

#: The namedtuple used in the :data:`COUNTRY_MATRIX`
IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])

f = resource_stream('babelfish', 'data/iso-3166-1.txt')
try:
    # The first line is the column header, not a country
    f.readline()
    for l in f:
        # Each line is decoded from UTF-8 and split into "name;alpha2"
        iso_country = IsoCountry(*l.decode('utf-8').strip().split(';'))
        COUNTRIES[iso_country.alpha2] = iso_country.name
        COUNTRY_MATRIX.append(iso_country)
finally:
    # Release the stream even when a line fails to decode or parse
    f.close()
|
||||
|
||||
|
||||
class CountryConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` dedicated to country converters

    It looks converters up in the ``babelfish.country_converters`` entry point and
    declares the ``name`` converter as internal converter.

    """
    internal_converters = ['name = babelfish.converters.countryname:CountryNameConverter']
    entry_point = 'babelfish.country_converters'


#: Module-level manager used to look country converters up by name
country_converters = CountryConverterManager()
|
||||
|
||||
|
||||
class CountryMeta(type):
    """Metaclass of :class:`Country`

    Class attributes whose name starts with ``from`` and that are not found by the
    normal lookup are turned into :meth:`Country.fromcode` calls, so that
    :meth:`Country.frommycode` uses the ``mycode`` `converter`

    """
    def __getattr__(cls, name):
        # Anything that is not a "from<converter>" shortcut goes through the regular lookup
        if not name.startswith('from'):
            return type.__getattribute__(cls, name)
        converter_name = name[4:]
        return partial(cls.fromcode, converter=converter_name)
|
||||
|
||||
|
||||
class Country(CountryMeta(str('CountryBase'), (object,), {})):
    """A country on Earth

    A country is represented by a 2-letter code from the ISO-3166 standard

    :param string country: 2-letter ISO-3166 country code
    :raise: ValueError if `country` is not a key of :data:`COUNTRIES`

    """
    def __init__(self, country):
        if country not in COUNTRIES:
            raise ValueError('%r is not a valid country' % country)

        #: ISO-3166 2-letter country code
        self.alpha2 = country

    @classmethod
    def fromcode(cls, code, converter):
        """Create a :class:`Country` by its `code` using `converter` to
        :meth:`~babelfish.converters.CountryReverseConverter.reverse` it

        :param string code: the code to reverse
        :param string converter: name of the :class:`~babelfish.converters.CountryReverseConverter` to use
        :return: the corresponding :class:`Country` instance
        :rtype: :class:`Country`

        """
        return cls(country_converters[converter].reverse(code))

    def __getstate__(self):
        # The alpha2 code is the whole pickled state
        return self.alpha2

    def __setstate__(self, state):
        self.alpha2 = state

    def __getattr__(self, name):
        """Convert the country with the converter called `name`

        Only reached when the normal attribute lookup fails.

        :raise: AttributeError if there is no converter called `name`

        """
        # A missing converter must surface as AttributeError, otherwise hasattr() and
        # getattr() with a default let a KeyError escape on Python 3. Only the lookup is
        # guarded so that errors raised by convert() itself are left untouched.
        try:
            converter = country_converters[name]
        except KeyError:
            raise AttributeError(name)
        return converter.convert(self.alpha2)

    def __hash__(self):
        return hash(self.alpha2)

    def __eq__(self, other):
        """Compare with a string (against ``str(self)``) or with another :class:`Country`"""
        if isinstance(other, basestr):
            return str(self) == other
        if not isinstance(other, Country):
            return False
        return self.alpha2 == other.alpha2

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return '<Country [%s]>' % self

    def __str__(self):
        return self.alpha2
|
||||
@@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
import os.path
|
||||
import tempfile
|
||||
import zipfile
|
||||
import requests
|
||||
|
||||
|
||||
# Directory of this script; every downloaded file is written next to it
DATA_DIR = os.path.dirname(__file__)


def _fetch(url):
    """Download `url` and return the raw body, raising on an HTTP error status"""
    response = requests.get(url)
    # Fail loudly instead of storing an error page as data
    response.raise_for_status()
    return response.content


def _save(filename, content):
    """Write the bytes `content` to `filename` inside :data:`DATA_DIR`"""
    # Binary mode: the content is bytes and must be written unchanged
    with open(os.path.join(DATA_DIR, filename), 'wb') as f:
        f.write(content)


def _extract(url, member):
    """Download the zip archive at `url` and extract `member` into :data:`DATA_DIR`"""
    with tempfile.TemporaryFile() as f:
        f.write(_fetch(url))
        with zipfile.ZipFile(f) as z:
            z.extract(member, DATA_DIR)


# Each download completes before its target file is opened, so a failed request
# does not truncate the existing data file.

# iso-3166-1.txt
print('Downloading ISO-3166-1 standard (ISO country codes)...')
_save('iso-3166-1.txt',
      _fetch('http://www.iso.org/iso/home/standards/country_codes/country_names_and_code_elements_txt.htm').strip())

# iso-639-3.tab
print('Downloading ISO-639-3 standard (ISO language codes)...')
_extract('http://www-01.sil.org/iso639-3/iso-639-3_Code_Tables_20130531.zip', 'iso-639-3.tab')

# iso-15924
print('Downloading ISO-15924 standard (ISO script codes)...')
_extract('http://www.unicode.org/iso15924/iso15924.txt.zip', 'iso15924-utf8-20131012.txt')

# opensubtitles supported languages
print('Downloading OpenSubtitles supported languages...')
_save('opensubtitles_languages.txt', _fetch('http://www.opensubtitles.org/addons/export_languages.php'))

print('Done!')
|
||||
@@ -0,0 +1,250 @@
|
||||
Country Name;ISO 3166-1-alpha-2 code
|
||||
AFGHANISTAN;AF
|
||||
ÅLAND ISLANDS;AX
|
||||
ALBANIA;AL
|
||||
ALGERIA;DZ
|
||||
AMERICAN SAMOA;AS
|
||||
ANDORRA;AD
|
||||
ANGOLA;AO
|
||||
ANGUILLA;AI
|
||||
ANTARCTICA;AQ
|
||||
ANTIGUA AND BARBUDA;AG
|
||||
ARGENTINA;AR
|
||||
ARMENIA;AM
|
||||
ARUBA;AW
|
||||
AUSTRALIA;AU
|
||||
AUSTRIA;AT
|
||||
AZERBAIJAN;AZ
|
||||
BAHAMAS;BS
|
||||
BAHRAIN;BH
|
||||
BANGLADESH;BD
|
||||
BARBADOS;BB
|
||||
BELARUS;BY
|
||||
BELGIUM;BE
|
||||
BELIZE;BZ
|
||||
BENIN;BJ
|
||||
BERMUDA;BM
|
||||
BHUTAN;BT
|
||||
BOLIVIA, PLURINATIONAL STATE OF;BO
|
||||
BONAIRE, SINT EUSTATIUS AND SABA;BQ
|
||||
BOSNIA AND HERZEGOVINA;BA
|
||||
BOTSWANA;BW
|
||||
BOUVET ISLAND;BV
|
||||
BRAZIL;BR
|
||||
BRITISH INDIAN OCEAN TERRITORY;IO
|
||||
BRUNEI DARUSSALAM;BN
|
||||
BULGARIA;BG
|
||||
BURKINA FASO;BF
|
||||
BURUNDI;BI
|
||||
CAMBODIA;KH
|
||||
CAMEROON;CM
|
||||
CANADA;CA
|
||||
CAPE VERDE;CV
|
||||
CAYMAN ISLANDS;KY
|
||||
CENTRAL AFRICAN REPUBLIC;CF
|
||||
CHAD;TD
|
||||
CHILE;CL
|
||||
CHINA;CN
|
||||
CHRISTMAS ISLAND;CX
|
||||
COCOS (KEELING) ISLANDS;CC
|
||||
COLOMBIA;CO
|
||||
COMOROS;KM
|
||||
CONGO;CG
|
||||
CONGO, THE DEMOCRATIC REPUBLIC OF THE;CD
|
||||
COOK ISLANDS;CK
|
||||
COSTA RICA;CR
|
||||
CÔTE D'IVOIRE;CI
|
||||
CROATIA;HR
|
||||
CUBA;CU
|
||||
CURAÇAO;CW
|
||||
CYPRUS;CY
|
||||
CZECH REPUBLIC;CZ
|
||||
DENMARK;DK
|
||||
DJIBOUTI;DJ
|
||||
DOMINICA;DM
|
||||
DOMINICAN REPUBLIC;DO
|
||||
ECUADOR;EC
|
||||
EGYPT;EG
|
||||
EL SALVADOR;SV
|
||||
EQUATORIAL GUINEA;GQ
|
||||
ERITREA;ER
|
||||
ESTONIA;EE
|
||||
ETHIOPIA;ET
|
||||
FALKLAND ISLANDS (MALVINAS);FK
|
||||
FAROE ISLANDS;FO
|
||||
FIJI;FJ
|
||||
FINLAND;FI
|
||||
FRANCE;FR
|
||||
FRENCH GUIANA;GF
|
||||
FRENCH POLYNESIA;PF
|
||||
FRENCH SOUTHERN TERRITORIES;TF
|
||||
GABON;GA
|
||||
GAMBIA;GM
|
||||
GEORGIA;GE
|
||||
GERMANY;DE
|
||||
GHANA;GH
|
||||
GIBRALTAR;GI
|
||||
GREECE;GR
|
||||
GREENLAND;GL
|
||||
GRENADA;GD
|
||||
GUADELOUPE;GP
|
||||
GUAM;GU
|
||||
GUATEMALA;GT
|
||||
GUERNSEY;GG
|
||||
GUINEA;GN
|
||||
GUINEA-BISSAU;GW
|
||||
GUYANA;GY
|
||||
HAITI;HT
|
||||
HEARD ISLAND AND MCDONALD ISLANDS;HM
|
||||
HOLY SEE (VATICAN CITY STATE);VA
|
||||
HONDURAS;HN
|
||||
HONG KONG;HK
|
||||
HUNGARY;HU
|
||||
ICELAND;IS
|
||||
INDIA;IN
|
||||
INDONESIA;ID
|
||||
IRAN, ISLAMIC REPUBLIC OF;IR
|
||||
IRAQ;IQ
|
||||
IRELAND;IE
|
||||
ISLE OF MAN;IM
|
||||
ISRAEL;IL
|
||||
ITALY;IT
|
||||
JAMAICA;JM
|
||||
JAPAN;JP
|
||||
JERSEY;JE
|
||||
JORDAN;JO
|
||||
KAZAKHSTAN;KZ
|
||||
KENYA;KE
|
||||
KIRIBATI;KI
|
||||
KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF;KP
|
||||
KOREA, REPUBLIC OF;KR
|
||||
KUWAIT;KW
|
||||
KYRGYZSTAN;KG
|
||||
LAO PEOPLE'S DEMOCRATIC REPUBLIC;LA
|
||||
LATVIA;LV
|
||||
LEBANON;LB
|
||||
LESOTHO;LS
|
||||
LIBERIA;LR
|
||||
LIBYA;LY
|
||||
LIECHTENSTEIN;LI
|
||||
LITHUANIA;LT
|
||||
LUXEMBOURG;LU
|
||||
MACAO;MO
|
||||
MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF;MK
|
||||
MADAGASCAR;MG
|
||||
MALAWI;MW
|
||||
MALAYSIA;MY
|
||||
MALDIVES;MV
|
||||
MALI;ML
|
||||
MALTA;MT
|
||||
MARSHALL ISLANDS;MH
|
||||
MARTINIQUE;MQ
|
||||
MAURITANIA;MR
|
||||
MAURITIUS;MU
|
||||
MAYOTTE;YT
|
||||
MEXICO;MX
|
||||
MICRONESIA, FEDERATED STATES OF;FM
|
||||
MOLDOVA, REPUBLIC OF;MD
|
||||
MONACO;MC
|
||||
MONGOLIA;MN
|
||||
MONTENEGRO;ME
|
||||
MONTSERRAT;MS
|
||||
MOROCCO;MA
|
||||
MOZAMBIQUE;MZ
|
||||
MYANMAR;MM
|
||||
NAMIBIA;NA
|
||||
NAURU;NR
|
||||
NEPAL;NP
|
||||
NETHERLANDS;NL
|
||||
NEW CALEDONIA;NC
|
||||
NEW ZEALAND;NZ
|
||||
NICARAGUA;NI
|
||||
NIGER;NE
|
||||
NIGERIA;NG
|
||||
NIUE;NU
|
||||
NORFOLK ISLAND;NF
|
||||
NORTHERN MARIANA ISLANDS;MP
|
||||
NORWAY;NO
|
||||
OMAN;OM
|
||||
PAKISTAN;PK
|
||||
PALAU;PW
|
||||
PALESTINE, STATE OF;PS
|
||||
PANAMA;PA
|
||||
PAPUA NEW GUINEA;PG
|
||||
PARAGUAY;PY
|
||||
PERU;PE
|
||||
PHILIPPINES;PH
|
||||
PITCAIRN;PN
|
||||
POLAND;PL
|
||||
PORTUGAL;PT
|
||||
PUERTO RICO;PR
|
||||
QATAR;QA
|
||||
RÉUNION;RE
|
||||
ROMANIA;RO
|
||||
RUSSIAN FEDERATION;RU
|
||||
RWANDA;RW
|
||||
SAINT BARTHÉLEMY;BL
|
||||
SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA;SH
|
||||
SAINT KITTS AND NEVIS;KN
|
||||
SAINT LUCIA;LC
|
||||
SAINT MARTIN (FRENCH PART);MF
|
||||
SAINT PIERRE AND MIQUELON;PM
|
||||
SAINT VINCENT AND THE GRENADINES;VC
|
||||
SAMOA;WS
|
||||
SAN MARINO;SM
|
||||
SAO TOME AND PRINCIPE;ST
|
||||
SAUDI ARABIA;SA
|
||||
SENEGAL;SN
|
||||
SERBIA;RS
|
||||
SEYCHELLES;SC
|
||||
SIERRA LEONE;SL
|
||||
SINGAPORE;SG
|
||||
SINT MAARTEN (DUTCH PART);SX
|
||||
SLOVAKIA;SK
|
||||
SLOVENIA;SI
|
||||
SOLOMON ISLANDS;SB
|
||||
SOMALIA;SO
|
||||
SOUTH AFRICA;ZA
|
||||
SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS;GS
|
||||
SOUTH SUDAN;SS
|
||||
SPAIN;ES
|
||||
SRI LANKA;LK
|
||||
SUDAN;SD
|
||||
SURINAME;SR
|
||||
SVALBARD AND JAN MAYEN;SJ
|
||||
SWAZILAND;SZ
|
||||
SWEDEN;SE
|
||||
SWITZERLAND;CH
|
||||
SYRIAN ARAB REPUBLIC;SY
|
||||
TAIWAN, PROVINCE OF CHINA;TW
|
||||
TAJIKISTAN;TJ
|
||||
TANZANIA, UNITED REPUBLIC OF;TZ
|
||||
THAILAND;TH
|
||||
TIMOR-LESTE;TL
|
||||
TOGO;TG
|
||||
TOKELAU;TK
|
||||
TONGA;TO
|
||||
TRINIDAD AND TOBAGO;TT
|
||||
TUNISIA;TN
|
||||
TURKEY;TR
|
||||
TURKMENISTAN;TM
|
||||
TURKS AND CAICOS ISLANDS;TC
|
||||
TUVALU;TV
|
||||
UGANDA;UG
|
||||
UKRAINE;UA
|
||||
UNITED ARAB EMIRATES;AE
|
||||
UNITED KINGDOM;GB
|
||||
UNITED STATES;US
|
||||
UNITED STATES MINOR OUTLYING ISLANDS;UM
|
||||
URUGUAY;UY
|
||||
UZBEKISTAN;UZ
|
||||
VANUATU;VU
|
||||
VENEZUELA, BOLIVARIAN REPUBLIC OF;VE
|
||||
VIET NAM;VN
|
||||
VIRGIN ISLANDS, BRITISH;VG
|
||||
VIRGIN ISLANDS, U.S.;VI
|
||||
WALLIS AND FUTUNA;WF
|
||||
WESTERN SAHARA;EH
|
||||
YEMEN;YE
|
||||
ZAMBIA;ZM
|
||||
ZIMBABWE;ZW
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,176 @@
|
||||
#
|
||||
# ISO 15924 - Codes for the representation of names of scripts
|
||||
# Codes pour la représentation des noms d’écritures
|
||||
# Format:
|
||||
# Code;N°;English Name;Nom français;PVA;Date
|
||||
#
|
||||
|
||||
Afak;439;Afaka;afaka;;2010-12-21
|
||||
Aghb;239;Caucasian Albanian;aghbanien;;2012-10-16
|
||||
Ahom;338;Ahom, Tai Ahom;âhom;;2012-11-01
|
||||
Arab;160;Arabic;arabe;Arabic;2004-05-01
|
||||
Armi;124;Imperial Aramaic;araméen impérial;Imperial_Aramaic;2009-06-01
|
||||
Armn;230;Armenian;arménien;Armenian;2004-05-01
|
||||
Avst;134;Avestan;avestique;Avestan;2009-06-01
|
||||
Bali;360;Balinese;balinais;Balinese;2006-10-10
|
||||
Bamu;435;Bamum;bamoum;Bamum;2009-06-01
|
||||
Bass;259;Bassa Vah;bassa;;2010-03-26
|
||||
Batk;365;Batak;batik;Batak;2010-07-23
|
||||
Beng;325;Bengali;bengalî;Bengali;2004-05-01
|
||||
Blis;550;Blissymbols;symboles Bliss;;2004-05-01
|
||||
Bopo;285;Bopomofo;bopomofo;Bopomofo;2004-05-01
|
||||
Brah;300;Brahmi;brahma;Brahmi;2010-07-23
|
||||
Brai;570;Braille;braille;Braille;2004-05-01
|
||||
Bugi;367;Buginese;bouguis;Buginese;2006-06-21
|
||||
Buhd;372;Buhid;bouhide;Buhid;2004-05-01
|
||||
Cakm;349;Chakma;chakma;Chakma;2012-02-06
|
||||
Cans;440;Unified Canadian Aboriginal Syllabics;syllabaire autochtone canadien unifié;Canadian_Aboriginal;2004-05-29
|
||||
Cari;201;Carian;carien;Carian;2007-07-02
|
||||
Cham;358;Cham;cham (čam, tcham);Cham;2009-11-11
|
||||
Cher;445;Cherokee;tchérokî;Cherokee;2004-05-01
|
||||
Cirt;291;Cirth;cirth;;2004-05-01
|
||||
Copt;204;Coptic;copte;Coptic;2006-06-21
|
||||
Cprt;403;Cypriot;syllabaire chypriote;Cypriot;2004-05-01
|
||||
Cyrl;220;Cyrillic;cyrillique;Cyrillic;2004-05-01
|
||||
Cyrs;221;Cyrillic (Old Church Slavonic variant);cyrillique (variante slavonne);;2004-05-01
|
||||
Deva;315;Devanagari (Nagari);dévanâgarî;Devanagari;2004-05-01
|
||||
Dsrt;250;Deseret (Mormon);déseret (mormon);Deseret;2004-05-01
|
||||
Dupl;755;Duployan shorthand, Duployan stenography;sténographie Duployé;;2010-07-18
|
||||
Egyd;070;Egyptian demotic;démotique égyptien;;2004-05-01
|
||||
Egyh;060;Egyptian hieratic;hiératique égyptien;;2004-05-01
|
||||
Egyp;050;Egyptian hieroglyphs;hiéroglyphes égyptiens;Egyptian_Hieroglyphs;2009-06-01
|
||||
Elba;226;Elbasan;elbasan;;2010-07-18
|
||||
Ethi;430;Ethiopic (Geʻez);éthiopien (geʻez, guèze);Ethiopic;2004-10-25
|
||||
Geor;240;Georgian (Mkhedruli);géorgien (mkhédrouli);Georgian;2004-05-29
|
||||
Geok;241;Khutsuri (Asomtavruli and Nuskhuri);khoutsouri (assomtavrouli et nouskhouri);Georgian;2012-10-16
|
||||
Glag;225;Glagolitic;glagolitique;Glagolitic;2006-06-21
|
||||
Goth;206;Gothic;gotique;Gothic;2004-05-01
|
||||
Gran;343;Grantha;grantha;;2009-11-11
|
||||
Grek;200;Greek;grec;Greek;2004-05-01
|
||||
Gujr;320;Gujarati;goudjarâtî (gujrâtî);Gujarati;2004-05-01
|
||||
Guru;310;Gurmukhi;gourmoukhî;Gurmukhi;2004-05-01
|
||||
Hang;286;Hangul (Hangŭl, Hangeul);hangûl (hangŭl, hangeul);Hangul;2004-05-29
|
||||
Hani;500;Han (Hanzi, Kanji, Hanja);idéogrammes han (sinogrammes);Han;2009-02-23
|
||||
Hano;371;Hanunoo (Hanunóo);hanounóo;Hanunoo;2004-05-29
|
||||
Hans;501;Han (Simplified variant);idéogrammes han (variante simplifiée);;2004-05-29
|
||||
Hant;502;Han (Traditional variant);idéogrammes han (variante traditionnelle);;2004-05-29
|
||||
Hatr;127;Hatran;hatrénien;;2012-11-01
|
||||
Hebr;125;Hebrew;hébreu;Hebrew;2004-05-01
|
||||
Hira;410;Hiragana;hiragana;Hiragana;2004-05-01
|
||||
Hluw;080;Anatolian Hieroglyphs (Luwian Hieroglyphs, Hittite Hieroglyphs);hiéroglyphes anatoliens (hiéroglyphes louvites, hiéroglyphes hittites);;2011-12-09
|
||||
Hmng;450;Pahawh Hmong;pahawh hmong;;2004-05-01
|
||||
Hrkt;412;Japanese syllabaries (alias for Hiragana + Katakana);syllabaires japonais (alias pour hiragana + katakana);Katakana_Or_Hiragana;2011-06-21
|
||||
Hung;176;Old Hungarian (Hungarian Runic);runes hongroises (ancien hongrois);;2012-10-16
|
||||
Inds;610;Indus (Harappan);indus;;2004-05-01
|
||||
Ital;210;Old Italic (Etruscan, Oscan, etc.);ancien italique (étrusque, osque, etc.);Old_Italic;2004-05-29
|
||||
Java;361;Javanese;javanais;Javanese;2009-06-01
|
||||
Jpan;413;Japanese (alias for Han + Hiragana + Katakana);japonais (alias pour han + hiragana + katakana);;2006-06-21
|
||||
Jurc;510;Jurchen;jurchen;;2010-12-21
|
||||
Kali;357;Kayah Li;kayah li;Kayah_Li;2007-07-02
|
||||
Kana;411;Katakana;katakana;Katakana;2004-05-01
|
||||
Khar;305;Kharoshthi;kharochthî;Kharoshthi;2006-06-21
|
||||
Khmr;355;Khmer;khmer;Khmer;2004-05-29
|
||||
Khoj;322;Khojki;khojkî;;2011-06-21
|
||||
Knda;345;Kannada;kannara (canara);Kannada;2004-05-29
|
||||
Kore;287;Korean (alias for Hangul + Han);coréen (alias pour hangûl + han);;2007-06-13
|
||||
Kpel;436;Kpelle;kpèllé;;2010-03-26
|
||||
Kthi;317;Kaithi;kaithî;Kaithi;2009-06-01
|
||||
Lana;351;Tai Tham (Lanna);taï tham (lanna);Tai_Tham;2009-06-01
|
||||
Laoo;356;Lao;laotien;Lao;2004-05-01
|
||||
Latf;217;Latin (Fraktur variant);latin (variante brisée);;2004-05-01
|
||||
Latg;216;Latin (Gaelic variant);latin (variante gaélique);;2004-05-01
|
||||
Latn;215;Latin;latin;Latin;2004-05-01
|
||||
Lepc;335;Lepcha (Róng);lepcha (róng);Lepcha;2007-07-02
|
||||
Limb;336;Limbu;limbou;Limbu;2004-05-29
|
||||
Lina;400;Linear A;linéaire A;;2004-05-01
|
||||
Linb;401;Linear B;linéaire B;Linear_B;2004-05-29
|
||||
Lisu;399;Lisu (Fraser);lisu (Fraser);Lisu;2009-06-01
|
||||
Loma;437;Loma;loma;;2010-03-26
|
||||
Lyci;202;Lycian;lycien;Lycian;2007-07-02
|
||||
Lydi;116;Lydian;lydien;Lydian;2007-07-02
|
||||
Mahj;314;Mahajani;mahâjanî;;2012-10-16
|
||||
Mand;140;Mandaic, Mandaean;mandéen;Mandaic;2010-07-23
|
||||
Mani;139;Manichaean;manichéen;;2007-07-15
|
||||
Maya;090;Mayan hieroglyphs;hiéroglyphes mayas;;2004-05-01
|
||||
Mend;438;Mende Kikakui;mendé kikakui;;2013-10-12
|
||||
Merc;101;Meroitic Cursive;cursif méroïtique;Meroitic_Cursive;2012-02-06
|
||||
Mero;100;Meroitic Hieroglyphs;hiéroglyphes méroïtiques;Meroitic_Hieroglyphs;2012-02-06
|
||||
Mlym;347;Malayalam;malayâlam;Malayalam;2004-05-01
|
||||
Modi;323;Modi, Moḍī;modî;;2013-10-12
|
||||
Moon;218;Moon (Moon code, Moon script, Moon type);écriture Moon;;2006-12-11
|
||||
Mong;145;Mongolian;mongol;Mongolian;2004-05-01
|
||||
Mroo;199;Mro, Mru;mro;;2010-12-21
|
||||
Mtei;337;Meitei Mayek (Meithei, Meetei);meitei mayek;Meetei_Mayek;2009-06-01
|
||||
Mult;323; Multani;multanî;;2012-11-01
|
||||
Mymr;350;Myanmar (Burmese);birman;Myanmar;2004-05-01
|
||||
Narb;106;Old North Arabian (Ancient North Arabian);nord-arabique;;2010-03-26
|
||||
Nbat;159;Nabataean;nabatéen;;2010-03-26
|
||||
Nkgb;420;Nakhi Geba ('Na-'Khi ²Ggŏ-¹baw, Naxi Geba);nakhi géba;;2009-02-23
|
||||
Nkoo;165;N’Ko;n’ko;Nko;2006-10-10
|
||||
Nshu;499;Nüshu;nüshu;;2010-12-21
|
||||
Ogam;212;Ogham;ogam;Ogham;2004-05-01
|
||||
Olck;261;Ol Chiki (Ol Cemet’, Ol, Santali);ol tchiki;Ol_Chiki;2007-07-02
|
||||
Orkh;175;Old Turkic, Orkhon Runic;orkhon;Old_Turkic;2009-06-01
|
||||
Orya;327;Oriya;oriyâ;Oriya;2004-05-01
|
||||
Osma;260;Osmanya;osmanais;Osmanya;2004-05-01
|
||||
Palm;126;Palmyrene;palmyrénien;;2010-03-26
|
||||
Pauc;263;Pau Cin Hau;paou chin haou;;2013-10-12
|
||||
Perm;227;Old Permic;ancien permien;;2004-05-01
|
||||
Phag;331;Phags-pa;’phags pa;Phags_Pa;2006-10-10
|
||||
Phli;131;Inscriptional Pahlavi;pehlevi des inscriptions;Inscriptional_Pahlavi;2009-06-01
|
||||
Phlp;132;Psalter Pahlavi;pehlevi des psautiers;;2007-11-26
|
||||
Phlv;133;Book Pahlavi;pehlevi des livres;;2007-07-15
|
||||
Phnx;115;Phoenician;phénicien;Phoenician;2006-10-10
|
||||
Plrd;282;Miao (Pollard);miao (Pollard);Miao;2012-02-06
|
||||
Prti;130;Inscriptional Parthian;parthe des inscriptions;Inscriptional_Parthian;2009-06-01
|
||||
Qaaa;900;Reserved for private use (start);réservé à l’usage privé (début);;2004-05-29
|
||||
Qabx;949;Reserved for private use (end);réservé à l’usage privé (fin);;2004-05-29
|
||||
Rjng;363;Rejang (Redjang, Kaganga);redjang (kaganga);Rejang;2009-02-23
|
||||
Roro;620;Rongorongo;rongorongo;;2004-05-01
|
||||
Runr;211;Runic;runique;Runic;2004-05-01
|
||||
Samr;123;Samaritan;samaritain;Samaritan;2009-06-01
|
||||
Sara;292;Sarati;sarati;;2004-05-29
|
||||
Sarb;105;Old South Arabian;sud-arabique, himyarite;Old_South_Arabian;2009-06-01
|
||||
Saur;344;Saurashtra;saurachtra;Saurashtra;2007-07-02
|
||||
Sgnw;095;SignWriting;SignÉcriture, SignWriting;;2006-10-10
|
||||
Shaw;281;Shavian (Shaw);shavien (Shaw);Shavian;2004-05-01
|
||||
Shrd;319;Sharada, Śāradā;charada, shard;Sharada;2012-02-06
|
||||
Sidd;302;Siddham, Siddhaṃ, Siddhamātṛkā;siddham;;2013-10-12
|
||||
Sind;318;Khudawadi, Sindhi;khoudawadî, sindhî;;2010-12-21
|
||||
Sinh;348;Sinhala;singhalais;Sinhala;2004-05-01
|
||||
Sora;398;Sora Sompeng;sora sompeng;Sora_Sompeng;2012-02-06
|
||||
Sund;362;Sundanese;sundanais;Sundanese;2007-07-02
|
||||
Sylo;316;Syloti Nagri;sylotî nâgrî;Syloti_Nagri;2006-06-21
|
||||
Syrc;135;Syriac;syriaque;Syriac;2004-05-01
|
||||
Syre;138;Syriac (Estrangelo variant);syriaque (variante estranghélo);;2004-05-01
|
||||
Syrj;137;Syriac (Western variant);syriaque (variante occidentale);;2004-05-01
|
||||
Syrn;136;Syriac (Eastern variant);syriaque (variante orientale);;2004-05-01
|
||||
Tagb;373;Tagbanwa;tagbanoua;Tagbanwa;2004-05-01
|
||||
Takr;321;Takri, Ṭākrī, Ṭāṅkrī;tâkrî;Takri;2012-02-06
|
||||
Tale;353;Tai Le;taï-le;Tai_Le;2004-10-25
|
||||
Talu;354;New Tai Lue;nouveau taï-lue;New_Tai_Lue;2006-06-21
|
||||
Taml;346;Tamil;tamoul;Tamil;2004-05-01
|
||||
Tang;520;Tangut;tangoute;;2010-12-21
|
||||
Tavt;359;Tai Viet;taï viêt;Tai_Viet;2009-06-01
|
||||
Telu;340;Telugu;télougou;Telugu;2004-05-01
|
||||
Teng;290;Tengwar;tengwar;;2004-05-01
|
||||
Tfng;120;Tifinagh (Berber);tifinagh (berbère);Tifinagh;2006-06-21
|
||||
Tglg;370;Tagalog (Baybayin, Alibata);tagal (baybayin, alibata);Tagalog;2009-02-23
|
||||
Thaa;170;Thaana;thâna;Thaana;2004-05-01
|
||||
Thai;352;Thai;thaï;Thai;2004-05-01
|
||||
Tibt;330;Tibetan;tibétain;Tibetan;2004-05-01
|
||||
Tirh;326;Tirhuta;tirhouta;;2011-12-09
|
||||
Ugar;040;Ugaritic;ougaritique;Ugaritic;2004-05-01
|
||||
Vaii;470;Vai;vaï;Vai;2007-07-02
|
||||
Visp;280;Visible Speech;parole visible;;2004-05-01
|
||||
Wara;262;Warang Citi (Varang Kshiti);warang citi;;2009-11-11
|
||||
Wole;480;Woleai;woléaï;;2010-12-21
|
||||
Xpeo;030;Old Persian;cunéiforme persépolitain;Old_Persian;2006-06-21
|
||||
Xsux;020;Cuneiform, Sumero-Akkadian;cunéiforme suméro-akkadien;Cuneiform;2006-10-10
|
||||
Yiii;460;Yi;yi;Yi;2004-05-01
|
||||
Zinh;994;Code for inherited script;codet pour écriture héritée;Inherited;2009-02-23
|
||||
Zmth;995;Mathematical notation;notation mathématique;;2007-11-26
|
||||
Zsym;996;Symbols;symboles;;2007-11-26
|
||||
Zxxx;997;Code for unwritten documents;codet pour les documents non écrits;;2011-06-21
|
||||
Zyyy;998;Code for undetermined script;codet pour écriture indéterminée;Common;2004-05-29
|
||||
Zzzz;999;Code for uncoded script;codet pour écriture non codée;Unknown;2006-10-10
|
||||
@@ -0,0 +1,474 @@
|
||||
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
|
||||
aar aa Afar, afar 0 0
|
||||
abk ab Abkhazian 0 0
|
||||
ace Achinese 0 0
|
||||
ach Acoli 0 0
|
||||
ada Adangme 0 0
|
||||
ady adyghé 0 0
|
||||
afa Afro-Asiatic (Other) 0 0
|
||||
afh Afrihili 0 0
|
||||
afr af Afrikaans 1 0
|
||||
ain Ainu 0 0
|
||||
aka ak Akan 0 0
|
||||
akk Akkadian 0 0
|
||||
alb sq Albanian 1 1
|
||||
ale Aleut 0 0
|
||||
alg Algonquian languages 0 0
|
||||
alt Southern Altai 0 0
|
||||
amh am Amharic 0 0
|
||||
ang English, Old (ca.450-1100) 0 0
|
||||
apa Apache languages 0 0
|
||||
ara ar Arabic 1 1
|
||||
arc Aramaic 0 0
|
||||
arg an Aragonese 0 0
|
||||
arm hy Armenian 1 0
|
||||
arn Araucanian 0 0
|
||||
arp Arapaho 0 0
|
||||
art Artificial (Other) 0 0
|
||||
arw Arawak 0 0
|
||||
asm as Assamese 0 0
|
||||
ast Asturian, Bable 0 0
|
||||
ath Athapascan languages 0 0
|
||||
aus Australian languages 0 0
|
||||
ava av Avaric 0 0
|
||||
ave ae Avestan 0 0
|
||||
awa Awadhi 0 0
|
||||
aym ay Aymara 0 0
|
||||
aze az Azerbaijani 0 0
|
||||
bad Banda 0 0
|
||||
bai Bamileke languages 0 0
|
||||
bak ba Bashkir 0 0
|
||||
bal Baluchi 0 0
|
||||
bam bm Bambara 0 0
|
||||
ban Balinese 0 0
|
||||
baq eu Basque 1 1
|
||||
bas Basa 0 0
|
||||
bat Baltic (Other) 0 0
|
||||
bej Beja 0 0
|
||||
bel be Belarusian 0 0
|
||||
bem Bemba 0 0
|
||||
ben bn Bengali 1 0
|
||||
ber Berber (Other) 0 0
|
||||
bho Bhojpuri 0 0
|
||||
bih bh Bihari 0 0
|
||||
bik Bikol 0 0
|
||||
bin Bini 0 0
|
||||
bis bi Bislama 0 0
|
||||
bla Siksika 0 0
|
||||
bnt Bantu (Other) 0 0
|
||||
bos bs Bosnian 1 0
|
||||
bra Braj 0 0
|
||||
bre br Breton 1 0
|
||||
btk Batak (Indonesia) 0 0
|
||||
bua Buriat 0 0
|
||||
bug Buginese 0 0
|
||||
bul bg Bulgarian 1 1
|
||||
bur my Burmese 1 0
|
||||
byn Blin 0 0
|
||||
cad Caddo 0 0
|
||||
cai Central American Indian (Other) 0 0
|
||||
car Carib 0 0
|
||||
cat ca Catalan 1 1
|
||||
cau Caucasian (Other) 0 0
|
||||
ceb Cebuano 0 0
|
||||
cel Celtic (Other) 0 0
|
||||
cha ch Chamorro 0 0
|
||||
chb Chibcha 0 0
|
||||
che ce Chechen 0 0
|
||||
chg Chagatai 0 0
|
||||
chi zh Chinese 1 1
|
||||
chk Chuukese 0 0
|
||||
chm Mari 0 0
|
||||
chn Chinook jargon 0 0
|
||||
cho Choctaw 0 0
|
||||
chp Chipewyan 0 0
|
||||
chr Cherokee 0 0
|
||||
chu cu Church Slavic 0 0
|
||||
chv cv Chuvash 0 0
|
||||
chy Cheyenne 0 0
|
||||
cmc Chamic languages 0 0
|
||||
cop Coptic 0 0
|
||||
cor kw Cornish 0 0
|
||||
cos co Corsican 0 0
|
||||
cpe Creoles and pidgins, English based (Other) 0 0
|
||||
cpf Creoles and pidgins, French-based (Other) 0 0
|
||||
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
|
||||
cre cr Cree 0 0
|
||||
crh Crimean Tatar 0 0
|
||||
crp Creoles and pidgins (Other) 0 0
|
||||
csb Kashubian 0 0
|
||||
cus Cushitic (Other)' couchitiques, autres langues 0 0
|
||||
cze cs Czech 1 1
|
||||
dak Dakota 0 0
|
||||
dan da Danish 1 1
|
||||
dar Dargwa 0 0
|
||||
day Dayak 0 0
|
||||
del Delaware 0 0
|
||||
den Slave (Athapascan) 0 0
|
||||
dgr Dogrib 0 0
|
||||
din Dinka 0 0
|
||||
div dv Divehi 0 0
|
||||
doi Dogri 0 0
|
||||
dra Dravidian (Other) 0 0
|
||||
dua Duala 0 0
|
||||
dum Dutch, Middle (ca.1050-1350) 0 0
|
||||
dut nl Dutch 1 1
|
||||
dyu Dyula 0 0
|
||||
dzo dz Dzongkha 0 0
|
||||
efi Efik 0 0
|
||||
egy Egyptian (Ancient) 0 0
|
||||
eka Ekajuk 0 0
|
||||
elx Elamite 0 0
|
||||
eng en English 1 1
|
||||
enm English, Middle (1100-1500) 0 0
|
||||
epo eo Esperanto 1 0
|
||||
est et Estonian 1 1
|
||||
ewe ee Ewe 0 0
|
||||
ewo Ewondo 0 0
|
||||
fan Fang 0 0
|
||||
fao fo Faroese 0 0
|
||||
fat Fanti 0 0
|
||||
fij fj Fijian 0 0
|
||||
fil Filipino 0 0
|
||||
fin fi Finnish 1 1
|
||||
fiu Finno-Ugrian (Other) 0 0
|
||||
fon Fon 0 0
|
||||
fre fr French 1 1
|
||||
frm French, Middle (ca.1400-1600) 0 0
|
||||
fro French, Old (842-ca.1400) 0 0
|
||||
fry fy Frisian 0 0
|
||||
ful ff Fulah 0 0
|
||||
fur Friulian 0 0
|
||||
gaa Ga 0 0
|
||||
gay Gayo 0 0
|
||||
gba Gbaya 0 0
|
||||
gem Germanic (Other) 0 0
|
||||
geo ka Georgian 1 1
|
||||
ger de German 1 1
|
||||
gez Geez 0 0
|
||||
gil Gilbertese 0 0
|
||||
gla gd Gaelic 0 0
|
||||
gle ga Irish 0 0
|
||||
glg gl Galician 1 1
|
||||
glv gv Manx 0 0
|
||||
gmh German, Middle High (ca.1050-1500) 0 0
|
||||
goh German, Old High (ca.750-1050) 0 0
|
||||
gon Gondi 0 0
|
||||
gor Gorontalo 0 0
|
||||
got Gothic 0 0
|
||||
grb Grebo 0 0
|
||||
grc Greek, Ancient (to 1453) 0 0
|
||||
ell el Greek 1 1
|
||||
grn gn Guarani 0 0
|
||||
guj gu Gujarati 0 0
|
||||
gwi Gwich´in 0 0
|
||||
hai Haida 0 0
|
||||
hat ht Haitian 0 0
|
||||
hau ha Hausa 0 0
|
||||
haw Hawaiian 0 0
|
||||
heb he Hebrew 1 1
|
||||
her hz Herero 0 0
|
||||
hil Hiligaynon 0 0
|
||||
him Himachali 0 0
|
||||
hin hi Hindi 1 1
|
||||
hit Hittite 0 0
|
||||
hmn Hmong 0 0
|
||||
hmo ho Hiri Motu 0 0
|
||||
hrv hr Croatian 1 1
|
||||
hun hu Hungarian 1 1
|
||||
hup Hupa 0 0
|
||||
iba Iban 0 0
|
||||
ibo ig Igbo 0 0
|
||||
ice is Icelandic 1 1
|
||||
ido io Ido 0 0
|
||||
iii ii Sichuan Yi 0 0
|
||||
ijo Ijo 0 0
|
||||
iku iu Inuktitut 0 0
|
||||
ile ie Interlingue 0 0
|
||||
ilo Iloko 0 0
|
||||
ina ia Interlingua (International Auxiliary Language Asso 0 0
|
||||
inc Indic (Other) 0 0
|
||||
ind id Indonesian 1 1
|
||||
ine Indo-European (Other) 0 0
|
||||
inh Ingush 0 0
|
||||
ipk ik Inupiaq 0 0
|
||||
ira Iranian (Other) 0 0
|
||||
iro Iroquoian languages 0 0
|
||||
ita it Italian 1 1
|
||||
jav jv Javanese 0 0
|
||||
jpn ja Japanese 1 1
|
||||
jpr Judeo-Persian 0 0
|
||||
jrb Judeo-Arabic 0 0
|
||||
kaa Kara-Kalpak 0 0
|
||||
kab Kabyle 0 0
|
||||
kac Kachin 0 0
|
||||
kal kl Kalaallisut 0 0
|
||||
kam Kamba 0 0
|
||||
kan kn Kannada 0 0
|
||||
kar Karen 0 0
|
||||
kas ks Kashmiri 0 0
|
||||
kau kr Kanuri 0 0
|
||||
kaw Kawi 0 0
|
||||
kaz kk Kazakh 1 0
|
||||
kbd Kabardian 0 0
|
||||
kha Khasi 0 0
|
||||
khi Khoisan (Other) 0 0
|
||||
khm km Khmer 1 1
|
||||
kho Khotanese 0 0
|
||||
kik ki Kikuyu 0 0
|
||||
kin rw Kinyarwanda 0 0
|
||||
kir ky Kirghiz 0 0
|
||||
kmb Kimbundu 0 0
|
||||
kok Konkani 0 0
|
||||
kom kv Komi 0 0
|
||||
kon kg Kongo 0 0
|
||||
kor ko Korean 1 1
|
||||
kos Kosraean 0 0
|
||||
kpe Kpelle 0 0
|
||||
krc Karachay-Balkar 0 0
|
||||
kro Kru 0 0
|
||||
kru Kurukh 0 0
|
||||
kua kj Kuanyama 0 0
|
||||
kum Kumyk 0 0
|
||||
kur ku Kurdish 0 0
|
||||
kut Kutenai 0 0
|
||||
lad Ladino 0 0
|
||||
lah Lahnda 0 0
|
||||
lam Lamba 0 0
|
||||
lao lo Lao 0 0
|
||||
lat la Latin 0 0
|
||||
lav lv Latvian 1 0
|
||||
lez Lezghian 0 0
|
||||
lim li Limburgan 0 0
|
||||
lin ln Lingala 0 0
|
||||
lit lt Lithuanian 1 0
|
||||
lol Mongo 0 0
|
||||
loz Lozi 0 0
|
||||
ltz lb Luxembourgish 1 0
|
||||
lua Luba-Lulua 0 0
|
||||
lub lu Luba-Katanga 0 0
|
||||
lug lg Ganda 0 0
|
||||
lui Luiseno 0 0
|
||||
lun Lunda 0 0
|
||||
luo Luo (Kenya and Tanzania) 0 0
|
||||
lus lushai 0 0
|
||||
mac mk Macedonian 1 1
|
||||
mad Madurese 0 0
|
||||
mag Magahi 0 0
|
||||
mah mh Marshallese 0 0
|
||||
mai Maithili 0 0
|
||||
mak Makasar 0 0
|
||||
mal ml Malayalam 1 0
|
||||
man Mandingo 0 0
|
||||
mao mi Maori 0 0
|
||||
map Austronesian (Other) 0 0
|
||||
mar mr Marathi 0 0
|
||||
mas Masai 0 0
|
||||
may ms Malay 1 1
|
||||
mdf Moksha 0 0
|
||||
mdr Mandar 0 0
|
||||
men Mende 0 0
|
||||
mga Irish, Middle (900-1200) 0 0
|
||||
mic Mi'kmaq 0 0
|
||||
min Minangkabau 0 0
|
||||
mis Miscellaneous languages 0 0
|
||||
mkh Mon-Khmer (Other) 0 0
|
||||
mlg mg Malagasy 0 0
|
||||
mlt mt Maltese 0 0
|
||||
mnc Manchu 0 0
|
||||
mni Manipuri 0 0
|
||||
mno Manobo languages 0 0
|
||||
moh Mohawk 0 0
|
||||
mol mo Moldavian 0 0
|
||||
mon mn Mongolian 1 0
|
||||
mos Mossi 0 0
|
||||
mwl Mirandese 0 0
|
||||
mul Multiple languages 0 0
|
||||
mun Munda languages 0 0
|
||||
mus Creek 0 0
|
||||
mwr Marwari 0 0
|
||||
myn Mayan languages 0 0
|
||||
myv Erzya 0 0
|
||||
nah Nahuatl 0 0
|
||||
nai North American Indian 0 0
|
||||
nap Neapolitan 0 0
|
||||
nau na Nauru 0 0
|
||||
nav nv Navajo 0 0
|
||||
nbl nr Ndebele, South 0 0
|
||||
nde nd Ndebele, North 0 0
|
||||
ndo ng Ndonga 0 0
|
||||
nds Low German 0 0
|
||||
nep ne Nepali 0 0
|
||||
new Nepal Bhasa 0 0
|
||||
nia Nias 0 0
|
||||
nic Niger-Kordofanian (Other) 0 0
|
||||
niu Niuean 0 0
|
||||
nno nn Norwegian Nynorsk 0 0
|
||||
nob nb Norwegian Bokmal 0 0
|
||||
nog Nogai 0 0
|
||||
non Norse, Old 0 0
|
||||
nor no Norwegian 1 1
|
||||
nso Northern Sotho 0 0
|
||||
nub Nubian languages 0 0
|
||||
nwc Classical Newari 0 0
|
||||
nya ny Chichewa 0 0
|
||||
nym Nyamwezi 0 0
|
||||
nyn Nyankole 0 0
|
||||
nyo Nyoro 0 0
|
||||
nzi Nzima 0 0
|
||||
oci oc Occitan 1 1
|
||||
oji oj Ojibwa 0 0
|
||||
ori or Oriya 0 0
|
||||
orm om Oromo 0 0
|
||||
osa Osage 0 0
|
||||
oss os Ossetian 0 0
|
||||
ota Turkish, Ottoman (1500-1928) 0 0
|
||||
oto Otomian languages 0 0
|
||||
paa Papuan (Other) 0 0
|
||||
pag Pangasinan 0 0
|
||||
pal Pahlavi 0 0
|
||||
pam Pampanga 0 0
|
||||
pan pa Panjabi 0 0
|
||||
pap Papiamento 0 0
|
||||
pau Palauan 0 0
|
||||
peo Persian, Old (ca.600-400 B.C.) 0 0
|
||||
per fa Persian 1 1
|
||||
phi Philippine (Other) 0 0
|
||||
phn Phoenician 0 0
|
||||
pli pi Pali 0 0
|
||||
pol pl Polish 1 1
|
||||
pon Pohnpeian 0 0
|
||||
por pt Portuguese 1 1
|
||||
pra Prakrit languages 0 0
|
||||
pro Provençal, Old (to 1500) 0 0
|
||||
pus ps Pushto 0 0
|
||||
que qu Quechua 0 0
|
||||
raj Rajasthani 0 0
|
||||
rap Rapanui 0 0
|
||||
rar Rarotongan 0 0
|
||||
roa Romance (Other) 0 0
|
||||
roh rm Raeto-Romance 0 0
|
||||
rom Romany 0 0
|
||||
run rn Rundi 0 0
|
||||
rup Aromanian 0 0
|
||||
rus ru Russian 1 1
|
||||
sad Sandawe 0 0
|
||||
sag sg Sango 0 0
|
||||
sah Yakut 0 0
|
||||
sai South American Indian (Other) 0 0
|
||||
sal Salishan languages 0 0
|
||||
sam Samaritan Aramaic 0 0
|
||||
san sa Sanskrit 0 0
|
||||
sas Sasak 0 0
|
||||
sat Santali 0 0
|
||||
scc sr Serbian 1 1
|
||||
scn Sicilian 0 0
|
||||
sco Scots 0 0
|
||||
sel Selkup 0 0
|
||||
sem Semitic (Other) 0 0
|
||||
sga Irish, Old (to 900) 0 0
|
||||
sgn Sign Languages 0 0
|
||||
shn Shan 0 0
|
||||
sid Sidamo 0 0
|
||||
sin si Sinhalese 1 1
|
||||
sio Siouan languages 0 0
|
||||
sit Sino-Tibetan (Other) 0 0
|
||||
sla Slavic (Other) 0 0
|
||||
slo sk Slovak 1 1
|
||||
slv sl Slovenian 1 1
|
||||
sma Southern Sami 0 0
|
||||
sme se Northern Sami 0 0
|
||||
smi Sami languages (Other) 0 0
|
||||
smj Lule Sami 0 0
|
||||
smn Inari Sami 0 0
|
||||
smo sm Samoan 0 0
|
||||
sms Skolt Sami 0 0
|
||||
sna sn Shona 0 0
|
||||
snd sd Sindhi 0 0
|
||||
snk Soninke 0 0
|
||||
sog Sogdian 0 0
|
||||
som so Somali 0 0
|
||||
son Songhai 0 0
|
||||
sot st Sotho, Southern 0 0
|
||||
spa es Spanish 1 1
|
||||
srd sc Sardinian 0 0
|
||||
srr Serer 0 0
|
||||
ssa Nilo-Saharan (Other) 0 0
|
||||
ssw ss Swati 0 0
|
||||
suk Sukuma 0 0
|
||||
sun su Sundanese 0 0
|
||||
sus Susu 0 0
|
||||
sux Sumerian 0 0
|
||||
swa sw Swahili 1 0
|
||||
swe sv Swedish 1 1
|
||||
syr Syriac 1 0
|
||||
tah ty Tahitian 0 0
|
||||
tai Tai (Other) 0 0
|
||||
tam ta Tamil 1 0
|
||||
tat tt Tatar 0 0
|
||||
tel te Telugu 1 0
|
||||
tem Timne 0 0
|
||||
ter Tereno 0 0
|
||||
tet Tetum 0 0
|
||||
tgk tg Tajik 0 0
|
||||
tgl tl Tagalog 1 1
|
||||
tha th Thai 1 1
|
||||
tib bo Tibetan 0 0
|
||||
tig Tigre 0 0
|
||||
tir ti Tigrinya 0 0
|
||||
tiv Tiv 0 0
|
||||
tkl Tokelau 0 0
|
||||
tlh Klingon 0 0
|
||||
tli Tlingit 0 0
|
||||
tmh Tamashek 0 0
|
||||
tog Tonga (Nyasa) 0 0
|
||||
ton to Tonga (Tonga Islands) 0 0
|
||||
tpi Tok Pisin 0 0
|
||||
tsi Tsimshian 0 0
|
||||
tsn tn Tswana 0 0
|
||||
tso ts Tsonga 0 0
|
||||
tuk tk Turkmen 0 0
|
||||
tum Tumbuka 0 0
|
||||
tup Tupi languages 0 0
|
||||
tur tr Turkish 1 1
|
||||
tut Altaic (Other) 0 0
|
||||
tvl Tuvalu 0 0
|
||||
twi tw Twi 0 0
|
||||
tyv Tuvinian 0 0
|
||||
udm Udmurt 0 0
|
||||
uga Ugaritic 0 0
|
||||
uig ug Uighur 0 0
|
||||
ukr uk Ukrainian 1 1
|
||||
umb Umbundu 0 0
|
||||
und Undetermined 0 0
|
||||
urd ur Urdu 1 0
|
||||
uzb uz Uzbek 0 0
|
||||
vai Vai 0 0
|
||||
ven ve Venda 0 0
|
||||
vie vi Vietnamese 1 1
|
||||
vol vo Volapük 0 0
|
||||
vot Votic 0 0
|
||||
wak Wakashan languages 0 0
|
||||
wal Walamo 0 0
|
||||
war Waray 0 0
|
||||
was Washo 0 0
|
||||
wel cy Welsh 0 0
|
||||
wen Sorbian languages 0 0
|
||||
wln wa Walloon 0 0
|
||||
wol wo Wolof 0 0
|
||||
xal Kalmyk 0 0
|
||||
xho xh Xhosa 0 0
|
||||
yao Yao 0 0
|
||||
yap Yapese 0 0
|
||||
yid yi Yiddish 0 0
|
||||
yor yo Yoruba 0 0
|
||||
ypk Yupik languages 0 0
|
||||
zap Zapotec 0 0
|
||||
zen Zenaga 0 0
|
||||
zha za Zhuang 0 0
|
||||
znd Zande 0 0
|
||||
zul zu Zulu 0 0
|
||||
zun Zuni 0 0
|
||||
rum ro Romanian 1 1
|
||||
pob pb Brazilian 1 1
|
||||
mne Montenegrin 1 0
|
||||
@@ -0,0 +1,85 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
class Error(Exception):
    """Root of the babelfish exception hierarchy

    Every exception defined in this module derives from this class.

    """
|
||||
|
||||
|
||||
# NOTE(review): also derives from AttributeError -- presumably because language
# conversions are reached through attribute access; confirm before relying on it.
class LanguageError(Error, AttributeError):
    """Common parent of every language-related exception in babelfish"""
|
||||
|
||||
|
||||
class LanguageConvertError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageConverter.convert` fails

    The constructor arguments are forwarded to the base exception so that
    they are available in ``args`` and the exception can be copied or pickled.

    :param string alpha3: alpha3 code that failed conversion
    :param country: country code that failed conversion, if any
    :type country: string or None
    :param script: script code that failed conversion, if any
    :type script: string or None

    """
    def __init__(self, alpha3, country=None, script=None):
        # copy/pickle rebuild an exception with ``cls(*self.args)``; leaving
        # ``args`` empty makes that call fail on the required ``alpha3``.
        super(LanguageConvertError, self).__init__(alpha3, country, script)
        self.alpha3 = alpha3
        self.country = country
        self.script = script

    def __str__(self):
        # Rendered as ``alpha3[-country][-script]``, skipping the missing parts
        s = self.alpha3
        if self.country is not None:
            s += '-' + self.country
        if self.script is not None:
            s += '-' + self.script
        return s
|
||||
|
||||
|
||||
class LanguageReverseError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageReverseConverter.reverse` fails

    The constructor argument is forwarded to the base exception so that it is
    available in ``args`` and the exception can be copied or pickled.

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        # copy/pickle rebuild an exception with ``cls(*self.args)``; leaving
        # ``args`` empty makes that call fail on the required ``code``.
        super(LanguageReverseError, self).__init__(code)
        self.code = code

    def __str__(self):
        # repr() keeps the quotes so that empty or odd codes stay visible
        return repr(self.code)
|
||||
|
||||
|
||||
class CountryError(Error, AttributeError):
    """Common parent of every country-related exception in babelfish"""
|
||||
|
||||
|
||||
class CountryConvertError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryConverter.convert` fails

    The constructor argument is forwarded to the base exception so that it is
    available in ``args`` and the exception can be copied or pickled.

    :param string alpha2: alpha2 code that failed conversion

    """
    def __init__(self, alpha2):
        # copy/pickle rebuild an exception with ``cls(*self.args)``; leaving
        # ``args`` empty makes that call fail on the required ``alpha2``.
        super(CountryConvertError, self).__init__(alpha2)
        self.alpha2 = alpha2

    def __str__(self):
        return self.alpha2
|
||||
|
||||
|
||||
class CountryReverseError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryReverseConverter.reverse` fails

    The constructor argument is forwarded to the base exception so that it is
    available in ``args`` and the exception can be copied or pickled.

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        # copy/pickle rebuild an exception with ``cls(*self.args)``; leaving
        # ``args`` empty makes that call fail on the required ``code``.
        super(CountryReverseError, self).__init__(code)
        self.code = code

    def __str__(self):
        # repr() keeps the quotes so that empty or odd codes stay visible
        return repr(self.code)
|
||||
@@ -0,0 +1,185 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from .converters import ConverterManager
|
||||
from .country import Country
|
||||
from .exceptions import LanguageConvertError
|
||||
from .script import Script
|
||||
from . import basestr
|
||||
|
||||
|
||||
#: Set of the alpha3 codes found in the bundled ``data/iso-639-3.tab`` file
LANGUAGES = set()

#: List of :class:`IsoLanguage` rows, in the order they appear in the data file
LANGUAGE_MATRIX = []

#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])

f = resource_stream('babelfish', 'data/iso-639-3.tab')
try:
    # The first line is discarded (presumably the column header row)
    f.readline()
    for line in f:
        # Rows are tab-separated bytes; the row is not stripped, so the last
        # field keeps whatever line terminator the file contains.
        iso_language = IsoLanguage(*line.decode('utf-8').split('\t'))
        LANGUAGES.add(iso_language.alpha3)
        LANGUAGE_MATRIX.append(iso_language)
finally:
    # Release the stream even when a malformed row makes the parsing fail
    f.close()
|
||||
|
||||
|
||||
class LanguageConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` specialized for language converters"""
    entry_point = 'babelfish.language_converters'

    # Each entry is written as ``<converter name> = <module path>:<class name>``
    internal_converters = [
        'alpha2 = babelfish.converters.alpha2:Alpha2Converter',
        'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
        'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
        'name = babelfish.converters.name:NameConverter',
        'scope = babelfish.converters.scope:ScopeConverter',
        'type = babelfish.converters.type:LanguageTypeConverter',
        'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter',
    ]


#: Module-level manager instance through which language converters are looked up by name
language_converters = LanguageConverterManager()
|
||||
|
||||
|
||||
class LanguageMeta(type):
    """Metaclass of :class:`Language`

    A class attribute named ``from<converter>`` that is not found through the
    regular lookup resolves to :meth:`Language.fromcode` with its `converter`
    argument preset to ``<converter>``, so that ``Language.frommycode(code)``
    behaves like ``Language.fromcode(code, converter='mycode')``.

    """
    def __getattr__(cls, name):
        prefix = 'from'
        if not name.startswith(prefix):
            # Not a dynamic constructor: fall back to the regular lookup
            return type.__getattribute__(cls, name)
        # Whatever follows the prefix is used verbatim as the converter name
        return partial(cls.fromcode, converter=name[len(prefix):])
|
||||
|
||||
|
||||
class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
|
||||
"""A human language
|
||||
|
||||
A human language is composed of a language part following the ISO-639
|
||||
standard and can be country-specific when a :class:`~babelfish.country.Country`
|
||||
is specified.
|
||||
|
||||
The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)
|
||||
|
||||
:param string language: the language as a 3-letter ISO-639-3 code
|
||||
:param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
|
||||
:type country: string or :class:`~babelfish.country.Country` or None
|
||||
:param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
|
||||
:type script: string or :class:`~babelfish.script.Script` or None
|
||||
:param unknown: the unknown language as a three-letters ISO-639-3 code to use as fallback
|
||||
:type unknown: string or None
|
||||
:raise: ValueError if the language could not be recognized and `unknown` is ``None``
|
||||
|
||||
"""
|
||||
def __init__(self, language, country=None, script=None, unknown=None):
|
||||
if unknown is not None and language not in LANGUAGES:
|
||||
language = unknown
|
||||
if language not in LANGUAGES:
|
||||
raise ValueError('%r is not a valid language' % language)
|
||||
self.alpha3 = language
|
||||
self.country = None
|
||||
if isinstance(country, Country):
|
||||
self.country = country
|
||||
elif country is None:
|
||||
self.country = None
|
||||
else:
|
||||
self.country = Country(country)
|
||||
self.script = None
|
||||
if isinstance(script, Script):
|
||||
self.script = script
|
||||
elif script is None:
|
||||
self.script = None
|
||||
else:
|
||||
self.script = Script(script)
|
||||
|
||||
@classmethod
|
||||
def fromcode(cls, code, converter):
|
||||
"""Create a :class:`Language` by its `code` using `converter` to
|
||||
:meth:`~babelfish.converters.LanguageReverseConverter.reverse` it
|
||||
|
||||
:param string code: the code to reverse
|
||||
:param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
|
||||
:return: the corresponding :class:`Language` instance
|
||||
:rtype: :class:`Language`
|
||||
|
||||
"""
|
||||
return cls(*language_converters[converter].reverse(code))
|
||||
|
||||
@classmethod
|
||||
def fromietf(cls, ietf):
|
||||
"""Create a :class:`Language` by from an IETF language code
|
||||
|
||||
:param string ietf: the ietf code
|
||||
:return: the corresponding :class:`Language` instance
|
||||
:rtype: :class:`Language`
|
||||
|
||||
"""
|
||||
subtags = ietf.split('-')
|
||||
language_subtag = subtags.pop(0).lower()
|
||||
if len(language_subtag) == 2:
|
||||
language = cls.fromalpha2(language_subtag)
|
||||
else:
|
||||
language = cls(language_subtag)
|
||||
while subtags:
|
||||
subtag = subtags.pop(0)
|
||||
if len(subtag) == 2:
|
||||
language.country = Country(subtag.upper())
|
||||
else:
|
||||
language.script = Script(subtag.capitalize())
|
||||
if language.script is not None:
|
||||
if subtags:
|
||||
raise ValueError('Wrong IETF format. Unmatched subtags: %r' % subtags)
|
||||
break
|
||||
return language
|
||||
|
||||
def __getstate__(self):
|
||||
return self.alpha3, self.country, self.script
|
||||
|
||||
def __setstate__(self, state):
    """Restore the instance from an (alpha3, country, script) tuple"""
    alpha3, country, script = state
    self.alpha3 = alpha3
    self.country = country
    self.script = script
|
||||
|
||||
def __getattr__(self, name):
    """Resolve an unknown attribute through the converter named `name`

    The converter receives the alpha3 code, the country's alpha2 code and
    the script's code (``None`` for the parts that are not set)

    :raise AttributeError: if a :class:`KeyError` is raised while looking
        up or running the converter

    """
    alpha3 = self.alpha3
    country = None if self.country is None else self.country.alpha2
    script = None if self.script is None else self.script.code
    try:
        converter = language_converters[name]
        return converter.convert(alpha3, country, script)
    except KeyError:
        raise AttributeError(name)
|
||||
|
||||
def __hash__(self):
    """Hash of the string form, so a language hashes like its ``str()``"""
    text = str(self)
    return hash(text)
|
||||
|
||||
def __eq__(self, other):
    """Compare with a string (by string form) or with another :class:`Language`

    Two languages are equal when alpha3, country and script all match;
    any other kind of object compares unequal

    """
    if isinstance(other, basestr):
        return str(self) == other
    if isinstance(other, Language):
        # all() stops at the first differing field, like the chained `and`
        return all(getattr(self, field) == getattr(other, field)
                   for field in ('alpha3', 'country', 'script'))
    return False
|
||||
|
||||
def __ne__(self, other):
    """Negation of ``==``"""
    if self == other:
        return False
    return True
|
||||
|
||||
def __bool__(self):
    """A language is falsy only when its alpha3 code is ``'und'``"""
    is_undetermined = self.alpha3 == 'und'
    return not is_undetermined
# same hook under its Python 2 name
__nonzero__ = __bool__
|
||||
|
||||
def __repr__(self):
    """Return ``<Language [...]>`` wrapping the string form"""
    template = '<Language [%s]>'
    return template % self
|
||||
|
||||
def __str__(self):
    """Return the language code followed by country and script when set

    The alpha2 code is used when the language has one, the alpha3 code
    otherwise; the parts are joined with ``-``

    """
    try:
        code = self.alpha2
    except LanguageConvertError:
        code = self.alpha3
    parts = [code]
    if self.country is not None:
        parts.append(str(self.country))
    if self.script is not None:
        parts.append(str(self.script))
    return '-'.join(parts)
|
||||
@@ -0,0 +1,76 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from . import basestr
|
||||
|
||||
#: Script code to script name mapping
SCRIPTS = {}

#: List of scripts in the ISO-15924 as namedtuple of code, number, name, french_name, pva and date
SCRIPT_MATRIX = []

#: The namedtuple used in the :data:`SCRIPT_MATRIX`
IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])

# Fill both tables from the ISO-15924 data file shipped with the package
f = resource_stream('babelfish', 'data/iso15924-utf8-20131012.txt')
try:
    f.readline()  # the first line is read and discarded
    for l in f:
        l = l.decode('utf-8').strip()
        # blank lines and '#' comment lines carry no record
        if not l or l.startswith('#'):
            continue
        script = IsoScript._make(l.split(';'))
        SCRIPT_MATRIX.append(script)
        SCRIPTS[script.code] = script.name
finally:
    # release the stream even when a line fails to decode or to parse
    f.close()
|
||||
|
||||
|
||||
class Script(object):
    """A human writing system

    A script is represented by a 4-letter code from the ISO-15924 standard

    :param string script: 4-letter ISO-15924 script code
    :raise ValueError: if `script` is not a key of :data:`SCRIPTS`

    """
    def __init__(self, script):
        if script not in SCRIPTS:
            raise ValueError('%r is not a valid script' % script)

        #: ISO-15924 4-letter script code
        self.code = script

    @property
    def name(self):
        """English name of the script"""
        return SCRIPTS[self.code]

    def __getstate__(self):
        # the code alone is enough to rebuild the instance
        return self.code

    def __setstate__(self, state):
        self.code = state

    def __hash__(self):
        # hashes like its code
        return hash(self.code)

    def __eq__(self, other):
        # strings are compared with the code, scripts code to code
        if isinstance(other, basestr):
            other_code = other
        elif isinstance(other, Script):
            other_code = other.code
        else:
            return False
        return self.code == other_code

    def __ne__(self, other):
        if self == other:
            return False
        return True

    def __repr__(self):
        template = '<Script [%s]>'
        return template % self

    def __str__(self):
        return self.code
|
||||
@@ -0,0 +1,368 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import sys
|
||||
import pickle
|
||||
from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from babelfish import (LANGUAGES, Language, Country, Script, language_converters, country_converters,
|
||||
LanguageReverseConverter, LanguageConvertError, LanguageReverseError, CountryReverseError)
|
||||
|
||||
|
||||
if sys.version_info[:2] <= (2, 6):
    # Helpers defined only for Python 2.6 and older
    _MAX_LENGTH = 80

    def safe_repr(obj, short=False):
        """Return ``repr(obj)``, falling back to ``object.__repr__`` on error

        With `short` set, a text of ``_MAX_LENGTH`` characters or more is
        cut to ``_MAX_LENGTH`` and marked as truncated

        """
        try:
            text = repr(obj)
        except Exception:
            text = object.__repr__(obj)
        if short and len(text) >= _MAX_LENGTH:
            return text[:_MAX_LENGTH] + ' [truncated]...'
        return text

    class _AssertRaisesContext(object):
        """A context manager used to implement TestCase.assertRaises* methods."""

        def __init__(self, expected, test_case, expected_regexp=None):
            self.expected = expected
            self.failureException = test_case.failureException
            self.expected_regexp = expected_regexp

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_value, tb):
            if exc_type is None:
                # nothing was raised inside the block: that is a failure
                try:
                    exc_name = self.expected.__name__
                except AttributeError:
                    exc_name = str(self.expected)
                raise self.failureException(
                    "{0} not raised".format(exc_name))
            if not issubclass(exc_type, self.expected):
                # let unexpected exceptions pass through
                return False
            self.exception = exc_value  # store for later retrieval
            pattern = self.expected_regexp
            if pattern is None:
                return True

            if isinstance(pattern, basestring):
                pattern = re.compile(pattern)
            message = str(exc_value)
            if not pattern.search(message):
                raise self.failureException('"%s" does not match "%s"' %
                                            (pattern.pattern, str(exc_value)))
            return True

    class _Py26FixTestCase(object):
        """Mixin with assertion helpers, defined for Python 2.6 and older"""

        def assertIsNone(self, obj, msg=None):
            """Same as self.assertTrue(obj is None), with a nicer default message."""
            if obj is None:
                return
            default_msg = '%s is not None' % (safe_repr(obj),)
            self.fail(self._formatMessage(msg, default_msg))

        def assertIsNotNone(self, obj, msg=None):
            """Included for symmetry with assertIsNone."""
            if obj is not None:
                return
            default_msg = 'unexpectedly None'
            self.fail(self._formatMessage(msg, default_msg))

        def assertIn(self, member, container, msg=None):
            """Just like self.assertTrue(a in b), but with a nicer default message."""
            if member in container:
                return
            default_msg = '%s not found in %s' % (safe_repr(member),
                                                  safe_repr(container))
            self.fail(self._formatMessage(msg, default_msg))

        def assertNotIn(self, member, container, msg=None):
            """Just like self.assertTrue(a not in b), but with a nicer default message."""
            if member not in container:
                return
            default_msg = '%s unexpectedly found in %s' % (safe_repr(member),
                                                           safe_repr(container))
            self.fail(self._formatMessage(msg, default_msg))

        def assertIs(self, expr1, expr2, msg=None):
            """Just like self.assertTrue(a is b), but with a nicer default message."""
            if expr1 is expr2:
                return
            default_msg = '%s is not %s' % (safe_repr(expr1),
                                            safe_repr(expr2))
            self.fail(self._formatMessage(msg, default_msg))

        def assertIsNot(self, expr1, expr2, msg=None):
            """Just like self.assertTrue(a is not b), but with a nicer default message."""
            if expr1 is not expr2:
                return
            default_msg = 'unexpectedly identical: %s' % (safe_repr(expr1),)
            self.fail(self._formatMessage(msg, default_msg))

else:
    class _Py26FixTestCase(object):
        """Empty mixin: nothing is added on newer Python versions"""
        pass
|
||||
|
||||
|
||||
class TestScript(TestCase, _Py26FixTestCase):
    """Tests for :class:`Script`"""

    def test_wrong_script(self):
        # a code that is expected to be rejected
        self.assertRaises(ValueError, Script, 'Azer')

    def test_eq(self):
        first, second = Script('Latn'), Script('Latn')
        self.assertEqual(first, second)

    def test_ne(self):
        cyrillic, latin = Script('Cyrl'), Script('Latn')
        self.assertNotEqual(cyrillic, latin)

    def test_hash(self):
        # a script hashes like its code
        self.assertEqual(hash(Script('Hira')), hash('Hira'))

    def test_pickle(self):
        restored = pickle.loads(pickle.dumps(Script('Latn')))
        self.assertEqual(restored, Script('Latn'))
|
||||
|
||||
|
||||
class TestCountry(TestCase, _Py26FixTestCase):
    """Tests for :class:`Country` and its name converter"""

    def test_wrong_country(self):
        # a code that is expected to be rejected
        self.assertRaises(ValueError, Country, 'ZZ')

    def test_eq(self):
        first, second = Country('US'), Country('US')
        self.assertEqual(first, second)

    def test_ne(self):
        self.assertNotEqual(Country('GB'), Country('US'))
        self.assertIsNotNone(Country('US'))

    def test_hash(self):
        # a country hashes like its code
        self.assertEqual(hash(Country('US')), hash('US'))

    def test_pickle(self):
        for code in ('GB', 'US'):
            country = Country(code)
            restored = pickle.loads(pickle.dumps(country))
            self.assertEqual(restored, country)

    def test_converter_name(self):
        united_states = Country('US')
        self.assertEqual(united_states.name, 'UNITED STATES')
        self.assertEqual(Country.fromname('UNITED STATES'), Country('US'))
        self.assertEqual(Country.fromcode('UNITED STATES', 'name'), Country('US'))
        self.assertRaises(CountryReverseError, Country.fromname, 'ZZZZZ')
        self.assertEqual(len(country_converters['name'].codes), 249)
|
||||
|
||||
|
||||
class TestLanguage(TestCase, _Py26FixTestCase):
    """Tests for :class:`Language`: construction, converters, IETF parsing,
    comparison, hashing, pickling and converter registration"""

    def test_languages(self):
        self.assertEqual(len(LANGUAGES), 7874)

    def test_wrong_language(self):
        self.assertRaises(ValueError, lambda: Language('zzz'))

    def test_unknown_language(self):
        self.assertEqual(Language('zzzz', unknown='und'), Language('und'))

    def test_converter_alpha2(self):
        self.assertEqual(Language('eng').alpha2, 'en')
        self.assertEqual(Language.fromalpha2('en'), Language('eng'))
        self.assertEqual(Language.fromcode('en', 'alpha2'), Language('eng'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha2('zz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)
        self.assertEqual(len(language_converters['alpha2'].codes), 184)

    def test_converter_alpha3b(self):
        self.assertEqual(Language('fra').alpha3b, 'fre')
        self.assertEqual(Language.fromalpha3b('fre'), Language('fra'))
        self.assertEqual(Language.fromcode('fre', 'alpha3b'), Language('fra'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3b('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3b)
        self.assertEqual(len(language_converters['alpha3b'].codes), 418)

    def test_converter_alpha3t(self):
        self.assertEqual(Language('fra').alpha3t, 'fra')
        self.assertEqual(Language.fromalpha3t('fra'), Language('fra'))
        self.assertEqual(Language.fromcode('fra', 'alpha3t'), Language('fra'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3t('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)
        self.assertEqual(len(language_converters['alpha3t'].codes), 418)

    def test_converter_name(self):
        self.assertEqual(Language('eng').name, 'English')
        self.assertEqual(Language.fromname('English'), Language('eng'))
        self.assertEqual(Language.fromcode('English', 'name'), Language('eng'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromname('Zzzzzzzzz'))
        self.assertEqual(len(language_converters['name'].codes), 7874)

    def test_converter_scope(self):
        self.assertEqual(language_converters['scope'].codes, set(['I', 'S', 'M']))
        self.assertEqual(Language('eng').scope, 'individual')
        self.assertEqual(Language('und').scope, 'special')

    def test_converter_type(self):
        self.assertEqual(language_converters['type'].codes, set(['A', 'C', 'E', 'H', 'L', 'S']))
        self.assertEqual(Language('eng').type, 'living')
        self.assertEqual(Language('und').type, 'special')

    def test_converter_opensubtitles(self):
        self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
        self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
        self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
        self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
        self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
        # Montenegrin is not recognized as an ISO language (yet?) but for now it is
        # unofficially accepted as Serbian from Montenegro
        self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
        self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromopensubtitles('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').opensubtitles)
        self.assertEqual(len(language_converters['opensubtitles'].codes), 606)

        # test with all the LANGUAGES from the opensubtitles api
        # downloaded from: http://www.opensubtitles.org/addons/export_languages.php
        f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
        try:
            f.readline()
            for l in f:
                idlang, alpha2, _, upload_enabled, web_enabled = l.decode('utf-8').strip().split('\t')
                if not int(upload_enabled) and not int(web_enabled):
                    # do not test LANGUAGES that are too esoteric / not widely available
                    continue
                self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
                if alpha2:
                    self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
        finally:
            # close the data file even when one of the assertions above fails
            f.close()

    def test_fromietf_country_script(self):
        language = Language.fromietf('fra-FR-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertEqual(language.country, Country('FR'))
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_country_no_script(self):
        language = Language.fromietf('fra-FR')
        self.assertEqual(language.alpha3, 'fra')
        self.assertEqual(language.country, Country('FR'))
        self.assertIsNone(language.script)

    def test_fromietf_no_country_no_script(self):
        # a bare language subtag: neither country nor script may be set
        language = Language.fromietf('fra')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertIsNone(language.script)

    def test_fromietf_no_country_script(self):
        language = Language.fromietf('fra-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_alpha2_language(self):
        language = Language.fromietf('fr-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_wrong_language(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('xyz-FR'))

    def test_fromietf_wrong_country(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('fra-YZ'))

    def test_fromietf_wrong_script(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('fra-FR-Wxyz'))

    def test_eq(self):
        self.assertEqual(Language('eng'), Language('eng'))

    def test_ne(self):
        self.assertNotEqual(Language('fra'), Language('eng'))
        self.assertIsNotNone(Language('fra'))

    def test_nonzero(self):
        self.assertFalse(bool(Language('und')))
        self.assertTrue(bool(Language('eng')))

    def test_language_hasattr(self):
        self.assertTrue(hasattr(Language('fra'), 'alpha3'))
        self.assertTrue(hasattr(Language('fra'), 'alpha2'))
        self.assertFalse(hasattr(Language('bej'), 'alpha2'))

    def test_country(self):
        self.assertEqual(Language('por', 'BR').country, Country('BR'))
        self.assertEqual(Language('eng', Country('US')).country, Country('US'))

    def test_eq_with_country(self):
        self.assertEqual(Language('eng', 'US'), Language('eng', Country('US')))

    def test_ne_with_country(self):
        self.assertNotEqual(Language('eng', 'US'), Language('eng', Country('GB')))

    def test_script(self):
        self.assertEqual(Language('srp', script='Latn').script, Script('Latn'))
        self.assertEqual(Language('srp', script=Script('Cyrl')).script, Script('Cyrl'))

    def test_eq_with_script(self):
        self.assertEqual(Language('srp', script='Latn'), Language('srp', script=Script('Latn')))

    def test_ne_with_script(self):
        self.assertNotEqual(Language('srp', script='Latn'), Language('srp', script=Script('Cyrl')))

    def test_eq_with_country_and_script(self):
        self.assertEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Latn')))

    def test_ne_with_country_and_script(self):
        self.assertNotEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Cyrl')))

    def test_hash(self):
        self.assertEqual(hash(Language('fra')), hash('fr'))
        self.assertEqual(hash(Language('ace')), hash('ace'))
        self.assertEqual(hash(Language('por', 'BR')), hash('pt-BR'))
        self.assertEqual(hash(Language('srp', script='Cyrl')), hash('sr-Cyrl'))
        self.assertEqual(hash(Language('eng', 'US', 'Latn')), hash('en-US-Latn'))

    def test_pickle(self):
        for lang in [Language('fra'),
                     Language('eng', 'US'),
                     Language('srp', script='Latn'),
                     Language('eng', 'US', 'Latn')]:
            self.assertEqual(pickle.loads(pickle.dumps(lang)), lang)

    def test_str(self):
        self.assertEqual(Language.fromietf(str(Language('eng', 'US', 'Latn'))), Language('eng', 'US', 'Latn'))
        self.assertEqual(Language.fromietf(str(Language('fra', 'FR'))), Language('fra', 'FR'))
        self.assertEqual(Language.fromietf(str(Language('bel'))), Language('bel'))

    def test_register_converter(self):
        class TestConverter(LanguageReverseConverter):
            def __init__(self):
                self.to_test = {'fra': 'test1', 'eng': 'test2'}
                self.from_test = {'test1': 'fra', 'test2': 'eng'}

            def convert(self, alpha3, country=None, script=None):
                if alpha3 not in self.to_test:
                    raise LanguageConvertError(alpha3, country, script)
                return self.to_test[alpha3]

            def reverse(self, test):
                if test not in self.from_test:
                    raise LanguageReverseError(test)
                return (self.from_test[test], None)

        language = Language('fra')
        self.assertFalse(hasattr(language, 'test'))
        language_converters['test'] = TestConverter()
        try:
            self.assertTrue(hasattr(language, 'test'))
            self.assertIn('test', language_converters)
            self.assertEqual(Language('fra').test, 'test1')
            self.assertEqual(Language.fromtest('test2').alpha3, 'eng')
        finally:
            # always unregister, so a failure above cannot leave the
            # temporary converter in place for the other tests
            del language_converters['test']
        self.assertNotIn('test', language_converters)
        self.assertRaises(KeyError, lambda: Language.fromtest('test1'))
        self.assertRaises(AttributeError, lambda: Language('fra').test)
|
||||
|
||||
|
||||
def suite():
    """Return a :class:`TestSuite` holding the tests of the three test cases"""
    all_tests = TestSuite()
    for test_case in (TestScript, TestCountry, TestLanguage):
        all_tests.addTest(TestLoader().loadTestsFromTestCase(test_case))
    return all_tests


if __name__ == '__main__':
    TextTestRunner().run(suite())
|
||||
@@ -0,0 +1,43 @@
|
||||
Behold, mortal, the origins of Beautiful Soup...
|
||||
================================================
|
||||
|
||||
Leonard Richardson is the primary programmer.
|
||||
|
||||
Aaron DeVore is awesome.
|
||||
|
||||
Mark Pilgrim provided the encoding detection code that forms the base
|
||||
of UnicodeDammit.
|
||||
|
||||
Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful
|
||||
Soup 4 working under Python 3.
|
||||
|
||||
Simon Willison wrote soupselect, which was used to make Beautiful Soup
|
||||
support CSS selectors.
|
||||
|
||||
Sam Ruby helped with a lot of edge cases.
|
||||
|
||||
Jonathan Ellis was awarded the prestigious Beau Potage D'Or for his
|
||||
work in solving the nestable tags conundrum.
|
||||
|
||||
An incomplete list of people who have contributed patches to Beautiful
|
||||
Soup:
|
||||
|
||||
Istvan Albert, Andrew Lin, Anthony Baxter, Andrew Boyko, Tony Chang,
|
||||
Zephyr Fang, Fuzzy, Roman Gaufman, Yoni Gilad, Richie Hindle, Peteris
|
||||
Krumins, Kent Johnson, Ben Last, Robert Leftwich, Staffan Malmgren,
|
||||
Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon", Ed
|
||||
Oskiewicz, Greg Phillips, Giles Radford, Arthur Rudolph, Marko
|
||||
Samastur, Jouni Seppänen, Alexander Schmolck, Andy Theyers, Glyn
|
||||
Webster, Paul Wright, Danny Yoo
|
||||
|
||||
An incomplete list of people who made suggestions or found bugs or
|
||||
found ways to break Beautiful Soup:
|
||||
|
||||
Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel,
|
||||
Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes,
|
||||
Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams,
|
||||
warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison,
|
||||
Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed
|
||||
Summers, Dennis Sutch, Chris Smith, Aaron Sweep^W Swartz, Stuart
|
||||
Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de
|
||||
Sousa Rocha, Yichun Wei, Per Vognsen
|
||||
@@ -0,0 +1,26 @@
|
||||
Beautiful Soup is made available under the MIT license:
|
||||
|
||||
Copyright (c) 2004-2012 Leonard Richardson
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE, DAMMIT.
|
||||
|
||||
Beautiful Soup incorporates code from the html5lib library, which is
|
||||
also made available under the MIT license.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,63 @@
|
||||
= Introduction =
|
||||
|
||||
>>> from bs4 import BeautifulSoup
|
||||
>>> soup = BeautifulSoup("<p>Some<b>bad<i>HTML")
|
||||
>>> print soup.prettify()
|
||||
<html>
|
||||
<body>
|
||||
<p>
|
||||
Some
|
||||
<b>
|
||||
bad
|
||||
<i>
|
||||
HTML
|
||||
</i>
|
||||
</b>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
>>> soup.find(text="bad")
|
||||
u'bad'
|
||||
|
||||
>>> soup.i
|
||||
<i>HTML</i>
|
||||
|
||||
>>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml")
|
||||
>>> print soup.prettify()
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<tag1>
|
||||
Some
|
||||
<tag2 />
|
||||
bad
|
||||
<tag3>
|
||||
XML
|
||||
</tag3>
|
||||
</tag1>
|
||||
|
||||
= Full documentation =
|
||||
|
||||
The bs4/doc/ directory contains full documentation in Sphinx
|
||||
format. Run "make html" in that directory to create HTML
|
||||
documentation.
|
||||
|
||||
= Running the unit tests =
|
||||
|
||||
Beautiful Soup supports unit test discovery from the project root directory:
|
||||
|
||||
$ nosetests
|
||||
|
||||
$ python -m unittest discover -s bs4 # Python 2.7 and up
|
||||
|
||||
If you checked out the source tree, you should see a script in the
|
||||
home directory called test-all-versions. This script will run the unit
|
||||
tests under Python 2.7, then create a temporary Python 3 conversion of
|
||||
the source and run the unit tests again under Python 3.
|
||||
|
||||
= Links =
|
||||
|
||||
Homepage: http://www.crummy.com/software/BeautifulSoup/bs4/
|
||||
Documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
http://readthedocs.org/docs/beautiful-soup-4/
|
||||
Discussion group: http://groups.google.com/group/beautifulsoup/
|
||||
Development: https://code.launchpad.net/beautifulsoup/
|
||||
Bug tracker: https://bugs.launchpad.net/beautifulsoup/
|
||||
@@ -0,0 +1,406 @@
|
||||
"""Beautiful Soup
|
||||
Elixir and Tonic
|
||||
"The Screen-Scraper's Friend"
|
||||
http://www.crummy.com/software/BeautifulSoup/
|
||||
|
||||
Beautiful Soup uses a pluggable XML or HTML parser to parse a
|
||||
(possibly invalid) document into a tree representation. Beautiful Soup
|
||||
provides methods and Pythonic idioms that make it easy to
|
||||
navigate, search, and modify the parse tree.
|
||||
|
||||
Beautiful Soup works with Python 2.6 and up. It works better if lxml
|
||||
and/or html5lib is installed.
|
||||
|
||||
For more than you ever wanted to know about Beautiful Soup, see the
|
||||
documentation:
|
||||
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
"""
|
||||
|
||||
__author__ = "Leonard Richardson (leonardr@segfault.org)"
|
||||
__version__ = "4.3.2"
|
||||
__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson"
|
||||
__license__ = "MIT"
|
||||
|
||||
__all__ = ['BeautifulSoup']
|
||||
|
||||
import os
|
||||
import re
|
||||
import warnings
|
||||
|
||||
from .builder import builder_registry, ParserRejectedMarkup
|
||||
from .dammit import UnicodeDammit
|
||||
from .element import (
|
||||
CData,
|
||||
Comment,
|
||||
DEFAULT_OUTPUT_ENCODING,
|
||||
Declaration,
|
||||
Doctype,
|
||||
NavigableString,
|
||||
PageElement,
|
||||
ProcessingInstruction,
|
||||
ResultSet,
|
||||
SoupStrainer,
|
||||
Tag,
|
||||
)
|
||||
|
||||
# The very first thing we do is give a useful error if someone is
|
||||
# running this code under Python 3 without converting it.
|
||||
syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
|
||||
|
||||
class BeautifulSoup(Tag):
|
||||
"""
|
||||
This class defines the basic interface called by the tree builders.
|
||||
|
||||
These methods will be called by the parser:
|
||||
reset()
|
||||
feed(markup)
|
||||
|
||||
The tree builder may call these methods from its feed() implementation:
|
||||
handle_starttag(name, attrs) # See note about return value
|
||||
handle_endtag(name)
|
||||
handle_data(data) # Appends to the current data node
|
||||
endData(containerClass=NavigableString) # Ends the current data node
|
||||
|
||||
No matter how complicated the underlying parser is, you should be
|
||||
able to build a tree using 'start tag' events, 'end tag' events,
|
||||
'data' events, and "done with data" events.
|
||||
|
||||
If you encounter an empty-element tag (aka a self-closing tag,
|
||||
like HTML's <br> tag), call handle_starttag and then
|
||||
handle_endtag.
|
||||
"""
|
||||
ROOT_TAG_NAME = u'[document]'
|
||||
|
||||
# If the end-user gives no indication which tree builder they
|
||||
# want, look for one with these features.
|
||||
DEFAULT_BUILDER_FEATURES = ['html', 'fast']
|
||||
|
||||
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
|
||||
|
||||
def __init__(self, markup="", features=None, builder=None,
             parse_only=None, from_encoding=None, **kwargs):
    """The Soup object is initialized as the 'root tag', and the
    provided markup (which can be a string or a file-like object)
    is fed into the underlying parser.

    markup -- a string, or an object with a read() method whose
      contents are read in full.
    features -- a feature name or a list of feature names used to
      look up a tree builder when `builder` is not given; defaults
      to DEFAULT_BUILDER_FEATURES.
    builder -- a tree builder instance to use as-is, bypassing the
      feature lookup.
    parse_only -- stored as self.parse_only (the old spelling
      parseOnlyThese is still accepted, with a warning).
    from_encoding -- handed to the builder's prepare_markup() (the
      old spelling fromEncoding is still accepted, with a warning).
    kwargs -- only the obsolete Beautiful Soup 3 arguments handled
      below are tolerated; anything else raises TypeError.
    """

    if 'convertEntities' in kwargs:
        # Drop the obsolete argument like the other BS3 leftovers
        # below; otherwise the unexpected-keyword check further
        # down turns this warning into a TypeError.
        del kwargs['convertEntities']
        warnings.warn(
            "BS4 does not respect the convertEntities argument to the "
            "BeautifulSoup constructor. Entities are always converted "
            "to Unicode characters.")

    if 'markupMassage' in kwargs:
        del kwargs['markupMassage']
        warnings.warn(
            "BS4 does not respect the markupMassage argument to the "
            "BeautifulSoup constructor. The tree builder is responsible "
            "for any necessary markup massage.")

    if 'smartQuotesTo' in kwargs:
        del kwargs['smartQuotesTo']
        warnings.warn(
            "BS4 does not respect the smartQuotesTo argument to the "
            "BeautifulSoup constructor. Smart quotes are always converted "
            "to Unicode characters.")

    if 'selfClosingTags' in kwargs:
        del kwargs['selfClosingTags']
        warnings.warn(
            "BS4 does not respect the selfClosingTags argument to the "
            "BeautifulSoup constructor. The tree builder is responsible "
            "for understanding self-closing tags.")

    if 'isHTML' in kwargs:
        del kwargs['isHTML']
        warnings.warn(
            "BS4 does not respect the isHTML argument to the "
            "BeautifulSoup constructor. You can pass in features='html' "
            "or features='xml' to get a builder capable of handling "
            "one or the other.")

    def deprecated_argument(old_name, new_name):
        # Pop a renamed argument out of kwargs, warning about the
        # rename; returns None when the old name was not used.
        if old_name in kwargs:
            warnings.warn(
                'The "%s" argument to the BeautifulSoup constructor '
                'has been renamed to "%s."' % (old_name, new_name))
            value = kwargs[old_name]
            del kwargs[old_name]
            return value
        return None

    parse_only = parse_only or deprecated_argument(
        "parseOnlyThese", "parse_only")

    from_encoding = from_encoding or deprecated_argument(
        "fromEncoding", "from_encoding")

    # Whatever is left in kwargs at this point is not understood.
    if len(kwargs) > 0:
        arg = kwargs.keys().pop()
        raise TypeError(
            "__init__() got an unexpected keyword argument '%s'" % arg)

    if builder is None:
        if isinstance(features, basestring):
            features = [features]
        if features is None or len(features) == 0:
            features = self.DEFAULT_BUILDER_FEATURES
        builder_class = builder_registry.lookup(*features)
        if builder_class is None:
            raise FeatureNotFound(
                "Couldn't find a tree builder with the features you "
                "requested: %s. Do you need to install a parser library?"
                % ",".join(features))
        builder = builder_class()
    self.builder = builder
    self.is_xml = builder.is_xml
    self.builder.soup = self

    self.parse_only = parse_only

    if hasattr(markup, 'read'):        # It's a file-type object.
        markup = markup.read()
    elif len(markup) <= 256:
        # Print out warnings for a couple beginner problems
        # involving passing non-markup to Beautiful Soup.
        # Beautiful Soup will still parse the input as markup,
        # just in case that's what the user really wants.
        if (isinstance(markup, unicode)
            and not os.path.supports_unicode_filenames):
            possible_filename = markup.encode("utf8")
        else:
            possible_filename = markup
        is_file = False
        try:
            is_file = os.path.exists(possible_filename)
        except Exception:
            # This is almost certainly a problem involving
            # characters not valid in filenames on this
            # system. Just let it go.
            pass
        if is_file:
            warnings.warn(
                '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
        if markup[:5] == "http:" or markup[:6] == "https:":
            # TODO: This is ugly but I couldn't get it to work in
            # Python 3 otherwise.
            if ((isinstance(markup, bytes) and b' ' not in markup)
                or (isinstance(markup, unicode) and u' ' not in markup)):
                warnings.warn(
                    '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)

    # Try each candidate the builder offers until one parses
    # without the parser rejecting the markup.
    for (self.markup, self.original_encoding, self.declared_html_encoding,
         self.contains_replacement_characters) in (
        self.builder.prepare_markup(markup, from_encoding)):
        self.reset()
        try:
            self._feed()
            break
        except ParserRejectedMarkup:
            pass

    # Clear out the markup and remove the builder's circular
    # reference to this object.
    self.markup = None
    self.builder.soup = None
|
||||
|
||||
def _feed(self):
    """Run the builder over ``self.markup`` and close out the tree.

    The builder is reset, fed the current markup, and afterwards any
    pending string data is flushed and every tag above the root is
    popped off the tag stack.
    """
    builder = self.builder
    builder.reset()
    builder.feed(self.markup)

    # Flush any unfinished string, then unwind the stack to the root.
    self.endData()
    root_name = self.ROOT_TAG_NAME
    while self.currentTag.name != root_name:
        self.popTag()
|
||||
|
||||
def reset(self):
    """Re-initialise this object as the root tag and clear parse state.

    Calls Tag.__init__ with ROOT_TAG_NAME, resets the builder, empties
    the data buffer and both stacks, then pushes this object itself as
    the first entry on the tag stack.
    """
    Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
    self.hidden = 1
    self.builder.reset()

    # Fresh, empty parse state.
    self.tagStack = []
    self.preserve_whitespace_tag_stack = []
    self.current_data = []
    self.currentTag = None

    # The soup object is always the bottom of the stack.
    self.pushTag(self)
|
||||
|
||||
def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
    """Build and return a new Tag that uses this soup's builder.

    Extra keyword arguments are passed to Tag as its attribute dict.
    The Tag is constructed with None as its first argument.
    """
    created = Tag(None, self.builder, name, namespace, nsprefix, attrs)
    return created
|
||||
|
||||
def new_string(self, s, subclass=NavigableString):
    """Wrap ``s`` in ``subclass`` and return the new string object.

    ``setup()`` is called on the new object with no arguments before
    it is returned.
    """
    result = subclass(s)
    result.setup()
    return result
|
||||
|
||||
def insert_before(self, successor):
    """Always raise NotImplementedError; not supported on the soup object."""
    message = "BeautifulSoup objects don't support insert_before()."
    raise NotImplementedError(message)
|
||||
|
||||
def insert_after(self, successor):
    """Always raise NotImplementedError; not supported on the soup object."""
    message = "BeautifulSoup objects don't support insert_after()."
    raise NotImplementedError(message)
|
||||
|
||||
def popTag(self):
    """Pop the top of the tag stack and return the new current tag.

    If the popped tag equals the top of the whitespace-preserving
    stack, that stack is popped too. ``currentTag`` is only updated
    when the tag stack is still non-empty afterwards; note that the
    return value is ``currentTag``, not the tag that was removed.
    """
    removed = self.tagStack.pop()
    whitespace_stack = self.preserve_whitespace_tag_stack
    if whitespace_stack and removed == whitespace_stack[-1]:
        whitespace_stack.pop()
    remaining = self.tagStack
    if remaining:
        self.currentTag = remaining[-1]
    return self.currentTag
|
||||
|
||||
def pushTag(self, tag):
    """Make ``tag`` the current tag and put it on the tag stack.

    When there already is a (truthy) current tag, ``tag`` is first
    appended to its ``contents``. Tags whose name is listed in the
    builder's ``preserve_whitespace_tags`` are also pushed onto the
    whitespace-preserving stack.
    """
    enclosing = self.currentTag
    if enclosing:
        enclosing.contents.append(tag)
    self.tagStack.append(tag)
    self.currentTag = tag
    if tag.name in self.builder.preserve_whitespace_tags:
        self.preserve_whitespace_tag_stack.append(tag)
|
||||
|
||||
def endData(self, containerClass=NavigableString):
    """Turn the buffered character data into one string object.

    Does nothing when the buffer is empty. Otherwise the pieces are
    joined, the buffer is cleared, and the result is wrapped in
    ``containerClass`` and handed to object_was_parsed(), unless the
    ``parse_only`` filter rejects it.
    """
    if not self.current_data:
        return

    text = u''.join(self.current_data)

    # Outside whitespace-preserving tags, a string made up solely of
    # ASCII spaces collapses to a single newline (if it contained
    # one) or a single space.
    if not self.preserve_whitespace_tag_stack:
        if all(ch in self.ASCII_SPACES for ch in text):
            text = '\n' if '\n' in text else ' '

    # Reset the data collector.
    self.current_data = []

    # Should we add this string to the tree at all?
    strainer = self.parse_only
    if strainer and len(self.tagStack) <= 1:
        if not strainer.text or not strainer.search(text):
            return

    self.object_was_parsed(containerClass(text))
|
||||
|
||||
def object_was_parsed(self, o, parent=None, most_recent_element=None):
    """Add an object to the parse tree.

    :param o: The object to add; its ``setup()`` is called with the
        chosen parent and previous element.
    :param parent: Object whose ``contents`` receives ``o``. Defaults
        to ``self.currentTag`` when None.
    :param most_recent_element: Element treated as parsed immediately
        before ``o``. Defaults to ``self._most_recent_element`` when
        None.
    """
    # Compare against None explicitly: an explicitly supplied element
    # may be falsy (for example an empty string node) and must not be
    # silently replaced by the default.
    if parent is None:
        parent = self.currentTag
    if most_recent_element is None:
        most_recent_element = self._most_recent_element
    o.setup(parent, most_recent_element)

    if most_recent_element is not None:
        most_recent_element.next_element = o
    self._most_recent_element = o
    parent.contents.append(o)
|
||||
|
||||
def _popToTag(self, name, nsprefix=None, inclusivePop=True):
    """Pop the tag stack up to and including the most recent
    instance of the given tag. If inclusivePop is false, pop the tag
    stack up to but *not* including the most recent instance of
    the given tag.

    Returns whatever the last popTag() call returned, or None when
    nothing was popped. The bottom entry of the stack is never popped.
    """
    if name == self.ROOT_TAG_NAME:
        # The BeautifulSoup object itself can never be popped.
        return

    last_result = None
    position = len(self.tagStack) - 1
    while position > 0:
        candidate = self.tagStack[position]
        if name == candidate.name and nsprefix == candidate.prefix:
            if inclusivePop:
                last_result = self.popTag()
            break
        last_result = self.popTag()
        position -= 1

    return last_result
|
||||
|
||||
def handle_starttag(self, name, namespace, nsprefix, attrs):
    """Push a start tag on to the stack.

    If this method returns None, the tag was rejected by the
    SoupStrainer. You should proceed as if the tag had not occurred
    in the document. For instance, if this was a self-closing tag,
    don't call handle_endtag.

    :return: The newly created Tag, or None if it was rejected.
    """
    # Flush any text that came before this tag.
    self.endData()

    if (self.parse_only and len(self.tagStack) <= 1
        and (self.parse_only.text
             or not self.parse_only.search_tag(name, attrs))):
        return None

    tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
              self.currentTag, self._most_recent_element)
    # Link the previously parsed element to the new tag. Test against
    # None rather than truthiness so that a falsy previous element
    # (such as an empty string node) is still linked.
    if self._most_recent_element is not None:
        self._most_recent_element.next_element = tag
    self._most_recent_element = tag
    self.pushTag(tag)
    return tag
|
||||
|
||||
def handle_endtag(self, name, nsprefix=None):
    """Flush pending text, then pop the stack down through ``name``."""
    self.endData()
    self._popToTag(name, nsprefix=nsprefix)
|
||||
|
||||
def handle_data(self, data):
    """Buffer a piece of character data until endData() is called."""
    buffered = self.current_data
    buffered.append(data)
|
||||
|
||||
def decode(self, pretty_print=False,
           eventual_encoding=DEFAULT_OUTPUT_ENCODING,
           formatter="minimal"):
    """Returns a string or Unicode representation of this document.
    To get Unicode, pass None for eventual_encoding.

    :param pretty_print: If true, the parent class's decode() is
        called with an indent level of 0; otherwise with None.
    :param eventual_encoding: Named in the XML declaration (when this
        is an XML document and the value is not None) and passed
        through to the parent class's decode().
    :param formatter: Passed through to the parent class's decode().
    """
    if self.is_xml:
        # Print the XML declaration
        encoding_part = ''
        if eventual_encoding is not None:
            encoding_part = ' encoding="%s"' % eventual_encoding
        prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
    else:
        prefix = u''
    if not pretty_print:
        indent_level = None
    else:
        indent_level = 0
    return prefix + super(BeautifulSoup, self).decode(
        indent_level, eventual_encoding, formatter)
|
||||
|
||||
# Alias to make it easier to type import: 'from bs4 import _soup'
|
||||
_s = BeautifulSoup
|
||||
_soup = BeautifulSoup
|
||||
|
||||
class BeautifulStoneSoup(BeautifulSoup):
    """Deprecated interface to an XML parser."""

    def __init__(self, *args, **kwargs):
        """Force ``features='xml'``, warn, and defer to BeautifulSoup.

        Any ``features`` value supplied by the caller is overwritten.
        """
        kwargs['features'] = 'xml'
        deprecation_message = (
            'The BeautifulStoneSoup class is deprecated. Instead of using '
            'it, pass features="xml" into the BeautifulSoup constructor.')
        warnings.warn(deprecation_message)
        super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class StopParsing(Exception):
    """Exception type with no extra behaviour beyond Exception.

    NOTE(review): nothing visible here raises or catches it; presumably
    used to abort a parse early -- confirm against callers.
    """
|
||||
|
||||
class FeatureNotFound(ValueError):
    """ValueError subclass with no extra behaviour.

    NOTE(review): presumably raised when no tree builder offers the
    requested features -- confirm against the constructor.
    """
|
||||
|
||||
|
||||
#By default, act as an HTML pretty-printer.
|
||||
if __name__ == '__main__':
    # Read markup from standard input and print the prettified tree.
    import sys
    print(BeautifulSoup(sys.stdin).prettify())
|
||||
@@ -0,0 +1,321 @@
|
||||
from collections import defaultdict
|
||||
import itertools
|
||||
import sys
|
||||
from bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
ContentMetaAttributeValue,
|
||||
whitespace_re
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'HTMLTreeBuilder',
|
||||
'SAXTreeBuilder',
|
||||
'TreeBuilder',
|
||||
'TreeBuilderRegistry',
|
||||
]
|
||||
|
||||
# Some useful features for a TreeBuilder to have.
|
||||
FAST = 'fast'
|
||||
PERMISSIVE = 'permissive'
|
||||
STRICT = 'strict'
|
||||
XML = 'xml'
|
||||
HTML = 'html'
|
||||
HTML_5 = 'html5'
|
||||
|
||||
|
||||
class TreeBuilderRegistry(object):
    """Keeps track of tree builder classes, indexed by feature name."""

    def __init__(self):
        # feature name -> builder classes, most recently registered first
        self.builders_for_feature = defaultdict(list)
        # every registered builder class, most recently registered first
        self.builders = []

    def register(self, treebuilder_class):
        """Register a treebuilder based on its advertised features."""
        for feature in treebuilder_class.features:
            self.builders_for_feature[feature].insert(0, treebuilder_class)
        self.builders.insert(0, treebuilder_class)

    def lookup(self, *features):
        """Return the best registered builder for ``features``, or None.

        With no features, the most recently registered builder wins.
        Features that no builder advertises are ignored. Among the
        builders that have every remaining feature, the one listed
        first for the first recognised feature is returned.
        """
        if not self.builders:
            # There are no builders at all.
            return None

        if not features:
            # They didn't ask for any features. Give them the most
            # recently registered builder.
            return self.builders[0]

        ordered = None      # builders for the first recognised feature
        surviving = None    # builders that have every feature seen so far
        for feature in features:
            matching = self.builders_for_feature.get(feature, [])
            if not matching:
                continue
            if ordered is None:
                ordered = matching
                surviving = set(matching)
            else:
                # Eliminate any candidates that don't have this feature.
                surviving = surviving.intersection(matching)

        if surviving is None:
            return None

        # Pick the highest-priority builder that survived elimination.
        for builder in ordered:
            if builder in surviving:
                return builder
        return None
|
||||
|
||||
# The BeautifulSoup class will take feature lists from developers and use them
|
||||
# to look up builders in this registry.
|
||||
builder_registry = TreeBuilderRegistry()
|
||||
|
||||
class TreeBuilder(object):
    """Turn a document into a Beautiful Soup object tree."""

    features = []

    is_xml = False
    preserve_whitespace_tags = set()
    empty_element_tags = None # A tag will be considered an empty-element
                              # tag when and only when it has no contents.

    # A value for these tag/attribute combinations is a space- or
    # comma-separated list of CDATA, rather than a single CDATA.
    cdata_list_attributes = {}

    def __init__(self):
        self.soup = None

    def reset(self):
        """Hook for subclasses; the base implementation does nothing."""
        pass

    def can_be_empty_element(self, tag_name):
        """Might a tag with this name be an empty-element tag?

        The final markup may or may not actually present this tag as
        self-closing.

        For instance: an HTMLBuilder does not consider a <p> tag to be
        an empty-element tag (it's not in
        HTMLBuilder.empty_element_tags). This means an empty <p> tag
        will be presented as "<p></p>", not "<p />".

        The default implementation has no opinion about which tags are
        empty-element tags, so a tag will be presented as an
        empty-element tag if and only if it has no contents.
        "<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
        be left alone.
        """
        if self.empty_element_tags is None:
            return True
        return tag_name in self.empty_element_tags

    def feed(self, markup):
        """Parse ``markup``; must be implemented by subclasses."""
        raise NotImplementedError()

    def prepare_markup(self, markup, user_specified_encoding=None,
                       document_declared_encoding=None):
        """Yield one 4-tuple ``(markup, None, None, False)``.

        This is a generator because the caller iterates over the
        result and unpacks every item as a 4-tuple, exactly as it does
        for the subclass implementations; returning a bare tuple would
        make the caller iterate over the tuple's own elements.
        """
        yield markup, None, None, False

    def test_fragment_to_document(self, fragment):
        """Wrap an HTML fragment to make it look like a document.

        Different parsers do this differently. For instance, lxml
        introduces an empty <head> tag, and html5lib
        doesn't. Abstracting this away lets us write simple tests
        which run HTML fragments through the parser and compare the
        results against other HTML fragments.

        This method should not be used outside of tests.
        """
        return fragment

    def set_up_substitutions(self, tag):
        """Base implementation: performs no substitution, returns False."""
        return False

    def _replace_cdata_list_attribute_values(self, tag_name, attrs):
        """Replaces class="foo bar" with class=["foo", "bar"]

        Modifies its input in place.

        :return: The same ``attrs`` object that was passed in.
        """
        if not attrs:
            return attrs
        if self.cdata_list_attributes:
            universal = self.cdata_list_attributes.get('*', [])
            tag_specific = self.cdata_list_attributes.get(
                tag_name.lower(), None)
            # Iterate over a snapshot of the keys since values are
            # reassigned inside the loop.
            for attr in list(attrs.keys()):
                if attr in universal or (tag_specific and attr in tag_specific):
                    # We have a "class"-type attribute whose string
                    # value is a whitespace-separated list of
                    # values. Split it into a list.
                    value = attrs[attr]
                    if isinstance(value, basestring):
                        values = whitespace_re.split(value)
                    else:
                        # html5lib sometimes calls setAttributes twice
                        # for the same tag when rearranging the parse
                        # tree. On the second call the attribute value
                        # here is already a list.  If this happens,
                        # leave the value alone rather than trying to
                        # split it again.
                        values = value
                    attrs[attr] = values
        return attrs
|
||||
|
||||
class SAXTreeBuilder(TreeBuilder):
    """A Beautiful Soup treebuilder that listens for SAX events."""

    def feed(self, markup):
        """Not implemented by this class."""
        raise NotImplementedError()

    def close(self):
        pass

    def startElement(self, name, attrs):
        """Forward a start-element event to the soup.

        Attribute keys are indexed with ``[1]`` to obtain the name
        (presumably (namespace, name) pairs -- TODO confirm).
        """
        attrs = dict((key[1], value) for key, value in list(attrs.items()))
        # handle_starttag takes (name, namespace, nsprefix, attrs); no
        # namespace information is tracked here, so pass None for both.
        self.soup.handle_starttag(name, None, None, attrs)

    def endElement(self, name):
        """Forward an end-element event to the soup."""
        self.soup.handle_endtag(name)

    def startElementNS(self, nsTuple, nodeName, attrs):
        # Throw away (ns, nodeName) for now.
        self.startElement(nodeName, attrs)

    def endElementNS(self, nsTuple, nodeName):
        # Throw away (ns, nodeName) for now.
        self.endElement(nodeName)

    def startPrefixMapping(self, prefix, nodeValue):
        # Ignore the prefix for now.
        pass

    def endPrefixMapping(self, prefix):
        # Ignore the prefix for now.
        pass

    def characters(self, content):
        """Forward character data to the soup."""
        self.soup.handle_data(content)

    def startDocument(self):
        pass

    def endDocument(self):
        pass
|
||||
|
||||
|
||||
class HTMLTreeBuilder(TreeBuilder):
    """This TreeBuilder knows facts about HTML.

    Such as which tags are empty-element tags.
    """

    preserve_whitespace_tags = set(['pre', 'textarea'])
    empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
                              'spacer', 'link', 'frame', 'base'])

    # The HTML standard defines these attributes as containing a
    # space-separated list of values, not a single value. That is,
    # class="foo bar" means that the 'class' attribute has two values,
    # 'foo' and 'bar', not the single value 'foo bar'.  When we
    # encounter one of these attributes, we will parse its value into
    # a list of values if possible. Upon output, the list will be
    # converted back into a string.
    cdata_list_attributes = {
        "*" : ['class', 'accesskey', 'dropzone'],
        "a" : ['rel', 'rev'],
        "link" : ['rel', 'rev'],
        "td" : ["headers"],
        "th" : ["headers"],
        "form" : ["accept-charset"],
        "object" : ["archive"],

        # These are HTML5 specific, as are *.accesskey and *.dropzone above.
        "area" : ["rel"],
        "icon" : ["sizes"],
        "iframe" : ["sandbox"],
        "output" : ["for"],
        }

    def set_up_substitutions(self, tag):
        """Swap a <meta> tag's encoding attribute for a stand-in object.

        :return: True only when the tag is a <meta> tag with a
            ``charset`` attribute; False otherwise.
        """
        # We are only interested in <meta> tags
        if tag.name != 'meta':
            return False

        http_equiv = tag.get('http-equiv')
        content = tag.get('content')
        charset = tag.get('charset')

        # We are interested in <meta> tags that say what encoding the
        # document was originally in. This means HTML 5-style <meta>
        # tags that provide the "charset" attribute. It also means
        # HTML 4-style <meta> tags that provide the "content"
        # attribute and have "http-equiv" set to "content-type".
        #
        # In both cases we will replace the value of the appropriate
        # attribute with a standin object that can take on any
        # encoding.
        meta_encoding = None
        if charset is not None:
            # HTML 5 style:
            # <meta charset="utf8">
            meta_encoding = charset
            tag['charset'] = CharsetMetaAttributeValue(charset)

        elif (content is not None and http_equiv is not None
              and http_equiv.lower() == 'content-type'):
            # HTML 4 style:
            # <meta http-equiv="content-type" content="text/html; charset=utf8">
            # NOTE(review): this branch replaces the attribute but
            # leaves meta_encoding as None, so the method still returns
            # False -- confirm whether callers rely on that.
            tag['content'] = ContentMetaAttributeValue(content)

        return (meta_encoding is not None)
|
||||
|
||||
def register_treebuilders_from(module):
    """Copy TreeBuilders from the given module into this module.

    Every name in ``module.__all__`` that refers to a TreeBuilder
    subclass is set as an attribute of the 'bs4.builder' module,
    appended to that module's ``__all__`` and registered with its
    ``builder_registry``.
    """
    # I'm fairly sure this is not the best way to do this.
    target = sys.modules['bs4.builder']
    for exported_name in module.__all__:
        candidate = getattr(module, exported_name)
        if not issubclass(candidate, TreeBuilder):
            continue
        setattr(target, exported_name, candidate)
        target.__all__.append(exported_name)
        # Register the builder while we're at it.
        target.builder_registry.register(candidate)
|
||||
|
||||
class ParserRejectedMarkup(Exception):
    """Exception type with no extra behaviour beyond Exception.

    NOTE(review): presumably raised by a builder that cannot handle
    the markup it was fed -- confirm against the builders.
    """
|
||||
|
||||
# Builders are registered in reverse order of priority, so that custom
|
||||
# builder registrations will take precedence. In general, we want lxml
|
||||
# to take precedence over html5lib, because it's faster. And we only
|
||||
# want to use HTMLParser as a last resort.
|
||||
from . import _htmlparser
|
||||
register_treebuilders_from(_htmlparser)
|
||||
try:
|
||||
from . import _html5lib
|
||||
register_treebuilders_from(_html5lib)
|
||||
except ImportError:
|
||||
# They don't have html5lib installed.
|
||||
pass
|
||||
try:
|
||||
from . import _lxml
|
||||
register_treebuilders_from(_lxml)
|
||||
except ImportError:
|
||||
# They don't have lxml installed.
|
||||
pass
|
||||
@@ -0,0 +1,285 @@
|
||||
__all__ = [
|
||||
'HTML5TreeBuilder',
|
||||
]
|
||||
|
||||
import warnings
|
||||
from bs4.builder import (
|
||||
PERMISSIVE,
|
||||
HTML,
|
||||
HTML_5,
|
||||
HTMLTreeBuilder,
|
||||
)
|
||||
from bs4.element import NamespacedAttribute
|
||||
import html5lib
|
||||
from html5lib.constants import namespaces
|
||||
from bs4.element import (
|
||||
Comment,
|
||||
Doctype,
|
||||
NavigableString,
|
||||
Tag,
|
||||
)
|
||||
|
||||
class HTML5TreeBuilder(HTMLTreeBuilder):
    """Use html5lib to build a tree."""

    features = ['html5lib', PERMISSIVE, HTML_5, HTML]

    def prepare_markup(self, markup, user_specified_encoding=None,
                       document_declared_encoding=None):
        """Remember the requested encoding and yield the markup as-is.

        The signature accepts the same arguments as the other tree
        builders' prepare_markup(); ``document_declared_encoding`` is
        accepted but not used.
        """
        # Store the user-specified encoding for use later on.
        self.user_specified_encoding = user_specified_encoding
        yield (markup, None, None, False)

    # These methods are defined by Beautiful Soup.
    def feed(self, markup):
        """Parse ``markup`` with html5lib.

        Warns if the soup has a ``parse_only`` value. Afterwards the
        ``original_encoding`` of the parsed document is set: None for
        Unicode input, otherwise the encoding reported by html5lib's
        tokenizer stream.
        """
        if self.soup.parse_only is not None:
            warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
        parser = html5lib.HTMLParser(tree=self.create_treebuilder)
        doc = parser.parse(markup, encoding=self.user_specified_encoding)

        # Set the character encoding detected by the tokenizer.
        if isinstance(markup, unicode):
            # We need to special-case this because html5lib sets
            # charEncoding to UTF-8 if it gets Unicode input.
            doc.original_encoding = None
        else:
            doc.original_encoding = parser.tokenizer.stream.charEncoding[0]

    def create_treebuilder(self, namespaceHTMLElements):
        """Create, remember and return the html5lib-facing tree builder."""
        self.underlying_builder = TreeBuilderForHtml5lib(
            self.soup, namespaceHTMLElements)
        return self.underlying_builder

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'<html><head></head><body>%s</body></html>' % fragment
|
||||
|
||||
|
||||
class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
    """html5lib tree builder that builds into a Beautiful Soup object."""

    def __init__(self, soup, namespaceHTMLElements):
        self.soup = soup
        super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)

    def documentClass(self):
        """Reset the soup and return it wrapped as the document node."""
        self.soup.reset()
        return Element(self.soup, self.soup, None)

    def insertDoctype(self, token):
        """Build a Doctype from the token and add it to the soup."""
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        doctype = Doctype.for_name_and_ids(name, publicId, systemId)
        self.soup.object_was_parsed(doctype)

    def elementClass(self, name, namespace):
        """Create a new tag and return it wrapped in an Element."""
        tag = self.soup.new_tag(name, namespace)
        return Element(tag, self.soup, namespace)

    def commentClass(self, data):
        """Return a TextNode wrapping a Comment built from ``data``."""
        return TextNode(Comment(data), self.soup)

    def fragmentClass(self):
        """Replace the soup with a fresh, empty one for a fragment."""
        # Imported here because this module does not import
        # BeautifulSoup at the top level.
        from bs4 import BeautifulSoup
        self.soup = BeautifulSoup("")
        self.soup.name = "[document_fragment]"
        return Element(self.soup, self.soup, None)

    def appendChild(self, node):
        # XXX This code is not covered by the BS4 tests.
        self.soup.append(node.element)

    def getDocument(self):
        return self.soup

    def getFragment(self):
        return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
|
||||
|
||||
class AttrList(object):
    """Dict-like view over an element's attributes.

    Reads are served from a copy of ``element.attrs`` taken at
    construction time; writes go to the element itself (via
    ``element[name] = value``) and do not update that copy.
    """

    def __init__(self, element):
        self.element = element
        # Snapshot of the attributes at construction time.
        self.attrs = dict(self.element.attrs)

    def __iter__(self):
        """Iterate over (name, value) pairs, not just names."""
        return iter(list(self.attrs.items()))

    def __setitem__(self, name, value):
        self.element[name] = value

    def items(self):
        return list(self.attrs.items())

    def keys(self):
        return list(self.attrs.keys())

    def __len__(self):
        return len(self.attrs)

    def __getitem__(self, name):
        return self.attrs[name]

    def __contains__(self, name):
        return name in self.attrs
|
||||
|
||||
|
||||
class Element(html5lib.treebuilders._base.Node):
    """html5lib node wrapping a Beautiful Soup element."""

    def __init__(self, element, soup, namespace):
        html5lib.treebuilders._base.Node.__init__(self, element.name)
        self.element = element
        self.soup = soup
        self.namespace = namespace

    def appendChild(self, node):
        """Append ``node`` to the wrapped element.

        ``node`` may be a plain string, a Tag, or a wrapper object
        exposing ``.element``. A string appended right after an
        existing NavigableString is merged into it.
        """
        string_child = child = None
        if isinstance(node, basestring):
            # Some other piece of code decided to pass in a string
            # instead of creating a TextElement object to contain the
            # string.
            string_child = child = node
        elif isinstance(node, Tag):
            # Some other piece of code decided to pass in a Tag
            # instead of creating an Element object to contain the
            # Tag.
            child = node
        elif node.element.__class__ == NavigableString:
            string_child = child = node.element
        else:
            child = node.element

        if not isinstance(child, basestring) and child.parent is not None:
            # Detach the child itself. When ``node`` is a bare Tag it
            # is not a wrapper, so going through ``node.element`` here
            # would not reach the object being moved.
            child.extract()

        if (string_child and self.element.contents
            and self.element.contents[-1].__class__ == NavigableString):
            # We are appending a string onto another string.
            # TODO This has O(n^2) performance, for input like
            # "a</a>a</a>a</a>..."
            old_element = self.element.contents[-1]
            new_element = self.soup.new_string(old_element + string_child)
            old_element.replace_with(new_element)
            self.soup._most_recent_element = new_element
        else:
            if isinstance(node, basestring):
                # Create a brand new NavigableString from this string.
                child = self.soup.new_string(node)

            # Tell Beautiful Soup to act as if it parsed this element
            # immediately after the parent's last descendant. (Or
            # immediately after the parent, if it has no children.)
            if self.element.contents:
                most_recent_element = self.element._last_descendant(False)
            else:
                most_recent_element = self.element

            self.soup.object_was_parsed(
                child, parent=self.element,
                most_recent_element=most_recent_element)

    def getAttributes(self):
        return AttrList(self.element)

    def setAttributes(self, attributes):
        """Copy ``attributes`` onto the wrapped element.

        Tuple keys are converted to NamespacedAttribute (mutating the
        mapping that was passed in) and the builder is given a chance
        to split list-valued attributes. Does nothing for None or an
        empty mapping.
        """
        if attributes is not None and len(attributes) > 0:

            for name, value in list(attributes.items()):
                if isinstance(name, tuple):
                    new_name = NamespacedAttribute(*name)
                    del attributes[name]
                    attributes[new_name] = value

            self.soup.builder._replace_cdata_list_attribute_values(
                self.name, attributes)
            for name, value in attributes.items():
                self.element[name] = value

            # The attributes may contain variables that need substitution.
            # Call set_up_substitutions manually.
            #
            # The Tag constructor called this method when the Tag was created,
            # but we just set/changed the attributes, so call it again.
            self.soup.builder.set_up_substitutions(self.element)
    attributes = property(getAttributes, setAttributes)

    def insertText(self, data, insertBefore=None):
        """Insert text before ``insertBefore``, or append it if not given."""
        if insertBefore:
            # insertBefore() reads ``node.element``, so it must be
            # given the TextNode wrapper, not the raw string.
            text = TextNode(self.soup.new_string(data), self.soup)
            self.insertBefore(text, insertBefore)
        else:
            self.appendChild(data)

    def insertBefore(self, node, refNode):
        """Insert ``node`` immediately before ``refNode``.

        A NavigableString inserted right after another NavigableString
        is merged into it instead.
        """
        index = self.element.index(refNode.element)
        # Only look at the preceding sibling when there is one; with
        # index 0, ``contents[index-1]`` would wrap around to the last
        # child.
        if (node.element.__class__ == NavigableString and index > 0
            and self.element.contents[index-1].__class__ == NavigableString):
            # (See comments in appendChild)
            old_node = self.element.contents[index-1]
            new_str = self.soup.new_string(old_node + node.element)
            old_node.replace_with(new_str)
        else:
            self.element.insert(index, node.element)
            node.parent = self

    def removeChild(self, node):
        node.element.extract()

    def reparentChildren(self, new_parent):
        """Move all of this tag's children into another tag."""
        element = self.element
        new_parent_element = new_parent.element
        # Determine what this tag's next_element will be once all the children
        # are removed.
        final_next_element = element.next_sibling

        new_parents_last_descendant = new_parent_element._last_descendant(False, False)
        if len(new_parent_element.contents) > 0:
            # The new parent already contains children. We will be
            # appending this tag's children to the end.
            new_parents_last_child = new_parent_element.contents[-1]
            new_parents_last_descendant_next_element = new_parents_last_descendant.next_element
        else:
            # The new parent contains no children.
            new_parents_last_child = None
            new_parents_last_descendant_next_element = new_parent_element.next_element

        to_append = element.contents
        if len(to_append) > 0:
            # Set the first child's previous_element and previous_sibling
            # to elements within the new parent
            first_child = to_append[0]
            first_child.previous_element = new_parents_last_descendant
            first_child.previous_sibling = new_parents_last_child

            # Fix the last child's next_element and next_sibling
            last_child = to_append[-1]
            last_child.next_element = new_parents_last_descendant_next_element
            last_child.next_sibling = None

        for child in to_append:
            child.parent = new_parent_element
            new_parent_element.contents.append(child)

        # Now that this element has no children, change its .next_element.
        element.contents = []
        element.next_element = final_next_element

    def cloneNode(self):
        """Return a new Element with the same name and attributes.

        Children are not copied.
        """
        tag = self.soup.new_tag(self.element.name, self.namespace)
        node = Element(tag, self.soup, self.namespace)
        for key,value in self.attributes:
            node.attributes[key] = value
        return node

    def hasContent(self):
        # Returns the contents list itself; callers use its truthiness.
        return self.element.contents

    def getNameTuple(self):
        """Return (namespace, name), defaulting to the HTML namespace."""
        if self.namespace is None:
            return namespaces["html"], self.name
        else:
            return self.namespace, self.name

    nameTuple = property(getNameTuple)
|
||||
|
||||
class TextNode(Element):
    """Element variant created with a node name of None.

    Used in this module to wrap Comment and string objects.
    """

    def __init__(self, element, soup):
        # Element.__init__ is bypassed deliberately: the base Node is
        # initialised with None as its name, and no namespace is set.
        html5lib.treebuilders._base.Node.__init__(self, None)
        self.soup = soup
        self.element = element

    def cloneNode(self):
        """Cloning is not supported for text nodes."""
        raise NotImplementedError
|
||||
@@ -0,0 +1,258 @@
|
||||
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
|
||||
|
||||
__all__ = [
|
||||
'HTMLParserTreeBuilder',
|
||||
]
|
||||
|
||||
from HTMLParser import (
|
||||
HTMLParser,
|
||||
HTMLParseError,
|
||||
)
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
|
||||
# argument, which we'd like to set to False. Unfortunately,
|
||||
# http://bugs.python.org/issue13273 makes strict=True a better bet
|
||||
# before Python 3.2.3.
|
||||
#
|
||||
# At the end of this file, we monkeypatch HTMLParser so that
|
||||
# strict=True works well on Python 3.2.2.
|
||||
major, minor, release = sys.version_info[:3]
# True from Python 3.2.3 onwards (lexicographic tuple comparison).
CONSTRUCTOR_TAKES_STRICT = (major, minor, release) >= (3, 2, 3)
|
||||
|
||||
from bs4.element import (
|
||||
CData,
|
||||
Comment,
|
||||
Declaration,
|
||||
Doctype,
|
||||
ProcessingInstruction,
|
||||
)
|
||||
from bs4.dammit import EntitySubstitution, UnicodeDammit
|
||||
|
||||
from bs4.builder import (
|
||||
HTML,
|
||||
HTMLTreeBuilder,
|
||||
STRICT,
|
||||
)
|
||||
|
||||
|
||||
HTMLPARSER = 'html.parser'
|
||||
|
||||
class BeautifulSoupHTMLParser(HTMLParser):
    """HTMLParser subclass that forwards parse events to ``self.soup``.

    ``soup`` is not set by this class; it must be assigned on the
    instance before markup is fed.
    """

    def handle_starttag(self, name, attrs):
        """Forward a start tag, converting attrs to a dict."""
        # XXX namespace
        attr_dict = {}
        for key, value in attrs:
            # Change None attribute values to the empty string
            # for consistency with the other tree builders.
            if value is None:
                value = ''
            attr_dict[key] = value
        self.soup.handle_starttag(name, None, None, attr_dict)

    def handle_endtag(self, name):
        self.soup.handle_endtag(name)

    def handle_data(self, data):
        self.soup.handle_data(data)

    def handle_charref(self, name):
        """Convert a numeric character reference to a character.

        A leading 'x' or 'X' marks a hexadecimal value. Code points
        that unichr() rejects become U+FFFD REPLACEMENT CHARACTER.
        """
        # XXX workaround for a bug in HTMLParser. Remove this once
        # it's fixed.
        if name.startswith('x'):
            real_name = int(name.lstrip('x'), 16)
        elif name.startswith('X'):
            real_name = int(name.lstrip('X'), 16)
        else:
            real_name = int(name)

        try:
            data = unichr(real_name)
        except (ValueError, OverflowError):
            data = u"\N{REPLACEMENT CHARACTER}"

        self.handle_data(data)

    def handle_entityref(self, name):
        """Replace a named entity with its character if known.

        Unknown entities are passed through as "&name;" text.
        """
        character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
        if character is not None:
            data = character
        else:
            data = "&%s;" % name
        self.handle_data(data)

    def handle_comment(self, data):
        """Add ``data`` to the tree as a Comment."""
        self.soup.endData()
        self.soup.handle_data(data)
        self.soup.endData(Comment)

    def handle_decl(self, data):
        """Add a declaration to the tree as a Doctype.

        A leading "DOCTYPE " is stripped; a bare "DOCTYPE" becomes ''.
        """
        self.soup.endData()
        if data.startswith("DOCTYPE "):
            data = data[len("DOCTYPE "):]
        elif data == 'DOCTYPE':
            # i.e. "<!DOCTYPE>"
            data = ''
        self.soup.handle_data(data)
        self.soup.endData(Doctype)

    def unknown_decl(self, data):
        """Add a CDATA section or some other declaration to the tree."""
        if data.upper().startswith('CDATA['):
            cls = CData
            data = data[len('CDATA['):]
        else:
            cls = Declaration
        self.soup.endData()
        self.soup.handle_data(data)
        self.soup.endData(cls)

    def handle_pi(self, data):
        """Add a processing instruction to the tree."""
        self.soup.endData()
        if data.endswith("?") and data.lower().startswith("xml"):
            # "An XHTML processing instruction using the trailing '?'
            # will cause the '?' to be included in data." - HTMLParser
            # docs.
            #
            # Strip the question mark so we don't end up with two
            # question marks.
            data = data[:-1]
        self.soup.handle_data(data)
        self.soup.endData(ProcessingInstruction)
|
||||
|
||||
|
||||
class HTMLParserTreeBuilder(HTMLTreeBuilder):
    """Tree builder that feeds markup through BeautifulSoupHTMLParser."""

    is_xml = False
    features = [HTML, STRICT, HTMLPARSER]

    def __init__(self, *args, **kwargs):
        """Remember the arguments to pass to each parser created by feed()."""
        if CONSTRUCTOR_TAKES_STRICT:
            kwargs['strict'] = False
        self.parser_args = (args, kwargs)

    def prepare_markup(self, markup, user_specified_encoding=None,
                       document_declared_encoding=None):
        """Yield one parsing strategy for the given markup.

        :yield: A single 4-tuple (markup, original encoding, encoding
         declared within markup, whether any characters had to be
         replaced with REPLACEMENT CHARACTER).
        """
        if isinstance(markup, unicode):
            # Already Unicode: nothing to detect or convert.
            yield (markup, None, None, False)
            return

        try_encodings = [user_specified_encoding, document_declared_encoding]
        dammit = UnicodeDammit(markup, try_encodings, is_html=True)
        yield (dammit.markup, dammit.original_encoding,
               dammit.declared_html_encoding,
               dammit.contains_replacement_characters)

    def feed(self, markup):
        """Parse ``markup`` with a fresh parser bound to ``self.soup``.

        An HTMLParseError is re-raised after a RuntimeWarning is issued.
        """
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            # Bare raise keeps the original traceback.
            raise
|
||||
|
||||
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3.
if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
    import re
    # Tolerant attribute matcher; parse_starttag() below uses it
    # whenever the parser is not in strict mode.
    attrfind_tolerant = re.compile(
        r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
        r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
    # NOTE(review): this is attached to the tree builder, not the parser;
    # parse_starttag() reads the module-level name instead.
    HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant

    locatestarttagend = re.compile(r"""
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
""", re.VERBOSE)
    BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend

    from html.parser import tagfind, attrfind

    def parse_starttag(self, i):
        """Parse the start tag beginning at offset ``i`` of ``self.rawdata``.

        Returns the offset just past the tag, or the negative value from
        check_for_whole_start_tag() when the tag is not complete yet.
        """
        # This function is defined outside a class body, so the
        # double-underscore attribute name below is not name-mangled.
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and j into a tag and attrs
        attrs = []
        match = tagfind.match(rawdata, i+1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i+1:k].lower()
        while k < endpos:
            if self.strict:
                m = attrfind.match(rawdata, k)
            else:
                m = attrfind_tolerant.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                # Attribute given without "=value".
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                # Strip the matching pair of surrounding quotes.
                attrvalue = attrvalue[1:-1]
            if attrvalue:
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            # Junk after the last attribute. lineno/offset are computed
            # here but not used further in this function.
            lineno, offset = self.getpos()
            if "\n" in self.__starttag_text:
                lineno = lineno + self.__starttag_text.count("\n")
                offset = len(self.__starttag_text) \
                         - self.__starttag_text.rfind("\n")
            else:
                offset = offset + len(self.__starttag_text)
            if self.strict:
                self.error("junk characters in start tag: %r"
                           % (rawdata[k:endpos][:20],))
            # Not strict: hand the whole malformed tag over as text.
            self.handle_data(rawdata[i:endpos])
            return endpos
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode(tag)
        return endpos

    def set_cdata_mode(self, elem):
        """Scan only for the closing tag of ``elem`` (case-insensitive)."""
        self.cdata_elem = elem.lower()
        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)

    BeautifulSoupHTMLParser.parse_starttag = parse_starttag
    BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode

    # HTMLParserTreeBuilder.__init__ passes strict=False when this is True.
    CONSTRUCTOR_TAKES_STRICT = True
|
||||
@@ -0,0 +1,233 @@
|
||||
__all__ = [
|
||||
'LXMLTreeBuilderForXML',
|
||||
'LXMLTreeBuilder',
|
||||
]
|
||||
|
||||
from io import BytesIO
|
||||
from StringIO import StringIO
|
||||
import collections
|
||||
from lxml import etree
|
||||
from bs4.element import Comment, Doctype, NamespacedAttribute
|
||||
from bs4.builder import (
|
||||
FAST,
|
||||
HTML,
|
||||
HTMLTreeBuilder,
|
||||
PERMISSIVE,
|
||||
ParserRejectedMarkup,
|
||||
TreeBuilder,
|
||||
XML)
|
||||
from bs4.dammit import EncodingDetector
|
||||
|
||||
LXML = 'lxml'
|
||||
|
||||
class LXMLTreeBuilderForXML(TreeBuilder):
    """Tree builder that acts as the target object of an lxml XML parser.

    lxml calls start(), end(), data(), comment(), doctype() and pi()
    on this object; each one forwards the event to ``self.soup``.
    """

    DEFAULT_PARSER_CLASS = etree.XMLParser

    is_xml = True

    # Well, it's permissive by XML parser standards.
    features = [LXML, XML, FAST, PERMISSIVE]

    # Number of bytes/characters handed to the parser per feed() call.
    CHUNK_SIZE = 512

    # This namespace mapping is specified in the XML Namespace
    # standard.
    DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}

    def default_parser(self, encoding):
        """Return the parser to use: an instance, or a class to instantiate."""
        # This can either return a parser object or a class, which
        # will be instantiated with default arguments.
        if self._default_parser is not None:
            return self._default_parser
        return etree.XMLParser(
            target=self, strip_cdata=False, recover=True, encoding=encoding)

    def parser_for(self, encoding):
        """Return a parser object ready to parse markup in ``encoding``."""
        # Use the default parser.
        parser = self.default_parser(encoding)

        # callable() is the same test as isinstance(collections.Callable),
        # which no longer exists on current Python 3.
        if callable(parser):
            # Instantiate the parser with default arguments
            parser = parser(target=self, strip_cdata=False, encoding=encoding)
        return parser

    def __init__(self, parser=None, empty_element_tags=None):
        """Store an optional custom parser and empty-element tag set."""
        # TODO: Issue a warning if parser is present but not a
        # callable, since that means there's no way to create new
        # parsers for different encodings.
        self._default_parser = parser
        if empty_element_tags is not None:
            self.empty_element_tags = set(empty_element_tags)
        self.soup = None
        # Stack of inverted (namespace URL -> prefix) maps; None entries
        # mark tags that introduced no mapping of their own.
        self.nsmaps = [self.DEFAULT_NSMAPS]

    def _getNsTag(self, tag):
        """Split '{namespace}name' into (namespace, name)."""
        # Split the namespace URL out of a fully-qualified lxml tag
        # name. Copied from lxml's src/lxml/sax.py.
        if tag[0] == '{':
            return tuple(tag[1:].split('}', 1))
        else:
            return (None, tag)

    def prepare_markup(self, markup, user_specified_encoding=None,
                       document_declared_encoding=None):
        """
        :yield: A series of 4-tuples.
         (markup, encoding, declared encoding,
          has undergone character replacement)

        Each 4-tuple represents a strategy for parsing the document.
        """
        if isinstance(markup, unicode):
            # We were given Unicode. Maybe lxml can parse Unicode on
            # this system?
            yield markup, None, document_declared_encoding, False

            # No, apparently not. Convert the Unicode to UTF-8 and
            # tell lxml to parse it as UTF-8.
            yield (markup.encode("utf8"), "utf8",
                   document_declared_encoding, False)

        # Instead of using UnicodeDammit to convert the bytestring to
        # Unicode using different encodings, use EncodingDetector to
        # iterate over the encodings, and tell lxml to try to parse
        # the document as each one in turn.
        is_html = not self.is_xml
        try_encodings = [user_specified_encoding, document_declared_encoding]
        detector = EncodingDetector(markup, try_encodings, is_html)
        for encoding in detector.encodings:
            yield (detector.markup, encoding, document_declared_encoding, False)

    def feed(self, markup):
        """Feed ``markup`` to a new parser in CHUNK_SIZE pieces.

        :raise ParserRejectedMarkup: if the parser raises
         UnicodeDecodeError, LookupError or etree.ParserError.
        """
        if isinstance(markup, bytes):
            markup = BytesIO(markup)
        elif isinstance(markup, unicode):
            markup = StringIO(markup)

        # Call feed() at least once, even if the markup is empty,
        # or the parser won't be initialized.
        data = markup.read(self.CHUNK_SIZE)
        try:
            self.parser = self.parser_for(self.soup.original_encoding)
            self.parser.feed(data)
            while len(data) != 0:
                # Now call feed() on the rest of the data, chunk by chunk.
                data = markup.read(self.CHUNK_SIZE)
                if len(data) != 0:
                    self.parser.feed(data)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            raise ParserRejectedMarkup(str(e))

    def close(self):
        """Reset the namespace stack to its initial state."""
        self.nsmaps = [self.DEFAULT_NSMAPS]

    def start(self, name, attrs, nsmap={}):
        """Handle a start tag reported by lxml.

        :param name: Tag name, possibly in '{namespace}name' form.
        :param attrs: Mapping of attribute names to values.
        :param nsmap: prefix -> namespace URL mappings introduced by
         this tag.  The shared default dict is only read, never mutated.
        """
        # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
        attrs = dict(attrs)
        nsprefix = None
        # Invert each namespace map as it comes in.
        if len(nsmap) == 0 and len(self.nsmaps) > 1:
            # There are no new namespaces for this tag, but
            # non-default namespaces are in play, so we need a
            # separate tag stack to know when they end.
            self.nsmaps.append(None)
        elif len(nsmap) > 0:
            # A new namespace mapping has come into play.  This must be
            # recorded even when other mappings are already active.
            inverted_nsmap = dict((value, key) for key, value in nsmap.items())
            self.nsmaps.append(inverted_nsmap)
            # Also treat the namespace mapping as a set of attributes on the
            # tag, so we can recreate it later.
            for prefix, namespace in nsmap.items():
                attribute = NamespacedAttribute(
                    "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
                attrs[attribute] = namespace

        # Namespaces are in play. Find any attributes that came in
        # from lxml with namespaces attached to their names, and
        # turn then into NamespacedAttribute objects.
        new_attrs = {}
        for attr, value in attrs.items():
            namespace, attr = self._getNsTag(attr)
            if namespace is None:
                new_attrs[attr] = value
            else:
                nsprefix = self._prefix_for_namespace(namespace)
                attr = NamespacedAttribute(nsprefix, attr, namespace)
                new_attrs[attr] = value
        attrs = new_attrs

        namespace, name = self._getNsTag(name)
        nsprefix = self._prefix_for_namespace(namespace)
        self.soup.handle_starttag(name, namespace, nsprefix, attrs)

    def _prefix_for_namespace(self, namespace):
        """Find the currently active prefix for the given namespace."""
        if namespace is None:
            return None
        # Innermost mapping wins, so search the stack from the top.
        for inverted_nsmap in reversed(self.nsmaps):
            if inverted_nsmap is not None and namespace in inverted_nsmap:
                return inverted_nsmap[namespace]
        return None

    def end(self, name):
        """Handle an end tag and pop this tag's namespace-stack entry."""
        self.soup.endData()
        namespace, name = self._getNsTag(name)
        nsprefix = self._prefix_for_namespace(namespace)
        self.soup.handle_endtag(name, nsprefix)
        if len(self.nsmaps) > 1:
            # This tag, or one of its parents, introduced a namespace
            # mapping, so pop it off the stack.
            self.nsmaps.pop()

    def pi(self, target, data):
        """Processing instructions are ignored."""
        pass

    def data(self, content):
        """Forward character data."""
        self.soup.handle_data(content)

    def doctype(self, name, pubid, system):
        """Build a Doctype object and hand it to the soup."""
        self.soup.endData()
        doctype = Doctype.for_name_and_ids(name, pubid, system)
        self.soup.object_was_parsed(doctype)

    def comment(self, content):
        "Handle comments as Comment objects."
        self.soup.endData()
        self.soup.handle_data(content)
        self.soup.endData(Comment)

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
|
||||
|
||||
|
||||
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
    """HTML flavour of the lxml tree builder, backed by etree.HTMLParser."""

    features = [LXML, HTML, FAST, PERMISSIVE]
    is_xml = False

    def default_parser(self, encoding):
        """Return the parser class; parser_for() instantiates it."""
        return etree.HTMLParser

    def feed(self, markup):
        """Feed the whole of ``markup`` to a new parser in one call.

        :raise ParserRejectedMarkup: if the parser raises
         UnicodeDecodeError, LookupError or etree.ParserError.
        """
        encoding = self.soup.original_encoding
        try:
            self.parser = self.parser_for(encoding)
            self.parser.feed(markup)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            raise ParserRejectedMarkup(str(e))

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'<html><body>%s</body></html>' % fragment
|
||||
@@ -0,0 +1,829 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Beautiful Soup bonus library: Unicode, Dammit
|
||||
|
||||
This library converts a bytestream to Unicode through any means
|
||||
necessary. It is heavily based on code from Mark Pilgrim's Universal
|
||||
Feed Parser. It works best on XML and HTML, but it does not rewrite the
|
||||
XML or HTML to reflect a new encoding; that's the tree builder's job.
|
||||
"""
|
||||
|
||||
import codecs
|
||||
from htmlentitydefs import codepoint2name
|
||||
import re
|
||||
import logging
|
||||
import string
|
||||
|
||||
# Pick a character-encoding detector.  Whichever import succeeds first
# decides what chardet_dammit() does; with neither library installed
# it always answers None.
chardet_type = None
try:
    # First try the fast C implementation.
    #  PyPI package: cchardet
    import cchardet
except ImportError:
    try:
        # Fall back to the pure Python implementation
        #  Debian package: python-chardet
        #  PyPI package: chardet
        import chardet
    except ImportError:
        # No chardet available.
        def chardet_dammit(s):
            """No detector installed: never guess."""
            return None
    else:
        def chardet_dammit(s):
            """Return chardet's guess at the encoding of ``s``."""
            guess = chardet.detect(s)
            return guess['encoding']
        #import chardet.constants
        #chardet.constants._debug = 1
else:
    def chardet_dammit(s):
        """Return cchardet's guess at the encoding of ``s``."""
        guess = cchardet.detect(s)
        return guess['encoding']

# Optional extra codecs; the import is attempted and a failure ignored.
# Available from http://cjkpython.i18n.org/.
try:
    import iconv_codec
except ImportError:
    pass
|
||||
|
||||
# Byte patterns that capture a declared encoding name as group 1.
# Raw strings keep regex escapes such as \? and \s out of Python's own
# string-escape processing.
# Matches an XML declaration at the very start of the document.
xml_encoding_re = re.compile(
    r'^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
# Matches a charset given inside an HTML <meta> tag.
html_meta_re = re.compile(
    r'<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
|
||||
|
||||
class EntitySubstitution(object):

    """Substitute XML or HTML entities for the corresponding characters."""

    def _populate_class_variables():
        """Build the character<->entity tables and the matching regex.

        Runs once while the class body executes; returns
        (character -> name, name -> character, compiled character class).
        """
        lookup = {}
        reverse_lookup = {}
        characters_for_re = []
        for codepoint, name in list(codepoint2name.items()):
            character = unichr(codepoint)
            if codepoint != 34:
                # There's no point in turning the quotation mark into
                # &quot;, unless it happens within an attribute value, which
                # is handled elsewhere.
                characters_for_re.append(character)
                lookup[character] = name
            # But we do want to turn &quot; into the quotation mark.
            reverse_lookup[name] = character
        re_definition = "[%s]" % "".join(characters_for_re)
        return lookup, reverse_lookup, re.compile(re_definition)
    (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
     CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()

    CHARACTER_TO_XML_ENTITY = {
        "'": "apos",
        '"': "quot",
        "&": "amp",
        "<": "lt",
        ">": "gt",
        }

    # An angle bracket, or an ampersand that does not start something
    # shaped like a numeric or named entity.
    BARE_AMPERSAND_OR_BRACKET = re.compile(r"([<>]|"
                                           r"&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
                                           r")")

    AMPERSAND_OR_BRACKET = re.compile("([<>&])")

    @classmethod
    def _substitute_html_entity(cls, matchobj):
        """Used with a regular expression to substitute the named
        HTML entity for the matched character."""
        entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
        return "&%s;" % entity

    @classmethod
    def _substitute_xml_entity(cls, matchobj):
        """Used with a regular expression to substitute the
        appropriate XML entity for an XML special character."""
        entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
        return "&%s;" % entity

    @classmethod
    def quoted_attribute_value(cls, value):
        """Make a value into a quoted XML attribute, possibly escaping it.

         Most strings will be quoted using double quotes.

          Bob's Bar -> "Bob's Bar"

         If a string contains double quotes, it will be quoted using
         single quotes.

          Welcome to "my bar" -> 'Welcome to "my bar"'

         If a string contains both single and double quotes, the
         double quotes will be escaped, and the string will be quoted
         using double quotes.

          Welcome to "Bob's Bar" -> "Welcome to &quot;Bob's bar&quot;"
        """
        quote_with = '"'
        if '"' in value:
            if "'" in value:
                # The string contains both single and double
                # quotes.  Turn the double quotes into
                # entities. We quote the double quotes rather than
                # the single quotes because the entity name is
                # "&quot;" whether this is HTML or XML.  If we
                # quoted the single quotes, we'd have to decide
                # between &apos; and &squot;.
                replace_with = "&quot;"
                value = value.replace('"', replace_with)
            else:
                # There are double quotes but no single quotes.
                # We can use single quotes to quote the attribute.
                quote_with = "'"
        return quote_with + value + quote_with

    @classmethod
    def substitute_xml(cls, value, make_quoted_attribute=False):
        """Substitute XML entities for special XML characters.

        :param value: A string to be substituted. The less-than sign
          will become &lt;, the greater-than sign will become &gt;,
          and any ampersands will become &amp;. If you want ampersands
          that appear to be part of an entity definition to be left
          alone, use substitute_xml_containing_entities() instead.

        :param make_quoted_attribute: If True, then the string will be
         quoted, as befits an attribute value.
        """
        # Escape angle brackets and ampersands.
        value = cls.AMPERSAND_OR_BRACKET.sub(
            cls._substitute_xml_entity, value)

        if make_quoted_attribute:
            value = cls.quoted_attribute_value(value)
        return value

    @classmethod
    def substitute_xml_containing_entities(
        cls, value, make_quoted_attribute=False):
        """Substitute XML entities for special XML characters.

        :param value: A string to be substituted. The less-than sign will
          become &lt;, the greater-than sign will become &gt;, and any
          ampersands that are not part of an entity definition will
          become &amp;.

        :param make_quoted_attribute: If True, then the string will be
         quoted, as befits an attribute value.
        """
        # Escape angle brackets, and ampersands that aren't part of
        # entities.
        value = cls.BARE_AMPERSAND_OR_BRACKET.sub(
            cls._substitute_xml_entity, value)

        if make_quoted_attribute:
            value = cls.quoted_attribute_value(value)
        return value

    @classmethod
    def substitute_html(cls, s):
        """Replace certain Unicode characters with named HTML entities.

        This differs from data.encode(encoding, 'xmlcharrefreplace')
        in that the goal is to make the result more readable (to those
        with ASCII displays) rather than to recover from
        errors. There's absolutely nothing wrong with a UTF-8 string
        containing a LATIN SMALL LETTER E WITH ACUTE, but replacing that
        character with "&eacute;" will make it more readable to some
        people.
        """
        return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
            cls._substitute_html_entity, s)
|
||||
|
||||
|
||||
class EncodingDetector:
    """Suggests a number of possible encodings for a bytestring.

    Order of precedence:

    1. Encodings you specifically tell EncodingDetector to try first
    (the override_encodings argument to the constructor).

    2. An encoding implied by a byte-order mark at the start of the
    bytestring.

    3. An encoding declared within the bytestring itself, either in an
    XML declaration (if the bytestring is to be interpreted as an XML
    document), or in a <meta> tag (if the bytestring is to be
    interpreted as an HTML document.)

    4. An encoding detected through textual analysis by chardet,
    cchardet, or a similar external library.

    5. UTF-8.

    6. Windows-1252.
    """
    def __init__(self, markup, override_encodings=None, is_html=False):
        """Store the inputs and strip any byte-order mark from ``markup``."""
        self.override_encodings = override_encodings or []
        self.chardet_encoding = None
        self.is_html = is_html
        self.declared_encoding = None

        # First order of business: strip a byte-order mark.
        self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup)

    def _usable(self, encoding, tried):
        """Return True the first time a non-None encoding is seen.

        Names are compared case-insensitively; ``tried`` is updated in place.
        """
        if encoding is not None:
            encoding = encoding.lower()
            if encoding not in tried:
                tried.add(encoding)
                return True
        return False

    @property
    def encodings(self):
        """Yield a number of encodings that might work for this markup."""
        tried = set()
        for e in self.override_encodings:
            if self._usable(e, tried):
                yield e

        # Did the document originally start with a byte-order mark
        # that indicated its encoding?
        if self._usable(self.sniffed_encoding, tried):
            yield self.sniffed_encoding

        # Look within the document for an XML or HTML encoding
        # declaration.
        if self.declared_encoding is None:
            self.declared_encoding = self.find_declared_encoding(
                self.markup, self.is_html)
        if self._usable(self.declared_encoding, tried):
            yield self.declared_encoding

        # Use third-party character set detection to guess at the
        # encoding.
        if self.chardet_encoding is None:
            self.chardet_encoding = chardet_dammit(self.markup)
        if self._usable(self.chardet_encoding, tried):
            yield self.chardet_encoding

        # As a last-ditch effort, try utf-8 and windows-1252.
        for e in ('utf-8', 'windows-1252'):
            if self._usable(e, tried):
                yield e

    @classmethod
    def strip_byte_order_mark(cls, data):
        """If a byte-order mark is present, strip it and return the encoding it implies.

        :return: A 2-tuple (data without the mark, encoding name or None).
        """
        encoding = None
        # The UTF-16 marks are prefixes of UTF-32 marks, so the two
        # bytes after the mark must not both be zero.  They are compared
        # against a bytes literal so the test also works where bytes and
        # text never compare equal.
        if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
               and (data[2:4] != b'\x00\x00'):
            encoding = 'utf-16be'
            data = data[2:]
        elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \
                 and (data[2:4] != b'\x00\x00'):
            encoding = 'utf-16le'
            data = data[2:]
        elif data[:3] == b'\xef\xbb\xbf':
            encoding = 'utf-8'
            data = data[3:]
        elif data[:4] == b'\x00\x00\xfe\xff':
            encoding = 'utf-32be'
            data = data[4:]
        elif data[:4] == b'\xff\xfe\x00\x00':
            encoding = 'utf-32le'
            data = data[4:]
        return data, encoding

    @classmethod
    def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False):
        """Given a document, tries to find its declared encoding.

        An XML encoding is declared at the beginning of the document.

        An HTML encoding is declared in a <meta> tag, hopefully near the
        beginning of the document.

        :return: The declared encoding, lowercased, or None.
        """
        if search_entire_document:
            xml_endpos = html_endpos = len(markup)
        else:
            # Only look at the start of the document.
            xml_endpos = 1024
            html_endpos = max(2048, int(len(markup) * 0.05))

        declared_encoding = None
        declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
        if not declared_encoding_match and is_html:
            declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
        if declared_encoding_match is not None:
            # 'replace' keeps a non-ASCII charset value from raising here.
            declared_encoding = declared_encoding_match.groups()[0].decode(
                'ascii', 'replace')
        if declared_encoding:
            return declared_encoding.lower()
        return None
|
||||
|
||||
class UnicodeDammit:
|
||||
"""A class for detecting the encoding of a *ML document and
|
||||
converting it to a Unicode string. If the source encoding is
|
||||
windows-1252, can replace MS smart quotes with their HTML or XML
|
||||
equivalents."""
|
||||
|
||||
# This dictionary maps commonly seen values for "charset" in HTML
|
||||
# meta tags to the corresponding Python codec names. It only covers
|
||||
# values that aren't in Python's aliases and can't be determined
|
||||
# by the heuristics in find_codec.
|
||||
CHARSET_ALIASES = {"macintosh": "mac-roman",
|
||||
"x-sjis": "shift-jis"}
|
||||
|
||||
ENCODINGS_WITH_SMART_QUOTES = [
|
||||
"windows-1252",
|
||||
"iso-8859-1",
|
||||
"iso-8859-2",
|
||||
]
|
||||
|
||||
def __init__(self, markup, override_encodings=[],
             smart_quotes_to=None, is_html=False):
    """Detect the encoding of ``markup`` and convert it to Unicode.

    :param markup: The document, as a bytestring or Unicode string.
    :param override_encodings: Encodings to try first.  The default
     list is only passed on to EncodingDetector, never mutated here.
    :param smart_quotes_to: None, 'ascii', 'xml', or any other value
     for HTML entities; see _sub_ms_char().
    :param is_html: Whether to treat the document as HTML.

    Afterwards ``unicode_markup`` holds the converted document (None
    if every encoding failed) and ``original_encoding`` the encoding
    that worked.
    """
    self.smart_quotes_to = smart_quotes_to
    self.tried_encodings = []
    self.contains_replacement_characters = False
    self.is_html = is_html

    self.detector = EncodingDetector(markup, override_encodings, is_html)

    # Short-circuit if the data is in Unicode to begin with.
    if isinstance(markup, unicode) or markup == '':
        self.markup = markup
        self.unicode_markup = unicode(markup)
        self.original_encoding = None
        return

    # The encoding detector may have stripped a byte-order mark.
    # Use the stripped markup from this point on.
    self.markup = self.detector.markup

    u = None
    for encoding in self.detector.encodings:
        u = self._convert_from(encoding)
        if u is not None:
            break

    # Compare against None: an empty string is a successful
    # conversion, not a failure that needs the lossy retry below.
    if u is None:
        # None of the encodings worked. As an absolute last resort,
        # try them again with character replacement.

        for encoding in self.detector.encodings:
            if encoding != "ascii":
                u = self._convert_from(encoding, "replace")
            if u is not None:
                logging.warning(
                        "Some characters could not be decoded, and were "
                        "replaced with REPLACEMENT CHARACTER.")
                self.contains_replacement_characters = True
                break

    # If none of that worked, we could at this point force it to
    # ASCII, but that would destroy so much data that I think
    # giving up is better.
    self.unicode_markup = u
    if not u:
        self.original_encoding = None
|
||||
|
||||
def _sub_ms_char(self, match):
|
||||
"""Changes a MS smart quote character to an XML or HTML
|
||||
entity, or an ASCII character."""
|
||||
orig = match.group(1)
|
||||
if self.smart_quotes_to == 'ascii':
|
||||
sub = self.MS_CHARS_TO_ASCII.get(orig).encode()
|
||||
else:
|
||||
sub = self.MS_CHARS.get(orig)
|
||||
if type(sub) == tuple:
|
||||
if self.smart_quotes_to == 'xml':
|
||||
sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
|
||||
else:
|
||||
sub = '&'.encode() + sub[0].encode() + ';'.encode()
|
||||
else:
|
||||
sub = sub.encode()
|
||||
return sub
|
||||
|
||||
def _convert_from(self, proposed, errors="strict"):
|
||||
proposed = self.find_codec(proposed)
|
||||
if not proposed or (proposed, errors) in self.tried_encodings:
|
||||
return None
|
||||
self.tried_encodings.append((proposed, errors))
|
||||
markup = self.markup
|
||||
# Convert smart quotes to HTML if coming from an encoding
|
||||
# that might have them.
|
||||
if (self.smart_quotes_to is not None
|
||||
and proposed in self.ENCODINGS_WITH_SMART_QUOTES):
|
||||
smart_quotes_re = b"([\x80-\x9f])"
|
||||
smart_quotes_compiled = re.compile(smart_quotes_re)
|
||||
markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
|
||||
|
||||
try:
|
||||
#print "Trying to convert document to %s (errors=%s)" % (
|
||||
# proposed, errors)
|
||||
u = self._to_unicode(markup, proposed, errors)
|
||||
self.markup = u
|
||||
self.original_encoding = proposed
|
||||
except Exception as e:
|
||||
#print "That didn't work!"
|
||||
#print e
|
||||
return None
|
||||
#print "Correct encoding: %s" % proposed
|
||||
return self.markup
|
||||
|
||||
def _to_unicode(self, data, encoding, errors="strict"):
|
||||
'''Given a string and its encoding, decodes the string into Unicode.
|
||||
%encoding is a string recognized by encodings.aliases'''
|
||||
return unicode(data, encoding, errors)
|
||||
|
||||
@property
def declared_html_encoding(self):
    """The encoding the detector found declared in the document.

    Always None when the document is not being treated as HTML.
    """
    return self.detector.declared_encoding if self.is_html else None
|
||||
|
||||
def find_codec(self, charset):
    """Map a charset name to a lowercased codec name.

    Tried in order: the CHARSET_ALIASES entry (or the name itself),
    the name without hyphens, the name with hyphens turned into
    underscores, and finally the lowercased name as given.  Returns
    None for an empty or missing charset.
    """
    candidate = self._codec(self.CHARSET_ALIASES.get(charset, charset))
    if not candidate and charset:
        for variant in (charset.replace("-", ""),
                        charset.replace("-", "_")):
            candidate = self._codec(variant)
            if candidate:
                break
        else:
            # No spelling is a known codec; fall back to the name itself.
            candidate = charset.lower() or charset
    if not candidate:
        return None
    return candidate.lower()
|
||||
|
||||
def _codec(self, charset):
|
||||
if not charset:
|
||||
return charset
|
||||
codec = None
|
||||
try:
|
||||
codecs.lookup(charset)
|
||||
codec = charset
|
||||
except (LookupError, ValueError):
|
||||
pass
|
||||
return codec
|
||||
|
||||
|
||||
# A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities.
|
||||
# Replacements for the bytes 0x80-0x9f.  A tuple value is
# (HTML entity name, hex code point for an XML numeric reference);
# a plain string value is substituted as it is.
MS_CHARS = {b'\x80': ('euro', '20AC'),
            b'\x81': ' ',
            b'\x82': ('sbquo', '201A'),
            b'\x83': ('fnof', '192'),
            b'\x84': ('bdquo', '201E'),
            b'\x85': ('hellip', '2026'),
            b'\x86': ('dagger', '2020'),
            b'\x87': ('Dagger', '2021'),
            b'\x88': ('circ', '2C6'),
            b'\x89': ('permil', '2030'),
            b'\x8A': ('Scaron', '160'),
            b'\x8B': ('lsaquo', '2039'),
            b'\x8C': ('OElig', '152'),
            b'\x8D': '?',
            b'\x8E': ('#x17D', '17D'),
            b'\x8F': '?',
            b'\x90': '?',
            b'\x91': ('lsquo', '2018'),
            b'\x92': ('rsquo', '2019'),
            b'\x93': ('ldquo', '201C'),
            b'\x94': ('rdquo', '201D'),
            b'\x95': ('bull', '2022'),
            b'\x96': ('ndash', '2013'),
            b'\x97': ('mdash', '2014'),
            b'\x98': ('tilde', '2DC'),
            b'\x99': ('trade', '2122'),
            b'\x9a': ('scaron', '161'),
            b'\x9b': ('rsaquo', '203A'),
            b'\x9c': ('oelig', '153'),
            b'\x9d': '?',
            b'\x9e': ('#x17E', '17E'),
            # U+0178 LATIN CAPITAL LETTER Y WITH DIAERESIS; an empty
            # code point here produced the invalid reference "&#x;".
            b'\x9f': ('Yuml', '178'),}
|
||||
|
||||
# A parochial partial mapping of ISO-Latin-1 to ASCII. Contains
|
||||
# horrors like stripping diacritical marks to turn á into a, but also
|
||||
# contains non-horrors like turning “ into ".
|
||||
MS_CHARS_TO_ASCII = {
|
||||
b'\x80' : 'EUR',
|
||||
b'\x81' : ' ',
|
||||
b'\x82' : ',',
|
||||
b'\x83' : 'f',
|
||||
b'\x84' : ',,',
|
||||
b'\x85' : '...',
|
||||
b'\x86' : '+',
|
||||
b'\x87' : '++',
|
||||
b'\x88' : '^',
|
||||
b'\x89' : '%',
|
||||
b'\x8a' : 'S',
|
||||
b'\x8b' : '<',
|
||||
b'\x8c' : 'OE',
|
||||
b'\x8d' : '?',
|
||||
b'\x8e' : 'Z',
|
||||
b'\x8f' : '?',
|
||||
b'\x90' : '?',
|
||||
b'\x91' : "'",
|
||||
b'\x92' : "'",
|
||||
b'\x93' : '"',
|
||||
b'\x94' : '"',
|
||||
b'\x95' : '*',
|
||||
b'\x96' : '-',
|
||||
b'\x97' : '--',
|
||||
b'\x98' : '~',
|
||||
b'\x99' : '(TM)',
|
||||
b'\x9a' : 's',
|
||||
b'\x9b' : '>',
|
||||
b'\x9c' : 'oe',
|
||||
b'\x9d' : '?',
|
||||
b'\x9e' : 'z',
|
||||
b'\x9f' : 'Y',
|
||||
b'\xa0' : ' ',
|
||||
b'\xa1' : '!',
|
||||
b'\xa2' : 'c',
|
||||
b'\xa3' : 'GBP',
|
||||
b'\xa4' : '$', #This approximation is especially parochial--this is the
|
||||
#generic currency symbol.
|
||||
b'\xa5' : 'YEN',
|
||||
b'\xa6' : '|',
|
||||
b'\xa7' : 'S',
|
||||
b'\xa8' : '..',
|
||||
b'\xa9' : '',
|
||||
b'\xaa' : '(th)',
|
||||
b'\xab' : '<<',
|
||||
b'\xac' : '!',
|
||||
b'\xad' : ' ',
|
||||
b'\xae' : '(R)',
|
||||
b'\xaf' : '-',
|
||||
b'\xb0' : 'o',
|
||||
b'\xb1' : '+-',
|
||||
b'\xb2' : '2',
|
||||
b'\xb3' : '3',
|
||||
b'\xb4' : ("'", 'acute'),
|
||||
b'\xb5' : 'u',
|
||||
b'\xb6' : 'P',
|
||||
b'\xb7' : '*',
|
||||
b'\xb8' : ',',
|
||||
b'\xb9' : '1',
|
||||
b'\xba' : '(th)',
|
||||
b'\xbb' : '>>',
|
||||
b'\xbc' : '1/4',
|
||||
b'\xbd' : '1/2',
|
||||
b'\xbe' : '3/4',
|
||||
b'\xbf' : '?',
|
||||
b'\xc0' : 'A',
|
||||
b'\xc1' : 'A',
|
||||
b'\xc2' : 'A',
|
||||
b'\xc3' : 'A',
|
||||
b'\xc4' : 'A',
|
||||
b'\xc5' : 'A',
|
||||
b'\xc6' : 'AE',
|
||||
b'\xc7' : 'C',
|
||||
b'\xc8' : 'E',
|
||||
b'\xc9' : 'E',
|
||||
b'\xca' : 'E',
|
||||
b'\xcb' : 'E',
|
||||
b'\xcc' : 'I',
|
||||
b'\xcd' : 'I',
|
||||
b'\xce' : 'I',
|
||||
b'\xcf' : 'I',
|
||||
b'\xd0' : 'D',
|
||||
b'\xd1' : 'N',
|
||||
b'\xd2' : 'O',
|
||||
b'\xd3' : 'O',
|
||||
b'\xd4' : 'O',
|
||||
b'\xd5' : 'O',
|
||||
b'\xd6' : 'O',
|
||||
b'\xd7' : '*',
|
||||
b'\xd8' : 'O',
|
||||
b'\xd9' : 'U',
|
||||
b'\xda' : 'U',
|
||||
b'\xdb' : 'U',
|
||||
b'\xdc' : 'U',
|
||||
b'\xdd' : 'Y',
|
||||
b'\xde' : 'b',
|
||||
b'\xdf' : 'B',
|
||||
b'\xe0' : 'a',
|
||||
b'\xe1' : 'a',
|
||||
b'\xe2' : 'a',
|
||||
b'\xe3' : 'a',
|
||||
b'\xe4' : 'a',
|
||||
b'\xe5' : 'a',
|
||||
b'\xe6' : 'ae',
|
||||
b'\xe7' : 'c',
|
||||
b'\xe8' : 'e',
|
||||
b'\xe9' : 'e',
|
||||
b'\xea' : 'e',
|
||||
b'\xeb' : 'e',
|
||||
b'\xec' : 'i',
|
||||
b'\xed' : 'i',
|
||||
b'\xee' : 'i',
|
||||
b'\xef' : 'i',
|
||||
b'\xf0' : 'o',
|
||||
b'\xf1' : 'n',
|
||||
b'\xf2' : 'o',
|
||||
b'\xf3' : 'o',
|
||||
b'\xf4' : 'o',
|
||||
b'\xf5' : 'o',
|
||||
b'\xf6' : 'o',
|
||||
b'\xf7' : '/',
|
||||
b'\xf8' : 'o',
|
||||
b'\xf9' : 'u',
|
||||
b'\xfa' : 'u',
|
||||
b'\xfb' : 'u',
|
||||
b'\xfc' : 'u',
|
||||
b'\xfd' : 'y',
|
||||
b'\xfe' : 'b',
|
||||
b'\xff' : 'y',
|
||||
}
|
||||
|
||||
# A map used when removing rogue Windows-1252/ISO-8859-1
|
||||
# characters in otherwise UTF-8 documents.
|
||||
#
|
||||
# Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in
|
||||
# Windows-1252.
|
||||
WINDOWS_1252_TO_UTF8 = {
    # Keys are Windows-1252 byte values; each value is the UTF-8
    # encoding of the character that byte represents.
    # NOTE(review): 0xff (y with diaeresis) has no entry -- confirm
    # whether that omission is intentional.
    0x80 : b'\xe2\x82\xac', # €
    0x82 : b'\xe2\x80\x9a', # ‚
    0x83 : b'\xc6\x92',     # ƒ
    0x84 : b'\xe2\x80\x9e', # „
    0x85 : b'\xe2\x80\xa6', # …
    0x86 : b'\xe2\x80\xa0', # †
    0x87 : b'\xe2\x80\xa1', # ‡
    0x88 : b'\xcb\x86',     # ˆ
    0x89 : b'\xe2\x80\xb0', # ‰
    0x8a : b'\xc5\xa0',     # Š
    0x8b : b'\xe2\x80\xb9', # ‹
    0x8c : b'\xc5\x92',     # Œ
    0x8e : b'\xc5\xbd',     # Ž
    0x91 : b'\xe2\x80\x98', # ‘
    0x92 : b'\xe2\x80\x99', # ’
    0x93 : b'\xe2\x80\x9c', # “
    0x94 : b'\xe2\x80\x9d', # ”
    0x95 : b'\xe2\x80\xa2', # •
    0x96 : b'\xe2\x80\x93', # –
    0x97 : b'\xe2\x80\x94', # —
    0x98 : b'\xcb\x9c',     # ˜
    0x99 : b'\xe2\x84\xa2', # ™
    0x9a : b'\xc5\xa1',     # š
    0x9b : b'\xe2\x80\xba', # ›
    0x9c : b'\xc5\x93',     # œ
    0x9e : b'\xc5\xbe',     # ž
    0x9f : b'\xc5\xb8',     # Ÿ
    0xa0 : b'\xc2\xa0',     # (no-break space)
    0xa1 : b'\xc2\xa1',     # ¡
    0xa2 : b'\xc2\xa2',     # ¢
    0xa3 : b'\xc2\xa3',     # £
    0xa4 : b'\xc2\xa4',     # ¤
    0xa5 : b'\xc2\xa5',     # ¥
    0xa6 : b'\xc2\xa6',     # ¦
    0xa7 : b'\xc2\xa7',     # §
    0xa8 : b'\xc2\xa8',     # ¨
    0xa9 : b'\xc2\xa9',     # ©
    0xaa : b'\xc2\xaa',     # ª
    0xab : b'\xc2\xab',     # «
    0xac : b'\xc2\xac',     # ¬
    0xad : b'\xc2\xad',     # (soft hyphen)
    0xae : b'\xc2\xae',     # ®
    0xaf : b'\xc2\xaf',     # ¯
    0xb0 : b'\xc2\xb0',     # °
    0xb1 : b'\xc2\xb1',     # ±
    0xb2 : b'\xc2\xb2',     # ²
    0xb3 : b'\xc2\xb3',     # ³
    0xb4 : b'\xc2\xb4',     # ´
    0xb5 : b'\xc2\xb5',     # µ
    0xb6 : b'\xc2\xb6',     # ¶
    0xb7 : b'\xc2\xb7',     # ·
    0xb8 : b'\xc2\xb8',     # ¸
    0xb9 : b'\xc2\xb9',     # ¹
    0xba : b'\xc2\xba',     # º
    0xbb : b'\xc2\xbb',     # »
    0xbc : b'\xc2\xbc',     # ¼
    0xbd : b'\xc2\xbd',     # ½
    0xbe : b'\xc2\xbe',     # ¾
    0xbf : b'\xc2\xbf',     # ¿
    0xc0 : b'\xc3\x80',     # À
    0xc1 : b'\xc3\x81',     # Á
    0xc2 : b'\xc3\x82',     # Â
    0xc3 : b'\xc3\x83',     # Ã
    0xc4 : b'\xc3\x84',     # Ä
    0xc5 : b'\xc3\x85',     # Å
    0xc6 : b'\xc3\x86',     # Æ
    0xc7 : b'\xc3\x87',     # Ç
    0xc8 : b'\xc3\x88',     # È
    0xc9 : b'\xc3\x89',     # É
    0xca : b'\xc3\x8a',     # Ê
    0xcb : b'\xc3\x8b',     # Ë
    0xcc : b'\xc3\x8c',     # Ì
    0xcd : b'\xc3\x8d',     # Í
    0xce : b'\xc3\x8e',     # Î
    0xcf : b'\xc3\x8f',     # Ï
    0xd0 : b'\xc3\x90',     # Ð
    0xd1 : b'\xc3\x91',     # Ñ
    0xd2 : b'\xc3\x92',     # Ò
    0xd3 : b'\xc3\x93',     # Ó
    0xd4 : b'\xc3\x94',     # Ô
    0xd5 : b'\xc3\x95',     # Õ
    0xd6 : b'\xc3\x96',     # Ö
    0xd7 : b'\xc3\x97',     # ×
    0xd8 : b'\xc3\x98',     # Ø
    0xd9 : b'\xc3\x99',     # Ù
    0xda : b'\xc3\x9a',     # Ú
    0xdb : b'\xc3\x9b',     # Û
    0xdc : b'\xc3\x9c',     # Ü
    0xdd : b'\xc3\x9d',     # Ý
    0xde : b'\xc3\x9e',     # Þ
    0xdf : b'\xc3\x9f',     # ß
    0xe0 : b'\xc3\xa0',     # à
    # Fixed: this entry used to be the lone continuation byte b'\xa1',
    # which is not valid UTF-8 on its own.
    0xe1 : b'\xc3\xa1',     # á
    0xe2 : b'\xc3\xa2',     # â
    0xe3 : b'\xc3\xa3',     # ã
    0xe4 : b'\xc3\xa4',     # ä
    0xe5 : b'\xc3\xa5',     # å
    0xe6 : b'\xc3\xa6',     # æ
    0xe7 : b'\xc3\xa7',     # ç
    0xe8 : b'\xc3\xa8',     # è
    0xe9 : b'\xc3\xa9',     # é
    0xea : b'\xc3\xaa',     # ê
    0xeb : b'\xc3\xab',     # ë
    0xec : b'\xc3\xac',     # ì
    0xed : b'\xc3\xad',     # í
    0xee : b'\xc3\xae',     # î
    0xef : b'\xc3\xaf',     # ï
    0xf0 : b'\xc3\xb0',     # ð
    0xf1 : b'\xc3\xb1',     # ñ
    0xf2 : b'\xc3\xb2',     # ò
    0xf3 : b'\xc3\xb3',     # ó
    0xf4 : b'\xc3\xb4',     # ô
    0xf5 : b'\xc3\xb5',     # õ
    0xf6 : b'\xc3\xb6',     # ö
    0xf7 : b'\xc3\xb7',     # ÷
    0xf8 : b'\xc3\xb8',     # ø
    0xf9 : b'\xc3\xb9',     # ù
    0xfa : b'\xc3\xba',     # ú
    0xfb : b'\xc3\xbb',     # û
    0xfc : b'\xc3\xbc',     # ü
    0xfd : b'\xc3\xbd',     # ý
    0xfe : b'\xc3\xbe',     # þ
    }
|
||||
|
||||
# Ranges of bytes that start a UTF-8 multibyte character, as
# (lowest lead byte, highest lead byte, total length in bytes).
MULTIBYTE_MARKERS_AND_SIZES = [
    (0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF
    (0xe0, 0xef, 3), # 3-byte characters start with E0-EF
    (0xf0, 0xf4, 4), # 4-byte characters start with F0-F4
    ]

# Lowest and highest byte values that can start a multibyte character;
# taken from the first and last rows of the table above.
FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0]
LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1]
|
||||
|
||||
@classmethod
def detwingle(cls, in_bytes, main_encoding="utf8",
              embedded_encoding="windows-1252"):
    """Fix characters from one encoding embedded in some other encoding.

    Currently the only situation supported is Windows-1252 (or its
    subset ISO-8859-1), embedded in UTF-8.

    The input must be a bytestring. If you've already converted
    the document to Unicode, you're too late.

    The output is a bytestring in which `embedded_encoding`
    characters have been converted to their `main_encoding`
    equivalents.

    :param in_bytes: The bytestring to repair.
    :param main_encoding: Name of the document's main encoding; only
        UTF-8 (spelled 'utf8', 'utf-8' or 'utf_8', any case) is accepted.
    :param embedded_encoding: Name of the encoding of the stray
        characters; 'windows-1252' and 'iso-8859-1' are accepted, in
        any case and with '_' or '-' as separator.
    :return: `in_bytes` itself when nothing was replaced, otherwise a
        new bytestring.
    :raises NotImplementedError: For any other pair of encodings.
    """
    # Normalize once, then compare against the normalized spellings.
    embedded = embedded_encoding.replace('_', '-').lower()
    if embedded not in ('windows-1252', 'iso-8859-1'):
        raise NotImplementedError(
            "Windows-1252 and ISO-8859-1 are the only currently supported "
            "embedded encodings.")

    if main_encoding.replace('_', '-').lower() not in ('utf8', 'utf-8'):
        raise NotImplementedError(
            "UTF-8 is the only currently supported main encoding.")

    byte_chunks = []

    chunk_start = 0
    pos = 0
    while pos < len(in_bytes):
        byte = in_bytes[pos]
        if not isinstance(byte, int):
            # Python 2.x
            byte = ord(byte)
        if (byte >= cls.FIRST_MULTIBYTE_MARKER
            and byte <= cls.LAST_MULTIBYTE_MARKER):
            # This is the start of a UTF-8 multibyte character. Skip
            # to the end.
            # NOTE(review): the bytes being skipped are not checked
            # to be continuation bytes, so a Windows-1252 byte in this
            # range is passed over together with whatever follows it.
            for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES:
                if byte >= start and byte <= end:
                    pos += size
                    break
        elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8:
            # We found a Windows-1252 character!
            # Save the string up to this point as a chunk.
            byte_chunks.append(in_bytes[chunk_start:pos])

            # Now translate the Windows-1252 character into UTF-8
            # and add it as another chunk.
            byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte])
            pos += 1
            chunk_start = pos
        else:
            # Go on to the next character.
            pos += 1
    if chunk_start == 0:
        # The string is unchanged.
        return in_bytes

    # Store the final chunk.
    byte_chunks.append(in_bytes[chunk_start:])
    return b''.join(byte_chunks)
|
||||
|
||||
@@ -0,0 +1,204 @@
|
||||
"""Diagnostic functions, mainly for use when doing tech support."""
|
||||
import cProfile
|
||||
from StringIO import StringIO
|
||||
from HTMLParser import HTMLParser
|
||||
import bs4
|
||||
from bs4 import BeautifulSoup, __version__
|
||||
from bs4.builder import builder_registry
|
||||
|
||||
import os
|
||||
import pstats
|
||||
import random
|
||||
import tempfile
|
||||
import time
|
||||
import traceback
|
||||
import sys
|
||||
import cProfile
|
||||
|
||||
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints the Beautiful Soup and Python versions, reports which of
    the basic parsers have a registered tree builder, and then prints
    what each available parser makes of the markup.

    :param data: The markup to diagnose. An object with a read()
        method is read; a string naming an existing path is opened
        and read; a string starting with "http:" or "https:" is
        rejected with an explanation and nothing is parsed.
    """
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a copy: removing from the list being iterated would skip
    # the entry that follows each removed one, leaving an unavailable
    # parser in the list.
    for name in list(basic_parsers):
        for builder in builder_registry.builders:
            if name in builder.features:
                break
        else:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help." %
                name)

    if 'lxml' in basic_parsers:
        basic_parsers.append(["lxml", "xml"])
        from lxml import etree
        print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))

    if 'html5lib' in basic_parsers:
        import html5lib
        print("Found html5lib version %s" % html5lib.__version__)

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        # Close the file as soon as its contents have been read.
        with open(data) as markup_file:
            data = markup_file.read()
    elif data.startswith("http:") or data.startswith("https:"):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    print("")

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        try:
            soup = BeautifulSoup(data, parser)
        except Exception:
            # Best effort: report the failure and move on to the
            # next parser.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
|
||||
|
||||
def lxml_trace(data, html=True, **kwargs):
    """Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running.

    :param data: The markup, as a string; it is wrapped in a StringIO.
    :param html: Passed to etree.iterparse() as its `html` argument.
    :param kwargs: Any further keyword arguments for etree.iterparse().
    """
    from lxml import etree
    parse_events = etree.iterparse(StringIO(data), html=html, **kwargs)
    for event, element in parse_events:
        line = "%s, %4s, %s" % (event, element.tag, element.text)
        print(line)
|
||||
|
||||
class AnnouncingParser(HTMLParser):
    """Announces HTMLParser parse events, without doing anything else."""

    def _p(self, s):
        # Single output point for every announcement.
        print(s)

    def _announce(self, subject, event):
        # Format one announcement as "<subject> <EVENT>" and emit it.
        self._p("%s %s" % (subject, event))

    def handle_starttag(self, name, attrs):
        # The attributes are not included in the announcement.
        self._announce(name, "START")

    def handle_endtag(self, name):
        self._announce(name, "END")

    def handle_data(self, data):
        self._announce(data, "DATA")

    def handle_charref(self, name):
        self._announce(name, "CHARREF")

    def handle_entityref(self, name):
        self._announce(name, "ENTITYREF")

    def handle_comment(self, data):
        self._announce(data, "COMMENT")

    def handle_decl(self, data):
        self._announce(data, "DECL")

    def unknown_decl(self, data):
        self._announce(data, "UNKNOWN-DECL")

    def handle_pi(self, data):
        self._announce(data, "PI")
|
||||
|
||||
def htmlparser_trace(data):
    """Print out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.

    :param data: The markup to feed to an AnnouncingParser.
    """
    AnnouncingParser().feed(data)
|
||||
|
||||
# Letter pools for rword(): consonants go in even positions, vowels in
# odd ones.
_vowels = "aeiou"
_consonants = "bcdfghjklmnpqrstvwxyz"

def rword(length=5):
    "Generate a random word-like string."
    letters = []
    for position in range(length):
        pool = _vowels if position % 2 else _consonants
        letters.append(random.choice(pool))
    return ''.join(letters)

def rsentence(length=4):
    "Generate a random sentence-like string."
    words = []
    for _ in range(length):
        # Each word is between 4 and 9 letters long.
        words.append(rword(random.randint(4,9)))
    return " ".join(words)

def rdoc(num_elements=1000):
    """Randomly generate an invalid HTML document.

    :param num_elements: Number of random draws; each draw adds an
        opening tag, a run of text, a closing tag, or nothing.
    :return: The pieces joined by newlines and wrapped in <html> tags.
    """
    tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
    pieces = []
    for _ in range(num_elements):
        roll = random.randint(0,3)
        if roll == 0:
            # New tag.
            pieces.append("<%s>" % random.choice(tag_names))
        elif roll == 1:
            # One to four random words of text.
            pieces.append(rsentence(random.randint(1,4)))
        elif roll == 2:
            # Close a tag.
            pieces.append("</%s>" % random.choice(tag_names))
        # A roll of 3 adds nothing to the document.
    return "<html>" + "\n".join(pieces) + "</html>"
|
||||
|
||||
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark.

    Generates one random document with rdoc(), times Beautiful Soup
    parsing it with each parser, then times lxml and html5lib on the
    same document without Beautiful Soup.

    :param num_elements: Passed to rdoc() to size the document.
    """
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))

    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        try:
            started = time.time()
            soup = BeautifulSoup(data, parser)
            finished = time.time()
        except Exception:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            elapsed = finished - started
            print("BS4+%s parsed the markup in %.2fs." % (parser, elapsed))

    # Raw lxml, with no Beautiful Soup code involved.
    from lxml import etree
    started = time.time()
    etree.HTML(data)
    finished = time.time()
    print("Raw lxml parsed the markup in %.2fs." % (finished - started))

    # Raw html5lib, with no Beautiful Soup code involved.
    import html5lib
    parser = html5lib.HTMLParser()
    started = time.time()
    parser.parse(data)
    finished = time.time()
    print("Raw html5lib parsed the markup in %.2fs." % (finished - started))
|
||||
|
||||
def profile(num_elements=100000, parser="lxml"):
    """Profile Beautiful Soup while it parses a random document.

    Runs `bs4.BeautifulSoup(data, parser)` under cProfile on a
    document from rdoc(), then prints the statistics sorted by
    cumulative time, restricted to entries matching '_html5lib|bs4'
    and to 50 entries.

    :param num_elements: Passed to rdoc() to size the document.
    :param parser: The parser handed to the BeautifulSoup constructor.
    """
    data = rdoc(num_elements)
    namespace = dict(bs4=bs4, data=data, parser=parser)

    # The temporary file only carries the profile from cProfile to
    # pstats; the context manager closes it once the stats are loaded.
    with tempfile.NamedTemporaryFile() as filehandle:
        filename = filehandle.name
        cProfile.runctx(
            'bs4.BeautifulSoup(data, parser)', namespace, namespace, filename)
        stats = pstats.Stats(filename)

    # stats.strip_dirs()
    stats.sort_stats("cumulative")
    stats.print_stats('_html5lib|bs4', 50)
|
||||
|
||||
# When run as a script, diagnose the markup read from standard input.
if __name__ == '__main__':
    diagnose(sys.stdin.read())
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,592 @@
|
||||
"""Helper classes for tests."""
|
||||
|
||||
import copy
|
||||
import functools
|
||||
import unittest
|
||||
from unittest import TestCase
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
Comment,
|
||||
ContentMetaAttributeValue,
|
||||
Doctype,
|
||||
SoupStrainer,
|
||||
)
|
||||
|
||||
from bs4.builder import HTMLParserTreeBuilder
|
||||
default_builder = HTMLParserTreeBuilder
|
||||
|
||||
|
||||
class SoupTest(unittest.TestCase):
    """Base test case with helpers for parsing markup and comparing output."""

    @property
    def default_builder(self):
        # A new instance of the module-level default builder class on
        # every access.
        return default_builder()

    def soup(self, markup, **kwargs):
        """Build a Beautiful Soup object from markup."""
        fallback = self.default_builder
        chosen = kwargs.pop('builder', fallback)
        return BeautifulSoup(markup, builder=chosen, **kwargs)

    def document_for(self, markup):
        """Turn an HTML fragment into a document.

        The details depend on the builder.
        """
        builder = self.default_builder
        return builder.test_fragment_to_document(markup)

    def assertSoupEquals(self, to_parse, compare_parsed_to=None):
        """Assert that parsing `to_parse` and decoding it gives the
        document form of `compare_parsed_to` (or of `to_parse` itself
        when no comparison markup is given)."""
        parsed = BeautifulSoup(to_parse, builder=self.default_builder)
        if compare_parsed_to is None:
            expected_markup = to_parse
        else:
            expected_markup = compare_parsed_to

        self.assertEqual(parsed.decode(), self.document_for(expected_markup))
|
||||
|
||||
|
||||
class HTMLTreeBuilderSmokeTest(object):
|
||||
|
||||
"""A basic test of a treebuilder's competence.
|
||||
|
||||
Any HTML treebuilder, present or future, should be able to pass
|
||||
these tests. With invalid markup, there's room for interpretation,
|
||||
and different parsers can handle it differently. But with the
|
||||
markup in these tests, there's not much room for interpretation.
|
||||
"""
|
||||
|
||||
def assertDoctypeHandled(self, doctype_fragment):
    """Assert that a given doctype string is handled correctly."""
    declaration, soup = self._document_with_doctype(doctype_fragment)

    # The first node of the document must be a Doctype holding the
    # fragment, and the serialized document must start with the
    # full declaration.
    first_node = soup.contents[0]
    self.assertEqual(first_node.__class__, Doctype)
    self.assertEqual(first_node, doctype_fragment)
    serialized_prefix = str(soup)[:len(declaration)]
    self.assertEqual(serialized_prefix, declaration)

    # The rest of the document must have parsed too.
    self.assertEqual(soup.p.contents[0], 'foo')
|
||||
|
||||
def _document_with_doctype(self, doctype_fragment):
    """Generate and parse a document with the given doctype.

    Returns the full doctype declaration and the parsed document.
    """
    declaration = '<!DOCTYPE %s>' % doctype_fragment
    parsed = self.soup(declaration + '\n<p>foo</p>')
    return declaration, parsed
|
||||
|
||||
def test_normal_doctypes(self):
|
||||
"""Make sure normal, everyday HTML doctypes are handled correctly."""
|
||||
self.assertDoctypeHandled("html")
|
||||
self.assertDoctypeHandled(
|
||||
'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
|
||||
|
||||
def test_empty_doctype(self):
    """A bare <!DOCTYPE> gives a first node that is empty once stripped."""
    first_node = self.soup("<!DOCTYPE>").contents[0]
    self.assertEqual("", first_node.strip())
|
||||
|
||||
def test_public_doctype_with_url(self):
|
||||
doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
|
||||
self.assertDoctypeHandled(doctype)
|
||||
|
||||
def test_system_doctype(self):
|
||||
self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
|
||||
|
||||
def test_namespaced_system_doctype(self):
|
||||
# We can handle a namespaced doctype with a system ID.
|
||||
self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
|
||||
|
||||
def test_namespaced_public_doctype(self):
|
||||
# Test a namespaced doctype with a public id.
|
||||
self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
|
||||
|
||||
def test_real_xhtml_document(self):
    """A real XHTML document should come out more or less the same as it went in."""
    markup = (
        b'<?xml version="1.0" encoding="utf-8"?>\n'
        b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">\n'
        b'<html xmlns="http://www.w3.org/1999/xhtml">\n'
        b'<head><title>Hello.</title></head>\n'
        b'<body>Goodbye.</body>\n'
        b'</html>')
    soup = self.soup(markup)
    # Newlines are ignored on both sides of the comparison.
    round_tripped = soup.encode("utf-8").replace(b"\n", b"")
    self.assertEqual(round_tripped, markup.replace(b"\n", b""))
|
||||
|
||||
def test_deepcopy(self):
|
||||
"""Make sure you can copy the tree builder.
|
||||
|
||||
This is important because the builder is part of a
|
||||
BeautifulSoup object, and we want to be able to copy that.
|
||||
"""
|
||||
copy.deepcopy(self.default_builder)
|
||||
|
||||
def test_p_tag_is_never_empty_element(self):
    """A <p> tag is never designated as an empty-element tag.

    Even if the markup shows it as an empty-element tag, it
    shouldn't be presented that way.
    """
    parsed = self.soup("<p/>")
    self.assertFalse(parsed.p.is_empty_element)
    rendered = str(parsed.p)
    self.assertEqual(rendered, "<p></p>")
|
||||
|
||||
def test_unclosed_tags_get_closed(self):
|
||||
"""A tag that's not closed by the end of the document should be closed.
|
||||
|
||||
This applies to all tags except empty-element tags.
|
||||
"""
|
||||
self.assertSoupEquals("<p>", "<p></p>")
|
||||
self.assertSoupEquals("<b>", "<b></b>")
|
||||
|
||||
self.assertSoupEquals("<br>", "<br/>")
|
||||
|
||||
def test_br_is_always_empty_element_tag(self):
    """A <br> tag is designated as an empty-element tag.

    Some parsers treat <br></br> as one <br/> tag, some parsers as
    two tags, but it should always be an empty-element tag.
    """
    parsed = self.soup("<br></br>")
    self.assertTrue(parsed.br.is_empty_element)
    rendered = str(parsed.br)
    self.assertEqual(rendered, "<br/>")
|
||||
|
||||
def test_nested_formatting_elements(self):
|
||||
self.assertSoupEquals("<em><em></em></em>")
|
||||
|
||||
def test_comment(self):
    """Comments are represented as Comment objects."""
    markup = "<p>foo<!--foobar-->baz</p>"
    self.assertSoupEquals(markup)

    soup = self.soup(markup)
    comment = soup.find(text="foobar")
    self.assertEqual(comment.__class__, Comment)

    # The comment is properly integrated into the tree: it sits
    # between the two strings that surround it.
    text_before = soup.find(text="foo")
    self.assertEqual(comment, text_before.next_element)
    text_after = soup.find(text="baz")
    self.assertEqual(comment, text_after.previous_element)
|
||||
|
||||
def test_preserved_whitespace_in_pre_and_textarea(self):
|
||||
"""Whitespace must be preserved in <pre> and <textarea> tags."""
|
||||
self.assertSoupEquals("<pre> </pre>")
|
||||
self.assertSoupEquals("<textarea> woo </textarea>")
|
||||
|
||||
def test_nested_inline_elements(self):
|
||||
"""Inline elements can be nested indefinitely."""
|
||||
b_tag = "<b>Inside a B tag</b>"
|
||||
self.assertSoupEquals(b_tag)
|
||||
|
||||
nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
|
||||
self.assertSoupEquals(nested_b_tag)
|
||||
|
||||
double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
|
||||
self.assertSoupEquals(nested_b_tag)
|
||||
|
||||
def test_nested_block_level_elements(self):
    """Block elements can be nested."""
    parsed = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
    quote = parsed.blockquote
    # The <b> is reachable through its <p> parent and directly from
    # the blockquote.
    self.assertEqual(quote.p.b.string, 'Foo')
    self.assertEqual(quote.b.string, 'Foo')
|
||||
|
||||
def test_correctly_nested_tables(self):
|
||||
"""One table can go inside another one."""
|
||||
markup = ('<table id="1">'
|
||||
'<tr>'
|
||||
"<td>Here's another table:"
|
||||
'<table id="2">'
|
||||
'<tr><td>foo</td></tr>'
|
||||
'</table></td>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
markup,
|
||||
'<table id="1"><tr><td>Here\'s another table:'
|
||||
'<table id="2"><tr><td>foo</td></tr></table>'
|
||||
'</td></tr></table>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
"<table><thead><tr><td>Foo</td></tr></thead>"
|
||||
"<tbody><tr><td>Bar</td></tr></tbody>"
|
||||
"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
|
||||
|
||||
def test_deeply_nested_multivalued_attribute(self):
    """A multivalued attribute on a deeply nested tag keeps its value."""
    # html5lib can set the attributes of the same tag many times
    # as it rearranges the tree. This has caused problems with
    # multivalued attributes.
    parsed = self.soup('<table><div><div class="css"></div></div></table>')
    inner_div = parsed.div.div
    self.assertEqual(["css"], inner_div['class'])
|
||||
|
||||
def test_angle_brackets_in_attribute_values_are_escaped(self):
|
||||
self.assertSoupEquals('<a b="<a>"></a>', '<a b="<a>"></a>')
|
||||
|
||||
def test_entities_in_attributes_converted_to_unicode(self):
|
||||
expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
|
||||
def test_entities_in_text_converted_to_unicode(self):
|
||||
expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
|
||||
def test_quot_entity_converted_to_quotation_mark(self):
|
||||
self.assertSoupEquals("<p>I said "good day!"</p>",
|
||||
'<p>I said "good day!"</p>')
|
||||
|
||||
def test_out_of_range_entity(self):
|
||||
expect = u"\N{REPLACEMENT CHARACTER}"
|
||||
self.assertSoupEquals("�", expect)
|
||||
self.assertSoupEquals("�", expect)
|
||||
self.assertSoupEquals("�", expect)
|
||||
|
||||
def test_multipart_strings(self):
    "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
    parsed = self.soup("<html><h2>\nfoo</h2><p></p></html>")
    # The element after the heading's string must be the <p> tag.
    after_heading_text = parsed.h2.string.next_element
    self.assertEqual("p", after_heading_text.name)
    self.assertEqual("p", parsed.p.name)
|
||||
|
||||
def test_basic_namespaces(self):
    """Parsers don't need to *understand* namespaces, but at the
    very least they should not choke on namespaces or lose
    data."""

    markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>'
    soup = self.soup(markup)
    # The document survives a round trip byte for byte.
    self.assertEqual(markup, soup.encode())

    # Each namespace declaration is readable as an ordinary
    # attribute of the <html> tag.
    declarations = (
        ('xmlns', 'http://www.w3.org/1999/xhtml'),
        ('xmlns:mathml', 'http://www.w3.org/1998/Math/MathML'),
        ('xmlns:svg', 'http://www.w3.org/2000/svg'),
    )
    for attribute, uri in declarations:
        self.assertEqual(uri, soup.html[attribute])
|
||||
|
||||
def test_multivalued_attribute_value_becomes_list(self):
    """A space-separated class attribute is exposed as a list."""
    parsed = self.soup(b'<a class="foo bar">')
    class_values = parsed.a['class']
    self.assertEqual(['foo', 'bar'], class_values)
|
||||
|
||||
#
|
||||
# Generally speaking, tests below this point are more tests of
|
||||
# Beautiful Soup than tests of the tree builders. But parsers are
|
||||
# weird, so we run these tests separately for every tree builder
|
||||
# to detect any differences between them.
|
||||
#
|
||||
|
||||
def test_can_parse_unicode_document(self):
    """A Unicode document parses even when its declared encoding
    cannot represent all of its characters."""
    # A seemingly innocuous document... but it's in Unicode! And
    # it contains characters that can't be represented in the
    # encoding found in the declaration! The horror!
    markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
    body_text = self.soup(markup).body.string
    self.assertEqual(u'Sacr\xe9 bleu!', body_text)
|
||||
|
||||
def test_soupstrainer(self):
    """Parsers should be able to work with SoupStrainers."""
    only_bold = SoupStrainer("b")
    markup = "A <b>bold</b> <meta/> <i>statement</i>"
    parsed = self.soup(markup, parse_only=only_bold)
    # Only the <b> tag is left in the decoded output.
    self.assertEqual(parsed.decode(), "<b>bold</b>")
|
||||
|
||||
def test_single_quote_attribute_values_become_double_quotes(self):
|
||||
self.assertSoupEquals("<foo attr='bar'></foo>",
|
||||
'<foo attr="bar"></foo>')
|
||||
|
||||
def test_attribute_values_with_nested_quotes_are_left_alone(self):
|
||||
text = """<foo attr='bar "brawls" happen'>a</foo>"""
|
||||
self.assertSoupEquals(text)
|
||||
|
||||
def test_attribute_values_with_double_nested_quotes_get_quoted(self):
    """A value holding both kinds of quote has its double quotes
    written as &quot; on output."""
    text = """<foo attr='bar "brawls" happen'>a</foo>"""
    soup = self.soup(text)
    # Replace the value with one containing both a double-quoted
    # phrase and an apostrophe.
    soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
    self.assertSoupEquals(
        soup.foo.decode(),
        """<foo attr="Brawls happen at &quot;Bob\'s Bar&quot;">a</foo>""")
|
||||
|
||||
def test_ampersand_in_attribute_value_gets_escaped(self):
|
||||
self.assertSoupEquals('<this is="really messed up & stuff"></this>',
|
||||
'<this is="really messed up & stuff"></this>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
'<a href="http://example.org?a=1&b=2;3">foo</a>',
|
||||
'<a href="http://example.org?a=1&b=2;3">foo</a>')
|
||||
|
||||
def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
|
||||
self.assertSoupEquals('<a href="http://example.org?a=1&b=2;3"></a>')
|
||||
|
||||
def test_entities_in_strings_converted_during_parsing(self):
|
||||
# Both XML and HTML entities are converted to Unicode characters
|
||||
# during parsing.
|
||||
text = "<p><<sacré bleu!>></p>"
|
||||
expected = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>"
|
||||
self.assertSoupEquals(text, expected)
|
||||
|
||||
def test_smart_quotes_converted_on_the_way_in(self):
    """Microsoft smart quotes are converted to Unicode characters
    during parsing."""
    # Bytes 0x91 and 0x92 are the smart single quotes.
    parsed = self.soup(b"<p>\x91Foo\x92</p>")
    expected_text = (
        u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
    self.assertEqual(parsed.p.string, expected_text)
|
||||
|
||||
def test_non_breaking_spaces_converted_on_the_way_in(self):
    """Two &nbsp; entities are parsed into two NO-BREAK SPACE characters."""
    soup = self.soup("<a>&nbsp;&nbsp;</a>")
    self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
|
||||
|
||||
def test_entities_converted_on_the_way_out(self):
    """Encoding a tag escapes angle brackets but writes an accented
    letter as the encoded character itself."""
    text = "<p>&lt;&lt;sacr&eacute; bleu!&gt;&gt;</p>"
    expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
    soup = self.soup(text)
    self.assertEqual(soup.p.encode("utf-8"), expected)
|
||||
|
||||
def test_real_iso_latin_document(self):
    """Smoke test of interrelated functionality, using an
    easy-to-understand document."""

    # The document as Unicode. Its <meta> tag claims ISO-Latin-1,
    # because that is the encoding it is about to be put into.
    unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
    latin1_bytes = unicode_html.encode("iso-8859-1")

    # Parse the ISO-Latin-1 bytes and write them back out as UTF-8.
    result = self.soup(latin1_bytes).encode("utf-8")

    # The output should be the original document encoded as UTF-8,
    # with the <meta> tag naming utf-8 instead of ISO-Latin-1.
    expected = unicode_html.replace("ISO-Latin-1", "utf-8").encode("utf-8")

    # Ta-da!
    self.assertEqual(result, expected)
|
||||
|
||||
def test_real_shift_jis_document(self):
    """Smoke test to make sure the parser can handle a document in
    Shift-JIS encoding, without choking."""
    shift_jis_html = (
        b'<html><head></head><body><pre>'
        b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
        b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
        b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
        b'</pre></body></html>')
    # The parser is handed the decoded text, not the raw bytes.
    unicode_html = shift_jis_html.decode("shift-jis")
    soup = self.soup(unicode_html)

    # Make sure the parse tree is correctly encoded to various
    # encodings.
    for encoding in ("utf-8", "euc_jp"):
        self.assertEqual(soup.encode(encoding), unicode_html.encode(encoding))
|
||||
|
||||
def test_real_hebrew_document(self):
    # A real-world check that an ISO-8859-8 (Hebrew) document is
    # converted to UTF-8 when the source encoding is passed explicitly.
    source_encoding = "iso8859-8"
    raw_page = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
    parsed = self.soup(raw_page, from_encoding=source_encoding)

    # The soup reports the encoding it was told to use.
    self.assertEqual(parsed.original_encoding, 'iso8859-8')

    # Re-encoding the tree matches a direct decode/encode of the bytes.
    self.assertEqual(
        parsed.encode('utf-8'),
        raw_page.decode(source_encoding).encode("utf-8"))
|
||||
|
||||
def test_meta_tag_reflects_current_encoding(self):
    # A <meta> tag declaring that the document is encoded in Shift-JIS.
    declaration = ('<meta content="text/html; charset=x-sjis" '
                   'http-equiv="Content-type"/>')

    # A document built around that declaration.
    page_template = (
        '<html><head>\n%s\n'
        '<meta http-equiv="Content-language" content="ja"/>'
        '</head><body>Shift-JIS markup goes here.')
    tree = self.soup(page_template % declaration)

    # After parsing, the attribute still compares equal to the text
    # that was in the markup.
    content_type_meta = tree.find('meta', {'http-equiv': 'Content-type'})
    content_attr = content_type_meta['content']
    self.assertEqual('text/html; charset=x-sjis', content_attr)

    # It is nevertheless a ContentMetaAttributeValue, not a plain string.
    self.assertTrue(isinstance(content_attr, ContentMetaAttributeValue))

    # Encoding it yields a value naming the encoding it was encoded to.
    self.assertEqual(
        'text/html; charset=utf8', content_attr.encode("utf8"))

    # For the rest of the story, see TestSubstitutions in
    # test_tree.py.
|
||||
|
||||
def test_html5_style_meta_tag_reflects_current_encoding(self):
    # An HTML5-style <meta> tag declaring that the document is
    # encoded in Shift-JIS.
    declaration = '<meta id="encoding" charset="x-sjis" />'

    # A document built around that declaration.
    page_template = (
        '<html><head>\n%s\n'
        '<meta http-equiv="Content-language" content="ja"/>'
        '</head><body>Shift-JIS markup goes here.')
    tree = self.soup(page_template % declaration)

    # After parsing, the attribute still compares equal to the text
    # that was in the markup.
    charset_attr = tree.find('meta', id="encoding")['charset']
    self.assertEqual('x-sjis', charset_attr)

    # It is nevertheless a CharsetMetaAttributeValue, not a plain string.
    self.assertTrue(isinstance(charset_attr, CharsetMetaAttributeValue))

    # Encoding it yields the name of the encoding it was encoded to.
    self.assertEqual('utf8', charset_attr.encode("utf8"))
|
||||
|
||||
def test_tag_with_no_attributes_can_have_attributes_added(self):
    # A tag parsed without attributes accepts item assignment, and the
    # new attribute shows up when the tag is rendered.
    tree = self.soup("<a>text</a>")
    tree.a['foo'] = 'bar'
    rendered = tree.a.decode()
    self.assertEqual('<a foo="bar">text</a>', rendered)
|
||||
|
||||
class XMLTreeBuilderSmokeTest(object):
    # Smoke tests for XML tree builders. The methods call ``self.soup``,
    # ``self.assertSoupEquals`` and the unittest assertion methods, none
    # of which are defined on this class itself.

    def test_docstring_generated(self):
        # Encoding with no arguments emits an XML declaration naming utf-8.
        tree = self.soup("<root/>")
        declared = b'<?xml version="1.0" encoding="utf-8"?>\n<root/>'
        self.assertEqual(tree.encode(), declared)

    def test_real_xhtml_document(self):
        """A real XHTML document should come out *exactly* the same as it went in."""
        xhtml = (
            b'<?xml version="1.0" encoding="utf-8"?>\n'
            b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">\n'
            b'<html xmlns="http://www.w3.org/1999/xhtml">\n'
            b'<head><title>Hello.</title></head>\n'
            b'<body>Goodbye.</body>\n'
            b'</html>')
        round_tripped = self.soup(xhtml).encode("utf-8")
        self.assertEqual(round_tripped, xhtml)

    def test_formatter_processes_script_tag_for_xml_documents(self):
        script_doc = (
            '\n'
            '<script type="text/javascript">\n'
            '</script>\n')
        tree = BeautifulSoup(script_doc, "xml")
        # lxml would have stripped this while parsing, but we can add
        # it later.
        tree.script.string = 'console.log("< < hey > > ");'
        serialized = tree.encode()
        # NOTE(review): the literal below is reproduced as found. Given the
        # test name, confirm whether an entity-escaped form was intended.
        self.assertTrue(b"< < hey > >" in serialized)

    def test_can_parse_unicode_document(self):
        # The markup is already Unicode text, even though its declaration
        # names euc-jp.
        text = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
        tree = self.soup(text)
        self.assertEqual(u'Sacr\xe9 bleu!', tree.root.string)

    def test_popping_namespaced_tag(self):
        # Several prefixed children followed by an unprefixed one must
        # render back to the original markup.
        rss = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
        tree = self.soup(rss)
        self.assertEqual(unicode(tree.rss), rss)

    def test_docstring_includes_correct_encoding(self):
        # The XML declaration names whichever encoding was requested.
        tree = self.soup("<root/>")
        declared = b'<?xml version="1.0" encoding="latin1"?>\n<root/>'
        self.assertEqual(tree.encode("latin1"), declared)

    def test_large_xml_document(self):
        """A large XML document should come out the same as it went in."""
        filler = b'0' * (2**12)
        big_doc = (b'<?xml version="1.0" encoding="utf-8"?>\n<root>'
                   + filler
                   + b'</root>')
        tree = self.soup(big_doc)
        self.assertEqual(tree.encode("utf-8"), big_doc)

    def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
        # A tag without content is rendered self-closing; a tag with
        # content is rendered unchanged.
        self.assertSoupEquals("<p>", "<p/>")
        self.assertSoupEquals("<p>foo</p>")

    def test_namespaces_are_preserved(self):
        # xmlns declarations stay readable as attributes of the root tag.
        two_namespaces = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
        root_tag = self.soup(two_namespaces).root
        self.assertEqual("http://example.com/", root_tag['xmlns:a'])
        self.assertEqual("http://example.net/", root_tag['xmlns:b'])

    def test_closing_namespaced_tag(self):
        # A prefixed tag nested in an unprefixed one renders unchanged.
        dublin_core = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
        tree = self.soup(dublin_core)
        self.assertEqual(unicode(tree.p), dublin_core)

    def test_namespaced_attributes(self):
        # A prefixed attribute renders unchanged.
        schema_doc = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
        tree = self.soup(schema_doc)
        self.assertEqual(unicode(tree.foo), schema_doc)

    def test_namespaced_attributes_xml_namespace(self):
        # An attribute using the xml: prefix renders unchanged.
        lang_doc = '<foo xml:lang="fr">bar</foo>'
        tree = self.soup(lang_doc)
        self.assertEqual(unicode(tree.foo), lang_doc)
|
||||
|
||||
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
    """Smoke test for a tree builder that supports HTML5."""

    def test_real_xhtml_document(self):
        # Since XHTML is not HTML5, HTML5 parsers are not tested to handle
        # XHTML documents in any particular way. This override is a no-op.
        pass

    def test_html_tags_have_namespace(self):
        # A plain HTML tag is placed in the XHTML namespace.
        tree = self.soup("<a>")
        self.assertEqual("http://www.w3.org/1999/xhtml", tree.a.namespace)

    def test_svg_tags_have_namespace(self):
        # Both <svg> and its child are placed in the SVG namespace.
        svg_ns = "http://www.w3.org/2000/svg"
        tree = self.soup('<svg><circle/></svg>')
        self.assertEqual(svg_ns, tree.svg.namespace)
        self.assertEqual(svg_ns, tree.circle.namespace)

    def test_mathml_tags_have_namespace(self):
        # Both <math> and its child are placed in the MathML namespace.
        mathml_ns = 'http://www.w3.org/1998/Math/MathML'
        tree = self.soup('<math><msqrt>5</msqrt></math>')
        self.assertEqual(mathml_ns, tree.math.namespace)
        self.assertEqual(mathml_ns, tree.msqrt.namespace)

    def test_xml_declaration_becomes_comment(self):
        # The XML declaration is kept as a Comment node, and the <html>
        # tag is the element that follows it.
        tree = self.soup(
            '<?xml version="1.0" encoding="utf-8"?><html></html>')
        first_node = tree.contents[0]
        self.assertTrue(isinstance(first_node, Comment))
        self.assertEqual(first_node, '?xml version="1.0" encoding="utf-8"?')
        self.assertEqual("html", first_node.next_element.name)
|
||||
|
||||
def skipIf(condition, reason):
    """Return a decorator that disables a test item when ``condition`` is true.

    A minimal stand-in for a conditional-skip decorator.

    :param condition: If false, the decorated item is returned untouched.
        If true, it is replaced by a placeholder that does nothing.
    :param reason: Human-readable explanation; stored in the placeholder's
        docstring so the cause of the skip can still be discovered.
    :return: A decorator taking the test function or class.
    """
    def decorator(test_item):
        if not condition:
            return test_item

        # The placeholder accepts any call signature (including none at
        # all) so it can replace a function, a method or a class.
        def nothing(*args, **kwargs):
            return None

        # Keep the skipped item identifiable and record why it was skipped.
        nothing.__name__ = getattr(test_item, '__name__', 'nothing')
        nothing.__doc__ = "Skipped: %s" % (reason,)
        return nothing

    return decorator
|
||||
@@ -0,0 +1 @@
|
||||
"The beautifulsoup tests."
|
||||
@@ -0,0 +1,141 @@
|
||||
"""Tests of the builder registry."""
|
||||
|
||||
import unittest
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.builder import (
|
||||
builder_registry as registry,
|
||||
HTMLParserTreeBuilder,
|
||||
TreeBuilderRegistry,
|
||||
)
|
||||
|
||||
try:
|
||||
from bs4.builder import HTML5TreeBuilder
|
||||
HTML5LIB_PRESENT = True
|
||||
except ImportError:
|
||||
HTML5LIB_PRESENT = False
|
||||
|
||||
try:
|
||||
from bs4.builder import (
|
||||
LXMLTreeBuilderForXML,
|
||||
LXMLTreeBuilder,
|
||||
)
|
||||
LXML_PRESENT = True
|
||||
except ImportError:
|
||||
LXML_PRESENT = False
|
||||
|
||||
|
||||
class BuiltInRegistryTest(unittest.TestCase):
    """Test the built-in registry with the default builders registered."""

    def test_combination(self):
        # Feature combinations resolve to the expected builder classes.
        # The lxml- and html5lib-backed checks run only when the library
        # could be imported.
        if LXML_PRESENT:
            fast_html = registry.lookup('fast', 'html')
            self.assertEqual(fast_html, LXMLTreeBuilder)

            permissive_xml = registry.lookup('permissive', 'xml')
            self.assertEqual(permissive_xml, LXMLTreeBuilderForXML)

        strict_html = registry.lookup('strict', 'html')
        self.assertEqual(strict_html, HTMLParserTreeBuilder)

        if HTML5LIB_PRESENT:
            html5 = registry.lookup('html5lib', 'html')
            self.assertEqual(html5, HTML5TreeBuilder)

    def test_lookup_by_markup_type(self):
        # Which builder answers a bare markup-type lookup depends on
        # which optional libraries are available.
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
            return

        self.assertEqual(registry.lookup('xml'), None)
        if HTML5LIB_PRESENT:
            expected_html = HTML5TreeBuilder
        else:
            expected_html = HTMLParserTreeBuilder
        self.assertEqual(registry.lookup('html'), expected_html)

    def test_named_library(self):
        # Builders can be looked up by the name of their backing library.
        if LXML_PRESENT:
            lxml_xml = registry.lookup('lxml', 'xml')
            self.assertEqual(lxml_xml, LXMLTreeBuilderForXML)
            lxml_html = registry.lookup('lxml', 'html')
            self.assertEqual(lxml_html, LXMLTreeBuilder)

        if HTML5LIB_PRESENT:
            by_html5lib = registry.lookup('html5lib')
            self.assertEqual(by_html5lib, HTML5TreeBuilder)

        by_html_parser = registry.lookup('html.parser')
        self.assertEqual(by_html_parser, HTMLParserTreeBuilder)

    def test_beautifulsoup_constructor_does_lookup(self):
        # ``features`` may be a single string...
        BeautifulSoup("", features="html")
        # ...or a list of strings.
        BeautifulSoup("", features=["html", "fast"])

        # A feature no builder provides makes the constructor raise
        # ValueError.
        self.assertRaises(
            ValueError, BeautifulSoup, "", features="no-such-feature")
|
||||
|
||||
class RegistryTest(unittest.TestCase):
    """Test the TreeBuilderRegistry class in general."""

    def setUp(self):
        # Every test starts with its own empty registry.
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list):
        # Create a throwaway builder class advertising ``feature_list``,
        # register it with this test's registry, and return it.
        class_name = 'Builder_' + '_'.join(feature_list)
        attributes = {'features' : feature_list}
        new_builder = type(class_name, (object,), attributes)

        self.registry.register(new_builder)
        return new_builder

    def test_register_with_no_features(self):
        featureless = self.builder_for_features()

        # Since the builder advertises no features, you can't find it
        # by looking up features.
        self.assertEqual(self.registry.lookup('foo'), None)

        # But you can find it by doing a lookup with no features, if
        # this happens to be the only registered builder.
        self.assertEqual(self.registry.lookup(), featureless)

    def test_register_with_features_makes_lookup_succeed(self):
        # Either advertised feature alone finds the builder.
        registered = self.builder_for_features('foo', 'bar')
        for feature in ('foo', 'bar'):
            self.assertEqual(self.registry.lookup(feature), registered)

    def test_lookup_fails_when_no_builder_implements_feature(self):
        # A feature nobody advertises yields None.
        self.builder_for_features('foo', 'bar')
        self.assertEqual(self.registry.lookup('baz'), None)

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        # With no features requested, the last registration wins.
        self.builder_for_features('foo')
        newest = self.builder_for_features('bar')
        self.assertEqual(self.registry.lookup(), newest)

    def test_lookup_fails_when_no_tree_builders_registered(self):
        # An empty registry has nothing to return.
        self.assertEqual(self.registry.lookup(), None)

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        # Registration order matters here, so it is kept exactly:
        # foo; bar; foo+bar+baz; foo+bar+quux; bar; foo.
        self.builder_for_features('foo')
        self.builder_for_features('bar')
        foo_bar_baz = self.builder_for_features('foo', 'bar', 'baz')
        foo_bar_quux = self.builder_for_features('foo', 'bar', 'quux')
        self.builder_for_features('bar')
        self.builder_for_features('foo')

        # There are two builders featuring 'foo' and 'bar', but
        # the one that also features 'quux' was registered later.
        self.assertEqual(
            self.registry.lookup('foo', 'bar'), foo_bar_quux)

        # There is only one builder featuring 'foo', 'bar', and 'baz'.
        self.assertEqual(
            self.registry.lookup('foo', 'bar', 'baz'), foo_bar_baz)

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        # 'bar' and 'baz' are each advertised, but never by the same
        # builder, so the combined lookup yields None.
        self.builder_for_features('foo', 'bar')
        self.builder_for_features('foo', 'baz')
        self.assertEqual(self.registry.lookup('bar', 'baz'), None)
|
||||
@@ -0,0 +1,36 @@
|
||||
"Test harness for doctests."
|
||||
|
||||
# pylint: disable-msg=E0611,W0142
|
||||
|
||||
__metaclass__ = type
# ``additional_tests`` exists only as commented-out code in this module.
# Listing it here would make ``from <module> import *`` raise
# AttributeError, so nothing is exported until it is restored.
__all__ = []
|
||||
|
||||
import atexit
|
||||
import doctest
|
||||
import os
|
||||
#from pkg_resources import (
|
||||
# resource_filename, resource_exists, resource_listdir, cleanup_resources)
|
||||
import unittest
|
||||
|
||||
# Option flags for doctest runs: ELLIPSIS, NORMALIZE_WHITESPACE and
# REPORT_NDIFF combined into one bitmask.
DOCTEST_FLAGS = doctest.ELLIPSIS
DOCTEST_FLAGS |= doctest.NORMALIZE_WHITESPACE
DOCTEST_FLAGS |= doctest.REPORT_NDIFF
|
||||
|
||||
|
||||
# def additional_tests():
|
||||
# "Run the doc tests (README.txt and docs/*, if any exist)"
|
||||
# doctest_files = [
|
||||
# os.path.abspath(resource_filename('bs4', 'README.txt'))]
|
||||
# if resource_exists('bs4', 'docs'):
|
||||
# for name in resource_listdir('bs4', 'docs'):
|
||||
# if name.endswith('.txt'):
|
||||
# doctest_files.append(
|
||||
# os.path.abspath(
|
||||
# resource_filename('bs4', 'docs/%s' % name)))
|
||||
# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
|
||||
# atexit.register(cleanup_resources)
|
||||
# return unittest.TestSuite((
|
||||
# doctest.DocFileSuite(*doctest_files, **kwargs)))
|
||||
@@ -0,0 +1,85 @@
|
||||
"""Tests to ensure that the html5lib tree builder generates good trees."""
|
||||
|
||||
import warnings
|
||||
|
||||
# Record whether the html5lib-backed tree builder can be imported; the
# flag is used to decide whether its tests are run.
try:
    from bs4.builder import HTML5TreeBuilder
    HTML5LIB_PRESENT = True
except ImportError:
    # The exception object itself is not needed. This spelling is also
    # valid on both Python 2 and Python 3.
    HTML5LIB_PRESENT = False
|
||||
from bs4.element import SoupStrainer
|
||||
from bs4.testing import (
|
||||
HTML5TreeBuilderSmokeTest,
|
||||
SoupTest,
|
||||
skipIf,
|
||||
)
|
||||
|
||||
@skipIf(
    not HTML5LIB_PRESENT,
    "html5lib seems not to be present, not testing its tree builder.")
class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
    """See ``HTML5TreeBuilderSmokeTest``."""

    @property
    def default_builder(self):
        # A fresh html5lib tree builder on every access.
        return HTML5TreeBuilder()

    def test_soupstrainer(self):
        # The html5lib tree builder does not support SoupStrainers.
        strainer = SoupStrainer("b")
        markup = "<p>A <b>bold</b> statement.</p>"
        with warnings.catch_warnings(record=True) as w:
            # Without this, a warning already issued earlier in the test
            # run may be suppressed by the filters and never recorded.
            warnings.simplefilter("always")
            soup = self.soup(markup, parse_only=strainer)

        # The strainer is ignored: the whole document is parsed.
        self.assertEqual(
            soup.decode(), self.document_for(markup))

        # Search every recorded warning instead of assuming the expected
        # one is first; an empty list fails the assertion cleanly rather
        # than raising IndexError.
        expected = "the html5lib tree builder doesn't support parse_only"
        self.assertTrue(
            any(expected in str(warning.message) for warning in w))

    def test_correctly_nested_tables(self):
        """html5lib inserts <tbody> tags where other parsers don't."""
        markup = ('<table id="1">'
                  '<tr>'
                  "<td>Here's another table:"
                  '<table id="2">'
                  '<tr><td>foo</td></tr>'
                  '</table></td>')

        # Unclosed tags get closed and each table gains a <tbody>.
        self.assertSoupEquals(
            markup,
            '<table id="1"><tbody><tr><td>Here\'s another table:'
            '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
            '</td></tr></tbody></table>')

        # A table that already has thead/tbody/tfoot is left unchanged.
        self.assertSoupEquals(
            "<table><thead><tr><td>Foo</td></tr></thead>"
            "<tbody><tr><td>Bar</td></tr></tbody>"
            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")

    def test_xml_declaration_followed_by_doctype(self):
        markup = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<p>foo</p>
</body>
</html>'''
        soup = self.soup(markup)
        # Verify that we can reach the <p> tag; this means the tree is connected.
        self.assertEqual(b"<p>foo</p>", soup.p.encode())

    def test_reparented_markup(self):
        # An <em> left open across two paragraphs is closed and reopened,
        # and the document still contains exactly two <p> tags.
        markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
        soup = self.soup(markup)
        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
        self.assertEqual(2, len(soup.find_all('p')))

    def test_reparented_markup_ends_with_whitespace(self):
        # Same as above, with a trailing newline that must be kept at the
        # end of <body>.
        markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
        soup = self.soup(markup)
        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
        self.assertEqual(2, len(soup.find_all('p')))
|
||||
@@ -0,0 +1,19 @@
|
||||
"""Tests to ensure that the html.parser tree builder generates good
|
||||
trees."""
|
||||
|
||||
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
|
||||
from bs4.builder import HTMLParserTreeBuilder
|
||||
|
||||
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    # Runs the generic HTML smoke tests against the html.parser-based
    # builder, turning the two doctype tests below into no-ops.

    @property
    def default_builder(self):
        # A fresh html.parser tree builder on every access.
        builder = HTMLParserTreeBuilder()
        return builder

    def test_namespaced_system_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        return None

    def test_namespaced_public_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        return None
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user