Compare commits
471 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1b96dbae3d | |||
| 244e183a2b | |||
| 5cb00a0532 | |||
| 09ce46f46a | |||
| 881a23ec7f | |||
| d53da82ddf | |||
| 177d95128f | |||
| 867a162fcf | |||
| fe0291ef55 | |||
| 1a21ab513d | |||
| 1a275e9501 | |||
| 084284d1ee | |||
| 13b087e44b | |||
| 22b318f05e | |||
| a575e40859 | |||
| ef044e4937 | |||
| 1e1f8e7ca0 | |||
| 814395b58e | |||
| 5ac5c3c595 | |||
| 64a8daab76 | |||
| 3fb6017976 | |||
| 9379e84ba2 | |||
| 8eaa468b1c | |||
| a1c3e64bf3 | |||
| e90e1bd0c5 | |||
| 30cec00f0e | |||
| 2a0c1a13ad | |||
| 072aa0883b | |||
| 2e22c585d0 | |||
| 3240b19649 | |||
| 2f4b47e456 | |||
| f735c9128c | |||
| 56e8cb0f44 | |||
| d5253f130c | |||
| 261c6f3c7e | |||
| 2ad59e6592 | |||
| f5cf977788 | |||
| d392707ecf | |||
| cbc57fbc0b | |||
| b32a2ded77 | |||
| e7ee9ae747 | |||
| 97acfb6845 | |||
| 709197a957 | |||
| 7d003cdc3b | |||
| c0266a5b84 | |||
| 5b61c71cdd | |||
| 3423b42a8a | |||
| 942124ac67 | |||
| 58d4534176 | |||
| 93517582d1 | |||
| 75c60c2b60 | |||
| 1fbd9cfd50 | |||
| 2e6843fd78 | |||
| c073de4acd | |||
| dcd85c85d0 | |||
| 6e5bfd162a | |||
| b579fa7804 | |||
| f356313e67 | |||
| 4055debc6f | |||
| fcc907c507 | |||
| 8a90a51182 | |||
| 4c42b3090a | |||
| 626d519c81 | |||
| dae3672a9a | |||
| 640bf5515f | |||
| 476fd09397 | |||
| bfbf12914f | |||
| 91eae536ae | |||
| 404becadba | |||
| d71d33d899 | |||
| 65e72da01e | |||
| 8556bebb1f | |||
| dc5c353b8d | |||
| 9f7f877cf2 | |||
| 9a827b783a | |||
| d2641f045e | |||
| e4ef6dc604 | |||
| c8cc9bb188 | |||
| a21dd3d0c0 | |||
| b16d6658f8 | |||
| 01aab808c3 | |||
| eb1ae54739 | |||
| 5483d02a6f | |||
| 9d434eb1e9 | |||
| 43269befd6 | |||
| d8d2b06c6c | |||
| 1f9a2f6554 | |||
| 940162a8b5 | |||
| 3c2b39453a | |||
| 459cd92017 | |||
| a5aa0a773d | |||
| d1b569fbbe | |||
| 6d609f628b | |||
| 8d5eaf0f8d | |||
| de93b439ca | |||
| d11d9ef03c | |||
| f1fc8e1d82 | |||
| 9a44c37cab | |||
| 25a9e5efdf | |||
| 9352193986 | |||
| 61436ca278 | |||
| 17b6fcc48a | |||
| 9f9c5cf27a | |||
| 8fd38fbb40 | |||
| ac2c9fff38 | |||
| 8dc4877379 | |||
| d22a3a3953 | |||
| 182538d2a7 | |||
| 997c0bc297 | |||
| f9099cd680 | |||
| e8b47c33b6 | |||
| 6618fdd86b | |||
| 0b5ef5e257 | |||
| 4f36e6119c | |||
| 24b58d9615 | |||
| 4621c21907 | |||
| a53f6005b3 | |||
| 8bad1b2dfc | |||
| 856ec02083 | |||
| 45c63bdac7 | |||
| a5202b8eb8 | |||
| 766e47a757 | |||
| 0026ef7db7 | |||
| 368c7927ff | |||
| 1dd1ec3a0d | |||
| 6ed5c83b05 | |||
| 3efd1e56c4 | |||
| 1e18c9e309 | |||
| c79048027c | |||
| b2c981fca1 | |||
| 88af4d608d | |||
| 2008b35e8e | |||
| a082714ad5 | |||
| 2f28fde4e6 | |||
| e3004b9db7 | |||
| b192f4f80d | |||
| 809331b9fd | |||
| 3828c8bf89 | |||
| 4731750684 | |||
| 54f2308944 | |||
| afdd44323e | |||
| 9b88d5814c | |||
| 02a924e97d | |||
| e167439ed0 | |||
| 9f26d5a401 | |||
| d7f72470ec | |||
| abc45b1a2f | |||
| 5bc530deb2 | |||
| 6a206b0c5e | |||
| 2485639e11 | |||
| d056c14b91 | |||
| 834a8dd0a8 | |||
| ea5e4d48d3 | |||
| 2b08a8958a | |||
| 759b09c8d6 | |||
| 0266afe9ab | |||
| 109c5e0703 | |||
| 40a79c2cc4 | |||
| debc425f99 | |||
| 602a1cc8a3 | |||
| d080eae809 | |||
| 631b5033fe | |||
| af8ea6934b | |||
| 19740ae6c2 | |||
| 7b78b71487 | |||
| 86a43a79c8 | |||
| 6035a1bde4 | |||
| a32e952323 | |||
| d55b1c67df | |||
| 103f7bc18b | |||
| e857c223d4 | |||
| ea07997522 | |||
| d492c73f94 | |||
| 3b836d29a2 | |||
| 9248916527 | |||
| 2006ebb244 | |||
| 58c852cdba | |||
| 9e77a8e304 | |||
| e9817f1e0d | |||
| 123dde7b8f | |||
| c1b84eabdb | |||
| c7ececde77 | |||
| 6f305d636e | |||
| d25990895c | |||
| d406ced759 | |||
| b858b56120 | |||
| c94fe81dbf | |||
| a67bbebb84 | |||
| cf577c81e1 | |||
| ad236be02c | |||
| 3412e379d6 | |||
| 95f240ab07 | |||
| 0c8ae3f45b | |||
| fe87944049 | |||
| 2cbe290916 | |||
| a85321a1a9 | |||
| c55071d157 | |||
| 86eac774e7 | |||
| dac6df4282 | |||
| d7918b1714 | |||
| c4de84a23a | |||
| c147c29756 | |||
| 5a4a50bc9d | |||
| 55ea4009c9 | |||
| 536fd7dfe4 | |||
| a1f6568b84 | |||
| 6a9112f03c | |||
| 89b4305ccb | |||
| 8643e6a055 | |||
| e2756e85b7 | |||
| 0f7bc36e86 | |||
| 5e20032976 | |||
| c7dbac05a9 | |||
| a0a5adb807 | |||
| ac6a43f6e5 | |||
| 91f57da735 | |||
| 488ac604f9 | |||
| 70ab3e456f | |||
| d0017d2ab8 | |||
| 9633abc09e | |||
| 8f608acc71 | |||
| dbce582bdf | |||
| 62f03bcf11 | |||
| 530eb9ef66 | |||
| 12509eb93a | |||
| 621623bdb6 | |||
| 497a94e3a5 | |||
| a2f5ce797d | |||
| e17082d27e | |||
| 2eefb8e225 | |||
| 5d9b1a1810 | |||
| f274e76253 | |||
| 3bfef7f67b | |||
| 5d6651e00e | |||
| f0ed0b7c41 | |||
| 0d4bf7b6b3 | |||
| a5c7c656e6 | |||
| fb3a937c81 | |||
| e50820abd0 | |||
| 083084136c | |||
| 0188b81220 | |||
| c7468dbfb5 | |||
| d92ba7125e | |||
| 050d5dd063 | |||
| a860c57bd1 | |||
| 1b0b189c16 | |||
| 7d2b3d6663 | |||
| 2899d68973 | |||
| 0cc8238b1a | |||
| f277751d86 | |||
| 74d63a9144 | |||
| 07f7b4e7fb | |||
| 92fda093f7 | |||
| 714751d2d8 | |||
| 2c949192b2 | |||
| c0e3c6a0eb | |||
| 764484f735 | |||
| 208bd4fcb2 | |||
| 6b17825fa2 | |||
| d20e0bd2c2 | |||
| ba53a5fa93 | |||
| 4d40da5661 | |||
| 4ab157e2a1 | |||
| dbf64d2a2b | |||
| 03d4ee3482 | |||
| 959a061380 | |||
| f5432dfb9e | |||
| 6e2f2fb9d2 | |||
| fb494a911d | |||
| bc9dec659c | |||
| b68cc3f61e | |||
| 0db80add2c | |||
| 2a67632497 | |||
| 5260b28c15 | |||
| 4d365cba22 | |||
| 8174a8efc3 | |||
| a5d8df35b6 | |||
| 0ad429ffaa | |||
| 3108572387 | |||
| 98a406ff9e | |||
| 9257550e56 | |||
| ef19ed0a26 | |||
| 80daa8560d | |||
| 797cc16a91 | |||
| 771e0464d7 | |||
| 715e9c0015 | |||
| d13a0c4fb3 | |||
| 2bb0517264 | |||
| ac174673ef | |||
| dacab5ece7 | |||
| 69a5ef6f18 | |||
| 47be8eef62 | |||
| fe7760e779 | |||
| 18dddaf0a1 | |||
| b32066e6f8 | |||
| eca378c09e | |||
| 2c3e4173f4 | |||
| 488a65055b | |||
| cb94f0c2c6 | |||
| 8dc4cf8d63 | |||
| 82ec5e0d5e | |||
| 91cebd2902 | |||
| cecee18d8e | |||
| 2b1ea2eb6f | |||
| bc67b380e5 | |||
| b7b784f442 | |||
| 6889effbb6 | |||
| ae7865ecb8 | |||
| 83c9d4887b | |||
| 75da4dab70 | |||
| 07fccf9b52 | |||
| 6cfafd60ef | |||
| b24bd740c2 | |||
| 6c81ee7b3a | |||
| cd00194819 | |||
| 0eda52e3b2 | |||
| 56de3b5658 | |||
| b8f31fc36f | |||
| 7354110d2f | |||
| c08335b5a8 | |||
| f4d9a3c65c | |||
| 174b73a5cb | |||
| 5df5123682 | |||
| 1aef828fcd | |||
| 6401183eff | |||
| 82757a2f0c | |||
| 736386bc31 | |||
| 922bed81fa | |||
| 708e8c5b14 | |||
| 1e02082472 | |||
| 9599bcb70f | |||
| dad8460574 | |||
| 021d12963f | |||
| e5599650ac | |||
| 22a1eff98e | |||
| fc00566469 | |||
| 2e05eb91ca | |||
| 7587860c12 | |||
| fabb5dd003 | |||
| 314da8b50f | |||
| 031e035a50 | |||
| 02374575bc | |||
| adef9e1014 | |||
| 5bb3f15332 | |||
| 089e0d5d6c | |||
| c8fbfcbc24 | |||
| a922961621 | |||
| 513bc2ae8b | |||
| 8a1c61ac22 | |||
| 3e1910a28b | |||
| b5e5341436 | |||
| 223ef16583 | |||
| 114312e1e5 | |||
| 1a49159b64 | |||
| d0ee9badb2 | |||
| b9116c30ed | |||
| d7e6436d8d | |||
| c039172880 | |||
| bd5da47370 | |||
| e9aabe0a5e | |||
| f3f09dbb9d | |||
| 3cc8a98f67 | |||
| 31e923c080 | |||
| 39b3b4a0c2 | |||
| 8470daa20f | |||
| e852137baf | |||
| 753c46d9fd | |||
| e06ca730a2 | |||
| f84e84b17b | |||
| 4f927b272b | |||
| 662e1a93a9 | |||
| e25a043457 | |||
| b32f923513 | |||
| ad8898266e | |||
| 51e87bdda5 | |||
| f88677b0f6 | |||
| fc71ec0250 | |||
| ca6089c220 | |||
| 7cc051fd90 | |||
| 5b01fda526 | |||
| 585f6b8a4d | |||
| 81aeba0874 | |||
| d9133e2793 | |||
| 9ef740ae1f | |||
| e54fe71e93 | |||
| 9df878b8e3 | |||
| 1a59c267c1 | |||
| f8a07d983b | |||
| 1f1847f246 | |||
| a32dfd6b37 | |||
| b1cce92e04 | |||
| fdf32439c9 | |||
| fc2208f9e5 | |||
| 1a4eb366bb | |||
| b89c64a2c2 | |||
| 68e8f6e753 | |||
| f15cc4cb3c | |||
| 903273e3ef | |||
| 1c9b744d31 | |||
| 7c0fb29886 | |||
| 2505a7510c | |||
| 0a66db40a2 | |||
| 6c68893979 | |||
| c512eab0b6 | |||
| 3cedd4bd0f | |||
| 0759c5e4c6 | |||
| ad6cf4be79 | |||
| 23c3899fb2 | |||
| 1a6515a660 | |||
| 58815a7650 | |||
| c15ec9fefc | |||
| 0e18d59680 | |||
| 2d88efa5b4 | |||
| b3da7572f3 | |||
| 099ec4e85d | |||
| ff88a15c61 | |||
| 839791b0fa | |||
| 159a533731 | |||
| fb5835baa4 | |||
| a3f05cd597 | |||
| f3af1672f6 | |||
| c984c9849b | |||
| e28d264125 | |||
| 7166ab9502 | |||
| ab242c2ecb | |||
| 6f829dd4c7 | |||
| 3e0602cdf0 | |||
| 67cdebfb67 | |||
| 0f87973742 | |||
| 92317f7730 | |||
| ce936c2553 | |||
| b995f16c34 | |||
| 49c7adcc40 | |||
| 88eee6fe48 | |||
| cbe425d150 | |||
| 1c7d6b7bf8 | |||
| 8323608558 | |||
| 3f8a5ec125 | |||
| 464b1695a9 | |||
| d85602612b | |||
| 59440d251b | |||
| d774f09427 | |||
| 45be650db9 | |||
| d54847803f | |||
| ce3b66eda7 | |||
| 5b6bcc7d12 | |||
| 24d4c2ae2c | |||
| 98e451d57d | |||
| 8c491c45be | |||
| 6f271c5638 | |||
| f9c083ebc6 | |||
| e79360915d | |||
| 2fbd8fdc08 | |||
| 5a9d5ec9a1 | |||
| 9ace798ee5 | |||
| 63e0dc0cb0 | |||
| 974aae3ec6 | |||
| 3268975849 | |||
| b6adb4cff5 | |||
| 78191bb750 | |||
| 2ab66671e5 | |||
| fdcfc630b3 | |||
| 3a717a8876 | |||
| 2dfb381b96 | |||
| d8a7e3331b | |||
| 2995eb1cac | |||
| bedb097955 | |||
| e6cebe41dc | |||
| 5aa123d42b | |||
| 758b732142 | |||
| 50b80f3267 |
@@ -0,0 +1,3 @@
|
||||
.gitattributes export-ignore
|
||||
/Wiki export-ignore
|
||||
.gitignore export-ignore
|
||||
+138
@@ -1,3 +1,141 @@
|
||||
2.0.23.1464 RC10.1
|
||||
- core: huge bugfix; please check `Library/Application Support/Plex Media Server/Plug-in Support/Data/com.plexapp.agents.subzero/DataItems`
|
||||
for any `subs_XXXXX.json.gz` file bigger than 500kb and delete them
|
||||
|
||||
|
||||
2.0.23.1456 RC10
|
||||
- core: findBetterSubtitles: increase series cutoff by 2 (resolution match)
|
||||
- core: add VTT format
|
||||
- core: fix crashes regarding DBM/cache management
|
||||
- core: update rarfile.py
|
||||
- core: add missing encodings
|
||||
- core: full support for Serbian subtitles (Cyrillic and Latin)
|
||||
- podnapisi: fix pt-BR, srp-cyrl and srp-latn
|
||||
- core: implement own provider registry and ditch the subliminal one
|
||||
- core: use ftfy library to fix re-encoding errors inside subtitles introduced by the subtitle author
|
||||
- core: always store and save subtitles normalized to UTF-8
|
||||
- core: replace spaced dashes in movie/series names before re-refining with plex metadata info
|
||||
- submod: remove_HI: handle multiline brackets correctly
|
||||
|
||||
|
||||
2.0.20.1364 RC9
|
||||
- core: performance improvements
|
||||
- core: if info couldn't be guessed from the filename, fill missing info from PMS #270
|
||||
- submod: OCR: add more to the eng dictionary
|
||||
- submod: HI: fixed some issues with font style tags
|
||||
- core: don't ignore subtitles from providers that don't have hearing impaired info, when hearing impaired mode is set to "force non-HI"
|
||||
- legendastv/menu: fix manual subtitle selection issues in menu
|
||||
- core: improve specials matching on OpenSubtitles
|
||||
- core: update guessit
|
||||
|
||||
|
||||
2.0.19.1337 RC8
|
||||
- napiprojekt: fixed: couldn't convert MicroDVD to SRT on certain occasions
|
||||
- core: when normalize to UTF-8 is enabled, also store the subtitle in UTF-8 encoding in the internal storage
|
||||
- core: add more encodings for western/eastern/northern europe
|
||||
- submod: OCR: update dictionaries from SubtitleEdit
|
||||
- submod: common: be smarter about uppercase i's in words that should have lowercase L's
|
||||
- submod: fix unopened/unclosed font style tags after modification
|
||||
- core: re-enable OMDB support
|
||||
- core: update guessit for better matching
|
||||
- core: fix SearchAllRecentlyMissing (was broken since RC3)
|
||||
|
||||
|
||||
2.0.19.1299 RC7
|
||||
- submod: offset mods now get merged internally when applied multiple times (to avoid errors and increase performance)
|
||||
- submod: improve performance
|
||||
- submod: core mods (OCR, common, remove_HI) now are always applied in a fixed order internally, regardless of the order they were added in
|
||||
- submod: CM_spaces_in_numbers: don't break up ellipses (30... 29... 28...)
|
||||
- submod: CM_spaces_in_numbers: don't fix countdown numbers (30, 29, 28)
|
||||
- submod: remove_HI: make bracket removal more aggressive
|
||||
- submod: remove_HI: be less aggressive when removing text-before-colon
|
||||
- submod: remove_HI: remove all-uppercase-before-sentence (THIS IS ALL UPPERCASE And here starts a sentence -> And here starts a sentence)
|
||||
- submod: fix all character ranges to include non-ASCII characters
|
||||
- add new README for 2.0
|
||||
|
||||
|
||||
2.0.19.1267 RC6
|
||||
- core: add new SZ subtitle storage format
|
||||
- smaller data files and less cumbersome
|
||||
- it will auto-migrate when old data is accessed; to speed this up, use "Trigger subtitle storage migration (expensive)" in the advanced menu
|
||||
- core: performance optimizations
|
||||
- addic7ed: when release group matches, assume the format matches, too (leftover change from RC5)
|
||||
- submod: fix patterns for beginlines/endlines
|
||||
- submod: add our own dictionaries to OCR fixes (english)
|
||||
- submod: hearing impaired: also remove full-caps with punctuation inside
|
||||
- submod: correctly handle partiallines
|
||||
- submod: in numbers with spaces (incorrect), also allow for some punctuation (,.:')
|
||||
|
||||
|
||||
2.0.18.1245 RC5
|
||||
- core: add more debug info
|
||||
- core: fix subtitle modifications (was broken in RC4, created non-usable subtitles)
|
||||
- submod: add ANSI colors
|
||||
- menu/submod: add color mod menu
|
||||
- submod: exclusive mods now are mutually exclusive and get cleaned on duplicate
|
||||
- menu/core: naming
|
||||
|
||||
For everyone who runs RC4: your subtitles are broken. Go to the advanced menu and trigger `Re-Apply mods of all stored subtitles` to fix them.
|
||||
|
||||
|
||||
2.0.17.1234 RC4
|
||||
- core: backport provider-download-retry implementation
|
||||
- core: implement custom user agent (for OpenSubtitles)
|
||||
- core/menu: correct handling of media with multiple files
|
||||
- core: fix SearchAllRecentlyMissing; also wait 5 seconds between searches
|
||||
- core: SearchAllRecentlyMissing: honor physical ignores
|
||||
- submod: pattern fixes
|
||||
- submod: better unicode handling
|
||||
- submod: add color mod (automatic only for now)
|
||||
|
||||
|
||||
2.0.15.1216 RC3
|
||||
- core: fixes
|
||||
- scheduler: revert some of the aggressive changes in RC2
|
||||
- submod: be smarter about WholeLine matches
|
||||
|
||||
|
||||
2.0.15.1209 RC2
|
||||
- core: fixes
|
||||
- core: submod-common: fix multiple dots at start of line
|
||||
- core/menu: add subtitle modification debug setting
|
||||
- core/menu: when manually listing available subtitles in menu, display those with wrong FPS also (opensubtitles), because you can fix them later
|
||||
- core/menu: advanced-menu: add apply-all-default-mods menu item; add re-apply all mods menu item
|
||||
- core: always look for currently (not-) existing subtitles when called; hopefully fixes #276
|
||||
- scheduler/menu: be faster; also launch scheduled tasks in threads, not just manually launched ones
|
||||
- core: don't delete subtitles with .custom or .embedded in their filenames when running auto cleanup, if the correct media file exists
|
||||
- menu: add back-to-previous menu items
|
||||
|
||||
|
||||
2.0.12.1180 RC1
|
||||
- core: update subliminal to version 2
|
||||
- core: update all dependencies
|
||||
- core: add new providers: legendastv (pt-BR), napiprojekt (pl), shooter (cn), subscenter (heb)
|
||||
- core: rewrite all subliminal patches for version 2
|
||||
- menu: add icons for menu items; update main channel icon
|
||||
- core: use SSL again for opensubtitles
|
||||
- core: improved matching due to subliminal 2 (and SZ custom) tvdb/omdb refiners
|
||||
- menu: add "Get my logs" function to the advanced menu, which zips up all necessary logs suitable for posting in the forums
|
||||
- core: on non-windows systems, utilize a file-based cache database for provider media lists and subliminal refiner results
|
||||
- core: add manual and automatic subtitle modification framework (fix common OCR issues, remove hearing impaired etc.)
|
||||
- menu: add subtitle modifications (subtitle content fixes, offset-based shifting, framerate conversion)
|
||||
- menu: add recently played menu
|
||||
- improve almost everything Sub-Zero did in 1.4 :)
|
||||
|
||||
|
||||
1.4.27.973
|
||||
- core: ignore "obfuscated" and "scrambled" tags in filenames when searching for subtitles
|
||||
- core: exotic embedded subtitles are now also considered when searching (and when the option is enabled); fixes #264
|
||||
|
||||
|
||||
1.4.27.967
|
||||
- core: remember the last 10 played items; only consider on_playback for "playing" state within the first 60 seconds of an item
|
||||
|
||||
|
||||
1.4.27.965
|
||||
- core: on_playback activity bugfixes
|
||||
|
||||
|
||||
1.4.27.957
|
||||
- core: correctly fall back to the next best subtitle if the current one couldn't be downloaded; hopefully fixes #231
|
||||
- core: add "Scan: which external subtitles should be picked up?"-setting
|
||||
|
||||
+30
-62
@@ -1,7 +1,8 @@
|
||||
# coding=utf-8
|
||||
import sys
|
||||
import datetime
|
||||
import os
|
||||
|
||||
from subliminal_patch import compute_score
|
||||
from subzero.sandbox import restore_builtins
|
||||
|
||||
module = sys.modules['__main__']
|
||||
@@ -16,18 +17,15 @@ import logger
|
||||
|
||||
sys.modules["logger"] = logger
|
||||
|
||||
import subliminal
|
||||
import support
|
||||
|
||||
import interface
|
||||
sys.modules["interface"] = interface
|
||||
|
||||
from subliminal.cli import MutexLock
|
||||
from subzero.constants import OS_PLEX_USERAGENT, PERSONAL_MEDIA_IDENTIFIER
|
||||
from interface.menu import *
|
||||
from support.plex_media import media_to_videos, get_media_item_ids, scan_videos
|
||||
from support.subtitlehelpers import get_subtitles_from_metadata
|
||||
from support.storage import whack_missing_parts, save_subtitles
|
||||
from support.storage import save_subtitles, store_subtitle_info
|
||||
from support.items import is_ignored
|
||||
from support.config import config
|
||||
from support.lib import get_intent
|
||||
@@ -35,15 +33,14 @@ from support.helpers import track_usage, get_title_for_video_metadata, get_ident
|
||||
from support.history import get_history
|
||||
from support.data import dispatch_migrate
|
||||
from support.activities import activity
|
||||
from support.download import download_best_subtitles
|
||||
|
||||
|
||||
def Start():
|
||||
HTTP.CacheTime = 0
|
||||
HTTP.Headers['User-agent'] = OS_PLEX_USERAGENT
|
||||
|
||||
subliminal.region.configure('dogpile.cache.dbm', expiration_time=datetime.timedelta(days=30),
|
||||
arguments={'filename': os.path.join(config.data_items_path, 'subzero.dbm'),
|
||||
'lock_factory': MutexLock})
|
||||
config.init_cache()
|
||||
|
||||
# clear expired intents
|
||||
intent = get_intent()
|
||||
@@ -52,9 +49,12 @@ def Start():
|
||||
# clear expired menu history items
|
||||
now = datetime.datetime.now()
|
||||
if "menu_history" in Dict:
|
||||
for key, timeout in Dict["menu_history"].items():
|
||||
for key, timeout in Dict["menu_history"].copy().items():
|
||||
if now > timeout:
|
||||
del Dict["menu_history"][key]
|
||||
try:
|
||||
del Dict["menu_history"][key]
|
||||
except:
|
||||
pass
|
||||
|
||||
# run migrations
|
||||
if "subs" in Dict or "history" in Dict:
|
||||
@@ -90,45 +90,6 @@ def Start():
|
||||
track_usage("General", "plugin", "start", config.version)
|
||||
|
||||
|
||||
def download_best_subtitles(video_part_map, min_score=0):
    """
    Download subtitles for the given videos unless nothing is missing.

    :param video_part_map: mapping of video -> part (iterated with ``iteritems``)
    :param min_score: minimum score handed to ``subliminal.download_best_subtitles``
    :return: the result of ``subliminal.download_best_subtitles`` when at least one
             video lacks a wanted language; ``None`` when no languages are configured
             or when nothing is missing
    """
    hearing_impaired = Prefs['subtitles.search.hearingImpaired']
    wanted = config.lang_list
    if not wanted:
        return

    for video, part in video_part_map.iteritems():
        if not Prefs['subtitles.save.filesystem']:
            # scan for existing metadata subtitles and register their languages on the video
            for language, sub_list in get_subtitles_from_metadata(part).iteritems():
                if sub_list:
                    video.subtitle_languages.add(language)
                    Log.Debug("Found metadata subtitle %s for %s", language, video)

        still_missing = (wanted - video.subtitle_languages)

        # a single found subtitle is sufficient when Prefs['subtitles.only_one'] is set
        # (one found sub matches any language)
        one_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']

        if still_missing and not one_is_enough:
            # the first video that lacks something is enough to trigger the download for all
            break

        if one_is_enough:
            Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
        else:
            Log.Debug('All languages %r exist for %s', wanted, video)
    else:
        # loop finished without a break: nothing is missing anywhere
        Log.Debug("All languages for all requested videos exist. Doing nothing.")
        return

    Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s" % (min_score, hearing_impaired))

    return subliminal.download_best_subtitles(video_part_map.keys(), wanted, min_score, hearing_impaired,
                                              providers=config.providers,
                                              provider_configs=config.provider_settings,
                                              pool_class=config.provider_pool,
                                              compute_score=compute_score)
|
||||
|
||||
|
||||
def update_local_media(metadata, media, media_type="movies"):
|
||||
# Look for subtitles
|
||||
if media_type == "movies":
|
||||
@@ -169,10 +130,6 @@ class SubZeroAgent(object):
|
||||
results.Append(MetadataSearchResult(id='null', score=100))
|
||||
|
||||
def update(self, metadata, media, lang):
|
||||
if not config.enable_agent:
|
||||
Log.Debug("Skipping Sub-Zero agent(s)")
|
||||
return
|
||||
|
||||
Log.Debug("Sub-Zero %s, %s update called" % (config.version, self.agent_type))
|
||||
intent = get_intent()
|
||||
|
||||
@@ -185,6 +142,9 @@ class SubZeroAgent(object):
|
||||
config.init_subliminal_patches()
|
||||
videos = media_to_videos(media, kind=self.agent_type)
|
||||
|
||||
# find local media
|
||||
update_local_media(metadata, media, media_type=self.agent_type)
|
||||
|
||||
# media ignored?
|
||||
use_any_parts = False
|
||||
for video in videos:
|
||||
@@ -205,20 +165,24 @@ class SubZeroAgent(object):
|
||||
|
||||
set_refresh_menu_state(media, media_type=self.agent_type)
|
||||
|
||||
# find local media
|
||||
update_local_media(metadata, media, media_type=self.agent_type)
|
||||
|
||||
# scanned_video_part_map = {subliminal.Video: plex_part, ...}
|
||||
scanned_video_part_map = scan_videos(videos, kind=self.agent_type)
|
||||
|
||||
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
|
||||
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score)
|
||||
item_ids = get_media_item_ids(media, kind=self.agent_type)
|
||||
downloaded_subtitles = None
|
||||
if not config.enable_agent:
|
||||
Log.Debug("Skipping Sub-Zero agent(s)")
|
||||
|
||||
whack_missing_parts(scanned_video_part_map)
|
||||
else:
|
||||
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
|
||||
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score)
|
||||
item_ids = get_media_item_ids(media, kind=self.agent_type)
|
||||
|
||||
downloaded_any = False
|
||||
if downloaded_subtitles:
|
||||
save_subtitles(scanned_video_part_map, downloaded_subtitles)
|
||||
downloaded_any = any(downloaded_subtitles.values())
|
||||
|
||||
if downloaded_any:
|
||||
save_subtitles(scanned_video_part_map, downloaded_subtitles, mods=config.default_mods)
|
||||
track_usage("Subtitle", "refreshed", "download", 1)
|
||||
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
@@ -228,6 +192,10 @@ class SubZeroAgent(object):
|
||||
history = get_history()
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle)
|
||||
else:
|
||||
# store subtitle info even if we've downloaded none
|
||||
store_subtitle_info(scanned_video_part_map, dict((k, []) for k in scanned_video_part_map.keys()),
|
||||
None, mode="a")
|
||||
|
||||
update_local_media(metadata, media, media_type=self.agent_type)
|
||||
|
||||
@@ -237,7 +205,7 @@ class SubZeroAgent(object):
|
||||
|
||||
# notify any running tasks about our finished update
|
||||
for item_id in item_ids:
|
||||
scheduler.signal("updated_metadata", item_id)
|
||||
#scheduler.signal("updated_metadata", item_id)
|
||||
|
||||
# resolve existing intent for that id
|
||||
intent.resolve("force", item_id)
|
||||
|
||||
@@ -2,6 +2,22 @@ import sys
|
||||
|
||||
import menu
|
||||
sys.modules["interface.menu"] = menu
|
||||
sys.modules["menu"] = menu
|
||||
|
||||
import menu_helpers
|
||||
sys.modules["interface.menu_helpers"] = menu_helpers
|
||||
sys.modules["interface.menu_helpers"] = menu_helpers
|
||||
|
||||
import advanced
|
||||
sys.modules["interface.advanced"] = advanced
|
||||
|
||||
import main
|
||||
sys.modules["interface.main"] = main
|
||||
|
||||
import refresh_item
|
||||
sys.modules["interface.refresh_item"] = refresh_item
|
||||
|
||||
import item_details
|
||||
sys.modules["interface.item_details"] = item_details
|
||||
|
||||
import sub_mod
|
||||
sys.modules["interface.modification"] = sub_mod
|
||||
|
||||
@@ -0,0 +1,339 @@
|
||||
# coding=utf-8
|
||||
import datetime
|
||||
import StringIO
|
||||
import glob
|
||||
import os
|
||||
import traceback
|
||||
import urlparse
|
||||
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
|
||||
from babelfish import Language
|
||||
|
||||
from subzero.lib.io import FileIO
|
||||
from subzero.constants import PREFIX, PLUGIN_IDENTIFIER
|
||||
from menu_helpers import SubFolderObjectContainer, debounce, set_refresh_menu_state, ZipObject, ObjectContainer
|
||||
from main import fatality
|
||||
from support.helpers import timestamp, pad_title
|
||||
from support.config import config
|
||||
from support.lib import Plex
|
||||
from support.storage import reset_storage, log_storage, get_subtitle_storage
|
||||
from support.scheduler import scheduler
|
||||
from support.items import set_mods_for_part, get_item_kind_from_rating_key
|
||||
|
||||
|
||||
@route(PREFIX + '/advanced')
def AdvancedMenu(randomize=None, header=None, message=None):
    """
    Build the "Advanced" menu container.

    :param randomize: not read in this function (presumably a cache-busting value - TODO confirm)
    :param header: container header; falls back to a default warning text when falsy
    :param message: container message, passed through unchanged
    :return: the populated container, or a container holding only the PIN entry when the
             menu is locked and the PIN has not been entered correctly
    """
    oc = SubFolderObjectContainer(
        header=header or "Internal stuff, pay attention!",
        message=message,
        no_cache=True,
        no_history=True,
        replace_parent=False,
        title2="Advanced",
    )

    if config.lock_advanced_menu and not config.pin_correct:
        # NOTE(review): PinMenu is not among the imports visible in this part of the file - confirm it is in scope
        pin_entry = DirectoryObject(
            key=Callback(PinMenu, randomize=timestamp(), success_go_to="advanced"),
            title=pad_title("Enter PIN"),
            summary="The owner has restricted the access to this menu. Please enter the correct pin",
        )
        oc.add(pin_entry)
        return oc

    oc.add(DirectoryObject(
        key=Callback(TriggerRestart, randomize=timestamp()),
        title=pad_title("Restart the plugin"),
    ))

    # the logs entry is the only one without a randomize value and without a padded title
    oc.add(DirectoryObject(
        key=Callback(GetLogsLink),
        title="Get my logs (copy the appearing link and open it in your browser, please)",
        summary="Copy the appearing link and open it in your browser, please",
    ))

    # (callback target, extra callback kwargs, menu title) - added in exactly this order
    entries = (
        (TriggerBetterSubtitles, {}, "Trigger find better subtitles"),
        (TriggerStorageMaintenance, {}, "Trigger subtitle storage maintenance"),
        (TriggerStorageMigration, {}, "Trigger subtitle storage migration (expensive)"),
        (ApplyDefaultMods, {}, "Apply configured default subtitle mods to all (active) stored subtitles"),
        (ReApplyMods, {}, "Re-Apply mods of all stored subtitles"),
        (LogStorage, {"key": "tasks"}, "Log the plugin's scheduled tasks state storage"),
        (LogStorage, {"key": "ignore"}, "Log the plugin's internal ignorelist storage"),
        (LogStorage, {"key": None}, "Log the plugin's complete state storage"),
        (ResetStorage, {"key": "tasks"}, "Reset the plugin's scheduled tasks state storage"),
        (ResetStorage, {"key": "ignore"}, "Reset the plugin's internal ignorelist storage"),
        (InvalidateCache, {}, "Invalidate Sub-Zero metadata caches (subliminal)"),
    )
    for target, extra_kwargs, label in entries:
        oc.add(DirectoryObject(
            key=Callback(target, randomize=timestamp(), **extra_kwargs),
            title=pad_title(label),
        ))

    return oc
|
||||
|
||||
|
||||
def DispatchRestart():
    """Hand ``Restart`` to ``Thread.CreateTimer`` with an interval argument of 1.0."""
    # presumably seconds - TODO confirm against the framework's Thread API
    restart_delay = 1.0
    Thread.CreateTimer(restart_delay, Restart)
|
||||
|
||||
|
||||
@route(PREFIX + '/advanced/restart/trigger')
@debounce
def TriggerRestart(randomize=None):
    """
    Set the refresh-menu state, dispatch the delayed restart and return the ``fatality`` view.

    :param randomize: not read in this function (presumably a cache-busting value - TODO confirm)
    """
    set_refresh_menu_state("Restarting the plugin")
    DispatchRestart()

    view_options = dict(
        header="Restart triggered, please wait about 5 seconds",
        force_title=" ",
        only_refresh=True,
        replace_parent=True,
        no_history=True,
        randomize=timestamp(),
    )
    return fatality(**view_options)
|
||||
|
||||
|
||||
@route(PREFIX + '/advanced/restart/execute')
def Restart():
    """Call ``restart`` on the ``:/plugins`` Plex endpoint for this plugin's identifier."""
    plugins_endpoint = Plex[":/plugins"]
    plugins_endpoint.restart(PLUGIN_IDENTIFIER)
|
||||
|
||||
|
||||
@route(PREFIX + '/storage/reset', sure=bool)
@debounce
def ResetStorage(key, randomize=None, sure=False):
    """
    Reset the storage identified by ``key`` after an explicit confirmation step.

    :param key: storage key handed to ``reset_storage``
    :param randomize: not read in this function (presumably a cache-busting value - TODO confirm)
    :param sure: when falsy, a confirmation container is returned instead of resetting
    :return: the confirmation container, or the advanced menu with a success message
    """
    if sure:
        reset_storage(key)

        if key == "tasks":
            # reinitialize the scheduler
            scheduler.init_storage()
            scheduler.setup_tasks()

        success_message = 'Information Storage (%s) reset' % key
        return AdvancedMenu(randomize=timestamp(), header='Success', message=success_message)

    # not confirmed yet: offer a single entry that calls back with sure=True
    confirm = SubFolderObjectContainer(no_history=True, title1="Reset subtitle storage", title2="Are you sure?")
    confirm.add(DirectoryObject(
        key=Callback(ResetStorage, key=key, sure=True, randomize=timestamp()),
        title=pad_title("Are you really sure?"),
    ))
    return confirm
|
||||
|
||||
|
||||
@route(PREFIX + '/storage/log')
def LogStorage(key, randomize=None):
    """
    Call ``log_storage`` for ``key`` and return the advanced menu with a success message.

    :param key: storage key handed to ``log_storage``
    :param randomize: not read in this function (presumably a cache-busting value - TODO confirm)
    """
    log_storage(key)
    success_message = 'Information Storage (%s) logged' % key
    return AdvancedMenu(randomize=timestamp(), header='Success', message=success_message)
|
||||
|
||||
|
||||
@route(PREFIX + '/triggerbetter')
@debounce
def TriggerBetterSubtitles(randomize=None):
    """
    Ask the scheduler to dispatch the "FindBetterSubtitles" task, then show the advanced menu.

    :param randomize: not read in this function (presumably a cache-busting value - TODO confirm)
    """
    scheduler.dispatch_task("FindBetterSubtitles")
    menu_options = dict(randomize=timestamp(), header='Success', message='FindBetterSubtitles triggered')
    return AdvancedMenu(**menu_options)
|
||||
|
||||
|
||||
@route(PREFIX + '/triggermaintenance')
@debounce
def TriggerStorageMaintenance(randomize=None):
    """
    Ask the scheduler to dispatch the "SubtitleStorageMaintenance" task, then show the advanced menu.

    :param randomize: not read in this function (presumably a cache-busting value - TODO confirm)
    """
    scheduler.dispatch_task("SubtitleStorageMaintenance")
    menu_options = dict(randomize=timestamp(), header='Success', message='SubtitleStorageMaintenance triggered')
    return AdvancedMenu(**menu_options)
|
||||
|
||||
|
||||
@route(PREFIX + '/triggerstoragemigration')
@debounce
def TriggerStorageMigration(randomize=None):
    """
    Ask the scheduler to dispatch the "MigrateSubtitleStorage" task, then show the advanced menu.

    :param randomize: not read in this function (presumably a cache-busting value - TODO confirm)
    """
    scheduler.dispatch_task("MigrateSubtitleStorage")
    menu_options = dict(randomize=timestamp(), header='Success', message='MigrateSubtitleStorage triggered')
    return AdvancedMenu(**menu_options)
|
||||
|
||||
|
||||
def apply_default_mods(reapply_current=False):
    """
    walks every file in the subtitle storage and calls set_mods_for_part for the current subtitle
    of each part/language that qualifies
    :param reapply_current: False adds the entries of config.default_mods that the subtitle does not
                            have yet; True calls set_mods_for_part with an empty add-list for every
                            subtitle that already has mods (presumably re-running them - confirm
                            against set_mods_for_part)
    :return: None; the number of processed subtitles is written to the debug log
    """
    storage = get_subtitle_storage()
    subs_applied = 0

    for fn in storage.get_all_files():
        data = storage.load(None, filename=fn)
        if not data:
            continue

        video_id = data.video_id
        item_type = get_item_kind_from_rating_key(video_id)
        if not item_type:
            continue

        for part_id, part in data.parts.iteritems():
            for lang, subs in part.iteritems():
                current_sub = subs.get("current")
                if not current_sub:
                    continue

                sub = subs[current_sub]
                if not sub.content:
                    continue

                current_mods = sub.mods or []
                if reapply_current:
                    # nothing to re-apply on a subtitle without mods
                    if not current_mods:
                        continue
                    add_mods = []
                else:
                    # only add the default mods that are still missing
                    add_mods = list(set(config.default_mods).difference(set(current_mods)))
                    if not add_mods:
                        continue

                try:
                    set_mods_for_part(video_id, part_id, Language.fromietf(lang), item_type, add_mods, mode="add")
                except Exception:
                    # one broken subtitle must not abort the whole run; unlike a bare except this
                    # lets SystemExit/KeyboardInterrupt through
                    Log.Error("Couldn't set mods for %s:%s: %s", video_id, part_id, traceback.format_exc())
                    continue

                subs_applied += 1

    Log.Debug("Applied mods to %i items" % subs_applied)
|
||||
|
||||
|
||||
@route(PREFIX + '/applydefaultmods')
@debounce
def ApplyDefaultMods(randomize=None):
    """
    schedules apply_default_mods through Thread.CreateTimer and returns right away
    :param randomize: not read by the body
    :return: the advanced menu telling the user that the operation may take a while
    """
    delay = 1.0
    Thread.CreateTimer(delay, apply_default_mods)

    return AdvancedMenu(randomize=timestamp(), header='Success', message='This may take some time ...')
|
||||
|
||||
|
||||
@route(PREFIX + '/reapplyallmods')
@debounce
def ReApplyMods(randomize=None):
    """
    schedules apply_default_mods(reapply_current=True) through Thread.CreateTimer and returns right away
    :param randomize: not read by the body
    :return: the advanced menu telling the user that the operation may take a while
    """
    delay = 1.0
    Thread.CreateTimer(delay, apply_default_mods, reapply_current=True)

    return AdvancedMenu(randomize=timestamp(), header='Success', message='This may take some time ...')
|
||||
|
||||
|
||||
@route(PREFIX + '/get_logs_link')
def GetLogsLink():
    """
    builds the link to the log download route and shows it to the user for copying
    the link base is taken from the Origin header, then from the Referer header, and falls back to
    the public IP (port 32400) if neither is usable or the base contains "plex.tv"
    :return: container showing the link, or an "unavailable" container without config.plex_token
    """
    if not config.plex_token:
        oc = ObjectContainer(title2="Download Logs", no_cache=True, no_history=True,
                             header="Sorry, feature unavailable",
                             message="Universal Plex token not available")
        return oc

    # try getting the link base via the request in context, first, otherwise use the public ip
    req_headers = Core.sandbox.context.request.headers
    get_external_ip = True
    link_base = ""

    if "Origin" in req_headers:
        link_base = req_headers["Origin"]
        Log.Debug("Using origin-based link_base")
        get_external_ip = False

    elif "Referer" in req_headers:
        parsed = urlparse.urlparse(req_headers["Referer"])

        # a Referer without a hostname is unusable; keep the ip-based fallback in that case
        if parsed.hostname:
            if parsed.port:
                link_base = "%s://%s:%s" % (parsed.scheme, parsed.hostname, parsed.port)
            else:
                # port is None when the Referer relies on the scheme's default port;
                # formatting it would render "host:None"
                link_base = "%s://%s" % (parsed.scheme, parsed.hostname)
            Log.Debug("Using referer-based link_base")
            get_external_ip = False

    if get_external_ip or "plex.tv" in link_base:
        ip = Core.networking.http_request("http://www.plexapp.com/ip.php", cacheTime=7200).content.strip()
        link_base = "https://%s:32400" % ip
        Log.Debug("Using ip-based fallback link_base")

    logs_link = "%s%s?X-Plex-Token=%s" % (link_base, PREFIX + '/logs', config.plex_token)
    oc = ObjectContainer(title2=logs_link, no_cache=True, no_history=True,
                         header="Copy this link and open this in your browser, please",
                         message=logs_link)
    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/logs')
def DownloadLogs():
    """
    zips the plugin logs (everything matching config.plugin_log_path + '*') and the server log
    (config.server_log_path) in memory
    :return: ZipObject wrapping the archive bytes
    """
    archive_buffer = StringIO.StringIO()
    archive = ZipFile(archive_buffer, mode='w', compression=ZIP_DEFLATED)

    plugin_logs = sorted(glob.glob(config.plugin_log_path + '*'))
    for log_path in plugin_logs + [config.server_log_path]:
        # every log is stored flat under its base name
        content = StringIO.StringIO()
        content.write(FileIO.read(log_path))
        archive.writestr(os.path.basename(log_path), content.getvalue())

    # closing writes the archive's central directory into the buffer
    archive.close()

    return ZipObject(archive_buffer.getvalue())
|
||||
|
||||
|
||||
@route(PREFIX + '/invalidatecache')
@debounce
def InvalidateCache(randomize=None):
    """
    invalidates the subliminal cache region
    :param randomize: not read by the body
    :return: the advanced menu carrying a success message
    """
    from subliminal.cache import region as cache_region

    cache_region.invalidate()

    return AdvancedMenu(randomize=timestamp(), header='Success', message='Cache invalidated')
|
||||
|
||||
|
||||
@route(PREFIX + '/pin')
def PinMenu(pin="", randomize=None, success_go_to="channel"):
    """
    digit-by-digit PIN entry; every digit entry calls this menu again with the digit appended
    :param pin: the digits entered so far
    :param randomize: not read by the body
    :param success_go_to: "channel" or "advanced", the menu returned once the PIN matches config.pin
    :return: the target menu on a correct PIN, otherwise the digit selection container
    """
    menu = ObjectContainer(title2="Enter PIN number %s" % (len(pin) + 1), no_cache=True, no_history=True,
                           skip_pin_lock=True)

    if pin == config.pin:
        Dict["pin_correct_time"] = datetime.datetime.now()
        config.locked = False

        if success_go_to == "channel":
            return fatality(force_title="PIN correct", header="PIN correct", no_history=True)
        if success_go_to == "advanced":
            return AdvancedMenu(randomize=timestamp())
        # any other target falls through to the digit menu below

    for digit in range(10):
        digit_label = str(digit)
        menu.add(DirectoryObject(
            key=Callback(PinMenu, randomize=timestamp(), pin=pin + digit_label, success_go_to=success_go_to),
            title=pad_title(digit_label),
        ))

    # "Reset" starts over by calling the menu without a pin
    menu.add(DirectoryObject(
        key=Callback(PinMenu, randomize=timestamp(), success_go_to=success_go_to),
        title=pad_title("Reset"),
    ))
    return menu
|
||||
|
||||
|
||||
@route(PREFIX + '/pin_lock')
def ClearPin(randomize=None):
    """
    locks the menu again: clears the stored PIN timestamp and sets config.locked
    :param randomize: not read by the body
    :return: the main menu
    """
    config.locked = True
    Dict["pin_correct_time"] = None

    return fatality(force_title="Menu locked", header=" ", no_history=True)
|
||||
@@ -0,0 +1,293 @@
|
||||
# coding=utf-8
|
||||
import os
|
||||
|
||||
from sub_mod import SubtitleModificationsMenu
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, add_ignore_options, get_item_task_data, \
|
||||
set_refresh_menu_state
|
||||
|
||||
from refresh_item import RefreshItem
|
||||
from subzero.constants import PREFIX
|
||||
from support.config import config
|
||||
from support.helpers import timestamp, cast_bool, df, get_language
|
||||
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub
|
||||
from support.lib import Plex
|
||||
from support.plex_media import get_plex_metadata, scan_videos, PMSMediaProxy
|
||||
from support.scheduler import scheduler
|
||||
from support.storage import get_subtitle_storage
|
||||
|
||||
|
||||
# fixme: needs kwargs cleanup
|
||||
|
||||
@route(PREFIX + '/item/{rating_key}/actions')
@debounce
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None):
    """
    displays the item details menu of an item that doesn't contain any deeper tree, such as a movie or an episode
    :param rating_key: rating key of the item
    :param title: menu title; prefixed with "<base_title> > " if base_title is given
    :param base_title: optional title prefix
    :param item_title: title used in the entry labels and passed on to the callbacks
    :param randomize: not read by the body
    :return: container with refresh entries, one entry per existing part/configured language and the
             ignore options
    """
    from interface.main import IgnoreMenu

    title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)

    # fetched once and used for the refresh entries as well as the media/part listing
    item = get_item(rating_key)
    current_kind = get_item_kind_from_rating_key(rating_key)

    timeout = 30

    oc = SubFolderObjectContainer(title2=title, replace_parent=True)

    # add back to season for episode
    if current_kind == "episode":
        from interface.menu import MetadataMenu
        show = get_item(item.show.rating_key)
        season = get_item(item.season.rating_key)

        oc.add(DirectoryObject(
            key=Callback(MetadataMenu, rating_key=season.rating_key, title=season.title, base_title=show.title,
                         previous_item_type="show", previous_rating_key=show.rating_key,
                         display_items=True, randomize=timestamp()),
            title=u"< Back to %s" % season.title,
            summary="Back to %s > %s" % (show.title, season.title),
            thumb=season.thumb or default_thumb
        ))

    oc.add(DirectoryObject(
        key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, randomize=timestamp(),
                     timeout=timeout * 1000),
        title=u"Refresh: %s" % item_title,
        summary="Refreshes the %s, possibly searching for missing and picking up new subtitles on disk" % current_kind,
        thumb=item.thumb or default_thumb
    ))
    oc.add(DirectoryObject(
        key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(),
                     timeout=timeout * 1000),
        title=u"Force-find subtitles: %s" % item_title,
        summary="Issues a forced refresh, ignoring known subtitles and searching for new ones",
        thumb=item.thumb or default_thumb
    ))

    # get stored subtitle info for item id
    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)

    # look for subtitles for all available media parts and all of their languages
    has_multiple_parts = len(item.media) > 1
    part_index = 0
    for media in item.media:
        for part in media.parts:
            filename = os.path.basename(part.file)

            # parts whose file is not on disk get no entries and are not counted
            if not os.path.exists(part.file):
                continue

            part_id = str(part.id)
            part_index += 1

            # iterate through all configured languages
            for lang in config.lang_list:
                # get corresponding stored subtitle data for that media part (physical media item), for language
                current_sub = stored_subs.get_any(part_id, lang)
                current_sub_id = None
                current_sub_provider_name = None
                current_score = None

                part_index_addon = ""
                part_summary_addon = ""
                if has_multiple_parts:
                    part_index_addon = u"File %s: " % part_index
                    part_summary_addon = "%s " % filename

                summary = u"%sNo current subtitle in storage" % part_summary_addon
                if current_sub:
                    current_sub_id = current_sub.id
                    current_sub_provider_name = current_sub.provider_name
                    current_score = current_sub.score

                    summary = u"%sCurrent subtitle: %s (added: %s, %s), Language: %s, Score: %i, Storage: %s" % \
                              (part_summary_addon, current_sub.provider_name, df(current_sub.date_added),
                               current_sub.mode_verbose, lang, current_sub.score, current_sub.storage_type)

                # both target menus receive the same set of arguments
                callback_kwargs = dict(rating_key=rating_key, part_id=part_id, title=title,
                                       item_title=item_title, language=lang, language_name=lang.name,
                                       current_id=current_sub_id,
                                       item_type=item.type, filename=filename, current_data=summary,
                                       randomize=timestamp(), current_provider=current_sub_provider_name,
                                       current_score=current_score)

                if current_sub:
                    # a stored subtitle exists: offer the actions menu
                    oc.add(DirectoryObject(
                        key=Callback(SubtitleOptionsMenu, **callback_kwargs),
                        title=u"%sActions for %s subtitle" % (part_index_addon, lang.name),
                        summary=summary
                    ))
                else:
                    # nothing stored: go straight to the list of available subtitles
                    oc.add(DirectoryObject(
                        key=Callback(ListAvailableSubsForItemMenu, **callback_kwargs),
                        title=u"%sList %s subtitles" % (part_index_addon, lang.name),
                        summary=summary
                    ))

    add_ignore_options(oc, "videos", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/current_sub/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleOptionsMenu(**kwargs):
    """
    displays the actions available for the subtitle of one part/language of an item
    :param kwargs: read here: title, rating_key, part_id, language, item_title, current_data and
                   language_name; everything except randomize is passed on to the sub menus
    :return: container with a back entry, the subtitle list entry and, if a current subtitle
             exists, the modification entry
    """
    oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)

    current_sub = get_current_sub(kwargs["rating_key"], kwargs["part_id"], kwargs["language"])[0]

    # every callback below gets a fresh randomize value, so the incoming one must not be passed on
    # a second time; it may be absent, hence the default
    kwargs.pop("randomize", None)

    oc.add(DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
                     title=kwargs["title"], randomize=timestamp()),
        title=u"< Back to %s" % kwargs["title"],
        summary=kwargs["current_data"],
        thumb=default_thumb
    ))
    oc.add(DirectoryObject(
        key=Callback(ListAvailableSubsForItemMenu, randomize=timestamp(), **kwargs),
        title=u"List %s subtitles" % kwargs["language_name"],
        summary=kwargs["current_data"]
    ))
    if current_sub:
        oc.add(DirectoryObject(
            key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
            title=u"Modify %s subtitle" % kwargs["language_name"],
            summary=u"Currently applied mods: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
        ))
    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/search/{rating_key}/{part_id}', force=bool)
@debounce
def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item_title=None, filename=None,
                                 item_type="episode", language=None, language_name=None, force=False, current_id=None,
                                 current_data=None,
                                 current_provider=None, current_score=None, randomize=None):
    """
    lists the subtitles found by the "AvailableSubsForItem" task for one part/language of an item and
    dispatches that task if there are no results yet (or force is set) and it isn't running already
    :param rating_key: rating key of the item (required)
    :param part_id: id of the media part (required)
    :param title: menu title
    :param item_title: title of the item, passed on to the callbacks
    :param filename: file name shown in the entry summaries
    :param item_type: "episode" scans the part as series, anything else as movie
    :param language: language to search for
    :param language_name: not read by the body
    :param force: dispatch a new search even if results exist
    :param current_id: id of the currently stored subtitle, used to label the matching result "Current"
    :param current_data: summary text of the back entry
    :param current_provider: provider of the current subtitle, shown in the summaries
    :param current_score: score of the current subtitle, shown in the summaries
    :param randomize: not read by the body
    :return: container with the back entry, a search/refresh entry and one entry per found subtitle
    """
    # both identifiers are needed; `assert a, b` would only test a and use b as the message
    assert rating_key and part_id, "rating_key and part_id are required"

    running = scheduler.is_task_running("AvailableSubsForItem")
    search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)

    if (search_results is None or force) and not running:
        scheduler.dispatch_task("AvailableSubsForItem", rating_key=rating_key, item_type=item_type, part_id=part_id,
                                language=language)
        running = True

    oc = SubFolderObjectContainer(title2=unicode(title), replace_parent=True)
    oc.add(DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=rating_key, item_title=item_title, title=title, randomize=timestamp()),
        title=u"< Back to %s" % title,
        summary=current_data,
        thumb=default_thumb
    ))

    metadata = get_plex_metadata(rating_key, part_id, item_type)
    scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)

    if not scanned_parts:
        Log.Error("Couldn't list available subtitles for %s", rating_key)
        return oc

    video, plex_part = scanned_parts.items()[0]

    video_display_data = [video.format] if video.format else []
    if video.release_group:
        video_display_data.append(u"by %s" % video.release_group)
    video_display_data = " ".join(video_display_data)

    current_display = (u"Current: %s (%s) " % (current_provider, current_score) if current_provider else "")
    file_summary = u"%sFilename: %s" % (current_display, filename)

    def own_callback(**extra):
        # callback to this very menu with the current arguments and a fresh randomize value
        return Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title,
                        language=language, filename=filename, current_data=current_data,
                        part_id=part_id, title=title, current_id=current_id, item_type=item_type,
                        current_provider=current_provider, current_score=current_score,
                        randomize=timestamp(), **extra)

    if not running:
        oc.add(DirectoryObject(
            key=own_callback(force=True),
            title=u"Search for %s subs (%s)" % (get_language(language).name, video_display_data),
            summary=file_summary,
            thumb=default_thumb
        ))

        if search_results == "found_none":
            oc.add(DirectoryObject(
                key=own_callback(force=True),
                title=u"No subtitles found",
                summary=file_summary,
                thumb=default_thumb
            ))
    else:
        # search in progress: offer a plain (non-forced) reload of this menu
        oc.add(DirectoryObject(
            key=own_callback(),
            title=u"Searching for %s subs (%s), refresh here ..." % (get_language(language).name, video_display_data),
            summary=file_summary,
            thumb=default_thumb
        ))

    if not search_results or search_results == "found_none":
        return oc

    # list every subtitle id only once
    seen = []
    for subtitle in search_results:
        if subtitle.id in seen:
            continue
        seen.append(subtitle.id)

        wrong_fps_addon = ""
        if subtitle.wrong_fps:
            wrong_fps_addon = " (wrong FPS, sub: %s, media: %s)" % (subtitle.fps, plex_part.fps)

        oc.add(DirectoryObject(
            key=Callback(TriggerDownloadSubtitle, rating_key=rating_key, randomize=timestamp(), item_title=item_title,
                         subtitle_id=str(subtitle.id), language=language),
            title=u"%s: %s, score: %s%s" % ("Available" if current_id != subtitle.id else "Current",
                                            subtitle.provider_name, subtitle.score, wrong_fps_addon),
            summary=u"Release: %s, Matches: %s" % (subtitle.release_info, ", ".join(subtitle.matches)),
            thumb=default_thumb
        ))

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/download_subtitle/{rating_key}')
@debounce
def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None, language=None, randomize=None):
    """
    looks up subtitle_id in the stored "AvailableSubsForItem" results and dispatches its download
    :param rating_key: rating key of the item
    :param subtitle_id: id of the subtitle to download, compared against str(subtitle.id)
    :param item_title: title shown in the refresh state; rating_key is used if it is empty
    :param language: language the search results were stored for
    :param randomize: not read by the body
    :return: the main menu
    """
    from interface.main import fatality

    # parenthesized on purpose: "%" binds tighter than "or", which would make the fallback unreachable
    set_refresh_menu_state("Downloading subtitle for %s" % (item_title or rating_key))
    search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)

    # the stored results can be missing or the "found_none" marker; neither holds subtitles
    if not search_results or search_results == "found_none":
        search_results = ()

    download_subtitle = next((subtitle for subtitle in search_results if str(subtitle.id) == subtitle_id), None)

    if not download_subtitle:
        Log.Error(u"Something went horribly wrong")
    else:
        scheduler.dispatch_task("DownloadSubtitleForItem", rating_key=rating_key, subtitle=download_subtitle)

    return fatality(randomize=timestamp(), header=" ", replace_parent=True)
|
||||
@@ -0,0 +1,420 @@
|
||||
# coding=utf-8
|
||||
|
||||
from subzero.constants import PREFIX, TITLE, ART
|
||||
from support.config import config
|
||||
from support.helpers import pad_title, timestamp, df, get_plex_item_display_title
|
||||
from support.scheduler import scheduler
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, \
|
||||
get_item_kind_from_item
|
||||
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_ignore_options,\
|
||||
ObjectContainer
|
||||
from item_details import ItemDetailsMenu
|
||||
|
||||
|
||||
@handler(PREFIX, TITLE if not config.is_development else TITLE + " DEV", art=ART, thumb=main_icon)
@route(PREFIX)
def fatality(randomize=None, force_title=None, header=None, message=None, only_refresh=False, no_history=False,
             replace_parent=False):
    """
    subzero main menu
    :param randomize: not read by the body
    :param force_title: not read by the body; the title is always config.full_version
    :param header: container header, falls back to the title if empty
    :param message: container message
    :param only_refresh: True leaves out everything but the refresh entry (and the re-lock entry if a PIN is set)
    :param no_history: passed on to the container
    :param replace_parent: passed on to the container
    :return: the main menu container, or a container holding only a PIN/permission/not-enabled notice
    """
    from interface.advanced import PinMenu, ClearPin, AdvancedMenu
    from interface.menu import RefreshMissing, IgnoreListMenu, HistoryMenu

    def refresh_state_summary():
        # current and last refresh state as stored in Dict, with placeholders for missing/empty values
        current_state = (Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle"
        last_state = (Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
        return "Current state: %s; Last state: %s" % (current_state, last_state)

    title = config.full_version  # force_title if force_title is not None else config.full_version
    oc = ObjectContainer(title1=title, title2=title, header=unicode(header) if header else title, message=message,
                         no_history=no_history,
                         replace_parent=replace_parent, no_cache=True)

    # always re-check permissions
    config.refresh_permissions_status()

    # always re-check enabled sections
    config.refresh_enabled_sections()

    # locked menu: only offer the PIN entry
    if config.lock_menu and not config.pin_correct:
        oc.add(DirectoryObject(
            key=Callback(PinMenu, randomize=timestamp()),
            title=pad_title("Enter PIN"),
            summary="The owner has restricted the access to this menu. Please enter the correct pin",
        ))
        return oc

    # missing permissions: one notice per affected library folder
    if not config.permissions_ok and config.missing_permissions:
        for library_title, folder in config.missing_permissions:
            oc.add(DirectoryObject(
                key=Callback(fatality, randomize=timestamp()),
                title=pad_title("Insufficient permissions"),
                summary="Insufficient permissions on library %s, folder: %s" % (library_title, folder),
            ))
        return oc

    if not config.enabled_sections:
        oc.add(DirectoryObject(
            key=Callback(fatality, randomize=timestamp()),
            title=pad_title("I'm not enabled!"),
            summary="Please enable me for some of your libraries in your server settings; currently I do nothing",
        ))
        return oc

    if not only_refresh:
        if Dict["current_refresh_state"]:
            oc.add(DirectoryObject(
                key=Callback(fatality, force_title=" ", randomize=timestamp()),
                title=pad_title("Working ... refresh here"),
                summary=refresh_state_summary()
            ))

        oc.add(DirectoryObject(
            key=Callback(OnDeckMenu),
            title="On-deck items",
            summary="Shows the current on deck items and allows you to individually (force-) refresh their metadata/"
                    "subtitles.",
            thumb=R("icon-ondeck.jpg")
        ))

        if "last_played_items" in Dict and Dict["last_played_items"]:
            oc.add(DirectoryObject(
                key=Callback(RecentlyPlayedMenu),
                title=pad_title("Recently played items"),
                summary="Shows the %i recently played items and allows you to individually (force-) refresh their "
                        "metadata/subtitles." % config.store_recently_played_amount,
                thumb=R("icon-played.jpg")
            ))

        oc.add(DirectoryObject(
            key=Callback(RecentlyAddedMenu),
            title="Recently-added items",
            summary="Shows the recently added items per section.",
            thumb=R("icon-added.jpg")
        ))
        oc.add(DirectoryObject(
            key=Callback(RecentMissingSubtitlesMenu, randomize=timestamp()),
            title="Items with missing subtitles",
            summary="Shows the items honoring the configured 'Item age to be considered recent'-setting (%s)"
                    " and allowing you to individually (force-) refresh their metadata/subtitles. " %
                    Prefs["scheduler.item_is_recent_age"],
            thumb=R("icon-missing.jpg")
        ))
        oc.add(DirectoryObject(
            key=Callback(SectionsMenu),
            title="Browse all items",
            summary="Go through your whole library and manage your ignore list. You can also "
                    "(force-) refresh the metadata/subtitles of individual items.",
            thumb=R("icon-browse.jpg")
        ))

        # progress while the search task is on display, run statistics otherwise
        task_name = "SearchAllRecentlyAddedMissing"
        task = scheduler.task(task_name)

        if task.ready_for_display:
            task_state = "Running: %s/%s (%s%%)" % (task.items_done, task.items_searching, task.percentage)
        else:
            last_run = df(scheduler.last_run(task_name)) or "never"
            next_run = df(scheduler.next_run(task_name)) or "never"
            last_runtime = str(task.last_run_time).split(".")[0]
            task_state = "Last scheduler run: %s; Next scheduled run: %s; Last runtime: %s" % (
                last_run, next_run, last_runtime)

        oc.add(DirectoryObject(
            key=Callback(RefreshMissing, randomize=timestamp()),
            title="Search for missing subtitles (in recently-added items, max-age: %s)" % Prefs[
                "scheduler.item_is_recent_age"],
            summary="Automatically run periodically by the scheduler, if configured. %s" % task_state,
            thumb=R("icon-search.jpg")
        ))

        oc.add(DirectoryObject(
            key=Callback(IgnoreListMenu),
            title="Display ignore list (%d)" % len(ignore_list),
            summary="Show the current ignore list (mainly used for the automatic tasks)",
            thumb=R("icon-ignore.jpg")
        ))

        oc.add(DirectoryObject(
            key=Callback(HistoryMenu),
            title="History",
            summary="Show the last %i downloaded subtitles" % int(Prefs["history_size"]),
            thumb=R("icon-history.jpg")
        ))

    oc.add(DirectoryObject(
        key=Callback(fatality, force_title=" ", randomize=timestamp()),
        title=pad_title("Refresh"),
        summary=refresh_state_summary(),
        thumb=R("icon-refresh.jpg")
    ))

    # add re-lock after pin unlock
    if config.pin:
        oc.add(DirectoryObject(
            key=Callback(ClearPin, randomize=timestamp()),
            title=pad_title("Re-lock menu(s)"),
            summary="Enabled the PIN again for menu(s)"
        ))

    if not only_refresh:
        oc.add(DirectoryObject(
            key=Callback(AdvancedMenu),
            title=pad_title("Advanced functions"),
            summary="Use at your own risk",
            thumb=R("icon-advanced.jpg")
        ))

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/on_deck')
def OnDeckMenu(message=None):
    """
    displays the items on deck, as delivered by get_on_deck_items
    :param message: not read by the body
    :return: the result of mergedItemsMenu
    """
    menu_title = "Items On Deck"
    return mergedItemsMenu(title=menu_title, base_title=menu_title, itemGetter=get_on_deck_items)
|
||||
|
||||
|
||||
@route(PREFIX + '/recently_played')
def RecentlyPlayedMenu():
    """
    displays the episodes and movies whose rating keys are stored in Dict["last_played_items"]
    :return: container with one entry per episode/movie, leading to its details menu
    """
    base_title = "Recently Played"
    oc = SubFolderObjectContainer(title2=base_title, replace_parent=True)

    played_items = [get_item(rating_key) for rating_key in Dict["last_played_items"]]
    for played in played_items:
        kind = get_item_kind_from_item(played)

        if kind == "episode":
            display_title = get_plex_item_display_title(played, "show", parent=played.season, section_title=None,
                                                        parent_title=played.show.title)
        elif kind == "movie":
            display_title = get_plex_item_display_title(played, kind, section_title=None)
        else:
            # only episodes and movies are listed
            continue

        oc.add(DirectoryObject(
            title=display_title,
            key=Callback(ItemDetailsMenu, title=base_title + " > " + played.title, item_title=played.title,
                         rating_key=played.rating_key)
        ))

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/recently_added')
def RecentlyAddedMenu(message=None):
    """
    displays the items recently added per section; delegates to SectionsMenu with the
    "recently_added" items key and without ignore options
    :param message: not read by the body
    :return: the result of SectionsMenu
    """
    section_options = {
        "base_title": "Recently added",
        "section_items_key": "recently_added",
        "ignore_options": False,
    }
    return SectionsMenu(**section_options)
|
||||
|
||||
|
||||
@route(PREFIX + '/recent', force=bool)
@debounce
def RecentMissingSubtitlesMenu(force=False, randomize=None):
    """
    displays the result of the "MissingSubtitles" task and dispatches that task if there is no
    result yet (or force is set) and it isn't running already
    :param force: dispatch the task even if a result exists
    :param randomize: not read by the body
    :return: container with an update/refresh entry followed by one entry per item missing subtitles
    """
    title = "Items with missing subtitles"
    oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)

    running = scheduler.is_task_running("MissingSubtitles")
    task_data = scheduler.get_task_data("MissingSubtitles")
    missing_items = None
    if task_data:
        missing_items = task_data["missing_subtitles"]

    if not running and (force or missing_items is None):
        scheduler.dispatch_task("MissingSubtitles")
        running = True

    # idle: offer a forced update; busy: offer a plain reload of this menu
    if running:
        entry_title = u"Updating, refresh here ..."
    else:
        entry_title = u"Get items with missing subtitles"

    oc.add(DirectoryObject(
        key=Callback(RecentMissingSubtitlesMenu, force=not running, randomize=timestamp()),
        title=entry_title,
        thumb=default_thumb
    ))

    if missing_items is None:
        return oc

    for added_at, item_id, item_title, item, missing_languages in missing_items:
        oc.add(DirectoryObject(
            key=Callback(ItemDetailsMenu, title=title + " > " + item_title, item_title=item_title,
                         rating_key=item_id),
            title=item_title,
            summary="Missing: %s" % ", ".join(l.name for l in missing_languages),
            thumb=get_item_thumb(item) or default_thumb
        ))

    return oc
|
||||
|
||||
|
||||
def mergedItemsMenu(title, itemGetter, itemGetterKwArgs=None, base_title=None, *args, **kwargs):
    """
    displays an item list of dynamic kinds of items
    :param title: title of the container
    :param itemGetter: callable returning (kind, title, item_id, deeper, item) tuples
    :param itemGetterKwArgs: optional dict of keyword arguments for itemGetter; explicit kwargs win
    :param base_title: prefix for the title of the item details menus; defaults to title
    :param args: positional arguments for itemGetter
    :param kwargs: keyword arguments for itemGetter
    :return: container with one entry per item, leading to its details menu
    """
    oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)

    getter_kwargs = dict(itemGetterKwArgs or {})
    getter_kwargs.update(kwargs)
    items = itemGetter(*args, **getter_kwargs)

    # without a base title the string concatenation below would fail
    if base_title is None:
        base_title = title

    for kind, item_title, item_id, deeper, item in items:
        oc.add(DirectoryObject(
            title=item_title,
            key=Callback(ItemDetailsMenu, title=base_title + " > " + item_title, item_title=item_title,
                         rating_key=item_id),
            thumb=get_item_thumb(item) or default_thumb
        ))

    return oc
|
||||
|
||||
|
||||
def determine_section_display(kind, item, pass_kwargs=None):
    """
    returns the menu function for a section based on the size of it (amount of items)
    :param kind: not read by the body
    :param item: section item; its size attribute decides between the two menus
    :param pass_kwargs: optional dict; a "section_items_key" other than "all" always selects SectionMenu
    :return: SectionFirstLetterMenu for complete listings of more than 80 items, otherwise SectionMenu
    """
    shows_subset = bool(pass_kwargs) and pass_kwargs.get("section_items_key", "all") != "all"

    # only complete listings of big sections get indexed by first letter
    if not shows_subset and item.size > 80:
        return SectionFirstLetterMenu
    return SectionMenu
|
||||
|
||||
|
||||
@route(PREFIX + '/ignore/set/{kind}/{rating_key}/{todo}/sure={sure}', kind=str, rating_key=str, todo=str, sure=bool)
def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
    """
    displays the ignore options for a menu and, once confirmed, adds/removes the item to/from the ignore list
    :param kind: key into ignore_list
    :param rating_key: rating key of the item
    :param title: title of the item, used in messages and stored with the ignore list entry
    :param sure: False renders the confirmation container, True applies todo
    :param todo: "add" or "remove"; anything else leaves the ignore list untouched
    :return: the confirmation container, or the main menu with a header describing the outcome
    """
    is_ignored = rating_key in ignore_list[kind]

    if not sure:
        if is_ignored:
            verb, preposition, next_todo = "Remove", "from", "remove"
        else:
            verb, preposition, next_todo = "Add", "to", "add"

        oc = SubFolderObjectContainer(no_history=True, replace_parent=True, title1="%s %s %s %s the ignore list" % (
            verb, ignore_list.verbose(kind), title, preposition), title2="Are you sure?")
        oc.add(DirectoryObject(
            key=Callback(IgnoreMenu, kind=kind, rating_key=rating_key, title=title, sure=True, todo=next_todo),
            title=pad_title("Are you sure?"),
        ))
        return oc

    rel = ignore_list[kind]

    if todo == "remove" and is_ignored:
        rel.remove(rating_key)
        Log.Info("Removed %s (%s) from the ignore list", title, rating_key)
        ignore_list.remove_title(kind, rating_key)
        ignore_list.save()
        state = "removed from"
    elif todo == "add" and not is_ignored:
        rel.append(rating_key)
        Log.Info("Added %s (%s) to the ignore list", title, rating_key)
        ignore_list.add_title(kind, rating_key, title)
        ignore_list.save()
        state = "added to"
    else:
        # unknown todo, or the list already is in the requested state
        return fatality(force_title=" ", header="Didn't change the ignore list", no_history=True)

    return fatality(force_title=" ", header="%s %s the ignore list" % (title, state), no_history=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/sections')
def SectionsMenu(base_title="Sections", section_items_key="all", ignore_options=True):
    """
    displays the menu for all sections
    :param base_title: passed on to the section menus
    :param section_items_key: passed on to the section menus
    :param ignore_options: passed on to the section menus
    :return: the result of dig_tree for the sections
    """
    items = get_all_items("sections")

    oc = SubFolderObjectContainer(title2="Sections", no_cache=True, no_history=True)
    section_kwargs = {
        "base_title": base_title,
        "section_items_key": section_items_key,
        "ignore_options": ignore_options,
    }

    # no fixed menu is passed; determine_section_display picks one per section
    return dig_tree(oc, items, None,
                    menu_determination_callback=determine_section_display, pass_kwargs=section_kwargs,
                    fill_args={"title": "section_title"})
|
||||
|
||||
|
||||
@route(PREFIX + '/section', ignore_options=bool)
def SectionMenu(rating_key, title=None, base_title=None, section_title=None, ignore_options=True,
                section_items_key="all"):
    """
    Display the contents of a section.

    :param rating_key: key of the section whose items are listed
    :param title: section title; converted with unicode() before use
    :param base_title: optional breadcrumb prefix; when empty the section title is used on its own
    :param section_title: accepted for route compatibility; the value is replaced by the converted title
    :param ignore_options: when true, the ignore-list entry for this section is added
    :param section_items_key: key passed to get_all_items to select which items to list
    :return: the result of dig_tree for the section container
    """
    from menu import MetadataMenu

    items = get_all_items(key=section_items_key, value=rating_key, base="library/sections")
    # only the "deeper" flag is needed here; the kind is not used
    _, deeper = get_items_info(items)

    section_title = unicode(title)
    # base_title defaults to None, so only prepend the breadcrumb when one was supplied
    title = base_title + " > " + section_title if base_title else section_title

    oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
    if ignore_options:
        add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)

    return dig_tree(oc, items, MetadataMenu,
                    pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": "section",
                                 "previous_rating_key": rating_key})
|
||||
|
||||
|
||||
@route(PREFIX + '/section/firstLetter', deeper=bool)
def SectionFirstLetterMenu(rating_key, title=None, base_title=None, section_title=None, ignore_options=True,
                           section_items_key="all"):
    """
    Display the contents of a section indexed by its first char (A-Z, 0-9...).

    :param rating_key: key of the section
    :param title: section title; converted with unicode() before use
    :param base_title: optional breadcrumb prefix; when empty the title is used on its own
    :param section_title: used as the container title and for the ignore-list entry
    :param ignore_options: ignored
    :param section_items_key: ignored
    :return: the result of dig_tree for the first-letter container
    """
    from menu import FirstLetterMetadataMenu

    items = get_all_items(key="first_character", value=rating_key, base="library/sections")
    # only the "deeper" flag is needed here; the kind is not used
    _, deeper = get_items_info(items)

    title = unicode(title)
    oc = SubFolderObjectContainer(title2=section_title, no_cache=True, no_history=True)
    # base_title defaults to None, so only prepend the breadcrumb when one was supplied
    if base_title:
        title = base_title + " > " + title

    add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)

    # shortcut to the unindexed listing of the whole section
    oc.add(DirectoryObject(
        key=Callback(SectionMenu, title="All", base_title=title, rating_key=rating_key, ignore_options=False),
        title="All"
    ))
    return dig_tree(oc, items, FirstLetterMetadataMenu, force_rating_key=rating_key, fill_args={"key": "key"},
                    pass_kwargs={"base_title": title, "display_items": deeper, "previous_rating_key": rating_key})
|
||||
+72
-785
@@ -1,27 +1,23 @@
|
||||
# coding=utf-8
|
||||
import locale
|
||||
import logging
|
||||
import datetime
|
||||
import logger
|
||||
import os
|
||||
import StringIO
|
||||
import glob
|
||||
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
import logger
|
||||
|
||||
from item_details import ItemDetailsMenu
|
||||
from refresh_item import RefreshItem
|
||||
from menu_helpers import add_ignore_options, dig_tree, set_refresh_menu_state, \
|
||||
should_display_ignore, enable_channel_wrapper, default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, \
|
||||
ZipObject
|
||||
from subzero.constants import TITLE, ART, ICON, PREFIX, PLUGIN_IDENTIFIER, DEPENDENCY_MODULE_NAMES
|
||||
from support.background import scheduler
|
||||
should_display_ignore, enable_channel_wrapper, default_thumb, debounce, ObjectContainer, SubFolderObjectContainer
|
||||
from main import fatality, IgnoreMenu
|
||||
from advanced import DispatchRestart
|
||||
from subzero.constants import ART, PREFIX, DEPENDENCY_MODULE_NAMES
|
||||
from support.scheduler import scheduler
|
||||
from support.config import config
|
||||
from support.helpers import pad_title, timestamp, get_language, df, cast_bool
|
||||
from support.helpers import timestamp, df
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_item, get_on_deck_items, refresh_item, get_all_items, get_items_info, \
|
||||
get_item_thumb, get_item_kind_from_rating_key
|
||||
from support.lib import Plex
|
||||
from subzero.lib.io import FileIO
|
||||
from support.plex_media import get_plex_metadata, scan_videos
|
||||
from support.storage import reset_storage, log_storage, get_subtitle_storage
|
||||
from support.items import get_all_items, get_items_info, \
|
||||
get_item_kind_from_rating_key, get_item
|
||||
|
||||
# init GUI
|
||||
ObjectContainer.art = R(ART)
|
||||
@@ -35,412 +31,6 @@ route = enable_channel_wrapper(route)
|
||||
# noinspection PyUnboundLocalVariable
|
||||
handler = enable_channel_wrapper(handler)
|
||||
|
||||
main_icon = ICON if not config.is_development else "icon-dev.jpg"
|
||||
|
||||
|
||||
@handler(PREFIX, TITLE if not config.is_development else TITLE + " DEV", art=ART, thumb=main_icon)
@route(PREFIX)
def fatality(randomize=None, force_title=None, header=None, message=None, only_refresh=False, no_history=False,
             replace_parent=False):
    """
    Sub-Zero main menu.

    :param randomize: not read by this function
    :param force_title: not read by this function; the title is always config.full_version
    :param header: container header; the title is used when this is empty
    :param message: passed through to the container
    :param only_refresh: when set, the browse/search/ignore/history/advanced entries are left out
    :param no_history: passed through to the container
    :param replace_parent: passed through to the container
    :return: the populated ObjectContainer
    """

    def refresh_summary():
        # evaluated on demand so Dict is read at the moment the entry is built
        current = (Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle"
        previous = (Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
        return "Current state: %s; Last state: %s" % (current, previous)

    version_title = config.full_version
    oc = ObjectContainer(title1=version_title, title2=version_title,
                         header=unicode(header) if header else version_title, message=message,
                         no_history=no_history, replace_parent=replace_parent, no_cache=True)

    # permissions and enabled sections are re-checked on every visit
    config.refresh_permissions_status()
    config.refresh_enabled_sections()

    # locked menu: offer nothing but the PIN entry
    if config.lock_menu and not config.pin_correct:
        oc.add(DirectoryObject(
            key=Callback(PinMenu, randomize=timestamp()),
            title=pad_title("Enter PIN"),
            summary="The owner has restricted the access to this menu. Please enter the correct pin",
        ))
        return oc

    # missing permissions: one entry per affected library folder
    if not config.permissions_ok and config.missing_permissions:
        for library_title, folder in config.missing_permissions:
            oc.add(DirectoryObject(
                key=Callback(fatality, randomize=timestamp()),
                title=pad_title("Insufficient permissions"),
                summary="Insufficient permissions on library %s, folder: %s" % (library_title, folder),
            ))
        return oc

    # not enabled for any section
    if not config.enabled_sections:
        oc.add(DirectoryObject(
            key=Callback(fatality, randomize=timestamp()),
            title=pad_title("I'm not enabled!"),
            summary="Please enable me for some of your libraries in your server settings; currently I do nothing",
        ))
        return oc

    if not only_refresh:
        if Dict["current_refresh_state"]:
            oc.add(DirectoryObject(
                key=Callback(fatality, force_title=" ", randomize=timestamp()),
                title=pad_title("Working ... refresh here"),
                summary=refresh_summary()
            ))

        oc.add(DirectoryObject(
            key=Callback(OnDeckMenu),
            title="On Deck items",
            summary="Shows the current on deck items and allows you to individually (force-) refresh their metadata/"
                    "subtitles.",
            thumb=R("icon-ondeck.jpg")
        ))
        oc.add(DirectoryObject(
            key=Callback(RecentlyAddedMenu),
            title="Recently Added items",
            summary="Shows the recently added items per section.",
            thumb=R("icon-recent.jpg")
        ))

        missing_summary = ("Shows the items honoring the configured 'Item age to be considered recent'-setting (%s)"
                           " and allowing you to individually (force-) refresh their metadata/subtitles. " %
                           Prefs["scheduler.item_is_recent_age"])
        oc.add(DirectoryObject(
            key=Callback(RecentMissingSubtitlesMenu, randomize=timestamp()),
            title="Items with missing subtitles",
            summary=missing_summary,
            thumb=R("icon-missing.jpg")
        ))
        oc.add(DirectoryObject(
            key=Callback(SectionsMenu),
            title="Browse all items",
            summary="Go through your whole library and manage your ignore list. You can also "
                    "(force-) refresh the metadata/subtitles of individual items.",
            thumb=R("icon-browse.jpg")
        ))

        # progress or schedule information of the missing-subtitles search task
        task_name = "SearchAllRecentlyAddedMissing"
        task = scheduler.task(task_name)
        if task.ready_for_display:
            progress = (len(task.items_done), len(task.items_searching), task.percentage)
            task_state = "Running: %s/%s (%s%%)" % progress
        else:
            last_run = df(scheduler.last_run(task_name)) or "never"
            next_run = df(scheduler.next_run(task_name)) or "never"
            # drop the fractional seconds from the runtime
            runtime = str(task.last_run_time).split(".")[0]
            task_state = "Last scheduler run: %s; Next scheduled run: %s; Last runtime: %s" % (
                last_run, next_run, runtime)

        oc.add(DirectoryObject(
            key=Callback(RefreshMissing, randomize=timestamp()),
            title="Search for missing subtitles (in recently-added items, max-age: %s)" % Prefs[
                "scheduler.item_is_recent_age"],
            summary="Automatically run periodically by the scheduler, if configured. %s" % task_state,
            thumb=R("icon-search.jpg")
        ))

        oc.add(DirectoryObject(
            key=Callback(IgnoreListMenu),
            title="Display ignore list (%d)" % len(ignore_list),
            summary="Show the current ignore list (mainly used for the automatic tasks)",
            thumb=R("icon-ignore.jpg")
        ))

        oc.add(DirectoryObject(
            key=Callback(HistoryMenu),
            title="History",
            summary="Show the last %i downloaded subtitles" % int(Prefs["history_size"]),
            thumb=R("icon-history.jpg")
        ))

    # the refresh entry is part of every unlocked menu
    oc.add(DirectoryObject(
        key=Callback(fatality, force_title=" ", randomize=timestamp()),
        title=pad_title("Refresh"),
        summary=refresh_summary(),
        thumb=R("icon-refresh.jpg")
    ))

    # add re-lock after pin unlock
    if config.pin:
        oc.add(DirectoryObject(
            key=Callback(ClearPin, randomize=timestamp()),
            title=pad_title("Re-lock menu(s)"),
            summary="Enabled the PIN again for menu(s)"
        ))

    if not only_refresh:
        oc.add(DirectoryObject(
            key=Callback(AdvancedMenu),
            title=pad_title("Advanced functions"),
            summary="Use at your own risk",
            thumb=R("icon-advanced.jpg")
        ))

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/pin')
def PinMenu(pin="", randomize=None, success_go_to="channel"):
    """
    Digit-by-digit PIN entry menu.

    :param pin: the digits entered so far
    :param randomize: not read by this function
    :param success_go_to: "channel" or "advanced"; selects the menu returned once the PIN matches
    :return: the target menu on a matching PIN, otherwise the digit selection container
    """
    digit_number = len(pin) + 1
    oc = ObjectContainer(title2="Enter PIN number %s" % digit_number, no_cache=True, no_history=True,
                         skip_pin_lock=True)

    if pin == config.pin:
        # remember when the PIN was entered and unlock
        Dict["pin_correct_time"] = datetime.datetime.now()
        config.locked = False

        if success_go_to == "channel":
            return fatality(force_title="PIN correct", header="PIN correct", no_history=True)
        if success_go_to == "advanced":
            return AdvancedMenu(randomize=timestamp())
        # any other target falls through to the digit menu below

    # one entry per digit, each extending the PIN entered so far
    for digit in map(str, range(10)):
        oc.add(DirectoryObject(
            key=Callback(PinMenu, randomize=timestamp(), pin=pin + digit, success_go_to=success_go_to),
            title=pad_title(digit),
        ))

    # start over with an empty PIN
    oc.add(DirectoryObject(
        key=Callback(PinMenu, randomize=timestamp(), success_go_to=success_go_to),
        title=pad_title("Reset"),
    ))
    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/pin_lock')
def ClearPin(randomize=None):
    """
    Lock the menu again by clearing the stored PIN time, then return the main menu.

    :param randomize: not read by this function
    :return: the main menu container
    """
    Dict["pin_correct_time"] = None
    config.locked = True

    main_menu = fatality(force_title="Menu locked", header=" ", no_history=True)
    return main_menu
|
||||
|
||||
|
||||
@route(PREFIX + '/on_deck')
def OnDeckMenu(message=None):
    """
    Display the items on deck.

    :param message: not read by this function
    :return: the merged items menu filled by get_on_deck_items
    """
    menu_title = "Items On Deck"
    return mergedItemsMenu(title=menu_title, base_title=menu_title, itemGetter=get_on_deck_items)
|
||||
|
||||
|
||||
@route(PREFIX + '/recently_added')
def RecentlyAddedMenu(message=None):
    """
    Display the recently added items per section.

    :param message: not read by this function
    :return: the sections menu configured for the "recently_added" item key, without ignore options
    """
    sections_kwargs = {
        "base_title": "Recently added",
        "section_items_key": "recently_added",
        "ignore_options": False,
    }
    return SectionsMenu(**sections_kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/recent', force=bool)
@debounce
def RecentMissingSubtitlesMenu(force=False, randomize=None):
    """
    List the items with missing subtitles collected by the "MissingSubtitles" task.

    The task is dispatched when no data is available yet or when force is set, unless it is already running.

    :param force: dispatch the task even if data is already available
    :param randomize: not read by this function
    :return: the populated container
    """
    task_name = "MissingSubtitles"
    title = "Items with missing subtitles"
    oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)

    running = scheduler.is_task_running(task_name)
    task_data = scheduler.get_task_data(task_name)
    missing_items = task_data["missing_subtitles"] if task_data else None

    wants_search = missing_items is None or force
    if wants_search and not running:
        scheduler.dispatch_task(task_name)
        running = True

    # first entry: either a progress/refresh link or a trigger for a new search
    if running:
        entry_title, force_next = u"Updating, refresh here ...", False
    else:
        entry_title, force_next = u"Get items with missing subtitles", True
    oc.add(DirectoryObject(
        key=Callback(RecentMissingSubtitlesMenu, force=force_next, randomize=timestamp()),
        title=entry_title,
        thumb=default_thumb
    ))

    if missing_items is None:
        return oc

    for added_at, item_id, item_title, item, missing_languages in missing_items:
        language_names = ", ".join(l.name for l in missing_languages)
        oc.add(DirectoryObject(
            key=Callback(ItemDetailsMenu, title=title + " > " + item_title, item_title=item_title,
                         rating_key=item_id),
            title=item_title,
            summary="Missing: %s" % language_names,
            thumb=get_item_thumb(item) or default_thumb
        ))

    # the data has been shown; drop it from the task storage
    scheduler.clear_task_data(task_name)
    return oc
|
||||
|
||||
|
||||
def mergedItemsMenu(title, itemGetter, itemGetterKwArgs=None, base_title=None, *args, **kwargs):
    """
    Display an item list of dynamic kinds of items.

    :param title: title of the container
    :param itemGetter: callable returning (kind, title, item_id, deeper, item) tuples
    :param itemGetterKwArgs: optional dict of keyword arguments for itemGetter; explicit kwargs take precedence
    :param base_title: optional breadcrumb prefix for the item detail menus
    :param args: positional arguments forwarded to itemGetter
    :param kwargs: keyword arguments forwarded to itemGetter
    :return: the populated container
    """
    oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)

    # itemGetterKwArgs used to be accepted but silently dropped; forward it now
    getter_kwargs = dict(itemGetterKwArgs or {})
    getter_kwargs.update(kwargs)
    items = itemGetter(*args, **getter_kwargs)

    # a separate name for the per-item title avoids shadowing the "title" parameter
    for kind, item_title, item_id, deeper, item in items:
        # base_title defaults to None, so only prepend the breadcrumb when one was supplied
        detail_title = base_title + " > " + item_title if base_title else item_title
        oc.add(DirectoryObject(
            title=item_title,
            key=Callback(ItemDetailsMenu, title=detail_title, item_title=item_title, rating_key=item_id),
            thumb=get_item_thumb(item) or default_thumb
        ))

    return oc
|
||||
|
||||
|
||||
def determine_section_display(kind, item, pass_kwargs=None):
    """
    Pick the menu function for a section.

    Sections listed with a section_items_key other than "all" always use SectionMenu; otherwise sections with
    more than 80 items use the first-letter index.

    :param kind: not read by this function
    :param item: section object; its size attribute is compared against the threshold
    :param pass_kwargs: optional dict; only "section_items_key" is looked at
    :return: SectionFirstLetterMenu or SectionMenu
    """
    lists_all_items = not pass_kwargs or pass_kwargs.get("section_items_key", "all") == "all"
    if lists_all_items and item.size > 80:
        return SectionFirstLetterMenu
    return SectionMenu
|
||||
|
||||
|
||||
@route(PREFIX + '/ignore/set/{kind}/{rating_key}/{todo}/sure={sure}', kind=str, rating_key=str, todo=str, sure=bool)
def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
    """
    Ask for confirmation, then add an item to or remove it from the ignore list.

    :param kind: ignore list category used to index ignore_list
    :param rating_key: key of the item to add or remove
    :param title: item title used in the menu texts and the log
    :param sure: False shows the confirmation entry, True performs the change
    :param todo: "add" or "remove"; anything else leaves the list untouched
    :return: the confirmation container, or the main menu with a header describing the outcome
    """
    is_ignored = rating_key in ignore_list[kind]

    if not sure:
        # confirmation step: the entry calls back into this menu with sure=True
        if is_ignored:
            verb, preposition, next_todo = "Remove", "from", "remove"
        else:
            verb, preposition, next_todo = "Add", "to", "add"

        heading = "%s %s %s %s the ignore list" % (verb, ignore_list.verbose(kind), title, preposition)
        oc = SubFolderObjectContainer(no_history=True, replace_parent=True, title1=heading, title2="Are you sure?")
        oc.add(DirectoryObject(
            key=Callback(IgnoreMenu, kind=kind, rating_key=rating_key, title=title, sure=True, todo=next_todo),
            title=pad_title("Are you sure?"),
        ))
        return oc

    rel = ignore_list[kind]

    if todo == "remove" and is_ignored:
        rel.remove(rating_key)
        Log.Info("Removed %s (%s) from the ignore list", title, rating_key)
        ignore_list.remove_title(kind, rating_key)
        ignore_list.save()
        state = "removed from"
    elif todo == "add" and not is_ignored:
        rel.append(rating_key)
        Log.Info("Added %s (%s) to the ignore list", title, rating_key)
        ignore_list.add_title(kind, rating_key, title)
        ignore_list.save()
        state = "added to"
    else:
        # unknown action, or the item is already in the requested state
        return fatality(force_title=" ", header="Didn't change the ignore list", no_history=True)

    return fatality(force_title=" ", header="%s %s the ignore list" % (title, state), no_history=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/sections')
def SectionsMenu(base_title="Sections", section_items_key="all", ignore_options=True):
    """
    Build the menu that lists all sections.

    :param base_title: forwarded to the per-section menu through pass_kwargs
    :param section_items_key: forwarded to the per-section menu through pass_kwargs
    :param ignore_options: forwarded to the per-section menu through pass_kwargs
    :return: the result of dig_tree for the sections container
    """
    sections = get_all_items("sections")
    container = SubFolderObjectContainer(title2="Sections", no_cache=True, no_history=True)

    # values every section entry hands on to the menu chosen by determine_section_display
    forwarded = {
        "base_title": base_title,
        "section_items_key": section_items_key,
        "ignore_options": ignore_options,
    }
    return dig_tree(container, sections, None,
                    menu_determination_callback=determine_section_display,
                    pass_kwargs=forwarded,
                    fill_args={"title": "section_title"})
|
||||
|
||||
|
||||
@route(PREFIX + '/section', ignore_options=bool)
def SectionMenu(rating_key, title=None, base_title=None, section_title=None, ignore_options=True,
                section_items_key="all"):
    """
    Display the contents of a section.

    :param rating_key: key of the section whose items are listed
    :param title: section title; converted with unicode() before use
    :param base_title: optional breadcrumb prefix; when empty the section title is used on its own
    :param section_title: accepted for route compatibility; the value is replaced by the converted title
    :param ignore_options: when true, the ignore-list entry for this section is added
    :param section_items_key: key passed to get_all_items to select which items to list
    :return: the result of dig_tree for the section container
    """
    items = get_all_items(key=section_items_key, value=rating_key, base="library/sections")
    # only the "deeper" flag is needed here; the kind is not used
    _, deeper = get_items_info(items)

    section_title = unicode(title)
    # base_title defaults to None, so only prepend the breadcrumb when one was supplied
    title = base_title + " > " + section_title if base_title else section_title

    oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
    if ignore_options:
        add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)

    return dig_tree(oc, items, MetadataMenu,
                    pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": "section",
                                 "previous_rating_key": rating_key})
|
||||
|
||||
|
||||
@route(PREFIX + '/section/firstLetter', deeper=bool)
def SectionFirstLetterMenu(rating_key, title=None, base_title=None, section_title=None, ignore_options=True,
                           section_items_key="all"):
    """
    Display the contents of a section indexed by its first char (A-Z, 0-9...).

    :param rating_key: key of the section
    :param title: section title; converted with unicode() before use
    :param base_title: optional breadcrumb prefix; when empty the title is used on its own
    :param section_title: used as the container title and for the ignore-list entry
    :param ignore_options: ignored
    :param section_items_key: ignored
    :return: the result of dig_tree for the first-letter container
    """
    items = get_all_items(key="first_character", value=rating_key, base="library/sections")
    # only the "deeper" flag is needed here; the kind is not used
    _, deeper = get_items_info(items)

    title = unicode(title)
    oc = SubFolderObjectContainer(title2=section_title, no_cache=True, no_history=True)
    # base_title defaults to None, so only prepend the breadcrumb when one was supplied
    if base_title:
        title = base_title + " > " + title

    add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)

    # shortcut to the unindexed listing of the whole section
    oc.add(DirectoryObject(
        key=Callback(SectionMenu, title="All", base_title=title, rating_key=rating_key, ignore_options=False),
        title="All"
    ))
    return dig_tree(oc, items, FirstLetterMetadataMenu, force_rating_key=rating_key, fill_args={"key": "key"},
                    pass_kwargs={"base_title": title, "display_items": deeper, "previous_rating_key": rating_key})
|
||||
|
||||
|
||||
@route(PREFIX + '/section/firstLetter/key', deeper=bool)
|
||||
def FirstLetterMetadataMenu(rating_key, key, title=None, base_title=None, display_items=False, previous_item_type=None,
|
||||
@@ -466,7 +56,7 @@ def FirstLetterMetadataMenu(rating_key, key, title=None, base_title=None, displa
|
||||
|
||||
@route(PREFIX + '/section/contents', display_items=bool)
|
||||
def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, previous_item_type=None,
|
||||
previous_rating_key=None):
|
||||
previous_rating_key=None, randomize=None):
|
||||
"""
|
||||
displays the contents of a section based on whether it has a deeper tree or not (movies->movie (item) list; series->series list)
|
||||
:param rating_key:
|
||||
@@ -485,6 +75,22 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
current_kind = get_item_kind_from_rating_key(rating_key)
|
||||
|
||||
if display_items:
|
||||
timeout = 30
|
||||
|
||||
# add back to series for season
|
||||
if current_kind == "season":
|
||||
timeout = 360
|
||||
|
||||
show = get_item(previous_rating_key)
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(MetadataMenu, rating_key=show.rating_key, title=show.title, base_title=show.section.title,
|
||||
previous_item_type="section", display_items=True, randomize=timestamp()),
|
||||
title=u"< Back to %s" % show.title,
|
||||
thumb=show.thumb or default_thumb
|
||||
))
|
||||
elif current_kind == "series":
|
||||
timeout = 1800
|
||||
|
||||
items = get_all_items(key="children", value=rating_key, base="library/metadata")
|
||||
kind, deeper = get_items_info(items)
|
||||
dig_tree(oc, items, MetadataMenu,
|
||||
@@ -494,12 +100,6 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
if should_display_ignore(items, previous=previous_item_type):
|
||||
add_ignore_options(oc, "series", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
|
||||
timeout = 30
|
||||
if current_kind == "season":
|
||||
timeout = 360
|
||||
elif current_kind == "series":
|
||||
timeout = 1800
|
||||
|
||||
# add refresh
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RefreshItem, rating_key=rating_key, item_title=title, refresh_kind=current_kind,
|
||||
@@ -549,210 +149,6 @@ def HistoryMenu():
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/{rating_key}/actions')
@debounce
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None):
    """
    Display the details menu of an item without a deeper tree, such as a movie or an episode.

    Offers refresh and forced refresh, one subtitle listing entry per media part and configured language,
    and the ignore options.

    :param rating_key: key of the item
    :param title: menu title; prefixed with base_title when one is given
    :param base_title: optional breadcrumb prefix
    :param item_title: item title used in the entry labels
    :param randomize: not read by this function
    :return: the populated container
    """
    own_title = unicode(title)
    title = unicode(base_title) + " > " + own_title if base_title else own_title

    item = get_item(rating_key)
    current_kind = get_item_kind_from_rating_key(rating_key)

    # the refresh callbacks get their timeout multiplied by 1000
    timeout = 30
    callback_timeout = timeout * 1000

    oc = SubFolderObjectContainer(title2=title, replace_parent=True)

    # plain refresh
    oc.add(DirectoryObject(
        key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, randomize=timestamp(),
                     timeout=callback_timeout),
        title=u"Refresh: %s" % item_title,
        summary="Refreshes the %s, possibly searching for missing and picking up new subtitles on disk" % current_kind,
        thumb=item.thumb or default_thumb
    ))

    # forced refresh
    oc.add(DirectoryObject(
        key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(),
                     timeout=callback_timeout),
        title=u"Auto-search: %s" % item_title,
        summary="Issues a forced refresh, ignoring known subtitles and searching for new ones",
        thumb=item.thumb or default_thumb
    ))

    # stored subtitle info for the item
    stored_subs = get_subtitle_storage().load_or_new(item)

    # the plex item and its current media info
    plex_item = list(Plex["library"].metadata(rating_key))[0]
    media = plex_item.media

    # one listing entry per media part and configured language
    for part in media.parts:
        filename = os.path.basename(part.file)
        part_id = str(part.id)

        for lang in config.lang_list:
            lang_a2 = lang.alpha2
            # with ietf languages enabled, only the part before the dash is used for the lookup
            if cast_bool(Prefs["subtitles.language.ietf"]) and "-" in lang_a2:
                lang_a2 = lang_a2.split("-")[0]

            # stored subtitle data for this media part and language, if any
            current_sub = stored_subs.get_any(part_id, lang_a2)

            if current_sub:
                current_sub_id = current_sub.id
                current_sub_provider_name = current_sub.provider_name
                current_score = current_sub.score
                summary = u"Current subtitle: %s (added: %s, %s), Language: %s, Score: %i, Storage: %s" % \
                          (current_sub.provider_name, df(current_sub.date_added), current_sub.mode_verbose, lang,
                           current_sub.score, current_sub.storage_type)
            else:
                current_sub_id = None
                current_sub_provider_name = None
                current_score = None
                summary = u"No current subtitle in storage"

            oc.add(DirectoryObject(
                key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
                             item_title=item_title, language=lang, current_id=current_sub_id,
                             item_type=plex_item.type, filename=filename, current_data=summary,
                             randomize=timestamp(), current_provider=current_sub_provider_name,
                             current_score=current_score),
                title=u"List %s subtitles" % lang.name,
                summary=summary
            ))

    add_ignore_options(oc, "videos", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)

    return oc
|
||||
|
||||
|
||||
def get_item_task_data(task_name, rating_key, language):
    """
    Look up the stored task data of one item and language.

    :param task_name: name of the scheduler task whose data is read
    :param rating_key: key of the item inside the task data
    :param language: key of the language inside the item's data
    :return: the stored entry, or None when the task, item or language has no data
    """
    task_data = scheduler.get_task_data(task_name)
    if not task_data:
        return None

    per_item = task_data.get(rating_key, {})
    return per_item.get(language)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/search/{rating_key}/{part_id}', force=bool)
@debounce
def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item_title=None, filename=None,
                                 item_type="episode", language=None, force=False, current_id=None, current_data=None,
                                 current_provider=None, current_score=None, randomize=None):
    """
    List the subtitles found for one media part of an item in one language.

    Dispatches the "AvailableSubsForItem" task when no results are stored yet or when force is set, unless the
    task is already running.

    :param rating_key: key of the item (required)
    :param part_id: id of the media part (required)
    :param title: menu title, also used for the "back" entry
    :param item_title: item title handed on to the follow-up menus
    :param filename: file name shown in the search entry summary
    :param item_type: "episode" is scanned as series, anything else as movie
    :param language: language the results are stored under
    :param force: dispatch a new search even if results are stored
    :param current_id: id of the current subtitle; the matching result is labelled "Current"
    :param current_data: summary text of the "back" entry
    :param current_provider: provider of the current subtitle, shown in the search entry summary
    :param current_score: score of the current subtitle, shown in the search entry summary
    :param randomize: not read by this function
    :return: the populated container
    :raises AssertionError: when rating_key or part_id is missing
    """
    # "assert rating_key, part_id" only tested rating_key and used part_id as the failure message
    assert rating_key and part_id, "rating_key and part_id are required"

    running = scheduler.is_task_running("AvailableSubsForItem")
    search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)

    if (search_results is None or force) and not running:
        scheduler.dispatch_task("AvailableSubsForItem", rating_key=rating_key, item_type=item_type, part_id=part_id,
                                language=language)
        running = True

    oc = SubFolderObjectContainer(title2=unicode(title), replace_parent=True)
    oc.add(DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=rating_key, item_title=item_title, title=title, randomize=timestamp()),
        title=u"Back to: %s" % title,
        summary=current_data,
        thumb=default_thumb
    ))

    metadata = get_plex_metadata(rating_key, part_id, item_type)
    scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)

    if not scanned_parts:
        Log.Error("Couldn't list available subtitles for %s", rating_key)
        return oc

    # only the scanned video of the first entry is needed; its plex part is not used
    video = next(iter(scanned_parts.items()))[0]

    video_display_data = [video.format] if video.format else []
    if video.release_group:
        video_display_data.append(u"by %s" % video.release_group)
    video_display_data = " ".join(video_display_data)

    current_display = (u"Current: %s (%s) " % (current_provider, current_score) if current_provider else "")

    # arguments for calling back into this menu, shared by the "search" and "searching" entries
    reentry_kwargs = dict(rating_key=rating_key, item_title=item_title, language=language, filename=filename,
                          part_id=part_id, title=title, current_id=current_id, current_provider=current_provider,
                          current_score=current_score, current_data=current_data, item_type=item_type,
                          randomize=timestamp())
    if not running:
        # idle: the entry triggers a new, forced search
        reentry_kwargs["force"] = True
        entry_title = u"Search for %s subs (%s)" % (get_language(language).name, video_display_data)
    else:
        # search in progress: the entry only reloads this menu
        entry_title = u"Searching for %s subs (%s), refresh here ..." % (get_language(language).name,
                                                                        video_display_data)

    oc.add(DirectoryObject(
        key=Callback(ListAvailableSubsForItemMenu, **reentry_kwargs),
        title=entry_title,
        summary=u"%sFilename: %s" % (current_display, filename),
        thumb=default_thumb
    ))

    if not search_results:
        return oc

    for subtitle in search_results:
        oc.add(DirectoryObject(
            key=Callback(TriggerDownloadSubtitle, rating_key=rating_key, randomize=timestamp(), item_title=item_title,
                         subtitle_id=str(subtitle.id), language=language),
            title=u"%s: %s, score: %s" % ("Available" if current_id != subtitle.id else "Current",
                                          subtitle.provider_name, subtitle.score),
            summary=u"Release: %s, Matches: %s" % (subtitle.release_info, ", ".join(subtitle.matches)),
            thumb=default_thumb
        ))

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/download_subtitle/{rating_key}')
@debounce
def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None, language=None, randomize=None):
    """
    Dispatch the download of one subtitle out of the stored search results.

    :param rating_key: key of the item the subtitle belongs to
    :param subtitle_id: id of the subtitle to download, compared against str(subtitle.id)
    :param item_title: item title for the state message; rating_key is used when it is empty
    :param language: language the search results are stored under
    :param randomize: not read by this function
    :return: the main menu container
    """
    # parenthesised on purpose: "%" binds tighter than "or", so the fallback was never applied before
    set_refresh_menu_state("Downloading subtitle for %s" % (item_title or rating_key))

    # missing search results come back as None; treat them as "nothing found" instead of failing on iteration
    search_results = get_item_task_data("AvailableSubsForItem", rating_key, language) or []

    download_subtitle = next((subtitle for subtitle in search_results if str(subtitle.id) == subtitle_id), None)

    if download_subtitle is None:
        Log.Error(u"Something went horribly wrong")
    else:
        scheduler.dispatch_task("DownloadSubtitleForItem", rating_key=rating_key, subtitle=download_subtitle)

    return fatality(randomize=timestamp(), header=" ", replace_parent=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/{rating_key}')
@debounce
def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=False, refresh_kind=None,
                previous_rating_key=None, timeout=8000, randomize=None, trigger=True):
    """
    Start a (forced) refresh of an item in a separate thread and return the main menu.

    :param rating_key: key of the item to refresh (required)
    :param came_from: not read by this function
    :param item_title: item title used in the state message
    :param force: request a forced refresh
    :param refresh_kind: passed on to refresh_item
    :param previous_rating_key: passed on to refresh_item as parent_rating_key
    :param timeout: passed on to refresh_item after conversion to int
    :param randomize: not read by this function
    :param trigger: when false, nothing is started and the header stays blank
    :return: the main menu container
    """
    assert rating_key

    if not trigger:
        return fatality(randomize=timestamp(), header=" ", replace_parent=True)

    state_prefix = "Force-" if force else ""
    set_refresh_menu_state(u"Triggering %sRefresh for %s" % (state_prefix, item_title))

    # the refresh runs in its own thread
    Thread.Create(refresh_item, rating_key=rating_key, force=force, refresh_kind=refresh_kind,
                  parent_rating_key=previous_rating_key, timeout=int(timeout))

    action = "Forced-refresh" if force else "Refresh"
    header = u"%s of item %s triggered" % (action, rating_key)
    return fatality(randomize=timestamp(), header=header, replace_parent=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/missing/refresh')
|
||||
@debounce
|
||||
def RefreshMissing(randomize=None):
|
||||
@@ -761,60 +157,9 @@ def RefreshMissing(randomize=None):
|
||||
return fatality(header=header, replace_parent=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/advanced')
def AdvancedMenu(randomize=None, header=None, message=None):
    """
    Build the "Advanced" menu with the plugin's maintenance actions.

    While config.lock_advanced_menu is set and config.pin_correct is not, only the
    "Enter PIN" entry is offered.

    header: container header; a default warning text is used when it is empty
    message: container message
    """
    menu = SubFolderObjectContainer(header=header or "Internal stuff, pay attention!", message=message,
                                    no_cache=True, no_history=True, replace_parent=False, title2="Advanced")

    locked = config.lock_advanced_menu and not config.pin_correct
    if locked:
        menu.add(DirectoryObject(
            key=Callback(PinMenu, randomize=timestamp(), success_go_to="advanced"),
            title=pad_title("Enter PIN"),
            summary="The owner has restricted the access to this menu. Please enter the correct pin",
        ))
        return menu

    def add_action(target, title, **callback_args):
        # regular entries: fresh timestamp as randomize value, padded title, no summary
        menu.add(DirectoryObject(
            key=Callback(target, randomize=timestamp(), **callback_args),
            title=pad_title(title),
        ))

    add_action(TriggerRestart, "Restart the plugin")

    # the logs entry differs from the rest: no randomize value, unpadded title, extra summary
    menu.add(DirectoryObject(
        key=Callback(GetLogsLink),
        title="Get my logs (copy the appearing link and open it in your browser, please)",
        summary="Copy the appearing link and open it in your browser, please",
    ))

    add_action(TriggerBetterSubtitles, "Trigger find better subtitles")
    add_action(LogStorage, "Log the plugin's scheduled tasks state storage", key="tasks")
    add_action(LogStorage, "Log the plugin's internal ignorelist storage", key="ignore")
    add_action(ResetStorage, "Reset the plugin's scheduled tasks state storage", key="tasks")
    add_action(ResetStorage, "Reset the plugin's internal ignorelist storage", key="ignore")
    add_action(InvalidateCache, "Invalidate Sub-Zero metadata caches (subliminal)")

    return menu
|
||||
|
||||
|
||||
@route(PREFIX + '/ValidatePrefs', enforce_route=True)
|
||||
def ValidatePrefs():
|
||||
Core.log.setLevel(logging.DEBUG)
|
||||
Log.Debug("Validate Prefs called.")
|
||||
|
||||
# cache the channel state
|
||||
update_dict = False
|
||||
@@ -849,109 +194,51 @@ def ValidatePrefs():
|
||||
Core.log.removeHandler(logger.console_handler)
|
||||
Log.Debug("Stop logging to console")
|
||||
|
||||
Log.Debug("Validate Prefs called.")
|
||||
|
||||
# SZ config debug
|
||||
Log.Debug("--- SZ Config-Debug ---")
|
||||
for attr in [
|
||||
"app_support_path", "data_path", "data_items_path", "enable_agent",
|
||||
"enable_channel", "permissions_ok", "missing_permissions", "fs_encoding",
|
||||
"subtitle_destination_folder", "dbm_supported", "lang_list"]:
|
||||
Log.Debug("config.%s: %s", attr, getattr(config, attr))
|
||||
|
||||
for attr in ["plugin_log_path", "server_log_path"]:
|
||||
value = getattr(config, attr)
|
||||
access = os.access(value, os.R_OK)
|
||||
if Core.runtime.os == "Windows":
|
||||
try:
|
||||
f = open(value, "r")
|
||||
f.read(1)
|
||||
f.close()
|
||||
except:
|
||||
access = False
|
||||
|
||||
Log.Debug("config.%s: %s (accessible: %s)", attr, value, access)
|
||||
|
||||
for attr in [
|
||||
"subtitles.save.filesystem", ]:
|
||||
Log.Debug("Pref.%s: %s", attr, Prefs[attr])
|
||||
|
||||
# fixme: check existance of and os access of logs
|
||||
Log.Debug("Platform: %s", Core.runtime.platform)
|
||||
Log.Debug("OS: %s", Core.runtime.os)
|
||||
Log.Debug("----- Environment -----")
|
||||
for key, value in os.environ.iteritems():
|
||||
if key.startswith("PLEX") or key.startswith("SZ_"):
|
||||
if "TOKEN" in key:
|
||||
outval = "xxxxxxxxxxxxxxxxxxx"
|
||||
|
||||
else:
|
||||
outval = value
|
||||
Log.Debug("%s: %s", key, outval)
|
||||
Log.Debug("Locale: %s", locale.getdefaultlocale())
|
||||
Log.Debug("-----------------------")
|
||||
|
||||
Log.Debug("Setting log-level to %s", Prefs["log_level"])
|
||||
logger.register_logging_handler(DEPENDENCY_MODULE_NAMES, level=Prefs["log_level"])
|
||||
Core.log.setLevel(logging.getLevelName(Prefs["log_level"]))
|
||||
os.environ['U1pfT01EQl9LRVk'] = '789CF30DAC2C8B0AF433F5C9AD34290A712DF30D7135F12D0FB3E502006FDE081E'
|
||||
|
||||
return
|
||||
|
||||
|
||||
def DispatchRestart():
    """Schedule Restart via a timer with a 1.0 delay instead of calling it directly."""
    restart_delay = 1.0
    Thread.CreateTimer(restart_delay, Restart)
|
||||
|
||||
|
||||
@route(PREFIX + '/advanced/restart/trigger')
@debounce
def TriggerRestart(randomize=None):
    """Announce the restart in the refresh menu state, schedule it and return to the main menu."""
    set_refresh_menu_state("Restarting the plugin")
    DispatchRestart()

    menu_options = dict(
        header="Restart triggered, please wait about 5 seconds",
        force_title=" ",
        only_refresh=True,
        replace_parent=True,
        no_history=True,
        randomize=timestamp(),
    )
    return fatality(**menu_options)
|
||||
|
||||
|
||||
@route(PREFIX + '/advanced/restart/execute')
def Restart():
    """Ask the ":/plugins" endpoint of the Plex API wrapper to restart this plugin."""
    plugins_endpoint = Plex[":/plugins"]
    plugins_endpoint.restart(PLUGIN_IDENTIFIER)
|
||||
|
||||
|
||||
@route(PREFIX + '/storage/reset', sure=bool)
def ResetStorage(key, randomize=None, sure=False):
    """
    Reset the storage identified by key after the user confirmed it.

    Without sure a confirmation menu is returned whose single entry calls this route again
    with sure=True. After resetting the "tasks" storage the scheduler is re-initialized.
    """
    if sure:
        reset_storage(key)

        if key == "tasks":
            # reinitialize the scheduler
            scheduler.init_storage()
            scheduler.setup_tasks()

        return AdvancedMenu(
            randomize=timestamp(),
            header='Success',
            message='Information Storage (%s) reset' % key
        )

    confirmation = SubFolderObjectContainer(no_history=True, title1="Reset subtitle storage",
                                            title2="Are you sure?")
    confirm_callback = Callback(ResetStorage, key=key, sure=True, randomize=timestamp())
    confirmation.add(DirectoryObject(
        key=confirm_callback,
        title=pad_title("Are you really sure?"),
    ))
    return confirmation
|
||||
|
||||
|
||||
@route(PREFIX + '/storage/log')
def LogStorage(key, randomize=None):
    """Log the storage identified by key and show the advanced menu with a success message."""
    log_storage(key)

    feedback = dict(
        randomize=timestamp(),
        header='Success',
        message='Information Storage (%s) logged' % key,
    )
    return AdvancedMenu(**feedback)
|
||||
|
||||
|
||||
@route(PREFIX + '/triggerbetter')
def TriggerBetterSubtitles(randomize=None):
    """Dispatch the "FindBetterSubtitles" scheduler task and show the advanced menu with a success message."""
    scheduler.dispatch_task("FindBetterSubtitles")

    feedback = dict(
        randomize=timestamp(),
        header='Success',
        message='FindBetterSubtitles triggered',
    )
    return AdvancedMenu(**feedback)
|
||||
|
||||
|
||||
@route(PREFIX + '/get_logs_link')
def GetLogsLink():
    """
    Show a link to the log download route of this plugin.

    The host part is whatever http://www.plexapp.com/ip.php returns; the link also contains
    config.universal_plex_token as X-Plex-Token query parameter.
    """
    ip_response = Core.networking.http_request("http://www.plexapp.com/ip.php", cacheTime=7200)
    ip = ip_response.content.strip()

    logs_route = PREFIX + '/logs'
    logs_link = "http://%s:32400%s?X-Plex-Token=%s" % (ip, logs_route, config.universal_plex_token)

    return ObjectContainer(title2="Download Logs", no_cache=True, no_history=True,
                           header="Copy this link and open this in your browser, please",
                           message=logs_link)
|
||||
|
||||
|
||||
@route(PREFIX + '/logs')
def DownloadLogs():
    """
    Return a ZipObject holding the plugin logs and the server log.

    Plugin logs are all files matching config.plugin_log_path + '*' (sorted by path); the
    server log (config.server_log_path) is added last. Entries are stored under their base name.
    """
    archive_buffer = StringIO.StringIO()
    archive = ZipFile(archive_buffer, mode='w', compression=ZIP_DEFLATED)

    log_paths = sorted(glob.glob(config.plugin_log_path + '*'))
    log_paths.append(config.server_log_path)

    for log_path in log_paths:
        entry_name = os.path.basename(log_path)
        content = StringIO.StringIO()
        content.write(FileIO.read(log_path))
        archive.writestr(entry_name, content.getvalue())

    # the archive has to be closed before its buffer holds a complete zip file
    archive.close()

    return ZipObject(archive_buffer.getvalue())
|
||||
|
||||
|
||||
@route(PREFIX + '/invalidatecache')
def InvalidateCache(randomize=None):
    """Invalidate subliminal's cache region and show the advanced menu with a success message."""
    from subliminal.cache import region as cache_region
    cache_region.invalidate()

    feedback = dict(
        randomize=timestamp(),
        header='Success',
        message='Cache invalidated',
    )
    return AdvancedMenu(**feedback)
|
||||
|
||||
@@ -7,9 +7,11 @@ from support.helpers import get_video_display_title
|
||||
from support.ignore import ignore_list
|
||||
from support.lib import get_intent
|
||||
from support.config import config
|
||||
from subzero.constants import ICON_SUB
|
||||
from subzero.constants import ICON_SUB, ICON
|
||||
from support.scheduler import scheduler
|
||||
|
||||
default_thumb = R(ICON_SUB)
|
||||
main_icon = ICON if not config.is_development else "icon-dev.jpg"
|
||||
|
||||
|
||||
def should_display_ignore(items, previous=None):
|
||||
@@ -41,8 +43,8 @@ def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(callback_menu, kind=use_kind, rating_key=rating_key, title=title),
|
||||
title=u"%s %s \"%s\" %s the ignore list" % (
|
||||
"Remove" if in_list else "Add", ignore_list.verbose(kind) if add_kind else "", unicode(title), "from" if in_list else "to")
|
||||
title=u"%s %s \"%s\"" % (
|
||||
"Un-Ignore" if in_list else "Ignore", ignore_list.verbose(kind) if add_kind else "", unicode(title))
|
||||
)
|
||||
)
|
||||
|
||||
@@ -104,6 +106,12 @@ def set_refresh_menu_state(state_or_media, media_type="movies"):
|
||||
Dict["current_refresh_state"] = u"%sRefreshing %s" % ("Force-" if force_refresh else "", unicode(title))
|
||||
|
||||
|
||||
def get_item_task_data(task_name, rating_key, language):
    """
    Return the data a scheduler task stored for one item and language.

    Yields None when the task has no data, or nothing is stored for the given
    rating_key/language combination.
    """
    per_item = {}
    all_items = scheduler.get_task_data(task_name)
    if all_items:
        per_item = all_items.get(rating_key, {})
    return per_item.get(language)
|
||||
|
||||
|
||||
def enable_channel_wrapper(func):
|
||||
"""
|
||||
returns the original wrapper :func: (route or handler) if applicable, else the plain to-be-wrapped function
|
||||
@@ -140,7 +148,7 @@ def debounce(func):
|
||||
|
||||
def wrap(*args, **kwargs):
|
||||
if "randomize" in kwargs:
|
||||
if not "menu_history" in Dict:
|
||||
if "menu_history" not in Dict:
|
||||
Dict["menu_history"] = {}
|
||||
|
||||
key = get_lookup_key([func] + list(args), kwargs)
|
||||
@@ -148,8 +156,13 @@ def debounce(func):
|
||||
Log.Debug("not triggering %s twice with %s, %s" % (func, args, kwargs))
|
||||
return ObjectContainer()
|
||||
else:
|
||||
Dict["menu_history"][key] = datetime.datetime.now() + datetime.timedelta(days=1)
|
||||
Dict.Save()
|
||||
Dict["menu_history"][key] = datetime.datetime.now() + datetime.timedelta(hours=6)
|
||||
try:
|
||||
Dict.Save()
|
||||
except TypeError:
|
||||
Log.Error("Can't save menu history for: %r", key)
|
||||
del Dict["menu_history"][key]
|
||||
|
||||
return func(*args, **kwargs)
|
||||
|
||||
return wrap
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
# coding=utf-8
|
||||
|
||||
from subzero.constants import PREFIX
|
||||
from menu_helpers import debounce, set_refresh_menu_state
|
||||
from support.items import refresh_item
|
||||
from support.helpers import timestamp
|
||||
|
||||
|
||||
@route(PREFIX + '/item/{rating_key}')
@debounce
def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=False, refresh_kind=None,
                previous_rating_key=None, timeout=8000, randomize=None, trigger=True):
    """
    Start a (forced) refresh of an item in a separate thread and return to the main menu.

    When trigger is falsy nothing is started and the menu is returned with a blank header.
    """
    assert rating_key
    # imported at call time rather than at module level
    from interface.main import fatality

    if not trigger:
        return fatality(randomize=timestamp(), header=" ", replace_parent=True)

    state_prefix = "Force-" if force else ""
    set_refresh_menu_state(u"Triggering %sRefresh for %s" % (state_prefix, item_title))

    # the refresh itself runs in the background; timeout is cast to int before being handed over
    Thread.Create(refresh_item, rating_key=rating_key, force=force, refresh_kind=refresh_kind,
                  parent_rating_key=previous_rating_key, timeout=int(timeout))

    if force:
        action = "Forced-refresh"
    else:
        action = "Refresh"
    triggered_header = u"%s of item %s triggered" % (action, rating_key)

    return fatality(randomize=timestamp(), header=triggered_header, replace_parent=True)
|
||||
@@ -0,0 +1,251 @@
|
||||
# coding=utf-8
|
||||
|
||||
import traceback
|
||||
import types
|
||||
|
||||
from babelfish import Language
|
||||
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb
|
||||
from subzero.modification import registry as mod_registry, SubtitleModifications
|
||||
from subzero.constants import PREFIX
|
||||
from support.plex_media import get_plex_metadata, scan_videos
|
||||
from support.helpers import timestamp, pad_title
|
||||
from support.items import get_current_sub, set_mods_for_part
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_mods/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleModificationsMenu(**kwargs):
    """
    Build the menu listing the modifications that can be applied to the current subtitle.

    Reads rating_key, part_id, language, title and current_data from kwargs; randomize is
    removed from kwargs so that every callback gets a fresh one. All remaining kwargs are
    passed on to the callbacks unchanged.
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    current_sub, _stored_subs, _storage = get_current_sub(rating_key, part_id, language)
    kwargs.pop("randomize")

    applied_mods = current_sub.mods or []

    oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)

    from interface.item_details import SubtitleOptionsMenu
    oc.add(DirectoryObject(
        key=Callback(SubtitleOptionsMenu, randomize=timestamp(), **kwargs),
        title=u"< Back to subtitle options for: %s" % kwargs["title"],
        summary=kwargs["current_data"],
        thumb=default_thumb
    ))

    # registry mods that need no parameters; advanced mods and already applied exclusive mods are left out
    for identifier, mod in mod_registry.mods.iteritems():
        if mod.advanced or (mod.exclusive and identifier in applied_mods):
            continue

        oc.add(DirectoryObject(
            key=Callback(SubtitleSetMods, mods=identifier, mode="add", randomize=timestamp(), **kwargs),
            title=pad_title(mod.description), summary=mod.long_description or ""
        ))

    # mods that are configured through a menu of their own
    parameterized_mods = (
        ("change_FPS", SubtitleFPSModMenu),
        ("shift_offset", SubtitleShiftModUnitMenu),
        ("color", SubtitleColorModMenu),
    )
    for mod_name, mod_menu in parameterized_mods:
        mod_class = SubtitleModifications.get_mod_class(mod_name)
        oc.add(DirectoryObject(
            key=Callback(mod_menu, randomize=timestamp(), **kwargs),
            title=pad_title(mod_class.description), summary=mod_class.long_description or ""
        ))

    if applied_mods:
        oc.add(DirectoryObject(
            key=Callback(SubtitleSetMods, mods=None, mode="remove_last", randomize=timestamp(), **kwargs),
            title=pad_title("Remove last applied mod (%s)" % applied_mods[-1]),
            summary=u"Currently applied mods: %s" % (", ".join(applied_mods) if applied_mods else "none")
        ))
        oc.add(DirectoryObject(
            key=Callback(SubtitleListMods, randomize=timestamp(), **kwargs),
            title=pad_title("Manage applied mods"),
            summary=u"Currently applied mods: %s" % (", ".join(applied_mods))
        ))

    if applied_mods:
        restore_summary = u"Currently applied mods: %s" % ", ".join(applied_mods)
    else:
        restore_summary = u"Currently applied mods: %s" % "none"
    oc.add(DirectoryObject(
        key=Callback(SubtitleSetMods, mods=None, mode="clear", randomize=timestamp(), **kwargs),
        title=pad_title("Restore original version"),
        summary=restore_summary
    ))

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_mod_fps/{rating_key}/{part_id}', force=bool)
def SubtitleFPSModMenu(**kwargs):
    """
    Build the menu offering frame rate conversions towards the frame rate of the video part.

    Every candidate frame rate that differs from plex_part.fps gets an entry adding a
    "change_FPS" mod from that candidate to the part's frame rate.
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    item_type = kwargs["item_type"]

    kwargs.pop("randomize")

    oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
    oc.add(DirectoryObject(
        key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
        title="< Back to subtitle modification menu"
    ))

    metadata = get_plex_metadata(rating_key, part_id, item_type)
    if item_type == "episode":
        video_kind = "series"
    else:
        video_kind = "movie"
    scanned_parts = scan_videos([metadata], kind=video_kind, ignore_all=True)
    # only the first scanned video/part pair is used
    _video, plex_part = scanned_parts.items()[0]

    target_fps = plex_part.fps

    candidates = ("23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000")
    for fps in candidates:
        source_rate = float(fps)
        if source_rate == float(target_fps):
            continue

        indicator = ("subs constantly getting faster" if source_rate > float(target_fps)
                     else "subs constantly getting slower")

        mod_ident = SubtitleModifications.get_mod_signature("change_FPS", **{"from": fps, "to": target_fps})
        oc.add(DirectoryObject(
            key=Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs),
            title="%s fps -> %s fps (%s)" % (fps, target_fps, indicator)
        ))

    return oc
|
||||
|
||||
|
||||
# (unit key, human readable name) pairs, in the order the unit selection menu lists them
POSSIBLE_UNITS = (("ms", "milliseconds"), ("s", "seconds"), ("m", "minutes"), ("h", "hours"))
# the same pairs as a dict (unit key -> name); used to check whether a requested unit is known
POSSIBLE_UNITS_D = dict(POSSIBLE_UNITS)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_mod_shift_unit/{rating_key}/{part_id}', force=bool)
def SubtitleShiftModUnitMenu(**kwargs):
    """Build the menu for choosing the time unit a subtitle should be shifted by."""
    menu = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)

    # drop the incoming randomize value; every callback below gets a fresh one
    kwargs.pop("randomize")

    back_entry = DirectoryObject(
        key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
        title="< Back to subtitle modifications"
    )
    menu.add(back_entry)

    for unit_key, unit_name in POSSIBLE_UNITS:
        unit_entry = DirectoryObject(
            key=Callback(SubtitleShiftModMenu, unit=unit_key, randomize=timestamp(), **kwargs),
            title="Adjust by %s" % unit_name
        )
        menu.add(unit_entry)

    return menu
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_mod_shift/{rating_key}/{part_id}/{unit}', force=bool)
def SubtitleShiftModMenu(unit=None, **kwargs):
    """
    Build the menu offering the amounts a subtitle can be shifted by in the given unit.

    unit: one of the keys of POSSIBLE_UNITS_D ("ms", "s", "m", "h")
    kwargs: passed on to the callbacks; randomize is replaced with a fresh value

    Raises NotImplementedError for an unknown unit.
    """
    if unit not in POSSIBLE_UNITS_D:
        raise NotImplementedError

    kwargs.pop("randomize")

    oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)

    oc.add(DirectoryObject(
        key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
        title="< Back to unit selection"
    ))

    # offered amounts are symmetric around zero; range() excludes its stop value, hence the
    # "+1 step" upper bounds (minutes/seconds used to stop at +14 because of range(-15, 15))
    rng = []
    if unit == "h":
        rng = range(-10, 11)
    elif unit in ("m", "s"):
        rng = range(-15, 16)
    elif unit == "ms":
        rng = range(-900, 1000, 100)

    for i in rng:
        # a shift by zero would not change anything
        if i == 0:
            continue

        mod_ident = SubtitleModifications.get_mod_signature("shift_offset", **{unit: i})
        oc.add(DirectoryObject(
            key=Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs),
            # "%+d" renders the sign explicitly, e.g. "+5 s" / "-5 s"
            title="%+d %s" % (i, unit)
        ))

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_mod_colors/{rating_key}/{part_id}', force=bool)
def SubtitleColorModMenu(**kwargs):
    """Build the menu listing the colors of the "color" mod, one "add mod" entry per color."""
    # drop the incoming randomize value; every callback below gets a fresh one
    kwargs.pop("randomize")

    available_colors = SubtitleModifications.get_mod_class("color").colors

    menu = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
    menu.add(DirectoryObject(
        key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
        title="< Back to subtitle modification menu"
    ))

    for color_name, color_code in available_colors.iteritems():
        signature = SubtitleModifications.get_mod_signature("color", **{"name": color_name})
        color_entry = DirectoryObject(
            key=Callback(SubtitleSetMods, mods=signature, mode="add", randomize=timestamp(), **kwargs),
            title="%s (%s)" % (color_name, color_code)
        )
        menu.add(color_entry)

    return menu
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_set_mods/{rating_key}/{part_id}/{mods}/{mode}', force=bool)
@debounce
def SubtitleSetMods(mods=None, mode=None, **kwargs):
    """
    Apply a mod change to a subtitle part and show the modifications menu again.

    mods: a single mod identifier or a list of them; a single truthy value is wrapped in a list
    mode: handed to set_mods_for_part unchanged
    kwargs: must hold rating_key, part_id, language (parsed with Language.fromietf) and item_type
    """
    if mods and not isinstance(mods, types.ListType):
        mods = [mods]

    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    lang_code = kwargs["language"]
    item_type = kwargs["item_type"]

    set_mods_for_part(rating_key, part_id, Language.fromietf(lang_code), item_type, mods, mode=mode)

    # replace the consumed randomize value with a fresh one for the menu call
    kwargs.pop("randomize")
    return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_list_mods/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleListMods(**kwargs):
    """
    Build the menu listing the mods applied to the current subtitle, one "Remove" entry each.

    Reads rating_key, part_id, language and title from kwargs; randomize is removed from
    kwargs so that every callback gets a fresh one.
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)

    kwargs.pop("randomize")

    oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)

    oc.add(DirectoryObject(
        key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
        title="< Back to subtitle modifications"
    ))

    # mods may be unset; treat that as "no mods applied" instead of failing on iteration
    for identifier in current_sub.mods or []:
        oc.add(DirectoryObject(
            key=Callback(SubtitleSetMods, mods=identifier, mode="remove", randomize=timestamp(), **kwargs),
            title="Remove: %s" % identifier
        ))

    return oc
|
||||
@@ -18,7 +18,7 @@ sys.modules["support.plex_media"] = plex_media
|
||||
|
||||
import localmedia
|
||||
|
||||
sys.modules["subzero.localmedia"] = localmedia
|
||||
sys.modules["support.localmedia"] = localmedia
|
||||
|
||||
import subtitlehelpers
|
||||
|
||||
@@ -32,9 +32,9 @@ import missing_subtitles
|
||||
|
||||
sys.modules["support.missing_subtitles"] = missing_subtitles
|
||||
|
||||
import background
|
||||
import scheduler
|
||||
|
||||
sys.modules["support.background"] = background
|
||||
sys.modules["support.scheduler"] = scheduler
|
||||
|
||||
import tasks
|
||||
|
||||
@@ -58,3 +58,6 @@ sys.modules["support.data"] = data
|
||||
|
||||
import activities
|
||||
sys.modules["support.activities"] = activities
|
||||
|
||||
import download
|
||||
sys.modules["support.download"] = download
|
||||
|
||||
@@ -11,9 +11,9 @@ class PlexActivityManager(object):
|
||||
def start(self):
|
||||
activity_sources_enabled = None
|
||||
|
||||
if config.universal_plex_token:
|
||||
if config.plex_token:
|
||||
from plex import Plex
|
||||
Plex.configuration.defaults.authentication(config.universal_plex_token)
|
||||
Plex.configuration.defaults.authentication(config.plex_token)
|
||||
activity_sources_enabled = ["websocket"]
|
||||
Activity.on('websocket.playing', self.on_playing)
|
||||
|
||||
@@ -27,9 +27,6 @@ class PlexActivityManager(object):
|
||||
|
||||
@throttle(5, instance_method=True)
|
||||
def on_playing(self, info):
|
||||
if not config.use_activities:
|
||||
return
|
||||
|
||||
# ignore non-playing states and anything too far in
|
||||
if info["state"] != "playing" or info["viewOffset"] > 60000:
|
||||
return
|
||||
@@ -41,13 +38,22 @@ class PlexActivityManager(object):
|
||||
return
|
||||
|
||||
rating_key = info["ratingKey"]
|
||||
if rating_key not in Dict["last_played_items"]:
|
||||
# new playing; store last 10 recently played items
|
||||
if rating_key in Dict["last_played_items"] and rating_key != Dict["last_played_items"][0]:
|
||||
# shift last played
|
||||
Dict["last_played_items"].insert(0,
|
||||
Dict["last_played_items"].pop(Dict["last_played_items"].index(rating_key)))
|
||||
Dict.Save()
|
||||
|
||||
elif rating_key not in Dict["last_played_items"]:
|
||||
# new playing; store last X recently played items
|
||||
Dict["last_played_items"].insert(0, rating_key)
|
||||
Dict["last_played_items"] = Dict["last_played_items"][:10]
|
||||
Dict["last_played_items"] = Dict["last_played_items"][:config.store_recently_played_amount]
|
||||
|
||||
Dict.Save()
|
||||
|
||||
if not config.react_to_activities:
|
||||
return
|
||||
|
||||
debug_msg = "Started playing %s. Refreshing it." % rating_key
|
||||
|
||||
key_to_refresh = None
|
||||
@@ -108,4 +114,5 @@ class PlexActivityManager(object):
|
||||
if ep.index == 1:
|
||||
return ep
|
||||
|
||||
|
||||
activity = PlexActivityManager()
|
||||
|
||||
+117
-15
@@ -3,18 +3,23 @@
|
||||
import os
|
||||
import re
|
||||
import inspect
|
||||
import sys
|
||||
|
||||
import datetime
|
||||
|
||||
import subliminal
|
||||
import subliminal_patch
|
||||
|
||||
from whichdb import whichdb
|
||||
from babelfish import Language
|
||||
from subliminal.cli import MutexLock
|
||||
from subzero.lib.io import FileIO, get_viable_encoding
|
||||
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW
|
||||
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW, MEDIA_TYPE_TO_STRING
|
||||
from lib import Plex
|
||||
from helpers import check_write_permissions, cast_bool
|
||||
|
||||
SUBTITLE_EXTS = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'txt', 'psb']
|
||||
SUBTITLE_EXTS = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'txt', 'psb',
|
||||
'vtt']
|
||||
VIDEO_EXTS = ['3g2', '3gp', 'asf', 'asx', 'avc', 'avi', 'avs', 'bivx', 'bup', 'divx', 'dv', 'dvr-ms', 'evo', 'fli',
|
||||
'flv',
|
||||
'm2t', 'm2ts', 'm2v', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'mts', 'nsv', 'nuv', 'ogm', 'ogv', 'tp',
|
||||
@@ -27,6 +32,8 @@ IGNORE_FN = ("subzero.ignore", ".subzero.ignore", ".nosz")
|
||||
VERSION_RE = re.compile(ur'CFBundleVersion.+?<string>([0-9\.]+)</string>', re.DOTALL)
|
||||
DEV_RE = re.compile(ur'PlexPluginDevMode.+?<string>([01]+)</string>', re.DOTALL)
|
||||
|
||||
impawrt = getattr(sys.modules['__main__'], "__builtins__").get("__import__")
|
||||
|
||||
|
||||
def int_or_default(s, default):
|
||||
try:
|
||||
@@ -45,7 +52,9 @@ class Config(object):
|
||||
data_path = None
|
||||
data_items_path = None
|
||||
universal_plex_token = None
|
||||
plex_token = None
|
||||
is_development = False
|
||||
dbm_supported = False
|
||||
|
||||
enable_channel = True
|
||||
enable_agent = True
|
||||
@@ -56,6 +65,7 @@ class Config(object):
|
||||
pin_valid_minutes = 10
|
||||
lang_list = None
|
||||
subtitle_destination_folder = None
|
||||
subtitle_formats = None
|
||||
providers = None
|
||||
provider_settings = None
|
||||
max_recent_items_per_library = 200
|
||||
@@ -67,14 +77,23 @@ class Config(object):
|
||||
notify_executable = None
|
||||
sections = None
|
||||
enabled_sections = None
|
||||
enforce_encoding = False
|
||||
remove_hi = False
|
||||
fix_ocr = False
|
||||
fix_common = False
|
||||
colors = ""
|
||||
chmod = None
|
||||
forced_only = False
|
||||
exotic_ext = False
|
||||
treat_und_as_first = False
|
||||
ext_match_strictness = False
|
||||
use_activities = False
|
||||
default_mods = None
|
||||
debug_mods = False
|
||||
react_to_activities = False
|
||||
activity_mode = None
|
||||
subtitles_save_to = None
|
||||
no_refresh = False
|
||||
|
||||
store_recently_played_amount = 20
|
||||
|
||||
initialized = False
|
||||
|
||||
@@ -89,6 +108,9 @@ class Config(object):
|
||||
self.data_path = getattr(Data, "_core").storage.data_path
|
||||
self.data_items_path = os.path.join(self.data_path, "DataItems")
|
||||
self.universal_plex_token = self.get_universal_plex_token()
|
||||
self.plex_token = os.environ.get("PLEXTOKEN", self.universal_plex_token)
|
||||
|
||||
os.environ["SZ_USER_AGENT"] = self.get_user_agent()
|
||||
|
||||
self.set_plugin_mode()
|
||||
self.set_plugin_lock()
|
||||
@@ -96,6 +118,8 @@ class Config(object):
|
||||
|
||||
self.lang_list = self.get_lang_list()
|
||||
self.subtitle_destination_folder = self.get_subtitle_destination_folder()
|
||||
self.subtitle_formats = self.get_subtitle_formats()
|
||||
self.forced_only = cast_bool(Prefs["subtitles.only_foreign"])
|
||||
self.providers = self.get_providers()
|
||||
self.provider_settings = self.get_provider_settings()
|
||||
self.max_recent_items_per_library = int_or_default(Prefs["scheduler.max_recent_items_per_library"], 2000)
|
||||
@@ -106,14 +130,59 @@ class Config(object):
|
||||
self.enabled_sections = self.check_enabled_sections()
|
||||
self.permissions_ok = self.check_permissions()
|
||||
self.notify_executable = self.check_notify_executable()
|
||||
self.enforce_encoding = cast_bool(Prefs['subtitles.enforce_encoding'])
|
||||
self.remove_hi = cast_bool(Prefs['subtitles.remove_hi'])
|
||||
self.fix_ocr = cast_bool(Prefs['subtitles.fix_ocr'])
|
||||
self.fix_common = cast_bool(Prefs['subtitles.fix_common'])
|
||||
self.colors = Prefs['subtitles.colors'] if Prefs['subtitles.colors'] != "don't change" else None
|
||||
self.chmod = self.check_chmod()
|
||||
self.forced_only = cast_bool(Prefs["subtitles.only_foreign"])
|
||||
self.exotic_ext = cast_bool(Prefs["subtitles.scan.exotic_ext"])
|
||||
self.treat_und_as_first = cast_bool(Prefs["subtitles.language.treat_und_as_first"])
|
||||
self.ext_match_strictness = self.determine_ext_sub_strictness()
|
||||
self.default_mods = self.get_default_mods()
|
||||
self.debug_mods = cast_bool(Prefs['log_debug_mods'])
|
||||
self.subtitles_save_to = Prefs['subtitles.save.filesystem']
|
||||
self.no_refresh = os.environ.get("SZ_NO_REFRESH", False)
|
||||
self.initialized = True
|
||||
|
||||
def init_cache(self):
    """
    Configure subliminal's cache region: dbm file based when possible, in-memory otherwise.

    Sets self.dbm_supported to the name of the first importable dbm module, or to False when
    none can be imported or the file based configuration fails.
    """
    names = ['dbhash', 'gdbm', 'dbm']
    dbfn = None
    self.dbm_supported = False

    # try importing dbm modules
    if impawrt:
        for name in names:
            try:
                impawrt(name)
            except:
                # this module is not usable here; try the next candidate
                continue
            if not self.dbm_supported:
                # remember the first module that could be imported and stop looking
                self.dbm_supported = name
                break

        if self.dbm_supported:
            # anydbm checks; try guessing the format and importing the correct module
            # NOTE(review): this reads the module-level `config`, not `self` - confirm both are the same object
            dbfn = os.path.join(config.data_items_path, 'subzero.dbm')
            db_which = whichdb(dbfn)
            if db_which is not None and db_which != "":
                try:
                    impawrt(db_which)
                except ImportError:
                    # the module whichdb named for the existing file can't be imported
                    self.dbm_supported = False

    # the file based cache is never used on Windows
    if Core.runtime.os != "Windows" and self.dbm_supported:
        try:
            subliminal.region.configure('dogpile.cache.dbm', expiration_time=datetime.timedelta(days=30),
                                        arguments={'filename': dbfn,
                                                   'lock_factory': MutexLock})
            Log.Info("Using file based cache!")
            return
        except:
            # any failure while configuring the dbm backend falls through to the memory cache below
            self.dbm_supported = False

    Log.Warn("Not using file based cache!")
    subliminal.region.configure('dogpile.cache.memory')
|
||||
|
||||
def set_log_paths(self):
|
||||
# find log handler
|
||||
for handler in Core.log.handlers:
|
||||
@@ -138,7 +207,9 @@ class Config(object):
|
||||
except:
|
||||
Log.Warn("Couldn't determine Plex Token")
|
||||
else:
|
||||
Log("Did NOT find Preferences file - please check logfile and hierarchy. Aborting!")
|
||||
Log("Did NOT find Preferences file - most likely Windows OS. Otherwise please check logfile and hierarchy.")
|
||||
|
||||
# fixme: windows
|
||||
|
||||
def set_plugin_mode(self):
|
||||
if Prefs["plugin_mode"] == "only agent":
|
||||
@@ -213,11 +284,17 @@ class Config(object):
|
||||
return all_permissions_ok
|
||||
|
||||
def get_version(self):
    """Return the bare version, followed by " DEV" when running in development mode."""
    version = self.get_bare_version()
    if self.is_development:
        return version + " DEV"
    return version
|
||||
|
||||
def get_bare_version(self):
|
||||
result = VERSION_RE.search(self.plugin_info)
|
||||
add = "" if not self.is_development else " DEV"
|
||||
|
||||
if result:
|
||||
return result.group(1) + add
|
||||
return result.group(1)
|
||||
return "2.x.x.x"
|
||||
|
||||
def get_user_agent(self):
    """Return the user agent string: "Sub-Zero/<bare version>", suffixed with "-dev" in development mode."""
    version = self.get_bare_version()
    if self.is_development:
        version = version + "-dev"
    return "Sub-Zero/%s" % version
|
||||
|
||||
def get_dev_mode(self):
|
||||
dev = DEV_RE.search(self.plugin_info)
|
||||
@@ -270,7 +347,7 @@ class Config(object):
|
||||
self.enabled_sections = self.check_enabled_sections()
|
||||
|
||||
def check_enabled_sections(self):
|
||||
enabled_for_primary_agents = []
|
||||
enabled_for_primary_agents = {"movie": [], "show": []}
|
||||
enabled_sections = {}
|
||||
|
||||
# find which agents we're enabled for
|
||||
@@ -283,11 +360,11 @@ class Config(object):
|
||||
related_agents = Plex.primary_agent(agent.identifier, t.media_type)
|
||||
for a in related_agents:
|
||||
if a.identifier == PLUGIN_IDENTIFIER and a.enabled:
|
||||
enabled_for_primary_agents.append(agent.identifier)
|
||||
enabled_for_primary_agents[MEDIA_TYPE_TO_STRING[t.media_type]].append(agent.identifier)
|
||||
|
||||
# find the libraries that use them
|
||||
for library in self.sections:
|
||||
if library.agent in enabled_for_primary_agents:
|
||||
if library.agent in enabled_for_primary_agents.get(library.type, []):
|
||||
enabled_sections[library.key] = library
|
||||
|
||||
Log.Debug(u"I'm enabled for: %s" % [lib.title for key, lib in enabled_sections.iteritems()])
|
||||
@@ -330,6 +407,15 @@ class Config(object):
|
||||
return fld_custom or (
|
||||
Prefs["subtitles.save.subFolder"] if Prefs["subtitles.save.subFolder"] != "current folder" else None)
|
||||
|
||||
def get_subtitle_formats(self):
    """Return the subtitle formats selected in the preferences.

    Reads ``Prefs["subtitles.save.formats"]`` and returns a list containing
    "srt" and/or "vtt" (in that order) depending on whether "SRT" / "VTT"
    occur in the preference value.
    """
    formats = Prefs["subtitles.save.formats"]
    out = []
    if "SRT" in formats:
        out.append("srt")
    if "VTT" in formats:
        out.append("vtt")
    return out
|
||||
|
||||
def get_providers(self):
|
||||
providers = {'opensubtitles': cast_bool(Prefs['provider.opensubtitles.enabled']),
|
||||
# 'thesubdb': Prefs['provider.thesubdb.enabled'],
|
||||
@@ -343,10 +429,13 @@ class Config(object):
|
||||
}
|
||||
|
||||
# ditch non-forced-subtitles-reporting providers
|
||||
if cast_bool(Prefs['subtitles.only_foreign']):
|
||||
if self.forced_only:
|
||||
providers["addic7ed"] = False
|
||||
providers["tvsubtitles"] = False
|
||||
providers["legendastv"] = False
|
||||
providers["napiprojekt"] = False
|
||||
providers["shooter"] = False
|
||||
providers["subscenter"] = False
|
||||
|
||||
return filter(lambda prov: providers[prov], providers)
|
||||
|
||||
@@ -404,13 +493,26 @@ class Config(object):
|
||||
return "loose"
|
||||
return "strict"
|
||||
|
||||
def get_default_mods(self):
    """Return the list of subtitle modification identifiers enabled by default.

    The list is built from the instance flags ``remove_hi``, ``fix_ocr`` and
    ``fix_common``; when ``colors`` is set, a parametrized
    "color(name=<colors>)" entry is appended last.
    """
    mods = []
    if self.remove_hi:
        mods.append("remove_HI")
    if self.fix_ocr:
        mods.append("OCR_fixes")
    if self.fix_common:
        mods.append("common")
    if self.colors:
        mods.append("color(name=%s)" % self.colors)

    return mods
|
||||
|
||||
def set_activity_modes(self):
|
||||
val = Prefs["activity.on_playback"]
|
||||
if val == "never":
|
||||
self.use_activities = False
|
||||
self.react_to_activities = False
|
||||
return
|
||||
|
||||
self.use_activities = True
|
||||
self.react_to_activities = True
|
||||
if val == "current media item":
|
||||
self.activity_mode = "refresh"
|
||||
elif val == "hybrid: current item or next episode":
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
# coding=utf-8
|
||||
|
||||
import subliminal_patch as subliminal
|
||||
|
||||
from support.config import config
|
||||
from subtitlehelpers import get_subtitles_from_metadata
|
||||
from subliminal_patch import compute_score
|
||||
|
||||
|
||||
def download_best_subtitles(video_part_map, min_score=0):
    """Download the best subtitles for the given videos if any language is missing.

    :param video_part_map: dict mapping scanned video objects to their Plex part
    :param min_score: minimum score passed through to subliminal
    :return: the result of subliminal.download_best_subtitles() when at least
             one video lacks a wanted language; None when no languages are
             configured or nothing is missing
    """
    hearing_impaired = Prefs['subtitles.search.hearingImpaired']
    languages = config.lang_list
    if not languages:
        return

    missing_languages = False
    for video, part in video_part_map.iteritems():
        if not Prefs['subtitles.save.filesystem']:
            # scan for existing metadata subtitles
            meta_subs = get_subtitles_from_metadata(part)
            for language, subList in meta_subs.iteritems():
                if subList:
                    video.subtitle_languages.add(language)
                    Log.Debug("Found metadata subtitle %s for %s", language, video)

        missing_subs = (languages - video.subtitle_languages)

        # all languages are found if we either really have subs for all languages or we only want to have
        # exactly one language and we've only found one (the case for a selected language,
        # Prefs['subtitles.only_one'] (one found sub matches any language))
        found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
        if not missing_subs or found_one_which_is_enough:
            if found_one_which_is_enough:
                Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
            else:
                Log.Debug('All languages %r exist for %s', languages, video)
            continue

        # one video with missing languages is enough to trigger the download for the whole map
        missing_languages = True
        break

    if missing_languages:
        Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s" % (min_score, hearing_impaired))

        return subliminal.download_best_subtitles(video_part_map.keys(), languages, min_score, hearing_impaired,
                                                  providers=config.providers,
                                                  provider_configs=config.provider_settings,
                                                  pool_class=config.provider_pool,
                                                  compute_score=compute_score)
    Log.Debug("All languages for all requested videos exist. Doing nothing.")
|
||||
@@ -9,15 +9,26 @@ import time
|
||||
import re
|
||||
import platform
|
||||
import subprocess
|
||||
|
||||
from bs4 import UnicodeDammit
|
||||
import sys
|
||||
from collections import OrderedDict
|
||||
|
||||
import chardet
|
||||
|
||||
from bs4 import UnicodeDammit
|
||||
from babelfish import Language
|
||||
|
||||
from subzero.analytics import track_event
|
||||
|
||||
# Pick a platform-appropriate way of turning an argument list into one shell command line.
mswindows = (sys.platform == "win32")
if mswindows:
    from subprocess import list2cmdline
    quote_args = list2cmdline
else:
    # POSIX
    try:
        # Python 3.3+; the pipes module is deprecated and removed in Python 3.13
        from shlex import quote
    except ImportError:
        # Python 2
        from pipes import quote

    def quote_args(seq):
        """Return the arguments in seq shell-quoted and joined by single spaces."""
        return ' '.join(quote(arg) for arg in seq)
|
||||
|
||||
# Unicode control characters can appear in ID3v2 tags but are not legal in XML.
|
||||
RE_UNICODE_CONTROL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])' + \
|
||||
u'|' + \
|
||||
@@ -30,7 +41,7 @@ RE_UNICODE_CONTROL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])'
|
||||
|
||||
|
||||
def cast_bool(value):
    """Return True if str(value), stripped of surrounding whitespace, is "true" or "True"."""
    return str(value).strip() in ("true", "True")
|
||||
|
||||
|
||||
# A platform independent way to split paths which might come in with different separators.
|
||||
@@ -110,9 +121,9 @@ def str_pad(s, length, align='left', pad_char=' ', trim=False):
|
||||
raise ValueError("Unknown align type, expected either 'left' or 'right'")
|
||||
|
||||
|
||||
def pad_title(value, width=49):
    """Pad a title to `width` characters (default 49) to force the 'details' view."""
    return str_pad(value, width, pad_char=' ')
|
||||
|
||||
|
||||
def get_plex_item_display_title(item, kind, parent=None, parent_title=None, section_title=None,
|
||||
@@ -236,13 +247,13 @@ def get_item_hints(data):
|
||||
:param data: video item dict of media_to_videos
|
||||
:return:
|
||||
"""
|
||||
hints = {"title": data["title"], "type": "movie"}
|
||||
hints = {"title": data["original_title"] or data["title"], "type": "movie"}
|
||||
if data["type"] == "episode":
|
||||
hints.update(
|
||||
{
|
||||
"type": "episode",
|
||||
"episode_title": data["title"],
|
||||
"title": data["series"],
|
||||
"title": data["original_title"] or data["series"],
|
||||
}
|
||||
)
|
||||
return hints
|
||||
@@ -256,7 +267,7 @@ def notify_executable(exe_info, videos, subtitles, storage):
|
||||
exe, arguments = exe_info
|
||||
for video, video_subtitles in subtitles.items():
|
||||
for subtitle in video_subtitles:
|
||||
lang = Locale.Language.Match(subtitle.language.alpha2)
|
||||
lang = str(subtitle.language)
|
||||
data = video.plexapi_metadata.copy()
|
||||
data.update({
|
||||
"subtitle_language": lang,
|
||||
@@ -273,9 +284,21 @@ def notify_executable(exe_info, videos, subtitles, storage):
|
||||
prepared_arguments = [arg % prepared_data for arg in arguments]
|
||||
|
||||
Log.Debug(u"Calling %s with arguments: %s" % (exe, prepared_arguments))
|
||||
env = os.environ
|
||||
if not mswindows:
|
||||
env_path = {"PATH": os.pathsep.join(
|
||||
[
|
||||
"/usr/local/bin",
|
||||
"/usr/bin",
|
||||
os.environ.get("PATH", "")
|
||||
]
|
||||
)
|
||||
}
|
||||
env = dict(os.environ, **env_path)
|
||||
|
||||
try:
|
||||
output = subprocess.check_output(subprocess.list2cmdline([exe] + prepared_arguments),
|
||||
stderr=subprocess.STDOUT, shell=True)
|
||||
output = subprocess.check_output(quote_args([exe] + prepared_arguments),
|
||||
stderr=subprocess.STDOUT, shell=True, env=env)
|
||||
except subprocess.CalledProcessError:
|
||||
Log.Error(u"Calling %s failed: %s" % (exe, traceback.format_exc()))
|
||||
else:
|
||||
@@ -286,6 +309,26 @@ def track_usage(category=None, action=None, label=None, value=None):
|
||||
if not cast_bool(Prefs["track_usage"]):
|
||||
return
|
||||
|
||||
if "last_tracked" not in Dict:
|
||||
Dict["last_tracked"] = OrderedDict()
|
||||
Dict.Save()
|
||||
|
||||
event_key = (category, action, label, value)
|
||||
now = datetime.datetime.now()
|
||||
if event_key in Dict["last_tracked"] and (Dict["last_tracked"][event_key] + datetime.timedelta(minutes=30)) < now:
|
||||
return
|
||||
|
||||
Dict["last_tracked"][event_key] = now
|
||||
|
||||
# maintenance
|
||||
for key, value in Dict["last_tracked"].copy().iteritems():
|
||||
# kill day old values
|
||||
if value < now - datetime.timedelta(days=1):
|
||||
try:
|
||||
del Dict["last_tracked"][key]
|
||||
except:
|
||||
pass
|
||||
|
||||
Thread.Create(dispatch_track_usage, category, action, label, value,
|
||||
identifier=Dict["anon_id"], first_use=Dict["first_use"],
|
||||
add=Network.PublicAddress)
|
||||
@@ -303,3 +346,7 @@ def dispatch_track_usage(*args, **kwargs):
|
||||
|
||||
def get_language(lang_short):
    """Return the babelfish Language for the given IETF language code (via Language.fromietf)."""
    return Language.fromietf(lang_short)
|
||||
|
||||
|
||||
class PartUnknownException(Exception):
    """Raised when a requested part id cannot be found on a Plex item."""
    pass
|
||||
+116
-19
@@ -2,12 +2,15 @@
|
||||
|
||||
import logging
|
||||
import re
|
||||
import traceback
|
||||
import types
|
||||
import os
|
||||
from ignore import ignore_list
|
||||
from helpers import is_recent, get_plex_item_display_title, query_plex
|
||||
from helpers import is_recent, get_plex_item_display_title, query_plex, PartUnknownException
|
||||
from lib import Plex, get_intent
|
||||
from config import config, IGNORE_FN
|
||||
from subliminal_patch.subtitle import ModifiedSubtitle
|
||||
from subzero.modification import registry as mod_registry, SubtitleModifications
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -22,7 +25,7 @@ def get_item(key):
|
||||
|
||||
try:
|
||||
return list(item_container)[0]
|
||||
except IndexError:
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
@@ -40,11 +43,11 @@ PLEX_API_TYPE_MAP = {
|
||||
|
||||
def get_item_kind_from_rating_key(key):
    """Return the PLEX_API_TYPE_MAP entry for the item with the given rating key.

    Returns None (instead of raising KeyError) when the item's kind has no
    entry in the map.
    """
    item = get_item(key)
    return PLEX_API_TYPE_MAP.get(get_item_kind(item))
|
||||
|
||||
|
||||
def get_item_kind_from_item(item):
    """Return the PLEX_API_TYPE_MAP entry for the given item, or None if its kind is not mapped."""
    return PLEX_API_TYPE_MAP.get(get_item_kind(item))
|
||||
|
||||
|
||||
def get_item_thumb(item):
|
||||
@@ -164,14 +167,17 @@ def get_recent_items():
|
||||
"X-Plex-Container-Size": "%s" % config.max_recent_items_per_library
|
||||
}
|
||||
|
||||
episode_re = re.compile(ur'ratingKey="(?P<key>\d+)"'
|
||||
episode_re = re.compile(ur'(?su)ratingKey="(?P<key>\d+)"'
|
||||
ur'.+?grandparentRatingKey="(?P<parent_key>\d+)"'
|
||||
ur'.+?title="(?P<title>.*?)"'
|
||||
ur'.+?grandparentTitle="(?P<parent_title>.*?)"'
|
||||
ur'.+?index="(?P<episode>\d+?)"'
|
||||
ur'.+?parentIndex="(?P<season>\d+?)".+?addedAt="(?P<added>\d+)"')
|
||||
movie_re = re.compile(ur'ratingKey="(?P<key>\d+)".+?title="(?P<title>.*?)".+?addedAt="(?P<added>\d+)"')
|
||||
available_keys = ("key", "title", "parent_key", "parent_title", "season", "episode", "added")
|
||||
ur'.+?parentIndex="(?P<season>\d+?)".+?addedAt="(?P<added>\d+)"'
|
||||
ur'.+?<Part.+? file="(?P<filename>[^"]+?)"')
|
||||
movie_re = re.compile(ur'(?su)ratingKey="(?P<key>\d+)".+?title="(?P<title>.*?)'
|
||||
ur'".+?addedAt="(?P<added>\d+)"'
|
||||
ur'.+?<Part.+? file="(?P<filename>[^"]+?)"')
|
||||
available_keys = ("key", "title", "parent_key", "parent_title", "season", "episode", "added", "filename")
|
||||
recent = []
|
||||
|
||||
for section in Plex["library"].sections():
|
||||
@@ -182,8 +188,10 @@ def get_recent_items():
|
||||
continue
|
||||
|
||||
use_args = args.copy()
|
||||
plex_item_type = "Movie"
|
||||
if section.type == "show":
|
||||
use_args["type"] = "4"
|
||||
plex_item_type = "Episode"
|
||||
|
||||
url = "http://127.0.0.1:32400/library/sections/%s/all" % int(section.key)
|
||||
response = query_plex(url, use_args)
|
||||
@@ -198,6 +206,10 @@ def get_recent_items():
|
||||
if data["key"] in ignore_list.videos:
|
||||
Log.Debug(u"Skipping item: %s" % data["title"])
|
||||
continue
|
||||
if is_physically_ignored(data["filename"], plex_item_type):
|
||||
Log.Debug(u"Skipping item: %s" % data["title"])
|
||||
continue
|
||||
|
||||
if is_recent(int(data["added"])):
|
||||
recent.append((int(data["added"]), section.type, section.title, data["key"]))
|
||||
|
||||
@@ -242,6 +254,16 @@ def is_ignored(rating_key, item=None):
|
||||
return True
|
||||
|
||||
# physical/path ignore
|
||||
if config.ignore_sz_files or config.ignore_paths:
|
||||
for media in item.media:
|
||||
for part in media.parts:
|
||||
if is_physically_ignored(part.file, kind):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def is_physically_ignored(fn, kind):
|
||||
if config.ignore_sz_files or config.ignore_paths:
|
||||
# normally check current item folder and the library
|
||||
check_ignore_paths = [".", "../"]
|
||||
@@ -249,18 +271,15 @@ def is_ignored(rating_key, item=None):
|
||||
# series/episode, we've got a season folder here, also
|
||||
check_ignore_paths.append("../../")
|
||||
|
||||
for part in item.media.parts:
|
||||
if config.ignore_paths and config.is_path_ignored(part.file):
|
||||
Log.Debug("Item %s's path is manually ignored" % rating_key)
|
||||
return True
|
||||
if config.ignore_paths and config.is_path_ignored(fn):
|
||||
Log.Debug("Item %s's path is manually ignored" % fn)
|
||||
return True
|
||||
|
||||
if config.ignore_sz_files:
|
||||
for sub_path in check_ignore_paths:
|
||||
if config.is_physically_ignored(os.path.abspath(os.path.join(os.path.dirname(part.file), sub_path))):
|
||||
Log.Debug("An ignore file exists in either the items or its parent folders")
|
||||
return True
|
||||
|
||||
return False
|
||||
if config.ignore_sz_files:
|
||||
for sub_path in check_ignore_paths:
|
||||
if config.is_physically_ignored(os.path.normpath(os.path.join(os.path.dirname(fn), sub_path))):
|
||||
Log.Debug("An ignore file exists in either the items or its parent folders")
|
||||
return True
|
||||
|
||||
|
||||
def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, parent_rating_key=None):
|
||||
@@ -283,3 +302,81 @@ def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, paren
|
||||
for key in refresh:
|
||||
Log.Info("%s item %s", "Refreshing" if not force else "Forced-refreshing", key)
|
||||
Plex["library/metadata"].refresh(key)
|
||||
|
||||
|
||||
def get_current_sub(rating_key, part_id, language):
    """Look up the stored subtitle for a part/language of an item.

    :param rating_key: rating key of the item, resolved via get_item()
    :param part_id: id of the part the subtitle belongs to
    :param language: language of the wanted subtitle
    :return: tuple of (current subtitle, stored subtitles of the item, subtitle storage)
    """
    # imported locally, as in the original; presumably to avoid an import cycle - TODO confirm
    from support.storage import get_subtitle_storage

    item = get_item(rating_key)
    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)
    current_sub = stored_subs.get_any(part_id, language)
    return current_sub, stored_subs, subtitle_storage
|
||||
|
||||
|
||||
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
    """Change the modifications of the current stored subtitle of a part and re-save it.

    :param rating_key: rating key of the movie/episode
    :param part_id: id of the part the subtitle belongs to
    :param language: subtitle language
    :param item_type: "episode" is scanned as kind "series", anything else as "movie"
    :param mods: list of mod identifiers (used by the "add" and "remove" modes)
    :param mode: one of "add", "clear", "remove", "remove_last"
    :raises NotImplementedError: on an unknown/unregistered mod or an unknown mode
    :return: None; returns early (after storing the mod list) if the part is unknown
    """
    from support.plex_media import get_plex_metadata, scan_videos
    from support.storage import save_subtitles

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    if mode == "add":
        for mod in mods:
            # only the identifier is needed here, the parsed arguments are not used
            identifier, _ = SubtitleModifications.parse_identifier(mod)
            mod_class = SubtitleModifications.get_mod_class(identifier)

            if identifier not in mod_registry.mods_available:
                raise NotImplementedError("Mod unknown or not registered")

            # clean exclusive mods
            if mod_class.exclusive and current_sub.mods:
                # iterate over a copy, as entries are removed from the list while looping
                for current_mod in current_sub.mods[:]:
                    if current_mod.startswith(identifier):
                        current_sub.mods.remove(current_mod)
                        Log.Info("Removing superseded mod %s" % current_mod)

            current_sub.add_mod(mod)
    elif mode == "clear":
        current_sub.add_mod(None)
    elif mode == "remove":
        for mod in mods:
            current_sub.mods.remove(mod)

    elif mode == "remove_last":
        if current_sub.mods:
            current_sub.mods.pop()
    else:
        raise NotImplementedError("Wrong mode given")
    storage.save(stored_subs)

    try:
        metadata = get_plex_metadata(rating_key, part_id, item_type)
    except PartUnknownException:
        return

    scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True,
                                no_refining=True)
    video, plex_part = scanned_parts.items()[0]

    subtitle = ModifiedSubtitle(language, mods=current_sub.mods)
    subtitle.content = current_sub.content
    if current_sub.encoding:
        # thanks plex
        setattr(subtitle, "_guessed_encoding", current_sub.encoding)

        if current_sub.encoding != "utf-8":
            # convert the stored subtitle to utf-8 and persist the converted content
            subtitle.set_encoding("utf-8")
            current_sub.content = subtitle.content
            current_sub.encoding = "utf-8"
            storage.save(stored_subs)

    subtitle.plex_media_fps = plex_part.fps
    subtitle.page_link = "modify subtitles with: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
    subtitle.language = language
    subtitle.id = current_sub.id

    try:
        save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
        Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
                  ", ".join(current_sub.mods) if current_sub.mods else "none")
    except Exception:
        # best effort: log the failure, but let SystemExit/KeyboardInterrupt propagate
        Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())
|
||||
|
||||
@@ -108,7 +108,8 @@ def find_subtitles(part):
|
||||
if ext.lower()[1:] in config.SUBTITLE_EXTS:
|
||||
# get fn without forced/default/normal tag
|
||||
split_tag = root.rsplit(".", 1)
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default']:
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded',
|
||||
'custom']:
|
||||
root = split_tag[0]
|
||||
|
||||
# get associated media file name without language
|
||||
@@ -160,9 +161,8 @@ def find_subtitles(part):
|
||||
# determine whether to pick up the subtitle based on our match strictness
|
||||
elif not filename_matches_part:
|
||||
if sz_config.ext_match_strictness == "strict" or (
|
||||
sz_config.ext_match_strictness == "loose" and not filename_contains_part):
|
||||
|
||||
#Log.Debug("%s doesn't match %s, skipping" % (helpers.unicodize(local_filename),
|
||||
sz_config.ext_match_strictness == "loose" and not filename_contains_part):
|
||||
# Log.Debug("%s doesn't match %s, skipping" % (helpers.unicodize(local_filename),
|
||||
# helpers.unicodize(part_basename)))
|
||||
continue
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
import time
|
||||
|
||||
from support.config import config
|
||||
from support.helpers import get_plex_item_display_title, cast_bool
|
||||
@@ -8,8 +9,6 @@ from support.lib import Plex
|
||||
|
||||
|
||||
def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_title=None, internal=False, external=True, languages=()):
|
||||
existing_subs = {"internal": [], "external": [], "count": 0}
|
||||
|
||||
item_id = int(rating_key)
|
||||
item = get_item(rating_key)
|
||||
|
||||
@@ -18,36 +17,41 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
else:
|
||||
item_title = get_plex_item_display_title(item, kind, section_title=section_title)
|
||||
|
||||
video = item.media
|
||||
missing = set()
|
||||
languages_set = set(languages)
|
||||
for media in item.media:
|
||||
existing_subs = {"internal": [], "external": [], "count": 0}
|
||||
for part in media.parts:
|
||||
for stream in part.streams:
|
||||
if stream.stream_type == 3:
|
||||
if stream.index:
|
||||
key = "internal"
|
||||
else:
|
||||
key = "external"
|
||||
|
||||
for part in video.parts:
|
||||
for stream in part.streams:
|
||||
if stream.stream_type == 3:
|
||||
if stream.index:
|
||||
key = "internal"
|
||||
else:
|
||||
key = "external"
|
||||
existing_subs[key].append(Locale.Language.Match(stream.language_code or ""))
|
||||
existing_subs["count"] = existing_subs["count"] + 1
|
||||
|
||||
existing_subs[key].append(Locale.Language.Match(stream.language_code or ""))
|
||||
existing_subs["count"] = existing_subs["count"] + 1
|
||||
missing_from_part = set(languages_set)
|
||||
if existing_subs["count"]:
|
||||
existing_flat = set((existing_subs["internal"] if internal else []) + (existing_subs["external"] if external else []))
|
||||
if languages_set.issubset(existing_flat) or (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
|
||||
# all subs found
|
||||
#Log.Info(u"All subtitles exist for '%s'", item_title)
|
||||
continue
|
||||
|
||||
missing = languages
|
||||
if existing_subs["count"]:
|
||||
existing_flat = (existing_subs["internal"] if internal else []) + (existing_subs["external"] if external else [])
|
||||
languages_set = set(languages)
|
||||
if languages_set.issubset(existing_flat) or (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
|
||||
# all subs found
|
||||
Log.Info(u"All subtitles exist for '%s'", item_title)
|
||||
return
|
||||
missing_from_part = languages_set - existing_flat
|
||||
|
||||
missing = languages_set - set(existing_flat)
|
||||
Log.Info(u"Subs still missing for '%s': %s", item_title, missing)
|
||||
if missing_from_part:
|
||||
Log.Info(u"Subs still missing for '%s' (%s: %s): %s", item_title, rating_key, media.id,
|
||||
missing_from_part)
|
||||
missing.update(missing_from_part)
|
||||
|
||||
if missing:
|
||||
return added_at, item_id, item_title, item, missing
|
||||
|
||||
|
||||
def items_get_all_missing_subs(items):
|
||||
def items_get_all_missing_subs(items, sleep_after_request=False):
|
||||
missing = []
|
||||
for added_at, kind, section_title, key in items:
|
||||
try:
|
||||
@@ -65,13 +69,13 @@ def items_get_all_missing_subs(items):
|
||||
missing.append(state)
|
||||
except:
|
||||
Log.Error("Something went wrong when getting the state of item %s: %s", key, traceback.format_exc())
|
||||
if sleep_after_request:
|
||||
time.sleep(sleep_after_request)
|
||||
return missing
|
||||
|
||||
|
||||
def refresh_item(item):
    """Trigger a Plex metadata refresh for the item, unless config.no_refresh is set."""
    if not config.no_refresh:
        Plex["library/metadata"].refresh(item)
|
||||
|
||||
|
||||
def refresh_items(items):
    """Call refresh_item() for every (item, title) pair in items; the title is not used."""
    for item, title in items:
        refresh_item(item)
|
||||
|
||||
@@ -1,15 +1,14 @@
|
||||
# coding=utf-8
|
||||
|
||||
import os
|
||||
from urllib2 import URLError
|
||||
|
||||
import helpers
|
||||
|
||||
from config import config
|
||||
from items import get_item
|
||||
from lib import get_intent, Plex
|
||||
from config import config
|
||||
from subzero.video import parse_video
|
||||
|
||||
|
||||
def get_metadata_dict(item, part, add):
|
||||
data = {
|
||||
"item": item,
|
||||
@@ -22,6 +21,54 @@ def get_metadata_dict(item, part, add):
|
||||
return data
|
||||
|
||||
|
||||
# GUID prefixes used to extract IMDB/TheTVDB ids from Plex item guids
imdb_guid_identifier = "com.plexapp.agents.imdb://"
tvdb_guid_identifier = "com.plexapp.agents.thetvdb://"
|
||||
|
||||
|
||||
def get_plexapi_stream_info(plex_item, part_id=None):
    """Collect basic stream information for one part of a Plex item.

    :param plex_item: item exposing ``media``, each with ``parts`` and codec attributes
    :param part_id: id of the wanted part (str or int); if falsy, the first part found is used
    :return: dict of the form {"stream": {...}}; the inner dict holds "video_codec",
             "audio_codec", "audio_channels" and, if a video stream exists,
             "resolution". It is empty when no matching part is found.
    """
    d = {"stream": {}}
    data = d["stream"]

    # find current part
    current_part = None
    current_media = None
    for media in plex_item.media:
        for part in media.parts:
            # compare as strings on both sides so int and str part ids match alike
            if not part_id or str(part.id) == str(part_id):
                current_part = part
                current_media = media
                break
        if current_part:
            break

    if not current_part:
        return d

    data["video_codec"] = current_media.video_codec
    data["audio_codec"] = current_media.audio_codec.upper()

    # the "DCA" codec name is reported as "DTS"
    if data["audio_codec"] == "DCA":
        data["audio_codec"] = "DTS"

    if current_media.audio_channels == 8:
        data["audio_channels"] = "7.1"

    elif current_media.audio_channels == 6:
        data["audio_channels"] = "5.1"
    else:
        data["audio_channels"] = "%s.0" % str(current_media.audio_channels)

    # iter streams
    for stream in current_part.streams:
        if stream.stream_type == 1:
            # video stream; only the first one is considered
            data["resolution"] = "%s%s" % (current_media.video_resolution,
                                           "i" if stream.scan_type != "progressive" else "p")
            break

    return d
|
||||
|
||||
|
||||
def media_to_videos(media, kind="series"):
|
||||
"""
|
||||
iterates through media and returns the associated parts (videos)
|
||||
@@ -31,36 +78,61 @@ def media_to_videos(media, kind="series"):
|
||||
"""
|
||||
videos = []
|
||||
|
||||
# this is a Show or a Movie object
|
||||
plex_item = get_item(media.id)
|
||||
year = plex_item.year
|
||||
original_title = plex_item.title_original
|
||||
|
||||
if kind == "series":
|
||||
for season in media.seasons:
|
||||
season_object = media.seasons[season]
|
||||
for episode in media.seasons[season].episodes:
|
||||
ep = media.seasons[season].episodes[episode]
|
||||
|
||||
tvdb_id = None
|
||||
series_tvdb_id = None
|
||||
if tvdb_guid_identifier in ep.guid:
|
||||
tvdb_id = ep.guid[len(tvdb_guid_identifier):].split("?")[0]
|
||||
series_tvdb_id = tvdb_id.split("/")[0]
|
||||
|
||||
# get plex item via API for additional metadata
|
||||
plex_episode = get_item(ep.id)
|
||||
stream_info = get_plexapi_stream_info(plex_episode)
|
||||
|
||||
for item in media.seasons[season].episodes[episode].items:
|
||||
for part in item.parts:
|
||||
videos.append(
|
||||
get_metadata_dict(plex_episode, part,
|
||||
{"plex_part": part, "type": "episode", "title": ep.title,
|
||||
"series": media.title, "id": ep.id,
|
||||
"series_id": media.id, "season_id": season_object.id,
|
||||
"episode": plex_episode.index, "season": plex_episode.season.index,
|
||||
"section": plex_episode.section.title
|
||||
})
|
||||
dict(stream_info, **{"plex_part": part, "type": "episode",
|
||||
"title": ep.title,
|
||||
"series": media.title, "id": ep.id, "year": year,
|
||||
"series_id": media.id,
|
||||
"season_id": season_object.id,
|
||||
"imdb_id": None, "series_tvdb_id": series_tvdb_id,
|
||||
"tvdb_id": tvdb_id,
|
||||
"original_title": original_title,
|
||||
"episode": plex_episode.index,
|
||||
"season": plex_episode.season.index,
|
||||
"section": plex_episode.section.title
|
||||
})
|
||||
)
|
||||
)
|
||||
else:
|
||||
plex_item = get_item(media.id)
|
||||
stream_info = get_plexapi_stream_info(plex_item)
|
||||
imdb_id = None
|
||||
if imdb_guid_identifier in media.guid:
|
||||
imdb_id = media.guid[len(imdb_guid_identifier):].split("?")[0]
|
||||
for item in media.items:
|
||||
for part in item.parts:
|
||||
videos.append(
|
||||
get_metadata_dict(plex_item, part, {"plex_part": part, "type": "movie",
|
||||
"title": media.title, "id": media.id,
|
||||
"series_id": None,
|
||||
"season_id": None,
|
||||
"section": plex_item.section.title})
|
||||
get_metadata_dict(plex_item, part, dict(stream_info, **{"plex_part": part, "type": "movie",
|
||||
"title": media.title, "id": media.id,
|
||||
"series_id": None, "year": year,
|
||||
"season_id": None, "imdb_id": imdb_id,
|
||||
"original_title": original_title,
|
||||
"series_tvdb_id": None, "tvdb_id": None,
|
||||
"section": plex_item.section.title})
|
||||
)
|
||||
)
|
||||
return videos
|
||||
|
||||
@@ -92,10 +164,10 @@ def get_media_item_ids(media, kind="series"):
|
||||
return ids
|
||||
|
||||
|
||||
def scan_video(plex_part, ignore_all=False, hints=None, rating_key=None):
|
||||
def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, no_refining=False):
|
||||
"""
|
||||
returnes a subliminal/guessit-refined parsed video
|
||||
:param plex_part:
|
||||
:param pms_video_info:
|
||||
:param ignore_all:
|
||||
:param hints:
|
||||
:param rating_key:
|
||||
@@ -104,14 +176,19 @@ def scan_video(plex_part, ignore_all=False, hints=None, rating_key=None):
|
||||
embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
|
||||
external_subtitles = not ignore_all and Prefs['subtitles.scan.external']
|
||||
|
||||
plex_part = pms_video_info["plex_part"]
|
||||
|
||||
if ignore_all:
|
||||
Log.Debug("Force refresh intended.")
|
||||
|
||||
Log.Debug("Scanning video: %s, subtitles=%s, embedded_subtitles=%s" % (
|
||||
Log.Debug("Scanning video: %s, external_subtitles=%s, embedded_subtitles=%s" % (
|
||||
plex_part.file, external_subtitles, embedded_subtitles))
|
||||
|
||||
known_embedded = []
|
||||
parts = list(Plex["library"].metadata(rating_key))[0].media.parts
|
||||
parts = []
|
||||
for media in list(Plex["library"].metadata(rating_key))[0].media:
|
||||
parts += media.parts
|
||||
|
||||
plexpy_part = None
|
||||
for part in parts:
|
||||
if int(part.id) == int(plex_part.id):
|
||||
@@ -139,17 +216,19 @@ def scan_video(plex_part, ignore_all=False, hints=None, rating_key=None):
|
||||
|
||||
try:
|
||||
# get basic video info scan (filename)
|
||||
video = parse_video(plex_part.file, hints, external_subtitles=external_subtitles,
|
||||
video = parse_video(plex_part.file, pms_video_info, hints, external_subtitles=external_subtitles,
|
||||
embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
|
||||
forced_only=config.forced_only, video_fps=plex_part.fps)
|
||||
forced_only=config.forced_only, no_refining=no_refining)
|
||||
|
||||
# add video fps info
|
||||
video.fps = plex_part.fps
|
||||
return video
|
||||
|
||||
except ValueError:
|
||||
Log.Warn("File could not be guessed by subliminal: %s" % plex_part.file)
|
||||
|
||||
|
||||
def scan_videos(videos, kind="series", ignore_all=False):
|
||||
def scan_videos(videos, kind="series", ignore_all=False, no_refining=False):
|
||||
"""
|
||||
receives a list of videos containing dictionaries returned by media_to_videos
|
||||
:param videos:
|
||||
@@ -165,8 +244,8 @@ def scan_videos(videos, kind="series", ignore_all=False):
|
||||
|
||||
hints = helpers.get_item_hints(video)
|
||||
video["plex_part"].fps = get_stream_fps(video["plex_part"].streams)
|
||||
scanned_video = scan_video(video["plex_part"], ignore_all=force_refresh or ignore_all, hints=hints,
|
||||
rating_key=video["id"])
|
||||
scanned_video = scan_video(video, ignore_all=force_refresh or ignore_all, hints=hints,
|
||||
rating_key=video["id"], no_refining=no_refining)
|
||||
|
||||
if not scanned_video:
|
||||
continue
|
||||
@@ -179,49 +258,79 @@ def scan_videos(videos, kind="series", ignore_all=False):
|
||||
return ret
|
||||
|
||||
|
||||
class PartUnknownException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def get_plex_metadata(rating_key, part_id, item_type):
|
||||
def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
|
||||
"""
|
||||
uses the Plex 3rd party API accessor to get metadata information
|
||||
|
||||
:param rating_key:
|
||||
:param rating_key: movie or episode
|
||||
:param part_id:
|
||||
:param item_type:
|
||||
:return:
|
||||
"""
|
||||
|
||||
plex_item = list(Plex["library"].metadata(rating_key))[0]
|
||||
if not plex_item:
|
||||
plex_item = get_item(rating_key)
|
||||
|
||||
if not plex_item:
|
||||
return
|
||||
|
||||
# find current part
|
||||
current_part = None
|
||||
for part in plex_item.media.parts:
|
||||
if str(part.id) == part_id:
|
||||
current_part = part
|
||||
for media in plex_item.media:
|
||||
for part in media.parts:
|
||||
if str(part.id) == str(part_id):
|
||||
current_part = part
|
||||
|
||||
if not current_part:
|
||||
raise PartUnknownException("Part unknown")
|
||||
raise helpers.PartUnknownException("Part unknown")
|
||||
|
||||
stream_info = get_plexapi_stream_info(plex_item, part_id)
|
||||
|
||||
# get normalized metadata
|
||||
# fixme: duplicated logic of media_to_videos
|
||||
if item_type == "episode":
|
||||
show = list(Plex["library"].metadata(plex_item.show.rating_key))[0]
|
||||
year = show.year
|
||||
tvdb_id = None
|
||||
series_tvdb_id = None
|
||||
original_title = show.title_original
|
||||
if tvdb_guid_identifier in plex_item.guid:
|
||||
tvdb_id = plex_item.guid[len(tvdb_guid_identifier):].split("?")[0]
|
||||
series_tvdb_id = tvdb_id.split("/")[0]
|
||||
metadata = get_metadata_dict(plex_item, current_part,
|
||||
{"plex_part": current_part, "type": "episode", "title": plex_item.title,
|
||||
"series": plex_item.show.title, "id": plex_item.rating_key,
|
||||
"series_id": plex_item.show.rating_key,
|
||||
"season_id": plex_item.season.rating_key,
|
||||
"season": plex_item.season.index,
|
||||
"episode": plex_item.index
|
||||
})
|
||||
dict(stream_info,
|
||||
**{"plex_part": current_part, "type": "episode", "title": plex_item.title,
|
||||
"series": plex_item.show.title, "id": plex_item.rating_key,
|
||||
"series_id": plex_item.show.rating_key,
|
||||
"season_id": plex_item.season.rating_key,
|
||||
"imdb_id": None,
|
||||
"year": year,
|
||||
"tvdb_id": tvdb_id,
|
||||
"series_tvdb_id": series_tvdb_id,
|
||||
"original_title": original_title,
|
||||
"season": plex_item.season.index,
|
||||
"episode": plex_item.index
|
||||
})
|
||||
)
|
||||
else:
|
||||
metadata = get_metadata_dict(plex_item, current_part, {"plex_part": current_part, "type": "movie",
|
||||
"title": plex_item.title, "id": plex_item.rating_key,
|
||||
"series_id": None,
|
||||
"season_id": None,
|
||||
"season": None,
|
||||
"episode": None,
|
||||
"section": plex_item.section.title})
|
||||
imdb_id = None
|
||||
original_title = plex_item.title_original
|
||||
if imdb_guid_identifier in plex_item.guid:
|
||||
imdb_id = plex_item.guid[len(imdb_guid_identifier):].split("?")[0]
|
||||
metadata = get_metadata_dict(plex_item, current_part,
|
||||
dict(stream_info, **{"plex_part": current_part, "type": "movie",
|
||||
"title": plex_item.title, "id": plex_item.rating_key,
|
||||
"series_id": None,
|
||||
"season_id": None,
|
||||
"imdb_id": imdb_id,
|
||||
"year": plex_item.year,
|
||||
"tvdb_id": None,
|
||||
"series_tvdb_id": None,
|
||||
"original_title": original_title,
|
||||
"season": None,
|
||||
"episode": None,
|
||||
"section": plex_item.section.title})
|
||||
)
|
||||
return metadata
|
||||
|
||||
|
||||
@@ -257,3 +366,24 @@ class PMSMediaProxy(object):
|
||||
break
|
||||
|
||||
m = m.children[0]
|
||||
|
||||
def get_all_parts(self):
|
||||
"""
|
||||
walk the mediatree until the given part was found; if no part was given, return the first one
|
||||
:param part_id:
|
||||
:return:
|
||||
"""
|
||||
m = self.mediatree
|
||||
parts = []
|
||||
while 1:
|
||||
if m.items:
|
||||
media_item = m.items[0]
|
||||
for part in media_item.parts:
|
||||
parts.append(part)
|
||||
break
|
||||
|
||||
if not m.children:
|
||||
break
|
||||
|
||||
m = m.children[0]
|
||||
return parts
|
||||
|
||||
Executable → Regular
+8
-3
@@ -72,7 +72,7 @@ class DefaultScheduler(object):
|
||||
try:
|
||||
task_frequency = Prefs["scheduler.tasks.%s.frequency" % task.name]
|
||||
except KeyError:
|
||||
task_frequency = None
|
||||
task_frequency = getattr(task, "frequency", None)
|
||||
|
||||
self.tasks[task.name] = {"task": task, "frequency": parse_frequency(task_frequency)}
|
||||
|
||||
@@ -168,6 +168,7 @@ class DefaultScheduler(object):
|
||||
for args, kwargs in queue:
|
||||
Log.Debug("Dispatching single task: %s, %s", args, kwargs)
|
||||
Thread.Create(self.run_task, True, *args, **kwargs)
|
||||
Thread.Sleep(5.0)
|
||||
|
||||
# scheduled tasks
|
||||
for name, info in self.tasks.iteritems():
|
||||
@@ -185,9 +186,13 @@ class DefaultScheduler(object):
|
||||
continue
|
||||
|
||||
if not task.last_run or (task.last_run + datetime.timedelta(**{frequency_key: frequency_num}) <= now):
|
||||
self.run_task(name)
|
||||
# fixme: scheduled tasks run synchronously. is this the best idea?
|
||||
Thread.Create(self.run_task, True, name)
|
||||
#Thread.Sleep(5.0)
|
||||
#self.run_task(name)
|
||||
Thread.Sleep(5.0)
|
||||
|
||||
Thread.Sleep(5.0)
|
||||
Thread.Sleep(1)
|
||||
|
||||
|
||||
scheduler = DefaultScheduler()
|
||||
@@ -5,61 +5,24 @@ import os
|
||||
import pprint
|
||||
import copy
|
||||
|
||||
import subliminal
|
||||
from items import get_item
|
||||
from subliminal_patch.core import save_subtitles as subliminal_save_subtitles
|
||||
from subzero.subtitle_storage import StoredSubtitlesManager
|
||||
|
||||
from subtitlehelpers import force_utf8
|
||||
from config import config
|
||||
from helpers import notify_executable, get_title_for_video_metadata, cast_bool, force_unicode
|
||||
from plex_media import PMSMediaProxy
|
||||
from support.items import get_item
|
||||
|
||||
|
||||
get_subtitle_storage = lambda: StoredSubtitlesManager(Data, get_item)
|
||||
|
||||
|
||||
def whack_missing_parts(scanned_video_part_map, existing_parts=None):
|
||||
"""
|
||||
cleans out our internal storage's video parts (parts may get updated/deleted/whatever)
|
||||
:param existing_parts: optional list of part ids known
|
||||
:param scanned_video_part_map: videos to check for
|
||||
:return:
|
||||
"""
|
||||
# shortcut
|
||||
|
||||
if "subs" not in Dict:
|
||||
return
|
||||
|
||||
if not existing_parts:
|
||||
existing_parts = []
|
||||
for part in scanned_video_part_map.viewvalues():
|
||||
existing_parts.append(str(part.id))
|
||||
|
||||
whacked_parts = False
|
||||
for video in scanned_video_part_map.keys():
|
||||
video_id = str(video.id)
|
||||
if video_id not in Dict["subs"]:
|
||||
continue
|
||||
|
||||
parts = Dict["subs"][video_id].keys()
|
||||
|
||||
for part_id in parts:
|
||||
part_id = str(part_id)
|
||||
if part_id not in existing_parts:
|
||||
Log.Info("Whacking part %s in internal storage of video %s (%s, %s)", part_id, video_id,
|
||||
repr(existing_parts), repr(parts))
|
||||
del Dict["subs"][video_id][part_id]
|
||||
whacked_parts = True
|
||||
|
||||
if whacked_parts:
|
||||
Dict.Save()
|
||||
def get_subtitle_storage():
|
||||
return StoredSubtitlesManager(Data, get_item)
|
||||
|
||||
|
||||
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a"):
|
||||
"""
|
||||
stores information about downloaded subtitles in plex's Dict()
|
||||
"""
|
||||
existing_parts = []
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
part = scanned_video_part_map[video]
|
||||
part_id = str(part.id)
|
||||
@@ -71,27 +34,21 @@ def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_ty
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load_or_new(plex_item)
|
||||
|
||||
existing_parts.append(part_id)
|
||||
|
||||
stored_any = False
|
||||
for subtitle in video_subtitles:
|
||||
lang = Locale.Language.Match(subtitle.language.alpha2)
|
||||
Log.Debug(u"Adding subtitle to storage: %s, %s, %s" % (video_id, part_id, title))
|
||||
lang = str(subtitle.language)
|
||||
subtitle.set_encoding("utf-8")
|
||||
Log.Debug(u"Adding subtitle to storage: %s, %s, %s, %s" % (video_id, part_id, title,
|
||||
subtitle.guess_encoding()))
|
||||
ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode)
|
||||
|
||||
if ret_val:
|
||||
Log.Debug("Subtitle stored")
|
||||
stored_any = True
|
||||
|
||||
else:
|
||||
Log.Debug("Subtitle already existing in storage")
|
||||
|
||||
if stored_any:
|
||||
Log.Debug("Saving subtitle storage for %s" % video_id)
|
||||
subtitle_storage.save(stored_subs)
|
||||
|
||||
#if existing_parts:
|
||||
# whack_missing_parts(scanned_video_part_map, existing_parts=existing_parts)
|
||||
Log.Debug("Saving subtitle storage for %s" % video_id)
|
||||
subtitle_storage.save(stored_subs)
|
||||
|
||||
|
||||
def reset_storage(key):
|
||||
@@ -107,6 +64,8 @@ def reset_storage(key):
|
||||
|
||||
|
||||
def log_storage(key):
|
||||
if not key:
|
||||
Log.Debug(pprint.pformat(getattr(Dict, "_dict")))
|
||||
if key in Dict:
|
||||
Log.Debug(pprint.pformat(Dict[key]))
|
||||
|
||||
@@ -134,9 +93,9 @@ def save_subtitles_to_file(subtitles):
|
||||
fld = force_unicode(fld)
|
||||
if not os.path.exists(fld):
|
||||
os.makedirs(fld)
|
||||
subliminal.save_subtitles(video, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
|
||||
encode_with=force_utf8 if config.enforce_encoding else None,
|
||||
chmod=config.chmod, forced_tag=config.forced_only, path_decoder=force_unicode)
|
||||
subliminal_save_subtitles(video, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
|
||||
chmod=config.chmod, forced_tag=config.forced_only, path_decoder=force_unicode,
|
||||
debug_mods=config.debug_mods, formats=config.subtitle_formats)
|
||||
return True
|
||||
|
||||
|
||||
@@ -144,7 +103,7 @@ def save_subtitles_to_metadata(videos, subtitles):
|
||||
for video, video_subtitles in subtitles.items():
|
||||
mediaPart = videos[video]
|
||||
for subtitle in video_subtitles:
|
||||
content = force_utf8(subtitle.text) if config.enforce_encoding else subtitle.content
|
||||
content = subtitle.get_modified_content(debug=config.debug_mods)
|
||||
|
||||
if not isinstance(mediaPart, Framework.api.agentkit.MediaPart):
|
||||
# we're being handed a Plex.py model instance here, not an internal PMS MediaPart object.
|
||||
@@ -156,9 +115,29 @@ def save_subtitles_to_metadata(videos, subtitles):
|
||||
return True
|
||||
|
||||
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a"):
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None):
|
||||
"""
|
||||
|
||||
:param scanned_video_part_map:
|
||||
:param downloaded_subtitles:
|
||||
:param mode:
|
||||
:param bare_save: don't trigger anything; don't store information
|
||||
:param mods: enabled mods
|
||||
:return:
|
||||
"""
|
||||
meta_fallback = False
|
||||
save_successful = False
|
||||
|
||||
if mods:
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
|
||||
for subtitle in video_subtitles:
|
||||
Log.Info("Applying mods: %s to %s", mods, subtitle)
|
||||
subtitle.mods = mods
|
||||
subtitle.plex_media_fps = video.fps
|
||||
|
||||
storage = "metadata"
|
||||
if Prefs['subtitles.save.filesystem']:
|
||||
storage = "filesystem"
|
||||
@@ -180,7 +159,11 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a"):
|
||||
Log.Debug("Using metadata as subtitle storage")
|
||||
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles)
|
||||
|
||||
if save_successful and config.notify_executable:
|
||||
if not bare_save and save_successful and config.notify_executable:
|
||||
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
|
||||
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode)
|
||||
if not bare_save and save_successful:
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode)
|
||||
|
||||
return save_successful
|
||||
|
||||
|
||||
@@ -86,7 +86,7 @@ class VobSubSubtitleHelper(SubtitleHelper):
|
||||
|
||||
|
||||
IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$"
|
||||
ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2})?$")
|
||||
ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
|
||||
|
||||
|
||||
def match_ietf_language(s):
|
||||
@@ -129,13 +129,12 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
default = '1'
|
||||
|
||||
# Attempt to extract the language from the filename (e.g. Avatar (2009).eng)
|
||||
language = ""
|
||||
|
||||
# IETF support thanks to https://github.com/hpsbranco/LocalMedia.bundle/commit/4fad9aefedece78a1fa96401304351347f644369
|
||||
# IETF support thanks to
|
||||
# https://github.com/hpsbranco/LocalMedia.bundle/commit/4fad9aefedece78a1fa96401304351347f644369
|
||||
language = Locale.Language.Match(match_ietf_language(file))
|
||||
|
||||
# skip non-SRT if wanted
|
||||
if not helpers.cast_bool(Prefs["subtitles.scan.exotic_ext"]) and ext not in ["srt", "ass", "ssa"]:
|
||||
if not helpers.cast_bool(Prefs["subtitles.scan.exotic_ext"]) and ext not in ["srt", "ass", "ssa", "vtt"]:
|
||||
return lang_sub_map
|
||||
|
||||
codec = None
|
||||
@@ -158,7 +157,7 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
Log("An error occurred while attempting to parse the subtitle file, skipping... : " + self.filename)
|
||||
return lang_sub_map
|
||||
|
||||
if codec is None and ext in ['ass', 'ssa', 'smi', 'srt', 'psb']:
|
||||
if codec is None and ext in ['ass', 'ssa', 'smi', 'srt', 'psb', 'vtt']:
|
||||
codec = ext.replace('ass', 'ssa')
|
||||
|
||||
if format is None:
|
||||
@@ -194,7 +193,10 @@ def get_subtitles_from_metadata(part):
|
||||
def force_utf8(content):
|
||||
a = UnicodeDammit(content)
|
||||
|
||||
Log.Debug("detected encoding: %s (None: most likely already successfully decoded)" % a.original_encoding)
|
||||
if a.original_encoding:
|
||||
Log.Debug("detected encoding: %s (None: most likely already successfully decoded)" % a.original_encoding)
|
||||
else:
|
||||
Log.Debug("detected encoding: unicode (already decoded)")
|
||||
|
||||
# easy way out - already utf-8
|
||||
if a.original_encoding and a.original_encoding == "utf-8":
|
||||
|
||||
+295
-119
@@ -11,13 +11,13 @@ from subliminal import list_subtitles as list_all_subtitles
|
||||
from babelfish import Language
|
||||
|
||||
from missing_subtitles import items_get_all_missing_subs, refresh_item
|
||||
from background import scheduler
|
||||
from storage import save_subtitles, whack_missing_parts, get_subtitle_storage
|
||||
from scheduler import scheduler
|
||||
from storage import save_subtitles, get_subtitle_storage
|
||||
from support.config import config
|
||||
from support.items import get_recent_items, is_ignored, get_item
|
||||
from support.lib import Plex
|
||||
from support.helpers import track_usage, get_title_for_video_metadata, cast_bool
|
||||
from support.plex_media import scan_videos, get_plex_metadata, PartUnknownException
|
||||
from support.items import get_recent_items, get_item, is_ignored
|
||||
from support.helpers import track_usage, get_title_for_video_metadata, cast_bool, PartUnknownException
|
||||
from support.plex_media import scan_videos, get_plex_metadata
|
||||
from download import download_best_subtitles
|
||||
|
||||
|
||||
class Task(object):
|
||||
@@ -80,124 +80,52 @@ class Task(object):
|
||||
return
|
||||
|
||||
def run(self):
|
||||
Log.Info(u"Task: running: %s", self.name)
|
||||
self.time_start = datetime.datetime.now()
|
||||
|
||||
def post_run(self, data_holder):
|
||||
self.running = False
|
||||
self.last_run = datetime.datetime.now()
|
||||
if self.time_start:
|
||||
if self.time_start and self.last_run:
|
||||
self.last_run_time = self.last_run - self.time_start
|
||||
self.time_start = None
|
||||
|
||||
|
||||
class SearchAllRecentlyAddedMissing(Task):
|
||||
periodic = True
|
||||
items_done = None
|
||||
items_searching = None
|
||||
items_searching_ids = None
|
||||
items_failed = None
|
||||
percentage = 0
|
||||
|
||||
stall_time = 30
|
||||
|
||||
def __init__(self, scheduler):
|
||||
super(SearchAllRecentlyAddedMissing, self).__init__(scheduler)
|
||||
self.items_done = None
|
||||
self.items_searching = None
|
||||
self.items_searching_ids = None
|
||||
self.items_failed = None
|
||||
self.percentage = 0
|
||||
|
||||
def signal(self, signal_name, *args, **kwargs):
|
||||
handler = getattr(self, "signal_%s" % signal_name)
|
||||
return handler(*args, **kwargs) if handler else None
|
||||
|
||||
def signal_updated_metadata(self, *args, **kwargs):
|
||||
item_id = int(args[0])
|
||||
|
||||
if self.items_searching_ids is not None and item_id in self.items_searching_ids:
|
||||
self.items_done.append(item_id)
|
||||
return True
|
||||
|
||||
def prepare(self, *args, **kwargs):
|
||||
self.items_done = []
|
||||
recent_items = get_recent_items()
|
||||
missing = items_get_all_missing_subs(recent_items)
|
||||
ids = set([id for added_at, id, title, item, missing_languages in missing if not is_ignored(id, item=item)])
|
||||
self.items_searching = missing
|
||||
self.items_searching_ids = ids
|
||||
self.items_failed = []
|
||||
self.percentage = 0
|
||||
self.ready_for_display = True
|
||||
|
||||
def run(self):
|
||||
super(SearchAllRecentlyAddedMissing, self).run()
|
||||
self.running = True
|
||||
missing_count = len(self.items_searching)
|
||||
items_done_count = 0
|
||||
|
||||
for added_at, item_id, title, item, missing_languages in self.items_searching:
|
||||
Log.Debug(u"Task: %s, triggering refresh for %s (%s)", self.name, title, item_id)
|
||||
refresh_item(item_id)
|
||||
search_started = datetime.datetime.now()
|
||||
tries = 1
|
||||
while 1:
|
||||
if item_id in self.items_done:
|
||||
items_done_count += 1
|
||||
Log.Debug(u"Task: %s, item %s done", self.name, item_id)
|
||||
self.percentage = int(items_done_count * 100 / missing_count)
|
||||
break
|
||||
|
||||
# item considered stalled after self.stall_time seconds passed after last refresh
|
||||
if (datetime.datetime.now() - search_started).total_seconds() > self.stall_time:
|
||||
if tries > 3:
|
||||
self.items_failed.append(item_id)
|
||||
Log.Debug(u"Task: %s, item stalled for %s times: %s, skipping", self.name, tries, item_id)
|
||||
break
|
||||
|
||||
Log.Debug(u"Task: %s, item stalled for %s seconds: %s, retrying", self.name, self.stall_time,
|
||||
item_id)
|
||||
tries += 1
|
||||
refresh_item(item_id)
|
||||
search_started = datetime.datetime.now()
|
||||
time.sleep(1)
|
||||
time.sleep(0.1)
|
||||
# we can't hammer the PMS, otherwise requests will be stalled
|
||||
time.sleep(1)
|
||||
|
||||
Log.Debug("Task: %s, done. Failed items: %s", self.name, self.items_failed)
|
||||
self.running = False
|
||||
|
||||
def post_run(self, task_data):
|
||||
super(SearchAllRecentlyAddedMissing, self).post_run(task_data)
|
||||
self.ready_for_display = False
|
||||
self.percentage = 0
|
||||
self.items_done = None
|
||||
self.items_failed = None
|
||||
self.items_searching = None
|
||||
self.items_searching_ids = None
|
||||
Log.Info(u"Task: ran: %s", self.name)
|
||||
|
||||
|
||||
class SubtitleListingMixin(object):
|
||||
def list_subtitles(self, rating_key, item_type, part_id, language):
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
def list_subtitles(self, rating_key, item_type, part_id, language, skip_wrong_fps=True, metadata=None,
|
||||
scanned_parts=None):
|
||||
|
||||
if item_type == "episode":
|
||||
min_score = 240
|
||||
else:
|
||||
min_score = 60
|
||||
if not metadata:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
if not scanned_parts:
|
||||
Log.Error("Couldn't list available subtitles for %s", rating_key)
|
||||
if not metadata:
|
||||
return
|
||||
|
||||
if not scanned_parts:
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
if not scanned_parts:
|
||||
Log.Error("Couldn't list available subtitles for %s", rating_key)
|
||||
return
|
||||
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
config.init_subliminal_patches()
|
||||
|
||||
provider_settings = config.provider_settings.copy()
|
||||
if not skip_wrong_fps:
|
||||
provider_settings = config.provider_settings.copy()
|
||||
provider_settings["opensubtitles"]["skip_wrong_fps"] = False
|
||||
|
||||
if item_type == "episode":
|
||||
min_score = 240
|
||||
if video.is_special:
|
||||
min_score = 180
|
||||
else:
|
||||
min_score = 60
|
||||
|
||||
available_subs = list_all_subtitles(scanned_parts, {Language.fromietf(language)},
|
||||
providers=config.providers,
|
||||
provider_configs=config.provider_settings,
|
||||
provider_configs=provider_settings,
|
||||
pool_class=config.provider_pool)
|
||||
|
||||
use_hearing_impaired = Prefs['subtitles.search.hearingImpaired'] in ("prefer", "force HI")
|
||||
@@ -247,8 +175,7 @@ class DownloadSubtitleMixin(object):
|
||||
|
||||
if subtitle.content:
|
||||
try:
|
||||
whack_missing_parts(scanned_parts)
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode=mode)
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode=mode, mods=config.default_mods)
|
||||
Log.Debug("Manually downloaded subtitle for: %s", rating_key)
|
||||
download_successful = True
|
||||
refresh_item(rating_key)
|
||||
@@ -266,6 +193,8 @@ class DownloadSubtitleMixin(object):
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle,
|
||||
mode=mode)
|
||||
else:
|
||||
set_refresh_menu_state("Subtitle download failed (%s)" % rating_key)
|
||||
return download_successful
|
||||
|
||||
|
||||
@@ -291,7 +220,13 @@ class AvailableSubsForItem(SubtitleListingMixin, Task):
|
||||
super(AvailableSubsForItem, self).run()
|
||||
self.running = True
|
||||
track_usage("Subtitle", "manual", "list", 1)
|
||||
self.data = self.list_subtitles(self.rating_key, self.item_type, self.part_id, self.language)
|
||||
subs = self.list_subtitles(self.rating_key, self.item_type, self.part_id, self.language, skip_wrong_fps=False)
|
||||
if not subs:
|
||||
self.data = "found_none"
|
||||
return
|
||||
|
||||
# we can't have nasty unpicklable stuff like ZipFile, BytesIO etc in self.data
|
||||
self.data = [s.make_picklable() for s in subs]
|
||||
|
||||
def post_run(self, task_data):
|
||||
super(AvailableSubsForItem, self).post_run(task_data)
|
||||
@@ -335,11 +270,174 @@ class MissingSubtitles(Task):
|
||||
task_data["missing_subtitles"] = self.data
|
||||
|
||||
|
||||
class SearchAllRecentlyAddedMissing(Task):
|
||||
periodic = True
|
||||
|
||||
items_done = None
|
||||
items_searching = None
|
||||
percentage = 0
|
||||
|
||||
def __init__(self, scheduler):
|
||||
super(SearchAllRecentlyAddedMissing, self).__init__(scheduler)
|
||||
self.items_done = None
|
||||
self.items_searching = None
|
||||
self.percentage = 0
|
||||
|
||||
def signal_updated_metadata(self, *args, **kwargs):
|
||||
return True
|
||||
|
||||
def prepare(self):
|
||||
self.items_done = 0
|
||||
self.items_searching = 0
|
||||
self.percentage = 0
|
||||
self.ready_for_display = True
|
||||
|
||||
def run(self):
|
||||
super(SearchAllRecentlyAddedMissing, self).run()
|
||||
|
||||
self.running = True
|
||||
self.prepare()
|
||||
|
||||
from support.history import get_history
|
||||
history = get_history()
|
||||
|
||||
now = datetime.datetime.now()
|
||||
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
|
||||
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
|
||||
|
||||
is_recent_str = Prefs["scheduler.item_is_recent_age"]
|
||||
num, ident = is_recent_str.split()
|
||||
|
||||
max_search_days = 0
|
||||
if ident == "days":
|
||||
max_search_days = int(num)
|
||||
elif ident == "weeks":
|
||||
max_search_days = int(num) * 7
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
recent_sub_fns = subtitle_storage.get_recent_files(age_days=max_search_days)
|
||||
viable_items = {}
|
||||
|
||||
# determine viable items
|
||||
for fn in recent_sub_fns:
|
||||
# added_date <= max_search_days?
|
||||
stored_subs = subtitle_storage.load(filename=fn)
|
||||
if not stored_subs:
|
||||
continue
|
||||
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
continue
|
||||
|
||||
viable_items[fn] = stored_subs
|
||||
|
||||
self.items_searching = len(viable_items)
|
||||
|
||||
download_count = 0
|
||||
videos_with_downloads = 0
|
||||
|
||||
config.init_subliminal_patches()
|
||||
|
||||
Log.Info("%s: Searching for subtitles for %s items", self.name, self.items_searching)
|
||||
|
||||
# search for subtitles in viable items
|
||||
for fn, stored_subs in viable_items.iteritems():
|
||||
video_id = stored_subs.video_id
|
||||
|
||||
if stored_subs.item_type == "episode":
|
||||
min_score = min_score_series
|
||||
else:
|
||||
min_score = min_score_movies
|
||||
|
||||
parts = []
|
||||
plex_item = get_item(video_id)
|
||||
|
||||
if is_ignored(video_id, item=plex_item):
|
||||
continue
|
||||
|
||||
for media in plex_item.media:
|
||||
parts += media.parts
|
||||
|
||||
downloads_per_video = 0
|
||||
for part in parts:
|
||||
part_id = part.id
|
||||
|
||||
try:
|
||||
metadata = get_plex_metadata(video_id, part_id, stored_subs.item_type)
|
||||
except PartUnknownException:
|
||||
Log.Info("%s: Part %s:%s unknown", self.name, video_id, part_id)
|
||||
continue
|
||||
|
||||
if not metadata:
|
||||
Log.Info("%s: Part %s:%s unknown", self.name, video_id, part_id)
|
||||
continue
|
||||
|
||||
Log.Debug("%s: Looking for missing subtitles: %s:%s", self.name, video_id, part_id)
|
||||
scanned_parts = scan_videos([metadata], kind="series"
|
||||
if stored_subs.item_type == "episode" else "movie")
|
||||
|
||||
downloaded_subtitles = download_best_subtitles(scanned_parts, min_score=min_score)
|
||||
download_successful = False
|
||||
|
||||
if downloaded_subtitles:
|
||||
downloaded_any = any(downloaded_subtitles.values())
|
||||
if not downloaded_any:
|
||||
continue
|
||||
|
||||
try:
|
||||
save_subtitles(scanned_parts, downloaded_subtitles, mode="a", mods=config.default_mods)
|
||||
Log.Debug("%s: Downloaded subtitle for item with missing subs: %s", self.name, video_id)
|
||||
download_successful = True
|
||||
refresh_item(video_id)
|
||||
track_usage("Subtitle", "manual", "download", 1)
|
||||
except:
|
||||
Log.Error("%s: Something went wrong when downloading specific subtitle: %s", self.name,
|
||||
traceback.format_exc())
|
||||
finally:
|
||||
item_title = get_title_for_video_metadata(metadata, add_section_title=False)
|
||||
if download_successful:
|
||||
# store item in history
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
|
||||
for subtitle in video_subtitles:
|
||||
downloads_per_video += 1
|
||||
history.add(item_title, video.id, section_title=metadata["section"],
|
||||
subtitle=subtitle,
|
||||
mode="a")
|
||||
|
||||
download_count += downloads_per_video
|
||||
|
||||
if downloads_per_video:
|
||||
videos_with_downloads += 1
|
||||
|
||||
self.items_done = self.items_done + 1
|
||||
self.percentage = int(self.items_done * 100 / self.items_searching)
|
||||
|
||||
if downloads_per_video:
|
||||
time.sleep(5)
|
||||
else:
|
||||
time.sleep(1)
|
||||
|
||||
if download_count:
|
||||
Log.Debug("Task: %s, done. Missing subtitles found for %s/%s items (%s subs downloaded)", self.name,
|
||||
videos_with_downloads, self.items_searching, download_count)
|
||||
else:
|
||||
Log.Debug("Task: %s, done. No subtitles found for %s items", self.name, self.items_searching)
|
||||
|
||||
def post_run(self, task_data):
|
||||
super(SearchAllRecentlyAddedMissing, self).post_run(task_data)
|
||||
self.ready_for_display = False
|
||||
self.percentage = 0
|
||||
self.items_done = None
|
||||
self.items_searching = None
|
||||
|
||||
|
||||
class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
periodic = True
|
||||
|
||||
# TV: episode, format, series, year, season, video_codec, release_group, hearing_impaired
|
||||
series_cutoff = 355
|
||||
# TV: episode, format, series, year, season, video_codec, release_group, hearing_impaired, resolution
|
||||
series_cutoff = 357
|
||||
|
||||
# movies: format, title, release_group, year, video_codec, resolution, hearing_impaired
|
||||
movies_cutoff = 117
|
||||
@@ -362,13 +460,26 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
return
|
||||
|
||||
now = datetime.datetime.now()
|
||||
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
|
||||
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
|
||||
overwrite_manually_modified = cast_bool(
|
||||
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_modified"])
|
||||
overwrite_manually_selected = cast_bool(
|
||||
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected"])
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
recent_subs = subtitle_storage.load_recent_files(age_days=max_search_days)
|
||||
viable_item_count = 0
|
||||
|
||||
for fn, stored_subs in recent_subs.iteritems():
|
||||
video_id = stored_subs.video_id
|
||||
cutoff = self.series_cutoff if stored_subs.item_type == "episode" else self.movies_cutoff
|
||||
|
||||
if stored_subs.item_type == "episode":
|
||||
cutoff = self.series_cutoff
|
||||
min_score = min_score_series
|
||||
else:
|
||||
cutoff = self.movies_cutoff
|
||||
min_score = min_score_movies
|
||||
|
||||
# don't search for better subtitles until at least 30 minutes have passed
|
||||
if stored_subs.added_at + datetime.timedelta(minutes=30) > now:
|
||||
@@ -379,6 +490,7 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
continue
|
||||
|
||||
viable_item_count += 1
|
||||
ditch_parts = []
|
||||
|
||||
# look through all stored subtitle data
|
||||
@@ -398,14 +510,20 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
|
||||
# late cutoff met? skip
|
||||
if current_score >= cutoff:
|
||||
Log.Debug(u"Skipping finding better subs, cutoff met (current: %s, cutoff: %s): %s",
|
||||
current_score, cutoff, stored_subs.title)
|
||||
Log.Debug(u"Skipping finding better subs, cutoff met (current: %s, cutoff: %s): %s (%s)",
|
||||
current_score, cutoff, stored_subs.title, video_id)
|
||||
continue
|
||||
|
||||
# got manual subtitle but don't want to touch those?
|
||||
if current_mode == "m" and \
|
||||
not cast_bool(Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected"]):
|
||||
Log.Debug(u"Skipping finding better subs, had manual: %s", stored_subs.title)
|
||||
if current_mode == "m" and not overwrite_manually_selected:
|
||||
Log.Debug(u"Skipping finding better subs, had manual: %s (%s)", stored_subs.title, video_id)
|
||||
continue
|
||||
|
||||
# subtitle modifications different from default
|
||||
if not overwrite_manually_modified and current.mods \
|
||||
and set(current.mods).difference(set(config.default_mods)):
|
||||
Log.Debug(u"Skipping finding better subs, it has manual modifications: %s (%s)",
|
||||
stored_subs.title, video_id)
|
||||
continue
|
||||
|
||||
try:
|
||||
@@ -420,7 +538,7 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
better_downloaded = False
|
||||
better_tried_download = 0
|
||||
for sub in subs:
|
||||
if sub.score > current_score:
|
||||
if sub.score > current_score and sub.score > min_score:
|
||||
Log.Debug("Better subtitle found for %s, downloading", video_id)
|
||||
better_tried_download += 1
|
||||
ret = self.download_subtitle(sub, video_id, mode="b")
|
||||
@@ -444,9 +562,64 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
pass
|
||||
subtitle_storage.save(stored_subs)
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
if better_found:
|
||||
Log.Debug("Task: %s, done. Better subtitles found for %s items", self.name, better_found)
|
||||
self.running = False
|
||||
Log.Debug("Task: %s, done. Better subtitles found for %s/%s items", self.name, better_found,
|
||||
viable_item_count)
|
||||
else:
|
||||
Log.Debug("Task: %s, done. No better subtitles found for %s items", self.name, viable_item_count)
|
||||
|
||||
|
||||
class SubtitleStorageMaintenance(Task):
    """Weekly task that asks the subtitle storage to drop stale entries.

    The storage's ``delete_missing`` is called with the configured languages
    (as strings); whatever it reports as deleted is logged.
    """
    periodic = True
    frequency = "every 7 days"

    def run(self):
        """Run one cleanup pass and log its result."""
        super(SubtitleStorageMaintenance, self).run()
        self.running = True
        Log.Info("Running subtitle storage maintenance")

        subtitle_storage = get_subtitle_storage()
        wanted = set(str(lang) for lang in config.lang_list)
        removed = subtitle_storage.delete_missing(wanted_languages=wanted)

        if not removed:
            Log.Info("Nothing to do")
            return

        Log.Info("Subtitle information for %d non-existant videos have been cleaned up" % len(removed))
        Log.Debug("Videos: %s" % removed)
|
||||
|
||||
|
||||
class MenuHistoryMaintenance(Task):
    """Weekly task that removes expired entries from ``Dict["menu_history"]``.

    Each entry maps a key to a timeout value; entries whose timeout is earlier
    than the current local time are deleted.
    """
    periodic = True
    frequency = "every 7 days"

    def run(self):
        """Delete every menu history entry whose timeout has passed."""
        super(MenuHistoryMaintenance, self).run()
        self.running = True
        Log.Info("Running menu history maintenance")
        now = datetime.datetime.now()
        if "menu_history" not in Dict:
            return

        # iterate over a snapshot, because entries are deleted while looping
        for key, timeout in Dict["menu_history"].copy().items():
            if now > timeout:
                try:
                    del Dict["menu_history"][key]
                except KeyError:
                    # the entry vanished between the snapshot and the delete;
                    # that is fine, the goal (entry gone) is already reached
                    pass
|
||||
|
||||
|
||||
class MigrateSubtitleStorage(Task):
    """One-off (non-periodic) task that loads every legacy storage file.

    Files already named ``*.json.gz`` are left alone; every other file is
    passed to ``storage.load(None, fn)``.
    NOTE(review): presumably loading rewrites the file in the new format;
    confirm against the storage implementation.
    """
    periodic = False
    frequency = None

    def run(self):
        """Walk all storage files and load those not yet in ``.json.gz`` form."""
        super(MigrateSubtitleStorage, self).run()
        self.running = True
        Log.Info("Running subtitle storage migration")

        subtitle_storage = get_subtitle_storage()
        for filename in subtitle_storage.get_all_files():
            if not filename.endswith(".json.gz"):
                Log.Debug("Migrating %s", filename)
                subtitle_storage.load(None, filename)
|
||||
|
||||
|
||||
scheduler.register(SearchAllRecentlyAddedMissing)
|
||||
@@ -454,3 +627,6 @@ scheduler.register(AvailableSubsForItem)
|
||||
scheduler.register(DownloadSubtitleForItem)
|
||||
scheduler.register(MissingSubtitles)
|
||||
scheduler.register(FindBetterSubtitles)
|
||||
scheduler.register(SubtitleStorageMaintenance)
|
||||
scheduler.register(MigrateSubtitleStorage)
|
||||
scheduler.register(MenuHistoryMaintenance)
|
||||
|
||||
@@ -40,6 +40,8 @@
|
||||
"ro",
|
||||
"ru",
|
||||
"sr",
|
||||
"sr-cyrl",
|
||||
"sr-latn",
|
||||
"sk",
|
||||
"sl",
|
||||
"es",
|
||||
@@ -94,6 +96,8 @@
|
||||
"ro",
|
||||
"ru",
|
||||
"sr",
|
||||
"sr-cyrl",
|
||||
"sr-latn",
|
||||
"sk",
|
||||
"sl",
|
||||
"es",
|
||||
@@ -148,6 +152,8 @@
|
||||
"ro",
|
||||
"ru",
|
||||
"sr",
|
||||
"sr-cyrl",
|
||||
"sr-latn",
|
||||
"sk",
|
||||
"sl",
|
||||
"es",
|
||||
@@ -258,13 +264,14 @@
|
||||
"35",
|
||||
"30",
|
||||
"25",
|
||||
"21",
|
||||
"20",
|
||||
"15",
|
||||
"10",
|
||||
"5",
|
||||
"0"
|
||||
],
|
||||
"default": "25"
|
||||
"default": "21"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.use_random_agents",
|
||||
@@ -332,7 +339,7 @@
|
||||
},
|
||||
{
|
||||
"id": "providers.multithreading",
|
||||
"label": "Search enabled providers simuntaneously (multithreading)",
|
||||
"label": "Search enabled providers simultaneously (multithreading)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
@@ -356,7 +363,7 @@
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.exotic_ext",
|
||||
"label": "Scan: include \"exotic\" external subtitle formats (anything else than .srt/.ssa/.ass)",
|
||||
"label": "Scan: include \"exotic\" subtitle formats (anything else than .srt/.ssa/.ass/.vtt; embedded or external)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
@@ -381,7 +388,7 @@
|
||||
"id": "subtitles.search.minimumMovieScore2",
|
||||
"label": "Minimum score for movies (min: 60, def/sane: 69, min-ideal: 82; see http://v.ht/szscores)",
|
||||
"type": "text",
|
||||
"default": "69"
|
||||
"default": "60"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.search.hearingImpaired",
|
||||
@@ -396,17 +403,65 @@
|
||||
"default": "don't prefer"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.enforce_encoding",
|
||||
"label": "Normalize subtitle encoding to UTF-8",
|
||||
"id": "subtitles.remove_hi",
|
||||
"label": "Remove Hearing Impaired tags from downloaded subtitles",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.fix_common",
|
||||
"label": "Fix common whitespace/punctuation issues in subtitles",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.fix_ocr",
|
||||
"label": "Fix common OCR errors in downloaded subtitles",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.colors",
|
||||
"label": "Change colors of subtitles to",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"don't change",
|
||||
"white",
|
||||
"light-grey",
|
||||
"red",
|
||||
"green",
|
||||
"yellow",
|
||||
"blue",
|
||||
"magenta",
|
||||
"cyan",
|
||||
"black",
|
||||
"dark-red",
|
||||
"dark-green",
|
||||
"dark-yellow",
|
||||
"dark-blue",
|
||||
"dark-magenta",
|
||||
"dark-cyan",
|
||||
"dark-grey"
|
||||
],
|
||||
"default": "don't change"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.save.filesystem",
|
||||
"label": "Store subtitles next to media files (instead of metadata)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.save.formats",
|
||||
"label": "Subtitle formats to save (non-SRT only works if the previous option is enabled)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"SRT",
|
||||
"VTT",
|
||||
"SRT+VTT"
|
||||
],
|
||||
"default": "SRT"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.save.subFolder",
|
||||
"label": "Subtitle Folder (\"current folder\" is the folder the current media file lives in)",
|
||||
@@ -492,7 +547,7 @@
|
||||
"id": "scheduler.max_recent_items_per_library",
|
||||
"label": "Scheduler: Recent items to consider per library",
|
||||
"type": "text",
|
||||
"default": "500"
|
||||
"default": "1000"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.frequency",
|
||||
@@ -516,7 +571,13 @@
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected",
|
||||
"label": "Scheduler: Overwrite manually selected subtitles when better found",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_modified",
|
||||
"label": "Scheduler: Overwrite subtitles with non-default subtitle modifications when better found",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "history_size",
|
||||
@@ -593,7 +654,7 @@
|
||||
},
|
||||
{
|
||||
"id": "notify_executable",
|
||||
"label": "Call this executable upon successful subtitle download",
|
||||
"label": "Call this executable upon successful subtitle download (see Wiki for details)",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
@@ -616,6 +677,12 @@
|
||||
],
|
||||
"default": "WARNING"
|
||||
},
|
||||
{
|
||||
"id": "log_debug_mods",
|
||||
"label": "Log subtitle modification (debug)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "log_console",
|
||||
"label": "Log to console (for development/debugging)",
|
||||
|
||||
+4
-4
@@ -9,11 +9,11 @@
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>2.0.0</string>
|
||||
<string>2.0.24</string>
|
||||
<key>CFBundleSignature</key>
|
||||
<string>????</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>2.0.0.0</string>
|
||||
<string>2.0.24.1558</string>
|
||||
<key>PlexFrameworkVersion</key>
|
||||
<string>2</string>
|
||||
<key>PlexPluginClass</key>
|
||||
@@ -23,7 +23,7 @@
|
||||
<key>PlexPluginConsoleLogging</key>
|
||||
<string>0</string>
|
||||
<key>PlexPluginDevMode</key>
|
||||
<string>1</string>
|
||||
<string>0</string>
|
||||
<key>PlexPluginCodePolicy</key>
|
||||
<!-- this allows channels to access some python methods which are otherwise blocked, as well as import external code libraries, and interact with the PMS HTTP API -->
|
||||
<string>Elevated</string>
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
<h1>Sub-Zero for Plex</h1><i>Subtitles done right</i>
|
||||
|
||||
Version 2.0.0.0 DEV #7
|
||||
Version 2.0.24.1558
|
||||
|
||||
Originally based on @bramwalet's awesome <a href="https://github.com/bramwalet/Subliminal.bundle">Subliminal.bundle</a>
|
||||
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
"""Generic interface to all dbm clones.
|
||||
|
||||
Instead of
|
||||
|
||||
import dbm
|
||||
d = dbm.open(file, 'w', 0666)
|
||||
|
||||
use
|
||||
|
||||
import anydbm
|
||||
d = anydbm.open(file, 'w')
|
||||
|
||||
The returned object is a dbhash, gdbm, dbm or dumbdbm object,
|
||||
dependent on the type of database being opened (determined by whichdb
|
||||
module) in the case of an existing dbm. If the dbm does not exist and
|
||||
the create or new flag ('c' or 'n') was specified, the dbm type will
|
||||
be determined by the availability of the modules (tested in the above
|
||||
order).
|
||||
|
||||
It has the following interface (key and data are strings):
|
||||
|
||||
d[key] = data # store data at key (may override data at
|
||||
# existing key)
|
||||
data = d[key] # retrieve data at key (raise KeyError if no
|
||||
# such key)
|
||||
del d[key] # delete data stored at key (raises KeyError
|
||||
# if no such key)
|
||||
flag = key in d # true if the key exists
|
||||
list = d.keys() # return a list of all existing keys (slow!)
|
||||
|
||||
Future versions may change the order in which implementations are
|
||||
tested for existence, and add interfaces to other dbm-like
|
||||
implementations.
|
||||
"""
|
||||
|
||||
class error(Exception):
    """Error raised by this module itself.

    The name ``error`` is rebound further down to a tuple whose first element
    is this class, followed by the ``error`` of every importable backend.
    """


# backend modules, tried in this order; the first importable one is the default
_names = ['dbhash', 'gdbm', 'dbm', 'dumbdbm']
_errors = [error]
_defaultmod = None

for _name in _names:
    try:
        _mod = __import__(_name)
    except ImportError:
        continue
    if not _defaultmod:
        _defaultmod = _mod
    _errors.append(_mod.error)

if not _defaultmod:
    # call form of raise: valid on Python 2 and Python 3 alike
    raise ImportError("no dbm clone found; tried %s" % _names)

# a tuple, so that ``except anydbm.error`` catches the errors of all backends
error = tuple(_errors)
|
||||
|
||||
def open(file, flag='r', mode=0o666):
    """Open or create database at path given by *file*.

    Optional argument *flag* can be 'r' (default) for read-only access, 'w'
    for read-write access of an existing database, 'c' for read-write access
    to a new or existing database, and 'n' for read-write access to a new
    database.

    Note: 'r' and 'w' fail if the database doesn't exist; 'c' creates it
    only if it doesn't exist; and 'n' always creates a new database.

    *mode* is handed unchanged to the backend's ``open``.

    Raises the first member of the module-level ``error`` tuple when the
    database does not exist and neither 'c' nor 'n' was given, or when the
    type of an existing database cannot be determined.
    """
    # guess the type of an existing database
    from whichdb import whichdb
    result = whichdb(file)
    if result is None:
        # db doesn't exist
        if 'c' in flag or 'n' in flag:
            # file doesn't exist and the new
            # flag was used so use default type
            mod = _defaultmod
        else:
            # ``error`` is a tuple here; Python 2's ``raise tuple, msg`` used
            # its first element, which is spelled out explicitly now
            raise error[0]("need 'c' or 'n' flag to open new db")
    elif result == "":
        # db type cannot be determined
        raise error[0]("db type could not be determined")
    else:
        mod = __import__(result)
    return mod.open(file, flag, mode)
|
||||
@@ -1,25 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
__title__ = 'babelfish'
|
||||
__version__ = '0.5.5-dev'
|
||||
__author__ = 'Antoine Bertin'
|
||||
__license__ = 'BSD'
|
||||
__copyright__ = 'Copyright 2015 the BabelFish authors'
|
||||
|
||||
import sys
|
||||
|
||||
# common "string" base type: ``str`` on Python 3, ``basestring`` on Python 2
# (the unused branch of the conditional expression is never evaluated)
basestr = str if sys.version_info[0] >= 3 else basestring
|
||||
|
||||
from .converters import (LanguageConverter, LanguageReverseConverter, LanguageEquivalenceConverter, CountryConverter,
|
||||
CountryReverseConverter)
|
||||
from .country import country_converters, COUNTRIES, COUNTRY_MATRIX, Country
|
||||
from .exceptions import Error, LanguageConvertError, LanguageReverseError, CountryConvertError, CountryReverseError
|
||||
from .language import language_converters, LANGUAGES, LANGUAGE_MATRIX, Language
|
||||
from .script import SCRIPTS, SCRIPT_MATRIX, Script
|
||||
@@ -1,287 +0,0 @@
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
import collections
|
||||
from pkg_resources import iter_entry_points, EntryPoint
|
||||
from ..exceptions import LanguageConvertError, LanguageReverseError
|
||||
|
||||
|
||||
try:  # Python 3.3+; the aliases in ``collections`` were removed in 3.10
    from collections.abc import Mapping, MutableMapping
except ImportError:  # Python 2
    from collections import Mapping, MutableMapping


# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
class CaseInsensitiveDict(MutableMapping):
    """A case-insensitive ``dict``-like object.

    Implements all methods and operations of ``MutableMapping`` as well as
    dict's ``copy``. Also provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and ``iter(instance)``,
    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
    will contain case-sensitive keys. However, querying and contains
    testing is case insensitive:

        cid = CaseInsensitiveDict()
        cid['English'] = 'eng'
        cid['ENGLISH'] == 'eng'  # True
        list(cid) == ['English']  # True

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.

    """
    def __init__(self, data=None, **kwargs):
        # maps lowercased key -> (key as last set, value)
        self._store = dict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        if isinstance(other, Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        """Return a new instance built from the stored (key, value) pairs."""
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
|
||||
|
||||
|
||||
class LanguageConverter(object):
    """Base class of converters that turn a language into a custom code.

    The language is described by an alpha3 language code plus an optional
    alpha2 country code and an optional script code.

    .. attribute:: codes

        Set of possible custom codes

    """
    def convert(self, alpha3, country=None, script=None):
        """Return the custom code for the given language description.

        This base implementation always raises ``NotImplementedError``;
        subclasses provide the actual conversion.

        :param string alpha3: ISO-639-3 language code
        :param country: ISO-3166 country code, if any
        :type country: string or None
        :param script: ISO-15924 script code, if any
        :type script: string or None
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.LanguageConvertError`

        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class LanguageReverseConverter(LanguageConverter):
    """A :class:`LanguageConverter` that can also go the other way.

    It reverses a custom code into an alpha3 ISO-639-3 language code, an
    alpha2 ISO-3166-1 country code and an ISO-15924 script code.

    """
    def reverse(self, code):
        """Return the language description behind a custom code.

        This base implementation always raises ``NotImplementedError``;
        subclasses provide the actual lookup.

        :param string code: custom code to reverse
        :return: the corresponding alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
        :rtype: tuple
        :raise: :class:`~babelfish.exceptions.LanguageReverseError`

        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class LanguageEquivalenceConverter(LanguageReverseConverter):
    """Helper for defining a :class:`LanguageReverseConverter` from a plain table.

    Subclasses only provide the class variable ``SYMBOLS``, a dict mapping
    alpha3 codes to their custom symbols; both directions are derived from it.

    Setting the class variable ``CASE_SENSITIVE`` to ``True`` makes the reverse
    lookup case-sensitive (it is case-insensitive by default).

    Example::

        class MyCodeConverter(babelfish.LanguageEquivalenceConverter):
            CASE_SENSITIVE = True
            SYMBOLS = {'fra': 'mycode1', 'eng': 'mycode2'}

    """
    CASE_SENSITIVE = False

    def __init__(self):
        pairs = list(self.SYMBOLS.items())

        #: every custom symbol this converter knows
        self.codes = set(symbol for _, symbol in pairs)
        #: alpha3 -> symbol
        self.to_symbol = dict(pairs)
        #: symbol -> (alpha3, country, script); country and script are always None
        self.from_symbol = {} if self.CASE_SENSITIVE else CaseInsensitiveDict()
        for alpha3, symbol in pairs:
            self.from_symbol[symbol] = (alpha3, None, None)

    def convert(self, alpha3, country=None, script=None):
        """Return the symbol for *alpha3*; *country* and *script* only appear in the error."""
        if alpha3 not in self.to_symbol:
            raise LanguageConvertError(alpha3, country, script)
        return self.to_symbol[alpha3]

    def reverse(self, code):
        """Return ``(alpha3, None, None)`` for the symbol *code*."""
        if code not in self.from_symbol:
            raise LanguageReverseError(code)
        return self.from_symbol[code]
|
||||
|
||||
|
||||
class CountryConverter(object):
    """Base class of converters that turn an alpha2 country code into a custom code.

    .. attribute:: codes

        Set of possible custom codes

    """
    def convert(self, alpha2):
        """Return the custom code for an alpha2 country code.

        This base implementation always raises ``NotImplementedError``;
        subclasses provide the actual conversion.

        :param string alpha2: ISO-3166-1 country code
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryConvertError`

        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class CountryReverseConverter(CountryConverter):
    """A :class:`CountryConverter` that can also reverse a custom code.

    The reverse direction yields an alpha2 ISO-3166-1 country code.

    """
    def reverse(self, code):
        """Return the alpha2 code behind a custom code.

        This base implementation always raises ``NotImplementedError``;
        subclasses provide the actual lookup.

        :param string code: custom code to reverse
        :return: the corresponding alpha2 ISO-3166-1 country code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryReverseError`

        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class ConverterManager(object):
    """Dict-like manager for babelfish converters that loads them lazily.

    A converter that is not loaded yet is searched in this order:

        * Entry point converters
        * Registered converters
        * Internal converters

    .. attribute:: entry_point

        The entry point where to look for converters

    .. attribute:: internal_converters

        Internal converters with entry point syntax

    """
    entry_point = ''
    internal_converters = []

    def __init__(self):
        #: Registered converters with entry point syntax
        self.registered_converters = []

        #: Loaded converters
        self.converters = {}

    def __getitem__(self, name):
        """Get a converter, lazy loading it if necessary"""
        if name in self.converters:
            return self.converters[name]

        # converters advertised through installed entry points come first
        for ep in iter_entry_points(self.entry_point):
            if ep.name != name:
                continue
            self.converters[ep.name] = ep.load()()
            return self.converters[ep.name]

        # then the registered ones, finally the internal ones
        for spec in self.registered_converters + self.internal_converters:
            ep = EntryPoint.parse(spec)
            if ep.name != name:
                continue
            # `require` argument of ep.load() is deprecated in newer versions of setuptools
            if hasattr(ep, 'resolve'):
                factory = ep.resolve()
            elif hasattr(ep, '_load'):
                factory = ep._load()
            else:
                factory = ep.load(require=False)
            self.converters[ep.name] = factory()
            return self.converters[ep.name]

        raise KeyError(name)

    def __setitem__(self, name, converter):
        """Load a converter"""
        self.converters[name] = converter

    def __delitem__(self, name):
        """Unload a converter"""
        del self.converters[name]

    def __iter__(self):
        """Iterator over loaded converters"""
        return iter(self.converters)

    def register(self, entry_point):
        """Register a converter

        Newly registered converters are put in front of older ones.

        :param string entry_point: converter to register (entry point syntax)
        :raise: ValueError if already registered

        """
        if entry_point in self.registered_converters:
            raise ValueError('Already registered')
        self.registered_converters.insert(0, entry_point)

    def unregister(self, entry_point):
        """Unregister a converter

        :param string entry_point: converter to unregister (entry point syntax)

        """
        self.registered_converters.remove(entry_point)

    def __contains__(self, name):
        # only converters that are already loaded count
        return name in self.converters
|
||||
@@ -1,17 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class Alpha2Converter(LanguageEquivalenceConverter):
    """Case-sensitive converter between alpha3 codes and their alpha2 codes.

    Only ``LANGUAGE_MATRIX`` rows with a non-empty ``alpha2`` are included.
    """
    CASE_SENSITIVE = True
    # built with a comprehension so that no loop variable is left behind
    # as a stray class attribute
    SYMBOLS = {
        iso_language.alpha3: iso_language.alpha2
        for iso_language in LANGUAGE_MATRIX
        if iso_language.alpha2
    }
|
||||
@@ -1,17 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class Alpha3BConverter(LanguageEquivalenceConverter):
    """Case-sensitive converter between alpha3 codes and their alpha3b codes.

    Only ``LANGUAGE_MATRIX`` rows with a non-empty ``alpha3b`` are included.
    """
    CASE_SENSITIVE = True
    # built with a comprehension so that no loop variable is left behind
    # as a stray class attribute
    SYMBOLS = {
        iso_language.alpha3: iso_language.alpha3b
        for iso_language in LANGUAGE_MATRIX
        if iso_language.alpha3b
    }
|
||||
@@ -1,17 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class Alpha3TConverter(LanguageEquivalenceConverter):
    """Case-sensitive converter between alpha3 codes and their alpha3t codes.

    Only ``LANGUAGE_MATRIX`` rows with a non-empty ``alpha3t`` are included.
    """
    CASE_SENSITIVE = True
    # built with a comprehension so that no loop variable is left behind
    # as a stray class attribute
    SYMBOLS = {
        iso_language.alpha3: iso_language.alpha3t
        for iso_language in LANGUAGE_MATRIX
        if iso_language.alpha3t
    }
|
||||
@@ -1,31 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import CountryReverseConverter, CaseInsensitiveDict
|
||||
from ..country import COUNTRY_MATRIX
|
||||
from ..exceptions import CountryConvertError, CountryReverseError
|
||||
|
||||
|
||||
class CountryNameConverter(CountryReverseConverter):
    """Converter between alpha2 country codes and the names in ``COUNTRY_MATRIX``.

    Name lookups (``reverse``) are case-insensitive.
    """
    def __init__(self):
        self.codes = set()
        self.to_name = {}
        self.from_name = CaseInsensitiveDict()
        for entry in COUNTRY_MATRIX:
            name, alpha2 = entry.name, entry.alpha2
            self.codes.add(name)
            self.to_name[alpha2] = name
            self.from_name[name] = alpha2

    def convert(self, alpha2):
        """Return the country name for *alpha2* or raise ``CountryConvertError``."""
        if alpha2 in self.to_name:
            return self.to_name[alpha2]
        raise CountryConvertError(alpha2)

    def reverse(self, name):
        """Return the alpha2 code for *name* or raise ``CountryReverseError``."""
        if name in self.from_name:
            return self.from_name[name]
        raise CountryReverseError(name)
|
||||
@@ -1,17 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class NameConverter(LanguageEquivalenceConverter):
    """Case-insensitive converter between alpha3 codes and language names.

    Only ``LANGUAGE_MATRIX`` rows with a non-empty ``name`` are included.
    """
    CASE_SENSITIVE = False
    # built with a comprehension so that no loop variable is left behind
    # as a stray class attribute
    SYMBOLS = {
        iso_language.alpha3: iso_language.name
        for iso_language in LANGUAGE_MATRIX
        if iso_language.name
    }
|
||||
@@ -1,36 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageReverseConverter, CaseInsensitiveDict
|
||||
from ..exceptions import LanguageReverseError
|
||||
from ..language import language_converters
|
||||
|
||||
|
||||
class OpenSubtitlesConverter(LanguageReverseConverter):
    """Converter for the language codes used by OpenSubtitles.

    Codes are alpha3b codes, except for a few special cases kept in
    ``to_opensubtitles`` / ``from_opensubtitles``. Reversing additionally
    accepts alpha2 codes.
    """
    def __init__(self):
        self.alpha3b_converter = language_converters['alpha3b']
        self.alpha2_converter = language_converters['alpha2']

        # (alpha3b, country) -> OpenSubtitles code, for the special cases
        self.to_opensubtitles = {
            ('por', 'BR'): 'pob',
            ('gre', None): 'ell',
            ('srp', None): 'scc',
            ('srp', 'ME'): 'mne',
        }
        # OpenSubtitles code -> (alpha3, country), looked up case-insensitively
        self.from_opensubtitles = CaseInsensitiveDict({
            'pob': ('por', 'BR'),
            'pb': ('por', 'BR'),
            'ell': ('ell', None),
            'scc': ('srp', None),
            'mne': ('srp', 'ME'),
        })
        special_codes = set(['pob', 'pb', 'scc', 'mne'])
        self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | special_codes)

    def convert(self, alpha3, country=None, script=None):
        """Return the special code if one is defined, the alpha3b code otherwise."""
        alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
        return self.to_opensubtitles.get((alpha3b, country), alpha3b)

    def reverse(self, opensubtitles):
        """Resolve a code: special cases first, then alpha3b, then alpha2."""
        if opensubtitles in self.from_opensubtitles:
            return self.from_opensubtitles[opensubtitles]
        for fallback in (self.alpha3b_converter, self.alpha2_converter):
            try:
                return fallback.reverse(opensubtitles)
            except LanguageReverseError:
                continue
        raise LanguageReverseError(opensubtitles)
|
||||
@@ -1,23 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageConverter
|
||||
from ..exceptions import LanguageConvertError
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class ScopeConverter(LanguageConverter):
    """Converter from an alpha3 code to the full name of the language's scope."""
    # one-letter scope symbol -> full name
    FULLNAME = {'I': 'individual', 'M': 'macrolanguage', 'S': 'special'}
    # alpha3 -> one-letter scope symbol; built with a comprehension so that no
    # loop variable is left behind as a stray class attribute
    SYMBOLS = {iso_language.alpha3: iso_language.scope for iso_language in LANGUAGE_MATRIX}
    # NOTE(review): holds the one-letter symbols, while convert() returns the
    # full names; confirm this is intended
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """Return the full scope name for *alpha3*.

        Raises ``LanguageConvertError`` when the stored scope symbol has no
        full name; an *alpha3* missing from ``SYMBOLS`` raises ``KeyError``.
        """
        if self.SYMBOLS[alpha3] in self.FULLNAME:
            return self.FULLNAME[self.SYMBOLS[alpha3]]
        raise LanguageConvertError(alpha3, country, script)
|
||||
@@ -1,23 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageConverter
|
||||
from ..exceptions import LanguageConvertError
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class LanguageTypeConverter(LanguageConverter):
    """Converter from an alpha3 code to the full name of the language's type."""
    # one-letter type symbol -> full name
    FULLNAME = {'A': 'ancient', 'C': 'constructed', 'E': 'extinct', 'H': 'historical', 'L': 'living', 'S': 'special'}
    # alpha3 -> one-letter type symbol; built with a comprehension so that no
    # loop variable is left behind as a stray class attribute
    SYMBOLS = {iso_language.alpha3: iso_language.type for iso_language in LANGUAGE_MATRIX}
    # NOTE(review): holds the one-letter symbols, while convert() returns the
    # full names; confirm this is intended
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """Return the full type name for *alpha3*.

        Raises ``LanguageConvertError`` when the stored type symbol has no
        full name; an *alpha3* missing from ``SYMBOLS`` raises ``KeyError``.
        """
        if self.SYMBOLS[alpha3] in self.FULLNAME:
            return self.FULLNAME[self.SYMBOLS[alpha3]]
        raise LanguageConvertError(alpha3, country, script)
|
||||
@@ -1,104 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from .converters import ConverterManager
|
||||
from . import basestr
|
||||
|
||||
|
||||
# alpha2 -> country name
COUNTRIES = {}
# one IsoCountry per parsed line, in file order
COUNTRY_MATRIX = []

#: The namedtuple used in the :data:`COUNTRY_MATRIX`
IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])

f = resource_stream('babelfish', 'data/iso-3166-1.txt')
try:
    # the first line is read and discarded, it is not parsed as a record
    f.readline()
    for l in f:
        # each remaining line is "name;alpha2", UTF-8 encoded
        iso_country = IsoCountry(*l.decode('utf-8').strip().split(';'))
        COUNTRIES[iso_country.alpha2] = iso_country.name
        COUNTRY_MATRIX.append(iso_country)
finally:
    # release the stream even when a line fails to parse
    f.close()
|
||||
|
||||
|
||||
class CountryConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` specialised for country converters"""
    # entry point group searched for externally provided country converters
    entry_point = 'babelfish.country_converters'
    # converters shipped with the package, in entry point syntax
    internal_converters = [
        'name = babelfish.converters.countryname:CountryNameConverter',
    ]


#: module-wide manager instance used for country conversions
country_converters = CountryConverterManager()
|
||||
|
||||
|
||||
class CountryMeta(type):
    """Metaclass of :class:`Country`

    A class attribute that cannot be found and whose name begins with ``from`` is served as
    :meth:`Country.fromcode` with its `converter` argument pre-filled with the rest of the
    name, so that ``Country.frommycode(code)`` calls ``Country.fromcode(code, converter='mycode')``

    """
    def __getattr__(cls, name):
        if not name.startswith('from'):
            # not a converter shortcut: repeat the regular lookup, which raises the
            # usual AttributeError for a missing attribute
            return type.__getattribute__(cls, name)

        # everything after the ``from`` prefix is the converter name
        converter_name = name[4:]
        return partial(cls.fromcode, converter=converter_name)
|
||||
|
||||
|
||||
class Country(CountryMeta(str('CountryBase'), (object,), {})):
    """A country on Earth

    A country is represented by a 2-letter code from the ISO-3166 standard

    :param string country: 2-letter ISO-3166 country code
    :raise ValueError: if `country` is not a key of :data:`COUNTRIES`

    """
    def __init__(self, country):
        if country not in COUNTRIES:
            raise ValueError('%r is not a valid country' % country)

        #: ISO-3166 2-letter country code
        self.alpha2 = country

    @classmethod
    def fromcode(cls, code, converter):
        """Create a :class:`Country` by its `code` using `converter` to
        :meth:`~babelfish.converters.CountryReverseConverter.reverse` it

        :param string code: the code to reverse
        :param string converter: name of the :class:`~babelfish.converters.CountryReverseConverter` to use
        :return: the corresponding :class:`Country` instance
        :rtype: :class:`Country`

        """
        return cls(country_converters[converter].reverse(code))

    def __getstate__(self):
        # the alpha2 code is the whole state of a country
        return self.alpha2

    def __setstate__(self, state):
        self.alpha2 = state

    def __getattr__(self, name):
        """Convert the country with the converter called `name`

        Only reached when regular attribute lookup fails, so ``country.name`` returns
        ``country_converters['name'].convert(country.alpha2)``

        :raise AttributeError: if no converter is registered under `name`

        """
        try:
            converter = country_converters[name]
        except KeyError:
            # NOTE(review): assumes the manager raises KeyError for an unknown name - confirm
            # against ConverterManager.__getitem__. The attribute protocol (hasattr, getattr
            # with a default, copy) only understands AttributeError.
            raise AttributeError(name)
        # errors raised by the converter itself are left untouched
        return converter.convert(self.alpha2)

    def __hash__(self):
        # consistent with __eq__: equal countries share the same alpha2
        return hash(self.alpha2)

    def __eq__(self, other):
        # a country compares equal to its own alpha2 string
        if isinstance(other, basestr):
            return str(self) == other
        if not isinstance(other, Country):
            return False
        return self.alpha2 == other.alpha2

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return '<Country [%s]>' % self

    def __str__(self):
        return self.alpha2
|
||||
@@ -1,45 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
import os.path
|
||||
import tempfile
|
||||
import zipfile
|
||||
import requests
|
||||
|
||||
|
||||
#: Directory of this script, where every data file is written
DATA_DIR = os.path.dirname(__file__)


def _download(url):
    """Return the body of `url` as bytes, raising if the server answers with an error status"""
    r = requests.get(url)
    # without this check an HTTP error page would be stored as if it were data
    r.raise_for_status()
    return r.content


def _save(filename, content):
    """Write the bytes `content` to `filename` inside DATA_DIR"""
    # binary mode: the downloaded content is bytes, which a text-mode file rejects on Python 3
    with open(os.path.join(DATA_DIR, filename), 'wb') as f:
        f.write(content)


def _extract(url, member):
    """Download the zip archive at `url` and extract its `member` into DATA_DIR"""
    with tempfile.TemporaryFile() as f:
        f.write(_download(url))
        with zipfile.ZipFile(f) as z:
            z.extract(member, DATA_DIR)


# Every download completes before its target file is opened, so a failed request
# no longer truncates a data file that is already present.

# iso-3166-1.txt
print('Downloading ISO-3166-1 standard (ISO country codes)...')
_save('iso-3166-1.txt',
      _download('http://www.iso.org/iso/home/standards/country_codes/country_names_and_code_elements_txt.htm').strip())

# iso-639-3.tab
print('Downloading ISO-639-3 standard (ISO language codes)...')
_extract('http://www-01.sil.org/iso639-3/iso-639-3_Code_Tables_20130531.zip', 'iso-639-3.tab')

# iso-15924
print('Downloading ISO-15924 standard (ISO script codes)...')
_extract('http://www.unicode.org/iso15924/iso15924.txt.zip', 'iso15924-utf8-20131012.txt')

# opensubtitles supported languages
print('Downloading OpenSubtitles supported languages...')
_save('opensubtitles_languages.txt', _download('http://www.opensubtitles.org/addons/export_languages.php'))

print('Done!')
|
||||
@@ -1,250 +0,0 @@
|
||||
Country Name;ISO 3166-1-alpha-2 code
|
||||
AFGHANISTAN;AF
|
||||
ÅLAND ISLANDS;AX
|
||||
ALBANIA;AL
|
||||
ALGERIA;DZ
|
||||
AMERICAN SAMOA;AS
|
||||
ANDORRA;AD
|
||||
ANGOLA;AO
|
||||
ANGUILLA;AI
|
||||
ANTARCTICA;AQ
|
||||
ANTIGUA AND BARBUDA;AG
|
||||
ARGENTINA;AR
|
||||
ARMENIA;AM
|
||||
ARUBA;AW
|
||||
AUSTRALIA;AU
|
||||
AUSTRIA;AT
|
||||
AZERBAIJAN;AZ
|
||||
BAHAMAS;BS
|
||||
BAHRAIN;BH
|
||||
BANGLADESH;BD
|
||||
BARBADOS;BB
|
||||
BELARUS;BY
|
||||
BELGIUM;BE
|
||||
BELIZE;BZ
|
||||
BENIN;BJ
|
||||
BERMUDA;BM
|
||||
BHUTAN;BT
|
||||
BOLIVIA, PLURINATIONAL STATE OF;BO
|
||||
BONAIRE, SINT EUSTATIUS AND SABA;BQ
|
||||
BOSNIA AND HERZEGOVINA;BA
|
||||
BOTSWANA;BW
|
||||
BOUVET ISLAND;BV
|
||||
BRAZIL;BR
|
||||
BRITISH INDIAN OCEAN TERRITORY;IO
|
||||
BRUNEI DARUSSALAM;BN
|
||||
BULGARIA;BG
|
||||
BURKINA FASO;BF
|
||||
BURUNDI;BI
|
||||
CAMBODIA;KH
|
||||
CAMEROON;CM
|
||||
CANADA;CA
|
||||
CAPE VERDE;CV
|
||||
CAYMAN ISLANDS;KY
|
||||
CENTRAL AFRICAN REPUBLIC;CF
|
||||
CHAD;TD
|
||||
CHILE;CL
|
||||
CHINA;CN
|
||||
CHRISTMAS ISLAND;CX
|
||||
COCOS (KEELING) ISLANDS;CC
|
||||
COLOMBIA;CO
|
||||
COMOROS;KM
|
||||
CONGO;CG
|
||||
CONGO, THE DEMOCRATIC REPUBLIC OF THE;CD
|
||||
COOK ISLANDS;CK
|
||||
COSTA RICA;CR
|
||||
CÔTE D'IVOIRE;CI
|
||||
CROATIA;HR
|
||||
CUBA;CU
|
||||
CURAÇAO;CW
|
||||
CYPRUS;CY
|
||||
CZECH REPUBLIC;CZ
|
||||
DENMARK;DK
|
||||
DJIBOUTI;DJ
|
||||
DOMINICA;DM
|
||||
DOMINICAN REPUBLIC;DO
|
||||
ECUADOR;EC
|
||||
EGYPT;EG
|
||||
EL SALVADOR;SV
|
||||
EQUATORIAL GUINEA;GQ
|
||||
ERITREA;ER
|
||||
ESTONIA;EE
|
||||
ETHIOPIA;ET
|
||||
FALKLAND ISLANDS (MALVINAS);FK
|
||||
FAROE ISLANDS;FO
|
||||
FIJI;FJ
|
||||
FINLAND;FI
|
||||
FRANCE;FR
|
||||
FRENCH GUIANA;GF
|
||||
FRENCH POLYNESIA;PF
|
||||
FRENCH SOUTHERN TERRITORIES;TF
|
||||
GABON;GA
|
||||
GAMBIA;GM
|
||||
GEORGIA;GE
|
||||
GERMANY;DE
|
||||
GHANA;GH
|
||||
GIBRALTAR;GI
|
||||
GREECE;GR
|
||||
GREENLAND;GL
|
||||
GRENADA;GD
|
||||
GUADELOUPE;GP
|
||||
GUAM;GU
|
||||
GUATEMALA;GT
|
||||
GUERNSEY;GG
|
||||
GUINEA;GN
|
||||
GUINEA-BISSAU;GW
|
||||
GUYANA;GY
|
||||
HAITI;HT
|
||||
HEARD ISLAND AND MCDONALD ISLANDS;HM
|
||||
HOLY SEE (VATICAN CITY STATE);VA
|
||||
HONDURAS;HN
|
||||
HONG KONG;HK
|
||||
HUNGARY;HU
|
||||
ICELAND;IS
|
||||
INDIA;IN
|
||||
INDONESIA;ID
|
||||
IRAN, ISLAMIC REPUBLIC OF;IR
|
||||
IRAQ;IQ
|
||||
IRELAND;IE
|
||||
ISLE OF MAN;IM
|
||||
ISRAEL;IL
|
||||
ITALY;IT
|
||||
JAMAICA;JM
|
||||
JAPAN;JP
|
||||
JERSEY;JE
|
||||
JORDAN;JO
|
||||
KAZAKHSTAN;KZ
|
||||
KENYA;KE
|
||||
KIRIBATI;KI
|
||||
KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF;KP
|
||||
KOREA, REPUBLIC OF;KR
|
||||
KUWAIT;KW
|
||||
KYRGYZSTAN;KG
|
||||
LAO PEOPLE'S DEMOCRATIC REPUBLIC;LA
|
||||
LATVIA;LV
|
||||
LEBANON;LB
|
||||
LESOTHO;LS
|
||||
LIBERIA;LR
|
||||
LIBYA;LY
|
||||
LIECHTENSTEIN;LI
|
||||
LITHUANIA;LT
|
||||
LUXEMBOURG;LU
|
||||
MACAO;MO
|
||||
MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF;MK
|
||||
MADAGASCAR;MG
|
||||
MALAWI;MW
|
||||
MALAYSIA;MY
|
||||
MALDIVES;MV
|
||||
MALI;ML
|
||||
MALTA;MT
|
||||
MARSHALL ISLANDS;MH
|
||||
MARTINIQUE;MQ
|
||||
MAURITANIA;MR
|
||||
MAURITIUS;MU
|
||||
MAYOTTE;YT
|
||||
MEXICO;MX
|
||||
MICRONESIA, FEDERATED STATES OF;FM
|
||||
MOLDOVA, REPUBLIC OF;MD
|
||||
MONACO;MC
|
||||
MONGOLIA;MN
|
||||
MONTENEGRO;ME
|
||||
MONTSERRAT;MS
|
||||
MOROCCO;MA
|
||||
MOZAMBIQUE;MZ
|
||||
MYANMAR;MM
|
||||
NAMIBIA;NA
|
||||
NAURU;NR
|
||||
NEPAL;NP
|
||||
NETHERLANDS;NL
|
||||
NEW CALEDONIA;NC
|
||||
NEW ZEALAND;NZ
|
||||
NICARAGUA;NI
|
||||
NIGER;NE
|
||||
NIGERIA;NG
|
||||
NIUE;NU
|
||||
NORFOLK ISLAND;NF
|
||||
NORTHERN MARIANA ISLANDS;MP
|
||||
NORWAY;NO
|
||||
OMAN;OM
|
||||
PAKISTAN;PK
|
||||
PALAU;PW
|
||||
PALESTINE, STATE OF;PS
|
||||
PANAMA;PA
|
||||
PAPUA NEW GUINEA;PG
|
||||
PARAGUAY;PY
|
||||
PERU;PE
|
||||
PHILIPPINES;PH
|
||||
PITCAIRN;PN
|
||||
POLAND;PL
|
||||
PORTUGAL;PT
|
||||
PUERTO RICO;PR
|
||||
QATAR;QA
|
||||
RÉUNION;RE
|
||||
ROMANIA;RO
|
||||
RUSSIAN FEDERATION;RU
|
||||
RWANDA;RW
|
||||
SAINT BARTHÉLEMY;BL
|
||||
SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA;SH
|
||||
SAINT KITTS AND NEVIS;KN
|
||||
SAINT LUCIA;LC
|
||||
SAINT MARTIN (FRENCH PART);MF
|
||||
SAINT PIERRE AND MIQUELON;PM
|
||||
SAINT VINCENT AND THE GRENADINES;VC
|
||||
SAMOA;WS
|
||||
SAN MARINO;SM
|
||||
SAO TOME AND PRINCIPE;ST
|
||||
SAUDI ARABIA;SA
|
||||
SENEGAL;SN
|
||||
SERBIA;RS
|
||||
SEYCHELLES;SC
|
||||
SIERRA LEONE;SL
|
||||
SINGAPORE;SG
|
||||
SINT MAARTEN (DUTCH PART);SX
|
||||
SLOVAKIA;SK
|
||||
SLOVENIA;SI
|
||||
SOLOMON ISLANDS;SB
|
||||
SOMALIA;SO
|
||||
SOUTH AFRICA;ZA
|
||||
SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS;GS
|
||||
SOUTH SUDAN;SS
|
||||
SPAIN;ES
|
||||
SRI LANKA;LK
|
||||
SUDAN;SD
|
||||
SURINAME;SR
|
||||
SVALBARD AND JAN MAYEN;SJ
|
||||
SWAZILAND;SZ
|
||||
SWEDEN;SE
|
||||
SWITZERLAND;CH
|
||||
SYRIAN ARAB REPUBLIC;SY
|
||||
TAIWAN, PROVINCE OF CHINA;TW
|
||||
TAJIKISTAN;TJ
|
||||
TANZANIA, UNITED REPUBLIC OF;TZ
|
||||
THAILAND;TH
|
||||
TIMOR-LESTE;TL
|
||||
TOGO;TG
|
||||
TOKELAU;TK
|
||||
TONGA;TO
|
||||
TRINIDAD AND TOBAGO;TT
|
||||
TUNISIA;TN
|
||||
TURKEY;TR
|
||||
TURKMENISTAN;TM
|
||||
TURKS AND CAICOS ISLANDS;TC
|
||||
TUVALU;TV
|
||||
UGANDA;UG
|
||||
UKRAINE;UA
|
||||
UNITED ARAB EMIRATES;AE
|
||||
UNITED KINGDOM;GB
|
||||
UNITED STATES;US
|
||||
UNITED STATES MINOR OUTLYING ISLANDS;UM
|
||||
URUGUAY;UY
|
||||
UZBEKISTAN;UZ
|
||||
VANUATU;VU
|
||||
VENEZUELA, BOLIVARIAN REPUBLIC OF;VE
|
||||
VIET NAM;VN
|
||||
VIRGIN ISLANDS, BRITISH;VG
|
||||
VIRGIN ISLANDS, U.S.;VI
|
||||
WALLIS AND FUTUNA;WF
|
||||
WESTERN SAHARA;EH
|
||||
YEMEN;YE
|
||||
ZAMBIA;ZM
|
||||
ZIMBABWE;ZW
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,176 +0,0 @@
|
||||
#
|
||||
# ISO 15924 - Codes for the representation of names of scripts
|
||||
# Codes pour la représentation des noms d’écritures
|
||||
# Format:
|
||||
# Code;N°;English Name;Nom français;PVA;Date
|
||||
#
|
||||
|
||||
Afak;439;Afaka;afaka;;2010-12-21
|
||||
Aghb;239;Caucasian Albanian;aghbanien;;2012-10-16
|
||||
Ahom;338;Ahom, Tai Ahom;âhom;;2012-11-01
|
||||
Arab;160;Arabic;arabe;Arabic;2004-05-01
|
||||
Armi;124;Imperial Aramaic;araméen impérial;Imperial_Aramaic;2009-06-01
|
||||
Armn;230;Armenian;arménien;Armenian;2004-05-01
|
||||
Avst;134;Avestan;avestique;Avestan;2009-06-01
|
||||
Bali;360;Balinese;balinais;Balinese;2006-10-10
|
||||
Bamu;435;Bamum;bamoum;Bamum;2009-06-01
|
||||
Bass;259;Bassa Vah;bassa;;2010-03-26
|
||||
Batk;365;Batak;batik;Batak;2010-07-23
|
||||
Beng;325;Bengali;bengalî;Bengali;2004-05-01
|
||||
Blis;550;Blissymbols;symboles Bliss;;2004-05-01
|
||||
Bopo;285;Bopomofo;bopomofo;Bopomofo;2004-05-01
|
||||
Brah;300;Brahmi;brahma;Brahmi;2010-07-23
|
||||
Brai;570;Braille;braille;Braille;2004-05-01
|
||||
Bugi;367;Buginese;bouguis;Buginese;2006-06-21
|
||||
Buhd;372;Buhid;bouhide;Buhid;2004-05-01
|
||||
Cakm;349;Chakma;chakma;Chakma;2012-02-06
|
||||
Cans;440;Unified Canadian Aboriginal Syllabics;syllabaire autochtone canadien unifié;Canadian_Aboriginal;2004-05-29
|
||||
Cari;201;Carian;carien;Carian;2007-07-02
|
||||
Cham;358;Cham;cham (čam, tcham);Cham;2009-11-11
|
||||
Cher;445;Cherokee;tchérokî;Cherokee;2004-05-01
|
||||
Cirt;291;Cirth;cirth;;2004-05-01
|
||||
Copt;204;Coptic;copte;Coptic;2006-06-21
|
||||
Cprt;403;Cypriot;syllabaire chypriote;Cypriot;2004-05-01
|
||||
Cyrl;220;Cyrillic;cyrillique;Cyrillic;2004-05-01
|
||||
Cyrs;221;Cyrillic (Old Church Slavonic variant);cyrillique (variante slavonne);;2004-05-01
|
||||
Deva;315;Devanagari (Nagari);dévanâgarî;Devanagari;2004-05-01
|
||||
Dsrt;250;Deseret (Mormon);déseret (mormon);Deseret;2004-05-01
|
||||
Dupl;755;Duployan shorthand, Duployan stenography;sténographie Duployé;;2010-07-18
|
||||
Egyd;070;Egyptian demotic;démotique égyptien;;2004-05-01
|
||||
Egyh;060;Egyptian hieratic;hiératique égyptien;;2004-05-01
|
||||
Egyp;050;Egyptian hieroglyphs;hiéroglyphes égyptiens;Egyptian_Hieroglyphs;2009-06-01
|
||||
Elba;226;Elbasan;elbasan;;2010-07-18
|
||||
Ethi;430;Ethiopic (Geʻez);éthiopien (geʻez, guèze);Ethiopic;2004-10-25
|
||||
Geor;240;Georgian (Mkhedruli);géorgien (mkhédrouli);Georgian;2004-05-29
|
||||
Geok;241;Khutsuri (Asomtavruli and Nuskhuri);khoutsouri (assomtavrouli et nouskhouri);Georgian;2012-10-16
|
||||
Glag;225;Glagolitic;glagolitique;Glagolitic;2006-06-21
|
||||
Goth;206;Gothic;gotique;Gothic;2004-05-01
|
||||
Gran;343;Grantha;grantha;;2009-11-11
|
||||
Grek;200;Greek;grec;Greek;2004-05-01
|
||||
Gujr;320;Gujarati;goudjarâtî (gujrâtî);Gujarati;2004-05-01
|
||||
Guru;310;Gurmukhi;gourmoukhî;Gurmukhi;2004-05-01
|
||||
Hang;286;Hangul (Hangŭl, Hangeul);hangûl (hangŭl, hangeul);Hangul;2004-05-29
|
||||
Hani;500;Han (Hanzi, Kanji, Hanja);idéogrammes han (sinogrammes);Han;2009-02-23
|
||||
Hano;371;Hanunoo (Hanunóo);hanounóo;Hanunoo;2004-05-29
|
||||
Hans;501;Han (Simplified variant);idéogrammes han (variante simplifiée);;2004-05-29
|
||||
Hant;502;Han (Traditional variant);idéogrammes han (variante traditionnelle);;2004-05-29
|
||||
Hatr;127;Hatran;hatrénien;;2012-11-01
|
||||
Hebr;125;Hebrew;hébreu;Hebrew;2004-05-01
|
||||
Hira;410;Hiragana;hiragana;Hiragana;2004-05-01
|
||||
Hluw;080;Anatolian Hieroglyphs (Luwian Hieroglyphs, Hittite Hieroglyphs);hiéroglyphes anatoliens (hiéroglyphes louvites, hiéroglyphes hittites);;2011-12-09
|
||||
Hmng;450;Pahawh Hmong;pahawh hmong;;2004-05-01
|
||||
Hrkt;412;Japanese syllabaries (alias for Hiragana + Katakana);syllabaires japonais (alias pour hiragana + katakana);Katakana_Or_Hiragana;2011-06-21
|
||||
Hung;176;Old Hungarian (Hungarian Runic);runes hongroises (ancien hongrois);;2012-10-16
|
||||
Inds;610;Indus (Harappan);indus;;2004-05-01
|
||||
Ital;210;Old Italic (Etruscan, Oscan, etc.);ancien italique (étrusque, osque, etc.);Old_Italic;2004-05-29
|
||||
Java;361;Javanese;javanais;Javanese;2009-06-01
|
||||
Jpan;413;Japanese (alias for Han + Hiragana + Katakana);japonais (alias pour han + hiragana + katakana);;2006-06-21
|
||||
Jurc;510;Jurchen;jurchen;;2010-12-21
|
||||
Kali;357;Kayah Li;kayah li;Kayah_Li;2007-07-02
|
||||
Kana;411;Katakana;katakana;Katakana;2004-05-01
|
||||
Khar;305;Kharoshthi;kharochthî;Kharoshthi;2006-06-21
|
||||
Khmr;355;Khmer;khmer;Khmer;2004-05-29
|
||||
Khoj;322;Khojki;khojkî;;2011-06-21
|
||||
Knda;345;Kannada;kannara (canara);Kannada;2004-05-29
|
||||
Kore;287;Korean (alias for Hangul + Han);coréen (alias pour hangûl + han);;2007-06-13
|
||||
Kpel;436;Kpelle;kpèllé;;2010-03-26
|
||||
Kthi;317;Kaithi;kaithî;Kaithi;2009-06-01
|
||||
Lana;351;Tai Tham (Lanna);taï tham (lanna);Tai_Tham;2009-06-01
|
||||
Laoo;356;Lao;laotien;Lao;2004-05-01
|
||||
Latf;217;Latin (Fraktur variant);latin (variante brisée);;2004-05-01
|
||||
Latg;216;Latin (Gaelic variant);latin (variante gaélique);;2004-05-01
|
||||
Latn;215;Latin;latin;Latin;2004-05-01
|
||||
Lepc;335;Lepcha (Róng);lepcha (róng);Lepcha;2007-07-02
|
||||
Limb;336;Limbu;limbou;Limbu;2004-05-29
|
||||
Lina;400;Linear A;linéaire A;;2004-05-01
|
||||
Linb;401;Linear B;linéaire B;Linear_B;2004-05-29
|
||||
Lisu;399;Lisu (Fraser);lisu (Fraser);Lisu;2009-06-01
|
||||
Loma;437;Loma;loma;;2010-03-26
|
||||
Lyci;202;Lycian;lycien;Lycian;2007-07-02
|
||||
Lydi;116;Lydian;lydien;Lydian;2007-07-02
|
||||
Mahj;314;Mahajani;mahâjanî;;2012-10-16
|
||||
Mand;140;Mandaic, Mandaean;mandéen;Mandaic;2010-07-23
|
||||
Mani;139;Manichaean;manichéen;;2007-07-15
|
||||
Maya;090;Mayan hieroglyphs;hiéroglyphes mayas;;2004-05-01
|
||||
Mend;438;Mende Kikakui;mendé kikakui;;2013-10-12
|
||||
Merc;101;Meroitic Cursive;cursif méroïtique;Meroitic_Cursive;2012-02-06
|
||||
Mero;100;Meroitic Hieroglyphs;hiéroglyphes méroïtiques;Meroitic_Hieroglyphs;2012-02-06
|
||||
Mlym;347;Malayalam;malayâlam;Malayalam;2004-05-01
|
||||
Modi;323;Modi, Moḍī;modî;;2013-10-12
|
||||
Moon;218;Moon (Moon code, Moon script, Moon type);écriture Moon;;2006-12-11
|
||||
Mong;145;Mongolian;mongol;Mongolian;2004-05-01
|
||||
Mroo;199;Mro, Mru;mro;;2010-12-21
|
||||
Mtei;337;Meitei Mayek (Meithei, Meetei);meitei mayek;Meetei_Mayek;2009-06-01
|
||||
Mult;323; Multani;multanî;;2012-11-01
|
||||
Mymr;350;Myanmar (Burmese);birman;Myanmar;2004-05-01
|
||||
Narb;106;Old North Arabian (Ancient North Arabian);nord-arabique;;2010-03-26
|
||||
Nbat;159;Nabataean;nabatéen;;2010-03-26
|
||||
Nkgb;420;Nakhi Geba ('Na-'Khi ²Ggŏ-¹baw, Naxi Geba);nakhi géba;;2009-02-23
|
||||
Nkoo;165;N’Ko;n’ko;Nko;2006-10-10
|
||||
Nshu;499;Nüshu;nüshu;;2010-12-21
|
||||
Ogam;212;Ogham;ogam;Ogham;2004-05-01
|
||||
Olck;261;Ol Chiki (Ol Cemet’, Ol, Santali);ol tchiki;Ol_Chiki;2007-07-02
|
||||
Orkh;175;Old Turkic, Orkhon Runic;orkhon;Old_Turkic;2009-06-01
|
||||
Orya;327;Oriya;oriyâ;Oriya;2004-05-01
|
||||
Osma;260;Osmanya;osmanais;Osmanya;2004-05-01
|
||||
Palm;126;Palmyrene;palmyrénien;;2010-03-26
|
||||
Pauc;263;Pau Cin Hau;paou chin haou;;2013-10-12
|
||||
Perm;227;Old Permic;ancien permien;;2004-05-01
|
||||
Phag;331;Phags-pa;’phags pa;Phags_Pa;2006-10-10
|
||||
Phli;131;Inscriptional Pahlavi;pehlevi des inscriptions;Inscriptional_Pahlavi;2009-06-01
|
||||
Phlp;132;Psalter Pahlavi;pehlevi des psautiers;;2007-11-26
|
||||
Phlv;133;Book Pahlavi;pehlevi des livres;;2007-07-15
|
||||
Phnx;115;Phoenician;phénicien;Phoenician;2006-10-10
|
||||
Plrd;282;Miao (Pollard);miao (Pollard);Miao;2012-02-06
|
||||
Prti;130;Inscriptional Parthian;parthe des inscriptions;Inscriptional_Parthian;2009-06-01
|
||||
Qaaa;900;Reserved for private use (start);réservé à l’usage privé (début);;2004-05-29
|
||||
Qabx;949;Reserved for private use (end);réservé à l’usage privé (fin);;2004-05-29
|
||||
Rjng;363;Rejang (Redjang, Kaganga);redjang (kaganga);Rejang;2009-02-23
|
||||
Roro;620;Rongorongo;rongorongo;;2004-05-01
|
||||
Runr;211;Runic;runique;Runic;2004-05-01
|
||||
Samr;123;Samaritan;samaritain;Samaritan;2009-06-01
|
||||
Sara;292;Sarati;sarati;;2004-05-29
|
||||
Sarb;105;Old South Arabian;sud-arabique, himyarite;Old_South_Arabian;2009-06-01
|
||||
Saur;344;Saurashtra;saurachtra;Saurashtra;2007-07-02
|
||||
Sgnw;095;SignWriting;SignÉcriture, SignWriting;;2006-10-10
|
||||
Shaw;281;Shavian (Shaw);shavien (Shaw);Shavian;2004-05-01
|
||||
Shrd;319;Sharada, Śāradā;charada, shard;Sharada;2012-02-06
|
||||
Sidd;302;Siddham, Siddhaṃ, Siddhamātṛkā;siddham;;2013-10-12
|
||||
Sind;318;Khudawadi, Sindhi;khoudawadî, sindhî;;2010-12-21
|
||||
Sinh;348;Sinhala;singhalais;Sinhala;2004-05-01
|
||||
Sora;398;Sora Sompeng;sora sompeng;Sora_Sompeng;2012-02-06
|
||||
Sund;362;Sundanese;sundanais;Sundanese;2007-07-02
|
||||
Sylo;316;Syloti Nagri;sylotî nâgrî;Syloti_Nagri;2006-06-21
|
||||
Syrc;135;Syriac;syriaque;Syriac;2004-05-01
|
||||
Syre;138;Syriac (Estrangelo variant);syriaque (variante estranghélo);;2004-05-01
|
||||
Syrj;137;Syriac (Western variant);syriaque (variante occidentale);;2004-05-01
|
||||
Syrn;136;Syriac (Eastern variant);syriaque (variante orientale);;2004-05-01
|
||||
Tagb;373;Tagbanwa;tagbanoua;Tagbanwa;2004-05-01
|
||||
Takr;321;Takri, Ṭākrī, Ṭāṅkrī;tâkrî;Takri;2012-02-06
|
||||
Tale;353;Tai Le;taï-le;Tai_Le;2004-10-25
|
||||
Talu;354;New Tai Lue;nouveau taï-lue;New_Tai_Lue;2006-06-21
|
||||
Taml;346;Tamil;tamoul;Tamil;2004-05-01
|
||||
Tang;520;Tangut;tangoute;;2010-12-21
|
||||
Tavt;359;Tai Viet;taï viêt;Tai_Viet;2009-06-01
|
||||
Telu;340;Telugu;télougou;Telugu;2004-05-01
|
||||
Teng;290;Tengwar;tengwar;;2004-05-01
|
||||
Tfng;120;Tifinagh (Berber);tifinagh (berbère);Tifinagh;2006-06-21
|
||||
Tglg;370;Tagalog (Baybayin, Alibata);tagal (baybayin, alibata);Tagalog;2009-02-23
|
||||
Thaa;170;Thaana;thâna;Thaana;2004-05-01
|
||||
Thai;352;Thai;thaï;Thai;2004-05-01
|
||||
Tibt;330;Tibetan;tibétain;Tibetan;2004-05-01
|
||||
Tirh;326;Tirhuta;tirhouta;;2011-12-09
|
||||
Ugar;040;Ugaritic;ougaritique;Ugaritic;2004-05-01
|
||||
Vaii;470;Vai;vaï;Vai;2007-07-02
|
||||
Visp;280;Visible Speech;parole visible;;2004-05-01
|
||||
Wara;262;Warang Citi (Varang Kshiti);warang citi;;2009-11-11
|
||||
Wole;480;Woleai;woléaï;;2010-12-21
|
||||
Xpeo;030;Old Persian;cunéiforme persépolitain;Old_Persian;2006-06-21
|
||||
Xsux;020;Cuneiform, Sumero-Akkadian;cunéiforme suméro-akkadien;Cuneiform;2006-10-10
|
||||
Yiii;460;Yi;yi;Yi;2004-05-01
|
||||
Zinh;994;Code for inherited script;codet pour écriture héritée;Inherited;2009-02-23
|
||||
Zmth;995;Mathematical notation;notation mathématique;;2007-11-26
|
||||
Zsym;996;Symbols;symboles;;2007-11-26
|
||||
Zxxx;997;Code for unwritten documents;codet pour les documents non écrits;;2011-06-21
|
||||
Zyyy;998;Code for undetermined script;codet pour écriture indéterminée;Common;2004-05-29
|
||||
Zzzz;999;Code for uncoded script;codet pour écriture non codée;Unknown;2006-10-10
|
||||
@@ -1,474 +0,0 @@
|
||||
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
|
||||
aar aa Afar, afar 0 0
|
||||
abk ab Abkhazian 0 0
|
||||
ace Achinese 0 0
|
||||
ach Acoli 0 0
|
||||
ada Adangme 0 0
|
||||
ady adyghé 0 0
|
||||
afa Afro-Asiatic (Other) 0 0
|
||||
afh Afrihili 0 0
|
||||
afr af Afrikaans 1 0
|
||||
ain Ainu 0 0
|
||||
aka ak Akan 0 0
|
||||
akk Akkadian 0 0
|
||||
alb sq Albanian 1 1
|
||||
ale Aleut 0 0
|
||||
alg Algonquian languages 0 0
|
||||
alt Southern Altai 0 0
|
||||
amh am Amharic 0 0
|
||||
ang English, Old (ca.450-1100) 0 0
|
||||
apa Apache languages 0 0
|
||||
ara ar Arabic 1 1
|
||||
arc Aramaic 0 0
|
||||
arg an Aragonese 0 0
|
||||
arm hy Armenian 1 0
|
||||
arn Araucanian 0 0
|
||||
arp Arapaho 0 0
|
||||
art Artificial (Other) 0 0
|
||||
arw Arawak 0 0
|
||||
asm as Assamese 0 0
|
||||
ast Asturian, Bable 0 0
|
||||
ath Athapascan languages 0 0
|
||||
aus Australian languages 0 0
|
||||
ava av Avaric 0 0
|
||||
ave ae Avestan 0 0
|
||||
awa Awadhi 0 0
|
||||
aym ay Aymara 0 0
|
||||
aze az Azerbaijani 0 0
|
||||
bad Banda 0 0
|
||||
bai Bamileke languages 0 0
|
||||
bak ba Bashkir 0 0
|
||||
bal Baluchi 0 0
|
||||
bam bm Bambara 0 0
|
||||
ban Balinese 0 0
|
||||
baq eu Basque 1 1
|
||||
bas Basa 0 0
|
||||
bat Baltic (Other) 0 0
|
||||
bej Beja 0 0
|
||||
bel be Belarusian 0 0
|
||||
bem Bemba 0 0
|
||||
ben bn Bengali 1 0
|
||||
ber Berber (Other) 0 0
|
||||
bho Bhojpuri 0 0
|
||||
bih bh Bihari 0 0
|
||||
bik Bikol 0 0
|
||||
bin Bini 0 0
|
||||
bis bi Bislama 0 0
|
||||
bla Siksika 0 0
|
||||
bnt Bantu (Other) 0 0
|
||||
bos bs Bosnian 1 0
|
||||
bra Braj 0 0
|
||||
bre br Breton 1 0
|
||||
btk Batak (Indonesia) 0 0
|
||||
bua Buriat 0 0
|
||||
bug Buginese 0 0
|
||||
bul bg Bulgarian 1 1
|
||||
bur my Burmese 1 0
|
||||
byn Blin 0 0
|
||||
cad Caddo 0 0
|
||||
cai Central American Indian (Other) 0 0
|
||||
car Carib 0 0
|
||||
cat ca Catalan 1 1
|
||||
cau Caucasian (Other) 0 0
|
||||
ceb Cebuano 0 0
|
||||
cel Celtic (Other) 0 0
|
||||
cha ch Chamorro 0 0
|
||||
chb Chibcha 0 0
|
||||
che ce Chechen 0 0
|
||||
chg Chagatai 0 0
|
||||
chi zh Chinese 1 1
|
||||
chk Chuukese 0 0
|
||||
chm Mari 0 0
|
||||
chn Chinook jargon 0 0
|
||||
cho Choctaw 0 0
|
||||
chp Chipewyan 0 0
|
||||
chr Cherokee 0 0
|
||||
chu cu Church Slavic 0 0
|
||||
chv cv Chuvash 0 0
|
||||
chy Cheyenne 0 0
|
||||
cmc Chamic languages 0 0
|
||||
cop Coptic 0 0
|
||||
cor kw Cornish 0 0
|
||||
cos co Corsican 0 0
|
||||
cpe Creoles and pidgins, English based (Other) 0 0
|
||||
cpf Creoles and pidgins, French-based (Other) 0 0
|
||||
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
|
||||
cre cr Cree 0 0
|
||||
crh Crimean Tatar 0 0
|
||||
crp Creoles and pidgins (Other) 0 0
|
||||
csb Kashubian 0 0
|
||||
cus Cushitic (Other)' couchitiques, autres langues 0 0
|
||||
cze cs Czech 1 1
|
||||
dak Dakota 0 0
|
||||
dan da Danish 1 1
|
||||
dar Dargwa 0 0
|
||||
day Dayak 0 0
|
||||
del Delaware 0 0
|
||||
den Slave (Athapascan) 0 0
|
||||
dgr Dogrib 0 0
|
||||
din Dinka 0 0
|
||||
div dv Divehi 0 0
|
||||
doi Dogri 0 0
|
||||
dra Dravidian (Other) 0 0
|
||||
dua Duala 0 0
|
||||
dum Dutch, Middle (ca.1050-1350) 0 0
|
||||
dut nl Dutch 1 1
|
||||
dyu Dyula 0 0
|
||||
dzo dz Dzongkha 0 0
|
||||
efi Efik 0 0
|
||||
egy Egyptian (Ancient) 0 0
|
||||
eka Ekajuk 0 0
|
||||
elx Elamite 0 0
|
||||
eng en English 1 1
|
||||
enm English, Middle (1100-1500) 0 0
|
||||
epo eo Esperanto 1 0
|
||||
est et Estonian 1 1
|
||||
ewe ee Ewe 0 0
|
||||
ewo Ewondo 0 0
|
||||
fan Fang 0 0
|
||||
fao fo Faroese 0 0
|
||||
fat Fanti 0 0
|
||||
fij fj Fijian 0 0
|
||||
fil Filipino 0 0
|
||||
fin fi Finnish 1 1
|
||||
fiu Finno-Ugrian (Other) 0 0
|
||||
fon Fon 0 0
|
||||
fre fr French 1 1
|
||||
frm French, Middle (ca.1400-1600) 0 0
|
||||
fro French, Old (842-ca.1400) 0 0
|
||||
fry fy Frisian 0 0
|
||||
ful ff Fulah 0 0
|
||||
fur Friulian 0 0
|
||||
gaa Ga 0 0
|
||||
gay Gayo 0 0
|
||||
gba Gbaya 0 0
|
||||
gem Germanic (Other) 0 0
|
||||
geo ka Georgian 1 1
|
||||
ger de German 1 1
|
||||
gez Geez 0 0
|
||||
gil Gilbertese 0 0
|
||||
gla gd Gaelic 0 0
|
||||
gle ga Irish 0 0
|
||||
glg gl Galician 1 1
|
||||
glv gv Manx 0 0
|
||||
gmh German, Middle High (ca.1050-1500) 0 0
|
||||
goh German, Old High (ca.750-1050) 0 0
|
||||
gon Gondi 0 0
|
||||
gor Gorontalo 0 0
|
||||
got Gothic 0 0
|
||||
grb Grebo 0 0
|
||||
grc Greek, Ancient (to 1453) 0 0
|
||||
ell el Greek 1 1
|
||||
grn gn Guarani 0 0
|
||||
guj gu Gujarati 0 0
|
||||
gwi Gwich´in 0 0
|
||||
hai Haida 0 0
|
||||
hat ht Haitian 0 0
|
||||
hau ha Hausa 0 0
|
||||
haw Hawaiian 0 0
|
||||
heb he Hebrew 1 1
|
||||
her hz Herero 0 0
|
||||
hil Hiligaynon 0 0
|
||||
him Himachali 0 0
|
||||
hin hi Hindi 1 1
|
||||
hit Hittite 0 0
|
||||
hmn Hmong 0 0
|
||||
hmo ho Hiri Motu 0 0
|
||||
hrv hr Croatian 1 1
|
||||
hun hu Hungarian 1 1
|
||||
hup Hupa 0 0
|
||||
iba Iban 0 0
|
||||
ibo ig Igbo 0 0
|
||||
ice is Icelandic 1 1
|
||||
ido io Ido 0 0
|
||||
iii ii Sichuan Yi 0 0
|
||||
ijo Ijo 0 0
|
||||
iku iu Inuktitut 0 0
|
||||
ile ie Interlingue 0 0
|
||||
ilo Iloko 0 0
|
||||
ina ia Interlingua (International Auxiliary Language Asso 0 0
|
||||
inc Indic (Other) 0 0
|
||||
ind id Indonesian 1 1
|
||||
ine Indo-European (Other) 0 0
|
||||
inh Ingush 0 0
|
||||
ipk ik Inupiaq 0 0
|
||||
ira Iranian (Other) 0 0
|
||||
iro Iroquoian languages 0 0
|
||||
ita it Italian 1 1
|
||||
jav jv Javanese 0 0
|
||||
jpn ja Japanese 1 1
|
||||
jpr Judeo-Persian 0 0
|
||||
jrb Judeo-Arabic 0 0
|
||||
kaa Kara-Kalpak 0 0
|
||||
kab Kabyle 0 0
|
||||
kac Kachin 0 0
|
||||
kal kl Kalaallisut 0 0
|
||||
kam Kamba 0 0
|
||||
kan kn Kannada 0 0
|
||||
kar Karen 0 0
|
||||
kas ks Kashmiri 0 0
|
||||
kau kr Kanuri 0 0
|
||||
kaw Kawi 0 0
|
||||
kaz kk Kazakh 1 0
|
||||
kbd Kabardian 0 0
|
||||
kha Khasi 0 0
|
||||
khi Khoisan (Other) 0 0
|
||||
khm km Khmer 1 1
|
||||
kho Khotanese 0 0
|
||||
kik ki Kikuyu 0 0
|
||||
kin rw Kinyarwanda 0 0
|
||||
kir ky Kirghiz 0 0
|
||||
kmb Kimbundu 0 0
|
||||
kok Konkani 0 0
|
||||
kom kv Komi 0 0
|
||||
kon kg Kongo 0 0
|
||||
kor ko Korean 1 1
|
||||
kos Kosraean 0 0
|
||||
kpe Kpelle 0 0
|
||||
krc Karachay-Balkar 0 0
|
||||
kro Kru 0 0
|
||||
kru Kurukh 0 0
|
||||
kua kj Kuanyama 0 0
|
||||
kum Kumyk 0 0
|
||||
kur ku Kurdish 0 0
|
||||
kut Kutenai 0 0
|
||||
lad Ladino 0 0
|
||||
lah Lahnda 0 0
|
||||
lam Lamba 0 0
|
||||
lao lo Lao 0 0
|
||||
lat la Latin 0 0
|
||||
lav lv Latvian 1 0
|
||||
lez Lezghian 0 0
|
||||
lim li Limburgan 0 0
|
||||
lin ln Lingala 0 0
|
||||
lit lt Lithuanian 1 0
|
||||
lol Mongo 0 0
|
||||
loz Lozi 0 0
|
||||
ltz lb Luxembourgish 1 0
|
||||
lua Luba-Lulua 0 0
|
||||
lub lu Luba-Katanga 0 0
|
||||
lug lg Ganda 0 0
|
||||
lui Luiseno 0 0
|
||||
lun Lunda 0 0
|
||||
luo Luo (Kenya and Tanzania) 0 0
|
||||
lus lushai 0 0
|
||||
mac mk Macedonian 1 1
|
||||
mad Madurese 0 0
|
||||
mag Magahi 0 0
|
||||
mah mh Marshallese 0 0
|
||||
mai Maithili 0 0
|
||||
mak Makasar 0 0
|
||||
mal ml Malayalam 1 0
|
||||
man Mandingo 0 0
|
||||
mao mi Maori 0 0
|
||||
map Austronesian (Other) 0 0
|
||||
mar mr Marathi 0 0
|
||||
mas Masai 0 0
|
||||
may ms Malay 1 1
|
||||
mdf Moksha 0 0
|
||||
mdr Mandar 0 0
|
||||
men Mende 0 0
|
||||
mga Irish, Middle (900-1200) 0 0
|
||||
mic Mi'kmaq 0 0
|
||||
min Minangkabau 0 0
|
||||
mis Miscellaneous languages 0 0
|
||||
mkh Mon-Khmer (Other) 0 0
|
||||
mlg mg Malagasy 0 0
|
||||
mlt mt Maltese 0 0
|
||||
mnc Manchu 0 0
|
||||
mni Manipuri 0 0
|
||||
mno Manobo languages 0 0
|
||||
moh Mohawk 0 0
|
||||
mol mo Moldavian 0 0
|
||||
mon mn Mongolian 1 0
|
||||
mos Mossi 0 0
|
||||
mwl Mirandese 0 0
|
||||
mul Multiple languages 0 0
|
||||
mun Munda languages 0 0
|
||||
mus Creek 0 0
|
||||
mwr Marwari 0 0
|
||||
myn Mayan languages 0 0
|
||||
myv Erzya 0 0
|
||||
nah Nahuatl 0 0
|
||||
nai North American Indian 0 0
|
||||
nap Neapolitan 0 0
|
||||
nau na Nauru 0 0
|
||||
nav nv Navajo 0 0
|
||||
nbl nr Ndebele, South 0 0
|
||||
nde nd Ndebele, North 0 0
|
||||
ndo ng Ndonga 0 0
|
||||
nds Low German 0 0
|
||||
nep ne Nepali 0 0
|
||||
new Nepal Bhasa 0 0
|
||||
nia Nias 0 0
|
||||
nic Niger-Kordofanian (Other) 0 0
|
||||
niu Niuean 0 0
|
||||
nno nn Norwegian Nynorsk 0 0
|
||||
nob nb Norwegian Bokmal 0 0
|
||||
nog Nogai 0 0
|
||||
non Norse, Old 0 0
|
||||
nor no Norwegian 1 1
|
||||
nso Northern Sotho 0 0
|
||||
nub Nubian languages 0 0
|
||||
nwc Classical Newari 0 0
|
||||
nya ny Chichewa 0 0
|
||||
nym Nyamwezi 0 0
|
||||
nyn Nyankole 0 0
|
||||
nyo Nyoro 0 0
|
||||
nzi Nzima 0 0
|
||||
oci oc Occitan 1 1
|
||||
oji oj Ojibwa 0 0
|
||||
ori or Oriya 0 0
|
||||
orm om Oromo 0 0
|
||||
osa Osage 0 0
|
||||
oss os Ossetian 0 0
|
||||
ota Turkish, Ottoman (1500-1928) 0 0
|
||||
oto Otomian languages 0 0
|
||||
paa Papuan (Other) 0 0
|
||||
pag Pangasinan 0 0
|
||||
pal Pahlavi 0 0
|
||||
pam Pampanga 0 0
|
||||
pan pa Panjabi 0 0
|
||||
pap Papiamento 0 0
|
||||
pau Palauan 0 0
|
||||
peo Persian, Old (ca.600-400 B.C.) 0 0
|
||||
per fa Persian 1 1
|
||||
phi Philippine (Other) 0 0
|
||||
phn Phoenician 0 0
|
||||
pli pi Pali 0 0
|
||||
pol pl Polish 1 1
|
||||
pon Pohnpeian 0 0
|
||||
por pt Portuguese 1 1
|
||||
pra Prakrit languages 0 0
|
||||
pro Provençal, Old (to 1500) 0 0
|
||||
pus ps Pushto 0 0
|
||||
que qu Quechua 0 0
|
||||
raj Rajasthani 0 0
|
||||
rap Rapanui 0 0
|
||||
rar Rarotongan 0 0
|
||||
roa Romance (Other) 0 0
|
||||
roh rm Raeto-Romance 0 0
|
||||
rom Romany 0 0
|
||||
run rn Rundi 0 0
|
||||
rup Aromanian 0 0
|
||||
rus ru Russian 1 1
|
||||
sad Sandawe 0 0
|
||||
sag sg Sango 0 0
|
||||
sah Yakut 0 0
|
||||
sai South American Indian (Other) 0 0
|
||||
sal Salishan languages 0 0
|
||||
sam Samaritan Aramaic 0 0
|
||||
san sa Sanskrit 0 0
|
||||
sas Sasak 0 0
|
||||
sat Santali 0 0
|
||||
scc sr Serbian 1 1
|
||||
scn Sicilian 0 0
|
||||
sco Scots 0 0
|
||||
sel Selkup 0 0
|
||||
sem Semitic (Other) 0 0
|
||||
sga Irish, Old (to 900) 0 0
|
||||
sgn Sign Languages 0 0
|
||||
shn Shan 0 0
|
||||
sid Sidamo 0 0
|
||||
sin si Sinhalese 1 1
|
||||
sio Siouan languages 0 0
|
||||
sit Sino-Tibetan (Other) 0 0
|
||||
sla Slavic (Other) 0 0
|
||||
slo sk Slovak 1 1
|
||||
slv sl Slovenian 1 1
|
||||
sma Southern Sami 0 0
|
||||
sme se Northern Sami 0 0
|
||||
smi Sami languages (Other) 0 0
|
||||
smj Lule Sami 0 0
|
||||
smn Inari Sami 0 0
|
||||
smo sm Samoan 0 0
|
||||
sms Skolt Sami 0 0
|
||||
sna sn Shona 0 0
|
||||
snd sd Sindhi 0 0
|
||||
snk Soninke 0 0
|
||||
sog Sogdian 0 0
|
||||
som so Somali 0 0
|
||||
son Songhai 0 0
|
||||
sot st Sotho, Southern 0 0
|
||||
spa es Spanish 1 1
|
||||
srd sc Sardinian 0 0
|
||||
srr Serer 0 0
|
||||
ssa Nilo-Saharan (Other) 0 0
|
||||
ssw ss Swati 0 0
|
||||
suk Sukuma 0 0
|
||||
sun su Sundanese 0 0
|
||||
sus Susu 0 0
|
||||
sux Sumerian 0 0
|
||||
swa sw Swahili 1 0
|
||||
swe sv Swedish 1 1
|
||||
syr Syriac 1 0
|
||||
tah ty Tahitian 0 0
|
||||
tai Tai (Other) 0 0
|
||||
tam ta Tamil 1 0
|
||||
tat tt Tatar 0 0
|
||||
tel te Telugu 1 0
|
||||
tem Timne 0 0
|
||||
ter Tereno 0 0
|
||||
tet Tetum 0 0
|
||||
tgk tg Tajik 0 0
|
||||
tgl tl Tagalog 1 1
|
||||
tha th Thai 1 1
|
||||
tib bo Tibetan 0 0
|
||||
tig Tigre 0 0
|
||||
tir ti Tigrinya 0 0
|
||||
tiv Tiv 0 0
|
||||
tkl Tokelau 0 0
|
||||
tlh Klingon 0 0
|
||||
tli Tlingit 0 0
|
||||
tmh Tamashek 0 0
|
||||
tog Tonga (Nyasa) 0 0
|
||||
ton to Tonga (Tonga Islands) 0 0
|
||||
tpi Tok Pisin 0 0
|
||||
tsi Tsimshian 0 0
|
||||
tsn tn Tswana 0 0
|
||||
tso ts Tsonga 0 0
|
||||
tuk tk Turkmen 0 0
|
||||
tum Tumbuka 0 0
|
||||
tup Tupi languages 0 0
|
||||
tur tr Turkish 1 1
|
||||
tut Altaic (Other) 0 0
|
||||
tvl Tuvalu 0 0
|
||||
twi tw Twi 0 0
|
||||
tyv Tuvinian 0 0
|
||||
udm Udmurt 0 0
|
||||
uga Ugaritic 0 0
|
||||
uig ug Uighur 0 0
|
||||
ukr uk Ukrainian 1 1
|
||||
umb Umbundu 0 0
|
||||
und Undetermined 0 0
|
||||
urd ur Urdu 1 0
|
||||
uzb uz Uzbek 0 0
|
||||
vai Vai 0 0
|
||||
ven ve Venda 0 0
|
||||
vie vi Vietnamese 1 1
|
||||
vol vo Volapük 0 0
|
||||
vot Votic 0 0
|
||||
wak Wakashan languages 0 0
|
||||
wal Walamo 0 0
|
||||
war Waray 0 0
|
||||
was Washo 0 0
|
||||
wel cy Welsh 0 0
|
||||
wen Sorbian languages 0 0
|
||||
wln wa Walloon 0 0
|
||||
wol wo Wolof 0 0
|
||||
xal Kalmyk 0 0
|
||||
xho xh Xhosa 0 0
|
||||
yao Yao 0 0
|
||||
yap Yapese 0 0
|
||||
yid yi Yiddish 0 0
|
||||
yor yo Yoruba 0 0
|
||||
ypk Yupik languages 0 0
|
||||
zap Zapotec 0 0
|
||||
zen Zenaga 0 0
|
||||
zha za Zhuang 0 0
|
||||
znd Zande 0 0
|
||||
zul zu Zulu 0 0
|
||||
zun Zuni 0 0
|
||||
rum ro Romanian 1 1
|
||||
pob pb Brazilian 1 1
|
||||
mne Montenegrin 1 0
|
||||
@@ -1,85 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
class Error(Exception):
    """Root exception from which every babelfish error derives"""
|
||||
|
||||
|
||||
class LanguageError(Error, AttributeError):
    """Common ancestor of the language-related exceptions in babelfish

    It also derives from :class:`AttributeError`, so it can be caught wherever
    a missing attribute would be handled.

    """
|
||||
|
||||
|
||||
class LanguageConvertError(LanguageError):
    """Raised by a converter whose :meth:`~babelfish.converters.LanguageConverter.convert` fails

    :param string alpha3: alpha3 code that failed conversion
    :param country: country code that failed conversion, if any
    :type country: string or None
    :param script: script code that failed conversion, if any
    :type script: string or None

    """
    def __init__(self, alpha3, country=None, script=None):
        self.alpha3, self.country, self.script = alpha3, country, script

    def __str__(self):
        # alpha3 always leads; country then script are appended, dash-separated, when set
        text = self.alpha3
        for part in (self.country, self.script):
            if part is not None:
                text += '-' + part
        return text
|
||||
|
||||
|
||||
class LanguageReverseError(LanguageError):
    """Raised by a converter whose :meth:`~babelfish.converters.LanguageReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        self.code = code

    def __str__(self):
        # the message is the repr of the offending code
        failed_code = self.code
        return repr(failed_code)
|
||||
|
||||
|
||||
class CountryError(Error, AttributeError):
    """Common ancestor of the country-related exceptions in babelfish

    It also derives from :class:`AttributeError`, so it can be caught wherever
    a missing attribute would be handled.

    """
|
||||
|
||||
|
||||
class CountryConvertError(CountryError):
    """Raised by a converter whose :meth:`~babelfish.converters.CountryConverter.convert` fails

    :param string alpha2: alpha2 code that failed conversion

    """
    def __init__(self, alpha2):
        self.alpha2 = alpha2

    def __str__(self):
        # the message is the offending alpha2 code itself, returned as stored
        failed_code = self.alpha2
        return failed_code
|
||||
|
||||
|
||||
class CountryReverseError(CountryError):
    """Raised by a converter whose :meth:`~babelfish.converters.CountryReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        self.code = code

    def __str__(self):
        # the message is the repr of the offending code
        failed_code = self.code
        return repr(failed_code)
|
||||
@@ -1,185 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from .converters import ConverterManager
|
||||
from .country import Country
|
||||
from .exceptions import LanguageConvertError
|
||||
from .script import Script
|
||||
from . import basestr
|
||||
|
||||
|
||||
#: Set of the alpha3 codes found in the bundled ISO-639-3 table
LANGUAGES = set()

#: List of the rows of the bundled ISO-639-3 table as :class:`IsoLanguage` namedtuples
LANGUAGE_MATRIX = []

#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])

f = resource_stream('babelfish', 'data/iso-639-3.tab')
try:
    # the first line is discarded (presumably the column header -- confirm against the data file)
    f.readline()
    for l in f:
        # NOTE: the line is split without being stripped, so the last field
        # (``comment``) keeps the line terminator
        iso_language = IsoLanguage(*l.decode('utf-8').split('\t'))
        LANGUAGES.add(iso_language.alpha3)
        LANGUAGE_MATRIX.append(iso_language)
finally:
    # close the stream even when a row cannot be decoded or unpacked
    f.close()
|
||||
|
||||
|
||||
class LanguageConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` specialised for language converters"""
    #: Entry point group name used for language converters
    entry_point = 'babelfish.language_converters'

    #: Built-in converters, each written as ``name = module:class``
    internal_converters = [
        'alpha2 = babelfish.converters.alpha2:Alpha2Converter',
        'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
        'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
        'name = babelfish.converters.name:NameConverter',
        'scope = babelfish.converters.scope:ScopeConverter',
        'type = babelfish.converters.type:LanguageTypeConverter',
        'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter',
    ]


#: Module-wide manager instance used by :class:`Language`
language_converters = LanguageConverterManager()
|
||||
|
||||
|
||||
class LanguageMeta(type):
    """Metaclass of :class:`Language`

    Any class attribute that is not found and whose name starts with ``from``
    is resolved to :meth:`Language.fromcode` with the rest of the name bound as
    the `converter`, e.g. ``Language.frommycode(code)`` calls
    ``Language.fromcode(code, converter='mycode')``.

    """
    def __getattr__(cls, name):
        if not name.startswith('from'):
            # not a dynamic constructor: use the regular lookup, which raises AttributeError
            return type.__getattribute__(cls, name)
        # the converter name is whatever follows the 'from' prefix; it is not validated here
        converter_name = name[4:]
        return partial(cls.fromcode, converter=converter_name)
|
||||
|
||||
|
||||
class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
    """A human language

    The language part follows the ISO-639 standard. A language may additionally
    carry a :class:`~babelfish.country.Country` and a
    :class:`~babelfish.script.Script`.

    The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)

    :param string language: the language as a 3-letter ISO-639-3 code
    :param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
    :type country: string or :class:`~babelfish.country.Country` or None
    :param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
    :type script: string or :class:`~babelfish.script.Script` or None
    :param unknown: the unknown language as a three-letters ISO-639-3 code to use as fallback
    :type unknown: string or None
    :raise: ValueError if the language could not be recognized and `unknown` is ``None``

    """
    def __init__(self, language, country=None, script=None, unknown=None):
        # the fallback only replaces an unrecognized code, and only when one was given
        if language not in LANGUAGES and unknown is not None:
            language = unknown
        if language not in LANGUAGES:
            raise ValueError('%r is not a valid language' % language)
        self.alpha3 = language

        # None and ready-made instances are stored as-is; anything else is treated as a code
        if country is None or isinstance(country, Country):
            self.country = country
        else:
            self.country = Country(country)
        if script is None or isinstance(script, Script):
            self.script = script
        else:
            self.script = Script(script)

    @classmethod
    def fromcode(cls, code, converter):
        """Build a :class:`Language` from `code`, using `converter` to
        :meth:`~babelfish.converters.LanguageReverseConverter.reverse` it

        :param string code: the code to reverse
        :param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
        :return: the corresponding :class:`Language` instance
        :rtype: :class:`Language`

        """
        # the reversed value is unpacked as the positional constructor arguments
        constructor_args = language_converters[converter].reverse(code)
        return cls(*constructor_args)

    @classmethod
    def fromietf(cls, ietf):
        """Build a :class:`Language` from an IETF language code

        :param string ietf: the ietf code
        :return: the corresponding :class:`Language` instance
        :rtype: :class:`Language`
        :raise: ValueError if subtags remain once a script has been set

        """
        subtags = ietf.split('-')
        primary = subtags.pop(0).lower()
        # a 2-letter primary subtag goes through the alpha2 converter, anything else is used as alpha3
        language = cls.fromalpha2(primary) if len(primary) == 2 else cls(primary)
        while subtags:
            subtag = subtags.pop(0)
            if len(subtag) == 2:
                language.country = Country(subtag.upper())
            else:
                language.script = Script(subtag.capitalize())
            # once a script is set nothing may follow it
            if language.script is not None:
                if subtags:
                    raise ValueError('Wrong IETF format. Unmatched subtags: %r' % subtags)
                break
        return language

    def __getstate__(self):
        return self.alpha3, self.country, self.script

    def __setstate__(self, state):
        self.alpha3, self.country, self.script = state

    def __getattr__(self, name):
        # only reached for names missing on the instance: `name` is looked up as a converter
        alpha3 = self.alpha3
        country = None if self.country is None else self.country.alpha2
        script = None if self.script is None else self.script.code
        try:
            return language_converters[name].convert(alpha3, country, script)
        except KeyError:
            raise AttributeError(name)

    def __hash__(self):
        # hashes like its string form, consistent with the string comparison in __eq__
        return hash(str(self))

    def __eq__(self, other):
        # strings are compared against the string form of the language
        if isinstance(other, basestr):
            return str(self) == other
        if not isinstance(other, Language):
            return False
        return (self.alpha3 == other.alpha3 and
                self.country == other.country and
                self.script == other.script)

    def __ne__(self, other):
        return not self == other

    def __bool__(self):
        # the 'und' language is the only falsy one
        return self.alpha3 != 'und'
    __nonzero__ = __bool__

    def __repr__(self):
        return '<Language [%s]>' % self

    def __str__(self):
        # alpha2 when the converter provides one, alpha3 otherwise
        try:
            text = self.alpha2
        except LanguageConvertError:
            text = self.alpha3
        # country first, then script, each dash-separated and only when set
        for extra in (self.country, self.script):
            if extra is not None:
                text += '-' + str(extra)
        return text
|
||||
@@ -1,76 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from . import basestr
|
||||
|
||||
#: Script code to script name mapping
|
||||
#: Script code to script name mapping
SCRIPTS = {}

#: List of scripts in the ISO-15924 as namedtuple of code, number, name, french_name, pva and date
SCRIPT_MATRIX = []

#: The namedtuple used in the :data:`SCRIPT_MATRIX`
IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])

f = resource_stream('babelfish', 'data/iso15924-utf8-20131012.txt')
try:
    # the first line is discarded unconditionally
    f.readline()
    for l in f:
        l = l.decode('utf-8').strip()
        # blank lines and '#' comment lines carry no record
        if not l or l.startswith('#'):
            continue
        script = IsoScript._make(l.split(';'))
        SCRIPT_MATRIX.append(script)
        SCRIPTS[script.code] = script.name
finally:
    # close the stream even when a line cannot be decoded
    f.close()
|
||||
|
||||
|
||||
class Script(object):
    """A human writing system

    A script is identified by its 4-letter ISO-15924 code

    :param string script: 4-letter ISO-15924 script code
    :raise: ValueError if `script` is not a key of :data:`SCRIPTS`

    """
    def __init__(self, script):
        if script not in SCRIPTS:
            raise ValueError('%r is not a valid script' % script)

        #: ISO-15924 4-letter script code
        self.code = script

    @property
    def name(self):
        """English name of the script"""
        return SCRIPTS[self.code]

    def __getstate__(self):
        # the code alone is the pickled state
        return self.code

    def __setstate__(self, state):
        self.code = state

    def __hash__(self):
        # hashes like its code, consistent with the string comparison in __eq__
        return hash(self.code)

    def __eq__(self, other):
        # a string is compared to the code directly, another Script through its code
        if isinstance(other, basestr):
            other_code = other
        elif isinstance(other, Script):
            other_code = other.code
        else:
            return False
        return self.code == other_code

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return '<Script [%s]>' % self

    def __str__(self):
        return self.code
|
||||
@@ -1,368 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import sys
|
||||
import pickle
|
||||
from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from babelfish import (LANGUAGES, Language, Country, Script, language_converters, country_converters,
|
||||
LanguageReverseConverter, LanguageConvertError, LanguageReverseError, CountryReverseError)
|
||||
|
||||
|
||||
if sys.version_info[:2] <= (2, 6):
|
||||
_MAX_LENGTH = 80
|
||||
|
||||
def safe_repr(obj, short=False):
|
||||
try:
|
||||
result = repr(obj)
|
||||
except Exception:
|
||||
result = object.__repr__(obj)
|
||||
if not short or len(result) < _MAX_LENGTH:
|
||||
return result
|
||||
return result[:_MAX_LENGTH] + ' [truncated]...'
|
||||
|
||||
class _AssertRaisesContext(object):
|
||||
"""A context manager used to implement TestCase.assertRaises* methods."""
|
||||
|
||||
def __init__(self, expected, test_case, expected_regexp=None):
|
||||
self.expected = expected
|
||||
self.failureException = test_case.failureException
|
||||
self.expected_regexp = expected_regexp
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_value, tb):
|
||||
if exc_type is None:
|
||||
try:
|
||||
exc_name = self.expected.__name__
|
||||
except AttributeError:
|
||||
exc_name = str(self.expected)
|
||||
raise self.failureException(
|
||||
"{0} not raised".format(exc_name))
|
||||
if not issubclass(exc_type, self.expected):
|
||||
# let unexpected exceptions pass through
|
||||
return False
|
||||
self.exception = exc_value # store for later retrieval
|
||||
if self.expected_regexp is None:
|
||||
return True
|
||||
|
||||
expected_regexp = self.expected_regexp
|
||||
if isinstance(expected_regexp, basestring):
|
||||
expected_regexp = re.compile(expected_regexp)
|
||||
if not expected_regexp.search(str(exc_value)):
|
||||
raise self.failureException('"%s" does not match "%s"' %
|
||||
(expected_regexp.pattern, str(exc_value)))
|
||||
return True
|
||||
|
||||
class _Py26FixTestCase(object):
|
||||
def assertIsNone(self, obj, msg=None):
|
||||
"""Same as self.assertTrue(obj is None), with a nicer default message."""
|
||||
if obj is not None:
|
||||
standardMsg = '%s is not None' % (safe_repr(obj),)
|
||||
self.fail(self._formatMessage(msg, standardMsg))
|
||||
|
||||
def assertIsNotNone(self, obj, msg=None):
|
||||
"""Included for symmetry with assertIsNone."""
|
||||
if obj is None:
|
||||
standardMsg = 'unexpectedly None'
|
||||
self.fail(self._formatMessage(msg, standardMsg))
|
||||
|
||||
def assertIn(self, member, container, msg=None):
|
||||
"""Just like self.assertTrue(a in b), but with a nicer default message."""
|
||||
if member not in container:
|
||||
standardMsg = '%s not found in %s' % (safe_repr(member),
|
||||
safe_repr(container))
|
||||
self.fail(self._formatMessage(msg, standardMsg))
|
||||
|
||||
def assertNotIn(self, member, container, msg=None):
|
||||
"""Just like self.assertTrue(a not in b), but with a nicer default message."""
|
||||
if member in container:
|
||||
standardMsg = '%s unexpectedly found in %s' % (safe_repr(member),
|
||||
safe_repr(container))
|
||||
self.fail(self._formatMessage(msg, standardMsg))
|
||||
|
||||
def assertIs(self, expr1, expr2, msg=None):
|
||||
"""Just like self.assertTrue(a is b), but with a nicer default message."""
|
||||
if expr1 is not expr2:
|
||||
standardMsg = '%s is not %s' % (safe_repr(expr1),
|
||||
safe_repr(expr2))
|
||||
self.fail(self._formatMessage(msg, standardMsg))
|
||||
|
||||
def assertIsNot(self, expr1, expr2, msg=None):
|
||||
"""Just like self.assertTrue(a is not b), but with a nicer default message."""
|
||||
if expr1 is expr2:
|
||||
standardMsg = 'unexpectedly identical: %s' % (safe_repr(expr1),)
|
||||
self.fail(self._formatMessage(msg, standardMsg))
|
||||
|
||||
else:
|
||||
class _Py26FixTestCase(object):
|
||||
pass
|
||||
|
||||
|
||||
class TestScript(TestCase, _Py26FixTestCase):
    """Tests for :class:`Script`: validation, comparison, hashing and pickling"""

    def test_wrong_script(self):
        # a code absent from the script table is rejected by the constructor
        self.assertRaises(ValueError, Script, 'Azer')

    def test_eq(self):
        latin = Script('Latn')
        self.assertEqual(latin, Script('Latn'))

    def test_ne(self):
        cyrillic = Script('Cyrl')
        self.assertNotEqual(cyrillic, Script('Latn'))

    def test_hash(self):
        # a script hashes like its code
        code = 'Hira'
        self.assertEqual(hash(Script(code)), hash(code))

    def test_pickle(self):
        round_tripped = pickle.loads(pickle.dumps(Script('Latn')))
        self.assertEqual(round_tripped, Script('Latn'))
|
||||
|
||||
|
||||
class TestCountry(TestCase, _Py26FixTestCase):
    """Tests for :class:`Country`: validation, comparison, hashing, pickling and the name converter"""

    def test_wrong_country(self):
        # an unknown alpha2 code is rejected by the constructor
        self.assertRaises(ValueError, Country, 'ZZ')

    def test_eq(self):
        usa = Country('US')
        self.assertEqual(usa, Country('US'))

    def test_ne(self):
        usa = Country('US')
        self.assertNotEqual(Country('GB'), usa)
        self.assertIsNotNone(Country('US'))

    def test_hash(self):
        # a country hashes like its alpha2 code
        code = 'US'
        self.assertEqual(hash(Country(code)), hash(code))

    def test_pickle(self):
        for country in (Country('GB'), Country('US')):
            round_tripped = pickle.loads(pickle.dumps(country))
            self.assertEqual(round_tripped, country)

    def test_converter_name(self):
        usa = Country('US')
        self.assertEqual(Country('US').name, 'UNITED STATES')
        self.assertEqual(Country.fromname('UNITED STATES'), usa)
        self.assertEqual(Country.fromcode('UNITED STATES', 'name'), usa)
        self.assertRaises(CountryReverseError, lambda: Country.fromname('ZZZZZ'))
        name_codes = country_converters['name'].codes
        self.assertEqual(len(name_codes), 249)
|
||||
|
||||
|
||||
class TestLanguage(TestCase, _Py26FixTestCase):
    """Tests for :class:`Language`: construction, converters, IETF parsing,
    comparison, hashing, pickling and converter registration"""

    def test_languages(self):
        self.assertEqual(len(LANGUAGES), 7874)

    def test_wrong_language(self):
        self.assertRaises(ValueError, lambda: Language('zzz'))

    def test_unknown_language(self):
        self.assertEqual(Language('zzzz', unknown='und'), Language('und'))

    def test_converter_alpha2(self):
        self.assertEqual(Language('eng').alpha2, 'en')
        self.assertEqual(Language.fromalpha2('en'), Language('eng'))
        self.assertEqual(Language.fromcode('en', 'alpha2'), Language('eng'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha2('zz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)
        self.assertEqual(len(language_converters['alpha2'].codes), 184)

    def test_converter_alpha3b(self):
        self.assertEqual(Language('fra').alpha3b, 'fre')
        self.assertEqual(Language.fromalpha3b('fre'), Language('fra'))
        self.assertEqual(Language.fromcode('fre', 'alpha3b'), Language('fra'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3b('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3b)
        self.assertEqual(len(language_converters['alpha3b'].codes), 418)

    def test_converter_alpha3t(self):
        self.assertEqual(Language('fra').alpha3t, 'fra')
        self.assertEqual(Language.fromalpha3t('fra'), Language('fra'))
        self.assertEqual(Language.fromcode('fra', 'alpha3t'), Language('fra'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3t('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)
        self.assertEqual(len(language_converters['alpha3t'].codes), 418)

    def test_converter_name(self):
        self.assertEqual(Language('eng').name, 'English')
        self.assertEqual(Language.fromname('English'), Language('eng'))
        self.assertEqual(Language.fromcode('English', 'name'), Language('eng'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromname('Zzzzzzzzz'))
        self.assertEqual(len(language_converters['name'].codes), 7874)

    def test_converter_scope(self):
        self.assertEqual(language_converters['scope'].codes, set(['I', 'S', 'M']))
        self.assertEqual(Language('eng').scope, 'individual')
        self.assertEqual(Language('und').scope, 'special')

    def test_converter_type(self):
        self.assertEqual(language_converters['type'].codes, set(['A', 'C', 'E', 'H', 'L', 'S']))
        self.assertEqual(Language('eng').type, 'living')
        self.assertEqual(Language('und').type, 'special')

    def test_converter_opensubtitles(self):
        self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
        self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
        self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
        self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
        self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
        # Montenegrin is not recognized as an ISO language (yet?) but for now it is
        # unofficially accepted as Serbian from Montenegro
        self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
        self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromopensubtitles('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').opensubtitles)
        self.assertEqual(len(language_converters['opensubtitles'].codes), 606)

        # test with all the LANGUAGES from the opensubtitles api
        # downloaded from: http://www.opensubtitles.org/addons/export_languages.php
        f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
        try:
            f.readline()
            for l in f:
                idlang, alpha2, _, upload_enabled, web_enabled = l.decode('utf-8').strip().split('\t')
                if not int(upload_enabled) and not int(web_enabled):
                    # do not test LANGUAGES that are too esoteric / not widely available
                    continue
                self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
                if alpha2:
                    self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
        finally:
            # a failing assertion must not leave the data stream open
            f.close()

    def test_fromietf_country_script(self):
        language = Language.fromietf('fra-FR-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertEqual(language.country, Country('FR'))
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_country_no_script(self):
        language = Language.fromietf('fra-FR')
        self.assertEqual(language.alpha3, 'fra')
        self.assertEqual(language.country, Country('FR'))
        self.assertIsNone(language.script)

    def test_fromietf_no_country_no_script(self):
        # a bare language subtag: neither a country nor a script must be set
        language = Language.fromietf('fra')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertIsNone(language.script)

    def test_fromietf_no_country_script(self):
        language = Language.fromietf('fra-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_alpha2_language(self):
        language = Language.fromietf('fr-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_wrong_language(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('xyz-FR'))

    def test_fromietf_wrong_country(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('fra-YZ'))

    def test_fromietf_wrong_script(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('fra-FR-Wxyz'))

    def test_eq(self):
        self.assertEqual(Language('eng'), Language('eng'))

    def test_ne(self):
        self.assertNotEqual(Language('fra'), Language('eng'))
        self.assertIsNotNone(Language('fra'))

    def test_nonzero(self):
        self.assertFalse(bool(Language('und')))
        self.assertTrue(bool(Language('eng')))

    def test_language_hasattr(self):
        self.assertTrue(hasattr(Language('fra'), 'alpha3'))
        self.assertTrue(hasattr(Language('fra'), 'alpha2'))
        self.assertFalse(hasattr(Language('bej'), 'alpha2'))

    def test_country(self):
        self.assertEqual(Language('por', 'BR').country, Country('BR'))
        self.assertEqual(Language('eng', Country('US')).country, Country('US'))

    def test_eq_with_country(self):
        self.assertEqual(Language('eng', 'US'), Language('eng', Country('US')))

    def test_ne_with_country(self):
        self.assertNotEqual(Language('eng', 'US'), Language('eng', Country('GB')))

    def test_script(self):
        self.assertEqual(Language('srp', script='Latn').script, Script('Latn'))
        self.assertEqual(Language('srp', script=Script('Cyrl')).script, Script('Cyrl'))

    def test_eq_with_script(self):
        self.assertEqual(Language('srp', script='Latn'), Language('srp', script=Script('Latn')))

    def test_ne_with_script(self):
        self.assertNotEqual(Language('srp', script='Latn'), Language('srp', script=Script('Cyrl')))

    def test_eq_with_country_and_script(self):
        self.assertEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Latn')))

    def test_ne_with_country_and_script(self):
        self.assertNotEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Cyrl')))

    def test_hash(self):
        self.assertEqual(hash(Language('fra')), hash('fr'))
        self.assertEqual(hash(Language('ace')), hash('ace'))
        self.assertEqual(hash(Language('por', 'BR')), hash('pt-BR'))
        self.assertEqual(hash(Language('srp', script='Cyrl')), hash('sr-Cyrl'))
        self.assertEqual(hash(Language('eng', 'US', 'Latn')), hash('en-US-Latn'))

    def test_pickle(self):
        for lang in [Language('fra'),
                     Language('eng', 'US'),
                     Language('srp', script='Latn'),
                     Language('eng', 'US', 'Latn')]:
            self.assertEqual(pickle.loads(pickle.dumps(lang)), lang)

    def test_str(self):
        self.assertEqual(Language.fromietf(str(Language('eng', 'US', 'Latn'))), Language('eng', 'US', 'Latn'))
        self.assertEqual(Language.fromietf(str(Language('fra', 'FR'))), Language('fra', 'FR'))
        self.assertEqual(Language.fromietf(str(Language('bel'))), Language('bel'))

    def test_register_converter(self):
        class TestConverter(LanguageReverseConverter):
            def __init__(self):
                self.to_test = {'fra': 'test1', 'eng': 'test2'}
                self.from_test = {'test1': 'fra', 'test2': 'eng'}

            def convert(self, alpha3, country=None, script=None):
                if alpha3 not in self.to_test:
                    raise LanguageConvertError(alpha3, country, script)
                return self.to_test[alpha3]

            def reverse(self, test):
                if test not in self.from_test:
                    raise LanguageReverseError(test)
                return (self.from_test[test], None)

        language = Language('fra')
        self.assertFalse(hasattr(language, 'test'))
        language_converters['test'] = TestConverter()
        try:
            self.assertTrue(hasattr(language, 'test'))
            self.assertIn('test', language_converters)
            self.assertEqual(Language('fra').test, 'test1')
            self.assertEqual(Language.fromtest('test2').alpha3, 'eng')
        finally:
            # always unregister so a failure here cannot leak the converter into other tests
            del language_converters['test']
        self.assertNotIn('test', language_converters)
        self.assertRaises(KeyError, lambda: Language.fromtest('test1'))
        self.assertRaises(AttributeError, lambda: Language('fra').test)
|
||||
|
||||
|
||||
def suite():
    """Return a :class:`TestSuite` holding the tests of TestScript, TestCountry and TestLanguage, in that order"""
    loader = TestLoader()
    collected = TestSuite()
    for test_case in (TestScript, TestCountry, TestLanguage):
        collected.addTest(loader.loadTestsFromTestCase(test_case))
    return collected


if __name__ == '__main__':
    # run the whole suite with the plain text runner when executed as a script
    runner = TextTestRunner()
    runner.run(suite())
|
||||
@@ -0,0 +1,18 @@
|
||||
"""Provide a (g)dbm-compatible interface to bsddb.hashopen."""
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
# Emit the Python 3 porting warning at import time; stacklevel=2 makes it point
# at the caller's frame rather than at this line.
warnings.warnpy3k("in 3.x, the dbhash module has been removed", stacklevel=2)
try:
    import bsddb
except ImportError:
    # prevent a second import of this module from spuriously succeeding
    del sys.modules[__name__]
    raise

# Public names of this module.
__all__ = ["error","open"]

# Re-export bsddb's error under this module's name.
error = bsddb.error  # Exported for anydbm
|
||||
|
||||
def open(file, flag='r', mode=0o666):
    """Open `file` with ``bsddb.hashopen`` and return the resulting object.

    All three arguments are passed through unchanged.

    :param file: first argument for ``bsddb.hashopen``
    :param flag: second argument for ``bsddb.hashopen``, ``'r'`` by default
    :param mode: third argument for ``bsddb.hashopen``, ``0o666`` by default

    """
    # NOTE: the default is written 0o666 (same value as the legacy literal 0666);
    # that spelling is accepted from Python 2.6 on, the legacy one is a syntax
    # error on Python 3.
    return bsddb.hashopen(file, flag, mode)
|
||||
@@ -1,27 +0,0 @@
|
||||
Copyright (c) 2011-2014 Mike Bayer
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author or contributors may not be used to endorse or
|
||||
promote products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
@@ -1,91 +0,0 @@
|
||||
dogpile.cache
|
||||
=============
|
||||
|
||||
A caching API built around the concept of a "dogpile lock", which allows
|
||||
continued access to an expiring data value while a single thread generates a
|
||||
new value.
|
||||
|
||||
dogpile.cache builds on the `dogpile.core <http://pypi.python.org/pypi/dogpile.core>`_
|
||||
locking system, which implements the idea of "allow one creator to write while
|
||||
others read" in the abstract. Overall, dogpile.cache is intended as a
|
||||
replacement for the `Beaker <http://beaker.groovie.org>`_ caching system, the internals
|
||||
of which are written by the same author. All the ideas of Beaker which "work"
|
||||
are re-implemented in dogpile.cache in a more efficient and succinct manner,
|
||||
and all the cruft (Beaker's internals were first written in 2005) relegated
|
||||
to the trash heap.
|
||||
|
||||
Features
|
||||
--------
|
||||
|
||||
* A succinct API which encourages up-front configuration of pre-defined
|
||||
"regions", each one defining a set of caching characteristics including
|
||||
storage backend, configuration options, and default expiration time.
|
||||
* A standard get/set/delete API as well as a function decorator API is
|
||||
provided.
|
||||
* The mechanics of key generation are fully customizable. The function
|
||||
decorator API features a pluggable "key generator" to customize how
|
||||
cache keys are made to correspond to function calls, and an optional
|
||||
"key mangler" feature provides for pluggable mangling of keys
|
||||
(such as encoding, SHA-1 hashing) as desired for each region.
|
||||
* The dogpile lock, first developed as the core engine behind the Beaker
|
||||
caching system, here vastly simplified, improved, and better tested.
|
||||
Some key performance
|
||||
issues that were intrinsic to Beaker's architecture, particularly that
|
||||
values would frequently be "double-fetched" from the cache, have been fixed.
|
||||
* Backends implement their own version of a "distributed" lock, where the
|
||||
"distribution" matches the backend's storage system. For example, the
|
||||
memcached backends allow all clients to coordinate creation of values
|
||||
using memcached itself. The dbm file backend uses a lockfile
|
||||
alongside the dbm file. New backends, such as a Redis-based backend,
|
||||
can provide their own locking mechanism appropriate to the storage
|
||||
engine.
|
||||
* Writing new backends or hacking on the existing backends is intended to be
|
||||
routine - all that's needed are basic get/set/delete methods. A distributed
|
||||
lock tailored towards the backend is an optional addition, else dogpile uses
|
||||
a regular thread mutex. New backends can be registered with dogpile.cache
|
||||
directly or made available via setuptools entry points.
|
||||
* Included backends feature three memcached backends (python-memcached, pylibmc,
|
||||
bmemcached), a Redis backend, a backend based on Python's
|
||||
anydbm, and a plain dictionary backend.
|
||||
* Space for third party plugins, including the first which provides the
|
||||
dogpile.cache engine to Mako templates.
|
||||
* Python 3 compatible in place - no 2to3 required.
|
||||
|
||||
Synopsis
|
||||
--------
|
||||
|
||||
dogpile.cache features a single public usage object known as the ``CacheRegion``.
|
||||
This object then refers to a particular ``CacheBackend``. Typical usage
|
||||
generates a region using ``make_region()``, which can then be used at the
|
||||
module level to decorate functions, or used directly in code with a traditional
|
||||
get/set interface. Configuration of the backend is applied to the region
|
||||
using ``configure()`` or ``configure_from_config()``, allowing deferred
|
||||
config-file based configuration to occur after modules have been imported::
|
||||
|
||||
from dogpile.cache import make_region
|
||||
|
||||
region = make_region().configure(
|
||||
'dogpile.cache.pylibmc',
|
||||
expiration_time = 3600,
|
||||
arguments = {
|
||||
'url':["127.0.0.1"],
|
||||
'binary':True,
|
||||
'behaviors':{"tcp_nodelay": True,"ketama":True}
|
||||
}
|
||||
)
|
||||
|
||||
@region.cache_on_arguments()
|
||||
def load_user_info(user_id):
|
||||
return some_database.lookup_user_by_id(user_id)
|
||||
|
||||
|
||||
Documentation
|
||||
-------------
|
||||
|
||||
See dogpile.cache's full documentation at
|
||||
`dogpile.cache documentation <http://dogpilecache.readthedocs.org>`_.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
# See http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages
|
||||
try:
|
||||
__import__('pkg_resources').declare_namespace(__name__)
|
||||
except ImportError:
|
||||
from pkgutil import extend_path
|
||||
__path__ = extend_path(__path__, __name__)
|
||||
@@ -1,3 +0,0 @@
|
||||
__version__ = '0.5.4'
|
||||
|
||||
from .region import CacheRegion, register_backend, make_region
|
||||
-193
@@ -1,193 +0,0 @@
|
||||
import operator
|
||||
from .compat import py3k
|
||||
|
||||
|
||||
class NoValue(object):
|
||||
"""Describe a missing cache value.
|
||||
|
||||
The :attr:`.NO_VALUE` module global
|
||||
should be used.
|
||||
|
||||
"""
|
||||
@property
|
||||
def payload(self):
|
||||
return self
|
||||
|
||||
if py3k:
|
||||
def __bool__(self): #pragma NO COVERAGE
|
||||
return False
|
||||
else:
|
||||
def __nonzero__(self): #pragma NO COVERAGE
|
||||
return False
|
||||
|
||||
NO_VALUE = NoValue()
|
||||
"""Value returned from ``get()`` that describes
|
||||
a key not present."""
|
||||
|
||||
class CachedValue(tuple):
|
||||
"""Represent a value stored in the cache.
|
||||
|
||||
:class:`.CachedValue` is a two-tuple of
|
||||
``(payload, metadata)``, where ``metadata``
|
||||
is dogpile.cache's tracking information (
|
||||
currently the creation time). The metadata
|
||||
and tuple structure is pickleable, if
|
||||
the backend requires serialization.
|
||||
|
||||
"""
|
||||
payload = property(operator.itemgetter(0))
|
||||
"""Named accessor for the payload."""
|
||||
|
||||
metadata = property(operator.itemgetter(1))
|
||||
"""Named accessor for the dogpile.cache metadata dictionary."""
|
||||
|
||||
def __new__(cls, payload, metadata):
|
||||
return tuple.__new__(cls, (payload, metadata))
|
||||
|
||||
def __reduce__(self):
|
||||
return CachedValue, (self.payload, self.metadata)
|
||||
|
||||
class CacheBackend(object):
|
||||
"""Base class for backend implementations."""
|
||||
|
||||
key_mangler = None
|
||||
"""Key mangling function.
|
||||
|
||||
May be None, or otherwise declared
|
||||
as an ordinary instance method.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, arguments): #pragma NO COVERAGE
|
||||
"""Construct a new :class:`.CacheBackend`.
|
||||
|
||||
Subclasses should override this to
|
||||
handle the given arguments.
|
||||
|
||||
:param arguments: The ``arguments`` parameter
|
||||
passed to :meth:`.CacheRegion.configure`.
|
||||
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@classmethod
|
||||
def from_config_dict(cls, config_dict, prefix):
|
||||
prefix_len = len(prefix)
|
||||
return cls(
|
||||
dict(
|
||||
(key[prefix_len:], config_dict[key])
|
||||
for key in config_dict
|
||||
if key.startswith(prefix)
|
||||
)
|
||||
)
|
||||
|
||||
def get_mutex(self, key):
|
||||
"""Return an optional mutexing object for the given key.
|
||||
|
||||
This object need only provide an ``acquire()``
|
||||
and ``release()`` method.
|
||||
|
||||
May return ``None``, in which case the dogpile
|
||||
lock will use a regular ``threading.Lock``
|
||||
object to mutex concurrent threads for
|
||||
value creation. The default implementation
|
||||
returns ``None``.
|
||||
|
||||
Different backends may want to provide various
|
||||
kinds of "mutex" objects, such as those which
|
||||
link to lock files, distributed mutexes,
|
||||
memcached semaphores, etc. Whatever
|
||||
kind of system is best suited for the scope
|
||||
and behavior of the caching backend.
|
||||
|
||||
A mutex that takes the key into account will
|
||||
allow multiple regenerate operations across
|
||||
keys to proceed simultaneously, while a mutex
|
||||
that does not will serialize regenerate operations
|
||||
to just one at a time across all keys in the region.
|
||||
The latter approach, or a variant that involves
|
||||
a modulus of the given key's hash value,
|
||||
can be used as a means of throttling the total
|
||||
number of value recreation operations that may
|
||||
proceed at one time.
|
||||
|
||||
"""
|
||||
return None
|
||||
|
||||
def get(self, key): #pragma NO COVERAGE
|
||||
"""Retrieve a value from the cache.
|
||||
|
||||
The returned value should be an instance of
|
||||
:class:`.CachedValue`, or ``NO_VALUE`` if
|
||||
not present.
|
||||
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_multi(self, keys): #pragma NO COVERAGE
|
||||
"""Retrieve multiple values from the cache.
|
||||
|
||||
The returned value should be a list, corresponding
|
||||
to the list of keys given.
|
||||
|
||||
.. versionadded:: 0.5.0
|
||||
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def set(self, key, value): #pragma NO COVERAGE
|
||||
"""Set a value in the cache.
|
||||
|
||||
The key will be whatever was passed
|
||||
to the registry, processed by the
|
||||
"key mangling" function, if any.
|
||||
The value will always be an instance
|
||||
of :class:`.CachedValue`.
|
||||
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def set_multi(self, mapping): #pragma NO COVERAGE
|
||||
"""Set multiple values in the cache.
|
||||
|
||||
The key will be whatever was passed
|
||||
to the registry, processed by the
|
||||
"key mangling" function, if any.
|
||||
The value will always be an instance
|
||||
of :class:`.CachedValue`.
|
||||
|
||||
.. versionadded:: 0.5.0
|
||||
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def delete(self, key): #pragma NO COVERAGE
|
||||
"""Delete a value from the cache.
|
||||
|
||||
The key will be whatever was passed
|
||||
to the registry, processed by the
|
||||
"key mangling" function, if any.
|
||||
|
||||
The behavior here should be idempotent,
|
||||
that is, can be called any number of times
|
||||
regardless of whether or not the
|
||||
key exists.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def delete_multi(self, keys): #pragma NO COVERAGE
|
||||
"""Delete multiple values from the cache.
|
||||
|
||||
The key will be whatever was passed
|
||||
to the registry, processed by the
|
||||
"key mangling" function, if any.
|
||||
|
||||
The behavior here should be idempotent,
|
||||
that is, can be called any number of times
|
||||
regardless of whether or not the
|
||||
key exists.
|
||||
|
||||
.. versionadded:: 0.5.0
|
||||
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
@@ -1,10 +0,0 @@
|
||||
from dogpile.cache.region import register_backend
|
||||
|
||||
register_backend("dogpile.cache.null", "dogpile.cache.backends.null", "NullBackend")
|
||||
register_backend("dogpile.cache.dbm", "dogpile.cache.backends.file", "DBMBackend")
|
||||
register_backend("dogpile.cache.pylibmc", "dogpile.cache.backends.memcached", "PylibmcBackend")
|
||||
register_backend("dogpile.cache.bmemcached", "dogpile.cache.backends.memcached", "BMemcachedBackend")
|
||||
register_backend("dogpile.cache.memcached", "dogpile.cache.backends.memcached", "MemcachedBackend")
|
||||
register_backend("dogpile.cache.memory", "dogpile.cache.backends.memory", "MemoryBackend")
|
||||
register_backend("dogpile.cache.memory_pickle", "dogpile.cache.backends.memory", "MemoryPickleBackend")
|
||||
register_backend("dogpile.cache.redis", "dogpile.cache.backends.redis", "RedisBackend")
|
||||
@@ -1,441 +0,0 @@
|
||||
"""
|
||||
File Backends
|
||||
------------------
|
||||
|
||||
Provides backends that deal with local filesystem access.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import with_statement
|
||||
from dogpile.cache.api import CacheBackend, NO_VALUE
|
||||
from contextlib import contextmanager
|
||||
from dogpile.cache import compat
|
||||
from dogpile.cache import util
|
||||
import os
|
||||
|
||||
__all__ = 'DBMBackend', 'FileLock', 'AbstractFileLock'
|
||||
|
||||
class DBMBackend(CacheBackend):
|
||||
"""A file-backend using a dbm file to store keys.
|
||||
|
||||
Basic usage::
|
||||
|
||||
from dogpile.cache import make_region
|
||||
|
||||
region = make_region().configure(
|
||||
'dogpile.cache.dbm',
|
||||
expiration_time = 3600,
|
||||
arguments = {
|
||||
"filename":"/path/to/cachefile.dbm"
|
||||
}
|
||||
)
|
||||
|
||||
DBM access is provided using the Python ``anydbm`` module,
|
||||
which selects a platform-specific dbm module to use.
|
||||
This may be made to be more configurable in a future
|
||||
release.
|
||||
|
||||
Note that different dbm modules have different behaviors.
|
||||
Some dbm implementations handle their own locking, while
|
||||
others don't. The :class:`.DBMBackend` uses a read/write
|
||||
lockfile by default, which is compatible even with those
|
||||
DBM implementations for which this is unnecessary,
|
||||
though the behavior can be disabled.
|
||||
|
||||
The DBM backend by default makes use of two lockfiles.
|
||||
One is in order to protect the DBM file itself from
|
||||
concurrent writes, the other is to coordinate
|
||||
value creation (i.e. the dogpile lock). By default,
|
||||
these lockfiles use the ``flock()`` system call
|
||||
for locking; this is **only available on Unix
|
||||
platforms**. An alternative lock implementation, such as one
|
||||
which is based on threads or uses a third-party system
|
||||
such as `portalocker <https://pypi.python.org/pypi/portalocker>`_,
|
||||
can be dropped in using the ``lock_factory`` argument
|
||||
in conjunction with the :class:`.AbstractFileLock` base class.
|
||||
|
||||
Currently, the dogpile lock is against the entire
|
||||
DBM file, not per key. This means there can
|
||||
only be one "creator" job running at a time
|
||||
per dbm file.
|
||||
|
||||
A future improvement might be to have the dogpile lock
|
||||
using a filename that's based on a modulus of the key.
|
||||
Locking on a filename that uniquely corresponds to the
|
||||
key is problematic, since it's not generally safe to
|
||||
delete lockfiles as the application runs, implying an
|
||||
unlimited number of key-based files would need to be
|
||||
created and never deleted.
|
||||
|
||||
Parameters to the ``arguments`` dictionary are
|
||||
below.
|
||||
|
||||
:param filename: path of the filename in which to
|
||||
create the DBM file. Note that some dbm backends
|
||||
will change this name to have additional suffixes.
|
||||
:param rw_lockfile: the name of the file to use for
|
||||
read/write locking. If omitted, a default name
|
||||
is used by appending the suffix ".rw.lock" to the
|
||||
DBM filename. If False, then no lock is used.
|
||||
:param dogpile_lockfile: the name of the file to use
|
||||
for value creation, i.e. the dogpile lock. If
|
||||
omitted, a default name is used by appending the
|
||||
suffix ".dogpile.lock" to the DBM filename. If
|
||||
False, then dogpile.cache uses the default dogpile
|
||||
lock, a plain thread-based mutex.
|
||||
:param lock_factory: a function or class which provides
|
||||
for a read/write lock. Defaults to :class:`.FileLock`.
|
||||
Custom implementations need to implement context-manager
|
||||
based ``read()`` and ``write()`` functions - the
|
||||
:class:`.AbstractFileLock` class is provided as a base class
|
||||
which provides these methods based on individual read/write lock
|
||||
functions. E.g. to replace the lock with the dogpile.core
|
||||
:class:`.ReadWriteMutex`::
|
||||
|
||||
from dogpile.core.readwrite_lock import ReadWriteMutex
|
||||
from dogpile.cache.backends.file import AbstractFileLock
|
||||
|
||||
class MutexLock(AbstractFileLock):
|
||||
def __init__(self, filename):
|
||||
self.mutex = ReadWriteMutex()
|
||||
|
||||
def acquire_read_lock(self, wait):
|
||||
ret = self.mutex.acquire_read_lock(wait)
|
||||
return wait or ret
|
||||
|
||||
def acquire_write_lock(self, wait):
|
||||
ret = self.mutex.acquire_write_lock(wait)
|
||||
return wait or ret
|
||||
|
||||
def release_read_lock(self):
|
||||
return self.mutex.release_read_lock()
|
||||
|
||||
def release_write_lock(self):
|
||||
return self.mutex.release_write_lock()
|
||||
|
||||
from dogpile.cache import make_region
|
||||
|
||||
region = make_region().configure(
|
||||
"dogpile.cache.dbm",
|
||||
expiration_time=300,
|
||||
arguments={
|
||||
"filename": "file.dbm",
|
||||
"lock_factory": MutexLock
|
||||
}
|
||||
)
|
||||
|
||||
While the included :class:`.FileLock` uses ``fcntl.flock()``, a
|
||||
windows-compatible implementation can be built using a library
|
||||
such as `portalocker <https://pypi.python.org/pypi/portalocker>`_.
|
||||
|
||||
.. versionadded:: 0.5.2
|
||||
|
||||
|
||||
|
||||
"""
|
||||
def __init__(self, arguments):
|
||||
self.filename = os.path.abspath(
|
||||
os.path.normpath(arguments['filename'])
|
||||
)
|
||||
dir_, filename = os.path.split(self.filename)
|
||||
|
||||
self.lock_factory = arguments.get("lock_factory", FileLock)
|
||||
self._rw_lock = self._init_lock(
|
||||
arguments.get('rw_lockfile'),
|
||||
".rw.lock", dir_, filename)
|
||||
self._dogpile_lock = self._init_lock(
|
||||
arguments.get('dogpile_lockfile'),
|
||||
".dogpile.lock",
|
||||
dir_, filename,
|
||||
util.KeyReentrantMutex.factory)
|
||||
|
||||
# TODO: make this configurable
|
||||
if compat.py3k:
|
||||
import dbm
|
||||
else:
|
||||
import anydbm as dbm
|
||||
self.dbmmodule = dbm
|
||||
self._init_dbm_file()
|
||||
|
||||
def _init_lock(self, argument, suffix, basedir, basefile, wrapper=None):
|
||||
if argument is None:
|
||||
lock = self.lock_factory(os.path.join(basedir, basefile + suffix))
|
||||
elif argument is not False:
|
||||
lock = self.lock_factory(
|
||||
os.path.abspath(
|
||||
os.path.normpath(argument)
|
||||
))
|
||||
else:
|
||||
return None
|
||||
if wrapper:
|
||||
lock = wrapper(lock)
|
||||
return lock
|
||||
|
||||
def _init_dbm_file(self):
|
||||
exists = os.access(self.filename, os.F_OK)
|
||||
if not exists:
|
||||
for ext in ('db', 'dat', 'pag', 'dir'):
|
||||
if os.access(self.filename + os.extsep + ext, os.F_OK):
|
||||
exists = True
|
||||
break
|
||||
if not exists:
|
||||
fh = self.dbmmodule.open(self.filename, 'c')
|
||||
fh.close()
|
||||
|
||||
def get_mutex(self, key):
|
||||
# using one dogpile for the whole file. Other ways
|
||||
# to do this might be using a set of files keyed to a
|
||||
# hash/modulus of the key. the issue is it's never
|
||||
# really safe to delete a lockfile as this can
|
||||
# break other processes trying to get at the file
|
||||
# at the same time - so handling unlimited keys
|
||||
# can't imply unlimited filenames
|
||||
if self._dogpile_lock:
|
||||
return self._dogpile_lock(key)
|
||||
else:
|
||||
return None
|
||||
|
||||
@contextmanager
|
||||
def _use_rw_lock(self, write):
|
||||
if self._rw_lock is None:
|
||||
yield
|
||||
elif write:
|
||||
with self._rw_lock.write():
|
||||
yield
|
||||
else:
|
||||
with self._rw_lock.read():
|
||||
yield
|
||||
|
||||
@contextmanager
|
||||
def _dbm_file(self, write):
|
||||
with self._use_rw_lock(write):
|
||||
dbm = self.dbmmodule.open(self.filename,
|
||||
"w" if write else "r")
|
||||
yield dbm
|
||||
dbm.close()
|
||||
|
||||
def get(self, key):
|
||||
with self._dbm_file(False) as dbm:
|
||||
if hasattr(dbm, 'get'):
|
||||
value = dbm.get(key, NO_VALUE)
|
||||
else:
|
||||
# gdbm objects lack a .get method
|
||||
try:
|
||||
value = dbm[key]
|
||||
except KeyError:
|
||||
value = NO_VALUE
|
||||
if value is not NO_VALUE:
|
||||
value = compat.pickle.loads(value)
|
||||
return value
|
||||
|
||||
def get_multi(self, keys):
|
||||
return [self.get(key) for key in keys]
|
||||
|
||||
def set(self, key, value):
|
||||
with self._dbm_file(True) as dbm:
|
||||
dbm[key] = compat.pickle.dumps(value)
|
||||
|
||||
def set_multi(self, mapping):
|
||||
with self._dbm_file(True) as dbm:
|
||||
for key,value in mapping.items():
|
||||
dbm[key] = compat.pickle.dumps(value)
|
||||
|
||||
def delete(self, key):
|
||||
with self._dbm_file(True) as dbm:
|
||||
try:
|
||||
del dbm[key]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
def delete_multi(self, keys):
|
||||
with self._dbm_file(True) as dbm:
|
||||
for key in keys:
|
||||
try:
|
||||
del dbm[key]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
class AbstractFileLock(object):
|
||||
"""Coordinate read/write access to a file.
|
||||
|
||||
Typically this is a file-based lock, but it doesn't necessarily have to be.
|
||||
|
||||
The default implementation here is :class:`.FileLock`.
|
||||
|
||||
Implementations should provide the following methods::
|
||||
|
||||
* __init__()
|
||||
* acquire_read_lock()
|
||||
* acquire_write_lock()
|
||||
* release_read_lock()
|
||||
* release_write_lock()
|
||||
|
||||
The ``__init__()`` method accepts a single argument "filename", which
|
||||
may be used as the "lock file", for those implementations that use a lock
|
||||
file.
|
||||
|
||||
Note that multithreaded environments must provide a thread-safe
|
||||
version of this lock. The recommended approach for file-descriptor-based
|
||||
locks is to use a Python ``threading.local()`` so that a unique file descriptor
|
||||
is held per thread. See the source code of :class:`.FileLock` for an
|
||||
implementation example.
|
||||
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, filename):
|
||||
"""Constructor, is given the filename of a potential lockfile.
|
||||
|
||||
The usage of this filename is optional and no file is
|
||||
created by default.
|
||||
|
||||
Raises ``NotImplementedError`` by default, must be
|
||||
implemented by subclasses.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def acquire(self, wait=True):
|
||||
"""Acquire the "write" lock.
|
||||
|
||||
This is a direct call to :meth:`.AbstractFileLock.acquire_write_lock`.
|
||||
|
||||
"""
|
||||
return self.acquire_write_lock(wait)
|
||||
|
||||
def release(self):
|
||||
"""Release the "write" lock.
|
||||
|
||||
This is a direct call to :meth:`.AbstractFileLock.release_write_lock`.
|
||||
|
||||
"""
|
||||
self.release_write_lock()
|
||||
|
||||
@contextmanager
|
||||
def read(self):
|
||||
"""Provide a context manager for the "read" lock.
|
||||
|
||||
This method makes use of :meth:`.AbstractFileLock.acquire_read_lock`
|
||||
and :meth:`.AbstractFileLock.release_read_lock`
|
||||
|
||||
"""
|
||||
|
||||
self.acquire_read_lock(True)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self.release_read_lock()
|
||||
|
||||
@contextmanager
|
||||
def write(self):
|
||||
"""Provide a context manager for the "write" lock.
|
||||
|
||||
This method makes use of :meth:`.AbstractFileLock.acquire_write_lock`
|
||||
and :meth:`.AbstractFileLock.release_write_lock`
|
||||
|
||||
"""
|
||||
|
||||
self.acquire_write_lock(True)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self.release_write_lock()
|
||||
|
||||
@property
|
||||
def is_open(self):
|
||||
"""optional method."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def acquire_read_lock(self, wait):
|
||||
"""Acquire a 'reader' lock.
|
||||
|
||||
Raises ``NotImplementedError`` by default, must be
|
||||
implemented by subclasses.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def acquire_write_lock(self, wait):
|
||||
"""Acquire a 'write' lock.
|
||||
|
||||
Raises ``NotImplementedError`` by default, must be
|
||||
implemented by subclasses.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def release_read_lock(self):
|
||||
"""Release a 'reader' lock.
|
||||
|
||||
Raises ``NotImplementedError`` by default, must be
|
||||
implemented by subclasses.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def release_write_lock(self):
|
||||
"""Release a 'writer' lock.
|
||||
|
||||
Raises ``NotImplementedError`` by default, must be
|
||||
implemented by subclasses.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
class FileLock(AbstractFileLock):
|
||||
"""Use lockfiles to coordinate read/write access to a file.
|
||||
|
||||
Only works on Unix systems, using
|
||||
`fcntl.flock() <http://docs.python.org/library/fcntl.html>`_.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, filename):
|
||||
self._filedescriptor = compat.threading.local()
|
||||
self.filename = filename
|
||||
|
||||
@util.memoized_property
|
||||
def _module(self):
|
||||
import fcntl
|
||||
return fcntl
|
||||
|
||||
@property
|
||||
def is_open(self):
|
||||
return hasattr(self._filedescriptor, 'fileno')
|
||||
|
||||
def acquire_read_lock(self, wait):
|
||||
return self._acquire(wait, os.O_RDONLY, self._module.LOCK_SH)
|
||||
|
||||
def acquire_write_lock(self, wait):
|
||||
return self._acquire(wait, os.O_WRONLY, self._module.LOCK_EX)
|
||||
|
||||
def release_read_lock(self):
|
||||
self._release()
|
||||
|
||||
def release_write_lock(self):
|
||||
self._release()
|
||||
|
||||
def _acquire(self, wait, wrflag, lockflag):
|
||||
wrflag |= os.O_CREAT
|
||||
fileno = os.open(self.filename, wrflag)
|
||||
try:
|
||||
if not wait:
|
||||
lockflag |= self._module.LOCK_NB
|
||||
self._module.flock(fileno, lockflag)
|
||||
except IOError:
|
||||
os.close(fileno)
|
||||
if not wait:
|
||||
# this is typically
|
||||
# "[Errno 35] Resource temporarily unavailable",
|
||||
# because of LOCK_NB
|
||||
return False
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
self._filedescriptor.fileno = fileno
|
||||
return True
|
||||
|
||||
def _release(self):
|
||||
try:
|
||||
fileno = self._filedescriptor.fileno
|
||||
except AttributeError:
|
||||
return
|
||||
else:
|
||||
self._module.flock(fileno, self._module.LOCK_UN)
|
||||
os.close(fileno)
|
||||
del self._filedescriptor.fileno
|
||||
@@ -1,332 +0,0 @@
|
||||
"""
|
||||
Memcached Backends
|
||||
------------------
|
||||
|
||||
Provides backends for talking to `memcached <http://memcached.org>`_.
|
||||
|
||||
"""
|
||||
|
||||
from dogpile.cache.api import CacheBackend, NO_VALUE
|
||||
from dogpile.cache import compat
|
||||
from dogpile.cache import util
|
||||
import random
|
||||
import time
|
||||
|
||||
__all__ = 'GenericMemcachedBackend', 'MemcachedBackend',\
|
||||
'PylibmcBackend', 'BMemcachedBackend', 'MemcachedLock'
|
||||
|
||||
class MemcachedLock(object):
|
||||
"""Simple distributed lock using memcached.
|
||||
|
||||
This is an adaptation of the lock featured at
|
||||
http://amix.dk/blog/post/19386
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, client_fn, key):
|
||||
self.client_fn = client_fn
|
||||
self.key = "_lock" + key
|
||||
|
||||
def acquire(self, wait=True):
|
||||
client = self.client_fn()
|
||||
i = 0
|
||||
while True:
|
||||
if client.add(self.key, 1):
|
||||
return True
|
||||
elif not wait:
|
||||
return False
|
||||
else:
|
||||
sleep_time = (((i+1)*random.random()) + 2**i) / 2.5
|
||||
time.sleep(sleep_time)
|
||||
if i < 15:
|
||||
i += 1
|
||||
|
||||
def release(self):
|
||||
client = self.client_fn()
|
||||
client.delete(self.key)
|
||||
|
||||
class GenericMemcachedBackend(CacheBackend):
|
||||
"""Base class for memcached backends.
|
||||
|
||||
This base class accepts a number of parameters
|
||||
common to all backends.
|
||||
|
||||
:param url: the string URL to connect to. Can be a single
|
||||
string or a list of strings. This is the only argument
|
||||
that's required.
|
||||
:param distributed_lock: boolean, when True, will use a
|
||||
memcached-lock as the dogpile lock (see :class:`.MemcachedLock`).
|
||||
Use this when multiple
|
||||
processes will be talking to the same memcached instance.
|
||||
When left at False, dogpile will coordinate on a regular
|
||||
threading mutex.
|
||||
:param memcached_expire_time: integer, when present will
|
||||
be passed as the ``time`` parameter to ``pylibmc.Client.set``.
|
||||
This is used to set the memcached expiry time for a value.
|
||||
|
||||
.. note::
|
||||
|
||||
This parameter is **different** from Dogpile's own
|
||||
``expiration_time``, which is the number of seconds after
|
||||
which Dogpile will consider the value to be expired.
|
||||
When Dogpile considers a value to be expired,
|
||||
it **continues to use the value** until generation
|
||||
of a new value is complete, when using
|
||||
:meth:`.CacheRegion.get_or_create`.
|
||||
Therefore, if you are setting ``memcached_expire_time``, you'll
|
||||
want to make sure it is greater than ``expiration_time``
|
||||
by at least enough seconds for new values to be generated,
|
||||
else the value won't be available during a regeneration,
|
||||
forcing all threads to wait for a regeneration each time
|
||||
a value expires.
|
||||
|
||||
The :class:`.GenericMemcachedBackend` uses a ``threading.local()``
|
||||
object to store individual client objects per thread,
|
||||
as most modern memcached clients do not appear to be inherently
|
||||
threadsafe.
|
||||
|
||||
In particular, ``threading.local()`` has the advantage over pylibmc's
|
||||
built-in thread pool in that it automatically discards objects
|
||||
associated with a particular thread when that thread ends.
|
||||
|
||||
"""
|
||||
|
||||
set_arguments = {}
|
||||
"""Additional arguments which will be passed
|
||||
to the :meth:`set` method."""
|
||||
|
||||
def __init__(self, arguments):
|
||||
self._imports()
|
||||
# using a plain threading.local here. threading.local
|
||||
# automatically deletes the __dict__ when a thread ends,
|
||||
# so the idea is that this is superior to pylibmc's
|
||||
# own ThreadMappedPool which doesn't handle this
|
||||
# automatically.
|
||||
self.url = util.to_list(arguments['url'])
|
||||
self.distributed_lock = arguments.get('distributed_lock', False)
|
||||
self.memcached_expire_time = arguments.get(
|
||||
'memcached_expire_time', 0)
|
||||
|
||||
def _imports(self):
|
||||
"""client library imports go here."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def _create_client(self):
|
||||
"""Creation of a Client instance goes here."""
|
||||
raise NotImplementedError()
|
||||
|
||||
@util.memoized_property
|
||||
def _clients(self):
|
||||
backend = self
|
||||
class ClientPool(compat.threading.local):
|
||||
def __init__(self):
|
||||
self.memcached = backend._create_client()
|
||||
|
||||
return ClientPool()
|
||||
|
||||
@property
|
||||
def client(self):
|
||||
"""Return the memcached client.
|
||||
|
||||
This uses a threading.local by
|
||||
default as it appears most modern
|
||||
memcached libs aren't inherently
|
||||
threadsafe.
|
||||
|
||||
"""
|
||||
return self._clients.memcached
|
||||
|
||||
def get_mutex(self, key):
|
||||
if self.distributed_lock:
|
||||
return MemcachedLock(lambda: self.client, key)
|
||||
else:
|
||||
return None
|
||||
|
||||
def get(self, key):
|
||||
value = self.client.get(key)
|
||||
if value is None:
|
||||
return NO_VALUE
|
||||
else:
|
||||
return value
|
||||
|
||||
def get_multi(self, keys):
|
||||
values = self.client.get_multi(keys)
|
||||
return [
|
||||
NO_VALUE if key not in values
|
||||
else values[key] for key in keys
|
||||
]
|
||||
|
||||
def set(self, key, value):
|
||||
self.client.set(key,
|
||||
value,
|
||||
**self.set_arguments
|
||||
)
|
||||
|
||||
def set_multi(self, mapping):
|
||||
self.client.set_multi(mapping,
|
||||
**self.set_arguments
|
||||
)
|
||||
|
||||
def delete(self, key):
|
||||
self.client.delete(key)
|
||||
|
||||
def delete_multi(self, keys):
|
||||
self.client.delete_multi(keys)
|
||||
|
||||
class MemcacheArgs(object):
|
||||
"""Mixin which provides support for the 'time' argument to set(),
|
||||
'min_compress_len' to other methods.
|
||||
|
||||
"""
|
||||
def __init__(self, arguments):
|
||||
self.min_compress_len = arguments.get('min_compress_len', 0)
|
||||
|
||||
self.set_arguments = {}
|
||||
if "memcached_expire_time" in arguments:
|
||||
self.set_arguments["time"] =\
|
||||
arguments["memcached_expire_time"]
|
||||
if "min_compress_len" in arguments:
|
||||
self.set_arguments["min_compress_len"] =\
|
||||
arguments["min_compress_len"]
|
||||
super(MemcacheArgs, self).__init__(arguments)
|
||||
|
||||
class PylibmcBackend(MemcacheArgs, GenericMemcachedBackend):
|
||||
"""A backend for the
|
||||
`pylibmc <http://sendapatch.se/projects/pylibmc/index.html>`_
|
||||
memcached client.
|
||||
|
||||
A configuration illustrating several of the optional
|
||||
arguments described in the pylibmc documentation::
|
||||
|
||||
from dogpile.cache import make_region
|
||||
|
||||
region = make_region().configure(
|
||||
'dogpile.cache.pylibmc',
|
||||
expiration_time = 3600,
|
||||
arguments = {
|
||||
'url':["127.0.0.1"],
|
||||
'binary':True,
|
||||
'behaviors':{"tcp_nodelay": True,"ketama":True}
|
||||
}
|
||||
)
|
||||
|
||||
Arguments accepted here include those of
|
||||
:class:`.GenericMemcachedBackend`, as well as
|
||||
those below.
|
||||
|
||||
:param binary: sets the ``binary`` flag understood by
|
||||
``pylibmc.Client``.
|
||||
:param behaviors: a dictionary which will be passed to
|
||||
``pylibmc.Client`` as the ``behaviors`` parameter.
|
||||
:param min_compress_len: Integer, will be passed as the
|
||||
``min_compress_len`` parameter to the ``pylibmc.Client.set``
|
||||
method.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, arguments):
|
||||
self.binary = arguments.get('binary', False)
|
||||
self.behaviors = arguments.get('behaviors', {})
|
||||
super(PylibmcBackend, self).__init__(arguments)
|
||||
|
||||
|
||||
def _imports(self):
|
||||
global pylibmc
|
||||
import pylibmc
|
||||
|
||||
def _create_client(self):
|
||||
return pylibmc.Client(self.url,
|
||||
binary=self.binary,
|
||||
behaviors=self.behaviors
|
||||
)
|
||||
|
||||
class MemcachedBackend(MemcacheArgs, GenericMemcachedBackend):
    """Memcached backend built on the standard
    `Python-memcached <http://www.tummy.com/Community/software/python-memcached/>`_
    library.

    Example::

        from dogpile.cache import make_region

        region = make_region().configure(
            'dogpile.cache.memcached',
            expiration_time = 3600,
            arguments = {
                'url':"127.0.0.1:11211"
            }
        )

    """

    def _imports(self):
        # Bind the memcache module to the module-level name so the
        # import only happens when this method is invoked.
        global memcache
        import memcache

    def _create_client(self):
        """Return a new ``memcache.Client`` for the configured URL."""
        client = memcache.Client(self.url)
        return client
|
||||
|
||||
class BMemcachedBackend(GenericMemcachedBackend):
    """Memcached backend built on the
    `python-binary-memcached <https://github.com/jaysonsantos/python-binary-memcached>`_
    client.

    python-binary-memcached is a pure Python memcached client which
    can authenticate against a memcached server using SASL.

    A typical configuration using username/password::

        from dogpile.cache import make_region

        region = make_region().configure(
            'dogpile.cache.bmemcached',
            expiration_time = 3600,
            arguments = {
                'url':["127.0.0.1"],
                'username':'scott',
                'password':'tiger'
            }
        )

    Keys understood in the ``arguments`` dictionary include:

    :param username: optional username, used for SASL authentication.
    :param password: optional password, used for SASL authentication.

    """

    def __init__(self, arguments):
        # Credentials default to None when not supplied.
        self.password = arguments.get('password', None)
        self.username = arguments.get('username', None)
        super(BMemcachedBackend, self).__init__(arguments)

    def _imports(self):
        global bmemcached
        import bmemcached

        class RepairBMemcachedAPI(bmemcached.Client):
            """Repairs BMemcached's non-standard method
            signatures, which was fixed in BMemcached
            ef206ed4473fec3b639e.

            """

            def add(self, key, value):
                # Report a ValueError from the underlying client as a
                # plain ``False`` result.
                try:
                    outcome = super(RepairBMemcachedAPI, self).add(
                        key, value)
                except ValueError:
                    return False
                return outcome

        # The subclass can only be built once bmemcached has been
        # imported, so it is stored on the instance here.
        self.Client = RepairBMemcachedAPI

    def _create_client(self):
        """Return a new client using the configured credentials."""
        credentials = dict(
            username=self.username,
            password=self.password,
        )
        return self.Client(self.url, **credentials)

    def delete_multi(self, keys):
        """Delete each key individually, as the python-binary-memcached
        API does not implement ``delete_multi``."""
        for single_key in keys:
            self.delete(single_key)
|
||||
@@ -1,122 +0,0 @@
|
||||
"""
|
||||
Memory Backends
|
||||
---------------
|
||||
|
||||
Provides simple dictionary-based backends.
|
||||
|
||||
The two backends are :class:`.MemoryBackend` and :class:`.MemoryPickleBackend`;
|
||||
the latter applies a serialization step to cached values while the former
|
||||
places the value as given into the dictionary.
|
||||
|
||||
"""
|
||||
|
||||
from dogpile.cache.api import CacheBackend, NO_VALUE
|
||||
from dogpile.cache.compat import pickle
|
||||
|
||||
class MemoryBackend(CacheBackend):
    """A backend that stores values in a plain dictionary.

    No size management is performed; values placed into the dictionary
    stay there until explicitly removed.  Note that Dogpile's
    expiration of items is based on timestamps and does not remove
    them from the cache.

    E.g.::

        from dogpile.cache import make_region

        region = make_region().configure(
            'dogpile.cache.memory'
        )

    A Python dictionary of your choosing can be supplied through the
    ``cache_dict`` argument::

        my_dictionary = {}
        region = make_region().configure(
            'dogpile.cache.memory',
            arguments={
                "cache_dict":my_dictionary
            }
        )

    """

    # When True, values are pickled on the way in and unpickled on the
    # way out.
    pickle_values = False

    def __init__(self, arguments):
        # "cache_dict" is removed from ``arguments``; a fresh dict is
        # used when the caller does not supply one.
        self._cache = arguments.pop("cache_dict", {})

    def get(self, key):
        """Return the value stored for ``key``, or ``NO_VALUE``."""
        stored = self._cache.get(key, NO_VALUE)
        if stored is NO_VALUE or not self.pickle_values:
            return stored
        return pickle.loads(stored)

    def get_multi(self, keys):
        """Return a list of values (or ``NO_VALUE``), one per key."""
        lookup = self._cache.get
        fetched = [lookup(key, NO_VALUE) for key in keys]
        if not self.pickle_values:
            return fetched
        return [
            entry if entry is NO_VALUE else pickle.loads(entry)
            for entry in fetched
        ]

    def set(self, key, value):
        """Store ``value`` under ``key``."""
        if self.pickle_values:
            self._cache[key] = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
        else:
            self._cache[key] = value

    def set_multi(self, mapping):
        """Store every key/value pair of ``mapping``."""
        if self.pickle_values:
            for key, value in mapping.items():
                self._cache[key] = pickle.dumps(
                    value, pickle.HIGHEST_PROTOCOL)
        else:
            for key, value in mapping.items():
                self._cache[key] = value

    def delete(self, key):
        """Remove ``key`` from the dictionary; missing keys are ignored."""
        self._cache.pop(key, None)

    def delete_multi(self, keys):
        """Remove every key in ``keys``; missing keys are ignored."""
        discard = self._cache.pop
        for key in keys:
            discard(key, None)
|
||||
|
||||
|
||||
class MemoryPickleBackend(MemoryBackend):
    """A plain-dictionary backend which serializes objects on
    :meth:`.MemoryBackend.set` and deserializes them on
    :meth:`.MemoryBackend.get`.

    E.g.::

        from dogpile.cache import make_region

        region = make_region().configure(
            'dogpile.cache.memory_pickle'
        )

    Because values are pickled, the object placed in the cache is a
    copy of the object originally given; later changes to the given
    object are not reflected in the cached value.  The backend
    therefore behaves the same way as other backends which make use
    of serialization.

    Serialization is performed via pickle and carries the same
    performance cost as it does for other backends; the
    :class:`.MemoryPickleBackend` performance is thus somewhere in
    between that of the pure :class:`.MemoryBackend` and the remote
    server oriented backends such as that of Memcached or Redis.

    Pickle behavior here is the same as that of the Redis backend,
    using either ``cPickle`` or ``pickle`` and specifying
    ``HIGHEST_PROTOCOL`` upon serialize.

    .. versionadded:: 0.5.3

    """

    # Switch on the pickling code paths of MemoryBackend.
    pickle_values = True
|
||||
@@ -1,62 +0,0 @@
|
||||
"""
|
||||
Null Backend
|
||||
-------------
|
||||
|
||||
The Null backend does not do any caching at all. It can be
|
||||
used to test behavior without caching, or as a means of disabling
|
||||
caching for a region that is otherwise used normally.
|
||||
|
||||
.. versionadded:: 0.5.4
|
||||
|
||||
"""
|
||||
|
||||
from dogpile.cache.api import CacheBackend, NO_VALUE
|
||||
|
||||
|
||||
__all__ = ['NullBackend']
|
||||
|
||||
|
||||
class NullLock(object):
    """A mutex stand-in which never blocks and never fails.

    It is returned by :meth:`.NullBackend.get_mutex` so that no real
    locking takes place when caching is disabled.
    """

    def acquire(self, wait=True):
        """Pretend to acquire the lock.

        :param wait: accepted for compatibility with real mutexes,
         which may be called as ``acquire(False)`` or
         ``acquire(wait=...)``; it has no effect here.
        :return: always ``True``, so callers that test the result see
         the lock as acquired.
        """
        return True

    def release(self):
        """Do nothing; there is no lock state to release."""
        pass
|
||||
|
||||
|
||||
class NullBackend(CacheBackend):
    """A "null" backend which turns every cache operation into a no-op.

    Basic usage::

        from dogpile.cache import make_region

        region = make_region().configure(
            'dogpile.cache.null'
        )

    """

    def __init__(self, arguments):
        """Accept and ignore the ``arguments`` dictionary."""

    def get_mutex(self, key):
        """Return a new :class:`.NullLock` for any key."""
        return NullLock()

    def get(self, key):
        """Always report a cache miss."""
        return NO_VALUE

    def get_multi(self, keys):
        """Report a cache miss for every requested key."""
        return [NO_VALUE for _ in keys]

    def set(self, key, value):
        """Discard the value."""

    def set_multi(self, mapping):
        """Discard all values."""

    def delete(self, key):
        """Do nothing; nothing is ever stored."""

    def delete_multi(self, keys):
        """Do nothing; nothing is ever stored."""
|
||||
@@ -1,181 +0,0 @@
|
||||
"""
|
||||
Redis Backends
|
||||
------------------
|
||||
|
||||
Provides backends for talking to `Redis <http://redis.io>`_.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from dogpile.cache.api import CacheBackend, NO_VALUE
|
||||
from dogpile.cache.compat import pickle, u
|
||||
|
||||
redis = None
|
||||
|
||||
__all__ = 'RedisBackend',
|
||||
|
||||
|
||||
class RedisBackend(CacheBackend):
    """A `Redis <http://redis.io/>`_ backend, using the
    `redis-py <http://pypi.python.org/pypi/redis/>`_ backend.

    Example configuration::

        from dogpile.cache import make_region

        region = make_region().configure(
            'dogpile.cache.redis',
            arguments = {
                'host': 'localhost',
                'port': 6379,
                'db': 0,
                'redis_expiration_time': 60*60*2,   # 2 hours
                'distributed_lock':True
                }
        )

    Arguments accepted in the arguments dictionary:

    :param url: string. If provided, will override separate
     host/port/db params.  The format is that accepted by
     ``StrictRedis.from_url()``.

     .. versionadded:: 0.4.1

    :param host: string, default is ``localhost``.

    :param password: string, default is no password.

     .. versionadded:: 0.4.1

    :param port: integer, default is ``6379``.

    :param db: integer, default is ``0``.

    :param redis_expiration_time: integer, number of seconds after
     setting a value that Redis should expire it.  This should be
     larger than dogpile's cache expiration.  By default no expiration
     is set.

    :param distributed_lock: boolean, when True, will use a
     redis-lock as the dogpile lock.  Use this when multiple processes
     will be talking to the same redis instance.  When left at False,
     dogpile will coordinate on a regular threading mutex.

    :param lock_timeout: integer, number of seconds after acquiring a
     lock that Redis should expire it.  This argument is only valid
     when ``distributed_lock`` is ``True``.

     .. versionadded:: 0.5.0

    :param socket_timeout: float, seconds for socket timeout.
     Default is None (no timeout).

     .. versionadded:: 0.5.4

    :param lock_sleep: integer, number of seconds to sleep when failed
     to acquire a lock.  This argument is only valid when
     ``distributed_lock`` is ``True``.

     .. versionadded:: 0.5.0

    :param connection_pool: ``redis.ConnectionPool`` object.  If
     provided, this object supersedes other connection arguments
     passed to the ``redis.StrictRedis`` instance, including url
     and/or host as well as socket_timeout, and will be passed to
     ``redis.StrictRedis`` as the source of connectivity.

     .. versionadded:: 0.5.4

    """

    def __init__(self, arguments):
        self._imports()

        # Connection settings.
        self.url = arguments.pop('url', None)
        self.host = arguments.pop('host', 'localhost')
        self.password = arguments.pop('password', None)
        self.port = arguments.pop('port', 6379)
        self.db = arguments.pop('db', 0)
        self.socket_timeout = arguments.pop('socket_timeout', None)
        self.connection_pool = arguments.get('connection_pool', None)

        # Locking settings.
        self.distributed_lock = arguments.get('distributed_lock', False)
        self.lock_timeout = arguments.get('lock_timeout', None)
        self.lock_sleep = arguments.get('lock_sleep', 0.1)

        # A falsy expiration time means values are stored without a TTL.
        self.redis_expiration_time = arguments.pop(
            'redis_expiration_time', 0)

        self.client = self._create_client()

    def _imports(self):
        # defer imports until backend is used
        global redis
        import redis

    def _create_client(self):
        """Build the ``redis.StrictRedis`` client from the settings."""
        pool = self.connection_pool
        if pool is not None:
            # the connection pool already has all other connection
            # options present within, so here we disregard
            # socket_timeout and others.
            return redis.StrictRedis(connection_pool=pool)

        options = {}
        if self.socket_timeout:
            options['socket_timeout'] = self.socket_timeout

        if self.url is None:
            return redis.StrictRedis(
                host=self.host,
                password=self.password,
                port=self.port,
                db=self.db,
                **options
            )
        return redis.StrictRedis.from_url(url=self.url, **options)

    def get_mutex(self, key):
        """Return a redis lock for ``key`` when ``distributed_lock`` is
        enabled, otherwise ``None``."""
        if not self.distributed_lock:
            return None
        lock_name = u('_lock{0}').format(key)
        return self.client.lock(
            lock_name, self.lock_timeout, self.lock_sleep)

    def get(self, key):
        """Return the unpickled value for ``key``, or ``NO_VALUE``."""
        # NOTE(review): stored payloads are unpickled as-is, so the
        # Redis server presumably has to be trusted -- confirm.
        raw = self.client.get(key)
        return NO_VALUE if raw is None else pickle.loads(raw)

    def get_multi(self, keys):
        """Return unpickled values (or ``NO_VALUE``), one per key."""
        return [
            NO_VALUE if raw is None else pickle.loads(raw)
            for raw in self.client.mget(keys)
        ]

    def set(self, key, value):
        """Pickle and store ``value``, with a TTL when one is configured."""
        payload = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
        ttl = self.redis_expiration_time
        if ttl:
            self.client.setex(key, ttl, payload)
        else:
            self.client.set(key, payload)

    def set_multi(self, mapping):
        """Pickle and store every key/value pair of ``mapping``."""
        pickled = dict(
            (name, pickle.dumps(obj, pickle.HIGHEST_PROTOCOL))
            for name, obj in mapping.items()
        )

        ttl = self.redis_expiration_time
        if ttl:
            # One SETEX per key is queued on a pipeline, which is then
            # executed once.
            pipe = self.client.pipeline()
            for name, payload in pickled.items():
                pipe.setex(name, ttl, payload)
            pipe.execute()
        else:
            self.client.mset(pickled)

    def delete(self, key):
        """Delete ``key`` from Redis."""
        self.client.delete(key)

    def delete_multi(self, keys):
        """Delete all of ``keys`` with a single ``delete`` call."""
        self.client.delete(*keys)
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
import sys

# Interpreter version and platform flags used to select the
# compatibility shims defined in this module.
py2k = sys.version_info < (3, 0)
py3k = sys.version_info >= (3, 0)
py32 = sys.version_info >= (3, 2)
py27 = sys.version_info >= (2, 7)
jython = sys.platform.startswith('java')
win32 = sys.platform.startswith('win')

try:
    import threading
except ImportError:
    # Fall back to the single-threaded stand-in module.
    import dummy_threading as threading


if py3k:  # pragma: no cover
    string_types = (str,)
    text_type = str
    string_type = str

    if py32:
        # Re-export the builtin under this module's namespace.
        callable = callable
    else:
        def callable(fn):
            return hasattr(fn, '__call__')

    def u(s):
        # Literals are already text on Python 3.
        return s

    def ue(s):
        return s

    import configparser
    import io
    import _thread as thread
else:
    string_types = (basestring,)
    text_type = unicode
    string_type = str

    def u(s):
        # Decode a byte string literal into text.
        return unicode(s, "utf-8")

    def ue(s):
        # Decode a byte string containing unicode escapes.
        return unicode(s, "unicode_escape")

    import ConfigParser as configparser
    import StringIO as io

    callable = callable
    import thread


# cPickle is used unless running on Python 3 or Jython.
if py3k or jython:
    import pickle
else:
    import cPickle as pickle
|
||||
|
||||
|
||||
def timedelta_total_seconds(td):
    """Return the duration of the timedelta ``td`` in seconds.

    ``td.total_seconds()`` is used when the ``py27`` flag is set;
    otherwise the value is computed from the ``days``, ``seconds``
    and ``microseconds`` attributes.
    """
    if not py27:
        whole_seconds = td.seconds + td.days * 24 * 3600
        return (td.microseconds + whole_seconds * 1e6) / 1e6
    return td.total_seconds()
|
||||
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
"""Exception classes for dogpile.cache."""
|
||||
|
||||
|
||||
class DogpileCacheException(Exception):
    """Root of the dogpile.cache exception hierarchy."""


class RegionAlreadyConfigured(DogpileCacheException):
    """The CacheRegion instance has already been configured."""


class RegionNotConfigured(DogpileCacheException):
    """The CacheRegion instance has not been configured yet."""


class ValidationError(DogpileCacheException):
    """A value or option failed validation."""
|
||||
@@ -1,87 +0,0 @@
|
||||
"""
|
||||
Mako Integration
|
||||
----------------
|
||||
|
||||
dogpile.cache includes a `Mako <http://www.makotemplates.org>`_ plugin that replaces `Beaker <http://beaker.groovie.org>`_
|
||||
as the cache backend.
|
||||
Set up a Mako template lookup using the "dogpile.cache" cache implementation
|
||||
and a region dictionary::
|
||||
|
||||
from dogpile.cache import make_region
|
||||
from mako.lookup import TemplateLookup
|
||||
|
||||
my_regions = {
|
||||
"local":make_region().configure(
|
||||
"dogpile.cache.dbm",
|
||||
expiration_time=360,
|
||||
arguments={"filename":"file.dbm"}
|
||||
),
|
||||
"memcached":make_region().configure(
|
||||
"dogpile.cache.pylibmc",
|
||||
expiration_time=3600,
|
||||
arguments={"url":["127.0.0.1"]}
|
||||
)
|
||||
}
|
||||
|
||||
mako_lookup = TemplateLookup(
|
||||
directories=["/myapp/templates"],
|
||||
cache_impl="dogpile.cache",
|
||||
cache_args={
|
||||
'regions':my_regions
|
||||
}
|
||||
)
|
||||
|
||||
To use the above configuration in a template, use the ``cached=True`` argument on any
|
||||
Mako tag which accepts it, in conjunction with the name of the desired region
|
||||
as the ``cache_region`` argument::
|
||||
|
||||
<%def name="mysection()" cached="True" cache_region="memcached">
|
||||
some content that's cached
|
||||
</%def>
|
||||
|
||||
|
||||
"""
|
||||
from mako.cache import CacheImpl
|
||||
|
||||
class MakoPlugin(CacheImpl):
    """A Mako ``CacheImpl`` which talks to dogpile.cache.

    The regions are read from the ``'regions'`` entry of the
    template's ``cache_args``; each cache operation selects one of
    them through the ``region`` keyword argument.
    """

    def __init__(self, cache):
        """Look up the region dictionary configured on the template.

        :raises KeyError: if ``cache_args`` has no ``'regions'`` entry.
        """
        super(MakoPlugin, self).__init__(cache)
        try:
            self.regions = self.cache.template.cache_args['regions']
        except KeyError:
            raise KeyError(
                "'cache_regions' argument is required on the "
                "Mako Lookup or Template object for usage "
                "with the dogpile.cache plugin.")

    def _get_region(self, **kw):
        """Return the region named by the ``region`` keyword argument.

        :raises KeyError: if ``region`` is missing from ``kw`` or does
         not name a configured region.
        """
        try:
            region = kw['region']
        except KeyError:
            # The separating space after 'cached=True' is required; the
            # adjacent literals are concatenated verbatim.
            raise KeyError(
                "'cache_region' argument must be specified with "
                "'cached=True' within templates for usage with the "
                "dogpile.cache plugin.")
        try:
            return self.regions[region]
        except KeyError:
            raise KeyError("No such region '%s'" % region)

    def get_and_replace(self, key, creation_function, **kw):
        """Return the cached value for ``key``, creating it with
        ``creation_function`` through the region's ``get_or_create``.

        An optional ``timeout`` keyword is forwarded as the
        ``expiration_time``.
        """
        expiration_time = kw.pop("timeout", None)
        return self._get_region(**kw).get_or_create(
            key, creation_function,
            expiration_time=expiration_time)

    def get_or_create(self, key, creation_function, **kw):
        """Alias of :meth:`get_and_replace`."""
        return self.get_and_replace(key, creation_function, **kw)

    def put(self, key, value, **kw):
        """Store ``value`` under ``key`` in the selected region."""
        self._get_region(**kw).put(key, value)

    def get(self, key, **kw):
        """Return the value for ``key`` from the selected region.

        An optional ``timeout`` keyword is forwarded as the
        ``expiration_time``.
        """
        expiration_time = kw.pop("timeout", None)
        return self._get_region(**kw).get(
            key, expiration_time=expiration_time)

    def invalidate(self, key, **kw):
        """Delete ``key`` from the selected region."""
        self._get_region(**kw).delete(key)
|
||||
@@ -1,93 +0,0 @@
|
||||
"""
|
||||
Proxy Backends
|
||||
------------------
|
||||
|
||||
Provides a utility and a decorator class that allow for modifying the behavior
|
||||
of different backends without altering the class itself or having to extend the
|
||||
base backend.
|
||||
|
||||
.. versionadded:: 0.5.0 Added support for the :class:`.ProxyBackend` class.
|
||||
|
||||
"""
|
||||
|
||||
from .api import CacheBackend
|
||||
|
||||
class ProxyBackend(CacheBackend):
    """A decorator class for altering the functionality of backends.

    Basic usage::

        from dogpile.cache import make_region
        from dogpile.cache.proxy import ProxyBackend

        class MyFirstProxy(ProxyBackend):
            def get(self, key):
                # ... custom code goes here ...
                return self.proxied.get(key)

            def set(self, key, value):
                # ... custom code goes here ...
                self.proxied.set(key, value)

        class MySecondProxy(ProxyBackend):
            def get(self, key):
                # ... custom code goes here ...
                return self.proxied.get(key)


        region = make_region().configure(
            'dogpile.cache.dbm',
            expiration_time = 3600,
            arguments = {
                "filename":"/path/to/cachefile.dbm"
            },
            wrap = [ MyFirstProxy, MySecondProxy ]
        )

    Classes that extend :class:`.ProxyBackend` can be stacked
    together.  The ``.proxied`` property will always
    point to either the concrete backend instance or
    the next proxy in the chain that a method can be
    delegated towards.

    .. versionadded:: 0.5.0

    """

    def __init__(self, *args, **kwargs):
        # No backend is attached until wrap() is called.
        self.proxied = None

    def wrap(self, backend):
        """Attach ``backend`` as the target of this proxy.

        :param backend: a :class:`.CacheBackend` instance, which may
         itself be another :class:`.ProxyBackend`.
        :return: this proxy, usable as a backend by a
         :class:`.CacheRegion` object.
        """
        # ProxyBackend subclasses CacheBackend, so one isinstance check
        # covers both concrete backends and other proxies.
        assert isinstance(backend, CacheBackend)
        self.proxied = backend
        return self

    #
    # Delegate any functions that are not already overridden to
    # the proxied backend
    #
    def get(self, key):
        return self.proxied.get(key)

    def set(self, key, value):
        self.proxied.set(key, value)

    def delete(self, key):
        self.proxied.delete(key)

    def get_multi(self, keys):
        return self.proxied.get_multi(keys)

    def set_multi(self, keys):
        # ``keys`` is forwarded unchanged to the proxied set_multi().
        self.proxied.set_multi(keys)

    def delete_multi(self, keys):
        self.proxied.delete_multi(keys)

    def get_mutex(self, key):
        return self.proxied.get_mutex(key)
|
||||
|
||||
-1240
File diff suppressed because it is too large
Load Diff
-189
@@ -1,189 +0,0 @@
|
||||
from hashlib import sha1
|
||||
import inspect
|
||||
import re
|
||||
import collections
|
||||
from . import compat
|
||||
|
||||
|
||||
def coerce_string_conf(d):
    """Return a copy of ``d`` with string values coerced to Python types.

    Non-string values are copied unchanged.  String values are
    stripped of surrounding whitespace and then converted to ``int``,
    ``float``, ``bool`` (``true`` / ``false``, case-insensitive) or
    ``None`` (the exact text ``None``) when they match; any other
    string is kept in its stripped form.
    """
    int_pattern = r'^[-+]?\d+$'
    float_pattern = r'^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?$'

    result = {}
    for name, raw in d.items():
        if isinstance(raw, compat.string_types):
            text = raw.strip()
            lowered = text.lower()
            # The integer test runs first because the float pattern
            # also matches plain integers.
            if re.match(int_pattern, text):
                coerced = int(text)
            elif re.match(float_pattern, text):
                coerced = float(text)
            elif lowered in ('false', 'true'):
                coerced = lowered == 'true'
            elif text == 'None':
                coerced = None
            else:
                coerced = text
        else:
            coerced = raw
        result[name] = coerced
    return result
|
||||
|
||||
class PluginLoader(object):
    """Resolve plugin names to objects for one entry point group.

    Names are resolved either from loaders added via :meth:`register`
    or from ``pkg_resources`` entry points of the given group.
    """

    def __init__(self, group):
        self.group = group
        # Maps plugin name -> zero-argument callable returning the plugin.
        self.impls = {}

    def load(self, name):
        """Return the plugin registered under ``name``.

        :raises Exception: if no loader is registered and no entry
         point of that name exists in the group.
        """
        try:
            loader = self.impls[name]
        except KeyError:
            pass
        else:
            return loader()

        # pragma NO COVERAGE
        import pkg_resources
        for entry_point in pkg_resources.iter_entry_points(
                self.group, name):
            # Remember the loader of the first matching entry point.
            self.impls[name] = entry_point.load
            return entry_point.load()

        raise Exception(
            "Can't load plugin %s %s" %
            (self.group, name))

    def register(self, name, modulepath, objname):
        """Register ``objname`` from module ``modulepath`` under ``name``.

        The module is imported only when the plugin is loaded.
        """
        def load():
            # __import__ returns the top-level package, so walk down
            # the remaining dotted components.
            tokens = modulepath.split(".")
            target = __import__(modulepath)
            for token in tokens[1:]:
                target = getattr(target, token)
            return getattr(target, objname)

        self.impls[name] = load
|
||||
|
||||
|
||||
def function_key_generator(namespace, fn, to_str=compat.string_type):
    """Return a function that generates a string
    key, based on a given function as well as
    arguments to the returned function itself.

    This is used by :meth:`.CacheRegion.cache_on_arguments`
    to generate a cache key from a decorated function.

    It can be replaced using the ``function_key_generator``
    argument passed to :func:`.make_region`.

    :param namespace: optional string appended to the
     ``module:name`` prefix of every key; may be ``None``.
    :param fn: the function keys are generated for.
    :param to_str: callable applied to each positional argument to
     turn it into a string.
    :return: a callable accepting positional arguments only and
     returning the key string; it raises ``ValueError`` when given
     keyword arguments.

    """

    if namespace is None:
        namespace = '%s:%s' % (fn.__module__, fn.__name__)
    else:
        namespace = '%s:%s|%s' % (fn.__module__, fn.__name__, namespace)

    # inspect.getargspec() was removed in Python 3.11; prefer
    # getfullargspec() where it exists and fall back to getargspec()
    # elsewhere.  Both return the positional names as element 0.
    getspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    argspec = getspec(fn)
    has_self = argspec[0] and argspec[0][0] in ('self', 'cls')

    def generate_key(*args, **kw):
        if kw:
            raise ValueError(
                "dogpile.cache's default key creation "
                "function does not accept keyword arguments.")
        if has_self:
            # Leave the instance / class out of the key.
            args = args[1:]

        return namespace + "|" + " ".join(map(to_str, args))
    return generate_key
|
||||
|
||||
def function_multi_key_generator(namespace, fn, to_str=compat.string_type):
    """Return a function that generates a list of string keys, one
    per positional argument passed to the returned function.

    :param namespace: optional string appended to the
     ``module:name`` prefix of every key; may be ``None``.
    :param fn: the function keys are generated for.
    :param to_str: callable applied to each positional argument to
     turn it into a string.
    :return: a callable accepting positional arguments only and
     returning a list of key strings; it raises ``ValueError`` when
     given keyword arguments.

    """

    if namespace is None:
        namespace = '%s:%s' % (fn.__module__, fn.__name__)
    else:
        namespace = '%s:%s|%s' % (fn.__module__, fn.__name__, namespace)

    # inspect.getargspec() was removed in Python 3.11; prefer
    # getfullargspec() where it exists and fall back to getargspec()
    # elsewhere.  Both return the positional names as element 0.
    getspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    argspec = getspec(fn)
    has_self = argspec[0] and argspec[0][0] in ('self', 'cls')

    def generate_keys(*args, **kw):
        if kw:
            raise ValueError(
                "dogpile.cache's default key creation "
                "function does not accept keyword arguments.")
        if has_self:
            # Leave the instance / class out of the keys.
            args = args[1:]
        return [namespace + "|" + key for key in map(to_str, args)]
    return generate_keys
|
||||
|
||||
def sha1_mangle_key(key):
    """a SHA1 key mangler.

    :param key: the cache key, as text or bytes.
    :return: the hexadecimal SHA1 digest of the key as a string.
    """

    # hashlib only accepts bytes-like input; on Python 3 a text key
    # would raise TypeError, so encode it first.  Byte strings
    # (including Python 2 ``str``) are hashed unchanged.
    if hasattr(key, 'encode') and not isinstance(key, bytes):
        key = key.encode('utf-8')

    return sha1(key).hexdigest()
|
||||
|
||||
def length_conditional_mangler(length, mangler):
    """Return a key mangler which applies ``mangler`` only to keys
    whose length is at least ``length``; shorter keys are returned
    unchanged.

    """
    def mangle(key):
        return mangler(key) if len(key) >= length else key
    return mangle
|
||||
|
||||
class memoized_property(object):
    """A read-only @property whose getter runs only once per instance."""

    def __init__(self, fget, doc=None):
        self.fget = fget
        self.__name__ = fget.__name__
        self.__doc__ = doc or fget.__doc__

    def __get__(self, obj, cls):
        if obj is not None:
            # Store the result in the instance dict under the same
            # name, so later lookups find it there and bypass this
            # descriptor.
            computed = self.fget(obj)
            obj.__dict__[self.__name__] = computed
            return computed
        # Accessed on the class: return the descriptor itself.
        return self
|
||||
|
||||
def to_list(x, default=None):
    """Coerce ``x`` to a list.

    ``None`` yields ``default``; lists and tuples are returned
    unchanged; any other value is wrapped in a one-element list.
    """
    if x is None:
        return default
    return x if isinstance(x, (list, tuple)) else [x]
|
||||
|
||||
|
||||
class KeyReentrantMutex(object):
    """Wrap a mutex so that a thread already holding it for one key
    can also acquire it for further, different keys.
    """

    def __init__(self, key, mutex, keys):
        self.key = key
        self.mutex = mutex
        # Shared mapping of thread ident -> set of key names held.
        self.keys = keys

    @classmethod
    def factory(cls, mutex):
        """Return a ``fac(key)`` callable producing mutexes that share
        ``mutex`` and one key store."""
        # this collection holds zero or one
        # thread idents as the key; a set of
        # keynames held as the value.
        keystore = collections.defaultdict(set)

        def fac(key):
            return KeyReentrantMutex(key, mutex, keystore)
        return fac

    def acquire(self, wait=True):
        """Acquire the mutex for this key; return True on success."""
        ident = compat.threading.current_thread().ident
        held = self.keys.get(ident)
        if held is not None and self.key not in held:
            # current lockholder, new key. add it in
            held.add(self.key)
            return True
        if not self.mutex.acquire(wait=wait):
            return False
        # after acquire, create new set and add our key
        self.keys[ident].add(self.key)
        return True

    def release(self):
        """Release this key; unlock the mutex once no keys remain."""
        ident = compat.threading.current_thread().ident
        held = self.keys.get(ident)
        assert held is not None, "this thread didn't do the acquire"
        assert self.key in held, "No acquire held for key '%s'" % self.key
        held.remove(self.key)
        if held:
            return
        # when list of keys empty, remove
        # the thread ident and unlock.
        del self.keys[ident]
        self.mutex.release()
|
||||
@@ -1,11 +0,0 @@
|
||||
from .dogpile import NeedRegenerationException, Lock
|
||||
from .nameregistry import NameRegistry
|
||||
from .readwrite_lock import ReadWriteMutex
|
||||
from .legacy import Dogpile, SyncReaderDogpile
|
||||
|
||||
__all__ = [
|
||||
'Dogpile', 'SyncReaderDogpile', 'NeedRegenerationException',
|
||||
'NameRegistry', 'ReadWriteMutex', 'Lock']
|
||||
|
||||
__version__ = '0.4.1'
|
||||
|
||||
@@ -1,162 +0,0 @@
|
||||
import time
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
class NeedRegenerationException(Exception):
    """Raised inside the 'with' block to force the 'has_value'
    flag to False, which incurs a regeneration of the value.

    """
|
||||
|
||||
NOT_REGENERATED = object()
|
||||
|
||||
class Lock(object):
|
||||
"""Dogpile lock class.
|
||||
|
||||
Provides an interface around an arbitrary mutex
|
||||
that allows one thread/process to be elected as
|
||||
the creator of a new value, while other threads/processes
|
||||
continue to return the previous version
|
||||
of that value.
|
||||
|
||||
.. versionadded:: 0.4.0
|
||||
The :class:`.Lock` class was added as a single-use object
|
||||
representing the dogpile API without dependence on
|
||||
any shared state between multiple instances.
|
||||
|
||||
:param mutex: A mutex object that provides ``acquire()``
|
||||
and ``release()`` methods.
|
||||
:param creator: Callable which returns a tuple of the form
|
||||
(new_value, creation_time). "new_value" should be a newly
|
||||
generated value representing completed state. "creation_time"
|
||||
should be a floating point time value which is relative
|
||||
to Python's ``time.time()`` call, representing the time
|
||||
at which the value was created. This time value should
|
||||
be associated with the created value.
|
||||
:param value_and_created_fn: Callable which returns
|
||||
a tuple of the form (existing_value, creation_time). This
|
||||
basically should return what the last local call to the ``creator()``
|
||||
callable has returned, i.e. the value and the creation time,
|
||||
which would be assumed here to be from a cache. If the
|
||||
value is not available, the :class:`.NeedRegenerationException`
|
||||
exception should be thrown.
|
||||
:param expiretime: Expiration time in seconds. Set to
|
||||
``None`` for never expires. This timestamp is compared
|
||||
to the creation_time result and ``time.time()`` to determine if
|
||||
the value returned by value_and_created_fn is "expired".
|
||||
:param async_creator: A callable. If specified, this callable will be
|
||||
passed the mutex as an argument and is responsible for releasing the mutex
|
||||
after it finishes some asynchronous value creation. The intent is for
|
||||
this to be used to defer invocation of the creator callable until some
|
||||
later time.
|
||||
|
||||
.. versionadded:: 0.4.1 added the async_creator argument.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
mutex,
|
||||
creator,
|
||||
value_and_created_fn,
|
||||
expiretime,
|
||||
async_creator=None,
|
||||
):
|
||||
self.mutex = mutex
|
||||
self.creator = creator
|
||||
self.value_and_created_fn = value_and_created_fn
|
||||
self.expiretime = expiretime
|
||||
self.async_creator = async_creator
|
||||
|
||||
def _is_expired(self, createdtime):
|
||||
"""Return true if the expiration time is reached, or no
|
||||
value is available."""
|
||||
|
||||
return not self._has_value(createdtime) or \
|
||||
(
|
||||
self.expiretime is not None and
|
||||
time.time() - createdtime > self.expiretime
|
||||
)
|
||||
|
||||
def _has_value(self, createdtime):
|
||||
"""Return true if the creation function has proceeded
|
||||
at least once."""
|
||||
return createdtime > 0
|
||||
|
||||
def _enter(self):
|
||||
value_fn = self.value_and_created_fn
|
||||
|
||||
try:
|
||||
value = value_fn()
|
||||
value, createdtime = value
|
||||
except NeedRegenerationException:
|
||||
log.debug("NeedRegenerationException")
|
||||
value = NOT_REGENERATED
|
||||
createdtime = -1
|
||||
|
||||
generated = self._enter_create(createdtime)
|
||||
|
||||
if generated is not NOT_REGENERATED:
|
||||
generated, createdtime = generated
|
||||
return generated
|
||||
elif value is NOT_REGENERATED:
|
||||
try:
|
||||
value, createdtime = value_fn()
|
||||
return value
|
||||
except NeedRegenerationException:
|
||||
raise Exception("Generation function should "
|
||||
"have just been called by a concurrent "
|
||||
"thread.")
|
||||
else:
|
||||
return value
|
||||
|
||||
def _enter_create(self, createdtime):
    """Create a new value if needed, holding the creation mutex.

    :param createdtime: creation time of the value the caller currently
     sees; ``_has_value`` / ``_is_expired`` interpret it.
    :return: ``NOT_REGENERATED`` when the value is not expired, or when
     a value exists and the mutex could not be acquired without
     blocking.  Otherwise either the ``(value, createdtime)`` pair read
     from ``value_and_created_fn`` or whatever ``creator()`` returns.

    The mutex is released before returning unless it was handed to
    ``async_creator``, which then owns the release.
    """
    if not self._is_expired(createdtime):
        return NOT_REGENERATED

    # ``async`` is a reserved word as of Python 3.7, so the flag carries
    # a trailing underscore.
    async_ = False

    if self._has_value(createdtime):
        # A value is available: do not block behind another creator.
        if not self.mutex.acquire(False):
            log.debug("creation function in progress "
                      "elsewhere, returning")
            return NOT_REGENERATED
    else:
        # Nothing to return yet, so wait for the lock.
        log.debug("no value, waiting for create lock")
        self.mutex.acquire()

    try:
        # Lazy logging arguments: the mutex is only formatted if the
        # record is emitted, and a tuple-like mutex cannot be mistaken
        # for a set of format arguments.
        log.debug("value creation lock %r acquired", self.mutex)

        # see if someone created the value already
        try:
            value, createdtime = self.value_and_created_fn()
        except NeedRegenerationException:
            pass
        else:
            if not self._is_expired(createdtime):
                log.debug("value already present")
                return value, createdtime
            elif self.async_creator:
                log.debug("Passing creation lock to async runner")
                self.async_creator(self.mutex)
                # Only set after the hand-off succeeded; if the call
                # above raised, the ``finally`` below still releases.
                async_ = True
                return value, createdtime

        log.debug("Calling creation function")
        created = self.creator()
        return created
    finally:
        if not async_:
            self.mutex.release()
            log.debug("Released creation lock")
|
||||
|
||||
|
||||
def __enter__(self):
|
||||
return self._enter()
|
||||
|
||||
def __exit__(self, type, value, traceback):
|
||||
pass
|
||||
|
||||
@@ -1,154 +0,0 @@
|
||||
from __future__ import with_statement
|
||||
|
||||
from .util import threading
|
||||
from .readwrite_lock import ReadWriteMutex
|
||||
from .dogpile import Lock
|
||||
import time
|
||||
import contextlib
|
||||
|
||||
class Dogpile(object):
    """Dogpile lock class.

    .. deprecated:: 0.4.0
        The :class:`.Lock` object specifies the full
        API of the :class:`.Dogpile` object in a single way,
        rather than providing multiple modes of usage which
        don't necessarily work in the majority of cases.
        :class:`.Dogpile` is now a wrapper around the :class:`.Lock` object
        which provides dogpile.core's original usage pattern.
        This usage pattern began as something simple, but was
        not of general use in real-world caching environments without
        several extra complicating factors; the :class:`.Lock`
        object presents the "real-world" API more succinctly,
        and also fixes a cross-process concurrency issue.

    :param expiretime: Expiration time in seconds.  Set to
     ``None`` for never expires.
    :param init: if True, set the 'createdtime' to the
     current time.
    :param lock: a mutex object that provides
     ``acquire()`` and ``release()`` methods.

    """

    # The last known 'creation time' of the value, stored as an epoch
    # (i.e. from ``time.time()``).  A value of -1 means the value should
    # be recreated immediately.
    createdtime = -1

    def __init__(self, expiretime, init=False, lock=None):
        """Construct a new :class:`.Dogpile`.

        A fresh ``threading.Lock`` is used when no (truthy) ``lock`` is
        supplied.
        """
        self.dogpilelock = lock if lock else threading.Lock()
        self.expiretime = expiretime
        if init:
            self.createdtime = time.time()

    def acquire(self, creator,
                value_fn=None,
                value_and_created_fn=None):
        """Acquire the lock, returning a context manager.

        :param creator: Creation function, used if this thread
         is chosen to create a new value.

        :param value_fn: Optional function that returns
         the value from some datasource.  Will be returned
         if regeneration is not needed.

        :param value_and_created_fn: Like value_fn, but returns a tuple
         of (value, createdtime).  The returned createdtime
         will replace the "createdtime" value on this dogpile
         lock.   This option removes the need for the dogpile lock
         itself to remain persistent across usages; another
         dogpile can come along later and pick up where the
         previous one left off.

        """
        # Without a caller-supplied value_and_created_fn, the creation
        # time is tracked on this object and paired with the value here.
        tracks_time_locally = value_and_created_fn is None

        def creator_wrapper():
            value = creator()
            self.createdtime = time.time()
            if tracks_time_locally:
                return value, self.createdtime
            return value

        if tracks_time_locally:
            if value_fn is None:
                def value_and_created_fn():
                    return None, self.createdtime
            else:
                def value_and_created_fn():
                    return value_fn(), self.createdtime

        return Lock(
            self.dogpilelock,
            creator_wrapper,
            value_and_created_fn,
            self.expiretime
        )

    @property
    def is_expired(self):
        """Return true if the expiration time is reached, or no
        value is available."""
        if not self.has_value:
            return True
        if self.expiretime is None:
            return False
        elapsed = time.time() - self.createdtime
        return elapsed > self.expiretime

    @property
    def has_value(self):
        """Return true if the creation function has proceeded
        at least once."""
        return 0 < self.createdtime
|
||||
|
||||
|
||||
class SyncReaderDogpile(Dogpile):
    """Provide a read-write lock function on top of the :class:`.Dogpile`
    class.

    .. deprecated:: 0.4.0
        The :class:`.ReadWriteMutex` object can be used directly.

    """

    def __init__(self, *args, **kw):
        """Build the underlying dogpile, then attach a fresh
        :class:`.ReadWriteMutex` as ``readwritelock``."""
        super(SyncReaderDogpile, self).__init__(*args, **kw)
        self.readwritelock = ReadWriteMutex()

    @contextlib.contextmanager
    def acquire_write_lock(self):
        """Return the "write" lock context manager.

        This will provide a section that is mutexed against
        all readers/writers for the dogpile-maintained value.

        """
        self.readwritelock.acquire_write_lock()
        try:
            yield
        finally:
            # Released even when the managed block raises.
            self.readwritelock.release_write_lock()

    @contextlib.contextmanager
    def acquire(self, *arg, **kw):
        """Acquire the dogpile as the parent class does, then hold the
        read lock for as long as the caller's block runs.

        The value produced by the parent's context manager is yielded.
        """
        dogpile_cm = super(SyncReaderDogpile, self).acquire(*arg, **kw)
        with dogpile_cm as value:
            self.readwritelock.acquire_read_lock()
            try:
                yield value
            finally:
                self.readwritelock.release_read_lock()
|
||||
@@ -1,83 +0,0 @@
|
||||
from .util import threading
|
||||
import weakref
|
||||
|
||||
class NameRegistry(object):
    """Generate and return an object, keeping it as a
    singleton for a certain identifier for as long as it is
    strongly referenced.

    e.g.::

        class MyFoo(object):
            "some important object."
            def __init__(self, identifier):
                self.identifier = identifier

        registry = NameRegistry(MyFoo)

        # thread 1:
        my_foo = registry.get("foo1")

        # thread 2
        my_foo = registry.get("foo1")

    Above, ``my_foo`` in both thread #1 and #2 will
    be *the same object*.   The constructor for
    ``MyFoo`` will be called once, passing the
    identifier ``foo1`` as the argument.

    When thread 1 and thread 2 both complete or
    otherwise delete references to ``my_foo``, the
    object is *removed* from the :class:`.NameRegistry` as
    a result of Python garbage collection.

    :param creator: A function that will create a new
     value, given the identifier passed to the :meth:`.NameRegistry.get`
     method.

    """

    # Class-level attributes kept as they were; every instance replaces
    # ``_mutex`` with its own lock in ``__init__``.
    _locks = weakref.WeakValueDictionary()
    _mutex = threading.RLock()

    def __init__(self, creator):
        """Create a new :class:`.NameRegistry`.

        Values are held in a ``WeakValueDictionary``, so an entry goes
        away once nothing else references the value.
        """
        self._values = weakref.WeakValueDictionary()
        self._mutex = threading.RLock()
        self.creator = creator

    def get(self, identifier, *args, **kw):
        r"""Get and possibly create the value.

        :param identifier: Hash key for the value.
         If the creation function is called, this identifier
         will also be passed to the creation function.
        :param \*args, \**kw: Additional arguments which will
         also be passed to the creation function if it is
         called.

        """
        # Lock-free fast path.  Only the dictionary lookup sits inside
        # the ``try``, so a KeyError raised by the creator propagates to
        # the caller instead of triggering another creation attempt.
        try:
            return self._values[identifier]
        except KeyError:
            return self._sync_get(identifier, *args, **kw)

    def _sync_get(self, identifier, *args, **kw):
        """Return the value for ``identifier``, creating it under the
        registry mutex if it is still absent once the mutex is held."""
        with self._mutex:
            # Re-check under the lock: another thread may have created
            # the value since the unlocked lookup missed.
            try:
                return self._values[identifier]
            except KeyError:
                pass
            # The creator is called exactly once, outside any handler.
            value = self.creator(identifier, *args, **kw)
            self._values[identifier] = value
            return value
|
||||
@@ -1,130 +0,0 @@
|
||||
from .util import threading
|
||||
|
||||
import logging
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
class LockError(Exception):
    """Raised when a :class:`.ReadWriteMutex` is released incorrectly."""


class ReadWriteMutex(object):
    """A mutex which allows multiple readers, single writer.

    :class:`.ReadWriteMutex` uses a Python ``threading.Condition``
    to provide this functionality across threads within a process.

    The Beaker package also contained a file-lock based version
    of this concept, so that readers/writers could be synchronized
    across processes with a common filesystem.  A future Dogpile
    release may include this additional class at some point.

    """

    def __init__(self):
        # counts how many asynchronous methods (readers) are executing.
        # Spelled ``async_`` because ``async`` is a reserved word as of
        # Python 3.7.
        self.async_ = 0

        # pointer to thread that is the current sync operation
        self.current_sync_operation = None

        # condition object to lock on
        self.condition = threading.Condition(threading.Lock())

    def __getattr__(self, name):
        # Read-only fallback for the counter's previous name, for code
        # that looks it up dynamically, e.g. getattr(mutex, "async").
        if name == "async":
            return self.async_
        raise AttributeError(name)

    def acquire_read_lock(self, wait=True):
        """Acquire the 'read' lock.

        :param wait: when true, block while a write operation is
         pending or running; when false, give up immediately instead.
        :return: ``None`` when ``wait`` is true; otherwise ``True`` if
         the lock was acquired and ``False`` if it was not.
        """
        with self.condition:
            # see if a synchronous operation is waiting to start
            # or is already running, in which case we wait (or just
            # give up and return)
            if wait:
                while self.current_sync_operation is not None:
                    self.condition.wait()
            elif self.current_sync_operation is not None:
                return False

            self.async_ += 1
            log.debug("%s acquired read lock", self)

        if not wait:
            return True

    def release_read_lock(self):
        """Release the 'read' lock.

        :raises LockError: if called more times than the read lock
         was acquired.
        """
        with self.condition:
            self.async_ -= 1

            # check if we are the last asynchronous reader thread
            # out the door.
            if self.async_ == 0:
                # yes. so if a sync operation is waiting, notify_all to
                # wake it up
                if self.current_sync_operation is not None:
                    self.condition.notify_all()
            elif self.async_ < 0:
                raise LockError("Synchronizer error - too many "
                                "release_read_locks called")
            log.debug("%s released read lock", self)

    def acquire_write_lock(self, wait=True):
        """Acquire the 'write' lock.

        :param wait: when true, block until no other writer is active
         and the readers have finished; when false, give up immediately
         if either is in the way.
        :return: ``None`` when ``wait`` is true; otherwise ``True`` if
         the lock was acquired and ``False`` if it was not.
        """
        with self.condition:
            # here, we are not a synchronous reader, and after returning,
            # assuming waiting or immediate availability, we will be.

            if wait:
                # if another sync is working, wait
                while self.current_sync_operation is not None:
                    self.condition.wait()
            elif self.current_sync_operation is not None:
                # if another sync is working,
                # we dont want to wait, so forget it
                return False

            # establish ourselves as the current sync
            # this indicates to other read/write operations
            # that they should wait until this is None again
            self.current_sync_operation = threading.current_thread()

            # now wait again for asyncs to finish
            if self.async_ > 0:
                if wait:
                    # woken by the last reader in release_read_lock()
                    self.condition.wait()
                else:
                    # we dont want to wait, so forget it
                    self.current_sync_operation = None
                    return False
            log.debug("%s acquired write lock", self)

        if not wait:
            return True

    def release_write_lock(self):
        """Release the 'write' lock.

        :raises LockError: if the calling thread is not the one that
         holds the write lock.
        """
        with self.condition:
            if self.current_sync_operation is not threading.current_thread():
                raise LockError("Synchronizer error - current thread doesn't "
                                "have the write lock")

            # reset the current sync operation so
            # another can get it
            self.current_sync_operation = None

            # tell everyone to get ready
            self.condition.notify_all()

            log.debug("%s released write lock", self)
        # leaving the ``with`` block releases the condition: everyone go !!
|
||||
@@ -1,8 +0,0 @@
|
||||
import sys
|
||||
py3k = sys.version_info >= (3, 0)
|
||||
|
||||
try:
|
||||
import threading
|
||||
except ImportError:
|
||||
import dummy_threading as threading
|
||||
|
||||
@@ -0,0 +1,249 @@
|
||||
"""A dumb and slow but simple dbm clone.
|
||||
|
||||
For database spam, spam.dir contains the index (a text file),
|
||||
spam.bak *may* contain a backup of the index (also a text file),
|
||||
while spam.dat contains the data (a binary file).
|
||||
|
||||
XXX TO DO:
|
||||
|
||||
- seems to contain a bug when updating...
|
||||
|
||||
- reclaim free space (currently, space once occupied by deleted or expanded
|
||||
items is never reused)
|
||||
|
||||
- support concurrent access (currently, if two processes take turns making
|
||||
updates, they can mess up the index)
|
||||
|
||||
- support efficient access to large databases (currently, the whole index
|
||||
is read when the database is opened, and some updates rewrite the whole index)
|
||||
|
||||
- support opening for read-only (flag = 'm')
|
||||
|
||||
"""
|
||||
|
||||
import ast as _ast
|
||||
import os as _os
|
||||
import __builtin__
|
||||
import UserDict
|
||||
|
||||
_open = __builtin__.open
|
||||
|
||||
_BLOCKSIZE = 512
|
||||
|
||||
error = IOError # For anydbm
|
||||
|
||||
class _Database(UserDict.DictMixin):
    """Dictionary-like store backed by a text index file and a binary
    data file (plus a backup copy of the index)."""

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _open = _open   # for _commit()

    def __init__(self, filebasename, mode):
        self._mode = mode
        stem = filebasename + _os.extsep

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = stem + 'dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = stem + 'dat'
        self._bakfile = stem + 'bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None  # maps keys to (pos, siz) pairs

        # Mod by Jack: create data file if needed
        try:
            probe = _open(self._datfile, 'r')
        except IOError:
            with _open(self._datfile, 'w') as probe:
                self._chmod(self._datfile)
        else:
            probe.close()
        self._update()

    # Read directory file into the in-memory index dict.
    def _update(self):
        self._index = {}
        try:
            dirfile = _open(self._dirfile)
        except IOError:
            # No directory file yet: the index simply stays empty.
            return
        with dirfile:
            for raw_line in dirfile:
                key, pos_and_siz_pair = _ast.literal_eval(raw_line.rstrip())
                self._index[key] = pos_and_siz_pair

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None:
            return  # nothing to do

        os_module = self._os

        try:
            os_module.unlink(self._bakfile)
        except os_module.error:
            pass

        try:
            os_module.rename(self._dirfile, self._bakfile)
        except os_module.error:
            pass

        with self._open(self._dirfile, 'w') as out:
            self._chmod(self._dirfile)
            for entry in self._index.iteritems():
                # entry is (key, (pos, siz))
                out.write("%r, %r\n" % entry)

    sync = _commit

    def __getitem__(self, key):
        offset, length = self._index[key]     # may raise KeyError
        with _open(self._datfile, 'rb') as datfile:
            datfile.seek(offset)
            return datfile.read(length)

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        with _open(self._datfile, 'rb+') as datfile:
            datfile.seek(0, 2)
            end = int(datfile.tell())
            aligned = ((end + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            datfile.write('\0' * (aligned - end))
            datfile.write(val)
        return (aligned, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        with _open(self._datfile, 'rb+') as datfile:
            datfile.seek(pos)
            datfile.write(val)
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        with _open(self._dirfile, 'a') as dirfile:
            self._chmod(self._dirfile)
            dirfile.write("%r, %r\n" % (key, pos_and_siz_pair))

    def __setitem__(self, key, val):
        if not (type(key) == type('') == type(val)):
            raise TypeError("keys and values must be strings")

        if key not in self._index:
            self._addkey(key, self._addval(val))
            return

        # See whether the new value is small enough to fit in the
        # (padded) space currently occupied by the old value.
        pos, siz = self._index[key]
        oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
        newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
        if newblocks <= oldblocks:
            self._index[key] = self._setval(pos, val)
        else:
            # The new value doesn't fit in the (padded) space used
            # by the old value.  The blocks used by the old value are
            # forever lost.
            self._index[key] = self._addval(val)

        # Note that _index may be out of synch with the directory
        # file now:  _setval() and _addval() don't update the directory
        # file.  This also means that the on-disk directory and data
        # files are in a mutually inconsistent state, and they'll
        # remain that way until _commit() is called.  Note that this
        # is a disaster (for the database) if the program crashes
        # (so that _commit() never gets called).

    def __delitem__(self, key):
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  _setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        return self._index.keys()

    def has_key(self, key):
        return key in self._index

    def __contains__(self, key):
        return key in self._index

    def iterkeys(self):
        return self._index.iterkeys()
    __iter__ = iterkeys

    def __len__(self):
        return len(self._index)

    def close(self):
        # Commit first; the handles are dropped even if the commit fails.
        try:
            self._commit()
        finally:
            self._index = None
            self._datfile = None
            self._dirfile = None
            self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        # Skipped when the os module in use has no chmod().
        if hasattr(self._os, 'chmod'):
            self._os.chmod(file, self._mode)
|
||||
|
||||
def open(file, flag=None, mode=0o666):
    """Open the database file, filename, and return corresponding object.

    The flag argument, used to control how the database is opened in the
    other DBM implementations, is ignored in the dumbdbm module; the
    database is always opened for update, and will be created if it does
    not exist.

    The optional mode argument is the UNIX mode of the file, used only when
    the database has to be created.  It defaults to octal code 0666 (and
    will be modified by the prevailing umask).

    """
    # flag argument is currently ignored

    # NOTE: the default is written ``0o666`` rather than ``0666``; the
    # value is the same, but the old spelling is a syntax error on
    # Python 3.

    # Modify mode depending on the umask
    try:
        # umask() can only be read by setting it, so set it to 0 and
        # immediately restore the previous value.
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        # No umask() available: use the mode as given.
        pass
    else:
        # Turn off any bits that are set in the umask
        mode = mode & (~um)

    return _Database(file, mode)
|
||||
@@ -369,7 +369,8 @@ class Chapter(object):
|
||||
if chapterdisplays:
|
||||
string = chapterdisplays[0].get('ChapString')
|
||||
language = chapterdisplays[0].get('ChapLanguage')
|
||||
return cls(start, hidden, enabled, end, string, language)
|
||||
return cls(start, hidden, enabled, end, string, language)
|
||||
return cls(start, hidden, enabled, end)
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s [%s, enabled=%s]>' % (self.__class__.__name__, self.start, self.enabled)
|
||||
|
||||
@@ -168,9 +168,13 @@ def parse(stream, specs, size=None, ignore_element_types=None, ignore_element_na
|
||||
while size is None or stream.tell() - start < size:
|
||||
try:
|
||||
element = parse_element(stream, specs)
|
||||
if not element or not hasattr(element, "type"):
|
||||
stream.seek(element.size, 1)
|
||||
continue
|
||||
|
||||
if element.type is None:
|
||||
logger.error('Element with id 0x%x is not in the specs' % element_id)
|
||||
stream.seek(element_size, 1)
|
||||
logger.error('Element with id 0x%x is not in the specs' % element.id)
|
||||
stream.seek(element.size, 1)
|
||||
continue
|
||||
elif element.type in ignore_element_types or element.name in ignore_element_names:
|
||||
logger.info('%s %s %s ignored', element.__class__.__name__, element.name, element.type)
|
||||
|
||||
@@ -0,0 +1,411 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
ftfy: fixes text for you
|
||||
|
||||
This is a module for making text less broken. See the `fix_text` function
|
||||
for more information.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import unicodedata
|
||||
import ftfy.bad_codecs
|
||||
from ftfy import fixes
|
||||
from ftfy.formatting import display_ljust
|
||||
from ftfy.compatibility import is_printable
|
||||
|
||||
__version__ = '4.4.3'
|
||||
|
||||
|
||||
# See the docstring for ftfy.bad_codecs to see what we're doing here.
|
||||
ftfy.bad_codecs.ok()
|
||||
|
||||
|
||||
def fix_text(text,
             fix_entities='auto',
             remove_terminal_escapes=True,
             fix_encoding=True,
             fix_latin_ligatures=True,
             fix_character_width=True,
             uncurl_quotes=True,
             fix_line_breaks=True,
             fix_surrogates=True,
             remove_control_chars=True,
             remove_bom=True,
             normalization='NFC',
             max_decode_length=10**6):
    r"""
    Given Unicode text as input, fix inconsistencies and glitches in it,
    such as mojibake.

    Let's start with some examples:

        >>> print(fix_text('Ã¼nicode'))
        ünicode

        >>> print(fix_text('Broken text&hellip; it&#x2019;s ﬂubberiﬁc!',
        ...                normalization='NFKC'))
        Broken text... it's flubberific!

        >>> print(fix_text('HTML entities &lt;3'))
        HTML entities <3

        >>> print(fix_text('<em>HTML entities &lt;3</em>'))
        <em>HTML entities &lt;3</em>

        >>> print(fix_text("&macr;\\_(ã\x83\x84)_/&macr;"))
        ¯\_(ツ)_/¯

        >>> # This example string starts with a byte-order mark, even if
        >>> # you can't see it on the Web.
        >>> print(fix_text('\ufeffParty like\nit&rsquo;s 1999!'))
        Party like
        it's 1999!

        >>> print(fix_text('ＬＯＵＤ　ＮＯＩＳＥＳ'))
        LOUD NOISES

        >>> len(fix_text('ﬁ' * 100000))
        200000

        >>> len(fix_text(''))
        0

    Based on the options you provide, ftfy applies these steps in order:

    - If `remove_terminal_escapes` is True, remove sequences of bytes that are
      instructions for Unix terminals, such as the codes that make text appear
      in different colors.

    - If `fix_encoding` is True, look for common mistakes that come from
      encoding or decoding Unicode text incorrectly, and fix them if they are
      reasonably fixable. See `fixes.fix_encoding` for details.

    - If `fix_entities` is True, replace HTML entities with their equivalent
      characters. If it's "auto" (the default), then consider replacing HTML
      entities, but don't do so in text where you have seen a pair of actual
      angle brackets (that's probably actually HTML and you shouldn't mess
      with the entities).

    - If `uncurl_quotes` is True, replace various curly quotation marks with
      plain-ASCII straight quotes.

    - If `fix_latin_ligatures` is True, then ligatures made of Latin letters,
      such as `ﬁ`, will be separated into individual letters. These ligatures
      are usually not meaningful outside of font rendering, and often represent
      copy-and-paste errors.

    - If `fix_character_width` is True, half-width and full-width characters
      will be replaced by their standard-width form.

    - If `fix_line_breaks` is true, convert all line breaks to Unix style
      (CRLF and CR line breaks become LF line breaks).

    - If `fix_surrogates` is true, ensure that there are no UTF-16 surrogates
      in the resulting string, by converting them to the correct characters
      when they're appropriately paired, or replacing them with \ufffd
      otherwise.

    - If `remove_control_chars` is true, remove control characters that
      are not suitable for use in text. This includes most of the ASCII control
      characters, plus some Unicode controls such as the byte order mark
      (U+FEFF). Useful control characters, such as Tab, Line Feed, and
      bidirectional marks, are left as they are.

    - If `remove_bom` is True, remove the Byte-Order Mark at the start of the
      string if it exists. (This is largely redundant, because it's a special
      case of `remove_control_chars`. This option will become deprecated
      in a later version.)

    - If `normalization` is not None, apply the specified form of Unicode
      normalization, which can be one of 'NFC', 'NFKC', 'NFD', and 'NFKD'.

      - The default normalization, NFC, combines characters and diacritics that
        are written using separate code points, such as converting "e" plus an
        acute accent modifier into "é", or converting "ka" (か) plus a dakuten
        into the single character "ga" (が). Unicode can be converted to NFC
        form without any change in its meaning.

      - If you ask for NFKC normalization, it will apply additional
        normalizations that can change the meanings of characters. For example,
        ellipsis characters will be replaced with three periods, all ligatures
        will be replaced with the individual characters that make them up,
        and characters that differ in font style will be converted to the same
        character.

    - If anything was changed, repeat all the steps, so that the function is
      idempotent. "&amp;amp;" will become "&", for example, not "&amp;".

    `fix_text` will work one line at a time, with the possibility that some
    lines are in different encodings, allowing it to fix text that has been
    concatenated together from different sources.

    When it encounters lines longer than `max_decode_length` (1 million
    codepoints by default), it will not run the `fix_encoding` step, to avoid
    unbounded slowdowns.

    If you're certain that any decoding errors in the text would have affected
    the entire text in the same way, and you don't mind operations that scale
    with the length of the text, you can use `fix_text_segment` directly to
    fix the whole string in one batch.
    """
    if isinstance(text, bytes):
        raise UnicodeError(fixes.BYTES_ERROR_TEXT)

    out = []
    pos = 0
    while pos < len(text):
        # Each segment runs up to and including the next newline, or to
        # the end of the text when no newline remains (find() gives -1,
        # so textbreak is 0 after the +1).
        textbreak = text.find('\n', pos) + 1
        fix_encoding_this_time = fix_encoding
        if textbreak == 0:
            textbreak = len(text)
        if (textbreak - pos) > max_decode_length:
            # Over-long segment: skip the encoding fix for this one only.
            fix_encoding_this_time = False

        substring = text[pos:textbreak]

        if fix_entities == 'auto' and '<' in substring and '>' in substring:
            # we see angle brackets together; this could be HTML.
            # The switch stays off for every remaining segment as well.
            fix_entities = False

        out.append(
            fix_text_segment(
                substring,
                fix_entities=fix_entities,
                remove_terminal_escapes=remove_terminal_escapes,
                fix_encoding=fix_encoding_this_time,
                uncurl_quotes=uncurl_quotes,
                fix_latin_ligatures=fix_latin_ligatures,
                fix_character_width=fix_character_width,
                fix_line_breaks=fix_line_breaks,
                fix_surrogates=fix_surrogates,
                remove_control_chars=remove_control_chars,
                remove_bom=remove_bom,
                normalization=normalization
            )
        )
        pos = textbreak

    return ''.join(out)
|
||||
|
||||
# Some alternate names for the main functions
|
||||
ftfy = fix_text
|
||||
fix_encoding = fixes.fix_encoding
|
||||
fix_text_encoding = fixes.fix_text_encoding # deprecated
|
||||
|
||||
|
||||
def fix_file(input_file,
             encoding=None,
             fix_entities='auto',
             remove_terminal_escapes=True,
             fix_encoding=True,
             fix_latin_ligatures=True,
             fix_character_width=True,
             uncurl_quotes=True,
             fix_line_breaks=True,
             fix_surrogates=True,
             remove_control_chars=True,
             remove_bom=True,
             normalization='NFC'):
    """
    Lazily fix the text of a file, one line at a time.

    `input_file` is iterated line by line. Lines that are already Unicode
    text are used as they are. Lines that arrive as bytes are decoded with
    `encoding` when one was supplied; otherwise the encoding is guessed by
    `guess_bytes` on the first bytes line, and that guess is reused for every
    later line. Guessing is a best-effort fallback with no guarantees; see
    `guess_bytes` for how it works.

    When `fix_entities` is 'auto', the first line that contains both '<' and
    '>' turns entity unescaping off for that line and every line after it.

    This is a generator: it yields each fixed line, as produced by
    `fix_text_segment` with the remaining options passed through unchanged.
    """
    # Options that are forwarded verbatim to every `fix_text_segment` call.
    passthrough = dict(
        remove_terminal_escapes=remove_terminal_escapes,
        fix_encoding=fix_encoding,
        fix_latin_ligatures=fix_latin_ligatures,
        fix_character_width=fix_character_width,
        uncurl_quotes=uncurl_quotes,
        fix_line_breaks=fix_line_breaks,
        fix_surrogates=fix_surrogates,
        remove_control_chars=remove_control_chars,
        remove_bom=remove_bom,
        normalization=normalization,
    )
    autodetect_html = (fix_entities == 'auto')

    # `entities` is the setting actually used; it is sticky once switched off.
    entities = fix_entities
    for line in input_file:
        if isinstance(line, bytes):
            if encoding is not None:
                line = line.decode(encoding)
            else:
                # Remember the guessed encoding for the rest of the file.
                line, encoding = guess_bytes(line)

        if autodetect_html and '<' in line and '>' in line:
            # Angle brackets together: this could be HTML, so leave its
            # entities alone from here on.
            entities = False

        yield fix_text_segment(line, fix_entities=entities, **passthrough)
|
||||
|
||||
|
||||
def fix_text_segment(text,
                     fix_entities='auto',
                     remove_terminal_escapes=True,
                     fix_encoding=True,
                     fix_latin_ligatures=True,
                     fix_character_width=True,
                     uncurl_quotes=True,
                     fix_line_breaks=True,
                     fix_surrogates=True,
                     remove_control_chars=True,
                     remove_bom=True,
                     normalization='NFC'):
    """
    Fix one chunk of text, treating the whole chunk as a single unit.

    The chunk may be one line handed over by `fix_text`, or a larger piece
    of text that you know to be in one consistent encoding. The enabled
    fixes are applied in a fixed order, over and over, until a full pass
    leaves the text unchanged; that stable text is returned.

    The parameters are described in `fix_text`.

    Raises UnicodeError if `text` is bytes rather than Unicode text.
    """
    if isinstance(text, bytes):
        raise UnicodeError(fixes.BYTES_ERROR_TEXT)

    if fix_entities == 'auto':
        if '<' in text and '>' in text:
            # Angle brackets together suggest HTML; don't touch its entities.
            fix_entities = False

    while True:
        before_pass = text

        if remove_terminal_escapes:
            text = fixes.remove_terminal_escapes(text)
        if fix_encoding:
            text = fixes.fix_encoding(text)
        if fix_entities:
            text = fixes.unescape_html(text)
        if fix_latin_ligatures:
            text = fixes.fix_latin_ligatures(text)
        if fix_character_width:
            text = fixes.fix_character_width(text)
        if uncurl_quotes:
            text = fixes.uncurl_quotes(text)
        if fix_line_breaks:
            text = fixes.fix_line_breaks(text)
        if fix_surrogates:
            text = fixes.fix_surrogates(text)

        if remove_control_chars:
            text = fixes.remove_control_chars(text)
        elif remove_bom:
            # Only needed when `remove_control_chars` did not run, because
            # doing both would be redundant.
            text = fixes.remove_bom(text)

        if normalization is not None:
            text = unicodedata.normalize(normalization, text)

        # A pass that changes nothing means we have reached a fixed point.
        if text == before_pass:
            break

    return text
|
||||
|
||||
|
||||
def guess_bytes(bstring):
    """
    Decode bytes in an unknown encoding, guessing among a few candidates.

    NOTE: Using `guess_bytes` is not the recommended way of using ftfy. ftfy
    is not designed to be an encoding detector.

    In the unfortunate situation that you have some bytes in an unknown
    encoding, ftfy can guess a reasonable strategy for decoding them, by trying
    a few common encodings that can be distinguished from each other.

    Unlike the rest of ftfy, this may not be accurate, and it may *create*
    Unicode problems instead of solving them!

    It doesn't try East Asian encodings at all, and if you have East Asian text
    that you don't know how to decode, you are somewhat out of luck. East
    Asian encodings require some serious statistics to distinguish from each
    other, so we can't support them without decreasing the accuracy of ftfy.

    If you don't know which encoding you have at all, I recommend
    trying the 'chardet' module, and being appropriately skeptical about its
    results.

    The encodings we try here are:

    - UTF-16 with a byte order mark, because a UTF-16 byte order mark looks
      like nothing else
    - UTF-8, because it's the global standard, which has been used by a
      majority of the Web since 2008
    - "utf-8-variants", because it's what people actually implement when they
      think they're doing UTF-8
    - MacRoman, because Microsoft Office thinks it's still a thing, and it
      can be distinguished by its line breaks. (If there are no line breaks in
      the string, though, you're out of luck.)
    - "sloppy-windows-1252", the Latin-1-like encoding that is the most common
      single-byte encoding

    Returns a pair `(text, encoding)`: the decoded text and the name of the
    encoding that was used to decode it.

    Raises UnicodeError if `bstring` is already Unicode text (including
    instances of subclasses of the text type) instead of bytes.
    """
    # `type('')` is the text type this module's string literals have. Using
    # isinstance (rather than comparing types exactly) also rejects subclasses
    # of the text type, which would otherwise fail further down with a
    # confusing TypeError.
    if isinstance(bstring, type('')):
        raise UnicodeError(
            "This string was already decoded as Unicode. You should pass "
            "bytes to guess_bytes, not Unicode."
        )

    if bstring.startswith((b'\xfe\xff', b'\xff\xfe')):
        return bstring.decode('utf-16'), 'utf-16'

    # The set of distinct byte values present, and the individual byte values
    # we want to look for in it. Unpacking a bytes literal yields elements of
    # the same kind that iterating over `bytes(bstring)` does.
    byteset = set(bytes(bstring))
    byte_ed, byte_c0, byte_CR, byte_LF = b'\xed\xc0\r\n'

    try:
        if byte_ed in byteset or byte_c0 in byteset:
            # Byte 0xed can be used to encode a range of codepoints that
            # are UTF-16 surrogates. UTF-8 does not use UTF-16 surrogates,
            # so when we see 0xed, it's very likely we're being asked to
            # decode CESU-8, the variant that encodes UTF-16 surrogates
            # instead of the original characters themselves.
            #
            # This will occasionally trigger on standard UTF-8, as there
            # are some Korean characters that also use byte 0xed, but that's
            # not harmful.
            #
            # Byte 0xc0 is impossible because, numerically, it would only
            # encode characters lower than U+0040. Those already have
            # single-byte representations, and UTF-8 requires using the
            # shortest possible representation. However, Java hides the null
            # codepoint, U+0000, in a non-standard longer representation -- it
            # encodes it as 0xc0 0x80 instead of 0x00, guaranteeing that 0x00
            # will never appear in the encoded bytes.
            #
            # The 'utf-8-variants' decoder can handle both of these cases, as
            # well as standard UTF-8, at the cost of a bit of speed.
            return bstring.decode('utf-8-variants'), 'utf-8-variants'
        else:
            return bstring.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        # Not any flavor of UTF-8; fall through to the single-byte guesses.
        pass

    # Carriage returns with no line feeds at all suggest classic Mac line
    # breaks, which is our only clue for MacRoman.
    if byte_CR in byteset and byte_LF not in byteset:
        return bstring.decode('macroman'), 'macroman'
    else:
        return bstring.decode('sloppy-windows-1252'), 'sloppy-windows-1252'
|
||||
|
||||
|
||||
def explain_unicode(text):
    """
    Print a codepoint-by-codepoint breakdown of a string, for debugging
    mysterious Unicode.

    One line is printed per codepoint, showing its number in hexadecimal,
    its glyph (or its `unicode-escape` form when `is_printable` rejects it),
    its Unicode category, and its Unicode name (`<unknown>` when it has
    none).

    >>> explain_unicode('(╯°□°)╯︵ ┻━┻')
    U+0028 ( [Ps] LEFT PARENTHESIS
    U+256F ╯ [So] BOX DRAWINGS LIGHT ARC UP AND LEFT
    U+00B0 ° [So] DEGREE SIGN
    U+25A1 □ [So] WHITE SQUARE
    U+00B0 ° [So] DEGREE SIGN
    U+0029 ) [Pe] RIGHT PARENTHESIS
    U+256F ╯ [So] BOX DRAWINGS LIGHT ARC UP AND LEFT
    U+FE35 ︵ [Ps] PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
    U+0020 [Zs] SPACE
    U+253B ┻ [So] BOX DRAWINGS HEAVY UP AND HORIZONTAL
    U+2501 ━ [So] BOX DRAWINGS HEAVY HORIZONTAL
    U+253B ┻ [So] BOX DRAWINGS HEAVY UP AND HORIZONTAL
    """
    row_template = 'U+{code:04X} {display} [{category}] {name}'
    for char in text:
        # Show the character itself when it can be displayed, and an ASCII
        # escape sequence standing in for it when it can't.
        glyph = char
        if not is_printable(char):
            glyph = char.encode('unicode-escape').decode('ascii')

        row = row_template.format(
            code=ord(char),
            display=display_ljust(glyph, 7),
            category=unicodedata.category(char),
            name=unicodedata.name(char, '<unknown>'),
        )
        print(row)
|
||||
@@ -0,0 +1,94 @@
|
||||
# coding: utf-8
|
||||
r"""
|
||||
Give Python the ability to decode some common, flawed encodings.
|
||||
|
||||
Python does not want you to be sloppy with your text. Its encoders and decoders
|
||||
("codecs") follow the relevant standards whenever possible, which means that
|
||||
when you get text that *doesn't* follow those standards, you'll probably fail
|
||||
to decode it. Or you might succeed at decoding it for implementation-specific
|
||||
reasons, which is perhaps worse.
|
||||
|
||||
There are some encodings out there that Python wishes didn't exist, which are
|
||||
widely used outside of Python:
|
||||
|
||||
- "utf-8-variants", a family of not-quite-UTF-8 encodings, including the
|
||||
ever-popular CESU-8 and "Java modified UTF-8".
|
||||
- "Sloppy" versions of character map encodings, where bytes that don't map to
|
||||
anything will instead map to the Unicode character with the same number.
|
||||
|
||||
Simply importing this module, or in fact any part of the `ftfy` package, will
|
||||
make these new "bad codecs" available to Python through the standard Codecs
|
||||
API. You never have to actually call any functions inside `ftfy.bad_codecs`.
|
||||
|
||||
However, if you want to call something because your code checker insists on it,
|
||||
you can call ``ftfy.bad_codecs.ok()``.
|
||||
|
||||
A quick example of decoding text that's encoded in CESU-8:
|
||||
|
||||
>>> import ftfy.bad_codecs
|
||||
>>> print(b'\xed\xa0\xbd\xed\xb8\x8d'.decode('utf-8-variants'))
|
||||
😍
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
from encodings import normalize_encoding
|
||||
import codecs
|
||||
|
||||
# Codecs that have already been looked up successfully, keyed by the encoding
# name exactly as it was requested.
_CACHE = {}

# The names that all refer to 'utf-8-variants'. They are spelled with
# underscores because `normalize_encoding` turns every hyphen into one.
UTF8_VAR_NAMES = (
    'utf_8_variants', 'utf8_variants',
    'utf_8_variant', 'utf8_variant',
    'utf_8_var', 'utf8_var',
    'cesu_8', 'cesu8',
    'java_utf_8', 'java_utf8'
)


def search_function(encoding):
    """
    Look up one of our "bad codecs" by name, for Python's codecs API.

    The codecs API calls a search function with an encoding name, and expects
    back a codec for that encoding, or None if the function doesn't know it.

    The names this function recognizes are:

    - Names of the form 'sloppy-windows-NNNN' or 'sloppy-iso-8859-N', where
      the non-sloppy version is an encoding that leaves some bytes unmapped
      to characters.
    - 'utf-8-variants' and its several aliases, listed in `UTF8_VAR_NAMES`.

    Successful lookups are remembered, so the submodule that implements a
    codec is only imported the first time that codec is needed.
    """
    try:
        return _CACHE[encoding]
    except KeyError:
        pass

    norm_encoding = normalize_encoding(encoding)
    if norm_encoding in UTF8_VAR_NAMES:
        from ftfy.bad_codecs.utf8_variants import CODEC_INFO as codec
    elif norm_encoding.startswith('sloppy_'):
        from ftfy.bad_codecs.sloppy import CODECS
        codec = CODECS.get(norm_encoding)
    else:
        codec = None

    # Failed lookups are deliberately not cached.
    if codec is None:
        return None

    _CACHE[encoding] = codec
    return codec
|
||||
|
||||
|
||||
def ok():
    """
    Do nothing, so that you have something to call after importing this
    package.

    This exists for the benefit of Pyflakes, which complains when you import
    a module and appear not to use it. It can't tell that the import is what
    makes certain encodings work with the ``unicode.encode`` and
    ``bytes.decode`` methods.
    """
    return None
|
||||
|
||||
|
||||
# Hook our search function into Python's codec lookup as a side effect of
# importing this module; this is what makes the "bad codecs" available
# without calling anything explicitly.
codecs.register(search_function)
|
||||
@@ -0,0 +1,164 @@
|
||||
# coding: utf-8
|
||||
r"""
|
||||
Decodes single-byte encodings, filling their "holes" in the same messy way that
|
||||
everyone else does.
|
||||
|
||||
A single-byte encoding maps each byte to a Unicode character, except that some
|
||||
bytes are left unmapped. In the commonly-used Windows-1252 encoding, for
|
||||
example, bytes 0x81 and 0x8D, among others, have no meaning.
|
||||
|
||||
Python, wanting to preserve some sense of decorum, will handle these bytes
|
||||
as errors. But Windows knows that 0x81 and 0x8D are possible bytes and they're
|
||||
different from each other. It just hasn't defined what they are in terms of
|
||||
Unicode.
|
||||
|
||||
Software that has to interoperate with Windows-1252 and Unicode -- such as all
|
||||
the common Web browsers -- will pick some Unicode characters for them to map
|
||||
to, and the characters they pick are the Unicode characters with the same
|
||||
numbers: U+0081 and U+008D. This is the same as what Latin-1 does, and the
|
||||
resulting characters tend to fall into a range of Unicode that's set aside for
|
||||
obsolete Latin-1 control characters anyway.
|
||||
|
||||
These sloppy codecs let Python do the same thing, thus interoperating with
|
||||
other software that works this way. It defines a sloppy version of many
|
||||
single-byte encodings with holes. (There is no need for a sloppy version of
|
||||
an encoding without holes: for example, there is no such thing as
|
||||
sloppy-iso-8859-2 or sloppy-macroman.)
|
||||
|
||||
The following encodings will become defined:
|
||||
|
||||
- sloppy-windows-1250 (Central European, sort of based on ISO-8859-2)
|
||||
- sloppy-windows-1251 (Cyrillic)
|
||||
- sloppy-windows-1252 (Western European, based on Latin-1)
|
||||
- sloppy-windows-1253 (Greek, sort of based on ISO-8859-7)
|
||||
- sloppy-windows-1254 (Turkish, based on ISO-8859-9)
|
||||
- sloppy-windows-1255 (Hebrew, based on ISO-8859-8)
|
||||
- sloppy-windows-1256 (Arabic)
|
||||
- sloppy-windows-1257 (Baltic, based on ISO-8859-13)
|
||||
- sloppy-windows-1258 (Vietnamese)
|
||||
- sloppy-cp874 (Thai, based on ISO-8859-11)
|
||||
- sloppy-iso-8859-3 (Maltese and Esperanto, I guess)
|
||||
- sloppy-iso-8859-6 (different Arabic)
|
||||
- sloppy-iso-8859-7 (Greek)
|
||||
- sloppy-iso-8859-8 (Hebrew)
|
||||
- sloppy-iso-8859-11 (Thai)
|
||||
|
||||
Aliases such as "sloppy-cp1252" for "sloppy-windows-1252" will also be
|
||||
defined.
|
||||
|
||||
Only sloppy-windows-1251 and sloppy-windows-1252 are used by the rest of ftfy;
|
||||
the rest are rather uncommon.
|
||||
|
||||
Here are some examples, using `ftfy.explain_unicode` to illustrate how
|
||||
sloppy-windows-1252 merges Windows-1252 with Latin-1:
|
||||
|
||||
>>> from ftfy import explain_unicode
|
||||
>>> some_bytes = b'\x80\x81\x82'
|
||||
>>> explain_unicode(some_bytes.decode('latin-1'))
|
||||
U+0080 \x80 [Cc] <unknown>
|
||||
U+0081 \x81 [Cc] <unknown>
|
||||
U+0082 \x82 [Cc] <unknown>
|
||||
|
||||
>>> explain_unicode(some_bytes.decode('windows-1252', 'replace'))
|
||||
U+20AC € [Sc] EURO SIGN
|
||||
U+FFFD � [So] REPLACEMENT CHARACTER
|
||||
U+201A ‚ [Ps] SINGLE LOW-9 QUOTATION MARK
|
||||
|
||||
>>> explain_unicode(some_bytes.decode('sloppy-windows-1252'))
|
||||
U+20AC € [Sc] EURO SIGN
|
||||
U+0081 \x81 [Cc] <unknown>
|
||||
U+201A ‚ [Ps] SINGLE LOW-9 QUOTATION MARK
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import codecs
|
||||
from encodings import normalize_encoding
|
||||
import sys
|
||||
|
||||
REPLACEMENT_CHAR = '\ufffd'
PY26 = sys.version_info[:2] == (2, 6)


def make_sloppy_codec(encoding):
    """
    Build the 'sloppy' counterpart of the single-byte codec named `encoding`.

    The sloppy codec agrees with the real one on every byte the real one
    assigns, and additionally maps each unassigned byte to the Unicode
    character with the same number, the way Latin-1 would. That lets it
    encode and decode the bytes the real codec treats as errors.

    The standard library defines single-byte encodings as boilerplate classes
    wrapped around `codecs.charmap_decode` and `codecs.charmap_encode`. This
    function *defines* that boilerplate for the given encoding, and returns
    it packaged as a `codecs.CodecInfo` named 'sloppy-' + `encoding`.
    """
    every_byte = bytearray(range(256))

    # Start with the Latin-1 reading of each byte; this is the fallback that
    # unassigned bytes will keep.
    table = list(every_byte.decode('latin-1'))

    # Decode the same bytes in the requested encoding, where unassigned bytes
    # show up as the replacement character.
    if PY26:
        strict_chars = every_byte.decode(encoding, 'replace')
    else:
        strict_chars = every_byte.decode(encoding, errors='replace')

    # Wherever the real encoding has an opinion, it wins over Latin-1.
    for byte_value, char in enumerate(strict_chars):
        if char == REPLACEMENT_CHAR:
            continue
        table[byte_value] = char

    # For ftfy's own purposes, byte 1A (the "Substitute" control code) is
    # the byte that stands for the Unicode replacement character U+FFFD.
    table[0x1a] = REPLACEMENT_CHAR

    # The two lookup structures that the charmap functions work from.
    decoding_table = ''.join(table)
    encoding_table = codecs.charmap_build(decoding_table)

    # The rest is class boilerplate in the same shape as the standard
    # library's `encodings.cp1252`, tidied up to follow pep8.
    class Codec(codecs.Codec):
        def encode(self, input, errors='strict'):
            return codecs.charmap_encode(input, errors, encoding_table)

        def decode(self, input, errors='strict'):
            return codecs.charmap_decode(input, errors, decoding_table)

    class IncrementalEncoder(codecs.IncrementalEncoder):
        def encode(self, input, final=False):
            return codecs.charmap_encode(input, self.errors, encoding_table)[0]

    class IncrementalDecoder(codecs.IncrementalDecoder):
        def decode(self, input, final=False):
            return codecs.charmap_decode(input, self.errors, decoding_table)[0]

    class StreamWriter(Codec, codecs.StreamWriter):
        pass

    class StreamReader(Codec, codecs.StreamReader):
        pass

    stateless = Codec()
    return codecs.CodecInfo(
        name='sloppy-' + encoding,
        encode=stateless.encode,
        decode=stateless.decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
|
||||
|
||||
# Build a sloppy codec for every encoding that leaves some bytes unassigned.
# The main module of ftfy.bad_codecs looks codecs up in the resulting CODECS
# dictionary, whose keys are the normalized 'sloppy_...' names.
CODECS = {}

INCOMPLETE_ENCODINGS = []
INCOMPLETE_ENCODINGS.extend(
    ['windows-%s' % num for num in range(1250, 1259)]
)
INCOMPLETE_ENCODINGS.extend(
    ['iso-8859-%s' % num for num in (3, 6, 7, 8, 11)]
)
INCOMPLETE_ENCODINGS.extend(
    ['cp%s' % num for num in range(1250, 1259)]
)
INCOMPLETE_ENCODINGS.append('cp874')

for _encoding in INCOMPLETE_ENCODINGS:
    _new_name = normalize_encoding('sloppy-' + _encoding)
    CODECS[_new_name] = make_sloppy_codec(_encoding)
|
||||
@@ -0,0 +1,282 @@
|
||||
r"""
|
||||
This file defines a codec called "utf-8-variants" (or "utf-8-var"), which can
|
||||
decode text that's been encoded with a popular non-standard version of UTF-8.
|
||||
This includes CESU-8, the accidental encoding made by layering UTF-8 on top of
|
||||
UTF-16, as well as Java's twist on CESU-8 that contains a two-byte encoding for
|
||||
codepoint 0.
|
||||
|
||||
This is particularly relevant in Python 3, which provides no other way of
|
||||
decoding CESU-8 [1]_.
|
||||
|
||||
The easiest way to use the codec is to simply import `ftfy.bad_codecs`:
|
||||
|
||||
>>> import ftfy.bad_codecs
|
||||
>>> result = b'here comes a null! \xc0\x80'.decode('utf-8-var')
|
||||
>>> print(repr(result).lstrip('u'))
|
||||
'here comes a null! \x00'
|
||||
|
||||
The codec does not at all enforce "correct" CESU-8. For example, the Unicode
|
||||
Consortium's not-quite-standard describing CESU-8 requires that there is only
|
||||
one possible encoding of any character, so it does not allow mixing of valid
|
||||
UTF-8 and CESU-8. This codec *does* allow that, just like Python 2's UTF-8
|
||||
decoder does.
|
||||
|
||||
Characters in the Basic Multilingual Plane still have only one encoding. This
|
||||
codec still enforces the rule, within the BMP, that characters must appear in
|
||||
their shortest form. There is one exception: the sequence of bytes `0xc0 0x80`,
|
||||
instead of just `0x00`, may be used to encode the null character `U+0000`, like
|
||||
in Java.
|
||||
|
||||
If you encode with this codec, you get legitimate UTF-8. Decoding with this
|
||||
codec and then re-encoding is not idempotent, although encoding and then
|
||||
decoding is. So this module won't produce CESU-8 for you. Look for that
|
||||
functionality in the sister module, "Breaks Text For You", coming approximately
|
||||
never.
|
||||
|
||||
.. [1] In a pinch, you can decode CESU-8 in Python 2 using the UTF-8 codec:
|
||||
first decode the bytes (incorrectly), then encode them, then decode them
|
||||
again, using UTF-8 as the codec every time.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import codecs
|
||||
from encodings.utf_8 import (IncrementalDecoder as UTF8IncrementalDecoder,
|
||||
IncrementalEncoder as UTF8IncrementalEncoder)
|
||||
from ftfy.compatibility import bytes_to_ints, unichr, PYTHON2
|
||||
|
||||
NAME = 'utf-8-variants'

# A pattern for every possible six-byte CESU-8 sequence, and also for such a
# sequence cut short by the end of the string. Each subgroup may match `$`
# instead of a byte; once one of them does, there are no more bytes left, so
# every subgroup after it has to match `$` as well.
CESU8_EXPR = b''.join([
    b'(',
    b'\xed',
    b'([\xa0-\xaf]|$)',
    b'([\x80-\xbf]|$)',
    b'(\xed|$)',
    b'([\xb0-\xbf]|$)',
    b'([\x80-\xbf]|$)',
    b')',
])

CESU8_RE = re.compile(CESU8_EXPR)

# A pattern for a lone encoded surrogate that isn't part of a CESU-8 pair.
# These need careful handling on Python 2.
SURROGATE_EXPR = b'(\xed([\xa0-\xbf]|$)([\x80-\xbf]|$))'

# A pattern for Java's two-byte encoding of U+0, including the case where
# it is cut short and we need to see more bytes.
NULL_EXPR = b'(\xc0(\x80|$))'

# Matches anything that has to be decoded differently from standard UTF-8.
SPECIAL_BYTES_RE = re.compile(
    NULL_EXPR + b'|' + CESU8_EXPR + b'|' + SURROGATE_EXPR
)
|
||||
|
||||
|
||||
class IncrementalDecoder(UTF8IncrementalDecoder):
    """
    An incremental decoder for 'utf-8-variants', built on top of Python's
    own UTF-8 incremental decoder.

    It takes in bytes, which may be arriving piecemeal from a stream, and
    produces the correctly decoded text. Wherever it can, it hands the work
    to the real UTF-8 decoder, which is heavily optimized; the methods
    defined here only step in for the byte sequences that the real decoder
    isn't expecting.
    """
    def _buffer_decode(self, input, errors, final):
        """
        Decode bytes that may be arriving in a stream, per the Codecs API.

        `input` is the incoming sequence of bytes. `errors` says how errors
        should be handled; every error case is passed on to the real UTF-8
        decoder so that its behavior is what you get. `final` says whether
        this is the end of the sequence, in which case incomplete input is
        an error rather than something to wait on.

        Returns as much decoded text as possible, together with the number
        of bytes that were consumed.
        """
        # `pieces` collects the text decoded so far. `offset` is how far into
        # `input` we are: everything before it has been consumed, everything
        # from it onward is still to be decoded.
        pieces = []
        offset = 0
        while True:
            text, used = self._buffer_decode_step(
                input[offset:], errors, final
            )
            if used == 0:
                # Nothing is left, or we have to wait for more input. In
                # either case there is nothing more to do right now.
                break
            pieces.append(text)
            offset += used

        if final:
            # With `final` set, the steps must have used up every byte.
            assert offset == len(input)

        return ''.join(pieces), offset

    def _buffer_decode_step(self, input, errors, final):
        """
        Perform one decoding step, which is one of these:

        - Decode as much real UTF-8 as possible.
        - Decode a six-byte CESU-8 sequence at the current position.
        - Decode a Java-style null at the current position.

        This method works out which of them applies, and does it.
        """
        # The superclass method does most of the real work.
        sup = UTF8IncrementalDecoder._buffer_decode

        # Look for the first place where the input departs from plain UTF-8.
        match = SPECIAL_BYTES_RE.search(input)
        if match is None:
            return sup(input, errors, final)

        special_start = match.start()
        if special_start > 0:
            # Decode the ordinary UTF-8 that comes before the special bytes.
            return sup(input[:special_start], errors, True)

        # From here on, the special sequence is at the very start of the
        # input.
        if not input.startswith(b'\xc0'):
            # It starts with 0xed: possibly a six-byte CESU-8 sequence.
            return self._buffer_decode_surrogates(sup, input, errors, final)

        if len(input) > 1:
            # The two-byte sequence 0xc0 0x80 decodes to a null.
            return '\u0000', 2

        if final:
            # The stream ended after a lone 0xc0. The superclass method
            # decides what becomes of it.
            return sup(input, errors, True)

        # Wait until we can see the byte after 0xc0.
        return '', 0

    @staticmethod
    def _buffer_decode_surrogates(sup, input, errors, final):
        """
        Decode input that starts with what looks like an encoded surrogate.

        Even though the surrogates were encoded improperly, the bits they
        were meant to carry are still visible. A surrogate pair stands for a
        20-bit number, to which 0x10000 is added to get the codepoint, and
        in CESU-8 that 20-bit number is laid out like this:

            11101101 1010abcd 10efghij 11101101 1011klmn 10opqrst

        CESU8_RE matches byte sequences of this form; what remains is to
        pull the bits out and put the codepoint number together.
        """
        if len(input) >= 6:
            if CESU8_RE.match(input):
                # A genuine CESU-8 sequence: reassemble the 20 payload bits
                # (abcd, efghij, klmn, opqrst) and consume all six bytes.
                bytenums = bytes_to_ints(input[:6])
                payload = (
                    ((bytenums[1] & 0x0f) << 16) |
                    ((bytenums[2] & 0x3f) << 10) |
                    ((bytenums[4] & 0x0f) << 6) |
                    (bytenums[5] & 0x3f)
                )
                return unichr(payload + 0x10000), 6

            # It resembled CESU-8 but isn't. 0xed begins a three-byte
            # sequence, so the superclass gets three bytes to decode in the
            # usual way -- after the Python 2 workaround, where applicable.
            if PYTHON2:
                input = mangle_surrogates(input)
            return sup(input[:3], errors, False)

        if not final:
            # There's a surrogate here, more of the stream is coming, and we
            # can't tell yet what the following bytes are. Consume nothing
            # and wait.
            return '', 0

        # 0xed turned up near the end of the stream without six bytes to
        # decode, so the superclass method gets to treat it as an error.
        if PYTHON2 and len(input) >= 3:
            # Python 2 can't be trusted to raise an error when asked to
            # decode a surrogate, so force the issue.
            input = mangle_surrogates(input)
        return sup(input, errors, final)
|
||||
|
||||
|
||||
def mangle_surrogates(bytestring):
    """
    Replace UTF-8-encoded surrogates at the start of `bytestring` with bytes
    that are guaranteed not to decode.

    Python 3 treats the UTF-8 encoding of a surrogate codepoint as the error
    that it is, and in 'replace' mode decodes it as three replacement
    characters. Python 2 just outputs the surrogate codepoint instead.

    So that Python 2 behaves like Python 3, and so that downstream
    applications never see malformed strings, each surrogate sequence at the
    start of the string becomes the bytes `ff ff ff`. Those are *sure* not to
    decode, and they come out as three replacement characters in 'replace'
    mode.

    On Python 3 the input is returned untouched. This function will be
    deprecated in ftfy 5.0.
    """
    if not PYTHON2:
        # Python 3 already does the right thing.
        return bytestring

    if bytestring.startswith(b'\xed') and len(bytestring) >= 3:
        leading_char = bytestring[:3].decode('utf-8', 'replace')
        if '\ud800' <= leading_char <= '\udfff':
            # Mangle this surrogate, then look for another right after it.
            return b'\xff\xff\xff' + mangle_surrogates(bytestring[3:])

    return bytestring
|
||||
|
||||
# The encoder is identical to UTF-8.
IncrementalEncoder = UTF8IncrementalEncoder


# Everything below here is boilerplate that matches the modules in the
# built-in `encodings` package.
def encode(input, errors='strict'):
    """
    Encode all of `input` at once, in the style of a stateless codec.

    Returns a pair of the encoded bytes and the length of `input`.
    """
    return IncrementalEncoder(errors).encode(input, final=True), len(input)


def decode(input, errors='strict'):
    """
    Decode all of `input` at once, in the style of a stateless codec.

    The input is treated as complete (`final=True`). Returns a pair of the
    decoded text and the length of `input`.
    """
    return IncrementalDecoder(errors).decode(input, final=True), len(input)


class StreamWriter(codecs.StreamWriter):
    """A stream writer whose `encode` is the module-level `encode`."""
    # `encode` is a plain Python function, so it must be wrapped in
    # `staticmethod`. Otherwise it would be bound as a method, and the
    # `self.encode(object, self.errors)` call made by `write` would pass the
    # writer itself as `input` and fail with a TypeError.
    encode = staticmethod(encode)


class StreamReader(codecs.StreamReader):
    """A stream reader whose `decode` is the module-level `decode`."""
    # Wrapped in `staticmethod` for the same reason as in StreamWriter:
    # `read` calls `self.decode(data, self.errors)`.
    decode = staticmethod(decode)
|
||||
|
||||
|
||||
# The complete codec description for 'utf-8-variants', bundling the stateless
# functions, the incremental classes and the stream classes defined in this
# module into the single object that the codecs API works with.
CODEC_INFO = codecs.CodecInfo(
|
||||
name=NAME,
|
||||
encode=encode,
|
||||
decode=decode,
|
||||
incrementalencoder=IncrementalEncoder,
|
||||
incrementaldecoder=IncrementalDecoder,
|
||||
streamreader=StreamReader,
|
||||
streamwriter=StreamWriter,
|
||||
)
|
||||
@@ -0,0 +1,162 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Heuristics to determine whether re-encoding text is actually making it
|
||||
more reasonable.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import unicodedata
|
||||
from ftfy.chardata import chars_to_classes
|
||||
|
||||
# The following regex uses the mapping of character classes to ASCII
|
||||
# characters defined in chardata.py and build_data.py:
|
||||
#
|
||||
# L = Latin capital letter
|
||||
# l = Latin lowercase letter
|
||||
# A = Non-latin capital or title-case letter
|
||||
# a = Non-latin lowercase letter
|
||||
# C = Non-cased letter (Lo)
|
||||
# X = Control character (Cc)
|
||||
# m = Letter modifier (Lm)
|
||||
# M = Mark (Mc, Me, Mn)
|
||||
# N = Miscellaneous numbers (No)
|
||||
# 1 = Math symbol (Sm) or currency symbol (Sc)
|
||||
# 2 = Symbol modifier (Sk)
|
||||
# 3 = Other symbol (So)
|
||||
# S = UTF-16 surrogate
|
||||
# _ = Unassigned character
|
||||
# = Whitespace
|
||||
# o = Other
|
||||
|
||||
|
||||
def _make_weirdness_regex():
|
||||
"""
|
||||
Creates a list of regexes that match 'weird' character sequences.
|
||||
The more matches there are, the weirder the text is.
|
||||
"""
|
||||
groups = []
|
||||
|
||||
# Match lowercase letters that are followed by non-ASCII uppercase letters
|
||||
groups.append('lA')
|
||||
|
||||
# Match diacritical marks, except when they modify a non-cased letter or
|
||||
# another mark.
|
||||
#
|
||||
# You wouldn't put a diacritical mark on a digit or a space, for example.
|
||||
# You might put it on a Latin letter, but in that case there will almost
|
||||
# always be a pre-composed version, and we normalize to pre-composed
|
||||
# versions first. The cases that can't be pre-composed tend to be in
|
||||
# large scripts without case, which are in class C.
|
||||
groups.append('[^CM]M')
|
||||
|
||||
# Match non-Latin characters adjacent to Latin characters.
|
||||
#
|
||||
# This is a simplification from ftfy version 2, which compared all
|
||||
# adjacent scripts. However, the ambiguities we need to resolve come from
|
||||
# encodings designed to represent Latin characters.
|
||||
groups.append('[Ll][AaC]')
|
||||
groups.append('[AaC][Ll]')
|
||||
|
||||
# Match IPA letters next to capital letters.
|
||||
#
|
||||
# IPA uses lowercase letters only. Some accented capital letters next to
|
||||
# punctuation can accidentally decode as IPA letters, and an IPA letter
|
||||
# appearing next to a capital letter is a good sign that this happened.
|
||||
groups.append('[LA]i')
|
||||
groups.append('i[LA]')
|
||||
|
||||
# Match non-combining diacritics. We've already set aside the common ones
|
||||
# like ^ (the CIRCUMFLEX ACCENT, repurposed as a caret, exponent sign,
|
||||
# or happy eye) and assigned them to category 'o'. The remaining ones,
|
||||
# like the diaeresis (¨), are pretty weird to see on their own instead
|
||||
# of combined with a letter.
|
||||
groups.append('2')
|
||||
|
||||
# Match C1 control characters, which are almost always the result of
|
||||
# decoding Latin-1 that was meant to be Windows-1252.
|
||||
groups.append('X')
|
||||
|
||||
# Match private use and unassigned characters.
|
||||
groups.append('P')
|
||||
groups.append('_')
|
||||
|
||||
# Match adjacent characters from any different pair of these categories:
|
||||
# - Modifier marks (M)
|
||||
# - Letter modifiers (m)
|
||||
# - Miscellaneous numbers (N)
|
||||
# - Symbols (1 or 3, because 2 is already weird on its own)
|
||||
|
||||
exclusive_categories = 'MmN13'
|
||||
for cat1 in exclusive_categories:
|
||||
others_range = ''.join(c for c in exclusive_categories if c != cat1)
|
||||
groups.append('{cat1}[{others_range}]'.format(
|
||||
cat1=cat1, others_range=others_range
|
||||
))
|
||||
regex = '|'.join('({0})'.format(group) for group in groups)
|
||||
return re.compile(regex)
|
||||
|
||||
WEIRDNESS_RE = _make_weirdness_regex()
|
||||
|
||||
# These characters appear in mojibake but also appear commonly on their own.
|
||||
# We have a slight preference to leave them alone.
|
||||
COMMON_SYMBOL_RE = re.compile(
    '[{0}]'.format(''.join([
        '\N{HORIZONTAL ELLIPSIS}',
        '\N{EM DASH}',
        '\N{EN DASH}',
        '\N{LEFT SINGLE QUOTATION MARK}',
        '\N{LEFT DOUBLE QUOTATION MARK}',
        '\N{RIGHT SINGLE QUOTATION MARK}',
        '\N{RIGHT DOUBLE QUOTATION MARK}',
        '\N{INVERTED EXCLAMATION MARK}',
        '\N{INVERTED QUESTION MARK}',
        '\N{DEGREE SIGN}',
        '\N{TRADE MARK SIGN}',
        '\N{REGISTERED SIGN}',
        '\N{SINGLE LEFT-POINTING ANGLE QUOTATION MARK}',
        '\N{SINGLE RIGHT-POINTING ANGLE QUOTATION MARK}',
        '\N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}',
        '\N{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}',
        '\N{NO-BREAK SPACE}',
        '\N{ACUTE ACCENT}',
        '\N{MULTIPLICATION SIGN}',
        '\N{LATIN SMALL LETTER SHARP S}',
        # The byte-order mark. Its UTF-8 bytes EF BB BF, misread as a
        # single-byte encoding, produce a sequence that looks common.
        '\ufeff',
    ]))
)
|
||||
|
||||
def sequence_weirdness(text):
    """
    Score how many unexpected characters or character sequences a text
    contains. This metric is used to disambiguate when text should be
    re-decoded or left as is.

    The text is first normalized to NFC, so that penalties for diacritical
    marks don't apply to characters that have a pre-composed form.

    Each match of `WEIRDNESS_RE` against the character classes of the
    normalized text counts as one instance of weirdness. Among the things
    that regex treats as weird are:

    - Lowercase letters followed by non-Latin uppercase letters
    - Non-Latin characters next to Latin characters
    - Un-combined diacritical marks, unless they're stacking on non-cased
      letters (in languages that do that kind of thing a lot) or other marks
    - C1 control characters
    - Adjacent symbols from any different pair of these categories:

        - Modifier marks
        - Letter modifiers
        - Non-digit numbers
        - Symbols (including math and currency)

    The return value is twice the number of instances of weirdness, minus
    one for every character matched by `COMMON_SYMBOL_RE`. It can therefore
    be negative.
    """
    normalized = unicodedata.normalize('NFC', text)
    class_string = chars_to_classes(normalized)
    weird_matches = WEIRDNESS_RE.findall(class_string)
    common_symbols = COMMON_SYMBOL_RE.findall(normalized)
    return len(weird_matches) * 2 - len(common_symbols)
|
||||
|
||||
|
||||
def text_cost(text):
    """
    Compute the overall cost of a text: weirder is worse, and all else being
    equal, shorter strings are better.

    The cost is the text's "weirdness" (see :func:`sequence_weirdness`) plus
    its length.
    """
    weirdness = sequence_weirdness(text)
    return weirdness + len(text)
|
||||
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
A script to make the char_classes.dat file.
|
||||
|
||||
This never needs to run in normal usage. It needs to be run if the character
|
||||
classes we care about change, or if a new version of Python supports a new
|
||||
Unicode standard and we want it to affect our string decoding.
|
||||
|
||||
The file that we generate is based on Unicode 9.0, as supported by Python 3.6.
|
||||
You can certainly use it in earlier versions. This simply makes sure that we
|
||||
get consistent results from running ftfy on different versions of Python.
|
||||
|
||||
The file will be written to the current directory.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import unicodedata
|
||||
import sys
|
||||
import zlib
|
||||
if sys.hexversion >= 0x03000000:
|
||||
unichr = chr
|
||||
|
||||
# L = Latin capital letter
|
||||
# l = Latin lowercase letter
|
||||
# A = Non-latin capital or title-case letter
|
||||
# a = Non-latin lowercase letter
|
||||
# C = Non-cased letter (Lo)
|
||||
# X = Control character (Cc)
|
||||
# m = Letter modifier (Lm)
|
||||
# M = Mark (Mc, Me, Mn)
|
||||
# N = Miscellaneous numbers (No)
|
||||
# P = Private use (Co)
|
||||
# 1 = Math symbol (Sm) or currency symbol (Sc)
|
||||
# 2 = Symbol modifier (Sk)
|
||||
# 3 = Other symbol (So)
|
||||
# S = UTF-16 surrogate
|
||||
# _ = Unassigned character
|
||||
# (space) = Whitespace
|
||||
# o = Other
# i = IPA symbol or modifier (U+0250 to U+02FF, except the schwa)
|
||||
|
||||
|
||||
def make_char_data_file(do_it_anyway=False):
    """
    Build the compressed data file 'char_classes.dat' and write it to the
    current directory.

    The file holds one class letter for every codepoint from U+0000 through
    U+10FFFF, joined into a single ASCII string and compressed with zlib.

    If you run this, run it in Python 3.6 or later. It will run in earlier
    versions, but you won't get the Unicode 9 standard, leading to
    inconsistent behavior.

    To protect against this, running this in the wrong version of Python will
    raise an error unless you pass `do_it_anyway=True`.

    :param do_it_anyway: if true, skip the Python version check.
    :raises RuntimeError: if run on Python older than 3.6 without
        `do_it_anyway`.
    :raises ValueError: if a non-Latin letter has a category other than
        Lu, Lt, Ll, Lo, or Lm.
    """
    if sys.hexversion < 0x03060000 and not do_it_anyway:
        raise RuntimeError(
            "This function should be run in Python 3.6 or later."
        )

    cclasses = [None] * 0x110000
    for codepoint in range(0x0, 0x110000):
        char = unichr(codepoint)
        category = unicodedata.category(char)

        if (0x250 <= codepoint < 0x300) and codepoint != 0x259:
            # IPA symbols and modifiers.
            #
            # This category excludes the schwa (U+0259), which is used as a
            # normal Latin letter in some languages.
            cclasses[codepoint] = 'i'
        elif category.startswith('L'):  # letters
            if unicodedata.name(char, '').startswith('LATIN'):
                if category == 'Lu':
                    cclasses[codepoint] = 'L'
                else:
                    cclasses[codepoint] = 'l'
            else:
                if category == 'Lu' or category == 'Lt':
                    cclasses[codepoint] = 'A'
                elif category == 'Ll':
                    cclasses[codepoint] = 'a'
                elif category == 'Lo':
                    cclasses[codepoint] = 'C'
                elif category == 'Lm':
                    cclasses[codepoint] = 'm'
                else:
                    raise ValueError('got some weird kind of letter')
        elif 0xfe00 <= codepoint <= 0xfe0f or 0x1f3fb <= codepoint <= 0x1f3ff:
            # Variation selectors and skin-tone modifiers have the category
            # of non-spacing marks, but they act like symbols
            cclasses[codepoint] = '3'
        elif category.startswith('M'):  # marks
            cclasses[codepoint] = 'M'
        elif category == 'No':
            cclasses[codepoint] = 'N'
        elif category == 'Sm' or category == 'Sc':
            cclasses[codepoint] = '1'
        elif category == 'Sk':
            cclasses[codepoint] = '2'
        elif category == 'So':
            cclasses[codepoint] = '3'
        elif category == 'Cc':
            cclasses[codepoint] = 'X'
        elif category == 'Cs':
            cclasses[codepoint] = 'S'
        elif category == 'Co':
            cclasses[codepoint] = 'P'
        elif category.startswith('Z'):
            cclasses[codepoint] = ' '
        elif 0x1f000 <= codepoint <= 0x1ffff:
            # This range is rapidly having emoji added to it. Assume that
            # an unassigned codepoint in this range is just a symbol we
            # don't know yet.
            cclasses[codepoint] = '3'
        elif category == 'Cn':
            cclasses[codepoint] = '_'
        else:
            cclasses[codepoint] = 'o'

    # Mark whitespace control characters (tab, line feed, form feed,
    # carriage return) as whitespace
    cclasses[9] = cclasses[10] = cclasses[12] = cclasses[13] = ' '

    # Some other exceptions for characters that are more commonly used as
    # punctuation or decoration than for their ostensible purpose.
    # For example, tilde is not usually a "math symbol", and the grave and
    # acute accents are much more like quotation marks than modifiers.
    #
    # The non-ASCII characters are spelled as escapes: ACUTE ACCENT (U+00B4),
    # DOUBLE ACUTE ACCENT (U+02DD), FULLWIDTH CIRCUMFLEX ACCENT (U+FF3E) and
    # FULLWIDTH GRAVE ACCENT (U+FF40). Written literally, the fullwidth forms
    # can be silently folded into duplicates of the ASCII '^' and '`' by
    # tools that normalize character width.
    for char in "^~`\xb4\u02dd\uff3e\uff40":
        cclasses[ord(char)] = 'o'

    # Use a context manager so the file is closed even if writing fails.
    with open('char_classes.dat', 'wb') as out:
        out.write(zlib.compress(''.join(cclasses).encode('ascii')))


if __name__ == '__main__':
    make_char_data_file()
|
||||
Binary file not shown.
@@ -0,0 +1,215 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
This gives other modules access to the gritty details about characters and the
|
||||
encodings that use them.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import zlib
|
||||
import unicodedata
|
||||
import itertools
|
||||
from pkg_resources import resource_string
|
||||
from ftfy.compatibility import unichr
|
||||
|
||||
# These are the encodings we will try to fix in ftfy, in the
|
||||
# order that they should be tried.
|
||||
CHARMAP_ENCODINGS = [
    'latin-1',
    'sloppy-windows-1252',
    'sloppy-windows-1250',
    'iso-8859-2',
    'sloppy-windows-1251',
    'macroman',
    'cp437',
]
|
||||
|
||||
|
||||
def _build_regexes():
    """
    Build the ENCODING_REGEXES mapping from encoding name to compiled regex.

    Each regex is a reasonably fast way to detect whether a given string
    could be represented in that encoding: it matches only strings made
    entirely of characters the encoding can produce. The simplest one is the
    'ascii' detector, which just determines if all characters are between
    U+0000 and U+007F.
    """
    # Bytes \x80 to \xFF, plus byte \x1A, which is used to represent the
    # replacement character in the sloppy-* encodings. These bytes are the
    # same for every encoding; only what they decode to differs.
    high_chars = ''.join(unichr(byte) for byte in range(128, 256)) + '\x1a'
    high_bytes = high_chars.encode('latin-1')

    regexes = {'ascii': re.compile('^[\x00-\x7f]*$')}
    for encoding in CHARMAP_ENCODINGS:
        decoded_chars = high_bytes.decode(encoding)

        # The rest of the ASCII bytes -- bytes \x00 to \x19 and \x1B to \x7F
        # -- will decode as those ASCII characters in any encoding we
        # support, so they are included as ranges. This also avoids having
        # to escape regex special characters, because all of them are in the
        # \x1B to \x7F range.
        pattern = '^[\x00-\x19\x1b-\x7f{0}]*$'.format(decoded_chars)
        regexes[encoding] = re.compile(pattern)
    return regexes


ENCODING_REGEXES = _build_regexes()
|
||||
|
||||
|
||||
def _build_utf8_punct_regex():
    """
    Build a regex for UTF-8 mojibake that's so blatant that we can fix it
    even when the rest of the string doesn't decode as UTF-8 -- namely, UTF-8
    sequences for 'General Punctuation' characters (U+2000 to U+203F),
    re-decoded as Windows-1252.

    These are recognizable by the distinctive 'â€' ('\xe2\x80') sequence they
    all begin with when decoded as Windows-1252, followed by whatever a
    single continuation byte (0x80 to 0xbf) decodes to.
    """
    # We need every literal byte from 0x80 to 0xbf in a character class.
    # Writing [\x80-\xbf] would not work, because once those bytes are
    # decoded as Windows-1252 the resulting characters are not remotely
    # contiguous. So each byte is generated and decoded individually.
    continuation_bytes = ''.join(
        unichr(byte) for byte in range(0x80, 0xc0)
    ).encode('latin-1')
    continuation_chars = continuation_bytes.decode('sloppy-windows-1252')
    return re.compile('â€[' + continuation_chars + ']')


PARTIAL_UTF8_PUNCT_RE = _build_utf8_punct_regex()
|
||||
|
||||
|
||||
# Recognize UTF-8 sequences that would be valid if it weren't for a b'\xa0'
|
||||
# that some Windows-1252 program converted to a plain space.
|
||||
#
|
||||
# The smaller values are included on a case-by-case basis, because we don't want
|
||||
# to decode likely input sequences to unlikely characters. These are the ones
|
||||
# that *do* form likely characters before 0xa0:
|
||||
#
|
||||
# 0xc2 -> U+A0 NO-BREAK SPACE
|
||||
# 0xc3 -> U+E0 LATIN SMALL LETTER A WITH GRAVE
|
||||
# 0xc5 -> U+160 LATIN CAPITAL LETTER S WITH CARON
|
||||
# 0xce -> U+3A0 GREEK CAPITAL LETTER PI
|
||||
# 0xd0 -> U+420 CYRILLIC CAPITAL LETTER ER
|
||||
#
|
||||
# These still need to come with a cost, so that they only get converted when
|
||||
# there's evidence that it fixes other things. Any of these could represent
|
||||
# characters that legitimately appear surrounded by spaces, particularly U+C5
|
||||
# (Å), which is a word in multiple languages!
|
||||
#
|
||||
# We should consider checking for b'\x85' being converted to ... in the future.
|
||||
# I've seen it once, but the text still wasn't recoverable.
|
||||
|
||||
ALTERED_UTF8_RE = re.compile(b'|'.join([
    # Two-byte sequences: selected lead bytes followed by the space.
    b'[\xc2\xc3\xc5\xce\xd0][ ]',
    # Three-byte sequences with the space in the second or third position.
    b'[\xe0-\xef][ ][\x80-\xbf]',
    b'[\xe0-\xef][\x80-\xbf][ ]',
    # Four-byte sequences with the space in the second, third, or fourth
    # position.
    b'[\xf0-\xf4][ ][\x80-\xbf][\x80-\xbf]',
    b'[\xf0-\xf4][\x80-\xbf][ ][\x80-\xbf]',
    b'[\xf0-\xf4][\x80-\xbf][\x80-\xbf][ ]',
]))
|
||||
|
||||
# This expression matches UTF-8 and CESU-8 sequences where some of the
|
||||
# continuation bytes have been lost. The byte 0x1a (sometimes written as ^Z) is
|
||||
# used within ftfy to represent a byte that produced the replacement character
|
||||
# \ufffd. We don't know which byte it was, but we can at least decode the UTF-8
|
||||
# sequence as \ufffd instead of failing to re-decode it at all.
|
||||
LOSSY_UTF8_RE = re.compile(b'|'.join([
    # Two-byte sequence whose continuation byte was lost.
    b'[\xc2-\xdf][\x1a]',
    # Six-byte sequences starting with \xed (two three-byte halves) with a
    # lost byte in either half.
    b'\xed[\xa0-\xaf][\x1a]\xed[\xb0-\xbf][\x1a\x80-\xbf]',
    b'\xed[\xa0-\xaf][\x1a\x80-\xbf]\xed[\xb0-\xbf][\x1a]',
    # Three-byte sequences with at least one lost continuation byte.
    b'[\xe0-\xef][\x1a][\x1a\x80-\xbf]',
    b'[\xe0-\xef][\x1a\x80-\xbf][\x1a]',
    # Four-byte sequences with at least one lost continuation byte.
    b'[\xf0-\xf4][\x1a][\x1a\x80-\xbf][\x1a\x80-\xbf]',
    b'[\xf0-\xf4][\x1a\x80-\xbf][\x1a][\x1a\x80-\xbf]',
    b'[\xf0-\xf4][\x1a\x80-\xbf][\x1a\x80-\xbf][\x1a]',
    # A lone placeholder byte.
    b'\x1a',
]))
|
||||
|
||||
# These regexes match various Unicode variations on single and double quotes.
|
||||
SINGLE_QUOTE_RE = re.compile(
    '[\N{LEFT SINGLE QUOTATION MARK}'
    '-\N{SINGLE HIGH-REVERSED-9 QUOTATION MARK}]'
)
DOUBLE_QUOTE_RE = re.compile(
    '[\N{LEFT DOUBLE QUOTATION MARK}'
    '-\N{DOUBLE HIGH-REVERSED-9 QUOTATION MARK}]'
)
|
||||
|
||||
|
||||
def possible_encoding(text, encoding):
    """
    Check whether `text` could have been decoded from the given single-byte
    encoding.

    In other words, check whether it can be encoded in that encoding,
    possibly sloppily. `encoding` must be a key of ENCODING_REGEXES.

    Returns True if every character of `text` is allowed by that encoding's
    regex, False otherwise.
    """
    detector = ENCODING_REGEXES[encoding]
    return detector.match(text) is not None
|
||||
|
||||
|
||||
# Load the packaged 'char_classes.dat' resource, decompress it with zlib and
# decode the result as ASCII.
CHAR_CLASS_STRING = zlib.decompress(
    resource_string(__name__, 'char_classes.dat')
).decode('ascii')
|
||||
|
||||
def chars_to_classes(string):
    """
    Replace each Unicode character in `string` with a letter indicating
    which of many classes it's in.

    The replacement is a `translate` call that uses CHAR_CLASS_STRING as the
    translation table.

    See build_data.py for where this data comes from and what it means.
    """
    class_table = CHAR_CLASS_STRING
    return string.translate(class_table)
|
||||
|
||||
|
||||
def _build_control_char_mapping():
|
||||
"""
|
||||
Build a translate mapping that strips likely-unintended control characters.
|
||||
See :func:`ftfy.fixes.remove_control_chars` for a description of these
|
||||
codepoint ranges and why they should be removed.
|
||||
"""
|
||||
control_chars = {}
|
||||
|
||||
for i in itertools.chain(
|
||||
range(0x00, 0x09), [0x0b],
|
||||
range(0x0e, 0x20), [0x7f],
|
||||
range(0x206a, 0x2070),
|
||||
[0xfeff],
|
||||
range(0xfff9, 0xfffd),
|
||||
range(0x1d173, 0x1d17b),
|
||||
range(0xe0000, 0xe0080)
|
||||
):
|
||||
control_chars[i] = None
|
||||
|
||||
return control_chars
|
||||
CONTROL_CHARS = _build_control_char_mapping()
|
||||
|
||||
|
||||
# A translate mapping that breaks ligatures made of Latin letters. While
|
||||
# ligatures may be important to the representation of other languages, in
|
||||
# Latin letters they tend to represent a copy/paste error.
|
||||
#
|
||||
# Ligatures may also be separated by NFKC normalization, but that is sometimes
|
||||
# more normalization than you want.
|
||||
# The keys are written as explicit codepoints, not as literal ligature
# characters. A literal ligature can be silently decomposed into its separate
# letters by text-normalizing tools, which would leave `ord()` with a
# multi-character string and make it raise TypeError at import time.
LIGATURES = {
    0x0132: 'IJ',   # LATIN CAPITAL LIGATURE IJ
    0x0133: 'ij',   # LATIN SMALL LIGATURE IJ
    0xfb00: 'ff',   # LATIN SMALL LIGATURE FF
    0xfb01: 'fi',   # LATIN SMALL LIGATURE FI
    0xfb02: 'fl',   # LATIN SMALL LIGATURE FL
    0xfb03: 'ffi',  # LATIN SMALL LIGATURE FFI
    0xfb04: 'ffl',  # LATIN SMALL LIGATURE FFL
    0xfb05: 'ſt',   # LATIN SMALL LIGATURE LONG S T
    0xfb06: 'st',   # LATIN SMALL LIGATURE ST
}
|
||||
|
||||
|
||||
def _build_width_map():
    """
    Build a translate mapping that replaces halfwidth and fullwidth forms
    with their standard-width forms.

    A codepoint in the range U+FF01 to U+FFEF is included only if NFKC
    normalization changes it; it then maps to its normalized form.
    """
    # Though it's not listed as a fullwidth character, U+3000 IDEOGRAPHIC
    # SPACE should become U+20 SPACE on the same principle, so the mapping
    # starts with that entry.
    mapping = {0x3000: ' '}
    for codepoint in range(0xff01, 0xfff0):
        original = unichr(codepoint)
        normalized = unicodedata.normalize('NFKC', original)
        if normalized == original:
            continue
        mapping[codepoint] = normalized
    return mapping


WIDTH_MAP = _build_width_map()
|
||||
@@ -0,0 +1,122 @@
|
||||
"""
|
||||
A command-line utility for fixing text found in a file.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import io
|
||||
import codecs
|
||||
from ftfy import fix_file, __version__
|
||||
from ftfy.compatibility import PYTHON2
|
||||
|
||||
|
||||
ENCODE_ERROR_TEXT_UNIX = """ftfy error:
|
||||
Unfortunately, this output stream does not support Unicode.
|
||||
|
||||
Your system locale may be very old or misconfigured. You should use a locale
|
||||
that supports UTF-8. One way to do this is to `export LANG=C.UTF-8`.
|
||||
"""
|
||||
|
||||
ENCODE_ERROR_TEXT_WINDOWS = """ftfy error:
|
||||
Unfortunately, this output stream does not support Unicode.
|
||||
|
||||
You might be trying to output to the Windows Command Prompt (cmd.exe), which
|
||||
does not fully support Unicode for historical reasons. In general, we recommend
|
||||
finding a way to run Python without using cmd.exe.
|
||||
|
||||
You can work around this problem by using the '-o filename' option in ftfy to
|
||||
output to a file instead.
|
||||
"""
|
||||
|
||||
DECODE_ERROR_TEXT = """ftfy error:
|
||||
This input couldn't be decoded as %r. We got the following error:
|
||||
|
||||
%s
|
||||
|
||||
ftfy works best when its input is in a known encoding. You can use `ftfy -g`
|
||||
to guess, if you're desperate. Otherwise, give the encoding name with the
|
||||
`-e` option, such as `ftfy -e latin-1`.
|
||||
"""
|
||||
|
||||
|
||||
def main():
    """
    Run ftfy as a command-line utility.

    Parses the command line, reads the input as bytes (from a file or from
    standard input), passes it through `fix_file`, and writes each fixed line
    to the output (a UTF-8 file or standard output).

    Exits with status 1, after printing an explanation to standard error, if
    the input can't be decoded or the output stream can't encode the result.
    Any file opened here is closed before returning or exiting.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="ftfy (fixes text for you), version %s" % __version__
    )
    parser.add_argument('filename', default='-', nargs='?',
                        help='The file whose Unicode is to be fixed. Defaults '
                             'to -, meaning standard input.')
    parser.add_argument('-o', '--output', type=str, default='-',
                        help='The file to output to. Defaults to -, meaning '
                             'standard output.')
    parser.add_argument('-g', '--guess', action='store_true',
                        help="Ask ftfy to guess the encoding of your input. "
                             "This is risky. Overrides -e.")
    parser.add_argument('-e', '--encoding', type=str, default='utf-8',
                        help='The encoding of the input. Defaults to UTF-8.')
    parser.add_argument('-n', '--normalization', type=str, default='NFC',
                        help='The normalization of Unicode to apply. '
                             'Defaults to NFC. Can be "none".')
    parser.add_argument('--preserve-entities', action='store_true',
                        help="Leave HTML entities as they are. The default "
                             "is to decode them, as long as no HTML tags "
                             "have appeared in the file.")

    args = parser.parse_args()

    # -g overrides -e: when guessing, no encoding is passed to fix_file.
    encoding = args.encoding
    if args.guess:
        encoding = None

    normalization = args.normalization
    if normalization.lower() == 'none':
        normalization = None

    if args.preserve_entities:
        fix_entities = False
    else:
        fix_entities = 'auto'

    # Files opened by this function (as opposed to the standard streams),
    # which must be closed on every exit path, including sys.exit().
    opened_files = []
    try:
        if args.filename == '-':
            # Get a standard input stream made of bytes, so we can decode it
            # as whatever encoding is necessary.
            if PYTHON2:
                file = sys.stdin
            else:
                file = sys.stdin.buffer
        else:
            file = open(args.filename, 'rb')
            opened_files.append(file)

        if args.output == '-':
            # On Python 2, lines are encoded as UTF-8 before being written
            # to standard output.
            encode_output = PYTHON2
            outfile = sys.stdout
        else:
            encode_output = False
            outfile = io.open(args.output, 'w', encoding='utf-8')
            opened_files.append(outfile)

        try:
            for line in fix_file(file, encoding=encoding,
                                 fix_entities=fix_entities,
                                 normalization=normalization):
                if encode_output:
                    outfile.write(line.encode('utf-8'))
                else:
                    try:
                        outfile.write(line)
                    except UnicodeEncodeError:
                        if sys.platform == 'win32':
                            sys.stderr.write(ENCODE_ERROR_TEXT_WINDOWS)
                        else:
                            sys.stderr.write(ENCODE_ERROR_TEXT_UNIX)
                        sys.exit(1)
        except UnicodeDecodeError as err:
            sys.stderr.write(DECODE_ERROR_TEXT % (encoding, err))
            sys.exit(1)
    finally:
        for handle in opened_files:
            handle.close()


if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,55 @@
|
||||
"""
|
||||
Makes some function names and behavior consistent between Python 2 and
|
||||
Python 3, and also between narrow and wide builds.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import sys
|
||||
import unicodedata
|
||||
|
||||
# True when running on Python 2, False on Python 3 or later.
PYTHON2 = sys.hexversion < 0x03000000

# Expose `unichr` and `xrange` under the same names on both major versions.
if PYTHON2:
    unichr = unichr
    xrange = xrange
else:
    unichr = chr
    xrange = range

PYTHON34_OR_LATER = (sys.hexversion >= 0x03040000)
|
||||
|
||||
|
||||
def _narrow_unichr_workaround(codepoint):
|
||||
"""
|
||||
A replacement for unichr() on narrow builds of Python. This will get
|
||||
us the narrow representation of an astral character, which will be
|
||||
a string of length two, containing two UTF-16 surrogates.
|
||||
"""
|
||||
escaped = b'\\U%08x' % codepoint
|
||||
return escaped.decode('unicode-escape')
|
||||
|
||||
|
||||
if sys.maxunicode < 0x10000:
|
||||
unichr = _narrow_unichr_workaround
|
||||
|
||||
|
||||
def bytes_to_ints(bytestring):
    """
    Make a sequence of integers from a bytestring, no matter what version of
    Python this is.

    On Python 3 the bytestring is returned unchanged, because a 'bytes'
    object _is_ a sequence of integers. On Python 2 a list of the ordinal of
    each byte is returned.
    """
    if not PYTHON2:
        return bytestring
    return [ord(byte) for byte in bytestring]
|
||||
|
||||
|
||||
def is_printable(char):
    """
    Tell whether a character is printable.

    str.isprintable() is new in Python 3, and is used directly there. It's
    useful in `explain_unicode`, so on Python 2 a crude approximation is
    used: a character is printable unless its Unicode category starts with
    'C'.
    """
    if not PYTHON2:
        return char.isprintable()
    category = unicodedata.category(char)
    return not category.startswith('C')
|
||||
@@ -0,0 +1,664 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
This module contains the individual fixes that the main fix_text function
|
||||
can perform.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import sys
|
||||
import codecs
|
||||
import warnings
|
||||
from ftfy.chardata import (possible_encoding, CHARMAP_ENCODINGS,
|
||||
CONTROL_CHARS, LIGATURES, WIDTH_MAP,
|
||||
PARTIAL_UTF8_PUNCT_RE, ALTERED_UTF8_RE,
|
||||
LOSSY_UTF8_RE, SINGLE_QUOTE_RE, DOUBLE_QUOTE_RE)
|
||||
from ftfy.badness import text_cost
|
||||
from ftfy.compatibility import unichr
|
||||
from html5lib.constants import entities
|
||||
|
||||
|
||||
BYTES_ERROR_TEXT = """Hey wait, this isn't Unicode.
|
||||
|
||||
ftfy is designed to fix problems that were introduced by handling Unicode
|
||||
incorrectly. It might be able to fix the bytes you just handed it, but the
|
||||
fact that you just gave a pile of bytes to a function that fixes text means
|
||||
that your code is *also* handling Unicode incorrectly.
|
||||
|
||||
ftfy takes Unicode text as input. You should take these bytes and decode
|
||||
them from the encoding you think they are in. If you're not sure what encoding
|
||||
they're in:
|
||||
|
||||
- First, try to find out. 'utf-8' is a good assumption.
|
||||
- If the encoding is simply unknowable, try running your bytes through
|
||||
ftfy.guess_bytes. As the name implies, this may not always be accurate.
|
||||
|
||||
If you're confused by this, please read the Python Unicode HOWTO:
|
||||
|
||||
http://docs.python.org/%d/howto/unicode.html
|
||||
""" % sys.version_info[0]
|
||||
|
||||
|
||||
def fix_encoding(text):
    r"""
    Fix text with incorrectly-decoded garbage ("mojibake") whenever possible.

    This is a thin wrapper around :func:`fix_encoding_and_explain`: it
    returns the fixed text and discards the plan that describes how the text
    was fixed.

    The function looks for evidence of mojibake, formulates a plan to fix it,
    and applies the plan. It determines whether it should replace nonsense
    sequences of single-byte characters that were really meant to be UTF-8
    characters, and if so, turns them into the correctly-encoded Unicode
    character that they were meant to represent.

    The input to the function must be Unicode. If you don't have Unicode
    text, you're not using the right tool to solve your problem.

    NOTE(review): several example inputs below look as if they have already
    been repaired by a text-cleaning pass, so they no longer show mojibake
    being fixed. Confirm them against the upstream source.

    `fix_encoding` decodes text that looks like it was decoded incorrectly.
    It leaves alone text that doesn't.

        >>> print(fix_encoding('único'))
        único

        >>> print(fix_encoding('This text is fine already :þ'))
        This text is fine already :þ

    Because these characters often come from Microsoft products, we allow
    for the possibility that we get not just Unicode characters 128-255, but
    also Windows's conflicting idea of what characters 128-160 are.

        >>> print(fix_encoding('This — should be an em dash'))
        This — should be an em dash

    We might have to deal with both Windows characters and raw control
    characters at the same time, especially when dealing with characters like
    0x81 that have no mapping in Windows. This is a string that Python's
    standard `.encode` and `.decode` methods cannot correct.

        >>> print(fix_encoding('This text is sad .â\x81”.'))
        This text is sad .⁔.

    However, it has safeguards against fixing sequences of letters and
    punctuation that can occur in valid text. In the following example,
    the last three characters are not replaced with a Korean character,
    even though they could be.

        >>> print(fix_encoding('not such a fan of Charlotte Brontë…”'))
        not such a fan of Charlotte Brontë…”

    This function can now recover some complex manglings of text, such as
    when UTF-8 mojibake has been normalized in a way that replaces U+A0 with
    a space:

        >>> print(fix_encoding('The more you know 🌠'))
        The more you know 🌠

    Cases of genuine ambiguity can sometimes be addressed by finding other
    characters that are not double-encoded, and expecting the encoding to
    be consistent:

        >>> print(fix_encoding('AHÅ™, the new sofa from IKEA®'))
        AHÅ™, the new sofa from IKEA®

    Finally, we handle the case where the text is in a single-byte encoding
    that was intended as Windows-1252 all along but read as Latin-1:

        >>> print(fix_encoding('This text was never UTF-8 at all\x85'))
        This text was never UTF-8 at all…

    The best version of the text is found using
    :func:`ftfy.badness.text_cost`.
    """
    fixed_text, _plan = fix_encoding_and_explain(text)
    return fixed_text
|
||||
|
||||
|
||||
def fix_text_encoding(text):
    """
    A deprecated name for :func:`ftfy.fixes.fix_encoding`.

    Issues a DeprecationWarning, then returns the result of calling
    `fix_encoding` on `text`.
    """
    message = 'fix_text_encoding is now known as fix_encoding'
    warnings.warn(message, DeprecationWarning)
    return fix_encoding(text)
|
||||
|
||||
|
||||
# When we support discovering mojibake in more encodings, we run the risk
|
||||
# of more false positives. We can mitigate false positives by assigning an
|
||||
# additional cost to using encodings that are rarer than Windows-1252, so
|
||||
# that these encodings will only be used if they fix multiple problems.
|
||||
# Maps an encoding name to the extra cost of using it in a fix.
ENCODING_COSTS = {
    'macroman': 2,
    'iso-8859-2': 2,
    'sloppy-windows-1250': 2,
    'sloppy-windows-1251': 3,
    'cp437': 3,
}
|
||||
|
||||
|
||||
def fix_encoding_and_explain(text):
    """
    Re-decode text that has been decoded incorrectly, and also return a
    "plan" indicating all the steps required to fix it.

    Single fixing steps are applied repeatedly until a step leaves the text
    unchanged. After each step, the candidate's cost is its `text_cost` plus
    the cost of every step taken so far. The cheapest candidate seen,
    starting from the unmodified input, is the one returned.

    Returns a tuple of the best version of the text and the list of steps
    that produced it. Each step is a 3-tuple whose last element is its cost.

    The resulting plan could be used with :func:`ftfy.fixes.apply_plan`
    to fix additional strings that are broken in the same way.
    """
    best_version, best_cost, best_plan = text, text_cost(text), []
    steps_taken = []
    while True:
        previous_text = text
        text, new_steps = fix_one_step_and_explain(text)
        steps_taken.extend(new_steps)

        # Start from the cost of the text itself, then add the cost of each
        # step taken so far.
        cost = sum(
            (step_cost for _, _, step_cost in steps_taken),
            text_cost(text)
        )

        if cost < best_cost:
            # Copy the plan, because `steps_taken` keeps growing.
            best_version, best_cost, best_plan = text, cost, list(steps_taken)
        if text == previous_text:
            return best_version, best_plan
|
||||
|
||||
|
||||
def fix_one_step_and_explain(text):
    """
    Performs a single step of re-decoding text that's been decoded incorrectly.

    Returns the decoded text, plus a "plan" for how to reproduce what it did.
    The plan is a list of `(operation, encoding, cost)` tuples; it is empty
    whenever the text is returned unchanged.

    Raises `UnicodeError` if `text` is a `bytes` object.
    """
    if isinstance(text, bytes):
        raise UnicodeError(BYTES_ERROR_TEXT)
    if len(text) == 0:
        return text, []

    # The first plan is to return ASCII text unchanged.
    if possible_encoding(text, 'ascii'):
        return text, []

    # As we go through the next step, remember the possible encodings
    # that we encounter but don't successfully fix yet. We may need them
    # later.
    possible_1byte_encodings = []

    # Suppose the text was supposed to be UTF-8, but it was decoded using
    # a single-byte encoding instead. When these cases can be fixed, they
    # are usually the correct thing to do, so try them next.
    for encoding in CHARMAP_ENCODINGS:
        if possible_encoding(text, encoding):
            encoded_bytes = text.encode(encoding)
            # Encodings listed in ENCODING_COSTS carry an extra cost; all
            # others cost 0.
            encode_step = ('encode', encoding, ENCODING_COSTS.get(encoding, 0))
            transcode_steps = []

            # Now, find out if it's UTF-8 (or close enough). Otherwise,
            # remember the encoding for later.
            try:
                decoding = 'utf-8'
                # Check encoded_bytes for sequences that would be UTF-8,
                # except they have b' ' where b'\xa0' would belong.
                if ALTERED_UTF8_RE.search(encoded_bytes):
                    encoded_bytes = restore_byte_a0(encoded_bytes)
                    # NOTE: this counts every A0 byte present after the
                    # restoration, not only the bytes that were just changed.
                    cost = encoded_bytes.count(b'\xa0') * 2
                    transcode_steps.append(('transcode', 'restore_byte_a0', cost))

                # Check for the byte 0x1a, which indicates where one of our
                # 'sloppy' codecs found a replacement character.
                if encoding.startswith('sloppy') and b'\x1a' in encoded_bytes:
                    encoded_bytes = replace_lossy_sequences(encoded_bytes)
                    transcode_steps.append(('transcode', 'replace_lossy_sequences', 0))

                # Bytes ED and C0 switch the decoder to 'utf-8-variants'
                # instead of strict UTF-8.
                if b'\xed' in encoded_bytes or b'\xc0' in encoded_bytes:
                    decoding = 'utf-8-variants'

                decode_step = ('decode', decoding, 0)
                steps = [encode_step] + transcode_steps + [decode_step]
                # This decode is what raises UnicodeDecodeError when the
                # bytes are not valid in the chosen decoding.
                fixed = encoded_bytes.decode(decoding)
                return fixed, steps

            except UnicodeDecodeError:
                possible_1byte_encodings.append(encoding)

    # Look for a-hat-euro sequences that remain, and fix them in isolation.
    if PARTIAL_UTF8_PUNCT_RE.search(text):
        steps = [('transcode', 'fix_partial_utf8_punct_in_1252', 1)]
        fixed = fix_partial_utf8_punct_in_1252(text)
        return fixed, steps

    # The next most likely case is that this is Latin-1 that was intended to
    # be read as Windows-1252, because those two encodings in particular are
    # easily confused.
    if 'latin-1' in possible_1byte_encodings:
        if 'windows-1252' in possible_1byte_encodings:
            # This text is in the intersection of Latin-1 and
            # Windows-1252, so it's probably legit.
            return text, []
        else:
            # Otherwise, it means we have characters that are in Latin-1 but
            # not in Windows-1252. Those are C1 control characters. Nobody
            # wants those. Assume they were meant to be Windows-1252. Don't
            # use the sloppy codec, because bad Windows-1252 characters are
            # a bad sign.
            encoded = text.encode('latin-1')
            try:
                fixed = encoded.decode('windows-1252')
                steps = []
                # Only report a plan if re-decoding actually changed something.
                if fixed != text:
                    steps = [('encode', 'latin-1', 0),
                             ('decode', 'windows-1252', 1)]
                return fixed, steps
            except UnicodeDecodeError:
                # This text contained characters that don't even make sense
                # if you assume they were supposed to be Windows-1252. In
                # that case, let's not assume anything.
                pass

    # The cases that remain are mixups between two different single-byte
    # encodings, and not the common case of Latin-1 vs. Windows-1252.
    #
    # These cases may be unsolvable without adding false positives, though
    # I have vague ideas about how to optionally address them in the future.

    # Return the text unchanged; the plan is empty.
    return text, []
|
||||
|
||||
|
||||
def apply_plan(text, plan):
    """
    Replay a previously computed encoding-fix plan on `text`.

    `plan` is a list of `(operation, encoding, cost)` tuples:

    - `operation` is 'encode' (string to bytes), 'decode' (bytes to string),
      or 'transcode' (same type in and out).
    - `encoding` names the codec to use, such as 'utf-8' or 'latin-1'. For
      'transcode' it is instead the name of a function in `TRANSCODERS`.
    - `cost` is ignored here. It only matters to code that chooses between
      plans, namely `fix_encoding_and_explain`.

    Returns the object left after the last step. Raises `ValueError` for an
    unknown operation or an unknown transcoder name.
    """
    result = text
    for step in plan:
        operation, encoding, _ = step

        if operation == 'encode':
            result = result.encode(encoding)
            continue
        if operation == 'decode':
            result = result.decode(encoding)
            continue

        # Only 'transcode' remains as a valid operation.
        if operation != 'transcode':
            raise ValueError("Unknown plan step: %s" % operation)
        if encoding not in TRANSCODERS:
            raise ValueError("Unknown transcode operation: %s" % encoding)
        result = TRANSCODERS[encoding](result)

    return result
|
||||
|
||||
|
||||
HTML_ENTITY_RE = re.compile(r"&#?\w{0,8};")


def unescape_html(text):
    """
    Decode all three types of HTML entities/character references: named
    entities, decimal references, and hexadecimal references.

    Code by Fredrik Lundh of effbot.org. Rob Speer made a slight change
    to it for efficiency: it won't match entities longer than 8 characters,
    because there are no valid entities like that.

    Anything that looks like an entity but cannot be decoded is left in the
    text exactly as it was.

    >>> print(unescape_html('&lt;tag&gt;'))
    <tag>
    """
    def fixup(match):
        """
        Replace one matched HTML entity with the character it represents,
        if possible.
        """
        text = match.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                # HTML accepts both "&#x" and "&#X" as the hexadecimal prefix.
                if text[:3] in ("&#x", "&#X"):
                    codept = int(text[3:-1], 16)
                else:
                    codept = int(text[2:-1])
                if 0x80 <= codept < 0xa0:
                    # Decode this range of characters as Windows-1252, as Web
                    # browsers do in practice.
                    return unichr(codept).encode('latin-1').decode('sloppy-windows-1252')
                else:
                    return unichr(codept)
            except ValueError:
                # Not a number, or not a valid codepoint: fall through and
                # return the original text.
                pass
        else:
            # named entity
            try:
                text = entities[text[1:]]
            except KeyError:
                pass
        return text  # leave as is
    return HTML_ENTITY_RE.sub(fixup, text)
|
||||
|
||||
|
||||
ANSI_RE = re.compile('\033\\[((?:\\d|;)*)([a-zA-Z])')


def remove_terminal_escapes(text):
    r"""
    Delete "ANSI" terminal escape sequences from the text, such as the ones
    that switch colors in a Unix terminal.

        >>> print(remove_terminal_escapes(
        ...     "\033[36;44mI'm blue, da ba dee da ba doo...\033[0m"
        ... ))
        I'm blue, da ba dee da ba doo...
    """
    without_escapes = ANSI_RE.sub('', text)
    return without_escapes
|
||||
|
||||
|
||||
def uncurl_quotes(text):
    r"""
    Turn curly quotation marks into their straight ASCII equivalents.

    Double quotes are replaced first, then single quotes.

        >>> print(uncurl_quotes('\u201chere\u2019s a test\u201d'))
        "here's a test"
    """
    doubles_straightened = DOUBLE_QUOTE_RE.sub('"', text)
    return SINGLE_QUOTE_RE.sub("'", doubles_straightened)
|
||||
|
||||
|
||||
def fix_latin_ligatures(text):
    """
    Replace single-character ligatures of Latin letters, such as 'ﬁ', with the
    characters that they contain, as in 'fi'. Latin ligatures are usually not
    intended in text strings (though they're lovely in *rendered* text). If
    you have such a ligature in your string, it is probably a result of a
    copy-and-paste glitch.

    We leave ligatures in other scripts alone to be safe. They may be intended,
    and removing them may lose information. If you want to take apart nearly
    all ligatures, use NFKC normalization.

        >>> print(fix_latin_ligatures("ﬂuﬃest"))
        fluffiest
    """
    expanded = text.translate(LIGATURES)
    return expanded
|
||||
|
||||
|
||||
def fix_character_width(text):
    """
    Replace halfwidth and fullwidth characters with their standard forms.

    The ASCII characters, katakana, and Hangul characters have alternate
    "halfwidth" or "fullwidth" forms that help text line up in a grid. If you
    don't need these width properties, the standard form is probably what you
    want.

    Note that this replaces the ideographic space, U+3000, with the ASCII
    space, U+20.

        >>> print(fix_character_width("ＬＯＵＤ　ＮＯＩＳＥＳ"))
        LOUD NOISES
        >>> print(fix_character_width("Ｕﾀｰﾝ"))   # this means "U-turn"
        Uターン
    """
    standard_width = text.translate(WIDTH_MAP)
    return standard_width
|
||||
|
||||
|
||||
def fix_line_breaks(text):
    r"""
    Convert all line breaks to Unix style.

    Each of these sequences becomes the standard \\n line break:

    - CRLF (\\r\\n), used on Windows and in some communication protocols
    - CR (\\r), once used on Mac OS Classic, and now kept alive by misguided
      software such as Microsoft Office for Mac
    - LINE SEPARATOR (\\u2028) and PARAGRAPH SEPARATOR (\\u2029), defined by
      Unicode and used to sow confusion and discord
    - NEXT LINE (\\x85), a C1 control character that is certainly not what
      you meant

    The NEXT LINE character is a bit of an odd case, because it usually won't
    show up if `fix_encoding` is also being run. \\x85 is very common
    mojibake for \\u2026, HORIZONTAL ELLIPSIS.

        >>> print(fix_line_breaks(
        ...     "This string is made of two things:\u2029"
        ...     "1. Unicode\u2028"
        ...     "2. Spite"
        ... ))
        This string is made of two things:
        1. Unicode
        2. Spite

    For further testing and examples, let's define a function to make sure
    we can see the control characters in their escaped form:

        >>> def eprint(text):
        ...     print(text.encode('unicode-escape').decode('ascii'))

        >>> eprint(fix_line_breaks("Content-type: text/plain\r\n\r\nHi."))
        Content-type: text/plain\n\nHi.

        >>> eprint(fix_line_breaks("This is how Microsoft \r trolls Mac users"))
        This is how Microsoft \n trolls Mac users

        >>> eprint(fix_line_breaks("What is this \x85 I don't even"))
        What is this \n I don't even
    """
    # CRLF has to come before bare CR, or each CRLF would become two breaks.
    for line_break in ('\r\n', '\r', '\u2028', '\u2029', '\u0085'):
        text = text.replace(line_break, '\n')
    return text
|
||||
|
||||
|
||||
SURROGATE_RE = re.compile('[\ud800-\udfff]')
SURROGATE_PAIR_RE = re.compile('[\ud800-\udbff][\udc00-\udfff]')


def convert_surrogate_pair(match):
    """
    Turn a regex match on a surrogate pair into the one codepoint it encodes.

    The formula is the one described at:
    http://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates
    """
    high, low = match.group(0)
    # Each surrogate contributes 10 bits; the high one is the upper half.
    offset = ((ord(high) - 0xd800) << 10) + (ord(low) - 0xdc00)
    return unichr(0x10000 + offset)


def fix_surrogates(text):
    """
    Replace 16-bit surrogate codepoints with the characters they represent
    (when properly paired), or with U+FFFD otherwise.

        >>> high_surrogate = unichr(0xd83d)
        >>> low_surrogate = unichr(0xdca9)
        >>> print(fix_surrogates(high_surrogate + low_surrogate))
        💩
        >>> print(fix_surrogates(low_surrogate + high_surrogate))
        ��

    The above doctest had to be very carefully written, because even putting
    the Unicode escapes of the surrogates in the docstring was causing
    various tools to fail, which I think just goes to show why this fixer is
    necessary.
    """
    # Most text has no surrogates at all; skip the substitutions entirely.
    if not SURROGATE_RE.search(text):
        return text
    paired = SURROGATE_PAIR_RE.sub(convert_surrogate_pair, text)
    # Whatever surrogates remain are unpaired and cannot be recovered.
    return SURROGATE_RE.sub('\ufffd', paired)
|
||||
|
||||
|
||||
def remove_control_chars(text):
    """
    Strip control characters that you probably didn't intend to have in your
    text. Many of them appear in the table of "Characters not suitable for
    use with markup" at http://www.unicode.org/reports/tr20/tr20-9.html.

    This includes:

    - ASCII control characters, except for the important whitespace characters
      (U+00 to U+08, U+0B, U+0E to U+1F, U+7F)
    - Deprecated Arabic control characters (U+206A to U+206F)
    - Interlinear annotation characters (U+FFF9 to U+FFFB)
    - The Object Replacement Character (U+FFFC)
    - The byte order mark (U+FEFF)
    - Musical notation control characters (U+1D173 to U+1D17A)
    - Tag characters (U+E0000 to U+E007F)

    However, these similar characters are left alone:

    - Control characters that produce whitespace (U+09, U+0A, U+0C, U+0D,
      U+2028, and U+2029)
    - C1 control characters (U+80 to U+9F) -- even though they are basically
      never used intentionally, they are important clues about what mojibake
      has happened
    - Control characters that affect glyph rendering, such as joiners and
      right-to-left marks (U+200C to U+200F, U+202A to U+202E)
    """
    cleaned = text.translate(CONTROL_CHARS)
    return cleaned
|
||||
|
||||
|
||||
def remove_bom(text):
    r"""
    Strip byte-order marks (U+FEFF) from the start of the text, where they
    end up when a BOM is accidentally decoded as if it were content.

        >>> print(remove_bom("\ufeffWhere do you want to go today?"))
        Where do you want to go today?
    """
    byte_order_mark = unichr(0xfeff)
    return text.lstrip(byte_order_mark)
|
||||
|
||||
|
||||
# Define a regex to match valid escape sequences in Python string literals.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U........      # 8-digit hex escapes
    | \\u....          # 4-digit hex escapes
    | \\x..            # 2-digit hex escapes
    | \\[0-7]{1,3}     # Octal escapes
    | \\N\{[^}]+\}     # Unicode characters by name
    | \\[\\'"abfnrtv]  # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(text):
    r"""
    Decode backslashed escape sequences, including \\x, \\u, and \\U character
    references, even in the presence of other Unicode.

    This is what Python's "string-escape" and "unicode-escape" codecs were
    meant to do, but in contrast, this actually works. It will decode the
    string exactly the same way that the Python interpreter decodes its string
    literals.

        >>> factoid = '\\u20a1 is the currency symbol for the colón.'
        >>> print(factoid[1:])
        u20a1 is the currency symbol for the colón.
        >>> print(decode_escapes(factoid))
        ₡ is the currency symbol for the colón.

    Even though Python itself can read string literals with a combination of
    escapes and literal Unicode -- you're looking at one right now -- the
    "unicode-escape" codec doesn't work on literal Unicode. (See
    http://stackoverflow.com/a/24519338/773754 for more details.)

    Instead, this function searches for just the parts of a string that
    represent escape sequences, and decodes them, leaving the rest alone. All
    valid escape sequences are made of ASCII characters, and this allows
    "unicode-escape" to work correctly.

    This fix cannot be automatically applied by the `ftfy.fix_text` function,
    because escaped text is not necessarily a mistake, and there is no way
    to distinguish text that's supposed to be escaped from text that isn't.
    """
    def unescape_one(found):
        "Decode the single escape sequence captured by a regex match."
        sequence = found.group(0)
        return codecs.decode(sequence, 'unicode-escape')

    return ESCAPE_SEQUENCE_RE.sub(unescape_one, text)
|
||||
|
||||
|
||||
def restore_byte_a0(byts):
    """
    Put back bytes A0 that some earlier process turned into ASCII spaces.

    Some mojibake has been additionally altered by a process that said "hmm,
    byte A0, that's basically a space!" and replaced it with byte 20. When
    that A0 was part of a sequence we intend to decode as UTF-8, the change
    makes the sequence fail to decode.

    This finds the sequences matched by `ALTERED_UTF8_RE` -- ones that would
    convincingly decode as UTF-8 if byte 20 were changed to A0 -- and changes
    every 20 inside them back to A0. For the purpose of deciding whether this
    is a good idea, this step gets a cost of twice the number of bytes that
    are changed.

    This is used as a step within `fix_encoding`.
    """
    return ALTERED_UTF8_RE.sub(
        lambda found: found.group(0).replace(b'\x20', b'\xa0'),
        byts
    )
|
||||
|
||||
|
||||
def replace_lossy_sequences(byts):
    """
    This function identifies sequences where information has been lost in
    a "sloppy" codec, indicated by byte 1A, and if they would otherwise look
    like a UTF-8 sequence, it replaces them with the UTF-8 sequence for U+FFFD.

    A further explanation:

    ftfy can now fix text in a few cases that it would previously fix
    incompletely, because of the fact that it can't successfully apply the fix
    to the entire string. A very common case of this is when characters have
    been erroneously decoded as windows-1252, but instead of the "sloppy"
    windows-1252 that passes through unassigned bytes, the unassigned bytes get
    turned into U+FFFD (�), so we can't tell what they were.

    This most commonly happens with curly quotation marks that appear
    ``â€œ like this â€�``.

    We can do better by building on ftfy's "sloppy codecs" to let them handle
    less-sloppy but more-lossy text. When they encounter the character ``�``,
    instead of refusing to encode it, they encode it as byte 1A -- an
    ASCII control code called SUBSTITUTE that once was meant for about the same
    purpose. We can then apply a fixer that looks for UTF-8 sequences where
    some continuation bytes have been replaced by byte 1A, and decode the whole
    sequence as �; if that doesn't work, it'll just turn the byte back into �
    itself.

    As a result, the above text ``â€œ like this â€�`` will decode as
    ``“ like this �``.

    If U+1A was actually in the original string, then the sloppy codecs will
    not be used, and this function will not be run, so your weird control
    character will be left alone but wacky fixes like this won't be possible.

    This is used as a step within `fix_encoding`.
    """
    replacement_bytes = '\ufffd'.encode('utf-8')
    return LOSSY_UTF8_RE.sub(replacement_bytes, byts)
|
||||
|
||||
|
||||
def fix_partial_utf8_punct_in_1252(text):
    """
    Repair isolated punctuation that was encoded in UTF-8 and decoded in
    Latin-1 or Windows-1252, as seen in the wild, even when the same fix
    can't be applied consistently to the whole string.

    For this function, we assume the text has been decoded in Windows-1252.
    If it was decoded in Latin-1, we'll call this right after it goes through
    the Latin-1-to-Windows-1252 fixer.

    This is used as a step within `fix_encoding`.
    """
    def redecode(found):
        "Re-encode one matched sequence as Windows-1252, then read it as UTF-8."
        raw = found.group(0).encode('sloppy-windows-1252')
        return raw.decode('utf-8')

    return PARTIAL_UTF8_PUNCT_RE.sub(redecode, text)
|
||||
|
||||
|
||||
# Maps the name used in a ('transcode', name, cost) plan step to the function
# that implements it. `apply_plan` looks transcoders up here by name.
TRANSCODERS = {
    'restore_byte_a0': restore_byte_a0,
    'replace_lossy_sequences': replace_lossy_sequences,
    'fix_partial_utf8_punct_in_1252': fix_partial_utf8_punct_in_1252
}
|
||||
@@ -0,0 +1,154 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This module provides functions for justifying Unicode text in a monospaced
|
||||
display such as a terminal.
|
||||
|
||||
We used to have our own implementation here, but now we mostly rely on
|
||||
the 'wcwidth' library.
|
||||
"""
|
||||
from __future__ import unicode_literals, division
|
||||
from unicodedata import normalize
|
||||
from wcwidth import wcwidth, wcswidth
|
||||
|
||||
|
||||
def character_width(char):
    r"""
    Return the number of cells a single character is likely to occupy in a
    monospaced terminal. For a printable character this is always 0, 1, or 2.

    Nonprintable or control characters give -1, a convention that comes from
    wcwidth, which does the actual work here.

        >>> character_width('車')
        2
        >>> character_width('A')
        1
        >>> character_width('\N{ZERO WIDTH JOINER}')
        0
        >>> character_width('\n')
        -1
    """
    cells = wcwidth(char)
    return cells
|
||||
|
||||
|
||||
def monospaced_width(text):
    """
    Return the "display width" of a string: the number of character cells it
    is likely to occupy when displayed in a monospaced, modern, Unicode-aware
    terminal emulator.

    This can be useful for formatting text that may contain non-spacing
    characters, or CJK characters that take up two character cells.

    Returns -1 if the string contains a non-printable or control character.

        >>> monospaced_width('ちゃぶ台返し')
        12
        >>> len('ちゃぶ台返し')
        6
        >>> monospaced_width('owl\N{SOFT HYPHEN}flavored')
        12
        >>> monospaced_width('example\x80')
        -1

        # The Korean word 'ibnida' can be written with 3 characters or 7 jamo.
        # Either way, it *looks* the same and takes up 6 character cells.
        >>> monospaced_width('입니다')
        6
        >>> monospaced_width('\u110b\u1175\u11b8\u1102\u1175\u1103\u1161')
        6
    """
    # Composing to NFC first means Hangul jamo need no special handling.
    composed = normalize('NFC', text)
    return wcswidth(composed)
|
||||
|
||||
|
||||
def display_ljust(text, width, fillchar=' '):
    """
    Left-justify `text`: pad it at the end with `fillchar` so that its display
    width in a monospaced terminal is at least `width` character cells.
    `fillchar` must be a width-1 character, or `ValueError` is raised.

    "Left" here means toward the beginning of the string, which may actually
    appear on the right in an RTL context. This is similar to the use of the
    word "left" in "left parenthesis".

    Text containing a control character (display width -1) is returned
    without any padding.

        >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
        >>> for line in lines:
        ...     print(display_ljust(line, 20, '▒'))
        Table flip▒▒▒▒▒▒▒▒▒▒
        (╯°□°)╯︵ ┻━┻▒▒▒▒▒▒▒
        ちゃぶ台返し▒▒▒▒▒▒▒▒

    This example, and the similar ones that follow, should come out justified
    correctly when viewed in a monospaced terminal. It will probably not look
    correct if you're viewing this code or documentation in a Web browser.
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    cells_used = monospaced_width(text)
    if cells_used == -1:
        # There's a control character here, so just don't add padding
        return text

    fill_count = max(0, width - cells_used)
    trailing = fillchar * fill_count
    return text + trailing
|
||||
|
||||
|
||||
def display_rjust(text, width, fillchar=' '):
    """
    Right-justify `text`: pad it at the start with `fillchar` so that its
    display width in a monospaced terminal is at least `width` character
    cells. `fillchar` must be a width-1 character, or `ValueError` is raised.

    "Right" here means toward the end of the string, which may actually be on
    the left in an RTL context. This is similar to the use of the word "right"
    in "right parenthesis".

    Text containing a control character (display width -1) is returned
    without any padding.

        >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
        >>> for line in lines:
        ...     print(display_rjust(line, 20, '▒'))
        ▒▒▒▒▒▒▒▒▒▒Table flip
        ▒▒▒▒▒▒▒(╯°□°)╯︵ ┻━┻
        ▒▒▒▒▒▒▒▒ちゃぶ台返し
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    cells_used = monospaced_width(text)
    if cells_used == -1:
        return text

    fill_count = max(0, width - cells_used)
    leading = fillchar * fill_count
    return leading + text
|
||||
|
||||
|
||||
def display_center(text, width, fillchar=' '):
    """
    Center `text`: pad it on both sides with `fillchar` so that its display
    width in a monospaced terminal is at least `width` character cells.
    `fillchar` must be a width-1 character, or `ValueError` is raised.

    When the padding can't be split evenly, the extra cell goes at the end.
    Text containing a control character (display width -1) is returned
    without any padding.

        >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
        >>> for line in lines:
        ...     print(display_center(line, 20, '▒'))
        ▒▒▒▒▒Table flip▒▒▒▒▒
        ▒▒▒(╯°□°)╯︵ ┻━┻▒▒▒▒
        ▒▒▒▒ちゃぶ台返し▒▒▒▒
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    cells_used = monospaced_width(text)
    if cells_used == -1:
        return text

    fill_count = max(0, width - cells_used)
    # Any odd leftover cell is added to the trailing side.
    before, leftover = divmod(fill_count, 2)
    after = before + leftover
    return fillchar * before + text + fillchar * after
|
||||
@@ -0,0 +1,47 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This file defines a general method for evaluating ftfy using data that arrives
|
||||
in a stream. A concrete implementation of it is found in `twitter_tester.py`.
|
||||
"""
|
||||
from __future__ import print_function, unicode_literals
|
||||
from ftfy import fix_text
|
||||
from ftfy.fixes import fix_encoding, unescape_html
|
||||
from ftfy.chardata import possible_encoding
|
||||
|
||||
|
||||
class StreamTester:
    """
    Consume a sequence of texts and print the ones that ftfy would change.
    Progress is reported periodically, including how many texts have been
    changed out of the total seen.
    """
    def __init__(self):
        # Total number of texts checked, and how many of them ftfy changed.
        self.count = 0
        self.num_fixed = 0

    def check_ftfy(self, text, encoding_only=True):
        """
        Check whether ftfy would change a single text, after unescaping its
        HTML entities. If so, display the change.

        With `encoding_only` set (the default), only `fix_encoding` is
        applied; otherwise `fix_text` is run with quote uncurling and
        character-width fixing turned off.
        """
        self.count += 1
        text = unescape_html(text)

        # Pure-ASCII text can't contain mojibake, so it isn't examined.
        if not possible_encoding(text, 'ascii'):
            fixed = (
                fix_encoding(text) if encoding_only
                else fix_text(text, uncurl_quotes=False, fix_character_width=False)
            )
            if text != fixed:
                # possibly filter common bots before printing
                report = '\nText:\t{text!r}\nFixed:\t{fixed!r}\n'.format(
                    text=text, fixed=fixed
                )
                print(report)
                self.num_fixed += 1
            elif 'â€' in text or '\x80' in text:
                # Unchanged text that still shows typical mojibake markers.
                print('\nNot fixed:\t{text!r}'.format(text=text))

        # Print status updates once in a while
        if self.count % 100 == 0:
            print('.', end='', flush=True)
        if self.count % 10000 == 0:
            print('\n%d/%d fixed' % (self.num_fixed, self.count))
|
||||
@@ -0,0 +1,72 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
Do what is necessary to authenticate this tester as a Twitter "app", using
|
||||
somebody's Twitter account.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import os
|
||||
|
||||
|
||||
AUTH_TOKEN_PATH = os.path.expanduser('~/.cache/oauth/twitter_ftfy.auth')
|
||||
|
||||
def get_auth():
    """
    Build the `OAuth` object that authorizes this tester as a Twitter "app"
    acting for some user's account.

    Twitter has some bizarre requirements here. The user of the app has to
    log in to get a secret token, which is fine. But the app itself also has
    a "consumer secret" token that the app must know and the user of the app
    must not know. This is, of course, impossible: it's equivalent to DRM.
    Your computer can't *really* make use of secret information while hiding
    the same information from you.

    The threat appears to be that someone holding this token can impersonate
    the app while doing something different. But anyone can do that anyway,
    because they *have the source code* and can change it. They would still
    have to log in as a particular user whose token actually is secret.

    Even developers of closed-source applications that use the Twitter API
    are unsure what to do, for good reason. These "secrets" are not secret in
    any cryptographic sense, and the tokens for every popular Twitter app are
    already posted on the Web.

    Twitter wants us to pretend this string can be kept secret, and hide it
    behind a fig leaf like everybody else does. So that's what we've done.

    The user's token is read from `AUTH_TOKEN_PATH` when that file exists.
    Otherwise the interactive OAuth "dance" is run, which is given that same
    path as its token filename.
    """
    from twitter.oauth import OAuth
    from twitter import oauth_dance, read_token_file

    def unhide(secret):
        """
        Do something mysterious and exactly as secure as every other Twitter
        app.
        """
        return ''.join(chr(ord(symbol) - 0x2800) for symbol in secret)

    fig_leaf = '⠴⡹⠹⡩⠶⠴⡶⡅⡂⡩⡅⠳⡏⡉⡈⠰⠰⡹⡥⡶⡈⡐⡍⡂⡫⡍⡗⡬⡒⡧⡶⡣⡰⡄⡧⡸⡑⡣⠵⡓⠶⠴⡁'
    consumer_key = 'OFhyNd2Zt4Ba6gJGJXfbsw'
    consumer_secret = unhide(fig_leaf)

    if not os.path.exists(AUTH_TOKEN_PATH):
        # First run: make sure the cache directory exists, then ask the user
        # to authorize the app.
        authdir = os.path.dirname(AUTH_TOKEN_PATH)
        if not os.path.exists(authdir):
            os.makedirs(authdir)
        token, token_secret = oauth_dance(
            app_name='ftfy-tester',
            consumer_key=consumer_key,
            consumer_secret=consumer_secret,
            token_filename=AUTH_TOKEN_PATH
        )
    else:
        token, token_secret = read_token_file(AUTH_TOKEN_PATH)

    return OAuth(
        token=token,
        token_secret=token_secret,
        consumer_key=consumer_key,
        consumer_secret=consumer_secret
    )
|
||||
@@ -0,0 +1,88 @@
|
||||
"""
|
||||
Implements a StreamTester that runs over Twitter data. See the class
|
||||
docstring.
|
||||
|
||||
This module is written for Python 3 only. The __future__ imports you see here
|
||||
are just to let Python 2 scan the file without crashing with a SyntaxError.
|
||||
"""
|
||||
from __future__ import print_function, unicode_literals
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from ftfy.streamtester import StreamTester
|
||||
|
||||
|
||||
class TwitterTester(StreamTester):
    """
    This class uses the StreamTester code (defined in `__init__.py`) to
    evaluate ftfy's real-world performance, by feeding it live data from
    Twitter.

    This is a semi-manual evaluation. It requires a human to look at the
    results and determine if they are good. The three possible cases we
    can see here are:

    - Success: the process takes in mojibake and outputs correct text.
    - False positive: the process takes in correct text, and outputs
      mojibake. Every false positive should be considered a bug, and
      reported on GitHub if it isn't already.
    - Confusion: the process takes in mojibake and outputs different
      mojibake. Not a great outcome, but not as dire as a false
      positive.

    This tester cannot reveal false negatives. So far, that can only be
    done by the unit tests.
    """
    # Directory that receives one 'tweets.<lang>.txt' log file per language.
    OUTPUT_DIR = './twitterlogs'

    def __init__(self):
        # Tweets collected since the last save, grouped by the user's language.
        self.lines_by_lang = defaultdict(list)
        super().__init__()

    def save_files(self):
        """
        When processing data from live Twitter, save it to log files so that
        it can be replayed later.

        Collected tweets are appended to one file per language, one tweet per
        line, and the in-memory buffer is then emptied.
        """
        # exist_ok avoids the race between checking for the directory and
        # creating it.
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)
        for lang, lines in self.lines_by_lang.items():
            filename = 'tweets.{}.txt'.format(lang)
            fullname = os.path.join(self.OUTPUT_DIR, filename)
            # The context manager closes the file even if a write fails.
            with open(fullname, 'a', encoding='utf-8') as langfile:
                for line in lines:
                    # Flatten newlines so each tweet stays on a single line.
                    print(line.replace('\n', ' '), file=langfile)
        self.lines_by_lang = defaultdict(list)

    def run_sample(self):
        """
        Listen to live data from Twitter, and pass on the fully-formed tweets
        to `check_ftfy`. This requires the `twitter` Python package as a
        dependency.
        """
        from twitter import TwitterStream
        from ftfy.streamtester.oauth import get_auth
        twitter_stream = TwitterStream(auth=get_auth())
        iterator = twitter_stream.statuses.sample()
        for tweet in iterator:
            # Stream items without a 'text' key are skipped.
            if 'text' in tweet:
                self.check_ftfy(tweet['text'])
                if 'user' in tweet:
                    lang = tweet['user'].get('lang', 'NONE')
                    self.lines_by_lang[lang].append(tweet['text'])
                # Flush the buffered tweets to disk once per 10000 texts.
                if self.count % 10000 == 100:
                    self.save_files()
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point: create a TwitterTester and let it consume the
    live Twitter sample stream for as long as the stream stays up.
    """
    TwitterTester().run_sample()
|
||||
|
||||
|
||||
# Only start streaming when this module is executed as a script, not when it
# is imported.
if __name__ == '__main__':
    main()
|
||||
@@ -33,8 +33,6 @@ def guess_filename(filename, options):
|
||||
if not options.get('yaml') and not options.get('json') and not options.get('show_property'):
|
||||
print('For:', filename)
|
||||
|
||||
options['implicit'] = True # Force implicit option in CLI
|
||||
|
||||
guess = api.guessit(filename, options)
|
||||
|
||||
if options.get('show_property'):
|
||||
|
||||
@@ -4,4 +4,4 @@
|
||||
Version module
|
||||
"""
|
||||
# pragma: no cover
|
||||
__version__ = '2.1.3.dev0'
|
||||
__version__ = '3.0.0.dev0'
|
||||
|
||||
@@ -126,7 +126,8 @@ class GuessItApi(object):
|
||||
for match in matches:
|
||||
if isinstance(match.value, six.text_type):
|
||||
match.value = match.value.encode("ascii")
|
||||
return matches.to_dict(options.get('advanced', False), options.get('implicit', False))
|
||||
return matches.to_dict(options.get('advanced', False), options.get('single_value', False),
|
||||
options.get('enforce_list', False))
|
||||
except:
|
||||
raise GuessitException(string, options)
|
||||
|
||||
|
||||
@@ -54,6 +54,10 @@ def build_argument_parser():
|
||||
help='Display the value of a single property (title, series, video_codec, year, ...)')
|
||||
output_opts.add_argument('-a', '--advanced', dest='advanced', action='store_true', default=None,
|
||||
help='Display advanced information for filename guesses, as json output')
|
||||
output_opts.add_argument('-s', '--single-value', dest='single_value', action='store_true', default=None,
|
||||
help='Keep only first value found for each property')
|
||||
output_opts.add_argument('-l', '--enforce-list', dest='enforce_list', action='store_true', default=None,
|
||||
help='Wrap each found value in a list even when property has a single value')
|
||||
output_opts.add_argument('-j', '--json', dest='json', action='store_true', default=None,
|
||||
help='Display information for filename guesses as json output')
|
||||
output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=None,
|
||||
|
||||
@@ -39,12 +39,12 @@ def audio_codec():
|
||||
rebulk.defaults(name="audio_codec", conflict_solver=audio_codec_priority)
|
||||
|
||||
rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
|
||||
rebulk.regex("Dolby", "DolbyDigital", "Dolby-Digital", "DDP?", value="DolbyDigital")
|
||||
rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='AC3')
|
||||
rebulk.regex("DolbyAtmos", "Dolby-Atmos", "Atmos", value="DolbyAtmos")
|
||||
rebulk.regex("AAC", value="AAC")
|
||||
rebulk.regex("AC3D?", value="AC3")
|
||||
rebulk.regex("Flac", value="FLAC")
|
||||
rebulk.regex("DTS", value="DTS")
|
||||
rebulk.string("AAC", value="AAC")
|
||||
rebulk.string('EAC3', 'DDP', 'DD+', value="EAC3")
|
||||
rebulk.string("Flac", value="FLAC")
|
||||
rebulk.string("DTS", value="DTS")
|
||||
rebulk.regex("True-?HD", value="TrueHD")
|
||||
|
||||
rebulk.defaults(name="audio_profile")
|
||||
|
||||
@@ -34,15 +34,17 @@ def container():
|
||||
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
|
||||
'iso', 'vob']
|
||||
torrent = ['torrent']
|
||||
nzb = ['nzb']
|
||||
|
||||
rebulk.regex(r'\.'+build_or_pattern(subtitles)+'$', exts=subtitles, tags=['extension', 'subtitle'])
|
||||
rebulk.regex(r'\.'+build_or_pattern(info)+'$', exts=info, tags=['extension', 'info'])
|
||||
rebulk.regex(r'\.'+build_or_pattern(videos)+'$', exts=videos, tags=['extension', 'video'])
|
||||
rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
|
||||
rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])
|
||||
|
||||
rebulk.defaults(name='container',
|
||||
validator=seps_surround,
|
||||
formatter=lambda s: s.upper(),
|
||||
formatter=lambda s: s.lower(),
|
||||
conflict_solver=lambda match, other: match
|
||||
if other.name in ['format',
|
||||
'video_codec'] or other.name == 'container' and 'extension' in other.tags
|
||||
@@ -51,5 +53,6 @@ def container():
|
||||
rebulk.string(*[sub for sub in subtitles if sub not in ['sub']], tags=['subtitle'])
|
||||
rebulk.string(*videos, tags=['video'])
|
||||
rebulk.string(*torrent, tags=['torrent'])
|
||||
rebulk.string(*nzb, tags=['nzb'])
|
||||
|
||||
return rebulk
|
||||
|
||||
@@ -24,12 +24,18 @@ def edition():
|
||||
conflict_solver=lambda match, other: other
|
||||
if other.name == 'episode_details' and other.value == 'Special'
|
||||
else '__default__')
|
||||
rebulk.string('SE', value='Special Edition', tags='has-neighbor')
|
||||
rebulk.string('se', value='Special Edition', tags='has-neighbor')
|
||||
rebulk.regex('criterion-edition', 'edition-criterion', value='Criterion Edition')
|
||||
rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe Edition')
|
||||
rebulk.regex('limited', 'limited-edition', value='Limited Edition')
|
||||
rebulk.regex('limited', 'limited-edition', value='Limited Edition', tags=['has-neighbor', 'release-group-prefix'])
|
||||
rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical Edition')
|
||||
rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
|
||||
value="Director's cut")
|
||||
value="Director's Cut")
|
||||
rebulk.regex('extended', 'extended-?cut', 'extended-?version',
|
||||
value='Extended', tags=['has-neighbor', 'release-group-prefix'])
|
||||
rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=['has-neighbor', 'release-group-prefix'])
|
||||
for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
|
||||
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
|
||||
rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
|
||||
|
||||
return rebulk
|
||||
|
||||
@@ -5,7 +5,7 @@ Episode title
|
||||
"""
|
||||
from collections import defaultdict
|
||||
|
||||
from rebulk import Rebulk, Rule, AppendMatch, RenameMatch, POST_PROCESS
|
||||
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PROCESS
|
||||
|
||||
from ..common import seps, title_seps
|
||||
from ..common.formatters import cleanup
|
||||
@@ -19,8 +19,12 @@ def episode_title():
|
||||
:return: Created Rebulk object
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
rebulk = Rebulk().rules(EpisodeTitleFromPosition,
|
||||
AlternativeTitleReplace,
|
||||
previous_names = ('episode', 'episode_details', 'episode_count',
|
||||
'season', 'season_count', 'date', 'title', 'year')
|
||||
|
||||
rebulk = Rebulk().rules(RemoveConflictsWithEpisodeTitle(previous_names),
|
||||
EpisodeTitleFromPosition(previous_names),
|
||||
AlternativeTitleReplace(previous_names),
|
||||
TitleToEpisodeTitle,
|
||||
Filepart3EpisodeTitle,
|
||||
Filepart2EpisodeTitle,
|
||||
@@ -28,6 +32,62 @@ def episode_title():
|
||||
return rebulk
|
||||
|
||||
|
||||
class RemoveConflictsWithEpisodeTitle(Rule):
    """
    Remove conflicting matches that might lead to wrong episode_title parsing.

    Within each path part, a match whose name is in ``affected_names`` is
    selected for removal when the match found just before it is named in
    ``previous_names``, the match found just after it is named in
    ``next_names``, and there is at least one non-empty hole (in the same
    group) between it and one of those two neighbours.
    """

    priority = 64
    # NOTE(review): presumably the list returned by `when` is handed to this
    # consequence by rebulk -- confirm against the rebulk Rule contract.
    consequence = RemoveMatch

    def __init__(self, previous_names):
        """
        :param previous_names: names of the matches that are allowed to
            precede an affected match for this rule to apply
        """
        super(RemoveConflictsWithEpisodeTitle, self).__init__()
        self.previous_names = previous_names
        # Names the following match must have for the rule to apply.
        self.next_names = ('streaming_service', 'screen_size', 'format',
                           'video_codec', 'audio_codec', 'other', 'container')
        # These names are only removed when a hole exists *after* the match.
        self.affected_if_holes_after = ('part', )
        # Names of the matches this rule may remove.
        self.affected_names = ('part', 'year')

    def when(self, matches, context):
        """
        Collect the matches (and their parents) that should be removed.

        :param matches: rebulk matches container
        :param context: rule context (not used here)
        :return: list of matches to remove, possibly empty
        """
        to_remove = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: m.name in self.affected_names):
                # Neighbour lookups are limited to non-private matches lying
                # in the same filepart; `fp=filepart` binds the loop variable
                # at lambda creation time.
                before = matches.previous(match, index=0,
                                          predicate=lambda m, fp=filepart: not m.private and m.start >= fp.start)
                if not before or before.name not in self.previous_names:
                    continue

                after = matches.next(match, index=0,
                                     predicate=lambda m, fp=filepart: not m.private and m.end <= fp.end)
                if not after or after.name not in self.next_names:
                    continue

                # Group marker (if any) found at the current match.
                group = matches.markers.at_match(match, predicate=lambda m: m.name == 'group', index=0)

                def has_value_in_same_group(current_match, current_group=group):
                    """Return true if current match has value and belongs to the current group."""
                    return current_match.value.strip(seps) and (
                        current_group == matches.markers.at_match(current_match,
                                                                  predicate=lambda mm: mm.name == 'group', index=0)
                    )

                # Holes on either side of the match that still hold text once
                # separators are stripped.
                holes_before = matches.holes(before.end, match.start, predicate=has_value_in_same_group)
                holes_after = matches.holes(match.end, after.start, predicate=has_value_in_same_group)

                # No qualifying hole on either side: keep the match.
                if not holes_before and not holes_after:
                    continue

                # 'part' matches are only dropped when a hole follows them.
                if match.name in self.affected_if_holes_after and not holes_after:
                    continue

                to_remove.append(match)
                if match.parent:
                    to_remove.append(match.parent)

        return to_remove
|
||||
|
||||
|
||||
class TitleToEpisodeTitle(Rule):
|
||||
"""
|
||||
If multiple different title are found, convert the one following episode number to episode_title.
|
||||
@@ -65,12 +125,14 @@ class EpisodeTitleFromPosition(TitleBaseRule):
|
||||
"""
|
||||
dependency = TitleToEpisodeTitle
|
||||
|
||||
def __init__(self, previous_names):
|
||||
super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
|
||||
self.previous_names = previous_names
|
||||
|
||||
def hole_filter(self, hole, matches):
|
||||
episode = matches.previous(hole,
|
||||
lambda previous: any(name in previous.names
|
||||
for name in ['episode', 'episode_details',
|
||||
'episode_count', 'season', 'season_count',
|
||||
'date', 'title', 'year']),
|
||||
for name in self.previous_names),
|
||||
0)
|
||||
|
||||
crc32 = matches.named('crc32')
|
||||
@@ -88,9 +150,6 @@ class EpisodeTitleFromPosition(TitleBaseRule):
|
||||
return False
|
||||
return super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context)
|
||||
|
||||
def __init__(self):
|
||||
super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
|
||||
|
||||
def when(self, matches, context):
|
||||
if matches.named('episode_title'):
|
||||
return
|
||||
@@ -104,6 +163,10 @@ class AlternativeTitleReplace(Rule):
|
||||
dependency = EpisodeTitleFromPosition
|
||||
consequence = RenameMatch
|
||||
|
||||
def __init__(self, previous_names):
|
||||
super(AlternativeTitleReplace, self).__init__()
|
||||
self.previous_names = previous_names
|
||||
|
||||
def when(self, matches, context):
|
||||
if matches.named('episode_title'):
|
||||
return
|
||||
@@ -115,10 +178,7 @@ class AlternativeTitleReplace(Rule):
|
||||
if main_title:
|
||||
episode = matches.previous(main_title,
|
||||
lambda previous: any(name in previous.names
|
||||
for name in ['episode', 'episode_details',
|
||||
'episode_count', 'season',
|
||||
'season_count',
|
||||
'date', 'title', 'year']),
|
||||
for name in self.previous_names),
|
||||
0)
|
||||
|
||||
crc32 = matches.named('crc32')
|
||||
|
||||
@@ -98,7 +98,7 @@ def episodes():
|
||||
episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
|
||||
or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)
|
||||
"""
|
||||
values = match.children.to_dict(implicit=True)
|
||||
values = match.children.to_dict()
|
||||
if 'season' in values and is_iterable(values['season']):
|
||||
# Season numbers must be in natural order to be validated.
|
||||
if not list(sorted(values['season'])) == values['season']:
|
||||
@@ -231,14 +231,16 @@ def episodes():
|
||||
formatter={'season': int, 'other': lambda match: 'Complete'})
|
||||
|
||||
# 12, 13
|
||||
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int}) \
|
||||
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
|
||||
disabled=lambda context: context.get('type') == 'movie') \
|
||||
.defaults(validator=None) \
|
||||
.regex(r'(?P<episode>\d{2})') \
|
||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
||||
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')
|
||||
|
||||
# 012, 013
|
||||
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int}) \
|
||||
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
|
||||
disabled=lambda context: context.get('type') == 'movie') \
|
||||
.defaults(validator=None) \
|
||||
.regex(r'0(?P<episode>\d{1,2})') \
|
||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
||||
@@ -246,7 +248,8 @@ def episodes():
|
||||
|
||||
# 112, 113
|
||||
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
|
||||
disabled=lambda context: not context.get('episode_prefer_number', False)) \
|
||||
disabled=lambda context: (not context.get('episode_prefer_number', False) or
|
||||
context.get('type') == 'movie')) \
|
||||
.defaults(validator=None) \
|
||||
.regex(r'(?P<episode>\d{3,4})') \
|
||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
||||
@@ -287,7 +290,8 @@ def episodes():
|
||||
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode', 'weak-duplicate'],
|
||||
formatter={'season': int, 'episode': int, 'version': int},
|
||||
conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
|
||||
disabled=lambda context: context.get('episode_prefer_number', False)) \
|
||||
disabled=lambda context: (context.get('episode_prefer_number', False) or
|
||||
context.get('type') == 'movie')) \
|
||||
.defaults(validator=None) \
|
||||
.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
|
||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
||||
@@ -460,8 +464,21 @@ class RemoveWeakIfMovie(Rule):
|
||||
return context.get('type') != 'episode'
|
||||
|
||||
def when(self, matches, context):
|
||||
if matches.named('year'):
|
||||
return matches.tagged('weak-movie')
|
||||
to_remove = []
|
||||
to_ignore = set()
|
||||
remove = False
|
||||
for filepart in matches.markers.named('path'):
|
||||
year = matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'year', index=0)
|
||||
if year:
|
||||
remove = True
|
||||
next_match = matches.next(year, predicate=lambda m, fp=filepart: m.private and m.end <= fp.end, index=0)
|
||||
if next_match and not matches.at_match(next_match, predicate=lambda m: m.name == 'year'):
|
||||
to_ignore.add(next_match.initiator)
|
||||
|
||||
if remove:
|
||||
to_remove.extend(matches.tagged('weak-movie', predicate=lambda m: m.initiator not in to_ignore))
|
||||
|
||||
return to_remove
|
||||
|
||||
|
||||
class RemoveWeakIfSxxExx(Rule):
|
||||
|
||||
@@ -39,8 +39,7 @@ COMMON_WORDS_STRICT = frozenset(['brazil'])
|
||||
|
||||
UNDETERMINED = babelfish.Language('und')
|
||||
|
||||
SYN = {('und', None): ['unknown', 'inconnu', 'unk'],
|
||||
('ell', None): ['gr', 'greek'],
|
||||
SYN = {('ell', None): ['gr', 'greek'],
|
||||
('spa', None): ['esp', 'español', 'espanol'],
|
||||
('fra', None): ['français', 'vf', 'vff', 'vfi', 'vfq'],
|
||||
('swe', None): ['se'],
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user