Compare commits

...

1028 Commits

Author SHA1 Message Date
panni 6d444ebe99 back from dev 2018-06-15 15:51:32 +02:00
panni 237eafed35 release 2.5.7.2663 2018-06-15 15:50:39 +02:00
panni fbc5069fb8 submod: HI: be less aggressive with HI_before_colon_noncaps; fixes #510 2018-06-15 15:42:10 +02:00
panni d23c44589e assrt/supersubtitles: adjust code style 2018-06-15 15:22:11 +02:00
pannal 42cc500b05 Update de.json (POEditor.com) 2018-06-15 15:11:10 +02:00
panni 81760192dc Merge branch 'master' into develop-2.5 2018-06-15 15:06:26 +02:00
panni 2cb077423d addic7ed: use correct throttle hours 2018-06-15 15:05:28 +02:00
panni de8aaaa5e5 addic7ed: raise TooManyRequests and throttle in case of too frequent login 2018-06-15 15:05:02 +02:00
panni b9ebd4e1d6 addic7ed: reduce DownloadLimitExceeded to 3 hours 2018-06-15 15:03:03 +02:00
pannal 8fdf1e841c Merge pull request #501 from morpheus133/provider_supersubtitles
Provider supersubtitles
2018-06-15 15:02:26 +02:00
pannal 9df92d0262 Merge pull request #523 from dimotsai/support-traditional-chinese-2
Add provider assrt.net (Chinese)
2018-06-15 15:02:08 +02:00
panni a07d5aa440 addic7ed: cache login data instead of re-login per search 2018-06-15 05:38:39 +02:00
panni 54bd222605 core: fix plugin_pin_mode 2018-06-15 04:58:43 +02:00
panni 6487258136 i18n: revert to english in case of error 2018-06-15 04:58:29 +02:00
panni d1935a4439 i18n: fix danish placeholders 2018-06-15 04:58:00 +02:00
pannal 026c30642e Update hu.json (POEditor.com) 2018-06-15 04:37:12 +02:00
pannal 036d036a61 Update es.json (POEditor.com) 2018-06-15 04:37:10 +02:00
pannal 2092d44627 Update nl.json (POEditor.com) 2018-06-15 04:37:07 +02:00
pannal c6e7e64ba3 Update de.json (POEditor.com) 2018-06-15 04:37:05 +02:00
pannal a8f5ad6435 Update da.json (POEditor.com) 2018-06-15 04:37:03 +02:00
panni afa0c3a1b0 i18n: add spanish, hungarian, dutch 2018-06-15 04:33:49 +02:00
panni b3132d57b2 Merge branch 'develop-2.5' into i18n 2018-06-15 02:25:57 +02:00
pannal 0a2a6b558f Update de.json (POEditor.com) 2018-06-15 02:24:00 +02:00
pannal adb9926928 Update da.json (POEditor.com) 2018-06-15 02:23:57 +02:00
panni 3ce25007b5 core: notify executable: drop pythonpath from env altogether if it was altered by plex 2018-06-14 15:45:33 +02:00
panni 5690ada2a7 core: notify executable: log error instead of info; properly clean up PYTHONPATH environment variable 2018-06-12 04:22:05 +02:00
panni 76481186e9 core: notify executable: unset PYTHONPATH in env if given and contains Plex 2018-06-10 22:45:18 +02:00
panni 8d2d2341c8 #355 don't use explicit env for mswindows; set working directory to executable directory 2018-06-07 16:10:16 +02:00
panni 4e20d282f7 #355 fix logging 2018-06-07 15:57:03 +02:00
morpheus133 edc3ce1ba4 Correct return value 2018-06-07 11:36:56 +02:00
Dimo Tsai b9249ff09a Fix language order in preferences 2018-06-07 14:08:02 +08:00
panni c3b2ffa97d submod: HI: support "&" and "+" in hi_before_colon 2018-06-06 21:59:31 +02:00
panni 4e3b8ee3c2 opensubtitles: only try logging out if token existed 2018-06-06 18:43:27 +02:00
panni a749ed4837 core: handle "ENGLISH" 2018-06-06 18:40:29 +02:00
panni 67ba6be6e2 #355 try finding executable in path 2018-06-05 14:38:15 +02:00
morpheus133 7a47e6617d - Changed coding to UTF8
- Using .json() instead of manually parsing
2018-06-05 12:29:06 +02:00
panni 4a4c6e7df2 enable logging of notification executable's output and error streams when its exit code is 1, #355 2018-05-30 16:20:38 +02:00
Dimo Tsai 5661528862 Add assrt provider and language converter
Since shooter.cn is not available any longer, implement a new provider
for Chinese as an alternative.
2018-05-29 23:28:21 +08:00
Dimo Tsai 696e9d6b64 Support Traditional Chinese
Since 'zh' always represents simplified Chinese in opensubtitles.org, add
'zh-Hant' as an additional language option in the menu. And fix the language
converter of opensubtitles.
2018-05-29 21:06:24 +08:00
panni c0aa465827 bump dev 2018-05-26 06:07:42 +02:00
panni a6120ae27a libfilebot: use filebot instead of xattr for darwin, just to be safe 2018-05-26 06:06:58 +02:00
panni ba8a165aa5 libfilebot: filebot executable fallback 2018-05-26 05:50:07 +02:00
panni 833d7072ed libfilebot: remove native xattr handling and use filebot itself 2018-05-26 05:06:16 +02:00
panni 9829137001 libfilebot: add sbin folders to environment as well 2018-05-25 17:41:59 +02:00
panni c686214f56 bump dev 2018-05-25 17:40:34 +02:00
panni 2252d7ea6a libfilebot: set correct environment for xattr calls 2018-05-25 17:39:42 +02:00
panni e7fbfca2d7 libfilebot: log output as well in case of returncode 1 2018-05-25 17:25:48 +02:00
panni 9ca959a20a libfilebot: use subprocess.Popen directly instead of check_output 2018-05-25 17:23:42 +02:00
panni bd8e26ecab add additional debug logging in case of filebot attr retrieval error 2018-05-25 16:57:23 +02:00
panni 451b34dceb bump dev 2018-05-25 16:39:16 +02:00
panni 02761db660 and even more logging 2018-05-25 16:38:51 +02:00
panni 42b7e9fa62 add logging for the filebot refiner 2018-05-25 16:34:02 +02:00
panni edf6c25e17 add libfilebot to logged dependencies 2018-05-25 16:32:17 +02:00
panni e91aac65cc add debug info 2018-05-25 16:08:00 +02:00
panni 01d5a18af8 bump dev 2018-05-25 16:06:13 +02:00
panni 70c1142f8d #518 correctly define functions for darwin and win32 2018-05-25 16:05:50 +02:00
panni 8b6b162073 try fixing #518 2018-05-25 15:48:13 +02:00
panni 5199fbe0cb fix #520 2018-05-25 15:28:11 +02:00
panni 924de62dff fix missing string, thanks @morpheus133 2018-05-17 15:08:39 +02:00
panni 4cba7d8684 Merge branch 'develop-2.5' into i18n
# Conflicts:
#	Contents/Code/interface/main.py
#	Contents/Code/interface/menu_helpers.py
2018-05-17 15:05:31 +02:00
panni f3f9ab1360 back to dev 2018-05-17 15:01:14 +02:00
panni 682d1d85ce remove dev flag 2018-05-17 14:58:38 +02:00
panni a1cc9a2049 release 2.5.4.2541 2018-05-17 14:41:36 +02:00
panni a7f7b3e572 bump dev changelog 2018-05-17 14:40:04 +02:00
panni 7c32a7c2c8 providers: addic7ed: set correct headers for endpoints 2018-05-17 14:35:42 +02:00
panni e842579f25 providers: addic7ed: handle empty r.content 2018-05-16 19:17:15 +02:00
panni bdd9134a0e providers: addic7ed: adapt to new (broken) search handling; use new ajax show-season endpoint 2018-05-16 18:39:00 +02:00
panni a01552e88c menu: ignore options: fix plugin not responding, fix unicode strings; resolve #509 2018-05-16 18:11:27 +02:00
panni 824957ae85 bump dev 2018-04-26 06:30:13 +02:00
pannal af335d5565 Update de.json (POEditor.com) 2018-04-26 06:24:25 +02:00
panni 2f9eb51868 i18n: add custom advanced_settings.json setting 2018-04-26 06:21:55 +02:00
panni aebbc17643 Merge branch 'develop-2.5' into i18n
# Conflicts:
#	Contents/Libraries/Shared/subliminal_patch/http.py
2018-04-26 06:21:09 +02:00
panni 84e78e1e20 core: try retrieving advanced_settings.json from the path given, which may be a file path or a directory 2018-04-26 06:19:42 +02:00
panni 89bb747ee3 update provider test; providers: opensubtitles: use e.response, not response in case of http error 2018-04-26 06:13:10 +02:00
panni 62e37dbd09 bump dev 2018-04-25 16:10:23 +02:00
panni edef9cb936 providers: opensubtitles: use new response handling for DownloadSubtitles as well 2018-04-25 16:09:36 +02:00
panni 3ae02c3050 providers: opensubtitles: properly handle opensubtitles responses with the new requests xmlrpc handler 2018-04-25 16:01:26 +02:00
panni a4016616a1 log usage of advanced config; add "adv_cfg_path" config variable to debug output 2018-04-25 15:33:19 +02:00
morpheus133 b4855611c4 removed unnecessary comments 2018-04-18 20:34:39 +02:00
morpheus133 1b44f6d220 Added Hungarian provider supersubtitles
https://www.feliratok.info/

Support:
   Movies
   Series
   SeriesPacks
2018-04-18 20:28:53 +02:00
panni b0f0af087b back to dev 2018-04-17 17:25:35 +02:00
panni 1344f7255d release 2.5.4.2527 2018-04-17 17:14:11 +02:00
panni 39fe3b0fd6 back from dev 2018-04-17 17:02:24 +02:00
panni 0ba676b5e7 Merge branch 'develop-2.5' 2018-04-17 17:02:10 +02:00
panni 4d6897c138 core: get_item: don't fail on socket timeout #498 2018-04-17 14:09:50 +02:00
panni c7c6ba09e9 bump dev 2018-04-16 16:04:30 +02:00
panni c06baa67f1 config: add optional custom path to advanced_settings.json (mostly for NVIDIA SHIELD) 2018-04-16 16:03:08 +02:00
panni cdb7946c00 bump dev 2018-04-16 15:14:10 +02:00
panni bdb5da8df0 core: simplify menu handling; comment out unneeded stuff for now; return to main menu in case of debounce 2018-04-16 15:13:42 +02:00
panni e961c8d3aa fix ZeroDivisionError, resolve #496 2018-04-16 13:24:31 +02:00
panni 3eb1a9eef8 menu: new debounce/menu history implementation, WIP 2018-04-12 19:14:44 +02:00
panni 67aead8fcc providers: addic7ed: reduce show cache to 1 week 2018-04-12 16:07:00 +02:00
panni fd764d0576 core: unrar check: be less verbose 2018-04-10 10:33:29 +02:00
panni dad55d7922 refiners: tvdb: fix spelling 2018-04-10 00:14:50 +02:00
panni fb32772512 core: clamp menu history to 25 items 2018-04-10 00:14:37 +02:00
panni 918ce65acd core: fix scandir errors 2018-04-07 22:51:11 +02:00
panni 9f03b9ee71 core: http cleanup 2018-04-07 05:56:08 +02:00
panni 2235de1a2d Merge remote-tracking branch 'origin/develop-2.5' into develop-2.5 2018-04-07 04:35:00 +02:00
panni 8804c89f04 clarify ssl_no_verify 2018-04-07 04:34:49 +02:00
pannal 2e8805015c Update README.md 2018-04-07 04:32:39 +02:00
pannal f435ca2961 Update README.md 2018-04-07 04:31:04 +02:00
pannal 71c3761b20 Update README.md 2018-04-07 04:30:18 +02:00
panni e4c441043a bump dev 2018-04-06 19:40:17 +02:00
panni 8a655a5d6e don't rely on _check_unrar_tool at all, rely on custom_check 2018-04-06 19:38:51 +02:00
panni 777c21ce87 try local unrar last; explicitly custom check 2018-04-06 19:32:54 +02:00
panni e22ff09691 wow. 2018-04-06 19:23:43 +02:00
panni d0f685e87c moep 2018-04-06 19:18:21 +02:00
panni 8f71c417a9 bump dev 2018-04-06 19:14:30 +02:00
panni b62977c494 more debugging 2018-04-06 19:14:06 +02:00
panni 8d11136c1c restore ORIG_UNRAR_TOOL as well 2018-04-06 19:07:12 +02:00
panni 4a7ea43095 bump dev 2018-04-06 19:05:21 +02:00
panni 8fe4bd2751 add rarfile debug test 2018-04-06 19:04:52 +02:00
panni 38bb819a24 set ORIG_UNRAR_TOOL as well 2018-04-06 18:51:13 +02:00
panni dbe75ad18d bump dev 2018-04-06 18:32:40 +02:00
panni 760441b45a core: check custom unrar tool first, then unrar, then bundled 2018-04-06 18:32:16 +02:00
panni 56645b601b legendastv: correctly check unrar tool 2018-04-06 18:31:57 +02:00
panni 885e4bc99f legendastv: disable if unrar wasn't found 2018-04-06 18:15:10 +02:00
panni b04e5510fd damn underscore check 2018-04-06 18:10:03 +02:00
panni 806000725b explicitly call rarfile._check_unrar_tool() after setting rarfile.UNRAR_TOOL 2018-04-06 18:07:00 +02:00
panni 71270641d3 Revert "core: add own RarFile.read implementation; hopefully fixes #311"
This reverts commit 8d97fb7
2018-04-06 18:05:43 +02:00
panni bf4f2bec91 always use ORIG_OPEN_ARGS for unrar 2018-04-06 17:56:02 +02:00
panni dafad3a7a3 bump dev 2018-04-06 17:49:18 +02:00
panni 182a1cc3fb remove possibly unneeded UNRAR_TOOL fix 2018-04-06 17:48:30 +02:00
panni 4b7664aaa6 use rarfile explicitly instead of individual imports 2018-04-06 17:44:41 +02:00
panni 2050aef1e5 properly set rar.UNRAR_TOOL? 2018-04-06 17:41:51 +02:00
panni 390af30bf6 add cmd debug to rar; set rar.UNRAR_TOOL 2018-04-06 17:22:49 +02:00
panni 698f48b1fd bump dev 2018-04-06 17:03:56 +02:00
panni 2e5cc61ac6 add unrar license 2018-04-06 17:01:18 +02:00
panni 8d97fb7633 core: add own RarFile.read implementation; hopefully fixes #311 2018-04-06 16:54:37 +02:00
panni 8a41c393bb add 3rd party licenses reference to about screen 2018-04-06 14:50:28 +02:00
panni 6ae38359d7 add licenses for 3rd party modules 2018-04-06 14:48:13 +02:00
panni 7ddd1e3497 cache: explicitly reset cache sync and buffer after clear 2018-04-06 14:23:31 +02:00
panni 20a0993aa8 tasks: move provider slack vars to class 2018-04-06 14:10:40 +02:00
panni 57d58056de Merge remote-tracking branch 'origin/i18n' into i18n 2018-04-06 13:58:33 +02:00
panni 06c6fa4d01 core: due to enum value changes, add plugin_mode and plugin_pin_mode suffixes 2018-04-06 13:58:21 +02:00
panni 41f884e129 i18n: lowercase language identifier (to make it actually work) 2018-04-06 13:53:29 +02:00
ukdtom 77a74c8839 Updated danish a tad 2018-04-05 18:59:07 +02:00
panni c198788017 i18n: defaultlocale placeholder 2018-04-05 18:01:28 +02:00
panni 4cbfa21b52 i18n: debug i18n: recognize %i when checking strings 2018-04-05 17:36:34 +02:00
panni f3754de394 i18n: make mod descriptions properly translatable; fix current mods display 2018-04-05 17:34:59 +02:00
pannal d47ad013cd Update de.json (POEditor.com) 2018-04-05 17:26:09 +02:00
panni 8c4372d0d3 i18n: add missing translations; fix summary passthrough with explicit unicode casting 2018-04-05 17:11:45 +02:00
panni 1c7b9145c8 i18n: add missing modification translations 2018-04-05 16:57:07 +02:00
panni c477f53ee6 Merge remote-tracking branch 'origin/i18n' into i18n 2018-04-05 16:51:06 +02:00
panni f99f03dc33 i18n: inject _ into helpers; fix untranslated strings; display translated language name 2018-04-05 16:50:58 +02:00
pannal 2ddd786819 Update de.json (POEditor.com) 2018-04-05 16:50:24 +02:00
pannal 6e604f98e3 Update de.json (POEditor.com) 2018-04-05 16:32:33 +02:00
panni 729404d05f i18n: replace badly translatable terms 2018-04-05 16:01:46 +02:00
panni de50dfdb7c i18n: replace badly translatable terms 2018-04-05 15:09:42 +02:00
panni 7bda522f0a i18n: replace badly translatable terms 2018-04-05 15:02:13 +02:00
panni 6c39fb0649 i18n: remove obsolete translations 2018-04-05 14:56:29 +02:00
panni a7342ac77e i18n: replace badly translatable strings 2018-04-05 14:51:51 +02:00
panni 5d45b8bbdd opensubtitles: log timeout 2018-04-05 14:33:50 +02:00
panni aa0ff38ed7 opensubtitles: add advanced setting for request timeout 2018-04-05 13:03:56 +02:00
panni d55aa3b569 refiners: drone: add advanced setting to skip SSL verification 2018-04-05 13:00:55 +02:00
panni d86a99fb32 refiners: drone: use certifi pem file for https connections 2018-04-05 11:06:22 +02:00
panni c687152724 readme: rename "channel" to "interface" 2018-04-05 01:28:20 +02:00
panni 65ec539875 rename "channel" to "interface" 2018-04-05 01:25:11 +02:00
panni 6dba0792d2 i18n: unicodize the result of _() 2018-04-04 15:32:43 +02:00
pannal df78cecb31 Update de.json (POEditor.com) 2018-04-04 15:28:33 +02:00
pannal 3d8687f69d Update de.json (POEditor.com) 2018-04-04 15:25:24 +02:00
pannal 92196897a9 Update da.json (POEditor.com) 2018-04-04 00:01:03 +02:00
panni 4206edfb13 i18n: revert last commit; add blank de/da 2018-04-04 00:00:30 +02:00
panni c08e63ab80 i18n: add non-blank de.json with languages translated; add base_template.json 2018-04-03 23:56:26 +02:00
panni 03646b4f87 Merge remote-tracking branch 'origin/i18n' into i18n
# Conflicts:
#	Contents/Strings/de.json
2018-04-03 23:52:58 +02:00
panni d9fa860b0c i18n: add non-blank de.json 2018-04-03 23:52:49 +02:00
pannal 93d8494ddc Update de.json (POEditor.com) 2018-04-03 23:49:01 +02:00
panni bd982958fa i18n: add blank de.json 2018-04-03 23:47:52 +02:00
panni e280b62f5c i18n: improve debug mode improper usage detection 2018-04-03 23:27:53 +02:00
panni 2bb050de40 i18n: add optional debug mode that checks correct supply of args/kwargs for a format string 2018-04-03 22:49:26 +02:00
panni f3ed3bf0bf providers: opensubtitles: return compatible status code in case of error
(cherry picked from commit 7945753)
2018-04-03 19:05:23 +02:00
panni 79457536f2 providers: opensubtitles: return compatible status code in case of error 2018-04-03 19:03:33 +02:00
panni 048f930da1 i18n: add missing strings 2018-04-03 19:02:41 +02:00
panni 6aa8108fce i18n: string update finished 2018-04-03 18:48:37 +02:00
panni c234f75d7e i18n: mid-string-update commit WIP 2018-04-03 17:48:57 +02:00
panni 064b634f77 i18n: _: don't fail check on localized string 2018-04-03 17:48:41 +02:00
panni 8d83184cd1 i18n: _: log error instead of raising an exception, which breaks menu code 2018-04-03 17:13:51 +02:00
panni 7a5112bee5 i18n: en: add missing string 2018-04-03 17:09:18 +02:00
panni 0c549c6bda i18n: support kwargs in _ in addition to {} as first non-keyword-argument 2018-04-03 17:03:33 +02:00
panni c48e704502 i18n: replace all F and L calls with _ 2018-04-03 17:00:53 +02:00
panni bec66895d9 Merge branch 'develop-2.5' into i18n
# Conflicts:
#	Contents/Info.plist
2018-04-03 16:57:11 +02:00
panni c9f1e8a8bb core: add i18n module; implement our own version of F and L as _ 2018-04-03 16:54:46 +02:00
ukdtom ac209e7ee2 Prefs translated 2018-04-03 00:27:08 +02:00
ukdtom 525256e15c Everything in /Contents/Code/Interface done 2018-04-02 21:16:49 +02:00
ukdtom 3b8c965f4b refresh_items done 2018-04-02 20:07:08 +02:00
ukdtom 8f8da8e6ea fixed menu_helpers 2018-04-02 20:01:32 +02:00
ukdtom ac9b81abea menu.py done 2018-04-02 19:59:58 +02:00
ukdtom 1c39c55423 menu_helpers done, but look at line 3 2018-04-02 19:20:05 +02:00
ukdtom ca11273b37 menu_helpers.py done, but look at line 3 2018-04-02 19:18:41 +02:00
ukdtom b532a60c3d main.py translation, but look at line 3 for misses 2018-04-02 19:02:36 +02:00
pannal 941662e9f2 Update LICENSE 2018-04-02 17:10:47 +02:00
panni 4d1e4c3ebe core: update rarfile to 2704344 2018-04-02 02:08:47 +02:00
panni f66fd9bcae core: update unrar to 5.60b2 for MacOSX 2018-04-02 01:54:27 +02:00
panni f5c5ecd1b9 core: add rarfile.BadRarFile debug info 2018-04-02 00:59:45 +02:00
panni f9b7855d19 core: early bailout on custom unrar environment variable; try supplied unrar if found and default unrar if applicable 2018-04-01 23:49:13 +02:00
panni 418a8af99a update linux-i386 and macosx-i386 unrar binaries to 5.5.0 2018-04-01 23:43:11 +02:00
ukdtom ce3b4661de Added item_details.py 2018-04-01 23:24:02 +02:00
ukdtom 4b811f38b0 Switch i18n to dev mode :) 2018-04-01 23:19:53 +02:00
ukdtom bba2823065 Fixed nasty syntax for placeholders, as well as some PEP8 2018-04-01 21:42:11 +02:00
ukdtom 5547e9658d Advanced.py done 2018-04-01 21:32:15 +02:00
root e14cbb19f5 make unrars executable 2018-04-01 17:52:45 +02:00
panni 0613a001c5 core: log used unrar location 2018-04-01 07:40:49 +02:00
panni 2970ba69f8 add and check unrar for aarch64, arm (armv5tel), linux/i386, MacOSX/i386 2018-04-01 07:26:48 +02:00
panni 2c6b811d4d add unrar_MacOSX_10.13.2_64bit; try using supplied UnRAR on MacOSX i386 2018-03-31 17:41:06 +02:00
panni d5a3caf961 back to dev 2018-03-31 16:50:24 +02:00
panni 7e64778546 Merge branch 'develop-2.5'
# Conflicts:
#	Contents/Info.plist
#	README.md
2018-03-31 16:49:28 +02:00
panni 1afd0d7c28 add Jose to beta team 2018-03-31 16:47:35 +02:00
panni 3027a3c3e8 Merge remote-tracking branch 'origin/develop-2.5' into develop-2.5 2018-03-31 16:47:11 +02:00
panni 3d7df100ff release 2.5.3.2452 2018-03-31 16:46:59 +02:00
pannal 4de5030196 Update README.md 2018-03-31 03:43:32 +02:00
pannal e3bfe368db Update README.md 2018-03-31 03:34:01 +02:00
panni e45fe0aaa0 add doc 2018-03-30 22:09:16 +02:00
panni 807d758bfa bump dev 2018-03-30 18:43:26 +02:00
panni 7c5164b9a5 core: cleanup #2 2018-03-30 18:41:36 +02:00
panni 1e15fb8e43 core: cleanup 2018-03-30 18:07:48 +02:00
panni ae996b4b9a core: revert last fix; explicitly store subs after writing stored subs to disk 2018-03-30 18:02:43 +02:00
panni 3259a7eec9 core: also store subtitle info on bare_save with set_current 2018-03-30 17:41:37 +02:00
panni 39a5aa1d63 core: metadata storage: kill existing metadata subtitles explicitly upon storing a new one 2018-03-30 17:17:28 +02:00
panni dbe378ad82 core: metadata storage: mediaproxy doesn't support item assignment 2018-03-30 16:53:51 +02:00
panni a316c11974 core: advanced settings: fix typo 2018-03-30 16:37:24 +02:00
panni 2fd05c2464 core: metadata storage: only parse latest metadata subtitle in localmedia 2018-03-30 16:21:37 +02:00
panni 8adabb946e core: metadata storage: only allow one subtitle per language 2018-03-30 16:17:32 +02:00
panni 3f251b9c0e bump dev 2018-03-30 07:04:55 +02:00
panni aadd60c3ad providers: opensubtitles: remove use https setting; add advanced setting; add debug 2018-03-30 07:03:57 +02:00
panni 99cc994865 providers: opensubtitles: mask token 2018-03-30 06:36:59 +02:00
panni da0355ca88 bump dev 2018-03-30 06:31:59 +02:00
panni aaa7c0934a core: update certifi to 2018.01.18 2018-03-30 06:31:27 +02:00
panni 03c70f4dfa providers: opensubtitles: use new requests based transport by default; don't use keepalive 2018-03-30 06:30:01 +02:00
panni 0704609fa5 providers: opensubtitles: try new transport 2018-03-30 05:56:20 +02:00
panni d26569b26f providers: opensubtitles: more debug info; add option to disable HTTPS 2018-03-30 05:26:41 +02:00
panni 007e93e526 providers: opensubtitles: more debug info 2018-03-30 05:16:00 +02:00
panni 8feec0284d bump dev 2018-03-27 17:34:27 +02:00
panni eaa79fb3bd submod: common: reduce multi spaces to one 2018-03-27 17:27:38 +02:00
panni 3af5102e93 submod: OCR: fix III'll=I'll 2018-03-27 17:14:29 +02:00
panni d936460d83 submod: common: extend non_word_only matching 2018-03-27 17:13:05 +02:00
panni f51649c59f fix uppercase Submit
(cherry picked from commit be1e33b)
2018-03-27 00:56:52 +02:00
panni be1e33b555 fix uppercase Submit 2018-03-27 00:56:28 +02:00
panni 059645dec7 menu: list subtitles: only skip items if hash verifiable and verification fails 2018-03-26 22:01:16 +02:00
panni 6439becd7d providers: for non-hash-verifiable providers (napiprojekt in this case) don't try verifying series/season/episode; fixes #478 2018-03-26 17:51:30 +02:00
panni 917fbc1ea2 release 2.5.3.2422 2018-03-26 16:39:45 +02:00
panni c97fee90b7 Merge remote-tracking branch 'origin/master' 2018-03-26 16:39:31 +02:00
panni 35d04946b4 release 2.5.3.2422 2018-03-26 16:39:08 +02:00
panni d0d71d626e providers: opensubtitles: speedup for result format fix 2018-03-26 16:32:02 +02:00
panni 5a1b39c67e providers: addic7ed: use new search endpoint 2018-03-26 16:27:42 +02:00
panni a8cbd37697 bump dev 2018-03-25 16:07:41 +02:00
panni b2bac94009 providers: don't use retry logic in case of ResponseNotReady 2018-03-25 16:05:39 +02:00
panni d88b7e2a17 providers: catch ResponseNotReady in list_subtitles_provider as well 2018-03-25 16:04:09 +02:00
panni 68bf35d83d don't fail on stream.language_code=None, fixes #473 2018-03-25 16:01:20 +02:00
pannal a78e6587ac Update README.md 2018-03-24 06:31:00 +01:00
panni 21f715a321 back to dev 2018-03-24 03:13:12 +01:00
panni 18a5dfd81f update version to 2.5.3.2414 2018-03-24 03:12:40 +01:00
panni 2a7b5e2efb back from dev 2018-03-24 03:11:41 +01:00
panni 0d63b0361f Merge branch 'develop-2.5' 2018-03-24 03:11:28 +01:00
panni 4e301ddd24 release 2.5.3.2408 2018-03-24 03:11:04 +01:00
panni bc182276ac submod: common: replace more than 3 consecutive dots with 3 dots; also replace three dashes with em dash 2018-03-24 02:59:06 +01:00
panni 4980523d10 core: don't fail on empty plex item API result 2018-03-24 02:37:33 +01:00
panni 85baf58b55 providers: hosszupuska: improve implementation 2018-03-24 02:32:46 +01:00
panni d7a4d02564 providers: argenteam: streamline; improve subtitle repr 2018-03-24 02:31:16 +01:00
panni 0e6f4c45db submod: HI: HI_before_colon_noncaps, don't assume single quotes are sentence enders 2018-03-23 22:17:24 +01:00
panni 932cadce3c providers: opensubtitles: add fallback for dict based query response in contrast to list/array based 2018-03-23 14:17:08 +01:00
panni 3926ea9c69 providers: argenteam: add subtitle.releases 2018-03-20 21:17:55 +01:00
panni dd1495c881 update year 2018-03-20 13:35:33 +01:00
panni 8c27e6aade bump dev 2018-03-20 13:35:21 +01:00
panni ba2774eeb5 providers: argenteam: avoid unnecessary typecasting 2018-03-20 13:12:13 +01:00
panni 8e854a8d64 providers: argenteam: doc 2018-03-20 13:09:41 +01:00
panni 86f5ed198f providers: argenteam: logging consistency 2018-03-20 13:08:05 +01:00
panni cc57520c71 providers: argenteam: rename multi_id_throttle to multi_result_throttle 2018-03-20 13:06:12 +01:00
panni 8d9f8960b2 providers: argenteam: add debug output; try to be even faster with movies in case of matching imdb id 2018-03-20 12:58:27 +01:00
panni f66573620b providers: argenteam: try quick matching movies; reduce provider impact 2018-03-20 12:41:36 +01:00
panni 3544a0e7f8 providers: argenteam: improve subtitle repr #2 2018-03-20 12:00:18 +01:00
panni 9c9db90886 providers: argenteam: improve subtitle repr 2018-03-20 11:59:24 +01:00
panni c4bc4d22e9 providers: argenteam: fix empty results 2018-03-20 11:55:31 +01:00
panni b107c70a0c providers: argenteam: fix downloading; search for multiple IDs; implement multi-id-search-throttling 2018-03-20 11:54:13 +01:00
Tommy Mikkelsen 084069441f Add files via upload 2018-03-20 00:10:15 +01:00
Tommy Mikkelsen 8b01433e61 Add files via upload
Resized images
2018-03-20 00:04:30 +01:00
panni b72902b8f4 providers: argenteam: remove unnecessary json import 2018-03-19 19:32:44 +01:00
panni 354e455ae7 remove debug print 2018-03-19 19:27:48 +01:00
panni 8aaed47e39 bump dev 2018-03-19 19:23:50 +01:00
panni c7598aaf12 update default prefs and advanced settings template for argenteam 2018-03-19 19:23:28 +01:00
panni cbe2d16d9b providers: argenteam: reimplement to also support movies 2018-03-19 19:22:48 +01:00
panni 953eb97513 bump dev 2018-03-19 18:42:14 +01:00
panni b340b3b699 providers: argenteam: implement as SZ provider fully, too many changes over the original subliminal pull request 2018-03-19 18:40:52 +01:00
panni f9f2579904 providers: argenteam: identify as Sub-Zero, not subliminal 2018-03-19 18:33:43 +01:00
panni 3a90653edd providers: argenteam: cleanup 2018-03-19 18:29:38 +01:00
panni a8ae18f43c providers: argenteam: compute and parse release_info properly; bail out if returned item wasn't an episode 2018-03-19 18:22:03 +01:00
panni c235dd934a bump dev 2018-03-19 18:06:43 +01:00
panni 3e7c2cb0c2 core: scoring: assume title match on tvdb_id match 2018-03-19 18:06:02 +01:00
panni 1c9398b5b9 providers: argenteam: first working implementation 2018-03-19 18:05:47 +01:00
panni 6a9c818e67 tasks: search all recently added missing: fix attribute access on missing stored subtitle info 2018-03-19 17:26:38 +01:00
panni 753baf85b6 providers: first argenteam subzero implementation 2018-03-19 17:24:05 +01:00
panni 7685c2a6b7 providers: add argenteam provider (spanish), from PR mmiraglia/subliminal/tree/feature/add_argenteam 2018-03-19 17:02:13 +01:00
panni cf1203566e core: add minimum score a subtitle has to have when considered by the find better subtitles task, when the current subtitle is an extracted embedded one; add advanced_settings entries 2018-03-19 16:56:07 +01:00
panni 052e6a475b core: treat 23.976, 23.98, 24.0 as equal 2018-03-19 16:39:14 +01:00
panni 8890acef3a core: update patches to newest subliminal 2018-03-19 16:23:42 +01:00
panni 72570ee21b tvsubtitles: update patches to newest subliminal 2018-03-19 16:21:00 +01:00
panni 100c94ad83 addic7ed: update patches to newest subliminal 2018-03-19 16:19:19 +01:00
panni 2ea3bf20a7 subliminal: reapply threadpoolexecutor windows fix 2018-03-19 16:16:49 +01:00
panni b1cb7c7259 subliminal: reapply strptime fix 2018-03-19 16:16:11 +01:00
panni 7510dfc5c5 core: update subliminal to 4ad5d31 2018-03-19 16:15:38 +01:00
pannal b18bbba23f Update README.md 2018-03-18 04:59:53 +01:00
panni 4e28cea2a3 config: rename "Fix common whitespace/punctuation issues in subtitles" to "Fix common issues in subtitles" 2018-03-18 01:21:14 +01:00
panni a9bafc5efd advanced_settings: clarify auto_extract_multithread 2018-03-18 00:54:28 +01:00
panni a04ff3343b submod: fix empty content if only non-line-mods were used, no line-mods; fixes #449 2018-03-18 00:31:18 +01:00
panni aa09fb28d2 bump dev 2018-03-17 16:45:53 +01:00
panni e6900c18b9 core/menu/submod: add reverse_rtl modification for Hebrew; fixes #409 2018-03-17 16:41:49 +01:00
panni 221a17a5af Merge branch 'heb_test' into develop-2.5 2018-03-17 16:21:38 +01:00
panni fc638c608b core: only allow one automatic extraction at a time; add optional advanced settings "auto_extract_multithread" 2018-03-17 16:19:59 +01:00
panni 71d9d96d81 core: make download_best_subtitles testable again by making language hook optional 2018-03-17 15:46:23 +01:00
panni 5a8b999509 core: reduce encoding logging even more
menu: simplify season extract embedded; only set current if needed, only refresh item if needed
2018-03-17 03:59:46 +01:00
panni 720d7e9d8d bump dev 2018-03-17 03:16:09 +01:00
panni c69be5934d core: reduce encoding change log spam 2018-03-17 03:15:35 +01:00
panni dae186fb03 core: fix set_current regression 2018-03-17 03:12:31 +01:00
panni 076ad78355 remove comment 2018-03-17 01:55:15 +01:00
panni 421aa3a95c core: skip duplicate data aggregation when auto extracting embedded subtitles 2018-03-17 01:54:57 +01:00
panni 153d186a1c core: auto extract embedded subtitles in a separate thread 2018-03-17 01:14:24 +01:00
panni 2238835868 submod: common: also count lines only consisting of dots as removable 2018-03-16 23:46:38 +01:00
panni e0be4542ab bump dev 2018-03-16 15:47:51 +01:00
panni fab841bc7a core: automatic extraction: add config setting to indicate whether there should be an immediate search for available subtitles after extraction or not (default: off) 2018-03-16 15:10:31 +01:00
panni 789a28a966 core: don't change our environ 2018-03-16 14:50:48 +01:00
panni 7cde652ed1 core: remove LD_LIBRARY_PATH from environment before calling notification executable 2018-03-16 14:49:53 +01:00
panni 5359116e72 providers: enable subscene by default 2018-03-16 14:45:01 +01:00
panni 17edfd215d bump dev 2018-03-16 14:42:17 +01:00
panni e292b46cca core: addic7ed: use random user agent by default (enforce for existing configs) 2018-03-16 14:41:53 +01:00
panni d091b20ebe core: addic7ed: use random user agent by default 2018-03-16 14:36:35 +01:00
panni 50a53562a1 core: expand user agent list 2018-03-16 14:36:15 +01:00
panni 55a479590b core: try finding Plex Transcoder in Resources folder, as well, hopefully fixes #460 2018-03-16 14:11:36 +01:00
panni 8874bb64fb core: extract embedded: let ffmpeg auto convert mov_text/tx3g to srt 2018-03-15 17:53:46 +01:00
panni 38afba3075 core: extract embedded: don't transcode to SRT using ffmpeg (Plex Transcoder), do the transcoding later using pysubs2; fixes offset issues 2018-03-15 17:42:18 +01:00
panni ba48e30128 bump dev 2018-03-15 15:18:21 +01:00
panni 77397b6877 submod: OCR: "H i." = "Hi." 2018-03-15 15:17:42 +01:00
panni f50fa0554a submod: common: don't break phone numbers (more than one spaced number pair found) 2018-03-15 15:14:06 +01:00
panni d0dd9f629d core: correctly skip immediately searching for new subtitle after successfully extracting embedded 2018-03-15 15:07:35 +01:00
panni c82637e760 core: fix automatic extraction of unknown embedded subtitle streams 2018-03-15 15:05:52 +01:00
panni 152cfb3f07 menu: fix season extract embedded 2018-03-14 16:28:38 +01:00
panni 7f579181fd bump dev 2018-03-14 16:26:03 +01:00
panni 3e0f39b6f1 submod: HI: count dots as chars inside brackets, for abbreviated names 2018-03-14 16:24:19 +01:00
panni 244d3b1a5b submod: common: don't uppercase after abbreviations 2018-03-14 16:21:07 +01:00
panni 7c24302f7c submod: common: double dash is actually em dash; fix removal 2018-03-14 16:12:48 +01:00
panni 6cafc3a1e8 submod: OCR/HI: don't remove stuff inside quotes 2018-03-14 15:48:23 +01:00
panni 1ab0d31baa bump dev 2018-03-13 18:29:50 +01:00
panni b2fadc5a90 submod: HI: correctly handle tags inside lines when checking for brackets 2018-03-13 18:19:41 +01:00
panni 38f3d85909 submod: fix style tags in line can result in no modifications at all 2018-03-13 18:06:31 +01:00
panni 3694100265 submod: only log processor name, not the full class 2018-03-13 18:01:30 +01:00
panni af44f271ab submod: correctly use the debug mods flag 2018-03-13 17:53:41 +01:00
panni 9984f6aef9 submod: shift timing: inversely reverse value list to make it easier accessible 2018-03-13 17:32:56 +01:00
panni 51a1debc39 Merge branch 'develop-2.5' into heb_test 2018-03-13 17:25:07 +01:00
panni b8a68f62a0 #460 don't bother auto extracting subtitles if the transcoder wasn't found; warn 2018-03-13 16:57:23 +01:00
panni 5ded188f51 add hosszupuska to advanced_settings.json; make text based subtitle formats configurable resolve #464 2018-03-13 16:45:54 +01:00
panni 12c5dda1fa bump dev 2018-03-06 02:49:10 +01:00
panni 25146049bf Merge branch 'master' into develop-2.5 2018-03-06 02:48:28 +01:00
pannal 5598ee0c78 Merge pull request #445 from morpheus133/hosszupuskasub_provider
Add Hungarian provider Hosszupuska
2018-03-06 02:45:30 +01:00
pannal 6e4b0cbcbf Merge pull request #456 from Ineluctable/patch-1
Update Channels to Plugins on install instructions
2018-03-06 02:42:16 +01:00
Ineluctable 572cf29974 Update Channels to Plugins on install instructions
Plex doesn't show the option as Channels anymore, it shows Plugins.
2018-03-05 13:45:30 -06:00
morpheus133 5601d19002 - Instead of parsing release information manually use releases as visible in other providers.
- Add asked_for_episode
2018-03-04 20:41:25 +01:00
panni e81dd5df76 core: subtitle storage: correctly skip blacklist key 2018-03-04 17:36:53 +01:00
panni e7919d5a47 bump dev 2018-03-04 06:50:45 +01:00
panni 6f634fbc21 #454 support extracting forced embedded subtitles and storing them as such; display message when extracting via menu 2018-03-04 06:50:02 +01:00
panni 7478ece1ff use the same forced detection for extract embedded; add fixme 2018-03-04 06:23:59 +01:00
panni cd72b6f477 bump dev 2018-03-04 06:15:18 +01:00
panni fab96de4c7 add fixme 2018-03-04 06:08:40 +01:00
panni 0ffa17cf67 #454 remove debug logging; exit early if embedded scanning isn't wanted 2018-03-04 06:06:51 +01:00
panni 777549a15f #454 embedded streams have an index, which is better than checking for nonexistent stream_key 2018-03-04 05:59:27 +01:00
panni c07ded004d #454 attribute check 2018-03-04 05:50:41 +01:00
panni da3e96a9d8 #454 smarter stream title detection 2018-03-04 05:47:17 +01:00
panni d6e8a03ddf #454 treat "forced" contained by stream.title = forced subtitle 2018-03-04 05:40:25 +01:00
panni b13cbd1e54 #454 also treat stream.title=="forced" as forced subtitle 2018-03-04 05:36:42 +01:00
panni 6b2e5c154b #454 add more embedded stream logging 2018-03-04 02:39:14 +01:00
panni 137a4d1e0d core: fix embedded subtitle language detection; add debug log 2018-03-03 22:14:45 +01:00
panni 1725550acc core: fix unpacking of packs without asked-for-release-group 2018-03-03 14:40:55 +01:00
panni bd91e173b0 core: expand exception handling when trying to save subtitle 2018-03-03 04:29:56 +01:00
panni 47a11b3e64 core: correctly skip blacklist entries when iterating through currently known subs 2018-03-02 21:44:25 +01:00
panni b5e57519ff back to dev 2018-03-01 16:45:44 +01:00
panni 20845bbcd4 release 2.5.0.2287 2018-03-01 16:34:45 +01:00
panni 739c10ade6 submod: common: require at least one music symbol when fixing 2018-03-01 16:30:02 +01:00
panni 14ea2d72a7 Merge branch 'develop-2.1' 2018-03-01 16:19:01 +01:00
panni 4a9ea97ea1 update doc 2018-03-01 12:51:48 +01:00
panni b017a94353 update doc 2018-03-01 12:51:39 +01:00
panni 15b65dd844 core: better embedded subtitle stream language detection 2018-03-01 12:46:19 +01:00
morpheus133 079ea8c39d - Added mixin for archive handling (also add rar support)
- Remove LXML checking  (Needed only for official subliminal)
- Added fix_inconsistent_naming handling
2018-03-01 08:09:52 +01:00
panni 4b949dcd72 core: support mov_text for embedded subtitle extraction 2018-02-28 18:42:58 +01:00
panni 2626cf4253 core: handle nld for embedded subs 2018-02-28 18:14:59 +01:00
panni b260c8aaec config: clarify subscene being only enabled for TV shows by default 2018-02-28 11:44:35 +01:00
panni 1ece46473b bump dev 2018-02-27 17:45:56 +01:00
panni 890c3cc8b0 core: fix remove crap from filename; fixes non-matched release group in refiners 2018-02-27 15:15:25 +01:00
morpheus133 7b45c9f1c5 Add Hungarian provider Hosszupuska
link: http://hosszupuskasub.com/
2018-02-27 12:53:08 +01:00
panni 58fb2f5ea6 bump dev 2018-02-27 12:48:40 +01:00
panni a79f3e47ba submod: OCR: fix it'sjust, isn'tjust, Iam, Ican 2018-02-27 12:37:15 +01:00
panni b3b9db9ff6 core: get subtitles from archive: remove redundant get 2018-02-27 12:33:14 +01:00
panni 9aed245241 core: get subtitles from archive: don't assume any attributes in guess 2018-02-27 12:32:28 +01:00
panni aa03fdb445 core: get subtitles from archive: don't assume an episode match 2018-02-27 12:31:18 +01:00
panni 7cb8356598 submod: HI: HI_before_colon_noncaps: also consider multiple dashes a sentence 2018-02-27 12:28:37 +01:00
panni ac347755fd submod: HI: separate text before colon into two checks; try not to break actual sentences before colon 2018-02-27 12:09:26 +01:00
panni b16cb15e88 submod: HI: fix remove music-symbol-only lines 2018-02-27 11:36:28 +01:00
panni 4989c37964 submod: HI: remove music-symbol-only lines 2018-02-27 11:30:55 +01:00
panni 06849c5814 submod: common: fix music symbols 2018-02-27 11:26:53 +01:00
panni 78b67a6f5e submod: OCR: correctly fix broken HI tag colons 2018-02-27 11:22:58 +01:00
panni acf79df4d0 bump dev 2018-02-26 16:45:04 +01:00
panni bc5a9caf63 submod: OCR: fix "Ls"="Is" 2018-02-26 14:56:48 +01:00
panni 7b34b07cdc hard error on IOError while scanning videos; warn about hard error in menu #444 2018-02-26 10:06:52 +01:00
panni 8df1a1bf17 bump dev 2018-02-23 17:03:54 +01:00
panni 1143b0f2d2 providers: opensubtitles: try re-initializing the provider on ResponseNotReady 2018-02-23 17:01:42 +01:00
panni 86883336fd providers: opensubtitles: catch ResponseNotReady 2018-02-23 16:51:47 +01:00
panni 62d77c5811 #441 #440 add scandir listdir fallback mechanism 2018-02-23 15:22:39 +01:00
panni 8397dddbbe #441 patch sys.getfilesystemencoding 2018-02-23 12:28:48 +01:00
panni 47ef94d8c3 submod: common: rename CM_underscore_only to CM_non_word_only 2018-02-18 00:39:47 +01:00
panni 8aa4a485ed reduce main icon size 2018-02-17 17:08:47 +01:00
panni cb4ef9c9ea submod: common: dash underscore empty 2018-02-17 03:51:01 +01:00
panni 2f80852a7c submod: add entry index to debug 2018-02-16 13:21:08 +01:00
panni 190a580642 submod: common: remove lines that consist only of underscores; update test.srt 2018-02-16 13:18:44 +01:00
panni 6ba85f5069 submod: common: don't break "-- addicted --" 2018-02-16 13:13:54 +01:00
pannal 707b5921fb Update README.md 2018-02-16 10:05:05 +01:00
panni 2e25e68444 refiners: drone: add http:// to base url if needed 2018-02-15 19:31:01 +01:00
pannal 034260e426 Update README.md 2018-02-15 16:59:11 +01:00
pannal b4eda8bbff Update README.md 2018-02-15 09:46:51 +01:00
panni 93a1b7fb52 back to dev 2018-02-15 09:45:53 +01:00
panni 8ef44c3520 release 2.5.0.2247 2018-02-15 09:45:27 +01:00
panni 449de57fc7 config: debug sonarr/radarr 2018-02-15 09:44:59 +01:00
panni cbe29e233d Merge remote-tracking branch 'origin/master' 2018-02-15 09:42:11 +01:00
panni bef56ff124 core: fix wrong episode matches on hash match 2018-02-15 09:34:31 +01:00
Michael Goodnow 5a05c0f858 add images for wiki 2.5 2018-02-14 17:43:41 -05:00
panni c1e13e520b back to dev 2018-02-14 16:31:02 +01:00
panni cebe92bd8f release 2.5.0.2241 2018-02-14 16:19:23 +01:00
panni 6f8cfc7914 Merge branch 'develop-2.1'
# Conflicts:
#	README.md
2018-02-14 16:18:04 +01:00
panni e7e98b83d2 make crap removal less error prone 2018-02-14 16:10:17 +01:00
panni 4b72bb9d28 fix ignore list 2018-02-13 15:32:55 +01:00
pannal 221068874b Update README.md 2018-02-13 15:06:20 +01:00
pannal 6028d8b2f1 Update README.md 2018-02-13 14:12:13 +01:00
pannal ddaafe9310 Update README.md 2018-02-13 14:10:39 +01:00
pannal 139e38731a Update README.md 2018-02-13 14:07:15 +01:00
pannal d25056cb35 Update README.md 2018-02-13 14:06:19 +01:00
panni 5c80a7091b fix changelog 2018-02-13 14:06:11 +01:00
pannal 5faf190202 Update README.md 2018-02-13 14:05:26 +01:00
panni 169b114ff6 fix changelog 2018-02-13 14:05:16 +01:00
panni bc67326573 Merge remote-tracking branch 'origin/master'
# Conflicts:
#	README.md
2018-02-13 13:56:44 +01:00
panni a32543533d release 2.5.0.2221 2018-02-13 13:56:18 +01:00
panni 6b6e40ef96 Merge branch 'develop-2.1'
# Conflicts:
#	Contents/Info.plist
#	Contents/Libraries/Shared/submod_test.py
#	Contents/Libraries/Shared/subzero/modification/mods/common.py
2018-02-13 13:56:06 +01:00
panni 8127b7ecf0 2.5.0.2221 2018-02-13 13:55:17 +01:00
panni 09425ccbe0 update main icon for 2.5 2018-02-13 12:58:30 +01:00
panni 61fbc4e3b5 menu: use more natural way to display ignore options for seasons and episodes 2018-02-13 12:38:39 +01:00
panni 158e4f85da embedded: auto-extract: honor forced_only setting; only set extracted subtitle as current if there's no current one 2018-02-12 12:51:10 +01:00
panni 8b1107d2e1 menu: correctly use message/header 2018-02-12 12:50:17 +01:00
panni 59ffa9084f embedded: add debug log for automatic extraction 2018-02-11 03:43:20 +01:00
panni 19df673c50 bump 2.5.0.34 2018-02-11 03:38:47 +01:00
panni 5f20894413 embedded: only extract requested languages from embedded subtitle streams; add config.ietf_as_alpha3 2018-02-11 03:38:31 +01:00
panni 7349874804 don't refresh item on agent auto extract embedded 2018-02-11 03:18:22 +01:00
panni fda5dc7e89 disable subscene by default 2018-02-11 03:11:45 +01:00
panni d60b45a667 bump 2.5.0.33 2018-02-11 03:10:50 +01:00
panni ab2e69a76e core/config: add option for automatically extracting embedded subtitles upon agent call 2018-02-11 03:10:34 +01:00
panni 6a836338a5 embedded subtitles: only use first unknown stream if treat undefined as first has been set 2018-02-11 03:10:02 +01:00
panni 5a02365605 bump 2.5.0.32 2018-02-11 00:54:15 +01:00
panni 26b38c4f64 menu: add mass-extraction per season for all embedded subtitles 2018-02-11 00:53:57 +01:00
panni 9b7edf2960 submod: common: fix numbers once more, don't kill spaces after them, fix more than one space on multiple locations 2018-02-10 22:13:06 +01:00
panni 7050f64fae advanced: languages: log requested languages as "requested" instead of "got" 2018-02-09 23:29:48 +01:00
panni 4623a989d8 submod: common: be more aggressive when fixing numbers; correctly space out spaced ellipses; don't break spaced ellipses 2018-02-09 21:56:03 +01:00
panni 87b942bd6d config: actually wanted to fix only_one, not undefined as first 2018-02-09 18:12:58 +01:00
panni 87ee5cc627 config: correctly handle unknown as first 2018-02-09 18:10:18 +01:00
panni bff8fe8b70 refiners/renaming: let file_info_file supersede symlinks 2018-02-09 17:20:31 +01:00
panni 1495882dc7 refiners/renaming: add symlink support; rename Prefs["media_rename"] to media_rename1 2018-02-09 17:19:36 +01:00
panni 2e50d84f2a advanced: languages: more verbose logging 2018-02-09 15:35:37 +01:00
panni d32716f4c5 advanced: languages: mutability problem? 2018-02-09 15:28:38 +01:00
panni 876aa4eda0 advanced: fallback to default languages 2018-02-09 13:05:53 +01:00
panni 3673aee8e9 bump 2.5.0.31 2018-02-09 13:02:12 +01:00
panni a758191ee0 core: advanced: add thorough_cleaning setting 2018-02-09 13:01:54 +01:00
panni 99410249c7 core: advanced: add per-provider language config 2018-02-09 12:57:38 +01:00
panni a705f2ad30 bump 2.5.0.30 2018-02-08 18:50:07 +01:00
panni 33223dedc1 drone: radarr: invalidate cache if it's older than the movie file in question; sonarr: correct .refresh() usage 2018-02-08 18:49:03 +01:00
panni bd8e8ef346 config: rename mode: return early in case of kept original filenames 2018-02-08 17:39:51 +01:00
panni c75e7bf656 config debug: add version 2018-02-08 17:37:03 +01:00
panni cb4117376a drone: check connectivity in config debug 2018-02-08 17:35:14 +01:00
panni 0d37920aad localmedia: cleanup 2018-02-08 17:12:36 +01:00
panni 0da6e76200 #434 #resolve 2018-02-08 15:43:08 +01:00
panni 5f5934a6ee bump 2.5.0.29 2018-02-08 13:56:49 +01:00
panni 85b7a2f4f5 drone: sonarr: support for upcoming originalFilePath value 2018-02-08 13:52:36 +01:00
panni 3dcfd30a04 drone: rely on filename only to circumvent using bad cached data when quality upgrades occur 2018-02-08 13:45:31 +01:00
panni b5a0f65783 bump 2.5.0.28 2018-02-08 13:11:08 +01:00
panni 3862e6f3a4 drone: cache series endpoint for sonarr and movies endpoint for radarr 2018-02-08 13:10:28 +01:00
panni 1d4e2ec50b menu: don't allow blacklisting of extracted embedded subtitles 2018-02-08 12:40:15 +01:00
panni 8b85485510 core: increase request timeout by three times in case a proxy is being used 2018-02-08 12:37:57 +01:00
panni 722ce3ac8b submod: HI: HI_before_colon: remove bad escape sequence 2018-02-08 12:26:05 +01:00
panni 1e132f2808 submod: HI: rename HI_before_colon_universal 2018-02-08 12:25:28 +01:00
panni d007e0a172 submod: HI: improve HI_before_colon again; match mid-line HI: as well, don't mangle times 2018-02-08 12:25:08 +01:00
panni 3ddd722cc1 submod: HI: be less strict about HI_before_colon; accept 3 random chars instead of 2 uppercase chars before the colon 2018-02-08 11:51:37 +01:00
panni 82d8189966 submod: common: fix uppercase I's in lowercase words more aggressively 2018-02-08 11:50:51 +01:00
panni 2d533eb004 submod: OCR: fix l/L instead of I more aggressively 2018-02-08 11:50:16 +01:00
panni f9c899701f subtitle cleanup: add support for hi, cc, sdh secondary filename tags; don't autoclean .txt 2018-02-08 10:20:18 +01:00
panni e9f62fbb09 tvdb: skip empty firstAired data 2018-02-05 10:23:13 +01:00
panni 5b2f09318a menu: move embedded subtitle menu below manage subtitles menu 2018-02-04 01:15:46 +01:00
panni 8c260c43a8 menu: sort stored subtitles by date_added reversed 2018-02-04 00:37:55 +01:00
panni eee793302c bump 2.5.0.27 2018-02-04 00:23:23 +01:00
panni 0d1fdf6e60 menu: clarify items 2018-02-04 00:22:46 +01:00
panni 64398d8f30 findbetter: better logging when subtitle was downloaded 2018-02-04 00:16:31 +01:00
panni cab736b573 findBetter/config: limit by air date before searching and make it configurable 2018-02-04 00:09:30 +01:00
panni 93071dd81e bump to 2.5.0.26 2018-02-03 23:22:08 +01:00
panni e8fcb8f91a config: set "Scheduler: Overwrite manually selected subtitles when better found" to default-true 2018-02-03 23:14:48 +01:00
panni 33cacfe884 menu: add subtitle selection menu 2018-02-03 23:14:28 +01:00
panni f624f7f05a subtitle_storage: add get, get_all, count methods 2018-02-03 23:13:55 +01:00
panni 624195d870 advanced: add skip findbettersubtitles menu item, which sets the last_run to now 2018-02-03 22:09:35 +01:00
panni ab2ef66263 menu: extract embedded: support the major text based formats; honor treat unknown as language 1 2018-02-03 21:31:01 +01:00
panni 4ea0372212 refining: re-add old detected title as alternative title after re-refining with plex metadata's title; fixes #428 2018-02-03 18:59:05 +01:00
panni ff31912e8a core: don't cache provider settings 2018-02-03 02:00:33 +01:00
panni dcefed2e4c also resolve full series force refresh intents after the agent is done 2018-02-03 01:48:38 +01:00
panni 55bbc4f585 Revert "core: save Dict on set_refresh_menu_state"
This reverts commit 85342ee
2018-02-03 01:39:18 +01:00
panni 0f2bb99b39 core: use Thread.Sleep on multiple refresh 2018-02-03 01:38:07 +01:00
panni 85342eeed3 core: save Dict on set_refresh_menu_state 2018-02-03 01:35:28 +01:00
panni 374a6a668a core: increase force refresh season/series timeout by the power of two 2018-02-03 01:28:20 +01:00
panni e3be3195ee bump 2.1.0.25 2018-02-03 01:23:14 +01:00
panni 503279f3c2 core: use deepcopy on config.provider_settings 2018-02-03 01:08:30 +01:00
panni f8bb54024c fps equality: quicker return in case of full equality 2018-02-03 00:44:14 +01:00
panni 6e53fc606a bump 2.1.0.24 2018-02-03 00:26:07 +01:00
panni ab810c48af core: treat 23.976 and 23.980 equally 2018-02-03 00:25:31 +01:00
panni 13bb9183af fix podnapisi for newest subliminal 2018-02-03 00:00:08 +01:00
panni 2c5b6ea690 fix addic7ed language converter registering, exceptions, for newest subliminal 2018-02-02 23:56:56 +01:00
panni a8efa2e266 adapt to newest subliminal 2018-02-02 23:43:04 +01:00
panni e73eb2fd86 subliminal: reapply threadpoolexecutor windows fix 2018-02-02 23:07:23 +01:00
panni d38fa26e13 subliminal: reapply strptime fix 2018-02-02 23:06:23 +01:00
panni 716f4493e8 update subliminal to 62cdb3c 2018-02-02 23:05:30 +01:00
panni 3220974a4a bump 2.1.0.23 2018-02-02 22:56:39 +01:00
panni 6732272047 core: download_best: skip subtitle if we've got an episode and it doesn't verify against the subtitle episode data 2018-02-02 22:55:22 +01:00
panni 547f038139 providers: subscene: only search for season packs when season has fully aired 2018-02-02 22:43:40 +01:00
panni 3b0ee60eaa update provider_test to match new refining mechanism 2018-02-02 22:43:21 +01:00
panni a869281de7 core: add missing custom Video attributes 2018-02-02 22:43:01 +01:00
panni a4ed77c7bb core: set hints in scan_video 2018-02-02 22:42:45 +01:00
panni 81718e64d3 refiners: replace tvdb with sz_tvdb; add season fully aired info 2018-02-02 22:42:25 +01:00
panni dee0daf8aa bump 2.1.0.22 2018-01-31 11:55:52 +01:00
panni 8e599fb22a archives: support multi-episode subtitles 2018-01-31 11:46:05 +01:00
panni acb5589af1 subtitle packs: use hard fallback if needed 2018-01-29 12:11:34 +01:00
panni 6db2771cd6 core: scandir: fix typo 2018-01-28 15:04:14 +01:00
panni 06d4e0a19a core: use scandir for cache._all_filenames 2018-01-28 04:57:24 +01:00
panni 3b18c6c14f core: use scandir library instead of os.listdir where possible 2018-01-28 04:37:32 +01:00
panni 300359acf2 add fixme 2018-01-28 04:22:01 +01:00
panni 5456d0200a bump 2.1.0.21 2018-01-28 03:54:50 +01:00
panni 9890f66443 submod: HI: improve hi_brackets 2018-01-28 03:54:30 +01:00
panni aba863bc84 submod: HI: add separately split start_bracket and end_bracket replacements 2018-01-28 03:52:21 +01:00
panni ade416f5c8 submod: OCR: allow only word boundary after F', no further text needed 2018-01-28 03:51:55 +01:00
panni 7097267f7c submod: OCR: fix F'xxxx errors; rename SE_X to OCR_X 2018-01-28 02:44:18 +01:00
panni b0d8d1a86d try utf-8 first 2018-01-28 00:39:16 +01:00
panni 2c8296ba85 try decoding zip content filename from cp437 first 2018-01-27 23:59:37 +01:00
panni 4dd17de146 menu: update display logic for SearchAllRecentlyAddedMissing 2018-01-27 18:20:40 +01:00
panni 3a281b0b57 core: make logrotate backup count configurable 2018-01-27 17:37:45 +01:00
panni 04ed625f1a remove shooter for the time being 2018-01-27 17:16:17 +01:00
panni 1cddfb1b2d providers: subscene: disable by default; let throttling take precedence over any other provider setting 2018-01-27 15:01:39 +01:00
panni 796b64d83e advanced settings: Dicked: support bool() by implementing __nonzero__ 2018-01-27 05:08:16 +01:00
panni 240a3687d7 advanced settings: Dicked: support bool() by implementing __len__ 2018-01-27 05:06:47 +01:00
panni 9ed4764ab2 advanced settings: skip if necessary 2018-01-27 05:01:42 +01:00
panni f253a13297 remove debug print 2018-01-27 04:47:22 +01:00
panni 744cd57dd5 typo 2018-01-27 04:46:06 +01:00
panni e2a5647363 advanced settings: add comment 2018-01-27 04:45:54 +01:00
panni a1f324c105 advanced settings: add instructions 2018-01-27 04:45:02 +01:00
panni 767e0f8ac7 advanced settings: keep disabled provider disabled, even with advanced settings enabled_for provided 2018-01-27 04:41:22 +01:00
panni 0c0ad02234 advanced settings: only honor enabled_for if media type given 2018-01-27 04:36:33 +01:00
panni c09973ec56 disable debug log 2018-01-27 04:34:56 +01:00
panni 03a72e1917 core: implement advanced_settings.json 2018-01-27 04:34:07 +01:00
panni f9e0eaaf83 bump 2.1.0.20 2018-01-26 17:58:03 +01:00
panni 985f75f7da archives/subscene: grab matching subtitle from archive if we didn't find one that matches the format requested; dumber release group check 2018-01-26 17:52:52 +01:00
panni 171cbd6c53 core: move pack cache dir to config; add CacheMaintenance task 2018-01-25 14:46:56 +01:00
panni 9875bc5c5b bump 2.1.0.19 2018-01-25 14:17:11 +01:00
panni 882509f891 core: add pack handling to agent; fix force-refresh; fix agent 2018-01-25 14:16:46 +01:00
panni 3396502334 subscene: fall back to downloading new pack if we couldn't find the subtitle in the cached pack 2018-01-24 17:45:00 +01:00
panni b7fb99c3d4 subscene: add pack cache 2018-01-24 17:26:37 +01:00
panni c82307a710 tasks: listavailable: remove redundant condition 2018-01-24 14:35:16 +01:00
panni 309a99d183 tasks: listavailable: use correct condition 2018-01-24 14:30:55 +01:00
panni 09a6ef0194 core: reorder agent's update mechanism; debounce after resolving intents; exit earliest when agent is disabled; 2018-01-24 14:21:42 +01:00
panni 43afcb4239 tasks: availablesubs: don't skip non-matching episode if subtitle is pack 2018-01-24 12:26:10 +01:00
panni 7a78f33ac3 subscene: don't discard first results after searching for episode; search for packs also, afterwards 2018-01-24 12:25:30 +01:00
panni d5fb538630 menu: only pad the titles for the current view 2018-01-18 14:50:05 +01:00
panni a22cdf5d5b menu: pad titles for metadata menu and show/season submenus 2018-01-18 14:44:09 +01:00
panni fe0636bbbf bump 2.1.0.18 2018-01-18 13:57:47 +01:00
panni 13859cfbd7 submod: debug message when debug is enabled, only 2018-01-18 13:54:08 +01:00
panni 0adadc59ac menu: add reapply mods to current subtitle 2018-01-18 13:53:38 +01:00
panni d65ba19c6c submod: correctly drop empty line 2018-01-18 13:44:56 +01:00
panni 5cedbd2fa0 searchallrecentlymissing: dynamically adjust overall item count 2018-01-17 18:08:36 +01:00
panni 735fb09762 extract embedded: subtitle.id=stream_id 2018-01-17 14:48:28 +01:00
panni 79d61419b0 extract embedded: store embedded subtitle correctly inside subtitle storage 2018-01-17 14:44:17 +01:00
panni 248b93e5c6 store last modified timestamp in subtitle info; only write to storage if we haven't had one or any subtitle was downloaded 2018-01-17 14:12:40 +01:00
panni d8eff1adb5 fix language handling for treat undefined as first 2018-01-17 13:45:49 +01:00
panni c911620254 bump 2.1.0.17 2018-01-17 13:22:37 +01:00
panni c68a32b889 submod: skip provider hashing when applying mods 2018-01-17 13:22:24 +01:00
panni 788819a900 subtitle storage: remove debug log 2018-01-17 13:16:50 +01:00
panni 27c94af980 core: massive speedup; refine only when needed, exit early otherwise 2018-01-17 13:16:31 +01:00
panni 81122665a0 scanning: correctly use alpha3 lang code 2018-01-16 18:56:29 +01:00
panni 1856e687eb explicitly remove variable references (may not do anything) 2018-01-16 12:49:15 +01:00
panni 6055793d46 bump 2.1.0.16 2018-01-16 12:46:55 +01:00
panni 99b670ff10 tasks: optimize memory usage 2018-01-16 12:46:41 +01:00
panni 7a09218cc0 permissions: skip check for not existing paths 2018-01-16 12:46:23 +01:00
panni a34d0523b5 subtitle meta storage: remove legacy handling 2018-01-16 12:26:07 +01:00
panni f06e900bab opensubtitles: log bad credentials as error 2018-01-12 18:12:49 +01:00
panni 7da15a2d44 opensubtitles: try logging in when initial log in didn't return a token; correctly log login failed 2018-01-12 18:11:14 +01:00
panni e999cc53d0 core: log traceback in case of failed parse_video 2018-01-12 17:59:48 +01:00
panni b7d4bd00a5 config: remove redundant Prefs access 2018-01-12 17:54:06 +01:00
panni 8c2aa849d7 subscene: remove foreign_only support, as people don't get it 2018-01-12 17:50:59 +01:00
panni 01a759fff8 core: correctly handle non-verifiable hash matches; bump to 2.1.0.15 2018-01-12 15:28:48 +01:00
panni cb0008b59e low impact mode: skip hashing files if enabled 2018-01-12 15:23:42 +01:00
panni 9cd825aff1 core: only compute hashes for items for enabled providers 2018-01-12 15:11:12 +01:00
panni 8ad52d2979 legendastv: skip caching of archive contents 2018-01-10 15:26:31 +01:00
panni efd6143498 availablesubs: fix listing 2018-01-10 14:46:36 +01:00
panni 157fae5f83 subscene: fix self.matches stupidity 2018-01-09 15:13:18 +01:00
panni 6d63301b63 core: also don't faceplant if the saving wasn't successful 2018-01-09 13:51:03 +01:00
panni 9801c8c6b3 subscene: check video.release_group first 2018-01-09 13:46:50 +01:00
panni e04f4c0bd0 bump 2.1.0.14 2018-01-09 13:45:47 +01:00
panni b501578584 core: move store_subtitle_info calls to agent.store_blank_subtitle_metadata 2018-01-09 13:45:34 +01:00
panni 308f429c91 core: catch exceptions when downloading best subtitles 2018-01-09 13:41:01 +01:00
panni 1d45172475 subscene: fix empty release group 2018-01-09 13:39:18 +01:00
panni 085a4f30db tasks: revert empty storage fix; core: return on disabled agent; create meta storage even if download failed 2018-01-09 13:24:36 +01:00
panni 7a600dc2b6 fcache: log exception on fsync 2018-01-09 12:20:15 +01:00
panni c0c2891d8d scheduler: log wrongly matched series/episodes 2018-01-09 12:06:51 +01:00
panni 06b269a2ba scheduler: skip wrongly matched series 2018-01-09 12:04:49 +01:00
panni f3a4db0d87 subscene: use alternative search for episodes after searching for release name; don't ditch other match properties on hash match 2018-01-09 12:04:23 +01:00
panni bcd99d18c4 bump to 2.1.0.13 2018-01-09 11:23:53 +01:00
panni c05c400c6f remove obsolete comment 2018-01-09 11:23:37 +01:00
panni 0f081d8d7b napiprojekt: prone to memoryerror when hashing, mitigate 2018-01-09 10:42:50 +01:00
panni 833dc5e3ae task: searchrecentlymissing: treat non existing storage as missing; log item title instead of item IDs 2018-01-09 10:34:36 +01:00
panni 0be3df435b titlovi: initialize variables 2018-01-09 10:11:28 +01:00
panni f4446af57e subscene/titlovi/podnapisi: correctly implement subtitle archive listing mixin's needed attributes 2018-01-09 10:10:04 +01:00
panni 253aa664a8 subscene: debug log if we found a pack 2018-01-09 09:43:17 +01:00
panni 0df037a295 subscene: store season and episode in subtitle 2018-01-09 09:41:59 +01:00
panni ed49d743f9 SubtitleListingMixing: skip wrong season or episode when listing 2018-01-09 09:30:41 +01:00
panni 203cc392c0 fix usage of REMOVE_CRAP_FROM_FILENAME regex 2018-01-07 22:09:14 +01:00
panni 52ba5a7f24 bump dev to 2.1.0.12 2018-01-07 06:47:49 +01:00
panni 8aa0576bbc remove crap from filename: be more specific 2018-01-07 06:46:40 +01:00
panni 5ce9cc79c8 remove crap from filename: also remove brackets inside release group 2018-01-07 06:33:44 +01:00
panni 1a596dfdea subscene: fix release group detection 2018-01-07 05:56:45 +01:00
panni aeecb3ff59 add fixme 2018-01-07 05:50:50 +01:00
panni 85c8d2d558 add global proxy support 2018-01-07 05:50:32 +01:00
panni 2cf4e7ac59 add xmlrpclib proxy support 2018-01-07 05:43:22 +01:00
panni e7412a91f9 http: add basic proxy support 2018-01-07 05:36:13 +01:00
panni 9888d03982 use SubZeroTransport for xmlrpclib 2018-01-07 05:26:48 +01:00
panni 765cc39553 add 23.980 to FPS mod
(cherry picked from commit 1905187)
2018-01-07 05:24:14 +01:00
panni 6e58c2f984 update certifi to 2017.11.05 2018-01-07 03:32:14 +01:00
panni 295542ff18 scanning prefs: clear up naming; rename exotic to non-text 2018-01-06 15:22:40 +01:00
panni 9d72d9c647 bump dev to 2.1.0.11 2018-01-06 05:19:45 +01:00
panni 853897ec3e provider throttled: format datetime correctly 2018-01-06 05:17:31 +01:00
panni 9cf8ad7399 throttle for 20 minutes on ServiceUnavailable 2018-01-06 05:15:27 +01:00
panni fdf974c5e3 make throttle time and description dynamic 2018-01-06 04:55:20 +01:00
panni 2920dbfe8d subscene: add only_foreign support
providers: add provider throttling (TooManyRequests, DownloadLimitExceeded)
2018-01-06 04:50:31 +01:00
panni 77d05f7697 reorder archive content matching 2018-01-05 19:18:47 +01:00
panni 3ffeaeffb6 skip parsing release group in case of a pack (which might contain multiple different release groups and not the one asked for) 2018-01-05 17:14:16 +01:00
panni db2755675c fix pack handling and archive content matching 2018-01-05 17:10:51 +01:00
panni 7ca090f73c bump to 2.1.0.10 2018-01-05 16:59:26 +01:00
panni bb251ad29e add enum 1.1.6 2018-01-05 16:58:48 +01:00
panni 75d770e019 first subscene implementation 2018-01-05 16:58:37 +01:00
panni 49bf116c18 add subscene api fork; add contextlib2 0.5.5
(cherry picked from commit b6d98f6)
2018-01-05 15:16:47 +01:00
panni b7d227fe0f add webencodings 0.5.1 for html5lib 2018-01-05 14:19:38 +01:00
panni 83f59935f2 update html5lib to 1.0.1 (was: 0.999) 2018-01-05 05:17:54 +01:00
panni 37b794fa14 bump to 2.1.0.9 2018-01-05 03:22:19 +01:00
panni 1f5c45df91 correctly sync cache during tasks; use Thread.Sleep instead of time.sleep for tasks 2018-01-05 02:14:38 +01:00
panni 62e3020234 check for buffer attribute 2018-01-05 01:51:17 +01:00
panni 895d457500 add new dogpile file based default cache backend based on fcache 2018-01-05 01:49:43 +01:00
panni 586269efd3 add fcache 0.4.7 2018-01-05 01:11:58 +01:00
panni 576718fc03 update dogpile to 0.6.5
(cherry picked from commit a2c1349)
2018-01-05 01:10:08 +01:00
panni 648dd4147a bump to 2.1.0.8 2018-01-04 16:34:05 +01:00
panni c4df743c3e add plex transcoder detection for MacOS 2018-01-04 16:22:18 +01:00
panni b98fead37e plex transcoder has a different name on win32 2018-01-04 15:42:44 +01:00
panni 6522094164 add fallback path computation if PLEX_MEDIA_SERVER_HOME isn't set 2018-01-04 15:35:45 +01:00
panni fcd3dfe75c extract subtitle using a separate thread 2018-01-04 03:23:31 +01:00
panni ec9a798590 remove portalocker 2018-01-03 18:19:05 +01:00
panni 5825443d4d upgrade beautifulsoup4 to 4.6.0 (was 4.4.1) 2017-12-30 06:48:13 +01:00
panni 9768b3fadd debounce agent only if something has been searched for 2017-12-30 05:49:19 +01:00
panni 77a72d6663 add our own Language class 2017-12-30 04:54:03 +01:00
panni 08d647c024 add our own Language class 2017-12-30 04:48:05 +01:00
panni a77ef040be add fixme 2017-12-29 23:48:08 +01:00
panni 13e581b953 archives: log used subtitle filename 2017-12-29 23:05:52 +01:00
panni 1cc18617c5 handle releases being a list 2017-12-29 23:03:43 +01:00
panni 2642f65614 add doc 2017-12-29 22:58:14 +01:00
panni 4abb2aacf9 add doc 2017-12-29 22:57:27 +01:00
panni 904daaf2b3 podnapisi: retain matches for archive handling 2017-12-29 22:49:41 +01:00
panni 3044f2b1fb bump beta to 2.1.0.7 2017-12-29 22:40:56 +01:00
panni 826accb2d1 better. 2017-12-29 22:27:58 +01:00
panni d5cb35ed95 podnapisi/titlovi: archive handling: don't always assume episodes 2017-12-29 22:27:21 +01:00
panni 24c7e4be8c podnapisi/titlovi: add support for multiple subtitles in archives 2017-12-29 22:20:37 +01:00
panni abbd7283b2 opensubtitles: correctly use video.original_name for tag match; remove wrong original subliminal patch 2017-12-29 19:29:13 +01:00
panni 2980aa08d7 remove obsolete imdb_id check 2017-12-29 18:12:04 +01:00
panni e2344abbc4 add PMS year info if no tvdb ids found 2017-12-29 18:09:40 +01:00
panni 80097c3500 update libfilebot 2017-12-29 17:59:25 +01:00
panni 714f36caee parse_video: trust PMS season/episode data 2017-12-29 15:50:16 +01:00
panni fb1860d78b refiners: file_info: use utf-8 2017-12-29 15:44:58 +01:00
panni ce7acd278e allow file_info usage when rename mode is "none of the above" 2017-12-29 15:40:55 +01:00
panni ae8473183d second try 2017-12-28 22:44:42 +01:00
panni 69fb328b50 set reverse_rtl order to 50 2017-12-28 15:09:16 +01:00
panni b8d9899796 reverse_rtl test 2017-12-28 03:12:32 +01:00
panni e58fa1964d rename get_embedded_language to get_language_from_stream 2017-12-27 13:21:57 +01:00
panni 1627dee77e fix bad skip 2017-12-27 13:20:14 +01:00
panni bbac0c033f pad default menu debounce-timestamp() by the power of 1000 to circumvent empty menu when quickly refreshed 2017-12-27 01:27:51 +01:00
panni 6437e1dbad don't try to match None language 2017-12-24 13:17:36 +01:00
panni 48a9e998ff treat SDTV and HDTV the same; resolve #414 2017-12-24 01:57:53 +01:00
panni 6b6ca461f0 add AsRequested to garbage names 2017-12-23 14:12:59 +01:00
panni 7960952a30 try not to fail on unknown embedded language codes 2017-12-23 01:46:05 +01:00
panni 5ec64efb75 bump dev 2017-12-22 14:51:15 +01:00
panni 2440b2eae4 don't replace api_key by using copy.deepcopy instead 2017-12-22 14:51:00 +01:00
panni 54db2857c9 win32 storage fallback: don't use portalocker 2017-12-22 14:40:21 +01:00
panni 5b8f0b7361 add libfilebot manually 2017-12-22 04:05:23 +01:00
panni 053ebe3963 remove libfilebot submodule 2017-12-22 04:04:46 +01:00
panni 661b0367f5 refiner: rename sz_meta_file to file_info_file 2017-12-22 03:44:18 +01:00
panni 01da0697a0 add __all__ 2017-12-22 03:42:58 +01:00
panni a3d3b670ae use libfilebot 2017-12-22 03:40:48 +01:00
panni 5c64a332f8 rename sz_meta to file_info 2017-12-22 03:39:01 +01:00
panni 6fcd9b645a add gitmodules 2017-12-22 03:33:30 +01:00
panni 78da16654a add libfilebot 2017-12-22 03:29:38 +01:00
panni da20d4882b use thread.sleep instead of time.sleep 2017-12-21 23:34:45 +01:00
panni 1f31c38d24 bump dev 2017-12-21 19:27:35 +01:00
panni 5f2fd9733b log refiner settings on validateprefs 2017-12-21 19:27:24 +01:00
panni 8a225b4e09 add sz_meta_file refiner 2017-12-21 19:10:44 +01:00
pannal af05b41937 Merge pull request #411 from mmgoodnow/develop-2.1
Fix for Sonarr/Radarr/Filebot combined mode
2017-12-21 02:38:25 +01:00
Michael Goodnow d618da457e Fix for Sonarr/Radarr/Filebot combined mode 2017-12-20 20:29:50 -05:00
panni d16bdad782 force str on enum? 2017-12-21 01:29:15 +01:00
panni f6d33e73a0 filebot: log actual found filename 2017-12-20 22:46:01 +01:00
panni 7b48e445f5 make xattr logging better 2017-12-20 22:39:38 +01:00
panni 2390f904bd support attr and filebot as fallback for getfattr on default systems 2017-12-20 22:30:12 +01:00
panni 3bee3631a3 fix the default encoding order for non-script-serbian 2017-12-20 18:55:38 +01:00
panni 9da0b2d3c1 filebot: correctly display traceback on error 2017-12-20 18:25:45 +01:00
panni 7a092e4585 bump dev 2017-12-20 17:56:12 +01:00
panni 196fb6b4f6 win32: fallback to native gzip implementation when storage file couldn't be read 2017-12-20 17:55:44 +01:00
panni 9507002961 allow usage of sonarr, radarr and filebot at the same time 2017-12-20 17:51:51 +01:00
panni 943ed38c2f correctly lock history storage; bump dev 2017-12-20 04:28:15 +01:00
panni 496619b492 HistoryStorage: don't fail on Language None values 2017-12-20 04:18:37 +01:00
panni 4772b42d64 adapt HistoryStorage to be more like SubtitleStorage 2017-12-20 04:05:10 +01:00
panni 5bc10953cc update dev 2017-12-20 03:07:29 +01:00
panni 18deca202d Merge branch 'gzip_crc_test' into develop-2.1
# Conflicts:
#	Contents/Libraries/Shared/subzero/subtitle_storage.py
2017-12-20 03:05:14 +01:00
panni 84bc4b018d create win32 specialcase for subtitle storage; use zlib directly instead of gzip 2017-12-19 15:01:16 +01:00
panni 1a0598a47a add portalocker 2017-12-19 14:41:52 +01:00
panni 973d117887 add portalocker 2017-12-19 14:37:24 +01:00
panni c284c8f336 use zlib directly 2017-12-19 14:31:41 +01:00
panni df69cbc84c flush with Z_FINISH in close() 2017-12-19 13:54:19 +01:00
panni 646453887f use w+b; use temp_fn 2017-12-19 13:22:34 +01:00
panni 189d617005 re-add temp fn handling; remove zero-seek 2017-12-19 13:21:06 +01:00
panni 554cd8bfe7 add one more exception layer; add debug messages 2017-12-19 05:07:31 +01:00
panni 79505dea20 only lock the current json file instead of every storage file 2017-12-19 05:05:36 +01:00
panni 5358a46b7e remove gzip crc stuff; try more f.seek/flush stuff for windows 2017-12-19 05:01:23 +01:00
panni aff1599ce7 don't fail on missing log path values 2017-12-16 20:17:47 +01:00
panni bc7df1c8a1 don't fail when an embedded stream has no language code set 2017-12-15 18:46:40 +01:00
panni f1df1d25a8 embedded subtitles: show stream title as well if available 2017-12-15 15:54:42 +01:00
panni 47d9b472ed re-enable subtitle storage creation on nothing-downloaded; re-enable atomic os.rename after writing to subtitle storage 2017-12-15 14:57:50 +01:00
panni 89ab8c34d8 submod: HI: HI_before_colon: remove redundant regex data 2017-12-15 01:56:56 +01:00
panni 600498f9c1 submod: HI: be smarter about HI_before_colon 2017-12-15 01:56:32 +01:00
panni 845fbcd2ac submod: HI: fix HI_before_colon 2017-12-15 01:53:25 +01:00
panni 3cc9f19b8f ignore CRC when reading GZIP file 2017-12-14 23:19:37 +01:00
panni e68c642005 fix saving of subtitles 2017-12-14 22:50:29 +01:00
panni 81ae950577 refiners: store scene_name inside video.original_name instead of overwriting video.name (which results in badly named subtitle files) 2017-12-14 22:37:53 +01:00
panni 62b4496cd6 refiners: finalize sonarr/radarr integration for now 2017-12-14 16:56:18 +01:00
panni 29b7292d15 refiners: integrate sonarr/radarr/filebot settings 2017-12-14 16:30:30 +01:00
panni 791058a2d2 config: if sonarr or radarr given, use tag search on osub as well 2017-12-14 16:24:14 +01:00
panni b6c108faef config: add media renaming settings, sonarr/radarr refiner settings; remove "provider.opensubtitles.use_tags" 2017-12-14 16:22:56 +01:00
panni 72d592866a refiners: drone: also set video.name to the newly found scene_name if found 2017-12-14 16:09:56 +01:00
panni 4052993246 refiners: drone: fill release_group if scene_name not available; use tvdb_id and imdb_id for matching if possible 2017-12-14 16:06:12 +01:00
panni a24f6e7789 refiners: filebot: enable win32 (duh) 2017-12-14 16:05:24 +01:00
panni 0d0fd49924 add fixme for releaseGroup 2017-12-14 04:04:38 +01:00
panni 139dcb409e refiners: drone: add radarr support (>=0.2.0.897) (WIP) 2017-12-14 03:45:48 +01:00
panni 707e6e7d13 Merge remote-tracking branch 'origin/develop-2.1' into develop-2.1
# Conflicts:
#	Contents/Libraries/Shared/subliminal_patch/refiners/filebot.py
2017-12-14 01:47:17 +01:00
panni 36abb29ddd add win32 support for filebot extended attributes 2017-12-14 01:46:51 +01:00
panni a700fe761e add win32 support for filebot extended attributes 2017-12-14 01:46:00 +01:00
panni 7577164471 Merge branch 'develop-2.0' into develop-2.1
# Conflicts:
#	Contents/Info.plist
2017-12-14 01:37:04 +01:00
panni 1bce743ea3 log python version on validateprefs 2017-12-13 22:59:52 +01:00
panni f85ab0364a more subtitle storage tests 2017-12-13 22:55:12 +01:00
pannal eb3a0d52fd Update README.md 2017-12-12 15:20:06 +01:00
panni b8cd295a12 submod: common: remove redundant interpunction
(cherry picked from commit d3ff49e)
2017-12-12 15:18:54 +01:00
panni d3ff49ee0c submod: common: remove redundant interpunction 2017-12-12 15:02:20 +01:00
panni d4833f1e6e Merge remote-tracking branch 'origin/master' 2017-12-12 13:13:44 +01:00
panni 548483ed2f back from dev 2017-12-12 13:13:31 +01:00
panni f6f39b97c8 release 2.0.33.1871 2017-12-12 13:13:03 +01:00
panni 21ea5e0df9 don't error on "unexpected termination" 2017-12-12 13:05:56 +01:00
panni 3cbab6a5c7 fix MPL2 newline parsing; add format info when converting subtitle format 2017-12-12 12:52:24 +01:00
panni f19f39ba16 add language to storage log message 2017-12-12 12:23:39 +01:00
panni b9c0fd9a1c use storage lock when saving, as well 2017-12-11 13:30:52 +01:00
panni ce520e6944 bump dev 2017-12-10 15:11:33 +01:00
panni 0ad62a95e2 add storage lock to circumvent race condition when reading a subtitle storage item 2017-12-10 14:44:01 +01:00
panni 8f62a69e06 add more info logging for subtitle storage 2017-12-10 14:32:06 +01:00
panni 34bbb98f7f add fixme 2017-12-10 03:53:47 +01:00
panni 26cd6bb955 simplify darwin xattr to lambda 2017-12-10 03:50:28 +01:00
panni 97534c633d add filebot support for OSX/darwin 2017-12-10 03:47:32 +01:00
panni 0a9a2963c2 Merge branch 'develop-2.0' into develop-2.1 2017-12-10 03:24:39 +01:00
panni 05afc39a35 remove own single_request method because it isn't used anymore 2017-12-09 14:52:34 +01:00
panni 84fdc1f55f possibly fix response handling 2017-12-09 06:04:22 +01:00
panni 3b03c3c2bb be smarter when removing crap from file/foldernames 2017-12-09 05:10:00 +01:00
panni 980f62686d add linux filebot refiner 2017-12-09 05:09:41 +01:00
panni 202f2532a6 Merge branch 'develop-2.0' into develop-2.1
# Conflicts:
#	Contents/Info.plist
2017-12-09 03:51:07 +01:00
panni 78d193a2fd reduce log spam 2017-12-09 03:11:57 +01:00
panni 0c109b0f27 submod: common: fix CM_starting_spacedots 2017-12-09 03:05:37 +01:00
panni e33c0ab86c normalize line endings; skip empty lines; 2017-12-09 02:50:27 +01:00
pannal 3a0189069d Update README.md 2017-12-03 03:18:10 +01:00
panni 2688bd9edd fix typo 2017-12-03 03:17:11 +01:00
panni 889f7bd2d7 back to dev 2017-12-03 03:15:03 +01:00
panni 0561c2d640 back from dev 2017-12-03 03:14:31 +01:00
panni b76f1ad004 Merge branch 'develop-2.0'
# Conflicts:
#	Contents/Info.plist
2017-12-03 03:14:15 +01:00
panni cde6153f64 2.0.33.1849 2017-12-03 03:13:36 +01:00
panni 12bdaa510b 2.0.33.1849 2017-12-03 03:13:06 +01:00
panni 0e6a4acf80 bump dev to 2.0.33.1849 2017-12-02 23:40:34 +01:00
panni e7785f7094 submod: do OCR fixes before HI; submod: OCR: fix broken HI tag colons 2017-12-02 23:39:13 +01:00
panni 2dcf39eff8 submod: OCR: fix more broken "Hey"'s; fix WholeWord handling at beginning or end of line or both 2017-12-02 23:38:26 +01:00
panni 1125c5c133 submod: common: remove "xxxx downloaded from yyyy" lines 2017-12-01 22:09:34 +01:00
panni faf7cedfe2 remove debug print 2017-11-27 22:57:42 +01:00
panni 52a6127625 add IETF fixme 2017-11-27 22:57:30 +01:00
panni b552f6f9fa more ietf stuff; keep the original country in an alpha3 mapping instead of storing it on the Language instance 2017-11-27 22:55:29 +01:00
panni 9b558fcce2 deduplicate languages on MissingSubtitles 2017-11-27 20:03:48 +01:00
panni c8eae6df6c compare stringified languages when determining missing ones 2017-11-27 20:00:38 +01:00
panni 5f50bd7095 compare stringified languages when determining missing ones 2017-11-27 19:57:53 +01:00
panni c8617218dc again 2017-11-26 16:37:52 +01:00
panni a8ceae993e create actual copies of Language instances before trying to modify them 2017-11-26 16:34:10 +01:00
panni a72a8854c9 use copy of lang list 2017-11-26 15:47:02 +01:00
panni dc658db9ba scan_video: ensure checking lowercase stream codec name 2017-11-26 05:48:49 +01:00
panni 8d8ecfe9e1 MissingSubtitles: remove obsolete var dec 2017-11-26 05:34:58 +01:00
panni 4b77e63857 MissingSubtitles: more 2017-11-26 05:33:49 +01:00
panni 19aa800324 MissingSubtitles: streamline 2017-11-26 05:29:23 +01:00
panni 85adb6b0e3 MissingSubtitles: honor treat undefined as first language properly 2017-11-26 05:25:05 +01:00
panni bd2523821d add TEXT_SUBTITLE_EXTS to config and use the variable 2017-11-26 05:19:59 +01:00
panni c1838a3c84 correctly skip unwanted subtitle extensions in MissingSubtitles 2017-11-26 05:14:59 +01:00
panni d836f8f5d0 remove plex_activity logging handler 2017-11-26 00:00:37 +01:00
panni 37491c134e bump dev 2017-11-25 23:56:11 +01:00
panni aa6efb7e5c fix detection of PMS media stream language codes 2017-11-25 23:55:40 +01:00
panni e4d990c06d use babelfish language matching 2017-11-25 19:29:22 +01:00
panni 01288afac0 potential fix for unmatched language 2017-11-25 19:07:56 +01:00
panni 579e3ca3ab potential fix for strptime threadpool error 2017-11-21 10:01:55 +01:00
pannal f61bc3ce7c Update README.md 2017-11-20 14:29:59 +01:00
panni cc6004e981 add vip affiliate link 2017-11-20 14:24:10 +01:00
panni 35eb037d05 bump dev 2017-11-19 03:32:33 +01:00
panni 1eb0e4419d bump dev 2017-11-19 03:31:57 +01:00
panni 7b5ca875dc Merge remote-tracking branch 'origin/develop-2.1' into develop-2.1 2017-11-19 03:31:40 +01:00
panni 2d22a6c383 Merge branch 'develop-2.0' into develop-2.1
# Conflicts:
#	Contents/Code/interface/item_details.py
#	Contents/Info.plist
#	Contents/Libraries/Shared/subliminal_patch/core.py
2017-11-19 03:31:27 +01:00
panni f4884f1c18 opensubtitles: try using previous token 2017-11-14 19:44:40 +01:00
panni 27cc3bd185 bump dev 2017-11-12 17:00:23 +01:00
panni 9b894c2ea7 add explicit force endpoint for item refresh 2017-11-12 16:57:33 +01:00
panni a341808873 #300 add recently played blacklist endpoints 2017-11-12 16:52:51 +01:00
panni 8927513f8e recently played: don't show anything but Movie and Episode items; increase list size to 40 (was 20) 2017-11-12 16:51:30 +01:00
panni 84436dfa94 #300 add optional language to blacklist_all endpoint 2017-11-12 16:29:40 +01:00
panni 2b73f633e0 #300 add blacklist_all endpoint for bookmarklet usage 2017-11-12 16:25:14 +01:00
panni 3d7a452141 fix #300 return empty dicts instead of None when in doubt 2017-11-12 04:12:02 +01:00
panni 38a8557311 update user agent list 2017-11-12 04:01:16 +01:00
panni 79672923c5 bump dev 2017-11-12 03:55:57 +01:00
panni 3842182a83 remove debug prints 2017-11-12 03:55:20 +01:00
panni 8b0d359e0b Merge remote-tracking branch 'origin/develop-2.0' into develop-2.0 2017-11-12 03:54:23 +01:00
panni db2903edfd #300 full subtitle blacklist integration 2017-11-12 03:54:13 +01:00
panni 18d22a72bd #300 basic subtitle blacklist menu/storage implementation 2017-11-12 02:00:37 +01:00
pannal 402cfc1632 Update README.md 2017-11-11 04:18:10 +01:00
panni 9dec7e4971 bump dev 2017-11-11 04:11:49 +01:00
panni 931c224247 submod: remove_tags: make non-default 2017-11-11 04:11:21 +01:00
panni f6ee6d4027 remove resolved fixme 2017-11-11 04:02:30 +01:00
panni 332d41fb25 add fixme 2017-11-11 04:00:28 +01:00
panni 8303af25fb add generic get_part function; add fixme 2017-11-11 03:33:02 +01:00
panni ee02bdb19a advanced menu: speed up batch mods 2017-11-11 03:18:40 +01:00
panni e674132d5a bump dev 2017-11-11 03:01:27 +01:00
panni c9eb8bc7be submod: OCR: en/hrv update OCR dicts 2017-11-11 02:59:33 +01:00
panni 2076a2c6d0 submod: OCR: en: fix more "I" = "L" occurrences 2017-11-11 02:55:16 +01:00
panni 32c0f09b16 submod: HI: be even more aggressive at handling brackets 2017-11-11 02:52:42 +01:00
panni 1264cabb3f submod: remove_tags: fix newlines 2017-11-11 02:49:02 +01:00
panni fb722d0581 opensubtitles: raise timeout to 10 seconds (was 4) 2017-11-11 02:22:18 +01:00
panni cb00ab9610 submod: make remove_tags configurable and a default mod 2017-11-11 01:53:01 +01:00
panni 4102a1c8fd submod: removetags: show in menu 2017-11-11 01:47:56 +01:00
panni af6d7a1ae2 update submod test and test.srt 2017-11-11 01:41:58 +01:00
panni 36cae6311a submod: add remove_tags modification 2017-11-11 01:41:49 +01:00
panni 327bb31daa submod: color: apply colors at the end of processing, fixing possible broken color tags 2017-11-11 01:41:29 +01:00
panni 8c2effe337 submod: add postprocessing mods 2017-11-11 01:40:35 +01:00
panni da59adddf4 submod: drop "file" reference after modifying 2017-11-11 01:38:53 +01:00
panni 6f3c806a21 fix adv_tag=None exception for external subtitles without advanced tag 2017-11-10 09:48:20 +01:00
panni 3d119bcd98 fix typo 2017-11-09 11:42:05 +01:00
panni 6264c21e23 fix #384 2017-11-09 11:39:49 +01:00
panni d5d6aa0bd5 add throttling between searches in download_best_subtitles 2017-11-09 11:32:01 +01:00
panni 7ad49fa65a opensubtitles: disable token reusage for now 2017-11-08 19:37:30 +01:00
panni 5b8dfb48c3 update dev 2017-11-08 19:30:05 +01:00
pannal 4d557be99a Update VIP server to new URL; don't log out automatically 2017-11-08 14:01:09 +01:00
panni a7e022c6f4 move VIP benefits note to VIP switch 2017-11-07 19:35:55 +01:00
panni fc3f5dad4f improve opensubtitles VIP server handling; set VIP to http by default for the time being 2017-11-07 19:34:53 +01:00
panni fa42669580 add opensubtitles VIP server handling 2017-11-06 19:20:17 +01:00
panni 0c73de726a log opensubtitles response headers; add headers to response object 2017-11-05 17:05:59 +01:00
panni ea87d21977 Merge branch 'develop-2.0' 2017-11-05 06:51:44 +01:00
panni a9e9e8cf44 debounce for 10 seconds 2017-11-05 06:41:50 +01:00
panni 9905cd307f add debug log 2017-11-05 06:37:10 +01:00
panni 92ea32b52c debounce main thread for 5 seconds 2017-11-05 06:32:49 +01:00
panni 4c56f7583a add 10 seconds timeout on multiple refreshes 2017-11-05 05:42:54 +01:00
panni fc3050ef3d add 10 seconds timeout on multiple refreshes 2017-11-05 05:41:36 +01:00
panni 29c63e11bd Merge branch 'master' into develop-2.0 2017-11-05 05:09:09 +01:00
panni 64cbe21f6e fix json 2017-11-05 05:08:52 +01:00
panni a56bb97d45 decrease retry amount; increase retry timeout from 1 to 10 seconds; increase retry download from 2 to 6 seconds; add OS VIP note; remove 1-3 hours missing subtitles scheduler options
(cherry picked from commit 6edc6a1)
2017-11-05 05:03:43 +01:00
panni 6edc6a1c6d decrease retry amount; increase retry timeout from 1 to 10 seconds; increase retry download from 2 to 6 seconds; add OS VIP note; remove 1-3 hours missing subtitles scheduler options 2017-11-05 04:59:51 +01:00
pannal 01c656ffb2 quote value not key 2017-11-05 03:44:33 +01:00
panni 078c6d0c21 back to dev 2017-11-05 03:37:46 +01:00
panni 580a8c0f3e update debug logging 2017-11-05 03:33:15 +01:00
panni f0258349bf default getattr to None 2017-11-05 03:32:23 +01:00
panni d9080eeb80 add doc 2017-11-05 03:30:54 +01:00
panni b504744876 cleanup 2017-11-05 03:29:34 +01:00
panni 638e8b5b47 #319 implement drone api client; implement first sonarr refiner proof of concept 2017-11-05 03:28:34 +01:00
panni 9b9c40f310 add sonarr integration settings 2017-11-05 02:27:01 +01:00
panni cc3a1db879 Merge branch '#290_extract_subtitles' into develop-2.1 2017-11-05 02:11:01 +01:00
panni a16312803e Merge branch 'develop-2.0' into develop-2.1
# Conflicts:
#	Contents/Info.plist
2017-11-05 02:10:54 +01:00
panni 206f9fa5ad release 2.0.29.1767 2017-11-04 23:48:52 +01:00
panni f20e97574a use code shortcut when extracting subtitles 2017-11-04 23:28:35 +01:00
panni 51764f0ce0 submod: global: fix paragraph as music sign
(cherry picked from commit 7da48b7)
2017-11-04 14:54:43 +01:00
panni e698b9d608 add more garbage names to remove-crap-from-filename in addition to scrambled/obfuscated
(cherry picked from commit e2a7cc6)
2017-11-04 14:53:21 +01:00
panni e2a7cc6b45 add more garbage names to remove-crap-from-filename in addition to scrambled/obfuscated 2017-11-04 14:52:18 +01:00
panni 6eaf307be9 further support for embedded-forced
(cherry picked from commit c3e7e33)
2017-11-04 14:47:43 +01:00
panni 9743af5db0 handle "embedded-forced"
(cherry picked from commit fca052b)
2017-11-04 14:47:39 +01:00
panni 07d02ad75e rename menu entries 2017-11-04 14:41:52 +01:00
panni 91f51a27af extract embedded subtitle with or without default mods 2017-11-04 14:40:01 +01:00
panni a60318260a display language list instead of embedded subtitles amount in menu 2017-11-04 14:35:22 +01:00
panni c3e7e336b5 further support for embedded-forced 2017-11-04 04:01:39 +01:00
panni 0b1037b497 add fixme for video speedup for cases where we don't need the actual parsed video data 2017-11-04 03:44:38 +01:00
panni 7da48b7dc5 submod: global: fix paragraph as music sign 2017-11-04 03:31:07 +01:00
panni 73bcfc6151 re-add debounce 2017-11-04 03:16:51 +01:00
panni dfe1a16aa0 suppress subprocess output 2017-11-04 03:11:18 +01:00
panni 4f0e685feb first proof of concept attempt of extracting embedded subtitles 2017-11-04 03:07:32 +01:00
panni fca052b308 handle "embedded-forced" 2017-11-04 01:24:27 +01:00
panni c449f42444 never auto-save on load_or_new by default 2017-11-03 22:55:56 +01:00
panni 5ec956943c save subtitle info to storage: don't immediately save in certain load_or_new cases 2017-11-03 22:52:24 +01:00
panni 1ad696be6d try fixing race condition when saving subtitle storage file by writing a tmp file first 2017-11-03 22:30:57 +01:00
panni 92b3b762b2 add fixme for findbetter: check filesystem for existence 2017-11-01 03:13:16 +01:00
panni 0b29a57079 back to dev 2017-11-01 02:42:54 +01:00
panni 0dee015181 release 2.0.29.1756 2017-11-01 02:42:22 +01:00
panni 2f1294a119 release 2.0.29.1756 2017-11-01 02:26:13 +01:00
panni e609e55710 Merge branch 'develop-2.0' 2017-11-01 02:24:00 +01:00
panni b752ce8572 bump dev 2017-10-31 04:04:11 +01:00
panni de59c68328 if ietf parts should be ignored, normalize them when searching and in missing subtitles menu 2017-10-31 04:03:39 +01:00
panni f92e78e8be correctly show languages with script or country in menus 2017-10-31 04:02:19 +01:00
panni 9abc611f1e separate IETF setting into display and actual normalization 2017-10-31 04:01:59 +01:00
panni 8e42f61a52 fix #377 2017-10-30 22:56:47 +01:00
panni 48fd3f977d clear missing subtitles menu data after manual subtitle download 2017-10-30 22:54:20 +01:00
panni 451636e0b3 clear missing subtitles menu data once SZ gets an update call 2017-10-30 22:53:09 +01:00
panni 1fc810470b missing subtitles menu: fix wrong bracket 2017-10-30 22:34:43 +01:00
panni 1c96efdafa missing subtitles menu: add alpha2 country to language if applicable 2017-10-30 21:54:31 +01:00
panni 8fb0711973 add fixme for ietf handling 2017-10-30 21:48:50 +01:00
panni aabb4f2c13 bump dev 2017-10-30 18:49:14 +01:00
panni eb1c5d976f #339 also ignore country part in existing subs; possible fix 2017-10-30 17:36:10 +01:00
panni fd89533903 #339 re-add previously ignored country attribute to languages after determining the missing ones 2017-10-30 17:07:26 +01:00
panni d5ec60f0f6 bump dev 2017-10-30 11:25:20 +01:00
panni 18b896ec0b Revert "add warning icon on missing permissions"
This reverts commit 0e4a936
2017-10-30 11:24:58 +01:00
panni af93e1edec Revert "add warning icon on missing permissions"
This reverts commit 0e4a936
2017-10-30 11:24:21 +01:00
panni a8a5b4ad16 #373 if forced not explicitly wanted, treat only forced subtitle existing as non-existent 2017-10-30 10:57:30 +01:00
panni 0d40883929 fix #354 2017-10-29 14:45:01 +01:00
panni 3b6645156d #339 don't modify config.lang_list, create a copy instead 2017-10-29 14:13:37 +01:00
panni 7596346fcd bump dev 2017-10-29 14:10:56 +01:00
panni 877ff60077 #339 fix "Treat IETF language tags as ISO 639-1" handling for embedded subtitles 2017-10-29 14:07:35 +01:00
panni 928da6e679 #339 circumvent VTT duplication 2017-10-29 13:39:45 +01:00
panni c1a9ccef3c bump dev 2017-10-28 04:08:39 +02:00
panni 5f41c85281 remove "highly suggested" note in prefs 2017-10-28 03:55:40 +02:00
panni 18ef38b90b fix #366; bail out earlier if necessary; add fixme; fix absolute dir handling 2017-10-28 03:40:22 +02:00
panni 7b155e6b31 fix #366; missing subtitles: check for actual subtitle existence 2017-10-28 03:35:39 +02:00
panni ba4d7b2199 bump dev 2017-10-28 02:49:24 +02:00
panni 869387af34 fix #366; missing subtitles: honor those we've already downloaded, even if external subtitles are ignored 2017-10-28 02:48:37 +02:00
panni 5b16a80730 add fixme 2017-10-28 02:26:24 +02:00
panni adf1190584 fix #373; even if external subtitles shouldn't be considered, don't re-download if already downloaded before (and existing) 2017-10-28 02:22:28 +02:00
panni 1c16cf5926 fix error detecting uppercase extensions 2017-10-25 10:25:34 +02:00
panni a833cf7b0b try to circumvent #367 2017-10-24 22:25:55 +02:00
panni 62a35e7ced submod: swe: add Ĺ to Å 2017-10-20 12:59:16 +02:00
panni 7b005760c1 emphasize more 2017-10-19 23:42:08 +02:00
panni b07631f0b5 rename scan settings to be more clear; reorder them 2017-10-19 23:39:52 +02:00
panni 595d8a8f53 add more debug info when json data couldn't be loaded
(cherry picked from commit 35321b0)
2017-10-17 14:59:23 +02:00
panni 35321b00cd add more debug info when json data couldn't be loaded 2017-10-17 04:06:48 +02:00
panni 8928f19818 back to dev 2017-10-16 19:08:04 +02:00
panni 76cc8fad47 release 2.0.26.1715 2017-10-16 19:07:40 +02:00
panni cb851d8519 update to DEV 2.0.26.1715 2017-10-16 18:51:00 +02:00
panni af0aff3aee Merge remote-tracking branch 'origin/master' into develop-2.0 2017-10-16 18:50:14 +02:00
pannal 6d4099c79c Merge pull request #360 from andreashoyer/patch-1
Update item_details.py
2017-10-16 18:49:28 +02:00
pannal d9672e179c Merge pull request #347 from raduc/patch-1
Update localmedia.py
2017-10-16 18:49:18 +02:00
panni 1e291343fe #362 don't fail on not existing item; don't call Plex twice for item info 2017-10-16 18:40:17 +02:00
panni a5d0bf68fd #362 don't fail on migration error 2017-10-16 18:36:17 +02:00
Andreas Høyer b8e2b524e1 Update item_details.py
There is a small issue in the Contents/Code/interface/item_details.py file: on line 279 it says

seen.append(current_id)

but it should be

seen.append(subtitle.id)

to add the correct subtitle id to the `seen` collection.
2017-10-12 01:04:49 +02:00
panni 6abd062477 fix handling of missing audio_codec info 2017-09-28 17:19:10 +02:00
raduc fbcc2644bf Update localmedia.py
There is an issue with subtitle ignoring ext_match_strictness if a custom subtitle folder is defined. Some other people have noted it (https://www.reddit.com/r/PlexACD/comments/6ileio/has_anyone_of_you_found_a_way_to_have_subzero/djphdly/).
I looked at the code and the issue is: if adding a custom subtitle absolute path folder, global_folders will be true and if filename_matches_part is false, the flow will go through this if case:
if global_folders and not filename_matches_part:
but now if the matching file is not in a global folder skip_path is false and the flow will continue, though it should still check match strictness in the next code.
If we change elif to if all will be fine, files that are matching but not in global folders will still go through regular processing and use the strictness defined.
2017-09-24 14:10:23 -07:00
panni 34b05c8c17 reset default addic7ed boost to 19 (was 21) 2017-09-02 04:12:29 +02:00
panni e3dce02716 bump dev 2017-09-02 04:06:57 +02:00
panni ed8a70b5c8 Merge remote-tracking branch 'origin/master' into develop-2.0
# Conflicts:
#	Contents/Info.plist
2017-09-02 04:05:41 +02:00
panni 35944b0776 bump dev 2017-09-02 04:02:08 +02:00
panni 2f80ee5b39 titlovi: handle multiple release groups and format matching results 2017-09-02 03:58:21 +02:00
panni 280eb71ae4 submod: OCR fixes: swe: replace ĺ with å inside words 2017-09-02 01:51:17 +02:00
pannal 9462b1b175 Update README.md 2017-08-29 10:38:55 +02:00
pannal 874204838d add titlovi to readme 2017-08-23 15:45:09 +02:00
panni 0e4a936176 add warning icon on missing permissions 2017-08-22 04:24:37 +02:00
panni 5089708e2d update provider_test.sh 2017-08-20 05:42:48 +02:00
panni e17367aa13 back from dev 2017-08-20 04:07:42 +02:00
panni 26be0978ee release 2.0.26.1695 2017-08-20 04:06:50 +02:00
panni de1aea9dd2 low_impact: indicate low impact mode 2017-08-20 03:49:48 +02:00
panni 4c143be906 low_impact: don't use plex_part when entering list available subtitles 2017-08-20 03:41:29 +02:00
panni b83cea1073 low_impact: don't scan video file when entering list available subtitles 2017-08-20 03:29:48 +02:00
panni 2418b67089 add low impact mode for remotely mounted filesystems 2017-08-20 03:29:28 +02:00
panni 7e550cf916 changelog updated 2017-08-20 00:47:02 +02:00
panni dce72fcb08 release 2.0.26.1689 2017-08-20 00:43:10 +02:00
panni adede7bb2e submod: OCR: update eng and hrv OCR replace dictionaries; fix ". L am huge" 2017-08-20 00:42:11 +02:00
panni 377799ace3 release 2.0.26.1687 2017-08-20 00:31:53 +02:00
panni 02a822c630 titlovi: try selecting the correct subtitle inside a multi-file archive 2017-08-20 00:29:16 +02:00
panni 8101bca753 do that correctly. 2017-08-19 23:35:14 +02:00
panni 40e177ded0 clamp request timeout to 45 seconds max 2017-08-19 23:30:25 +02:00
panni 13f732d733 increase default PMS API request timeout to 15 (from 10); add preference for that 2017-08-19 23:21:30 +02:00
panni fbca4cbf8c bump dev 2017-08-19 15:19:24 +02:00
panni 45c8cd1536 titlovi: show release names in manual listing 2017-08-19 15:18:33 +02:00
panni da293bbc2f scheduler: forgot time.sleep for queue worker; fixes #337 2017-08-19 15:10:26 +02:00
panni 7991568d6d titlovi: disable for forced subs 2017-08-19 07:07:02 +02:00
panni 5fc1c8cbb1 "fix" provider_registry 2017-08-19 07:06:47 +02:00
panni 596981aca2 titlovi: fix stuff 2017-08-19 07:03:02 +02:00
panni 6d55197218 correctly remove subscenter 2017-08-19 04:19:39 +02:00
panni 85cb813a75 bump dev 2017-08-19 04:18:15 +02:00
panni 5f99319985 #320 adapt titlovi, first attempt 2017-08-19 04:15:55 +02:00
pannal f34c76eb90 Merge pull request #320 from viking1304/develop-2.0
New provider Titlovi.com
2017-08-19 04:06:47 +02:00
panni adb08aff75 #316 remove subscenter credentials 2017-08-19 04:01:39 +02:00
panni 93f8bf561b fix #329; re-implement old SARAM task as LegacySearchAllRecentlyMissing for first run 2017-08-19 03:59:22 +02:00
panni 52e391aa83 bump dev version 2017-08-19 02:19:54 +02:00
panni 751e9fc0c5 #335: change naming of find missing subtitles menu item 2017-08-19 00:35:59 +02:00
panni 77b0b9dc6b ftfy: unfix ft ligature 2017-08-18 17:00:57 +02:00
panni 5729552206 ftfy: fix ft ligature 2017-08-18 16:47:23 +02:00
panni 929f53ac13 ftfy: fix LIGATURES 2017-08-18 16:43:33 +02:00
viking1304 c6b983ea6c Merge pull request #1 from pannal/develop-2.0
Keeping a fork up-to-date
2017-08-15 20:47:25 +02:00
panni 419bee76e2 encodings: eastern europe: try windows-1250 first, then 8859-2; possibly fixes #333 2017-08-12 04:44:15 +02:00
panni 2f3180cc07 don't stop scheduler tasks on validateprefs 2017-08-10 11:18:58 +02:00
panni b5eb917e10 format/release_group detection: exit earlier 2017-08-09 16:38:37 +02:00
panni 9fed8d6335 inject our own guess_matches; fixes #325 #330 2017-08-09 16:35:51 +02:00
panni becbdba56e scheduler: clear queue after restart 2017-08-09 15:27:29 +02:00
panni 85b9373760 guessit: update to 2.1.4 2017-08-09 15:17:46 +02:00
panni c069541cee availablesubs: handle possible exception; add debug log 2017-08-09 14:52:14 +02:00
panni 4c0f20694d Merge branch 'windows_encoding_bug' into develop-2.0 2017-08-09 14:41:47 +02:00
panni a99175d46c podnapisi: fix decompose 2017-08-09 13:42:05 +02:00
panni 4bab9b9f5b addic7ed: fix suggestion.decompose 2017-08-09 12:58:46 +02:00
panni a5ea603116 scheduler: adjust logging 2017-08-09 12:56:00 +02:00
panni 8be6d9bd77 scheduler: separate queue and scheduler workers 2017-08-09 12:54:43 +02:00
panni 9a9043aa67 DownloadSubtitleMixin: fix usage of set_refresh_menu_state; SearchAllRecentlyAddedMissing: add debug note 2017-08-09 12:47:57 +02:00
panni 7ed58386e5 subscenter: disable provider for now 2017-08-07 19:02:49 +02:00
panni 51660449a8 legendastv: use single_value=True when calling guessit; fixes #330 2017-08-07 18:52:03 +02:00
panni af1a8d13f1 #328 add playback activities to disabled features warning 2017-08-06 01:24:03 +02:00
panni 8e13e6c181 #328 add warning if metadata folder resides in special characters-folder on windows 2017-08-06 01:22:38 +02:00
panni de915ba840 use SZProviderPool instead of SZAsyncProviderPool on windows with special characters in path; fixes #328 2017-08-06 01:17:42 +02:00
panni 834922aa35 don't fail on unavailable Network.PublicAddress 2017-08-06 00:54:49 +02:00
panni 2d4e67c268 remove support for Activities on windows with special chars in path; possibly fixes #328 2017-08-06 00:54:23 +02:00
panni 48a036a2bb subliminal: remove support for multiprocessing on windows with special chars in path; possibly fixes #328 2017-08-06 00:45:58 +02:00
panni 140fb72aeb ftfy.chardata: remove unicode_literals import; possibly fixes #328 2017-08-03 23:20:15 +02:00
panni 2d4c3790a6 babelfish: remove unicode_literals import; possibly fixes #328 2017-08-03 18:50:23 +02:00
panni 74860fe2ee catch errors that may happen in langprefs2/3 2017-08-03 09:57:30 +02:00
panni aab69705b6 reset language settings 2017-08-03 09:54:21 +02:00
panni d6c88621f6 rarfile: make exception handler broader; scheduler: set "running" correctly to false in clear_task_data 2017-08-01 19:24:37 +02:00
panni bd275601aa back to dev 2017-07-31 18:37:07 +02:00
viking1304 d6dd93b9d0 New provider Titlovi.com
Subtitles for movies and TV shows from Titlovi.com

Supported languages:
* English
* Bosnian
* Croatian
* Macedonian
* Serbian (Cyrillic)
* Serbian (Latin)
* Slovenian
2017-07-24 00:37:12 +02:00
panni 47d61bb83a back to dev 2017-07-02 16:04:16 +02:00
panni d5850afcc2 Merge branch 'master' into develop-2.1 2017-07-02 16:03:56 +02:00
panni 0c48b0799e Merge remote-tracking branch 'origin/develop-2.0' into develop-2.1
# Conflicts:
#	Contents/Info.plist
2017-07-02 16:03:50 +02:00
panni 96a8c33767 back to dev 2017-06-30 14:38:59 +02:00
402 changed files with 39174 additions and 59641 deletions
+2 -1
View File
@@ -55,4 +55,5 @@ docs/_build/
# pycharm
.idea
icon.psd
icon.psd
main-icon.psd
View File
+324
View File
@@ -1,4 +1,328 @@
2.5.4.2541
- core: try retrieving advanced_settings.json from the path given, which may be a file path or a directory
- menu: ignore options: fix plugin not responding, fix unicode strings; resolve #509
- providers: addic7ed: fix usage/adapt to new show search method
- providers: opensubtitles: properly handle responses again, re-enable automatic throttling based on those (broken since XMLRPC handler rewrite)
2.5.4.2527
- core: bugfixes
- core: get_item: don't fail on socket timeout; fixes #498
- core: fix scandir encoding errors; #453 #461 #441
- core: clamp menu history to 25 items
- add UnRAR for aarch64 (untested), arm (armv5tel, untested), linux/i386, MacOSX/i386; fixes #311
- add 3rd party licenses
- menu: new debounce/history mechanism; fixes the back button usage
- config: add custom path option for advanced_settings.json
- providers: opensubtitles: re-add support for throttling based on HTTP response codes, which got ditched due to new connection interface
- providers: legendastv: disable if unrar wasn't found
- providers: addic7ed: reduce show cache to 1 week
- advanced settings: sonarr/radarr: make ssl verification optional
- advanced settings: opensubtitles: add configurable connection timeout
- refiners: drone: use certifi for HTTPS connections
- tasks: SearchAllRecentlyAddedMissing: fix ZeroDivisionError in edge cases; fixes #496
2.5.3.2452
- core: update certifi to 2018.01.18
- core: metadata storage: only allow one subtitle per language
- core: metadata storage: only parse latest metadata subtitle in localmedia
- core: metadata storage: kill existing metadata subtitles explicitly upon storing a new one
- core: metadata storage: fix selecting current subtitle from menu
- providers: opensubtitles: use new requests based transport by default, finally fixes ResponseNotReady properly
- providers: opensubtitles: mask token in logs
- providers: don't check for hash validity if it isn't verifiable (fixes napiprojekt, #478)
- submod: common: extend non_word_only matching
- submod: common: reduce multi spaces to one
- submod: OCR: fix III'll=I'll
- advanced settings: add option to use HTTP instead of HTTPS for OpenSubtitles
2.5.3.2422
- core: don't fail on embedded subtitle streams without language code set, fixes #473
- providers: catch ResponseNotReady in list_subtitles_provider as well (partly fixes OpenSubtitles)
- providers: don't use retry logic in case of ResponseNotReady
- providers: addic7ed: use new search endpoint
2.5.3.2414
- core: expand user agent list
- core: update subliminal to 4ad5d31
- core: treat 23.976, 23.98, 24.0 fps as equal
- core: correctly skip blacklist entries when iterating through currently known subs
- core: fix unpacking of packs without asked-for-release-group
- core: fix embedded subtitle language detection; add debug log
- core: treat embedded subtitle containing "forced" in its title as forced
- core: improve embedded subtitles detection
- core: store extracted embedded forced subtitles with the "forced" suffix (e.g.: video.en.forced.srt)
- core: don't bother trying to extract embedded subtitle if transcoder wasn't found
- core: fix automatic extraction of unknown embedded subtitle streams
- core: skip immediately searching for new subtitle after successfully extracting embedded
- core: extract embedded ASS: don't transcode to SRT using ffmpeg (Plex Transcoder), do the transcoding later using pysubs2; fixes offset issues
- core: extract embedded: let ffmpeg auto convert mov_text/tx3g to srt
- core: fix transcoder detection; add fallback #460
- core: remove LD_LIBRARY_PATH from environment before calling notification executable
- core: auto extract embedded subtitles in a separate thread
- core: reduce encoding change log spam
- core: only allow one automatic extraction at a time; add optional advanced settings "auto_extract_multithread"
- core: add minimum score a subtitle has to have when considered by the find better subtitles task, when the current subtitle is an extracted embedded one; add advanced_settings entries
- core/config: automatic extraction: add config setting to indicate whether there should be an immediate search for available subtitles after extraction or not (default: off)
- core/menu/submod: add reverse_rtl modification for Hebrew; fixes #409
- core: scoring: assume title match on tvdb_id match
- tasks: search all recently added missing: fix attribute access on missing stored subtitle info
- providers: add hosszupuska (hungarian, thanks morpheus133 for the basic implementation)
- providers: add argenteam (spanish, thanks mmiraglia for the basic implementation)
- providers: addic7ed: use random user agent by default (enforce for existing configs)
- providers: enable subscene by default
- providers: opensubtitles: add fallback for dict based query response in contrast to list/array based
- advanced settings: make text-based-subtitle-formats configurable
- menu: submod: inverse-reverse subtitle timing time-choices for better accessibility
- submod: reduce log spam in case of debug logs enabled
- submod: style tags could result in no output at all
- submod: fix empty content if only non-line-mods were used, no line-mods; fixes #449
- submod: HI: correctly handle style tags when checking for brackets
- submod: HI: don't remove anything that's surrounded by quotes
- submod: HI: double or triple dash is em dash
- submod: HI: HI_before_colon_noncaps, don't assume single quotes are sentence enders
- submod: common: don't uppercase after abbreviations
- submod: common: don't break phone numbers (more than one spaced number pair found)
- submod: common: also count lines only consisting of dots as removable
- submod: common: replace more than 3 consecutive dots with 3 dots
- submod: OCR: "H i." = "Hi."
2.5.0.2287
- core: reduce main icon size
- core: fix usage on NVIDIA SHIELD (hopefully, please report back), #441
- core: add scandir fallback to listdir in case of badly configured locale in environment, #441, #440
- core: get subtitles from archive: don't assume an episode match
- core: get subtitles from archive: don't assume any attributes in guess
- core: improve release group detection for drone/filebot/file_info refiners
- core: fix language detection for embedded subtitle streams
- core: support extraction of embedded mov_text subtitles in mp4 video files
- refiners: drone: add http:// to url if not given
- providers: opensubtitles: retry/reinitialize request when encountering ResponseNotReady
- config: clarify subscene being only enabled for TV series by default
- menu: when encountering permission errors when scanning media files, warn in the menu about them
- submod: common: don't break -- addic7ed --
- submod: common: remove lines that consist only of dash, underscore
- submod: OCR: fix Ls = Is
- submod: OCR: fix bad HI colons (ANNOUNCER; instead of ANNOUNCER:)
- submod: common: fix lines consisting only of bad music symbols (*#¶ = ♪)
- submod: HI: remove music-symbol-only-lines
- submod: HI: be less aggressive about lines ending with a colon; please re-apply all your mods via advanced menu
- submod: OCR: fix it'sjust, isn'tjust, Iam, Ican
2.5.0.2247
- fix ignoring by-hash-matched episodes
2.5.0.2241
- fix issue when removing crap from filenames to not accidentally remove release group #436
- fix initialization of soft ignore list after upgrade from 2.0
2.5.0.2221
- refiners: add support for retrieving original filename from
- drone derivatives: sonarr, radarr
- filebot
- symlinks
- file_info meta file lists (see wiki)
- providers: add subscene (disabled by default to not flood subscene on release)
- normal search
- season pack search if season has concluded
- core: add provider subtitle-archive/pack cache for retrieving single subtitles from previously downloaded (season-) packs (subscene)
- core/agent: massive performance improvements over 2.0
- core/agent/background-tasks: reduce memory usage to a fraction of 2.0
- core/providers: add dynamic provider throttling when certain events occur (ServiceUnavailable, too many downloads, ...), to lighten the provider-load
- core/agent/config: automatically extract embedded subtitles (and use them if no current subtitle)
- core: fix internal subtitle info storage issues
- core: always store internal subtitle information even if no subtitle was downloaded (fixes SearchAllRecentlyAddedMissing)
- core: fix internal subtitle info storage on windows (gzip handling is broken there)
- core: don't fail on missing logfile paths
- core: fix default encoding order for non-script-serbian
- core: improve logging
- core: add AsRequested to cleanup garbage names
- core: treat SDTV and HDTV the same when searching for subtitles
- core: parse_video: trust PMS season and episode numbers
- core: parse_video: add series year information from PMS if none found
- core: upgrade dependencies
- core: update subliminal to 62cdb3c
- core: add new file based cache mechanism, rendering DBM/memory backends obsolete
- core: treat 23.980 fps as 23.976 and vice-versa
- core: add HTTP proxy support for querying the providers (supports credentials)
- core: only compute file hashes for enabled providers
- core: massive speedup; refine only when needed, exit early otherwise
- core: store last modified timestamp in subtitle info storage
- core: only write to subtitle info storage if we haven't had one or any subtitle was downloaded
- core: only clean up the sub-folder if a subtitle-sub-folder has been selected, and not the parent one also
- core: support for CP437 encoded filenames in ZIP-Archives
- core: use scandir library instead of os.listdir if possible, reducing performance-impact
- core: archives: support multi-episode subtitles (partly)
- core: subtitle cleanup: add support for hi, cc, sdh secondary filename tags; don't autoclean .txt
- core: increase request timeout by three times in case a proxy is being used
- core: fix language=Unknown in Plex when "Restrict to one language"-setting is set
- core: refining: re-add old detected title as alternative title after re-refining with plex metadata's title; fixes #428
- core: implement advanced_settings.json (see advanced_settings.json.template for reference, copy to "Plug-in Support/Data/com.plexapp.agents.subzero" to use it)
- core/tasks: fix search all recently added missing (the total number of items will change in the menu while running), reduces memory usage
- core/menu: add support for extracting embedded subtitles using the builtin plex transcoder
- core/menu: skip wrong season or episode in returned subtitle results
- core/config: fix language handling if treat undefined as first language is set
- providers: remove shooter.cn
- providers: add support for zip/rar archives containing more than one subtitle file
- submod: common: remove redundant interpunction ("Hello !!!" -> "Hello!")
- submod: skip provider hashing when applying mods
- submod: correctly drop empty line (fixing broken display)
- submod: OCR: fix F'xxxxx -> Fxxxxx
- submod: HI: improve bracket matching
- submod: OCR: fix l/L instead of I more aggressively
- submod: common: fix uppercase I's in lowercase words more aggressively
- submod: HI: improve HI_before_colon
- submod: common: be more aggressive when fixing numbers; correctly space out spaced ellipses; don't break spaced ellipses; handle multiple spaces in numbers
- menu: add support for extracting embedded subtitles for a whole season
- menu: add reapply mods to current subtitle
- menu: pad titles for more submenus, resulting in detail view in PlexWeb
- menu: add subtitle selection submenu (if multiple subtitles are inside the subtitle info storage; e.g. previously downloaded ones or extracted embedded)
- menu: advanced: add skip findbettersubtitles menu item, which sets the last_run to now (for debugging purposes)
- menu: ignore: add more natural title for seasons and episodes (kills your old ignore lists!)
- config: skip provider hashing on low impact mode
- config: add limit by air date setting to consider for FindBetterSubtitles task (default: 1 year)
- advanced settings: define enabled-for media types per provider
- advanced settings: define enabled-for languages per provider
- advanced settings: add deep-clean option (clean up the subtitle-sub-folder and the parent one)
2.0.33.1871
- core: normalize line endings in subtitles to LF (\n)
- core: add subtitle storage lock to avoid race condition
- core: be more verbose about subtitle storage addition
- core: fix MPL2 newline parsing, which resulted in broken subtitles
- core: encoding change: reduce log spam
- submod: common: fix CM_starting_spacedots
- opensubtitles: fix request/response handling
2.0.33.1849
- opensubtitles: add VIP server handling + preference; VIP benefits: 10€/year, ad-free subs, 1000 subs/day, no-cache VIP server, help SZ and subscribe via http://v.ht/osvip
- opensubtitles: try to reuse previous token instead of logging in every time
- core: add throttling between searches (10 seconds)
- core: fix IETF handling for good
- core: fix no subtitles being searched in certain situations (when an external subtitle without special tag exists)
- core: add subtitle blacklist
- core: fixes
- core: fix detection of certain PMS media stream language tags ("FR" for example)
- core: missing subtitles: correctly skip unwanted subtitle extensions
- core: missing subtitles: honor "treat undefined as first language" option correctly
- api: add blacklisting endpoints for quickly searching for new subtitles via bookmarklet
- submod: colors: apply color mods at the end of processing modifications; fix color mods
- submod: new remove_tags modification to remove all styling tags from subtitles
- submod: HI: be more aggressive at handling brackets
- submod: OCR: update en and hrv
- submod: common: remove "torrent downloaded from ..." lines
- submod: OCR: fix WholeWord handling, improving modification
- submod: apply OCR fixes before HI
- submod: OCR: fix broken HI tag colons (ANNOUNCER'. instead of ANNOUNCER:)
- menu: advanced: speed up batch modifications
- menu: add subtitle blacklist
- menu: recently played: show only TV episodes and movies (music tracks were listed here as well)
2.0.29.1767
- core: fix internal subtitle storage issues
- core: handle "embedded-forced" tag (futureproofing)
- core: remove more garbage tags from release groups (nzbgeek, chamele0n, buymore, xpost, postbot)
- submod: OCR fix: fix music icon = paragraph
2.0.29.1756
- core: don't fail on uppercase file extensions
- core: don't re-download a subtitle if we already downloaded one, it still physically exists and external subtitles are configured to be ignored
- core: fix VTT subtitle duplication
- core: if forced subtitles not explicitly wanted, ignore existing forced subtitles when searching
- core: add full IETF language support for `Treat languages with country attribute as ISO 639-1 (e.g. don't download pt-BR if pt subtitle exists)`-setting for embedded subtitles
- menu: remove buggy dynamic permission-based channel icon introduced in 1715
- menu: improve `Items with missing subtitles` menu usage and item display
- menu: `Advanced -> Get my logs` handle custom domains without port
- menu: correctly show country/script part of languages with such attributes (e.g. pt-BR)
- config: rename `Scan:` settings; make them better understandable and translatable
- config: rephrase IETF options as "languages with country attribute" (e.g. pt-BR)
- config: separate IETF options into how to display languages with country attribute and how they should be handled when searching/scanning (e.g. pt-BR)
- config: `Scheduler: Item age to be considered recent` now can go up to 12 weeks
- config: `Scheduler: Periodically search for recent items with missing subtitles` added `every 2 hours`
- submod: swe: add Ĺ to Å
2.0.26.1715
- core: submod: OCR fixes: swe: replace ĺ with å inside words
- core: fix handling of non-existent PMS audio_codec info
- core: filename matching ignored the strictness setting in certain global directory configurations (thanks @raduc)
- core: don't fail on migration errors
- provider titlovi: handle multiple subtitles per archive
- provider addic7ed: reset default boost to 19 (was 21)
- menu: add warning icon on missing permissions
- menu: manual subtitle list sometimes listed duplicates (thanks @andreashoyer)
- menu: don't request PMS metadata in item details menu twice
- menu: don't fail badly on non-existent PMS metadata in item details menu
2.0.26.1695
## ATTENTION: THIS RELEASE RESETS YOUR CONFIGURED LANGUAGES TO DEFAULT!
- core: fix bug that caused SZ not to work for Windows users with special characters in their username
- core: fix issues when logging failed manual download actions
- core: update guessit to 2.1.4
- core: fix issue causing the background task scheduler to stop after changing preferences
- core: fix polish encoding (try windows-1250 first, then iso 8859-2)
- core: remove subscenter provider as it now uses captchas
- core: add titlovi as default provider (thanks viking!)
- core: increase default PMS API request timeout to 15 (old: 10, max: 45); add preference for that
- core: re-add separate legacy FindMissingSubtitles task and run it on the first run to prime SZ's internal subtitle storage
- core: add "low impact mode" for people with remote filesystems (currently enabled for List LANGUAGE subtitles in detail menu); alleviates certain plexweb timeout issues
- menu: change naming of find missing subtitles menu item
- legendastv: fix multi value guessit issues
- submod: OCR: update eng and hrv OCR replace dictionaries; fix ". L am huge"
2.0.25.1635
- core: update memory handling, possibly reduce memory problems of 2.0
- core: support for MPL2 subtitle format
- core: update task handling
- core: re-enable NVIDIA SHIELD support by fixing rarfile behaviour
- core: add SZ_UNRAR_TOOL environment variable for custom unrar location
- core: disable SZ when no providers are enabled
- core: only start activity monitor if channel or agent are enabled
- core: improve custom provider integration
- core: update eastern european encoding detection (especially Romanian)
- tasks: reduce provider stress by introducing wait times between searches/downloads
- windows: correctly ship UnRAR.exe
- windows: skip DBM checks
- addic7ed: fix Nip/Tuck
- subscenter: use new domain
2.0.24.1581
- legendastv: ship unrar.exe for Windows users (fixes unrar issues)
- addic7ed: fix TooManyRequests error
- submod: OCR fixes NL: add custom dictionary data for malformed characters
- submod: OCR fixes: update hrv/NL dictionaries
- submod: common: remove spaces before punctuation
- podnapisi: now returns more subtitles again
ATTENTION: Sub-Zero is still broken on PMS for SHIELD. Help needed!
2.0.24.1565
- core: fix searchallrecentlymissing task erroring if item not found
- core: fix non-plex-items appearing in and crashing the recently played list
+121 -23
View File
@@ -1,12 +1,11 @@
# coding=utf-8
import sys
import datetime
import os
from subzero.sandbox import restore_builtins
from subzero.sandbox import fix_environment_stuff
module = sys.modules['__main__']
restore_builtins(module, {})
fix_environment_stuff(module, {})
globals = getattr(module, "__builtins__")["globals"]
for key, value in getattr(module, "__builtins__").iteritems():
@@ -24,8 +23,9 @@ sys.modules["interface"] = interface
from subzero.constants import OS_PLEX_USERAGENT, PERSONAL_MEDIA_IDENTIFIER
from interface.menu import *
from support.plex_media import media_to_videos, get_media_item_ids, scan_videos
from support.storage import save_subtitles, store_subtitle_info
from support.plex_media import media_to_videos, get_media_item_ids
from support.scanning import scan_videos
from support.storage import save_subtitles, store_subtitle_info, get_subtitle_storage
from support.items import is_ignored
from support.config import config
from support.lib import get_intent
@@ -46,6 +46,8 @@ def Start():
intent = get_intent()
intent.cleanup()
#Locale.DefaultLocale = "de"
# clear expired menu history items
now = datetime.datetime.now()
if "menu_history" in Dict:
@@ -114,12 +116,52 @@ def update_local_media(metadata, media, media_type="movies"):
pass
def agent_extract_embedded(video_part_map):
    """
    Queue extraction of embedded subtitle streams for the given videos.

    For every part of every scanned video and every language in
    ``config.lang_list``, the subtitle storage is asked whether a subtitle from
    the "embedded" provider is already known. If not, the first matching
    embedded stream (if any) is queued. All queued streams are then handed to
    ``multi_extract_embedded`` in a separate thread.

    This is best-effort: any error is logged and swallowed so the caller can
    continue.

    :param video_part_map: dict mapping scanned video -> part info
    """
    try:
        subtitle_storage = get_subtitle_storage()

        to_extract = []
        item_count = 0

        for scanned_video, part_info in video_part_map.iteritems():
            plexapi_item = scanned_video.plexapi_metadata["item"]
            stored_subs = subtitle_storage.load_or_new(plexapi_item)

            for plexapi_part in get_all_parts(plexapi_item):
                item_count += 1

                for requested_language in config.lang_list:
                    embedded_subs = stored_subs.get_by_provider(plexapi_part.id, requested_language, "embedded")
                    current = stored_subs.get_any(plexapi_part.id, requested_language)

                    if not embedded_subs:
                        stream_data = get_embedded_subtitle_streams(plexapi_part, requested_language=requested_language,
                                                                    get_forced=config.forced_only)

                        if stream_data:
                            stream = stream_data[0]["stream"]

                            # the last tuple element is True when no subtitle of any kind is stored yet for
                            # this part/language
                            to_extract.append(({scanned_video: part_info}, plexapi_part, str(stream.index),
                                               str(requested_language), not current))

                            # unless a search after auto-extraction is wanted, mark the language as present on
                            # the scanned video
                            if not cast_bool(Prefs["subtitles.search_after_autoextract"]):
                                scanned_video.subtitle_languages.update({requested_language})
                    else:
                        Log.Debug("Skipping embedded subtitle extraction for %s, already got %r from %s",
                                  plexapi_item.rating_key, requested_language, embedded_subs[0].id)

        if to_extract:
            Log.Info("Triggering extraction of %d embedded subtitles of %d items", len(to_extract), item_count)
            Thread.Create(multi_extract_embedded, stream_list=to_extract, refresh=True, with_mods=True,
                          single_thread=not config.advanced.auto_extract_multithread)
    except Exception:
        # deliberately broad (best-effort), but don't swallow SystemExit/KeyboardInterrupt like a bare except would
        Log.Error("Something went wrong when auto-extracting subtitles, continuing: %s", traceback.format_exc())
class SubZeroAgent(object):
agent_type = None
agent_type_verbose = None
languages = [Locale.Language.English]
primary_provider = False
score_prefs_key = None
debounce = 10
def __init__(self, *args, **kwargs):
super(SubZeroAgent, self).__init__(*args, **kwargs)
@@ -130,7 +172,14 @@ class SubZeroAgent(object):
Log.Debug("Sub-Zero %s, %s search" % (config.version, self.agent_type))
results.Append(MetadataSearchResult(id='null', score=100))
def store_blank_subtitle_metadata(self, video_part_map):
    """
    Store subtitle info for the given videos with an empty subtitle list each.

    :param video_part_map: dict mapping scanned video -> part info
    """
    # one empty subtitle list per video in the map
    no_subtitles = {video: [] for video in video_part_map}
    store_subtitle_info(video_part_map, no_subtitles, None, mode="a")
def update(self, metadata, media, lang):
if not config.enable_agent:
Log.Debug("Skipping Sub-Zero agent(s)")
return
Log.Debug("Sub-Zero %s, %s update called" % (config.version, self.agent_type))
intent = get_intent()
@@ -167,36 +216,81 @@ class SubZeroAgent(object):
set_refresh_menu_state(media, media_type=self.agent_type)
# scanned_video_part_map = {subliminal.Video: plex_part, ...}
scanned_video_part_map = scan_videos(videos, kind=self.agent_type)
providers = config.get_providers(media_type=self.agent_type)
try:
scanned_video_part_map = scan_videos(videos, providers=providers)
except IOError, e:
Log.Exception("Permission error, please check your folder/file permissions. Exiting.")
if cast_bool(Prefs["check_permissions"]):
config.permissions_ok = False
config.missing_permissions = e.message
return
# auto extract embedded
if config.embedded_auto_extract:
if config.plex_transcoder:
agent_extract_embedded(scanned_video_part_map)
else:
Log.Warning("Plex Transcoder not found, can't auto extract")
# clear missing subtitles menu data
if not scheduler.is_task_running("MissingSubtitles"):
scheduler.clear_task_data("MissingSubtitles")
downloaded_subtitles = None
if not config.enable_agent:
Log.Debug("Skipping Sub-Zero agent(s)")
else:
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score)
item_ids = get_media_item_ids(media, kind=self.agent_type)
# debounce for self.debounce seconds
now = datetime.datetime.now()
if "last_call" in Dict:
last_call = Dict["last_call"]
if last_call + datetime.timedelta(seconds=self.debounce) > now:
wait = self.debounce - (now - last_call).seconds
if wait >= 1:
Log.Debug("Waiting %s seconds until continuing", wait)
Thread.Sleep(wait)
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
try:
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score,
throttle_time=self.debounce, providers=providers)
except:
Log.Exception("Something went wrong when downloading subtitles")
if downloaded_subtitles is not None:
Dict["last_call"] = datetime.datetime.now()
item_ids = get_media_item_ids(media, kind=self.agent_type)
downloaded_any = False
if downloaded_subtitles:
downloaded_any = any(downloaded_subtitles.values())
if downloaded_any:
save_subtitles(scanned_video_part_map, downloaded_subtitles, mods=config.default_mods)
save_successful = False
try:
save_successful = save_subtitles(scanned_video_part_map, downloaded_subtitles,
mods=config.default_mods)
except:
Log.Exception("Something went wrong when saving subtitles")
track_usage("Subtitle", "refreshed", "download", 1)
for video, video_subtitles in downloaded_subtitles.items():
# store item(s) in history
for subtitle in video_subtitles:
item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
history = get_history()
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
subtitle=subtitle)
# store SZ meta info even if download wasn't successful
if not save_successful:
self.store_blank_subtitle_metadata(scanned_video_part_map)
else:
for video, video_subtitles in downloaded_subtitles.items():
# store item(s) in history
for subtitle in video_subtitles:
item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
history = get_history()
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
subtitle=subtitle)
history.destroy()
else:
# store subtitle info even if we've downloaded none
store_subtitle_info(scanned_video_part_map, dict((k, []) for k in scanned_video_part_map.keys()),
None, mode="a")
# store SZ meta info even if we've downloaded none
self.store_blank_subtitle_metadata(scanned_video_part_map)
update_local_media(metadata, media, media_type=self.agent_type)
@@ -213,6 +307,10 @@ class SubZeroAgent(object):
Dict.Save()
# fsync cache
if config.new_style_cache:
config.sync_cache()
class SubZeroSubtitlesAgentMovies(SubZeroAgent, Agent.Movies):
contributes_to = ['com.plexapp.agents.imdb', 'com.plexapp.agents.xbmcnfo', 'com.plexapp.agents.themoviedb', 'com.plexapp.agents.hama']
+153 -60
View File
@@ -8,7 +8,7 @@ import urlparse
from zipfile import ZipFile, ZIP_DEFLATED
from babelfish import Language
from subzero.language import Language
from subzero.lib.io import FileIO
from subzero.constants import PREFIX, PLUGIN_IDENTIFIER
@@ -20,74 +20,98 @@ from support.lib import Plex
from support.storage import reset_storage, log_storage, get_subtitle_storage
from support.scheduler import scheduler
from support.items import set_mods_for_part, get_item_kind_from_rating_key
from support.i18n import _
@route(PREFIX + '/advanced')
def AdvancedMenu(randomize=None, header=None, message=None):
oc = SubFolderObjectContainer(header=header or "Internal stuff, pay attention!", message=message, no_cache=True,
no_history=True,
replace_parent=False, title2="Advanced")
oc = SubFolderObjectContainer(
header=header or _("Internal stuff, pay attention!"),
message=message,
no_cache=True,
no_history=True,
replace_parent=False,
title2=_("Advanced"))
if config.lock_advanced_menu and not config.pin_correct:
oc.add(DirectoryObject(
key=Callback(PinMenu, randomize=timestamp(), success_go_to="advanced"),
title=pad_title("Enter PIN"),
summary="The owner has restricted the access to this menu. Please enter the correct pin",
key=Callback(
PinMenu,
randomize=timestamp(),
success_go_to=_("advanced")),
title=pad_title(_("Enter PIN")),
summary=_("The owner has restricted the access to this menu. Please enter the correct pin"),
))
return oc
oc.add(DirectoryObject(
key=Callback(TriggerRestart, randomize=timestamp()),
title=pad_title("Restart the plugin"),
title=pad_title(_("Restart the plugin")),
))
oc.add(DirectoryObject(
key=Callback(GetLogsLink),
title="Get my logs (copy the appearing link and open it in your browser, please)",
summary="Copy the appearing link and open it in your browser, please",
title=_("Get my logs (copy the appearing link and open it in your browser, please)"),
summary=_("Copy the appearing link and open it in your browser, please"),
))
oc.add(DirectoryObject(
key=Callback(TriggerBetterSubtitles, randomize=timestamp()),
title=pad_title("Trigger find better subtitles"),
title=pad_title(_("Trigger find better subtitles")),
))
oc.add(DirectoryObject(
key=Callback(SkipFindBetterSubtitles, randomize=timestamp()),
title=pad_title(_("Skip next find better subtitles (sets last run to now)")),
))
oc.add(DirectoryObject(
key=Callback(TriggerStorageMaintenance, randomize=timestamp()),
title=pad_title("Trigger subtitle storage maintenance"),
title=pad_title(_("Trigger subtitle storage maintenance")),
))
oc.add(DirectoryObject(
key=Callback(TriggerStorageMigration, randomize=timestamp()),
title=pad_title("Trigger subtitle storage migration (expensive)"),
title=pad_title(_("Trigger subtitle storage migration (expensive)")),
))
oc.add(DirectoryObject(
key=Callback(TriggerCacheMaintenance, randomize=timestamp()),
title=pad_title(_("Trigger cache maintenance (refiners, providers and packs/archives)")),
))
oc.add(DirectoryObject(
key=Callback(ApplyDefaultMods, randomize=timestamp()),
title=pad_title("Apply configured default subtitle mods to all (active) stored subtitles"),
title=pad_title(_("Apply configured default subtitle mods to all (active) stored subtitles")),
))
oc.add(DirectoryObject(
key=Callback(ReApplyMods, randomize=timestamp()),
title=pad_title("Re-Apply mods of all stored subtitles"),
title=pad_title(_("Re-Apply mods of all stored subtitles")),
))
oc.add(DirectoryObject(
key=Callback(LogStorage, key="tasks", randomize=timestamp()),
title=pad_title("Log the plugin's scheduled tasks state storage"),
title=pad_title(_("Log the plugin's scheduled tasks state storage")),
))
oc.add(DirectoryObject(
key=Callback(LogStorage, key="ignore", randomize=timestamp()),
title=pad_title("Log the plugin's internal ignorelist storage"),
title=pad_title(_("Log the plugin's internal ignorelist storage")),
))
oc.add(DirectoryObject(
key=Callback(LogStorage, key=None, randomize=timestamp()),
title=pad_title("Log the plugin's complete state storage"),
title=pad_title(_("Log the plugin's complete state storage")),
))
oc.add(DirectoryObject(
key=Callback(ResetStorage, key="tasks", randomize=timestamp()),
title=pad_title("Reset the plugin's scheduled tasks state storage"),
title=pad_title(_("Reset the plugin's scheduled tasks state storage")),
))
oc.add(DirectoryObject(
key=Callback(ResetStorage, key="ignore", randomize=timestamp()),
title=pad_title("Reset the plugin's internal ignorelist storage"),
title=pad_title(_("Reset the plugin's internal ignorelist storage")),
))
oc.add(DirectoryObject(
key=Callback(ResetStorage, key="menu_history", randomize=timestamp()),
title=pad_title("Reset the plugin's menu history storage"),
))
oc.add(DirectoryObject(
key=Callback(InvalidateCache, randomize=timestamp()),
title=pad_title("Invalidate Sub-Zero metadata caches (subliminal)"),
title=pad_title(_("Invalidate Sub-Zero metadata caches (subliminal)")),
))
oc.add(DirectoryObject(
key=Callback(ResetProviderThrottle, randomize=timestamp()),
title=pad_title(_("Reset provider throttle states")),
))
return oc
@@ -99,15 +123,20 @@ def DispatchRestart():
@route(PREFIX + '/advanced/restart/trigger')
@debounce
def TriggerRestart(randomize=None):
set_refresh_menu_state("Restarting the plugin")
set_refresh_menu_state(_("Restarting the plugin"))
DispatchRestart()
return fatality(header="Restart triggered, please wait about 5 seconds", force_title=" ", only_refresh=True,
replace_parent=True,
no_history=True, randomize=timestamp())
return fatality(
header=_("Restart triggered, please wait about 5 seconds"),
force_title=" ",
only_refresh=True,
replace_parent=True,
no_history=True,
randomize=timestamp())
@route(PREFIX + '/advanced/restart/execute')
def Restart():
@debounce
def Restart(randomize=None):
Plex[":/plugins"].restart(PLUGIN_IDENTIFIER)
@@ -115,10 +144,17 @@ def Restart():
@debounce
def ResetStorage(key, randomize=None, sure=False):
if not sure:
oc = SubFolderObjectContainer(no_history=True, title1="Reset subtitle storage", title2="Are you sure?")
oc = SubFolderObjectContainer(
no_history=True,
title1=_("Reset subtitle storage"),
title2=_("Are you sure?"))
oc.add(DirectoryObject(
key=Callback(ResetStorage, key=key, sure=True, randomize=timestamp()),
title=pad_title("Are you really sure?"),
key=Callback(
ResetStorage,
key=key,
sure=True,
randomize=timestamp()),
title=pad_title(_("Are you really sure?")),
))
return oc
@@ -132,8 +168,8 @@ def ResetStorage(key, randomize=None, sure=False):
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='Information Storage (%s) reset' % key
header=_("Success"),
message=_("Information Storage (%s) reset", key)
)
@@ -142,8 +178,8 @@ def LogStorage(key, randomize=None):
log_storage(key)
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='Information Storage (%s) logged' % key
header=_("Success"),
message=_("Information Storage (%s) logged", key)
)
@@ -153,8 +189,21 @@ def TriggerBetterSubtitles(randomize=None):
scheduler.dispatch_task("FindBetterSubtitles")
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='FindBetterSubtitles triggered'
header=_("Success"),
message=_("FindBetterSubtitles triggered")
)
@route(PREFIX + '/skipbetter')
@debounce
def SkipFindBetterSubtitles(randomize=None):
    """
    stamps the FindBetterSubtitles task's last_run with the current time and returns to the advanced menu
    :param randomize: not read by this function
    :return: AdvancedMenu with a success header and message
    """
    better_subs_task = scheduler.task("FindBetterSubtitles")
    # NOTE(review): presumably this postpones the task's next scheduled run - confirm against the scheduler
    better_subs_task.last_run = datetime.datetime.now()

    menu_kwargs = {
        "randomize": timestamp(),
        "header": _("Success"),
        "message": _("FindBetterSubtitles skipped"),
    }
    return AdvancedMenu(**menu_kwargs)
@@ -164,8 +213,8 @@ def TriggerStorageMaintenance(randomize=None):
scheduler.dispatch_task("SubtitleStorageMaintenance")
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='SubtitleStorageMaintenance triggered'
header=_("Success"),
message=_("SubtitleStorageMaintenance triggered")
)
@@ -175,8 +224,19 @@ def TriggerStorageMigration(randomize=None):
scheduler.dispatch_task("MigrateSubtitleStorage")
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='MigrateSubtitleStorage triggered'
header=_("Success"),
message=_("MigrateSubtitleStorage triggered")
)
@route(PREFIX + '/triggercachemaintenance')
@debounce
def TriggerCacheMaintenance(randomize=None):
    """
    dispatches the CacheMaintenance task through the scheduler and returns to the advanced menu
    :param randomize: not read by this function
    :return: AdvancedMenu with a success header and message
    """
    scheduler.dispatch_task("CacheMaintenance")

    menu_kwargs = {
        "randomize": timestamp(),
        "header": _("Success"),
        "message": _("TriggerCacheMaintenance triggered"),
    }
    return AdvancedMenu(**menu_kwargs)
@@ -228,8 +288,8 @@ def ApplyDefaultMods(randomize=None):
Thread.CreateTimer(1.0, apply_default_mods)
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='This may take some time ...'
header=_("Success"),
message=_("This may take some time ...")
)
@@ -239,17 +299,20 @@ def ReApplyMods(randomize=None):
Thread.CreateTimer(1.0, apply_default_mods, reapply_current=True)
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='This may take some time ...'
header=_("Success"),
message=_("This may take some time ...")
)
@route(PREFIX + '/get_logs_link')
def GetLogsLink():
if not config.plex_token:
oc = ObjectContainer(title2="Download Logs", no_cache=True, no_history=True,
header="Sorry, feature unavailable",
message="Universal Plex token not available")
oc = ObjectContainer(
title2=_("Download Logs"),
no_cache=True,
no_history=True,
header=_("Sorry, feature unavailable"),
message=_("Universal Plex token not available"))
return oc
# try getting the link base via the request in context, first, otherwise use the public ip
@@ -264,7 +327,7 @@ def GetLogsLink():
elif "Referer" in req_headers:
parsed = urlparse.urlparse(req_headers["Referer"])
link_base = "%s://%s:%s" % (parsed.scheme, parsed.hostname, parsed.port)
link_base = "%s://%s%s" % (parsed.scheme, parsed.hostname, (":%s" % parsed.port) if parsed.port else "")
Log.Debug("Using referer-based link_base")
get_external_ip = False
@@ -274,9 +337,12 @@ def GetLogsLink():
Log.Debug("Using ip-based fallback link_base")
logs_link = "%s%s?X-Plex-Token=%s" % (link_base, PREFIX + '/logs', config.plex_token)
oc = ObjectContainer(title2=logs_link, no_cache=True, no_history=True,
header="Copy this link and open this in your browser, please",
message=logs_link)
oc = ObjectContainer(
title2=logs_link,
no_cache=True,
no_history=True,
header=_("Copy this link and open this in your browser, please"),
message=logs_link)
return oc
@@ -300,35 +366,51 @@ def DownloadLogs():
@debounce
def InvalidateCache(randomize=None):
from subliminal.cache import region
region.invalidate()
if config.new_style_cache:
region.backend.clear()
else:
region.invalidate()
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='Cache invalidated'
header=_("Success"),
message=_("Cache invalidated")
)
@route(PREFIX + '/pin')
def PinMenu(pin="", randomize=None, success_go_to="channel"):
oc = ObjectContainer(title2="Enter PIN number %s" % (len(pin) + 1), no_cache=True, no_history=True,
skip_pin_lock=True)
oc = ObjectContainer(
title2=_("Enter PIN number ") + str(len(pin) + 1),
no_cache=True,
no_history=True,
skip_pin_lock=True)
if pin == config.pin:
Dict["pin_correct_time"] = datetime.datetime.now()
config.locked = False
if success_go_to == "channel":
return fatality(force_title="PIN correct", header="PIN correct", no_history=True)
return fatality(
force_title=_("PIN correct"),
header=_("PIN correct"),
no_history=True)
elif success_go_to == "advanced":
return AdvancedMenu(randomize=timestamp())
for i in range(10):
oc.add(DirectoryObject(
key=Callback(PinMenu, randomize=timestamp(), pin=pin + str(i), success_go_to=success_go_to),
key=Callback(
PinMenu,
randomize=timestamp(),
pin=pin + str(i),
success_go_to=success_go_to),
title=pad_title(str(i)),
))
oc.add(DirectoryObject(
key=Callback(PinMenu, randomize=timestamp(), success_go_to=success_go_to),
title=pad_title("Reset"),
key=Callback(
PinMenu,
randomize=timestamp(),
success_go_to=success_go_to),
title=pad_title(_("Reset")),
))
return oc
@@ -337,4 +419,15 @@ def PinMenu(pin="", randomize=None, success_go_to="channel"):
def ClearPin(randomize=None):
Dict["pin_correct_time"] = None
config.locked = True
return fatality(force_title="Menu locked", header=" ", no_history=True)
return fatality(force_title=_("Menu locked"), header=" ", no_history=True)
@route(PREFIX + '/reset_throttle')
def ResetProviderThrottle(randomize=None):
    """
    replaces the stored provider throttle data with an empty dict, saves Dict and returns to the advanced menu
    :param randomize: not read by this function
    :return: AdvancedMenu with a success header and message
    """
    Dict["provider_throttle"] = dict()
    Dict.Save()

    menu_kwargs = {
        "randomize": timestamp(),
        "header": _("Success"),
        "message": _("Provider throttles reset"),
    }
    return AdvancedMenu(**menu_kwargs)
+158
View File
@@ -1,6 +1,10 @@
# coding=utf-8
import datetime
import operator
from support.config import config
from support.helpers import timestamp
def enable_channel_wrapper(func):
@@ -25,3 +29,157 @@ def enable_channel_wrapper(func):
return (func if (config.enable_channel or enforce_route) else noop)(*args, **kwargs)
return wrap
# route functions keyed by their __name__; filled by register_route_function
ROUTE_REGISTRY = {}
def get_func_name(args):
    """
    returns the __name__ of the first element of args
    :param args: iterable whose first element carries a __name__ attribute
    :return: the __name__ of that first element
    """
    first = list(args)[0]
    return first.__name__
def get_lookup_key(f, args, kwargs):
    """
    builds the tuple under which a route call is tracked in the menu history
    :param f: the route function; only its __name__ goes into the key
    :param args: positional arguments of the call
    :param kwargs: keyword arguments of the call
    :return: tuple of (function name, tuple of args, tuple of (name, value) pairs sorted by name)
    """
    # sort the keyword pairs so that equal calls always produce equal keys, regardless of dict iteration order;
    # names are unique, so the values themselves are never compared
    return f.__name__, tuple(args), tuple(sorted(kwargs.items()))
def should_debounce(f, key, kw):
    """
    tells whether a call to f should be suppressed instead of being executed again
    :param f: route function; only considered when it carries a truthy "debounce" attribute
    :param key: lookup key of the call
    :param kw: keyword arguments of the call; "randomize" has to be among them for the call to be debounced
    :return: truthy if f is debounced, "randomize" was passed and key is already in Dict["menu_history"]
    """
    debounced = getattr(f, "debounce", False)
    if not debounced:
        # hand back the falsy flag itself, which is what the chained "and" evaluates to
        return debounced

    if "randomize" not in kw:
        return False

    return key in Dict["menu_history"]
def register_route_function(f):
    """
    stores f in ROUTE_REGISTRY under its __name__, unless it is ValidatePrefs or that name is already taken
    :param f: the route function
    :return: f, unchanged
    """
    name = f.__name__
    if name == "ValidatePrefs" or name in ROUTE_REGISTRY:
        return f

    ROUTE_REGISTRY[name] = f
    return f
def main_menu_fallback():
    """
    records the argument-less "fatality" call as last menu item and in the menu history, then calls that route
    :return: result of the registered "fatality" route, called with a fresh randomize value
    """
    fatality_route = ROUTE_REGISTRY["fatality"]
    fallback_key = get_lookup_key(fatality_route, [], {})

    Dict["last_menu_item"] = fallback_key
    add_to_menu_history(fallback_key)

    return fatality_route(randomize=timestamp())
def add_to_menu_history(key):
    """
    stores key in Dict["menu_history"] with a timestamp six hours ahead, trims the history and saves Dict
    :param key: lookup key of the route call to remember
    :return: None
    """
    mh = Dict["menu_history"]

    # plain assignment also replaces an existing entry, no prior delete needed
    mh[key] = datetime.datetime.now() + datetime.timedelta(hours=6)

    # limit to 25 items, keeping those with the latest timestamps; a single sort is enough because the
    # result is turned straight back into a dict
    latest = sorted(mh.items(), key=operator.itemgetter(1), reverse=True)[:25]
    Dict["menu_history"] = dict(latest)

    try:
        Dict.Save()
    except TypeError:
        # drop the entry that was just added so the stored history stays saveable
        Log.Error("Can't save menu history for: %r", key)
        del Dict["menu_history"][key]
def route_wrapper(*args, **kwargs):
    """
    decorator factory around route() that registers the function, debounces repeated calls and records every
    executed call in the menu history
    :param args: positional arguments handed through to route()
    :param kwargs: keyword arguments handed through to route()
    :return: the decorator to apply to a route function
    """
    def wrap(f):
        # orig_f is set further down on the debouncing wrapper of a function that went through here before
        already_wrapped = getattr(f, "orig_f", False)
        register_route_function(f)

        def inner(*a, **kw):
            if "menu_history" not in Dict:
                Dict["menu_history"] = {}

            if "last_menu_item" not in Dict:
                Dict["last_menu_item"] = None

            key = get_lookup_key(f, list(a), kw)

            if should_debounce(f, key, kw):
                # special case for TriggerRestart
                if f.__name__ in ("TriggerRestart", "Restart"):
                    Log.Debug("Don't trigger a re-restart, falling back to main menu")
                else:
                    Log.Debug("not triggering %s twice with %s, %s, returning to main menu" %
                              (f.__name__, a, kw))

                # both cases end up in the main menu instead of running f again
                return main_menu_fallback()

            add_to_menu_history(key)
            Dict["last_menu_item"] = key

            return f(*a, **kw)

        # @route may be used multiple times
        if not already_wrapped:
            inner.orig_f = f
            return enable_channel_wrapper(route(*args, **kwargs))(inner)
        return enable_channel_wrapper(route(*args, **kwargs))(f)

    return wrap
+475 -72
View File
@@ -1,26 +1,29 @@
# coding=utf-8
import os
from subzero.language import Language
from sub_mod import SubtitleModificationsMenu
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, add_ignore_options, get_item_task_data, \
set_refresh_menu_state, route
set_refresh_menu_state, route, extract_embedded_sub
from refresh_item import RefreshItem
from subzero.constants import PREFIX
from support.config import config
from support.helpers import timestamp, cast_bool, df, get_language
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub
from support.lib import Plex
from support.plex_media import get_plex_metadata, scan_videos, PMSMediaProxy
from support.config import config, TEXT_SUBTITLE_EXTS
from support.helpers import timestamp, df, get_language, display_language, get_language_from_stream
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub, get_item_title, save_stored_sub
from support.plex_media import get_plex_metadata, get_part, get_embedded_subtitle_streams
from support.scanning import scan_videos
from support.scheduler import scheduler
from support.storage import get_subtitle_storage
from support.i18n import _
# fixme: needs kwargs cleanup
@route(PREFIX + '/item/{rating_key}/actions')
@debounce
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None):
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None, header=None,
message=None):
"""
displays the item details menu of an item that doesn't contain any deeper tree, such as a movie or an episode
:param rating_key:
@@ -33,12 +36,31 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
from interface.main import IgnoreMenu
title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)
item = get_item(rating_key)
item = plex_item = get_item(rating_key)
current_kind = get_item_kind_from_rating_key(rating_key)
timeout = 30
oc = SubFolderObjectContainer(title2=title, replace_parent=True)
oc = SubFolderObjectContainer(
title2=title,
replace_parent=True,
header=header,
message=message)
if not item:
oc.add(DirectoryObject(
key=Callback(
ItemDetailsMenu,
rating_key=rating_key,
title=title,
base_title=base_title,
item_title=item_title,
randomize=timestamp()),
title=_(u"Item not found: %s!", item_title),
summary=_("Plex didn't return any information about the item, please refresh it and come back later"),
thumb=default_thumb
))
return oc
# add back to season for episode
if current_kind == "episode":
@@ -47,26 +69,37 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
season = get_item(item.season.rating_key)
oc.add(DirectoryObject(
key=Callback(MetadataMenu, rating_key=season.rating_key, title=season.title, base_title=show.title,
previous_item_type="show", previous_rating_key=show.rating_key,
display_items=True, randomize=timestamp()),
title=u"< Back to %s" % season.title,
summary="Back to %s > %s" % (show.title, season.title),
key=Callback(
MetadataMenu,
rating_key=season.rating_key,
title=season.title,
base_title=show.title,
previous_item_type="show",
previous_rating_key=show.rating_key,
display_items=True,
randomize=timestamp()),
title=_(u"< Back to %s", season.title),
summary=_("Back to %s > %s", show.title, season.title),
thumb=season.thumb or default_thumb
))
oc.add(DirectoryObject(
key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, randomize=timestamp(),
timeout=timeout * 1000),
title=u"Refresh: %s" % item_title,
summary="Refreshes the %s, possibly searching for missing and picking up new subtitles on disk" % current_kind,
key=Callback(
RefreshItem,
rating_key=rating_key,
item_title=item_title,
randomize=timestamp(),
timeout=timeout * 1000),
title=_(u"Refresh: %s", item_title),
summary=_("Refreshes %(the_movie_series_season_episode)s, possibly searching for missing and picking up "
"new subtitles on disk", the_movie_series_season_episode=_(u"the %s" % current_kind)),
thumb=item.thumb or default_thumb
))
oc.add(DirectoryObject(
key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(),
timeout=timeout * 1000),
title=u"Force-find subtitles: %s" % item_title,
summary="Issues a forced refresh, ignoring known subtitles and searching for new ones",
title=_(u"Force-find subtitles: %(item_title)s", item_title=item_title),
summary=_("Issues a forced refresh, ignoring known subtitles and searching for new ones"),
thumb=item.thumb or default_thumb
))
@@ -74,9 +107,6 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
subtitle_storage = get_subtitle_storage()
stored_subs = subtitle_storage.load_or_new(item)
# get the plex item
plex_item = get_item(rating_key)
# look for subtitles for all available media parts and all of their languages
has_multiple_parts = len(plex_item.media) > 1
part_index = 0
@@ -89,6 +119,12 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
part_id = str(part.id)
part_index += 1
part_index_addon = u""
part_summary_addon = u""
if has_multiple_parts:
part_index_addon = _(u"File %(file_part_index)s: ", file_part_index=part_index)
part_summary_addon = u"%s " % filename
# iterate through all configured languages
for lang in config.lang_list:
# get corresponding stored subtitle data for that media part (physical media item), for language
@@ -96,87 +132,345 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
current_sub_id = None
current_sub_provider_name = None
part_index_addon = ""
part_summary_addon = ""
if has_multiple_parts:
part_index_addon = u"File %s: " % part_index
part_summary_addon = "%s " % filename
summary = u"%sNo current subtitle in storage" % part_summary_addon
summary = _(u"%(part_summary)sNo current subtitle in storage", part_summary=part_summary_addon)
current_score = None
if current_sub:
current_sub_id = current_sub.id
current_sub_provider_name = current_sub.provider_name
current_score = current_sub.score
summary = u"%sCurrent subtitle: %s (added: %s, %s), Language: %s, Score: %i, Storage: %s" % \
(part_summary_addon, current_sub.provider_name, df(current_sub.date_added),
current_sub.mode_verbose, lang, current_sub.score, current_sub.storage_type)
summary = _(u"%(part_summary)sCurrent subtitle: %(provider_name)s (added: %(date_added)s, "
u"%(mode)s), Language: %(language)s, Score: %(score)i, Storage: %(storage_type)s",
part_summary=part_summary_addon,
provider_name=current_sub.provider_name,
date_added=df(current_sub.date_added),
mode=current_sub.mode_verbose,
language=display_language(lang),
score=current_sub.score,
storage_type=current_sub.storage_type)
oc.add(DirectoryObject(
key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
item_title=item_title, language=lang, language_name=lang.name,
item_title=item_title, language=lang, language_name=display_language(lang),
current_id=current_sub_id,
item_type=plex_item.type, filename=filename, current_data=summary,
randomize=timestamp(), current_provider=current_sub_provider_name,
current_score=current_score),
title=u"%sActions for %s subtitle" % (part_index_addon, lang.name),
title=_(u"%(part_summary)sManage %(language)s subtitle", part_summary=part_index_addon,
language=display_language(lang)),
summary=summary
))
else:
oc.add(DirectoryObject(
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
item_title=item_title, language=lang, language_name=lang.name,
item_title=item_title, language=lang, language_name=display_language(lang),
current_id=current_sub_id,
item_type=plex_item.type, filename=filename, current_data=summary,
randomize=timestamp(), current_provider=current_sub_provider_name,
current_score=current_score),
title=u"%sList %s subtitles" % (part_index_addon, lang.name),
title=_(u"%(part_summary)sList %(language)s subtitles", part_summary=part_index_addon,
language=display_language(lang)),
summary=summary
))
add_ignore_options(oc, "videos", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
if config.plex_transcoder:
# embedded subtitles
embedded_count = 0
embedded_langs = []
for stream in part.streams:
# subtitle stream
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
lang = get_language_from_stream(stream.language_code)
if not lang and config.treat_und_as_first:
lang = list(config.lang_list)[0]
if lang:
embedded_langs.append(lang)
embedded_count += 1
if embedded_count:
oc.add(DirectoryObject(
key=Callback(ListEmbeddedSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
item_type=plex_item.type, item_title=item_title, base_title=base_title,
randomize=timestamp()),
title=_(u"%(part_summary)sEmbedded subtitles (%(languages)s)",
part_summary=part_index_addon,
languages=", ".join(display_language(l) for l in set(embedded_langs))),
summary=_(u"Extract and activate embedded subtitle streams")
))
ignore_title = item_title
if current_kind == "episode":
ignore_title = get_item_title(item)
add_ignore_options(oc, "videos", title=ignore_title, rating_key=rating_key, callback_menu=IgnoreMenu)
subtitle_storage.destroy()
return oc
@route(PREFIX + '/item/current_sub/{rating_key}/{part_id}', force=bool)
@debounce
@route(PREFIX + '/item/current_sub/{rating_key}/{part_id}')
def SubtitleOptionsMenu(**kwargs):
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True, header=kwargs.get("header"),
message=kwargs.get("message"))
rating_key = kwargs["rating_key"]
part_id = kwargs["part_id"]
language = kwargs["language"]
current_data = unicode(kwargs["current_data"])
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
subs_count = stored_subs.count(part_id, language)
kwargs.pop("randomize")
oc.add(DirectoryObject(
key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
title=kwargs["title"], randomize=timestamp()),
title=u"< Back to %s" % kwargs["title"],
summary=kwargs["current_data"],
title=_(u"< Back to %s", kwargs["title"]),
summary=current_data,
thumb=default_thumb
))
if subs_count:
oc.add(DirectoryObject(
key=Callback(ListStoredSubsForItemMenu, randomize=timestamp(), **kwargs),
title=_(u"Select active %(language)s subtitle", language=kwargs["language_name"]),
summary=_(u"%(count)d subtitles in storage", count=subs_count)
))
oc.add(DirectoryObject(
key=Callback(ListAvailableSubsForItemMenu, randomize=timestamp(), **kwargs),
title=u"List %s subtitles" % kwargs["language_name"],
summary=kwargs["current_data"]
title=_(u"List available %(language)s subtitles", language=kwargs["language_name"]),
summary=current_data
))
if current_sub:
oc.add(DirectoryObject(
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
title=u"Modify %s subtitle" % kwargs["language_name"],
summary=u"Currently applied mods: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
title=_(u"Modify current %(language)s subtitle", language=kwargs["language_name"]),
summary=_(u"Currently applied mods: %(mod_list)s",
mod_list=(", ".join(current_sub.mods) if current_sub.mods else "none"))
))
if current_sub.provider_name != "embedded":
oc.add(DirectoryObject(
key=Callback(BlacklistSubtitleMenu, randomize=timestamp(), **kwargs),
title=_(u"Blacklist current %(language)s subtitle and search for a new one",
language=kwargs["language_name"]),
summary=current_data
))
current_bl, subs = stored_subs.get_blacklist(part_id, language)
if current_bl:
oc.add(DirectoryObject(
key=Callback(ManageBlacklistMenu, randomize=timestamp(), **kwargs),
title=_(u"Manage blacklist (%(amount)s contained)", amount=len(current_bl)),
summary=_(u"Inspect currently blacklisted subtitles")
))
storage.destroy()
return oc
@route(PREFIX + '/item/search/{rating_key}/{part_id}', force=bool)
@route(PREFIX + '/item/list_stored_subs/{rating_key}/{part_id}')
def ListStoredSubsForItemMenu(**kwargs):
    """
    lists the subtitles held in storage for one media part and language, most recently added first
    :param kwargs: must contain title, rating_key, part_id, language (IETF code) and randomize; everything
                   except randomize is passed on to SelectStoredSubForItemMenu
    :return: container with one entry per stored subtitle
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    all_subs = stored_subs.get_all(part_id, language)

    # every entry gets its own fresh randomize value below
    kwargs.pop("randomize")

    # "current" and "blacklist" are bookkeeping entries, not subtitles
    for key, subtitle in sorted(filter(lambda x: x[0] not in ("current", "blacklist"), all_subs.items()),
                                key=lambda x: x[1].date_added, reverse=True):
        is_current = key == all_subs["current"]

        summary = _(u"added: %(date_added)s, %(mode)s, Language: %(language)s, Score: %(score)i, Storage: "
                    u"%(storage_type)s",
                    date_added=df(subtitle.date_added),
                    mode=subtitle.mode_verbose,
                    language=display_language(language),
                    score=subtitle.score,
                    storage_type=subtitle.storage_type)

        sub_name = subtitle.provider_name
        if sub_name == "embedded":
            sub_name += " (%s)" % subtitle.id

        oc.add(DirectoryObject(
            key=Callback(SelectStoredSubForItemMenu, randomize=timestamp(), sub_key="__".join(key), **kwargs),
            title=_(u"%(current_state)s%(subtitle_name)s, Score: %(score)s",
                    current_state=_("Current: ") if is_current else _("Stored: "),
                    subtitle_name=sub_name,
                    score=subtitle.score),
            summary=summary
        ))

    # release the storage handle like the other menus that call get_current_sub do
    storage.destroy()

    return oc
@route(PREFIX + '/item/set_current_sub/{rating_key}/{part_id}')
@debounce
def SelectStoredSubForItemMenu(**kwargs):
    """
    marks one stored subtitle as the current one, saves it via save_stored_sub and returns to the options menu
    :param kwargs: must contain rating_key, part_id, language (IETF code), item_type, sub_key ("__"-joined key
                   of the stored subtitle) and randomize; the rest is passed on to SubtitleOptionsMenu
    :return: SubtitleOptionsMenu carrying a success header and message
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])
    item_type = kwargs["item_type"]
    chosen_key = tuple(kwargs.pop("sub_key").split("__"))

    plex_item = get_item(rating_key)
    storage = get_subtitle_storage()
    stored_subs = storage.load(plex_item.rating_key)

    part_subs = stored_subs.get_all(part_id, language)
    chosen_sub = part_subs[chosen_key]
    part_subs["current"] = chosen_key

    save_stored_sub(chosen_sub, rating_key, part_id, language, item_type,
                    plex_item=plex_item,
                    storage=storage,
                    stored_subs=stored_subs)

    storage.destroy()

    # the options menu gets its own fresh randomize value
    del kwargs["randomize"]
    kwargs.update(header=_("Success"), message=_("Subtitle saved to disk"))

    return SubtitleOptionsMenu(randomize=timestamp(), **kwargs)
@route(PREFIX + '/item/blacklist_recent/{language}')
@route(PREFIX + '/item/blacklist_recent')
def BlacklistRecentSubtitleMenu(**kwargs):
    """
    runs BlacklistAllPartsSubtitleMenu for the first entry of Dict["last_played_items"]
    :param kwargs: passed on to BlacklistAllPartsSubtitleMenu, with rating_key set to that entry
    :return: result of BlacklistAllPartsSubtitleMenu, or None if no last played item is stored
    """
    last_played = Dict["last_played_items"] if "last_played_items" in Dict else None
    if not last_played:
        return

    kwargs["rating_key"] = last_played[0]
    return BlacklistAllPartsSubtitleMenu(**kwargs)
@route(PREFIX + '/item/blacklist_all/{rating_key}/{language}')
@route(PREFIX + '/item/blacklist_all/{rating_key}')
def BlacklistAllPartsSubtitleMenu(**kwargs):
    """
    blacklists the current subtitle of every stored part of an item, then issues a forced refresh
    :param kwargs: rating_key of the item; optional language (IETF code) to restrict the blacklisting to
    :return: result of RefreshItem, or None if the item can't be found
    """
    rating_key = kwargs.get("rating_key")
    language = kwargs.get("language")
    if language:
        language = Language.fromietf(language)

    item = get_item(rating_key)

    if not item:
        return

    item_title = get_item_title(item)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)
    for part_id, languages in stored_subs.parts.iteritems():
        sub_dict = languages
        if language:
            key = str(language)
            if key not in sub_dict:
                continue

            sub_dict = {key: sub_dict[key]}

        # the loop target must not be called `language`: it stays bound after the loop and would act as the
        # language filter for every following part, even when no language was requested
        for lang_key, subs in sub_dict.iteritems():
            if "current" in subs:
                stored_subs.blacklist(part_id, lang_key, subs["current"])
                Log.Info("Added %s to blacklist", subs["current"])

    subtitle_storage.save(stored_subs)
    subtitle_storage.destroy()

    return RefreshItem(rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(), timeout=30000)
def blacklist(rating_key, part_id, language):
    """
    adds the current subtitle of a media part and language to the blacklist and saves the storage
    :param rating_key: rating key of the item
    :param part_id: id of the media part
    :param language: language of the subtitle
    :return: True if a subtitle was blacklisted, None if there is no current subtitle
    """
    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    if not current_sub:
        # nothing to blacklist, but the storage handle still has to be released
        storage.destroy()
        return

    stored_subs.blacklist(part_id, language, current_sub.key)
    storage.save(stored_subs)
    storage.destroy()

    Log.Info("Added %s to blacklist", current_sub.key)

    return True
@route(PREFIX + '/item/blacklist/{rating_key}/{part_id}')
@debounce
def BlacklistSubtitleMenu(**kwargs):
    """
    blacklists the current subtitle of one media part and language, then issues a forced refresh of the item
    :param kwargs: must contain rating_key, part_id, language, item_title and randomize
    :return: result of RefreshItem
    """
    item_key = kwargs["rating_key"]
    part = kwargs["part_id"]
    lang = kwargs["language"]
    title = kwargs["item_title"]

    blacklist(item_key, part, lang)

    del kwargs["randomize"]

    return RefreshItem(
        rating_key=item_key,
        item_title=title,
        force=True,
        randomize=timestamp(),
        timeout=30000)
@route(PREFIX + '/item/manage_blacklist/{rating_key}/{part_id}', force=bool)
@debounce
def ManageBlacklistMenu(**kwargs):
    """
    lists the blacklisted subtitles of one media part and language; selecting an entry removes it from the
    blacklist
    :param kwargs: must contain title, rating_key, part_id, language, item_title, current_data and randomize;
                   optional remove_sub_key ("__"-joined key) names the entry to take off the blacklist first
    :return: container with a back entry and one entry per blacklisted subtitle
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    key_to_remove = kwargs.pop("remove_sub_key", None)
    current_data = unicode(kwargs["current_data"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    blacklisted, lang_subs = stored_subs.get_blacklist(part_id, language)

    if key_to_remove:
        key_to_remove = tuple(key_to_remove.split("__"))
        stored_subs.blacklist(part_id, language, key_to_remove, add=False)
        storage.save(stored_subs)
        Log.Info("Removed %s from blacklist", key_to_remove)

    # every entry below gets its own fresh randomize value
    kwargs.pop("randomize")

    oc.add(DirectoryObject(
        key=Callback(
            ItemDetailsMenu,
            rating_key=kwargs["rating_key"],
            item_title=kwargs["item_title"],
            title=kwargs["title"],
            randomize=timestamp()),
        title=_(u"< Back to %s", kwargs["title"]),
        summary=current_data,
        thumb=default_thumb
    ))

    def by_date_added(entry):
        # thanks RestrictedModule parser for messing with lambda (x, y)
        return entry[1]["date_added"]

    newest_first = sorted(blacklisted.iteritems(), key=by_date_added, reverse=True)

    for sub_key, data in newest_first:
        provider_name, subtitle_id = sub_key

        # NOTE(review): current_sub is dereferenced without a None check - confirm get_current_sub always
        # yields a subtitle whenever the blacklist is non-empty
        entry_title = _(u"%(provider_name)s, %(subtitle_id)s (added: %(date_added)s, %(mode)s), Language: %(language)s, "
                        u"Score: %(score)i, Storage: %(storage_type)s",
                        provider_name=provider_name,
                        subtitle_id=subtitle_id,
                        date_added=df(data["date_added"]),
                        mode=current_sub.get_mode_verbose(data["mode"]),
                        language=display_language(Language.fromietf(language)),
                        score=data["score"],
                        storage_type=data["storage_type"])

        oc.add(DirectoryObject(
            key=Callback(
                ManageBlacklistMenu,
                remove_sub_key="__".join(sub_key),
                randomize=timestamp(),
                **kwargs),
            title=entry_title,
            summary=_(u"Remove subtitle from blacklist")
        ))

    storage.destroy()

    return oc
@route(PREFIX + '/item/search/{rating_key}/{part_id}', force=bool)
def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item_title=None, filename=None,
item_type="episode", language=None, language_name=None, force=False, current_id=None,
current_data=None,
@@ -186,6 +480,8 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
running = scheduler.is_task_running("AvailableSubsForItem")
search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)
current_data = unicode(current_data) if current_data else None
if (search_results is None or force) and not running:
scheduler.dispatch_task("AvailableSubsForItem", rating_key=rating_key, item_type=item_type, part_id=part_id,
language=language)
@@ -194,34 +490,42 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
oc = SubFolderObjectContainer(title2=unicode(title), replace_parent=True)
oc.add(DirectoryObject(
key=Callback(ItemDetailsMenu, rating_key=rating_key, item_title=item_title, title=title, randomize=timestamp()),
title=u"< Back to %s" % title,
title=_(u"< Back to %s", title),
summary=current_data,
thumb=default_thumb
))
metadata = get_plex_metadata(rating_key, part_id, item_type)
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
plex_part = None
if not config.low_impact_mode:
scanned_parts = scan_videos([metadata], ignore_all=True)
if not scanned_parts:
Log.Error("Couldn't list available subtitles for %s", rating_key)
return oc
if not scanned_parts:
Log.Error("Couldn't list available subtitles for %s", rating_key)
return oc
video, plex_part = scanned_parts.items()[0]
video, plex_part = scanned_parts.items()[0]
video_display_data = [video.format] if video.format else []
if video.release_group:
video_display_data.append(u"by %s" % video.release_group)
video_display_data = " ".join(video_display_data)
video_display_data = [video.format] if video.format else []
if video.release_group:
video_display_data.append(unicode(_(u"by %(release_group)s", release_group=video.release_group)))
video_display_data = " ".join(video_display_data)
else:
video_display_data = metadata["filename"]
current_display = (u"Current: %s (%s) " % (current_provider, current_score) if current_provider else "")
current_display = (_(u"Current: %(provider_name)s (%(score)s) ",
provider_name=current_provider,
score=current_score if current_provider else ""))
if not running:
oc.add(DirectoryObject(
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title, language=language,
filename=filename, part_id=part_id, title=title, current_id=current_id, force=True,
current_provider=current_provider, current_score=current_score,
current_data=current_data, item_type=item_type, randomize=timestamp()),
title=u"Search for %s subs (%s)" % (get_language(language).name, video_display_data),
summary=u"%sFilename: %s" % (current_display, filename),
title=_(u"Search for %(language)s subs (%(video_data)s)",
language=get_language(language).name,
video_data=video_display_data),
summary=_(u"%(current_info)sFilename: %(filename)s", current_info=current_display, filename=filename),
thumb=default_thumb
))
@@ -232,8 +536,8 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
part_id=part_id, title=title, current_id=current_id, item_type=item_type,
current_provider=current_provider, current_score=current_score,
randomize=timestamp()),
title=u"No subtitles found",
summary=u"%sFilename: %s" % (current_display, filename),
title=_(u"No subtitles found"),
summary=_(u"%(current_info)sFilename: %(filename)s", current_info=current_display, filename=filename),
thumb=default_thumb
))
else:
@@ -243,33 +547,54 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
part_id=part_id, title=title, current_id=current_id, item_type=item_type,
current_provider=current_provider, current_score=current_score,
randomize=timestamp()),
title=u"Searching for %s subs (%s), refresh here ..." % (get_language(language).name, video_display_data),
summary=u"%sFilename: %s" % (current_display, filename),
title=_(u"Searching for %(language)s subs (%(video_data)s), refresh here ...",
language=display_language(get_language(language)),
video_data=video_display_data),
summary=_(u"%(current_info)sFilename: %(filename)s", current_info=current_display, filename=filename),
thumb=default_thumb
))
if not search_results or search_results == "found_none":
return oc
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
current_bl, subs = stored_subs.get_blacklist(part_id, language)
seen = []
for subtitle in search_results:
if subtitle.id in seen:
continue
bl_addon = ""
if (str(subtitle.provider_name), str(subtitle.id)) in current_bl:
bl_addon = "Blacklisted "
wrong_fps_addon = ""
if subtitle.wrong_fps:
wrong_fps_addon = " (wrong FPS, sub: %s, media: %s)" % (subtitle.fps, plex_part.fps)
if plex_part:
wrong_fps_addon = _(" (wrong FPS, sub: %(subtitle_fps)s, media: %(media_fps)s)",
subtitle_fps=subtitle.fps,
media_fps=plex_part.fps)
else:
wrong_fps_addon = _(" (wrong FPS, sub: %(subtitle_fps)s, media: unknown, low impact mode)",
subtitle_fps=subtitle.fps)
oc.add(DirectoryObject(
key=Callback(TriggerDownloadSubtitle, rating_key=rating_key, randomize=timestamp(), item_title=item_title,
subtitle_id=str(subtitle.id), language=language),
title=u"%s: %s, score: %s%s" % ("Available" if current_id != subtitle.id else "Current",
subtitle.provider_name, subtitle.score, wrong_fps_addon),
summary=u"Release: %s, Matches: %s" % (subtitle.release_info, ", ".join(subtitle.matches)),
title=_(u"%(blacklisted_state)s%(current_state)s: %(provider_name)s, score: %(score)s%(wrong_fps_state)s",
blacklisted_state=bl_addon,
current_state=_("Available") if current_id != subtitle.id else _("Current"),
provider_name=subtitle.provider_name,
score=subtitle.score,
wrong_fps_state=wrong_fps_addon),
summary=_(u"Release: %(release_info)s, Matches: %(matches)s",
release_info=subtitle.release_info,
matches=", ".join(subtitle.matches)),
thumb=default_thumb
))
seen.append(current_id)
seen.append(subtitle.id)
return oc
@@ -279,7 +604,7 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None, language=None, randomize=None):
from interface.main import fatality
set_refresh_menu_state("Downloading subtitle for %s" % item_title or rating_key)
set_refresh_menu_state(_("Downloading subtitle for %(title_or_id)s", title_or_id=item_title or rating_key))
search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)
download_subtitle = None
@@ -296,3 +621,81 @@ def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None,
scheduler.clear_task_data("AvailableSubsForItem")
return fatality(randomize=timestamp(), header=" ", replace_parent=True)
@route(PREFIX + '/item/embedded/{rating_key}/{part_id}')
def ListEmbeddedSubsForItemMenu(**kwargs):
    """
    lists the embedded subtitle streams of an item's part; every stream that has a language gets two entries
    triggering its extraction: one with default mods and one without
    :param kwargs: must contain rating_key, part_id, title, item_title, base_title and randomize; everything
                   except randomize is passed on to the extraction callbacks
    :return: the populated object container
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    title = kwargs["title"]

    # the callbacks below pass their own randomize value next to **kwargs, so the incoming one has to go
    kwargs.pop("randomize")

    oc = SubFolderObjectContainer(title2=title, replace_parent=True)

    back_entry = DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
                     base_title=kwargs["base_title"], title=kwargs["item_title"], randomize=timestamp()),
        title=_("< Back to %s", kwargs["title"]),
        thumb=default_thumb
    )
    oc.add(back_entry)

    part = get_part(get_item(rating_key), part_id)
    if not part:
        return oc

    def label_args(stream, language, is_unknown, is_forced):
        # placeholder values shared by the titles of both entries of a stream
        return dict(
            stream_index=stream.index,
            language=display_language(language),
            unknown_state=_(" (unknown)") if is_unknown else "",
            forced_state=_(" (forced)") if is_forced else "",
            stream_title=" (\"%s\")" % stream.title if stream.title else "",
        )

    for stream_data in get_embedded_subtitle_streams(part, skip_duplicate_unknown=False):
        language = stream_data["language"]
        is_unknown = stream_data["is_unknown"]
        stream = stream_data["stream"]
        is_forced = stream_data["is_forced"]

        # streams without a language get no entries
        if not language:
            continue

        # entry 1: extract and apply the default mods
        oc.add(DirectoryObject(
            key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
                         stream_index=str(stream.index), language=language, with_mods=True, **kwargs),
            title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
                    u"%(stream_title)s with default mods",
                    **label_args(stream, language, is_unknown, is_forced)),
        ))

        # entry 2: plain extraction
        oc.add(DirectoryObject(
            key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
                         stream_index=str(stream.index), language=language, **kwargs),
            title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
                    u"%(stream_title)s",
                    **label_args(stream, language, is_unknown, is_forced)),
        ))

    return oc
@route(PREFIX + '/item/extract_embedded/{rating_key}/{part_id}/{stream_index}')
@debounce
def TriggerExtractEmbeddedSubForItemMenu(**kwargs):
    """
    starts the extraction of an embedded subtitle stream in a thread and returns to the item details menu
    :param kwargs: must contain rating_key, randomize, item_type, stream_index, part_id, language and item_title;
                   with_mods is optional
    :return: the ItemDetailsMenu result, carrying a "triggered" header/message
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs.get("part_id")
    stream_index = kwargs.get("stream_index")

    # the worker receives the complete kwargs; they are only trimmed afterwards for the menu call
    Thread.Create(extract_embedded_sub, **kwargs)

    header = _(u"Extracting of embedded subtitle %s of part %s:%s triggered",
               stream_index, rating_key, part_id)

    # drop everything that must not be passed on to ItemDetailsMenu; these keys are mandatory
    for mandatory_key in ("randomize", "item_type", "stream_index", "part_id"):
        kwargs.pop(mandatory_key)
    kwargs.pop("with_mods", False)
    kwargs.pop("language")

    kwargs.update(title=kwargs["item_title"], header=header, message=header)
    return ItemDetailsMenu(randomize=timestamp(), **kwargs)
+103 -73
View File
@@ -2,13 +2,13 @@
from subzero.constants import PREFIX, TITLE, ART
from support.config import config
from support.helpers import pad_title, timestamp, df, get_plex_item_display_title
from support.helpers import pad_title, timestamp, df, display_language
from support.scheduler import scheduler
from support.ignore import ignore_list
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, \
get_item_kind_from_item
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, get_item_title
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_ignore_options, \
ObjectContainer, route, handler
from support.i18n import _
from item_details import ItemDetailsMenu
@@ -36,25 +36,34 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
if config.lock_menu and not config.pin_correct:
oc.add(DirectoryObject(
key=Callback(PinMenu, randomize=timestamp()),
title=pad_title("Enter PIN"),
summary="The owner has restricted the access to this menu. Please enter the correct pin",
title=pad_title(_("Enter PIN")),
summary=_("The owner has restricted the access to this menu. Please enter the correct pin"),
))
return oc
if not config.permissions_ok and config.missing_permissions:
for title, path in config.missing_permissions:
if not isinstance(config.missing_permissions, list):
oc.add(DirectoryObject(
key=Callback(fatality, randomize=timestamp()),
title=pad_title("Insufficient permissions"),
summary="Insufficient permissions on library %s, folder: %s" % (title, path),
title=pad_title(_("Insufficient permissions")),
summary=config.missing_permissions,
))
else:
for title, path in config.missing_permissions:
oc.add(DirectoryObject(
key=Callback(fatality, randomize=timestamp()),
title=pad_title(_("Insufficient permissions")),
summary=_("Insufficient permissions on library %(title)s, folder: %(path)s",
title=title,
path=path),
))
return oc
if not config.enabled_sections:
oc.add(DirectoryObject(
key=Callback(fatality, randomize=timestamp()),
title=pad_title("I'm not enabled!"),
summary="Please enable me for some of your libraries in your server settings; currently I do nothing",
title=pad_title(_("I'm not enabled!")),
summary=_("Please enable me for some of your libraries in your server settings; currently I do nothing"),
))
return oc
@@ -62,47 +71,42 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
if Dict["current_refresh_state"]:
oc.add(DirectoryObject(
key=Callback(fatality, force_title=" ", randomize=timestamp()),
title=pad_title("Working ... refresh here"),
summary="Current state: %s; Last state: %s" % (
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
title=pad_title(_("Working ... refresh here")),
summary=_("Current state: %s; Last state: %s",
(Dict["current_refresh_state"] or _("Idle")) if "current_refresh_state" in Dict else _("Idle"),
(Dict["last_refresh_state"] or _("None")) if "last_refresh_state" in Dict else _("None")
)
))
oc.add(DirectoryObject(
key=Callback(OnDeckMenu),
title="On-deck items",
summary="Shows the current on deck items and allows you to individually (force-) refresh their metadata/"
"subtitles.",
title=_("On-deck items"),
summary=_("Shows the current on deck items and allows you to individually (force-) refresh their metadata/subtitles."),
thumb=R("icon-ondeck.jpg")
))
if "last_played_items" in Dict and Dict["last_played_items"]:
oc.add(DirectoryObject(
key=Callback(RecentlyPlayedMenu),
title=pad_title("Recently played items"),
summary="Shows the %i recently played items and allows you to individually (force-) refresh their "
"metadata/subtitles." % config.store_recently_played_amount,
title=pad_title(_("Recently played items")),
summary=_("Shows the %s recently played items and allows you to individually (force-) refresh their metadata/subtitles.", config.store_recently_played_amount),
thumb=R("icon-played.jpg")
))
oc.add(DirectoryObject(
key=Callback(RecentlyAddedMenu),
title="Recently-added items",
summary="Shows the recently added items per section.",
title=_("Recently-added items"),
summary=_("Shows the recently added items per section."),
thumb=R("icon-added.jpg")
))
oc.add(DirectoryObject(
key=Callback(RecentMissingSubtitlesMenu, randomize=timestamp()),
title="Items with missing subtitles",
summary="Shows the items honoring the configured 'Item age to be considered recent'-setting (%s)"
" and allowing you to individually (force-) refresh their metadata/subtitles. " %
Prefs["scheduler.item_is_recent_age"],
title=_("Show recently added items with missing subtitles"),
summary=_("Lists items with missing subtitles. Click on Find recent items with missing subs to update list"),
thumb=R("icon-missing.jpg")
))
oc.add(DirectoryObject(
key=Callback(SectionsMenu),
title="Browse all items",
summary="Go through your whole library and manage your ignore list. You can also "
"(force-) refresh the metadata/subtitles of individual items.",
title=_("Browse all items"),
summary=_("Go through your whole library and manage your ignore list. You can also (force-) refresh the metadata/subtitles of individual items."),
thumb=R("icon-browse.jpg")
))
@@ -110,41 +114,46 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
task = scheduler.task(task_name)
if task.ready_for_display:
task_state = "Running: %s/%s (%s%%)" % (task.items_done, task.items_searching, task.percentage)
task_state = _("Running: %(items_done)s/%(items_searching)s (%(percentage)s%%)",
items_done=task.items_done,
items_searching=task.items_searching,
percentage=task.percentage)
else:
task_state = "Last scheduler run: %s; Next scheduled run: %s; Last runtime: %s" % (
df(scheduler.last_run(task_name)) or "never",
df(scheduler.next_run(task_name)) or "never",
lr = scheduler.last_run(task_name)
nr = scheduler.next_run(task_name)
task_state = _("Last run: %s; Next scheduled run: %s; Last runtime: %s",
df(scheduler.last_run(task_name)) if lr else "never",
df(scheduler.next_run(task_name)) if nr else "never",
str(task.last_run_time).split(".")[0])
oc.add(DirectoryObject(
key=Callback(RefreshMissing, randomize=timestamp()),
title="Search for missing subtitles (in recently-added items, max-age: %s)" % Prefs[
"scheduler.item_is_recent_age"],
summary="Automatically run periodically by the scheduler, if configured. %s" % task_state,
title=_("Search for missing subtitles (in recently-added items, max-age: %s)", Prefs[
"scheduler.item_is_recent_age"]),
summary=_("Automatically run periodically by the scheduler, if configured. %s", task_state),
thumb=R("icon-search.jpg")
))
oc.add(DirectoryObject(
key=Callback(IgnoreListMenu),
title="Display ignore list (%d)" % len(ignore_list),
summary="Show the current ignore list (mainly used for the automatic tasks)",
title=_("Display ignore list (%(ignored_count)d)", ignored_count=len(ignore_list)),
summary=_("Show the current ignore list (mainly used for the automatic tasks)"),
thumb=R("icon-ignore.jpg")
))
oc.add(DirectoryObject(
key=Callback(HistoryMenu),
title="History",
summary="Show the last %i downloaded subtitles" % int(Prefs["history_size"]),
title=_("History"),
summary=_("Show the last %i downloaded subtitles", int(Prefs["history_size"])),
thumb=R("icon-history.jpg")
))
oc.add(DirectoryObject(
key=Callback(fatality, force_title=" ", randomize=timestamp()),
title=pad_title("Refresh"),
summary="Current state: %s; Last state: %s" % (
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
title=pad_title(_("Refresh")),
summary=_("Current state: %s; Last state: %s",
(Dict["current_refresh_state"] or _("Idle")) if "current_refresh_state" in Dict else _("Idle"),
(Dict["last_refresh_state"] or _("None")) if "last_refresh_state" in Dict else _("None")
),
thumb=R("icon-refresh.jpg")
))
@@ -153,15 +162,31 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
if config.pin:
oc.add(DirectoryObject(
key=Callback(ClearPin, randomize=timestamp()),
title=pad_title("Re-lock menu(s)"),
summary="Enabled the PIN again for menu(s)"
title=pad_title(_("Re-lock menu(s)")),
summary=_("Enabled the PIN again for menu(s)")
))
if not only_refresh:
if "provider_throttle" in Dict and Dict["provider_throttle"].keys():
summary_data = []
for provider, data in Dict["provider_throttle"].iteritems():
reason, until, desc = data
summary_data.append(unicode(_("%(throttled_provider)s until %(until_date)s (%(reason)s)",
throttled_provider=provider,
until_date=until.strftime("%y/%m/%d %H:%M"),
reason=reason)))
oc.add(DirectoryObject(
key=Callback(fatality, force_title=" ", randomize=timestamp()),
title=pad_title(_("Throttled providers: %s", ", ".join(Dict["provider_throttle"].keys()))),
summary=", ".join(summary_data),
thumb=R("icon-throttled.jpg")
))
oc.add(DirectoryObject(
key=Callback(AdvancedMenu),
title=pad_title("Advanced functions"),
summary="Use at your own risk",
title=pad_title(_("Advanced functions")),
summary=_("Use at your own risk"),
thumb=R("icon-advanced.jpg")
))
@@ -175,27 +200,22 @@ def OnDeckMenu(message=None):
:param message:
:return:
"""
return mergedItemsMenu(title="Items On Deck", base_title="Items On Deck", itemGetter=get_on_deck_items)
return mergedItemsMenu(title=_("Items On Deck"), base_title=_("Items On Deck"), itemGetter=get_on_deck_items)
@route(PREFIX + '/recently_played')
def RecentlyPlayedMenu():
base_title = "Recently Played"
base_title = _("Recently Played")
oc = SubFolderObjectContainer(title2=base_title, replace_parent=True)
for item in [get_item(rating_key) for rating_key in Dict["last_played_items"]]:
if not item:
continue
kind = get_item_kind_from_item(item)
if kind not in ("episode", "movie"):
if getattr(getattr(item, "__class__"), "__name__") not in ("Episode", "Movie"):
continue
if kind == "episode":
item_title = get_plex_item_display_title(item, "show", parent=item.season, section_title=None,
parent_title=item.show.title)
else:
item_title = get_plex_item_display_title(item, kind, section_title=None)
item_title = get_item_title(item)
oc.add(DirectoryObject(
title=item_title,
@@ -213,13 +233,13 @@ def RecentlyAddedMenu(message=None):
:param message:
:return:
"""
return SectionsMenu(base_title="Recently added", section_items_key="recently_added", ignore_options=False)
return SectionsMenu(base_title=_("Recently added"), section_items_key="recently_added", ignore_options=False)
@route(PREFIX + '/recent', force=bool)
@debounce
def RecentMissingSubtitlesMenu(force=False, randomize=None):
title = "Items with missing subtitles"
title = _("Items with missing subtitles")
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
running = scheduler.is_task_running("MissingSubtitles")
@@ -233,13 +253,13 @@ def RecentMissingSubtitlesMenu(force=False, randomize=None):
if not running:
oc.add(DirectoryObject(
key=Callback(RecentMissingSubtitlesMenu, force=True, randomize=timestamp()),
title=u"Get items with missing subtitles",
title=_(u"Find recent items with missing subtitles"),
thumb=default_thumb
))
else:
oc.add(DirectoryObject(
key=Callback(RecentMissingSubtitlesMenu, force=False, randomize=timestamp()),
title=u"Updating, refresh here ...",
title=_(u"Updating, refresh here ..."),
thumb=default_thumb
))
@@ -249,7 +269,7 @@ def RecentMissingSubtitlesMenu(force=False, randomize=None):
key=Callback(ItemDetailsMenu, title=title + " > " + item_title, item_title=item_title,
rating_key=item_id),
title=item_title,
summary="Missing: %s" % ", ".join(l.name for l in missing_languages),
summary=_("Missing: %s", ", ".join(display_language(l) for l in missing_languages)),
thumb=get_item_thumb(item) or default_thumb
))
@@ -307,18 +327,25 @@ def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
"""
is_ignored = rating_key in ignore_list[kind]
if not sure:
oc = SubFolderObjectContainer(no_history=True, replace_parent=True, title1="%s %s %s %s the ignore list" % (
"Add" if not is_ignored else "Remove", ignore_list.verbose(kind), title,
"to" if not is_ignored else "from"), title2="Are you sure?")
t = u"Add %(kind)s %(title)s to the ignore list"
if is_ignored:
t = u"Remove %(kind)s %(title)s from the ignore list"
oc = SubFolderObjectContainer(no_history=True, replace_parent=True,
title1=_(t,
kind=ignore_list.verbose(kind),
title=title
),
title2=_("Are you sure?"))
oc.add(DirectoryObject(
key=Callback(IgnoreMenu, kind=kind, rating_key=rating_key, title=title, sure=True,
todo="add" if not is_ignored else "remove"),
title=pad_title("Are you sure?"),
title=pad_title(_("Are you sure?")),
))
return oc
rel = ignore_list[kind]
dont_change = False
state = None
if todo == "remove":
if not is_ignored:
dont_change = True
@@ -327,7 +354,6 @@ def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
Log.Info("Removed %s (%s) from the ignore list", title, rating_key)
ignore_list.remove_title(kind, rating_key)
ignore_list.save()
state = "removed from"
elif todo == "add":
if is_ignored:
dont_change = True
@@ -336,25 +362,29 @@ def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
Log.Info("Added %s (%s) to the ignore list", title, rating_key)
ignore_list.add_title(kind, rating_key, title)
ignore_list.save()
state = "added to"
else:
dont_change = True
if dont_change:
return fatality(force_title=" ", header="Didn't change the ignore list", no_history=True)
return fatality(force_title=" ", header=_("Didn't change the ignore list"), no_history=True)
return fatality(force_title=" ", header="%s %s the ignore list" % (title, state), no_history=True)
t = "%(title)s added to the ignore list"
if todo == "remove":
t = "%(title)s removed from the ignore list"
return fatality(force_title=" ", header=_(t,
title=title,),
no_history=True)
@route(PREFIX + '/sections')
def SectionsMenu(base_title="Sections", section_items_key="all", ignore_options=True):
def SectionsMenu(base_title=_("Sections"), section_items_key="all", ignore_options=True):
"""
displays the menu for all sections
:return:
"""
items = get_all_items("sections")
return dig_tree(SubFolderObjectContainer(title2="Sections", no_cache=True, no_history=True), items, None,
return dig_tree(SubFolderObjectContainer(title2=_("Sections"), no_cache=True, no_history=True), items, None,
menu_determination_callback=determine_section_display, pass_kwargs={"base_title": base_title,
"section_items_key": section_items_key,
"ignore_options": ignore_options},
@@ -415,7 +445,7 @@ def SectionFirstLetterMenu(rating_key, title=None, base_title=None, section_titl
add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)
oc.add(DirectoryObject(
key=Callback(SectionMenu, title="All", base_title=title, rating_key=rating_key, ignore_options=False),
key=Callback(SectionMenu, title=_("All"), base_title=title, rating_key=rating_key, ignore_options=False),
title="All"
)
)
+188 -35
View File
@@ -2,22 +2,29 @@
import locale
import logging
import os
import platform
import traceback
import logger
import copy
from requests import HTTPError
from item_details import ItemDetailsMenu
from refresh_item import RefreshItem
from menu_helpers import add_ignore_options, dig_tree, set_refresh_menu_state, \
should_display_ignore, default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route
default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route, \
extract_embedded_sub
from main import fatality, IgnoreMenu
from advanced import DispatchRestart
from subzero.constants import ART, PREFIX, DEPENDENCY_MODULE_NAMES
from support.plex_media import get_all_parts, get_embedded_subtitle_streams
from support.scheduler import scheduler
from support.config import config
from support.helpers import timestamp, df
from support.helpers import timestamp, df, display_language
from support.ignore import ignore_list
from support.items import get_all_items, get_items_info, \
get_item_kind_from_rating_key, get_item
from support.items import get_all_items, get_items_info, get_item_kind_from_rating_key, get_item, MI_KEY, get_item_title
from support.storage import get_subtitle_storage
from support.i18n import _
# init GUI
ObjectContainer.art = R(ART)
@@ -25,6 +32,7 @@ ObjectContainer.no_cache = True
# default thumb for DirectoryObjects
DirectoryObject.thumb = default_thumb
Plugin.AddViewGroup("full_details", viewMode="InfoList", mediaType="items", type="list", summary=2)
@route(PREFIX + '/section/firstLetter/key', deeper=bool)
@@ -51,7 +59,7 @@ def FirstLetterMetadataMenu(rating_key, key, title=None, base_title=None, displa
@route(PREFIX + '/section/contents', display_items=bool)
def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, previous_item_type=None,
previous_rating_key=None, randomize=None):
previous_rating_key=None, message=None, header=None, randomize=None):
"""
displays the contents of a section based on whether it has a deeper tree or not (movies->movie (item) list; series->series list)
:param rating_key:
@@ -65,49 +73,80 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
title = unicode(title)
item_title = title
title = base_title + " > " + title
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True, header=header, message=message,
view_group="full_details")
current_kind = get_item_kind_from_rating_key(rating_key)
if display_items:
timeout = 30
show = None
# add back to series for season
if current_kind == "season":
timeout = 360
timeout = 720
show = get_item(previous_rating_key)
oc.add(DirectoryObject(
key=Callback(MetadataMenu, rating_key=show.rating_key, title=show.title, base_title=show.section.title,
previous_item_type="section", display_items=True, randomize=timestamp()),
title=u"< Back to %s" % show.title,
title=_(u"< Back to %s", show.title),
thumb=show.thumb or default_thumb
))
elif current_kind == "series":
timeout = 1800
# it shouldn't take more than 6 minutes to scan all of a series' files and determine the force refresh
timeout = 3600
items = get_all_items(key="children", value=rating_key, base="library/metadata")
kind, deeper = get_items_info(items)
dig_tree(oc, items, MetadataMenu,
pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": kind,
"previous_rating_key": rating_key})
# we don't know exactly where we are here, only add ignore option to series
if should_display_ignore(items, previous=previous_item_type):
add_ignore_options(oc, "series", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
if current_kind in ("series", "season"):
item = get_item(rating_key)
sub_title = get_item_title(item)
add_ignore_options(oc, current_kind, title=sub_title, rating_key=rating_key, callback_menu=IgnoreMenu)
# mass-extract embedded
if current_kind == "season" and config.plex_transcoder:
for lang in config.lang_list:
oc.add(DirectoryObject(
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
base_title=show.section.title, display_items=display_items, item_title=item_title,
title=title,
previous_item_type=previous_item_type, with_mods=True,
previous_rating_key=previous_rating_key, randomize=timestamp()),
title=_(u"Extract missing %(language)s embedded subtitles", language=display_language(lang)),
summary=_("Extracts the not yet extracted embedded subtitles of all episodes for the current "
"season with all configured default modifications")
))
oc.add(DirectoryObject(
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
base_title=show.section.title, display_items=display_items, item_title=item_title,
title=title, force=True,
previous_item_type=previous_item_type, with_mods=True,
previous_rating_key=previous_rating_key, randomize=timestamp()),
title=_(u"Extract and activate %(language)s embedded subtitles", language=display_language(lang)),
summary=_("Extracts embedded subtitles of all episodes for the current season "
"with all configured default modifications")
))
# add refresh
oc.add(DirectoryObject(
key=Callback(RefreshItem, rating_key=rating_key, item_title=title, refresh_kind=current_kind,
previous_rating_key=previous_rating_key, timeout=timeout * 1000, randomize=timestamp()),
title=u"Refresh: %s" % item_title,
summary="Refreshes the %s, possibly searching for missing and picking up new subtitles on disk" % current_kind
title=_(u"Refresh: %s", item_title),
summary=_("Refreshes %(the_movie_series_season_episode)s, possibly searching for missing and picking up "
"new subtitles on disk", the_movie_series_season_episode=_(u"the %s" % current_kind))
))
oc.add(DirectoryObject(
key=Callback(RefreshItem, rating_key=rating_key, item_title=title, force=True,
refresh_kind=current_kind, previous_rating_key=previous_rating_key, timeout=timeout * 1000,
randomize=timestamp()),
title=u"Auto-Find subtitles: %s" % item_title,
summary="Issues a forced refresh, ignoring known subtitles and searching for new ones"
title=_(u"Auto-Find subtitles: %s", item_title),
summary=_("Issues a forced refresh, ignoring known subtitles and searching for new ones")
))
else:
return ItemDetailsMenu(rating_key=rating_key, title=title, item_title=item_title)
@@ -115,6 +154,70 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
return oc
@route(PREFIX + '/season/extract_embedded/{rating_key}/{language}')
def SeasonExtractEmbedded(**kwargs):
    """
    starts season_extract_embedded in a thread and returns to the MetadataMenu with a success header/message
    :param kwargs: must contain language, with_mods, item_title, title and randomize; rating_key and force are
                   optional; whatever remains is passed on to MetadataMenu
    :return: the MetadataMenu result
    """
    rating_key = kwargs.get("rating_key")

    # these are consumed here and must not reach MetadataMenu through **kwargs
    requested_language, with_mods, item_title, title = [
        kwargs.pop(name) for name in ("language", "with_mods", "item_title", "title")
    ]
    force = kwargs.pop("force", False)

    Thread.Create(season_extract_embedded, rating_key=rating_key, requested_language=requested_language,
                  with_mods=with_mods, force=force)

    kwargs.update(
        header=_("Success"),
        message=_(u"Extracting of embedded subtitles for %s triggered", title),
    )

    # MetadataMenu gets a fresh randomize value instead of the incoming one
    del kwargs["randomize"]
    return MetadataMenu(randomize=timestamp(), title=item_title, **kwargs)
def multi_extract_embedded(stream_list, refresh=False, with_mods=False, single_thread=True):
    """
    calls extract_embedded_sub for every entry of stream_list
    :param stream_list: iterable of (video_part_map, plexapi_part, stream_index, language, set_current) tuples
    :param refresh: passed on to extract_embedded_sub
    :param with_mods: passed on to extract_embedded_sub
    :param single_thread: if set, the whole run happens while holding the "extract_embedded" Thread.Lock
    :return: None
    """
    def extract_all():
        for entry in stream_list:
            scanned_videos, part, index, lang, make_current = entry
            # the plex item is taken from the first key of the video/part mapping
            plex_item = scanned_videos.keys()[0].plexapi_metadata["item"]
            extract_embedded_sub(rating_key=plex_item.rating_key, part_id=part.id,
                                 plex_item=plex_item, part=part, scanned_videos=scanned_videos,
                                 stream_index=index, set_current=make_current,
                                 language=lang, with_mods=with_mods, refresh=refresh)

    if not single_thread:
        extract_all()
        return

    with Thread.Lock(key="extract_embedded"):
        extract_all()
def season_extract_embedded(rating_key, requested_language, with_mods=False, force=False):
    """
    extracts the first embedded subtitle stream matching requested_language for every part of every child item
    of rating_key; the subtitle storage is destroyed afterwards, even if an error occurs
    :param rating_key: key whose children (library/metadata) are processed
    :param requested_language: language to look up in the storage and in the embedded streams
    :param with_mods: passed on to extract_embedded_sub
    :param force: extract even if get_by_provider(..., "embedded") already returns something, and always set the
                  result as current
    :return: None
    """
    storage = get_subtitle_storage()

    try:
        for child in get_all_items(key="children", value=rating_key, base="library/metadata"):
            item = get_item(child[MI_KEY])
            if not item:
                continue

            stored = storage.load_or_new(item)
            for part in get_all_parts(item):
                already_extracted = stored.get_by_provider(part.id, requested_language, "embedded")
                current = stored.get_any(part.id, requested_language)

                # nothing to do for parts that already have an embedded entry, unless forced
                if already_extracted and not force:
                    continue

                streams = get_embedded_subtitle_streams(part, requested_language=requested_language,
                                                        get_forced=config.forced_only)
                if not streams:
                    continue

                # only the first returned stream is used; it becomes current if nothing is stored yet (or if
                # forced), and a refresh is only requested if nothing was stored before
                extract_embedded_sub(rating_key=item.rating_key, part_id=part.id,
                                     stream_index=str(streams[0]["stream"].index),
                                     set_current=not current or force,
                                     refresh=not current, language=requested_language, with_mods=with_mods)
    finally:
        storage.destroy()
@route(PREFIX + '/ignore_list')
def IgnoreListMenu():
oc = SubFolderObjectContainer(title2="Ignore list", replace_parent=True)
@@ -130,17 +233,22 @@ def IgnoreListMenu():
def HistoryMenu():
from support.history import get_history
history = get_history()
oc = SubFolderObjectContainer(title2="History", replace_parent=True)
oc = SubFolderObjectContainer(title2=_("History"), replace_parent=True)
for item in history.items:
possible_language = item.language
language_display = item.lang_name if not possible_language else display_language(possible_language)
for item in history.history_items:
oc.add(DirectoryObject(
key=Callback(ItemDetailsMenu, title=item.title, item_title=item.item_title,
rating_key=item.rating_key),
title=u"%s (%s)" % (item.item_title, item.mode_verbose),
summary=u"%s in %s (%s, score: %s), %s" % (item.lang_name, item.section_title,
summary=_(u"%s in %s (%s, score: %s), %s", language_display, item.section_title,
item.provider_name, item.score, df(item.time))
))
history.destroy()
return oc
@@ -152,6 +260,15 @@ def RefreshMissing(randomize=None):
return fatality(header=header, replace_parent=True)
def replace_item(obj, key, replace_value):
    """
    recursively replaces the value stored under key with replace_value, modifying obj in place; nested dicts
    are processed, including dicts that sit inside (nested) lists, so no occurrence of key is left untouched
    :param obj: dict to process (modified in place)
    :param key: the key whose value gets replaced wherever it occurs
    :param replace_value: the value to store under every occurrence of key
    :return: obj itself
    """
    def walk(value):
        if isinstance(value, dict):
            replace_item(value, key, replace_value)
        elif isinstance(value, list):
            # dicts inside lists would otherwise keep their original value for key
            for entry in value:
                walk(entry)

    # nested containers are mutated in place, so obj itself is not resized while iterating
    for value in obj.values():
        walk(value)

    if key in obj:
        obj[key] = replace_value
    return obj
@route(PREFIX + '/ValidatePrefs', enforce_route=True)
def ValidatePrefs():
Core.log.setLevel(logging.DEBUG)
@@ -175,20 +292,20 @@ def ValidatePrefs():
update_dict = True
elif Dict["channel_enabled"] != config.enable_channel:
Log.Debug("Channel features %s, restarting plugin", "enabled" if config.enable_channel else "disabled")
Log.Debug("Interface features %s, restarting plugin", "enabled" if config.enable_channel else "disabled")
update_dict = True
restart = True
if "plugin_pin_mode" not in Dict:
if "plugin_pin_mode2" not in Dict:
update_dict = True
elif Dict["plugin_pin_mode"] != Prefs["plugin_pin_mode"]:
elif Dict["plugin_pin_mode2"] != Prefs["plugin_pin_mode2"]:
update_dict = True
restart = True
if update_dict:
Dict["channel_enabled"] = config.enable_channel
Dict["plugin_pin_mode"] = Prefs["plugin_pin_mode"]
Dict["plugin_pin_mode2"] = Prefs["plugin_pin_mode2"]
Dict.Save()
if restart:
@@ -196,8 +313,8 @@ def ValidatePrefs():
DispatchRestart()
return
scheduler.stop()
scheduler.setup_tasks()
scheduler.clear_task_data("MissingSubtitles")
set_refresh_menu_state(None)
Log.Debug("Validate Prefs called.")
@@ -205,21 +322,31 @@ def ValidatePrefs():
# SZ config debug
Log.Debug("--- SZ Config-Debug ---")
for attr in [
"app_support_path", "data_path", "data_items_path", "enable_agent",
"version", "app_support_path", "data_path", "data_items_path", "enable_agent",
"enable_channel", "permissions_ok", "missing_permissions", "fs_encoding",
"subtitle_destination_folder", "dbm_supported", "lang_list", "providers"]:
Log.Debug("config.%s: %s", attr, getattr(config, attr))
"subtitle_destination_folder", "new_style_cache", "dbm_supported", "lang_list", "providers",
"plex_transcoder", "refiner_settings", "unrar", "adv_cfg_path"]:
value = getattr(config, attr)
if isinstance(value, dict):
d = replace_item(copy.deepcopy(value), "api_key", "xxxxxxxxxxxxxxxxxxxxxxxxx")
Log.Debug("config.%s: %s", attr, d)
continue
Log.Debug("config.%s: %s", attr, value)
for attr in ["plugin_log_path", "server_log_path"]:
value = getattr(config, attr)
access = os.access(value, os.R_OK)
if Core.runtime.os == "Windows":
try:
f = open(value, "r")
f.read(1)
f.close()
except:
access = False
if value:
access = os.access(value, os.R_OK)
if Core.runtime.os == "Windows":
try:
f = open(value, "r")
f.read(1)
f.close()
except:
access = False
Log.Debug("config.%s: %s (accessible: %s)", attr, value, access)
@@ -227,10 +354,36 @@ def ValidatePrefs():
"subtitles.save.filesystem", ]:
Log.Debug("Pref.%s: %s", attr, Prefs[attr])
# debug drone
if "sonarr" in config.refiner_settings or "radarr" in config.refiner_settings:
Log.Debug("----- Connections -----")
try:
from subliminal_patch.refiners.drone import SonarrClient, RadarrClient
for key, cls in [("sonarr", SonarrClient), ("radarr", RadarrClient)]:
if key in config.refiner_settings:
cname = key.capitalize()
try:
status = cls(**config.refiner_settings[key]).status()
except HTTPError, e:
if e.response.status_code == 401:
Log.Debug("%s: NOT WORKING - BAD API KEY", cname)
else:
Log.Debug("%s: NOT WORKING - %s", cname, traceback.format_exc())
except:
Log.Debug("%s: NOT WORKING - %s", cname, traceback.format_exc())
else:
if status and status["version"]:
Log.Debug("%s: OK - %s", cname, status["version"])
else:
Log.Debug("%s: NOT WORKING - %s", cname)
except:
Log.Debug("Something went really wrong when evaluating Sonarr/Radarr: %s", traceback.format_exc())
# fixme: check existence of and os access of logs
Log.Debug("----- Environment -----")
Log.Debug("Platform: %s", Core.runtime.platform)
Log.Debug("OS: %s", Core.runtime.os)
Log.Debug("----- Environment -----")
Log.Debug("Python: %s", platform.python_version())
for key, value in os.environ.iteritems():
if key.startswith("PLEX") or key.startswith("SZ_"):
if "TOKEN" in key:
+102 -45
View File
@@ -1,33 +1,36 @@
# coding=utf-8
import traceback
import types
import datetime
import subprocess
import os
import operator
from func import enable_channel_wrapper
from support.items import get_kind, get_item_thumb
from support.helpers import get_video_display_title
from func import enable_channel_wrapper, route_wrapper, register_route_function
from subzero.language import Language
from support.i18n import is_localized_string, _
from support.items import get_kind, get_item_thumb, get_item, get_item_kind_from_item, refresh_item
from support.helpers import get_video_display_title, pad_title, display_language, quote_args, is_stream_forced
from support.ignore import ignore_list
from support.lib import get_intent
from support.config import config
from subzero.constants import ICON_SUB, ICON
from support.plex_media import get_part, get_plex_metadata
from support.scheduler import scheduler
from support.scanning import scan_videos
from support.storage import save_subtitles
from subliminal_patch.subtitle import ModifiedSubtitle
default_thumb = R(ICON_SUB)
main_icon = ICON if not config.is_development else "icon-dev.jpg"
# noinspection PyUnboundLocalVariable
route = enable_channel_wrapper(route)
route = route_wrapper
# noinspection PyUnboundLocalVariable
handler = enable_channel_wrapper(handler)
def should_display_ignore(items, previous=None):
    """
    Decide whether the ignore options should be shown for the given items.

    :param items: the items about to be listed; their kind is resolved via get_kind
    :param previous: kind of the menu level we came from, if any
    :return: ``items`` itself when it is falsy, otherwise True for shows and
             seasons, and for episodes that were not reached through a season
    """
    kind = get_kind(items)

    # nothing to list: hand back the falsy value unchanged
    if not items:
        return items

    if kind in ("show", "season"):
        return True

    return kind == "episode" and previous != "season"
def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None, add_kind=True):
"""
@@ -47,10 +50,15 @@ def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None
in_list = rating_key in ignore_list[use_kind]
t = u"Ignore %(kind)s \"%(title)s\""
if in_list:
t = u"Un-ignore %(kind)s \"%(title)s\""
oc.add(DirectoryObject(
key=Callback(callback_menu, kind=use_kind, rating_key=rating_key, title=title),
title=u"%s %s \"%s\"" % (
"Un-Ignore" if in_list else "Ignore", ignore_list.verbose(kind) if add_kind else "", unicode(title))
key=Callback(callback_menu, kind=use_kind, sure=False, todo="not_set", rating_key=rating_key, title=title),
title=_(t,
kind=ignore_list.verbose(kind) if add_kind else "",
title=unicode(title))
)
)
@@ -72,7 +80,7 @@ def dig_tree(oc, items, menu_callback, menu_determination_callback=None, force_r
oc.add(DirectoryObject(
key=Callback(menu_callback or menu_determination_callback(kind, item, pass_kwargs=pass_kwargs), title=title,
rating_key=force_rating_key or key, **add_kwargs),
title=title, thumb=thumb, summary=summary
title=pad_title(title) if kind in ("show", "season") else title, thumb=thumb, summary=summary
))
return oc
@@ -90,8 +98,8 @@ def set_refresh_menu_state(state_or_media, media_type="movies"):
Dict["current_refresh_state"] = None
return
if isinstance(state_or_media, types.StringTypes):
Dict["current_refresh_state"] = state_or_media
if isinstance(state_or_media, types.StringTypes) or is_localized_string(state_or_media):
Dict["current_refresh_state"] = unicode(state_or_media)
return
media = state_or_media
@@ -102,14 +110,19 @@ def set_refresh_menu_state(state_or_media, media_type="movies"):
for episode in media.seasons[season].episodes:
ep = media.seasons[season].episodes[episode]
media_id = ep.id
title = get_video_display_title("show", ep.title, parent_title=media.title, season=int(season), episode=int(episode))
title = get_video_display_title(_("show"), ep.title, parent_title=media.title, season=int(season), episode=int(episode))
else:
title = get_video_display_title("movie", media.title)
title = get_video_display_title(_("movie"), media.title)
intent = get_intent()
force_refresh = intent.get("force", media_id)
Dict["current_refresh_state"] = u"%sRefreshing %s" % ("Force-" if force_refresh else "", unicode(title))
t = u"Refreshing %(title)s"
if force_refresh:
t = u"Force-refreshing %(title)s"
Dict["current_refresh_state"] = unicode(_(t,
title=unicode(title)))
def get_item_task_data(task_name, rating_key, language):
@@ -124,30 +137,74 @@ def debounce(func):
:param func:
:return:
"""
def get_lookup_key(args, kwargs):
func_name = list(args).pop(0).__name__
return tuple([func_name] + [(key, value) for key, value in kwargs.iteritems()])
def wrap(*args, **kwargs):
if "randomize" in kwargs:
if "menu_history" not in Dict:
Dict["menu_history"] = {}
func.debounce = True
key = get_lookup_key([func] + list(args), kwargs)
if key in Dict["menu_history"]:
Log.Debug("not triggering %s twice with %s, %s" % (func, args, kwargs))
return ObjectContainer()
else:
Dict["menu_history"][key] = datetime.datetime.now() + datetime.timedelta(hours=6)
return func
def extract_embedded_sub(**kwargs):
rating_key = kwargs["rating_key"]
part_id = kwargs.pop("part_id")
stream_index = kwargs.pop("stream_index")
with_mods = kwargs.pop("with_mods", False)
language = Language.fromietf(kwargs.pop("language"))
refresh = kwargs.pop("refresh", True)
set_current = kwargs.pop("set_current", True)
plex_item = kwargs.pop("plex_item", get_item(rating_key))
item_type = get_item_kind_from_item(plex_item)
part = kwargs.pop("part", get_part(plex_item, part_id))
scanned_videos = kwargs.pop("scanned_videos", None)
any_successful = False
if part:
if not scanned_videos:
metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
scanned_videos = scan_videos([metadata], ignore_all=True, skip_hashing=True)
for stream in part.streams:
# subtitle stream
if str(stream.index) == stream_index:
is_forced = is_stream_forced(stream)
bn = os.path.basename(part.file)
set_refresh_menu_state(_(u"Extracting subtitle %(stream_index)s of %(filename)s",
stream_index=stream_index,
filename=bn))
Log.Info(u"Extracting stream %s (%s) of %s", stream_index, display_language(language), bn)
out_codec = stream.codec if stream.codec != "mov_text" else "srt"
args = [
config.plex_transcoder, "-i", part.file, "-map", "0:%s" % stream_index, "-f", out_codec, "-"
]
output = None
try:
Dict.Save()
except TypeError:
Log.Error("Can't save menu history for: %r", key)
del Dict["menu_history"][key]
output = subprocess.check_output(quote_args(args), stderr=subprocess.PIPE, shell=True)
except:
Log.Error("Extraction failed: %s", traceback.format_exc())
return func(*args, **kwargs)
if output:
subtitle = ModifiedSubtitle(language, mods=config.default_mods if with_mods else None)
subtitle.content = output
subtitle.provider_name = "embedded"
subtitle.id = "stream_%s" % stream_index
subtitle.score = 0
subtitle.set_encoding("utf-8")
return wrap
# fixme: speedup video; only video.name is needed
save_successful = save_subtitles(scanned_videos, {scanned_videos.keys()[0]: [subtitle]}, mode="m",
set_current=set_current, is_forced=is_forced)
set_refresh_menu_state(None)
if save_successful and refresh:
refresh_item(rating_key)
any_successful = True
return any_successful
class SZObjectContainer(ObjectContainer):
@@ -181,10 +238,10 @@ class SubFolderObjectContainer(ObjectContainer):
from support.helpers import pad_title, timestamp
self.add(DirectoryObject(
key=Callback(fatality, force_title=" ", randomize=timestamp()),
title=pad_title("<< Back to home"),
summary="Current state: %s; Last state: %s" % (
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
title=pad_title(_("<< Back to home")),
summary=_("Current state: %s; Last state: %s",
(Dict["current_refresh_state"] or _("Idle")) if "current_refresh_state" in Dict else _("Idle"),
(Dict["last_refresh_state"] or _("None")) if "last_refresh_state" in Dict else _("None")
)
))
@@ -202,4 +259,4 @@ class ZipObject(ObjectClass):
self.SetHeader("Content-Disposition",
'attachment; filename="' + datetime.datetime.now().strftime("Logs_%y%m%d_%H-%M-%S.zip")
+ '"')
return self.zipdata
return self.zipdata
+13 -3
View File
@@ -4,9 +4,11 @@ from subzero.constants import PREFIX
from menu_helpers import debounce, set_refresh_menu_state, route
from support.items import refresh_item
from support.helpers import timestamp
from support.i18n import _
@route(PREFIX + '/item/{rating_key}')
@route(PREFIX + '/item/refresh/{rating_key}/force', force=True)
@route(PREFIX + '/item/refresh/{rating_key}')
@debounce
def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=False, refresh_kind=None,
previous_rating_key=None, timeout=8000, randomize=None, trigger=True):
@@ -14,9 +16,17 @@ def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=Fal
from interface.main import fatality
header = " "
if trigger:
set_refresh_menu_state(u"Triggering %sRefresh for %s" % ("Force-" if force else "", item_title))
t = u"Triggering refresh for %(title)s"
if force:
u"Triggering forced refresh for %(title)s"
set_refresh_menu_state(_(t,
title=item_title))
Thread.Create(refresh_item, rating_key=rating_key, force=force, refresh_kind=refresh_kind,
parent_rating_key=previous_rating_key, timeout=int(timeout))
header = u"%s of item %s triggered" % ("Refresh" if not force else "Forced-refresh", rating_key)
t = u"Refresh of item %(item_id)s triggered"
if force:
t = u"Forced refresh of item %(item_id)s triggered"
header = _(t,
item_id=rating_key)
return fatality(randomize=timestamp(), header=header, replace_parent=True)
+59 -30
View File
@@ -3,22 +3,24 @@
import traceback
import types
from babelfish import Language
from subzero.language import Language
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, route
from subzero.modification import registry as mod_registry, SubtitleModifications
from subzero.constants import PREFIX
from support.plex_media import get_plex_metadata, scan_videos
from support.plex_media import get_plex_metadata
from support.scanning import scan_videos
from support.helpers import timestamp, pad_title
from support.items import get_current_sub, set_mods_for_part
from support.i18n import _
@route(PREFIX + '/item/sub_mods/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleModificationsMenu(**kwargs):
rating_key = kwargs["rating_key"]
part_id = kwargs["part_id"]
language = kwargs["language"]
lang_instance = Language.fromietf(language)
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
kwargs.pop("randomize")
@@ -29,8 +31,8 @@ def SubtitleModificationsMenu(**kwargs):
from interface.item_details import SubtitleOptionsMenu
oc.add(DirectoryObject(
key=Callback(SubtitleOptionsMenu, randomize=timestamp(), **kwargs),
title=u"< Back to subtitle options for: %s" % kwargs["title"],
summary=kwargs["current_data"],
title=_(u"< Back to subtitle options for: %s", kwargs["title"]),
summary=unicode(kwargs["current_data"]),
thumb=default_thumb
))
@@ -41,45 +43,53 @@ def SubtitleModificationsMenu(**kwargs):
if mod.exclusive and identifier in current_mods:
continue
if mod.languages and lang_instance not in mod.languages:
continue
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=identifier, mode="add", randomize=timestamp(), **kwargs),
title=pad_title(mod.description), summary=mod.long_description or ""
title=pad_title(_(mod.description)), summary=_(mod.long_description) or ""
))
fps_mod = SubtitleModifications.get_mod_class("change_FPS")
oc.add(DirectoryObject(
key=Callback(SubtitleFPSModMenu, randomize=timestamp(), **kwargs),
title=pad_title(fps_mod.description), summary=fps_mod.long_description or ""
title=pad_title(_(fps_mod.description)), summary=_(fps_mod.long_description) or ""
))
shift_mod = SubtitleModifications.get_mod_class("shift_offset")
oc.add(DirectoryObject(
key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
title=pad_title(shift_mod.description), summary=shift_mod.long_description or ""
title=pad_title(_(shift_mod.description)), summary=_(shift_mod.long_description) or ""
))
color_mod = SubtitleModifications.get_mod_class("color")
oc.add(DirectoryObject(
key=Callback(SubtitleColorModMenu, randomize=timestamp(), **kwargs),
title=pad_title(color_mod.description), summary=color_mod.long_description or ""
title=pad_title(_(color_mod.description)), summary=_(color_mod.long_description) or ""
))
if current_mods:
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=None, mode="remove_last", randomize=timestamp(), **kwargs),
title=pad_title("Remove last applied mod (%s)" % current_mods[-1]),
summary=u"Currently applied mods: %s" % (", ".join(current_mods) if current_mods else "none")
title=pad_title(_("Remove last applied mod (%s)", current_mods[-1])),
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
))
oc.add(DirectoryObject(
key=Callback(SubtitleListMods, randomize=timestamp(), **kwargs),
title=pad_title("Manage applied mods"),
summary=u"Currently applied mods: %s" % (", ".join(current_mods))
title=pad_title(_("Manage applied mods")),
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods))
))
oc.add(DirectoryObject(
key=Callback(SubtitleReapplyMods, randomize=timestamp(), **kwargs),
title=pad_title(_("Reapply applied mods")),
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
))
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=None, mode="clear", randomize=timestamp(), **kwargs),
title=pad_title("Restore original version"),
summary=u"Currently applied mods: %s" % (", ".join(current_mods) if current_mods else "none")
title=pad_title(_("Restore original version")),
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
))
storage.destroy()
@@ -99,28 +109,31 @@ def SubtitleFPSModMenu(**kwargs):
oc.add(DirectoryObject(
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
title="< Back to subtitle modification menu"
title=_("< Back to subtitle modification menu")
))
metadata = get_plex_metadata(rating_key, part_id, item_type)
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
video, plex_part = scanned_parts.items()[0]
target_fps = plex_part.fps
for fps in ["23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]:
for fps in ["23.980", "23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]:
if float(fps) == float(target_fps):
continue
if float(fps) > float(target_fps):
indicator = "subs constantly getting faster"
indicator = _("subs constantly getting faster")
else:
indicator = "subs constantly getting slower"
indicator = _("subs constantly getting slower")
mod_ident = SubtitleModifications.get_mod_signature("change_FPS", **{"from": fps, "to": target_fps})
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs),
title="%s fps -> %s fps (%s)" % (fps, target_fps, indicator)
title=_("%(from_fps)s fps -> %(to_fps)s fps (%(slower_or_faster_indicator)s)",
from_fps=fps,
to_fps=target_fps,
slower_or_faster_indicator=indicator)
))
return oc
@@ -138,13 +151,13 @@ def SubtitleShiftModUnitMenu(**kwargs):
oc.add(DirectoryObject(
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
title="< Back to subtitle modifications"
title=_("< Back to subtitle modifications")
))
for unit, title in POSSIBLE_UNITS:
oc.add(DirectoryObject(
key=Callback(SubtitleShiftModMenu, unit=unit, randomize=timestamp(), **kwargs),
title="Adjust by %s" % title
title=_("Adjust by %(time_and_unit)s", time_and_unit=title)
))
return oc
@@ -161,16 +174,16 @@ def SubtitleShiftModMenu(unit=None, **kwargs):
oc.add(DirectoryObject(
key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
title="< Back to unit selection"
title=_("< Back to unit selection")
))
rng = []
if unit == "h":
rng = range(-10, 11)
rng = list(reversed(range(-10, 0))) + list(reversed(range(1, 11)))
elif unit in ("m", "s"):
rng = range(-15, 15)
rng = list(reversed(range(-15, 0))) + list(reversed(range(1, 16)))
elif unit == "ms":
rng = range(-900, 1000, 100)
rng = list(reversed(range(-900, 0, 100))) + list(reversed(range(100, 1000, 100)))
for i in rng:
if i == 0:
@@ -195,7 +208,7 @@ def SubtitleColorModMenu(**kwargs):
oc.add(DirectoryObject(
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
title="< Back to subtitle modification menu"
title=_("< Back to subtitle modification menu")
))
for color, code in color_mod.colors.iteritems():
@@ -227,6 +240,22 @@ def SubtitleSetMods(mods=None, mode=None, **kwargs):
return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
@route(PREFIX + '/item/sub_reapply_mods/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleReapplyMods(**kwargs):
    """
    Re-run the mods of the current subtitle and return to the modifications menu.

    Expects ``rating_key``, ``part_id``, ``language`` (IETF code), ``item_type``
    and ``randomize`` in ``kwargs``. set_mods_for_part is called in "add" mode
    with an empty list of mods.
    """
    # look the required arguments up in a fixed order
    rating_key, part_id, lang_code, item_type = [
        kwargs[name] for name in ("rating_key", "part_id", "language", "item_type")
    ]

    set_mods_for_part(rating_key, part_id, Language.fromietf(lang_code), item_type, [], mode="add")

    # drop the incoming randomize value; a fresh one is passed on below
    kwargs.pop("randomize")
    return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
@route(PREFIX + '/item/sub_list_mods/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleListMods(**kwargs):
@@ -241,13 +270,13 @@ def SubtitleListMods(**kwargs):
oc.add(DirectoryObject(
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
title="< Back to subtitle modifications"
title=_("< Back to subtitle modifications")
))
for identifier in current_sub.mods:
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=identifier, mode="remove", randomize=timestamp(), **kwargs),
title="Remove: %s" % identifier
title=_("Remove: %(mod_name)s", mod_name=identifier)
))
storage.destroy()
+18 -9
View File
@@ -13,6 +13,12 @@ import lib
sys.modules["support.lib"] = lib
import i18n
sys.modules["support.i18n"] = i18n
helpers._ = i18n._
import plex_media
sys.modules["support.plex_media"] = plex_media
@@ -28,22 +34,25 @@ import items
sys.modules["support.items"] = items
import missing_subtitles
sys.modules["support.missing_subtitles"] = missing_subtitles
import scheduler
sys.modules["support.scheduler"] = scheduler
import tasks
sys.modules["support.tasks"] = tasks
import storage
sys.modules["support.storage"] = storage
import scanning
sys.modules["support.scanning"] = scanning
import missing_subtitles
sys.modules["support.missing_subtitles"] = missing_subtitles
import tasks
sys.modules["support.tasks"] = tasks
import ignore
sys.modules["support.ignore"] = ignore
@@ -60,4 +69,4 @@ import activities
sys.modules["support.activities"] = activities
import download
sys.modules["support.download"] = download
sys.modules["support.download"] = download
+8 -2
View File
@@ -3,14 +3,20 @@ from wraptor.decorators import throttle
from config import config
from items import get_item, get_item_kind_from_item, refresh_item
from plex_activity import Activity
from plex_activity.sources.s_logging.main import Logging as Activity_Logging
Activity = None
try:
from plex_activity import Activity
except ImportError:
pass
class PlexActivityManager(object):
def start(self):
activity_sources_enabled = None
if not Activity:
return
if config.plex_token:
from plex import Plex
Plex.configuration.defaults.authentication(config.plex_token)
+423 -53
View File
@@ -1,27 +1,41 @@
# coding=utf-8
import copy
import os
import re
import inspect
import sys
import rarfile
import jstyleson
import datetime
import subliminal
import subliminal_patch
import subzero.constants
import lib
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded, AuthenticationError
from subliminal_patch.core import is_windows_special_path
from whichdb import whichdb
from babelfish import Language
from subliminal_patch.exceptions import TooManyRequests
from subzero.language import Language
from subliminal.cli import MutexLock
from subzero.lib.io import FileIO, get_viable_encoding
from subzero.lib.dict import Dicked
from subzero.util import get_root_path
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW, MEDIA_TYPE_TO_STRING
from dogpile.cache.region import register_backend as register_cache_backend
from lib import Plex
from helpers import check_write_permissions, cast_bool
from helpers import check_write_permissions, cast_bool, cast_int, mswindows
SUBTITLE_EXTS = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'txt', 'psb',
'vtt']
register_cache_backend(
"subzero.cache.file", "subzero.cache_backends.file", "SZFileBackend")
SUBTITLE_EXTS_BASE = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'psb',
'vtt']
SUBTITLE_EXTS = SUBTITLE_EXTS_BASE + ["txt"]
TEXT_SUBTITLE_EXTS = ("srt", "ass", "ssa", "vtt", "mov_text")
VIDEO_EXTS = ['3g2', '3gp', 'asf', 'asx', 'avc', 'avi', 'avs', 'bivx', 'bup', 'divx', 'dv', 'dvr-ms', 'evo', 'fli',
'flv',
'm2t', 'm2ts', 'm2v', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'mts', 'nsv', 'nuv', 'ogm', 'ogv', 'tp',
@@ -42,6 +56,25 @@ def int_or_default(s, default):
return default
# exception classes that have an entry in the "default" throttle map below
VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable)
# throttle settings: provider name -> exception class -> (timedelta, human readable duration)
# NOTE(review): "default" presumably applies to providers/exceptions without an own entry - confirm at the call site
PROVIDER_THROTTLE_MAP = {
"default": {
TooManyRequests: (datetime.timedelta(hours=1), "1 hour"),
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
ServiceUnavailable: (datetime.timedelta(minutes=20), "20 minutes"),
},
# opensubtitles: longer durations than the defaults for both listed exceptions
"opensubtitles": {
TooManyRequests: (datetime.timedelta(hours=3), "3 hours"),
DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"),
},
# addic7ed: same download limit duration as default, much shorter TooManyRequests duration
"addic7ed": {
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
TooManyRequests: (datetime.timedelta(minutes=5), "5 minutes"),
}
}
class Config(object):
libraries_root = None
plugin_info = ""
@@ -56,6 +89,12 @@ class Config(object):
plex_token = None
is_development = False
dbm_supported = False
pms_request_timeout = 15
low_impact_mode = False
new_style_cache = False
pack_cache_dir = None
advanced = None
debug_i18n = False
enable_channel = True
enable_agent = True
@@ -64,11 +103,8 @@ class Config(object):
lock_advanced_menu = False
locked = False
pin_valid_minutes = 10
lang_list = None
subtitle_destination_folder = None
subtitle_formats = None
providers = None
provider_settings = None
max_recent_items_per_library = 200
permissions_ok = False
missing_permissions = None
@@ -79,22 +115,32 @@ class Config(object):
sections = None
enabled_sections = None
remove_hi = False
remove_tags = False
fix_ocr = False
fix_common = False
reverse_rtl = False
colors = ""
chmod = None
forced_only = False
exotic_ext = False
treat_und_as_first = False
subtitle_sub_dir = None, None
ext_match_strictness = False
default_mods = None
debug_mods = False
react_to_activities = False
activity_mode = None
subtitles_save_to = None
no_refresh = False
plex_transcoder = None
refiner_settings = None
exact_filenames = False
only_one = False
embedded_auto_extract = False
ietf_as_alpha3 = False
unrar = None
adv_cfg_path = None
store_recently_played_amount = 20
store_recently_played_amount = 40
initialized = False
@@ -102,6 +148,10 @@ class Config(object):
self.libraries_root = os.path.abspath(os.path.join(get_root_path(), ".."))
self.init_libraries()
if is_windows_special_path:
Log.Warn("The Plex metadata folder is residing inside a folder with special characters. "
"Multithreading and playback activities will be disabled.")
self.fs_encoding = get_viable_encoding()
self.plugin_info = self.get_plugin_info()
self.is_development = self.get_dev_mode()
@@ -113,20 +163,25 @@ class Config(object):
self.data_items_path = os.path.join(self.data_path, "DataItems")
self.universal_plex_token = self.get_universal_plex_token()
self.plex_token = os.environ.get("PLEXTOKEN", self.universal_plex_token)
subzero.constants.DEFAULT_TIMEOUT = lib.DEFAULT_TIMEOUT = self.pms_request_timeout = \
min(cast_int(Prefs['pms_request_timeout'], 15), 45)
self.low_impact_mode = cast_bool(Prefs['low_impact_mode'])
self.new_style_cache = cast_bool(Prefs['new_style_cache'])
self.pack_cache_dir = self.get_pack_cache_dir()
self.advanced = self.get_advanced_config()
self.debug_i18n = self.advanced.debug_i18n
os.environ["SZ_USER_AGENT"] = self.get_user_agent()
self.providers = self.get_providers()
self.setup_proxies()
self.set_plugin_mode()
self.set_plugin_lock()
self.set_activity_modes()
self.parse_rename_mode()
self.lang_list = self.get_lang_list()
self.subtitle_destination_folder = self.get_subtitle_destination_folder()
self.subtitle_formats = self.get_subtitle_formats()
self.forced_only = cast_bool(Prefs["subtitles.only_foreign"])
self.provider_settings = self.get_provider_settings()
self.max_recent_items_per_library = int_or_default(Prefs["scheduler.max_recent_items_per_library"], 2000)
self.sections = list(Plex["library"].sections())
self.missing_permissions = []
@@ -136,32 +191,73 @@ class Config(object):
self.permissions_ok = self.check_permissions()
self.notify_executable = self.check_notify_executable()
self.remove_hi = cast_bool(Prefs['subtitles.remove_hi'])
self.remove_tags = cast_bool(Prefs['subtitles.remove_tags'])
self.fix_ocr = cast_bool(Prefs['subtitles.fix_ocr'])
self.fix_common = cast_bool(Prefs['subtitles.fix_common'])
self.reverse_rtl = cast_bool(Prefs['subtitles.reverse_rtl'])
self.colors = Prefs['subtitles.colors'] if Prefs['subtitles.colors'] != "don't change" else None
self.chmod = self.check_chmod()
self.exotic_ext = cast_bool(Prefs["subtitles.scan.exotic_ext"])
self.treat_und_as_first = cast_bool(Prefs["subtitles.language.treat_und_as_first"])
self.subtitle_sub_dir = self.get_subtitle_sub_dir()
self.ext_match_strictness = self.determine_ext_sub_strictness()
self.default_mods = self.get_default_mods()
self.debug_mods = cast_bool(Prefs['log_debug_mods'])
self.subtitles_save_to = Prefs['subtitles.save.filesystem']
self.no_refresh = os.environ.get("SZ_NO_REFRESH", False)
self.plex_transcoder = self.get_plex_transcoder()
self.only_one = cast_bool(Prefs['subtitles.only_one'])
self.embedded_auto_extract = cast_bool(Prefs["subtitles.embedded.autoextract"])
self.ietf_as_alpha3 = cast_bool(Prefs["subtitles.language.ietf_normalize"])
self.initialized = True
def init_libraries(self):
try_executables = []
custom_unrar = os.environ.get("SZ_UNRAR_TOOL")
if custom_unrar:
if os.path.isfile(custom_unrar):
try_executables.append(custom_unrar)
unrar_exe = None
if Core.runtime.os == "Windows":
unrar_exe = os.path.abspath(os.path.join(self.libraries_root, "Windows", "i386", "UnRAR", "UnRAR.exe"))
if os.path.isfile(unrar_exe):
rarfile.UNRAR_TOOL = unrar_exe
Log.Info("Using UnRAR from: %s", unrar_exe)
custom_unrar = os.environ.get("SZ_UNRAR_TOOL")
if custom_unrar and os.path.isfile(custom_unrar):
rarfile.UNRAR_TOOL = custom_unrar
Log.Info("Using UnRAR from: %s", custom_unrar)
elif Core.runtime.os == "MacOSX":
unrar_exe = os.path.abspath(os.path.join(self.libraries_root, "MacOSX", "i386", "UnRAR", "unrar"))
elif Core.runtime.os == "Linux":
unrar_exe = os.path.abspath(os.path.join(self.libraries_root, "Linux", Core.runtime.cpu, "UnRAR", "unrar"))
if unrar_exe and os.path.isfile(unrar_exe):
try_executables.append(unrar_exe)
try_executables.append("unrar")
for exe in try_executables:
rarfile.UNRAR_TOOL = exe
rarfile.ORIG_UNRAR_TOOL = exe
try:
rarfile.custom_check([rarfile.UNRAR_TOOL], True)
except:
Log.Debug("custom check failed for: %s", exe)
continue
rarfile.OPEN_ARGS = rarfile.ORIG_OPEN_ARGS
rarfile.EXTRACT_ARGS = rarfile.ORIG_EXTRACT_ARGS
rarfile.TEST_ARGS = rarfile.ORIG_TEST_ARGS
Log.Info("Using UnRAR from: %s", exe)
self.unrar = exe
return
Log.Warn("UnRAR not found")
def init_cache(self):
if self.new_style_cache:
subliminal.region.configure('subzero.cache.file', expiration_time=datetime.timedelta(days=30),
arguments={'appname': "sz_cache",
'app_cache_dir': self.data_path})
Log.Info("Using new style file based cache!")
return
names = ['dbhash', 'gdbm', 'dbm']
dbfn = None
self.dbm_supported = False
@@ -207,12 +303,49 @@ class Config(object):
Log.Warn("Not using file based cache!")
subliminal.region.configure('dogpile.cache.memory')
def sync_cache(self):
    """
    Sync the subliminal cache region's backend.

    Does nothing unless the new style cache is enabled.
    """
    if self.new_style_cache:
        Log.Debug("Syncing cache")
        subliminal.region.backend.sync()
def get_pack_cache_dir(self):
    """
    Return the path of the "pack_cache" folder inside the data path.

    The folder is created if it does not exist yet. The path is derived from
    this instance's ``data_path`` rather than from the module-level ``config``
    object, so the method does not depend on that global being bound.

    :return: path to the pack cache directory
    """
    pack_cache_dir = os.path.join(self.data_path, "pack_cache")
    if not os.path.isdir(pack_cache_dir):
        os.makedirs(pack_cache_dir)

    return pack_cache_dir
def get_advanced_config(self):
    """
    load the advanced settings from the first existing candidate file

    candidates, in order: the path_to_advanced_settings preference itself, advanced_settings.json inside
    that path, advanced_settings.json inside this instance's data path

    :return: Dicked built from the parsed file contents, or an empty Dicked if no candidate file exists
    """
    candidates = []
    custom_path = Prefs['path_to_advanced_settings']
    if custom_path:
        # the preference may point at the file itself or at the folder containing it
        candidates.append(custom_path)
        candidates.append(os.path.join(custom_path, "advanced_settings.json"))

    # use this instance's data path (like init_cache does) instead of the module-level config singleton
    candidates.append(os.path.join(self.data_path, "advanced_settings.json"))

    for path in candidates:
        if not os.path.isfile(path):
            continue

        data = FileIO.read(path, "r")
        d = Dicked(**jstyleson.loads(data))

        # remember which file the settings came from
        self.adv_cfg_path = path
        Log.Info(u"Using advanced settings from: %s", path)
        return d

    return Dicked()
def set_log_paths(self):
# find log handler
for handler in Core.log.handlers:
if getattr(getattr(handler, "__class__"), "__name__") in (
'FileHandler', 'RotatingFileHandler', 'TimedRotatingFileHandler'):
cls_name = getattr(getattr(handler, "__class__"), "__name__")
if cls_name in ('FileHandler', 'RotatingFileHandler', 'TimedRotatingFileHandler'):
plugin_log_file = handler.baseFilename
if cls_name in ("RotatingFileHandler", "TimedRotatingFileHandler"):
handler.backupCount = int_or_default(Prefs['log_rotate_keep'], 5)
if os.path.isfile(os.path.realpath(plugin_log_file)):
self.plugin_log_path = plugin_log_file
@@ -243,16 +376,16 @@ class Config(object):
if not self.providers:
self.enable_agent = False
self.enable_channel = False
Log.Warn("No providers enabled, disabling agent and channel!")
Log.Warn("No providers enabled, disabling agent and interface!")
return
if Prefs["plugin_mode"] == "only agent":
if Prefs["plugin_mode2"] == "only agent":
self.enable_channel = False
elif Prefs["plugin_mode"] == "only channel":
elif Prefs["plugin_mode2"] == "only interface":
self.enable_agent = False
def set_plugin_lock(self):
if Prefs["plugin_pin_mode"] in ("channel menu", "advanced menu"):
if Prefs["plugin_pin_mode2"] in ("interface", "advanced menu"):
# check pin
pin = Prefs["plugin_pin"]
if not pin or not len(pin):
@@ -265,8 +398,8 @@ class Config(object):
except ValueError:
Log.Warn("PIN has to be an integer (0-9)")
self.pin = pin
self.lock_advanced_menu = Prefs["plugin_pin_mode"] == "advanced menu"
self.lock_menu = Prefs["plugin_pin_mode"] == "channel menu"
self.lock_advanced_menu = Prefs["plugin_pin_mode2"] == "advanced menu"
self.lock_menu = Prefs["plugin_pin_mode2"] == "interface"
try:
self.pin_valid_minutes = int(Prefs["plugin_pin_valid_for"].strip())
@@ -284,7 +417,7 @@ class Config(object):
self.permissions_ok = self.check_permissions()
def check_permissions(self):
if not Prefs["subtitles.save.filesystem"] or not Prefs["check_permissions"]:
if not cast_bool(Prefs["subtitles.save.filesystem"]) or not cast_bool(Prefs["check_permissions"]):
return True
self.missing_permissions = []
@@ -300,6 +433,9 @@ class Config(object):
if isinstance(path_str, unicode):
path_str = path_str.encode(self.fs_encoding)
if not os.path.exists(path_str):
continue
if use_ignore_fs:
# check whether we've got an ignore file inside the section path
if self.is_physically_ignored(path_str):
@@ -369,12 +505,30 @@ class Config(object):
if not fn:
return
splitted_fn = fn.split()
exe_fn = splitted_fn[0]
arguments = [arg.strip() for arg in splitted_fn[1:]]
got_args = "%(" in fn
if got_args:
first_arg_pos = fn.index("%(")
exe_fn = fn[:first_arg_pos].strip()
arguments = [arg.strip() for arg in fn[first_arg_pos:].split()]
else:
exe_fn = fn
arguments = []
if os.path.isfile(exe_fn) and os.access(exe_fn, os.X_OK):
return exe_fn, arguments
# try finding the executable itself, the path might contain spaces and there might have been other arguments
fn_split = exe_fn.split(u" ")
tmp_exe_fn = fn_split[0]
for offset in range(1, len(fn_split)+1):
if os.path.isfile(tmp_exe_fn) and os.access(tmp_exe_fn, os.X_OK):
exe_fn = tmp_exe_fn.strip()
arguments = [arg.strip() for arg in fn_split[offset:]] + arguments
return exe_fn, arguments
tmp_exe_fn = u" ".join(fn_split[:offset+1])
Log.Error("Notify executable not existing or not executable: %s" % exe_fn)
def refresh_enabled_sections(self):
@@ -405,18 +559,44 @@ class Config(object):
return enabled_sections
# Prepare a list of languages we want subs for
def get_lang_list(self):
l = {Language.fromietf(Prefs["langPref1"])}
def get_lang_list(self, provider=None):
# advanced settings
if provider and self.advanced.providers and provider in self.advanced.providers:
adv_languages = self.advanced.providers[provider].get("languages", None)
if adv_languages:
adv_out = set()
for adv_lang in adv_languages:
adv_lang = adv_lang.strip()
try:
real_lang = Language.fromietf(adv_lang)
except:
try:
real_lang = Language.fromname(adv_lang)
except:
continue
adv_out.update({real_lang})
# fallback to default languages if no valid language was found in advanced settings
if adv_out:
return adv_out
l = {Language.fromietf(Prefs["langPref1a"])}
lang_custom = Prefs["langPrefCustom"].strip()
if Prefs['subtitles.only_one']:
return l
if Prefs["langPref2"] != "None":
l.update({Language.fromietf(Prefs["langPref2"])})
if Prefs["langPref2a"] != "None":
try:
l.update({Language.fromietf(Prefs["langPref2a"])})
except:
pass
if Prefs["langPref3"] != "None":
l.update({Language.fromietf(Prefs["langPref3"])})
if Prefs["langPref3a"] != "None":
try:
l.update({Language.fromietf(Prefs["langPref3a"])})
except:
pass
if len(lang_custom) and lang_custom != "None":
for lang in lang_custom.split(u","):
@@ -432,6 +612,8 @@ class Config(object):
return l
lang_list = property(get_lang_list)
def get_subtitle_destination_folder(self):
if not Prefs["subtitles.save.filesystem"]:
return
@@ -450,18 +632,30 @@ class Config(object):
out.append("vtt")
return out
def get_providers(self):
def get_providers(self, media_type="series"):
providers = {'opensubtitles': cast_bool(Prefs['provider.opensubtitles.enabled']),
# 'thesubdb': Prefs['provider.thesubdb.enabled'],
'podnapisi': cast_bool(Prefs['provider.podnapisi.enabled']),
'titlovi': cast_bool(Prefs['provider.titlovi.enabled']),
'addic7ed': cast_bool(Prefs['provider.addic7ed.enabled']),
'tvsubtitles': cast_bool(Prefs['provider.tvsubtitles.enabled']),
'legendastv': cast_bool(Prefs['provider.legendastv.enabled']),
'napiprojekt': cast_bool(Prefs['provider.napiprojekt.enabled']),
'shooter': cast_bool(Prefs['provider.shooter.enabled']),
'subscenter': cast_bool(Prefs['provider.subscenter.enabled']),
'hosszupuska': cast_bool(Prefs['provider.hosszupuska.enabled']),
'supersubtitles': cast_bool(Prefs['provider.supersubtitles.enabled']),
'shooter': False,
'subscene': cast_bool(Prefs['provider.subscene.enabled']),
'argenteam': cast_bool(Prefs['provider.argenteam.enabled']),
'subscenter': False,
'assrt': cast_bool(Prefs['provider.assrt.enabled']),
}
providers_by_prefs = copy.deepcopy(providers)
# disable subscene for movies by default
if media_type == "movies":
providers["subscene"] = False
# ditch non-forced-subtitles-reporting providers
if self.forced_only:
providers["addic7ed"] = False
@@ -469,33 +663,109 @@ class Config(object):
providers["legendastv"] = False
providers["napiprojekt"] = False
providers["shooter"] = False
providers["subscenter"] = False
providers["hosszupuska"] = False
providers["supersubtitles"] = False
providers["titlovi"] = False
providers["argenteam"] = False
providers["assrt"] = False
if not self.unrar and providers["legendastv"]:
providers["legendastv"] = False
Log.Info("Disabling LegendasTV, because UnRAR wasn't found")
# advanced settings
if media_type and self.advanced.providers:
for provider, data in self.advanced.providers.iteritems():
if provider not in providers or not providers_by_prefs[provider]:
continue
if data["enabled_for"] is not None:
providers[provider] = media_type in data["enabled_for"]
if "provider_throttle" not in Dict:
Dict["provider_throttle"] = {}
changed = False
for provider, enabled in dict(providers).iteritems():
reason, until, throttle_desc = Dict["provider_throttle"].get(provider, (None, None, None))
if reason:
now = datetime.datetime.now()
if now < until:
Log.Info("Not using %s until %s, because of: %s", provider,
until.strftime("%y/%m/%d %H:%M"), reason)
providers[provider] = False
else:
Log.Info("Using %s again after %s, (disabled because: %s)", provider, throttle_desc, reason)
del Dict["provider_throttle"][provider]
changed = True
if changed:
Dict.Save()
return filter(lambda prov: providers[prov], providers)
providers = property(get_providers)
def get_provider_settings(self):
os_use_https = self.advanced.providers.opensubtitles.use_https \
if self.advanced.providers.opensubtitles.use_https != None else True
provider_settings = {'addic7ed': {'username': Prefs['provider.addic7ed.username'],
'password': Prefs['provider.addic7ed.password'],
'use_random_agents': cast_bool(Prefs['provider.addic7ed.use_random_agents']),
'use_random_agents': cast_bool(Prefs['provider.addic7ed.use_random_agents1']),
},
'opensubtitles': {'username': Prefs['provider.opensubtitles.username'],
'password': Prefs['provider.opensubtitles.password'],
'use_tag_search': cast_bool(Prefs['provider.opensubtitles.use_tags']),
'only_foreign': cast_bool(Prefs['subtitles.only_foreign'])
'use_tag_search': self.exact_filenames,
'only_foreign': self.forced_only,
'is_vip': cast_bool(Prefs['provider.opensubtitles.is_vip']),
'use_ssl': os_use_https,
'timeout': self.advanced.providers.opensubtitles.timeout or 15
},
'podnapisi': {
'only_foreign': cast_bool(Prefs['subtitles.only_foreign'])
'only_foreign': self.forced_only,
},
'legendastv': {'username': Prefs['provider.legendastv.username'],
'password': Prefs['provider.legendastv.password'],
},
'subscenter': {'username': Prefs['provider.subscenter.username'],
'password': Prefs['provider.subscenter.password'],
},
'assrt': {'token': Prefs['provider.assrt.token'], }
}
return provider_settings
provider_settings = property(get_provider_settings)
def provider_throttle(self, name, exception):
    """
    throttle a provider :name: for X hours based on the :exception: type

    the throttle entry (exception class name, end time, description) is stored in Dict["provider_throttle"]
    and persisted via Dict.Save(); nothing happens if no throttle data is mapped for the exception

    :param name: provider name, used as key into PROVIDER_THROTTLE_MAP and Dict["provider_throttle"]
    :param exception: the exception instance that triggered the throttling
    :return: None
    """
    cls = getattr(exception, "__class__")
    cls_name = getattr(cls, "__name__")
    if cls not in VALID_THROTTLE_EXCEPTIONS:
        # the exception may be a subclass of a known throttle exception; this has to be checked against the
        # exception instance, as isinstance() on the class object itself never matches
        for valid_cls in VALID_THROTTLE_EXCEPTIONS:
            if isinstance(exception, valid_cls):
                # first matching entry wins
                cls = valid_cls
                break

    # provider specific throttle data takes precedence, fall back to the default map
    throttle_data = PROVIDER_THROTTLE_MAP.get(name, PROVIDER_THROTTLE_MAP["default"]).get(cls, None) or \
        PROVIDER_THROTTLE_MAP["default"].get(cls, None)

    if not throttle_data:
        return

    throttle_delta, throttle_description = throttle_data
    if "provider_throttle" not in Dict:
        Dict["provider_throttle"] = {}

    throttle_until = datetime.datetime.now() + throttle_delta

    # the name of the original exception class is stored, not the one of the matched base class
    Dict["provider_throttle"][name] = (cls_name, throttle_until, throttle_description)

    Log.Info("Throttling %s for %s, until %s, because of: %s", name, throttle_description,
             throttle_until.strftime("%y/%m/%d %H:%M"), cls_name)
    Dict.Save()
@property
def provider_pool(self):
if cast_bool(Prefs['providers.multithreading']):
@@ -519,6 +789,22 @@ class Config(object):
if wrong_chmod:
Log.Warn("Chmod setting ignored, please use only 4-digit integers with leading 0 (e.g.: 775)")
def get_subtitle_sub_dir(self):
    """
    determine the sub folder subtitles are stored in, based on the preferences

    :return: folder, is_absolute
    """
    if not cast_bool(Prefs['subtitles.save.filesystem']):
        return None, None

    # a custom folder overrides the predefined sub folder choice
    custom_folder = Prefs["subtitles.save.subFolder.Custom"]
    if custom_folder:
        return custom_folder, os.path.isabs(custom_folder)

    sub_folder = Prefs["subtitles.save.subFolder"]
    if sub_folder == "current folder":
        sub_folder = "."

    return sub_folder, False
def determine_ext_sub_strictness(self):
val = Prefs["subtitles.scan.filename_strictness"]
if val == "any":
@@ -531,15 +817,25 @@ class Config(object):
mods = []
if self.remove_hi:
mods.append("remove_HI")
if self.remove_tags:
mods.append("remove_tags")
if self.fix_ocr:
mods.append("OCR_fixes")
if self.fix_common:
mods.append("common")
if self.colors:
mods.append("color(name=%s)" % self.colors)
if self.reverse_rtl:
mods.append("reverse_rtl")
return mods
def setup_proxies(self):
    """
    export the "proxy" preference (stripped) as SZ_HTTP_PROXY environment variable, if it is set
    """
    http_proxy = Prefs["proxy"]
    if not http_proxy:
        return

    os.environ["SZ_HTTP_PROXY"] = http_proxy.strip()
    Log.Debug("Using HTTP Proxy: %s", http_proxy)
def set_activity_modes(self):
val = Prefs["activity.on_playback"]
if val == "never":
@@ -556,6 +852,80 @@ class Config(object):
else:
self.activity_mode = "next_episode"
def get_plex_transcoder(self):
    """
    try to locate the Plex Transcoder executable

    the base path is taken from PLEX_MEDIA_SERVER_HOME, falling back to two levels above
    PLEXBUNDLEDPLUGINSPATH

    :return: path to the transcoder executable, None if the base path is unknown or no executable was found
    """
    base_path = os.environ.get("PLEX_MEDIA_SERVER_HOME", None)
    if not base_path:
        # fall back to bundled plugins path
        bundle_path = os.environ.get("PLEXBUNDLEDPLUGINSPATH", None)
        if bundle_path:
            base_path = os.path.normpath(os.path.join(bundle_path, "..", ".."))

    if not base_path:
        # neither environment variable is set; joining paths onto None would raise
        return

    if sys.platform == "darwin":
        fn = os.path.join(base_path, "MacOS", "Plex Transcoder")
    elif mswindows:
        fn = os.path.join(base_path, "plextranscoder.exe")
    else:
        fn = os.path.join(base_path, "Plex Transcoder")

    if os.path.isfile(fn):
        return fn

    # look inside Resources folder as fallback, as well
    fn = os.path.join(base_path, "Resources", "Plex Transcoder")
    if os.path.isfile(fn):
        return fn
def parse_rename_mode(self):
    """
    build self.refiner_settings from the renaming related preferences and flag self.exact_filenames

    self.refiner_settings is always reset to a new dict; self.exact_filenames is only ever set to True here,
    it is not reset to False by this method
    """
    # fixme: exact_filenames should be determined via callback combined with info about the current video
    # (original_name)
    mode = str(Prefs["media_rename1"])
    self.refiner_settings = {}

    # independent of the selected mode
    if cast_bool(Prefs['use_file_info_file']):
        self.refiner_settings["file_info_file"] = True
        self.exact_filenames = True

    # modes that don't need any of the refiners configured further below return early
    if mode == "none of the above":
        return

    elif mode == "Symlink to original file":
        self.refiner_settings["symlinks"] = True
        self.exact_filenames = True
        return

    elif mode == "I keep the original filenames":
        self.exact_filenames = True
        return

    if mode in ("Filebot", "Sonarr/Radarr/Filebot"):
        self.refiner_settings["filebot"] = True

    if mode in ("Sonarr/Radarr (fill api info below)", "Sonarr/Radarr/Filebot"):
        # each refiner is only enabled if both its URL and API key are set
        if Prefs["drone_api.sonarr.url"] and Prefs["drone_api.sonarr.api_key"]:
            self.refiner_settings["sonarr"] = {
                "base_url": Prefs["drone_api.sonarr.url"],
                "api_key": Prefs["drone_api.sonarr.api_key"],
            }
            # values from the advanced settings are merged on top of the preference based ones
            if self.advanced.refiners.sonarr:
                self.refiner_settings["sonarr"].update(self.advanced.refiners.sonarr)
            self.exact_filenames = True

        if Prefs["drone_api.radarr.url"] and Prefs["drone_api.radarr.api_key"]:
            self.refiner_settings["radarr"] = {
                "base_url": Prefs["drone_api.radarr.url"],
                "api_key": Prefs["drone_api.radarr.api_key"]
            }
            # values from the advanced settings are merged on top of the preference based ones
            if self.advanced.refiners.radarr:
                self.refiner_settings["radarr"].update(self.advanced.refiners.radarr)
            self.exact_filenames = True
@property
def text_based_formats(self):
    """
    :return: the text subtitle formats from the advanced settings, TEXT_SUBTITLE_EXTS if none are set there
    """
    configured_formats = self.advanced.text_subtitle_formats
    if configured_formats:
        return configured_formats

    return TEXT_SUBTITLE_EXTS
def init_subliminal_patches(self):
# configure custom subtitle destination folders for scanning pre-existing subs
Log.Debug("Patching subliminal ...")
@@ -564,7 +934,7 @@ class Config(object):
subliminal_patch.core.INCLUDE_EXOTIC_SUBS = self.exotic_ext
subliminal_patch.core.DOWNLOAD_TRIES = int(Prefs['subtitles.try_downloads'])
subliminal.score.episode_scores["addic7ed_boost"] = int(Prefs['provider.addic7ed.boost_by1'])
subliminal.score.episode_scores["addic7ed_boost"] = int(Prefs['provider.addic7ed.boost_by2'])
config = Config()
+4
View File
@@ -1,4 +1,5 @@
# coding=utf-8
import traceback
def dispatch_migrate():
@@ -6,6 +7,8 @@ def dispatch_migrate():
migrate()
except:
Log.Error("Migration failed: %s" % traceback.format_exc())
del Dict["subs"]
Dict.Save()
def migrate():
@@ -25,6 +28,7 @@ def migrate():
time=item.time)
del Dict["history"]
history.destroy()
Dict.Save()
# migrate subtitle storage from Dict to Data
+101 -27
View File
@@ -1,46 +1,120 @@
# coding=utf-8
import os
from subzero.language import Language
import subliminal_patch as subliminal
from support.config import config
from support.helpers import cast_bool
from subtitlehelpers import get_subtitles_from_metadata
from subliminal_patch import compute_score
from support.plex_media import get_blacklist_from_part_map
from subzero.video import refine_video
from support.storage import get_pack_data, store_pack_data
def download_best_subtitles(video_part_map, min_score=0):
def get_missing_languages(video, part):
    """
    determine which of the configured languages have no subtitle yet for the given video

    :param video: video object; its subtitle_languages set is read and, when subtitles aren't saved to the
                  filesystem, extended with the languages found in the metadata
    :param part: media part passed to get_subtitles_from_metadata
    :return: False if nothing is missing (or one found subtitle is enough), otherwise a set of the missing
             languages in their string form
    """
    # fresh Language objects are built, so changing their country below works on these new instances
    languages = set([Language.fromietf(str(l)) for l in config.lang_list])

    # should we treat IETF as alpha3? (ditch the country part)
    alpha3_map = {}
    if config.ietf_as_alpha3:
        for language in languages:
            if language.country:
                alpha3_map[language.alpha3] = language.country
                # NOTE(review): this mutates an object that is a member of a set; if Language hashes by
                # country, set lookups may misbehave afterwards - confirm
                language.country = None

    if not Prefs['subtitles.save.filesystem']:
        # scan for existing metadata subtitles
        meta_subs = get_subtitles_from_metadata(part)
        for language, subList in meta_subs.iteritems():
            if subList:
                video.subtitle_languages.add(language)
                Log.Debug("Found metadata subtitle %s for %s", language, video)

    # NOTE(review): copy() is presumably shallow, in which case the country reset below also affects the
    # Language objects held by video.subtitle_languages - confirm
    have_languages = video.subtitle_languages.copy()
    if config.ietf_as_alpha3:
        for language in have_languages:
            if language.country:
                alpha3_map[language.alpha3] = language.country
                language.country = None

    # the comparison is done on the string representations, not on the Language objects
    missing_languages = (set(str(l) for l in languages) - set(str(l) for l in have_languages))

    # all languages are found if we either really have subs for all languages or we only want to have exactly one language
    # and we've only found one (the case for a selected language, Prefs['subtitles.only_one'] (one found sub matches any language))
    found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
    if not missing_languages or found_one_which_is_enough:
        if found_one_which_is_enough:
            Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
        else:
            Log.Debug('All languages %r exist for %s', languages, video)
        return False

    # re-add country codes to the missing languages, in case we've removed them above
    # NOTE(review): this restores the country on the objects in languages only; missing_languages holds
    # strings that were built before, so the returned value isn't changed by this loop
    if config.ietf_as_alpha3:
        for language in languages:
            language.country = alpha3_map.get(language.alpha3, None)

    return missing_languages
def pre_download_hook(subtitle):
    """
    for pack subtitles, attach previously stored pack data to the subtitle, if there is any

    :param subtitle: subtitle about to be downloaded
    """
    if not subtitle.is_pack:
        return

    # try retrieving the subtitle from a cached pack archive
    cached_pack = get_pack_data(subtitle)
    if cached_pack:
        subtitle.pack_data = cached_pack
def post_download_hook(subtitle):
    """
    store the pack data of a downloaded pack subtitle and clear it from the subtitle afterwards

    if a new pack was downloaded, it is stored in the cache; the providers' download method is responsible for
    setting subtitle.pack_data to None in case the cached pack data we provided was successfully used

    :param subtitle: subtitle that has been downloaded
    """
    if not (subtitle.is_pack and subtitle.pack_data):
        return

    # store pack data in cache
    store_pack_data(subtitle, subtitle.pack_data)

    # may be redundant
    subtitle.pack_data = None
def language_hook(provider):
    """
    :param provider: provider name, passed on as config.get_lang_list(provider=provider)
    :return: the result of config.get_lang_list for that provider
    """
    return config.get_lang_list(provider=provider)
def download_best_subtitles(video_part_map, min_score=0, throttle_time=None, providers=None):
hearing_impaired = Prefs['subtitles.search.hearingImpaired']
languages = config.lang_list
languages = set([Language.fromietf(str(l)) for l in config.lang_list])
if not languages:
return
missing_languages = False
use_videos = []
for video, part in video_part_map.iteritems():
if not Prefs['subtitles.save.filesystem']:
# scan for existing metadata subtitles
meta_subs = get_subtitles_from_metadata(part)
for language, subList in meta_subs.iteritems():
if subList:
video.subtitle_languages.add(language)
Log.Debug("Found metadata subtitle %s for %s", language, video)
if not video.ignore_all:
missing_languages = get_missing_languages(video, part)
else:
missing_languages = languages
missing_subs = (languages - video.subtitle_languages)
if missing_languages:
Log.Info(u"%s has missing languages: %s", os.path.basename(video.name), missing_languages)
refine_video(video, refiner_settings=config.refiner_settings)
use_videos.append(video)
# all languages are found if we either really have subs for all languages or we only want to have exactly one language
# and we've only found one (the case for a selected language, Prefs['subtitles.only_one'] (one found sub matches any language))
found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
if not missing_subs or found_one_which_is_enough:
if found_one_which_is_enough:
Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
else:
Log.Debug('All languages %r exist for %s', languages, video)
continue
missing_languages = True
break
# prepare blacklist
blacklist = get_blacklist_from_part_map(video_part_map, languages)
if missing_languages:
Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s" % (min_score, hearing_impaired))
if use_videos:
Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s, languages: %s" %
(min_score, hearing_impaired, languages))
return subliminal.download_best_subtitles(video_part_map.keys(), languages, min_score, hearing_impaired, providers=config.providers,
provider_configs=config.provider_settings, pool_class=config.provider_pool,
compute_score=compute_score)
return subliminal.download_best_subtitles(set(use_videos), languages, min_score, hearing_impaired,
providers=providers or config.providers,
provider_configs=config.provider_settings,
pool_class=config.provider_pool,
compute_score=compute_score, throttle_time=throttle_time,
blacklist=blacklist, throttle_callback=config.provider_throttle,
pre_download_hook=pre_download_hook,
post_download_hook=post_download_hook,
language_hook=language_hook)
Log.Debug("All languages for all requested videos exist. Doing nothing.")
+59 -12
View File
@@ -15,7 +15,7 @@ from collections import OrderedDict
import chardet
from bs4 import UnicodeDammit
from babelfish import Language
from subzero.language import Language
from subzero.analytics import track_event
mswindows = (sys.platform == "win32")
@@ -44,6 +44,13 @@ def cast_bool(value):
return str(value).strip() in ("true", "True")
def cast_int(value, default=None):
    """
    convert :value: to int, returning :default: if that isn't possible

    :param value: anything int() accepts
    :param default: returned when value can't be converted
    :return: int or default
    """
    try:
        return int(value)
    except (TypeError, ValueError):
        # int() raises TypeError for None and other non-numeric types, ValueError for malformed strings
        return default
# A platform independent way to split paths which might come in with different separators.
def split_path(str):
if str.find('\\') != -1:
@@ -151,10 +158,11 @@ def get_video_display_title(kind, title, section_title=None, parent_title=None,
if add_section_title:
section_add = ("%s: " % section_title) if section_title else ""
if kind == "show" and parent_title:
if kind in ("season", "show") and parent_title:
if season and episode:
return '%s%s S%02dE%02d%s' % (section_add, parent_title, season or 0, episode or 0,
(", %s" % title if title else ""))
return '%s%s%s' % (section_add, parent_title, (", %s" % title if title else ""))
return "%s%s" % (section_add, title)
@@ -202,7 +210,7 @@ def decode_message(s):
def timestamp():
return int(time.time())
return int(time.time()*1000)
def df(d):
@@ -284,7 +292,6 @@ def notify_executable(exe_info, videos, subtitles, storage):
prepared_arguments = [arg % prepared_data for arg in arguments]
Log.Debug(u"Calling %s with arguments: %s" % (exe, prepared_arguments))
env = os.environ
if not mswindows:
env_path = {"PATH": os.pathsep.join(
[
@@ -295,14 +302,30 @@ def notify_executable(exe_info, videos, subtitles, storage):
)
}
env = dict(os.environ, **env_path)
env.pop("LD_LIBRARY_PATH", None)
else:
env = dict(os.environ)
# clean out any Plex-PYTHONPATH that may bleed through the spawned process
if "PYTHONPATH" in env and "plex" in env["PYTHONPATH"].lower():
del env["PYTHONPATH"]
try:
output = subprocess.check_output(quote_args([exe] + prepared_arguments),
stderr=subprocess.STDOUT, shell=True, env=env)
except subprocess.CalledProcessError:
Log.Error(u"Calling %s failed: %s" % (exe, traceback.format_exc()))
proc = subprocess.Popen(quote_args([exe] + prepared_arguments), stdout=subprocess.PIPE,
stderr=subprocess.PIPE, shell=True, env=env, cwd=os.path.dirname(exe))
output, errors = proc.communicate()
if proc.returncode == 1:
Log.Error(u"Calling %s with args %s failed: output:\n%s, error:\n%s", exe, prepared_arguments,
output, errors)
return
output = output.decode()
except:
Log.Error(u"Calling %s failed: %s", exe, traceback.format_exc())
else:
Log.Debug(u"Process output: %s" % output)
Log.Debug(u"Process output: %s", output)
def track_usage(category=None, action=None, label=None, value=None):
@@ -329,9 +352,12 @@ def track_usage(category=None, action=None, label=None, value=None):
except:
pass
Thread.Create(dispatch_track_usage, category, action, label, value,
identifier=Dict["anon_id"], first_use=Dict["first_use"],
add=Network.PublicAddress)
try:
Thread.Create(dispatch_track_usage, category, action, label, value,
identifier=Dict["anon_id"], first_use=Dict["first_use"],
add=Network.PublicAddress)
except:
Log.Debug("Something went wrong when reporting anonymous user statistics: %s", traceback.format_exc())
def dispatch_track_usage(*args, **kwargs):
@@ -344,9 +370,30 @@ def dispatch_track_usage(*args, **kwargs):
Log.Debug("Something went wrong when reporting anonymous user statistics: %s", traceback.format_exc())
def get_language_from_stream(lang_code):
    """
    :param lang_code: language code of a stream
    :return: Language for the code as matched by Locale.Language.Match; None for an empty code, no match
             or the "xx" match
    """
    if not lang_code:
        return

    matched = Locale.Language.Match(lang_code)
    if matched and matched != "xx":
        return Language.fromietf(matched)
def get_language(lang_short):
    """
    :param lang_short: language code, handed to Language.fromietf
    :return: the Language built by Language.fromietf
    """
    return Language.fromietf(lang_short)
def display_language(l):
    """
    :param l: language; its lowercased string form is used as the key passed to _()
    :return: the result of _() for that key
    """
    return _(str(l).lower())
def is_stream_forced(stream):
    """
    :param stream: stream object; its optional "forced" and "title" attributes are evaluated
    :return: the stream's forced attribute if it is truthy, True if its title contains "forced"
             (case-insensitive), otherwise the (falsy) forced attribute
    """
    title = getattr(stream, "title", "") or ""
    flagged = getattr(stream, "forced", False)
    if flagged:
        return flagged

    # streams not flagged as forced may still announce it in their title
    if title and "forced" in title.strip().lower():
        return True

    return flagged
class PartUnknownException(Exception):
    # plain marker exception without any behaviour of its own
    # NOTE(review): presumably raised when a media part can't be resolved; the raise sites aren't visible
    # here - confirm
    pass
+1 -1
View File
@@ -1,4 +1,4 @@
# coding=utf-8
from subzero.history_storage import SubtitleHistory
get_history = lambda: SubtitleHistory(Data, int(Prefs["history_size"]))
get_history = lambda: SubtitleHistory(Data, Thread, int(Prefs["history_size"]))
+106
View File
@@ -0,0 +1,106 @@
# coding=utf-8
import inspect
from support.config import config
core = getattr(Data, "_core")
# get original localization module in order to access its base classes later on
def get_localization_module():
    """
    :return: the module the class of core.localization is defined in, as reported by inspect.getmodule
    """
    return inspect.getmodule(getattr(core.localization, "__class__"))
plex_i18n_module = get_localization_module()
def old_style_placeholders_count(s):
    """
    count the plain %s, %d, %r, %f and %i placeholders in :s:

    :param s: format string
    :return: total number of occurrences
    """
    # fixme: incomplete, use regex
    total = 0
    for placeholder in ("%s", "%d", "%r", "%f", "%i"):
        total += s.count(placeholder)
    return total
def check_old_style_placeholders(k, args):
    """
    check whether the positional :args: fit the old style placeholders of the format string :k:

    :param k: format string (or object whose __str__ returns it)
    :param args: positional format arguments
    :return: a message naming the problem, None if no problem was found
    """
    # escaped %'s must not be taken for placeholders
    fmt = k.__str__().replace("%%", "")

    if "%(" in fmt:
        Log.Error(u"%r defines named placeholders for formatting" % fmt)
        return "NEEDS NAMED ARGUMENTS"

    count = old_style_placeholders_count(fmt)

    if count and not args:
        Log.Error(u"%r requires a arguments for formatting" % fmt)
        return "NEEDS FORMAT ARGUMENTS"

    if args and not count:
        Log.Error(u"%r doesn't define placeholders for formatting" % fmt)
        return "HAS NO FORMAT ARGUMENTS"

    if count and count != len(args):
        Log.Error(u"%r wrong amount of arguments supplied for formatting" % fmt)
        return "WRONG FORMAT ARGUMENT COUNT"
class SmartLocalStringFormatter(plex_i18n_module.LocalStringFormatter):
    """
    this allows the use of dictionaries for string formatting, also does some sanity checking on the keys and values
    """
    def __init__(self, string1, string2, locale=None):
        """
        :param string1: the format string (object)
        :param string2: the format arguments; a tuple holding a single dict-like object is unwrapped to that
                        object
        :param locale: stored as _locale
        """
        if isinstance(string2, tuple):
            # dictionary passed
            if len(string2) == 1 and hasattr(string2[0], "iteritems"):
                string2 = string2[0]
                if config.debug_i18n:
                    if "%(" not in string1.__str__().replace("%%", ""):
                        Log.Error(u"%r: dictionary for non-named format string supplied" % string1.__str__())
                        # replace format string and arguments so that the problem shows up in the output
                        string1 = "%s"
                        string2 = "NO NAMED ARGUMENTS"

            # arguments
            elif len(string2) >= 1 and config.debug_i18n:
                msg = check_old_style_placeholders(string1, string2)
                if msg:
                    # replace format string and arguments so that the problem shows up in the output
                    string1 = "%s"
                    string2 = msg

        # NOTE(review): the base class' __init__ isn't called, its attributes are set directly instead;
        # presumably setattr is used because of restrictions of the plugin environment - confirm
        setattr(self, "_string1", string1)
        setattr(self, "_string2", string2)
        setattr(self, "_locale", locale)
def local_string_with_optional_format(key, *args, **kwargs):
    """
    return the localized string for :key: as unicode, formatted with the given arguments, if any

    :param key: localization key
    :param args: positional format arguments; ignored if keyword arguments are given
    :param kwargs: named format arguments, passed on as a single dictionary
    :return: unicode string; with config.debug_i18n set and no arguments given, the message from
             check_old_style_placeholders is returned instead if it reports a problem
    """
    if kwargs:
        # keyword arguments take precedence over positional ones
        args = (kwargs,)
    else:
        args = tuple(args)

    if args:
        # fixme: may not be the best idea as this evaluates the string early
        try:
            return unicode(SmartLocalStringFormatter(plex_i18n_module.LocalString(core, key, Locale.CurrentLocale), args))
        except TypeError:
            # formatting the translation failed, retry with the "en" locale
            Log.Exception("Broken translation!")
            return unicode(SmartLocalStringFormatter(plex_i18n_module.LocalString(core, key, "en"), args))

    # check string instances for arguments
    if config.debug_i18n:
        msg = check_old_style_placeholders(key, args)
        if msg:
            return msg

    try:
        return unicode(plex_i18n_module.LocalString(core, key, Locale.CurrentLocale))
    except TypeError:
        # retry with the "en" locale
        Log.Exception("Broken translation!")
        return unicode(plex_i18n_module.LocalString(core, key, "en"))
_ = local_string_with_optional_format
def is_localized_string(s):
    """
    :param s: object to check
    :return: True if s has a "localize" attribute
    """
    return hasattr(s, "localize")
+7 -4
View File
@@ -11,7 +11,8 @@ class IgnoreDict(DictProxy):
"section": "sections",
"show": "series",
"movie": "videos",
"episode": "videos"
"episode": "videos",
"season": "seasons",
}
# getItems types mapped to their verbose names
@@ -19,9 +20,10 @@ class IgnoreDict(DictProxy):
"sections": "Section",
"series": "Series",
"videos": "Item",
"seasons": "Season",
}
key_order = ("sections", "series", "videos")
key_order = ("sections", "series", "videos", "seasons")
def __len__(self):
try:
@@ -35,7 +37,7 @@ class IgnoreDict(DictProxy):
return self.translate_keys.get(name)
def verbose(self, name):
return self.keys_verbose.get(name)
return self.keys_verbose.get(self.translate_key(name) or name)
def get_title_key(self, kind, key):
return "%s_%s" % (kind, key)
@@ -57,6 +59,7 @@ class IgnoreDict(DictProxy):
Dict.Save()
def setup_defaults(self):
return {"sections": [], "series": [], "videos": [], "titles": {}}
return {"sections": [], "series": [], "videos": [], "titles": {}, "seasons": []}
ignore_list = IgnoreDict(Dict)
+119 -44
View File
@@ -5,12 +5,18 @@ import re
import traceback
import types
import os
import time
import datetime
from ignore import ignore_list
from helpers import is_recent, get_plex_item_display_title, query_plex, PartUnknownException
from lib import Plex, get_intent
from config import config, IGNORE_FN
from subliminal_patch.subtitle import ModifiedSubtitle
from subzero.modification import registry as mod_registry, SubtitleModifications
from socket import timeout
logger = logging.getLogger(__name__)
@@ -25,7 +31,11 @@ def get_item(key):
except ValueError:
return
item_container = Plex["library"].metadata(item_id)
try:
item_container = Plex["library"].metadata(item_id)
except timeout:
Log.Debug("PMS API timed out when querying information about item %d", item_id)
return
try:
return list(item_container)[0]
@@ -54,6 +64,21 @@ def get_item_kind_from_item(item):
return PLEX_API_TYPE_MAP.get(get_item_kind(item))
def get_item_title(item):
    """
    build the display title for an episode, season, movie or series item

    :param item: plex item
    :return: the display title, None for any other kind of item
    """
    kind = get_item_kind_from_item(item)

    if kind == "episode":
        return get_plex_item_display_title(item, "show", parent=item.season, section_title=None,
                                           parent_title=item.show.title)

    if kind == "season":
        return get_plex_item_display_title(item, "season", parent=item.show, section_title="Season",
                                           parent_title=item.show.title)

    if kind in ("movie", "series"):
        return get_plex_item_display_title(item, kind, section_title=None)
def get_item_thumb(item):
kind = get_item_kind(item)
if kind == "Episode":
@@ -240,7 +265,7 @@ def is_ignored(rating_key, item=None):
:return:
"""
# item in soft ignore list
if rating_key in ignore_list["videos"]:
if ignore_list["videos"] and rating_key in ignore_list["videos"]:
Log.Debug("Item %s is in the soft ignore list" % rating_key)
return True
@@ -248,12 +273,17 @@ def is_ignored(rating_key, item=None):
kind = get_item_kind(item)
# show in soft ignore list
if kind == "Episode" and item.show.rating_key in ignore_list["series"]:
if kind == "Episode" and ignore_list["series"] and item.show.rating_key in ignore_list["series"]:
Log.Debug("Item %s's show is in the soft ignore list" % rating_key)
return True
# season in soft ignore list
if kind == "Episode" and ignore_list["seasons"] and item.season.rating_key in ignore_list["seasons"]:
Log.Debug("Item %s's season is in the soft ignore list" % rating_key)
return True
# section in soft ignore list
if item.section.key in ignore_list["sections"]:
if ignore_list["sections"] and item.section.key in ignore_list["sections"]:
Log.Debug("Item %s's section is in the soft ignore list" % rating_key)
return True
@@ -303,26 +333,101 @@ def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, paren
# season refresh, needs explicit per-episode refresh
refresh = [item.rating_key for item in list(Plex["library/metadata"].children(int(rating_key)))]
multiple = len(refresh) > 1
for key in refresh:
Log.Info("%s item %s", "Refreshing" if not force else "Forced-refreshing", key)
Plex["library/metadata"].refresh(key)
if multiple:
Thread.Sleep(10.0)
def get_current_sub(rating_key, part_id, language):
def get_current_sub(rating_key, part_id, language, plex_item=None):
from support.storage import get_subtitle_storage
item = get_item(rating_key)
item = plex_item or get_item(rating_key)
subtitle_storage = get_subtitle_storage()
stored_subs = subtitle_storage.load_or_new(item)
current_sub = stored_subs.get_any(part_id, language)
return current_sub, stored_subs, subtitle_storage
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
from support.plex_media import get_plex_metadata, scan_videos
from support.storage import save_subtitles
def save_stored_sub(stored_subtitle, rating_key, part_id, language, item_type, plex_item=None, storage=None,
stored_subs=None):
"""
in order for this to work, if the calling supplies stored_subs and storage, it has to trigger its saving and
destruction explicitly
:param stored_subtitle:
:param rating_key:
:param part_id:
:param language:
:param item_type:
:param plex_item:
:param storage:
:param stored_subs:
:return:
"""
from support.plex_media import get_plex_metadata
from support.scanning import scan_videos
from support.storage import save_subtitles, get_subtitle_storage
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
plex_item = plex_item or get_item(rating_key)
stored_subs_was_provided = True
if not stored_subs or not storage:
storage = get_subtitle_storage()
stored_subs = storage.load(plex_item.rating_key)
stored_subs_was_provided = False
if not all([plex_item, stored_subs]):
return
try:
metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
except PartUnknownException:
return
scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
video, plex_part = scanned_parts.items()[0]
subtitle = ModifiedSubtitle(language, mods=stored_subtitle.mods)
subtitle.content = stored_subtitle.content
if stored_subtitle.encoding:
# thanks plex
setattr(subtitle, "_guessed_encoding", stored_subtitle.encoding)
if stored_subtitle.encoding != "utf-8":
subtitle.normalize()
stored_subtitle.content = subtitle.content
stored_subtitle.encoding = "utf-8"
storage.save(stored_subs)
subtitle.plex_media_fps = plex_part.fps
subtitle.page_link = stored_subtitle.id
subtitle.language = language
subtitle.id = stored_subtitle.id
try:
save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
", ".join(stored_subtitle.mods) if stored_subtitle.mods else "none")
except:
Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())
if subtitle.storage_path:
stored_subtitle.last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(subtitle.storage_path))
if not stored_subs_was_provided:
storage.save(stored_subs)
storage.destroy()
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
plex_item = get_item(rating_key)
if not plex_item:
return
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language, plex_item=plex_item)
if mode == "add":
for mod in mods:
identifier, args = SubtitleModifications.parse_identifier(mod)
@@ -350,39 +455,9 @@ def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"
current_sub.mods.pop()
else:
raise NotImplementedError("Wrong mode given")
save_stored_sub(current_sub, rating_key, part_id, language, item_type, plex_item=plex_item, storage=storage,
stored_subs=stored_subs)
storage.save(stored_subs)
try:
metadata = get_plex_metadata(rating_key, part_id, item_type)
except PartUnknownException:
return
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True,
no_refining=True)
video, plex_part = scanned_parts.items()[0]
subtitle = ModifiedSubtitle(language, mods=current_sub.mods)
subtitle.content = current_sub.content
if current_sub.encoding:
# thanks plex
setattr(subtitle, "_guessed_encoding", current_sub.encoding)
if current_sub.encoding != "utf-8":
subtitle.set_encoding("utf-8")
current_sub.content = subtitle.content
current_sub.encoding = "utf-8"
storage.save(stored_subs)
storage.destroy()
subtitle.plex_media_fps = plex_part.fps
subtitle.page_link = "modify subtitles with: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
subtitle.language = language
subtitle.id = current_sub.id
try:
save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
", ".join(current_sub.mods) if current_sub.mods else "none")
except:
Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())
+21 -14
View File
@@ -9,29 +9,33 @@ import subtitlehelpers
from config import config as sz_config
SECONDARY_TAGS = ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom', 'hi', 'cc', 'sdh']
def find_subtitles(part):
lang_sub_map = {}
part_filename = helpers.unicodize(part.file)
part_basename = os.path.splitext(os.path.basename(part_filename))[0]
use_filesystem = helpers.cast_bool(Prefs["subtitles.save.filesystem"])
paths = [os.path.dirname(part_filename)] if use_filesystem else []
sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
if Prefs["subtitles.save.subFolder.Custom"] else None
global_subtitle_folder = None
use_sub_subfolder = Prefs["subtitles.save.subFolder"] != "current folder" and not sub_dir_custom
sub_subfolder = None
paths = [os.path.dirname(part_filename)] if use_filesystem else []
global_folders = []
if use_filesystem:
# Check for local subtitles subdirectory
sub_dir_base = paths[0]
sub_dir_list = []
if Prefs["subtitles.save.subFolder"] != "current folder":
if use_sub_subfolder:
# got selected subfolder
sub_dir_list.append(os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"]))
sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
if Prefs["subtitles.save.subFolder.Custom"] else None
sub_subfolder = os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"])
sub_dir_list.append(sub_subfolder)
sub_subfolder = os.path.normpath(helpers.unicodize(sub_subfolder))
if sub_dir_custom:
# got custom subfolder
@@ -84,8 +88,12 @@ def find_subtitles(part):
media_files.append(root)
# cleanup any leftover subtitle if no associated media file was found
if helpers.cast_bool(Prefs["subtitles.autoclean"]):
if use_filesystem and helpers.cast_bool(Prefs["subtitles.autoclean"]):
for path in paths:
# only housekeep in sub_subfolder if sub_subfolder is used
if use_sub_subfolder and path != sub_subfolder and not sz_config.advanced.thorough_cleaning:
continue
# we can't housekeep the global subtitle folders as we don't know about *all* media files
# in a library; skip them
skip_path = False
@@ -105,11 +113,10 @@ def find_subtitles(part):
if os.path.isfile(enc_fn):
(root, ext) = os.path.splitext(file_path_listing)
# it's a subtitle file
if ext.lower()[1:] in config.SUBTITLE_EXTS:
if ext.lower()[1:] in config.SUBTITLE_EXTS_BASE:
# get fn without forced/default/normal tag
split_tag = root.rsplit(".", 1)
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded',
'custom']:
if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
root = split_tag[0]
# get associated media file name without language
@@ -135,7 +142,7 @@ def find_subtitles(part):
# get fn without forced/default/normal tag
split_tag = local_basename.rsplit(".", 1)
has_additional_tag = False
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'custom']:
if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
local_basename = split_tag[0]
has_additional_tag = True
@@ -159,7 +166,7 @@ def find_subtitles(part):
continue
# determine whether to pick up the subtitle based on our match strictness
elif not filename_matches_part:
if not filename_matches_part:
if sz_config.ext_match_strictness == "strict" or (
sz_config.ext_match_strictness == "loose" and not filename_contains_part):
# Log.Debug("%s doesn't match %s, skipping" % (helpers.unicodize(local_filename),
+122 -11
View File
@@ -2,10 +2,17 @@
import traceback
import time
from support.config import config
from support.helpers import get_plex_item_display_title, cast_bool
import os
from babelfish import LanguageReverseError
from support.config import config, TEXT_SUBTITLE_EXTS
from support.helpers import get_plex_item_display_title, cast_bool, get_language_from_stream
from support.items import get_item
from support.lib import Plex
from support.storage import get_subtitle_storage
from subzero.video import has_external_subtitle
from subzero.language import Language
def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_title=None, internal=False, external=True, languages=()):
@@ -17,11 +24,59 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
else:
item_title = get_plex_item_display_title(item, kind, section_title=section_title)
subtitle_storage = get_subtitle_storage()
stored_subs = subtitle_storage.load(rating_key)
subtitle_storage.destroy()
subtitle_target_dir, tdir_is_absolute = config.subtitle_sub_dir
missing = set()
languages_set = set(languages)
languages_set = set([Language.fromietf(str(l)) for l in languages])
for media in item.media:
existing_subs = {"internal": [], "external": [], "count": 0}
existing_subs = {"internal": [], "external": [], "own_external": [], "count": 0}
for part in media.parts:
# did we already download an external subtitle before?
if subtitle_target_dir and stored_subs:
for language in languages_set:
if has_external_subtitle(part.id, stored_subs, language):
# check the existence of the actual subtitle file
# get media filename without extension
part_basename = os.path.splitext(os.path.basename(part.file))[0]
# compute target directory for subtitle
# fixme: move to central location
if tdir_is_absolute:
possible_subtitle_path_base = subtitle_target_dir
else:
possible_subtitle_path_base = os.path.join(os.path.dirname(part.file), subtitle_target_dir)
possible_subtitle_path_base = os.path.realpath(possible_subtitle_path_base)
# folder actually exists?
if not os.path.isdir(possible_subtitle_path_base):
continue
found_any = False
for ext in config.subtitle_formats:
if cast_bool(Prefs['subtitles.only_one']):
possible_subtitle_path = os.path.join(possible_subtitle_path_base,
u"%s.%s" % (part_basename, ext))
else:
possible_subtitle_path = os.path.join(possible_subtitle_path_base,
u"%s.%s.%s" % (part_basename, language, ext))
# check for subtitle existence
if os.path.isfile(possible_subtitle_path):
found_any = True
Log.Debug(u"Found: %s", possible_subtitle_path)
break
if found_any:
existing_subs["own_external"].append(language)
existing_subs["count"] = existing_subs["count"] + 1
for stream in part.streams:
if stream.stream_type == 3:
if stream.index:
@@ -29,18 +84,72 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
else:
key = "external"
existing_subs[key].append(Locale.Language.Match(stream.language_code or ""))
existing_subs["count"] = existing_subs["count"] + 1
if not config.exotic_ext and stream.codec.lower() not in TEXT_SUBTITLE_EXTS:
continue
missing_from_part = set(languages_set)
# treat unknown language as lang1?
if not stream.language_code and config.treat_und_as_first:
lang = Language.fromietf(str(list(config.lang_list)[0]))
# we can't parse empty language codes
elif not stream.language_code or not stream.codec:
continue
else:
# parse with internal language parser first
try:
lang = get_language_from_stream(stream.language_code)
if not lang:
if config.treat_und_as_first:
lang = Language.fromietf(str(list(config.lang_list)[0]))
else:
continue
except (ValueError, LanguageReverseError):
continue
if lang:
# Log.Debug("Found babelfish language: %r", lang)
existing_subs[key].append(lang)
existing_subs["count"] = existing_subs["count"] + 1
missing_from_part = set([Language.fromietf(str(l)) for l in languages])
if existing_subs["count"]:
existing_flat = set((existing_subs["internal"] if internal else []) + (existing_subs["external"] if external else []))
if languages_set.issubset(existing_flat) or (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
# fixme: this is actually somewhat broken with IETF, as Plex doesn't store the country portion
# (pt instead of pt-BR) inside the database. So it might actually download pt-BR if there's a local pt-BR
# subtitle but not our own.
existing_flat = set((existing_subs["internal"] if internal else [])
+ (existing_subs["external"] if external else [])
+ existing_subs["own_external"])
check_languages = set([Language.fromietf(str(l)) for l in languages])
alpha3_map = {}
if config.ietf_as_alpha3:
for language in existing_flat:
if language.country:
alpha3_map[language.alpha3] = language.country
language.country = None
for language in check_languages:
if language.country:
alpha3_map[language.alpha3] = language.country
language.country = None
# compare sets of strings, not sets of different Language instances
check_languages_str = set(str(l) for l in check_languages)
existing_flat_str = set(str(l) for l in existing_flat)
if check_languages_str.issubset(existing_flat_str) or \
(len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
# all subs found
#Log.Info(u"All subtitles exist for '%s'", item_title)
continue
missing_from_part = languages_set - existing_flat
missing_from_part = set(Language.fromietf(l) for l in check_languages_str - existing_flat_str)
if config.ietf_as_alpha3:
for language in missing_from_part:
language.country = alpha3_map.get(language.alpha3, None)
if missing_from_part:
Log.Info(u"Subs still missing for '%s' (%s: %s): %s", item_title, rating_key, media.id,
@@ -48,6 +157,8 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
missing.update(missing_from_part)
if missing:
# deduplicate
missing = set(Language.fromietf(la) for la in set(str(l) for l in missing))
return added_at, item_id, item_title, item, missing
@@ -60,7 +171,7 @@ def items_get_all_missing_subs(items, sleep_after_request=False):
kind=kind,
added_at=added_at,
section_title=section_title,
languages=config.lang_list,
languages=config.lang_list.copy(),
internal=cast_bool(Prefs["subtitles.scan.embedded"]),
external=cast_bool(Prefs["subtitles.scan.external"])
)
+68 -100
View File
@@ -1,13 +1,12 @@
# coding=utf-8
import os
from urllib2 import URLError
import helpers
from config import config
from items import get_item
from lib import get_intent, Plex
from subzero.video import parse_video
from lib import Plex
from support.config import TEXT_SUBTITLE_EXTS, config
def get_metadata_dict(item, part, add):
data = {
@@ -45,10 +44,11 @@ def get_plexapi_stream_info(plex_item, part_id=None):
return d
data["video_codec"] = current_media.video_codec
data["audio_codec"] = current_media.audio_codec.upper()
if current_media.audio_codec:
data["audio_codec"] = current_media.audio_codec.upper()
if data["audio_codec"] == "DCA":
data["audio_codec"] = "DTS"
if data["audio_codec"] == "DCA":
data["audio_codec"] = "DTS"
if current_media.audio_channels == 8:
data["audio_channels"] = "7.1"
@@ -153,10 +153,9 @@ def get_stream_fps(streams):
def get_media_item_ids(media, kind="series"):
ids = []
if kind == "movies":
ids.append(media.id)
else:
# fixme: does this work correctly for full series force-refreshes and its intents?
ids = [media.id]
if kind == "series":
for season in media.seasons:
for episode in media.seasons[season].episodes:
ids.append(media.seasons[season].episodes[episode].id)
@@ -164,98 +163,53 @@ def get_media_item_ids(media, kind="series"):
return ids
def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, no_refining=False):
"""
returnes a subliminal/guessit-refined parsed video
:param pms_video_info:
:param ignore_all:
:param hints:
:param rating_key:
:return:
"""
embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
external_subtitles = not ignore_all and Prefs['subtitles.scan.external']
plex_part = pms_video_info["plex_part"]
if ignore_all:
Log.Debug("Force refresh intended.")
Log.Debug("Scanning video: %s, external_subtitles=%s, embedded_subtitles=%s" % (
plex_part.file, external_subtitles, embedded_subtitles))
known_embedded = []
def get_all_parts(plex_item):
parts = []
for media in list(Plex["library"].metadata(rating_key))[0].media:
for media in plex_item.media:
parts += media.parts
plexpy_part = None
for part in parts:
if int(part.id) == int(plex_part.id):
plexpy_part = part
# embedded subtitles
if plexpy_part:
for stream in plexpy_part.streams:
# subtitle stream
if stream.stream_type == 3:
if (config.forced_only and getattr(stream, "forced")) or \
(not config.forced_only and not getattr(stream, "forced")):
# embedded subtitle
if not stream.stream_key:
if config.exotic_ext or stream.codec in ("srt", "ass", "ssa"):
lang_code = stream.language_code
# treat unknown language as lang1?
if not lang_code and config.treat_und_as_first:
lang_code = list(config.lang_list)[0].alpha3
known_embedded.append(lang_code)
else:
Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)
try:
# get basic video info scan (filename)
video = parse_video(plex_part.file, pms_video_info, hints, external_subtitles=external_subtitles,
embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
forced_only=config.forced_only, no_refining=no_refining)
# add video fps info
video.fps = plex_part.fps
return video
except ValueError:
Log.Warn("File could not be guessed by subliminal: %s" % plex_part.file)
return parts
def scan_videos(videos, kind="series", ignore_all=False, no_refining=False):
"""
receives a list of videos containing dictionaries returned by media_to_videos
:param videos:
:param kind: series or movies
:return: dictionary of subliminal.video.scan_video, key=subliminal scanned video, value=plex file part
"""
ret = {}
for video in videos:
intent = get_intent()
force_refresh = intent.get("force", video["id"], video["series_id"], video["season_id"])
Log.Debug("Determining force-refresh (video: %s, series: %s, season: %s), result: %s"
% (video["id"], video["series_id"], video["season_id"], force_refresh))
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, get_forced=None):
streams = []
has_unknown = False
for stream in part.streams:
# subtitle stream
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
language = helpers.get_language_from_stream(stream.language_code)
is_unknown = False
found_requested_language = requested_language and requested_language == language
is_forced = helpers.is_stream_forced(stream)
hints = helpers.get_item_hints(video)
video["plex_part"].fps = get_stream_fps(video["plex_part"].streams)
scanned_video = scan_video(video, ignore_all=force_refresh or ignore_all, hints=hints,
rating_key=video["id"], no_refining=no_refining)
if get_forced is not None:
if (get_forced and not is_forced) or (not get_forced and is_forced):
continue
if not scanned_video:
continue
if not language and config.treat_und_as_first:
# only consider first unknown subtitle stream
if has_unknown and skip_duplicate_unknown:
continue
scanned_video.id = video["id"]
part_metadata = video.copy()
del part_metadata["plex_part"]
scanned_video.plexapi_metadata = part_metadata
ret[scanned_video] = video["plex_part"]
return ret
language = list(config.lang_list)[0]
is_unknown = True
has_unknown = True
if not requested_language or found_requested_language or has_unknown:
streams.append({"stream": stream, "is_unknown": is_unknown, "language": language,
"is_forced": is_forced})
if found_requested_language:
break
return streams
def get_part(plex_item, part_id):
    """
    looks up a single part of a plex item by its id

    :param plex_item: item whose media entries and their parts are searched
    :param part_id: id of the wanted part; ids are compared by their string representation
    :return: the first part whose id matches, None if no part matches
    """
    all_parts = (part for media in plex_item.media for part in media.parts)
    return next((part for part in all_parts if str(part.id) == str(part_id)), None)
def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
@@ -275,11 +229,7 @@ def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
return
# find current part
current_part = None
for media in plex_item.media:
for part in media.parts:
if str(part.id) == str(part_id):
current_part = part
current_part = get_part(plex_item, part_id)
if not current_part:
raise helpers.PartUnknownException("Part unknown")
@@ -334,6 +284,24 @@ def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
return metadata
def get_blacklist_from_part_map(video_part_map, languages):
    """
    collects the blacklist entries of all videos/parts in the given map for the given languages

    :param video_part_map: dictionary, key=scanned video (its plexapi_metadata["item"] is used to load the
                           stored subtitles), value=plex file part
    :param languages: languages to collect the blacklists for
    :return: list of (str, str) tuples, built from the keys of every non-empty blacklist found
    """
    from support.storage import get_subtitle_storage

    subtitle_storage = get_subtitle_storage()
    blacklist = []
    try:
        for video, part in video_part_map.iteritems():
            stored_subs = subtitle_storage.load_or_new(video.plexapi_metadata["item"])
            for language in languages:
                # get_blacklist returns a pair; only the blacklist itself is needed here
                current_bl, _subs = stored_subs.get_blacklist(part.id, language)
                if not current_bl:
                    continue

                # extend in place instead of re-creating the whole list for every language
                blacklist.extend((str(a), str(b)) for a, b in current_bl.keys())
    finally:
        # always release the storage, even if loading or reading a blacklist failed
        subtitle_storage.destroy()

    return blacklist
class PMSMediaProxy(object):
"""
Proxy object for getting data from a mediatree items "internally" via the PMS
+131
View File
@@ -0,0 +1,131 @@
# coding=utf-8
import traceback
import helpers
from babelfish.exceptions import LanguageError
from support.lib import Plex, get_intent
from support.plex_media import get_stream_fps
from support.storage import get_subtitle_storage
from support.config import config, TEXT_SUBTITLE_EXTS
from subzero.video import parse_video, set_existing_languages
from subzero.language import language_from_stream
def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, providers=None, skip_hashing=False):
    """
    returns a parsed video for the given part, with its existing subtitle languages set

    :param pms_video_info: video info dictionary; its "plex_part" entry is the part to scan
    :param ignore_all: if set, embedded/external subtitles aren't scanned and set_existing_languages isn't called
    :param hints: handed to parse_video
    :param rating_key: used to look up the item's parts and its stored subtitles
    :param providers: handed to parse_video
    :param skip_hashing: handed to parse_video; hashing is skipped as well if config.low_impact_mode is set
    :return: the parsed video; None if a ValueError was raised while parsing
    """
    scan_embedded = not ignore_all and Prefs['subtitles.scan.embedded']
    scan_external = not ignore_all and Prefs['subtitles.scan.external']
    plex_part = pms_video_info["plex_part"]

    if ignore_all:
        Log.Debug("Force refresh intended.")

    Log.Debug("Scanning video: %s, external_subtitles=%s, embedded_subtitles=%s" % (
        plex_part.file, scan_external, scan_embedded))

    # find the part belonging to plex_part among all parts of the item; the last match wins
    item_media = list(Plex["library"].metadata(rating_key))[0].media
    all_parts = [part for media in item_media for part in media.parts]
    plexpy_part = None
    for candidate in all_parts:
        if int(candidate.id) == int(plex_part.id):
            plexpy_part = candidate

    # embedded subtitles
    # fixme: skip the whole scanning process if known_embedded == wanted languages?
    known_embedded = []
    if not plexpy_part:
        Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)
    elif scan_embedded:
        for stream in plexpy_part.streams:
            # only subtitle streams
            if stream.stream_type != 3:
                continue

            # the stream's forced state has to match the forced_only setting
            is_forced = helpers.is_stream_forced(stream)
            if bool(config.forced_only) != bool(is_forced):
                continue

            # embedded subtitle
            # fixme: tap into external subtitles here instead of scanning for ourselves later?
            if not (stream.codec and getattr(stream, "index", None)):
                continue

            if not (config.exotic_ext or stream.codec.lower() in config.text_based_formats):
                continue

            try:
                lang = language_from_stream(stream.language_code)
            except LanguageError:
                lang = None
                Log.Debug("Couldn't detect embedded subtitle stream language: %s", stream.language_code)

            # treat unknown language as lang1?
            if not lang and config.treat_und_as_first:
                lang = list(config.lang_list)[0]

            if lang:
                known_embedded.append(lang.alpha3)

    Log.Debug("Known embedded: %r", known_embedded)

    storage = get_subtitle_storage()
    stored_subs = storage.load(rating_key)
    storage.destroy()

    try:
        # get basic video info scan (filename)
        video = parse_video(plex_part.file, hints, skip_hashing=config.low_impact_mode or skip_hashing,
                            providers=providers)

        if not ignore_all:
            set_existing_languages(video, pms_video_info, external_subtitles=scan_external,
                                   embedded_subtitles=scan_embedded, known_embedded=known_embedded,
                                   forced_only=config.forced_only, stored_subs=stored_subs,
                                   languages=config.lang_list, only_one=config.only_one)

        # add video fps info
        video.fps = plex_part.fps
    except ValueError:
        Log.Warn("File could not be guessed: %s: %s", plex_part.file, traceback.format_exc())
        return None

    return video
def scan_videos(videos, ignore_all=False, providers=None, skip_hashing=False):
    """
    scans a list of video info dictionaries as returned by media_to_videos

    :param videos: list of video info dictionaries
    :param ignore_all: handed to scan_video; also set for a video if a "force" intent exists for it
    :param providers: handed to scan_video; if not given, config.get_providers is asked per video, using
                      media type "series" for episodes and "movies" for everything else
    :param skip_hashing: handed to scan_video
    :return: dictionary, key=scanned video, value=plex file part; videos that couldn't be scanned are left out
    """
    scanned = {}
    for info in videos:
        video_id = info["id"]
        series_id = info["series_id"]
        season_id = info["season_id"]

        force_refresh = get_intent().get("force", video_id, series_id, season_id)
        Log.Debug("Determining force-refresh (video: %s, series: %s, season: %s), result: %s"
                  % (video_id, series_id, season_id, force_refresh))

        hints = helpers.get_item_hints(info)
        info["plex_part"].fps = get_stream_fps(info["plex_part"].streams)

        if providers:
            video_providers = providers
        else:
            media_type = "series" if info["type"] == "episode" else "movies"
            video_providers = config.get_providers(media_type=media_type)

        scanned_video = scan_video(info, ignore_all=force_refresh or ignore_all, hints=hints,
                                   rating_key=video_id, providers=video_providers,
                                   skip_hashing=skip_hashing)
        if scanned_video:
            scanned_video.id = info["id"]

            # keep everything but the part itself as metadata on the scanned video
            part_metadata = info.copy()
            part_metadata.pop("plex_part")
            scanned_video.plexapi_metadata = part_metadata
            scanned_video.ignore_all = force_refresh

            scanned[scanned_video] = info["plex_part"]

    return scanned
+34 -7
View File
@@ -4,21 +4,24 @@ import datetime
import logging
import traceback
from config import config
def parse_frequency(s):
if s == "never" or s == None:
if s == "never" or s is None:
return None, None
kind, num, unit = s.split()
return int(num), unit
class DefaultScheduler(object):
thread = None
queue_thread = None
scheduler_thread = None
running = False
registry = None
def __init__(self):
self.thread = None
self.queue_thread = None
self.scheduler_thread = None
self.running = False
self.registry = []
@@ -47,6 +50,7 @@ class DefaultScheduler(object):
if Dict["tasks"]:
for task_name in Dict["tasks"].keys():
if task_name == "queue":
Dict["tasks"][task_name] = []
continue
Dict["tasks"][task_name]["data"] = {}
@@ -58,6 +62,7 @@ class DefaultScheduler(object):
raise NotImplementedError("Task missing! %s" % name)
Dict["tasks"][name]["data"] = {}
Dict["tasks"][name]["running"] = False
Dict.Save()
Log.Debug("Task data cleared: %s", name)
@@ -78,7 +83,8 @@ class DefaultScheduler(object):
def run(self):
self.running = True
self.thread = Thread.Create(self.worker)
self.scheduler_thread = Thread.Create(self.scheduler_worker)
self.queue_thread = Thread.Create(self.queue_worker)
def stop(self):
self.running = False
@@ -113,6 +119,7 @@ class DefaultScheduler(object):
def run_task(self, name, *args, **kwargs):
task = self.tasks[name]["task"]
if task.running:
Log.Debug("Scheduler: Not running %s, as it's currently running.", name)
return False
@@ -124,8 +131,12 @@ class DefaultScheduler(object):
except Exception, e:
Log.Error("Scheduler: Something went wrong when running %s: %s", name, traceback.format_exc())
finally:
task.post_run(Dict["tasks"][name]["data"])
try:
task.post_run(Dict["tasks"][name]["data"])
except:
Log.Error("Scheduler: task.post_run failed for %s: %s", name, traceback.format_exc())
Dict.Save()
config.sync_cache()
def dispatch_task(self, *args, **kwargs):
if "queue" not in Dict["tasks"]:
@@ -157,7 +168,7 @@ class DefaultScheduler(object):
continue
Log.Debug("Scheduler: Not sending signal %s to task %s, because: not running", name, task_name)
def worker(self):
def queue_worker(self):
Thread.Sleep(10.0)
while 1:
if not self.running:
@@ -170,10 +181,18 @@ class DefaultScheduler(object):
Dict["tasks"]["queue"] = []
Dict.Save()
for args, kwargs in queue:
Log.Debug("Dispatching single task: %s, %s", args, kwargs)
Log.Debug("Queue: Dispatching single task: %s, %s", args, kwargs)
Thread.Create(self.run_task, True, *args, **kwargs)
Thread.Sleep(5.0)
Thread.Sleep(1)
def scheduler_worker(self):
Thread.Sleep(10.0)
while 1:
if not self.running:
break
# scheduled tasks
for name in self.tasks.keys():
now = datetime.datetime.now()
@@ -193,6 +212,14 @@ class DefaultScheduler(object):
if not frequency_num:
continue
# run legacy SARAM once
if name == "SearchAllRecentlyAddedMissing" and ("hasRunLSARAM" not in Dict or not Dict["hasRunLSARAM"]):
task = self.tasks["LegacySearchAllRecentlyAddedMissing"]["task"]
task.last_run = None
name = "LegacySearchAllRecentlyAddedMissing"
Dict["hasRunLSARAM"] = True
Dict.Save()
if not task.last_run or (task.last_run + datetime.timedelta(**{frequency_key: frequency_num}) <= now):
# fixme: scheduled tasks run synchronously. is this the best idea?
Thread.Create(self.run_task, True, name)
+128 -54
View File
@@ -4,9 +4,12 @@ import datetime
import os
import pprint
import copy
import traceback
import types
from subliminal_patch.core import save_subtitles as subliminal_save_subtitles
from subzero.subtitle_storage import StoredSubtitlesManager
from subzero.lib.io import FileIO
from subtitlehelpers import force_utf8
from config import config
@@ -16,30 +19,45 @@ from support.items import get_item
def get_subtitle_storage():
return StoredSubtitlesManager(Data, get_item)
return StoredSubtitlesManager(Data, Thread, get_item)
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a"):
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a", set_current=True):
"""
stores information about downloaded subtitles in plex's Dict()
"""
subtitle_storage = get_subtitle_storage()
for video, video_subtitles in downloaded_subtitles.items():
part = scanned_video_part_map[video]
part_id = str(part.id)
video_id = str(video.id)
plex_item = get_item(video_id)
if not plex_item:
Log.Warning("Plex item not found: %s", video_id)
continue
metadata = video.plexapi_metadata
title = get_title_for_video_metadata(metadata)
subtitle_storage = get_subtitle_storage()
stored_subs = subtitle_storage.load_or_new(plex_item)
stored_subs = subtitle_storage.load(video_id)
is_new = False
if not stored_subs:
is_new = True
Log.Debug(u"Creating new subtitle storage: %s, %s", video_id, part_id)
stored_subs = subtitle_storage.new(plex_item)
for subtitle in video_subtitles:
lang = str(subtitle.language)
subtitle.set_encoding("utf-8")
Log.Debug(u"Adding subtitle to storage: %s, %s, %s, %s" % (video_id, part_id, title,
subtitle.guess_encoding()))
ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode)
subtitle.normalize()
Log.Debug(u"Adding subtitle to storage: %s, %s, %s, %s, %s" % (video_id, part_id, lang, title,
subtitle.guess_encoding()))
last_mod = None
if subtitle.storage_path:
last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(subtitle.storage_path))
ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode, last_mod=last_mod,
set_current=set_current)
if ret_val:
Log.Debug("Subtitle stored")
@@ -47,9 +65,11 @@ def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_ty
else:
Log.Debug("Subtitle already existing in storage")
Log.Debug("Saving subtitle storage for %s" % video_id)
subtitle_storage.save(stored_subs)
subtitle_storage.destroy()
if is_new or video_subtitles:
Log.Debug("Saving subtitle storage for %s" % video_id)
subtitle_storage.save(stored_subs)
subtitle_storage.destroy()
def reset_storage(key):
@@ -71,36 +91,47 @@ def log_storage(key):
Log.Debug(pprint.pformat(Dict[key]))
def save_subtitles_to_file(subtitles):
def get_target_folder(file_path):
    """
    determines the folder subtitles for the given file get saved to, based on the
    "subtitles.save.subFolder" and "subtitles.save.subFolder.Custom" preferences;
    the folder is created if it doesn't exist yet

    :param file_path: path of the file the subtitles belong to
    :return: the target folder (passed through force_unicode); None if no custom folder is set and the
             sub folder preference is "current folder"
    """
    custom_pref = Prefs["subtitles.save.subFolder.Custom"]
    fld_custom = custom_pref.strip() if custom_pref else None

    # neither a custom folder nor a sub folder requested
    if not fld_custom and Prefs["subtitles.save.subFolder"] == "current folder":
        return None

    # specific subFolder requested, create it if it doesn't exist
    base_dir = os.path.dirname(file_path)
    if not fld_custom:
        target = os.path.join(base_dir, Prefs["subtitles.save.subFolder"])
    elif fld_custom.startswith("/"):
        # absolute folder
        target = fld_custom
    else:
        target = os.path.join(base_dir, fld_custom)

    target = force_unicode(target)
    if not os.path.exists(target):
        os.makedirs(target)

    return target
def save_subtitles_to_file(subtitles, tags=None, forced_tag=None):
forced_tag = forced_tag or config.forced_only
for video, video_subtitles in subtitles.items():
if not video_subtitles:
continue
fld = None
if fld_custom or Prefs["subtitles.save.subFolder"] != "current folder":
# specific subFolder requested, create it if it doesn't exist
fld_base = os.path.split(video.name)[0]
if fld_custom:
if fld_custom.startswith("/"):
# absolute folder
fld = fld_custom
else:
fld = os.path.join(fld_base, fld_custom)
else:
fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
fld = force_unicode(fld)
if not os.path.exists(fld):
os.makedirs(fld)
subliminal_save_subtitles(video, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
chmod=config.chmod, forced_tag=config.forced_only, path_decoder=force_unicode,
debug_mods=config.debug_mods, formats=config.subtitle_formats)
if not isinstance(video, types.StringTypes):
file_path = video.name
else:
file_path = video
fld = get_target_folder(file_path)
subliminal_save_subtitles(file_path, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
chmod=config.chmod, forced_tag=forced_tag, path_decoder=force_unicode,
debug_mods=config.debug_mods, formats=config.subtitle_formats, tags=tags)
return True
def save_subtitles_to_metadata(videos, subtitles):
def save_subtitles_to_metadata(videos, subtitles, is_forced=False):
for video, video_subtitles in subtitles.items():
mediaPart = videos[video]
for subtitle in video_subtitles:
@@ -112,14 +143,19 @@ def save_subtitles_to_metadata(videos, subtitles):
mp = PMSMediaProxy(video.id).get_part(mediaPart.id)
else:
mp = mediaPart
mp.subtitles[Locale.Language.Match(subtitle.language.alpha2)][subtitle.id] = Proxy.Media(content, ext="srt")
pm = Proxy.Media(content, ext="srt", forced="1" if is_forced else None)
lang = Locale.Language.Match(subtitle.language.alpha2)
mp.subtitles[lang].validate_keys({})
mp.subtitles[lang]["subzero"] = pm
return True
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None):
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None,
set_current=True, is_forced=False):
"""
:param scanned_video_part_map:
:param set_current: save the subtitle as the current one
:param scanned_video_part_map:
:param downloaded_subtitles:
:param mode:
:param bare_save: don't trigger anything; don't store information
@@ -129,6 +165,8 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_
meta_fallback = False
save_successful = False
# big fixme: scanned_video_part_map isn't needed to the current extent. rewrite.
if mods:
for video, video_subtitles in downloaded_subtitles.items():
if not video_subtitles:
@@ -140,31 +178,67 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_
subtitle.plex_media_fps = video.fps
storage = "metadata"
if Prefs['subtitles.save.filesystem']:
save_to_fs = cast_bool(Prefs['subtitles.save.filesystem'])
if save_to_fs:
storage = "filesystem"
try:
Log.Debug("Using filesystem as subtitle storage")
save_subtitles_to_file(downloaded_subtitles)
except OSError:
if Prefs["subtitles.save.metadata_fallback"]:
meta_fallback = True
if set_current:
if save_to_fs:
try:
Log.Debug("Using filesystem as subtitle storage")
save_subtitles_to_file(downloaded_subtitles, forced_tag=is_forced)
except OSError:
if cast_bool(Prefs["subtitles.save.metadata_fallback"]):
meta_fallback = True
storage = "metadata"
else:
raise
else:
raise
else:
save_successful = True
save_successful = True
if not Prefs['subtitles.save.filesystem'] or meta_fallback:
if meta_fallback:
Log.Debug("Using metadata as subtitle storage, because filesystem storage failed")
else:
Log.Debug("Using metadata as subtitle storage")
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles)
if not save_to_fs or meta_fallback:
if meta_fallback:
Log.Debug("Using metadata as subtitle storage, because filesystem storage failed")
else:
Log.Debug("Using metadata as subtitle storage")
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles,
is_forced=is_forced)
if not bare_save and save_successful and config.notify_executable:
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
if not bare_save and save_successful and config.notify_executable:
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
if not bare_save and save_successful:
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode)
if not bare_save and save_successful or not set_current:
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode, set_current=set_current)
return save_successful
def get_pack_id(subtitle):
    """
    Build the identifier used for a subtitle's pack cache entry.

    :param subtitle: object exposing ``provider_name`` and ``numeric_id``
    :return: "<provider_name>_<numeric_id>"
    """
    provider = subtitle.provider_name
    numeric_id = subtitle.numeric_id
    return "%s_%s" % (provider, numeric_id)
def get_pack_data(subtitle):
    """
    Load a previously cached archive for the given subtitle's pack.

    The archive is looked up as "<pack id>.archive" inside
    ``config.pack_cache_dir``.

    :param subtitle: subtitle whose pack archive should be loaded
    :return: the data returned by ``FileIO.read`` for the archive, or None if
             there is no cached archive or reading it failed
    """
    subtitle_id = get_pack_id(subtitle)
    archive = os.path.join(config.pack_cache_dir, subtitle_id + ".archive")

    if not os.path.isfile(archive):
        return None

    Log.Info("Loading archive from pack cache: %s", subtitle_id)
    try:
        return FileIO.read(archive, 'rb')
    except Exception:
        # best effort: a broken cache entry must not break the download, but
        # don't swallow KeyboardInterrupt/SystemExit like a bare except would
        Log.Error("Couldn't load archive from pack cache: %s: %s", subtitle_id, traceback.format_exc())

    return None
def store_pack_data(subtitle, data):
    """
    Store a downloaded archive in the pack cache.

    The archive is written as "<pack id>.archive" inside
    ``config.pack_cache_dir``. Failures are logged and otherwise ignored.

    :param subtitle: subtitle the archive belongs to
    :param data: archive content handed to ``FileIO.write``
    """
    subtitle_id = get_pack_id(subtitle)
    archive = os.path.join(config.pack_cache_dir, subtitle_id + ".archive")

    Log.Info("Storing archive in pack cache: %s", subtitle_id)
    try:
        FileIO.write(archive, data, 'wb')
    except Exception:
        # best effort: failing to cache is not fatal, but don't swallow
        # KeyboardInterrupt/SystemExit like a bare except would
        Log.Error("Couldn't store archive in pack cache: %s: %s", subtitle_id, traceback.format_exc())
+19 -10
View File
@@ -1,9 +1,9 @@
# coding=utf-8
import re, os
import config
import helpers
from config import config, SUBTITLE_EXTS, TEXT_SUBTITLE_EXTS
from bs4 import UnicodeDammit
@@ -90,7 +90,7 @@ ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
def match_ietf_language(s):
language_match = re.match(".+\.([^\.]+)$" if not helpers.cast_bool(Prefs["subtitles.language.ietf"])
language_match = re.match(".+\.([^\.]+)$" if not helpers.cast_bool(Prefs["subtitles.language.ietf_display"])
else IETF_MATCH, s)
if language_match and len(language_match.groups()) == 1:
language = language_match.groups()[0]
@@ -102,7 +102,7 @@ class DefaultSubtitleHelper(SubtitleHelper):
@classmethod
def is_helper_for(cls, filename):
(file, file_extension) = os.path.splitext(filename)
return file_extension.lower()[1:] in config.SUBTITLE_EXTS
return file_extension.lower()[1:] in SUBTITLE_EXTS
def process_subtitles(self, part):
@@ -120,21 +120,29 @@ class DefaultSubtitleHelper(SubtitleHelper):
forced = ''
default = ''
split_tag = file.rsplit('.', 1)
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'custom']:
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'embedded-forced',
'custom']:
file = split_tag[0]
sub_tag = split_tag[1].lower()
# don't do anything with 'normal', we don't need it
if 'forced' == split_tag[1].lower():
if 'forced' in sub_tag:
forced = '1'
if 'default' == split_tag[1].lower():
elif 'default' == sub_tag:
default = '1'
# Attempt to extract the language from the filename (e.g. Avatar (2009).eng)
# IETF support thanks to
# https://github.com/hpsbranco/LocalMedia.bundle/commit/4fad9aefedece78a1fa96401304351347f644369
language = Locale.Language.Match(match_ietf_language(file))
lang_part = match_ietf_language(file)
if lang_part != file:
language = Locale.Language.Match(lang_part)
elif config.only_one:
language = Locale.Language.Match(list(config.lang_list)[0].alpha2)
else:
language = Locale.Language.Match("xx")
# skip non-SRT if wanted
if not helpers.cast_bool(Prefs["subtitles.scan.exotic_ext"]) and ext not in ["srt", "ass", "ssa", "vtt"]:
if not config.exotic_ext and ext not in TEXT_SUBTITLE_EXTS:
return lang_sub_map
codec = None
@@ -157,7 +165,8 @@ class DefaultSubtitleHelper(SubtitleHelper):
Log("An error occurred while attempting to parse the subtitle file, skipping... : " + self.filename)
return lang_sub_map
if codec is None and ext in ['ass', 'ssa', 'smi', 'srt', 'psb', 'vtt']:
# fixme: re-add vtt once Plex Inc. fixes this line in LocalMedia.bundle
if codec is None and ext in ['ass', 'ssa', 'smi', 'srt', 'psb']:
codec = ext.replace('ass', 'ssa')
if format is None:
@@ -187,7 +196,7 @@ def get_subtitles_from_metadata(part):
if p_type == "Media":
# metadata subtitle
Log.Debug(u"Found metadata subtitle: %s, %s" % (language, repr(proxy)))
subs[language].append(key)
subs[language] = [key]
return subs
+447 -218
View File
@@ -1,27 +1,27 @@
# coding=utf-8
import glob
import os
import datetime
import time
import operator
import traceback
from urllib2 import URLError
from subliminal_patch.score import compute_score
from subliminal_patch.core import download_subtitles
from subliminal import list_subtitles as list_all_subtitles
from babelfish import Language
from subliminal import list_subtitles as list_all_subtitles, region as subliminal_cache_region
from subzero.language import Language
from subzero.video import refine_video
from missing_subtitles import items_get_all_missing_subs, refresh_item
from scheduler import scheduler
from storage import save_subtitles, get_subtitle_storage
from support.config import config
from support.items import get_recent_items, get_item, is_ignored
from support.items import get_recent_items, get_item, is_ignored, get_item_title
from support.helpers import track_usage, get_title_for_video_metadata, cast_bool, PartUnknownException
from support.plex_media import scan_videos, get_plex_metadata
from download import download_best_subtitles
PROVIDER_SLACK = 30
DL_PROVIDER_SLACK = 30
from support.plex_media import get_plex_metadata
from support.scanning import scan_videos
from support.i18n import _
from download import download_best_subtitles, pre_download_hook, post_download_hook, language_hook
class Task(object):
@@ -32,6 +32,9 @@ class Task(object):
time_start = None
data = None
PROVIDER_SLACK = 30
DL_PROVIDER_SLACK = 30
stored_attributes = ("last_run", "last_run_time", "running")
default_data = {"last_run": None, "last_run_time": None, "running": False, "data": {}}
@@ -97,7 +100,7 @@ class Task(object):
class SubtitleListingMixin(object):
def list_subtitles(self, rating_key, item_type, part_id, language, skip_wrong_fps=True, metadata=None,
scanned_parts=None):
scanned_parts=None, air_date_cutoff=None):
if not metadata:
metadata = get_plex_metadata(rating_key, part_id, item_type)
@@ -105,18 +108,26 @@ class SubtitleListingMixin(object):
if not metadata:
return
providers = config.get_providers(media_type="series" if item_type == "episode" else "movies")
if not scanned_parts:
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
scanned_parts = scan_videos([metadata], ignore_all=True, providers=providers)
if not scanned_parts:
Log.Error(u"%s: Couldn't list available subtitles for %s", self.name, rating_key)
return
video, plex_part = scanned_parts.items()[0]
refine_video(video, refiner_settings=config.refiner_settings)
if air_date_cutoff is not None and metadata["item"].year and \
metadata["item"].year + air_date_cutoff < datetime.date.today().year:
Log.Debug("Skipping searching for subtitles: %s, it aired over %s year(s) ago.", rating_key,
air_date_cutoff)
return
config.init_subliminal_patches()
provider_settings = config.provider_settings.copy()
provider_settings = config.provider_settings
if not skip_wrong_fps:
provider_settings = config.provider_settings.copy()
provider_settings["opensubtitles"]["skip_wrong_fps"] = False
if item_type == "episode":
@@ -126,10 +137,14 @@ class SubtitleListingMixin(object):
else:
min_score = 60
available_subs = list_all_subtitles(scanned_parts, {Language.fromietf(language)},
providers=config.providers,
languages = {Language.fromietf(language)}
available_subs = list_all_subtitles([video], languages,
providers=providers,
provider_configs=provider_settings,
pool_class=config.provider_pool)
pool_class=config.provider_pool,
throttle_callback=config.provider_throttle,
language_hook=language_hook)
use_hearing_impaired = Prefs['subtitles.search.hearingImpaired'] in ("prefer", "force HI")
@@ -143,6 +158,16 @@ class SubtitleListingMixin(object):
Log.Error(u"%s: Match computation failed for %s: %s", self.name, s, traceback.format_exc())
continue
# skip wrong season/episodes
if item_type == "episode":
can_verify_series = True
if not s.hash_verifiable and "hash" in matches:
can_verify_series = False
if can_verify_series and not {"series", "season", "episode"}.issubset(matches):
Log.Debug(u"%s: Skipping %s, because it doesn't match our series/episode", self.name, s)
continue
unsorted_subtitles.append(
(s, compute_score(matches, s, video, hearing_impaired=use_hearing_impaired), matches))
scored_subtitles = sorted(unsorted_subtitles, key=operator.itemgetter(1), reverse=True)
@@ -168,21 +193,36 @@ class DownloadSubtitleMixin(object):
item_type = subtitle.item_type
part_id = subtitle.part_id
metadata = get_plex_metadata(rating_key, part_id, item_type)
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
providers = config.get_providers(media_type="series" if item_type == "episode" else "movies")
scanned_parts = scan_videos([metadata], ignore_all=True, providers=providers)
video, plex_part = scanned_parts.items()[0]
pre_download_hook(subtitle)
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
download_subtitles([subtitle], providers=config.providers, provider_configs=config.provider_settings,
pool_class=config.provider_pool)
download_subtitles([subtitle], providers=providers,
provider_configs=config.provider_settings,
pool_class=config.provider_pool, throttle_callback=config.provider_throttle)
post_download_hook(subtitle)
# may be redundant
subtitle.pack_data = None
download_successful = False
if subtitle.content:
try:
save_subtitles(scanned_parts, {video: [subtitle]}, mode=mode, mods=config.default_mods)
Log.Debug(u"%s: Manually downloaded subtitle for: %s", self.name, rating_key)
if mode == "m":
Log.Debug(u"%s: Manually downloaded subtitle for: %s", self.name, rating_key)
track_usage("Subtitle", "manual", "download", 1)
elif mode == "b":
Log.Debug(u"%s: Downloaded better subtitle for: %s", self.name, rating_key)
track_usage("Subtitle", "better", "download", 1)
download_successful = True
refresh_item(rating_key)
track_usage("Subtitle", "manual", "download", 1)
except:
Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s",
self.name, traceback.format_exc())
@@ -197,8 +237,15 @@ class DownloadSubtitleMixin(object):
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
subtitle=subtitle,
mode=mode)
history.destroy()
# clear missing subtitles menu data
if not scheduler.is_task_running("MissingSubtitles"):
scheduler.clear_task_data("MissingSubtitles")
else:
set_refresh_menu_state(u"%s: Subtitle download failed (%s)", self.name, rating_key)
set_refresh_menu_state(_(u"%(class_name)s: Subtitle download failed (%(item_id)s)",
class_name=self.name,
item_id=rating_key))
return download_successful
@@ -223,7 +270,12 @@ class AvailableSubsForItem(SubtitleListingMixin, Task):
def run(self):
super(AvailableSubsForItem, self).run()
self.running = True
track_usage("Subtitle", "manual", "list", 1)
try:
track_usage("Subtitle", "manual", "list", 1)
except:
Log.Error("Something went wrong with track_usage: %s", traceback.format_exc())
Log.Debug("Listing available subtitles for: %s", self.rating_key)
subs = self.list_subtitles(self.rating_key, self.item_type, self.part_id, self.language, skip_wrong_fps=False)
if not subs:
self.data = "found_none"
@@ -309,6 +361,8 @@ class SearchAllRecentlyAddedMissing(Task):
now = datetime.datetime.now()
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
series_providers = config.get_providers(media_type="series")
movie_providers = config.get_providers(media_type="movies")
is_recent_str = Prefs["scheduler.item_is_recent_age"]
num, ident = is_recent_str.split()
@@ -320,24 +374,9 @@ class SearchAllRecentlyAddedMissing(Task):
max_search_days = int(num) * 7
subtitle_storage = get_subtitle_storage()
recent_sub_fns = subtitle_storage.get_recent_files(age_days=max_search_days)
viable_items = {}
recent_files = subtitle_storage.get_recent_files(age_days=max_search_days)
# determine viable items
for fn in recent_sub_fns:
# added_date <= max_search_days?
stored_subs = subtitle_storage.load(filename=fn)
if not stored_subs:
continue
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
continue
viable_items[fn] = stored_subs
subtitle_storage.destroy()
self.items_searching = len(viable_items)
self.items_searching = len(recent_files)
download_count = 0
videos_with_downloads = 0
@@ -346,98 +385,130 @@ class SearchAllRecentlyAddedMissing(Task):
Log.Info(u"%s: Searching for subtitles for %s items", self.name, self.items_searching)
def skip_item():
self.items_searching = self.items_searching - 1
self.percentage = int(self.items_done * 100 / self.items_searching) if self.items_searching > 0 else 100
# search for subtitles in viable items
for fn, stored_subs in viable_items.iteritems():
video_id = stored_subs.video_id
if stored_subs.item_type == "episode":
min_score = min_score_series
else:
min_score = min_score_movies
parts = []
plex_item = get_item(video_id)
if not plex_item:
Log.Info(u"%s: Item %s unknown, skipping", self.name, video_id)
continue
if is_ignored(video_id, item=plex_item):
continue
for media in plex_item.media:
parts += media.parts
downloads_per_video = 0
hit_providers = False
for part in parts:
part_id = part.id
try:
metadata = get_plex_metadata(video_id, part_id, stored_subs.item_type)
except PartUnknownException:
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
try:
for fn in recent_files:
stored_subs = subtitle_storage.load(filename=fn)
if not stored_subs:
Log.Debug("Skipping item %s because storage is empty", fn)
skip_item()
continue
if not metadata:
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
video_id = stored_subs.video_id
# added_date <= max_search_days?
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
Log.Debug("Skipping item %s because it's too old", video_id)
skip_item()
continue
Log.Debug(u"%s: Looking for missing subtitles: %s:%s", self.name, video_id, part_id)
scanned_parts = scan_videos([metadata], kind="series"
if stored_subs.item_type == "episode" else "movie")
if stored_subs.item_type == "episode":
min_score = min_score_series
providers = series_providers
else:
min_score = min_score_movies
providers = movie_providers
downloaded_subtitles = download_best_subtitles(scanned_parts, min_score=min_score)
hit_providers = downloaded_subtitles is not None
download_successful = False
parts = []
plex_item = get_item(video_id)
if downloaded_subtitles:
downloaded_any = any(downloaded_subtitles.values())
if not downloaded_any:
continue
if not plex_item:
Log.Info(u"%s: Item %s unknown, skipping", self.name, video_id)
skip_item()
continue
if is_ignored(video_id, item=plex_item):
skip_item()
continue
for media in plex_item.media:
parts += media.parts
downloads_per_video = 0
hit_providers = False
for part in parts:
part_id = part.id
try:
save_subtitles(scanned_parts, downloaded_subtitles, mode="a", mods=config.default_mods)
Log.Debug(u"%s: Downloaded subtitle for item with missing subs: %s", self.name, video_id)
download_successful = True
refresh_item(video_id)
track_usage("Subtitle", "manual", "download", 1)
except:
Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s", self.name,
traceback.format_exc())
finally:
item_title = get_title_for_video_metadata(metadata, add_section_title=False)
if download_successful:
# store item in history
for video, video_subtitles in downloaded_subtitles.items():
if not video_subtitles:
continue
metadata = get_plex_metadata(video_id, part_id, stored_subs.item_type)
except PartUnknownException:
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
continue
for subtitle in video_subtitles:
downloads_per_video += 1
history.add(item_title, video.id, section_title=metadata["section"],
subtitle=subtitle,
mode="a")
if not metadata:
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
continue
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
time.sleep(PROVIDER_SLACK)
Log.Debug(u"%s: Looking for missing subtitles: %s", self.name, get_item_title(plex_item))
scanned_parts = scan_videos([metadata], providers=providers)
download_count += downloads_per_video
downloaded_subtitles = download_best_subtitles(scanned_parts, min_score=min_score,
providers=providers)
hit_providers = downloaded_subtitles is not None
download_successful = False
if downloads_per_video:
videos_with_downloads += 1
if downloaded_subtitles:
downloaded_any = any(downloaded_subtitles.values())
if not downloaded_any:
continue
self.items_done = self.items_done + 1
self.percentage = int(self.items_done * 100 / self.items_searching)
try:
save_subtitles(scanned_parts, downloaded_subtitles, mode="a", mods=config.default_mods)
Log.Debug(u"%s: Downloaded subtitle for item with missing subs: %s", self.name, video_id)
download_successful = True
refresh_item(video_id)
track_usage("Subtitle", "manual", "download", 1)
except:
Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s", self.name,
traceback.format_exc())
finally:
scanned_parts = None
try:
item_title = get_title_for_video_metadata(metadata, add_section_title=False)
if download_successful:
# store item in history
for video, video_subtitles in downloaded_subtitles.items():
if not video_subtitles:
continue
if downloads_per_video:
Log.Debug(u"%s: Subtitles have been downloaded, "
u"waiting %s seconds before continuing", self.name, DL_PROVIDER_SLACK)
time.sleep(DL_PROVIDER_SLACK)
else:
if hit_providers:
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
time.sleep(PROVIDER_SLACK)
for subtitle in video_subtitles:
downloads_per_video += 1
history.add(item_title, video.id, section_title=metadata["section"],
subtitle=subtitle,
mode="a")
downloaded_subtitles = None
except:
Log.Error(u"%s: DEBUG HIT: %s", self.name, traceback.format_exc())
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.PROVIDER_SLACK)
Thread.Sleep(self.PROVIDER_SLACK)
download_count += downloads_per_video
if downloads_per_video:
videos_with_downloads += 1
self.items_done = self.items_done + 1
self.percentage = int(self.items_done * 100 / self.items_searching) if self.items_searching > 0 else 100
stored_subs = None
if downloads_per_video:
Log.Debug(u"%s: Subtitles have been downloaded, "
u"waiting %s seconds before continuing", self.name, self.DL_PROVIDER_SLACK)
Thread.Sleep(self.DL_PROVIDER_SLACK)
else:
if hit_providers:
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.PROVIDER_SLACK)
Thread.Sleep(self.PROVIDER_SLACK)
finally:
subtitle_storage.destroy()
history.destroy()
if download_count:
Log.Debug(u"%s: done. Missing subtitles found for %s/%s items (%s subs downloaded)", self.name,
@@ -453,6 +524,103 @@ class SearchAllRecentlyAddedMissing(Task):
self.items_searching = None
class LegacySearchAllRecentlyAddedMissing(Task):
    """
    Search for missing subtitles by triggering a refresh for every recent item
    that lacks subtitles, then waiting for the matching "updated_metadata"
    signal of each item.
    """
    periodic = True
    frequency = "never"
    items_done = None
    items_searching = None
    items_searching_ids = None
    items_failed = None
    percentage = 0

    # seconds without an "updated_metadata" signal before a refresh is retried
    stall_time = 30

    def __init__(self):
        super(LegacySearchAllRecentlyAddedMissing, self).__init__()
        self.items_done = None
        self.items_searching = None
        self.items_searching_ids = None
        self.items_failed = None
        self.percentage = 0

    def signal(self, signal_name, *args, **kwargs):
        """
        Dispatch a named signal to the matching ``signal_<name>`` method.

        :param signal_name: name of the signal, without the "signal_" prefix
        :return: the handler's return value, or None if there is no handler
        """
        # default of None is required; without it an unknown signal raises
        # AttributeError and the fallback below can never be reached
        handler = getattr(self, "signal_%s" % signal_name, None)
        return handler(*args, **kwargs) if handler else None

    def signal_updated_metadata(self, *args, **kwargs):
        """
        Mark an item as done once its metadata has been updated.

        :param args: first positional argument is the item id
        :return: True if the item is one we are searching for, otherwise None
        """
        item_id = int(args[0])

        if self.items_searching_ids is not None and item_id in self.items_searching_ids:
            self.items_done.append(item_id)
            return True

    def prepare(self, *args, **kwargs):
        """
        Collect the recent items with missing subtitles and reset the progress state.
        """
        self.items_done = []
        recent_items = get_recent_items()
        missing = items_get_all_missing_subs(recent_items, sleep_after_request=0.2)
        ids = {item_id for added_at, item_id, title, item, missing_languages in missing
               if not is_ignored(item_id, item=item)}
        self.items_searching = missing
        self.items_searching_ids = ids
        self.items_failed = []
        self.percentage = 0
        self.ready_for_display = True

    def run(self):
        """
        Refresh every item found by :meth:`prepare` and wait for it to be
        reported as done, retrying stalled items before giving up on them.
        """
        super(LegacySearchAllRecentlyAddedMissing, self).run()

        self.running = True
        missing_count = len(self.items_searching)
        items_done_count = 0

        for added_at, item_id, title, item, missing_languages in self.items_searching:
            Log.Debug(u"Task: %s, triggering refresh for %s (%s)", self.name, title, item_id)
            try:
                refresh_item(item_id)
            except URLError:
                # timeout
                pass
            search_started = datetime.datetime.now()
            tries = 1

            # wait until signal_updated_metadata() has reported this item
            while True:
                if item_id in self.items_done:
                    items_done_count += 1
                    self.percentage = int(items_done_count * 100 / missing_count) if missing_count > 0 else 100
                    Log.Debug(u"Task: %s, item %s done (%s%%, %s/%s)", self.name, item_id, self.percentage,
                              items_done_count, missing_count)
                    break

                # item considered stalled after self.stall_time seconds passed after last refresh
                if (datetime.datetime.now() - search_started).total_seconds() > self.stall_time:
                    if tries > 3:
                        self.items_failed.append(item_id)
                        Log.Debug(u"Task: %s, item stalled for %s times: %s, skipping", self.name, tries, item_id)
                        break

                    Log.Debug(u"Task: %s, item stalled for %s seconds: %s, retrying", self.name, self.stall_time,
                              item_id)
                    tries += 1
                    try:
                        refresh_item(item_id)
                    except URLError:
                        pass
                    search_started = datetime.datetime.now()
                    Thread.Sleep(1)
                Thread.Sleep(0.1)

            # we can't hammer the PMS, otherwise requests will be stalled
            Thread.Sleep(5)

        Log.Debug("Task: %s, done (%s%%, %s/%s). Failed items: %s", self.name, self.percentage,
                  items_done_count, missing_count, self.items_failed)

    def post_run(self, task_data):
        """
        Reset the display and progress state after the task has finished.
        """
        super(LegacySearchAllRecentlyAddedMissing, self).post_run(task_data)
        self.ready_for_display = False
        self.percentage = 0
        self.items_done = None
        self.items_failed = None
        self.items_searching = None
        self.items_searching_ids = None
class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
periodic = True
@@ -482,134 +650,162 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
now = datetime.datetime.now()
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
min_score_extracted_series = config.advanced.find_better_as_extracted_tv_score or 352
min_score_extracted_movies = config.advanced.find_better_as_extracted_movie_score or 82
overwrite_manually_modified = cast_bool(
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_modified"])
overwrite_manually_selected = cast_bool(
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected"])
air_date_cutoff_pref = Prefs["scheduler.tasks.FindBetterSubtitles.air_date_cutoff"]
if air_date_cutoff_pref == "don't limit":
air_date_cutoff = None
else:
air_date_cutoff = int(air_date_cutoff_pref.split()[0])
subtitle_storage = get_subtitle_storage()
recent_subs = subtitle_storage.load_recent_files(age_days=max_search_days)
viable_item_count = 0
for fn, stored_subs in recent_subs.iteritems():
video_id = stored_subs.video_id
try:
for fn in subtitle_storage.get_recent_files(age_days=max_search_days):
stored_subs = subtitle_storage.load(filename=fn)
if not stored_subs:
continue
if stored_subs.item_type == "episode":
cutoff = self.series_cutoff
min_score = min_score_series
else:
cutoff = self.movies_cutoff
min_score = min_score_movies
video_id = stored_subs.video_id
# don't search for better subtitles until at least 30 minutes have passed
if stored_subs.added_at + datetime.timedelta(minutes=30) > now:
Log.Debug(u"%s: Item %s too new, skipping", self.name, video_id)
continue
if stored_subs.item_type == "episode":
cutoff = self.series_cutoff
min_score = min_score_series
min_score_extracted = min_score_extracted_series
else:
cutoff = self.movies_cutoff
min_score = min_score_movies
min_score_extracted = min_score_extracted_movies
# added_date <= max_search_days?
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
continue
# don't search for better subtitles until at least 30 minutes have passed
if stored_subs.added_at + datetime.timedelta(minutes=30) > now:
Log.Debug(u"%s: Item %s too new, skipping", self.name, video_id)
continue
viable_item_count += 1
ditch_parts = []
# added_date <= max_search_days?
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
continue
# look through all stored subtitle data
for part_id, languages in stored_subs.parts.iteritems():
part_id = str(part_id)
viable_item_count += 1
ditch_parts = []
# all languages
for language, current_subs in languages.iteritems():
current_key = current_subs.get("current")
current = current_subs.get(current_key)
# look through all stored subtitle data
for part_id, languages in stored_subs.parts.iteritems():
part_id = str(part_id)
# currently got subtitle?
if not current:
continue
current_score = current.score
current_mode = current.mode
# all languages
for language, current_subs in languages.iteritems():
current_key = current_subs.get("current")
current = current_subs.get(current_key)
# late cutoff met? skip
if current_score >= cutoff:
Log.Debug(u"%s: Skipping finding better subs, "
u"cutoff met (current: %s, cutoff: %s): %s (%s)",
self.name, current_score, cutoff, stored_subs.title, video_id)
continue
# currently got subtitle?
# fixme: check for existence
if not current:
continue
current_score = current.score
current_mode = current.mode
# got manual subtitle but don't want to touch those?
if current_mode == "m" and not overwrite_manually_selected:
Log.Debug(u"%s: Skipping finding better subs, "
u"had manual: %s (%s)", self.name, stored_subs.title, video_id)
continue
# late cutoff met? skip
if current_score >= cutoff:
Log.Debug(u"%s: Skipping finding better subs, "
u"cutoff met (current: %s, cutoff: %s): %s (%s)",
self.name, current_score, cutoff, stored_subs.title, video_id)
continue
# subtitle modifications different from default
if not overwrite_manually_modified and current.mods \
and set(current.mods).difference(set(config.default_mods)):
Log.Debug(u"%s: Skipping finding better subs, it has manual modifications: %s (%s)",
self.name, stored_subs.title, video_id)
continue
# got manual subtitle but don't want to touch those?
if current_mode == "m" and not overwrite_manually_selected:
Log.Debug(u"%s: Skipping finding better subs, "
u"had manual: %s (%s)", self.name, stored_subs.title, video_id)
continue
try:
subs = self.list_subtitles(video_id, stored_subs.item_type, part_id, language)
except PartUnknownException:
Log.Info(u"%s: Part %s unknown/gone; ditching subtitle info", self.name, part_id)
ditch_parts.append(part_id)
continue
# subtitle modifications different from default
if not overwrite_manually_modified and current.mods \
and set(current.mods).difference(set(config.default_mods)):
Log.Debug(u"%s: Skipping finding better subs, it has manual modifications: %s (%s)",
self.name, stored_subs.title, video_id)
continue
hit_providers = subs is not None
try:
subs = self.list_subtitles(video_id, stored_subs.item_type, part_id, language,
air_date_cutoff=air_date_cutoff)
except PartUnknownException:
Log.Info(u"%s: Part %s unknown/gone; ditching subtitle info", self.name, part_id)
ditch_parts.append(part_id)
continue
if subs:
# subs are already sorted by score
better_downloaded = False
better_tried_download = 0
better_visited = 0
for sub in subs:
if sub.score > current_score and sub.score > min_score:
Log.Debug(u"%s: Better subtitle found for %s, downloading", self.name, video_id)
better_tried_download += 1
ret = self.download_subtitle(sub, video_id, mode="b")
if ret:
better_found += 1
better_downloaded = True
break
else:
Log.Debug(u"%s: Couldn't download/save subtitle. "
u"Continuing to the next one", self.name)
Log.Debug(u"%s: Waiting %s seconds before continuing",
self.name, DL_PROVIDER_SLACK)
time.sleep(DL_PROVIDER_SLACK)
better_visited += 1
hit_providers = subs is not None
if better_tried_download and not better_downloaded:
Log.Debug(u"%s: Tried downloading better subtitle for %s, "
u"but every try failed.", self.name, video_id)
if subs:
# subs are already sorted by score
better_downloaded = False
better_tried_download = 0
better_visited = 0
for sub in subs:
if sub.score > current_score and sub.score > min_score:
if current.provider_name == "embedded" and sub.score < min_score_extracted:
Log.Debug(u"%s: Not downloading subtitle for %s, we've got an active extracted "
u"embedded sub and the min score %s isn't met (%s).",
self.name, video_id, min_score_extracted, sub.score)
better_visited += 1
break
elif better_downloaded:
Log.Debug(u"%s: Better subtitle downloaded for %s", self.name, video_id)
Log.Debug(u"%s: Better subtitle found for %s, downloading", self.name, video_id)
better_tried_download += 1
ret = self.download_subtitle(sub, video_id, mode="b")
if ret:
better_found += 1
better_downloaded = True
break
else:
Log.Debug(u"%s: Couldn't download/save subtitle. "
u"Continuing to the next one", self.name)
Log.Debug(u"%s: Waiting %s seconds before continuing",
self.name, self.DL_PROVIDER_SLACK)
Thread.Sleep(self.DL_PROVIDER_SLACK)
better_visited += 1
if better_tried_download or better_downloaded:
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, DL_PROVIDER_SLACK)
time.sleep(DL_PROVIDER_SLACK)
if better_tried_download and not better_downloaded:
Log.Debug(u"%s: Tried downloading better subtitle for %s, "
u"but every try failed.", self.name, video_id)
elif better_visited:
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
time.sleep(PROVIDER_SLACK)
elif better_downloaded:
Log.Debug(u"%s: Better subtitle downloaded for %s", self.name, video_id)
elif hit_providers:
# hit the providers but didn't try downloading? wait.
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
time.sleep(PROVIDER_SLACK)
if better_tried_download or better_downloaded:
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.DL_PROVIDER_SLACK)
Thread.Sleep(self.DL_PROVIDER_SLACK)
if ditch_parts:
for part_id in ditch_parts:
try:
del stored_subs.parts[part_id]
except KeyError:
pass
subtitle_storage.save(stored_subs)
elif better_visited:
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.PROVIDER_SLACK)
Thread.Sleep(self.PROVIDER_SLACK)
time.sleep(1)
subs = None
subtitle_storage.destroy()
elif hit_providers:
# hit the providers but didn't try downloading? wait.
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.PROVIDER_SLACK)
Thread.Sleep(self.PROVIDER_SLACK)
if ditch_parts:
for part_id in ditch_parts:
try:
del stored_subs.parts[part_id]
except KeyError:
pass
subtitle_storage.save(stored_subs)
ditch_parts = None
stored_subs = None
Thread.Sleep(1)
finally:
subtitle_storage.destroy()
if better_found:
Log.Debug(u"%s: done. Better subtitles found for %s/%s items", self.name, better_found,
@@ -674,6 +870,38 @@ class MigrateSubtitleStorage(Task):
storage.destroy()
class CacheMaintenance(Task):
    """Periodic task that deletes expired files from the on-disk caches.

    A file counts as expired once its modification time lies more than the
    applicable validity period (in days) in the past.
    """
    periodic = True
    frequency = "every 1 days"
    main_cache_validity = 14  # days
    pack_cache_validity = 4  # days

    def run(self):
        """Remove expired files from the main cache and the archive cache."""
        super(CacheMaintenance, self).run()
        self.running = True
        Log.Info(u"%s: Running cache maintenance", self.name)
        now = datetime.datetime.now()

        def remove_expired(path, expiry):
            """Delete `path` if it was last modified more than `expiry` days ago."""
            # The file may disappear (or become unreadable) between being
            # listed and being stat'ed; that must not abort the whole run.
            try:
                mtime = datetime.datetime.fromtimestamp(os.path.getmtime(path))
            except (IOError, OSError):
                Log.Debug("Couldn't stat cache file: %s", os.path.basename(path))
                return

            if mtime + datetime.timedelta(days=expiry) >= now:
                return

            try:
                os.remove(path)
            except (IOError, OSError):
                Log.Debug("Couldn't remove cache file: %s", os.path.basename(path))

        # main cache; only walked when the new style cache is enabled
        if config.new_style_cache:
            for fn in subliminal_cache_region.backend.all_filenames:
                remove_expired(fn, self.main_cache_validity)

        # archive cache
        for fn in glob.iglob(os.path.join(config.pack_cache_dir, "*.archive")):
            remove_expired(fn, self.pack_cache_validity)
scheduler.register(LegacySearchAllRecentlyAddedMissing)
scheduler.register(SearchAllRecentlyAddedMissing)
scheduler.register(AvailableSubsForItem)
scheduler.register(DownloadSubtitleForItem)
@@ -682,3 +910,4 @@ scheduler.register(FindBetterSubtitles)
scheduler.register(SubtitleStorageMaintenance)
scheduler.register(MigrateSubtitleStorage)
scheduler.register(MenuHistoryMaintenance)
scheduler.register(CacheMaintenance)
+201 -50
View File
@@ -1,6 +1,6 @@
[
{
"id": "langPref1",
"id": "langPref1a",
"label": "Subtitle Language (1)",
"type": "enum",
"values": [
@@ -50,12 +50,14 @@
"tr",
"uk",
"vi",
"hr"
"hr",
"zh-hans",
"zh-hant"
],
"default": "en"
},
{
"id": "langPref2",
"id": "langPref2a",
"label": "Subtitle Language (2)",
"type": "enum",
"values": [
@@ -106,12 +108,14 @@
"tr",
"uk",
"vi",
"hr"
"hr",
"zh-hans",
"zh-hant"
],
"default": "None"
},
{
"id": "langPref3",
"id": "langPref3a",
"label": "Subtitle Language (3)",
"type": "enum",
"values": [
@@ -162,7 +166,9 @@
"tr",
"uk",
"vi",
"hr"
"hr",
"zh-hans",
"zh-hant"
],
"default": "None"
},
@@ -179,11 +185,17 @@
"default": "false"
},
{
"id": "subtitles.language.ietf",
"label": "Treat IETF language tags as ISO 639-1 (e.g. pt-BR = pt)",
"id": "subtitles.language.ietf_display",
"label": "Display languages with country attribute as ISO 639-1 (e.g. pt-BR = pt)",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.language.ietf_normalize",
"label": "Treat languages with country attribute as ISO 639-1 (e.g. don't download pt-BR if pt subtitle exists)",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.only_one",
"label": "Restrict to one language (skips adding \".lang.\" to the subtitle filename; only uses \"Subtitle Language (1)\")",
@@ -196,6 +208,50 @@
"type": "bool",
"default": "true"
},
{
"id": "media_rename1",
"label": "I rename my files using",
"type": "enum",
"values": [
"Sonarr/Radarr (fill api info below)",
"Filebot",
"Sonarr/Radarr/Filebot",
"Symlink to original file",
"I keep the original filenames",
"none of the above"
],
"default": "I keep the original filenames"
},
{
"id": "use_file_info_file",
"label": "Retrieve original filename from .file_info/file_info index files (see wiki)",
"type": "bool",
"default": "false"
},
{
"id": "drone_api.sonarr.url",
"label": "Sonarr URL (add URL base if configured)",
"type": "text",
"default": "http://127.0.0.1:8989"
},
{
"id": "drone_api.sonarr.api_key",
"label": "Sonarr API key",
"type": "text",
"default": ""
},
{
"id": "drone_api.radarr.url",
"label": "Radarr URL (add URL base if configured, min. version: 0.2.0.897)",
"type": "text",
"default": "http://127.0.0.1:7878"
},
{
"id": "drone_api.radarr.api_key",
"label": "Radarr API key",
"type": "text",
"default": ""
},
{
"id": "provider.opensubtitles.enabled",
"label": "Provider: Enable OpenSubtitles",
@@ -204,7 +260,7 @@
},
{
"id": "provider.opensubtitles.username",
"label": "Opensubtitles Username (VIP)",
"label": "Opensubtitles Username",
"type": "text",
"default": ""
},
@@ -216,12 +272,24 @@
"default": "",
"secure": "true"
},
{
"id": "provider.opensubtitles.is_vip",
"label": "OpenSubtitles VIP? (ad-free subs, 1000 subs/day, no-cache VIP server: http://v.ht/osvip)",
"type": "bool",
"default": "false"
},
{
"id": "provider.podnapisi.enabled",
"label": "Provider: Enable Podnapisi.NET",
"type": "bool",
"default": "true"
},
{
"id": "provider.titlovi.enabled",
"label": "Provider: Enable Titlovi.com",
"type": "bool",
"default": "true"
},
{
"id": "provider.addic7ed.enabled",
"label": "Provider: Enable Addic7ed",
@@ -243,7 +311,7 @@
"secure": "true"
},
{
"id": "provider.addic7ed.boost_by1",
"id": "provider.addic7ed.boost_by2",
"label": "Addic7ed: boost score (if requirements met)",
"type": "enum",
"values": [
@@ -266,18 +334,19 @@
"25",
"21",
"20",
"19",
"15",
"10",
"5",
"0"
],
"default": "21"
"default": "19"
},
{
"id": "provider.addic7ed.use_random_agents",
"id": "provider.addic7ed.use_random_agents1",
"label": "Addic7ed: Use random user agents",
"type": "bool",
"default": "false"
"default": "true"
},
{
"id": "provider.legendastv.enabled",
@@ -312,31 +381,41 @@
"default": "false"
},
{
"id": "provider.shooter.enabled",
"label": "Provider: Enable Shooter.cn (Chinese)",
"id": "provider.subscene.enabled",
"label": "Provider: Enable SubScene (TV shows)",
"type": "bool",
"default": "true"
},
{
"id": "provider.supersubtitles.enabled",
"label": "Provider: Enable feliratok.info (Hungarian)",
"type": "bool",
"default": "false"
},
{
"id": "provider.subscenter.enabled",
"label": "Provider: Enable SubsCenter (Hebrew)",
"id": "provider.hosszupuska.enabled",
"label": "Provider: Enable hosszupuskasub.com (Hungarian)",
"type": "bool",
"default": "false"
},
{
"id": "provider.subscenter.username",
"label": "SubsCenter Username",
"id": "provider.argenteam.enabled",
"label": "Provider: Enable aRGENTeaM (Spanish)",
"type": "bool",
"default": "false"
},
{
"id": "provider.assrt.enabled",
"label": "Provider: Enable assrt.net (Chinese)",
"type": "bool",
"default": "false"
},
{
"id": "provider.assrt.token",
"label": "Assrt API Token",
"type": "text",
"default": ""
},
{
"id": "provider.subscenter.password",
"label": "SubsCenter Password",
"type": "text",
"option": "hidden",
"default": "",
"secure": "true"
},
{
"id": "providers.multithreading",
"label": "Search enabled providers simultaneously (multithreading)",
@@ -344,32 +423,32 @@
"default": "true"
},
{
"id": "provider.opensubtitles.use_tags",
"label": "I keep the exact (release-) filename of my media files",
"id": "subtitles.embedded.autoextract",
"label": "Automatically extract and use embedded subtitles upon media addition (with configured default mods)",
"type": "bool",
"default": "true"
"default": "false"
},
{
"id": "subtitles.search_after_autoextract",
"label": "After automatic extraction of embedded subtitles, also immediately search for available subtitles?",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.scan.embedded",
"label": "Scan: include embedded subtitles (in the media file (MKV/MP4), don't download if existing)",
"label": "Don't search for subtitles of a language if there are embedded subtitles inside the media file (MKV/MP4)?",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.scan.external",
"label": "Scan: include external subtitles (metadata/filesystem, don't download if existing)",
"label": "Don't search for subtitles of a language if they already exist on the filesystem (metadata/filesystem)?",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.scan.exotic_ext",
"label": "Scan: include \"exotic\" subtitle formats (anything else than .srt/.ssa/.ass/.vtt; embedded or external)",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.scan.filename_strictness",
"label": "Scan: which external subtitles should be picked up?",
"label": "How strict should these subtitles existing on the filesystem be detected?",
"type": "enum",
"values": [
"exact: media filename match",
@@ -378,6 +457,12 @@
],
"default": "loose: filename contains media filename"
},
{
"id": "subtitles.scan.exotic_ext",
"label": "Include non-text subtitle formats (anything else than .srt/.ssa/.ass/.vtt; embedded or external) in the above?",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.search.minimumTVScore2",
"label": "Minimum score for TV (min: 240, def/sane: 337, min-ideal: 352; see http://v.ht/szscores)",
@@ -408,9 +493,15 @@
"type": "bool",
"default": "false"
},
{
"id": "subtitles.remove_tags",
"label": "Remove style tags from downloaded subtitles (bold, italic, underline, colors, ...)",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.fix_common",
"label": "Fix common whitespace/punctuation issues in subtitles",
"label": "Fix common issues in subtitles",
"type": "bool",
"default": "true"
},
@@ -420,6 +511,12 @@
"type": "bool",
"default": "true"
},
{
"id": "subtitles.reverse_rtl",
"label": "Reverse punctuation in RTL languages (heb)",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.colors",
"label": "Change colors of subtitles to",
@@ -518,8 +615,6 @@
"type": "enum",
"values": [
"never",
"every 1 hours",
"every 3 hours",
"every 6 hours",
"every 12 hours",
"every 24 hours"
@@ -540,7 +635,8 @@
"3 weeks",
"4 weeks",
"5 weeks",
"6 weeks"
"6 weeks",
"12 weeks"
],
"default": "2 weeks"
},
@@ -568,11 +664,30 @@
"type": "text",
"default": "7"
},
{
"id": "scheduler.tasks.FindBetterSubtitles.air_date_cutoff",
"label": "Scheduler: Don't search for better subtitles if the item's air date is older than",
"type": "enum",
"values": [
"don't limit",
"1 year",
"2 years",
"3 years",
"4 years",
"5 years",
"6 years",
"7 years",
"8 years",
"9 years",
"10 years"
],
"default": "1 year"
},
{
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected",
"label": "Scheduler: Overwrite manually selected subtitles when better found",
"type": "bool",
"default": "false"
"default": "true"
},
{
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_modified",
@@ -618,15 +733,15 @@
"default": ""
},
{
"id": "plugin_mode",
"id": "plugin_mode2",
"label": "Sub-Zero mode",
"type": "enum",
"values": [
"agent + channel",
"agent + interface",
"only agent",
"only channel"
"only interface"
],
"default": "agent + channel"
"default": "agent + interface"
},
{
"id": "plugin_pin",
@@ -643,12 +758,12 @@
"default": "10"
},
{
"id": "plugin_pin_mode",
"id": "plugin_pin_mode2",
"label": "Use PIN to restrict access to (needs plugin or PMS restart)",
"type": "enum",
"values": [
"disabled",
"channel menu",
"interface",
"advanced menu"
],
"default": "disabled"
@@ -665,6 +780,36 @@
"type": "bool",
"default": "true"
},
{
"id": "new_style_cache",
"label": "Use new style caching (for subliminal)",
"type": "bool",
"default": "true"
},
{
"id": "low_impact_mode",
"label": "Low impact mode (for remote filesystems)",
"type": "bool",
"default": "false"
},
{
"id": "pms_request_timeout",
"label": "Timeout for API requests sent to the PMS",
"type": "text",
"default": "15"
},
{
"id": "proxy",
"label": "HTTP proxy to use for providers (supports credentials)",
"type": "text",
"default": ""
},
{
"id": "path_to_advanced_settings",
"label": "Custom path to advanced_settings.json",
"type": "text",
"default": ""
},
{
"id": "log_level",
"label": "How verbose should the logging be?",
@@ -678,6 +823,12 @@
],
"default": "WARNING"
},
{
"id": "log_rotate_keep",
"label": "How many log backups to keep?",
"type": "text",
"default": "5"
},
{
"id": "log_debug_mods",
"label": "Log subtitle modification (debug)",
+6 -4
View File
@@ -9,11 +9,11 @@
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleShortVersionString</key>
<string>2.0.25</string>
<string>2.5.4</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>
<string>2.0.25.1635</string>
<string>2.5.7.2663</string>
<key>PlexFrameworkVersion</key>
<string>2</string>
<key>PlexPluginClass</key>
@@ -32,7 +32,7 @@
&lt;h1&gt;Sub-Zero for Plex&lt;/h1&gt;&lt;i&gt;Subtitles done right&lt;/i&gt;
Version 2.0.25.1635
Version 2.5.7.2663
Originally based on @bramwalet's awesome &lt;a href=&quot;https://github.com/bramwalet/Subliminal.bundle&quot;&gt;Subliminal.bundle&lt;/a&gt;
@@ -44,7 +44,9 @@ Score info: &lt;a href=&quot;http://v.ht/szscores&quot;&gt;http://v.ht/szscores&
Plex thread: &lt;a href=&quot;https://forums.plex.tv/discussion/186575&quot;>https://forums.plex.tv/discussion/186575&lt;/a&gt;
Github: &lt;a href=&quot;https://github.com/pannal/Sub-Zero.bundle&quot;&gt;https://github.com/pannal/Sub-Zero&lt;/a&gt;
panni, 2017
3rd party licenses: &lt;a href=&quot;https://github.com/pannal/Sub-Zero.bundle/tree/master/Licenses&quot;&gt;https://github.com/pannal/Sub-Zero.bundle/tree/master/Licenses&lt;/a&gt;
panni, 2018
&lt;/div&gt;
</string>
</dict>
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large Load Diff
@@ -4,7 +4,6 @@
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from functools import partial
from pkg_resources import resource_stream # @UnresolvedImport
@@ -4,7 +4,6 @@
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from functools import partial
from pkg_resources import resource_stream # @UnresolvedImport
@@ -4,7 +4,6 @@
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from pkg_resources import resource_stream # @UnresolvedImport
from . import basestr
+82 -21
View File
@@ -5,26 +5,31 @@ http://www.crummy.com/software/BeautifulSoup/
Beautiful Soup uses a pluggable XML or HTML parser to parse a
(possibly invalid) document into a tree representation. Beautiful Soup
provides provides methods and Pythonic idioms that make it easy to
navigate, search, and modify the parse tree.
provides methods and Pythonic idioms that make it easy to navigate,
search, and modify the parse tree.
Beautiful Soup works with Python 2.6 and up. It works better if lxml
Beautiful Soup works with Python 2.7 and up. It works better if lxml
and/or html5lib is installed.
For more than you ever wanted to know about Beautiful Soup, see the
documentation:
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.4.1"
__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson"
__version__ = "4.6.0"
__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
__license__ = "MIT"
__all__ = ['BeautifulSoup']
import os
import re
import traceback
import warnings
from .builder import builder_registry, ParserRejectedMarkup
@@ -77,7 +82,7 @@ class BeautifulSoup(Tag):
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
# Template for the warning issued when no parser was explicitly specified.
# It is %-formatted with the keys markup_type, parser, line_number and filename.
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP)\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
def __init__(self, markup="", features=None, builder=None,
parse_only=None, from_encoding=None, exclude_encodings=None,
@@ -137,6 +142,10 @@ class BeautifulSoup(Tag):
from_encoding = from_encoding or deprecated_argument(
"fromEncoding", "from_encoding")
if from_encoding and isinstance(markup, unicode):
warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
from_encoding = None
if len(kwargs) > 0:
arg = kwargs.keys().pop()
raise TypeError(
@@ -161,19 +170,29 @@ class BeautifulSoup(Tag):
markup_type = "XML"
else:
markup_type = "HTML"
caller = traceback.extract_stack()[0]
filename = caller[0]
line_number = caller[1]
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
filename=filename,
line_number=line_number,
parser=builder.NAME,
markup_type=markup_type))
self.builder = builder
self.is_xml = builder.is_xml
self.known_xml = self.is_xml
self.builder.soup = self
self.parse_only = parse_only
if hasattr(markup, 'read'): # It's a file-type object.
markup = markup.read()
elif len(markup) <= 256:
elif len(markup) <= 256 and (
(isinstance(markup, bytes) and not b'<' in markup)
or (isinstance(markup, unicode) and not u'<' in markup)
):
# Print out warnings for a couple beginner problems
# involving passing non-markup to Beautiful Soup.
# Beautiful Soup will still parse the input as markup,
@@ -195,16 +214,10 @@ class BeautifulSoup(Tag):
if isinstance(markup, unicode):
markup = markup.encode("utf8")
warnings.warn(
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
if markup[:5] == "http:" or markup[:6] == "https:":
# TODO: This is ugly but I couldn't get it to work in
# Python 3 otherwise.
if ((isinstance(markup, bytes) and not b' ' in markup)
or (isinstance(markup, unicode) and not u' ' in markup)):
if isinstance(markup, unicode):
markup = markup.encode("utf8")
warnings.warn(
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
'"%s" looks like a filename, not markup. You should'
' probably open this file and pass the filehandle into'
' Beautiful Soup.' % markup)
self._check_markup_is_url(markup)
for (self.markup, self.original_encoding, self.declared_html_encoding,
self.contains_replacement_characters) in (
@@ -223,15 +236,52 @@ class BeautifulSoup(Tag):
self.builder.soup = None
def __copy__(self):
return type(self)(self.encode(), builder=self.builder)
copy = type(self)(
self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
)
# Although we encoded the tree to UTF-8, that may not have
# been the encoding of the original markup. Set the copy's
# .original_encoding to reflect the original object's
# .original_encoding.
copy.original_encoding = self.original_encoding
return copy
def __getstate__(self):
# Frequently a tree builder can't be pickled.
d = dict(self.__dict__)
if 'builder' in d and not self.builder.picklable:
del d['builder']
d['builder'] = None
return d
@staticmethod
def _check_markup_is_url(markup):
    """Issue a warning if `markup` looks like a URL rather than a document.

    The warning is emitted (nothing is raised) when the markup starts with
    "http:" or "https:" and contains no space character. Markup can be
    unicode or str (py2) / bytes (py3); any other type is silently ignored.
    """
    # Pick literals of the same string type as the markup so the
    # comparisons below never mix bytes and text.
    if isinstance(markup, bytes):
        space = b' '
        cant_start_with = (b"http:", b"https:")
    elif isinstance(markup, unicode):
        space = u' '
        cant_start_with = (u"http:", u"https:")
    else:
        return

    # startswith() accepts a tuple of prefixes and matches any of them.
    if not markup.startswith(cant_start_with) or space in markup:
        return

    # Decode bytes for display so the message shows the text itself
    # instead of a bytes repr.
    if isinstance(markup, bytes):
        decoded_markup = markup.decode('utf-8', 'replace')
    else:
        decoded_markup = markup
    warnings.warn(
        '"%s" looks like a URL. Beautiful Soup is not an'
        ' HTTP client. You should probably use an HTTP client like'
        ' requests to get the document behind the URL, and feed'
        ' that document to Beautiful Soup.' % decoded_markup
    )
def _feed(self):
# Convert the document to Unicode.
self.builder.reset()
@@ -335,7 +385,18 @@ class BeautifulSoup(Tag):
if parent.next_sibling:
# This node is being inserted into an element that has
# already been parsed. Deal with any dangling references.
index = parent.contents.index(o)
index = len(parent.contents)-1
while index >= 0:
if parent.contents[index] is o:
break
index -= 1
else:
raise ValueError(
"Error building tree: supposedly %r was inserted "
"into %r after the fact, but I don't see it!" % (
o, parent
)
)
if index == 0:
previous_element = parent
previous_sibling = None
@@ -387,7 +448,7 @@ class BeautifulSoup(Tag):
"""Push a start tag on to the stack.
If this method returns None, the tag was rejected by the
SoupStrainer. You should proceed as if the tag had not occured
SoupStrainer. You should proceed as if the tag had not occurred
in the document. For instance, if this was a self-closing tag,
don't call handle_endtag.
"""
@@ -1,9 +1,13 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from collections import defaultdict
import itertools
import sys
from bs4.element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
HTMLAwareEntitySubstitution,
whitespace_re
)
@@ -227,9 +231,14 @@ class HTMLTreeBuilder(TreeBuilder):
Such as which tags are empty-element tags.
"""
preserve_whitespace_tags = set(['pre', 'textarea'])
empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
'spacer', 'link', 'frame', 'base'])
preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
empty_element_tags = set([
# These are from HTML5.
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
# These are from HTML4, removed in HTML5.
'spacer', 'frame'
])
# The HTML standard defines these attributes as containing a
# space-separated list of values, not a single value. That is,
@@ -1,9 +1,12 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__all__ = [
'HTML5TreeBuilder',
]
from pdb import set_trace
import warnings
import re
from bs4.builder import (
PERMISSIVE,
HTML,
@@ -15,7 +18,10 @@ from bs4.element import (
whitespace_re,
)
import html5lib
from html5lib.constants import namespaces
from html5lib.constants import (
namespaces,
prefixes,
)
from bs4.element import (
Comment,
Doctype,
@@ -23,6 +29,15 @@ from bs4.element import (
Tag,
)
try:
# Pre-0.99999999
from html5lib.treebuilders import _base as treebuilder_base
new_html5lib = False
except ImportError, e:
# 0.99999999 and up
from html5lib.treebuilders import base as treebuilder_base
new_html5lib = True
class HTML5TreeBuilder(HTMLTreeBuilder):
"""Use html5lib to build a tree."""
@@ -47,7 +62,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
if self.soup.parse_only is not None:
warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
doc = parser.parse(markup, encoding=self.user_specified_encoding)
extra_kwargs = dict()
if not isinstance(markup, unicode):
if new_html5lib:
extra_kwargs['override_encoding'] = self.user_specified_encoding
else:
extra_kwargs['encoding'] = self.user_specified_encoding
doc = parser.parse(markup, **extra_kwargs)
# Set the character encoding detected by the tokenizer.
if isinstance(markup, unicode):
@@ -55,11 +77,17 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
# charEncoding to UTF-8 if it gets Unicode input.
doc.original_encoding = None
else:
doc.original_encoding = parser.tokenizer.stream.charEncoding[0]
original_encoding = parser.tokenizer.stream.charEncoding[0]
if not isinstance(original_encoding, basestring):
# In 0.99999999 and up, the encoding is an html5lib
# Encoding object. We want to use a string for compatibility
# with other tree builders.
original_encoding = original_encoding.name
doc.original_encoding = original_encoding
def create_treebuilder(self, namespaceHTMLElements):
self.underlying_builder = TreeBuilderForHtml5lib(
self.soup, namespaceHTMLElements)
namespaceHTMLElements, self.soup)
return self.underlying_builder
def test_fragment_to_document(self, fragment):
@@ -67,10 +95,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
return u'<html><head></head><body>%s</body></html>' % fragment
class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
def __init__(self, soup, namespaceHTMLElements):
self.soup = soup
def __init__(self, namespaceHTMLElements, soup=None):
if soup:
self.soup = soup
else:
from bs4 import BeautifulSoup
self.soup = BeautifulSoup("", "html.parser")
super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
def documentClass(self):
@@ -93,7 +125,8 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
return TextNode(Comment(data), self.soup)
def fragmentClass(self):
self.soup = BeautifulSoup("")
from bs4 import BeautifulSoup
self.soup = BeautifulSoup("", "html.parser")
self.soup.name = "[document_fragment]"
return Element(self.soup, self.soup, None)
@@ -105,7 +138,57 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
return self.soup
def getFragment(self):
return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
return treebuilder_base.TreeBuilder.getFragment(self).element
def testSerializer(self, element):
    """Serialize `element` and its descendants into a line-based text dump.

    Every node produces one line starting with "|" followed by padding that
    grows by two spaces per nesting level. Doctypes, comments and strings
    are rendered in dedicated forms; any other node is rendered as
    "<name>" followed by its attributes (sorted, one per line) and then
    its children.

    :param element: the node to start serializing from.
    :return: the lines joined with newlines, as a single string.
    """
    rv = []
    doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')

    def serializeElement(element, indent=0):
        pad = ' ' * indent
        if isinstance(element, Doctype):
            m = doctype_re.match(element)
            if m:
                name = m.group(1)
                # lastindex > 1 means a PUBLIC or SYSTEM identifier matched.
                if m.lastindex > 1:
                    publicId = m.group(2) or ""
                    systemId = m.group(3) or m.group(4) or ""
                    rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
                              (pad, name, publicId, systemId))
                else:
                    rv.append("|%s<!DOCTYPE %s>" % (pad, name))
            else:
                rv.append("|%s<!DOCTYPE >" % (pad,))
        elif isinstance(element, Comment):
            rv.append("|%s<!-- %s -->" % (pad, element))
        elif isinstance(element, NavigableString):
            rv.append("|%s\"%s\"" % (pad, element))
        else:
            if element.namespace:
                name = "%s %s" % (prefixes[element.namespace],
                                  element.name)
            else:
                name = element.name
            rv.append("|%s<%s>" % (pad, name))
            if element.attrs:
                attributes = []
                for attr_name, value in element.attrs.items():
                    if isinstance(attr_name, NamespacedAttribute):
                        attr_name = "%s %s" % (prefixes[attr_name.namespace],
                                               attr_name.name)
                    # List values are flattened into one space-separated string.
                    if isinstance(value, list):
                        value = " ".join(value)
                    attributes.append((attr_name, value))

                for attr_name, value in sorted(attributes):
                    rv.append('|%s%s="%s"' % (' ' * (indent + 2), attr_name, value))
            # Children are rendered one level deeper than their parent.
            indent += 2
            for child in element.children:
                serializeElement(child, indent)

    serializeElement(element, 0)

    return "\n".join(rv)
class AttrList(object):
def __init__(self, element):
@@ -137,9 +220,9 @@ class AttrList(object):
return name in list(self.attrs.keys())
class Element(html5lib.treebuilders._base.Node):
class Element(treebuilder_base.Node):
def __init__(self, element, soup, namespace):
html5lib.treebuilders._base.Node.__init__(self, element.name)
treebuilder_base.Node.__init__(self, element.name)
self.element = element
self.soup = soup
self.namespace = namespace
@@ -158,8 +241,10 @@ class Element(html5lib.treebuilders._base.Node):
child = node
elif node.element.__class__ == NavigableString:
string_child = child = node.element
node.parent = self
else:
child = node.element
node.parent = self
if not isinstance(child, basestring) and child.parent is not None:
node.element.extract()
@@ -197,6 +282,8 @@ class Element(html5lib.treebuilders._base.Node):
most_recent_element=most_recent_element)
def getAttributes(self):
if isinstance(self.element, Comment):
return {}
return AttrList(self.element)
def setAttributes(self, attributes):
@@ -224,11 +311,11 @@ class Element(html5lib.treebuilders._base.Node):
attributes = property(getAttributes, setAttributes)
def insertText(self, data, insertBefore=None):
text = TextNode(self.soup.new_string(data), self.soup)
if insertBefore:
text = TextNode(self.soup.new_string(data), self.soup)
self.insertBefore(data, insertBefore)
self.insertBefore(text, insertBefore)
else:
self.appendChild(data)
self.appendChild(text)
def insertBefore(self, node, refNode):
index = self.element.index(refNode.element)
@@ -250,6 +337,7 @@ class Element(html5lib.treebuilders._base.Node):
# print "MOVE", self.element.contents
# print "FROM", self.element
# print "TO", new_parent.element
element = self.element
new_parent_element = new_parent.element
# Determine what this tag's next_element will be once all the children
@@ -268,7 +356,6 @@ class Element(html5lib.treebuilders._base.Node):
new_parents_last_descendant_next_element = new_parent_element.next_element
to_append = element.contents
append_after = new_parent_element.contents
if len(to_append) > 0:
# Set the first child's previous_element and previous_sibling
# to elements within the new parent
@@ -285,12 +372,19 @@ class Element(html5lib.treebuilders._base.Node):
if new_parents_last_child:
new_parents_last_child.next_sibling = first_child
# Fix the last child's next_element and next_sibling
last_child = to_append[-1]
last_child.next_element = new_parents_last_descendant_next_element
# Find the very last element being moved. It is now the
# parent's last descendant. It has no .next_sibling and
# its .next_element is whatever the previous last
# descendant had.
last_childs_last_descendant = to_append[-1]._last_descendant(False, True)
last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
if new_parents_last_descendant_next_element:
new_parents_last_descendant_next_element.previous_element = last_child
last_child.next_sibling = None
# TODO: This code has no test coverage and I'm not sure
# how to get html5lib to go through this path, but it's
# just the other side of the previous line.
new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant
last_childs_last_descendant.next_sibling = None
for child in to_append:
child.parent = new_parent_element
@@ -324,7 +418,7 @@ class Element(html5lib.treebuilders._base.Node):
class TextNode(Element):
def __init__(self, element, soup):
html5lib.treebuilders._base.Node.__init__(self, None)
treebuilder_base.Node.__init__(self, None)
self.element = element
self.soup = soup
@@ -1,5 +1,8 @@
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__all__ = [
'HTMLParserTreeBuilder',
]
@@ -49,7 +52,31 @@ from bs4.builder import (
HTMLPARSER = 'html.parser'
class BeautifulSoupHTMLParser(HTMLParser):
def handle_starttag(self, name, attrs):
def __init__(self, *args, **kwargs):
HTMLParser.__init__(self, *args, **kwargs)
# Keep a list of empty-element tags that were encountered
# without an explicit closing tag. If we encounter a closing tag
# of this type, we'll associate it with one of those entries.
#
# This isn't a stack because we don't care about the
# order. It's a list of closing tags we've already handled and
# will ignore, assuming they ever show up.
self.already_closed_empty_element = []
def handle_startendtag(self, name, attrs):
    """Handle a tag written in self-closing form, i.e. markup like <tag/>.

    :param name: the tag name.
    :param attrs: the tag's attributes, passed through to handle_starttag.
    """
    # This is only called when the markup looks like <tag/>.
    # handle_empty_element=False tells handle_starttag not to close the
    # tag on its own just because its name matches a known empty-element
    # tag. We know that this is an empty-element tag and we want to call
    # handle_endtag ourselves.
    self.handle_starttag(name, attrs, handle_empty_element=False)
    self.handle_endtag(name)
def handle_starttag(self, name, attrs, handle_empty_element=True):
# XXX namespace
attr_dict = {}
for key, value in attrs:
@@ -59,10 +86,34 @@ class BeautifulSoupHTMLParser(HTMLParser):
value = ''
attr_dict[key] = value
attrvalue = '""'
self.soup.handle_starttag(name, None, None, attr_dict)
#print "START", name
tag = self.soup.handle_starttag(name, None, None, attr_dict)
if tag and tag.is_empty_element and handle_empty_element:
# Unlike other parsers, html.parser doesn't send separate end tag
# events for empty-element tags. (It's handled in
# handle_startendtag, but only if the original markup looked like
# <tag/>.)
#
# So we need to call handle_endtag() ourselves. Since we
# know the start event is identical to the end event, we
# don't want handle_endtag() to cross off any previous end
# events for tags of this name.
self.handle_endtag(name, check_already_closed=False)
def handle_endtag(self, name):
self.soup.handle_endtag(name)
# But we might encounter an explicit closing tag for this tag
# later on. If so, we want to ignore it.
self.already_closed_empty_element.append(name)
def handle_endtag(self, name, check_already_closed=True):
#print "END", name
if check_already_closed and name in self.already_closed_empty_element:
# This is a redundant end tag for an empty-element tag.
# We've already called handle_endtag() for it, so just
# check it off the list.
# print "ALREADY CLOSED", name
self.already_closed_empty_element.remove(name)
else:
self.soup.handle_endtag(name)
def handle_data(self, data):
self.soup.handle_data(data)
@@ -166,6 +217,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
raise e
parser.already_closed_empty_element = []
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
+16 -6
View File
@@ -1,3 +1,5 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__all__ = [
'LXMLTreeBuilderForXML',
'LXMLTreeBuilder',
@@ -12,6 +14,7 @@ from bs4.element import (
Doctype,
NamespacedAttribute,
ProcessingInstruction,
XMLProcessingInstruction,
)
from bs4.builder import (
FAST,
@@ -29,6 +32,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
DEFAULT_PARSER_CLASS = etree.XMLParser
is_xml = True
processing_instruction_class = XMLProcessingInstruction
NAME = "lxml-xml"
ALTERNATE_NAMES = ["xml"]
@@ -87,6 +91,16 @@ class LXMLTreeBuilderForXML(TreeBuilder):
Each 4-tuple represents a strategy for parsing the document.
"""
# Instead of using UnicodeDammit to convert the bytestring to
# Unicode using different encodings, use EncodingDetector to
# iterate over the encodings, and tell lxml to try to parse
# the document as each one in turn.
is_html = not self.is_xml
if is_html:
self.processing_instruction_class = ProcessingInstruction
else:
self.processing_instruction_class = XMLProcessingInstruction
if isinstance(markup, unicode):
# We were given Unicode. Maybe lxml can parse Unicode on
# this system?
@@ -98,11 +112,6 @@ class LXMLTreeBuilderForXML(TreeBuilder):
yield (markup.encode("utf8"), "utf8",
document_declared_encoding, False)
# Instead of using UnicodeDammit to convert the bytestring to
# Unicode using different encodings, use EncodingDetector to
# iterate over the encodings, and tell lxml to try to parse
# the document as each one in turn.
is_html = not self.is_xml
try_encodings = [user_specified_encoding, document_declared_encoding]
detector = EncodingDetector(
markup, try_encodings, is_html, exclude_encodings)
@@ -201,7 +210,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def pi(self, target, data):
self.soup.endData()
self.soup.handle_data(target + ' ' + data)
self.soup.endData(ProcessingInstruction)
self.soup.endData(self.processing_instruction_class)
def data(self, content):
self.soup.handle_data(content)
@@ -229,6 +238,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
is_xml = False
processing_instruction_class = ProcessingInstruction
def default_parser(self, encoding):
return etree.HTMLParser
+8 -6
View File
@@ -6,9 +6,10 @@ necessary. It is heavily based on code from Mark Pilgrim's Universal
Feed Parser. It works best on XML and HTML, but it does not rewrite the
XML or HTML to reflect a new encoding; that's the tree builder's job.
"""
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__license__ = "MIT"
from pdb import set_trace
import codecs
from htmlentitydefs import codepoint2name
import re
@@ -309,7 +310,7 @@ class EncodingDetector:
else:
xml_endpos = 1024
html_endpos = max(2048, int(len(markup) * 0.05))
declared_encoding = None
declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
if not declared_encoding_match and is_html:
@@ -346,7 +347,7 @@ class UnicodeDammit:
self.tried_encodings = []
self.contains_replacement_characters = False
self.is_html = is_html
self.log = logging.getLogger(__name__)
self.detector = EncodingDetector(
markup, override_encodings, is_html, exclude_encodings)
@@ -376,9 +377,10 @@ class UnicodeDammit:
if encoding != "ascii":
u = self._convert_from(encoding, "replace")
if u is not None:
logging.warning(
self.log.warning(
"Some characters could not be decoded, and were "
"replaced with REPLACEMENT CHARACTER.")
"replaced with REPLACEMENT CHARACTER."
)
self.contains_replacement_characters = True
break
@@ -734,7 +736,7 @@ class UnicodeDammit:
0xde : b'\xc3\x9e', # Þ
0xdf : b'\xc3\x9f', # ß
0xe0 : b'\xc3\xa0', # à
0xe1 : b'\xa1', # á
0xe1 : b'\xa1', # á
0xe2 : b'\xc3\xa2', # â
0xe3 : b'\xc3\xa3', # ã
0xe4 : b'\xc3\xa4', # ä
+4 -1
View File
@@ -1,5 +1,7 @@
"""Diagnostic functions, mainly for use when doing tech support."""
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__license__ = "MIT"
import cProfile
@@ -56,7 +58,8 @@ def diagnose(data):
data = data.read()
elif os.path.exists(data):
print '"%s" looks like a filename. Reading data from the file.' % data
data = open(data).read()
with open(data) as fp:
data = fp.read()
elif data.startswith("http:") or data.startswith("https:"):
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
+131 -48
View File
@@ -1,8 +1,10 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__license__ = "MIT"
from pdb import set_trace
import collections
import re
import shlex
import sys
import warnings
from bs4.dammit import EntitySubstitution
@@ -99,6 +101,8 @@ class HTMLAwareEntitySubstitution(EntitySubstitution):
preformatted_tags = set(["pre"])
preserve_whitespace_tags = set(['pre', 'textarea'])
@classmethod
def _substitute_if_appropriate(cls, ns, f):
if (isinstance(ns, NavigableString)
@@ -127,8 +131,8 @@ class PageElement(object):
# to methods like encode() and prettify():
#
# "html" - All Unicode characters with corresponding HTML entities
# are converted to those entities on output.
# "minimal" - Bare ampersands and angle brackets are converted to
# are converted to those entities on output.
# "minimal" - Bare ampersands and angle brackets are converted to
# XML entities: &amp; &lt; &gt;
# None - The null formatter. Unicode characters are never
# converted to entities. This is not recommended, but it's
@@ -169,11 +173,19 @@ class PageElement(object):
This is used when mapping a formatter name ("minimal") to an
appropriate function (one that performs entity-substitution on
the contents of <script> and <style> tags, or not). It's
the contents of <script> and <style> tags, or not). It can be
inefficient, but it should be called very rarely.
"""
if self.known_xml is not None:
# Most of the time we will have determined this when the
# document is parsed.
return self.known_xml
# Otherwise, it's likely that this element was created by
# direct invocation of the constructor from within the user's
# Python code.
if self.parent is None:
# This is the top-level object. It should have .is_xml set
# This is the top-level object. It should have .known_xml set
# from tree creation. If not, take a guess--BS is usually
# used on HTML markup.
return getattr(self, 'is_xml', False)
@@ -523,9 +535,16 @@ class PageElement(object):
return ResultSet(strainer, result)
elif isinstance(name, basestring):
# Optimization to find all tags with a given name.
if name.count(':') == 1:
# This is a name with a prefix.
prefix, name = name.split(':', 1)
else:
prefix = None
result = (element for element in generator
if isinstance(element, Tag)
and element.name == name)
and element.name == name
and (prefix is None or element.prefix == prefix)
)
return ResultSet(strainer, result)
results = ResultSet(strainer)
while True:
@@ -637,7 +656,7 @@ class PageElement(object):
return lambda el: el._attr_value_as_string(
attribute, '').startswith(value)
elif operator == '$':
# string represenation of `attribute` ends with `value`
# string representation of `attribute` ends with `value`
return lambda el: el._attr_value_as_string(
attribute, '').endswith(value)
elif operator == '*':
@@ -677,6 +696,11 @@ class NavigableString(unicode, PageElement):
PREFIX = ''
SUFFIX = ''
# We can't tell just by looking at a string whether it's contained
# in an XML document or an HTML document.
known_xml = None
def __new__(cls, value):
"""Create a new NavigableString.
@@ -743,10 +767,16 @@ class CData(PreformattedString):
SUFFIX = u']]>'
class ProcessingInstruction(PreformattedString):
"""A SGML processing instruction."""
PREFIX = u'<?'
SUFFIX = u'>'
class XMLProcessingInstruction(ProcessingInstruction):
"""An XML processing instruction."""
PREFIX = u'<?'
SUFFIX = u'?>'
class Comment(PreformattedString):
PREFIX = u'<!--'
@@ -781,7 +811,8 @@ class Tag(PageElement):
"""Represents a found HTML tag with its attributes and contents."""
def __init__(self, parser=None, builder=None, name=None, namespace=None,
prefix=None, attrs=None, parent=None, previous=None):
prefix=None, attrs=None, parent=None, previous=None,
is_xml=None):
"Basic constructor."
if parser is None:
@@ -795,6 +826,14 @@ class Tag(PageElement):
self.name = name
self.namespace = namespace
self.prefix = prefix
if builder is not None:
preserve_whitespace_tags = builder.preserve_whitespace_tags
else:
if is_xml:
preserve_whitespace_tags = []
else:
preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
self.preserve_whitespace_tags = preserve_whitespace_tags
if attrs is None:
attrs = {}
elif attrs:
@@ -805,6 +844,13 @@ class Tag(PageElement):
attrs = dict(attrs)
else:
attrs = dict(attrs)
# If possible, determine ahead of time whether this tag is an
# XML tag.
if builder:
self.known_xml = builder.is_xml
else:
self.known_xml = is_xml
self.attrs = attrs
self.contents = []
self.setup(parent, previous)
@@ -824,7 +870,7 @@ class Tag(PageElement):
Its contents are a copy of the old Tag's contents.
"""
clone = type(self)(None, self.builder, self.name, self.namespace,
self.nsprefix, self.attrs)
self.prefix, self.attrs, is_xml=self._is_xml)
for attr in ('can_be_empty_element', 'hidden'):
setattr(clone, attr, getattr(self, attr))
for child in self.contents:
@@ -946,6 +992,13 @@ class Tag(PageElement):
attribute."""
return self.attrs.get(key, default)
def get_attribute_list(self, key, default=None):
    """Look up an attribute like get(), but always hand back a list.

    A value that is already a list is returned unchanged (the same
    object). Any other value -- including the default when the key is
    absent -- is wrapped in a new one-element list.
    """
    found = self.get(key, default)
    return found if isinstance(found, list) else [found]
def has_attr(self, key):
return key in self.attrs
@@ -997,7 +1050,7 @@ class Tag(PageElement):
tag_name, tag_name))
return self.find(tag_name)
# We special case contents to avoid recursion.
elif not tag.startswith("__") and not tag=="contents":
elif not tag.startswith("__") and not tag == "contents":
return self.find(tag)
raise AttributeError(
"'%s' object has no attribute '%s'" % (self.__class__, tag))
@@ -1057,10 +1110,11 @@ class Tag(PageElement):
def _should_pretty_print(self, indent_level):
"""Should this tag be pretty-printed?"""
return (
indent_level is not None and
(self.name not in HTMLAwareEntitySubstitution.preformatted_tags
or self._is_xml))
indent_level is not None
and self.name not in self.preserve_whitespace_tags
)
def decode(self, indent_level=None,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
@@ -1280,6 +1334,7 @@ class Tag(PageElement):
_selector_combinators = ['>', '+', '~']
_select_debug = False
quoted_colon = re.compile('"[^"]*:[^"]*"')
def select_one(self, selector):
"""Perform a CSS selection operation on the current element."""
value = self.select(selector, limit=1)
@@ -1305,8 +1360,7 @@ class Tag(PageElement):
if limit and len(context) >= limit:
break
return context
tokens = selector.split()
tokens = shlex.split(selector)
current_context = [self]
if tokens[-1] in self._selector_combinators:
@@ -1358,7 +1412,7 @@ class Tag(PageElement):
return classes.issubset(candidate.get('class', []))
checker = classes_match
elif ':' in token:
elif ':' in token and not self.quoted_colon.search(token):
# Pseudo-class
tag_name, pseudo = token.split(':', 1)
if tag_name == '':
@@ -1389,11 +1443,8 @@ class Tag(PageElement):
self.count += 1
if self.count == self.destination:
return True
if self.count > self.destination:
# Stop the generator that's sending us
# these things.
raise StopIteration()
return False
else:
return False
checker = Counter(pseudo_value).nth_child_of_type
else:
raise NotImplementedError(
@@ -1498,13 +1549,12 @@ class Tag(PageElement):
# don't include it in the context more than once.
new_context.append(candidate)
new_context_ids.add(id(candidate))
if limit and len(new_context) >= limit:
break
elif self._select_debug:
print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
current_context = new_context
if limit and len(current_context) >= limit:
current_context = current_context[:limit]
if self._select_debug:
print "Final verdict:"
@@ -1662,28 +1712,22 @@ class SoupStrainer(object):
"I don't know how to match against a %s" % markup.__class__)
return found
def _matches(self, markup, match_against):
def _matches(self, markup, match_against, already_tried=None):
# print u"Matching %s against %s" % (markup, match_against)
result = False
if isinstance(markup, list) or isinstance(markup, tuple):
# This should only happen when searching a multi-valued attribute
# like 'class'.
if (isinstance(match_against, unicode)
and ' ' in match_against):
# A bit of a special case. If they try to match "foo
# bar" on a multivalue attribute's value, only accept
# the literal value "foo bar"
#
# XXX This is going to be pretty slow because we keep
# splitting match_against. But it shouldn't come up
# too often.
return (whitespace_re.split(match_against) == markup)
else:
for item in markup:
if self._matches(item, match_against):
return True
return False
for item in markup:
if self._matches(item, match_against):
return True
# We didn't match any particular value of the multivalue
# attribute, but maybe we match the attribute value when
# considered as a string.
if self._matches(' '.join(markup), match_against):
return True
return False
if match_against is True:
# True matches any non-None value.
return markup is not None
@@ -1693,6 +1737,7 @@ class SoupStrainer(object):
# Custom callables take the tag as an argument, but all
# other ways of matching match the tag name as a string.
original_markup = markup
if isinstance(markup, Tag):
markup = markup.name
@@ -1703,18 +1748,51 @@ class SoupStrainer(object):
# None matches None, False, an empty string, an empty list, and so on.
return not match_against
if isinstance(match_against, unicode):
if (hasattr(match_against, '__iter__')
and not isinstance(match_against, basestring)):
# We're asked to match against an iterable of items.
# The markup must be match at least one item in the
# iterable. We'll try each one in turn.
#
# To avoid infinite recursion we need to keep track of
# items we've already seen.
if not already_tried:
already_tried = set()
for item in match_against:
if item.__hash__:
key = item
else:
key = id(item)
if key in already_tried:
continue
else:
already_tried.add(key)
if self._matches(original_markup, item, already_tried):
return True
else:
return False
# Beyond this point we might need to run the test twice: once against
# the tag's name and once against its prefixed name.
match = False
if not match and isinstance(match_against, unicode):
# Exact string match
return markup == match_against
match = markup == match_against
if hasattr(match_against, 'match'):
if not match and hasattr(match_against, 'search'):
# Regexp match
return match_against.search(markup)
if hasattr(match_against, '__iter__'):
# The markup must be an exact match against something
# in the iterable.
return markup in match_against
if (not match
and isinstance(original_markup, Tag)
and original_markup.prefix):
# Try the whole thing again with the prefixed tag name.
return self._matches(
original_markup.prefix + ':' + original_markup.name, match_against
)
return match
class ResultSet(list):
@@ -1723,3 +1801,8 @@ class ResultSet(list):
def __init__(self, source, result=()):
super(ResultSet, self).__init__(result)
self.source = source
def __getattr__(self, key):
"""Raise AttributeError, with a usage hint, for the requested attribute.

Python only falls back to __getattr__ when normal attribute lookup has
failed, and this implementation unconditionally raises. The message
names the missing attribute and suggests that find() may have been
intended instead of find_all().
"""
raise AttributeError(
"ResultSet object has no attribute '%s'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?" % key
)
+87 -4
View File
@@ -1,5 +1,7 @@
"""Helper classes for tests."""
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__license__ = "MIT"
import pickle
@@ -67,6 +69,18 @@ class HTMLTreeBuilderSmokeTest(object):
markup in these tests, there's not much room for interpretation.
"""
def test_empty_element_tags(self):
"""Verify that all HTML4 and HTML5 empty element (aka void element) tags
are handled correctly.
"""
for name in [
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
'spacer', 'frame'
]:
soup = self.soup("")
new_tag = soup.new_tag(name)
self.assertEqual(True, new_tag.is_empty_element)
def test_pickle_and_unpickle_identity(self):
# Pickling a tree, then unpickling it, yields a tree identical
# to the original.
@@ -137,6 +151,14 @@ class HTMLTreeBuilderSmokeTest(object):
markup.replace(b"\n", b""))
def test_processing_instruction(self):
# We test both Unicode and bytestring to verify that
# process_markup correctly sets processing_instruction_class
# even when the markup is already Unicode and there is no
# need to process anything.
markup = u"""<?PITarget PIContent?>"""
soup = self.soup(markup)
self.assertEqual(markup, soup.decode())
markup = b"""<?PITarget PIContent?>"""
soup = self.soup(markup)
self.assertEqual(markup, soup.encode("utf8"))
@@ -215,9 +237,22 @@ Hello, world!
self.assertEqual(comment, baz.previous_element)
def test_preserved_whitespace_in_pre_and_textarea(self):
"""Whitespace must be preserved in <pre> and <textarea> tags."""
self.assertSoupEquals("<pre> </pre>")
self.assertSoupEquals("<textarea> woo </textarea>")
"""Whitespace must be preserved in <pre> and <textarea> tags,
even if that would mean not prettifying the markup.
"""
pre_markup = "<pre> </pre>"
textarea_markup = "<textarea> woo\nwoo </textarea>"
self.assertSoupEquals(pre_markup)
self.assertSoupEquals(textarea_markup)
soup = self.soup(pre_markup)
self.assertEqual(soup.pre.prettify(), pre_markup)
soup = self.soup(textarea_markup)
self.assertEqual(soup.textarea.prettify(), textarea_markup)
soup = self.soup("<textarea></textarea>")
self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>")
def test_nested_inline_elements(self):
"""Inline elements can be nested indefinitely."""
@@ -307,6 +342,13 @@ Hello, world!
self.assertEqual("p", soup.p.name)
self.assertConnectedness(soup)
def test_empty_element_tags(self):
"""Verify consistent handling of empty-element tags,
no matter how they come in through the markup.
"""
self.assertSoupEquals('<br/><br/><br/>', "<br/><br/><br/>")
self.assertSoupEquals('<br /><br /><br />', "<br/><br/><br/>")
def test_head_tag_between_head_and_body(self):
"Prevent recurrence of a bug in the html5lib treebuilder."
content = """<html><head></head>
@@ -480,7 +522,9 @@ Hello, world!
hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
soup = self.soup(
hebrew_document, from_encoding="iso8859-8")
self.assertEqual(soup.original_encoding, 'iso8859-8')
# Some tree builders call it iso8859-8, others call it iso-8859-8.
# That's not a difference we really care about.
assert soup.original_encoding in ('iso8859-8', 'iso-8859-8')
self.assertEqual(
soup.encode('utf-8'),
hebrew_document.decode("iso8859-8").encode("utf-8"))
@@ -563,6 +607,11 @@ class XMLTreeBuilderSmokeTest(object):
soup = self.soup(markup)
self.assertEqual(markup, soup.encode("utf8"))
def test_processing_instruction(self):
markup = b"""<?xml version="1.0" encoding="utf8"?>\n<?PITarget PIContent?>"""
soup = self.soup(markup)
self.assertEqual(markup, soup.encode("utf8"))
def test_real_xhtml_document(self):
"""A real XHTML document should come out *exactly* the same as it went in."""
markup = b"""<?xml version="1.0" encoding="utf-8"?>
@@ -639,6 +688,40 @@ class XMLTreeBuilderSmokeTest(object):
soup = self.soup(markup)
self.assertEqual(unicode(soup.foo), markup)
def test_find_by_prefixed_name(self):
doc = """<?xml version="1.0" encoding="utf-8"?>
<Document xmlns="http://example.com/ns0"
xmlns:ns1="http://example.com/ns1"
xmlns:ns2="http://example.com/ns2"
<ns1:tag>foo</ns1:tag>
<ns1:tag>bar</ns1:tag>
<ns2:tag key="value">baz</ns2:tag>
</Document>
"""
soup = self.soup(doc)
# There are three <tag> tags.
self.assertEqual(3, len(soup.find_all('tag')))
# But two of them are ns1:tag and one of them is ns2:tag.
self.assertEqual(2, len(soup.find_all('ns1:tag')))
self.assertEqual(1, len(soup.find_all('ns2:tag')))
self.assertEqual(1, len(soup.find_all('ns2:tag', key='value')))
self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag'])))
def test_copy_tag_preserves_namespace(self):
xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://example.com/ns0"/>"""
soup = self.soup(xml)
tag = soup.document
duplicate = copy.copy(tag)
# The two tags have the same namespace prefix.
self.assertEqual(tag.prefix, duplicate.prefix)
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
"""Smoke test for a tree builder that supports HTML5."""
@@ -84,6 +84,33 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
self.assertEqual(2, len(soup.find_all('p')))
def test_reparented_markup_containing_identical_whitespace_nodes(self):
"""Verify that we keep the two whitespace nodes in this
document distinct when reparenting the adjacent <tbody> tags.
"""
markup = '<table> <tbody><tbody><ims></tbody> </table>'
soup = self.soup(markup)
space1, space2 = soup.find_all(string=' ')
tbody1, tbody2 = soup.find_all('tbody')
assert space1.next_element is tbody1
assert tbody2.next_element is space2
def test_reparented_markup_containing_children(self):
markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>'
soup = self.soup(markup)
noscript = soup.noscript
self.assertEqual("target", noscript.next_element)
target = soup.find(string='target')
# The 'aftermath' string was duplicated; we want the second one.
final_aftermath = soup.find_all(string='aftermath')[-1]
# The <noscript> tag was moved beneath a copy of the <a> tag,
# but the 'target' string within is still connected to the
# (second) 'aftermath' string.
self.assertEqual(final_aftermath, target.next_element)
self.assertEqual(target, final_aftermath.previous_element)
def test_processing_instruction(self):
"""Processing instructions become comments."""
markup = b"""<?PITarget PIContent?>"""
@@ -96,3 +123,8 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
a1, a2 = soup.find_all('a')
self.assertEqual(a1, a2)
assert a1 is not a2
def test_foster_parenting(self):
markup = b"""<table><td></tbody>A"""
soup = self.soup(markup)
self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
@@ -29,4 +29,6 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
loaded = pickle.loads(dumped)
self.assertTrue(isinstance(loaded.builder, type(tree.builder)))
def test_redundant_empty_element_closing_tags(self):
self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
self.assertSoupEquals('</br></br></br>', "")
@@ -35,7 +35,6 @@ try:
except ImportError, e:
LXML_PRESENT = False
PYTHON_2_PRE_2_7 = (sys.version_info < (2,7))
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
class TestConstructor(SoupTest):
@@ -77,7 +76,7 @@ class TestWarnings(SoupTest):
def test_no_warning_if_explicit_parser_specified(self):
with warnings.catch_warnings(record=True) as w:
soup = self.soup("<a><b></b></a>", "html.parser")
self.assertEquals([], w)
self.assertEqual([], w)
def test_parseOnlyThese_renamed_to_parse_only(self):
with warnings.catch_warnings(record=True) as w:
@@ -118,15 +117,34 @@ class TestWarnings(SoupTest):
soup = self.soup(filename)
self.assertEqual(0, len(w))
def test_url_warning(self):
with warnings.catch_warnings(record=True) as w:
soup = self.soup("http://www.crummy.com/")
msg = str(w[0].message)
self.assertTrue("looks like a URL" in msg)
def test_url_warning_with_bytes_url(self):
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup(b"http://www.crummybytes.com/")
# Be aware this isn't the only warning that can be raised during
# execution.
self.assertTrue(any("looks like a URL" in str(w.message)
for w in warning_list))
def test_url_warning_with_unicode_url(self):
with warnings.catch_warnings(record=True) as warning_list:
# note - this url must differ from the bytes one otherwise
# python's warnings system swallows the second warning
soup = self.soup(u"http://www.crummyunicode.com/")
self.assertTrue(any("looks like a URL" in str(w.message)
for w in warning_list))
def test_url_warning_with_bytes_and_space(self):
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup(b"http://www.crummybytes.com/ is great")
self.assertFalse(any("looks like a URL" in str(w.message)
for w in warning_list))
def test_url_warning_with_unicode_and_space(self):
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup(u"http://www.crummyuncode.com/ is great")
self.assertFalse(any("looks like a URL" in str(w.message)
for w in warning_list))
with warnings.catch_warnings(record=True) as w:
soup = self.soup("http://www.crummy.com/ is great")
self.assertEqual(0, len(w))
class TestSelectiveParsing(SoupTest):
@@ -260,7 +278,7 @@ class TestEncodingConversion(SoupTest):
self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
@skipIf(
PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2,
PYTHON_3_PRE_3_2,
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
def test_attribute_name_containing_unicode_characters(self):
markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
"""Tests for Beautiful Soup's tree traversal methods.
@@ -222,7 +223,19 @@ class TestFindAllByName(TreeTest):
self.assertSelects(
tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
def test_find_with_multi_valued_attribute(self):
soup = self.soup(
"<div class='a b'>1</div><div class='a c'>2</div><div class='a d'>3</div>"
)
r1 = soup.find('div', 'a d');
r2 = soup.find('div', re.compile(r'a d'));
r3, r4 = soup.find_all('div', ['a b', 'a d']);
self.assertEqual('3', r1.string)
self.assertEqual('3', r2.string)
self.assertEqual('1', r3.string)
self.assertEqual('3', r4.string)
class TestFindAllByAttribute(TreeTest):
def test_find_all_by_attribute_name(self):
@@ -294,10 +307,10 @@ class TestFindAllByAttribute(TreeTest):
f = tree.find_all("gar", class_=re.compile("a"))
self.assertSelects(f, ["Found it"])
# Since the class is not the string "foo bar", but the two
# strings "foo" and "bar", this will not find anything.
# If the search fails to match the individual strings "foo" and "bar",
# it will be tried against the combined string "foo bar".
f = tree.find_all("gar", class_=re.compile("o b"))
self.assertSelects(f, [])
self.assertSelects(f, ["Found it"])
def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
soup = self.soup("<a class='bar'>Found it</a>")
@@ -335,7 +348,7 @@ class TestFindAllByAttribute(TreeTest):
strainer = SoupStrainer(attrs={'id' : 'first'})
self.assertSelects(tree.find_all(strainer), ['Match.'])
def test_find_all_with_missing_atribute(self):
def test_find_all_with_missing_attribute(self):
# You can pass in None as the value of an attribute to find_all.
# This will match tags that do not have that attribute set.
tree = self.soup("""<a id="1">ID present.</a>
@@ -1273,6 +1286,10 @@ class TestCDAtaListAttributes(SoupTest):
soup = self.soup("<a class='foo\tbar'>")
self.assertEqual(b'<a class="foo bar"></a>', soup.a.encode())
def test_get_attribute_list(self):
soup = self.soup("<a id='abc def'>")
self.assertEqual(['abc def'], soup.a.get_attribute_list('id'))
def test_accept_charset(self):
soup = self.soup('<form accept-charset="ISO-8859-1 UTF-8">')
self.assertEqual(['ISO-8859-1', 'UTF-8'], soup.form['accept-charset'])
@@ -1328,6 +1345,13 @@ class TestPersistence(SoupTest):
copied = copy.deepcopy(self.tree)
self.assertEqual(copied.decode(), self.tree.decode())
def test_copy_preserves_encoding(self):
soup = BeautifulSoup(b'<p>&nbsp;</p>', 'html.parser')
encoding = soup.original_encoding
copy = soup.__copy__()
self.assertEqual(u"<p> </p>", unicode(copy))
self.assertEqual(encoding, copy.original_encoding)
def test_unicode_pickle(self):
# A tree containing Unicode characters can be pickled.
html = u"<b>\N{SNOWMAN}</b>"
@@ -1676,8 +1700,8 @@ class TestSoupSelector(TreeTest):
def setUp(self):
self.soup = BeautifulSoup(self.HTML, 'html.parser')
def assertSelects(self, selector, expected_ids):
el_ids = [el['id'] for el in self.soup.select(selector)]
def assertSelects(self, selector, expected_ids, **kwargs):
el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)]
el_ids.sort()
expected_ids.sort()
self.assertEqual(expected_ids, el_ids,
@@ -1720,6 +1744,13 @@ class TestSoupSelector(TreeTest):
for selector in ('html div', 'html body div', 'body div'):
self.assertSelects(selector, ['data1', 'main', 'inner', 'footer'])
def test_limit(self):
self.assertSelects('html div', ['main'], limit=1)
self.assertSelects('html body div', ['inner', 'main'], limit=2)
self.assertSelects('body div', ['data1', 'main', 'inner', 'footer'],
limit=10)
def test_tag_no_match(self):
self.assertEqual(len(self.soup.select('del')), 0)
@@ -1902,6 +1933,14 @@ class TestSoupSelector(TreeTest):
('div[data-tag]', ['data1'])
)
def test_quoted_space_in_selector_name(self):
html = """<div style="display: wrong">nope</div>
<div style="display: right">yes</div>
"""
soup = BeautifulSoup(html, 'html.parser')
[chosen] = soup.select('div[style="display: right"]')
self.assertEqual("yes", chosen.string)
def test_unsupported_pseudoclass(self):
self.assertRaises(
NotImplementedError, self.soup.select, "a:no-such-pseudoclass")
@@ -1,3 +1,3 @@
from .core import where, old_where
__version__ = "2017.04.17"
__version__ = "2018.01.18"
File diff suppressed because it is too large Load Diff
+5 -4
View File
@@ -19,18 +19,19 @@ class DeprecatedBundleWarning(DeprecationWarning):
def where():
f = os.path.split(__file__)[0]
f = os.path.dirname(__file__)
return os.path.join(f, 'cacert.pem')
def old_where():
warnings.warn(
"The weak security bundle is being deprecated.",
"The weak security bundle has been removed. certifi.old_where() is now an alias "
"of certifi.where(). Please update your code to use certifi.where() instead. "
"certifi.old_where() will be removed in 2018.",
DeprecatedBundleWarning
)
f = os.path.split(__file__)[0]
return os.path.join(f, 'weak.pem')
return where()
if __name__ == '__main__':
print(where())
File diff suppressed because it is too large Load Diff
+436
View File
@@ -0,0 +1,436 @@
"""contextlib2 - backports and enhancements to the contextlib module"""
import sys
import warnings
from collections import deque
from functools import wraps
__all__ = ["contextmanager", "closing", "ContextDecorator", "ExitStack",
"redirect_stdout", "redirect_stderr", "suppress"]
# Backwards compatibility
__all__ += ["ContextStack"]
class ContextDecorator(object):
    """Mixin that lets a context manager double as a function decorator.

    Decorating a function with an instance wraps every call to that
    function in a ``with`` block driven by the instance (or by whatever
    ``_recreate_cm()`` returns for it).
    """

    def refresh_cm(self):
        """Return the context manager that should wrap the decorated call.

        DEPRECATED: this hook never made it into the standard library's
        ContextDecorator API.  It emits a DeprecationWarning and then
        delegates to ``_recreate_cm()``.
        """
        warnings.warn("refresh_cm was never added to the standard library",
                      DeprecationWarning)
        return self._recreate_cm()

    def _recreate_cm(self):
        """Return the context manager instance to use for one call.

        The base implementation hands back *self*.  One-shot managers
        (such as _GeneratorContextManager) override this private hook to
        build a fresh instance per call; see issue #11647 for details.
        """
        return self

    def __call__(self, func):
        """Wrap *func* so each invocation runs inside this context manager."""
        @wraps(func)
        def wrapper(*args, **kwds):
            manager = self._recreate_cm()
            with manager:
                return func(*args, **kwds)
        return wrapper
class _GeneratorContextManager(ContextDecorator):
    """Helper for @contextmanager decorator."""

    def __init__(self, func, args, kwds):
        """Create the generator and remember how it was created.

        func, args and kwds are kept so that _recreate_cm() can build a
        fresh instance with the same call.
        """
        self.gen = func(*args, **kwds)
        self.func, self.args, self.kwds = func, args, kwds
        # Issue 19330: ensure context manager instances have good docstrings
        doc = getattr(func, "__doc__", None)
        if doc is None:
            doc = type(self).__doc__
        self.__doc__ = doc
        # Unfortunately, this still doesn't provide good help output when
        # inspecting the created context manager instances, since pydoc
        # currently bypasses the instance docstring and shows the docstring
        # for the class instead.
        # See http://bugs.python.org/issue19404 for more details.

    def _recreate_cm(self):
        """Return a new instance built from the stored func/args/kwds."""
        # _GCM instances are one-shot context managers, so the
        # CM must be recreated each time a decorated function is
        # called
        return self.__class__(self.func, self.args, self.kwds)

    def __enter__(self):
        """Advance the generator to its first yield and return that value.

        Raises RuntimeError if the generator finishes without yielding.
        """
        try:
            return next(self.gen)
        except StopIteration:
            raise RuntimeError("generator didn't yield")

    def __exit__(self, type, value, traceback):
        """Resume the generator, normally or by throwing the exception in.

        Without an exception the generator must finish on resumption,
        otherwise RuntimeError is raised.  With an exception it is thrown
        into the generator; a true return value means the generator
        swallowed it, a false/None return lets it propagate, and any
        different exception raised by the generator is re-raised.
        """
        if type is None:
            try:
                next(self.gen)
            except StopIteration:
                return
            else:
                raise RuntimeError("generator didn't stop")
        else:
            if value is None:
                # Need to force instantiation so we can reliably
                # tell if we get the same exception back
                value = type()
            try:
                self.gen.throw(type, value, traceback)
                # Reaching this line means the generator yielded again
                # instead of finishing; the RuntimeError raised here is
                # handled by the "except RuntimeError" clause below.
                raise RuntimeError("generator didn't stop after throw()")
            except StopIteration as exc:
                # Suppress StopIteration *unless* it's the same exception that
                # was passed to throw().  This prevents a StopIteration
                # raised inside the "with" statement from being suppressed.
                return exc is not value
            except RuntimeError as exc:
                # Don't re-raise the passed in exception
                if exc is value:
                    return False
                # Likewise, avoid suppressing if a StopIteration exception
                # was passed to throw() and later wrapped into a RuntimeError
                # (see PEP 479).
                if _HAVE_EXCEPTION_CHAINING and exc.__cause__ is value:
                    return False
                raise
            except:
                # only re-raise if it's *not* the exception that was
                # passed to throw(), because __exit__() must not raise
                # an exception unless __exit__() itself failed.  But throw()
                # has to raise the exception to signal propagation, so this
                # fixes the impedance mismatch between the throw() protocol
                # and the __exit__() protocol.
                #
                if sys.exc_info()[1] is not value:
                    raise
def contextmanager(func):
    """Turn a single-yield generator function into a context manager factory.

    Typical usage:

        @contextmanager
        def some_generator(<arguments>):
            <setup>
            try:
                yield <value>
            finally:
                <cleanup>

    With that definition, this:

        with some_generator(<arguments>) as <variable>:
            <body>

    behaves like this:

        <setup>
        try:
            <variable> = <value>
            <body>
        finally:
            <cleanup>

    """
    @wraps(func)
    def make_manager(*args, **kwds):
        # Each call produces a new one-shot manager around a new generator.
        manager = _GeneratorContextManager(func, args, kwds)
        return manager
    return make_manager
class closing(object):
    """Context manager that calls ``close()`` on an object when the block ends.

    Writing:

        with closing(<module>.open(<arguments>)) as f:
            <block>

    has the same effect as:

        f = <module>.open(<arguments>)
        try:
            <block>
        finally:
            f.close()

    """

    def __init__(self, thing):
        # The wrapped object; exposed unchanged by __enter__.
        self.thing = thing

    def __enter__(self):
        """Return the wrapped object itself."""
        wrapped = self.thing
        return wrapped

    def __exit__(self, *exc_info):
        """Close the wrapped object; never suppresses an exception."""
        wrapped = self.thing
        wrapped.close()
class _RedirectStream(object):
    """Shared machinery for temporarily replacing an attribute of ``sys``.

    Subclasses set ``_stream`` to the name of the ``sys`` attribute to swap.
    """

    _stream = None

    def __init__(self, new_target):
        self._new_target = new_target
        # A stack (rather than a single slot) of replaced targets keeps the
        # context manager re-entrant.
        self._old_targets = []

    def __enter__(self):
        """Install the new target on ``sys`` and return it."""
        stream_name = self._stream
        previous = getattr(sys, stream_name)
        self._old_targets.append(previous)
        setattr(sys, stream_name, self._new_target)
        return self._new_target

    def __exit__(self, exctype, excinst, exctb):
        """Put back the target that was active when this level was entered."""
        previous = self._old_targets.pop()
        setattr(sys, self._stream, previous)


class redirect_stdout(_RedirectStream):
    """Context manager that temporarily points ``sys.stdout`` at another file.

        # Send help() to stderr
        with redirect_stdout(sys.stderr):
            help(dir)

        # Write help() to a file
        with open('help.txt', 'w') as f:
            with redirect_stdout(f):
                help(pow)
    """

    _stream = "stdout"


class redirect_stderr(_RedirectStream):
    """Context manager that temporarily points ``sys.stderr`` at another file."""

    _stream = "stderr"
class suppress(object):
    """Context manager that swallows the given exception types.

    When one of the listed exceptions escapes the block it is discarded and
    execution continues with the statement after the ``with`` block.

        with suppress(FileNotFoundError):
            os.remove(somefile)
        # Execution still resumes here if the file was already removed
    """

    def __init__(self, *exceptions):
        self._exceptions = exceptions

    def __enter__(self):
        """Nothing to set up; the ``as`` target is None."""
        return None

    def __exit__(self, exctype, excinst, exctb):
        """Return True when the raised type is a subclass of a listed type."""
        # Unlike isinstance and issubclass, CPython exception handling
        # currently only looks at the concrete type hierarchy (ignoring
        # the instance and subclass checking hooks).  suppress deliberately
        # uses the simpler issubclass-based semantics instead of trying to
        # reproduce that interpreter limitation exactly.
        #
        # See http://bugs.python.org/issue12029 for more details
        if exctype is None:
            return False
        return issubclass(exctype, self._exceptions)
# Context manipulation is Python 3 only
_HAVE_EXCEPTION_CHAINING = sys.version_info[0] >= 3
if _HAVE_EXCEPTION_CHAINING:
def _make_context_fixer(frame_exc):
def _fix_exception_context(new_exc, old_exc):
# Context may not be correct, so find the end of the chain
while 1:
exc_context = new_exc.__context__
if exc_context is old_exc:
# Context is already set correctly (see issue 20317)
return
if exc_context is None or exc_context is frame_exc:
break
new_exc = exc_context
# Change the end of the chain to point to the exception
# we expect it to reference
new_exc.__context__ = old_exc
return _fix_exception_context
def _reraise_with_existing_context(exc_details):
try:
# bare "raise exc_details[1]" replaces our carefully
# set-up context
fixed_ctx = exc_details[1].__context__
raise exc_details[1]
except BaseException:
exc_details[1].__context__ = fixed_ctx
raise
else:
# No exception context in Python 2
def _make_context_fixer(frame_exc):
return lambda new_exc, old_exc: None
# Use 3 argument raise in Python 2,
# but use exec to avoid SyntaxError in Python 3
def _reraise_with_existing_context(exc_details):
exc_type, exc_value, exc_tb = exc_details
exec ("raise exc_type, exc_value, exc_tb")
# Handle old-style classes if they exist
try:
from types import InstanceType
except ImportError:
# Python 3 doesn't have old-style classes
_get_type = type
else:
# Need to handle old-style context managers on Python 2
def _get_type(obj):
obj_type = type(obj)
if obj_type is InstanceType:
return obj.__class__ # Old-style class
return obj_type # New-style class
# Inspired by discussions on http://bugs.python.org/issue13585
class ExitStack(object):
    """Context manager for dynamic management of a stack of exit callbacks

    For example:

        with ExitStack() as stack:
            files = [stack.enter_context(open(fname)) for fname in filenames]
            # All opened files will automatically be closed at the end of
            # the with statement, even if attempts to open files later
            # in the list raise an exception

    """
    def __init__(self):
        # Callbacks are appended on registration and popped from the same
        # end in __exit__, giving last-in, first-out unwinding.
        self._exit_callbacks = deque()

    def pop_all(self):
        """Preserve the context stack by transferring it to a new instance"""
        new_stack = type(self)()
        new_stack._exit_callbacks = self._exit_callbacks
        # This stack starts over empty; the callbacks now belong to new_stack.
        self._exit_callbacks = deque()
        return new_stack

    def _push_cm_exit(self, cm, cm_exit):
        """Helper to correctly register callbacks to __exit__ methods"""
        def _exit_wrapper(*exc_details):
            return cm_exit(cm, *exc_details)
        # Record which context manager this wrapper belongs to.
        _exit_wrapper.__self__ = cm
        self.push(_exit_wrapper)

    def push(self, exit):
        """Registers a callback with the standard __exit__ method signature

        Can suppress exceptions the same way __exit__ methods can.

        Also accepts any object with an __exit__ method (registering a call
        to the method instead of the object itself)

        Returns the object that was passed in.
        """
        # We use an unbound method rather than a bound method to follow
        # the standard lookup behaviour for special methods
        _cb_type = _get_type(exit)
        try:
            exit_method = _cb_type.__exit__
        except AttributeError:
            # Not a context manager, so assume it's a callable
            self._exit_callbacks.append(exit)
        else:
            self._push_cm_exit(exit, exit_method)
        return exit # Allow use as a decorator

    def callback(self, callback, *args, **kwds):
        """Registers an arbitrary callback and arguments.

        Cannot suppress exceptions.

        Returns the callback that was passed in.
        """
        def _exit_wrapper(exc_type, exc, tb):
            # The exception details are ignored and nothing is returned,
            # which is why this kind of callback cannot suppress anything.
            callback(*args, **kwds)
        # We changed the signature, so using @wraps is not appropriate, but
        # setting __wrapped__ may still help with introspection
        _exit_wrapper.__wrapped__ = callback
        self.push(_exit_wrapper)
        return callback # Allow use as a decorator

    def enter_context(self, cm):
        """Enters the supplied context manager

        If successful, also pushes its __exit__ method as a callback and
        returns the result of the __enter__ method.
        """
        # We look up the special methods on the type to match the with statement
        _cm_type = _get_type(cm)
        # __exit__ is looked up before __enter__ is called, so a missing
        # __exit__ fails before the context is entered.
        _exit = _cm_type.__exit__
        result = _cm_type.__enter__(cm)
        self._push_cm_exit(cm, _exit)
        return result

    def close(self):
        """Immediately unwind the context stack"""
        self.__exit__(None, None, None)

    def __enter__(self):
        """Return the stack itself as the ``as`` target."""
        return self

    def __exit__(self, *exc_details):
        """Run all registered callbacks in LIFO order.

        Each callback receives the exception details that are current at
        that point: the original ones, (None, None, None) once a callback
        has suppressed the exception, or those of an exception raised by a
        later-registered callback.  If an exception is still pending after
        the last callback it is re-raised; otherwise the return value is
        true only when an exception was passed in and a callback
        suppressed it.
        """
        received_exc = exc_details[0] is not None

        # We manipulate the exception state so it behaves as though
        # we were actually nesting multiple with statements
        frame_exc = sys.exc_info()[1]
        _fix_exception_context = _make_context_fixer(frame_exc)

        # Callbacks are invoked in LIFO order to match the behaviour of
        # nested context managers
        suppressed_exc = False
        pending_raise = False
        while self._exit_callbacks:
            cb = self._exit_callbacks.pop()
            try:
                if cb(*exc_details):
                    suppressed_exc = True
                    pending_raise = False
                    exc_details = (None, None, None)
            except:
                new_exc_details = sys.exc_info()
                # simulate the stack of exceptions by setting the context
                _fix_exception_context(new_exc_details[1], exc_details[1])
                pending_raise = True
                exc_details = new_exc_details
        if pending_raise:
            _reraise_with_existing_context(exc_details)
        return received_exc and suppressed_exc
# Old name kept so existing callers continue to work
class ContextStack(ExitStack):
    """Deprecated spelling of ExitStack with the old method names."""

    def __init__(self):
        message = "ContextStack has been renamed to ExitStack"
        warnings.warn(message, DeprecationWarning)
        super(ContextStack, self).__init__()

    def register_exit(self, callback):
        """Old name for ``push``."""
        registered = self.push(callback)
        return registered

    def register(self, callback, *args, **kwds):
        """Old name for ``callback``."""
        registered = self.callback(callback, *args, **kwds)
        return registered

    def preserve(self):
        """Old name for ``pop_all``."""
        transferred = self.pop_all()
        return transferred
@@ -1,4 +1,4 @@
__version__ = '0.6.2'
__version__ = '0.6.5'
from .lock import Lock # noqa
from .lock import NeedRegenerationException # noqa
from .lock import NeedRegenerationException # noqa
+8
View File
@@ -13,6 +13,13 @@ class NoValue(object):
def payload(self):
return self
def __repr__(self):
"""Ensure __repr__ is a consistent value in case NoValue is used to
fill another cache key.
"""
return '<dogpile.cache.api.NoValue object>'
if py3k:
def __bool__(self): # pragma NO COVERAGE
return False
@@ -20,6 +27,7 @@ class NoValue(object):
def __nonzero__(self): # pragma NO COVERAGE
return False
NO_VALUE = NoValue()
"""Value returned from ``get()`` that describes
a key not present."""
+8
View File
@@ -15,3 +15,11 @@ class RegionNotConfigured(DogpileCacheException):
class ValidationError(DogpileCacheException):
"""Error validating a value or option."""
class PluginNotFound(DogpileCacheException):
"""The specified plugin could not be found.
.. versionadded:: 0.6.4
"""
+35 -5
View File
@@ -410,7 +410,13 @@ class CacheRegion(object):
"configured with backend: %s. "
"Specify replace_existing_backend=True to replace."
% self.backend)
backend_cls = _backend_loader.load(backend)
try:
backend_cls = _backend_loader.load(backend)
except PluginLoader.NotFound:
raise exception.PluginNotFound(
"Couldn't find cache plugin to load: %s" % backend)
if _config_argument_dict:
self.backend = backend_cls.from_config_dict(
_config_argument_dict,
@@ -487,8 +493,19 @@ class CacheRegion(object):
a value. Any retrieved value whose creation
time is prior to this timestamp
is considered to be stale. It does not
affect the data in the cache in any way, and is also
local to this instance of :class:`.CacheRegion`.
affect the data in the cache in any way, and is
**local to this instance of :class:`.CacheRegion`.**
.. warning::
The :meth:`.CacheRegion.invalidate` method's default mode of
operation is to set a timestamp **local to this CacheRegion
in this Python process only**. It does not impact other Python
processes or regions as the timestamp is **only stored locally in
memory**. To implement invalidation where the
timestamp is stored in the cache or similar so that all Python
processes can be affected by an invalidation timestamp, implement a
custom :class:`.RegionInvalidationStrategy`.
Once set, the invalidation time is honored by
the :meth:`.CacheRegion.get_or_create`,
@@ -550,6 +567,8 @@ class CacheRegion(object):
_config_prefix="%sarguments." % prefix,
wrap=config_dict.get(
"%swrap" % prefix, None),
replace_existing_backend=config_dict.get(
"%sreplace_existing_backend" % prefix, False),
)
@memoized_property
@@ -944,11 +963,14 @@ class CacheRegion(object):
if not should_cache_fn:
self.backend.set_multi(values_w_created)
else:
self.backend.set_multi(dict(
values_to_cache = dict(
(k, v)
for k, v in values_w_created.items()
if should_cache_fn(v[0])
))
)
if values_to_cache:
self.backend.set_multi(values_to_cache)
values.update(values_w_created)
return [values[orig_to_mangled[k]].payload for k in keys]
@@ -1075,6 +1097,14 @@ class CacheRegion(object):
.. versionadded:: 0.5.0 Added ``refresh()`` method to decorated
function.
``original()`` on other hand will invoke the decorated function
without any caching::
newvalue = generate_something.original(5, 6)
.. versionadded:: 0.6.0 Added ``original()`` method to decorated
function.
Lastly, the ``get()`` method returns either the value cached
for the given key, or the token ``NO_VALUE`` if no such key
exists::
@@ -1,4 +1,4 @@
from .nameregistry import NameRegistry # noqa
from .readwrite_lock import ReadWriteMutex # noqa
from .langhelpers import PluginLoader, memoized_property, \
coerce_string_conf, to_list, KeyReentrantMutex # noqa
coerce_string_conf, to_list, KeyReentrantMutex # noqa
@@ -39,9 +39,9 @@ class PluginLoader(object):
self.impls[name] = impl.load
return impl.load()
else:
raise Exception(
"Can't load plugin %s %s" %
(self.group, name))
raise self.NotFound(
"Can't load plugin %s %s" % (self.group, name)
)
def register(self, name, modulepath, objname):
def load():
@@ -49,6 +49,9 @@ class PluginLoader(object):
return getattr(mod, objname)
self.impls[name] = load
class NotFound(Exception):
"""The specified plugin could not be found."""
class memoized_property(object):
"""A read-only @property that is only evaluated once."""
+32
View File
@@ -0,0 +1,32 @@
Copyright (c) 2013, Ethan Furman.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
Redistributions of source code must retain the above
copyright notice, this list of conditions and the
following disclaimer.
Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
Neither the name Ethan Furman nor the names of any
contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
+3
View File
@@ -0,0 +1,3 @@
enum34 is the new Python stdlib enum module available in Python 3.4
backported for previous versions of Python from 2.4 to 3.3.
Tested on Python 2.6, 2.7, and 3.3+.
+837
View File
@@ -0,0 +1,837 @@
"""Python Enumerations"""
import sys as _sys
__all__ = ['Enum', 'IntEnum', 'unique']
# Version of this module as a (major, minor, micro) tuple.
version = 1, 1, 6
# Running interpreter's "major.minor" collapsed into a float so it can be
# compared with plain numbers (e.g. ``pyver < 3.0``).
# NOTE(review): the float form loses two-digit minor versions (3.10 becomes
# 3.1).  The checks visible in this part of the module only compare against
# 2.6 and 3.0, for which that looks harmless -- confirm before adding any
# finer-grained comparison.
pyver = float('%s.%s' % _sys.version_info[:2])
# Provide any() where the builtin is missing
try:
    any
except NameError:
    def any(iterable):
        """Return True as soon as one element of iterable is truthy."""
        for candidate in iterable:
            if not candidate:
                continue
            return True
        return False

# OrderedDict is optional; None signals that it is unavailable
try:
    from collections import OrderedDict
except ImportError:
    OrderedDict = None

# basestring: common ancestor of str and unicode on Python 2.  Python 3
# only has str (and 3.1 lacked the name entirely), so alias it there.
try:
    basestring
except NameError:
    basestring = str

# unicode: gone on Python 3, where str already is the text type
try:
    unicode
except NameError:
    unicode = str
class _RouteClassAttributeToGetattr(object):
"""Route attribute access on a class to __getattr__.
This is a descriptor, used to define attributes that act differently when
accessed through an instance and through a class. Instance access remains
normal, but access to an attribute through a class will be routed to the
class's __getattr__ method; this is done by raising AttributeError.
"""
def __init__(self, fget=None):
self.fget = fget
def __get__(self, instance, ownerclass=None):
if instance is None:
raise AttributeError()
return self.fget(instance)
def __set__(self, instance, value):
raise AttributeError("can't set attribute")
def __delete__(self, instance):
raise AttributeError("can't delete attribute")
def _is_descriptor(obj):
"""Returns True if obj is a descriptor, False otherwise."""
return (
hasattr(obj, '__get__') or
hasattr(obj, '__set__') or
hasattr(obj, '__delete__'))
def _is_dunder(name):
"""Returns True if a __dunder__ name, False otherwise."""
return (name[:2] == name[-2:] == '__' and
name[2:3] != '_' and
name[-3:-2] != '_' and
len(name) > 4)
def _is_sunder(name):
"""Returns True if a _sunder_ name, False otherwise."""
return (name[0] == name[-1] == '_' and
name[1:2] != '_' and
name[-2:-1] != '_' and
len(name) > 2)
def _make_class_unpicklable(cls):
"""Make the given class un-picklable."""
def _break_on_call_reduce(self, protocol=None):
raise TypeError('%r cannot be pickled' % self)
cls.__reduce_ex__ = _break_on_call_reduce
cls.__module__ = '<unknown>'
class _EnumDict(dict):
    """Class-body namespace that records enum member names in order.

    Every key that turns out to be an enum member is appended to
    self._member_names; EnumMeta reads that list to know which entries of
    the class body are members.
    """

    def __init__(self):
        super(_EnumDict, self).__init__()
        self._member_names = []

    def __setitem__(self, key, value):
        """Store key/value, classifying the key on the way in.

        * ``_order_`` / ``__order__`` are dropped on Python 3; on Python 2
          ``__order__`` is stored under the name ``_order_``.
        * Any other single-underscore (sunder) name raises ValueError.
        * Dunder names are stored without further checks.
        * Reusing an existing member name raises TypeError.
        * A non-descriptor value becomes a new member, unless the key is
          already defined, which raises TypeError.
        * Descriptors are stored without becoming members.

        Duplicate member *values* are not checked here.
        """
        if pyver >= 3.0 and key in ('_order_', '__order__'):
            return
        if key == '__order__':
            key = '_order_'

        sunder = _is_sunder(key)
        if sunder and key != '_order_':
            raise ValueError('_names_ are reserved for future Enum use')

        if not sunder and not _is_dunder(key):
            if key in self._member_names:
                # descriptor overwriting an enum?
                raise TypeError('Attempted to reuse key: %r' % key)
            if not _is_descriptor(value):
                if key in self:
                    # enum overwriting a descriptor?
                    raise TypeError('Key already defined as: %r' % self[key])
                self._member_names.append(key)

        super(_EnumDict, self).__setitem__(key, value)
# Dummy value for Enum as EnumMeta explicitly checks for it, but of course until
# EnumMeta finishes running the first time the Enum class doesn't exist. This
# is also why there are checks in EnumMeta like `if Enum is not None`
Enum = None
class EnumMeta(type):
"""Metaclass for Enum"""
@classmethod
def __prepare__(metacls, cls, bases):
return _EnumDict()
def __new__(metacls, cls, bases, classdict):
# an Enum class is final once enumeration items have been defined; it
# cannot be mixed with other types (int, float, etc.) if it has an
# inherited __new__ unless a new __new__ is defined (or the resulting
# class will fail).
if type(classdict) is dict:
original_dict = classdict
classdict = _EnumDict()
for k, v in original_dict.items():
classdict[k] = v
member_type, first_enum = metacls._get_mixins_(bases)
__new__, save_new, use_args = metacls._find_new_(classdict, member_type,
first_enum)
# save enum items into separate mapping so they don't get baked into
# the new class
members = dict((k, classdict[k]) for k in classdict._member_names)
for name in classdict._member_names:
del classdict[name]
# py2 support for definition order
_order_ = classdict.get('_order_')
if _order_ is None:
if pyver < 3.0:
try:
_order_ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])]
except TypeError:
_order_ = [name for name in sorted(members.keys())]
else:
_order_ = classdict._member_names
else:
del classdict['_order_']
if pyver < 3.0:
_order_ = _order_.replace(',', ' ').split()
aliases = [name for name in members if name not in _order_]
_order_ += aliases
# check for illegal enum names (any others?)
invalid_names = set(members) & set(['mro'])
if invalid_names:
raise ValueError('Invalid enum member name(s): %s' % (
', '.join(invalid_names), ))
# save attributes from super classes so we know if we can take
# the shortcut of storing members in the class dict
base_attributes = set([a for b in bases for a in b.__dict__])
# create our new Enum type
enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict)
enum_class._member_names_ = [] # names in random order
if OrderedDict is not None:
enum_class._member_map_ = OrderedDict()
else:
enum_class._member_map_ = {} # name->value map
enum_class._member_type_ = member_type
# Reverse value->name map for hashable values.
enum_class._value2member_map_ = {}
# instantiate them, checking for duplicates as we go
# we instantiate first instead of checking for duplicates first in case
# a custom __new__ is doing something funky with the values -- such as
# auto-numbering ;)
if __new__ is None:
__new__ = enum_class.__new__
for member_name in _order_:
value = members[member_name]
if not isinstance(value, tuple):
args = (value, )
else:
args = value
if member_type is tuple: # special case for tuple enums
args = (args, ) # wrap it one more time
if not use_args or not args:
enum_member = __new__(enum_class)
if not hasattr(enum_member, '_value_'):
enum_member._value_ = value
else:
enum_member = __new__(enum_class, *args)
if not hasattr(enum_member, '_value_'):
enum_member._value_ = member_type(*args)
value = enum_member._value_
enum_member._name_ = member_name
enum_member.__objclass__ = enum_class
enum_member.__init__(*args)
# If another member with the same value was already defined, the
# new member becomes an alias to the existing one.
for name, canonical_member in enum_class._member_map_.items():
if canonical_member.value == enum_member._value_:
enum_member = canonical_member
break
else:
# Aliases don't appear in member names (only in __members__).
enum_class._member_names_.append(member_name)
# performance boost for any member that would not shadow
# a DynamicClassAttribute (aka _RouteClassAttributeToGetattr)
if member_name not in base_attributes:
setattr(enum_class, member_name, enum_member)
# now add to _member_map_
enum_class._member_map_[member_name] = enum_member
try:
# This may fail if value is not hashable. We can't add the value
# to the map, and by-value lookups for this value will be
# linear.
enum_class._value2member_map_[value] = enum_member
except TypeError:
pass
# If a custom type is mixed into the Enum, and it does not know how
# to pickle itself, pickle.dumps will succeed but pickle.loads will
# fail. Rather than have the error show up later and possibly far
# from the source, sabotage the pickle protocol for this class so
# that pickle.dumps also fails.
#
# However, if the new class implements its own __reduce_ex__, do not
# sabotage -- it's on them to make sure it works correctly. We use
# __reduce_ex__ instead of any of the others as it is preferred by
# pickle over __reduce__, and it handles all pickle protocols.
unpicklable = False
if '__reduce_ex__' not in classdict:
if member_type is not object:
methods = ('__getnewargs_ex__', '__getnewargs__',
'__reduce_ex__', '__reduce__')
if not any(m in member_type.__dict__ for m in methods):
_make_class_unpicklable(enum_class)
unpicklable = True
# double check that repr and friends are not the mixin's or various
# things break (such as pickle)
for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'):
class_method = getattr(enum_class, name)
obj_method = getattr(member_type, name, None)
enum_method = getattr(first_enum, name, None)
if name not in classdict and class_method is not enum_method:
if name == '__reduce_ex__' and unpicklable:
continue
setattr(enum_class, name, enum_method)
# method resolution and int's are not playing nice
# Python's less than 2.6 use __cmp__
if pyver < 2.6:
if issubclass(enum_class, int):
setattr(enum_class, '__cmp__', getattr(int, '__cmp__'))
elif pyver < 3.0:
if issubclass(enum_class, int):
for method in (
'__le__',
'__lt__',
'__gt__',
'__ge__',
'__eq__',
'__ne__',
'__hash__',
):
setattr(enum_class, method, getattr(int, method))
# replace any other __new__ with our own (as long as Enum is not None,
# anyway) -- again, this is to support pickle
if Enum is not None:
# if the user defined their own __new__, save it before it gets
# clobbered in case they subclass later
if save_new:
setattr(enum_class, '__member_new__', enum_class.__dict__['__new__'])
setattr(enum_class, '__new__', Enum.__dict__['__new__'])
return enum_class
def __bool__(cls):
"""
classes/types should always be True.
"""
return True
def __call__(cls, value, names=None, module=None, type=None, start=1):
"""Either returns an existing member, or creates a new enum class.
This method is used both when an enum class is given a value to match
to an enumeration member (i.e. Color(3)) and for the functional API
(i.e. Color = Enum('Color', names='red green blue')).
When used for the functional API: `module`, if set, will be stored in
the new class' __module__ attribute; `type`, if set, will be mixed in
as the first base class.
Note: if `module` is not set this routine will attempt to discover the
calling module by walking the frame stack; if this is unsuccessful
the resulting class will not be pickleable.
"""
if names is None: # simple value lookup
return cls.__new__(cls, value)
# otherwise, functional API: we're creating a new Enum type
return cls._create_(value, names, module=module, type=type, start=start)
def __contains__(cls, member):
return isinstance(member, cls) and member.name in cls._member_map_
def __delattr__(cls, attr):
# nicer error message when someone tries to delete an attribute
# (see issue19025).
if attr in cls._member_map_:
raise AttributeError(
"%s: cannot delete Enum member." % cls.__name__)
super(EnumMeta, cls).__delattr__(attr)
def __dir__(self):
return (['__class__', '__doc__', '__members__', '__module__'] +
self._member_names_)
@property
def __members__(cls):
"""Returns a mapping of member name->value.
This mapping lists all enum members, including aliases. Note that this
is a copy of the internal mapping.
"""
return cls._member_map_.copy()
def __getattr__(cls, name):
"""Return the enum member matching `name`
We use __getattr__ instead of descriptors or inserting into the enum
class' __dict__ in order to support `name` and `value` being both
properties for enum members (which live in the class' __dict__) and
enum members themselves.
"""
if _is_dunder(name):
raise AttributeError(name)
try:
return cls._member_map_[name]
except KeyError:
raise AttributeError(name)
def __getitem__(cls, name):
return cls._member_map_[name]
def __iter__(cls):
return (cls._member_map_[name] for name in cls._member_names_)
def __reversed__(cls):
return (cls._member_map_[name] for name in reversed(cls._member_names_))
def __len__(cls):
return len(cls._member_names_)
__nonzero__ = __bool__
def __repr__(cls):
return "<enum %r>" % cls.__name__
def __setattr__(cls, name, value):
"""Block attempts to reassign Enum members.
A simple assignment to the class namespace only changes one of the
several possible ways to get an Enum member from the Enum class,
resulting in an inconsistent Enumeration.
"""
member_map = cls.__dict__.get('_member_map_', {})
if name in member_map:
raise AttributeError('Cannot reassign members.')
super(EnumMeta, cls).__setattr__(name, value)
def _create_(cls, class_name, names=None, module=None, type=None, start=1):
    """Convenience method to create a new Enum class.

    `names` can be:

    * A string containing member names, separated either with spaces or
      commas.  Values are auto-numbered beginning at `start`.
    * A list or tuple of member names.  Values are auto-numbered beginning
      at `start`.
    * An iterable of (member name, value) pairs.
    * A mapping of member name -> value.

    An empty string, list or tuple yields an enumeration without members.

    `module` becomes the ``__module__`` of the new class; when it is not
    given it is looked up in the globals of the frame two levels up, and if
    that lookup is not possible `_make_class_unpicklable` is applied to the
    class instead.  `type`, when given, is placed ahead of `cls` in the
    bases of the new class.

    Raises TypeError (on Python 2) if a unicode `class_name` cannot be
    encoded as ASCII.
    """
    if pyver < 3.0:
        # if class_name is unicode, attempt a conversion to ASCII
        if isinstance(class_name, unicode):
            try:
                class_name = class_name.encode('ascii')
            except UnicodeEncodeError:
                raise TypeError('%r is not representable in ASCII' % class_name)
    metacls = cls.__class__
    if type is None:
        bases = (cls, )
    else:
        bases = (type, cls)
    classdict = metacls.__prepare__(class_name, bases)
    _order_ = []

    # special processing needed for names?
    if isinstance(names, basestring):
        names = names.replace(',', ' ').split()
    # `names and ...` keeps the names[0] probe from raising IndexError when
    # the sequence is empty (e.g. Enum('Empty', '') or Enum('Empty', []))
    if (isinstance(names, (tuple, list)) and names
            and isinstance(names[0], basestring)):
        names = [(e, i+start) for (i, e) in enumerate(names)]

    # Here, names is either an iterable of (name, value) or a mapping.
    item = None  # in case names is empty
    for item in names:
        if isinstance(item, basestring):
            # iterating a mapping yields its keys
            member_name, member_value = item, names[item]
        else:
            member_name, member_value = item
        classdict[member_name] = member_value
        _order_.append(member_name)
    # only set _order_ in classdict if name/value was not from a mapping
    if not isinstance(item, basestring):
        classdict['_order_'] = ' '.join(_order_)
    enum_class = metacls.__new__(metacls, class_name, bases, classdict)

    # TODO: replace the frame hack if a blessed way to know the calling
    # module is ever developed
    if module is None:
        try:
            module = _sys._getframe(2).f_globals['__name__']
        except (AttributeError, ValueError):
            pass
    if module is None:
        _make_class_unpicklable(enum_class)
    else:
        enum_class.__module__ = module

    return enum_class
@staticmethod
def _get_mixins_(bases):
    """Determine the member data type and the first inherited enum class.

    bases: the tuple of bases that was given to __new__

    Returns a ``(member_type, first_enum)`` pair.  Raises TypeError when a
    base is an enumeration that already has members, or when the last base
    is not an Enum subclass.
    """
    if not bases or Enum is None:
        return object, Enum

    # refuse to subclass an enumeration that already defines members
    for base in bases:
        if base is Enum:
            continue
        if issubclass(base, Enum) and base._member_names_:
            raise TypeError("Cannot extend enumerations")

    # the enum type has to come last in the bases
    last_base = bases[-1]
    if not issubclass(last_base, Enum):
        raise TypeError("new enumerations must be created as "
                "`ClassName([mixin_type,] enum_type)`")

    leading_base = bases[0]
    if not issubclass(leading_base, Enum):
        # first base is the data type, last base is the enum type
        return leading_base, last_base

    # otherwise walk the MRO of the first base, e.g.
    #   most common: (IntEnum, int, Enum, object)
    #   possible:    (<Enum 'AutoIntEnum'>, <Enum 'IntEnum'>,
    #                 <class 'int'>, <Enum 'Enum'>,
    #                 <class 'object'>)
    # keeping the first Enum subclass and the first non-Enum class seen
    member_type = first_enum = None
    for klass in leading_base.__mro__:
        if issubclass(klass, Enum):
            if first_enum is None:
                first_enum = klass
        elif member_type is None:
            member_type = klass
    return member_type, first_enum
if pyver < 3.0:
    @staticmethod
    def _find_new_(classdict, member_type, first_enum):
        """Pick the __new__ used to create the enum members (Python 2).

        classdict: the class dictionary given to __new__
        member_type: the data type whose __new__ will be used by default
        first_enum: enumeration to check for an overriding __new__

        Returns a ``(__new__, save_new, use_args)`` triple.
        """
        # find the correct __new__, checking to see if one was defined by
        # the user; also check earlier enum classes in case a __new__ was
        # saved as __member_new__
        __new__ = classdict.get('__new__', None)
        if __new__:
            return None, True, True      # __new__, save_new, use_args

        none_new = getattr(None, '__new__')
        object_new = getattr(object, '__new__')
        if Enum is None:
            enum_new = none_new
        else:
            enum_new = Enum.__dict__['__new__']
        # candidates equal to any of these do not count as an override
        not_overrides = [None, none_new, object_new, enum_new]

        # check all possibles for __member_new__ before falling back to
        # __new__
        for method in ('__member_new__', '__new__'):
            for possible in (member_type, first_enum):
                try:
                    target = possible.__dict__[method]
                except (AttributeError, KeyError):
                    target = getattr(possible, method, None)
                if target in not_overrides:
                    continue
                if method == '__member_new__':
                    classdict['__new__'] = target
                    return None, False, True
                if isinstance(target, staticmethod):
                    target = target.__get__(member_type)
                __new__ = target
                break
            if __new__ is not None:
                break
        else:
            __new__ = object.__new__

        # if a non-object.__new__ is used then whatever value/tuple was
        # assigned to the enum member name will be passed to __new__ and to
        # the new enum member's __init__
        use_args = __new__ is not object.__new__
        return __new__, False, use_args
else:
    @staticmethod
    def _find_new_(classdict, member_type, first_enum):
        """Pick the __new__ used to create the enum members (Python 3).

        classdict: the class dictionary given to __new__
        member_type: the data type whose __new__ will be used by default
        first_enum: enumeration to check for an overriding __new__

        Returns a ``(__new__, save_new, use_args)`` triple.
        """
        # find the correct __new__, checking to see if one was defined by
        # the user; also check earlier enum classes in case a __new__ was
        # saved as __member_new__
        __new__ = classdict.get('__new__', None)

        # should __new__ be saved as __member_new__ later?
        save_new = __new__ is not None

        if not save_new:
            # candidates equal to any of these do not count as an override
            not_overrides = (
                None,
                None.__new__,
                object.__new__,
                Enum.__new__,
            )
            # check all possibles for __member_new__ before falling back to
            # __new__
            for method in ('__member_new__', '__new__'):
                for possible in (member_type, first_enum):
                    target = getattr(possible, method, None)
                    if target not in not_overrides:
                        __new__ = target
                        break
                if __new__ is not None:
                    break
            else:
                __new__ = object.__new__

        # if a non-object.__new__ is used then whatever value/tuple was
        # assigned to the enum member name will be passed to __new__ and to
        # the new enum member's __init__
        use_args = __new__ is not object.__new__
        return __new__, save_new, use_args
########################################################
# In order to support Python 2 and 3 with a single
# codebase we have to create the Enum methods separately
# and then use the `type(name, bases, dict)` method to
# create the class.
########################################################
# namespace that collects the attributes and methods for the Enum class
temp_enum_dict = {
    '__doc__': "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n",
}
def __new__(cls, value):
    """Return the existing member of `cls` whose value is `value`.

    A member of `cls` may be passed instead of a value, in which case its
    own value is used for the lookup.  Raises ValueError when nothing
    matches.
    """
    # all enum instances are actually created during class construction
    # without calling this method; this method is called by the metaclass'
    # __call__ (i.e. Color(3) ), and by pickle
    if type(value) is cls:
        # For lookups like Color(Color.red)
        value = value.value

    # by-value search for a matching enum member
    try:
        # hashable values are found through the reverse mapping
        if value in cls._value2member_map_:
            return cls._value2member_map_[value]
    except TypeError:
        # unhashable value: do the long search -- O(n) behavior
        for candidate in cls._member_map_.values():
            if candidate.value == value:
                return candidate
    raise ValueError("%s is not a valid %s" % (value, cls.__name__))
temp_enum_dict['__new__'] = __new__
del __new__
def __repr__(self):
    """Return ``<ClassName.member_name: value-repr>``."""
    owner = self.__class__.__name__
    return "<%s.%s: %r>" % (owner, self._name_, self._value_)
temp_enum_dict['__repr__'] = __repr__
del __repr__
def __str__(self):
    """Return ``ClassName.member_name``."""
    owner = self.__class__.__name__
    return "%s.%s" % (owner, self._name_)
temp_enum_dict['__str__'] = __str__
del __str__
if pyver >= 3.0:
    def __dir__(self):
        """List a fixed set of dunders plus the public non-member attributes.

        Every name found in the ``__dict__`` of a class in the MRO is
        included unless it starts with an underscore or names a member.
        """
        added_behavior = []
        for cls in self.__class__.mro():
            for attr in cls.__dict__:
                if attr[0] != '_' and attr not in self._member_map_:
                    added_behavior.append(attr)
        return ['__class__', '__doc__', '__module__'] + added_behavior
    temp_enum_dict['__dir__'] = __dir__
    del __dir__
def __format__(self, format_spec):
    """Format the member according to `format_spec`.

    Mixed-in Enums use the mixed-in type's __format__ on the member's
    value; otherwise we can get strange results with the Enum name showing
    up instead of the value.  Pure Enums format ``str(self)`` as a str.
    """
    if self._member_type_ is object:
        # pure Enum branch
        formatter = str
        subject = str(self)
    else:
        # mix-in branch
        formatter = self._member_type_
        subject = self.value
    return formatter.__format__(subject, format_spec)
temp_enum_dict['__format__'] = __format__
del __format__
####################################
# Python versions older than 2.6 use __cmp__
if pyver < 2.6:
    def __cmp__(self, other):
        """Compare two members of the same enumeration by identity.

        Returns 0 when `other` is this very member and -1 for any other
        member of the same class; returns NotImplemented for every other
        type.
        """
        if type(other) is self.__class__:
            if self is other:
                return 0
            return -1
        # an unreachable ``raise TypeError`` used to follow this return; it
        # was dead code and has been removed
        return NotImplemented
    temp_enum_dict['__cmp__'] = __cmp__
    del __cmp__

else:
    # ordering is not defined for plain enum members: each of the four
    # rich-comparison methods always raises TypeError

    def __le__(self, other):
        """Always raise TypeError: enum members are unorderable."""
        raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__le__'] = __le__
    del __le__

    def __lt__(self, other):
        """Always raise TypeError: enum members are unorderable."""
        raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__lt__'] = __lt__
    del __lt__

    def __ge__(self, other):
        """Always raise TypeError: enum members are unorderable."""
        raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__ge__'] = __ge__
    del __ge__

    def __gt__(self, other):
        """Always raise TypeError: enum members are unorderable."""
        raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__gt__'] = __gt__
    del __gt__
def __eq__(self, other):
    """Members of the same class are equal only if they are the same object.

    Returns NotImplemented when `other` is not exactly of this class.
    """
    if type(other) is not self.__class__:
        return NotImplemented
    return self is other
temp_enum_dict['__eq__'] = __eq__
del __eq__
def __ne__(self, other):
    """Members of the same class differ unless they are the same object.

    Returns NotImplemented when `other` is not exactly of this class.
    """
    if type(other) is not self.__class__:
        return NotImplemented
    return self is not other
temp_enum_dict['__ne__'] = __ne__
del __ne__
def __hash__(self):
    """Hash a member by its name (``_name_``)."""
    return hash(self._name_)
temp_enum_dict['__hash__'] = __hash__
del __hash__
def __reduce_ex__(self, proto):
    """Pickle support: reduce a member to ``(enum class, (value,))``.

    `proto` is accepted but not used.
    """
    return self.__class__, (self._value_, )
temp_enum_dict['__reduce_ex__'] = __reduce_ex__
del __reduce_ex__
# _RouteClassAttributeToGetattr is used to provide access to the `name`
# and `value` properties of enum members while keeping some measure of
# protection from modification, while still allowing for an enumeration
# to have members named `name` and `value`. This works because enumeration
# members are not set directly on the enum class -- __getattr__ is
# used to look them up.
@_RouteClassAttributeToGetattr
def name(self):
    """Return the member's name, as stored in ``_name_``."""
    return self._name_
temp_enum_dict['name'] = name
del name
@_RouteClassAttributeToGetattr
def value(self):
    """Return the member's value, as stored in ``_value_``."""
    return self._value_
temp_enum_dict['value'] = value
del value
@classmethod
def _convert(cls, name, module, filter, source=None):
    """
    Create a new Enum subclass that replaces a collection of global constants

    Every constant in `source` (or, when `source` is falsy, in the module
    named `module`) whose name passes `filter()` becomes a member of a new
    Enum called `name`.  The enum and its members are exported back into
    that module's globals, and the new class is returned.
    """
    module_globals = vars(_sys.modules[module])
    if source:
        namespace = vars(source)
    else:
        namespace = module_globals

    members = {}
    for const_name, const_value in namespace.items():
        if filter(const_name):
            members[const_name] = const_value

    enum_class = cls(name, members, module=module)
    # replace the __reduce_ex__ method so unpickling works in previous
    # Python versions
    enum_class.__reduce_ex__ = _reduce_ex_by_name
    module_globals.update(enum_class.__members__)
    module_globals[name] = enum_class
    return enum_class
temp_enum_dict['_convert'] = _convert
del _convert
# build the Enum class by calling the metaclass directly with the collected
# namespace, then drop the temporary dict
Enum = EnumMeta('Enum', (object, ), temp_enum_dict)
del temp_enum_dict
# Enum has now been created
###########################
# the mixed-in data type (int) is listed before the enum type in the bases
class IntEnum(int, Enum):
    """Enum where members are also (and must be) ints"""
def _reduce_ex_by_name(self, proto):
    """Return the member's name; `proto` is accepted but not used.

    Installed as ``__reduce_ex__`` on classes built by ``_convert``.
    """
    return self.name
def unique(enumeration):
    """Class decorator that ensures only unique members exist in an enumeration.

    Returns `enumeration` unchanged when no entry of ``__members__`` is an
    alias; otherwise raises ValueError listing each ``alias -> name`` pair.
    """
    aliases = [
        (alias, member.name)
        for alias, member in enumeration.__members__.items()
        if alias != member.name
    ]
    if aliases:
        listing = ', '.join(["%s -> %s" % pair for pair in aliases])
        raise ValueError(
            'duplicate names found in %r: %s' % (enumeration, listing))
    return enumeration
File diff suppressed because it is too large Load Diff
+735
View File
@@ -0,0 +1,735 @@
``enum`` --- support for enumerations
========================================
.. :synopsis: enumerations are sets of symbolic names bound to unique, constant
values.
.. :moduleauthor:: Ethan Furman <ethan@stoneleaf.us>
.. :sectionauthor:: Barry Warsaw <barry@python.org>,
.. :sectionauthor:: Eli Bendersky <eliben@gmail.com>,
.. :sectionauthor:: Ethan Furman <ethan@stoneleaf.us>
----------------
An enumeration is a set of symbolic names (members) bound to unique, constant
values. Within an enumeration, the members can be compared by identity, and
the enumeration itself can be iterated over.
Module Contents
---------------
This module defines two enumeration classes that can be used to define unique
sets of names and values: ``Enum`` and ``IntEnum``. It also defines
one decorator, ``unique``.
``Enum``
Base class for creating enumerated constants. See section `Functional API`_
for an alternate construction syntax.
``IntEnum``
Base class for creating enumerated constants that are also subclasses of ``int``.
``unique``
Enum class decorator that ensures only one name is bound to any one value.
Creating an Enum
----------------
Enumerations are created using the ``class`` syntax, which makes them
easy to read and write. An alternative creation method is described in
`Functional API`_. To define an enumeration, subclass ``Enum`` as
follows::
>>> from enum import Enum
>>> class Color(Enum):
... red = 1
... green = 2
... blue = 3
Note: Nomenclature
- The class ``Color`` is an *enumeration* (or *enum*)
- The attributes ``Color.red``, ``Color.green``, etc., are
*enumeration members* (or *enum members*).
- The enum members have *names* and *values* (the name of
``Color.red`` is ``red``, the value of ``Color.blue`` is
``3``, etc.)
Note:
Even though we use the ``class`` syntax to create Enums, Enums
are not normal Python classes. See `How are Enums different?`_ for
more details.
Enumeration members have human readable string representations::
>>> print(Color.red)
Color.red
...while their ``repr`` has more information::
>>> print(repr(Color.red))
<Color.red: 1>
The *type* of an enumeration member is the enumeration it belongs to::
>>> type(Color.red)
<enum 'Color'>
>>> isinstance(Color.green, Color)
True
>>>
Enum members also have a property that contains just their item name::
>>> print(Color.red.name)
red
Enumerations support iteration. In Python 3.x definition order is used; in
Python 2.x the definition order is not available, but class attribute
``__order__`` is supported; otherwise, value order is used::
>>> class Shake(Enum):
... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x
... vanilla = 7
... chocolate = 4
... cookies = 9
... mint = 3
...
>>> for shake in Shake:
... print(shake)
...
Shake.vanilla
Shake.chocolate
Shake.cookies
Shake.mint
The ``__order__`` attribute is always removed, and in 3.x it is also ignored
(order is definition order); however, in the stdlib version it will be ignored
but not removed.
Enumeration members are hashable, so they can be used in dictionaries and sets::
>>> apples = {}
>>> apples[Color.red] = 'red delicious'
>>> apples[Color.green] = 'granny smith'
>>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'}
True
Programmatic access to enumeration members and their attributes
---------------------------------------------------------------
Sometimes it's useful to access members in enumerations programmatically (i.e.
situations where ``Color.red`` won't do because the exact color is not known
at program-writing time). ``Enum`` allows such access::
>>> Color(1)
<Color.red: 1>
>>> Color(3)
<Color.blue: 3>
If you want to access enum members by *name*, use item access::
>>> Color['red']
<Color.red: 1>
>>> Color['green']
<Color.green: 2>
If you have an enum member and need its ``name`` or ``value``::
>>> member = Color.red
>>> member.name
'red'
>>> member.value
1
Duplicating enum members and values
-----------------------------------
Having two enum members (or any other attribute) with the same name is invalid;
in Python 3.x this would raise an error, but in Python 2.x the second member
simply overwrites the first::
>>> # python 2.x
>>> class Shape(Enum):
... square = 2
... square = 3
...
>>> Shape.square
<Shape.square: 3>
>>> # python 3.x
>>> class Shape(Enum):
... square = 2
... square = 3
Traceback (most recent call last):
...
TypeError: Attempted to reuse key: 'square'
However, two enum members are allowed to have the same value. Given two members
A and B with the same value (and A defined first), B is an alias to A. By-value
lookup of the value of A and B will return A. By-name lookup of B will also
return A::
>>> class Shape(Enum):
... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x
... square = 2
... diamond = 1
... circle = 3
... alias_for_square = 2
...
>>> Shape.square
<Shape.square: 2>
>>> Shape.alias_for_square
<Shape.square: 2>
>>> Shape(2)
<Shape.square: 2>
Allowing aliases is not always desirable. ``unique`` can be used to ensure
that none exist in a particular enumeration::
>>> from enum import unique
>>> @unique
... class Mistake(Enum):
... __order__ = 'one two three four' # only needed in 2.x
... one = 1
... two = 2
... three = 3
... four = 3
Traceback (most recent call last):
...
ValueError: duplicate names found in <enum 'Mistake'>: four -> three
Iterating over the members of an enum does not provide the aliases::
>>> list(Shape)
[<Shape.square: 2>, <Shape.diamond: 1>, <Shape.circle: 3>]
The special attribute ``__members__`` is a dictionary mapping names to members.
It includes all names defined in the enumeration, including the aliases::
>>> for name, member in sorted(Shape.__members__.items()):
... name, member
...
('alias_for_square', <Shape.square: 2>)
('circle', <Shape.circle: 3>)
('diamond', <Shape.diamond: 1>)
('square', <Shape.square: 2>)
The ``__members__`` attribute can be used for detailed programmatic access to
the enumeration members. For example, finding all the aliases::
>>> [name for name, member in Shape.__members__.items() if member.name != name]
['alias_for_square']
Comparisons
-----------
Enumeration members are compared by identity::
>>> Color.red is Color.red
True
>>> Color.red is Color.blue
False
>>> Color.red is not Color.blue
True
Ordered comparisons between enumeration values are *not* supported. Enum
members are not integers (but see `IntEnum`_ below)::
>>> Color.red < Color.blue
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: unorderable types: Color() < Color()
.. warning::
In Python 2 *everything* is ordered, even though the ordering may not
make sense. If you want your enumerations to have a sensible ordering
check out the `OrderedEnum`_ recipe below.
Equality comparisons are defined though::
>>> Color.blue == Color.red
False
>>> Color.blue != Color.red
True
>>> Color.blue == Color.blue
True
Comparisons against non-enumeration values will always compare not equal
(again, ``IntEnum`` was explicitly designed to behave differently, see
below)::
>>> Color.blue == 2
False
Allowed members and attributes of enumerations
----------------------------------------------
The examples above use integers for enumeration values. Using integers is
short and handy (and provided by default by the `Functional API`_), but not
strictly enforced. In the vast majority of use-cases, one doesn't care what
the actual value of an enumeration is. But if the value *is* important,
enumerations can have arbitrary values.
Enumerations are Python classes, and can have methods and special methods as
usual. If we have this enumeration::
>>> class Mood(Enum):
... funky = 1
... happy = 3
...
... def describe(self):
... # self is the member here
... return self.name, self.value
...
... def __str__(self):
... return 'my custom str! {0}'.format(self.value)
...
... @classmethod
... def favorite_mood(cls):
... # cls here is the enumeration
... return cls.happy
Then::
>>> Mood.favorite_mood()
<Mood.happy: 3>
>>> Mood.happy.describe()
('happy', 3)
>>> str(Mood.funky)
'my custom str! 1'
The rules for what is allowed are as follows: _sunder_ names (starting and
ending with a single underscore) are reserved by enum and cannot be used;
all other attributes defined within an enumeration will become members of this
enumeration, with the exception of *__dunder__* names and descriptors (methods
are also descriptors).
Note:
If your enumeration defines ``__new__`` and/or ``__init__`` then
whatever value(s) were given to the enum member will be passed into
those methods. See `Planet`_ for an example.
Restricted subclassing of enumerations
--------------------------------------
Subclassing an enumeration is allowed only if the enumeration does not define
any members. So this is forbidden::
>>> class MoreColor(Color):
... pink = 17
Traceback (most recent call last):
...
TypeError: Cannot extend enumerations
But this is allowed::
>>> class Foo(Enum):
... def some_behavior(self):
... pass
...
>>> class Bar(Foo):
... happy = 1
... sad = 2
...
Allowing subclassing of enums that define members would lead to a violation of
some important invariants of types and instances. On the other hand, it makes
sense to allow sharing some common behavior between a group of enumerations.
(See `OrderedEnum`_ for an example.)
Pickling
--------
Enumerations can be pickled and unpickled::
>>> from enum.test_enum import Fruit
>>> from pickle import dumps, loads
>>> Fruit.tomato is loads(dumps(Fruit.tomato, 2))
True
The usual restrictions for pickling apply: picklable enums must be defined in
the top level of a module, since unpickling requires them to be importable
from that module.
Note:
With pickle protocol version 4 (introduced in Python 3.4) it is possible
to easily pickle enums nested in other classes.
Functional API
--------------
The ``Enum`` class is callable, providing the following functional API::
>>> Animal = Enum('Animal', 'ant bee cat dog')
>>> Animal
<enum 'Animal'>
>>> Animal.ant
<Animal.ant: 1>
>>> Animal.ant.value
1
>>> list(Animal)
[<Animal.ant: 1>, <Animal.bee: 2>, <Animal.cat: 3>, <Animal.dog: 4>]
The semantics of this API resemble ``namedtuple``. The first argument
of the call to ``Enum`` is the name of the enumeration.
The second argument is the *source* of enumeration member names. It can be a
whitespace-separated string of names, a sequence of names, a sequence of
2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to
values. The last two options enable assigning arbitrary values to
enumerations; the others auto-assign increasing integers starting with 1. A
new class derived from ``Enum`` is returned. In other words, the above
assignment to ``Animal`` is equivalent to::
>>> class Animal(Enum):
... ant = 1
... bee = 2
... cat = 3
... dog = 4
Pickling enums created with the functional API can be tricky as frame stack
implementation details are used to try and figure out which module the
enumeration is being created in (e.g. it will fail if you use a utility
function in separate module, and also may not work on IronPython or Jython).
The solution is to specify the module name explicitly as follows::
>>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__)
Derived Enumerations
--------------------
IntEnum
^^^^^^^
A variation of ``Enum`` is provided which is also a subclass of
``int``. Members of an ``IntEnum`` can be compared to integers;
by extension, integer enumerations of different types can also be compared
to each other::
>>> from enum import IntEnum
>>> class Shape(IntEnum):
... circle = 1
... square = 2
...
>>> class Request(IntEnum):
... post = 1
... get = 2
...
>>> Shape == 1
False
>>> Shape.circle == 1
True
>>> Shape.circle == Request.post
True
However, they still can't be compared to standard ``Enum`` enumerations::
>>> class Shape(IntEnum):
... circle = 1
... square = 2
...
>>> class Color(Enum):
... red = 1
... green = 2
...
>>> Shape.circle == Color.red
False
``IntEnum`` values behave like integers in other ways you'd expect::
>>> int(Shape.circle)
1
>>> ['a', 'b', 'c'][Shape.circle]
'b'
>>> [i for i in range(Shape.square)]
[0, 1]
For the vast majority of code, ``Enum`` is strongly recommended,
since ``IntEnum`` breaks some semantic promises of an enumeration (by
being comparable to integers, and thus by transitivity to other
unrelated enumerations). It should be used only in special cases where
there's no other choice; for example, when integer constants are
replaced with enumerations and backwards compatibility is required with code
that still expects integers.
Others
^^^^^^
While ``IntEnum`` is part of the ``enum`` module, it would be very
simple to implement independently::
class IntEnum(int, Enum):
pass
This demonstrates how similar derived enumerations can be defined; for example
a ``StrEnum`` that mixes in ``str`` instead of ``int``.
Some rules:
1. When subclassing ``Enum``, mix-in types must appear before
``Enum`` itself in the sequence of bases, as in the ``IntEnum``
example above.
2. While ``Enum`` can have members of any type, once you mix in an
additional type, all the members must have values of that type, e.g.
``int`` above. This restriction does not apply to mix-ins which only
add methods and don't specify another data type such as ``int`` or
``str``.
3. When another data type is mixed in, the ``value`` attribute is *not the
same* as the enum member itself, although it is equivalent and will compare
equal.
4. %-style formatting: ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and
``__repr__`` respectively; other codes (such as ``%i`` or ``%h`` for
IntEnum) treat the enum member as its mixed-in type.
Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int``
subclasses are printed as strings and not numbers when the ``%d``, ``%i``,
or ``%u`` codes are used.
5. ``str.__format__`` (or ``format``) will use the mixed-in
type's ``__format__``. If the ``Enum``'s ``str`` or
``repr`` is desired use the ``!s`` or ``!r`` ``str`` format codes.
Decorators
----------
unique
^^^^^^
A ``class`` decorator specifically for enumerations. It searches an
enumeration's ``__members__`` gathering any aliases it finds; if any are
found ``ValueError`` is raised with the details::
>>> @unique
... class NoDupes(Enum):
... first = 'one'
... second = 'two'
... third = 'two'
Traceback (most recent call last):
...
ValueError: duplicate names found in <enum 'NoDupes'>: third -> second
Interesting examples
--------------------
While ``Enum`` and ``IntEnum`` are expected to cover the majority of
use-cases, they cannot cover them all. Here are recipes for some different
types of enumerations that can be used directly, or as examples for creating
one's own.
AutoNumber
^^^^^^^^^^
Avoids having to specify the value for each enumeration member::
>>> class AutoNumber(Enum):
... def __new__(cls):
... value = len(cls.__members__) + 1
... obj = object.__new__(cls)
... obj._value_ = value
... return obj
...
>>> class Color(AutoNumber):
... __order__ = "red green blue" # only needed in 2.x
... red = ()
... green = ()
... blue = ()
...
>>> Color.green.value == 2
True
Note:
The `__new__` method, if defined, is used during creation of the Enum
members; it is then replaced by Enum's `__new__` which is used after
class creation for lookup of existing members. Due to the way Enums are
supposed to behave, there is no way to customize Enum's `__new__`.
UniqueEnum
^^^^^^^^^^
Raises an error if a duplicate member value is found instead of creating an
alias::
>>> class UniqueEnum(Enum):
... def __init__(self, *args):
... cls = self.__class__
... if any(self.value == e.value for e in cls):
... a = self.name
... e = cls(self.value).name
... raise ValueError(
... "aliases not allowed in UniqueEnum: %r --> %r"
... % (a, e))
...
>>> class Color(UniqueEnum):
... red = 1
... green = 2
... blue = 3
... grene = 2
Traceback (most recent call last):
...
ValueError: aliases not allowed in UniqueEnum: 'grene' --> 'green'
OrderedEnum
^^^^^^^^^^^
An ordered enumeration that is not based on ``IntEnum`` and so maintains
the normal ``Enum`` invariants (such as not being comparable to other
enumerations)::
>>> class OrderedEnum(Enum):
... def __ge__(self, other):
... if self.__class__ is other.__class__:
... return self._value_ >= other._value_
... return NotImplemented
... def __gt__(self, other):
... if self.__class__ is other.__class__:
... return self._value_ > other._value_
... return NotImplemented
... def __le__(self, other):
... if self.__class__ is other.__class__:
... return self._value_ <= other._value_
... return NotImplemented
... def __lt__(self, other):
... if self.__class__ is other.__class__:
... return self._value_ < other._value_
... return NotImplemented
...
>>> class Grade(OrderedEnum):
... __order__ = 'A B C D F'
... A = 5
... B = 4
... C = 3
... D = 2
... F = 1
...
>>> Grade.C < Grade.A
True
Planet
^^^^^^
If ``__new__`` or ``__init__`` is defined the value of the enum member
will be passed to those methods::
>>> class Planet(Enum):
... MERCURY = (3.303e+23, 2.4397e6)
... VENUS = (4.869e+24, 6.0518e6)
... EARTH = (5.976e+24, 6.37814e6)
... MARS = (6.421e+23, 3.3972e6)
... JUPITER = (1.9e+27, 7.1492e7)
... SATURN = (5.688e+26, 6.0268e7)
... URANUS = (8.686e+25, 2.5559e7)
... NEPTUNE = (1.024e+26, 2.4746e7)
... def __init__(self, mass, radius):
... self.mass = mass # in kilograms
... self.radius = radius # in meters
... @property
... def surface_gravity(self):
... # universal gravitational constant (m3 kg-1 s-2)
... G = 6.67300E-11
... return G * self.mass / (self.radius * self.radius)
...
>>> Planet.EARTH.value
(5.976e+24, 6378140.0)
>>> Planet.EARTH.surface_gravity
9.802652743337129
How are Enums different?
------------------------
Enums have a custom metaclass that affects many aspects of both derived Enum
classes and their instances (members).
Enum Classes
^^^^^^^^^^^^
The ``EnumMeta`` metaclass is responsible for providing the
``__contains__``, ``__dir__``, ``__iter__`` and other methods that
allow one to do things with an ``Enum`` class that fail on a typical
class, such as ``list(Color)`` or ``some_var in Color``. ``EnumMeta`` is
responsible for ensuring that various other methods on the final ``Enum``
class are correct (such as ``__new__``, ``__getnewargs__``,
``__str__`` and ``__repr__``).
.. note::
``__dir__`` is not changed in the Python 2 line as it messes up some
of the decorators included in the stdlib.
Enum Members (aka instances)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The most interesting thing about Enum members is that they are singletons.
``EnumMeta`` creates them all while it is creating the ``Enum``
class itself, and then puts a custom ``__new__`` in place to ensure
that no new ones are ever instantiated by returning only the existing
member instances.
Finer Points
^^^^^^^^^^^^
``Enum`` members are instances of an ``Enum`` class, and even though they
are accessible as `EnumClass.member1.member2`, they should not be
accessed directly from the member as that lookup may fail or, worse,
return something besides the ``Enum`` member you were looking for
(changed in version 1.1.1)::
>>> class FieldTypes(Enum):
... name = 1
... value = 2
... size = 3
...
>>> FieldTypes.value.size
<FieldTypes.size: 3>
>>> FieldTypes.size.value
3
The ``__members__`` attribute is only available on the class.
In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being
the definition order. In Python 2.7 ``__members__`` is an ``OrderedDict`` if
``__order__`` was specified, and a plain ``dict`` otherwise. In all other Python
2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was specified
as the ``OrderedDict`` type didn't exist yet.
If you give your ``Enum`` subclass extra methods, like the `Planet`_
class above, those methods will show up in a `dir` of the member,
but not of the class::
>>> dir(Planet)
['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS',
'VENUS', '__class__', '__doc__', '__members__', '__module__']
>>> dir(Planet.EARTH)
['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value']
A ``__new__`` method will only be used for the creation of the
``Enum`` members -- after that it is replaced. This means if you wish to
change how ``Enum`` members are looked up you either have to write a
helper function or a ``classmethod``.
File diff suppressed because it is too large Load Diff
+312
View File
@@ -0,0 +1,312 @@
import codecs
import logging
import os
import pickle
import shutil
import tempfile
import traceback
import appdirs
from scandir import scandir
try:
from collections.abc import MutableMapping
unicode = str
except ImportError:
# Python 2 imports
from collections import MutableMapping
FileNotFoundError = IOError
from .posixemulation import rename
logger = logging.getLogger(__name__)
class FileCache(MutableMapping):
    """A persistent file cache that is dictionary-like and has a write buffer.

    *appname* is passed to `appdirs <https://pypi.python.org/pypi/appdirs/>`_
    to determine a system-appropriate location for the cache files. The cache
    directory used is available via :data:`cache_dir`.

    By default, a write buffer is used, so writing to cache files is not done
    until :meth:`sync` is explicitly called. This behavior can be changed using
    the optional *flag* argument.

    .. NOTE::
        Keys and values are always stored as :class:`bytes` objects. If data
        serialization is enabled, keys are returned as :class:`str` or
        :class:`unicode` objects.
        If data serialization is disabled, keys are returned as a
        :class:`bytes` object.

    :param str appname: The app/script the cache should be associated with.
    :param str flag: How the cache should be opened. See below for details.
    :param mode: The Unix mode for the cache files.
    :param str keyencoding: The encoding the keys use, defaults to 'utf-8'.
        This is used if *serialize* is ``False``; the keys are treated as
        :class:`bytes` objects.
    :param bool serialize: Whether or not to (de)serialize the values. If a
        cache is used with a :class:`~shelve.Shelf`, set this to ``False``.
    :param str app_cache_dir: absolute path to root cache directory to be
        used in place of system-appropriate location determined by appdirs

    The optional *flag* argument can be:

    +---------+-------------------------------------------+
    | Value   | Meaning                                   |
    +=========+===========================================+
    | ``'r'`` | Open existing cache for reading only      |
    +---------+-------------------------------------------+
    | ``'w'`` | Open existing cache for reading and       |
    |         | writing                                   |
    +---------+-------------------------------------------+
    | ``'c'`` | Open cache for reading and writing,       |
    |         | creating it if it doesn't exist (default) |
    +---------+-------------------------------------------+
    | ``'n'`` | Always create a new, empty cache, open    |
    |         | for reading and writing                   |
    +---------+-------------------------------------------+

    If a ``'s'`` is appended to the *flag* argument, the cache will be opened
    in sync mode. Writing to the cache will happen immediately and will not be
    buffered.

    If an application needs to use more than one cache, then it should use
    subcaches. To create a subcache, append a series of one or more names
    separated by periods to the application name when creating a
    :class:`FileCache` object (e.g. ``'appname.subcache'`` or
    ``'appname.subcache.subcache'``).
    Subcaches are a way for an application to use more than one cache without
    polluting a user's cache directory. All caches -- main caches or subcaches
    -- are totally independent. The only aspect in which they are linked is
    that all of an application's caches exist in the same system directory.
    Because each cache is independent of every other cache, calling
    :meth:`delete` on an application's main cache will not delete data in
    its subcaches.

    """

    def __init__(self, appname, flag='c', mode=0o666, keyencoding='utf-8',
                 serialize=True, app_cache_dir=None):
        """Initialize a :class:`FileCache` object.

        :raises TypeError: if *flag* is not a :class:`str`.
        :raises ValueError: if *flag* is empty or malformed, or if one of the
            subcache components of *appname* is ``'cache'``.
        :raises FileNotFoundError: if the cache directory does not exist and
            *flag* does not allow creating it.
        """
        if not isinstance(flag, str):
            raise TypeError("flag must be str not '{}'".format(type(flag)))
        elif not flag or flag[0] not in 'rwcn':
            # An empty flag is reported as an invalid flag rather than
            # surfacing as an IndexError from ``flag[0]``.
            raise ValueError("invalid flag: '{}', first flag must be one of "
                             "'r', 'w', 'c' or 'n'".format(flag))
        elif len(flag) > 1 and flag[1] != 's':
            raise ValueError("invalid flag: '{}', second flag must be "
                             "'s'".format(flag))

        appname, subcache = self._parse_appname(appname)
        # 'cache' is the name of the directory that holds the cache files,
        # so it cannot double as a subcache name.
        if 'cache' in subcache:
            raise ValueError("invalid subcache name: 'cache'.")
        self._is_subcache = bool(subcache)

        if not app_cache_dir:
            app_cache_dir = appdirs.user_cache_dir(appname, appname)
        subcache_dir = os.path.join(app_cache_dir, *subcache)
        self.cache_dir = os.path.join(subcache_dir, 'cache')
        exists = os.path.exists(self.cache_dir)

        if len(flag) > 1 and flag[1] == 's':
            self._sync = True
        else:
            self._sync = False
            self._buffer = {}

        if exists and 'n' in flag:
            self.clear()
            self.create()
        elif not exists and ('c' in flag or 'n' in flag):
            self.create()
        elif not exists:
            raise FileNotFoundError("no such directory: '{}'".format(
                self.cache_dir))

        # File mode handed to os.fdopen() when writing cache files; a cache
        # opened with 'r' gets a read-only mode.
        self._flag = 'rb' if 'r' in flag else 'wb'
        self._mode = mode
        self._keyencoding = keyencoding
        self._serialize = serialize

    def _parse_appname(self, appname):
        """Splits an appname into the appname and subcache components."""
        components = appname.split('.')
        return components[0], components[1:]

    def create(self):
        """Create the write buffer and cache directory."""
        if not self._sync and not hasattr(self, '_buffer'):
            self._buffer = {}
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)

    def clear(self):
        """Remove all items from the write buffer and cache.

        The write buffer object and cache directory are not deleted.

        """
        self.delete()
        self.create()

    def delete(self):
        """Delete the write buffer and cache directory."""
        if not self._sync:
            del self._buffer
        shutil.rmtree(self.cache_dir)

    def close(self):
        """Sync the write buffer, then close the cache.

        If a closed :class:`FileCache` object's methods are called, a
        :exc:`ValueError` will be raised.

        """
        self.sync()
        self.sync = self.create = self.delete = self._closed
        self._write_to_file = self._read_from_file = self._closed
        self._key_to_filename = self._filename_to_key = self._closed
        # NOTE: Python looks up special methods on the type, so these
        # instance attributes only affect explicit ``obj.__getitem__(...)``
        # style calls; ``obj[key]`` still goes through the class methods and
        # fails once it reaches one of the helpers replaced above.
        self.__getitem__ = self.__setitem__ = self.__delitem__ = self._closed
        self.__iter__ = self.__len__ = self.__contains__ = self._closed

    def sync(self):
        """Sync the write buffer with the cache files and clear the buffer.

        If the :class:`FileCache` object was opened with the optional ``'s'``
        *flag* argument, then calling :meth:`sync` will do nothing.

        A failure to write a single entry is logged and does not abort the
        sync; the entry is still dropped from the buffer afterwards.

        """
        if self._sync:
            return  # opened in sync mode, so skip the manual sync
        self._sync = True
        try:
            for ekey, value in list(self._buffer.items()):
                filename = self._key_to_filename(ekey)
                try:
                    self._write_to_file(filename, value)
                except Exception:
                    logger.error("Couldn't write content from %r to cache file: %r: %s", ekey, filename,
                                 traceback.format_exc())
            self._buffer.clear()
        finally:
            # Always return to buffered mode, even if something other than
            # an ordinary Exception (e.g. KeyboardInterrupt) escapes.
            self._sync = False

    def _closed(self, *args, **kwargs):
        """Filler method for closed cache methods."""
        raise ValueError("invalid operation on closed cache")

    def _encode_key(self, key):
        """Encode key using *hex_codec* for constructing a cache filename.

        Keys are implicitly converted to :class:`bytes` if passed as
        :class:`str`.

        :raises TypeError: if *key* is neither text nor :class:`bytes`.
        """
        if isinstance(key, str) or isinstance(key, unicode):
            key = key.encode(self._keyencoding)
        elif not isinstance(key, bytes):
            raise TypeError("key must be bytes or str")
        return codecs.encode(key, 'hex_codec').decode(self._keyencoding)

    def _decode_key(self, key):
        """Decode key using hex_codec to retrieve the original key.

        Keys are returned as :class:`str` if serialization is enabled.
        Keys are returned as :class:`bytes` if serialization is disabled.

        """
        bkey = codecs.decode(key.encode(self._keyencoding), 'hex_codec')
        return bkey.decode(self._keyencoding) if self._serialize else bkey

    def _dumps(self, value):
        """Pickle *value* if serialization is enabled, else pass it through."""
        return value if not self._serialize else pickle.dumps(value)

    def _loads(self, value):
        """Unpickle *value* if serialization is enabled, else pass it through."""
        # NOTE(security): pickle.loads executes arbitrary code embedded in the
        # data; the cache directory must not be writable by untrusted parties.
        return value if not self._serialize else pickle.loads(value)

    def _key_to_filename(self, key):
        """Convert an encoded key to an absolute cache filename."""
        return os.path.join(self.cache_dir, key)

    def _filename_to_key(self, absfilename):
        """Convert an absolute cache filename to a key name."""
        return os.path.split(absfilename)[1]

    def _all_filenames(self):
        """Yield the absolute filename of every regular file in the cache.

        Yields nothing if the cache directory cannot be listed.
        """
        try:
            for entry in scandir(self.cache_dir):
                if entry.is_file(follow_symlinks=False):
                    yield os.path.join(self.cache_dir, entry.name)
        except (FileNotFoundError, OSError):
            # A plain return ends the generator; ``raise StopIteration``
            # inside a generator is turned into RuntimeError by PEP 479.
            return

    def _all_keys(self):
        """Return a set of all encoded key names."""
        file_keys = [self._filename_to_key(fn) for fn in self._all_filenames()]
        if self._sync:
            return set(file_keys)
        else:
            return set(file_keys + list(self._buffer))

    def _write_to_file(self, filename, bytesvalue):
        """Write bytesvalue to filename.

        The data is written to a temporary file first and then moved over
        *filename*, so readers never observe a partially written file.
        """
        fh, tmp = tempfile.mkstemp()
        try:
            with os.fdopen(fh, self._flag) as f:
                f.write(self._dumps(bytesvalue))
            rename(tmp, filename)
        finally:
            # After a successful rename the temporary file is gone; if the
            # write or the rename failed, do not leave it behind.
            if os.path.exists(tmp):
                os.remove(tmp)
        os.chmod(filename, self._mode)

    def _read_from_file(self, filename):
        """Read data from filename.

        Returns ``None`` (after logging a warning) if the file cannot be
        opened or read.
        """
        try:
            with open(filename, 'rb') as f:
                return self._loads(f.read())
        except (IOError, OSError):
            logger.warning('Error opening file: {}'.format(filename))
            return None

    def __setitem__(self, key, value):
        ekey = self._encode_key(key)
        if not self._sync:
            self._buffer[ekey] = value
        else:
            filename = self._key_to_filename(ekey)
            self._write_to_file(filename, value)

    def __getitem__(self, key):
        ekey = self._encode_key(key)
        if not self._sync:
            # Unsynced writes take precedence over what is on disk.
            try:
                return self._buffer[ekey]
            except KeyError:
                pass
        filename = self._key_to_filename(ekey)
        if filename not in self._all_filenames():
            raise KeyError(key)
        return self._read_from_file(filename)

    def __delitem__(self, key):
        ekey = self._encode_key(key)
        filename = self._key_to_filename(ekey)
        if not self._sync:
            try:
                del self._buffer[ekey]
            except KeyError:
                if filename not in self._all_filenames():
                    raise KeyError(key)
        # Best effort: the key may have lived only in the write buffer.
        try:
            os.remove(filename)
        except (IOError, OSError):
            pass

    def __iter__(self):
        for key in self._all_keys():
            yield self._decode_key(key)

    def __len__(self):
        return len(self._all_keys())

    def __contains__(self, key):
        ekey = self._encode_key(key)
        return ekey in self._all_keys()
@@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
r"""
werkzeug.posixemulation
~~~~~~~~~~~~~~~~~~~~~~~
Provides a POSIX emulation for some features that are relevant to
web applications. The main purpose is to simplify support for
systems such as Windows NT that are not 100% POSIX compatible.
Currently this only implements a :func:`rename` function that
follows POSIX semantics. Eg: if the target file already exists it
will be replaced without asking.
This module was introduced in 0.6.1 and is not a public interface.
It might become one in later versions of Werkzeug.
:copyright: (c) 2013 by the Werkzeug Team, see AUTHORS for more details.
:license: BSD, see LICENSE for more details.
"""
import sys
import os
import errno
import time
import random
import shutil
# Whether the platform allows renaming a file that is currently open.
can_rename_open_file = False

if os.name == 'nt':  # pragma: no cover
    # Fallbacks used when the ctypes based implementations below cannot be
    # set up; returning False makes rename() use the plain os.rename path.
    _rename = lambda src, dst: False
    _rename_atomic = lambda src, dst: False
    if sys.version_info >= (3, 0):
        unicode = str

    try:
        import ctypes

        _MOVEFILE_REPLACE_EXISTING = 0x1
        _MOVEFILE_WRITE_THROUGH = 0x8
        _MoveFileEx = ctypes.windll.kernel32.MoveFileExW

        def _rename(src, dst):
            """Move *src* over *dst* via MoveFileExW; return a true value on success."""
            if not isinstance(src, unicode):
                src = unicode(src, sys.getfilesystemencoding())
            if not isinstance(dst, unicode):
                dst = unicode(dst, sys.getfilesystemencoding())
            if _rename_atomic(src, dst):
                return True
            # Retry for a short while in case the move fails transiently.
            retry = 0
            rv = False
            while not rv and retry < 100:
                rv = _MoveFileEx(src, dst, _MOVEFILE_REPLACE_EXISTING |
                                 _MOVEFILE_WRITE_THROUGH)
                if not rv:
                    time.sleep(0.001)
                    retry += 1
            return rv

        # new in Vista and Windows Server 2008
        _CreateTransaction = ctypes.windll.ktmw32.CreateTransaction
        _CommitTransaction = ctypes.windll.ktmw32.CommitTransaction
        _MoveFileTransacted = ctypes.windll.kernel32.MoveFileTransactedW
        _CloseHandle = ctypes.windll.kernel32.CloseHandle
        can_rename_open_file = True

        def _rename_atomic(src, dst):
            """Move *src* over *dst* inside a transaction; return a true value on success."""
            ta = _CreateTransaction(None, 0, 0, 0, 0, 1000, 'Werkzeug rename')
            if ta == -1:
                return False
            try:
                retry = 0
                rv = False
                while not rv and retry < 100:
                    rv = _MoveFileTransacted(src, dst, None, None,
                                             _MOVEFILE_REPLACE_EXISTING |
                                             _MOVEFILE_WRITE_THROUGH, ta)
                    if rv:
                        rv = _CommitTransaction(ta)
                        break
                    else:
                        time.sleep(0.001)
                        retry += 1
                return rv
            finally:
                _CloseHandle(ta)
    except Exception:
        # ctypes or the required Windows APIs are unavailable; keep the
        # lambda fallbacks defined above.
        pass

    def rename(src, dst):
        """Rename *src* to *dst*, replacing *dst* if it already exists."""
        # Try atomic or pseudo-atomic rename
        if _rename(src, dst):
            return
        # Fall back to "move away and replace"
        try:
            os.rename(src, dst)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            # sys.maxint does not exist on Python 3; sys.maxsize is
            # available on both Python 2.6+ and Python 3.
            old = "%s-%08x" % (dst, random.randint(0, sys.maxsize))
            os.rename(dst, old)
            os.rename(src, dst)
            try:
                os.unlink(old)
            except Exception:
                pass
else:
    # If dst on current filesystem then use
    # atomic rename. Otherwise, fall back to a
    # non-atomic copy and remove.
    rename = shutil.move
    can_rename_open_file = True
+32 -33
View File
@@ -4,7 +4,6 @@ This gives other modules access to the gritty details about characters and the
encodings that use them.
"""
from __future__ import unicode_literals
import re
import zlib
import unicodedata
@@ -15,13 +14,13 @@ from ftfy.compatibility import unichr
# These are the encodings we will try to fix in ftfy, in the
# order that they should be tried.
CHARMAP_ENCODINGS = [
'latin-1',
'sloppy-windows-1252',
'sloppy-windows-1250',
'iso-8859-2',
'sloppy-windows-1251',
'macroman',
'cp437',
u'latin-1',
u'sloppy-windows-1252',
u'sloppy-windows-1250',
u'iso-8859-2',
u'sloppy-windows-1251',
u'macroman',
u'cp437',
]
@@ -29,25 +28,25 @@ def _build_regexes():
"""
ENCODING_REGEXES contain reasonably fast ways to detect if we
could represent a given string in a given encoding. The simplest one is
the 'ascii' detector, which of course just determines if all characters
the u'ascii' detector, which of course just determines if all characters
are between U+0000 and U+007F.
"""
# Define a regex that matches ASCII text.
encoding_regexes = {'ascii': re.compile('^[\x00-\x7f]*$')}
encoding_regexes = {u'ascii': re.compile('^[\x00-\x7f]*$')}
for encoding in CHARMAP_ENCODINGS:
# Make a sequence of characters that bytes \x80 to \xFF decode to
# in each encoding, as well as byte \x1A, which is used to represent
# the replacement character in the sloppy-* encodings.
latin1table = ''.join(unichr(i) for i in range(128, 256)) + '\x1a'
charlist = latin1table.encode('latin-1').decode(encoding)
latin1table = u''.join(unichr(i) for i in range(128, 256)) + '\x1a'
charlist = latin1table.encode(u'latin-1').decode(encoding)
# The rest of the ASCII bytes -- bytes \x00 to \x19 and \x1B
# to \x7F -- will decode as those ASCII characters in any encoding we
# support, so we can just include them as ranges. This also lets us
# not worry about escaping regex special characters, because all of
# them are in the \x1B to \x7F range.
regex = '^[\x00-\x19\x1b-\x7f{0}]*$'.format(charlist)
regex = u'^[\x00-\x19\x1b-\x7f{0}]*$'.format(charlist)
encoding_regexes[encoding] = re.compile(regex)
return encoding_regexes
ENCODING_REGEXES = _build_regexes()
@@ -57,10 +56,10 @@ def _build_utf8_punct_regex():
"""
Recognize UTF-8 mojibake that's so blatant that we can fix it even when the
rest of the string doesn't decode as UTF-8 -- namely, UTF-8 sequences for
the 'General Punctuation' characters U+2000 to U+2040, re-encoded in
the u'General Punctuation' characters U+2000 to U+2040, re-encoded in
Windows-1252.
These are recognizable by the distinctive 'â€' ('\xe2\x80') sequence they
These are recognizable by the distinctiveu'â€u' ('\xe2\x80') sequence they
all begin with when decoded as Windows-1252.
"""
# We're making a regex that has all the literal bytes from 0x80 to 0xbf in
@@ -72,10 +71,10 @@ def _build_utf8_punct_regex():
# prettier when we deprecate Python 2.
continuation_char_list = ''.join(
unichr(i) for i in range(0x80, 0xc0)
).encode('latin-1')
obvious_utf8 = ('â€['
+ continuation_char_list.decode('sloppy-windows-1252')
+ ']')
).encode(u'latin-1')
obvious_utf8 = (u'â€['
+ continuation_char_list.decode(u'sloppy-windows-1252')
+ u']')
return re.compile(obvious_utf8)
PARTIAL_UTF8_PUNCT_RE = _build_utf8_punct_regex()
@@ -126,8 +125,8 @@ LOSSY_UTF8_RE = re.compile(
)
# These regexes match various Unicode variations on single and double quotes.
SINGLE_QUOTE_RE = re.compile('[\u2018-\u201b]')
DOUBLE_QUOTE_RE = re.compile('[\u201c-\u201f]')
SINGLE_QUOTE_RE = re.compile(u'[\u2018-\u201b]')
DOUBLE_QUOTE_RE = re.compile(u'[\u201c-\u201f]')
def possible_encoding(text, encoding):
@@ -143,7 +142,7 @@ def possible_encoding(text, encoding):
CHAR_CLASS_STRING = zlib.decompress(
resource_string(__name__, 'char_classes.dat')
).decode('ascii')
).decode(u'ascii')
def chars_to_classes(string):
"""
@@ -185,15 +184,15 @@ CONTROL_CHARS = _build_control_char_mapping()
# Ligatures may also be separated by NFKC normalization, but that is sometimes
# more normalization than you want.
LIGATURES = {
ord('IJ'): 'IJ',
ord('ij'): 'ij',
ord(''): 'ff',
ord(''): 'fi',
ord(''): 'fl',
ord(''): 'ffi',
ord(''): 'ffl',
ord(''): 'ſt',
ord(''): 'st'
ord(u'IJ'): u'IJ',
ord(u'ij'): u'ij',
ord(u''): u'ff',
ord(u''): u'fi',
ord(u''): u'fl',
ord(u''): u'ffi',
ord(u''): u'ffl',
ord(u''): u'ſt',
ord(u''): u'st'
}
@@ -205,10 +204,10 @@ def _build_width_map():
# Though it's not listed as a fullwidth character, we'll want to convert
# U+3000 IDEOGRAPHIC SPACE to U+20 SPACE on the same principle, so start
# with that in the dictionary.
width_map = {0x3000: ' '}
width_map = {0x3000: u' '}
for i in range(0xff01, 0xfff0):
char = unichr(i)
alternate = unicodedata.normalize('NFKC', char)
alternate = unicodedata.normalize(u'NFKC', char)
if alternate != char:
width_map[i] = alternate
return width_map
@@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '3.0.0.dev0'
__version__ = '2.1.4'
@@ -133,7 +133,6 @@ class ValidateHasNeighbor(Rule):
Validate tag has-neighbor
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
@@ -159,7 +158,6 @@ class ValidateHasNeighborBefore(Rule):
Validate tag has-neighbor-before that previous match exists.
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
@@ -179,7 +177,6 @@ class ValidateHasNeighborAfter(Rule):
Validate tag has-neighbor-after that next match exists.
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
@@ -3895,6 +3895,7 @@
season: 7
episode: 22
episode_title: 2000 Light Years from Home
other: Classic
container: mkv
mimetype: video/x-matroska
type: episode
@@ -3962,15 +3963,3 @@
subtitle_language: fr
other: FullHD
type: episode
? Whose Line is it anyway/Season 01/Whose.Line.is.it.Anyway.US.S13E01.720p.WEB.x264-TBS.mkv
: title: Whose Line is it Anyway
season: 13
episode: 1
country: US
screen_size: 720p
format: WEB-DL
video_codec: h264
release_group: TBS
container: mkv
type: episode
@@ -1,163 +0,0 @@
Change Log
----------
0.999
~~~~~
Released on December 23, 2013
* Fix #127: add work-around for CPython issue #20007: .read(0) on
http.client.HTTPResponse drops the rest of the content.
* Fix #115: lxml treewalker can now deal with fragments containing, at
their root level, text nodes with non-ASCII characters on Python 2.
0.99
~~~~
Released on September 10, 2013
* No library changes from 1.0b3; released as 0.99 as pip has changed
behaviour from 1.4 to avoid installing pre-release versions per
PEP 440.
1.0b3
~~~~~
Released on July 24, 2013
* Removed ``RecursiveTreeWalker`` from ``treewalkers._base``. Any
implementation using it should be moved to
``NonRecursiveTreeWalker``, as everything bundled with html5lib has
for years.
* Fix #67 so that ``BufferedStream`` correctly returns a bytes
object, thereby fixing any case where html5lib is passed a
non-seekable RawIOBase-like object.
1.0b2
~~~~~
Released on June 27, 2013
* Removed reordering of attributes within the serializer. There is now
an ``alphabetical_attributes`` option which preserves the previous
behaviour through a new filter. This allows attribute order to be
preserved through html5lib if the tree builder preserves order.
* Removed ``dom2sax`` from DOM treebuilders. It has been replaced by
``treeadapters.sax.to_sax`` which is generic and supports any
treewalker; it also resolves all known bugs with ``dom2sax``.
* Fix treewalker assertions on hitting bytes strings on
Python 2. Previous to 1.0b1, treewalkers coped with mixed
bytes/unicode data on Python 2; this reintroduces this prior
behaviour on Python 2. Behaviour is unchanged on Python 3.
1.0b1
~~~~~
Released on May 17, 2013
* Implementation updated to implement the `HTML specification
<http://www.whatwg.org/specs/web-apps/current-work/>`_ as of 5th May
2013 (`SVN <http://svn.whatwg.org/webapps/>`_ revision r7867).
* Python 3.2+ supported in a single codebase using the ``six`` library.
* Removed support for Python 2.5 and older.
* Removed the deprecated Beautiful Soup 3 treebuilder.
``beautifulsoup4`` can use ``html5lib`` as a parser instead. Note that
since it doesn't support namespaces, foreign content like SVG and
MathML is parsed incorrectly.
* Removed ``simpletree`` from the package. The default tree builder is
now ``etree`` (using the ``xml.etree.cElementTree`` implementation if
available, and ``xml.etree.ElementTree`` otherwise).
* Removed the ``XHTMLSerializer`` as it never actually guaranteed its
output was well-formed XML, and hence provided little of use.
* Removed default DOM treebuilder, so ``html5lib.treebuilders.dom`` is no
longer supported. ``html5lib.treebuilders.getTreeBuilder("dom")`` will
return the default DOM treebuilder, which uses ``xml.dom.minidom``.
* Optional heuristic character encoding detection now based on
``charade`` for Python 2.6 - 3.3 compatibility.
* Optional ``Genshi`` treewalker support fixed.
* Many bugfixes, including:
* #33: null in attribute value breaks XML AttValue;
* #4: nested, indirect descendant, <button> causes infinite loop;
* `Google Code 215
<http://code.google.com/p/html5lib/issues/detail?id=215>`_: Properly
detect seekable streams;
* `Google Code 206
<http://code.google.com/p/html5lib/issues/detail?id=206>`_: add
support for <video preload=...>, <audio preload=...>;
* `Google Code 205
<http://code.google.com/p/html5lib/issues/detail?id=205>`_: add
support for <video poster=...>;
* `Google Code 202
<http://code.google.com/p/html5lib/issues/detail?id=202>`_: Unicode
file breaks InputStream.
* Source code is now mostly PEP 8 compliant.
* Test harness has been improved and now depends on ``nose``.
* Documentation updated and moved to http://html5lib.readthedocs.org/.
0.95
~~~~
Released on February 11, 2012
0.90
~~~~
Released on January 17, 2010
0.11.1
~~~~~~
Released on June 12, 2008
0.11
~~~~
Released on June 10, 2008
0.10
~~~~
Released on October 7, 2007
0.9
~~~
Released on March 11, 2007
0.2
~~~
Released on January 8, 2007
@@ -1,157 +0,0 @@
html5lib
========
.. image:: https://travis-ci.org/html5lib/html5lib-python.png?branch=master
:target: https://travis-ci.org/html5lib/html5lib-python
html5lib is a pure-python library for parsing HTML. It is designed to
conform to the WHATWG HTML specification, as is implemented by all major
web browsers.
Usage
-----
Simple usage follows this pattern:
.. code-block:: python
import html5lib
with open("mydocument.html", "rb") as f:
document = html5lib.parse(f)
or:
.. code-block:: python
import html5lib
document = html5lib.parse("<p>Hello World!")
By default, the ``document`` will be an ``xml.etree`` element instance.
Whenever possible, html5lib chooses the accelerated ``ElementTree``
implementation (i.e. ``xml.etree.cElementTree`` on Python 2.x).
Two other tree types are supported: ``xml.dom.minidom`` and
``lxml.etree``. To use an alternative format, specify the name of
a treebuilder:
.. code-block:: python
import html5lib
with open("mydocument.html", "rb") as f:
lxml_etree_document = html5lib.parse(f, treebuilder="lxml")
When used with ``urllib2`` (Python 2), the charset from HTTP should be
passed into html5lib as follows:
.. code-block:: python
from contextlib import closing
from urllib2 import urlopen
import html5lib
with closing(urlopen("http://example.com/")) as f:
document = html5lib.parse(f, encoding=f.info().getparam("charset"))
When used with ``urllib.request`` (Python 3), the charset from HTTP
should be passed into html5lib as follows:
.. code-block:: python
from urllib.request import urlopen
import html5lib
with urlopen("http://example.com/") as f:
document = html5lib.parse(f, encoding=f.info().get_content_charset())
To have more control over the parser, create a parser object explicitly.
For instance, to make the parser raise exceptions on parse errors, use:
.. code-block:: python
import html5lib
with open("mydocument.html", "rb") as f:
parser = html5lib.HTMLParser(strict=True)
document = parser.parse(f)
When you're instantiating parser objects explicitly, pass a treebuilder
class as the ``tree`` keyword argument to use an alternative document
format:
.. code-block:: python
import html5lib
parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
minidom_document = parser.parse("<p>Hello World!")
More documentation is available at http://html5lib.readthedocs.org/.
Installation
------------
html5lib works on CPython 2.6+, CPython 3.2+ and PyPy. To install it,
use:
.. code-block:: bash
$ pip install html5lib
Optional Dependencies
---------------------
The following third-party libraries may be used for additional
functionality:
- ``datrie`` can be used to improve parsing performance (though in
almost all cases the improvement is marginal);
- ``lxml`` is supported as a tree format (for both building and
walking) under CPython (but *not* PyPy where it is known to cause
segfaults);
- ``genshi`` has a treewalker (but not builder); and
- ``charade`` can be used as a fallback when character encoding cannot
be determined; ``chardet``, from which it was forked, can also be used
on Python 2.
- ``ordereddict`` can be used under Python 2.6
(``collections.OrderedDict`` is used instead on later versions) to
serialize attributes in alphabetical order.
Bugs
----
Please report any bugs on the `issue tracker
<https://github.com/html5lib/html5lib-python/issues>`_.
Tests
-----
Unit tests require the ``nose`` library and can be run using the
``nosetests`` command in the root directory; ``ordereddict`` is
required under Python 2.6. All should pass.
Test data are contained in a separate `html5lib-tests
<https://github.com/html5lib/html5lib-tests>`_ repository and included
as a submodule, thus for git checkouts they must be initialized::
$ git submodule init
$ git submodule update
If you have all compatible Python implementations available on your
system, you can run tests on all of them using the ``tox`` utility,
which can be found on PyPI.
Questions?
----------
There's a mailing list available for support on Google Groups,
`html5lib-discuss <http://groups.google.com/group/html5lib-discuss>`_,
though you may get a quicker response asking on IRC in `#whatwg on
irc.freenode.net <http://wiki.whatwg.org/wiki/IRC>`_.
+20 -8
View File
@@ -1,14 +1,23 @@
"""
HTML parsing library based on the WHATWG "HTML5"
specification. The parser is designed to be compatible with existing
HTML found in the wild and implements well-defined error recovery that
HTML parsing library based on the `WHATWG HTML specification
<https://whatwg.org/html>`_. The parser is designed to be compatible with
existing HTML found in the wild and implements well-defined error recovery that
is largely compatible with modern desktop web browsers.
Example usage:
Example usage::
import html5lib
f = open("my_document.html")
tree = html5lib.parse(f)
import html5lib
with open("my_document.html", "rb") as f:
tree = html5lib.parse(f)
For convenience, this module re-exports the following names:
* :func:`~.html5parser.parse`
* :func:`~.html5parser.parseFragment`
* :class:`~.html5parser.HTMLParser`
* :func:`~.treebuilders.getTreeBuilder`
* :func:`~.treewalkers.getTreeWalker`
* :func:`~.serializer.serialize`
"""
from __future__ import absolute_import, division, unicode_literals
@@ -20,4 +29,7 @@ from .serializer import serialize
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
"getTreeWalker", "serialize"]
__version__ = "0.999"
# this has to be at the top level, see how setup.py parses this
#: Distribution version number.
__version__ = "1.0.1"
@@ -175,18 +175,18 @@ def escapeRegexp(string):
return string
# output from the above
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0
f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0
f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3
040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3
040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
# Simpler things
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
class InfosetFilter(object):
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
def __init__(self, replaceChars=None,
def __init__(self,
dropXmlnsLocalName=False,
dropXmlnsAttrNs=False,
preventDoubleDashComments=False,
@@ -217,7 +217,7 @@ class InfosetFilter(object):
else:
return self.toXmlName(name)
def coerceElement(self, name, namespace=None):
def coerceElement(self, name):
return self.toXmlName(name)
def coerceComment(self, data):
@@ -225,11 +225,14 @@ class InfosetFilter(object):
while "--" in data:
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
data = data.replace("--", "- -")
if data.endswith("-"):
warnings.warn("Comments cannot end in a dash", DataLossWarning)
data += " "
return data
def coerceCharacters(self, data):
if self.replaceFormFeedCharacters:
for i in range(data.count("\x0C")):
for _ in range(data.count("\x0C")):
warnings.warn("Text cannot contain U+000C", DataLossWarning)
data = data.replace("\x0C", " ")
# Other non-xml characters
@@ -1,13 +1,16 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
from six.moves import http_client
from six import text_type, binary_type
from six.moves import http_client, urllib
import codecs
import re
import webencodings
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
from .constants import encodings, ReparseException
from . import utils
from .constants import _ReparseException
from . import _utils
from io import StringIO
@@ -16,19 +19,26 @@ try:
except ImportError:
BytesIO = StringIO
try:
from io import BufferedIOBase
except ImportError:
class BufferedIOBase(object):
pass
# Non-unicode versions of constants for use in the pre-parser
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]")
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa
if _utils.supports_lone_surrogates:
# Use one extra step of indirection and create surrogates with
# eval. Not using this indirection would introduce an illegal
# unicode literal on platforms not supporting such lone
# surrogates.
assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used
"]")
else:
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
@@ -38,7 +48,7 @@ non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
0x10FFFE, 0x10FFFF])
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
# Cache for charsUntil()
charsUntilRegEx = {}
@@ -118,10 +128,13 @@ class BufferedStream(object):
return b"".join(rv)
def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
if isinstance(source, http_client.HTTPResponse):
# Work around Python bug #20007: read(0) closes the connection.
# http://bugs.python.org/issue20007
def HTMLInputStream(source, **kwargs):
# Work around Python bug #20007: read(0) closes the connection.
# http://bugs.python.org/issue20007
if (isinstance(source, http_client.HTTPResponse) or
# Also check for addinfourl wrapping HTTPResponse
(isinstance(source, urllib.response.addbase) and
isinstance(source.fp, http_client.HTTPResponse))):
isUnicode = False
elif hasattr(source, "read"):
isUnicode = isinstance(source.read(0), text_type)
@@ -129,12 +142,13 @@ def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
isUnicode = isinstance(source, text_type)
if isUnicode:
if encoding is not None:
raise TypeError("Cannot explicitly set an encoding with a unicode string")
encodings = [x for x in kwargs if x.endswith("_encoding")]
if encodings:
raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
return HTMLUnicodeInputStream(source)
return HTMLUnicodeInputStream(source, **kwargs)
else:
return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)
return HTMLBinaryInputStream(source, **kwargs)
class HTMLUnicodeInputStream(object):
@@ -160,22 +174,21 @@ class HTMLUnicodeInputStream(object):
regardless of any BOM or later declaration (such as in a meta
element)
parseMeta - Look for a <meta> element containing encoding information
"""
# Craziness
if len("\U0010FFFF") == 1:
if not _utils.supports_lone_surrogates:
# Such platforms will have already checked for such
# surrogate errors, so no need to do this checking.
self.reportCharacterErrors = None
elif len("\U0010FFFF") == 1:
self.reportCharacterErrors = self.characterErrorsUCS4
self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]")
else:
self.reportCharacterErrors = self.characterErrorsUCS2
self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])")
# List of where new lines occur
self.newLines = [0]
self.charEncoding = ("utf-8", "certain")
self.charEncoding = (lookupEncoding("utf-8"), "certain")
self.dataStream = self.openStream(source)
self.reset()
@@ -265,12 +278,10 @@ class HTMLUnicodeInputStream(object):
self._bufferedCharacter = data[-1]
data = data[:-1]
self.reportCharacterErrors(data)
if self.reportCharacterErrors:
self.reportCharacterErrors(data)
# Replace invalid characters
# Note U+0000 is dealt with in the tokenizer
data = self.replaceCharactersRegexp.sub("\ufffd", data)
data = data.replace("\r\n", "\n")
data = data.replace("\r", "\n")
@@ -280,7 +291,7 @@ class HTMLUnicodeInputStream(object):
return True
def characterErrorsUCS4(self, data):
for i in range(len(invalid_unicode_re.findall(data))):
for _ in range(len(invalid_unicode_re.findall(data))):
self.errors.append("invalid-codepoint")
def characterErrorsUCS2(self, data):
@@ -293,9 +304,9 @@ class HTMLUnicodeInputStream(object):
codepoint = ord(match.group())
pos = match.start()
# Pretty sure there should be endianness issues here
if utils.isSurrogatePair(data[pos:pos + 2]):
if _utils.isSurrogatePair(data[pos:pos + 2]):
# We have a surrogate pair!
char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
if char_val in non_bmp_invalid_codepoints:
self.errors.append("invalid-codepoint")
skip = True
@@ -378,7 +389,9 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
"""
def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
def __init__(self, source, override_encoding=None, transport_encoding=None,
same_origin_parent_encoding=None, likely_encoding=None,
default_encoding="windows-1252", useChardet=True):
"""Initialises the HTMLInputStream.
HTMLInputStream(source, [encoding]) -> Normalized stream from source
@@ -391,8 +404,6 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
regardless of any BOM or later declaration (such as in a meta
element)
parseMeta - Look for a <meta> element containing encoding information
"""
# Raw Stream - for unicode objects this will encode to utf-8 and set
# self.charEncoding as appropriate
@@ -400,27 +411,28 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
HTMLUnicodeInputStream.__init__(self, self.rawStream)
self.charEncoding = (codecName(encoding), "certain")
# Encoding Information
# Number of bytes to use when looking for a meta element with
# encoding information
self.numBytesMeta = 512
self.numBytesMeta = 1024
# Number of bytes to use when detecting the encoding with chardet
self.numBytesChardet = 100
# Encoding to use if no other information can be found
self.defaultEncoding = "windows-1252"
# Things from args
self.override_encoding = override_encoding
self.transport_encoding = transport_encoding
self.same_origin_parent_encoding = same_origin_parent_encoding
self.likely_encoding = likely_encoding
self.default_encoding = default_encoding
# Detect encoding iff no explicit "transport level" encoding is supplied
if (self.charEncoding[0] is None):
self.charEncoding = self.detectEncoding(parseMeta, chardet)
# Determine encoding
self.charEncoding = self.determineEncoding(useChardet)
assert self.charEncoding[0] is not None
# Call superclass
self.reset()
def reset(self):
self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
'replace')
self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
HTMLUnicodeInputStream.reset(self)
def openStream(self, source):
@@ -437,29 +449,50 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
try:
stream.seek(stream.tell())
except:
except: # pylint:disable=bare-except
stream = BufferedStream(stream)
return stream
def detectEncoding(self, parseMeta=True, chardet=True):
# First look for a BOM
def determineEncoding(self, chardet=True):
# BOMs take precedence over everything
# This will also read past the BOM if present
encoding = self.detectBOM()
confidence = "certain"
# If there is no BOM need to look for meta elements with encoding
# information
if encoding is None and parseMeta:
encoding = self.detectEncodingMeta()
confidence = "tentative"
# Guess with chardet, if avaliable
if encoding is None and chardet:
confidence = "tentative"
charEncoding = self.detectBOM(), "certain"
if charEncoding[0] is not None:
return charEncoding
# If we've been overridden, we've been overridden
charEncoding = lookupEncoding(self.override_encoding), "certain"
if charEncoding[0] is not None:
return charEncoding
# Now check the transport layer
charEncoding = lookupEncoding(self.transport_encoding), "certain"
if charEncoding[0] is not None:
return charEncoding
# Look for meta elements with encoding information
charEncoding = self.detectEncodingMeta(), "tentative"
if charEncoding[0] is not None:
return charEncoding
# Parent document encoding
charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
return charEncoding
# "likely" encoding
charEncoding = lookupEncoding(self.likely_encoding), "tentative"
if charEncoding[0] is not None:
return charEncoding
# Guess with chardet, if available
if chardet:
try:
try:
from charade.universaldetector import UniversalDetector
except ImportError:
from chardet.universaldetector import UniversalDetector
from chardet.universaldetector import UniversalDetector
except ImportError:
pass
else:
buffers = []
detector = UniversalDetector()
while not detector.done:
@@ -470,37 +503,34 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
buffers.append(buffer)
detector.feed(buffer)
detector.close()
encoding = detector.result['encoding']
encoding = lookupEncoding(detector.result['encoding'])
self.rawStream.seek(0)
except ImportError:
pass
# If all else fails use the default encoding
if encoding is None:
confidence = "tentative"
encoding = self.defaultEncoding
if encoding is not None:
return encoding, "tentative"
# Substitute for equivalent encodings:
encodingSub = {"iso-8859-1": "windows-1252"}
# Try the default encoding
charEncoding = lookupEncoding(self.default_encoding), "tentative"
if charEncoding[0] is not None:
return charEncoding
if encoding.lower() in encodingSub:
encoding = encodingSub[encoding.lower()]
return encoding, confidence
# Fallback to html5lib's default if even that hasn't worked
return lookupEncoding("windows-1252"), "tentative"
def changeEncoding(self, newEncoding):
assert self.charEncoding[1] != "certain"
newEncoding = codecName(newEncoding)
if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
newEncoding = "utf-8"
newEncoding = lookupEncoding(newEncoding)
if newEncoding is None:
return
if newEncoding.name in ("utf-16be", "utf-16le"):
newEncoding = lookupEncoding("utf-8")
assert newEncoding is not None
elif newEncoding == self.charEncoding[0]:
self.charEncoding = (self.charEncoding[0], "certain")
else:
self.rawStream.seek(0)
self.reset()
self.charEncoding = (newEncoding, "certain")
raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
self.reset()
raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
def detectBOM(self):
"""Attempts to detect at BOM at the start of the stream. If
@@ -508,8 +538,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
encoding otherwise return None"""
bomDict = {
codecs.BOM_UTF8: 'utf-8',
codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
}
# Go to beginning of file and read in 4 bytes
@@ -529,9 +559,12 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
# Set the read position past the BOM if one was found, otherwise
# set it to the start of the stream
self.rawStream.seek(encoding and seek or 0)
return encoding
if encoding:
self.rawStream.seek(seek)
return lookupEncoding(encoding)
else:
self.rawStream.seek(0)
return None
def detectEncodingMeta(self):
"""Report the encoding declared by the meta element
@@ -542,8 +575,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
self.rawStream.seek(0)
encoding = parser.getEncoding()
if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
encoding = "utf-8"
if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
encoding = lookupEncoding("utf-8")
return encoding
@@ -557,6 +590,7 @@ class EncodingBytes(bytes):
return bytes.__new__(self, value.lower())
def __init__(self, value):
# pylint:disable=unused-argument
self._position = -1
def __iter__(self):
@@ -667,7 +701,7 @@ class EncodingParser(object):
(b"<!", self.handleOther),
(b"<?", self.handleOther),
(b"<", self.handlePossibleStartTag))
for byte in self.data:
for _ in self.data:
keepParsing = True
for key, method in methodDispatch:
if self.data.matchBytes(key):
@@ -706,7 +740,7 @@ class EncodingParser(object):
return False
elif attr[0] == b"charset":
tentativeEncoding = attr[1]
codec = codecName(tentativeEncoding)
codec = lookupEncoding(tentativeEncoding)
if codec is not None:
self.encoding = codec
return False
@@ -714,7 +748,7 @@ class EncodingParser(object):
contentParser = ContentAttrParser(EncodingBytes(attr[1]))
tentativeEncoding = contentParser.parse()
if tentativeEncoding is not None:
codec = codecName(tentativeEncoding)
codec = lookupEncoding(tentativeEncoding)
if codec is not None:
if hasPragma:
self.encoding = codec
@@ -871,16 +905,19 @@ class ContentAttrParser(object):
return None
def codecName(encoding):
def lookupEncoding(encoding):
"""Return the python codec name corresponding to an encoding or None if the
string doesn't correspond to a valid encoding."""
if isinstance(encoding, bytes):
if isinstance(encoding, binary_type):
try:
encoding = encoding.decode("ascii")
except UnicodeDecodeError:
return None
if encoding:
canonicalName = ascii_punctuation_re.sub("", encoding).lower()
return encodings.get(canonicalName, None)
if encoding is not None:
try:
return webencodings.lookup(encoding)
except AttributeError:
return None
else:
return None
@@ -1,9 +1,6 @@
from __future__ import absolute_import, division, unicode_literals
try:
chr = unichr # flake8: noqa
except NameError:
pass
from six import unichr as chr
from collections import deque
@@ -14,9 +11,9 @@ from .constants import digits, hexDigits, EOF
from .constants import tokenTypes, tagTokenTypes
from .constants import replacementCharacters
from .inputstream import HTMLInputStream
from ._inputstream import HTMLInputStream
from .trie import Trie
from ._trie import Trie
entitiesTrie = Trie(entities)
@@ -34,16 +31,11 @@ class HTMLTokenizer(object):
Points to HTMLInputStream object.
"""
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
lowercaseElementName=True, lowercaseAttrName=True, parser=None):
def __init__(self, stream, parser=None, **kwargs):
self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet)
self.stream = HTMLInputStream(stream, **kwargs)
self.parser = parser
# Perform case conversions?
self.lowercaseElementName = lowercaseElementName
self.lowercaseAttrName = lowercaseAttrName
# Setup the initial tokenizer state
self.escapeFlag = False
self.lastFourChars = []
@@ -147,8 +139,8 @@ class HTMLTokenizer(object):
output = "&"
charStack = [self.stream.char()]
if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&")
or (allowedChar is not None and allowedChar == charStack[0])):
if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or
(allowedChar is not None and allowedChar == charStack[0])):
self.stream.unget(charStack[0])
elif charStack[0] == "#":
@@ -235,8 +227,7 @@ class HTMLTokenizer(object):
token = self.currentToken
# Add token to the queue to be yielded
if (token["type"] in tagTokenTypes):
if self.lowercaseElementName:
token["name"] = token["name"].translate(asciiUpper2Lower)
token["name"] = token["name"].translate(asciiUpper2Lower)
if token["type"] == tokenTypes["EndTag"]:
if token["data"]:
self.tokenQueue.append({"type": tokenTypes["ParseError"],
@@ -921,10 +912,9 @@ class HTMLTokenizer(object):
# Attributes are not dropped at this stage. That happens when the
# start tag token is emitted so values can still be safely appended
# to attributes, but we do want to report the parse error in time.
if self.lowercaseAttrName:
self.currentToken["data"][-1][0] = (
self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
for name, value in self.currentToken["data"][:-1]:
self.currentToken["data"][-1][0] = (
self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
for name, _ in self.currentToken["data"][:-1]:
if self.currentToken["data"][-1][0] == name:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"duplicate-attribute"})
@@ -1716,11 +1706,11 @@ class HTMLTokenizer(object):
else:
data.append(char)
data = "".join(data)
data = "".join(data) # pylint:disable=redefined-variable-type
# Deal with null here rather than in the parser
nullCount = data.count("\u0000")
if nullCount > 0:
for i in range(nullCount):
for _ in range(nullCount):
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
data = data.replace("\u0000", "\uFFFD")
@@ -4,9 +4,11 @@ from .py import Trie as PyTrie
Trie = PyTrie
# pylint:disable=wrong-import-position
try:
from .datrie import Trie as DATrie
except ImportError:
pass
else:
Trie = DATrie
# pylint:enable=wrong-import-position
@@ -7,13 +7,13 @@ class Trie(Mapping):
"""Abstract base class for tries"""
def keys(self, prefix=None):
keys = super().keys()
# pylint:disable=arguments-differ
keys = super(Trie, self).keys()
if prefix is None:
return set(keys)
# Python 2.6: no set comprehensions
return set([x for x in keys if x.startswith(prefix)])
return {x for x in keys if x.startswith(prefix)}
def has_keys_with_prefix(self, prefix):
for key in self.keys():
@@ -2,6 +2,8 @@ from __future__ import absolute_import, division, unicode_literals
from types import ModuleType
from six import text_type
try:
import xml.etree.cElementTree as default_etree
except ImportError:
@@ -9,7 +11,26 @@ except ImportError:
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
"surrogatePairToCodepoint", "moduleFactoryFactory"]
"surrogatePairToCodepoint", "moduleFactoryFactory",
"supports_lone_surrogates"]
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
# caught by the below test. In general this would be any platform
# using UTF-16 as its encoding of unicode strings, such as
# Jython. This is because UTF-16 itself is based on the use of such
# surrogates, and there is no mechanism to further escape such
# escapes.
try:
_x = eval('"\\uD800"') # pylint:disable=eval-used
if not isinstance(_x, text_type):
# We need this with u"" because of http://bugs.jython.org/issue2039
_x = eval('u"\\uD800"') # pylint:disable=eval-used
assert isinstance(_x, text_type)
except: # pylint:disable=bare-except
supports_lone_surrogates = False
else:
supports_lone_surrogates = True
class MethodDispatcher(dict):
@@ -31,19 +52,20 @@ class MethodDispatcher(dict):
# anything here.
_dictEntries = []
for name, value in items:
if type(name) in (list, tuple, frozenset, set):
if isinstance(name, (list, tuple, frozenset, set)):
for item in name:
_dictEntries.append((item, value))
else:
_dictEntries.append((name, value))
dict.__init__(self, _dictEntries)
assert len(self) == len(_dictEntries)
self.default = None
def __getitem__(self, key):
return dict.get(self, key, self.default)
# Some utility functions to dal with weirdness around UCS2 vs UCS4
# Some utility functions to deal with weirdness around UCS2 vs UCS4
# python builds
def isSurrogatePair(data):
@@ -70,13 +92,33 @@ def moduleFactoryFactory(factory):
else:
name = b"_%s_factory" % baseModule.__name__
if name in moduleCache:
return moduleCache[name]
else:
kwargs_tuple = tuple(kwargs.items())
try:
return moduleCache[name][args][kwargs_tuple]
except KeyError:
mod = ModuleType(name)
objs = factory(baseModule, *args, **kwargs)
mod.__dict__.update(objs)
moduleCache[name] = mod
if "name" not in moduleCache:
moduleCache[name] = {}
if "args" not in moduleCache[name]:
moduleCache[name][args] = {}
if "kwargs" not in moduleCache[name][args]:
moduleCache[name][args][kwargs_tuple] = {}
moduleCache[name][args][kwargs_tuple] = mod
return mod
return moduleFactory
def memoize(func):
    """Return a wrapper that caches the results of ``func``.

    Each distinct combination of positional and keyword arguments is
    computed once; later calls with the same arguments return the stored
    result. The cache lives as long as the returned wrapper and is never
    evicted.

    :arg func: the callable to cache; every argument passed to the wrapper
        must be hashable, otherwise building the cache key raises
        ``TypeError``

    :returns: a callable with the same call signature as ``func``
    """
    from functools import WRAPPER_ASSIGNMENTS, wraps

    cache = {}

    # Only copy the metadata attributes ``func`` really has: on Python 2
    # ``wraps`` raises AttributeError for callables lacking e.g. __name__.
    copied = tuple(name for name in WRAPPER_ASSIGNMENTS if hasattr(func, name))

    @wraps(func, assigned=copied)
    def wrapped(*args, **kwargs):
        # A frozenset makes the key independent of keyword order, so
        # f(a=1, b=2) and f(b=2, a=1) share a single cache entry.
        key = (args, frozenset(kwargs.items()))
        if key not in cache:
            cache[key] = func(*args, **kwargs)
        return cache[key]

    return wrapped
+270 -427
View File
@@ -1,292 +1,296 @@
from __future__ import absolute_import, division, unicode_literals
import string
import gettext
_ = gettext.gettext
EOF = None
E = {
"null-character":
_("Null character in input stream, replaced with U+FFFD."),
"Null character in input stream, replaced with U+FFFD.",
"invalid-codepoint":
_("Invalid codepoint in stream."),
"Invalid codepoint in stream.",
"incorrectly-placed-solidus":
_("Solidus (/) incorrectly placed in tag."),
"Solidus (/) incorrectly placed in tag.",
"incorrect-cr-newline-entity":
_("Incorrect CR newline entity, replaced with LF."),
"Incorrect CR newline entity, replaced with LF.",
"illegal-windows-1252-entity":
_("Entity used with illegal number (windows-1252 reference)."),
"Entity used with illegal number (windows-1252 reference).",
"cant-convert-numeric-entity":
_("Numeric entity couldn't be converted to character "
"(codepoint U+%(charAsInt)08x)."),
"Numeric entity couldn't be converted to character "
"(codepoint U+%(charAsInt)08x).",
"illegal-codepoint-for-numeric-entity":
_("Numeric entity represents an illegal codepoint: "
"U+%(charAsInt)08x."),
"Numeric entity represents an illegal codepoint: "
"U+%(charAsInt)08x.",
"numeric-entity-without-semicolon":
_("Numeric entity didn't end with ';'."),
"Numeric entity didn't end with ';'.",
"expected-numeric-entity-but-got-eof":
_("Numeric entity expected. Got end of file instead."),
"Numeric entity expected. Got end of file instead.",
"expected-numeric-entity":
_("Numeric entity expected but none found."),
"Numeric entity expected but none found.",
"named-entity-without-semicolon":
_("Named entity didn't end with ';'."),
"Named entity didn't end with ';'.",
"expected-named-entity":
_("Named entity expected. Got none."),
"Named entity expected. Got none.",
"attributes-in-end-tag":
_("End tag contains unexpected attributes."),
"End tag contains unexpected attributes.",
'self-closing-flag-on-end-tag':
_("End tag contains unexpected self-closing flag."),
"End tag contains unexpected self-closing flag.",
"expected-tag-name-but-got-right-bracket":
_("Expected tag name. Got '>' instead."),
"Expected tag name. Got '>' instead.",
"expected-tag-name-but-got-question-mark":
_("Expected tag name. Got '?' instead. (HTML doesn't "
"support processing instructions.)"),
"Expected tag name. Got '?' instead. (HTML doesn't "
"support processing instructions.)",
"expected-tag-name":
_("Expected tag name. Got something else instead"),
"Expected tag name. Got something else instead",
"expected-closing-tag-but-got-right-bracket":
_("Expected closing tag. Got '>' instead. Ignoring '</>'."),
"Expected closing tag. Got '>' instead. Ignoring '</>'.",
"expected-closing-tag-but-got-eof":
_("Expected closing tag. Unexpected end of file."),
"Expected closing tag. Unexpected end of file.",
"expected-closing-tag-but-got-char":
_("Expected closing tag. Unexpected character '%(data)s' found."),
"Expected closing tag. Unexpected character '%(data)s' found.",
"eof-in-tag-name":
_("Unexpected end of file in the tag name."),
"Unexpected end of file in the tag name.",
"expected-attribute-name-but-got-eof":
_("Unexpected end of file. Expected attribute name instead."),
"Unexpected end of file. Expected attribute name instead.",
"eof-in-attribute-name":
_("Unexpected end of file in attribute name."),
"Unexpected end of file in attribute name.",
"invalid-character-in-attribute-name":
_("Invalid character in attribute name"),
"Invalid character in attribute name",
"duplicate-attribute":
_("Dropped duplicate attribute on tag."),
"Dropped duplicate attribute on tag.",
"expected-end-of-tag-name-but-got-eof":
_("Unexpected end of file. Expected = or end of tag."),
"Unexpected end of file. Expected = or end of tag.",
"expected-attribute-value-but-got-eof":
_("Unexpected end of file. Expected attribute value."),
"Unexpected end of file. Expected attribute value.",
"expected-attribute-value-but-got-right-bracket":
_("Expected attribute value. Got '>' instead."),
"Expected attribute value. Got '>' instead.",
'equals-in-unquoted-attribute-value':
_("Unexpected = in unquoted attribute"),
"Unexpected = in unquoted attribute",
'unexpected-character-in-unquoted-attribute-value':
_("Unexpected character in unquoted attribute"),
"Unexpected character in unquoted attribute",
"invalid-character-after-attribute-name":
_("Unexpected character after attribute name."),
"Unexpected character after attribute name.",
"unexpected-character-after-attribute-value":
_("Unexpected character after attribute value."),
"Unexpected character after attribute value.",
"eof-in-attribute-value-double-quote":
_("Unexpected end of file in attribute value (\")."),
"Unexpected end of file in attribute value (\").",
"eof-in-attribute-value-single-quote":
_("Unexpected end of file in attribute value (')."),
"Unexpected end of file in attribute value (').",
"eof-in-attribute-value-no-quotes":
_("Unexpected end of file in attribute value."),
"Unexpected end of file in attribute value.",
"unexpected-EOF-after-solidus-in-tag":
_("Unexpected end of file in tag. Expected >"),
"Unexpected end of file in tag. Expected >",
"unexpected-character-after-solidus-in-tag":
_("Unexpected character after / in tag. Expected >"),
"Unexpected character after / in tag. Expected >",
"expected-dashes-or-doctype":
_("Expected '--' or 'DOCTYPE'. Not found."),
"Expected '--' or 'DOCTYPE'. Not found.",
"unexpected-bang-after-double-dash-in-comment":
_("Unexpected ! after -- in comment"),
"Unexpected ! after -- in comment",
"unexpected-space-after-double-dash-in-comment":
_("Unexpected space after -- in comment"),
"Unexpected space after -- in comment",
"incorrect-comment":
_("Incorrect comment."),
"Incorrect comment.",
"eof-in-comment":
_("Unexpected end of file in comment."),
"Unexpected end of file in comment.",
"eof-in-comment-end-dash":
_("Unexpected end of file in comment (-)"),
"Unexpected end of file in comment (-)",
"unexpected-dash-after-double-dash-in-comment":
_("Unexpected '-' after '--' found in comment."),
"Unexpected '-' after '--' found in comment.",
"eof-in-comment-double-dash":
_("Unexpected end of file in comment (--)."),
"Unexpected end of file in comment (--).",
"eof-in-comment-end-space-state":
_("Unexpected end of file in comment."),
"Unexpected end of file in comment.",
"eof-in-comment-end-bang-state":
_("Unexpected end of file in comment."),
"Unexpected end of file in comment.",
"unexpected-char-in-comment":
_("Unexpected character in comment found."),
"Unexpected character in comment found.",
"need-space-after-doctype":
_("No space after literal string 'DOCTYPE'."),
"No space after literal string 'DOCTYPE'.",
"expected-doctype-name-but-got-right-bracket":
_("Unexpected > character. Expected DOCTYPE name."),
"Unexpected > character. Expected DOCTYPE name.",
"expected-doctype-name-but-got-eof":
_("Unexpected end of file. Expected DOCTYPE name."),
"Unexpected end of file. Expected DOCTYPE name.",
"eof-in-doctype-name":
_("Unexpected end of file in DOCTYPE name."),
"Unexpected end of file in DOCTYPE name.",
"eof-in-doctype":
_("Unexpected end of file in DOCTYPE."),
"Unexpected end of file in DOCTYPE.",
"expected-space-or-right-bracket-in-doctype":
_("Expected space or '>'. Got '%(data)s'"),
"Expected space or '>'. Got '%(data)s'",
"unexpected-end-of-doctype":
_("Unexpected end of DOCTYPE."),
"Unexpected end of DOCTYPE.",
"unexpected-char-in-doctype":
_("Unexpected character in DOCTYPE."),
"Unexpected character in DOCTYPE.",
"eof-in-innerhtml":
_("XXX innerHTML EOF"),
"XXX innerHTML EOF",
"unexpected-doctype":
_("Unexpected DOCTYPE. Ignored."),
"Unexpected DOCTYPE. Ignored.",
"non-html-root":
_("html needs to be the first start tag."),
"html needs to be the first start tag.",
"expected-doctype-but-got-eof":
_("Unexpected End of file. Expected DOCTYPE."),
"Unexpected End of file. Expected DOCTYPE.",
"unknown-doctype":
_("Erroneous DOCTYPE."),
"Erroneous DOCTYPE.",
"expected-doctype-but-got-chars":
_("Unexpected non-space characters. Expected DOCTYPE."),
"Unexpected non-space characters. Expected DOCTYPE.",
"expected-doctype-but-got-start-tag":
_("Unexpected start tag (%(name)s). Expected DOCTYPE."),
"Unexpected start tag (%(name)s). Expected DOCTYPE.",
"expected-doctype-but-got-end-tag":
_("Unexpected end tag (%(name)s). Expected DOCTYPE."),
"Unexpected end tag (%(name)s). Expected DOCTYPE.",
"end-tag-after-implied-root":
_("Unexpected end tag (%(name)s) after the (implied) root element."),
"Unexpected end tag (%(name)s) after the (implied) root element.",
"expected-named-closing-tag-but-got-eof":
_("Unexpected end of file. Expected end tag (%(name)s)."),
"Unexpected end of file. Expected end tag (%(name)s).",
"two-heads-are-not-better-than-one":
_("Unexpected start tag head in existing head. Ignored."),
"Unexpected start tag head in existing head. Ignored.",
"unexpected-end-tag":
_("Unexpected end tag (%(name)s). Ignored."),
"Unexpected end tag (%(name)s). Ignored.",
"unexpected-start-tag-out-of-my-head":
_("Unexpected start tag (%(name)s) that can be in head. Moved."),
"Unexpected start tag (%(name)s) that can be in head. Moved.",
"unexpected-start-tag":
_("Unexpected start tag (%(name)s)."),
"Unexpected start tag (%(name)s).",
"missing-end-tag":
_("Missing end tag (%(name)s)."),
"Missing end tag (%(name)s).",
"missing-end-tags":
_("Missing end tags (%(name)s)."),
"Missing end tags (%(name)s).",
"unexpected-start-tag-implies-end-tag":
_("Unexpected start tag (%(startName)s) "
"implies end tag (%(endName)s)."),
"Unexpected start tag (%(startName)s) "
"implies end tag (%(endName)s).",
"unexpected-start-tag-treated-as":
_("Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
"Unexpected start tag (%(originalName)s). Treated as %(newName)s.",
"deprecated-tag":
_("Unexpected start tag %(name)s. Don't use it!"),
"Unexpected start tag %(name)s. Don't use it!",
"unexpected-start-tag-ignored":
_("Unexpected start tag %(name)s. Ignored."),
"Unexpected start tag %(name)s. Ignored.",
"expected-one-end-tag-but-got-another":
_("Unexpected end tag (%(gotName)s). "
"Missing end tag (%(expectedName)s)."),
"Unexpected end tag (%(gotName)s). "
"Missing end tag (%(expectedName)s).",
"end-tag-too-early":
_("End tag (%(name)s) seen too early. Expected other end tag."),
"End tag (%(name)s) seen too early. Expected other end tag.",
"end-tag-too-early-named":
_("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
"Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).",
"end-tag-too-early-ignored":
_("End tag (%(name)s) seen too early. Ignored."),
"End tag (%(name)s) seen too early. Ignored.",
"adoption-agency-1.1":
_("End tag (%(name)s) violates step 1, "
"paragraph 1 of the adoption agency algorithm."),
"End tag (%(name)s) violates step 1, "
"paragraph 1 of the adoption agency algorithm.",
"adoption-agency-1.2":
_("End tag (%(name)s) violates step 1, "
"paragraph 2 of the adoption agency algorithm."),
"End tag (%(name)s) violates step 1, "
"paragraph 2 of the adoption agency algorithm.",
"adoption-agency-1.3":
_("End tag (%(name)s) violates step 1, "
"paragraph 3 of the adoption agency algorithm."),
"End tag (%(name)s) violates step 1, "
"paragraph 3 of the adoption agency algorithm.",
"adoption-agency-4.4":
_("End tag (%(name)s) violates step 4, "
"paragraph 4 of the adoption agency algorithm."),
"End tag (%(name)s) violates step 4, "
"paragraph 4 of the adoption agency algorithm.",
"unexpected-end-tag-treated-as":
_("Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
"Unexpected end tag (%(originalName)s). Treated as %(newName)s.",
"no-end-tag":
_("This element (%(name)s) has no end tag."),
"This element (%(name)s) has no end tag.",
"unexpected-implied-end-tag-in-table":
_("Unexpected implied end tag (%(name)s) in the table phase."),
"Unexpected implied end tag (%(name)s) in the table phase.",
"unexpected-implied-end-tag-in-table-body":
_("Unexpected implied end tag (%(name)s) in the table body phase."),
"Unexpected implied end tag (%(name)s) in the table body phase.",
"unexpected-char-implies-table-voodoo":
_("Unexpected non-space characters in "
"table context caused voodoo mode."),
"Unexpected non-space characters in "
"table context caused voodoo mode.",
"unexpected-hidden-input-in-table":
_("Unexpected input with type hidden in table context."),
"Unexpected input with type hidden in table context.",
"unexpected-form-in-table":
_("Unexpected form in table context."),
"Unexpected form in table context.",
"unexpected-start-tag-implies-table-voodoo":
_("Unexpected start tag (%(name)s) in "
"table context caused voodoo mode."),
"Unexpected start tag (%(name)s) in "
"table context caused voodoo mode.",
"unexpected-end-tag-implies-table-voodoo":
_("Unexpected end tag (%(name)s) in "
"table context caused voodoo mode."),
"Unexpected end tag (%(name)s) in "
"table context caused voodoo mode.",
"unexpected-cell-in-table-body":
_("Unexpected table cell start tag (%(name)s) "
"in the table body phase."),
"Unexpected table cell start tag (%(name)s) "
"in the table body phase.",
"unexpected-cell-end-tag":
_("Got table cell end tag (%(name)s) "
"while required end tags are missing."),
"Got table cell end tag (%(name)s) "
"while required end tags are missing.",
"unexpected-end-tag-in-table-body":
_("Unexpected end tag (%(name)s) in the table body phase. Ignored."),
"Unexpected end tag (%(name)s) in the table body phase. Ignored.",
"unexpected-implied-end-tag-in-table-row":
_("Unexpected implied end tag (%(name)s) in the table row phase."),
"Unexpected implied end tag (%(name)s) in the table row phase.",
"unexpected-end-tag-in-table-row":
_("Unexpected end tag (%(name)s) in the table row phase. Ignored."),
"Unexpected end tag (%(name)s) in the table row phase. Ignored.",
"unexpected-select-in-select":
_("Unexpected select start tag in the select phase "
"treated as select end tag."),
"Unexpected select start tag in the select phase "
"treated as select end tag.",
"unexpected-input-in-select":
_("Unexpected input start tag in the select phase."),
"Unexpected input start tag in the select phase.",
"unexpected-start-tag-in-select":
_("Unexpected start tag token (%(name)s in the select phase. "
"Ignored."),
"Unexpected start tag token (%(name)s in the select phase. "
"Ignored.",
"unexpected-end-tag-in-select":
_("Unexpected end tag (%(name)s) in the select phase. Ignored."),
"Unexpected end tag (%(name)s) in the select phase. Ignored.",
"unexpected-table-element-start-tag-in-select-in-table":
_("Unexpected table element start tag (%(name)s) in the select in table phase."),
"Unexpected table element start tag (%(name)s) in the select in table phase.",
"unexpected-table-element-end-tag-in-select-in-table":
_("Unexpected table element end tag (%(name)s) in the select in table phase."),
"Unexpected table element end tag (%(name)s) in the select in table phase.",
"unexpected-char-after-body":
_("Unexpected non-space characters in the after body phase."),
"Unexpected non-space characters in the after body phase.",
"unexpected-start-tag-after-body":
_("Unexpected start tag token (%(name)s)"
" in the after body phase."),
"Unexpected start tag token (%(name)s)"
" in the after body phase.",
"unexpected-end-tag-after-body":
_("Unexpected end tag token (%(name)s)"
" in the after body phase."),
"Unexpected end tag token (%(name)s)"
" in the after body phase.",
"unexpected-char-in-frameset":
_("Unexpected characters in the frameset phase. Characters ignored."),
"Unexpected characters in the frameset phase. Characters ignored.",
"unexpected-start-tag-in-frameset":
_("Unexpected start tag token (%(name)s)"
" in the frameset phase. Ignored."),
"Unexpected start tag token (%(name)s)"
" in the frameset phase. Ignored.",
"unexpected-frameset-in-frameset-innerhtml":
_("Unexpected end tag token (frameset) "
"in the frameset phase (innerHTML)."),
"Unexpected end tag token (frameset) "
"in the frameset phase (innerHTML).",
"unexpected-end-tag-in-frameset":
_("Unexpected end tag token (%(name)s)"
" in the frameset phase. Ignored."),
"Unexpected end tag token (%(name)s)"
" in the frameset phase. Ignored.",
"unexpected-char-after-frameset":
_("Unexpected non-space characters in the "
"after frameset phase. Ignored."),
"Unexpected non-space characters in the "
"after frameset phase. Ignored.",
"unexpected-start-tag-after-frameset":
_("Unexpected start tag (%(name)s)"
" in the after frameset phase. Ignored."),
"Unexpected start tag (%(name)s)"
" in the after frameset phase. Ignored.",
"unexpected-end-tag-after-frameset":
_("Unexpected end tag (%(name)s)"
" in the after frameset phase. Ignored."),
"Unexpected end tag (%(name)s)"
" in the after frameset phase. Ignored.",
"unexpected-end-tag-after-body-innerhtml":
_("Unexpected end tag after body(innerHtml)"),
"Unexpected end tag after body(innerHtml)",
"expected-eof-but-got-char":
_("Unexpected non-space characters. Expected end of file."),
"Unexpected non-space characters. Expected end of file.",
"expected-eof-but-got-start-tag":
_("Unexpected start tag (%(name)s)"
". Expected end of file."),
"Unexpected start tag (%(name)s)"
". Expected end of file.",
"expected-eof-but-got-end-tag":
_("Unexpected end tag (%(name)s)"
". Expected end of file."),
"Unexpected end tag (%(name)s)"
". Expected end of file.",
"eof-in-table":
_("Unexpected end of file. Expected table content."),
"Unexpected end of file. Expected table content.",
"eof-in-select":
_("Unexpected end of file. Expected select content."),
"Unexpected end of file. Expected select content.",
"eof-in-frameset":
_("Unexpected end of file. Expected frameset content."),
"Unexpected end of file. Expected frameset content.",
"eof-in-script-in-script":
_("Unexpected end of file. Expected script content."),
"Unexpected end of file. Expected script content.",
"eof-in-foreign-lands":
_("Unexpected end of file. Expected foreign content"),
"Unexpected end of file. Expected foreign content",
"non-void-element-with-trailing-solidus":
_("Trailing solidus not allowed on element %(name)s"),
"Trailing solidus not allowed on element %(name)s",
"unexpected-html-element-in-foreign-content":
_("Element %(name)s not allowed in a non-html context"),
"Element %(name)s not allowed in a non-html context",
"unexpected-end-tag-before-html":
_("Unexpected end tag (%(name)s) before html."),
"Unexpected end tag (%(name)s) before html.",
"unexpected-inhead-noscript-tag":
"Element %(name)s not allowed in a inhead-noscript context",
"eof-in-head-noscript":
"Unexpected end of file. Expected inhead-noscript content",
"char-in-head-noscript":
"Unexpected non-space character. Expected inhead-noscript content",
"XXX-undefined-error":
_("Undefined error (this sucks and should be fixed)"),
"Undefined error (this sucks and should be fixed)",
}
namespaces = {
@@ -298,7 +302,7 @@ namespaces = {
"xmlns": "http://www.w3.org/2000/xmlns/"
}
scopingElements = frozenset((
scopingElements = frozenset([
(namespaces["html"], "applet"),
(namespaces["html"], "caption"),
(namespaces["html"], "html"),
@@ -316,9 +320,9 @@ scopingElements = frozenset((
(namespaces["svg"], "foreignObject"),
(namespaces["svg"], "desc"),
(namespaces["svg"], "title"),
))
])
formattingElements = frozenset((
formattingElements = frozenset([
(namespaces["html"], "a"),
(namespaces["html"], "b"),
(namespaces["html"], "big"),
@@ -333,9 +337,9 @@ formattingElements = frozenset((
(namespaces["html"], "strong"),
(namespaces["html"], "tt"),
(namespaces["html"], "u")
))
])
specialElements = frozenset((
specialElements = frozenset([
(namespaces["html"], "address"),
(namespaces["html"], "applet"),
(namespaces["html"], "area"),
@@ -416,22 +420,89 @@ specialElements = frozenset((
(namespaces["html"], "wbr"),
(namespaces["html"], "xmp"),
(namespaces["svg"], "foreignObject")
))
])
htmlIntegrationPointElements = frozenset((
(namespaces["mathml"], "annotaion-xml"),
htmlIntegrationPointElements = frozenset([
(namespaces["mathml"], "annotation-xml"),
(namespaces["svg"], "foreignObject"),
(namespaces["svg"], "desc"),
(namespaces["svg"], "title")
))
])
mathmlTextIntegrationPointElements = frozenset((
mathmlTextIntegrationPointElements = frozenset([
(namespaces["mathml"], "mi"),
(namespaces["mathml"], "mo"),
(namespaces["mathml"], "mn"),
(namespaces["mathml"], "ms"),
(namespaces["mathml"], "mtext")
))
])
# Maps the all-lowercase spelling of each SVG attribute name to its
# mixed-case spelling. Only the mixed-case form is listed; the lookup key
# is obtained by lowercasing it.
adjustSVGAttributes = {
    mixedCase.lower(): mixedCase
    for mixedCase in (
        "attributeName",
        "attributeType",
        "baseFrequency",
        "baseProfile",
        "calcMode",
        "clipPathUnits",
        "contentScriptType",
        "contentStyleType",
        "diffuseConstant",
        "edgeMode",
        "externalResourcesRequired",
        "filterRes",
        "filterUnits",
        "glyphRef",
        "gradientTransform",
        "gradientUnits",
        "kernelMatrix",
        "kernelUnitLength",
        "keyPoints",
        "keySplines",
        "keyTimes",
        "lengthAdjust",
        "limitingConeAngle",
        "markerHeight",
        "markerUnits",
        "markerWidth",
        "maskContentUnits",
        "maskUnits",
        "numOctaves",
        "pathLength",
        "patternContentUnits",
        "patternTransform",
        "patternUnits",
        "pointsAtX",
        "pointsAtY",
        "pointsAtZ",
        "preserveAlpha",
        "preserveAspectRatio",
        "primitiveUnits",
        "refX",
        "refY",
        "repeatCount",
        "repeatDur",
        "requiredExtensions",
        "requiredFeatures",
        "specularConstant",
        "specularExponent",
        "spreadMethod",
        "startOffset",
        "stdDeviation",
        "stitchTiles",
        "surfaceScale",
        "systemLanguage",
        "tableValues",
        "targetX",
        "targetY",
        "textLength",
        "viewBox",
        "viewTarget",
        "xChannelSelector",
        "yChannelSelector",
        "zoomAndPan",
    )
}
adjustMathMLAttributes = {"definitionurl": "definitionURL"}
adjustForeignAttributes = {
"xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
@@ -451,21 +522,21 @@ adjustForeignAttributes = {
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
adjustForeignAttributes.items()])
spaceCharacters = frozenset((
spaceCharacters = frozenset([
"\t",
"\n",
"\u000C",
" ",
"\r"
))
])
tableInsertModeElements = frozenset((
tableInsertModeElements = frozenset([
"table",
"tbody",
"tfoot",
"thead",
"tr"
))
])
asciiLowercase = frozenset(string.ascii_lowercase)
asciiUppercase = frozenset(string.ascii_uppercase)
@@ -486,7 +557,7 @@ headingElements = (
"h6"
)
voidElements = frozenset((
voidElements = frozenset([
"base",
"command",
"event-source",
@@ -502,11 +573,11 @@ voidElements = frozenset((
"input",
"source",
"track"
))
])
cdataElements = frozenset(('title', 'textarea'))
cdataElements = frozenset(['title', 'textarea'])
rcdataElements = frozenset((
rcdataElements = frozenset([
'style',
'script',
'xmp',
@@ -514,27 +585,28 @@ rcdataElements = frozenset((
'noembed',
'noframes',
'noscript'
))
])
booleanAttributes = {
"": frozenset(("irrelevant",)),
"style": frozenset(("scoped",)),
"img": frozenset(("ismap",)),
"audio": frozenset(("autoplay", "controls")),
"video": frozenset(("autoplay", "controls")),
"script": frozenset(("defer", "async")),
"details": frozenset(("open",)),
"datagrid": frozenset(("multiple", "disabled")),
"command": frozenset(("hidden", "disabled", "checked", "default")),
"hr": frozenset(("noshade")),
"menu": frozenset(("autosubmit",)),
"fieldset": frozenset(("disabled", "readonly")),
"option": frozenset(("disabled", "readonly", "selected")),
"optgroup": frozenset(("disabled", "readonly")),
"button": frozenset(("disabled", "autofocus")),
"input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")),
"select": frozenset(("disabled", "readonly", "autofocus", "multiple")),
"output": frozenset(("disabled", "readonly")),
"": frozenset(["irrelevant", "itemscope"]),
"style": frozenset(["scoped"]),
"img": frozenset(["ismap"]),
"audio": frozenset(["autoplay", "controls"]),
"video": frozenset(["autoplay", "controls"]),
"script": frozenset(["defer", "async"]),
"details": frozenset(["open"]),
"datagrid": frozenset(["multiple", "disabled"]),
"command": frozenset(["hidden", "disabled", "checked", "default"]),
"hr": frozenset(["noshade"]),
"menu": frozenset(["autosubmit"]),
"fieldset": frozenset(["disabled", "readonly"]),
"option": frozenset(["disabled", "readonly", "selected"]),
"optgroup": frozenset(["disabled", "readonly"]),
"button": frozenset(["disabled", "autofocus"]),
"input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
"select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
"output": frozenset(["disabled", "readonly"]),
"iframe": frozenset(["seamless"]),
}
# entitiesWindows1252 has to be _ordered_ and needs to have an index. It
@@ -574,7 +646,7 @@ entitiesWindows1252 = (
376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
)
xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;'))
xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;'])
entities = {
"AElig": "\xc6",
@@ -2815,7 +2887,6 @@ replacementCharacters = {
0x0d: "\u000D",
0x80: "\u20AC",
0x81: "\u0081",
0x81: "\u0081",
0x82: "\u201A",
0x83: "\u0192",
0x84: "\u201E",
@@ -2848,235 +2919,6 @@ replacementCharacters = {
0x9F: "\u0178",
}
encodings = {
'437': 'cp437',
'850': 'cp850',
'852': 'cp852',
'855': 'cp855',
'857': 'cp857',
'860': 'cp860',
'861': 'cp861',
'862': 'cp862',
'863': 'cp863',
'865': 'cp865',
'866': 'cp866',
'869': 'cp869',
'ansix341968': 'ascii',
'ansix341986': 'ascii',
'arabic': 'iso8859-6',
'ascii': 'ascii',
'asmo708': 'iso8859-6',
'big5': 'big5',
'big5hkscs': 'big5hkscs',
'chinese': 'gbk',
'cp037': 'cp037',
'cp1026': 'cp1026',
'cp154': 'ptcp154',
'cp367': 'ascii',
'cp424': 'cp424',
'cp437': 'cp437',
'cp500': 'cp500',
'cp775': 'cp775',
'cp819': 'windows-1252',
'cp850': 'cp850',
'cp852': 'cp852',
'cp855': 'cp855',
'cp857': 'cp857',
'cp860': 'cp860',
'cp861': 'cp861',
'cp862': 'cp862',
'cp863': 'cp863',
'cp864': 'cp864',
'cp865': 'cp865',
'cp866': 'cp866',
'cp869': 'cp869',
'cp936': 'gbk',
'cpgr': 'cp869',
'cpis': 'cp861',
'csascii': 'ascii',
'csbig5': 'big5',
'cseuckr': 'cp949',
'cseucpkdfmtjapanese': 'euc_jp',
'csgb2312': 'gbk',
'cshproman8': 'hp-roman8',
'csibm037': 'cp037',
'csibm1026': 'cp1026',
'csibm424': 'cp424',
'csibm500': 'cp500',
'csibm855': 'cp855',
'csibm857': 'cp857',
'csibm860': 'cp860',
'csibm861': 'cp861',
'csibm863': 'cp863',
'csibm864': 'cp864',
'csibm865': 'cp865',
'csibm866': 'cp866',
'csibm869': 'cp869',
'csiso2022jp': 'iso2022_jp',
'csiso2022jp2': 'iso2022_jp_2',
'csiso2022kr': 'iso2022_kr',
'csiso58gb231280': 'gbk',
'csisolatin1': 'windows-1252',
'csisolatin2': 'iso8859-2',
'csisolatin3': 'iso8859-3',
'csisolatin4': 'iso8859-4',
'csisolatin5': 'windows-1254',
'csisolatin6': 'iso8859-10',
'csisolatinarabic': 'iso8859-6',
'csisolatincyrillic': 'iso8859-5',
'csisolatingreek': 'iso8859-7',
'csisolatinhebrew': 'iso8859-8',
'cskoi8r': 'koi8-r',
'csksc56011987': 'cp949',
'cspc775baltic': 'cp775',
'cspc850multilingual': 'cp850',
'cspc862latinhebrew': 'cp862',
'cspc8codepage437': 'cp437',
'cspcp852': 'cp852',
'csptcp154': 'ptcp154',
'csshiftjis': 'shift_jis',
'csunicode11utf7': 'utf-7',
'cyrillic': 'iso8859-5',
'cyrillicasian': 'ptcp154',
'ebcdiccpbe': 'cp500',
'ebcdiccpca': 'cp037',
'ebcdiccpch': 'cp500',
'ebcdiccphe': 'cp424',
'ebcdiccpnl': 'cp037',
'ebcdiccpus': 'cp037',
'ebcdiccpwt': 'cp037',
'ecma114': 'iso8859-6',
'ecma118': 'iso8859-7',
'elot928': 'iso8859-7',
'eucjp': 'euc_jp',
'euckr': 'cp949',
'extendedunixcodepackedformatforjapanese': 'euc_jp',
'gb18030': 'gb18030',
'gb2312': 'gbk',
'gb231280': 'gbk',
'gbk': 'gbk',
'greek': 'iso8859-7',
'greek8': 'iso8859-7',
'hebrew': 'iso8859-8',
'hproman8': 'hp-roman8',
'hzgb2312': 'hz',
'ibm037': 'cp037',
'ibm1026': 'cp1026',
'ibm367': 'ascii',
'ibm424': 'cp424',
'ibm437': 'cp437',
'ibm500': 'cp500',
'ibm775': 'cp775',
'ibm819': 'windows-1252',
'ibm850': 'cp850',
'ibm852': 'cp852',
'ibm855': 'cp855',
'ibm857': 'cp857',
'ibm860': 'cp860',
'ibm861': 'cp861',
'ibm862': 'cp862',
'ibm863': 'cp863',
'ibm864': 'cp864',
'ibm865': 'cp865',
'ibm866': 'cp866',
'ibm869': 'cp869',
'iso2022jp': 'iso2022_jp',
'iso2022jp2': 'iso2022_jp_2',
'iso2022kr': 'iso2022_kr',
'iso646irv1991': 'ascii',
'iso646us': 'ascii',
'iso88591': 'windows-1252',
'iso885910': 'iso8859-10',
'iso8859101992': 'iso8859-10',
'iso885911987': 'windows-1252',
'iso885913': 'iso8859-13',
'iso885914': 'iso8859-14',
'iso8859141998': 'iso8859-14',
'iso885915': 'iso8859-15',
'iso885916': 'iso8859-16',
'iso8859162001': 'iso8859-16',
'iso88592': 'iso8859-2',
'iso885921987': 'iso8859-2',
'iso88593': 'iso8859-3',
'iso885931988': 'iso8859-3',
'iso88594': 'iso8859-4',
'iso885941988': 'iso8859-4',
'iso88595': 'iso8859-5',
'iso885951988': 'iso8859-5',
'iso88596': 'iso8859-6',
'iso885961987': 'iso8859-6',
'iso88597': 'iso8859-7',
'iso885971987': 'iso8859-7',
'iso88598': 'iso8859-8',
'iso885981988': 'iso8859-8',
'iso88599': 'windows-1254',
'iso885991989': 'windows-1254',
'isoceltic': 'iso8859-14',
'isoir100': 'windows-1252',
'isoir101': 'iso8859-2',
'isoir109': 'iso8859-3',
'isoir110': 'iso8859-4',
'isoir126': 'iso8859-7',
'isoir127': 'iso8859-6',
'isoir138': 'iso8859-8',
'isoir144': 'iso8859-5',
'isoir148': 'windows-1254',
'isoir149': 'cp949',
'isoir157': 'iso8859-10',
'isoir199': 'iso8859-14',
'isoir226': 'iso8859-16',
'isoir58': 'gbk',
'isoir6': 'ascii',
'koi8r': 'koi8-r',
'koi8u': 'koi8-u',
'korean': 'cp949',
'ksc5601': 'cp949',
'ksc56011987': 'cp949',
'ksc56011989': 'cp949',
'l1': 'windows-1252',
'l10': 'iso8859-16',
'l2': 'iso8859-2',
'l3': 'iso8859-3',
'l4': 'iso8859-4',
'l5': 'windows-1254',
'l6': 'iso8859-10',
'l8': 'iso8859-14',
'latin1': 'windows-1252',
'latin10': 'iso8859-16',
'latin2': 'iso8859-2',
'latin3': 'iso8859-3',
'latin4': 'iso8859-4',
'latin5': 'windows-1254',
'latin6': 'iso8859-10',
'latin8': 'iso8859-14',
'latin9': 'iso8859-15',
'ms936': 'gbk',
'mskanji': 'shift_jis',
'pt154': 'ptcp154',
'ptcp154': 'ptcp154',
'r8': 'hp-roman8',
'roman8': 'hp-roman8',
'shiftjis': 'shift_jis',
'tis620': 'cp874',
'unicode11utf7': 'utf-7',
'us': 'ascii',
'usascii': 'ascii',
'utf16': 'utf-16',
'utf16be': 'utf-16-be',
'utf16le': 'utf-16-le',
'utf8': 'utf-8',
'windows1250': 'cp1250',
'windows1251': 'cp1251',
'windows1252': 'cp1252',
'windows1253': 'cp1253',
'windows1254': 'cp1254',
'windows1255': 'cp1255',
'windows1256': 'cp1256',
'windows1257': 'cp1257',
'windows1258': 'cp1258',
'windows936': 'gbk',
'x-x-big5': 'big5'}
tokenTypes = {
"Doctype": 0,
"Characters": 1,
@@ -3088,8 +2930,8 @@ tokenTypes = {
"ParseError": 7
}
tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]))
tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]])
prefixes = dict([(v, k) for k, v in namespaces.items()])
@@ -3097,8 +2939,9 @@ prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
class DataLossWarning(UserWarning):
    """Warning category issued when the current tree cannot represent the input data."""
class ReparseException(Exception):
class _ReparseException(Exception):
pass
@@ -1,20 +1,29 @@
from __future__ import absolute_import, division, unicode_literals
from . import _base
from . import base
try:
from collections import OrderedDict
except ImportError:
from ordereddict import OrderedDict
from collections import OrderedDict
class Filter(_base.Filter):
def _attr_key(attr):
    """Build the sort key for one ``((namespace, name), value)`` attribute item.

    The namespace part is either ``None`` or a string. Those two types do not
    order against each other, so ``None`` is replaced by an empty string
    before the key is returned.

    :arg attr: an item whose first element is a ``(namespace, name)`` pair

    :returns: ``(namespace or '', name)``

    """
    qualified_name = attr[0]
    namespace = qualified_name[0]
    local_name = qualified_name[1]
    return (namespace or ''), local_name
class Filter(base.Filter):
"""Alphabetizes attributes for elements"""
def __iter__(self):
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
if token["type"] in ("StartTag", "EmptyTag"):
attrs = OrderedDict()
for name, value in sorted(token["data"].items(),
key=lambda x: x[0]):
key=_attr_key):
attrs[name] = value
token["data"] = attrs
yield token
@@ -1,11 +1,19 @@
from __future__ import absolute_import, division, unicode_literals
from . import _base
from . import base
class Filter(_base.Filter):
class Filter(base.Filter):
"""Injects ``<meta charset=ENCODING>`` tag into head of document"""
def __init__(self, source, encoding):
_base.Filter.__init__(self, source)
"""Creates a Filter
:arg source: the source token stream
:arg encoding: the encoding to set
"""
base.Filter.__init__(self, source)
self.encoding = encoding
def __iter__(self):
@@ -13,7 +21,7 @@ class Filter(_base.Filter):
meta_found = (self.encoding is None)
pending = []
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag":
if token["name"].lower() == "head":
@@ -1,93 +1,93 @@
from __future__ import absolute_import, division, unicode_literals
from gettext import gettext
_ = gettext
from six import text_type
from . import _base
from ..constants import cdataElements, rcdataElements, voidElements
from . import base
from ..constants import namespaces, voidElements
from ..constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)
class LintError(Exception):
pass
class Filter(base.Filter):
"""Lints the token stream for errors
If it finds any errors, it'll raise an ``AssertionError``.
"""
def __init__(self, source, require_matching_tags=True):
"""Creates a Filter
:arg source: the source token stream
:arg require_matching_tags: whether or not to require matching tags
"""
super(Filter, self).__init__(source)
self.require_matching_tags = require_matching_tags
class Filter(_base.Filter):
def __iter__(self):
open_elements = []
contentModelFlag = "PCDATA"
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
namespace = token["namespace"]
name = token["name"]
if contentModelFlag != "PCDATA":
raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
if not isinstance(name, str):
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
if not name:
raise LintError(_("Empty tag name"))
if type == "StartTag" and name in voidElements:
raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
elif type == "EmptyTag" and name not in voidElements:
raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
if type == "StartTag":
open_elements.append(name)
for name, value in token["data"]:
if not isinstance(name, str):
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
if not name:
raise LintError(_("Empty attribute name"))
if not isinstance(value, str):
raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
if name in cdataElements:
contentModelFlag = "CDATA"
elif name in rcdataElements:
contentModelFlag = "RCDATA"
elif name == "plaintext":
contentModelFlag = "PLAINTEXT"
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
assert isinstance(token["data"], dict)
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
assert type == "EmptyTag"
else:
assert type == "StartTag"
if type == "StartTag" and self.require_matching_tags:
open_elements.append((namespace, name))
for (namespace, name), value in token["data"].items():
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
assert isinstance(value, text_type)
elif type == "EndTag":
namespace = token["namespace"]
name = token["name"]
if not isinstance(name, str):
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
if not name:
raise LintError(_("Empty tag name"))
if name in voidElements:
raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
start_name = open_elements.pop()
if start_name != name:
raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
contentModelFlag = "PCDATA"
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
elif self.require_matching_tags:
start = open_elements.pop()
assert start == (namespace, name)
elif type == "Comment":
if contentModelFlag != "PCDATA":
raise LintError(_("Comment not in PCDATA content model flag"))
data = token["data"]
assert isinstance(data, text_type)
elif type in ("Characters", "SpaceCharacters"):
data = token["data"]
if not isinstance(data, str):
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data})
if not data:
raise LintError(_("%(type)s token with empty data") % {"type": type})
assert isinstance(data, text_type)
assert data != ""
if type == "SpaceCharacters":
data = data.strip(spaceCharacters)
if data:
raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})
assert data.strip(spaceCharacters) == ""
elif type == "Doctype":
name = token["name"]
if contentModelFlag != "PCDATA":
raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
if not isinstance(name, str):
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
# XXX: what to do with token["data"] ?
assert name is None or isinstance(name, text_type)
assert token["publicId"] is None or isinstance(name, text_type)
assert token["systemId"] is None or isinstance(name, text_type)
elif type in ("ParseError", "SerializeError"):
pass
elif type == "Entity":
assert isinstance(token["name"], text_type)
elif type == "SerializerError":
assert isinstance(token["data"], text_type)
else:
raise LintError(_("Unknown token type: %(type)s") % {"type": type})
assert False, "Unknown token type: %(type)s" % {"type": type}
yield token
@@ -1,9 +1,10 @@
from __future__ import absolute_import, division, unicode_literals
from . import _base
from . import base
class Filter(_base.Filter):
class Filter(base.Filter):
"""Removes optional tags from the token stream"""
def slider(self):
previous1 = previous2 = None
for token in self.source:
@@ -11,7 +12,8 @@ class Filter(_base.Filter):
yield previous2, previous1, token
previous2 = previous1
previous1 = token
yield previous2, previous1, None
if previous1 is not None:
yield previous2, previous1, None
def __iter__(self):
for previous, token, next in self.slider():
@@ -58,7 +60,7 @@ class Filter(_base.Filter):
elif tagname == 'colgroup':
# A colgroup element's start tag may be omitted if the first thing
# inside the colgroup element is a col element, and if the element
# is not immediately preceeded by another colgroup element whose
# is not immediately preceded by another colgroup element whose
# end tag has been omitted.
if type in ("StartTag", "EmptyTag"):
# XXX: we do not look at the preceding event, so instead we never
@@ -70,7 +72,7 @@ class Filter(_base.Filter):
elif tagname == 'tbody':
# A tbody element's start tag may be omitted if the first thing
# inside the tbody element is a tr element, and if the element is
# not immediately preceeded by a tbody, thead, or tfoot element
# not immediately preceded by a tbody, thead, or tfoot element
# whose end tag has been omitted.
if type == "StartTag":
# omit the thead and tfoot elements' end tag when they are
@@ -1,12 +1,896 @@
from __future__ import absolute_import, division, unicode_literals
from . import _base
from ..sanitizer import HTMLSanitizerMixin
import re
from xml.sax.saxutils import escape, unescape
from six.moves import urllib_parse as urlparse
from . import base
from ..constants import namespaces, prefixes
__all__ = ["Filter"]
class Filter(_base.Filter, HTMLSanitizerMixin):
allowed_elements = frozenset((
(namespaces['html'], 'a'),
(namespaces['html'], 'abbr'),
(namespaces['html'], 'acronym'),
(namespaces['html'], 'address'),
(namespaces['html'], 'area'),
(namespaces['html'], 'article'),
(namespaces['html'], 'aside'),
(namespaces['html'], 'audio'),
(namespaces['html'], 'b'),
(namespaces['html'], 'big'),
(namespaces['html'], 'blockquote'),
(namespaces['html'], 'br'),
(namespaces['html'], 'button'),
(namespaces['html'], 'canvas'),
(namespaces['html'], 'caption'),
(namespaces['html'], 'center'),
(namespaces['html'], 'cite'),
(namespaces['html'], 'code'),
(namespaces['html'], 'col'),
(namespaces['html'], 'colgroup'),
(namespaces['html'], 'command'),
(namespaces['html'], 'datagrid'),
(namespaces['html'], 'datalist'),
(namespaces['html'], 'dd'),
(namespaces['html'], 'del'),
(namespaces['html'], 'details'),
(namespaces['html'], 'dfn'),
(namespaces['html'], 'dialog'),
(namespaces['html'], 'dir'),
(namespaces['html'], 'div'),
(namespaces['html'], 'dl'),
(namespaces['html'], 'dt'),
(namespaces['html'], 'em'),
(namespaces['html'], 'event-source'),
(namespaces['html'], 'fieldset'),
(namespaces['html'], 'figcaption'),
(namespaces['html'], 'figure'),
(namespaces['html'], 'footer'),
(namespaces['html'], 'font'),
(namespaces['html'], 'form'),
(namespaces['html'], 'header'),
(namespaces['html'], 'h1'),
(namespaces['html'], 'h2'),
(namespaces['html'], 'h3'),
(namespaces['html'], 'h4'),
(namespaces['html'], 'h5'),
(namespaces['html'], 'h6'),
(namespaces['html'], 'hr'),
(namespaces['html'], 'i'),
(namespaces['html'], 'img'),
(namespaces['html'], 'input'),
(namespaces['html'], 'ins'),
(namespaces['html'], 'keygen'),
(namespaces['html'], 'kbd'),
(namespaces['html'], 'label'),
(namespaces['html'], 'legend'),
(namespaces['html'], 'li'),
(namespaces['html'], 'm'),
(namespaces['html'], 'map'),
(namespaces['html'], 'menu'),
(namespaces['html'], 'meter'),
(namespaces['html'], 'multicol'),
(namespaces['html'], 'nav'),
(namespaces['html'], 'nextid'),
(namespaces['html'], 'ol'),
(namespaces['html'], 'output'),
(namespaces['html'], 'optgroup'),
(namespaces['html'], 'option'),
(namespaces['html'], 'p'),
(namespaces['html'], 'pre'),
(namespaces['html'], 'progress'),
(namespaces['html'], 'q'),
(namespaces['html'], 's'),
(namespaces['html'], 'samp'),
(namespaces['html'], 'section'),
(namespaces['html'], 'select'),
(namespaces['html'], 'small'),
(namespaces['html'], 'sound'),
(namespaces['html'], 'source'),
(namespaces['html'], 'spacer'),
(namespaces['html'], 'span'),
(namespaces['html'], 'strike'),
(namespaces['html'], 'strong'),
(namespaces['html'], 'sub'),
(namespaces['html'], 'sup'),
(namespaces['html'], 'table'),
(namespaces['html'], 'tbody'),
(namespaces['html'], 'td'),
(namespaces['html'], 'textarea'),
(namespaces['html'], 'time'),
(namespaces['html'], 'tfoot'),
(namespaces['html'], 'th'),
(namespaces['html'], 'thead'),
(namespaces['html'], 'tr'),
(namespaces['html'], 'tt'),
(namespaces['html'], 'u'),
(namespaces['html'], 'ul'),
(namespaces['html'], 'var'),
(namespaces['html'], 'video'),
(namespaces['mathml'], 'maction'),
(namespaces['mathml'], 'math'),
(namespaces['mathml'], 'merror'),
(namespaces['mathml'], 'mfrac'),
(namespaces['mathml'], 'mi'),
(namespaces['mathml'], 'mmultiscripts'),
(namespaces['mathml'], 'mn'),
(namespaces['mathml'], 'mo'),
(namespaces['mathml'], 'mover'),
(namespaces['mathml'], 'mpadded'),
(namespaces['mathml'], 'mphantom'),
(namespaces['mathml'], 'mprescripts'),
(namespaces['mathml'], 'mroot'),
(namespaces['mathml'], 'mrow'),
(namespaces['mathml'], 'mspace'),
(namespaces['mathml'], 'msqrt'),
(namespaces['mathml'], 'mstyle'),
(namespaces['mathml'], 'msub'),
(namespaces['mathml'], 'msubsup'),
(namespaces['mathml'], 'msup'),
(namespaces['mathml'], 'mtable'),
(namespaces['mathml'], 'mtd'),
(namespaces['mathml'], 'mtext'),
(namespaces['mathml'], 'mtr'),
(namespaces['mathml'], 'munder'),
(namespaces['mathml'], 'munderover'),
(namespaces['mathml'], 'none'),
(namespaces['svg'], 'a'),
(namespaces['svg'], 'animate'),
(namespaces['svg'], 'animateColor'),
(namespaces['svg'], 'animateMotion'),
(namespaces['svg'], 'animateTransform'),
(namespaces['svg'], 'clipPath'),
(namespaces['svg'], 'circle'),
(namespaces['svg'], 'defs'),
(namespaces['svg'], 'desc'),
(namespaces['svg'], 'ellipse'),
(namespaces['svg'], 'font-face'),
(namespaces['svg'], 'font-face-name'),
(namespaces['svg'], 'font-face-src'),
(namespaces['svg'], 'g'),
(namespaces['svg'], 'glyph'),
(namespaces['svg'], 'hkern'),
(namespaces['svg'], 'linearGradient'),
(namespaces['svg'], 'line'),
(namespaces['svg'], 'marker'),
(namespaces['svg'], 'metadata'),
(namespaces['svg'], 'missing-glyph'),
(namespaces['svg'], 'mpath'),
(namespaces['svg'], 'path'),
(namespaces['svg'], 'polygon'),
(namespaces['svg'], 'polyline'),
(namespaces['svg'], 'radialGradient'),
(namespaces['svg'], 'rect'),
(namespaces['svg'], 'set'),
(namespaces['svg'], 'stop'),
(namespaces['svg'], 'svg'),
(namespaces['svg'], 'switch'),
(namespaces['svg'], 'text'),
(namespaces['svg'], 'title'),
(namespaces['svg'], 'tspan'),
(namespaces['svg'], 'use'),
))
allowed_attributes = frozenset((
# HTML attributes
(None, 'abbr'),
(None, 'accept'),
(None, 'accept-charset'),
(None, 'accesskey'),
(None, 'action'),
(None, 'align'),
(None, 'alt'),
(None, 'autocomplete'),
(None, 'autofocus'),
(None, 'axis'),
(None, 'background'),
(None, 'balance'),
(None, 'bgcolor'),
(None, 'bgproperties'),
(None, 'border'),
(None, 'bordercolor'),
(None, 'bordercolordark'),
(None, 'bordercolorlight'),
(None, 'bottompadding'),
(None, 'cellpadding'),
(None, 'cellspacing'),
(None, 'ch'),
(None, 'challenge'),
(None, 'char'),
(None, 'charoff'),
(None, 'choff'),
(None, 'charset'),
(None, 'checked'),
(None, 'cite'),
(None, 'class'),
(None, 'clear'),
(None, 'color'),
(None, 'cols'),
(None, 'colspan'),
(None, 'compact'),
(None, 'contenteditable'),
(None, 'controls'),
(None, 'coords'),
(None, 'data'),
(None, 'datafld'),
(None, 'datapagesize'),
(None, 'datasrc'),
(None, 'datetime'),
(None, 'default'),
(None, 'delay'),
(None, 'dir'),
(None, 'disabled'),
(None, 'draggable'),
(None, 'dynsrc'),
(None, 'enctype'),
(None, 'end'),
(None, 'face'),
(None, 'for'),
(None, 'form'),
(None, 'frame'),
(None, 'galleryimg'),
(None, 'gutter'),
(None, 'headers'),
(None, 'height'),
(None, 'hidefocus'),
(None, 'hidden'),
(None, 'high'),
(None, 'href'),
(None, 'hreflang'),
(None, 'hspace'),
(None, 'icon'),
(None, 'id'),
(None, 'inputmode'),
(None, 'ismap'),
(None, 'keytype'),
(None, 'label'),
(None, 'leftspacing'),
(None, 'lang'),
(None, 'list'),
(None, 'longdesc'),
(None, 'loop'),
(None, 'loopcount'),
(None, 'loopend'),
(None, 'loopstart'),
(None, 'low'),
(None, 'lowsrc'),
(None, 'max'),
(None, 'maxlength'),
(None, 'media'),
(None, 'method'),
(None, 'min'),
(None, 'multiple'),
(None, 'name'),
(None, 'nohref'),
(None, 'noshade'),
(None, 'nowrap'),
(None, 'open'),
(None, 'optimum'),
(None, 'pattern'),
(None, 'ping'),
(None, 'point-size'),
(None, 'poster'),
(None, 'pqg'),
(None, 'preload'),
(None, 'prompt'),
(None, 'radiogroup'),
(None, 'readonly'),
(None, 'rel'),
(None, 'repeat-max'),
(None, 'repeat-min'),
(None, 'replace'),
(None, 'required'),
(None, 'rev'),
(None, 'rightspacing'),
(None, 'rows'),
(None, 'rowspan'),
(None, 'rules'),
(None, 'scope'),
(None, 'selected'),
(None, 'shape'),
(None, 'size'),
(None, 'span'),
(None, 'src'),
(None, 'start'),
(None, 'step'),
(None, 'style'),
(None, 'summary'),
(None, 'suppress'),
(None, 'tabindex'),
(None, 'target'),
(None, 'template'),
(None, 'title'),
(None, 'toppadding'),
(None, 'type'),
(None, 'unselectable'),
(None, 'usemap'),
(None, 'urn'),
(None, 'valign'),
(None, 'value'),
(None, 'variable'),
(None, 'volume'),
(None, 'vspace'),
(None, 'vrml'),
(None, 'width'),
(None, 'wrap'),
(namespaces['xml'], 'lang'),
# MathML attributes
(None, 'actiontype'),
(None, 'align'),
(None, 'columnalign'),
(None, 'columnalign'),
(None, 'columnalign'),
(None, 'columnlines'),
(None, 'columnspacing'),
(None, 'columnspan'),
(None, 'depth'),
(None, 'display'),
(None, 'displaystyle'),
(None, 'equalcolumns'),
(None, 'equalrows'),
(None, 'fence'),
(None, 'fontstyle'),
(None, 'fontweight'),
(None, 'frame'),
(None, 'height'),
(None, 'linethickness'),
(None, 'lspace'),
(None, 'mathbackground'),
(None, 'mathcolor'),
(None, 'mathvariant'),
(None, 'mathvariant'),
(None, 'maxsize'),
(None, 'minsize'),
(None, 'other'),
(None, 'rowalign'),
(None, 'rowalign'),
(None, 'rowalign'),
(None, 'rowlines'),
(None, 'rowspacing'),
(None, 'rowspan'),
(None, 'rspace'),
(None, 'scriptlevel'),
(None, 'selection'),
(None, 'separator'),
(None, 'stretchy'),
(None, 'width'),
(None, 'width'),
(namespaces['xlink'], 'href'),
(namespaces['xlink'], 'show'),
(namespaces['xlink'], 'type'),
# SVG attributes
(None, 'accent-height'),
(None, 'accumulate'),
(None, 'additive'),
(None, 'alphabetic'),
(None, 'arabic-form'),
(None, 'ascent'),
(None, 'attributeName'),
(None, 'attributeType'),
(None, 'baseProfile'),
(None, 'bbox'),
(None, 'begin'),
(None, 'by'),
(None, 'calcMode'),
(None, 'cap-height'),
(None, 'class'),
(None, 'clip-path'),
(None, 'color'),
(None, 'color-rendering'),
(None, 'content'),
(None, 'cx'),
(None, 'cy'),
(None, 'd'),
(None, 'dx'),
(None, 'dy'),
(None, 'descent'),
(None, 'display'),
(None, 'dur'),
(None, 'end'),
(None, 'fill'),
(None, 'fill-opacity'),
(None, 'fill-rule'),
(None, 'font-family'),
(None, 'font-size'),
(None, 'font-stretch'),
(None, 'font-style'),
(None, 'font-variant'),
(None, 'font-weight'),
(None, 'from'),
(None, 'fx'),
(None, 'fy'),
(None, 'g1'),
(None, 'g2'),
(None, 'glyph-name'),
(None, 'gradientUnits'),
(None, 'hanging'),
(None, 'height'),
(None, 'horiz-adv-x'),
(None, 'horiz-origin-x'),
(None, 'id'),
(None, 'ideographic'),
(None, 'k'),
(None, 'keyPoints'),
(None, 'keySplines'),
(None, 'keyTimes'),
(None, 'lang'),
(None, 'marker-end'),
(None, 'marker-mid'),
(None, 'marker-start'),
(None, 'markerHeight'),
(None, 'markerUnits'),
(None, 'markerWidth'),
(None, 'mathematical'),
(None, 'max'),
(None, 'min'),
(None, 'name'),
(None, 'offset'),
(None, 'opacity'),
(None, 'orient'),
(None, 'origin'),
(None, 'overline-position'),
(None, 'overline-thickness'),
(None, 'panose-1'),
(None, 'path'),
(None, 'pathLength'),
(None, 'points'),
(None, 'preserveAspectRatio'),
(None, 'r'),
(None, 'refX'),
(None, 'refY'),
(None, 'repeatCount'),
(None, 'repeatDur'),
(None, 'requiredExtensions'),
(None, 'requiredFeatures'),
(None, 'restart'),
(None, 'rotate'),
(None, 'rx'),
(None, 'ry'),
(None, 'slope'),
(None, 'stemh'),
(None, 'stemv'),
(None, 'stop-color'),
(None, 'stop-opacity'),
(None, 'strikethrough-position'),
(None, 'strikethrough-thickness'),
(None, 'stroke'),
(None, 'stroke-dasharray'),
(None, 'stroke-dashoffset'),
(None, 'stroke-linecap'),
(None, 'stroke-linejoin'),
(None, 'stroke-miterlimit'),
(None, 'stroke-opacity'),
(None, 'stroke-width'),
(None, 'systemLanguage'),
(None, 'target'),
(None, 'text-anchor'),
(None, 'to'),
(None, 'transform'),
(None, 'type'),
(None, 'u1'),
(None, 'u2'),
(None, 'underline-position'),
(None, 'underline-thickness'),
(None, 'unicode'),
(None, 'unicode-range'),
(None, 'units-per-em'),
(None, 'values'),
(None, 'version'),
(None, 'viewBox'),
(None, 'visibility'),
(None, 'width'),
(None, 'widths'),
(None, 'x'),
(None, 'x-height'),
(None, 'x1'),
(None, 'x2'),
(namespaces['xlink'], 'actuate'),
(namespaces['xlink'], 'arcrole'),
(namespaces['xlink'], 'href'),
(namespaces['xlink'], 'role'),
(namespaces['xlink'], 'show'),
(namespaces['xlink'], 'title'),
(namespaces['xlink'], 'type'),
(namespaces['xml'], 'base'),
(namespaces['xml'], 'lang'),
(namespaces['xml'], 'space'),
(None, 'y'),
(None, 'y1'),
(None, 'y2'),
(None, 'zoomAndPan'),
))
attr_val_is_uri = frozenset((
(None, 'href'),
(None, 'src'),
(None, 'cite'),
(None, 'action'),
(None, 'longdesc'),
(None, 'poster'),
(None, 'background'),
(None, 'datasrc'),
(None, 'dynsrc'),
(None, 'lowsrc'),
(None, 'ping'),
(namespaces['xlink'], 'href'),
(namespaces['xml'], 'base'),
))
svg_attr_val_allows_ref = frozenset((
(None, 'clip-path'),
(None, 'color-profile'),
(None, 'cursor'),
(None, 'fill'),
(None, 'filter'),
(None, 'marker'),
(None, 'marker-start'),
(None, 'marker-mid'),
(None, 'marker-end'),
(None, 'mask'),
(None, 'stroke'),
))
svg_allow_local_href = frozenset((
(None, 'altGlyph'),
(None, 'animate'),
(None, 'animateColor'),
(None, 'animateMotion'),
(None, 'animateTransform'),
(None, 'cursor'),
(None, 'feImage'),
(None, 'filter'),
(None, 'linearGradient'),
(None, 'pattern'),
(None, 'radialGradient'),
(None, 'textpath'),
(None, 'tref'),
(None, 'set'),
(None, 'use')
))
allowed_css_properties = frozenset((
'azimuth',
'background-color',
'border-bottom-color',
'border-collapse',
'border-color',
'border-left-color',
'border-right-color',
'border-top-color',
'clear',
'color',
'cursor',
'direction',
'display',
'elevation',
'float',
'font',
'font-family',
'font-size',
'font-style',
'font-variant',
'font-weight',
'height',
'letter-spacing',
'line-height',
'overflow',
'pause',
'pause-after',
'pause-before',
'pitch',
'pitch-range',
'richness',
'speak',
'speak-header',
'speak-numeral',
'speak-punctuation',
'speech-rate',
'stress',
'text-align',
'text-decoration',
'text-indent',
'unicode-bidi',
'vertical-align',
'voice-family',
'volume',
'white-space',
'width',
))
allowed_css_keywords = frozenset((
'auto',
'aqua',
'black',
'block',
'blue',
'bold',
'both',
'bottom',
'brown',
'center',
'collapse',
'dashed',
'dotted',
'fuchsia',
'gray',
'green',
'!important',
'italic',
'left',
'lime',
'maroon',
'medium',
'none',
'navy',
'normal',
'nowrap',
'olive',
'pointer',
'purple',
'red',
'right',
'solid',
'silver',
'teal',
'top',
'transparent',
'underline',
'white',
'yellow',
))
allowed_svg_properties = frozenset((
'fill',
'fill-opacity',
'fill-rule',
'stroke',
'stroke-width',
'stroke-linecap',
'stroke-linejoin',
'stroke-opacity',
))
allowed_protocols = frozenset((
'ed2k',
'ftp',
'http',
'https',
'irc',
'mailto',
'news',
'gopher',
'nntp',
'telnet',
'webcal',
'xmpp',
'callto',
'feed',
'urn',
'aim',
'rsync',
'tag',
'ssh',
'sftp',
'rtsp',
'afs',
'data',
))
allowed_content_types = frozenset((
'image/png',
'image/jpeg',
'image/gif',
'image/webp',
'image/bmp',
'text/plain',
))
data_content_type = re.compile(r'''
^
# Match a content type <application>/<type>
(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
# Match any character set and encoding
(?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
|(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
# Assume the rest is data
,.*
$
''',
re.VERBOSE)
class Filter(base.Filter):
"""Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
def __init__(self,
             source,
             allowed_elements=allowed_elements,
             allowed_attributes=allowed_attributes,
             allowed_css_properties=allowed_css_properties,
             allowed_css_keywords=allowed_css_keywords,
             allowed_svg_properties=allowed_svg_properties,
             allowed_protocols=allowed_protocols,
             allowed_content_types=allowed_content_types,
             attr_val_is_uri=attr_val_is_uri,
             svg_attr_val_allows_ref=svg_attr_val_allows_ref,
             svg_allow_local_href=svg_allow_local_href):
    """Creates a Filter

    Every keyword argument defaults to the module-level frozenset of the
    same name; the values are stored unchanged as instance attributes.

    :arg source: the source token stream

    :arg allowed_elements: set of elements to allow--everything else will
        be escaped

    :arg allowed_attributes: set of attributes to allow in
        elements--everything else will be stripped

    :arg allowed_css_properties: set of CSS properties to allow--everything
        else will be stripped

    :arg allowed_css_keywords: set of CSS keywords to allow--everything
        else will be stripped

    :arg allowed_svg_properties: set of SVG properties to allow--everything
        else will be removed

    :arg allowed_protocols: set of allowed protocols for URIs

    :arg allowed_content_types: set of allowed content types for ``data`` URIs.

    :arg attr_val_is_uri: set of attributes that have URI values--values
        that have a scheme not listed in ``allowed_protocols`` are removed

    :arg svg_attr_val_allows_ref: set of SVG attributes that can have
        references

    :arg svg_allow_local_href: set of SVG elements that can have local
        hrefs--these are removed

    """
    super(Filter, self).__init__(source)
    # Keep the caller-supplied (or default) policy sets on the instance so
    # the sanitizing methods read them via ``self``.
    self.allowed_elements = allowed_elements
    self.allowed_attributes = allowed_attributes
    self.allowed_css_properties = allowed_css_properties
    self.allowed_css_keywords = allowed_css_keywords
    self.allowed_svg_properties = allowed_svg_properties
    self.allowed_protocols = allowed_protocols
    self.allowed_content_types = allowed_content_types
    self.attr_val_is_uri = attr_val_is_uri
    self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
    self.svg_allow_local_href = svg_allow_local_href
def __iter__(self):
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
token = self.sanitize_token(token)
if token:
yield token
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
# stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
# are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
# ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
# are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
# allowed.
#
# sanitize_html('<script> do_nasty_stuff() </script>')
# => &lt;script> do_nasty_stuff() &lt;/script>
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
# => <a>Click here for $100</a>
def sanitize_token(self, token):
    """Route one token through the sanitizer.

    Tag tokens (``StartTag``, ``EndTag``, ``EmptyTag``) go to
    ``allowed_token`` when their ``(namespace, name)`` pair is in
    ``self.allowed_elements`` (a ``None`` namespace is also tried as the
    HTML namespace), otherwise to ``disallowed_token``. ``Comment`` tokens
    are dropped; every other token type is returned untouched.

    :arg token: the token dict to sanitize

    :returns: the resulting token, or ``None`` for a dropped comment

    """
    # accommodate filters which use token_type differently
    kind = token["type"]

    if kind == "Comment":
        # Comments never pass through the sanitizer.
        return None
    if kind not in ("StartTag", "EndTag", "EmptyTag"):
        return token

    name = token["name"]
    namespace = token["namespace"]
    permitted = self.allowed_elements

    if (namespace, name) in permitted:
        return self.allowed_token(token)
    if namespace is None and (namespaces["html"], name) in permitted:
        return self.allowed_token(token)
    return self.disallowed_token(token)
def allowed_token(self, token):
    """Strip unsafe attributes from a tag token whose element is allowed.

    When the token carries a ``data`` attribute dict, it is edited in place:

    * attributes not listed in ``self.allowed_attributes`` are deleted;
    * attributes listed in ``self.attr_val_is_uri`` are deleted when the
      value cannot be parsed, uses a scheme outside
      ``self.allowed_protocols``, or is a ``data:`` URI whose content type
      is missing or not in ``self.allowed_content_types``;
    * ``url(...)`` references that do not start with ``#`` are blanked in
      attributes listed in ``self.svg_attr_val_allows_ref``;
    * a non-local ``xlink:href`` is deleted on elements listed in
      ``self.svg_allow_local_href``;
    * a ``style`` attribute is rewritten through ``sanitize_css``.

    :arg token: a ``StartTag``/``EndTag``/``EmptyTag`` token dict

    :returns: the same token object

    """
    if "data" in token:
        attrs = token["data"]
        attr_names = set(attrs.keys())

        # Remove forbidden attributes
        for to_remove in (attr_names - self.allowed_attributes):
            del token["data"][to_remove]
            attr_names.remove(to_remove)

        # Remove attributes with disallowed URL values
        for attr in (attr_names & self.attr_val_is_uri):
            assert attr in attrs
            # I don't have a clue where this regexp comes from or why it matches those
            # characters, nor why we call unescape. I just know it's always been here.
            # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
            # this will do is remove *more* than it otherwise would.
            val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                                   unescape(attrs[attr])).lower()
            # remove replacement characters from unescaped characters
            val_unescaped = val_unescaped.replace("\ufffd", "")
            try:
                uri = urlparse.urlparse(val_unescaped)
            except ValueError:
                uri = None
                del attrs[attr]
            if uri and uri.scheme:
                if uri.scheme not in self.allowed_protocols:
                    del attrs[attr]
                # ``elif``: an attribute already removed for its scheme must
                # not be deleted a second time (that raised KeyError when
                # ``data`` was not an allowed protocol).
                elif uri.scheme == 'data':
                    m = data_content_type.match(uri.path)
                    if (not m or
                            m.group('content_type') not in self.allowed_content_types):
                        del attrs[attr]

        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))

        # The default set holds ``(None, name)`` tuples, so a bare-name
        # membership test alone never matched. Accept a bare name (kept for
        # callers passing plain names), ``(None, name)`` and
        # ``(namespace, name)``.
        tag_name = token["name"]
        local_href_only = self.svg_allow_local_href
        if ((tag_name in local_href_only or
             (None, tag_name) in local_href_only or
             (token.get("namespace"), tag_name) in local_href_only) and
                (namespaces['xlink'], 'href') in attrs and
                re.search(r'^\s*[^#\s].*', attrs[(namespaces['xlink'], 'href')])):
            del attrs[(namespaces['xlink'], 'href')]

        if (None, 'style') in attrs:
            attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
        token["data"] = attrs
    return token
def disallowed_token(self, token):
    """Turn a tag token for a disallowed element into a ``Characters`` token.

    The tag is re-rendered as markup text (``</name>``, ``<name attr="v">``
    or ``<name>``, ending in ``/>`` when the token is flagged
    ``selfClosing``) and stored in ``token["data"]``. The token's type
    becomes ``"Characters"`` and its ``name`` key is removed.

    :arg token: a ``StartTag``/``EndTag``/``EmptyTag`` token dict

    :returns: the same token object, rewritten in place

    """
    kind = token["type"]
    tag = token["name"]

    if kind == "EndTag":
        markup = "</%s>" % tag
    elif token["data"]:
        assert kind in ("StartTag", "EmptyTag")
        # Namespaced attributes are written with their registered prefix.
        rendered = ''.join(
            ' %s="%s"' % (local if ns is None else "%s:%s" % (prefixes[ns], local),
                          escape(value))
            for (ns, local), value in token["data"].items())
        markup = "<%s%s>" % (tag, rendered)
    else:
        markup = "<%s>" % tag

    if token.get("selfClosing"):
        markup = markup[:-1] + "/>"

    token["data"] = markup
    token["type"] = "Characters"
    del token["name"]
    return token
def sanitize_css(self, style):
    """Reduce an inline ``style`` value to its allowed declarations.

    ``url(...)`` values are blanked first. The text must then pass two
    whole-string checks (permitted characters, then ``prop: value;``
    shape); if either fails the result is an empty string. Each remaining
    declaration is kept when its property is in
    ``self.allowed_css_properties``, or belongs to the
    background/border/margin/padding families and every value word is an
    allowed keyword or a simple colour/length, or is in
    ``self.allowed_svg_properties``.

    :arg style: the raw style attribute text

    :returns: the kept declarations joined by single spaces

    """
    # disallow urls
    without_urls = re.sub(r'url\s*\(\s*[^\s)]+?\s*\)\s*', ' ', style)

    # gauntlet
    char_gate = r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$"""
    shape_gate = r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$"
    if (re.match(char_gate, without_urls) is None or
            re.match(shape_gate, without_urls) is None):
        return ''

    shorthand_families = ('background', 'border', 'margin', 'padding')
    safe_word = r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$"  # noqa

    kept = []
    for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", without_urls):
        if not value:
            continue
        lowered = prop.lower()
        if lowered in self.allowed_css_properties:
            accepted = True
        elif prop.split('-')[0].lower() in shorthand_families:
            # Every whitespace-separated word must be an allowed keyword
            # or look like a colour/length.
            accepted = all(
                word in self.allowed_css_keywords or re.match(safe_word, word)
                for word in value.split())
        else:
            accepted = lowered in self.allowed_svg_properties
        if accepted:
            kept.append(prop + ': ' + value + ';')

    return ' '.join(kept)
@@ -2,20 +2,20 @@ from __future__ import absolute_import, division, unicode_literals
import re
from . import _base
from . import base
from ..constants import rcdataElements, spaceCharacters
spaceCharacters = "".join(spaceCharacters)
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
class Filter(_base.Filter):
class Filter(base.Filter):
"""Collapses whitespace except in pre, textarea, and script elements"""
spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
def __iter__(self):
preserve = 0
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag" \
and (preserve or token["name"] in self.spacePreserveElements):
File diff suppressed because it is too large Load Diff
@@ -1,271 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import re
from xml.sax.saxutils import escape, unescape
from .tokenizer import HTMLTokenizer
from .constants import tokenTypes
class HTMLSanitizerMixin(object):
    """Sanitization of XHTML+MathML+SVG and of inline style attributes.

    The class body is a set of whitelists; the methods of this mixin consult
    the ``allowed_*`` aliases defined at the bottom, so a subclass can narrow
    or widen what passes by overriding those aliases.
    """

    # HTML element names that may pass through unescaped.
    acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
        'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
        'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
        'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
        'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
        'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
        'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
        'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
        'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
        'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
        'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
        'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
        'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']

    # MathML element names, merged into ``allowed_elements`` below.
    mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
        'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
        'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
        'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
        'munderover', 'none']

    # SVG element names, merged into ``allowed_elements`` below.
    svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
        'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
        'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
        'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
        'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
        'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']

    # HTML attribute names kept on allowed elements.
    acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
        'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
        'background', 'balance', 'bgcolor', 'bgproperties', 'border',
        'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
        'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
        'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
        'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
        'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
        'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
        'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
        'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
        'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
        'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
        'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
        'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
        'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
        'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
        'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
        'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
        'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
        'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
        'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
        'width', 'wrap', 'xml:lang']

    # MathML attribute names.
    # NOTE: several entries are repeated (e.g. 'columnalign', 'rowalign',
    # 'mathvariant', 'width'); this is harmless for the membership tests the
    # list is used for.
    mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
        'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
        'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
        'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
        'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
        'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
        'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
        'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
        'xlink:type', 'xmlns', 'xmlns:xlink']

    # SVG attribute names.
    svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
        'arabic-form', 'ascent', 'attributeName', 'attributeType',
        'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
        'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
        'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
        'fill-opacity', 'fill-rule', 'font-family', 'font-size',
        'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
        'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
        'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
        'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
        'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
        'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
        'opacity', 'orient', 'origin', 'overline-position',
        'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
        'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
        'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
        'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
        'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
        'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
        'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
        'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
        'transform', 'type', 'u1', 'u2', 'underline-position',
        'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
        'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
        'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
        'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
        'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
        'y1', 'y2', 'zoomAndPan']

    # Attributes whose value is treated as a URI: allowed_token() drops the
    # attribute when its scheme is not in ``allowed_protocols``.
    attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
        'xlink:href', 'xml:base']

    # Attributes in which allowed_token() blanks out any ``url(...)``
    # reference that does not start with '#'.
    svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
        'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
        'mask', 'stroke']

    # Elements on which allowed_token() removes an ``xlink:href`` whose value
    # does not start with '#'.
    svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
        'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
        'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
        'set', 'use']

    # CSS property names that sanitize_css() keeps unconditionally.
    acceptable_css_properties = ['azimuth', 'background-color',
        'border-bottom-color', 'border-collapse', 'border-color',
        'border-left-color', 'border-right-color', 'border-top-color', 'clear',
        'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
        'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
        'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
        'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
        'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
        'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
        'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
        'white-space', 'width']

    # Keywords accepted inside background/border/margin/padding values.
    acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
        'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
        'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
        'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
        'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
        'transparent', 'underline', 'white', 'yellow']

    # SVG presentation properties that sanitize_css() keeps.
    acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
        'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
        'stroke-opacity']

    # URI schemes permitted in the attributes listed in ``attr_val_is_uri``.
    acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
        'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
        'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
        'ssh', 'sftp', 'rtsp', 'afs']

    # subclasses may define their own versions of these constants
    allowed_elements = acceptable_elements + mathml_elements + svg_elements
    allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
    allowed_css_properties = acceptable_css_properties
    allowed_css_keywords = acceptable_css_keywords
    allowed_svg_properties = acceptable_svg_properties
    allowed_protocols = acceptable_protocols
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
# stripping out all attributes not in ALLOWED_ATTRIBUTES. Style
# attributes are parsed, and a restricted set, specified by
# ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, is allowed through.
# Attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
# in ALLOWED_PROTOCOLS are allowed.
#
# sanitize_html('<script> do_nasty_stuff() </script>')
# => &lt;script> do_nasty_stuff() &lt;/script>
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
# => <a>Click here for $100</a>
def sanitize_token(self, token):
    """Sanitize one tokenizer token.

    :arg token: a token dict with at least a ``"type"`` key

    :returns: the result of ``allowed_token`` or ``disallowed_token`` for
        start, end and empty tags; ``None`` for comments; any other token
        unchanged
    """
    # accommodate filters which use token_type differently
    token_type = token["type"]
    if token_type in tokenTypes:
        token_type = tokenTypes[token_type]

    tag_types = (tokenTypes["StartTag"], tokenTypes["EndTag"],
                 tokenTypes["EmptyTag"])
    if token_type in tag_types:
        if token["name"] in self.allowed_elements:
            return self.allowed_token(token, token_type)
        return self.disallowed_token(token, token_type)

    if token_type == tokenTypes["Comment"]:
        # Comments produce no output token.
        return None

    return token
def allowed_token(self, token, token_type):
    """Strip unsafe attributes from a token whose element is allowed.

    :arg token: the token dict; when it has a ``"data"`` key, that value is
        iterated as ``(name, value)`` attribute pairs and replaced in place
    :arg token_type: unused here; accepted for signature symmetry with
        ``disallowed_token``

    :returns: the same token object, with ``token["data"]`` rewritten as a
        list of ``[name, value]`` pairs that survived filtering
    """
    if "data" in token:
        # Walk the attributes in reverse so that, when a name is repeated,
        # the first occurrence in the source is the one that is kept.
        attrs = {name: val
                 for name, val in token["data"][::-1]
                 if name in self.allowed_attributes}

        for attr in self.attr_val_is_uri:
            if attr not in attrs:
                continue
            # Strip backticks, control characters, whitespace and NBSP before
            # looking for a scheme, so "java\tscript:" cannot slip through.
            val_unescaped = re.sub(r"[`\x00-\x20\x7f-\xa0\s]+", '',
                                   unescape(attrs[attr])).lower()
            # remove replacement characters from unescaped characters
            val_unescaped = val_unescaped.replace("\ufffd", "")
            if (re.match(r"^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
                    (val_unescaped.split(':')[0] not in
                     self.allowed_protocols)):
                del attrs[attr]

        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                # Blank out url(...) references unless they point at a local
                # fragment (first character '#').
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))

        if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and
                re.search(r'^\s*[^#\s].*', attrs['xlink:href'])):
            # Only local (#fragment) references are kept on these elements.
            del attrs['xlink:href']

        if 'style' in attrs:
            attrs['style'] = self.sanitize_css(attrs['style'])

        token["data"] = [[name, val] for name, val in attrs.items()]
    return token
def disallowed_token(self, token, token_type):
    """Turn a tag token for a non-allowed element into literal text.

    The tag is re-spelled as markup text (attribute values passed through
    ``escape``), stored in ``token["data"]``, and the token is retyped as a
    character token so that it is emitted as text instead of as an element.

    :arg token: the tag token dict; modified in place
    :arg token_type: the numeric token type from ``tokenTypes``

    :returns: the same token object, without its ``"name"`` key
    """
    tag_name = token["name"]
    if token_type == tokenTypes["EndTag"]:
        markup = "</%s>" % tag_name
    elif token["data"]:
        attr_text = ''.join(' %s="%s"' % (key, escape(value))
                            for key, value in token["data"])
        markup = "<%s%s>" % (tag_name, attr_text)
    else:
        markup = "<%s>" % tag_name

    if token.get("selfClosing"):
        # Swap the closing ">" for "/>".
        markup = markup[:-1] + "/>"
    token["data"] = markup

    # Keep the caller's convention: named types stay named, numeric types
    # stay numeric.
    uses_named_types = token["type"] in tokenTypes
    token["type"] = "Characters" if uses_named_types else tokenTypes["Characters"]

    del token["name"]
    return token
def sanitize_css(self, style):
    """Reduce an inline ``style`` value to the declarations that are allowed.

    Fixes relative to the previous implementation:

    * shorthand values are checked against ``self.allowed_css_keywords``
      (the overridable alias) rather than ``acceptable_css_keywords``, so a
      subclass that customises the keyword list is honoured;
    * hex colours may use uppercase digits (``#FFF``);
    * every pattern is a raw string, so no invalid string escapes remain.

    :arg style: the raw text of a ``style`` attribute

    :returns: the surviving ``prop: value;`` declarations joined by single
        spaces, or ``''`` when the input fails either structural check
    """
    # disallow urls
    style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)

    # gauntlet: permitted characters first, then the overall
    # "name: value; name: value" shape
    if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
        return ''
    if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
        return ''

    value_shape = r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$"  # noqa

    clean = []
    for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
        if not value:
            continue
        if prop.lower() in self.allowed_css_properties:
            clean.append(prop + ': ' + value + ';')
        elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
                                            'padding']:
            # Shorthand families: every whitespace-separated piece must be a
            # known keyword or look like a colour/length.
            for keyword in value.split():
                if keyword not in self.allowed_css_keywords and \
                        not re.match(value_shape, keyword):
                    break
            else:
                clean.append(prop + ': ' + value + ';')
        elif prop.lower() in self.allowed_svg_properties:
            clean.append(prop + ': ' + value + ';')

    return ' '.join(clean)
class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
    """Tokenizer whose token stream is passed through ``sanitize_token``."""

    def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
                 lowercaseElementName=False, lowercaseAttrName=False, parser=None):
        """Forward every argument to ``HTMLTokenizer.__init__``.

        Only the defaults differ: element and attribute names are not
        lowercased unless the caller asks for it.
        """
        # Change case matching defaults as we only output lowercase html anyway
        # This solution doesn't seem ideal...
        HTMLTokenizer.__init__(
            self, stream, encoding, parseMeta, useChardet,
            lowercaseElementName, lowercaseAttrName, parser=parser)

    def __iter__(self):
        """Yield each sanitized token, skipping those that sanitize to a falsy value."""
        for raw_token in HTMLTokenizer.__iter__(self):
            sanitized = self.sanitize_token(raw_token)
            if not sanitized:
                continue
            yield sanitized
@@ -0,0 +1,409 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
import re
from codecs import register_error, xmlcharrefreplace_errors
from .constants import voidElements, booleanAttributes, spaceCharacters
from .constants import rcdataElements, entities, xmlEntities
from . import treewalkers, _utils
from xml.sax.saxutils import escape
# Characters that force an attribute value to be quoted in "spec" mode:
# the space characters plus double quote, single quote, '=', '<', '>' and
# backtick.
_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`"
# Matches any single character from the set above.
_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]")
# "legacy" mode quotes on a wider set: everything above plus the explicit
# code points spelled out below (control characters, '/', backtick, NBSP and
# a number of other Unicode code points).
_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars +
                                   "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
                                   "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
                                   "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
                                   "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
                                   "\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
                                   "\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
                                   "\u3000]")


# Reverse lookup used by htmlentityreplace_errors: code point -> entity name.
_encode_entity_map = {}
# True when a character outside the BMP has length 1 in this interpreter;
# otherwise such a character is stored as two code units.
_is_ucs4 = len("\U0010FFFF") == 1
for k, v in list(entities.items()):
    # skip multi-character entities
    # (a length of 2 is still accepted when non-BMP characters take two units)
    if ((_is_ucs4 and len(v) > 1) or
            (not _is_ucs4 and len(v) > 2)):
        continue
    # "&" itself never gets an entry in the map.
    if v != "&":
        # Replace the expansion text with its integer code point.
        if len(v) == 2:
            v = _utils.surrogatePairToCodepoint(v)
        else:
            v = ord(v)
        # The first name seen for a code point is kept unless a later name
        # is lowercase, in which case the lowercase name replaces it.
        if v not in _encode_entity_map or k.islower():
            # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
            _encode_entity_map[v] = k
def htmlentityreplace_errors(exc):
    """Codec error handler that substitutes HTML entities for bad characters.

    For encode/translate errors, every character in the failing span is
    replaced by its named entity from ``_encode_entity_map`` when one exists,
    and by a hexadecimal character reference otherwise. Any other exception
    type is delegated to ``xmlcharrefreplace_errors``.

    :arg exc: the exception instance passed in by the codec machinery

    :returns: a ``(replacement_text, resume_position)`` tuple
    """
    if not isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
        return xmlcharrefreplace_errors(exc)

    text = exc.object
    pieces = []
    second_half_pending = False
    for offset, char in enumerate(text[exc.start:exc.end]):
        if second_half_pending:
            # This unit was already consumed as the low half of a pair.
            second_half_pending = False
            continue
        position = exc.start + offset
        if _utils.isSurrogatePair(text[position:min(exc.end, position + 2)]):
            codepoint = _utils.surrogatePairToCodepoint(text[position:position + 2])
            second_half_pending = True
        else:
            codepoint = ord(char)

        entity_name = _encode_entity_map.get(codepoint)
        if entity_name:
            pieces.append("&")
            pieces.append(entity_name)
            if not entity_name.endswith(";"):
                pieces.append(";")
        else:
            pieces.append("&#x%s;" % (hex(codepoint)[2:]))
    return ("".join(pieces), exc.end)


register_error("htmlentityreplace", htmlentityreplace_errors)
def serialize(input, tree="etree", encoding=None, **serializer_opts):
    """Serialize ``input`` to a string using the named treewalker.

    :arg input: the object handed to the treewalker (the example below
        passes the result of ``parse``)

    :arg tree: the name given to ``treewalkers.getTreeWalker``

    :arg encoding: the encoding forwarded to ``HTMLSerializer.render``

    :arg serializer_opts: keyword options used to construct the
        :py:class:`html5lib.serializer.HTMLSerializer`

    :returns: whatever ``render`` returns for the walked input

    Example:

    >>> from html5lib.html5parser import parse
    >>> from html5lib.serializer import serialize
    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
    >>> serialize(token_stream, omit_optional_tags=False)
    '<html><head></head><body><p>Hi!</p></body></html>'

    """
    # XXX: Should we cache this?
    walker_factory = treewalkers.getTreeWalker(tree)
    serializer = HTMLSerializer(**serializer_opts)
    return serializer.render(walker_factory(input), encoding)
class HTMLSerializer(object):
    # The attributes below are the default value of each serializer option.
    # __init__ copies every name listed in ``options`` onto the instance,
    # taking the keyword argument when one was passed and this default
    # otherwise.

    # attribute quoting options
    quote_attr_values = "legacy"  # be secure by default
    quote_char = '"'
    use_best_quote_char = True

    # tag syntax options
    omit_optional_tags = True
    minimize_boolean_attributes = True
    use_trailing_solidus = False
    space_before_trailing_solidus = True

    # escaping options
    escape_lt_in_attrs = False
    escape_rcdata = False
    resolve_entities = True

    # miscellaneous options
    alphabetical_attributes = False
    inject_meta_charset = True
    strip_whitespace = False
    sanitize = False

    # Names accepted as keyword arguments by __init__; any other keyword
    # makes __init__ raise TypeError.
    options = ("quote_attr_values", "quote_char", "use_best_quote_char",
               "omit_optional_tags", "minimize_boolean_attributes",
               "use_trailing_solidus", "space_before_trailing_solidus",
               "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
               "alphabetical_attributes", "inject_meta_charset",
               "strip_whitespace", "sanitize")
def __init__(self, **kwargs):
"""Initialize HTMLSerializer
:arg inject_meta_charset: Whether or not to inject the meta charset.
Defaults to ``True``.
:arg quote_attr_values: Whether to quote attribute values that don't
require quoting per legacy browser behavior (``"legacy"``), when
required by the standard (``"spec"``), or always (``"always"``).
Defaults to ``"legacy"``.
:arg quote_char: Use given quote character for attribute quoting.
Defaults to ``"`` which will use double quotes unless attribute
value contains a double quote, in which case single quotes are
used.
:arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
values.
Defaults to ``False``.
:arg escape_rcdata: Whether to escape characters that need to be
escaped within normal elements within rcdata elements such as
style.
Defaults to ``False``.
:arg resolve_entities: Whether to resolve named character entities that
appear in the source tree. The XML predefined entities &lt; &gt;
&amp; &quot; &apos; are unaffected by this setting.
Defaults to ``True``.
:arg strip_whitespace: Whether to remove semantically meaningless
whitespace. (This compresses all whitespace to a single space
except within ``pre``.)
Defaults to ``False``.
:arg minimize_boolean_attributes: Shortens boolean attributes to give
just the attribute value, for example::
<input disabled="disabled">
becomes::
<input disabled>
Defaults to ``True``.
:arg use_trailing_solidus: Includes a close-tag slash at the end of the
start tag of void elements (empty elements whose end tag is
forbidden). E.g. ``<hr/>``.
Defaults to ``False``.
:arg space_before_trailing_solidus: Places a space immediately before
the closing slash in a tag using a trailing solidus. E.g.
``<hr />``. Requires ``use_trailing_solidus=True``.
Defaults to ``True``.
:arg sanitize: Strip all unsafe or unknown constructs from output.
See :py:class:`html5lib.filters.sanitizer.Filter`.
Defaults to ``False``.
:arg omit_optional_tags: Omit start/end tags that are optional.
Defaults to ``True``.
:arg alphabetical_attributes: Reorder attributes to be in alphabetical order.
Defaults to ``False``.
"""
unexpected_args = frozenset(kwargs) - frozenset(self.options)
if len(unexpected_args) > 0:
raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args)))
if 'quote_char' in kwargs:
self.use_best_quote_char = False
for attr in self.options:
setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
self.errors = []
self.strict = False
def encode(self, string):
    """Encode ``string`` for output, replacing unencodable characters.

    With a truthy ``self.encoding`` the text is encoded using the
    ``"htmlentityreplace"`` error handler; otherwise it is returned as is.
    """
    assert isinstance(string, text_type)
    if not self.encoding:
        return string
    return string.encode(self.encoding, "htmlentityreplace")
def encodeStrict(self, string):
    """Encode ``string`` for output, failing on unencodable characters.

    With a truthy ``self.encoding`` the text is encoded using the
    ``"strict"`` error handler; otherwise it is returned as is.
    """
    assert isinstance(string, text_type)
    if not self.encoding:
        return string
    return string.encode(self.encoding, "strict")
def serialize(self, treewalker, encoding=None):
    """Yield the serialized form of ``treewalker`` piece by piece.

    The token stream is first wrapped in the filters selected by the
    serializer options, then each token is turned into one or more output
    fragments.

    :arg treewalker: an iterable of token dicts
    :arg encoding: when truthy, every fragment is encoded with this codec;
        otherwise text is yielded

    :returns: a generator of output fragments

    :raises ValueError: if ``quote_attr_values`` is not ``"always"``,
        ``"spec"`` or ``"legacy"`` when an attribute value is written
    :raises SerializeError: from ``serializeError`` when ``self.strict`` is
        set and a problem is found

    Problems are recorded in ``self.errors``, which is reset on every call.
    An entity name that is not in ``entities`` is recorded as an error and
    then emitted unresolved as ``&name;``; previously it raised ``KeyError``
    in non-strict mode.
    """
    # pylint:disable=too-many-nested-blocks
    self.encoding = encoding
    in_cdata = False
    self.errors = []

    if encoding and self.inject_meta_charset:
        from .filters.inject_meta_charset import Filter
        treewalker = Filter(treewalker, encoding)
    # Alphabetical attributes is here under the assumption that none of
    # the later filters add or change order of attributes; it needs to be
    # before the sanitizer so escaped elements come out correctly
    if self.alphabetical_attributes:
        from .filters.alphabeticalattributes import Filter
        treewalker = Filter(treewalker)
    # WhitespaceFilter should be used before OptionalTagFilter
    # for maximum efficiency of this latter filter
    if self.strip_whitespace:
        from .filters.whitespace import Filter
        treewalker = Filter(treewalker)
    if self.sanitize:
        from .filters.sanitizer import Filter
        treewalker = Filter(treewalker)
    if self.omit_optional_tags:
        from .filters.optionaltags import Filter
        treewalker = Filter(treewalker)

    for token in treewalker:
        token_type = token["type"]
        if token_type == "Doctype":
            doctype = "<!DOCTYPE %s" % token["name"]

            if token["publicId"]:
                doctype += ' PUBLIC "%s"' % token["publicId"]
            elif token["systemId"]:
                doctype += " SYSTEM"
            if token["systemId"]:
                # Quote the system identifier with whichever quote character
                # it does not contain; containing both is reported.
                if token["systemId"].find('"') >= 0:
                    if token["systemId"].find("'") >= 0:
                        self.serializeError("System identifer contains both single and double quote characters")
                    quote_char = "'"
                else:
                    quote_char = '"'
                doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)

            doctype += ">"
            yield self.encodeStrict(doctype)

        elif token_type in ("Characters", "SpaceCharacters"):
            if token_type == "SpaceCharacters" or in_cdata:
                # Whitespace and rcdata content are written unescaped.
                if in_cdata and token["data"].find("</") >= 0:
                    self.serializeError("Unexpected </ in CDATA")
                yield self.encode(token["data"])
            else:
                yield self.encode(escape(token["data"]))

        elif token_type in ("StartTag", "EmptyTag"):
            name = token["name"]
            yield self.encodeStrict("<%s" % name)
            if name in rcdataElements and not self.escape_rcdata:
                in_cdata = True
            elif in_cdata:
                self.serializeError("Unexpected child element of a CDATA element")
            for (_, attr_name), attr_value in token["data"].items():
                # TODO: Add namespace support here
                k = attr_name
                v = attr_value
                yield self.encodeStrict(' ')

                yield self.encodeStrict(k)
                # A boolean attribute is written as a bare name when
                # minimisation is enabled; everything else gets "=value".
                if not self.minimize_boolean_attributes or \
                        (k not in booleanAttributes.get(name, tuple()) and
                         k not in booleanAttributes.get("", tuple())):
                    yield self.encodeStrict("=")
                    if self.quote_attr_values == "always" or len(v) == 0:
                        quote_attr = True
                    elif self.quote_attr_values == "spec":
                        quote_attr = _quoteAttributeSpec.search(v) is not None
                    elif self.quote_attr_values == "legacy":
                        quote_attr = _quoteAttributeLegacy.search(v) is not None
                    else:
                        raise ValueError("quote_attr_values must be one of: "
                                         "'always', 'spec', or 'legacy'")
                    v = v.replace("&", "&amp;")
                    if self.escape_lt_in_attrs:
                        v = v.replace("<", "&lt;")
                    if quote_attr:
                        quote_char = self.quote_char
                        if self.use_best_quote_char:
                            # Prefer the quote character the value does not
                            # contain, so nothing needs escaping.
                            if "'" in v and '"' not in v:
                                quote_char = '"'
                            elif '"' in v and "'" not in v:
                                quote_char = "'"
                        if quote_char == "'":
                            v = v.replace("'", "&#39;")
                        else:
                            v = v.replace('"', "&quot;")
                        yield self.encodeStrict(quote_char)
                        yield self.encode(v)
                        yield self.encodeStrict(quote_char)
                    else:
                        yield self.encode(v)
            if name in voidElements and self.use_trailing_solidus:
                if self.space_before_trailing_solidus:
                    yield self.encodeStrict(" /")
                else:
                    yield self.encodeStrict("/")
            yield self.encode(">")

        elif token_type == "EndTag":
            name = token["name"]
            if name in rcdataElements:
                in_cdata = False
            elif in_cdata:
                self.serializeError("Unexpected child element of a CDATA element")
            yield self.encodeStrict("</%s>" % name)

        elif token_type == "Comment":
            data = token["data"]
            if data.find("--") >= 0:
                self.serializeError("Comment contains --")
            yield self.encodeStrict("<!--%s-->" % token["data"])

        elif token_type == "Entity":
            name = token["name"]
            key = name + ";"
            known = key in entities
            if not known:
                self.serializeError("Entity %s not recognized" % name)
            # Only resolve names that exist; an unknown name falls through
            # and is written back as a reference instead of raising KeyError.
            if known and self.resolve_entities and key not in xmlEntities:
                data = entities[key]
            else:
                data = "&%s;" % name
            yield self.encodeStrict(data)

        else:
            self.serializeError(token["data"])
def render(self, treewalker, encoding=None):
    """Serialize the treewalker's stream and join it into one string.

    :arg treewalker: the treewalker to serialize

    :arg encoding: the string encoding to use; when truthy the result is a
        byte string, otherwise text

    :returns: the serialized tree

    Example:

    >>> from html5lib import parse, getTreeWalker
    >>> from html5lib.serializer import HTMLSerializer
    >>> token_stream = parse('<html><body>Hi!</body></html>')
    >>> walker = getTreeWalker('etree')
    >>> serializer = HTMLSerializer(omit_optional_tags=False)
    >>> serializer.render(walker(token_stream))
    '<html><head></head><body>Hi!</body></html>'

    """
    if not encoding:
        return "".join(self.serialize(treewalker))
    encoded_chunks = self.serialize(treewalker, encoding)
    return b"".join(encoded_chunks)
def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
    """Record a serialization problem and raise if the serializer is strict.

    :arg data: the error message appended to ``self.errors``

    :raises SerializeError: when ``self.strict`` is truthy
    """
    # XXX The idea is to make data mandatory.
    self.errors.append(data)
    if not self.strict:
        return
    raise SerializeError
class SerializeError(Exception):
    """Raised by a strict serializer when the tree cannot be serialized cleanly."""
@@ -1,16 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from .. import treewalkers
from .htmlserializer import HTMLSerializer
def serialize(input, tree="etree", format="html", encoding=None,
              **serializer_opts):
    """Serialize ``input`` using the named treewalker.

    :arg input: the object handed to the treewalker
    :arg tree: the name given to ``treewalkers.getTreeWalker``
    :arg format: the output format; only ``"html"`` is supported
    :arg encoding: the encoding forwarded to ``HTMLSerializer.render``
    :arg serializer_opts: keyword options for ``HTMLSerializer``

    :returns: whatever ``render`` returns for the walked input

    :raises ValueError: if ``format`` is not ``"html"``
    """
    # Reject an unsupported format up front, before any tree walker is
    # looked up, and name the parameter that is actually at fault.
    if format != "html":
        raise ValueError("format must be 'html', got %r" % (format,))

    # XXX: Should we cache this?
    walker = treewalkers.getTreeWalker(tree)
    s = HTMLSerializer(**serializer_opts)
    return s.render(walker(input), encoding)

Some files were not shown because too many files have changed in this diff Show More