Compare commits

...

471 Commits

Author SHA1 Message Date
panni 1b96dbae3d remove dev 2017-07-01 03:47:32 +02:00
panni 244e183a2b remove traceback info 2017-07-01 03:46:38 +02:00
panni 5cb00a0532 clarify error and change exception to error on legendastv malformed RAR archive 2017-07-01 03:38:49 +02:00
panni 09ce46f46a Release 2.0.24.1558 2017-07-01 03:35:32 +02:00
panni 881a23ec7f don't discard provider on bad rar file 2017-07-01 03:29:45 +02:00
panni d53da82ddf back to dev 2017-07-01 02:32:56 +02:00
panni 177d95128f release 2.0.24.1555 2017-07-01 02:27:41 +02:00
panni 867a162fcf add plex fps info always; fixes rare microdvd error on opensubtitles 2017-07-01 02:09:08 +02:00
panni fe0291ef55 Merge branch 'master' into develop-2.0 2017-06-30 23:34:03 +02:00
panni 1a21ab513d update changelog 2017-06-30 22:52:36 +02:00
panni 1a275e9501 update changelog 2017-06-30 22:37:08 +02:00
panni 084284d1ee back to dev 2017-06-30 14:38:08 +02:00
panni 13b087e44b release 2.0.24.1549 2017-06-30 14:18:46 +02:00
pannal 22b318f05e Update README.md 2017-06-30 14:15:43 +02:00
pannal a575e40859 Update README.md 2017-06-30 14:15:15 +02:00
panni ef044e4937 set dev to 0 2017-06-30 14:13:48 +02:00
panni 1e1f8e7ca0 update readme 2017-06-30 14:13:35 +02:00
panni 814395b58e Merge branch 'develop-2.0'
# Conflicts:
#	Contents/Info.plist
#	Contents/Libraries/Shared/subliminal_patch/patch_provider_pool.py
#	README.md
2017-06-30 14:12:34 +02:00
panni 5ac5c3c595 release 2.0.24.1508 2017-06-30 14:11:11 +02:00
panni 64a8daab76 legendastv: and even more fixes by @fernandog 2017-06-24 01:10:20 +02:00
panni 3fb6017976 legendastv: more fixes by @fernandog 2017-06-24 00:57:13 +02:00
panni 9379e84ba2 disable "Scheduler: Overwrite manually selected subtitles when better found" by default 2017-06-23 23:56:04 +02:00
panni 8eaa468b1c legendastv: implement patches by fernandog (86fa8f0, 4522722, 498db9a, 156d548) 2017-06-23 22:24:52 +02:00
ukdtom a1c3e64bf3 Removing old Wiki Images 2017-06-13 00:20:24 +02:00
ukdtom e90e1bd0c5 Mod of one pic 2017-06-12 00:09:19 +02:00
ukdtom 30cec00f0e Yet a modification of pics 2017-06-11 23:57:42 +02:00
ukdtom 2a0c1a13ad Modified a couple of pics 2017-06-11 23:41:15 +02:00
ukdtom 072aa0883b One pic added, and one altered 2017-06-11 23:20:50 +02:00
ukdtom 2e22c585d0 Changed a pic 2017-06-10 21:23:08 +02:00
ukdtom 3240b19649 Modified Gui Item 2017-06-10 21:15:05 +02:00
ukdtom 2f4b47e456 Adv pics added as well as changed 2017-06-10 20:56:29 +02:00
ukdtom f735c9128c Added a few more pics 2017-06-10 20:38:35 +02:00
ukdtom 56e8cb0f44 Rename old Wiki Images to make sure, thay are not used anymore 2017-06-10 20:25:21 +02:00
panni d5253f130c 2.0.24.1503 RC11 pre-final 2017-06-06 16:56:39 +02:00
panni 261c6f3c7e fix inexistant plex item error 2017-06-06 16:45:33 +02:00
panni 2ad59e6592 update dev before release 2017-06-05 03:48:09 +02:00
panni f5cf977788 addic7ed: revert WEB-DL = WEBRip 2017-06-04 22:43:51 +02:00
panni d392707ecf re-add parent directory hinting to guessit, as the bug's fixed 2017-06-04 15:35:44 +02:00
panni cbc57fbc0b update dev 2017-06-04 02:31:06 +02:00
panni b32a2ded77 searchRecentlyAddedMissing: sleep for 1 second if no subtitles were downloaded; else 5 seconds; pep8 2017-06-04 02:23:09 +02:00
panni e7ee9ae747 addic7ed: treat WEBRip as WEB-DL 2017-06-04 01:34:27 +02:00
panni 97acfb6845 plex.py: section: add type property 2017-06-04 01:05:59 +02:00
panni 709197a957 plex.py: section: add type property 2017-06-04 01:05:46 +02:00
panni 7d003cdc3b 2.0.24.1492 RC11 pre-release 2017-06-04 00:19:43 +02:00
panni c0266a5b84 update guessit to 9e5fc600901c7d0a5bab3fd83b4e065d656b3d5b 2017-06-04 00:12:44 +02:00
panni 5b61c71cdd parse_video: use dont_use_actual_file if no_refining was provided 2017-06-04 00:03:45 +02:00
panni 3423b42a8a take shortcut when only re-saving subtitles with mods 2017-06-03 23:41:00 +02:00
panni 942124ac67 bump dev 2017-06-03 23:31:46 +02:00
panni 58d4534176 submod: OCR: update english dictionary 2017-06-03 23:22:14 +02:00
panni 93517582d1 submod: common: be smarter about spaces in numbers 2017-06-03 23:18:54 +02:00
panni 75c60c2b60 submod: HI: don't treat 9:00 o'clock as hearing impaired before colon 2017-06-03 23:05:27 +02:00
panni 1fbd9cfd50 ditch mixins; recentlyaddedmissing: fix logging 2017-06-03 14:51:47 +02:00
panni 2e6843fd78 don't hint parent folders to guessit anymore 2017-06-03 14:49:10 +02:00
panni c073de4acd skip duplicate subs in manual listing 2017-06-03 04:33:43 +02:00
panni dcd85c85d0 explicitly set "found_none" 2017-06-03 04:13:32 +02:00
panni 6e5bfd162a remove individual UpdateLocalMedia item from menu (for now) 2017-06-03 03:31:04 +02:00
panni b579fa7804 update DEV 2017-06-03 02:01:51 +02:00
panni f356313e67 remove metadata updated signal for now 2017-06-03 01:50:53 +02:00
panni 4055debc6f catch get_item fail genericly 2017-06-03 01:38:06 +02:00
panni fcc907c507 update subtitle storage maintenance task; remove missing languages; remove missing parts 2017-06-03 01:34:12 +02:00
panni 8a90a51182 back to dev; update dev version 2017-06-03 00:21:08 +02:00
panni 4c42b3090a remove old whack_missing_parts; correctly handle none-downloaded-subs on refresh 2017-06-03 00:17:48 +02:00
panni 626d519c81 improve refining once again 2017-06-02 23:55:12 +02:00
panni dae3672a9a set guessit to single values 2017-06-02 23:18:46 +02:00
panni 640bf5515f update rebulk to 0.9.0 2017-06-02 23:15:30 +02:00
panni 476fd09397 findrecentlymissing: fix logging 2017-06-02 23:06:31 +02:00
panni bfbf12914f update guessit to ac2de2de05e893d9c45535afa0b2dfeb7629f152 2017-06-02 23:06:16 +02:00
panni 91eae536ae fix findrecentlymissing: don't re-download every time :) 2017-06-02 22:49:08 +02:00
panni 404becadba move note 2017-06-02 21:53:46 +02:00
panni d71d33d899 add info to readme about ftfy 2017-06-02 21:52:46 +02:00
panni 65e72da01e always store subtitle info, even if the agent is disabled 2017-06-02 17:07:28 +02:00
panni 8556bebb1f rewrite SearchRecentlyAddedMissing 2017-06-02 15:42:23 +02:00
panni dc5c353b8d 2.0.23.1464 2017-06-02 15:35:50 +02:00
panni 9f7f877cf2 only use fix_text when storing the subtitle 2017-06-02 15:26:40 +02:00
panni 9a827b783a pep 2017-06-02 13:14:26 +02:00
panni d2641f045e lower menu history timeout to 6 hours; add menu history maintenance task 2017-06-02 13:14:06 +02:00
panni e4ef6dc604 add full logging of internal Dict storage 2017-06-02 13:06:42 +02:00
panni c8cc9bb188 throttle same-event tracking to once per half an hour 2017-06-02 13:01:04 +02:00
panni a21dd3d0c0 add missing debug check 2017-06-01 17:16:42 +02:00
panni b16d6658f8 2.0.23.1456 RC10 2017-06-01 17:08:55 +02:00
panni 01aab808c3 try legacy subtitle storage migration; support ietf in storage 2017-06-01 17:05:05 +02:00
panni eb1ae54739 submod: HI: handle multiline brackets 2017-06-01 16:37:53 +02:00
panni 5483d02a6f also patch the corresponding subliminal provider if possible 2017-06-01 16:12:11 +02:00
panni 9d434eb1e9 simplify 2017-06-01 16:06:33 +02:00
panni 43269befd6 automatic registration of providers 2017-06-01 16:03:46 +02:00
panni d8d2b06c6c add environ.SZ_NO_REFRESH special environment handling 2017-06-01 15:35:27 +02:00
panni 1f9a2f6554 correctly get plex title if original_title is not available 2017-05-30 13:07:08 +02:00
panni 940162a8b5 fix None str replace 2017-05-29 19:17:35 +02:00
panni 3c2b39453a Merge remote-tracking branch 'origin/develop-2.0' into develop-2.0 2017-05-29 18:22:08 +02:00
panni 459cd92017 replace dash when re-refining 2017-05-29 18:22:01 +02:00
pannal a5aa0a773d Update README.md 2017-05-29 02:58:35 +02:00
panni d1b569fbbe update dev version 2017-05-29 02:52:22 +02:00
panni 6d609f628b make HI_before_colon more harsh 2017-05-29 02:49:57 +02:00
panni 8d5eaf0f8d we know we're utf-8, write the file without shenanigans 2017-05-29 02:47:24 +02:00
panni de93b439ca add error log if that happens 2017-05-29 01:40:04 +02:00
panni d11d9ef03c try utf-8 decoding fallback on old stored subtitle. otherwise ditch it 2017-05-29 01:37:30 +02:00
panni f1fc8e1d82 switch from info to debug on already guessed encoding 2017-05-29 01:30:25 +02:00
panni 9a44c37cab backwards compatibility courtesy to old RC users; set utf-8 before storage 2017-05-29 01:18:26 +02:00
panni 25a9e5efdf set encoding to utf-8 in edge case of re-parsed subtitle 2017-05-29 01:01:57 +02:00
panni 9352193986 simplify encoding; fix storage 2017-05-29 00:53:53 +02:00
panni 61436ca278 again: explicitly set encoding to utf-8 2017-05-29 00:45:34 +02:00
panni 17b6fcc48a explicitly set encoding to utf-8 2017-05-29 00:38:07 +02:00
panni 9f9c5cf27a bump dev 2017-05-28 05:38:02 +02:00
panni 8fd38fbb40 save as utf-8 2017-05-28 05:35:40 +02:00
panni ac2c9fff38 remove force UTF-8; always assume content to be UTF-8 2017-05-28 05:29:14 +02:00
panni 8dc4877379 add correct subtitle language code to notify_executable 2017-05-28 05:21:43 +02:00
panni d22a3a3953 remove most likely obsolete encode decode dance 2017-05-28 05:15:05 +02:00
panni 182538d2a7 update dev version 2017-05-28 05:09:23 +02:00
panni 997c0bc297 add encoding to subtitle repr; ditch stored subtitles without encoding. 2017-05-28 05:05:52 +02:00
panni f9099cd680 don't only store language.alpha2, store its representation; add sr-script styles to addicted; 2017-05-28 05:04:47 +02:00
panni e8b47c33b6 add our own OS language converter; add sr-latn and sr-cyrl to supported languages and remove them again, because it might still be bad 2017-05-28 04:21:50 +02:00
panni 6618fdd86b add wcwidth 2017-05-28 03:48:30 +02:00
panni 0b5ef5e257 don't trust self.encoding, never 2017-05-28 03:41:36 +02:00
panni 4f36e6119c use ftfy 2017-05-28 03:40:59 +02:00
panni 24b58d9615 add ftfy 4.4.3 2017-05-28 03:36:09 +02:00
panni 4621c21907 actually try re-encoding content before passing the encoding test; may be stupid 2017-05-28 03:27:48 +02:00
panni a53f6005b3 bump dev 2017-05-28 02:43:18 +02:00
panni 8bad1b2dfc remove obsolete debug print 2017-05-28 01:08:41 +02:00
panni 856ec02083 explicitly import subliminal_patch as subliminal 2017-05-28 01:06:51 +02:00
panni 45c63bdac7 more debug 2017-05-28 01:03:11 +02:00
panni a5202b8eb8 add debug log 2017-05-28 01:01:14 +02:00
panni 766e47a757 pep8 2017-05-28 00:59:44 +02:00
panni 0026ef7db7 import subliminal_patch.save_subtitles explicitly 2017-05-28 00:59:04 +02:00
panni 368c7927ff bump dev version 2017-05-28 00:38:19 +02:00
panni 1dd1ec3a0d adapt sr-latn and sr-cyrl correctly, including translation strings; ditch parse_language altogether 2017-05-28 00:37:51 +02:00
panni 6ed5c83b05 fix non-scripted srp 2017-05-28 00:20:19 +02:00
panni 3efd1e56c4 rename lang to lang_code to avoid shadowing 2017-05-28 00:13:38 +02:00
panni 1e18c9e309 findbetter: don't fail on inexistant metadata 2017-05-27 23:52:47 +02:00
panni c79048027c kill the provider_manager 2017-05-27 23:39:24 +02:00
ukdtom b2c981fca1 Changed to ShareX 2017-05-27 23:24:18 +02:00
ukdtom 88af4d608d more pics for the Wiki 2017-05-27 22:35:55 +02:00
panni 2008b35e8e podnapisi debug 2017-05-27 16:19:02 +02:00
panni a082714ad5 add Cyrl to podnapisis actually 2017-05-27 16:10:49 +02:00
panni 2f28fde4e6 podnapisi sr-cyrl special case 2017-05-27 15:59:11 +02:00
panni e3004b9db7 fix debug msg 2017-05-27 14:48:38 +02:00
panni b192f4f80d podnapisi: add movie/episode hash 2017-05-27 05:13:19 +02:00
panni 809331b9fd bump DEV 2017-05-27 05:06:06 +02:00
panni 3828c8bf89 language encoding: set windows first 2017-05-27 05:03:38 +02:00
panni 4731750684 serbian: set windows first 2017-05-27 04:54:59 +02:00
panni 54f2308944 update serbian encoding order 2017-05-27 04:53:52 +02:00
panni afdd44323e fix cleanup for sr-Xxxx 2017-05-27 04:42:37 +02:00
panni 9b88d5814c podnapisi: fix pt-BR, sl-Latn, sl-Cyrl 2017-05-27 04:31:25 +02:00
panni 02a924e97d disable dumbdbm again 2017-05-27 03:12:26 +02:00
panni e167439ed0 add lang_list to config debug 2017-05-27 03:09:50 +02:00
panni 9f26d5a401 bump version 2017-05-27 02:57:31 +02:00
panni d7f72470ec add anydbm library to circumvent restricted mode 2017-05-27 02:48:35 +02:00
panni abc45b1a2f add dbhash library to circumvent restricted mode 2017-05-27 02:40:56 +02:00
panni 5bc530deb2 try re-adding dumbdbm 2017-05-27 02:33:30 +02:00
panni 6a206b0c5e add dumbdbm as separate library 2017-05-27 02:32:19 +02:00
panni 2485639e11 remove dumbdbm from supported DBMs 2017-05-27 02:14:33 +02:00
panni d056c14b91 add serbian cyrillic and serbian latin distinction 2017-05-27 01:42:58 +02:00
panni 834a8dd0a8 add serbian cyrillic and serbian latin distinction 2017-05-27 01:42:22 +02:00
panni ea5e4d48d3 aaaaaaaaaaand added more encodings 2017-05-27 00:33:42 +02:00
panni 2b08a8958a update rarfile.py 2017-05-26 04:38:34 +02:00
panni 759b09c8d6 add more debug info; log failed download to menu 2017-05-26 04:25:36 +02:00
panni 0266afe9ab better non-DBM fallback 2017-05-26 03:55:43 +02:00
panni 109c5e0703 simplify config.init_cache 2017-05-26 03:52:31 +02:00
panni 40a79c2cc4 DBM detection fixes 2017-05-26 03:41:14 +02:00
panni debc425f99 early break 2017-05-26 03:28:21 +02:00
panni 602a1cc8a3 add return 2017-05-26 03:26:39 +02:00
panni d080eae809 update version 2017-05-26 03:22:57 +02:00
panni 631b5033fe fix 2017-05-26 03:22:33 +02:00
panni af8ea6934b add config.dbm_supported 2017-05-26 03:20:19 +02:00
panni 19740ae6c2 try determining DBM availability 2017-05-26 03:15:35 +02:00
panni 7b78b71487 try determining DBM availability 2017-05-26 03:14:49 +02:00
panni 86a43a79c8 ninja __import__ 2017-05-26 03:08:06 +02:00
panni 6035a1bde4 bump DEV version 2017-05-26 00:16:01 +02:00
panni a32e952323 more VTT support stuff 2017-05-26 00:15:09 +02:00
panni d55b1c67df add VTT to subtitle_exts 2017-05-26 00:04:48 +02:00
panni 103f7bc18b bump DEV version 2017-05-26 00:02:06 +02:00
panni e857c223d4 add VTT format; fixes #292 2017-05-25 23:56:33 +02:00
ukdtom ea07997522 new image, and a few changed. 2017-05-25 23:09:07 +02:00
panni d492c73f94 add fixme 2017-05-25 00:01:27 +02:00
panni 3b836d29a2 item details menu: add shortcut to list subtitles for item if no subtitle in storage 2017-05-24 23:59:43 +02:00
panni 9248916527 findbetter: increase series_cutoff by 2 (resolution) 2017-05-24 22:03:27 +02:00
panni 2006ebb244 2.0.20.1364 RC9 2017-05-24 21:47:51 +02:00
panni 58c852cdba submod: OCR update eng data 2017-05-24 21:41:51 +02:00
panni 9e77a8e304 update guessit to d96859d056864b8956cbeb8c8f5bb6875d270e39 2017-05-24 21:40:12 +02:00
panni e9817f1e0d bump version 2017-05-24 18:03:53 +02:00
panni 123dde7b8f don't verify hashes of specials 2017-05-24 18:02:23 +02:00
panni c1b84eabdb improve specials support (opensubtitles), mostly for manual listing 2017-05-24 16:24:23 +02:00
panni c7ececde77 add doc 2017-05-23 23:06:12 +02:00
panni 6f305d636e make legandastv subtitle picklable for availablesubsforitem 2017-05-23 22:37:00 +02:00
panni d25990895c something's wrong with the menu history key here; add error debug 2017-05-23 22:12:22 +02:00
panni d406ced759 bump version 2017-05-23 18:08:28 +02:00
panni b858b56120 add hearing_impaired_verifiable per provider/subtitle and only bail out on force-non-hi if necessary; #289 2017-05-23 18:07:56 +02:00
panni c94fe81dbf bump dev version 2017-05-23 17:54:49 +02:00
panni a67bbebb84 Merge remote-tracking branch 'origin/develop-2.0' into develop-2.0 2017-05-23 13:00:38 +02:00
panni cf577c81e1 submod: OCR fixes: compile new dictionaries 2017-05-23 13:00:27 +02:00
panni ad236be02c submod: OCR fixes: and more. 2017-05-23 12:59:37 +02:00
panni 3412e379d6 submod: better unopened/unclosed font tag handling 2017-05-23 12:49:46 +02:00
panni 95f240ab07 submod: HI: HI_before_colon broke font style tags 2017-05-23 12:17:54 +02:00
panni 0c8ae3f45b submod: update eng OCR fix data 2017-05-23 11:58:08 +02:00
pannal fe87944049 Update README.md 2017-05-22 05:48:50 +02:00
pannal 2cbe290916 Update README.md 2017-05-22 05:48:19 +02:00
pannal a85321a1a9 Update README.md 2017-05-22 05:47:59 +02:00
pannal c55071d157 Update README.md 2017-05-22 05:41:24 +02:00
pannal 86eac774e7 Update README.md 2017-05-22 05:39:28 +02:00
pannal dac6df4282 Update README.md 2017-05-22 05:30:34 +02:00
pannal d7918b1714 Update README.md 2017-05-22 05:16:07 +02:00
pannal c4de84a23a Update README.md 2017-05-22 05:15:22 +02:00
panni c147c29756 add wiki notice to notify_executable pref 2017-05-22 02:33:18 +02:00
panni 5a4a50bc9d add note about enforce_encoding 2017-05-22 02:30:49 +02:00
panni 55ea4009c9 rename exotic_ext prefs to reflect its current function 2017-05-22 02:28:59 +02:00
panni 536fd7dfe4 bump dev version 2017-05-22 02:13:12 +02:00
panni a1f6568b84 only use the first video stream #270 2017-05-22 02:11:35 +02:00
panni 6a9112f03c add more known info about the media file/streams; resolves #270 2017-05-22 02:10:26 +02:00
panni 89b4305ccb don't query plex item twice in case of movies 2017-05-22 01:26:26 +02:00
ukdtom 8643e6a055 New pics for Wiki 2017-05-21 21:27:28 +02:00
panni e2756e85b7 2.0.19.1337 RC8 2017-05-21 15:52:37 +02:00
panni 0f7bc36e86 add fixme 2017-05-21 15:50:12 +02:00
panni 5e20032976 fix findbetter 2017-05-21 15:40:37 +02:00
panni c7dbac05a9 update guessit to 8d56c9f 2017-05-21 15:35:06 +02:00
panni a0a5adb807 remove info log 2017-05-21 06:19:41 +02:00
panni ac6a43f6e5 re-up recently to 2 weeks and 1000 items 2017-05-21 06:13:59 +02:00
panni 91f57da735 fix findallrecentlymissing 2017-05-21 06:13:29 +02:00
panni 488ac604f9 better debug info for findbettersubtitles 2017-05-21 04:09:33 +02:00
panni 70ab3e456f add missing info to hints and video_info 2017-05-21 04:00:36 +02:00
panni d0017d2ab8 fix 2017-05-21 03:41:44 +02:00
panni 9633abc09e ditch OMDB refiner support for now. all needed info comes from the PMS 2017-05-21 01:49:51 +02:00
panni 8f608acc71 submod: OCR update data 2017-05-20 22:31:28 +02:00
panni dbce582bdf submod: skip empty line post processors when not needed 2017-05-20 22:24:29 +02:00
panni 62f03bcf11 submod: fix not opened/closed font tags after modification 2017-05-20 16:20:27 +02:00
panni 530eb9ef66 adapt 1.4 readme 2017-05-20 05:22:39 +02:00
pannal 12509eb93a Update README.md 2017-05-20 04:48:45 +02:00
pannal 621623bdb6 Update README.md 2017-05-20 04:46:27 +02:00
panni 497a94e3a5 submod: update dictionaries from SE 2017-05-20 04:07:40 +02:00
pannal a2f5ce797d Update README.md 2017-05-20 03:33:57 +02:00
panni e17082d27e task allrecentlymissing: fix logging 2017-05-20 02:17:41 +02:00
panni 2eefb8e225 fixes; lower default recently added to 1 week 2017-05-20 01:26:38 +02:00
panni 5d9b1a1810 don't re-guess encoding when saving modified subtitle 2017-05-20 00:42:38 +02:00
panni f274e76253 submod: simplify 2017-05-20 00:35:34 +02:00
panni 3bfef7f67b submod: break mods.modify up to make it smaller 2017-05-20 00:34:09 +02:00
panni 5d6651e00e submod: HI: remove obsolete fixme 2017-05-19 23:33:40 +02:00
panni f0ed0b7c41 submod: common: move CM_double_apostrophe further up the chain 2017-05-19 23:29:49 +02:00
panni 0d4bf7b6b3 submod: common: CM_uppercase_i_in_word: support "WeII" aswell 2017-05-19 23:23:55 +02:00
panni a5c7c656e6 set get my logs link as title2 also 2017-05-19 23:15:18 +02:00
panni fb3a937c81 submod: add performance debug 2017-05-19 23:11:24 +02:00
panni e50820abd0 submod: common: fix CM_uppercase_i_in_word 2017-05-19 23:03:17 +02:00
panni 083084136c don't fall back to utf-8, we should be good here 2017-05-19 22:57:20 +02:00
panni 0188b81220 clarify 2017-05-19 22:55:09 +02:00
panni c7468dbfb5 submod: OCR add more eng data 2017-05-19 22:53:19 +02:00
panni d92ba7125e in case of microdvd, try guessing the fps from the file, else suggest the FPS from our media file. add docs 2017-05-19 22:52:05 +02:00
panni 050d5dd063 add config.enforce_encoding to debug log 2017-05-19 21:54:37 +02:00
panni a860c57bd1 when force-utf8 is enabled, also store subtitle content in utf-8 2017-05-19 21:52:19 +02:00
panni 1b0b189c16 add more encodings for western, eastern and northern europe 2017-05-19 18:51:52 +02:00
panni 7d2b3d6663 add our pysubs2 to_unicode encoder to PatchedSubtitle; add iso-8859-2 for polish; 2017-05-19 18:42:31 +02:00
panni 2899d68973 add fps to napiprojekt subtitle for when it can't be guessed from the MicroDVD format contents 2017-05-19 18:28:14 +02:00
panni 0cc8238b1a don't trigger text conversion more than once in is_valid 2017-05-19 17:55:05 +02:00
panni f277751d86 don't blerg all of the subtitle content into stdout; log the traceback for pysubs2 2017-05-19 17:51:58 +02:00
panni 74d63a9144 2.0.19.1299 RC7 2017-05-19 14:51:22 +02:00
panni 07f7b4e7fb add fixme 2017-05-19 14:42:58 +02:00
panni 92fda093f7 submod: CM_spaces_in_numbers: don't break up ellipses 2017-05-19 14:38:33 +02:00
panni 714751d2d8 submod: merge mergeable mods; skip duplicate exclusive mods early; make offset args mergeable to avoid nasty stuff like negative offset first, then positive 2017-05-19 14:29:59 +02:00
panni 2c949192b2 submod: improve processing performance by adding some shortcuts 2017-05-19 14:08:36 +02:00
panni c0e3c6a0eb submod: improve processing performance by feeding line mods already cleaned-up lines 2017-05-19 13:43:30 +02:00
panni 764484f735 submod: add fixed order to line mods 2017-05-19 03:29:05 +02:00
panni 208bd4fcb2 reset last order change 2017-05-19 03:28:44 +02:00
Tommy Mikkelsen 6b17825fa2 Merge pull request #284 from ukdtom/master
First part of new images for Wiki V2
2017-05-19 00:53:09 +02:00
ukdtom d20e0bd2c2 First part of new images for Wiki V2 2017-05-19 00:51:02 +02:00
panni ba53a5fa93 add more stuff to test.srt 2017-05-18 13:55:50 +02:00
panni 4d40da5661 submod: common: leading crocodile can also have a space in front 2017-05-18 13:49:36 +02:00
panni 4ab157e2a1 submod: re_processor: clean font style tags before processing the line 2017-05-18 13:47:36 +02:00
panni dbf64d2a2b submod: HI: make bracket detection more aggressive 2017-05-18 13:44:55 +02:00
panni 03d4ee3482 submod: HI: add HI_starting_upper_then_sentence 2017-05-18 13:17:43 +02:00
panni 959a061380 submod: set default order 2017-05-18 13:17:20 +02:00
panni f5432dfb9e submod: OCR: more eng default fixes 2017-05-18 13:16:59 +02:00
Tommy Mikkelsen 6e2f2fb9d2 Create .gitattributes
Exclude above from both zip files, tars and releases
Regular GIT Checkouts still gets everything
2017-05-18 02:10:42 +02:00
panni fb494a911d fix character ranges 2017-05-17 20:15:45 +02:00
panni bc9dec659c submod: update uppercase after dot to be less greedy 2017-05-17 20:12:20 +02:00
panni b68cc3f61e submod: use À-Ž instead of A-Z for patterns 2017-05-17 20:00:38 +02:00
panni 0db80add2c submod: common: fit non-uppercase after dot 2017-05-17 19:56:41 +02:00
panni 2a67632497 update OCR fix data 2017-05-17 19:17:04 +02:00
panni 5260b28c15 submod: HI: be less aggressive on removing text-before-colon 2017-05-17 19:14:26 +02:00
panni 4d365cba22 submod: don't fix countdown numbers 2017-05-17 19:02:42 +02:00
panni 8174a8efc3 submod fixes english: Âs='s 2017-05-17 19:01:07 +02:00
panni a5d8df35b6 more stuff for the readme 2017-05-17 18:50:06 +02:00
panni 0ad429ffaa add automation 2017-05-17 15:23:05 +02:00
panni 3108572387 move changelog for now 2017-05-17 15:17:28 +02:00
panni 98a406ff9e revert, preformatted looks better 2017-05-17 15:16:51 +02:00
panni 9257550e56 update readme for mods 2017-05-17 15:15:56 +02:00
panni ef19ed0a26 update readme for mods 2017-05-17 15:13:43 +02:00
panni 80daa8560d first version of the 2.0 readme 2017-05-17 15:06:53 +02:00
panni 797cc16a91 add cleanline processor; remove Mr->Mr. as it's valid in the UK 2017-05-17 14:26:19 +02:00
panni 771e0464d7 update OCR fixes 2017-05-17 13:51:48 +02:00
panni 715e9c0015 2.0.19.1267 RC6 2017-05-16 18:10:59 +02:00
panni d13a0c4fb3 submod: allow for more punctuation in spaced numbers; add more english OCR fixes 2017-05-16 17:55:49 +02:00
panni 2bb0517264 correctly handle partiallines 2017-05-16 17:46:56 +02:00
panni ac174673ef fix major whoopsie in item details 2017-05-16 14:22:31 +02:00
panni dacab5ece7 enzyme: fix logging; skip element without type 2017-05-16 14:22:22 +02:00
panni 69a5ef6f18 common fixes: test for leading ellipsis earlier to skip unnecessary CM_ellipsis_no_space 2017-05-16 14:15:19 +02:00
panni 47be8eef62 HI: improve all caps line matching (allow some punctuation) 2017-05-16 14:13:41 +02:00
panni fe7760e779 color mod: return the original line if color not found 2017-05-16 13:50:05 +02:00
panni 18dddaf0a1 add our own dictionaries to submod fixes 2017-05-16 13:44:23 +02:00
panni b32066e6f8 don't bother listing unexistant parts in item details menu 2017-05-16 13:37:02 +02:00
panni eca378c09e submod: fix patterns for beginlines/endlines 2017-05-16 13:34:28 +02:00
panni 2c3e4173f4 only append extension to jsonpath if necessary; bail out correctly 2017-05-16 13:00:59 +02:00
panni 488a65055b cache guessed encoding and don't re-guess every time 2017-05-15 18:47:52 +02:00
panni cb94f0c2c6 remove invalid comment 2017-05-15 18:09:31 +02:00
panni 8dc4cf8d63 subtitle history: don't fail on old dict data 2017-05-15 18:07:39 +02:00
panni 82ec5e0d5e only store subtitle info if save was successful 2017-05-15 18:02:33 +02:00
panni 91cebd2902 store encoding of subtitle in storage; store unicode version; add migration task 2017-05-15 18:00:51 +02:00
panni cecee18d8e implement new json/gzip based subtitle storage format; auto-migrate legacy data 2017-05-15 17:01:20 +02:00
panni 2b1ea2eb6f add json_tricks 3.9.0 2017-05-15 16:11:28 +02:00
panni bc67b380e5 Merge remote-tracking branch 'origin/develop-2.0' into develop-2.0 2017-05-14 02:53:36 +02:00
panni b7b784f442 clarify not found preferences.xml 2017-05-14 02:53:24 +02:00
pannal 6889effbb6 Update README.md 2017-05-14 02:44:43 +02:00
panni ae7865ecb8 2.0.18.1245 RC5 2017-05-14 02:31:25 +02:00
panni 83c9d4887b rename Auto-search to Force-find 2017-05-14 02:26:31 +02:00
panni 75da4dab70 clear up already decoded debug info 2017-05-14 02:25:14 +02:00
panni 07fccf9b52 shift_offset should be non-exclusive 2017-05-14 02:15:20 +02:00
panni 6cfafd60ef add full color range; add color submod menu 2017-05-14 02:13:12 +02:00
panni b24bd740c2 fix stupidity. add newline to subtitle line index 2017-05-14 01:36:38 +02:00
panni 6c81ee7b3a addic7ed: format also matches if release group was correct 2017-05-14 01:33:48 +02:00
panni cd00194819 add more debug 2017-05-14 01:24:19 +02:00
panni 0eda52e3b2 update readme 2017-05-13 16:47:29 +02:00
panni 56de3b5658 again 2017-05-13 15:00:37 +02:00
panni b8f31fc36f forgot version 2017-05-13 15:00:31 +02:00
panni 7354110d2f pre-release 2.0.15.1234 RC4 2017-05-13 14:59:15 +02:00
panni c08335b5a8 fail miserably when last-resort utf-8 encoding fails also 2017-05-13 14:49:43 +02:00
panni f4d9a3c65c add color mod; add to_unicode to submod 2017-05-13 06:32:40 +02:00
panni 174b73a5cb doc 2017-05-13 04:55:45 +02:00
panni 5df5123682 simplify data patterns 2017-05-13 04:32:29 +02:00
panni 1aef828fcd debug mods with repr; (um) = (?um) 2017-05-13 04:11:04 +02:00
panni 6401183eff increase searchallrecentlymissing wait to 5 seconds per request 2017-05-13 02:13:17 +02:00
panni 82757a2f0c apply correct path to env on non-windows 2017-05-13 02:05:15 +02:00
panni 736386bc31 try mitigating #27 2017-05-13 01:45:32 +02:00
panni 922bed81fa resolve #256 2017-05-13 01:34:20 +02:00
panni 708e8c5b14 also print SZ environment variables 2017-05-13 01:26:17 +02:00
panni 1e02082472 don't fail on metadata query timeout 2017-05-13 01:20:10 +02:00
panni 9599bcb70f searchallrecentlymissing: don't error on timeout; don't fail on no current mods 2017-05-13 01:17:48 +02:00
panni dad8460574 correctly handle multiple media files with multiple parts; honor physical ignore in missing subtitles 2017-05-12 18:23:53 +02:00
panni 021d12963f update provider test; add custom repr for napiprojektsubtitle 2017-05-12 16:30:24 +02:00
panni e5599650ac implement custom user agent (for OS) 2017-05-12 15:29:44 +02:00
panni 22a1eff98e backport provider download retry behaviour 2017-05-12 01:28:33 +02:00
panni fc00566469 also discard provider 2017-05-12 01:20:43 +02:00
panni 2e05eb91ca also discard provider 2017-05-12 01:18:43 +02:00
panni 7587860c12 1.4.27.980 2017-05-12 01:09:05 +02:00
panni fabb5dd003 Merge remote-tracking branch 'origin/master' 2017-05-12 01:07:12 +02:00
panni 314da8b50f only retry downloading on connection issues; increase retry-sleep to 5 seconds; #277 2017-05-12 01:06:47 +02:00
panni 031e035a50 2.0.15.1216 RC3 2017-05-08 17:56:25 +02:00
panni 02374575bc add missing thread.sleep 2017-05-08 17:54:57 +02:00
panni adef9e1014 only retry on specific RequestExceptions 2017-05-08 17:51:04 +02:00
panni 5bb3f15332 only retry on RequestException 2017-05-08 17:46:44 +02:00
panni 089e0d5d6c use WholeLineProcessor for WholeLines 2017-05-08 17:40:20 +02:00
pannal c8fbfcbc24 Update README.md 2017-05-08 16:30:02 +02:00
pannal a922961621 Update README.md 2017-05-08 16:29:42 +02:00
panni 513bc2ae8b use correct sys.modules path; add non-refreshing local subtitle search 2017-05-08 06:01:14 +02:00
panni 8a1c61ac22 2.0.15.1209 RC2 2017-05-08 05:34:32 +02:00
panni 3e1910a28b 2.0.15.1209 RC2 2017-05-08 04:07:24 +02:00
panni b5e5341436 add generic back options in sub menus 2017-05-08 03:59:53 +02:00
panni 223ef16583 add back menu items for season/episodes 2017-05-08 03:40:07 +02:00
panni 114312e1e5 rename leeway to sleep_after_request 2017-05-08 02:30:36 +02:00
panni 1a49159b64 by default don't download better subtitles for manually modified ones 2017-05-08 02:22:47 +02:00
panni d0ee9badb2 don't cleanup matching custom or embedded tag 2017-05-08 02:08:34 +02:00
panni b9116c30ed debounce crucial items in advanced menu 2017-05-08 02:03:22 +02:00
panni d7e6436d8d stagger less 2017-05-08 01:41:40 +02:00
panni c039172880 stagger thread creation on scheduled and manual (GUI) triggered tasks; react faster on requested task run 2017-05-08 01:39:34 +02:00
panni bd5da47370 adjust leeway to 0.2s 2017-05-08 01:29:17 +02:00
panni e9aabe0a5e spawn scheduled tasks in separate threads 2017-05-08 01:26:59 +02:00
panni f3f09dbb9d stagger SearchAllRecentlyAddedMissing 2017-05-08 01:26:33 +02:00
panni 3cc8a98f67 stagger FindBetter by 1 second per item 2017-05-08 01:07:28 +02:00
panni 31e923c080 reduce sudmod shift minute range from -59/60 to -15/15 2017-05-07 22:39:49 +02:00
panni 39b3b4a0c2 move update_local_media before ignore list checking 2017-05-07 22:21:24 +02:00
panni 8470daa20f more debug info when loading stored sub info; delete invalid sub info when loading; don't fail apply_default_mods on invalid sub info 2017-05-07 06:17:03 +02:00
panni e852137baf rename titles for on-deck and recently added items menu items 2017-05-07 05:32:48 +02:00
panni 753c46d9fd move PartUnknownException to helpers; add items.set_mods_for_part; add ApplyDefaultMods and ReApplyMods to advanced menu 2017-05-07 05:32:23 +02:00
panni e06ca730a2 make amount of stored recently played items dynamic 2017-05-07 05:31:02 +02:00
panni f84e84b17b allow wrong subtitle FPS when manually listing subtitles 2017-05-07 05:16:12 +02:00
panni 4f927b272b log no better subtitles found 2017-05-07 04:41:36 +02:00
panni 662e1a93a9 store last 20 played items; shift last played item accordingly if already in last played list 2017-05-07 03:40:41 +02:00
panni e25a043457 return save_successful on save_subtitles 2017-05-07 02:47:06 +02:00
panni b32f923513 add subtitle modification debug setting; also apply mods on metadata-stored subtitles 2017-05-07 02:45:12 +02:00
panni ad8898266e mod: common: fix starting space dots 2017-05-07 02:22:37 +02:00
panni 51e87bdda5 don't crash the menu when no mods are applied on the current subtitle 2017-05-06 18:07:53 +02:00
panni f88677b0f6 fix common fixes description 2017-05-06 18:04:20 +02:00
panni fc71ec0250 remove unnecessary debounces 2017-05-06 18:00:40 +02:00
panni ca6089c220 Pre-Release 2.0.12.1180 RC1 2017-05-06 17:49:58 +02:00
panni 7cc051fd90 set default movie score to lowest (60) 2017-05-06 17:43:38 +02:00
panni 5b01fda526 adapt forced_only for new providers (disable them) 2017-05-06 17:37:31 +02:00
panni 585f6b8a4d rename config.use_activities to react_to_activities and act accordingly 2017-05-06 17:29:11 +02:00
panni 81aeba0874 use added icon instead of recent icon for recently added menu 2017-05-06 17:24:05 +02:00
panni d9133e2793 add recently played menu 2017-05-06 17:22:33 +02:00
panni 9ef740ae1f remove_HI: less aggressive bracket content matching 2017-05-06 16:53:32 +02:00
panni e54fe71e93 reduce addicted default boost to 21 2017-05-06 16:46:54 +02:00
panni 9df878b8e3 add common fixes as default; remove debug print 2017-05-06 16:46:22 +02:00
panni 1a59c267c1 remove doublequote processors, doesn't seem possible 2017-05-06 16:42:07 +02:00
panni f8a07d983b fix typo resolves #274 2017-05-06 15:28:40 +02:00
panni 1f1847f246 change doublequote regexes 2017-05-06 06:48:52 +02:00
panni a32dfd6b37 add common fixes 2017-05-06 06:14:58 +02:00
panni b1cce92e04 use positive lookahead for HI all caps line detection 2017-05-06 01:35:43 +02:00
panni fdf32439c9 don't remove dash-in-front on hearing impaired; skip empty lines properly 2017-05-06 01:26:17 +02:00
panni fc2208f9e5 bump version 2017-05-05 19:32:12 +02:00
panni 1a4eb366bb add helping indicator to FPS mod; add 30fps 2017-05-05 19:31:43 +02:00
panni b89c64a2c2 add modification management menu 2017-05-05 19:19:34 +02:00
panni 68e8f6e753 don't remove HI by default 2017-05-05 19:11:43 +02:00
panni f15cc4cb3c add offset shifter submod 2017-05-05 19:10:32 +02:00
panni 903273e3ef add advanced submods; add global (non-line) submods; test implementation of ChangeFPS mod 2017-05-05 15:39:18 +02:00
panni 1c9b744d31 move subtitle modification menu to separate file 2017-05-05 14:58:19 +02:00
panni 7c0fb29886 fix init_cache whoopsie 2017-05-05 14:58:06 +02:00
panni 2505a7510c enzyme: incorporate 0.4.2 fixes 2017-05-05 14:44:59 +02:00
panni 0a66db40a2 fix findbetter 2017-05-05 14:30:49 +02:00
panni 6c68893979 add mod.long_description; add remove_last action to subtitle modification menu 2017-05-04 20:10:35 +02:00
panni c512eab0b6 testcommit 2017-05-04 20:00:12 +02:00
panni 3cedd4bd0f try getting plex token from environment by default 2017-05-04 19:33:05 +02:00
panni 0759c5e4c6 add environment debug 2017-05-04 19:31:07 +02:00
panni ad6cf4be79 move config debug to better position; verify readability of log files 2017-05-04 19:15:38 +02:00
panni 23c3899fb2 add fixme 2017-05-04 14:30:25 +02:00
panni 1a6515a660 add platform and os to config debug 2017-05-04 14:20:29 +02:00
panni 58815a7650 use external ip fallback when logs were requested from plex.tv 2017-05-04 14:16:10 +02:00
panni c15ec9fefc disable get_logs when universal plex token is None 2017-05-04 13:49:02 +02:00
panni 0e18d59680 2.0.0.12 2017-05-03 23:12:42 +02:00
panni 2d88efa5b4 add doc 2017-05-03 23:12:26 +02:00
panni b3da7572f3 add PartialWordsAlways to OCR_fixes 2017-05-03 23:11:02 +02:00
panni 099ec4e85d remove debug print; add doc 2017-05-03 23:04:25 +02:00
panni ff88a15c61 reset initialized mods after load 2017-05-03 22:59:47 +02:00
panni 839791b0fa add OCR fixes as default; fix little whoopsie in SubtitleModifications.modify 2017-05-03 22:52:33 +02:00
panni 159a533731 add precompiled patterns to data dict; add more parsed data; add OCR fixes finally 2017-05-03 22:44:54 +02:00
panni fb5835baa4 separate ocr fix data further into line, word, partial 2017-05-03 15:19:52 +02:00
panni a3f05cd597 separat partial and full replace data 2017-05-03 15:16:22 +02:00
panni f3af1672f6 use memory cache on windows for now; add config debug logging 2017-05-03 13:33:29 +02:00
panni c984c9849b only add better subtitle if its score is higher than the minimum configured 2017-05-02 21:37:40 +02:00
panni e28d264125 language conversion test 2017-05-02 19:22:57 +02:00
panni 7166ab9502 use default mods in tasks as well 2017-05-02 18:47:58 +02:00
panni ab242c2ecb add current find/replace data 2017-05-02 18:43:45 +02:00
panni 6f829dd4c7 move xmls to xml/; add make_data and test_data script; 2017-05-02 18:43:35 +02:00
panni 3e0602cdf0 add OCRFixReplaceList dictionaries of SubtitleEdit; commit 4f43a84c354d53251614fe6fa4c1b9df92839f57; add second test srt 2017-05-02 18:03:17 +02:00
panni 67cdebfb67 make subtitle modifications a subpackage of subzero 2017-05-02 18:01:46 +02:00
panni 0f87973742 modify test.srt to accomodate for specials chars in text-before-colon; handle special chars in HI_before_colon better 2017-05-02 17:42:39 +02:00
panni 92317f7730 add task run info logging 2017-05-01 05:37:38 +02:00
panni ce936c2553 add task debug 2017-05-01 05:37:09 +02:00
panni b995f16c34 2.0.0.10 DEV 2017-05-01 05:18:33 +02:00
panni 49c7adcc40 correctly use current_sub; correctly use add_mod with mods=mods 2017-05-01 05:18:02 +02:00
panni 88eee6fe48 lower opensubtitles timeout from 10 to 4 seconds 2017-05-01 04:52:05 +02:00
panni cbe425d150 add default hearing impaired removal; add optional mods to save_subtitles 2017-05-01 04:51:44 +02:00
panni 1c7d6b7bf8 debug stuff 2017-05-01 04:01:45 +02:00
panni 8323608558 add HI starting dash 2017-05-01 03:55:41 +02:00
panni 3f8a5ec125 add info to virtual subtitle instance 2017-05-01 03:50:45 +02:00
panni 464b1695a9 add subtitle modification processor debug option; fix remove_HI; 2017-05-01 03:50:30 +02:00
panni d85602612b subtitle modifications live! 2017-05-01 03:15:00 +02:00
panni 59440d251b rename support.background to support.scheduler; add subtitle modification to PatchedSubtitle 2017-05-01 02:39:44 +02:00
panni d774f09427 add doc; get_modified_content: always return unicode 2017-05-01 02:02:33 +02:00
panni 45be650db9 add exclusive mods; add processor/mod debug info; add to_string encoding parameter; add StoredSubtitle.add_mod; 2017-05-01 01:45:42 +02:00
panni d54847803f add subtitle modification menu; add get_current_sub; 2017-05-01 01:44:57 +02:00
panni ce3b66eda7 add mod registry 2017-05-01 00:19:21 +02:00
panni 5b6bcc7d12 add log warning for dogpile cache region setup; bump to 2.0.0.9 2017-04-30 23:55:31 +02:00
pannal 24d4c2ae2c memory backend fallback 2017-04-30 22:36:29 +02:00
pannal 98e451d57d move imports 2017-04-30 06:02:37 +02:00
pannal 8c491c45be re-merge GetLogsLink 2017-04-30 05:59:30 +02:00
pannal 6f271c5638 Merge branch '2.0_menu_maintenance' into develop-2.0
# Conflicts:
#	Contents/Code/interface/menu.py
2017-04-30 05:58:25 +02:00
pannal f9c083ebc6 split interface.menu up; first coarse try 2017-04-30 05:57:35 +02:00
pannal e79360915d remove redundancy 2017-04-30 04:56:35 +02:00
pannal 2fbd8fdc08 urlparse object has hostname, not host 2017-04-30 04:54:55 +02:00
pannal 5a9d5ec9a1 separate menu items 2017-04-30 04:53:11 +02:00
pannal 9ace798ee5 support older PMSs; use Referer for request origin determination 2017-04-30 04:37:16 +02:00
pannal 63e0dc0cb0 add debug if request origin can't be determined 2017-04-30 03:49:35 +02:00
pannal 974aae3ec6 precompile regex patterns 2017-04-30 00:37:26 +02:00
pannal 3268975849 hopefully fix #271 2017-04-29 23:59:07 +02:00
pannal b6adb4cff5 add SubtitleModifications.to_string; add mods to stored subtitle 2017-04-29 23:03:14 +02:00
pannal 78191bb750 doc; all caps line replaced with at least 3 chars; accept filename or subtitle content; optional known fps argument 2017-04-29 22:51:18 +02:00
pannal 2ab66671e5 add subtitle modification basics; add test script and test subtitle 2017-04-29 06:14:33 +02:00
pannal fdcfc630b3 fixed logging 2017-04-29 01:15:20 +02:00
pannal 3a717a8876 more logging 2017-04-29 00:47:32 +02:00
pannal 2dfb381b96 DEV 8 2017-04-29 00:44:52 +02:00
pannal d8a7e3331b improve logging for multiprocessing 2017-04-29 00:44:24 +02:00
pannal 2995eb1cac Release 1.4.27.974 2017-04-28 10:34:48 +02:00
pannal bedb097955 reflect DEV number in last version digit for now 2017-04-27 19:40:43 +02:00
pannal e6cebe41dc delete old libraries 2017-04-27 19:40:23 +02:00
pannal 5aa123d42b add automatic subtitle storage maintenance task 2017-04-27 19:08:15 +02:00
pannal 758b732142 1.4.27.974 2017-04-27 14:01:54 +02:00
pannal 50b80f3267 fix duplicate subtitles issue on synology/qnap #215
(cherry picked from commit 8b91093)
2017-04-27 14:00:41 +02:00
413 changed files with 34166 additions and 36496 deletions
+3
View File
@@ -0,0 +1,3 @@
.gitattributes export-ignore
/Wiki export-ignore
.gitignore export-ignore
+138
View File
@@ -1,3 +1,141 @@
2.0.23.1464 RC10.1
- core: huge bugfix; please check `Library/Application Support/Plex Media\ Server/Plug-in Support/Data/com.plexapp.agents.subzero/DataItems`
for any `subs_XXXXX.json.gz` file bigger than 500kb and delete them
2.0.23.1456 RC10
- core: findBetterSubtitles: increase series cutoff by 2 (resolution match)
- core: add VTT format
- core: fix crashes regarding DBM/cache management
- core: update rarfile.py
- core: add missing encodings
- core: full support for Serbian subtitles (Cyrillic and Latin)
- podnapisi: fix pt-BR, srp-cyrl and srp-latn
- core: implement own provider registry and ditch the subliminal one
- core: use ftfy library to fix re-encoding errors inside subtitles introduced by the subtitle author
- core: always store and save subtitles normalized to UTF-8
- core: replace spaced dashes in movie/series names before re-refining with plex metadata info
- submod: remove_HI: handle multiline brackets correctly
2.0.20.1364 RC9
- core: performance improvements
- core: if info couldn't be guessed from the filename, fill missing info from PMS #270
- submod: OCR: add more to the eng dictionary
- submod: HI: fixed some issues with font style tags
- core: don't ignore subtitles from providers that don't have hearing impaired info, when hearing impaired mode is set to "force non-HI"
- legendastv/menu: fix manual subtitle selection issues in menu
- core: improve specials matching on OpenSubtitles
- core: update guessit
2.0.19.1337 RC8
- napiprojekt: fixed: couldn't convert microdvd to SRT in certain occasions
- core: when normalize to UTF-8 is enabled, also store the subtitle in UTF-8 encoding in the internal storage
- core: add more encodings for western/eastern/northern europe
- submod: OCR: update dictionaries from SubtitleEdit
- submod: common: be smarter about uppercase i's in words that should have lowercase L's
- submod: fix unopened/unclosed font style tags after modification
- core: re-enable OMDB support
- core: update guessit for better matching
- core: fix SearchAllRecentlyMissing (was broken since RC3)
2.0.19.1299 RC7
- submod: offset mods now get merged internally when applied multiple times (to avoid errors and increase performance)
- submod: improve performance
- submod: core mods (OCR, common, remove_HI) now are always applied in a fixed order internally, regardless of the order they were added in
- submod: CM_spaces_in_numbers: don't break up ellipses (30... 29... 28...)
- submod: CM_spaces_in_numbers: don't fix countdown numbers (30, 29, 28)
- submod: remove_HI: make bracket removal more aggressive
- submod: remove_HI: be less aggressive when removing text-before-colon
- submod: remove_HI: remove all-uppercase-before-sentence (THIS IS ALL UPPERCASE And here starts a sentence -> And here starts a sentence)
- submod: fix all character ranges to include non-ASCII characters
- add new README for 2.0
2.0.19.1267 RC6
- core: add new SZ subtitle storage format
- smaller data files and less cumbersome
- it will auto migrate when old data is accessed - to speed this up, use "Trigger subtitle storage migration (expensive)" in advanced menu)
- core: performance optimizations
- addic7ed: when release group matches, assume the format matches, too (leftover change from RC5)
- submod: fix patterns for beginlines/endlines
- submod: add our own dictionaries to OCR fixes (english)
- submod: hearing impaired: also remove full-caps with punctuation inside
- submod: correctly handle partiallines
- submod: in numbers with spaces (incorrect), also allow for some punctuation (,.:')
2.0.18.1245 RC5
- core: add more debug info
- core: fix subtitle modifications (was broken in RC4, created non-usable subtitles)
- submod: add ANSI colors
- menu/submod: add color mod menu
- submod: exclusive mods now are mutually exclusive and get cleaned on duplicate
- menu/core: naming
For everyone who runs RC4: your subtitles are broken. Go to the advanced menu and trigger `Re-Apply mods of all stored subtitles` to fix them.
2.0.17.1234 RC4
- core: backport provider-download-retry implementation
- core: implement custom user agent (for OpenSubtitles)
- core/menu: correct handling of media with multiple files
- core: fix SearchAllRecentlyMissing; also wait 5 seconds between searches
- core: SearchAllRecentlyMissing: honor physical ignores
- submod: pattern fixes
- submod: better unicode handling
- submod: add color mod (only automatic by now)
2.0.15.1216 RC3
- core: fixes
- scheduler: revert some of the aggressive changes in RC2
- submod: be smarter about WholeLine matches
2.0.15.1209 RC2
- core: fixes
- core: submod-common: fix multiple dots at start of line
- core/menu: add subtitle modification debug setting
- core/menu: when manually listing available subtitles in menu, display those with wrong FPS also (opensubtitles), because you can fix them later
- core/menu: advanced-menu: add apply-all-default-mods menu item; add re-apply all mods menu item
- core: always look for currently (not-) existing subtitles when called; hopefully fixes #276
- scheduler/menu: be faster; also launch scheduled tasks in threads, not just manually launched ones
- core: don't delete subtitles with .custom or .embedded in their filenames when running auto cleanup, if the correct media file exists
- menu: add back-to-previous menu items
2.0.12.1180 RC1
- core: update subliminal to version 2
- core: update all dependencies
- core: add new providers: legendastv (pt-BR), napiprojekt (pl), shooter (cn), subscenter (heb)
- core: rewritten all subliminal patches for version 2
- menu: add icons for menu items; update main channel icon
- core: use SSL again for opensubtitles
- core: improved matching due to subliminal 2 (and SZ custom) tvdb/omdb refiners
- menu: add "Get my logs" function to the advanced menu, which zips up all necessary logs suitable for posting in the forums
- core: on non-windows systems, utilize a file-based cache database for provider media lists and subliminal refiner results
- core: add manual and automatic subtitle modification framework (fix common OCR issues, remove hearing impaired etc.)
- menu: add subtitle modifications (subtitle content fixes, offset-based shifting, framerate conversion)
- menu: add recently played menu
- improve almost everything Sub-Zero did in 1.4 :)
1.4.27.973
- core: ignore "obfuscated" and "scrambled" tags in filenames when searching for subtitles
- core: exotic embedded subtitles are now also considered when searching (and when the option is enabled); fixes #264
1.4.27.967
- core: remember the last 10 played items; only consider on_playback for "playing" state within the first 60 seconds of an item
1.4.27.965
- core: on_playback activity bugfixes
1.4.27.957
- core: correctly fall back to the next best subtitle if the current one couldn't be downloaded; hopefully fixes #231
- core: add "Scan: which external subtitles should be picked up?"-setting
+30 -62
View File
@@ -1,7 +1,8 @@
# coding=utf-8
import sys
import datetime
import os
from subliminal_patch import compute_score
from subzero.sandbox import restore_builtins
module = sys.modules['__main__']
@@ -16,18 +17,15 @@ import logger
sys.modules["logger"] = logger
import subliminal
import support
import interface
sys.modules["interface"] = interface
from subliminal.cli import MutexLock
from subzero.constants import OS_PLEX_USERAGENT, PERSONAL_MEDIA_IDENTIFIER
from interface.menu import *
from support.plex_media import media_to_videos, get_media_item_ids, scan_videos
from support.subtitlehelpers import get_subtitles_from_metadata
from support.storage import whack_missing_parts, save_subtitles
from support.storage import save_subtitles, store_subtitle_info
from support.items import is_ignored
from support.config import config
from support.lib import get_intent
@@ -35,15 +33,14 @@ from support.helpers import track_usage, get_title_for_video_metadata, get_ident
from support.history import get_history
from support.data import dispatch_migrate
from support.activities import activity
from support.download import download_best_subtitles
def Start():
HTTP.CacheTime = 0
HTTP.Headers['User-agent'] = OS_PLEX_USERAGENT
subliminal.region.configure('dogpile.cache.dbm', expiration_time=datetime.timedelta(days=30),
arguments={'filename': os.path.join(config.data_items_path, 'subzero.dbm'),
'lock_factory': MutexLock})
config.init_cache()
# clear expired intents
intent = get_intent()
@@ -52,9 +49,12 @@ def Start():
# clear expired menu history items
now = datetime.datetime.now()
if "menu_history" in Dict:
for key, timeout in Dict["menu_history"].items():
for key, timeout in Dict["menu_history"].copy().items():
if now > timeout:
del Dict["menu_history"][key]
try:
del Dict["menu_history"][key]
except:
pass
# run migrations
if "subs" in Dict or "history" in Dict:
@@ -90,45 +90,6 @@ def Start():
track_usage("General", "plugin", "start", config.version)
def download_best_subtitles(video_part_map, min_score=0):
hearing_impaired = Prefs['subtitles.search.hearingImpaired']
languages = config.lang_list
if not languages:
return
missing_languages = False
for video, part in video_part_map.iteritems():
if not Prefs['subtitles.save.filesystem']:
# scan for existing metadata subtitles
meta_subs = get_subtitles_from_metadata(part)
for language, subList in meta_subs.iteritems():
if subList:
video.subtitle_languages.add(language)
Log.Debug("Found metadata subtitle %s for %s", language, video)
missing_subs = (languages - video.subtitle_languages)
# all languages are found if we either really have subs for all languages or we only want to have exactly one language
# and we've only found one (the case for a selected language, Prefs['subtitles.only_one'] (one found sub matches any language))
found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
if not missing_subs or found_one_which_is_enough:
if found_one_which_is_enough:
Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
else:
Log.Debug('All languages %r exist for %s', languages, video)
continue
missing_languages = True
break
if missing_languages:
Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s" % (min_score, hearing_impaired))
return subliminal.download_best_subtitles(video_part_map.keys(), languages, min_score, hearing_impaired, providers=config.providers,
provider_configs=config.provider_settings, pool_class=config.provider_pool,
compute_score=compute_score)
Log.Debug("All languages for all requested videos exist. Doing nothing.")
def update_local_media(metadata, media, media_type="movies"):
# Look for subtitles
if media_type == "movies":
@@ -169,10 +130,6 @@ class SubZeroAgent(object):
results.Append(MetadataSearchResult(id='null', score=100))
def update(self, metadata, media, lang):
if not config.enable_agent:
Log.Debug("Skipping Sub-Zero agent(s)")
return
Log.Debug("Sub-Zero %s, %s update called" % (config.version, self.agent_type))
intent = get_intent()
@@ -185,6 +142,9 @@ class SubZeroAgent(object):
config.init_subliminal_patches()
videos = media_to_videos(media, kind=self.agent_type)
# find local media
update_local_media(metadata, media, media_type=self.agent_type)
# media ignored?
use_any_parts = False
for video in videos:
@@ -205,20 +165,24 @@ class SubZeroAgent(object):
set_refresh_menu_state(media, media_type=self.agent_type)
# find local media
update_local_media(metadata, media, media_type=self.agent_type)
# scanned_video_part_map = {subliminal.Video: plex_part, ...}
scanned_video_part_map = scan_videos(videos, kind=self.agent_type)
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score)
item_ids = get_media_item_ids(media, kind=self.agent_type)
downloaded_subtitles = None
if not config.enable_agent:
Log.Debug("Skipping Sub-Zero agent(s)")
whack_missing_parts(scanned_video_part_map)
else:
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score)
item_ids = get_media_item_ids(media, kind=self.agent_type)
downloaded_any = False
if downloaded_subtitles:
save_subtitles(scanned_video_part_map, downloaded_subtitles)
downloaded_any = any(downloaded_subtitles.values())
if downloaded_any:
save_subtitles(scanned_video_part_map, downloaded_subtitles, mods=config.default_mods)
track_usage("Subtitle", "refreshed", "download", 1)
for video, video_subtitles in downloaded_subtitles.items():
@@ -228,6 +192,10 @@ class SubZeroAgent(object):
history = get_history()
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
subtitle=subtitle)
else:
# store subtitle info even if we've downloaded none
store_subtitle_info(scanned_video_part_map, dict((k, []) for k in scanned_video_part_map.keys()),
None, mode="a")
update_local_media(metadata, media, media_type=self.agent_type)
@@ -237,7 +205,7 @@ class SubZeroAgent(object):
# notify any running tasks about our finished update
for item_id in item_ids:
scheduler.signal("updated_metadata", item_id)
#scheduler.signal("updated_metadata", item_id)
# resolve existing intent for that id
intent.resolve("force", item_id)
+17 -1
View File
@@ -2,6 +2,22 @@ import sys
import menu
sys.modules["interface.menu"] = menu
sys.modules["menu"] = menu
import menu_helpers
sys.modules["interface.menu_helpers"] = menu_helpers
sys.modules["interface.menu_helpers"] = menu_helpers
import advanced
sys.modules["interface.advanced"] = advanced
import main
sys.modules["interface.main"] = main
import refresh_item
sys.modules["interface.refresh_item"] = refresh_item
import item_details
sys.modules["interface.item_details"] = item_details
import sub_mod
sys.modules["interface.modification"] = sub_mod
+339
View File
@@ -0,0 +1,339 @@
# coding=utf-8
import datetime
import StringIO
import glob
import os
import traceback
import urlparse
from zipfile import ZipFile, ZIP_DEFLATED
from babelfish import Language
from subzero.lib.io import FileIO
from subzero.constants import PREFIX, PLUGIN_IDENTIFIER
from menu_helpers import SubFolderObjectContainer, debounce, set_refresh_menu_state, ZipObject, ObjectContainer
from main import fatality
from support.helpers import timestamp, pad_title
from support.config import config
from support.lib import Plex
from support.storage import reset_storage, log_storage, get_subtitle_storage
from support.scheduler import scheduler
from support.items import set_mods_for_part, get_item_kind_from_rating_key
@route(PREFIX + '/advanced')
def AdvancedMenu(randomize=None, header=None, message=None):
oc = SubFolderObjectContainer(header=header or "Internal stuff, pay attention!", message=message, no_cache=True,
no_history=True,
replace_parent=False, title2="Advanced")
if config.lock_advanced_menu and not config.pin_correct:
oc.add(DirectoryObject(
key=Callback(PinMenu, randomize=timestamp(), success_go_to="advanced"),
title=pad_title("Enter PIN"),
summary="The owner has restricted the access to this menu. Please enter the correct pin",
))
return oc
oc.add(DirectoryObject(
key=Callback(TriggerRestart, randomize=timestamp()),
title=pad_title("Restart the plugin"),
))
oc.add(DirectoryObject(
key=Callback(GetLogsLink),
title="Get my logs (copy the appearing link and open it in your browser, please)",
summary="Copy the appearing link and open it in your browser, please",
))
oc.add(DirectoryObject(
key=Callback(TriggerBetterSubtitles, randomize=timestamp()),
title=pad_title("Trigger find better subtitles"),
))
oc.add(DirectoryObject(
key=Callback(TriggerStorageMaintenance, randomize=timestamp()),
title=pad_title("Trigger subtitle storage maintenance"),
))
oc.add(DirectoryObject(
key=Callback(TriggerStorageMigration, randomize=timestamp()),
title=pad_title("Trigger subtitle storage migration (expensive)"),
))
oc.add(DirectoryObject(
key=Callback(ApplyDefaultMods, randomize=timestamp()),
title=pad_title("Apply configured default subtitle mods to all (active) stored subtitles"),
))
oc.add(DirectoryObject(
key=Callback(ReApplyMods, randomize=timestamp()),
title=pad_title("Re-Apply mods of all stored subtitles"),
))
oc.add(DirectoryObject(
key=Callback(LogStorage, key="tasks", randomize=timestamp()),
title=pad_title("Log the plugin's scheduled tasks state storage"),
))
oc.add(DirectoryObject(
key=Callback(LogStorage, key="ignore", randomize=timestamp()),
title=pad_title("Log the plugin's internal ignorelist storage"),
))
oc.add(DirectoryObject(
key=Callback(LogStorage, key=None, randomize=timestamp()),
title=pad_title("Log the plugin's complete state storage"),
))
oc.add(DirectoryObject(
key=Callback(ResetStorage, key="tasks", randomize=timestamp()),
title=pad_title("Reset the plugin's scheduled tasks state storage"),
))
oc.add(DirectoryObject(
key=Callback(ResetStorage, key="ignore", randomize=timestamp()),
title=pad_title("Reset the plugin's internal ignorelist storage"),
))
oc.add(DirectoryObject(
key=Callback(InvalidateCache, randomize=timestamp()),
title=pad_title("Invalidate Sub-Zero metadata caches (subliminal)"),
))
return oc
def DispatchRestart():
Thread.CreateTimer(1.0, Restart)
@route(PREFIX + '/advanced/restart/trigger')
@debounce
def TriggerRestart(randomize=None):
set_refresh_menu_state("Restarting the plugin")
DispatchRestart()
return fatality(header="Restart triggered, please wait about 5 seconds", force_title=" ", only_refresh=True,
replace_parent=True,
no_history=True, randomize=timestamp())
@route(PREFIX + '/advanced/restart/execute')
def Restart():
Plex[":/plugins"].restart(PLUGIN_IDENTIFIER)
@route(PREFIX + '/storage/reset', sure=bool)
@debounce
def ResetStorage(key, randomize=None, sure=False):
if not sure:
oc = SubFolderObjectContainer(no_history=True, title1="Reset subtitle storage", title2="Are you sure?")
oc.add(DirectoryObject(
key=Callback(ResetStorage, key=key, sure=True, randomize=timestamp()),
title=pad_title("Are you really sure?"),
))
return oc
reset_storage(key)
if key == "tasks":
# reinitialize the scheduler
scheduler.init_storage()
scheduler.setup_tasks()
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='Information Storage (%s) reset' % key
)
@route(PREFIX + '/storage/log')
def LogStorage(key, randomize=None):
log_storage(key)
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='Information Storage (%s) logged' % key
)
@route(PREFIX + '/triggerbetter')
@debounce
def TriggerBetterSubtitles(randomize=None):
scheduler.dispatch_task("FindBetterSubtitles")
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='FindBetterSubtitles triggered'
)
@route(PREFIX + '/triggermaintenance')
@debounce
def TriggerStorageMaintenance(randomize=None):
scheduler.dispatch_task("SubtitleStorageMaintenance")
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='SubtitleStorageMaintenance triggered'
)
@route(PREFIX + '/triggerstoragemigration')
@debounce
def TriggerStorageMigration(randomize=None):
scheduler.dispatch_task("MigrateSubtitleStorage")
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='MigrateSubtitleStorage triggered'
)
def apply_default_mods(reapply_current=False):
storage = get_subtitle_storage()
subs_applied = 0
for fn in storage.get_all_files():
data = storage.load(None, filename=fn)
if data:
video_id = data.video_id
item_type = get_item_kind_from_rating_key(video_id)
if not item_type:
continue
for part_id, part in data.parts.iteritems():
for lang, subs in part.iteritems():
current_sub = subs.get("current")
if not current_sub:
continue
sub = subs[current_sub]
if not sub.content:
continue
current_mods = sub.mods or []
if not reapply_current:
add_mods = list(set(config.default_mods).difference(set(current_mods)))
if not add_mods:
continue
else:
if not current_mods:
continue
add_mods = []
try:
set_mods_for_part(video_id, part_id, Language.fromietf(lang), item_type, add_mods, mode="add")
except:
Log.Error("Couldn't set mods for %s:%s: %s", video_id, part_id, traceback.format_exc())
continue
subs_applied += 1
Log.Debug("Applied mods to %i items" % subs_applied)
@route(PREFIX + '/applydefaultmods')
@debounce
def ApplyDefaultMods(randomize=None):
Thread.CreateTimer(1.0, apply_default_mods)
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='This may take some time ...'
)
@route(PREFIX + '/reapplyallmods')
@debounce
def ReApplyMods(randomize=None):
Thread.CreateTimer(1.0, apply_default_mods, reapply_current=True)
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='This may take some time ...'
)
@route(PREFIX + '/get_logs_link')
def GetLogsLink():
if not config.plex_token:
oc = ObjectContainer(title2="Download Logs", no_cache=True, no_history=True,
header="Sorry, feature unavailable",
message="Universal Plex token not available")
return oc
# try getting the link base via the request in context, first, otherwise use the public ip
req_headers = Core.sandbox.context.request.headers
get_external_ip = True
link_base = ""
if "Origin" in req_headers:
link_base = req_headers["Origin"]
Log.Debug("Using origin-based link_base")
get_external_ip = False
elif "Referer" in req_headers:
parsed = urlparse.urlparse(req_headers["Referer"])
link_base = "%s://%s:%s" % (parsed.scheme, parsed.hostname, parsed.port)
Log.Debug("Using referer-based link_base")
get_external_ip = False
if get_external_ip or "plex.tv" in link_base:
ip = Core.networking.http_request("http://www.plexapp.com/ip.php", cacheTime=7200).content.strip()
link_base = "https://%s:32400" % ip
Log.Debug("Using ip-based fallback link_base")
logs_link = "%s%s?X-Plex-Token=%s" % (link_base, PREFIX + '/logs', config.plex_token)
oc = ObjectContainer(title2=logs_link, no_cache=True, no_history=True,
header="Copy this link and open this in your browser, please",
message=logs_link)
return oc
@route(PREFIX + '/logs')
def DownloadLogs():
buff = StringIO.StringIO()
zip_archive = ZipFile(buff, mode='w', compression=ZIP_DEFLATED)
logs = sorted(glob.glob(config.plugin_log_path + '*')) + [config.server_log_path]
for path in logs:
data = StringIO.StringIO()
data.write(FileIO.read(path))
zip_archive.writestr(os.path.basename(path), data.getvalue())
zip_archive.close()
return ZipObject(buff.getvalue())
@route(PREFIX + '/invalidatecache')
@debounce
def InvalidateCache(randomize=None):
from subliminal.cache import region
region.invalidate()
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='Cache invalidated'
)
@route(PREFIX + '/pin')
def PinMenu(pin="", randomize=None, success_go_to="channel"):
oc = ObjectContainer(title2="Enter PIN number %s" % (len(pin) + 1), no_cache=True, no_history=True,
skip_pin_lock=True)
if pin == config.pin:
Dict["pin_correct_time"] = datetime.datetime.now()
config.locked = False
if success_go_to == "channel":
return fatality(force_title="PIN correct", header="PIN correct", no_history=True)
elif success_go_to == "advanced":
return AdvancedMenu(randomize=timestamp())
for i in range(10):
oc.add(DirectoryObject(
key=Callback(PinMenu, randomize=timestamp(), pin=pin + str(i), success_go_to=success_go_to),
title=pad_title(str(i)),
))
oc.add(DirectoryObject(
key=Callback(PinMenu, randomize=timestamp(), success_go_to=success_go_to),
title=pad_title("Reset"),
))
return oc
@route(PREFIX + '/pin_lock')
def ClearPin(randomize=None):
Dict["pin_correct_time"] = None
config.locked = True
return fatality(force_title="Menu locked", header=" ", no_history=True)
+293
View File
@@ -0,0 +1,293 @@
# coding=utf-8
import os
from sub_mod import SubtitleModificationsMenu
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, add_ignore_options, get_item_task_data, \
set_refresh_menu_state
from refresh_item import RefreshItem
from subzero.constants import PREFIX
from support.config import config
from support.helpers import timestamp, cast_bool, df, get_language
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub
from support.lib import Plex
from support.plex_media import get_plex_metadata, scan_videos, PMSMediaProxy
from support.scheduler import scheduler
from support.storage import get_subtitle_storage
# fixme: needs kwargs cleanup
@route(PREFIX + '/item/{rating_key}/actions')
@debounce
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None):
"""
displays the item details menu of an item that doesn't contain any deeper tree, such as a movie or an episode
:param rating_key:
:param title:
:param base_title:
:param item_title:
:param randomize:
:return:
"""
from interface.main import IgnoreMenu
title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)
item = get_item(rating_key)
current_kind = get_item_kind_from_rating_key(rating_key)
timeout = 30
oc = SubFolderObjectContainer(title2=title, replace_parent=True)
# add back to season for episode
if current_kind == "episode":
from interface.menu import MetadataMenu
show = get_item(item.show.rating_key)
season = get_item(item.season.rating_key)
oc.add(DirectoryObject(
key=Callback(MetadataMenu, rating_key=season.rating_key, title=season.title, base_title=show.title,
previous_item_type="show", previous_rating_key=show.rating_key,
display_items=True, randomize=timestamp()),
title=u"< Back to %s" % season.title,
summary="Back to %s > %s" % (show.title, season.title),
thumb=season.thumb or default_thumb
))
oc.add(DirectoryObject(
key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, randomize=timestamp(),
timeout=timeout * 1000),
title=u"Refresh: %s" % item_title,
summary="Refreshes the %s, possibly searching for missing and picking up new subtitles on disk" % current_kind,
thumb=item.thumb or default_thumb
))
oc.add(DirectoryObject(
key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(),
timeout=timeout * 1000),
title=u"Force-find subtitles: %s" % item_title,
summary="Issues a forced refresh, ignoring known subtitles and searching for new ones",
thumb=item.thumb or default_thumb
))
# get stored subtitle info for item id
subtitle_storage = get_subtitle_storage()
stored_subs = subtitle_storage.load_or_new(item)
# get the plex item
plex_item = get_item(rating_key)
# look for subtitles for all available media parts and all of their languages
has_multiple_parts = len(plex_item.media) > 1
part_index = 0
for media in plex_item.media:
for part in media.parts:
filename = os.path.basename(part.file)
if not os.path.exists(part.file):
continue
part_id = str(part.id)
part_index += 1
# iterate through all configured languages
for lang in config.lang_list:
# get corresponding stored subtitle data for that media part (physical media item), for language
current_sub = stored_subs.get_any(part_id, lang)
current_sub_id = None
current_sub_provider_name = None
part_index_addon = ""
part_summary_addon = ""
if has_multiple_parts:
part_index_addon = u"File %s: " % part_index
part_summary_addon = "%s " % filename
summary = u"%sNo current subtitle in storage" % part_summary_addon
current_score = None
if current_sub:
current_sub_id = current_sub.id
current_sub_provider_name = current_sub.provider_name
current_score = current_sub.score
summary = u"%sCurrent subtitle: %s (added: %s, %s), Language: %s, Score: %i, Storage: %s" % \
(part_summary_addon, current_sub.provider_name, df(current_sub.date_added),
current_sub.mode_verbose, lang, current_sub.score, current_sub.storage_type)
oc.add(DirectoryObject(
key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
item_title=item_title, language=lang, language_name=lang.name,
current_id=current_sub_id,
item_type=plex_item.type, filename=filename, current_data=summary,
randomize=timestamp(), current_provider=current_sub_provider_name,
current_score=current_score),
title=u"%sActions for %s subtitle" % (part_index_addon, lang.name),
summary=summary
))
else:
oc.add(DirectoryObject(
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
item_title=item_title, language=lang, language_name=lang.name,
current_id=current_sub_id,
item_type=plex_item.type, filename=filename, current_data=summary,
randomize=timestamp(), current_provider=current_sub_provider_name,
current_score=current_score),
title=u"%sList %s subtitles" % (part_index_addon, lang.name),
summary=summary
))
add_ignore_options(oc, "videos", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
return oc
@route(PREFIX + '/item/current_sub/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleOptionsMenu(**kwargs):
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
rating_key = kwargs["rating_key"]
part_id = kwargs["part_id"]
language = kwargs["language"]
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
kwargs.pop("randomize")
oc.add(DirectoryObject(
key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
title=kwargs["title"], randomize=timestamp()),
title=u"< Back to %s" % kwargs["title"],
summary=kwargs["current_data"],
thumb=default_thumb
))
oc.add(DirectoryObject(
key=Callback(ListAvailableSubsForItemMenu, randomize=timestamp(), **kwargs),
title=u"List %s subtitles" % kwargs["language_name"],
summary=kwargs["current_data"]
))
if current_sub:
oc.add(DirectoryObject(
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
title=u"Modify %s subtitle" % kwargs["language_name"],
summary=u"Currently applied mods: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
))
return oc
@route(PREFIX + '/item/search/{rating_key}/{part_id}', force=bool)
@debounce
def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item_title=None, filename=None,
item_type="episode", language=None, language_name=None, force=False, current_id=None,
current_data=None,
current_provider=None, current_score=None, randomize=None):
assert rating_key, part_id
running = scheduler.is_task_running("AvailableSubsForItem")
search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)
if (search_results is None or force) and not running:
scheduler.dispatch_task("AvailableSubsForItem", rating_key=rating_key, item_type=item_type, part_id=part_id,
language=language)
running = True
oc = SubFolderObjectContainer(title2=unicode(title), replace_parent=True)
oc.add(DirectoryObject(
key=Callback(ItemDetailsMenu, rating_key=rating_key, item_title=item_title, title=title, randomize=timestamp()),
title=u"< Back to %s" % title,
summary=current_data,
thumb=default_thumb
))
metadata = get_plex_metadata(rating_key, part_id, item_type)
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
if not scanned_parts:
Log.Error("Couldn't list available subtitles for %s", rating_key)
return oc
video, plex_part = scanned_parts.items()[0]
video_display_data = [video.format] if video.format else []
if video.release_group:
video_display_data.append(u"by %s" % video.release_group)
video_display_data = " ".join(video_display_data)
current_display = (u"Current: %s (%s) " % (current_provider, current_score) if current_provider else "")
if not running:
oc.add(DirectoryObject(
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title, language=language,
filename=filename, part_id=part_id, title=title, current_id=current_id, force=True,
current_provider=current_provider, current_score=current_score,
current_data=current_data, item_type=item_type, randomize=timestamp()),
title=u"Search for %s subs (%s)" % (get_language(language).name, video_display_data),
summary=u"%sFilename: %s" % (current_display, filename),
thumb=default_thumb
))
if search_results == "found_none":
oc.add(DirectoryObject(
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title,
language=language, filename=filename, current_data=current_data, force=True,
part_id=part_id, title=title, current_id=current_id, item_type=item_type,
current_provider=current_provider, current_score=current_score,
randomize=timestamp()),
title=u"No subtitles found",
summary=u"%sFilename: %s" % (current_display, filename),
thumb=default_thumb
))
else:
oc.add(DirectoryObject(
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title,
language=language, filename=filename, current_data=current_data,
part_id=part_id, title=title, current_id=current_id, item_type=item_type,
current_provider=current_provider, current_score=current_score,
randomize=timestamp()),
title=u"Searching for %s subs (%s), refresh here ..." % (get_language(language).name, video_display_data),
summary=u"%sFilename: %s" % (current_display, filename),
thumb=default_thumb
))
if not search_results or search_results == "found_none":
return oc
seen = []
for subtitle in search_results:
if subtitle.id in seen:
continue
wrong_fps_addon = ""
if subtitle.wrong_fps:
wrong_fps_addon = " (wrong FPS, sub: %s, media: %s)" % (subtitle.fps, plex_part.fps)
oc.add(DirectoryObject(
key=Callback(TriggerDownloadSubtitle, rating_key=rating_key, randomize=timestamp(), item_title=item_title,
subtitle_id=str(subtitle.id), language=language),
title=u"%s: %s, score: %s%s" % ("Available" if current_id != subtitle.id else "Current",
subtitle.provider_name, subtitle.score, wrong_fps_addon),
summary=u"Release: %s, Matches: %s" % (subtitle.release_info, ", ".join(subtitle.matches)),
thumb=default_thumb
))
seen.append(current_id)
return oc
@route(PREFIX + '/download_subtitle/{rating_key}')
@debounce
def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None, language=None, randomize=None):
from interface.main import fatality
set_refresh_menu_state("Downloading subtitle for %s" % item_title or rating_key)
search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)
download_subtitle = None
for subtitle in search_results:
if str(subtitle.id) == subtitle_id:
download_subtitle = subtitle
break
if not download_subtitle:
Log.Error(u"Something went horribly wrong")
else:
scheduler.dispatch_task("DownloadSubtitleForItem", rating_key=rating_key, subtitle=download_subtitle)
return fatality(randomize=timestamp(), header=" ", replace_parent=True)
+420
View File
@@ -0,0 +1,420 @@
# coding=utf-8
from subzero.constants import PREFIX, TITLE, ART
from support.config import config
from support.helpers import pad_title, timestamp, df, get_plex_item_display_title
from support.scheduler import scheduler
from support.ignore import ignore_list
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, \
get_item_kind_from_item
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_ignore_options,\
ObjectContainer
from item_details import ItemDetailsMenu
@handler(PREFIX, TITLE if not config.is_development else TITLE + " DEV", art=ART, thumb=main_icon)
@route(PREFIX)
def fatality(randomize=None, force_title=None, header=None, message=None, only_refresh=False, no_history=False,
replace_parent=False):
"""
subzero main menu
"""
from interface.advanced import PinMenu, ClearPin, AdvancedMenu
from interface.menu import RefreshMissing, IgnoreListMenu, HistoryMenu
title = config.full_version # force_title if force_title is not None else config.full_version
oc = ObjectContainer(title1=title, title2=title, header=unicode(header) if header else title, message=message,
no_history=no_history,
replace_parent=replace_parent, no_cache=True)
# always re-check permissions
config.refresh_permissions_status()
# always re-check enabled sections
config.refresh_enabled_sections()
if config.lock_menu and not config.pin_correct:
oc.add(DirectoryObject(
key=Callback(PinMenu, randomize=timestamp()),
title=pad_title("Enter PIN"),
summary="The owner has restricted the access to this menu. Please enter the correct pin",
))
return oc
if not config.permissions_ok and config.missing_permissions:
for title, path in config.missing_permissions:
oc.add(DirectoryObject(
key=Callback(fatality, randomize=timestamp()),
title=pad_title("Insufficient permissions"),
summary="Insufficient permissions on library %s, folder: %s" % (title, path),
))
return oc
if not config.enabled_sections:
oc.add(DirectoryObject(
key=Callback(fatality, randomize=timestamp()),
title=pad_title("I'm not enabled!"),
summary="Please enable me for some of your libraries in your server settings; currently I do nothing",
))
return oc
if not only_refresh:
if Dict["current_refresh_state"]:
oc.add(DirectoryObject(
key=Callback(fatality, force_title=" ", randomize=timestamp()),
title=pad_title("Working ... refresh here"),
summary="Current state: %s; Last state: %s" % (
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
)
))
oc.add(DirectoryObject(
key=Callback(OnDeckMenu),
title="On-deck items",
summary="Shows the current on deck items and allows you to individually (force-) refresh their metadata/"
"subtitles.",
thumb=R("icon-ondeck.jpg")
))
if "last_played_items" in Dict and Dict["last_played_items"]:
oc.add(DirectoryObject(
key=Callback(RecentlyPlayedMenu),
title=pad_title("Recently played items"),
summary="Shows the %i recently played items and allows you to individually (force-) refresh their "
"metadata/subtitles." % config.store_recently_played_amount,
thumb=R("icon-played.jpg")
))
oc.add(DirectoryObject(
key=Callback(RecentlyAddedMenu),
title="Recently-added items",
summary="Shows the recently added items per section.",
thumb=R("icon-added.jpg")
))
oc.add(DirectoryObject(
key=Callback(RecentMissingSubtitlesMenu, randomize=timestamp()),
title="Items with missing subtitles",
summary="Shows the items honoring the configured 'Item age to be considered recent'-setting (%s)"
" and allowing you to individually (force-) refresh their metadata/subtitles. " %
Prefs["scheduler.item_is_recent_age"],
thumb=R("icon-missing.jpg")
))
oc.add(DirectoryObject(
key=Callback(SectionsMenu),
title="Browse all items",
summary="Go through your whole library and manage your ignore list. You can also "
"(force-) refresh the metadata/subtitles of individual items.",
thumb=R("icon-browse.jpg")
))
task_name = "SearchAllRecentlyAddedMissing"
task = scheduler.task(task_name)
if task.ready_for_display:
task_state = "Running: %s/%s (%s%%)" % (task.items_done, task.items_searching, task.percentage)
else:
task_state = "Last scheduler run: %s; Next scheduled run: %s; Last runtime: %s" % (
df(scheduler.last_run(task_name)) or "never",
df(scheduler.next_run(task_name)) or "never",
str(task.last_run_time).split(".")[0])
oc.add(DirectoryObject(
key=Callback(RefreshMissing, randomize=timestamp()),
title="Search for missing subtitles (in recently-added items, max-age: %s)" % Prefs[
"scheduler.item_is_recent_age"],
summary="Automatically run periodically by the scheduler, if configured. %s" % task_state,
thumb=R("icon-search.jpg")
))
oc.add(DirectoryObject(
key=Callback(IgnoreListMenu),
title="Display ignore list (%d)" % len(ignore_list),
summary="Show the current ignore list (mainly used for the automatic tasks)",
thumb=R("icon-ignore.jpg")
))
oc.add(DirectoryObject(
key=Callback(HistoryMenu),
title="History",
summary="Show the last %i downloaded subtitles" % int(Prefs["history_size"]),
thumb=R("icon-history.jpg")
))
oc.add(DirectoryObject(
key=Callback(fatality, force_title=" ", randomize=timestamp()),
title=pad_title("Refresh"),
summary="Current state: %s; Last state: %s" % (
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
),
thumb=R("icon-refresh.jpg")
))
# add re-lock after pin unlock
if config.pin:
oc.add(DirectoryObject(
key=Callback(ClearPin, randomize=timestamp()),
title=pad_title("Re-lock menu(s)"),
summary="Enabled the PIN again for menu(s)"
))
if not only_refresh:
oc.add(DirectoryObject(
key=Callback(AdvancedMenu),
title=pad_title("Advanced functions"),
summary="Use at your own risk",
thumb=R("icon-advanced.jpg")
))
return oc
@route(PREFIX + '/on_deck')
def OnDeckMenu(message=None):
"""
displays the items on deck
:param message:
:return:
"""
return mergedItemsMenu(title="Items On Deck", base_title="Items On Deck", itemGetter=get_on_deck_items)
@route(PREFIX + '/recently_played')
def RecentlyPlayedMenu():
base_title = "Recently Played"
oc = SubFolderObjectContainer(title2=base_title, replace_parent=True)
for item in [get_item(rating_key) for rating_key in Dict["last_played_items"]]:
kind = get_item_kind_from_item(item)
if kind not in ("episode", "movie"):
continue
if kind == "episode":
item_title = get_plex_item_display_title(item, "show", parent=item.season, section_title=None,
parent_title=item.show.title)
else:
item_title = get_plex_item_display_title(item, kind, section_title=None)
oc.add(DirectoryObject(
title=item_title,
key=Callback(ItemDetailsMenu, title=base_title + " > " + item.title, item_title=item.title,
rating_key=item.rating_key)
))
return oc
@route(PREFIX + '/recently_added')
def RecentlyAddedMenu(message=None):
"""
displays the items recently added per section
:param message:
:return:
"""
return SectionsMenu(base_title="Recently added", section_items_key="recently_added", ignore_options=False)
@route(PREFIX + '/recent', force=bool)
@debounce
def RecentMissingSubtitlesMenu(force=False, randomize=None):
title = "Items with missing subtitles"
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
running = scheduler.is_task_running("MissingSubtitles")
task_data = scheduler.get_task_data("MissingSubtitles")
missing_items = task_data["missing_subtitles"] if task_data else None
if ((missing_items is None) or force) and not running:
scheduler.dispatch_task("MissingSubtitles")
running = True
if not running:
oc.add(DirectoryObject(
key=Callback(RecentMissingSubtitlesMenu, force=True, randomize=timestamp()),
title=u"Get items with missing subtitles",
thumb=default_thumb
))
else:
oc.add(DirectoryObject(
key=Callback(RecentMissingSubtitlesMenu, force=False, randomize=timestamp()),
title=u"Updating, refresh here ...",
thumb=default_thumb
))
if missing_items is not None:
for added_at, item_id, item_title, item, missing_languages in missing_items:
oc.add(DirectoryObject(
key=Callback(ItemDetailsMenu, title=title + " > " + item_title, item_title=item_title,
rating_key=item_id),
title=item_title,
summary="Missing: %s" % ", ".join(l.name for l in missing_languages),
thumb=get_item_thumb(item) or default_thumb
))
return oc
def mergedItemsMenu(title, itemGetter, itemGetterKwArgs=None, base_title=None, *args, **kwargs):
"""
displays an item list of dynamic kinds of items
:param title:
:param itemGetter:
:param itemGetterKwArgs:
:param base_title:
:param args:
:param kwargs:
:return:
"""
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
items = itemGetter(*args, **kwargs)
for kind, title, item_id, deeper, item in items:
oc.add(DirectoryObject(
title=title,
key=Callback(ItemDetailsMenu, title=base_title + " > " + title, item_title=title, rating_key=item_id),
thumb=get_item_thumb(item) or default_thumb
))
return oc
def determine_section_display(kind, item, pass_kwargs=None):
"""
returns the menu function for a section based on the size of it (amount of items)
:param kind:
:param item:
:return:
"""
if pass_kwargs and pass_kwargs.get("section_items_key", "all") != "all":
return SectionMenu
if item.size > 80:
return SectionFirstLetterMenu
return SectionMenu
@route(PREFIX + '/ignore/set/{kind}/{rating_key}/{todo}/sure={sure}', kind=str, rating_key=str, todo=str, sure=bool)
def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
"""
displays the ignore options for a menu
:param kind:
:param rating_key:
:param title:
:param sure:
:param todo:
:return:
"""
is_ignored = rating_key in ignore_list[kind]
if not sure:
oc = SubFolderObjectContainer(no_history=True, replace_parent=True, title1="%s %s %s %s the ignore list" % (
"Add" if not is_ignored else "Remove", ignore_list.verbose(kind), title,
"to" if not is_ignored else "from"), title2="Are you sure?")
oc.add(DirectoryObject(
key=Callback(IgnoreMenu, kind=kind, rating_key=rating_key, title=title, sure=True,
todo="add" if not is_ignored else "remove"),
title=pad_title("Are you sure?"),
))
return oc
rel = ignore_list[kind]
dont_change = False
if todo == "remove":
if not is_ignored:
dont_change = True
else:
rel.remove(rating_key)
Log.Info("Removed %s (%s) from the ignore list", title, rating_key)
ignore_list.remove_title(kind, rating_key)
ignore_list.save()
state = "removed from"
elif todo == "add":
if is_ignored:
dont_change = True
else:
rel.append(rating_key)
Log.Info("Added %s (%s) to the ignore list", title, rating_key)
ignore_list.add_title(kind, rating_key, title)
ignore_list.save()
state = "added to"
else:
dont_change = True
if dont_change:
return fatality(force_title=" ", header="Didn't change the ignore list", no_history=True)
return fatality(force_title=" ", header="%s %s the ignore list" % (title, state), no_history=True)
@route(PREFIX + '/sections')
def SectionsMenu(base_title="Sections", section_items_key="all", ignore_options=True):
"""
displays the menu for all sections
:return:
"""
items = get_all_items("sections")
return dig_tree(SubFolderObjectContainer(title2="Sections", no_cache=True, no_history=True), items, None,
menu_determination_callback=determine_section_display, pass_kwargs={"base_title": base_title,
"section_items_key": section_items_key,
"ignore_options": ignore_options},
fill_args={"title": "section_title"})
@route(PREFIX + '/section', ignore_options=bool)
def SectionMenu(rating_key, title=None, base_title=None, section_title=None, ignore_options=True,
section_items_key="all"):
"""
displays the contents of a section
:param section_items_key:
:param rating_key:
:param title:
:param base_title:
:param section_title:
:param ignore_options:
:return:
"""
from menu import MetadataMenu
items = get_all_items(key=section_items_key, value=rating_key, base="library/sections")
kind, deeper = get_items_info(items)
title = unicode(title)
section_title = title
title = base_title + " > " + title
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
if ignore_options:
add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)
return dig_tree(oc, items, MetadataMenu,
pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": "section",
"previous_rating_key": rating_key})
@route(PREFIX + '/section/firstLetter', deeper=bool)
def SectionFirstLetterMenu(rating_key, title=None, base_title=None, section_title=None, ignore_options=True,
section_items_key="all"):
"""
displays the contents of a section indexed by its first char (A-Z, 0-9...)
:param ignore_options: ignored
:param section_items_key: ignored
:param rating_key:
:param title:
:param base_title:
:param section_title:
:return:
"""
from menu import FirstLetterMetadataMenu
items = get_all_items(key="first_character", value=rating_key, base="library/sections")
kind, deeper = get_items_info(items)
title = unicode(title)
oc = SubFolderObjectContainer(title2=section_title, no_cache=True, no_history=True)
title = base_title + " > " + title
add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)
oc.add(DirectoryObject(
key=Callback(SectionMenu, title="All", base_title=title, rating_key=rating_key, ignore_options=False),
title="All"
)
)
return dig_tree(oc, items, FirstLetterMetadataMenu, force_rating_key=rating_key, fill_args={"key": "key"},
pass_kwargs={"base_title": title, "display_items": deeper, "previous_rating_key": rating_key})
+72 -785
View File
@@ -1,27 +1,23 @@
# coding=utf-8
import locale
import logging
import datetime
import logger
import os
import StringIO
import glob
from zipfile import ZipFile, ZIP_DEFLATED
import logger
from item_details import ItemDetailsMenu
from refresh_item import RefreshItem
from menu_helpers import add_ignore_options, dig_tree, set_refresh_menu_state, \
should_display_ignore, enable_channel_wrapper, default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, \
ZipObject
from subzero.constants import TITLE, ART, ICON, PREFIX, PLUGIN_IDENTIFIER, DEPENDENCY_MODULE_NAMES
from support.background import scheduler
should_display_ignore, enable_channel_wrapper, default_thumb, debounce, ObjectContainer, SubFolderObjectContainer
from main import fatality, IgnoreMenu
from advanced import DispatchRestart
from subzero.constants import ART, PREFIX, DEPENDENCY_MODULE_NAMES
from support.scheduler import scheduler
from support.config import config
from support.helpers import pad_title, timestamp, get_language, df, cast_bool
from support.helpers import timestamp, df
from support.ignore import ignore_list
from support.items import get_item, get_on_deck_items, refresh_item, get_all_items, get_items_info, \
get_item_thumb, get_item_kind_from_rating_key
from support.lib import Plex
from subzero.lib.io import FileIO
from support.plex_media import get_plex_metadata, scan_videos
from support.storage import reset_storage, log_storage, get_subtitle_storage
from support.items import get_all_items, get_items_info, \
get_item_kind_from_rating_key, get_item
# init GUI
ObjectContainer.art = R(ART)
@@ -35,412 +31,6 @@ route = enable_channel_wrapper(route)
# noinspection PyUnboundLocalVariable
handler = enable_channel_wrapper(handler)
main_icon = ICON if not config.is_development else "icon-dev.jpg"
@handler(PREFIX, TITLE if not config.is_development else TITLE + " DEV", art=ART, thumb=main_icon)
@route(PREFIX)
def fatality(randomize=None, force_title=None, header=None, message=None, only_refresh=False, no_history=False,
replace_parent=False):
"""
subzero main menu
"""
title = config.full_version # force_title if force_title is not None else config.full_version
oc = ObjectContainer(title1=title, title2=title, header=unicode(header) if header else title, message=message,
no_history=no_history,
replace_parent=replace_parent, no_cache=True)
# always re-check permissions
config.refresh_permissions_status()
# always re-check enabled sections
config.refresh_enabled_sections()
if config.lock_menu and not config.pin_correct:
oc.add(DirectoryObject(
key=Callback(PinMenu, randomize=timestamp()),
title=pad_title("Enter PIN"),
summary="The owner has restricted the access to this menu. Please enter the correct pin",
))
return oc
if not config.permissions_ok and config.missing_permissions:
for title, path in config.missing_permissions:
oc.add(DirectoryObject(
key=Callback(fatality, randomize=timestamp()),
title=pad_title("Insufficient permissions"),
summary="Insufficient permissions on library %s, folder: %s" % (title, path),
))
return oc
if not config.enabled_sections:
oc.add(DirectoryObject(
key=Callback(fatality, randomize=timestamp()),
title=pad_title("I'm not enabled!"),
summary="Please enable me for some of your libraries in your server settings; currently I do nothing",
))
return oc
if not only_refresh:
if Dict["current_refresh_state"]:
oc.add(DirectoryObject(
key=Callback(fatality, force_title=" ", randomize=timestamp()),
title=pad_title("Working ... refresh here"),
summary="Current state: %s; Last state: %s" % (
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
)
))
oc.add(DirectoryObject(
key=Callback(OnDeckMenu),
title="On Deck items",
summary="Shows the current on deck items and allows you to individually (force-) refresh their metadata/"
"subtitles.",
thumb=R("icon-ondeck.jpg")
))
oc.add(DirectoryObject(
key=Callback(RecentlyAddedMenu),
title="Recently Added items",
summary="Shows the recently added items per section.",
thumb=R("icon-recent.jpg")
))
oc.add(DirectoryObject(
key=Callback(RecentMissingSubtitlesMenu, randomize=timestamp()),
title="Items with missing subtitles",
summary="Shows the items honoring the configured 'Item age to be considered recent'-setting (%s)"
" and allowing you to individually (force-) refresh their metadata/subtitles. " %
Prefs["scheduler.item_is_recent_age"],
thumb=R("icon-missing.jpg")
))
oc.add(DirectoryObject(
key=Callback(SectionsMenu),
title="Browse all items",
summary="Go through your whole library and manage your ignore list. You can also "
"(force-) refresh the metadata/subtitles of individual items.",
thumb=R("icon-browse.jpg")
))
task_name = "SearchAllRecentlyAddedMissing"
task = scheduler.task(task_name)
if task.ready_for_display:
task_state = "Running: %s/%s (%s%%)" % (len(task.items_done), len(task.items_searching), task.percentage)
else:
task_state = "Last scheduler run: %s; Next scheduled run: %s; Last runtime: %s" % (
df(scheduler.last_run(task_name)) or "never",
df(scheduler.next_run(task_name)) or "never",
str(task.last_run_time).split(".")[0])
oc.add(DirectoryObject(
key=Callback(RefreshMissing, randomize=timestamp()),
title="Search for missing subtitles (in recently-added items, max-age: %s)" % Prefs[
"scheduler.item_is_recent_age"],
summary="Automatically run periodically by the scheduler, if configured. %s" % task_state,
thumb=R("icon-search.jpg")
))
oc.add(DirectoryObject(
key=Callback(IgnoreListMenu),
title="Display ignore list (%d)" % len(ignore_list),
summary="Show the current ignore list (mainly used for the automatic tasks)",
thumb=R("icon-ignore.jpg")
))
oc.add(DirectoryObject(
key=Callback(HistoryMenu),
title="History",
summary="Show the last %i downloaded subtitles" % int(Prefs["history_size"]),
thumb=R("icon-history.jpg")
))
oc.add(DirectoryObject(
key=Callback(fatality, force_title=" ", randomize=timestamp()),
title=pad_title("Refresh"),
summary="Current state: %s; Last state: %s" % (
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
),
thumb=R("icon-refresh.jpg")
))
# add re-lock after pin unlock
if config.pin:
oc.add(DirectoryObject(
key=Callback(ClearPin, randomize=timestamp()),
title=pad_title("Re-lock menu(s)"),
summary="Enabled the PIN again for menu(s)"
))
if not only_refresh:
oc.add(DirectoryObject(
key=Callback(AdvancedMenu),
title=pad_title("Advanced functions"),
summary="Use at your own risk",
thumb=R("icon-advanced.jpg")
))
return oc
@route(PREFIX + '/pin')
def PinMenu(pin="", randomize=None, success_go_to="channel"):
oc = ObjectContainer(title2="Enter PIN number %s" % (len(pin) + 1), no_cache=True, no_history=True,
skip_pin_lock=True)
if pin == config.pin:
Dict["pin_correct_time"] = datetime.datetime.now()
config.locked = False
if success_go_to == "channel":
return fatality(force_title="PIN correct", header="PIN correct", no_history=True)
elif success_go_to == "advanced":
return AdvancedMenu(randomize=timestamp())
for i in range(10):
oc.add(DirectoryObject(
key=Callback(PinMenu, randomize=timestamp(), pin=pin + str(i), success_go_to=success_go_to),
title=pad_title(str(i)),
))
oc.add(DirectoryObject(
key=Callback(PinMenu, randomize=timestamp(), success_go_to=success_go_to),
title=pad_title("Reset"),
))
return oc
@route(PREFIX + '/pin_lock')
def ClearPin(randomize=None):
Dict["pin_correct_time"] = None
config.locked = True
return fatality(force_title="Menu locked", header=" ", no_history=True)
@route(PREFIX + '/on_deck')
def OnDeckMenu(message=None):
"""
displays the items on deck
:param message:
:return:
"""
return mergedItemsMenu(title="Items On Deck", base_title="Items On Deck", itemGetter=get_on_deck_items)
@route(PREFIX + '/recently_added')
def RecentlyAddedMenu(message=None):
"""
displays the items recently added per section
:param message:
:return:
"""
return SectionsMenu(base_title="Recently added", section_items_key="recently_added", ignore_options=False)
@route(PREFIX + '/recent', force=bool)
@debounce
def RecentMissingSubtitlesMenu(force=False, randomize=None):
title = "Items with missing subtitles"
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
running = scheduler.is_task_running("MissingSubtitles")
task_data = scheduler.get_task_data("MissingSubtitles")
missing_items = task_data["missing_subtitles"] if task_data else None
if ((missing_items is None) or force) and not running:
scheduler.dispatch_task("MissingSubtitles")
running = True
if not running:
oc.add(DirectoryObject(
key=Callback(RecentMissingSubtitlesMenu, force=True, randomize=timestamp()),
title=u"Get items with missing subtitles",
thumb=default_thumb
))
else:
oc.add(DirectoryObject(
key=Callback(RecentMissingSubtitlesMenu, force=False, randomize=timestamp()),
title=u"Updating, refresh here ...",
thumb=default_thumb
))
if missing_items is not None:
for added_at, item_id, item_title, item, missing_languages in missing_items:
oc.add(DirectoryObject(
key=Callback(ItemDetailsMenu, title=title + " > " + item_title, item_title=item_title,
rating_key=item_id),
title=item_title,
summary="Missing: %s" % ", ".join(l.name for l in missing_languages),
thumb=get_item_thumb(item) or default_thumb
))
scheduler.clear_task_data("MissingSubtitles")
return oc
def mergedItemsMenu(title, itemGetter, itemGetterKwArgs=None, base_title=None, *args, **kwargs):
"""
displays an item list of dynamic kinds of items
:param title:
:param itemGetter:
:param itemGetterKwArgs:
:param base_title:
:param args:
:param kwargs:
:return:
"""
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
items = itemGetter(*args, **kwargs)
for kind, title, item_id, deeper, item in items:
oc.add(DirectoryObject(
title=title,
key=Callback(ItemDetailsMenu, title=base_title + " > " + title, item_title=title, rating_key=item_id),
thumb=get_item_thumb(item) or default_thumb
))
return oc
def determine_section_display(kind, item, pass_kwargs=None):
"""
returns the menu function for a section based on the size of it (amount of items)
:param kind:
:param item:
:return:
"""
if pass_kwargs and pass_kwargs.get("section_items_key", "all") != "all":
return SectionMenu
if item.size > 80:
return SectionFirstLetterMenu
return SectionMenu
@route(PREFIX + '/ignore/set/{kind}/{rating_key}/{todo}/sure={sure}', kind=str, rating_key=str, todo=str, sure=bool)
def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
"""
displays the ignore options for a menu
:param kind:
:param rating_key:
:param title:
:param sure:
:param todo:
:return:
"""
is_ignored = rating_key in ignore_list[kind]
if not sure:
oc = SubFolderObjectContainer(no_history=True, replace_parent=True, title1="%s %s %s %s the ignore list" % (
"Add" if not is_ignored else "Remove", ignore_list.verbose(kind), title,
"to" if not is_ignored else "from"), title2="Are you sure?")
oc.add(DirectoryObject(
key=Callback(IgnoreMenu, kind=kind, rating_key=rating_key, title=title, sure=True,
todo="add" if not is_ignored else "remove"),
title=pad_title("Are you sure?"),
))
return oc
rel = ignore_list[kind]
dont_change = False
if todo == "remove":
if not is_ignored:
dont_change = True
else:
rel.remove(rating_key)
Log.Info("Removed %s (%s) from the ignore list", title, rating_key)
ignore_list.remove_title(kind, rating_key)
ignore_list.save()
state = "removed from"
elif todo == "add":
if is_ignored:
dont_change = True
else:
rel.append(rating_key)
Log.Info("Added %s (%s) to the ignore list", title, rating_key)
ignore_list.add_title(kind, rating_key, title)
ignore_list.save()
state = "added to"
else:
dont_change = True
if dont_change:
return fatality(force_title=" ", header="Didn't change the ignore list", no_history=True)
return fatality(force_title=" ", header="%s %s the ignore list" % (title, state), no_history=True)
@route(PREFIX + '/sections')
def SectionsMenu(base_title="Sections", section_items_key="all", ignore_options=True):
"""
displays the menu for all sections
:return:
"""
items = get_all_items("sections")
return dig_tree(SubFolderObjectContainer(title2="Sections", no_cache=True, no_history=True), items, None,
menu_determination_callback=determine_section_display, pass_kwargs={"base_title": base_title,
"section_items_key": section_items_key,
"ignore_options": ignore_options},
fill_args={"title": "section_title"})
@route(PREFIX + '/section', ignore_options=bool)
def SectionMenu(rating_key, title=None, base_title=None, section_title=None, ignore_options=True,
section_items_key="all"):
"""
displays the contents of a section
:param section_items_key:
:param rating_key:
:param title:
:param base_title:
:param section_title:
:param ignore_options:
:return:
"""
items = get_all_items(key=section_items_key, value=rating_key, base="library/sections")
kind, deeper = get_items_info(items)
title = unicode(title)
section_title = title
title = base_title + " > " + title
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
if ignore_options:
add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)
return dig_tree(oc, items, MetadataMenu,
pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": "section",
"previous_rating_key": rating_key})
@route(PREFIX + '/section/firstLetter', deeper=bool)
def SectionFirstLetterMenu(rating_key, title=None, base_title=None, section_title=None, ignore_options=True,
section_items_key="all"):
"""
displays the contents of a section indexed by its first char (A-Z, 0-9...)
:param ignore_options: ignored
:param section_items_key: ignored
:param rating_key:
:param title:
:param base_title:
:param section_title:
:return:
"""
items = get_all_items(key="first_character", value=rating_key, base="library/sections")
kind, deeper = get_items_info(items)
title = unicode(title)
oc = SubFolderObjectContainer(title2=section_title, no_cache=True, no_history=True)
title = base_title + " > " + title
add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)
oc.add(DirectoryObject(
key=Callback(SectionMenu, title="All", base_title=title, rating_key=rating_key, ignore_options=False),
title="All"
)
)
return dig_tree(oc, items, FirstLetterMetadataMenu, force_rating_key=rating_key, fill_args={"key": "key"},
pass_kwargs={"base_title": title, "display_items": deeper, "previous_rating_key": rating_key})
@route(PREFIX + '/section/firstLetter/key', deeper=bool)
def FirstLetterMetadataMenu(rating_key, key, title=None, base_title=None, display_items=False, previous_item_type=None,
@@ -466,7 +56,7 @@ def FirstLetterMetadataMenu(rating_key, key, title=None, base_title=None, displa
@route(PREFIX + '/section/contents', display_items=bool)
def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, previous_item_type=None,
previous_rating_key=None):
previous_rating_key=None, randomize=None):
"""
displays the contents of a section based on whether it has a deeper tree or not (movies->movie (item) list; series->series list)
:param rating_key:
@@ -485,6 +75,22 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
current_kind = get_item_kind_from_rating_key(rating_key)
if display_items:
timeout = 30
# add back to series for season
if current_kind == "season":
timeout = 360
show = get_item(previous_rating_key)
oc.add(DirectoryObject(
key=Callback(MetadataMenu, rating_key=show.rating_key, title=show.title, base_title=show.section.title,
previous_item_type="section", display_items=True, randomize=timestamp()),
title=u"< Back to %s" % show.title,
thumb=show.thumb or default_thumb
))
elif current_kind == "series":
timeout = 1800
items = get_all_items(key="children", value=rating_key, base="library/metadata")
kind, deeper = get_items_info(items)
dig_tree(oc, items, MetadataMenu,
@@ -494,12 +100,6 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
if should_display_ignore(items, previous=previous_item_type):
add_ignore_options(oc, "series", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
timeout = 30
if current_kind == "season":
timeout = 360
elif current_kind == "series":
timeout = 1800
# add refresh
oc.add(DirectoryObject(
key=Callback(RefreshItem, rating_key=rating_key, item_title=title, refresh_kind=current_kind,
@@ -549,210 +149,6 @@ def HistoryMenu():
return oc
@route(PREFIX + '/item/{rating_key}/actions')
@debounce
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None):
"""
displays the item details menu of an item that doesn't contain any deeper tree, such as a movie or an episode
:param rating_key:
:param title:
:param base_title:
:param item_title:
:param randomize:
:return:
"""
title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)
item = get_item(rating_key)
current_kind = get_item_kind_from_rating_key(rating_key)
timeout = 30
oc = SubFolderObjectContainer(title2=title, replace_parent=True)
oc.add(DirectoryObject(
key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, randomize=timestamp(),
timeout=timeout * 1000),
title=u"Refresh: %s" % item_title,
summary="Refreshes the %s, possibly searching for missing and picking up new subtitles on disk" % current_kind,
thumb=item.thumb or default_thumb
))
oc.add(DirectoryObject(
key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(),
timeout=timeout * 1000),
title=u"Auto-search: %s" % item_title,
summary="Issues a forced refresh, ignoring known subtitles and searching for new ones",
thumb=item.thumb or default_thumb
))
# get stored subtitle info for item id
subtitle_storage = get_subtitle_storage()
stored_subs = subtitle_storage.load_or_new(item)
# get the plex item
plex_item = list(Plex["library"].metadata(rating_key))[0]
# get current media info for that item
media = plex_item.media
# look for subtitles for all available media parts and all of their languages
for part in media.parts:
filename = os.path.basename(part.file)
part_id = str(part.id)
# iterate through all configured languages
for lang in config.lang_list:
lang_a2 = lang.alpha2
# ietf lang?
if cast_bool(Prefs["subtitles.language.ietf"]) and "-" in lang_a2:
lang_a2 = lang_a2.split("-")[0]
# get corresponding stored subtitle data for that media part (physical media item), for language
current_sub = stored_subs.get_any(part_id, lang_a2)
current_sub_id = None
current_sub_provider_name = None
summary = u"No current subtitle in storage"
current_score = None
if current_sub:
current_sub_id = current_sub.id
current_sub_provider_name = current_sub.provider_name
current_score = current_sub.score
summary = u"Current subtitle: %s (added: %s, %s), Language: %s, Score: %i, Storage: %s" % \
(current_sub.provider_name, df(current_sub.date_added), current_sub.mode_verbose, lang,
current_sub.score, current_sub.storage_type)
oc.add(DirectoryObject(
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
item_title=item_title, language=lang, current_id=current_sub_id,
item_type=plex_item.type, filename=filename, current_data=summary,
randomize=timestamp(), current_provider=current_sub_provider_name,
current_score=current_score),
title=u"List %s subtitles" % lang.name,
summary=summary
))
add_ignore_options(oc, "videos", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
return oc
def get_item_task_data(task_name, rating_key, language):
task_data = scheduler.get_task_data(task_name)
search_results = task_data.get(rating_key, {}) if task_data else {}
return search_results.get(language)
@route(PREFIX + '/item/search/{rating_key}/{part_id}', force=bool)
@debounce
def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item_title=None, filename=None,
item_type="episode", language=None, force=False, current_id=None, current_data=None,
current_provider=None, current_score=None, randomize=None):
assert rating_key, part_id
running = scheduler.is_task_running("AvailableSubsForItem")
search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)
if (search_results is None or force) and not running:
scheduler.dispatch_task("AvailableSubsForItem", rating_key=rating_key, item_type=item_type, part_id=part_id,
language=language)
running = True
oc = SubFolderObjectContainer(title2=unicode(title), replace_parent=True)
oc.add(DirectoryObject(
key=Callback(ItemDetailsMenu, rating_key=rating_key, item_title=item_title, title=title, randomize=timestamp()),
title=u"Back to: %s" % title,
summary=current_data,
thumb=default_thumb
))
metadata = get_plex_metadata(rating_key, part_id, item_type)
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
if not scanned_parts:
Log.Error("Couldn't list available subtitles for %s", rating_key)
return oc
video, plex_part = scanned_parts.items()[0]
video_display_data = [video.format] if video.format else []
if video.release_group:
video_display_data.append(u"by %s" % video.release_group)
video_display_data = " ".join(video_display_data)
current_display = (u"Current: %s (%s) " % (current_provider, current_score) if current_provider else "")
if not running:
oc.add(DirectoryObject(
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title, language=language,
filename=filename, part_id=part_id, title=title, current_id=current_id, force=True,
current_provider=current_provider, current_score=current_score,
current_data=current_data, item_type=item_type, randomize=timestamp()),
title=u"Search for %s subs (%s)" % (get_language(language).name, video_display_data),
summary=u"%sFilename: %s" % (current_display, filename),
thumb=default_thumb
))
else:
oc.add(DirectoryObject(
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title,
language=language, filename=filename, current_data=current_data,
part_id=part_id, title=title, current_id=current_id, item_type=item_type,
current_provider=current_provider, current_score=current_score,
randomize=timestamp()),
title=u"Searching for %s subs (%s), refresh here ..." % (get_language(language).name, video_display_data),
summary=u"%sFilename: %s" % (current_display, filename),
thumb=default_thumb
))
if not search_results:
return oc
for subtitle in search_results:
oc.add(DirectoryObject(
key=Callback(TriggerDownloadSubtitle, rating_key=rating_key, randomize=timestamp(), item_title=item_title,
subtitle_id=str(subtitle.id), language=language),
title=u"%s: %s, score: %s" % ("Available" if current_id != subtitle.id else "Current",
subtitle.provider_name, subtitle.score),
summary=u"Release: %s, Matches: %s" % (subtitle.release_info, ", ".join(subtitle.matches)),
thumb=default_thumb
))
return oc
@route(PREFIX + '/download_subtitle/{rating_key}')
@debounce
def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None, language=None, randomize=None):
set_refresh_menu_state("Downloading subtitle for %s" % item_title or rating_key)
search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)
download_subtitle = None
for subtitle in search_results:
if str(subtitle.id) == subtitle_id:
download_subtitle = subtitle
break
if not download_subtitle:
Log.Error(u"Something went horribly wrong")
else:
scheduler.dispatch_task("DownloadSubtitleForItem", rating_key=rating_key, subtitle=download_subtitle)
return fatality(randomize=timestamp(), header=" ", replace_parent=True)
@route(PREFIX + '/item/{rating_key}')
@debounce
def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=False, refresh_kind=None,
previous_rating_key=None, timeout=8000, randomize=None, trigger=True):
assert rating_key
header = " "
if trigger:
set_refresh_menu_state(u"Triggering %sRefresh for %s" % ("Force-" if force else "", item_title))
Thread.Create(refresh_item, rating_key=rating_key, force=force, refresh_kind=refresh_kind,
parent_rating_key=previous_rating_key, timeout=int(timeout))
header = u"%s of item %s triggered" % ("Refresh" if not force else "Forced-refresh", rating_key)
return fatality(randomize=timestamp(), header=header, replace_parent=True)
@route(PREFIX + '/missing/refresh')
@debounce
def RefreshMissing(randomize=None):
@@ -761,60 +157,9 @@ def RefreshMissing(randomize=None):
return fatality(header=header, replace_parent=True)
@route(PREFIX + '/advanced')
def AdvancedMenu(randomize=None, header=None, message=None):
oc = SubFolderObjectContainer(header=header or "Internal stuff, pay attention!", message=message, no_cache=True,
no_history=True,
replace_parent=False, title2="Advanced")
if config.lock_advanced_menu and not config.pin_correct:
oc.add(DirectoryObject(
key=Callback(PinMenu, randomize=timestamp(), success_go_to="advanced"),
title=pad_title("Enter PIN"),
summary="The owner has restricted the access to this menu. Please enter the correct pin",
))
return oc
oc.add(DirectoryObject(
key=Callback(TriggerRestart, randomize=timestamp()),
title=pad_title("Restart the plugin"),
))
oc.add(DirectoryObject(
key=Callback(GetLogsLink),
title="Get my logs (copy the appearing link and open it in your browser, please)",
summary="Copy the appearing link and open it in your browser, please",
))
oc.add(DirectoryObject(
key=Callback(TriggerBetterSubtitles, randomize=timestamp()),
title=pad_title("Trigger find better subtitles"),
))
oc.add(DirectoryObject(
key=Callback(LogStorage, key="tasks", randomize=timestamp()),
title=pad_title("Log the plugin's scheduled tasks state storage"),
))
oc.add(DirectoryObject(
key=Callback(LogStorage, key="ignore", randomize=timestamp()),
title=pad_title("Log the plugin's internal ignorelist storage"),
))
oc.add(DirectoryObject(
key=Callback(ResetStorage, key="tasks", randomize=timestamp()),
title=pad_title("Reset the plugin's scheduled tasks state storage"),
))
oc.add(DirectoryObject(
key=Callback(ResetStorage, key="ignore", randomize=timestamp()),
title=pad_title("Reset the plugin's internal ignorelist storage"),
))
oc.add(DirectoryObject(
key=Callback(InvalidateCache, randomize=timestamp()),
title=pad_title("Invalidate Sub-Zero metadata caches (subliminal)"),
))
return oc
@route(PREFIX + '/ValidatePrefs', enforce_route=True)
def ValidatePrefs():
Core.log.setLevel(logging.DEBUG)
Log.Debug("Validate Prefs called.")
# cache the channel state
update_dict = False
@@ -849,109 +194,51 @@ def ValidatePrefs():
Core.log.removeHandler(logger.console_handler)
Log.Debug("Stop logging to console")
Log.Debug("Validate Prefs called.")
# SZ config debug
Log.Debug("--- SZ Config-Debug ---")
for attr in [
"app_support_path", "data_path", "data_items_path", "enable_agent",
"enable_channel", "permissions_ok", "missing_permissions", "fs_encoding",
"subtitle_destination_folder", "dbm_supported", "lang_list"]:
Log.Debug("config.%s: %s", attr, getattr(config, attr))
for attr in ["plugin_log_path", "server_log_path"]:
value = getattr(config, attr)
access = os.access(value, os.R_OK)
if Core.runtime.os == "Windows":
try:
f = open(value, "r")
f.read(1)
f.close()
except:
access = False
Log.Debug("config.%s: %s (accessible: %s)", attr, value, access)
for attr in [
"subtitles.save.filesystem", ]:
Log.Debug("Pref.%s: %s", attr, Prefs[attr])
# fixme: check existance of and os access of logs
Log.Debug("Platform: %s", Core.runtime.platform)
Log.Debug("OS: %s", Core.runtime.os)
Log.Debug("----- Environment -----")
for key, value in os.environ.iteritems():
if key.startswith("PLEX") or key.startswith("SZ_"):
if "TOKEN" in key:
outval = "xxxxxxxxxxxxxxxxxxx"
else:
outval = value
Log.Debug("%s: %s", key, outval)
Log.Debug("Locale: %s", locale.getdefaultlocale())
Log.Debug("-----------------------")
Log.Debug("Setting log-level to %s", Prefs["log_level"])
logger.register_logging_handler(DEPENDENCY_MODULE_NAMES, level=Prefs["log_level"])
Core.log.setLevel(logging.getLevelName(Prefs["log_level"]))
os.environ['U1pfT01EQl9LRVk'] = '789CF30DAC2C8B0AF433F5C9AD34290A712DF30D7135F12D0FB3E502006FDE081E'
return
def DispatchRestart():
Thread.CreateTimer(1.0, Restart)
@route(PREFIX + '/advanced/restart/trigger')
@debounce
def TriggerRestart(randomize=None):
set_refresh_menu_state("Restarting the plugin")
DispatchRestart()
return fatality(header="Restart triggered, please wait about 5 seconds", force_title=" ", only_refresh=True,
replace_parent=True,
no_history=True, randomize=timestamp())
@route(PREFIX + '/advanced/restart/execute')
def Restart():
Plex[":/plugins"].restart(PLUGIN_IDENTIFIER)
@route(PREFIX + '/storage/reset', sure=bool)
def ResetStorage(key, randomize=None, sure=False):
if not sure:
oc = SubFolderObjectContainer(no_history=True, title1="Reset subtitle storage", title2="Are you sure?")
oc.add(DirectoryObject(
key=Callback(ResetStorage, key=key, sure=True, randomize=timestamp()),
title=pad_title("Are you really sure?"),
))
return oc
reset_storage(key)
if key == "tasks":
# reinitialize the scheduler
scheduler.init_storage()
scheduler.setup_tasks()
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='Information Storage (%s) reset' % key
)
@route(PREFIX + '/storage/log')
def LogStorage(key, randomize=None):
log_storage(key)
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='Information Storage (%s) logged' % key
)
@route(PREFIX + '/triggerbetter')
def TriggerBetterSubtitles(randomize=None):
scheduler.dispatch_task("FindBetterSubtitles")
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='FindBetterSubtitles triggered'
)
@route(PREFIX + '/get_logs_link')
def GetLogsLink():
ip = Core.networking.http_request("http://www.plexapp.com/ip.php", cacheTime=7200).content.strip()
logs_link = "http://%s:32400%s?X-Plex-Token=%s" % (ip, PREFIX + '/logs', config.universal_plex_token)
oc = ObjectContainer(title2="Download Logs", no_cache=True, no_history=True,
header="Copy this link and open this in your browser, please",
message=logs_link)
return oc
@route(PREFIX + '/logs')
def DownloadLogs():
buff = StringIO.StringIO()
zip_archive = ZipFile(buff, mode='w', compression=ZIP_DEFLATED)
logs = sorted(glob.glob(config.plugin_log_path + '*')) + [config.server_log_path]
for path in logs:
data = StringIO.StringIO()
data.write(FileIO.read(path))
zip_archive.writestr(os.path.basename(path), data.getvalue())
zip_archive.close()
return ZipObject(buff.getvalue())
@route(PREFIX + '/invalidatecache')
def InvalidateCache(randomize=None):
from subliminal.cache import region
region.invalidate()
return AdvancedMenu(
randomize=timestamp(),
header='Success',
message='Cache invalidated'
)
+19 -6
View File
@@ -7,9 +7,11 @@ from support.helpers import get_video_display_title
from support.ignore import ignore_list
from support.lib import get_intent
from support.config import config
from subzero.constants import ICON_SUB
from subzero.constants import ICON_SUB, ICON
from support.scheduler import scheduler
default_thumb = R(ICON_SUB)
main_icon = ICON if not config.is_development else "icon-dev.jpg"
def should_display_ignore(items, previous=None):
@@ -41,8 +43,8 @@ def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None
oc.add(DirectoryObject(
key=Callback(callback_menu, kind=use_kind, rating_key=rating_key, title=title),
title=u"%s %s \"%s\" %s the ignore list" % (
"Remove" if in_list else "Add", ignore_list.verbose(kind) if add_kind else "", unicode(title), "from" if in_list else "to")
title=u"%s %s \"%s\"" % (
"Un-Ignore" if in_list else "Ignore", ignore_list.verbose(kind) if add_kind else "", unicode(title))
)
)
@@ -104,6 +106,12 @@ def set_refresh_menu_state(state_or_media, media_type="movies"):
Dict["current_refresh_state"] = u"%sRefreshing %s" % ("Force-" if force_refresh else "", unicode(title))
def get_item_task_data(task_name, rating_key, language):
task_data = scheduler.get_task_data(task_name)
search_results = task_data.get(rating_key, {}) if task_data else {}
return search_results.get(language)
def enable_channel_wrapper(func):
"""
returns the original wrapper :func: (route or handler) if applicable, else the plain to-be-wrapped function
@@ -140,7 +148,7 @@ def debounce(func):
def wrap(*args, **kwargs):
if "randomize" in kwargs:
if not "menu_history" in Dict:
if "menu_history" not in Dict:
Dict["menu_history"] = {}
key = get_lookup_key([func] + list(args), kwargs)
@@ -148,8 +156,13 @@ def debounce(func):
Log.Debug("not triggering %s twice with %s, %s" % (func, args, kwargs))
return ObjectContainer()
else:
Dict["menu_history"][key] = datetime.datetime.now() + datetime.timedelta(days=1)
Dict.Save()
Dict["menu_history"][key] = datetime.datetime.now() + datetime.timedelta(hours=6)
try:
Dict.Save()
except TypeError:
Log.Error("Can't save menu history for: %r", key)
del Dict["menu_history"][key]
return func(*args, **kwargs)
return wrap
+22
View File
@@ -0,0 +1,22 @@
# coding=utf-8
from subzero.constants import PREFIX
from menu_helpers import debounce, set_refresh_menu_state
from support.items import refresh_item
from support.helpers import timestamp
@route(PREFIX + '/item/{rating_key}')
@debounce
def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=False, refresh_kind=None,
previous_rating_key=None, timeout=8000, randomize=None, trigger=True):
assert rating_key
from interface.main import fatality
header = " "
if trigger:
set_refresh_menu_state(u"Triggering %sRefresh for %s" % ("Force-" if force else "", item_title))
Thread.Create(refresh_item, rating_key=rating_key, force=force, refresh_kind=refresh_kind,
parent_rating_key=previous_rating_key, timeout=int(timeout))
header = u"%s of item %s triggered" % ("Refresh" if not force else "Forced-refresh", rating_key)
return fatality(randomize=timestamp(), header=header, replace_parent=True)
+251
View File
@@ -0,0 +1,251 @@
# coding=utf-8
import traceback
import types
from babelfish import Language
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb
from subzero.modification import registry as mod_registry, SubtitleModifications
from subzero.constants import PREFIX
from support.plex_media import get_plex_metadata, scan_videos
from support.helpers import timestamp, pad_title
from support.items import get_current_sub, set_mods_for_part
@route(PREFIX + '/item/sub_mods/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleModificationsMenu(**kwargs):
rating_key = kwargs["rating_key"]
part_id = kwargs["part_id"]
language = kwargs["language"]
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
kwargs.pop("randomize")
current_mods = current_sub.mods or []
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
from interface.item_details import SubtitleOptionsMenu
oc.add(DirectoryObject(
key=Callback(SubtitleOptionsMenu, randomize=timestamp(), **kwargs),
title=u"< Back to subtitle options for: %s" % kwargs["title"],
summary=kwargs["current_data"],
thumb=default_thumb
))
for identifier, mod in mod_registry.mods.iteritems():
if mod.advanced:
continue
if mod.exclusive and identifier in current_mods:
continue
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=identifier, mode="add", randomize=timestamp(), **kwargs),
title=pad_title(mod.description), summary=mod.long_description or ""
))
fps_mod = SubtitleModifications.get_mod_class("change_FPS")
oc.add(DirectoryObject(
key=Callback(SubtitleFPSModMenu, randomize=timestamp(), **kwargs),
title=pad_title(fps_mod.description), summary=fps_mod.long_description or ""
))
shift_mod = SubtitleModifications.get_mod_class("shift_offset")
oc.add(DirectoryObject(
key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
title=pad_title(shift_mod.description), summary=shift_mod.long_description or ""
))
color_mod = SubtitleModifications.get_mod_class("color")
oc.add(DirectoryObject(
key=Callback(SubtitleColorModMenu, randomize=timestamp(), **kwargs),
title=pad_title(color_mod.description), summary=color_mod.long_description or ""
))
if current_mods:
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=None, mode="remove_last", randomize=timestamp(), **kwargs),
title=pad_title("Remove last applied mod (%s)" % current_mods[-1]),
summary=u"Currently applied mods: %s" % (", ".join(current_mods) if current_mods else "none")
))
oc.add(DirectoryObject(
key=Callback(SubtitleListMods, randomize=timestamp(), **kwargs),
title=pad_title("Manage applied mods"),
summary=u"Currently applied mods: %s" % (", ".join(current_mods))
))
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=None, mode="clear", randomize=timestamp(), **kwargs),
title=pad_title("Restore original version"),
summary=u"Currently applied mods: %s" % (", ".join(current_mods) if current_mods else "none")
))
return oc
@route(PREFIX + '/item/sub_mod_fps/{rating_key}/{part_id}', force=bool)
def SubtitleFPSModMenu(**kwargs):
rating_key = kwargs["rating_key"]
part_id = kwargs["part_id"]
item_type = kwargs["item_type"]
kwargs.pop("randomize")
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
oc.add(DirectoryObject(
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
title="< Back to subtitle modification menu"
))
metadata = get_plex_metadata(rating_key, part_id, item_type)
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
video, plex_part = scanned_parts.items()[0]
target_fps = plex_part.fps
for fps in ["23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]:
if float(fps) == float(target_fps):
continue
if float(fps) > float(target_fps):
indicator = "subs constantly getting faster"
else:
indicator = "subs constantly getting slower"
mod_ident = SubtitleModifications.get_mod_signature("change_FPS", **{"from": fps, "to": target_fps})
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs),
title="%s fps -> %s fps (%s)" % (fps, target_fps, indicator)
))
return oc
POSSIBLE_UNITS = (("ms", "milliseconds"), ("s", "seconds"), ("m", "minutes"), ("h", "hours"))
POSSIBLE_UNITS_D = dict(POSSIBLE_UNITS)
@route(PREFIX + '/item/sub_mod_shift_unit/{rating_key}/{part_id}', force=bool)
def SubtitleShiftModUnitMenu(**kwargs):
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
kwargs.pop("randomize")
oc.add(DirectoryObject(
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
title="< Back to subtitle modifications"
))
for unit, title in POSSIBLE_UNITS:
oc.add(DirectoryObject(
key=Callback(SubtitleShiftModMenu, unit=unit, randomize=timestamp(), **kwargs),
title="Adjust by %s" % title
))
return oc
@route(PREFIX + '/item/sub_mod_shift/{rating_key}/{part_id}/{unit}', force=bool)
def SubtitleShiftModMenu(unit=None, **kwargs):
if unit not in POSSIBLE_UNITS_D:
raise NotImplementedError
kwargs.pop("randomize")
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
oc.add(DirectoryObject(
key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
title="< Back to unit selection"
))
rng = []
if unit == "h":
rng = range(-10, 11)
elif unit in ("m", "s"):
rng = range(-15, 15)
elif unit == "ms":
rng = range(-900, 1000, 100)
for i in rng:
if i == 0:
continue
mod_ident = SubtitleModifications.get_mod_signature("shift_offset", **{unit: i})
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs),
title="%s %s" % (("%s" if i < 0 else "+%s") % i, unit)
))
return oc
@route(PREFIX + '/item/sub_mod_colors/{rating_key}/{part_id}', force=bool)
def SubtitleColorModMenu(**kwargs):
kwargs.pop("randomize")
color_mod = SubtitleModifications.get_mod_class("color")
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
oc.add(DirectoryObject(
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
title="< Back to subtitle modification menu"
))
for color, code in color_mod.colors.iteritems():
mod_ident = SubtitleModifications.get_mod_signature("color", **{"name": color})
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs),
title="%s (%s)" % (color, code)
))
return oc
@route(PREFIX + '/item/sub_set_mods/{rating_key}/{part_id}/{mods}/{mode}', force=bool)
@debounce
def SubtitleSetMods(mods=None, mode=None, **kwargs):
if not isinstance(mods, types.ListType) and mods:
mods = [mods]
rating_key = kwargs["rating_key"]
part_id = kwargs["part_id"]
lang_a2 = kwargs["language"]
item_type = kwargs["item_type"]
language = Language.fromietf(lang_a2)
set_mods_for_part(rating_key, part_id, language, item_type, mods, mode=mode)
kwargs.pop("randomize")
return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
@route(PREFIX + '/item/sub_list_mods/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleListMods(**kwargs):
rating_key = kwargs["rating_key"]
part_id = kwargs["part_id"]
language = kwargs["language"]
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
kwargs.pop("randomize")
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
oc.add(DirectoryObject(
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
title="< Back to subtitle modifications"
))
for identifier in current_sub.mods:
oc.add(DirectoryObject(
key=Callback(SubtitleSetMods, mods=identifier, mode="remove", randomize=timestamp(), **kwargs),
title="Remove: %s" % identifier
))
return oc
+6 -3
View File
@@ -18,7 +18,7 @@ sys.modules["support.plex_media"] = plex_media
import localmedia
sys.modules["subzero.localmedia"] = localmedia
sys.modules["support.localmedia"] = localmedia
import subtitlehelpers
@@ -32,9 +32,9 @@ import missing_subtitles
sys.modules["support.missing_subtitles"] = missing_subtitles
import background
import scheduler
sys.modules["support.background"] = background
sys.modules["support.scheduler"] = scheduler
import tasks
@@ -58,3 +58,6 @@ sys.modules["support.data"] = data
import activities
sys.modules["support.activities"] = activities
import download
sys.modules["support.download"] = download
+15 -8
View File
@@ -11,9 +11,9 @@ class PlexActivityManager(object):
def start(self):
activity_sources_enabled = None
if config.universal_plex_token:
if config.plex_token:
from plex import Plex
Plex.configuration.defaults.authentication(config.universal_plex_token)
Plex.configuration.defaults.authentication(config.plex_token)
activity_sources_enabled = ["websocket"]
Activity.on('websocket.playing', self.on_playing)
@@ -27,9 +27,6 @@ class PlexActivityManager(object):
@throttle(5, instance_method=True)
def on_playing(self, info):
if not config.use_activities:
return
# ignore non-playing states and anything too far in
if info["state"] != "playing" or info["viewOffset"] > 60000:
return
@@ -41,13 +38,22 @@ class PlexActivityManager(object):
return
rating_key = info["ratingKey"]
if rating_key not in Dict["last_played_items"]:
# new playing; store last 10 recently played items
if rating_key in Dict["last_played_items"] and rating_key != Dict["last_played_items"][0]:
# shift last played
Dict["last_played_items"].insert(0,
Dict["last_played_items"].pop(Dict["last_played_items"].index(rating_key)))
Dict.Save()
elif rating_key not in Dict["last_played_items"]:
# new playing; store last X recently played items
Dict["last_played_items"].insert(0, rating_key)
Dict["last_played_items"] = Dict["last_played_items"][:10]
Dict["last_played_items"] = Dict["last_played_items"][:config.store_recently_played_amount]
Dict.Save()
if not config.react_to_activities:
return
debug_msg = "Started playing %s. Refreshing it." % rating_key
key_to_refresh = None
@@ -108,4 +114,5 @@ class PlexActivityManager(object):
if ep.index == 1:
return ep
activity = PlexActivityManager()
+117 -15
View File
@@ -3,18 +3,23 @@
import os
import re
import inspect
import sys
import datetime
import subliminal
import subliminal_patch
from whichdb import whichdb
from babelfish import Language
from subliminal.cli import MutexLock
from subzero.lib.io import FileIO, get_viable_encoding
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW, MEDIA_TYPE_TO_STRING
from lib import Plex
from helpers import check_write_permissions, cast_bool
SUBTITLE_EXTS = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'txt', 'psb']
SUBTITLE_EXTS = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'txt', 'psb',
'vtt']
VIDEO_EXTS = ['3g2', '3gp', 'asf', 'asx', 'avc', 'avi', 'avs', 'bivx', 'bup', 'divx', 'dv', 'dvr-ms', 'evo', 'fli',
'flv',
'm2t', 'm2ts', 'm2v', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'mts', 'nsv', 'nuv', 'ogm', 'ogv', 'tp',
@@ -27,6 +32,8 @@ IGNORE_FN = ("subzero.ignore", ".subzero.ignore", ".nosz")
VERSION_RE = re.compile(ur'CFBundleVersion.+?<string>([0-9\.]+)</string>', re.DOTALL)
DEV_RE = re.compile(ur'PlexPluginDevMode.+?<string>([01]+)</string>', re.DOTALL)
impawrt = getattr(sys.modules['__main__'], "__builtins__").get("__import__")
def int_or_default(s, default):
try:
@@ -45,7 +52,9 @@ class Config(object):
data_path = None
data_items_path = None
universal_plex_token = None
plex_token = None
is_development = False
dbm_supported = False
enable_channel = True
enable_agent = True
@@ -56,6 +65,7 @@ class Config(object):
pin_valid_minutes = 10
lang_list = None
subtitle_destination_folder = None
subtitle_formats = None
providers = None
provider_settings = None
max_recent_items_per_library = 200
@@ -67,14 +77,23 @@ class Config(object):
notify_executable = None
sections = None
enabled_sections = None
enforce_encoding = False
remove_hi = False
fix_ocr = False
fix_common = False
colors = ""
chmod = None
forced_only = False
exotic_ext = False
treat_und_as_first = False
ext_match_strictness = False
use_activities = False
default_mods = None
debug_mods = False
react_to_activities = False
activity_mode = None
subtitles_save_to = None
no_refresh = False
store_recently_played_amount = 20
initialized = False
@@ -89,6 +108,9 @@ class Config(object):
self.data_path = getattr(Data, "_core").storage.data_path
self.data_items_path = os.path.join(self.data_path, "DataItems")
self.universal_plex_token = self.get_universal_plex_token()
self.plex_token = os.environ.get("PLEXTOKEN", self.universal_plex_token)
os.environ["SZ_USER_AGENT"] = self.get_user_agent()
self.set_plugin_mode()
self.set_plugin_lock()
@@ -96,6 +118,8 @@ class Config(object):
self.lang_list = self.get_lang_list()
self.subtitle_destination_folder = self.get_subtitle_destination_folder()
self.subtitle_formats = self.get_subtitle_formats()
self.forced_only = cast_bool(Prefs["subtitles.only_foreign"])
self.providers = self.get_providers()
self.provider_settings = self.get_provider_settings()
self.max_recent_items_per_library = int_or_default(Prefs["scheduler.max_recent_items_per_library"], 2000)
@@ -106,14 +130,59 @@ class Config(object):
self.enabled_sections = self.check_enabled_sections()
self.permissions_ok = self.check_permissions()
self.notify_executable = self.check_notify_executable()
self.enforce_encoding = cast_bool(Prefs['subtitles.enforce_encoding'])
self.remove_hi = cast_bool(Prefs['subtitles.remove_hi'])
self.fix_ocr = cast_bool(Prefs['subtitles.fix_ocr'])
self.fix_common = cast_bool(Prefs['subtitles.fix_common'])
self.colors = Prefs['subtitles.colors'] if Prefs['subtitles.colors'] != "don't change" else None
self.chmod = self.check_chmod()
self.forced_only = cast_bool(Prefs["subtitles.only_foreign"])
self.exotic_ext = cast_bool(Prefs["subtitles.scan.exotic_ext"])
self.treat_und_as_first = cast_bool(Prefs["subtitles.language.treat_und_as_first"])
self.ext_match_strictness = self.determine_ext_sub_strictness()
self.default_mods = self.get_default_mods()
self.debug_mods = cast_bool(Prefs['log_debug_mods'])
self.subtitles_save_to = Prefs['subtitles.save.filesystem']
self.no_refresh = os.environ.get("SZ_NO_REFRESH", False)
self.initialized = True
def init_cache(self):
names = ['dbhash', 'gdbm', 'dbm']
dbfn = None
self.dbm_supported = False
# try importing dbm modules
if impawrt:
for name in names:
try:
impawrt(name)
except:
continue
if not self.dbm_supported:
self.dbm_supported = name
break
if self.dbm_supported:
# anydbm checks; try guessing the format and importing the correct module
dbfn = os.path.join(config.data_items_path, 'subzero.dbm')
db_which = whichdb(dbfn)
if db_which is not None and db_which != "":
try:
impawrt(db_which)
except ImportError:
self.dbm_supported = False
if Core.runtime.os != "Windows" and self.dbm_supported:
try:
subliminal.region.configure('dogpile.cache.dbm', expiration_time=datetime.timedelta(days=30),
arguments={'filename': dbfn,
'lock_factory': MutexLock})
Log.Info("Using file based cache!")
return
except:
self.dbm_supported = False
Log.Warn("Not using file based cache!")
subliminal.region.configure('dogpile.cache.memory')
def set_log_paths(self):
# find log handler
for handler in Core.log.handlers:
@@ -138,7 +207,9 @@ class Config(object):
except:
Log.Warn("Couldn't determine Plex Token")
else:
Log("Did NOT find Preferences file - please check logfile and hierarchy. Aborting!")
Log("Did NOT find Preferences file - most likely Windows OS. Otherwise please check logfile and hierarchy.")
# fixme: windows
def set_plugin_mode(self):
if Prefs["plugin_mode"] == "only agent":
@@ -213,11 +284,17 @@ class Config(object):
return all_permissions_ok
def get_version(self):
return self.get_bare_version() + ("" if not self.is_development else " DEV")
def get_bare_version(self):
result = VERSION_RE.search(self.plugin_info)
add = "" if not self.is_development else " DEV"
if result:
return result.group(1) + add
return result.group(1)
return "2.x.x.x"
def get_user_agent(self):
return "Sub-Zero/%s" % (self.get_bare_version() + ("" if not self.is_development else "-dev"))
def get_dev_mode(self):
dev = DEV_RE.search(self.plugin_info)
@@ -270,7 +347,7 @@ class Config(object):
self.enabled_sections = self.check_enabled_sections()
def check_enabled_sections(self):
enabled_for_primary_agents = []
enabled_for_primary_agents = {"movie": [], "show": []}
enabled_sections = {}
# find which agents we're enabled for
@@ -283,11 +360,11 @@ class Config(object):
related_agents = Plex.primary_agent(agent.identifier, t.media_type)
for a in related_agents:
if a.identifier == PLUGIN_IDENTIFIER and a.enabled:
enabled_for_primary_agents.append(agent.identifier)
enabled_for_primary_agents[MEDIA_TYPE_TO_STRING[t.media_type]].append(agent.identifier)
# find the libraries that use them
for library in self.sections:
if library.agent in enabled_for_primary_agents:
if library.agent in enabled_for_primary_agents.get(library.type, []):
enabled_sections[library.key] = library
Log.Debug(u"I'm enabled for: %s" % [lib.title for key, lib in enabled_sections.iteritems()])
@@ -330,6 +407,15 @@ class Config(object):
return fld_custom or (
Prefs["subtitles.save.subFolder"] if Prefs["subtitles.save.subFolder"] != "current folder" else None)
def get_subtitle_formats(self):
formats = Prefs["subtitles.save.formats"]
out = []
if "SRT" in formats:
out.append("srt")
if "VTT" in formats:
out.append("vtt")
return out
def get_providers(self):
providers = {'opensubtitles': cast_bool(Prefs['provider.opensubtitles.enabled']),
# 'thesubdb': Prefs['provider.thesubdb.enabled'],
@@ -343,10 +429,13 @@ class Config(object):
}
# ditch non-forced-subtitles-reporting providers
if cast_bool(Prefs['subtitles.only_foreign']):
if self.forced_only:
providers["addic7ed"] = False
providers["tvsubtitles"] = False
providers["legendastv"] = False
providers["napiprojekt"] = False
providers["shooter"] = False
providers["subscenter"] = False
return filter(lambda prov: providers[prov], providers)
@@ -404,13 +493,26 @@ class Config(object):
return "loose"
return "strict"
def get_default_mods(self):
mods = []
if self.remove_hi:
mods.append("remove_HI")
if self.fix_ocr:
mods.append("OCR_fixes")
if self.fix_common:
mods.append("common")
if self.colors:
mods.append("color(name=%s)" % self.colors)
return mods
def set_activity_modes(self):
val = Prefs["activity.on_playback"]
if val == "never":
self.use_activities = False
self.react_to_activities = False
return
self.use_activities = True
self.react_to_activities = True
if val == "current media item":
self.activity_mode = "refresh"
elif val == "hybrid: current item or next episode":
+46
View File
@@ -0,0 +1,46 @@
# coding=utf-8
import subliminal_patch as subliminal
from support.config import config
from subtitlehelpers import get_subtitles_from_metadata
from subliminal_patch import compute_score
def download_best_subtitles(video_part_map, min_score=0):
hearing_impaired = Prefs['subtitles.search.hearingImpaired']
languages = config.lang_list
if not languages:
return
missing_languages = False
for video, part in video_part_map.iteritems():
if not Prefs['subtitles.save.filesystem']:
# scan for existing metadata subtitles
meta_subs = get_subtitles_from_metadata(part)
for language, subList in meta_subs.iteritems():
if subList:
video.subtitle_languages.add(language)
Log.Debug("Found metadata subtitle %s for %s", language, video)
missing_subs = (languages - video.subtitle_languages)
# all languages are found if we either really have subs for all languages or we only want to have exactly one language
# and we've only found one (the case for a selected language, Prefs['subtitles.only_one'] (one found sub matches any language))
found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
if not missing_subs or found_one_which_is_enough:
if found_one_which_is_enough:
Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
else:
Log.Debug('All languages %r exist for %s', languages, video)
continue
missing_languages = True
break
if missing_languages:
Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s" % (min_score, hearing_impaired))
return subliminal.download_best_subtitles(video_part_map.keys(), languages, min_score, hearing_impaired, providers=config.providers,
provider_configs=config.provider_settings, pool_class=config.provider_pool,
compute_score=compute_score)
Log.Debug("All languages for all requested videos exist. Doing nothing.")
+58 -11
View File
@@ -9,15 +9,26 @@ import time
import re
import platform
import subprocess
from bs4 import UnicodeDammit
import sys
from collections import OrderedDict
import chardet
from bs4 import UnicodeDammit
from babelfish import Language
from subzero.analytics import track_event
mswindows = (sys.platform == "win32")
if mswindows:
from subprocess import list2cmdline
quote_args = list2cmdline
else:
# POSIX
from pipes import quote
def quote_args(seq):
return ' '.join(quote(arg) for arg in seq)
# Unicode control characters can appear in ID3v2 tags but are not legal in XML.
RE_UNICODE_CONTROL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])' + \
u'|' + \
@@ -30,7 +41,7 @@ RE_UNICODE_CONTROL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])'
def cast_bool(value):
return str(value) in ("true", "True")
return str(value).strip() in ("true", "True")
# A platform independent way to split paths which might come in with different separators.
@@ -110,9 +121,9 @@ def str_pad(s, length, align='left', pad_char=' ', trim=False):
raise ValueError("Unknown align type, expected either 'left' or 'right'")
def pad_title(value):
def pad_title(value, width=49):
"""Pad a title to 30 characters to force the 'details' view."""
return str_pad(value, 49, pad_char=' ')
return str_pad(value, width, pad_char=' ')
def get_plex_item_display_title(item, kind, parent=None, parent_title=None, section_title=None,
@@ -236,13 +247,13 @@ def get_item_hints(data):
:param data: video item dict of media_to_videos
:return:
"""
hints = {"title": data["title"], "type": "movie"}
hints = {"title": data["original_title"] or data["title"], "type": "movie"}
if data["type"] == "episode":
hints.update(
{
"type": "episode",
"episode_title": data["title"],
"title": data["series"],
"title": data["original_title"] or data["series"],
}
)
return hints
@@ -256,7 +267,7 @@ def notify_executable(exe_info, videos, subtitles, storage):
exe, arguments = exe_info
for video, video_subtitles in subtitles.items():
for subtitle in video_subtitles:
lang = Locale.Language.Match(subtitle.language.alpha2)
lang = str(subtitle.language)
data = video.plexapi_metadata.copy()
data.update({
"subtitle_language": lang,
@@ -273,9 +284,21 @@ def notify_executable(exe_info, videos, subtitles, storage):
prepared_arguments = [arg % prepared_data for arg in arguments]
Log.Debug(u"Calling %s with arguments: %s" % (exe, prepared_arguments))
env = os.environ
if not mswindows:
env_path = {"PATH": os.pathsep.join(
[
"/usr/local/bin",
"/usr/bin",
os.environ.get("PATH", "")
]
)
}
env = dict(os.environ, **env_path)
try:
output = subprocess.check_output(subprocess.list2cmdline([exe] + prepared_arguments),
stderr=subprocess.STDOUT, shell=True)
output = subprocess.check_output(quote_args([exe] + prepared_arguments),
stderr=subprocess.STDOUT, shell=True, env=env)
except subprocess.CalledProcessError:
Log.Error(u"Calling %s failed: %s" % (exe, traceback.format_exc()))
else:
@@ -286,6 +309,26 @@ def track_usage(category=None, action=None, label=None, value=None):
if not cast_bool(Prefs["track_usage"]):
return
if "last_tracked" not in Dict:
Dict["last_tracked"] = OrderedDict()
Dict.Save()
event_key = (category, action, label, value)
now = datetime.datetime.now()
if event_key in Dict["last_tracked"] and (Dict["last_tracked"][event_key] + datetime.timedelta(minutes=30)) < now:
return
Dict["last_tracked"][event_key] = now
# maintenance
for key, value in Dict["last_tracked"].copy().iteritems():
# kill day old values
if value < now - datetime.timedelta(days=1):
try:
del Dict["last_tracked"][key]
except:
pass
Thread.Create(dispatch_track_usage, category, action, label, value,
identifier=Dict["anon_id"], first_use=Dict["first_use"],
add=Network.PublicAddress)
@@ -303,3 +346,7 @@ def dispatch_track_usage(*args, **kwargs):
def get_language(lang_short):
return Language.fromietf(lang_short)
class PartUnknownException(Exception):
pass
+116 -19
View File
@@ -2,12 +2,15 @@
import logging
import re
import traceback
import types
import os
from ignore import ignore_list
from helpers import is_recent, get_plex_item_display_title, query_plex
from helpers import is_recent, get_plex_item_display_title, query_plex, PartUnknownException
from lib import Plex, get_intent
from config import config, IGNORE_FN
from subliminal_patch.subtitle import ModifiedSubtitle
from subzero.modification import registry as mod_registry, SubtitleModifications
logger = logging.getLogger(__name__)
@@ -22,7 +25,7 @@ def get_item(key):
try:
return list(item_container)[0]
except IndexError:
except:
pass
@@ -40,11 +43,11 @@ PLEX_API_TYPE_MAP = {
def get_item_kind_from_rating_key(key):
item = get_item(key)
return PLEX_API_TYPE_MAP[get_item_kind(item)]
return PLEX_API_TYPE_MAP.get(get_item_kind(item))
def get_item_kind_from_item(item):
return PLEX_API_TYPE_MAP[get_item_kind(item)]
return PLEX_API_TYPE_MAP.get(get_item_kind(item))
def get_item_thumb(item):
@@ -164,14 +167,17 @@ def get_recent_items():
"X-Plex-Container-Size": "%s" % config.max_recent_items_per_library
}
episode_re = re.compile(ur'ratingKey="(?P<key>\d+)"'
episode_re = re.compile(ur'(?su)ratingKey="(?P<key>\d+)"'
ur'.+?grandparentRatingKey="(?P<parent_key>\d+)"'
ur'.+?title="(?P<title>.*?)"'
ur'.+?grandparentTitle="(?P<parent_title>.*?)"'
ur'.+?index="(?P<episode>\d+?)"'
ur'.+?parentIndex="(?P<season>\d+?)".+?addedAt="(?P<added>\d+)"')
movie_re = re.compile(ur'ratingKey="(?P<key>\d+)".+?title="(?P<title>.*?)".+?addedAt="(?P<added>\d+)"')
available_keys = ("key", "title", "parent_key", "parent_title", "season", "episode", "added")
ur'.+?parentIndex="(?P<season>\d+?)".+?addedAt="(?P<added>\d+)"'
ur'.+?<Part.+? file="(?P<filename>[^"]+?)"')
movie_re = re.compile(ur'(?su)ratingKey="(?P<key>\d+)".+?title="(?P<title>.*?)'
ur'".+?addedAt="(?P<added>\d+)"'
ur'.+?<Part.+? file="(?P<filename>[^"]+?)"')
available_keys = ("key", "title", "parent_key", "parent_title", "season", "episode", "added", "filename")
recent = []
for section in Plex["library"].sections():
@@ -182,8 +188,10 @@ def get_recent_items():
continue
use_args = args.copy()
plex_item_type = "Movie"
if section.type == "show":
use_args["type"] = "4"
plex_item_type = "Episode"
url = "http://127.0.0.1:32400/library/sections/%s/all" % int(section.key)
response = query_plex(url, use_args)
@@ -198,6 +206,10 @@ def get_recent_items():
if data["key"] in ignore_list.videos:
Log.Debug(u"Skipping item: %s" % data["title"])
continue
if is_physically_ignored(data["filename"], plex_item_type):
Log.Debug(u"Skipping item: %s" % data["title"])
continue
if is_recent(int(data["added"])):
recent.append((int(data["added"]), section.type, section.title, data["key"]))
@@ -242,6 +254,16 @@ def is_ignored(rating_key, item=None):
return True
# physical/path ignore
if config.ignore_sz_files or config.ignore_paths:
for media in item.media:
for part in media.parts:
if is_physically_ignored(part.file, kind):
return True
return False
def is_physically_ignored(fn, kind):
if config.ignore_sz_files or config.ignore_paths:
# normally check current item folder and the library
check_ignore_paths = [".", "../"]
@@ -249,18 +271,15 @@ def is_ignored(rating_key, item=None):
# series/episode, we've got a season folder here, also
check_ignore_paths.append("../../")
for part in item.media.parts:
if config.ignore_paths and config.is_path_ignored(part.file):
Log.Debug("Item %s's path is manually ignored" % rating_key)
return True
if config.ignore_paths and config.is_path_ignored(fn):
Log.Debug("Item %s's path is manually ignored" % fn)
return True
if config.ignore_sz_files:
for sub_path in check_ignore_paths:
if config.is_physically_ignored(os.path.abspath(os.path.join(os.path.dirname(part.file), sub_path))):
Log.Debug("An ignore file exists in either the items or its parent folders")
return True
return False
if config.ignore_sz_files:
for sub_path in check_ignore_paths:
if config.is_physically_ignored(os.path.normpath(os.path.join(os.path.dirname(fn), sub_path))):
Log.Debug("An ignore file exists in either the items or its parent folders")
return True
def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, parent_rating_key=None):
@@ -283,3 +302,81 @@ def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, paren
for key in refresh:
Log.Info("%s item %s", "Refreshing" if not force else "Forced-refreshing", key)
Plex["library/metadata"].refresh(key)
def get_current_sub(rating_key, part_id, language):
from support.storage import get_subtitle_storage
item = get_item(rating_key)
subtitle_storage = get_subtitle_storage()
stored_subs = subtitle_storage.load_or_new(item)
current_sub = stored_subs.get_any(part_id, language)
return current_sub, stored_subs, subtitle_storage
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
from support.plex_media import get_plex_metadata, scan_videos
from support.storage import save_subtitles
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
if mode == "add":
for mod in mods:
identifier, args = SubtitleModifications.parse_identifier(mod)
mod_class = SubtitleModifications.get_mod_class(identifier)
if identifier not in mod_registry.mods_available:
raise NotImplementedError("Mod unknown or not registered")
# clean exclusive mods
if mod_class.exclusive and current_sub.mods:
for current_mod in current_sub.mods[:]:
if current_mod.startswith(identifier):
current_sub.mods.remove(current_mod)
Log.Info("Removing superseded mod %s" % current_mod)
current_sub.add_mod(mod)
elif mode == "clear":
current_sub.add_mod(None)
elif mode == "remove":
for mod in mods:
current_sub.mods.remove(mod)
elif mode == "remove_last":
if current_sub.mods:
current_sub.mods.pop()
else:
raise NotImplementedError("Wrong mode given")
storage.save(stored_subs)
try:
metadata = get_plex_metadata(rating_key, part_id, item_type)
except PartUnknownException:
return
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True,
no_refining=True)
video, plex_part = scanned_parts.items()[0]
subtitle = ModifiedSubtitle(language, mods=current_sub.mods)
subtitle.content = current_sub.content
if current_sub.encoding:
# thanks plex
setattr(subtitle, "_guessed_encoding", current_sub.encoding)
if current_sub.encoding != "utf-8":
subtitle.set_encoding("utf-8")
current_sub.content = subtitle.content
current_sub.encoding = "utf-8"
storage.save(stored_subs)
subtitle.plex_media_fps = plex_part.fps
subtitle.page_link = "modify subtitles with: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
subtitle.language = language
subtitle.id = current_sub.id
try:
save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
", ".join(current_sub.mods) if current_sub.mods else "none")
except:
Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())
+4 -4
View File
@@ -108,7 +108,8 @@ def find_subtitles(part):
if ext.lower()[1:] in config.SUBTITLE_EXTS:
# get fn without forced/default/normal tag
split_tag = root.rsplit(".", 1)
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default']:
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded',
'custom']:
root = split_tag[0]
# get associated media file name without language
@@ -160,9 +161,8 @@ def find_subtitles(part):
# determine whether to pick up the subtitle based on our match strictness
elif not filename_matches_part:
if sz_config.ext_match_strictness == "strict" or (
sz_config.ext_match_strictness == "loose" and not filename_contains_part):
#Log.Debug("%s doesn't match %s, skipping" % (helpers.unicodize(local_filename),
sz_config.ext_match_strictness == "loose" and not filename_contains_part):
# Log.Debug("%s doesn't match %s, skipping" % (helpers.unicodize(local_filename),
# helpers.unicodize(part_basename)))
continue
+31 -27
View File
@@ -1,5 +1,6 @@
# coding=utf-8
import traceback
import time
from support.config import config
from support.helpers import get_plex_item_display_title, cast_bool
@@ -8,8 +9,6 @@ from support.lib import Plex
def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_title=None, internal=False, external=True, languages=()):
existing_subs = {"internal": [], "external": [], "count": 0}
item_id = int(rating_key)
item = get_item(rating_key)
@@ -18,36 +17,41 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
else:
item_title = get_plex_item_display_title(item, kind, section_title=section_title)
video = item.media
missing = set()
languages_set = set(languages)
for media in item.media:
existing_subs = {"internal": [], "external": [], "count": 0}
for part in media.parts:
for stream in part.streams:
if stream.stream_type == 3:
if stream.index:
key = "internal"
else:
key = "external"
for part in video.parts:
for stream in part.streams:
if stream.stream_type == 3:
if stream.index:
key = "internal"
else:
key = "external"
existing_subs[key].append(Locale.Language.Match(stream.language_code or ""))
existing_subs["count"] = existing_subs["count"] + 1
existing_subs[key].append(Locale.Language.Match(stream.language_code or ""))
existing_subs["count"] = existing_subs["count"] + 1
missing_from_part = set(languages_set)
if existing_subs["count"]:
existing_flat = set((existing_subs["internal"] if internal else []) + (existing_subs["external"] if external else []))
if languages_set.issubset(existing_flat) or (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
# all subs found
#Log.Info(u"All subtitles exist for '%s'", item_title)
continue
missing = languages
if existing_subs["count"]:
existing_flat = (existing_subs["internal"] if internal else []) + (existing_subs["external"] if external else [])
languages_set = set(languages)
if languages_set.issubset(existing_flat) or (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
# all subs found
Log.Info(u"All subtitles exist for '%s'", item_title)
return
missing_from_part = languages_set - existing_flat
missing = languages_set - set(existing_flat)
Log.Info(u"Subs still missing for '%s': %s", item_title, missing)
if missing_from_part:
Log.Info(u"Subs still missing for '%s' (%s: %s): %s", item_title, rating_key, media.id,
missing_from_part)
missing.update(missing_from_part)
if missing:
return added_at, item_id, item_title, item, missing
def items_get_all_missing_subs(items):
def items_get_all_missing_subs(items, sleep_after_request=False):
missing = []
for added_at, kind, section_title, key in items:
try:
@@ -65,13 +69,13 @@ def items_get_all_missing_subs(items):
missing.append(state)
except:
Log.Error("Something went wrong when getting the state of item %s: %s", key, traceback.format_exc())
if sleep_after_request:
time.sleep(sleep_after_request)
return missing
def refresh_item(item):
Plex["library/metadata"].refresh(item)
if not config.no_refresh:
Plex["library/metadata"].refresh(item)
def refresh_items(items):
for item, title in items:
refresh_item(item)
+179 -49
View File
@@ -1,15 +1,14 @@
# coding=utf-8
import os
from urllib2 import URLError
import helpers
from config import config
from items import get_item
from lib import get_intent, Plex
from config import config
from subzero.video import parse_video
def get_metadata_dict(item, part, add):
data = {
"item": item,
@@ -22,6 +21,54 @@ def get_metadata_dict(item, part, add):
return data
imdb_guid_identifier = "com.plexapp.agents.imdb://"
tvdb_guid_identifier = "com.plexapp.agents.thetvdb://"
def get_plexapi_stream_info(plex_item, part_id=None):
d = {"stream": {}}
data = d["stream"]
# find current part
current_part = None
current_media = None
for media in plex_item.media:
for part in media.parts:
if not part_id or str(part.id) == part_id:
current_part = part
current_media = media
break
if current_part:
break
if not current_part:
return d
data["video_codec"] = current_media.video_codec
data["audio_codec"] = current_media.audio_codec.upper()
if data["audio_codec"] == "DCA":
data["audio_codec"] = "DTS"
if current_media.audio_channels == 8:
data["audio_channels"] = "7.1"
elif current_media.audio_channels == 6:
data["audio_channels"] = "5.1"
else:
data["audio_channels"] = "%s.0" % str(current_media.audio_channels)
# iter streams
for stream in current_part.streams:
if stream.stream_type == 1:
# video stream
data["resolution"] = "%s%s" % (current_media.video_resolution,
"i" if stream.scan_type != "progressive" else "p")
break
return d
def media_to_videos(media, kind="series"):
"""
iterates through media and returns the associated parts (videos)
@@ -31,36 +78,61 @@ def media_to_videos(media, kind="series"):
"""
videos = []
# this is a Show or a Movie object
plex_item = get_item(media.id)
year = plex_item.year
original_title = plex_item.title_original
if kind == "series":
for season in media.seasons:
season_object = media.seasons[season]
for episode in media.seasons[season].episodes:
ep = media.seasons[season].episodes[episode]
tvdb_id = None
series_tvdb_id = None
if tvdb_guid_identifier in ep.guid:
tvdb_id = ep.guid[len(tvdb_guid_identifier):].split("?")[0]
series_tvdb_id = tvdb_id.split("/")[0]
# get plex item via API for additional metadata
plex_episode = get_item(ep.id)
stream_info = get_plexapi_stream_info(plex_episode)
for item in media.seasons[season].episodes[episode].items:
for part in item.parts:
videos.append(
get_metadata_dict(plex_episode, part,
{"plex_part": part, "type": "episode", "title": ep.title,
"series": media.title, "id": ep.id,
"series_id": media.id, "season_id": season_object.id,
"episode": plex_episode.index, "season": plex_episode.season.index,
"section": plex_episode.section.title
})
dict(stream_info, **{"plex_part": part, "type": "episode",
"title": ep.title,
"series": media.title, "id": ep.id, "year": year,
"series_id": media.id,
"season_id": season_object.id,
"imdb_id": None, "series_tvdb_id": series_tvdb_id,
"tvdb_id": tvdb_id,
"original_title": original_title,
"episode": plex_episode.index,
"season": plex_episode.season.index,
"section": plex_episode.section.title
})
)
)
else:
plex_item = get_item(media.id)
stream_info = get_plexapi_stream_info(plex_item)
imdb_id = None
if imdb_guid_identifier in media.guid:
imdb_id = media.guid[len(imdb_guid_identifier):].split("?")[0]
for item in media.items:
for part in item.parts:
videos.append(
get_metadata_dict(plex_item, part, {"plex_part": part, "type": "movie",
"title": media.title, "id": media.id,
"series_id": None,
"season_id": None,
"section": plex_item.section.title})
get_metadata_dict(plex_item, part, dict(stream_info, **{"plex_part": part, "type": "movie",
"title": media.title, "id": media.id,
"series_id": None, "year": year,
"season_id": None, "imdb_id": imdb_id,
"original_title": original_title,
"series_tvdb_id": None, "tvdb_id": None,
"section": plex_item.section.title})
)
)
return videos
@@ -92,10 +164,10 @@ def get_media_item_ids(media, kind="series"):
return ids
def scan_video(plex_part, ignore_all=False, hints=None, rating_key=None):
def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, no_refining=False):
"""
returnes a subliminal/guessit-refined parsed video
:param plex_part:
:param pms_video_info:
:param ignore_all:
:param hints:
:param rating_key:
@@ -104,14 +176,19 @@ def scan_video(plex_part, ignore_all=False, hints=None, rating_key=None):
embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
external_subtitles = not ignore_all and Prefs['subtitles.scan.external']
plex_part = pms_video_info["plex_part"]
if ignore_all:
Log.Debug("Force refresh intended.")
Log.Debug("Scanning video: %s, subtitles=%s, embedded_subtitles=%s" % (
Log.Debug("Scanning video: %s, external_subtitles=%s, embedded_subtitles=%s" % (
plex_part.file, external_subtitles, embedded_subtitles))
known_embedded = []
parts = list(Plex["library"].metadata(rating_key))[0].media.parts
parts = []
for media in list(Plex["library"].metadata(rating_key))[0].media:
parts += media.parts
plexpy_part = None
for part in parts:
if int(part.id) == int(plex_part.id):
@@ -139,17 +216,19 @@ def scan_video(plex_part, ignore_all=False, hints=None, rating_key=None):
try:
# get basic video info scan (filename)
video = parse_video(plex_part.file, hints, external_subtitles=external_subtitles,
video = parse_video(plex_part.file, pms_video_info, hints, external_subtitles=external_subtitles,
embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
forced_only=config.forced_only, video_fps=plex_part.fps)
forced_only=config.forced_only, no_refining=no_refining)
# add video fps info
video.fps = plex_part.fps
return video
except ValueError:
Log.Warn("File could not be guessed by subliminal: %s" % plex_part.file)
def scan_videos(videos, kind="series", ignore_all=False):
def scan_videos(videos, kind="series", ignore_all=False, no_refining=False):
"""
receives a list of videos containing dictionaries returned by media_to_videos
:param videos:
@@ -165,8 +244,8 @@ def scan_videos(videos, kind="series", ignore_all=False):
hints = helpers.get_item_hints(video)
video["plex_part"].fps = get_stream_fps(video["plex_part"].streams)
scanned_video = scan_video(video["plex_part"], ignore_all=force_refresh or ignore_all, hints=hints,
rating_key=video["id"])
scanned_video = scan_video(video, ignore_all=force_refresh or ignore_all, hints=hints,
rating_key=video["id"], no_refining=no_refining)
if not scanned_video:
continue
@@ -179,49 +258,79 @@ def scan_videos(videos, kind="series", ignore_all=False):
return ret
class PartUnknownException(Exception):
pass
def get_plex_metadata(rating_key, part_id, item_type):
def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
"""
uses the Plex 3rd party API accessor to get metadata information
:param rating_key:
:param rating_key: movie or episode
:param part_id:
:param item_type:
:return:
"""
plex_item = list(Plex["library"].metadata(rating_key))[0]
if not plex_item:
plex_item = get_item(rating_key)
if not plex_item:
return
# find current part
current_part = None
for part in plex_item.media.parts:
if str(part.id) == part_id:
current_part = part
for media in plex_item.media:
for part in media.parts:
if str(part.id) == str(part_id):
current_part = part
if not current_part:
raise PartUnknownException("Part unknown")
raise helpers.PartUnknownException("Part unknown")
stream_info = get_plexapi_stream_info(plex_item, part_id)
# get normalized metadata
# fixme: duplicated logic of media_to_videos
if item_type == "episode":
show = list(Plex["library"].metadata(plex_item.show.rating_key))[0]
year = show.year
tvdb_id = None
series_tvdb_id = None
original_title = show.title_original
if tvdb_guid_identifier in plex_item.guid:
tvdb_id = plex_item.guid[len(tvdb_guid_identifier):].split("?")[0]
series_tvdb_id = tvdb_id.split("/")[0]
metadata = get_metadata_dict(plex_item, current_part,
{"plex_part": current_part, "type": "episode", "title": plex_item.title,
"series": plex_item.show.title, "id": plex_item.rating_key,
"series_id": plex_item.show.rating_key,
"season_id": plex_item.season.rating_key,
"season": plex_item.season.index,
"episode": plex_item.index
})
dict(stream_info,
**{"plex_part": current_part, "type": "episode", "title": plex_item.title,
"series": plex_item.show.title, "id": plex_item.rating_key,
"series_id": plex_item.show.rating_key,
"season_id": plex_item.season.rating_key,
"imdb_id": None,
"year": year,
"tvdb_id": tvdb_id,
"series_tvdb_id": series_tvdb_id,
"original_title": original_title,
"season": plex_item.season.index,
"episode": plex_item.index
})
)
else:
metadata = get_metadata_dict(plex_item, current_part, {"plex_part": current_part, "type": "movie",
"title": plex_item.title, "id": plex_item.rating_key,
"series_id": None,
"season_id": None,
"season": None,
"episode": None,
"section": plex_item.section.title})
imdb_id = None
original_title = plex_item.title_original
if imdb_guid_identifier in plex_item.guid:
imdb_id = plex_item.guid[len(imdb_guid_identifier):].split("?")[0]
metadata = get_metadata_dict(plex_item, current_part,
dict(stream_info, **{"plex_part": current_part, "type": "movie",
"title": plex_item.title, "id": plex_item.rating_key,
"series_id": None,
"season_id": None,
"imdb_id": imdb_id,
"year": plex_item.year,
"tvdb_id": None,
"series_tvdb_id": None,
"original_title": original_title,
"season": None,
"episode": None,
"section": plex_item.section.title})
)
return metadata
@@ -257,3 +366,24 @@ class PMSMediaProxy(object):
break
m = m.children[0]
def get_all_parts(self):
"""
walk the mediatree until the given part was found; if no part was given, return the first one
:param part_id:
:return:
"""
m = self.mediatree
parts = []
while 1:
if m.items:
media_item = m.items[0]
for part in media_item.parts:
parts.append(part)
break
if not m.children:
break
m = m.children[0]
return parts
+8 -3
View File
@@ -72,7 +72,7 @@ class DefaultScheduler(object):
try:
task_frequency = Prefs["scheduler.tasks.%s.frequency" % task.name]
except KeyError:
task_frequency = None
task_frequency = getattr(task, "frequency", None)
self.tasks[task.name] = {"task": task, "frequency": parse_frequency(task_frequency)}
@@ -168,6 +168,7 @@ class DefaultScheduler(object):
for args, kwargs in queue:
Log.Debug("Dispatching single task: %s, %s", args, kwargs)
Thread.Create(self.run_task, True, *args, **kwargs)
Thread.Sleep(5.0)
# scheduled tasks
for name, info in self.tasks.iteritems():
@@ -185,9 +186,13 @@ class DefaultScheduler(object):
continue
if not task.last_run or (task.last_run + datetime.timedelta(**{frequency_key: frequency_num}) <= now):
self.run_task(name)
# fixme: scheduled tasks run synchronously. is this the best idea?
Thread.Create(self.run_task, True, name)
#Thread.Sleep(5.0)
#self.run_task(name)
Thread.Sleep(5.0)
Thread.Sleep(5.0)
Thread.Sleep(1)
scheduler = DefaultScheduler()
+43 -60
View File
@@ -5,61 +5,24 @@ import os
import pprint
import copy
import subliminal
from items import get_item
from subliminal_patch.core import save_subtitles as subliminal_save_subtitles
from subzero.subtitle_storage import StoredSubtitlesManager
from subtitlehelpers import force_utf8
from config import config
from helpers import notify_executable, get_title_for_video_metadata, cast_bool, force_unicode
from plex_media import PMSMediaProxy
from support.items import get_item
get_subtitle_storage = lambda: StoredSubtitlesManager(Data, get_item)
def whack_missing_parts(scanned_video_part_map, existing_parts=None):
"""
cleans out our internal storage's video parts (parts may get updated/deleted/whatever)
:param existing_parts: optional list of part ids known
:param scanned_video_part_map: videos to check for
:return:
"""
# shortcut
if "subs" not in Dict:
return
if not existing_parts:
existing_parts = []
for part in scanned_video_part_map.viewvalues():
existing_parts.append(str(part.id))
whacked_parts = False
for video in scanned_video_part_map.keys():
video_id = str(video.id)
if video_id not in Dict["subs"]:
continue
parts = Dict["subs"][video_id].keys()
for part_id in parts:
part_id = str(part_id)
if part_id not in existing_parts:
Log.Info("Whacking part %s in internal storage of video %s (%s, %s)", part_id, video_id,
repr(existing_parts), repr(parts))
del Dict["subs"][video_id][part_id]
whacked_parts = True
if whacked_parts:
Dict.Save()
def get_subtitle_storage():
return StoredSubtitlesManager(Data, get_item)
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a"):
"""
stores information about downloaded subtitles in plex's Dict()
"""
existing_parts = []
for video, video_subtitles in downloaded_subtitles.items():
part = scanned_video_part_map[video]
part_id = str(part.id)
@@ -71,27 +34,21 @@ def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_ty
subtitle_storage = get_subtitle_storage()
stored_subs = subtitle_storage.load_or_new(plex_item)
existing_parts.append(part_id)
stored_any = False
for subtitle in video_subtitles:
lang = Locale.Language.Match(subtitle.language.alpha2)
Log.Debug(u"Adding subtitle to storage: %s, %s, %s" % (video_id, part_id, title))
lang = str(subtitle.language)
subtitle.set_encoding("utf-8")
Log.Debug(u"Adding subtitle to storage: %s, %s, %s, %s" % (video_id, part_id, title,
subtitle.guess_encoding()))
ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode)
if ret_val:
Log.Debug("Subtitle stored")
stored_any = True
else:
Log.Debug("Subtitle already existing in storage")
if stored_any:
Log.Debug("Saving subtitle storage for %s" % video_id)
subtitle_storage.save(stored_subs)
#if existing_parts:
# whack_missing_parts(scanned_video_part_map, existing_parts=existing_parts)
Log.Debug("Saving subtitle storage for %s" % video_id)
subtitle_storage.save(stored_subs)
def reset_storage(key):
@@ -107,6 +64,8 @@ def reset_storage(key):
def log_storage(key):
if not key:
Log.Debug(pprint.pformat(getattr(Dict, "_dict")))
if key in Dict:
Log.Debug(pprint.pformat(Dict[key]))
@@ -134,9 +93,9 @@ def save_subtitles_to_file(subtitles):
fld = force_unicode(fld)
if not os.path.exists(fld):
os.makedirs(fld)
subliminal.save_subtitles(video, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
encode_with=force_utf8 if config.enforce_encoding else None,
chmod=config.chmod, forced_tag=config.forced_only, path_decoder=force_unicode)
subliminal_save_subtitles(video, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
chmod=config.chmod, forced_tag=config.forced_only, path_decoder=force_unicode,
debug_mods=config.debug_mods, formats=config.subtitle_formats)
return True
@@ -144,7 +103,7 @@ def save_subtitles_to_metadata(videos, subtitles):
for video, video_subtitles in subtitles.items():
mediaPart = videos[video]
for subtitle in video_subtitles:
content = force_utf8(subtitle.text) if config.enforce_encoding else subtitle.content
content = subtitle.get_modified_content(debug=config.debug_mods)
if not isinstance(mediaPart, Framework.api.agentkit.MediaPart):
# we're being handed a Plex.py model instance here, not an internal PMS MediaPart object.
@@ -156,9 +115,29 @@ def save_subtitles_to_metadata(videos, subtitles):
return True
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a"):
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None):
"""
:param scanned_video_part_map:
:param downloaded_subtitles:
:param mode:
:param bare_save: don't trigger anything; don't store information
:param mods: enabled mods
:return:
"""
meta_fallback = False
save_successful = False
if mods:
for video, video_subtitles in downloaded_subtitles.items():
if not video_subtitles:
continue
for subtitle in video_subtitles:
Log.Info("Applying mods: %s to %s", mods, subtitle)
subtitle.mods = mods
subtitle.plex_media_fps = video.fps
storage = "metadata"
if Prefs['subtitles.save.filesystem']:
storage = "filesystem"
@@ -180,7 +159,11 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a"):
Log.Debug("Using metadata as subtitle storage")
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles)
if save_successful and config.notify_executable:
if not bare_save and save_successful and config.notify_executable:
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode)
if not bare_save and save_successful:
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode)
return save_successful
+9 -7
View File
@@ -86,7 +86,7 @@ class VobSubSubtitleHelper(SubtitleHelper):
IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$"
ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2})?$")
ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
def match_ietf_language(s):
@@ -129,13 +129,12 @@ class DefaultSubtitleHelper(SubtitleHelper):
default = '1'
# Attempt to extract the language from the filename (e.g. Avatar (2009).eng)
language = ""
# IETF support thanks to https://github.com/hpsbranco/LocalMedia.bundle/commit/4fad9aefedece78a1fa96401304351347f644369
# IETF support thanks to
# https://github.com/hpsbranco/LocalMedia.bundle/commit/4fad9aefedece78a1fa96401304351347f644369
language = Locale.Language.Match(match_ietf_language(file))
# skip non-SRT if wanted
if not helpers.cast_bool(Prefs["subtitles.scan.exotic_ext"]) and ext not in ["srt", "ass", "ssa"]:
if not helpers.cast_bool(Prefs["subtitles.scan.exotic_ext"]) and ext not in ["srt", "ass", "ssa", "vtt"]:
return lang_sub_map
codec = None
@@ -158,7 +157,7 @@ class DefaultSubtitleHelper(SubtitleHelper):
Log("An error occurred while attempting to parse the subtitle file, skipping... : " + self.filename)
return lang_sub_map
if codec is None and ext in ['ass', 'ssa', 'smi', 'srt', 'psb']:
if codec is None and ext in ['ass', 'ssa', 'smi', 'srt', 'psb', 'vtt']:
codec = ext.replace('ass', 'ssa')
if format is None:
@@ -194,7 +193,10 @@ def get_subtitles_from_metadata(part):
def force_utf8(content):
a = UnicodeDammit(content)
Log.Debug("detected encoding: %s (None: most likely already successfully decoded)" % a.original_encoding)
if a.original_encoding:
Log.Debug("detected encoding: %s (None: most likely already successfully decoded)" % a.original_encoding)
else:
Log.Debug("detected encoding: unicode (already decoded)")
# easy way out - already utf-8
if a.original_encoding and a.original_encoding == "utf-8":
+295 -119
View File
@@ -11,13 +11,13 @@ from subliminal import list_subtitles as list_all_subtitles
from babelfish import Language
from missing_subtitles import items_get_all_missing_subs, refresh_item
from background import scheduler
from storage import save_subtitles, whack_missing_parts, get_subtitle_storage
from scheduler import scheduler
from storage import save_subtitles, get_subtitle_storage
from support.config import config
from support.items import get_recent_items, is_ignored, get_item
from support.lib import Plex
from support.helpers import track_usage, get_title_for_video_metadata, cast_bool
from support.plex_media import scan_videos, get_plex_metadata, PartUnknownException
from support.items import get_recent_items, get_item, is_ignored
from support.helpers import track_usage, get_title_for_video_metadata, cast_bool, PartUnknownException
from support.plex_media import scan_videos, get_plex_metadata
from download import download_best_subtitles
class Task(object):
@@ -80,124 +80,52 @@ class Task(object):
return
def run(self):
Log.Info(u"Task: running: %s", self.name)
self.time_start = datetime.datetime.now()
def post_run(self, data_holder):
self.running = False
self.last_run = datetime.datetime.now()
if self.time_start:
if self.time_start and self.last_run:
self.last_run_time = self.last_run - self.time_start
self.time_start = None
class SearchAllRecentlyAddedMissing(Task):
periodic = True
items_done = None
items_searching = None
items_searching_ids = None
items_failed = None
percentage = 0
stall_time = 30
def __init__(self, scheduler):
super(SearchAllRecentlyAddedMissing, self).__init__(scheduler)
self.items_done = None
self.items_searching = None
self.items_searching_ids = None
self.items_failed = None
self.percentage = 0
def signal(self, signal_name, *args, **kwargs):
handler = getattr(self, "signal_%s" % signal_name)
return handler(*args, **kwargs) if handler else None
def signal_updated_metadata(self, *args, **kwargs):
item_id = int(args[0])
if self.items_searching_ids is not None and item_id in self.items_searching_ids:
self.items_done.append(item_id)
return True
def prepare(self, *args, **kwargs):
self.items_done = []
recent_items = get_recent_items()
missing = items_get_all_missing_subs(recent_items)
ids = set([id for added_at, id, title, item, missing_languages in missing if not is_ignored(id, item=item)])
self.items_searching = missing
self.items_searching_ids = ids
self.items_failed = []
self.percentage = 0
self.ready_for_display = True
def run(self):
super(SearchAllRecentlyAddedMissing, self).run()
self.running = True
missing_count = len(self.items_searching)
items_done_count = 0
for added_at, item_id, title, item, missing_languages in self.items_searching:
Log.Debug(u"Task: %s, triggering refresh for %s (%s)", self.name, title, item_id)
refresh_item(item_id)
search_started = datetime.datetime.now()
tries = 1
while 1:
if item_id in self.items_done:
items_done_count += 1
Log.Debug(u"Task: %s, item %s done", self.name, item_id)
self.percentage = int(items_done_count * 100 / missing_count)
break
# item considered stalled after self.stall_time seconds passed after last refresh
if (datetime.datetime.now() - search_started).total_seconds() > self.stall_time:
if tries > 3:
self.items_failed.append(item_id)
Log.Debug(u"Task: %s, item stalled for %s times: %s, skipping", self.name, tries, item_id)
break
Log.Debug(u"Task: %s, item stalled for %s seconds: %s, retrying", self.name, self.stall_time,
item_id)
tries += 1
refresh_item(item_id)
search_started = datetime.datetime.now()
time.sleep(1)
time.sleep(0.1)
# we can't hammer the PMS, otherwise requests will be stalled
time.sleep(1)
Log.Debug("Task: %s, done. Failed items: %s", self.name, self.items_failed)
self.running = False
def post_run(self, task_data):
super(SearchAllRecentlyAddedMissing, self).post_run(task_data)
self.ready_for_display = False
self.percentage = 0
self.items_done = None
self.items_failed = None
self.items_searching = None
self.items_searching_ids = None
Log.Info(u"Task: ran: %s", self.name)
class SubtitleListingMixin(object):
def list_subtitles(self, rating_key, item_type, part_id, language):
metadata = get_plex_metadata(rating_key, part_id, item_type)
def list_subtitles(self, rating_key, item_type, part_id, language, skip_wrong_fps=True, metadata=None,
scanned_parts=None):
if item_type == "episode":
min_score = 240
else:
min_score = 60
if not metadata:
metadata = get_plex_metadata(rating_key, part_id, item_type)
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
if not scanned_parts:
Log.Error("Couldn't list available subtitles for %s", rating_key)
if not metadata:
return
if not scanned_parts:
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
if not scanned_parts:
Log.Error("Couldn't list available subtitles for %s", rating_key)
return
video, plex_part = scanned_parts.items()[0]
config.init_subliminal_patches()
provider_settings = config.provider_settings.copy()
if not skip_wrong_fps:
provider_settings = config.provider_settings.copy()
provider_settings["opensubtitles"]["skip_wrong_fps"] = False
if item_type == "episode":
min_score = 240
if video.is_special:
min_score = 180
else:
min_score = 60
available_subs = list_all_subtitles(scanned_parts, {Language.fromietf(language)},
providers=config.providers,
provider_configs=config.provider_settings,
provider_configs=provider_settings,
pool_class=config.provider_pool)
use_hearing_impaired = Prefs['subtitles.search.hearingImpaired'] in ("prefer", "force HI")
@@ -247,8 +175,7 @@ class DownloadSubtitleMixin(object):
if subtitle.content:
try:
whack_missing_parts(scanned_parts)
save_subtitles(scanned_parts, {video: [subtitle]}, mode=mode)
save_subtitles(scanned_parts, {video: [subtitle]}, mode=mode, mods=config.default_mods)
Log.Debug("Manually downloaded subtitle for: %s", rating_key)
download_successful = True
refresh_item(rating_key)
@@ -266,6 +193,8 @@ class DownloadSubtitleMixin(object):
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
subtitle=subtitle,
mode=mode)
else:
set_refresh_menu_state("Subtitle download failed (%s)" % rating_key)
return download_successful
@@ -291,7 +220,13 @@ class AvailableSubsForItem(SubtitleListingMixin, Task):
super(AvailableSubsForItem, self).run()
self.running = True
track_usage("Subtitle", "manual", "list", 1)
self.data = self.list_subtitles(self.rating_key, self.item_type, self.part_id, self.language)
subs = self.list_subtitles(self.rating_key, self.item_type, self.part_id, self.language, skip_wrong_fps=False)
if not subs:
self.data = "found_none"
return
# we can't have nasty unpicklable stuff like ZipFile, BytesIO etc in self.data
self.data = [s.make_picklable() for s in subs]
def post_run(self, task_data):
super(AvailableSubsForItem, self).post_run(task_data)
@@ -335,11 +270,174 @@ class MissingSubtitles(Task):
task_data["missing_subtitles"] = self.data
class SearchAllRecentlyAddedMissing(Task):
periodic = True
items_done = None
items_searching = None
percentage = 0
def __init__(self, scheduler):
super(SearchAllRecentlyAddedMissing, self).__init__(scheduler)
self.items_done = None
self.items_searching = None
self.percentage = 0
def signal_updated_metadata(self, *args, **kwargs):
return True
def prepare(self):
self.items_done = 0
self.items_searching = 0
self.percentage = 0
self.ready_for_display = True
def run(self):
super(SearchAllRecentlyAddedMissing, self).run()
self.running = True
self.prepare()
from support.history import get_history
history = get_history()
now = datetime.datetime.now()
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
is_recent_str = Prefs["scheduler.item_is_recent_age"]
num, ident = is_recent_str.split()
max_search_days = 0
if ident == "days":
max_search_days = int(num)
elif ident == "weeks":
max_search_days = int(num) * 7
subtitle_storage = get_subtitle_storage()
recent_sub_fns = subtitle_storage.get_recent_files(age_days=max_search_days)
viable_items = {}
# determine viable items
for fn in recent_sub_fns:
# added_date <= max_search_days?
stored_subs = subtitle_storage.load(filename=fn)
if not stored_subs:
continue
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
continue
viable_items[fn] = stored_subs
self.items_searching = len(viable_items)
download_count = 0
videos_with_downloads = 0
config.init_subliminal_patches()
Log.Info("%s: Searching for subtitles for %s items", self.name, self.items_searching)
# search for subtitles in viable items
for fn, stored_subs in viable_items.iteritems():
video_id = stored_subs.video_id
if stored_subs.item_type == "episode":
min_score = min_score_series
else:
min_score = min_score_movies
parts = []
plex_item = get_item(video_id)
if is_ignored(video_id, item=plex_item):
continue
for media in plex_item.media:
parts += media.parts
downloads_per_video = 0
for part in parts:
part_id = part.id
try:
metadata = get_plex_metadata(video_id, part_id, stored_subs.item_type)
except PartUnknownException:
Log.Info("%s: Part %s:%s unknown", self.name, video_id, part_id)
continue
if not metadata:
Log.Info("%s: Part %s:%s unknown", self.name, video_id, part_id)
continue
Log.Debug("%s: Looking for missing subtitles: %s:%s", self.name, video_id, part_id)
scanned_parts = scan_videos([metadata], kind="series"
if stored_subs.item_type == "episode" else "movie")
downloaded_subtitles = download_best_subtitles(scanned_parts, min_score=min_score)
download_successful = False
if downloaded_subtitles:
downloaded_any = any(downloaded_subtitles.values())
if not downloaded_any:
continue
try:
save_subtitles(scanned_parts, downloaded_subtitles, mode="a", mods=config.default_mods)
Log.Debug("%s: Downloaded subtitle for item with missing subs: %s", self.name, video_id)
download_successful = True
refresh_item(video_id)
track_usage("Subtitle", "manual", "download", 1)
except:
Log.Error("%s: Something went wrong when downloading specific subtitle: %s", self.name,
traceback.format_exc())
finally:
item_title = get_title_for_video_metadata(metadata, add_section_title=False)
if download_successful:
# store item in history
for video, video_subtitles in downloaded_subtitles.items():
if not video_subtitles:
continue
for subtitle in video_subtitles:
downloads_per_video += 1
history.add(item_title, video.id, section_title=metadata["section"],
subtitle=subtitle,
mode="a")
download_count += downloads_per_video
if downloads_per_video:
videos_with_downloads += 1
self.items_done = self.items_done + 1
self.percentage = int(self.items_done * 100 / self.items_searching)
if downloads_per_video:
time.sleep(5)
else:
time.sleep(1)
if download_count:
Log.Debug("Task: %s, done. Missing subtitles found for %s/%s items (%s subs downloaded)", self.name,
videos_with_downloads, self.items_searching, download_count)
else:
Log.Debug("Task: %s, done. No subtitles found for %s items", self.name, self.items_searching)
def post_run(self, task_data):
super(SearchAllRecentlyAddedMissing, self).post_run(task_data)
self.ready_for_display = False
self.percentage = 0
self.items_done = None
self.items_searching = None
class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
periodic = True
# TV: episode, format, series, year, season, video_codec, release_group, hearing_impaired
series_cutoff = 355
# TV: episode, format, series, year, season, video_codec, release_group, hearing_impaired, resolution
series_cutoff = 357
# movies: format, title, release_group, year, video_codec, resolution, hearing_impaired
movies_cutoff = 117
@@ -362,13 +460,26 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
return
now = datetime.datetime.now()
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
overwrite_manually_modified = cast_bool(
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_modified"])
overwrite_manually_selected = cast_bool(
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected"])
subtitle_storage = get_subtitle_storage()
recent_subs = subtitle_storage.load_recent_files(age_days=max_search_days)
viable_item_count = 0
for fn, stored_subs in recent_subs.iteritems():
video_id = stored_subs.video_id
cutoff = self.series_cutoff if stored_subs.item_type == "episode" else self.movies_cutoff
if stored_subs.item_type == "episode":
cutoff = self.series_cutoff
min_score = min_score_series
else:
cutoff = self.movies_cutoff
min_score = min_score_movies
# don't search for better subtitles until at least 30 minutes have passed
if stored_subs.added_at + datetime.timedelta(minutes=30) > now:
@@ -379,6 +490,7 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
continue
viable_item_count += 1
ditch_parts = []
# look through all stored subtitle data
@@ -398,14 +510,20 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
# late cutoff met? skip
if current_score >= cutoff:
Log.Debug(u"Skipping finding better subs, cutoff met (current: %s, cutoff: %s): %s",
current_score, cutoff, stored_subs.title)
Log.Debug(u"Skipping finding better subs, cutoff met (current: %s, cutoff: %s): %s (%s)",
current_score, cutoff, stored_subs.title, video_id)
continue
# got manual subtitle but don't want to touch those?
if current_mode == "m" and \
not cast_bool(Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected"]):
Log.Debug(u"Skipping finding better subs, had manual: %s", stored_subs.title)
if current_mode == "m" and not overwrite_manually_selected:
Log.Debug(u"Skipping finding better subs, had manual: %s (%s)", stored_subs.title, video_id)
continue
# subtitle modifications different from default
if not overwrite_manually_modified and current.mods \
and set(current.mods).difference(set(config.default_mods)):
Log.Debug(u"Skipping finding better subs, it has manual modifications: %s (%s)",
stored_subs.title, video_id)
continue
try:
@@ -420,7 +538,7 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
better_downloaded = False
better_tried_download = 0
for sub in subs:
if sub.score > current_score:
if sub.score > current_score and sub.score > min_score:
Log.Debug("Better subtitle found for %s, downloading", video_id)
better_tried_download += 1
ret = self.download_subtitle(sub, video_id, mode="b")
@@ -444,9 +562,64 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
pass
subtitle_storage.save(stored_subs)
time.sleep(1)
if better_found:
Log.Debug("Task: %s, done. Better subtitles found for %s items", self.name, better_found)
self.running = False
Log.Debug("Task: %s, done. Better subtitles found for %s/%s items", self.name, better_found,
viable_item_count)
else:
Log.Debug("Task: %s, done. No better subtitles found for %s items", self.name, viable_item_count)
class SubtitleStorageMaintenance(Task):
periodic = True
frequency = "every 7 days"
def run(self):
super(SubtitleStorageMaintenance, self).run()
self.running = True
Log.Info("Running subtitle storage maintenance")
storage = get_subtitle_storage()
deleted_items = storage.delete_missing(wanted_languages=set(str(l) for l in config.lang_list))
if deleted_items:
Log.Info("Subtitle information for %d non-existant videos have been cleaned up" % len(deleted_items))
Log.Debug("Videos: %s" % deleted_items)
else:
Log.Info("Nothing to do")
class MenuHistoryMaintenance(Task):
periodic = True
frequency = "every 7 days"
def run(self):
super(MenuHistoryMaintenance, self).run()
self.running = True
Log.Info("Running menu history maintenance")
now = datetime.datetime.now()
if "menu_history" in Dict:
for key, timeout in Dict["menu_history"].copy().items():
if now > timeout:
try:
del Dict["menu_history"][key]
except:
pass
class MigrateSubtitleStorage(Task):
periodic = False
frequency = None
def run(self):
super(MigrateSubtitleStorage, self).run()
self.running = True
Log.Info("Running subtitle storage migration")
storage = get_subtitle_storage()
for fn in storage.get_all_files():
if fn.endswith(".json.gz"):
continue
Log.Debug("Migrating %s", fn)
storage.load(None, fn)
scheduler.register(SearchAllRecentlyAddedMissing)
@@ -454,3 +627,6 @@ scheduler.register(AvailableSubsForItem)
scheduler.register(DownloadSubtitleForItem)
scheduler.register(MissingSubtitles)
scheduler.register(FindBetterSubtitles)
scheduler.register(SubtitleStorageMaintenance)
scheduler.register(MigrateSubtitleStorage)
scheduler.register(MenuHistoryMaintenance)
+76 -9
View File
@@ -40,6 +40,8 @@
"ro",
"ru",
"sr",
"sr-cyrl",
"sr-latn",
"sk",
"sl",
"es",
@@ -94,6 +96,8 @@
"ro",
"ru",
"sr",
"sr-cyrl",
"sr-latn",
"sk",
"sl",
"es",
@@ -148,6 +152,8 @@
"ro",
"ru",
"sr",
"sr-cyrl",
"sr-latn",
"sk",
"sl",
"es",
@@ -258,13 +264,14 @@
"35",
"30",
"25",
"21",
"20",
"15",
"10",
"5",
"0"
],
"default": "25"
"default": "21"
},
{
"id": "provider.addic7ed.use_random_agents",
@@ -332,7 +339,7 @@
},
{
"id": "providers.multithreading",
"label": "Search enabled providers simuntaneously (multithreading)",
"label": "Search enabled providers simultaneously (multithreading)",
"type": "bool",
"default": "true"
},
@@ -356,7 +363,7 @@
},
{
"id": "subtitles.scan.exotic_ext",
"label": "Scan: include \"exotic\" external subtitle formats (anything else than .srt/.ssa/.ass)",
"label": "Scan: include \"exotic\" subtitle formats (anything else than .srt/.ssa/.ass/.vtt; embedded or external)",
"type": "bool",
"default": "false"
},
@@ -381,7 +388,7 @@
"id": "subtitles.search.minimumMovieScore2",
"label": "Minimum score for movies (min: 60, def/sane: 69, min-ideal: 82; see http://v.ht/szscores)",
"type": "text",
"default": "69"
"default": "60"
},
{
"id": "subtitles.search.hearingImpaired",
@@ -396,17 +403,65 @@
"default": "don't prefer"
},
{
"id": "subtitles.enforce_encoding",
"label": "Normalize subtitle encoding to UTF-8",
"id": "subtitles.remove_hi",
"label": "Remove Hearing Impaired tags from downloaded subtitles",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.fix_common",
"label": "Fix common whitespace/punctuation issues in subtitles",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.fix_ocr",
"label": "Fix common OCR errors in downloaded subtitles",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.colors",
"label": "Change colors of subtitles to",
"type": "enum",
"values": [
"don't change",
"white",
"light-grey",
"red",
"green",
"yellow",
"blue",
"magenta",
"cyan",
"black",
"dark-red",
"dark-green",
"dark-yellow",
"dark-blue",
"dark-magenta",
"dark-cyan",
"dark-grey"
],
"default": "don't change"
},
{
"id": "subtitles.save.filesystem",
"label": "Store subtitles next to media files (instead of metadata)",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.save.formats",
"label": "Subtitle formats to save (non-SRT only works if the previous option is enabled)",
"type": "enum",
"values": [
"SRT",
"VTT",
"SRT+VTT"
],
"default": "SRT"
},
{
"id": "subtitles.save.subFolder",
"label": "Subtitle Folder (\"current folder\" is the folder the current media file lives in)",
@@ -492,7 +547,7 @@
"id": "scheduler.max_recent_items_per_library",
"label": "Scheduler: Recent items to consider per library",
"type": "text",
"default": "500"
"default": "1000"
},
{
"id": "scheduler.tasks.FindBetterSubtitles.frequency",
@@ -516,7 +571,13 @@
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected",
"label": "Scheduler: Overwrite manually selected subtitles when better found",
"type": "bool",
"default": "true"
"default": "false"
},
{
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_modified",
"label": "Scheduler: Overwrite subtitles with non-default subtitle modifications when better found",
"type": "bool",
"default": "false"
},
{
"id": "history_size",
@@ -593,7 +654,7 @@
},
{
"id": "notify_executable",
"label": "Call this executable upon successful subtitle download",
"label": "Call this executable upon successful subtitle download (see Wiki for details)",
"type": "text",
"default": ""
},
@@ -616,6 +677,12 @@
],
"default": "WARNING"
},
{
"id": "log_debug_mods",
"label": "Log subtitle modification (debug)",
"type": "bool",
"default": "false"
},
{
"id": "log_console",
"label": "Log to console (for development/debugging)",
+4 -4
View File
@@ -9,11 +9,11 @@
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleShortVersionString</key>
<string>2.0.0</string>
<string>2.0.24</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>
<string>2.0.0.0</string>
<string>2.0.24.1558</string>
<key>PlexFrameworkVersion</key>
<string>2</string>
<key>PlexPluginClass</key>
@@ -23,7 +23,7 @@
<key>PlexPluginConsoleLogging</key>
<string>0</string>
<key>PlexPluginDevMode</key>
<string>1</string>
<string>0</string>
<key>PlexPluginCodePolicy</key>
<!-- this allows channels to access some python methods which are otherwise blocked, as well as import external code libraries, and interact with the PMS HTTP API -->
<string>Elevated</string>
@@ -32,7 +32,7 @@
&lt;h1&gt;Sub-Zero for Plex&lt;/h1&gt;&lt;i&gt;Subtitles done right&lt;/i&gt;
Version 2.0.0.0 DEV #7
Version 2.0.24.1558
Originally based on @bramwalet's awesome &lt;a href=&quot;https://github.com/bramwalet/Subliminal.bundle&quot;&gt;Subliminal.bundle&lt;/a&gt;
+85
View File
@@ -0,0 +1,85 @@
"""Generic interface to all dbm clones.
Instead of
import dbm
d = dbm.open(file, 'w', 0666)
use
import anydbm
d = anydbm.open(file, 'w')
The returned object is a dbhash, gdbm, dbm or dumbdbm object,
dependent on the type of database being opened (determined by whichdb
module) in the case of an existing dbm. If the dbm does not exist and
the create or new flag ('c' or 'n') was specified, the dbm type will
be determined by the availability of the modules (tested in the above
order).
It has the following interface (key and data are strings):
d[key] = data # store data at key (may override data at
# existing key)
data = d[key] # retrieve data at key (raise KeyError if no
# such key)
del d[key] # delete data stored at key (raises KeyError
# if no such key)
flag = key in d # true if the key exists
list = d.keys() # return a list of all existing keys (slow!)
Future versions may change the order in which implementations are
tested for existence, and add interfaces to other dbm-like
implementations.
"""
class error(Exception):
pass
_names = ['dbhash', 'gdbm', 'dbm', 'dumbdbm']
_errors = [error]
_defaultmod = None
for _name in _names:
try:
_mod = __import__(_name)
except ImportError:
continue
if not _defaultmod:
_defaultmod = _mod
_errors.append(_mod.error)
if not _defaultmod:
raise ImportError, "no dbm clone found; tried %s" % _names
error = tuple(_errors)
def open(file, flag='r', mode=0666):
"""Open or create database at path given by *file*.
Optional argument *flag* can be 'r' (default) for read-only access, 'w'
for read-write access of an existing database, 'c' for read-write access
to a new or existing database, and 'n' for read-write access to a new
database.
Note: 'r' and 'w' fail if the database doesn't exist; 'c' creates it
only if it doesn't exist; and 'n' always creates a new database.
"""
# guess the type of an existing database
from whichdb import whichdb
result=whichdb(file)
if result is None:
# db doesn't exist
if 'c' in flag or 'n' in flag:
# file doesn't exist and the new
# flag was used so use default type
mod = _defaultmod
else:
raise error, "need 'c' or 'n' flag to open new db"
elif result == "":
# db type cannot be determined
raise error, "db type could not be determined"
else:
mod = __import__(result)
return mod.open(file, flag, mode)
@@ -1,25 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
__title__ = 'babelfish'
__version__ = '0.5.5-dev'
__author__ = 'Antoine Bertin'
__license__ = 'BSD'
__copyright__ = 'Copyright 2015 the BabelFish authors'
import sys
if sys.version_info[0] >= 3:
basestr = str
else:
basestr = basestring
from .converters import (LanguageConverter, LanguageReverseConverter, LanguageEquivalenceConverter, CountryConverter,
CountryReverseConverter)
from .country import country_converters, COUNTRIES, COUNTRY_MATRIX, Country
from .exceptions import Error, LanguageConvertError, LanguageReverseError, CountryConvertError, CountryReverseError
from .language import language_converters, LANGUAGES, LANGUAGE_MATRIX, Language
from .script import SCRIPTS, SCRIPT_MATRIX, Script
@@ -1,287 +0,0 @@
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
import collections
from pkg_resources import iter_entry_points, EntryPoint
from ..exceptions import LanguageConvertError, LanguageReverseError
# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
class CaseInsensitiveDict(collections.MutableMapping):
"""A case-insensitive ``dict``-like object.
Implements all methods and operations of
``collections.MutableMapping`` as well as dict's ``copy``. Also
provides ``lower_items``.
All keys are expected to be strings. The structure remembers the
case of the last key to be set, and ``iter(instance)``,
``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
will contain case-sensitive keys. However, querying and contains
testing is case insensitive:
cid = CaseInsensitiveDict()
cid['English'] = 'eng'
cid['ENGLISH'] == 'eng' # True
list(cid) == ['English'] # True
If the constructor, ``.update``, or equality comparison
operations are given keys that have equal ``.lower()``s, the
behavior is undefined.
"""
def __init__(self, data=None, **kwargs):
self._store = dict()
if data is None:
data = {}
self.update(data, **kwargs)
def __setitem__(self, key, value):
# Use the lowercased key for lookups, but store the actual
# key alongside the value.
self._store[key.lower()] = (key, value)
def __getitem__(self, key):
return self._store[key.lower()][1]
def __delitem__(self, key):
del self._store[key.lower()]
def __iter__(self):
return (casedkey for casedkey, mappedvalue in self._store.values())
def __len__(self):
return len(self._store)
def lower_items(self):
"""Like iteritems(), but with all lowercase keys."""
return (
(lowerkey, keyval[1])
for (lowerkey, keyval)
in self._store.items()
)
def __eq__(self, other):
if isinstance(other, collections.Mapping):
other = CaseInsensitiveDict(other)
else:
return NotImplemented
# Compare insensitively
return dict(self.lower_items()) == dict(other.lower_items())
# Copy is required
def copy(self):
return CaseInsensitiveDict(self._store.values())
def __repr__(self):
return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
class LanguageConverter(object):
"""A :class:`LanguageConverter` supports converting an alpha3 language code with an
alpha2 country code and a script code into a custom code
.. attribute:: codes
Set of possible custom codes
"""
def convert(self, alpha3, country=None, script=None):
"""Convert an alpha3 language code with an alpha2 country code and a script code
into a custom code
:param string alpha3: ISO-639-3 language code
:param country: ISO-3166 country code, if any
:type country: string or None
:param script: ISO-15924 script code, if any
:type script: string or None
:return: the corresponding custom code
:rtype: string
:raise: :class:`~babelfish.exceptions.LanguageConvertError`
"""
raise NotImplementedError
class LanguageReverseConverter(LanguageConverter):
"""A :class:`LanguageConverter` able to reverse a custom code into a alpha3
ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
"""
def reverse(self, code):
"""Reverse a custom code into alpha3, country and script code
:param string code: custom code to reverse
:return: the corresponding alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
:rtype: tuple
:raise: :class:`~babelfish.exceptions.LanguageReverseError`
"""
raise NotImplementedError
class LanguageEquivalenceConverter(LanguageReverseConverter):
"""A :class:`LanguageEquivalenceConverter` is a utility class that allows you to easily define a
:class:`LanguageReverseConverter` by only specifying the dict from alpha3 to their corresponding symbols.
You must specify the dict of equivalence as a class variable named SYMBOLS.
If you also set the class variable CASE_SENSITIVE to ``True`` then the reverse conversion function will be
case-sensitive (it is case-insensitive by default).
Example::
class MyCodeConverter(babelfish.LanguageEquivalenceConverter):
CASE_SENSITIVE = True
SYMBOLS = {'fra': 'mycode1', 'eng': 'mycode2'}
"""
CASE_SENSITIVE = False
def __init__(self):
self.codes = set()
self.to_symbol = {}
if self.CASE_SENSITIVE:
self.from_symbol = {}
else:
self.from_symbol = CaseInsensitiveDict()
for alpha3, symbol in self.SYMBOLS.items():
self.to_symbol[alpha3] = symbol
self.from_symbol[symbol] = (alpha3, None, None)
self.codes.add(symbol)
def convert(self, alpha3, country=None, script=None):
try:
return self.to_symbol[alpha3]
except KeyError:
raise LanguageConvertError(alpha3, country, script)
def reverse(self, code):
try:
return self.from_symbol[code]
except KeyError:
raise LanguageReverseError(code)
class CountryConverter(object):
"""A :class:`CountryConverter` supports converting an alpha2 country code
into a custom code
.. attribute:: codes
Set of possible custom codes
"""
def convert(self, alpha2):
"""Convert an alpha2 country code into a custom code
:param string alpha2: ISO-3166-1 language code
:return: the corresponding custom code
:rtype: string
:raise: :class:`~babelfish.exceptions.CountryConvertError`
"""
raise NotImplementedError
class CountryReverseConverter(CountryConverter):
"""A :class:`CountryConverter` able to reverse a custom code into a alpha2
ISO-3166-1 country code
"""
def reverse(self, code):
"""Reverse a custom code into alpha2 code
:param string code: custom code to reverse
:return: the corresponding alpha2 ISO-3166-1 country code
:rtype: string
:raise: :class:`~babelfish.exceptions.CountryReverseError`
"""
raise NotImplementedError
class ConverterManager(object):
"""Manager for babelfish converters behaving like a dict with lazy loading
Loading is done in this order:
* Entry point converters
* Registered converters
* Internal converters
.. attribute:: entry_point
The entry point where to look for converters
.. attribute:: internal_converters
Internal converters with entry point syntax
"""
entry_point = ''
internal_converters = []
def __init__(self):
#: Registered converters with entry point syntax
self.registered_converters = []
#: Loaded converters
self.converters = {}
def __getitem__(self, name):
"""Get a converter, lazy loading it if necessary"""
if name in self.converters:
return self.converters[name]
for ep in iter_entry_points(self.entry_point):
if ep.name == name:
self.converters[ep.name] = ep.load()()
return self.converters[ep.name]
for ep in (EntryPoint.parse(c) for c in self.registered_converters + self.internal_converters):
if ep.name == name:
# `require` argument of ep.load() is deprecated in newer versions of setuptools
if hasattr(ep, 'resolve'):
plugin = ep.resolve()
elif hasattr(ep, '_load'):
plugin = ep._load()
else:
plugin = ep.load(require=False)
self.converters[ep.name] = plugin()
return self.converters[ep.name]
raise KeyError(name)
def __setitem__(self, name, converter):
"""Load a converter"""
self.converters[name] = converter
def __delitem__(self, name):
"""Unload a converter"""
del self.converters[name]
def __iter__(self):
"""Iterator over loaded converters"""
return iter(self.converters)
def register(self, entry_point):
"""Register a converter
:param string entry_point: converter to register (entry point syntax)
:raise: ValueError if already registered
"""
if entry_point in self.registered_converters:
raise ValueError('Already registered')
self.registered_converters.insert(0, entry_point)
def unregister(self, entry_point):
"""Unregister a converter
:param string entry_point: converter to unregister (entry point syntax)
"""
self.registered_converters.remove(entry_point)
def __contains__(self, name):
return name in self.converters
@@ -1,17 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class Alpha2Converter(LanguageEquivalenceConverter):
CASE_SENSITIVE = True
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
if iso_language.alpha2:
SYMBOLS[iso_language.alpha3] = iso_language.alpha2
@@ -1,17 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class Alpha3BConverter(LanguageEquivalenceConverter):
CASE_SENSITIVE = True
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
if iso_language.alpha3b:
SYMBOLS[iso_language.alpha3] = iso_language.alpha3b
@@ -1,17 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class Alpha3TConverter(LanguageEquivalenceConverter):
CASE_SENSITIVE = True
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
if iso_language.alpha3t:
SYMBOLS[iso_language.alpha3] = iso_language.alpha3t
@@ -1,31 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import CountryReverseConverter, CaseInsensitiveDict
from ..country import COUNTRY_MATRIX
from ..exceptions import CountryConvertError, CountryReverseError
class CountryNameConverter(CountryReverseConverter):
def __init__(self):
self.codes = set()
self.to_name = {}
self.from_name = CaseInsensitiveDict()
for country in COUNTRY_MATRIX:
self.codes.add(country.name)
self.to_name[country.alpha2] = country.name
self.from_name[country.name] = country.alpha2
def convert(self, alpha2):
if alpha2 not in self.to_name:
raise CountryConvertError(alpha2)
return self.to_name[alpha2]
def reverse(self, name):
if name not in self.from_name:
raise CountryReverseError(name)
return self.from_name[name]
@@ -1,17 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class NameConverter(LanguageEquivalenceConverter):
CASE_SENSITIVE = False
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
if iso_language.name:
SYMBOLS[iso_language.alpha3] = iso_language.name
@@ -1,36 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageReverseConverter, CaseInsensitiveDict
from ..exceptions import LanguageReverseError
from ..language import language_converters
class OpenSubtitlesConverter(LanguageReverseConverter):
def __init__(self):
self.alpha3b_converter = language_converters['alpha3b']
self.alpha2_converter = language_converters['alpha2']
self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne'}
self.from_opensubtitles = CaseInsensitiveDict({'pob': ('por', 'BR'), 'pb': ('por', 'BR'), 'ell': ('ell', None),
'scc': ('srp', None), 'mne': ('srp', 'ME')})
self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | set(['pob', 'pb', 'scc', 'mne']))
def convert(self, alpha3, country=None, script=None):
alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
if (alpha3b, country) in self.to_opensubtitles:
return self.to_opensubtitles[(alpha3b, country)]
return alpha3b
def reverse(self, opensubtitles):
if opensubtitles in self.from_opensubtitles:
return self.from_opensubtitles[opensubtitles]
for conv in [self.alpha3b_converter, self.alpha2_converter]:
try:
return conv.reverse(opensubtitles)
except LanguageReverseError:
pass
raise LanguageReverseError(opensubtitles)
@@ -1,23 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageConverter
from ..exceptions import LanguageConvertError
from ..language import LANGUAGE_MATRIX
class ScopeConverter(LanguageConverter):
FULLNAME = {'I': 'individual', 'M': 'macrolanguage', 'S': 'special'}
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
SYMBOLS[iso_language.alpha3] = iso_language.scope
codes = set(SYMBOLS.values())
def convert(self, alpha3, country=None, script=None):
if self.SYMBOLS[alpha3] in self.FULLNAME:
return self.FULLNAME[self.SYMBOLS[alpha3]]
raise LanguageConvertError(alpha3, country, script)
@@ -1,23 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageConverter
from ..exceptions import LanguageConvertError
from ..language import LANGUAGE_MATRIX
class LanguageTypeConverter(LanguageConverter):
FULLNAME = {'A': 'ancient', 'C': 'constructed', 'E': 'extinct', 'H': 'historical', 'L': 'living', 'S': 'special'}
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
SYMBOLS[iso_language.alpha3] = iso_language.type
codes = set(SYMBOLS.values())
def convert(self, alpha3, country=None, script=None):
if self.SYMBOLS[alpha3] in self.FULLNAME:
return self.FULLNAME[self.SYMBOLS[alpha3]]
raise LanguageConvertError(alpha3, country, script)
@@ -1,104 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from functools import partial
from pkg_resources import resource_stream # @UnresolvedImport
from .converters import ConverterManager
from . import basestr
COUNTRIES = {}
COUNTRY_MATRIX = []
#: The namedtuple used in the :data:`COUNTRY_MATRIX`
IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])
f = resource_stream('babelfish', 'data/iso-3166-1.txt')
f.readline()
for l in f:
iso_country = IsoCountry(*l.decode('utf-8').strip().split(';'))
COUNTRIES[iso_country.alpha2] = iso_country.name
COUNTRY_MATRIX.append(iso_country)
f.close()
class CountryConverterManager(ConverterManager):
""":class:`~babelfish.converters.ConverterManager` for country converters"""
entry_point = 'babelfish.country_converters'
internal_converters = ['name = babelfish.converters.countryname:CountryNameConverter']
country_converters = CountryConverterManager()
class CountryMeta(type):
"""The :class:`Country` metaclass
Dynamically redirect :meth:`Country.frommycode` to :meth:`Country.fromcode` with the ``mycode`` `converter`
"""
def __getattr__(cls, name):
if name.startswith('from'):
return partial(cls.fromcode, converter=name[4:])
return type.__getattribute__(cls, name)
class Country(CountryMeta(str('CountryBase'), (object,), {})):
"""A country on Earth
A country is represented by a 2-letter code from the ISO-3166 standard
:param string country: 2-letter ISO-3166 country code
"""
def __init__(self, country):
if country not in COUNTRIES:
raise ValueError('%r is not a valid country' % country)
#: ISO-3166 2-letter country code
self.alpha2 = country
@classmethod
def fromcode(cls, code, converter):
"""Create a :class:`Country` by its `code` using `converter` to
:meth:`~babelfish.converters.CountryReverseConverter.reverse` it
:param string code: the code to reverse
:param string converter: name of the :class:`~babelfish.converters.CountryReverseConverter` to use
:return: the corresponding :class:`Country` instance
:rtype: :class:`Country`
"""
return cls(country_converters[converter].reverse(code))
def __getstate__(self):
return self.alpha2
def __setstate__(self, state):
self.alpha2 = state
def __getattr__(self, name):
return country_converters[name].convert(self.alpha2)
def __hash__(self):
return hash(self.alpha2)
def __eq__(self, other):
if isinstance(other, basestr):
return str(self) == other
if not isinstance(other, Country):
return False
return self.alpha2 == other.alpha2
def __ne__(self, other):
return not self == other
def __repr__(self):
return '<Country [%s]>' % self
def __str__(self):
return self.alpha2
@@ -1,45 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
import os.path
import tempfile
import zipfile
import requests
DATA_DIR = os.path.dirname(__file__)
# iso-3166-1.txt
print('Downloading ISO-3166-1 standard (ISO country codes)...')
with open(os.path.join(DATA_DIR, 'iso-3166-1.txt'), 'w') as f:
r = requests.get('http://www.iso.org/iso/home/standards/country_codes/country_names_and_code_elements_txt.htm')
f.write(r.content.strip())
# iso-639-3.tab
print('Downloading ISO-639-3 standard (ISO language codes)...')
with tempfile.TemporaryFile() as f:
r = requests.get('http://www-01.sil.org/iso639-3/iso-639-3_Code_Tables_20130531.zip')
f.write(r.content)
with zipfile.ZipFile(f) as z:
z.extract('iso-639-3.tab', DATA_DIR)
# iso-15924
print('Downloading ISO-15924 standard (ISO script codes)...')
with tempfile.TemporaryFile() as f:
r = requests.get('http://www.unicode.org/iso15924/iso15924.txt.zip')
f.write(r.content)
with zipfile.ZipFile(f) as z:
z.extract('iso15924-utf8-20131012.txt', DATA_DIR)
# opensubtitles supported languages
print('Downloading OpenSubtitles supported languages...')
with open(os.path.join(DATA_DIR, 'opensubtitles_languages.txt'), 'w') as f:
r = requests.get('http://www.opensubtitles.org/addons/export_languages.php')
f.write(r.content)
print('Done!')
@@ -1,250 +0,0 @@
Country Name;ISO 3166-1-alpha-2 code
AFGHANISTAN;AF
ÅLAND ISLANDS;AX
ALBANIA;AL
ALGERIA;DZ
AMERICAN SAMOA;AS
ANDORRA;AD
ANGOLA;AO
ANGUILLA;AI
ANTARCTICA;AQ
ANTIGUA AND BARBUDA;AG
ARGENTINA;AR
ARMENIA;AM
ARUBA;AW
AUSTRALIA;AU
AUSTRIA;AT
AZERBAIJAN;AZ
BAHAMAS;BS
BAHRAIN;BH
BANGLADESH;BD
BARBADOS;BB
BELARUS;BY
BELGIUM;BE
BELIZE;BZ
BENIN;BJ
BERMUDA;BM
BHUTAN;BT
BOLIVIA, PLURINATIONAL STATE OF;BO
BONAIRE, SINT EUSTATIUS AND SABA;BQ
BOSNIA AND HERZEGOVINA;BA
BOTSWANA;BW
BOUVET ISLAND;BV
BRAZIL;BR
BRITISH INDIAN OCEAN TERRITORY;IO
BRUNEI DARUSSALAM;BN
BULGARIA;BG
BURKINA FASO;BF
BURUNDI;BI
CAMBODIA;KH
CAMEROON;CM
CANADA;CA
CAPE VERDE;CV
CAYMAN ISLANDS;KY
CENTRAL AFRICAN REPUBLIC;CF
CHAD;TD
CHILE;CL
CHINA;CN
CHRISTMAS ISLAND;CX
COCOS (KEELING) ISLANDS;CC
COLOMBIA;CO
COMOROS;KM
CONGO;CG
CONGO, THE DEMOCRATIC REPUBLIC OF THE;CD
COOK ISLANDS;CK
COSTA RICA;CR
CÔTE D'IVOIRE;CI
CROATIA;HR
CUBA;CU
CURAÇAO;CW
CYPRUS;CY
CZECH REPUBLIC;CZ
DENMARK;DK
DJIBOUTI;DJ
DOMINICA;DM
DOMINICAN REPUBLIC;DO
ECUADOR;EC
EGYPT;EG
EL SALVADOR;SV
EQUATORIAL GUINEA;GQ
ERITREA;ER
ESTONIA;EE
ETHIOPIA;ET
FALKLAND ISLANDS (MALVINAS);FK
FAROE ISLANDS;FO
FIJI;FJ
FINLAND;FI
FRANCE;FR
FRENCH GUIANA;GF
FRENCH POLYNESIA;PF
FRENCH SOUTHERN TERRITORIES;TF
GABON;GA
GAMBIA;GM
GEORGIA;GE
GERMANY;DE
GHANA;GH
GIBRALTAR;GI
GREECE;GR
GREENLAND;GL
GRENADA;GD
GUADELOUPE;GP
GUAM;GU
GUATEMALA;GT
GUERNSEY;GG
GUINEA;GN
GUINEA-BISSAU;GW
GUYANA;GY
HAITI;HT
HEARD ISLAND AND MCDONALD ISLANDS;HM
HOLY SEE (VATICAN CITY STATE);VA
HONDURAS;HN
HONG KONG;HK
HUNGARY;HU
ICELAND;IS
INDIA;IN
INDONESIA;ID
IRAN, ISLAMIC REPUBLIC OF;IR
IRAQ;IQ
IRELAND;IE
ISLE OF MAN;IM
ISRAEL;IL
ITALY;IT
JAMAICA;JM
JAPAN;JP
JERSEY;JE
JORDAN;JO
KAZAKHSTAN;KZ
KENYA;KE
KIRIBATI;KI
KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF;KP
KOREA, REPUBLIC OF;KR
KUWAIT;KW
KYRGYZSTAN;KG
LAO PEOPLE'S DEMOCRATIC REPUBLIC;LA
LATVIA;LV
LEBANON;LB
LESOTHO;LS
LIBERIA;LR
LIBYA;LY
LIECHTENSTEIN;LI
LITHUANIA;LT
LUXEMBOURG;LU
MACAO;MO
MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF;MK
MADAGASCAR;MG
MALAWI;MW
MALAYSIA;MY
MALDIVES;MV
MALI;ML
MALTA;MT
MARSHALL ISLANDS;MH
MARTINIQUE;MQ
MAURITANIA;MR
MAURITIUS;MU
MAYOTTE;YT
MEXICO;MX
MICRONESIA, FEDERATED STATES OF;FM
MOLDOVA, REPUBLIC OF;MD
MONACO;MC
MONGOLIA;MN
MONTENEGRO;ME
MONTSERRAT;MS
MOROCCO;MA
MOZAMBIQUE;MZ
MYANMAR;MM
NAMIBIA;NA
NAURU;NR
NEPAL;NP
NETHERLANDS;NL
NEW CALEDONIA;NC
NEW ZEALAND;NZ
NICARAGUA;NI
NIGER;NE
NIGERIA;NG
NIUE;NU
NORFOLK ISLAND;NF
NORTHERN MARIANA ISLANDS;MP
NORWAY;NO
OMAN;OM
PAKISTAN;PK
PALAU;PW
PALESTINE, STATE OF;PS
PANAMA;PA
PAPUA NEW GUINEA;PG
PARAGUAY;PY
PERU;PE
PHILIPPINES;PH
PITCAIRN;PN
POLAND;PL
PORTUGAL;PT
PUERTO RICO;PR
QATAR;QA
RÉUNION;RE
ROMANIA;RO
RUSSIAN FEDERATION;RU
RWANDA;RW
SAINT BARTHÉLEMY;BL
SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA;SH
SAINT KITTS AND NEVIS;KN
SAINT LUCIA;LC
SAINT MARTIN (FRENCH PART);MF
SAINT PIERRE AND MIQUELON;PM
SAINT VINCENT AND THE GRENADINES;VC
SAMOA;WS
SAN MARINO;SM
SAO TOME AND PRINCIPE;ST
SAUDI ARABIA;SA
SENEGAL;SN
SERBIA;RS
SEYCHELLES;SC
SIERRA LEONE;SL
SINGAPORE;SG
SINT MAARTEN (DUTCH PART);SX
SLOVAKIA;SK
SLOVENIA;SI
SOLOMON ISLANDS;SB
SOMALIA;SO
SOUTH AFRICA;ZA
SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS;GS
SOUTH SUDAN;SS
SPAIN;ES
SRI LANKA;LK
SUDAN;SD
SURINAME;SR
SVALBARD AND JAN MAYEN;SJ
SWAZILAND;SZ
SWEDEN;SE
SWITZERLAND;CH
SYRIAN ARAB REPUBLIC;SY
TAIWAN, PROVINCE OF CHINA;TW
TAJIKISTAN;TJ
TANZANIA, UNITED REPUBLIC OF;TZ
THAILAND;TH
TIMOR-LESTE;TL
TOGO;TG
TOKELAU;TK
TONGA;TO
TRINIDAD AND TOBAGO;TT
TUNISIA;TN
TURKEY;TR
TURKMENISTAN;TM
TURKS AND CAICOS ISLANDS;TC
TUVALU;TV
UGANDA;UG
UKRAINE;UA
UNITED ARAB EMIRATES;AE
UNITED KINGDOM;GB
UNITED STATES;US
UNITED STATES MINOR OUTLYING ISLANDS;UM
URUGUAY;UY
UZBEKISTAN;UZ
VANUATU;VU
VENEZUELA, BOLIVARIAN REPUBLIC OF;VE
VIET NAM;VN
VIRGIN ISLANDS, BRITISH;VG
VIRGIN ISLANDS, U.S.;VI
WALLIS AND FUTUNA;WF
WESTERN SAHARA;EH
YEMEN;YE
ZAMBIA;ZM
ZIMBABWE;ZW
File diff suppressed because it is too large Load Diff
@@ -1,176 +0,0 @@
#
# ISO 15924 - Codes for the representation of names of scripts
# Codes pour la représentation des noms d’écritures
# Format:
# Code;N°;English Name;Nom français;PVA;Date
#
Afak;439;Afaka;afaka;;2010-12-21
Aghb;239;Caucasian Albanian;aghbanien;;2012-10-16
Ahom;338;Ahom, Tai Ahom;âhom;;2012-11-01
Arab;160;Arabic;arabe;Arabic;2004-05-01
Armi;124;Imperial Aramaic;araméen impérial;Imperial_Aramaic;2009-06-01
Armn;230;Armenian;arménien;Armenian;2004-05-01
Avst;134;Avestan;avestique;Avestan;2009-06-01
Bali;360;Balinese;balinais;Balinese;2006-10-10
Bamu;435;Bamum;bamoum;Bamum;2009-06-01
Bass;259;Bassa Vah;bassa;;2010-03-26
Batk;365;Batak;batik;Batak;2010-07-23
Beng;325;Bengali;bengalî;Bengali;2004-05-01
Blis;550;Blissymbols;symboles Bliss;;2004-05-01
Bopo;285;Bopomofo;bopomofo;Bopomofo;2004-05-01
Brah;300;Brahmi;brahma;Brahmi;2010-07-23
Brai;570;Braille;braille;Braille;2004-05-01
Bugi;367;Buginese;bouguis;Buginese;2006-06-21
Buhd;372;Buhid;bouhide;Buhid;2004-05-01
Cakm;349;Chakma;chakma;Chakma;2012-02-06
Cans;440;Unified Canadian Aboriginal Syllabics;syllabaire autochtone canadien unifié;Canadian_Aboriginal;2004-05-29
Cari;201;Carian;carien;Carian;2007-07-02
Cham;358;Cham;cham (čam, tcham);Cham;2009-11-11
Cher;445;Cherokee;tchérokî;Cherokee;2004-05-01
Cirt;291;Cirth;cirth;;2004-05-01
Copt;204;Coptic;copte;Coptic;2006-06-21
Cprt;403;Cypriot;syllabaire chypriote;Cypriot;2004-05-01
Cyrl;220;Cyrillic;cyrillique;Cyrillic;2004-05-01
Cyrs;221;Cyrillic (Old Church Slavonic variant);cyrillique (variante slavonne);;2004-05-01
Deva;315;Devanagari (Nagari);dévanâgarî;Devanagari;2004-05-01
Dsrt;250;Deseret (Mormon);déseret (mormon);Deseret;2004-05-01
Dupl;755;Duployan shorthand, Duployan stenography;sténographie Duployé;;2010-07-18
Egyd;070;Egyptian demotic;démotique égyptien;;2004-05-01
Egyh;060;Egyptian hieratic;hiératique égyptien;;2004-05-01
Egyp;050;Egyptian hieroglyphs;hiéroglyphes égyptiens;Egyptian_Hieroglyphs;2009-06-01
Elba;226;Elbasan;elbasan;;2010-07-18
Ethi;430;Ethiopic (Geʻez);éthiopien (geʻez, guèze);Ethiopic;2004-10-25
Geor;240;Georgian (Mkhedruli);géorgien (mkhédrouli);Georgian;2004-05-29
Geok;241;Khutsuri (Asomtavruli and Nuskhuri);khoutsouri (assomtavrouli et nouskhouri);Georgian;2012-10-16
Glag;225;Glagolitic;glagolitique;Glagolitic;2006-06-21
Goth;206;Gothic;gotique;Gothic;2004-05-01
Gran;343;Grantha;grantha;;2009-11-11
Grek;200;Greek;grec;Greek;2004-05-01
Gujr;320;Gujarati;goudjarâtî (gujrâtî);Gujarati;2004-05-01
Guru;310;Gurmukhi;gourmoukhî;Gurmukhi;2004-05-01
Hang;286;Hangul (Hangŭl, Hangeul);hangûl (hangŭl, hangeul);Hangul;2004-05-29
Hani;500;Han (Hanzi, Kanji, Hanja);idéogrammes han (sinogrammes);Han;2009-02-23
Hano;371;Hanunoo (Hanunóo);hanounóo;Hanunoo;2004-05-29
Hans;501;Han (Simplified variant);idéogrammes han (variante simplifiée);;2004-05-29
Hant;502;Han (Traditional variant);idéogrammes han (variante traditionnelle);;2004-05-29
Hatr;127;Hatran;hatrénien;;2012-11-01
Hebr;125;Hebrew;hébreu;Hebrew;2004-05-01
Hira;410;Hiragana;hiragana;Hiragana;2004-05-01
Hluw;080;Anatolian Hieroglyphs (Luwian Hieroglyphs, Hittite Hieroglyphs);hiéroglyphes anatoliens (hiéroglyphes louvites, hiéroglyphes hittites);;2011-12-09
Hmng;450;Pahawh Hmong;pahawh hmong;;2004-05-01
Hrkt;412;Japanese syllabaries (alias for Hiragana + Katakana);syllabaires japonais (alias pour hiragana + katakana);Katakana_Or_Hiragana;2011-06-21
Hung;176;Old Hungarian (Hungarian Runic);runes hongroises (ancien hongrois);;2012-10-16
Inds;610;Indus (Harappan);indus;;2004-05-01
Ital;210;Old Italic (Etruscan, Oscan, etc.);ancien italique (étrusque, osque, etc.);Old_Italic;2004-05-29
Java;361;Javanese;javanais;Javanese;2009-06-01
Jpan;413;Japanese (alias for Han + Hiragana + Katakana);japonais (alias pour han + hiragana + katakana);;2006-06-21
Jurc;510;Jurchen;jurchen;;2010-12-21
Kali;357;Kayah Li;kayah li;Kayah_Li;2007-07-02
Kana;411;Katakana;katakana;Katakana;2004-05-01
Khar;305;Kharoshthi;kharochthî;Kharoshthi;2006-06-21
Khmr;355;Khmer;khmer;Khmer;2004-05-29
Khoj;322;Khojki;khojkî;;2011-06-21
Knda;345;Kannada;kannara (canara);Kannada;2004-05-29
Kore;287;Korean (alias for Hangul + Han);coréen (alias pour hangûl + han);;2007-06-13
Kpel;436;Kpelle;kpèllé;;2010-03-26
Kthi;317;Kaithi;kaithî;Kaithi;2009-06-01
Lana;351;Tai Tham (Lanna);taï tham (lanna);Tai_Tham;2009-06-01
Laoo;356;Lao;laotien;Lao;2004-05-01
Latf;217;Latin (Fraktur variant);latin (variante brisée);;2004-05-01
Latg;216;Latin (Gaelic variant);latin (variante gaélique);;2004-05-01
Latn;215;Latin;latin;Latin;2004-05-01
Lepc;335;Lepcha (Róng);lepcha (róng);Lepcha;2007-07-02
Limb;336;Limbu;limbou;Limbu;2004-05-29
Lina;400;Linear A;linéaire A;;2004-05-01
Linb;401;Linear B;linéaire B;Linear_B;2004-05-29
Lisu;399;Lisu (Fraser);lisu (Fraser);Lisu;2009-06-01
Loma;437;Loma;loma;;2010-03-26
Lyci;202;Lycian;lycien;Lycian;2007-07-02
Lydi;116;Lydian;lydien;Lydian;2007-07-02
Mahj;314;Mahajani;mahâjanî;;2012-10-16
Mand;140;Mandaic, Mandaean;mandéen;Mandaic;2010-07-23
Mani;139;Manichaean;manichéen;;2007-07-15
Maya;090;Mayan hieroglyphs;hiéroglyphes mayas;;2004-05-01
Mend;438;Mende Kikakui;mendé kikakui;;2013-10-12
Merc;101;Meroitic Cursive;cursif méroïtique;Meroitic_Cursive;2012-02-06
Mero;100;Meroitic Hieroglyphs;hiéroglyphes méroïtiques;Meroitic_Hieroglyphs;2012-02-06
Mlym;347;Malayalam;malayâlam;Malayalam;2004-05-01
Modi;323;Modi, Moḍī;modî;;2013-10-12
Moon;218;Moon (Moon code, Moon script, Moon type);écriture Moon;;2006-12-11
Mong;145;Mongolian;mongol;Mongolian;2004-05-01
Mroo;199;Mro, Mru;mro;;2010-12-21
Mtei;337;Meitei Mayek (Meithei, Meetei);meitei mayek;Meetei_Mayek;2009-06-01
Mult;323; Multani;multanî;;2012-11-01
Mymr;350;Myanmar (Burmese);birman;Myanmar;2004-05-01
Narb;106;Old North Arabian (Ancient North Arabian);nord-arabique;;2010-03-26
Nbat;159;Nabataean;nabatéen;;2010-03-26
Nkgb;420;Nakhi Geba ('Na-'Khi ²Ggŏ-¹baw, Naxi Geba);nakhi géba;;2009-02-23
Nkoo;165;NKo;nko;Nko;2006-10-10
Nshu;499;Nüshu;nüshu;;2010-12-21
Ogam;212;Ogham;ogam;Ogham;2004-05-01
Olck;261;Ol Chiki (Ol Cemet, Ol, Santali);ol tchiki;Ol_Chiki;2007-07-02
Orkh;175;Old Turkic, Orkhon Runic;orkhon;Old_Turkic;2009-06-01
Orya;327;Oriya;oriyâ;Oriya;2004-05-01
Osma;260;Osmanya;osmanais;Osmanya;2004-05-01
Palm;126;Palmyrene;palmyrénien;;2010-03-26
Pauc;263;Pau Cin Hau;paou chin haou;;2013-10-12
Perm;227;Old Permic;ancien permien;;2004-05-01
Phag;331;Phags-pa;phags pa;Phags_Pa;2006-10-10
Phli;131;Inscriptional Pahlavi;pehlevi des inscriptions;Inscriptional_Pahlavi;2009-06-01
Phlp;132;Psalter Pahlavi;pehlevi des psautiers;;2007-11-26
Phlv;133;Book Pahlavi;pehlevi des livres;;2007-07-15
Phnx;115;Phoenician;phénicien;Phoenician;2006-10-10
Plrd;282;Miao (Pollard);miao (Pollard);Miao;2012-02-06
Prti;130;Inscriptional Parthian;parthe des inscriptions;Inscriptional_Parthian;2009-06-01
Qaaa;900;Reserved for private use (start);réservé à lusage privé (début);;2004-05-29
Qabx;949;Reserved for private use (end);réservé à lusage privé (fin);;2004-05-29
Rjng;363;Rejang (Redjang, Kaganga);redjang (kaganga);Rejang;2009-02-23
Roro;620;Rongorongo;rongorongo;;2004-05-01
Runr;211;Runic;runique;Runic;2004-05-01
Samr;123;Samaritan;samaritain;Samaritan;2009-06-01
Sara;292;Sarati;sarati;;2004-05-29
Sarb;105;Old South Arabian;sud-arabique, himyarite;Old_South_Arabian;2009-06-01
Saur;344;Saurashtra;saurachtra;Saurashtra;2007-07-02
Sgnw;095;SignWriting;SignÉcriture, SignWriting;;2006-10-10
Shaw;281;Shavian (Shaw);shavien (Shaw);Shavian;2004-05-01
Shrd;319;Sharada, Śāradā;charada, shard;Sharada;2012-02-06
Sidd;302;Siddham, Siddhaṃ, Siddhamātṛkā;siddham;;2013-10-12
Sind;318;Khudawadi, Sindhi;khoudawadî, sindhî;;2010-12-21
Sinh;348;Sinhala;singhalais;Sinhala;2004-05-01
Sora;398;Sora Sompeng;sora sompeng;Sora_Sompeng;2012-02-06
Sund;362;Sundanese;sundanais;Sundanese;2007-07-02
Sylo;316;Syloti Nagri;sylotî nâgrî;Syloti_Nagri;2006-06-21
Syrc;135;Syriac;syriaque;Syriac;2004-05-01
Syre;138;Syriac (Estrangelo variant);syriaque (variante estranghélo);;2004-05-01
Syrj;137;Syriac (Western variant);syriaque (variante occidentale);;2004-05-01
Syrn;136;Syriac (Eastern variant);syriaque (variante orientale);;2004-05-01
Tagb;373;Tagbanwa;tagbanoua;Tagbanwa;2004-05-01
Takr;321;Takri, Ṭākrī, Ṭāṅkrī;tâkrî;Takri;2012-02-06
Tale;353;Tai Le;taï-le;Tai_Le;2004-10-25
Talu;354;New Tai Lue;nouveau taï-lue;New_Tai_Lue;2006-06-21
Taml;346;Tamil;tamoul;Tamil;2004-05-01
Tang;520;Tangut;tangoute;;2010-12-21
Tavt;359;Tai Viet;taï viêt;Tai_Viet;2009-06-01
Telu;340;Telugu;télougou;Telugu;2004-05-01
Teng;290;Tengwar;tengwar;;2004-05-01
Tfng;120;Tifinagh (Berber);tifinagh (berbère);Tifinagh;2006-06-21
Tglg;370;Tagalog (Baybayin, Alibata);tagal (baybayin, alibata);Tagalog;2009-02-23
Thaa;170;Thaana;thâna;Thaana;2004-05-01
Thai;352;Thai;thaï;Thai;2004-05-01
Tibt;330;Tibetan;tibétain;Tibetan;2004-05-01
Tirh;326;Tirhuta;tirhouta;;2011-12-09
Ugar;040;Ugaritic;ougaritique;Ugaritic;2004-05-01
Vaii;470;Vai;vaï;Vai;2007-07-02
Visp;280;Visible Speech;parole visible;;2004-05-01
Wara;262;Warang Citi (Varang Kshiti);warang citi;;2009-11-11
Wole;480;Woleai;woléaï;;2010-12-21
Xpeo;030;Old Persian;cunéiforme persépolitain;Old_Persian;2006-06-21
Xsux;020;Cuneiform, Sumero-Akkadian;cunéiforme suméro-akkadien;Cuneiform;2006-10-10
Yiii;460;Yi;yi;Yi;2004-05-01
Zinh;994;Code for inherited script;codet pour écriture héritée;Inherited;2009-02-23
Zmth;995;Mathematical notation;notation mathématique;;2007-11-26
Zsym;996;Symbols;symboles;;2007-11-26
Zxxx;997;Code for unwritten documents;codet pour les documents non écrits;;2011-06-21
Zyyy;998;Code for undetermined script;codet pour écriture indéterminée;Common;2004-05-29
Zzzz;999;Code for uncoded script;codet pour écriture non codée;Unknown;2006-10-10
@@ -1,474 +0,0 @@
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
aar aa Afar, afar 0 0
abk ab Abkhazian 0 0
ace Achinese 0 0
ach Acoli 0 0
ada Adangme 0 0
ady adyghé 0 0
afa Afro-Asiatic (Other) 0 0
afh Afrihili 0 0
afr af Afrikaans 1 0
ain Ainu 0 0
aka ak Akan 0 0
akk Akkadian 0 0
alb sq Albanian 1 1
ale Aleut 0 0
alg Algonquian languages 0 0
alt Southern Altai 0 0
amh am Amharic 0 0
ang English, Old (ca.450-1100) 0 0
apa Apache languages 0 0
ara ar Arabic 1 1
arc Aramaic 0 0
arg an Aragonese 0 0
arm hy Armenian 1 0
arn Araucanian 0 0
arp Arapaho 0 0
art Artificial (Other) 0 0
arw Arawak 0 0
asm as Assamese 0 0
ast Asturian, Bable 0 0
ath Athapascan languages 0 0
aus Australian languages 0 0
ava av Avaric 0 0
ave ae Avestan 0 0
awa Awadhi 0 0
aym ay Aymara 0 0
aze az Azerbaijani 0 0
bad Banda 0 0
bai Bamileke languages 0 0
bak ba Bashkir 0 0
bal Baluchi 0 0
bam bm Bambara 0 0
ban Balinese 0 0
baq eu Basque 1 1
bas Basa 0 0
bat Baltic (Other) 0 0
bej Beja 0 0
bel be Belarusian 0 0
bem Bemba 0 0
ben bn Bengali 1 0
ber Berber (Other) 0 0
bho Bhojpuri 0 0
bih bh Bihari 0 0
bik Bikol 0 0
bin Bini 0 0
bis bi Bislama 0 0
bla Siksika 0 0
bnt Bantu (Other) 0 0
bos bs Bosnian 1 0
bra Braj 0 0
bre br Breton 1 0
btk Batak (Indonesia) 0 0
bua Buriat 0 0
bug Buginese 0 0
bul bg Bulgarian 1 1
bur my Burmese 1 0
byn Blin 0 0
cad Caddo 0 0
cai Central American Indian (Other) 0 0
car Carib 0 0
cat ca Catalan 1 1
cau Caucasian (Other) 0 0
ceb Cebuano 0 0
cel Celtic (Other) 0 0
cha ch Chamorro 0 0
chb Chibcha 0 0
che ce Chechen 0 0
chg Chagatai 0 0
chi zh Chinese 1 1
chk Chuukese 0 0
chm Mari 0 0
chn Chinook jargon 0 0
cho Choctaw 0 0
chp Chipewyan 0 0
chr Cherokee 0 0
chu cu Church Slavic 0 0
chv cv Chuvash 0 0
chy Cheyenne 0 0
cmc Chamic languages 0 0
cop Coptic 0 0
cor kw Cornish 0 0
cos co Corsican 0 0
cpe Creoles and pidgins, English based (Other) 0 0
cpf Creoles and pidgins, French-based (Other) 0 0
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
cre cr Cree 0 0
crh Crimean Tatar 0 0
crp Creoles and pidgins (Other) 0 0
csb Kashubian 0 0
cus Cushitic (Other)' couchitiques, autres langues 0 0
cze cs Czech 1 1
dak Dakota 0 0
dan da Danish 1 1
dar Dargwa 0 0
day Dayak 0 0
del Delaware 0 0
den Slave (Athapascan) 0 0
dgr Dogrib 0 0
din Dinka 0 0
div dv Divehi 0 0
doi Dogri 0 0
dra Dravidian (Other) 0 0
dua Duala 0 0
dum Dutch, Middle (ca.1050-1350) 0 0
dut nl Dutch 1 1
dyu Dyula 0 0
dzo dz Dzongkha 0 0
efi Efik 0 0
egy Egyptian (Ancient) 0 0
eka Ekajuk 0 0
elx Elamite 0 0
eng en English 1 1
enm English, Middle (1100-1500) 0 0
epo eo Esperanto 1 0
est et Estonian 1 1
ewe ee Ewe 0 0
ewo Ewondo 0 0
fan Fang 0 0
fao fo Faroese 0 0
fat Fanti 0 0
fij fj Fijian 0 0
fil Filipino 0 0
fin fi Finnish 1 1
fiu Finno-Ugrian (Other) 0 0
fon Fon 0 0
fre fr French 1 1
frm French, Middle (ca.1400-1600) 0 0
fro French, Old (842-ca.1400) 0 0
fry fy Frisian 0 0
ful ff Fulah 0 0
fur Friulian 0 0
gaa Ga 0 0
gay Gayo 0 0
gba Gbaya 0 0
gem Germanic (Other) 0 0
geo ka Georgian 1 1
ger de German 1 1
gez Geez 0 0
gil Gilbertese 0 0
gla gd Gaelic 0 0
gle ga Irish 0 0
glg gl Galician 1 1
glv gv Manx 0 0
gmh German, Middle High (ca.1050-1500) 0 0
goh German, Old High (ca.750-1050) 0 0
gon Gondi 0 0
gor Gorontalo 0 0
got Gothic 0 0
grb Grebo 0 0
grc Greek, Ancient (to 1453) 0 0
ell el Greek 1 1
grn gn Guarani 0 0
guj gu Gujarati 0 0
gwi Gwich´in 0 0
hai Haida 0 0
hat ht Haitian 0 0
hau ha Hausa 0 0
haw Hawaiian 0 0
heb he Hebrew 1 1
her hz Herero 0 0
hil Hiligaynon 0 0
him Himachali 0 0
hin hi Hindi 1 1
hit Hittite 0 0
hmn Hmong 0 0
hmo ho Hiri Motu 0 0
hrv hr Croatian 1 1
hun hu Hungarian 1 1
hup Hupa 0 0
iba Iban 0 0
ibo ig Igbo 0 0
ice is Icelandic 1 1
ido io Ido 0 0
iii ii Sichuan Yi 0 0
ijo Ijo 0 0
iku iu Inuktitut 0 0
ile ie Interlingue 0 0
ilo Iloko 0 0
ina ia Interlingua (International Auxiliary Language Asso 0 0
inc Indic (Other) 0 0
ind id Indonesian 1 1
ine Indo-European (Other) 0 0
inh Ingush 0 0
ipk ik Inupiaq 0 0
ira Iranian (Other) 0 0
iro Iroquoian languages 0 0
ita it Italian 1 1
jav jv Javanese 0 0
jpn ja Japanese 1 1
jpr Judeo-Persian 0 0
jrb Judeo-Arabic 0 0
kaa Kara-Kalpak 0 0
kab Kabyle 0 0
kac Kachin 0 0
kal kl Kalaallisut 0 0
kam Kamba 0 0
kan kn Kannada 0 0
kar Karen 0 0
kas ks Kashmiri 0 0
kau kr Kanuri 0 0
kaw Kawi 0 0
kaz kk Kazakh 1 0
kbd Kabardian 0 0
kha Khasi 0 0
khi Khoisan (Other) 0 0
khm km Khmer 1 1
kho Khotanese 0 0
kik ki Kikuyu 0 0
kin rw Kinyarwanda 0 0
kir ky Kirghiz 0 0
kmb Kimbundu 0 0
kok Konkani 0 0
kom kv Komi 0 0
kon kg Kongo 0 0
kor ko Korean 1 1
kos Kosraean 0 0
kpe Kpelle 0 0
krc Karachay-Balkar 0 0
kro Kru 0 0
kru Kurukh 0 0
kua kj Kuanyama 0 0
kum Kumyk 0 0
kur ku Kurdish 0 0
kut Kutenai 0 0
lad Ladino 0 0
lah Lahnda 0 0
lam Lamba 0 0
lao lo Lao 0 0
lat la Latin 0 0
lav lv Latvian 1 0
lez Lezghian 0 0
lim li Limburgan 0 0
lin ln Lingala 0 0
lit lt Lithuanian 1 0
lol Mongo 0 0
loz Lozi 0 0
ltz lb Luxembourgish 1 0
lua Luba-Lulua 0 0
lub lu Luba-Katanga 0 0
lug lg Ganda 0 0
lui Luiseno 0 0
lun Lunda 0 0
luo Luo (Kenya and Tanzania) 0 0
lus lushai 0 0
mac mk Macedonian 1 1
mad Madurese 0 0
mag Magahi 0 0
mah mh Marshallese 0 0
mai Maithili 0 0
mak Makasar 0 0
mal ml Malayalam 1 0
man Mandingo 0 0
mao mi Maori 0 0
map Austronesian (Other) 0 0
mar mr Marathi 0 0
mas Masai 0 0
may ms Malay 1 1
mdf Moksha 0 0
mdr Mandar 0 0
men Mende 0 0
mga Irish, Middle (900-1200) 0 0
mic Mi'kmaq 0 0
min Minangkabau 0 0
mis Miscellaneous languages 0 0
mkh Mon-Khmer (Other) 0 0
mlg mg Malagasy 0 0
mlt mt Maltese 0 0
mnc Manchu 0 0
mni Manipuri 0 0
mno Manobo languages 0 0
moh Mohawk 0 0
mol mo Moldavian 0 0
mon mn Mongolian 1 0
mos Mossi 0 0
mwl Mirandese 0 0
mul Multiple languages 0 0
mun Munda languages 0 0
mus Creek 0 0
mwr Marwari 0 0
myn Mayan languages 0 0
myv Erzya 0 0
nah Nahuatl 0 0
nai North American Indian 0 0
nap Neapolitan 0 0
nau na Nauru 0 0
nav nv Navajo 0 0
nbl nr Ndebele, South 0 0
nde nd Ndebele, North 0 0
ndo ng Ndonga 0 0
nds Low German 0 0
nep ne Nepali 0 0
new Nepal Bhasa 0 0
nia Nias 0 0
nic Niger-Kordofanian (Other) 0 0
niu Niuean 0 0
nno nn Norwegian Nynorsk 0 0
nob nb Norwegian Bokmal 0 0
nog Nogai 0 0
non Norse, Old 0 0
nor no Norwegian 1 1
nso Northern Sotho 0 0
nub Nubian languages 0 0
nwc Classical Newari 0 0
nya ny Chichewa 0 0
nym Nyamwezi 0 0
nyn Nyankole 0 0
nyo Nyoro 0 0
nzi Nzima 0 0
oci oc Occitan 1 1
oji oj Ojibwa 0 0
ori or Oriya 0 0
orm om Oromo 0 0
osa Osage 0 0
oss os Ossetian 0 0
ota Turkish, Ottoman (1500-1928) 0 0
oto Otomian languages 0 0
paa Papuan (Other) 0 0
pag Pangasinan 0 0
pal Pahlavi 0 0
pam Pampanga 0 0
pan pa Panjabi 0 0
pap Papiamento 0 0
pau Palauan 0 0
peo Persian, Old (ca.600-400 B.C.) 0 0
per fa Persian 1 1
phi Philippine (Other) 0 0
phn Phoenician 0 0
pli pi Pali 0 0
pol pl Polish 1 1
pon Pohnpeian 0 0
por pt Portuguese 1 1
pra Prakrit languages 0 0
pro Provençal, Old (to 1500) 0 0
pus ps Pushto 0 0
que qu Quechua 0 0
raj Rajasthani 0 0
rap Rapanui 0 0
rar Rarotongan 0 0
roa Romance (Other) 0 0
roh rm Raeto-Romance 0 0
rom Romany 0 0
run rn Rundi 0 0
rup Aromanian 0 0
rus ru Russian 1 1
sad Sandawe 0 0
sag sg Sango 0 0
sah Yakut 0 0
sai South American Indian (Other) 0 0
sal Salishan languages 0 0
sam Samaritan Aramaic 0 0
san sa Sanskrit 0 0
sas Sasak 0 0
sat Santali 0 0
scc sr Serbian 1 1
scn Sicilian 0 0
sco Scots 0 0
sel Selkup 0 0
sem Semitic (Other) 0 0
sga Irish, Old (to 900) 0 0
sgn Sign Languages 0 0
shn Shan 0 0
sid Sidamo 0 0
sin si Sinhalese 1 1
sio Siouan languages 0 0
sit Sino-Tibetan (Other) 0 0
sla Slavic (Other) 0 0
slo sk Slovak 1 1
slv sl Slovenian 1 1
sma Southern Sami 0 0
sme se Northern Sami 0 0
smi Sami languages (Other) 0 0
smj Lule Sami 0 0
smn Inari Sami 0 0
smo sm Samoan 0 0
sms Skolt Sami 0 0
sna sn Shona 0 0
snd sd Sindhi 0 0
snk Soninke 0 0
sog Sogdian 0 0
som so Somali 0 0
son Songhai 0 0
sot st Sotho, Southern 0 0
spa es Spanish 1 1
srd sc Sardinian 0 0
srr Serer 0 0
ssa Nilo-Saharan (Other) 0 0
ssw ss Swati 0 0
suk Sukuma 0 0
sun su Sundanese 0 0
sus Susu 0 0
sux Sumerian 0 0
swa sw Swahili 1 0
swe sv Swedish 1 1
syr Syriac 1 0
tah ty Tahitian 0 0
tai Tai (Other) 0 0
tam ta Tamil 1 0
tat tt Tatar 0 0
tel te Telugu 1 0
tem Timne 0 0
ter Tereno 0 0
tet Tetum 0 0
tgk tg Tajik 0 0
tgl tl Tagalog 1 1
tha th Thai 1 1
tib bo Tibetan 0 0
tig Tigre 0 0
tir ti Tigrinya 0 0
tiv Tiv 0 0
tkl Tokelau 0 0
tlh Klingon 0 0
tli Tlingit 0 0
tmh Tamashek 0 0
tog Tonga (Nyasa) 0 0
ton to Tonga (Tonga Islands) 0 0
tpi Tok Pisin 0 0
tsi Tsimshian 0 0
tsn tn Tswana 0 0
tso ts Tsonga 0 0
tuk tk Turkmen 0 0
tum Tumbuka 0 0
tup Tupi languages 0 0
tur tr Turkish 1 1
tut Altaic (Other) 0 0
tvl Tuvalu 0 0
twi tw Twi 0 0
tyv Tuvinian 0 0
udm Udmurt 0 0
uga Ugaritic 0 0
uig ug Uighur 0 0
ukr uk Ukrainian 1 1
umb Umbundu 0 0
und Undetermined 0 0
urd ur Urdu 1 0
uzb uz Uzbek 0 0
vai Vai 0 0
ven ve Venda 0 0
vie vi Vietnamese 1 1
vol vo Volapük 0 0
vot Votic 0 0
wak Wakashan languages 0 0
wal Walamo 0 0
war Waray 0 0
was Washo 0 0
wel cy Welsh 0 0
wen Sorbian languages 0 0
wln wa Walloon 0 0
wol wo Wolof 0 0
xal Kalmyk 0 0
xho xh Xhosa 0 0
yao Yao 0 0
yap Yapese 0 0
yid yi Yiddish 0 0
yor yo Yoruba 0 0
ypk Yupik languages 0 0
zap Zapotec 0 0
zen Zenaga 0 0
zha za Zhuang 0 0
znd Zande 0 0
zul zu Zulu 0 0
zun Zuni 0 0
rum ro Romanian 1 1
pob pb Brazilian 1 1
mne Montenegrin 1 0
@@ -1,85 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
class Error(Exception):
"""Base class for all exceptions in babelfish"""
pass
class LanguageError(Error, AttributeError):
"""Base class for all language exceptions in babelfish"""
pass
class LanguageConvertError(LanguageError):
"""Exception raised by converters when :meth:`~babelfish.converters.LanguageConverter.convert` fails
:param string alpha3: alpha3 code that failed conversion
:param country: country code that failed conversion, if any
:type country: string or None
:param script: script code that failed conversion, if any
:type script: string or None
"""
def __init__(self, alpha3, country=None, script=None):
self.alpha3 = alpha3
self.country = country
self.script = script
def __str__(self):
s = self.alpha3
if self.country is not None:
s += '-' + self.country
if self.script is not None:
s += '-' + self.script
return s
class LanguageReverseError(LanguageError):
"""Exception raised by converters when :meth:`~babelfish.converters.LanguageReverseConverter.reverse` fails
:param string code: code that failed reverse conversion
"""
def __init__(self, code):
self.code = code
def __str__(self):
return repr(self.code)
class CountryError(Error, AttributeError):
"""Base class for all country exceptions in babelfish"""
pass
class CountryConvertError(CountryError):
"""Exception raised by converters when :meth:`~babelfish.converters.CountryConverter.convert` fails
:param string alpha2: alpha2 code that failed conversion
"""
def __init__(self, alpha2):
self.alpha2 = alpha2
def __str__(self):
return self.alpha2
class CountryReverseError(CountryError):
"""Exception raised by converters when :meth:`~babelfish.converters.CountryReverseConverter.reverse` fails
:param string code: code that failed reverse conversion
"""
def __init__(self, code):
self.code = code
def __str__(self):
return repr(self.code)
@@ -1,185 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from functools import partial
from pkg_resources import resource_stream # @UnresolvedImport
from .converters import ConverterManager
from .country import Country
from .exceptions import LanguageConvertError
from .script import Script
from . import basestr
LANGUAGES = set()
LANGUAGE_MATRIX = []
#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])
f = resource_stream('babelfish', 'data/iso-639-3.tab')
f.readline()
for l in f:
iso_language = IsoLanguage(*l.decode('utf-8').split('\t'))
LANGUAGES.add(iso_language.alpha3)
LANGUAGE_MATRIX.append(iso_language)
f.close()
class LanguageConverterManager(ConverterManager):
""":class:`~babelfish.converters.ConverterManager` for language converters"""
entry_point = 'babelfish.language_converters'
internal_converters = ['alpha2 = babelfish.converters.alpha2:Alpha2Converter',
'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
'name = babelfish.converters.name:NameConverter',
'scope = babelfish.converters.scope:ScopeConverter',
'type = babelfish.converters.type:LanguageTypeConverter',
'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter']
language_converters = LanguageConverterManager()
class LanguageMeta(type):
"""The :class:`Language` metaclass
Dynamically redirect :meth:`Language.frommycode` to :meth:`Language.fromcode` with the ``mycode`` `converter`
"""
def __getattr__(cls, name):
if name.startswith('from'):
return partial(cls.fromcode, converter=name[4:])
return type.__getattribute__(cls, name)
class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
"""A human language
A human language is composed of a language part following the ISO-639
standard and can be country-specific when a :class:`~babelfish.country.Country`
is specified.
The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)
:param string language: the language as a 3-letter ISO-639-3 code
:param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
:type country: string or :class:`~babelfish.country.Country` or None
:param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
:type script: string or :class:`~babelfish.script.Script` or None
:param unknown: the unknown language as a three-letters ISO-639-3 code to use as fallback
:type unknown: string or None
:raise: ValueError if the language could not be recognized and `unknown` is ``None``
"""
def __init__(self, language, country=None, script=None, unknown=None):
if unknown is not None and language not in LANGUAGES:
language = unknown
if language not in LANGUAGES:
raise ValueError('%r is not a valid language' % language)
self.alpha3 = language
self.country = None
if isinstance(country, Country):
self.country = country
elif country is None:
self.country = None
else:
self.country = Country(country)
self.script = None
if isinstance(script, Script):
self.script = script
elif script is None:
self.script = None
else:
self.script = Script(script)
@classmethod
def fromcode(cls, code, converter):
"""Create a :class:`Language` by its `code` using `converter` to
:meth:`~babelfish.converters.LanguageReverseConverter.reverse` it
:param string code: the code to reverse
:param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
:return: the corresponding :class:`Language` instance
:rtype: :class:`Language`
"""
return cls(*language_converters[converter].reverse(code))
@classmethod
def fromietf(cls, ietf):
"""Create a :class:`Language` by from an IETF language code
:param string ietf: the ietf code
:return: the corresponding :class:`Language` instance
:rtype: :class:`Language`
"""
subtags = ietf.split('-')
language_subtag = subtags.pop(0).lower()
if len(language_subtag) == 2:
language = cls.fromalpha2(language_subtag)
else:
language = cls(language_subtag)
while subtags:
subtag = subtags.pop(0)
if len(subtag) == 2:
language.country = Country(subtag.upper())
else:
language.script = Script(subtag.capitalize())
if language.script is not None:
if subtags:
raise ValueError('Wrong IETF format. Unmatched subtags: %r' % subtags)
break
return language
def __getstate__(self):
return self.alpha3, self.country, self.script
def __setstate__(self, state):
self.alpha3, self.country, self.script = state
def __getattr__(self, name):
alpha3 = self.alpha3
country = self.country.alpha2 if self.country is not None else None
script = self.script.code if self.script is not None else None
try:
return language_converters[name].convert(alpha3, country, script)
except KeyError:
raise AttributeError(name)
def __hash__(self):
return hash(str(self))
def __eq__(self, other):
if isinstance(other, basestr):
return str(self) == other
if not isinstance(other, Language):
return False
return (self.alpha3 == other.alpha3 and
self.country == other.country and
self.script == other.script)
def __ne__(self, other):
return not self == other
def __bool__(self):
return self.alpha3 != 'und'
__nonzero__ = __bool__
def __repr__(self):
return '<Language [%s]>' % self
def __str__(self):
try:
s = self.alpha2
except LanguageConvertError:
s = self.alpha3
if self.country is not None:
s += '-' + str(self.country)
if self.script is not None:
s += '-' + str(self.script)
return s
@@ -1,76 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from pkg_resources import resource_stream # @UnresolvedImport
from . import basestr
#: Script code to script name mapping
SCRIPTS = {}
#: List of countries in the ISO-15924 as namedtuple of code, number, name, french_name, pva and date
SCRIPT_MATRIX = []
#: The namedtuple used in the :data:`SCRIPT_MATRIX`
IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])
f = resource_stream('babelfish', 'data/iso15924-utf8-20131012.txt')
f.readline()
for l in f:
l = l.decode('utf-8').strip()
if not l or l.startswith('#'):
continue
script = IsoScript._make(l.split(';'))
SCRIPT_MATRIX.append(script)
SCRIPTS[script.code] = script.name
f.close()
class Script(object):
"""A human writing system
A script is represented by a 4-letter code from the ISO-15924 standard
:param string script: 4-letter ISO-15924 script code
"""
def __init__(self, script):
if script not in SCRIPTS:
raise ValueError('%r is not a valid script' % script)
#: ISO-15924 4-letter script code
self.code = script
@property
def name(self):
"""English name of the script"""
return SCRIPTS[self.code]
def __getstate__(self):
return self.code
def __setstate__(self, state):
self.code = state
def __hash__(self):
return hash(self.code)
def __eq__(self, other):
if isinstance(other, basestr):
return self.code == other
if not isinstance(other, Script):
return False
return self.code == other.code
def __ne__(self, other):
return not self == other
def __repr__(self):
return '<Script [%s]>' % self
def __str__(self):
return self.code
@@ -1,368 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
import re
import sys
import pickle
from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
from pkg_resources import resource_stream # @UnresolvedImport
from babelfish import (LANGUAGES, Language, Country, Script, language_converters, country_converters,
LanguageReverseConverter, LanguageConvertError, LanguageReverseError, CountryReverseError)
if sys.version_info[:2] <= (2, 6):
_MAX_LENGTH = 80
def safe_repr(obj, short=False):
try:
result = repr(obj)
except Exception:
result = object.__repr__(obj)
if not short or len(result) < _MAX_LENGTH:
return result
return result[:_MAX_LENGTH] + ' [truncated]...'
class _AssertRaisesContext(object):
"""A context manager used to implement TestCase.assertRaises* methods."""
def __init__(self, expected, test_case, expected_regexp=None):
self.expected = expected
self.failureException = test_case.failureException
self.expected_regexp = expected_regexp
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, tb):
if exc_type is None:
try:
exc_name = self.expected.__name__
except AttributeError:
exc_name = str(self.expected)
raise self.failureException(
"{0} not raised".format(exc_name))
if not issubclass(exc_type, self.expected):
# let unexpected exceptions pass through
return False
self.exception = exc_value # store for later retrieval
if self.expected_regexp is None:
return True
expected_regexp = self.expected_regexp
if isinstance(expected_regexp, basestring):
expected_regexp = re.compile(expected_regexp)
if not expected_regexp.search(str(exc_value)):
raise self.failureException('"%s" does not match "%s"' %
(expected_regexp.pattern, str(exc_value)))
return True
class _Py26FixTestCase(object):
def assertIsNone(self, obj, msg=None):
"""Same as self.assertTrue(obj is None), with a nicer default message."""
if obj is not None:
standardMsg = '%s is not None' % (safe_repr(obj),)
self.fail(self._formatMessage(msg, standardMsg))
def assertIsNotNone(self, obj, msg=None):
"""Included for symmetry with assertIsNone."""
if obj is None:
standardMsg = 'unexpectedly None'
self.fail(self._formatMessage(msg, standardMsg))
def assertIn(self, member, container, msg=None):
"""Just like self.assertTrue(a in b), but with a nicer default message."""
if member not in container:
standardMsg = '%s not found in %s' % (safe_repr(member),
safe_repr(container))
self.fail(self._formatMessage(msg, standardMsg))
def assertNotIn(self, member, container, msg=None):
"""Just like self.assertTrue(a not in b), but with a nicer default message."""
if member in container:
standardMsg = '%s unexpectedly found in %s' % (safe_repr(member),
safe_repr(container))
self.fail(self._formatMessage(msg, standardMsg))
def assertIs(self, expr1, expr2, msg=None):
"""Just like self.assertTrue(a is b), but with a nicer default message."""
if expr1 is not expr2:
standardMsg = '%s is not %s' % (safe_repr(expr1),
safe_repr(expr2))
self.fail(self._formatMessage(msg, standardMsg))
def assertIsNot(self, expr1, expr2, msg=None):
"""Just like self.assertTrue(a is not b), but with a nicer default message."""
if expr1 is expr2:
standardMsg = 'unexpectedly identical: %s' % (safe_repr(expr1),)
self.fail(self._formatMessage(msg, standardMsg))
else:
class _Py26FixTestCase(object):
pass
class TestScript(TestCase, _Py26FixTestCase):
def test_wrong_script(self):
self.assertRaises(ValueError, lambda: Script('Azer'))
def test_eq(self):
self.assertEqual(Script('Latn'), Script('Latn'))
def test_ne(self):
self.assertNotEqual(Script('Cyrl'), Script('Latn'))
def test_hash(self):
self.assertEqual(hash(Script('Hira')), hash('Hira'))
def test_pickle(self):
self.assertEqual(pickle.loads(pickle.dumps(Script('Latn'))), Script('Latn'))
class TestCountry(TestCase, _Py26FixTestCase):
def test_wrong_country(self):
self.assertRaises(ValueError, lambda: Country('ZZ'))
def test_eq(self):
self.assertEqual(Country('US'), Country('US'))
def test_ne(self):
self.assertNotEqual(Country('GB'), Country('US'))
self.assertIsNotNone(Country('US'))
def test_hash(self):
self.assertEqual(hash(Country('US')), hash('US'))
def test_pickle(self):
for country in [Country('GB'), Country('US')]:
self.assertEqual(pickle.loads(pickle.dumps(country)), country)
def test_converter_name(self):
self.assertEqual(Country('US').name, 'UNITED STATES')
self.assertEqual(Country.fromname('UNITED STATES'), Country('US'))
self.assertEqual(Country.fromcode('UNITED STATES', 'name'), Country('US'))
self.assertRaises(CountryReverseError, lambda: Country.fromname('ZZZZZ'))
self.assertEqual(len(country_converters['name'].codes), 249)
class TestLanguage(TestCase, _Py26FixTestCase):
def test_languages(self):
self.assertEqual(len(LANGUAGES), 7874)
def test_wrong_language(self):
self.assertRaises(ValueError, lambda: Language('zzz'))
def test_unknown_language(self):
self.assertEqual(Language('zzzz', unknown='und'), Language('und'))
def test_converter_alpha2(self):
self.assertEqual(Language('eng').alpha2, 'en')
self.assertEqual(Language.fromalpha2('en'), Language('eng'))
self.assertEqual(Language.fromcode('en', 'alpha2'), Language('eng'))
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha2('zz'))
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)
self.assertEqual(len(language_converters['alpha2'].codes), 184)
def test_converter_alpha3b(self):
self.assertEqual(Language('fra').alpha3b, 'fre')
self.assertEqual(Language.fromalpha3b('fre'), Language('fra'))
self.assertEqual(Language.fromcode('fre', 'alpha3b'), Language('fra'))
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3b('zzz'))
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3b)
self.assertEqual(len(language_converters['alpha3b'].codes), 418)
def test_converter_alpha3t(self):
self.assertEqual(Language('fra').alpha3t, 'fra')
self.assertEqual(Language.fromalpha3t('fra'), Language('fra'))
self.assertEqual(Language.fromcode('fra', 'alpha3t'), Language('fra'))
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3t('zzz'))
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)
self.assertEqual(len(language_converters['alpha3t'].codes), 418)
def test_converter_name(self):
self.assertEqual(Language('eng').name, 'English')
self.assertEqual(Language.fromname('English'), Language('eng'))
self.assertEqual(Language.fromcode('English', 'name'), Language('eng'))
self.assertRaises(LanguageReverseError, lambda: Language.fromname('Zzzzzzzzz'))
self.assertEqual(len(language_converters['name'].codes), 7874)
def test_converter_scope(self):
self.assertEqual(language_converters['scope'].codes, set(['I', 'S', 'M']))
self.assertEqual(Language('eng').scope, 'individual')
self.assertEqual(Language('und').scope, 'special')
def test_converter_type(self):
self.assertEqual(language_converters['type'].codes, set(['A', 'C', 'E', 'H', 'L', 'S']))
self.assertEqual(Language('eng').type, 'living')
self.assertEqual(Language('und').type, 'special')
def test_converter_opensubtitles(self):
self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
# Montenegrin is not recognized as an ISO language (yet?) but for now it is
# unofficially accepted as Serbian from Montenegro
self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
self.assertRaises(LanguageReverseError, lambda: Language.fromopensubtitles('zzz'))
self.assertRaises(LanguageConvertError, lambda: Language('aaa').opensubtitles)
self.assertEqual(len(language_converters['opensubtitles'].codes), 606)
# test with all the LANGUAGES from the opensubtitles api
# downloaded from: http://www.opensubtitles.org/addons/export_languages.php
f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
f.readline()
for l in f:
idlang, alpha2, _, upload_enabled, web_enabled = l.decode('utf-8').strip().split('\t')
if not int(upload_enabled) and not int(web_enabled):
# do not test LANGUAGES that are too esoteric / not widely available
continue
self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
if alpha2:
self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
f.close()
def test_fromietf_country_script(self):
language = Language.fromietf('fra-FR-Latn')
self.assertEqual(language.alpha3, 'fra')
self.assertEqual(language.country, Country('FR'))
self.assertEqual(language.script, Script('Latn'))
def test_fromietf_country_no_script(self):
language = Language.fromietf('fra-FR')
self.assertEqual(language.alpha3, 'fra')
self.assertEqual(language.country, Country('FR'))
self.assertIsNone(language.script)
def test_fromietf_no_country_no_script(self):
language = Language.fromietf('fra-FR')
self.assertEqual(language.alpha3, 'fra')
self.assertEqual(language.country, Country('FR'))
self.assertIsNone(language.script)
def test_fromietf_no_country_script(self):
language = Language.fromietf('fra-Latn')
self.assertEqual(language.alpha3, 'fra')
self.assertIsNone(language.country)
self.assertEqual(language.script, Script('Latn'))
def test_fromietf_alpha2_language(self):
language = Language.fromietf('fr-Latn')
self.assertEqual(language.alpha3, 'fra')
self.assertIsNone(language.country)
self.assertEqual(language.script, Script('Latn'))
def test_fromietf_wrong_language(self):
self.assertRaises(ValueError, lambda: Language.fromietf('xyz-FR'))
def test_fromietf_wrong_country(self):
self.assertRaises(ValueError, lambda: Language.fromietf('fra-YZ'))
def test_fromietf_wrong_script(self):
self.assertRaises(ValueError, lambda: Language.fromietf('fra-FR-Wxyz'))
def test_eq(self):
self.assertEqual(Language('eng'), Language('eng'))
def test_ne(self):
self.assertNotEqual(Language('fra'), Language('eng'))
self.assertIsNotNone(Language('fra'))
def test_nonzero(self):
self.assertFalse(bool(Language('und')))
self.assertTrue(bool(Language('eng')))
def test_language_hasattr(self):
self.assertTrue(hasattr(Language('fra'), 'alpha3'))
self.assertTrue(hasattr(Language('fra'), 'alpha2'))
self.assertFalse(hasattr(Language('bej'), 'alpha2'))
def test_country(self):
self.assertEqual(Language('por', 'BR').country, Country('BR'))
self.assertEqual(Language('eng', Country('US')).country, Country('US'))
def test_eq_with_country(self):
self.assertEqual(Language('eng', 'US'), Language('eng', Country('US')))
def test_ne_with_country(self):
self.assertNotEqual(Language('eng', 'US'), Language('eng', Country('GB')))
def test_script(self):
self.assertEqual(Language('srp', script='Latn').script, Script('Latn'))
self.assertEqual(Language('srp', script=Script('Cyrl')).script, Script('Cyrl'))
def test_eq_with_script(self):
self.assertEqual(Language('srp', script='Latn'), Language('srp', script=Script('Latn')))
def test_ne_with_script(self):
self.assertNotEqual(Language('srp', script='Latn'), Language('srp', script=Script('Cyrl')))
def test_eq_with_country_and_script(self):
self.assertEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Latn')))
def test_ne_with_country_and_script(self):
self.assertNotEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Cyrl')))
def test_hash(self):
self.assertEqual(hash(Language('fra')), hash('fr'))
self.assertEqual(hash(Language('ace')), hash('ace'))
self.assertEqual(hash(Language('por', 'BR')), hash('pt-BR'))
self.assertEqual(hash(Language('srp', script='Cyrl')), hash('sr-Cyrl'))
self.assertEqual(hash(Language('eng', 'US', 'Latn')), hash('en-US-Latn'))
def test_pickle(self):
for lang in [Language('fra'),
Language('eng', 'US'),
Language('srp', script='Latn'),
Language('eng', 'US', 'Latn')]:
self.assertEqual(pickle.loads(pickle.dumps(lang)), lang)
def test_str(self):
self.assertEqual(Language.fromietf(str(Language('eng', 'US', 'Latn'))), Language('eng', 'US', 'Latn'))
self.assertEqual(Language.fromietf(str(Language('fra', 'FR'))), Language('fra', 'FR'))
self.assertEqual(Language.fromietf(str(Language('bel'))), Language('bel'))
def test_register_converter(self):
class TestConverter(LanguageReverseConverter):
def __init__(self):
self.to_test = {'fra': 'test1', 'eng': 'test2'}
self.from_test = {'test1': 'fra', 'test2': 'eng'}
def convert(self, alpha3, country=None, script=None):
if alpha3 not in self.to_test:
raise LanguageConvertError(alpha3, country, script)
return self.to_test[alpha3]
def reverse(self, test):
if test not in self.from_test:
raise LanguageReverseError(test)
return (self.from_test[test], None)
language = Language('fra')
self.assertFalse(hasattr(language, 'test'))
language_converters['test'] = TestConverter()
self.assertTrue(hasattr(language, 'test'))
self.assertIn('test', language_converters)
self.assertEqual(Language('fra').test, 'test1')
self.assertEqual(Language.fromtest('test2').alpha3, 'eng')
del language_converters['test']
self.assertNotIn('test', language_converters)
self.assertRaises(KeyError, lambda: Language.fromtest('test1'))
self.assertRaises(AttributeError, lambda: Language('fra').test)
def suite():
suite = TestSuite()
suite.addTest(TestLoader().loadTestsFromTestCase(TestScript))
suite.addTest(TestLoader().loadTestsFromTestCase(TestCountry))
suite.addTest(TestLoader().loadTestsFromTestCase(TestLanguage))
return suite
if __name__ == '__main__':
TextTestRunner().run(suite())
+18
View File
@@ -0,0 +1,18 @@
"""Provide a (g)dbm-compatible interface to bsddb.hashopen."""
import sys
import warnings
warnings.warnpy3k("in 3.x, the dbhash module has been removed", stacklevel=2)
try:
import bsddb
except ImportError:
# prevent a second import of this module from spuriously succeeding
del sys.modules[__name__]
raise
__all__ = ["error","open"]
error = bsddb.error # Exported for anydbm
def open(file, flag = 'r', mode=0666):
return bsddb.hashopen(file, flag, mode)
@@ -1,27 +0,0 @@
Copyright (c) 2011-2014 Mike Bayer
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author or contributors may not be used to endorse or
promote products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
@@ -1,91 +0,0 @@
dogpile.cache
=============
A caching API built around the concept of a "dogpile lock", which allows
continued access to an expiring data value while a single thread generates a
new value.
dogpile.cache builds on the `dogpile.core <http://pypi.python.org/pypi/dogpile.core>`_
locking system, which implements the idea of "allow one creator to write while
others read" in the abstract. Overall, dogpile.cache is intended as a
replacement to the `Beaker <http://beaker.groovie.org>`_ caching system, the internals
of which are written by the same author. All the ideas of Beaker which "work"
are re-implemented in dogpile.cache in a more efficient and succinct manner,
and all the cruft (Beaker's internals were first written in 2005) relegated
to the trash heap.
Features
--------
* A succinct API which encourages up-front configuration of pre-defined
"regions", each one defining a set of caching characteristics including
storage backend, configuration options, and default expiration time.
* A standard get/set/delete API as well as a function decorator API is
provided.
* The mechanics of key generation are fully customizable. The function
decorator API features a pluggable "key generator" to customize how
cache keys are made to correspond to function calls, and an optional
"key mangler" feature provides for pluggable mangling of keys
(such as encoding, SHA-1 hashing) as desired for each region.
* The dogpile lock, first developed as the core engine behind the Beaker
caching system, here vastly simplified, improved, and better tested.
Some key performance
issues that were intrinsic to Beaker's architecture, particularly that
values would frequently be "double-fetched" from the cache, have been fixed.
* Backends implement their own version of a "distributed" lock, where the
"distribution" matches the backend's storage system. For example, the
memcached backends allow all clients to coordinate creation of values
using memcached itself. The dbm file backend uses a lockfile
alongside the dbm file. New backends, such as a Redis-based backend,
can provide their own locking mechanism appropriate to the storage
engine.
* Writing new backends or hacking on the existing backends is intended to be
routine - all that's needed are basic get/set/delete methods. A distributed
lock tailored towards the backend is an optional addition, else dogpile uses
a regular thread mutex. New backends can be registered with dogpile.cache
directly or made available via setuptools entry points.
* Included backends feature three memcached backends (python-memcached, pylibmc,
bmemcached), a Redis backend, a backend based on Python's
anydbm, and a plain dictionary backend.
* Space for third party plugins, including the first which provides the
dogpile.cache engine to Mako templates.
* Python 3 compatible in place - no 2to3 required.
Synopsis
--------
dogpile.cache features a single public usage object known as the ``CacheRegion``.
This object then refers to a particular ``CacheBackend``. Typical usage
generates a region using ``make_region()``, which can then be used at the
module level to decorate functions, or used directly in code with a traditional
get/set interface. Configuration of the backend is applied to the region
using ``configure()`` or ``configure_from_config()``, allowing deferred
config-file based configuration to occur after modules have been imported::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.pylibmc',
expiration_time = 3600,
arguments = {
'url':["127.0.0.1"],
'binary':True,
'behaviors':{"tcp_nodelay": True,"ketama":True}
}
)
@region.cache_on_arguments()
def load_user_info(user_id):
return some_database.lookup_user_by_id(user_id)
Documentation
-------------
See dogpile.cache's full documentation at
`dogpile.cache documentation <http://dogpilecache.readthedocs.org>`_.
@@ -1,6 +0,0 @@
# See http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages
try:
__import__('pkg_resources').declare_namespace(__name__)
except ImportError:
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)
@@ -1,3 +0,0 @@
__version__ = '0.5.4'
from .region import CacheRegion, register_backend, make_region
-193
View File
@@ -1,193 +0,0 @@
import operator
from .compat import py3k
class NoValue(object):
"""Describe a missing cache value.
The :attr:`.NO_VALUE` module global
should be used.
"""
@property
def payload(self):
return self
if py3k:
def __bool__(self): #pragma NO COVERAGE
return False
else:
def __nonzero__(self): #pragma NO COVERAGE
return False
NO_VALUE = NoValue()
"""Value returned from ``get()`` that describes
a key not present."""
class CachedValue(tuple):
"""Represent a value stored in the cache.
:class:`.CachedValue` is a two-tuple of
``(payload, metadata)``, where ``metadata``
is dogpile.cache's tracking information (
currently the creation time). The metadata
and tuple structure is pickleable, if
the backend requires serialization.
"""
payload = property(operator.itemgetter(0))
"""Named accessor for the payload."""
metadata = property(operator.itemgetter(1))
"""Named accessor for the dogpile.cache metadata dictionary."""
def __new__(cls, payload, metadata):
return tuple.__new__(cls, (payload, metadata))
def __reduce__(self):
return CachedValue, (self.payload, self.metadata)
class CacheBackend(object):
"""Base class for backend implementations."""
key_mangler = None
"""Key mangling function.
May be None, or otherwise declared
as an ordinary instance method.
"""
def __init__(self, arguments): #pragma NO COVERAGE
"""Construct a new :class:`.CacheBackend`.
Subclasses should override this to
handle the given arguments.
:param arguments: The ``arguments`` parameter
passed to :func:`.make_registry`.
"""
raise NotImplementedError()
@classmethod
def from_config_dict(cls, config_dict, prefix):
prefix_len = len(prefix)
return cls(
dict(
(key[prefix_len:], config_dict[key])
for key in config_dict
if key.startswith(prefix)
)
)
def get_mutex(self, key):
"""Return an optional mutexing object for the given key.
This object need only provide an ``acquire()``
and ``release()`` method.
May return ``None``, in which case the dogpile
lock will use a regular ``threading.Lock``
object to mutex concurrent threads for
value creation. The default implementation
returns ``None``.
Different backends may want to provide various
kinds of "mutex" objects, such as those which
link to lock files, distributed mutexes,
memcached semaphores, etc. Whatever
kind of system is best suited for the scope
and behavior of the caching backend.
A mutex that takes the key into account will
allow multiple regenerate operations across
keys to proceed simultaneously, while a mutex
that does not will serialize regenerate operations
to just one at a time across all keys in the region.
The latter approach, or a variant that involves
a modulus of the given key's hash value,
can be used as a means of throttling the total
number of value recreation operations that may
proceed at one time.
"""
return None
def get(self, key): #pragma NO COVERAGE
"""Retrieve a value from the cache.
The returned value should be an instance of
:class:`.CachedValue`, or ``NO_VALUE`` if
not present.
"""
raise NotImplementedError()
def get_multi(self, keys): #pragma NO COVERAGE
"""Retrieve multiple values from the cache.
The returned value should be a list, corresponding
to the list of keys given.
.. versionadded:: 0.5.0
"""
raise NotImplementedError()
def set(self, key, value): #pragma NO COVERAGE
"""Set a value in the cache.
The key will be whatever was passed
to the registry, processed by the
"key mangling" function, if any.
The value will always be an instance
of :class:`.CachedValue`.
"""
raise NotImplementedError()
def set_multi(self, mapping): #pragma NO COVERAGE
"""Set multiple values in the cache.
The key will be whatever was passed
to the registry, processed by the
"key mangling" function, if any.
The value will always be an instance
of :class:`.CachedValue`.
.. versionadded:: 0.5.0
"""
raise NotImplementedError()
def delete(self, key): #pragma NO COVERAGE
"""Delete a value from the cache.
The key will be whatever was passed
to the registry, processed by the
"key mangling" function, if any.
The behavior here should be idempotent,
that is, can be called any number of times
regardless of whether or not the
key exists.
"""
raise NotImplementedError()
def delete_multi(self, keys): #pragma NO COVERAGE
"""Delete multiple values from the cache.
The key will be whatever was passed
to the registry, processed by the
"key mangling" function, if any.
The behavior here should be idempotent,
that is, can be called any number of times
regardless of whether or not the
key exists.
.. versionadded:: 0.5.0
"""
raise NotImplementedError()
@@ -1,10 +0,0 @@
from dogpile.cache.region import register_backend
register_backend("dogpile.cache.null", "dogpile.cache.backends.null", "NullBackend")
register_backend("dogpile.cache.dbm", "dogpile.cache.backends.file", "DBMBackend")
register_backend("dogpile.cache.pylibmc", "dogpile.cache.backends.memcached", "PylibmcBackend")
register_backend("dogpile.cache.bmemcached", "dogpile.cache.backends.memcached", "BMemcachedBackend")
register_backend("dogpile.cache.memcached", "dogpile.cache.backends.memcached", "MemcachedBackend")
register_backend("dogpile.cache.memory", "dogpile.cache.backends.memory", "MemoryBackend")
register_backend("dogpile.cache.memory_pickle", "dogpile.cache.backends.memory", "MemoryPickleBackend")
register_backend("dogpile.cache.redis", "dogpile.cache.backends.redis", "RedisBackend")
@@ -1,441 +0,0 @@
"""
File Backends
------------------
Provides backends that deal with local filesystem access.
"""
from __future__ import with_statement
from dogpile.cache.api import CacheBackend, NO_VALUE
from contextlib import contextmanager
from dogpile.cache import compat
from dogpile.cache import util
import os
__all__ = 'DBMBackend', 'FileLock', 'AbstractFileLock'
class DBMBackend(CacheBackend):
"""A file-backend using a dbm file to store keys.
Basic usage::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.dbm',
expiration_time = 3600,
arguments = {
"filename":"/path/to/cachefile.dbm"
}
)
DBM access is provided using the Python ``anydbm`` module,
which selects a platform-specific dbm module to use.
This may be made to be more configurable in a future
release.
Note that different dbm modules have different behaviors.
Some dbm implementations handle their own locking, while
others don't. The :class:`.DBMBackend` uses a read/write
lockfile by default, which is compatible even with those
DBM implementations for which this is unnecessary,
though the behavior can be disabled.
The DBM backend by default makes use of two lockfiles.
One is in order to protect the DBM file itself from
concurrent writes, the other is to coordinate
value creation (i.e. the dogpile lock). By default,
these lockfiles use the ``flock()`` system call
for locking; this is **only available on Unix
platforms**. An alternative lock implementation, such as one
which is based on threads or uses a third-party system
such as `portalocker <https://pypi.python.org/pypi/portalocker>`_,
can be dropped in using the ``lock_factory`` argument
in conjunction with the :class:`.AbstractFileLock` base class.
Currently, the dogpile lock is against the entire
DBM file, not per key. This means there can
only be one "creator" job running at a time
per dbm file.
A future improvement might be to have the dogpile lock
using a filename that's based on a modulus of the key.
Locking on a filename that uniquely corresponds to the
key is problematic, since it's not generally safe to
delete lockfiles as the application runs, implying an
unlimited number of key-based files would need to be
created and never deleted.
Parameters to the ``arguments`` dictionary are
below.
:param filename: path of the filename in which to
create the DBM file. Note that some dbm backends
will change this name to have additional suffixes.
:param rw_lockfile: the name of the file to use for
read/write locking. If omitted, a default name
is used by appending the suffix ".rw.lock" to the
DBM filename. If False, then no lock is used.
:param dogpile_lockfile: the name of the file to use
for value creation, i.e. the dogpile lock. If
omitted, a default name is used by appending the
suffix ".dogpile.lock" to the DBM filename. If
False, then dogpile.cache uses the default dogpile
lock, a plain thread-based mutex.
:param lock_factory: a function or class which provides
for a read/write lock. Defaults to :class:`.FileLock`.
Custom implementations need to implement context-manager
based ``read()`` and ``write()`` functions - the
:class:`.AbstractFileLock` class is provided as a base class
which provides these methods based on individual read/write lock
functions. E.g. to replace the lock with the dogpile.core
:class:`.ReadWriteMutex`::
from dogpile.core.readwrite_lock import ReadWriteMutex
from dogpile.cache.backends.file import AbstractFileLock
class MutexLock(AbstractFileLock):
def __init__(self, filename):
self.mutex = ReadWriteMutex()
def acquire_read_lock(self, wait):
ret = self.mutex.acquire_read_lock(wait)
return wait or ret
def acquire_write_lock(self, wait):
ret = self.mutex.acquire_write_lock(wait)
return wait or ret
def release_read_lock(self):
return self.mutex.release_read_lock()
def release_write_lock(self):
return self.mutex.release_write_lock()
from dogpile.cache import make_region
region = make_region().configure(
"dogpile.cache.dbm",
expiration_time=300,
arguments={
"filename": "file.dbm",
"lock_factory": MutexLock
}
)
While the included :class:`.FileLock` uses ``os.flock()``, a
windows-compatible implementation can be built using a library
such as `portalocker <https://pypi.python.org/pypi/portalocker>`_.
.. versionadded:: 0.5.2
"""
def __init__(self, arguments):
self.filename = os.path.abspath(
os.path.normpath(arguments['filename'])
)
dir_, filename = os.path.split(self.filename)
self.lock_factory = arguments.get("lock_factory", FileLock)
self._rw_lock = self._init_lock(
arguments.get('rw_lockfile'),
".rw.lock", dir_, filename)
self._dogpile_lock = self._init_lock(
arguments.get('dogpile_lockfile'),
".dogpile.lock",
dir_, filename,
util.KeyReentrantMutex.factory)
# TODO: make this configurable
if compat.py3k:
import dbm
else:
import anydbm as dbm
self.dbmmodule = dbm
self._init_dbm_file()
def _init_lock(self, argument, suffix, basedir, basefile, wrapper=None):
if argument is None:
lock = self.lock_factory(os.path.join(basedir, basefile + suffix))
elif argument is not False:
lock = self.lock_factory(
os.path.abspath(
os.path.normpath(argument)
))
else:
return None
if wrapper:
lock = wrapper(lock)
return lock
def _init_dbm_file(self):
exists = os.access(self.filename, os.F_OK)
if not exists:
for ext in ('db', 'dat', 'pag', 'dir'):
if os.access(self.filename + os.extsep + ext, os.F_OK):
exists = True
break
if not exists:
fh = self.dbmmodule.open(self.filename, 'c')
fh.close()
def get_mutex(self, key):
# using one dogpile for the whole file. Other ways
# to do this might be using a set of files keyed to a
# hash/modulus of the key. the issue is it's never
# really safe to delete a lockfile as this can
# break other processes trying to get at the file
# at the same time - so handling unlimited keys
# can't imply unlimited filenames
if self._dogpile_lock:
return self._dogpile_lock(key)
else:
return None
@contextmanager
def _use_rw_lock(self, write):
if self._rw_lock is None:
yield
elif write:
with self._rw_lock.write():
yield
else:
with self._rw_lock.read():
yield
@contextmanager
def _dbm_file(self, write):
with self._use_rw_lock(write):
dbm = self.dbmmodule.open(self.filename,
"w" if write else "r")
yield dbm
dbm.close()
def get(self, key):
with self._dbm_file(False) as dbm:
if hasattr(dbm, 'get'):
value = dbm.get(key, NO_VALUE)
else:
# gdbm objects lack a .get method
try:
value = dbm[key]
except KeyError:
value = NO_VALUE
if value is not NO_VALUE:
value = compat.pickle.loads(value)
return value
def get_multi(self, keys):
return [self.get(key) for key in keys]
def set(self, key, value):
with self._dbm_file(True) as dbm:
dbm[key] = compat.pickle.dumps(value)
def set_multi(self, mapping):
with self._dbm_file(True) as dbm:
for key,value in mapping.items():
dbm[key] = compat.pickle.dumps(value)
def delete(self, key):
with self._dbm_file(True) as dbm:
try:
del dbm[key]
except KeyError:
pass
def delete_multi(self, keys):
with self._dbm_file(True) as dbm:
for key in keys:
try:
del dbm[key]
except KeyError:
pass
class AbstractFileLock(object):
"""Coordinate read/write access to a file.
typically is a file-based lock but doesn't necessarily have to be.
The default implementation here is :class:`.FileLock`.
Implementations should provide the following methods::
* __init__()
* acquire_read_lock()
* acquire_write_lock()
* release_read_lock()
* release_write_lock()
The ``__init__()`` method accepts a single argument "filename", which
may be used as the "lock file", for those implementations that use a lock
file.
Note that multithreaded environments must provide a thread-safe
version of this lock. The recommended approach for file-descriptor-based
locks is to use a Python ``threading.local()`` so that a unique file descriptor
is held per thread. See the source code of :class:`.FileLock` for an
implementation example.
"""
def __init__(self, filename):
"""Constructor, is given the filename of a potential lockfile.
The usage of this filename is optional and no file is
created by default.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
def acquire(self, wait=True):
"""Acquire the "write" lock.
This is a direct call to :meth:`.AbstractFileLock.acquire_write_lock`.
"""
return self.acquire_write_lock(wait)
def release(self):
"""Release the "write" lock.
This is a direct call to :meth:`.AbstractFileLock.release_write_lock`.
"""
self.release_write_lock()
@contextmanager
def read(self):
"""Provide a context manager for the "read" lock.
This method makes use of :meth:`.AbstractFileLock.acquire_read_lock`
and :meth:`.AbstractFileLock.release_read_lock`
"""
self.acquire_read_lock(True)
try:
yield
finally:
self.release_read_lock()
@contextmanager
def write(self):
"""Provide a context manager for the "write" lock.
This method makes use of :meth:`.AbstractFileLock.acquire_write_lock`
and :meth:`.AbstractFileLock.release_write_lock`
"""
self.acquire_write_lock(True)
try:
yield
finally:
self.release_write_lock()
@property
def is_open(self):
"""optional method."""
raise NotImplementedError()
def acquire_read_lock(self, wait):
"""Acquire a 'reader' lock.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
def acquire_write_lock(self, wait):
"""Acquire a 'write' lock.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
def release_read_lock(self):
"""Release a 'reader' lock.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
def release_write_lock(self):
"""Release a 'writer' lock.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
class FileLock(AbstractFileLock):
"""Use lockfiles to coordinate read/write access to a file.
Only works on Unix systems, using
`fcntl.flock() <http://docs.python.org/library/fcntl.html>`_.
"""
def __init__(self, filename):
self._filedescriptor = compat.threading.local()
self.filename = filename
@util.memoized_property
def _module(self):
import fcntl
return fcntl
@property
def is_open(self):
return hasattr(self._filedescriptor, 'fileno')
def acquire_read_lock(self, wait):
return self._acquire(wait, os.O_RDONLY, self._module.LOCK_SH)
def acquire_write_lock(self, wait):
return self._acquire(wait, os.O_WRONLY, self._module.LOCK_EX)
def release_read_lock(self):
self._release()
def release_write_lock(self):
self._release()
def _acquire(self, wait, wrflag, lockflag):
wrflag |= os.O_CREAT
fileno = os.open(self.filename, wrflag)
try:
if not wait:
lockflag |= self._module.LOCK_NB
self._module.flock(fileno, lockflag)
except IOError:
os.close(fileno)
if not wait:
# this is typically
# "[Errno 35] Resource temporarily unavailable",
# because of LOCK_NB
return False
else:
raise
else:
self._filedescriptor.fileno = fileno
return True
def _release(self):
try:
fileno = self._filedescriptor.fileno
except AttributeError:
return
else:
self._module.flock(fileno, self._module.LOCK_UN)
os.close(fileno)
del self._filedescriptor.fileno
@@ -1,332 +0,0 @@
"""
Memcached Backends
------------------
Provides backends for talking to `memcached <http://memcached.org>`_.
"""
from dogpile.cache.api import CacheBackend, NO_VALUE
from dogpile.cache import compat
from dogpile.cache import util
import random
import time
__all__ = 'GenericMemcachedBackend', 'MemcachedBackend',\
'PylibmcBackend', 'BMemcachedBackend', 'MemcachedLock'
class MemcachedLock(object):
"""Simple distributed lock using memcached.
This is an adaptation of the lock featured at
http://amix.dk/blog/post/19386
"""
def __init__(self, client_fn, key):
self.client_fn = client_fn
self.key = "_lock" + key
def acquire(self, wait=True):
client = self.client_fn()
i = 0
while True:
if client.add(self.key, 1):
return True
elif not wait:
return False
else:
sleep_time = (((i+1)*random.random()) + 2**i) / 2.5
time.sleep(sleep_time)
if i < 15:
i += 1
def release(self):
client = self.client_fn()
client.delete(self.key)
class GenericMemcachedBackend(CacheBackend):
"""Base class for memcached backends.
This base class accepts a number of paramters
common to all backends.
:param url: the string URL to connect to. Can be a single
string or a list of strings. This is the only argument
that's required.
:param distributed_lock: boolean, when True, will use a
memcached-lock as the dogpile lock (see :class:`.MemcachedLock`).
Use this when multiple
processes will be talking to the same memcached instance.
When left at False, dogpile will coordinate on a regular
threading mutex.
:param memcached_expire_time: integer, when present will
be passed as the ``time`` parameter to ``pylibmc.Client.set``.
This is used to set the memcached expiry time for a value.
.. note::
This parameter is **different** from Dogpile's own
``expiration_time``, which is the number of seconds after
which Dogpile will consider the value to be expired.
When Dogpile considers a value to be expired,
it **continues to use the value** until generation
of a new value is complete, when using
:meth:`.CacheRegion.get_or_create`.
Therefore, if you are setting ``memcached_expire_time``, you'll
want to make sure it is greater than ``expiration_time``
by at least enough seconds for new values to be generated,
else the value won't be available during a regeneration,
forcing all threads to wait for a regeneration each time
a value expires.
The :class:`.GenericMemachedBackend` uses a ``threading.local()``
object to store individual client objects per thread,
as most modern memcached clients do not appear to be inherently
threadsafe.
In particular, ``threading.local()`` has the advantage over pylibmc's
built-in thread pool in that it automatically discards objects
associated with a particular thread when that thread ends.
"""
set_arguments = {}
"""Additional arguments which will be passed
to the :meth:`set` method."""
def __init__(self, arguments):
self._imports()
# using a plain threading.local here. threading.local
# automatically deletes the __dict__ when a thread ends,
# so the idea is that this is superior to pylibmc's
# own ThreadMappedPool which doesn't handle this
# automatically.
self.url = util.to_list(arguments['url'])
self.distributed_lock = arguments.get('distributed_lock', False)
self.memcached_expire_time = arguments.get(
'memcached_expire_time', 0)
def _imports(self):
"""client library imports go here."""
raise NotImplementedError()
def _create_client(self):
"""Creation of a Client instance goes here."""
raise NotImplementedError()
@util.memoized_property
def _clients(self):
backend = self
class ClientPool(compat.threading.local):
def __init__(self):
self.memcached = backend._create_client()
return ClientPool()
@property
def client(self):
"""Return the memcached client.
This uses a threading.local by
default as it appears most modern
memcached libs aren't inherently
threadsafe.
"""
return self._clients.memcached
def get_mutex(self, key):
if self.distributed_lock:
return MemcachedLock(lambda: self.client, key)
else:
return None
def get(self, key):
value = self.client.get(key)
if value is None:
return NO_VALUE
else:
return value
def get_multi(self, keys):
values = self.client.get_multi(keys)
return [
NO_VALUE if key not in values
else values[key] for key in keys
]
def set(self, key, value):
self.client.set(key,
value,
**self.set_arguments
)
def set_multi(self, mapping):
self.client.set_multi(mapping,
**self.set_arguments
)
def delete(self, key):
self.client.delete(key)
def delete_multi(self, keys):
self.client.delete_multi(keys)
class MemcacheArgs(object):
"""Mixin which provides support for the 'time' argument to set(),
'min_compress_len' to other methods.
"""
def __init__(self, arguments):
self.min_compress_len = arguments.get('min_compress_len', 0)
self.set_arguments = {}
if "memcached_expire_time" in arguments:
self.set_arguments["time"] =\
arguments["memcached_expire_time"]
if "min_compress_len" in arguments:
self.set_arguments["min_compress_len"] =\
arguments["min_compress_len"]
super(MemcacheArgs, self).__init__(arguments)
class PylibmcBackend(MemcacheArgs, GenericMemcachedBackend):
"""A backend for the
`pylibmc <http://sendapatch.se/projects/pylibmc/index.html>`_
memcached client.
A configuration illustrating several of the optional
arguments described in the pylibmc documentation::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.pylibmc',
expiration_time = 3600,
arguments = {
'url':["127.0.0.1"],
'binary':True,
'behaviors':{"tcp_nodelay": True,"ketama":True}
}
)
Arguments accepted here include those of
:class:`.GenericMemcachedBackend`, as well as
those below.
:param binary: sets the ``binary`` flag understood by
``pylibmc.Client``.
:param behaviors: a dictionary which will be passed to
``pylibmc.Client`` as the ``behaviors`` parameter.
:param min_compress_len: Integer, will be passed as the
``min_compress_len`` parameter to the ``pylibmc.Client.set``
method.
"""
def __init__(self, arguments):
self.binary = arguments.get('binary', False)
self.behaviors = arguments.get('behaviors', {})
super(PylibmcBackend, self).__init__(arguments)
def _imports(self):
global pylibmc
import pylibmc
def _create_client(self):
return pylibmc.Client(self.url,
binary=self.binary,
behaviors=self.behaviors
)
class MemcachedBackend(MemcacheArgs, GenericMemcachedBackend):
"""A backend using the standard `Python-memcached <http://www.tummy.com/Community/software/python-memcached/>`_
library.
Example::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.memcached',
expiration_time = 3600,
arguments = {
'url':"127.0.0.1:11211"
}
)
"""
def _imports(self):
global memcache
import memcache
def _create_client(self):
return memcache.Client(self.url)
class BMemcachedBackend(GenericMemcachedBackend):
"""A backend for the
`python-binary-memcached <https://github.com/jaysonsantos/python-binary-memcached>`_
memcached client.
This is a pure Python memcached client which
includes the ability to authenticate with a memcached
server using SASL.
A typical configuration using username/password::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.bmemcached',
expiration_time = 3600,
arguments = {
'url':["127.0.0.1"],
'username':'scott',
'password':'tiger'
}
)
Arguments which can be passed to the ``arguments``
dictionary include:
:param username: optional username, will be used for
SASL authentication.
:param password: optional password, will be used for
SASL authentication.
"""
def __init__(self, arguments):
self.username = arguments.get('username', None)
self.password = arguments.get('password', None)
super(BMemcachedBackend, self).__init__(arguments)
def _imports(self):
global bmemcached
import bmemcached
class RepairBMemcachedAPI(bmemcached.Client):
"""Repairs BMemcached's non-standard method
signatures, which was fixed in BMemcached
ef206ed4473fec3b639e.
"""
def add(self, key, value):
try:
return super(RepairBMemcachedAPI, self).add(key, value)
except ValueError:
return False
self.Client = RepairBMemcachedAPI
def _create_client(self):
return self.Client(self.url,
username=self.username,
password=self.password
)
def delete_multi(self, keys):
"""python-binary-memcached api does not implements delete_multi"""
for key in keys:
self.delete(key)
@@ -1,122 +0,0 @@
"""
Memory Backends
---------------
Provides simple dictionary-based backends.
The two backends are :class:`.MemoryBackend` and :class:`.MemoryPickleBackend`;
the latter applies a serialization step to cached values while the former
places the value as given into the dictionary.
"""
from dogpile.cache.api import CacheBackend, NO_VALUE
from dogpile.cache.compat import pickle
class MemoryBackend(CacheBackend):
"""A backend that uses a plain dictionary.
There is no size management, and values which
are placed into the dictionary will remain
until explicitly removed. Note that
Dogpile's expiration of items is based on
timestamps and does not remove them from
the cache.
E.g.::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.memory'
)
To use a Python dictionary of your choosing,
it can be passed in with the ``cache_dict``
argument::
my_dictionary = {}
region = make_region().configure(
'dogpile.cache.memory',
arguments={
"cache_dict":my_dictionary
}
)
"""
pickle_values = False
def __init__(self, arguments):
self._cache = arguments.pop("cache_dict", {})
def get(self, key):
value = self._cache.get(key, NO_VALUE)
if value is not NO_VALUE and self.pickle_values:
value = pickle.loads(value)
return value
def get_multi(self, keys):
ret = [self._cache.get(key, NO_VALUE)
for key in keys]
if self.pickle_values:
ret = [
pickle.loads(value)
if value is not NO_VALUE else value
for value in ret
]
return ret
def set(self, key, value):
if self.pickle_values:
value = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
self._cache[key] = value
def set_multi(self, mapping):
pickle_values = self.pickle_values
for key, value in mapping.items():
if pickle_values:
value = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
self._cache[key] = value
def delete(self, key):
self._cache.pop(key, None)
def delete_multi(self, keys):
for key in keys:
self._cache.pop(key, None)
class MemoryPickleBackend(MemoryBackend):
"""A backend that uses a plain dictionary, but serializes objects on
:meth:`.MemoryBackend.set` and deserializes :meth:`.MemoryBackend.get`.
E.g.::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.memory_pickle'
)
The usage of pickle to serialize cached values allows an object
as placed in the cache to be a copy of the original given object, so
that any subsequent changes to the given object aren't reflected
in the cached value, thus making the backend behave the same way
as other backends which make use of serialization.
The serialization is performed via pickle, and incurs the same
performance hit in doing so as that of other backends; in this way
the :class:`.MemoryPickleBackend` performance is somewhere in between
that of the pure :class:`.MemoryBackend` and the remote server oriented
backends such as that of Memcached or Redis.
Pickle behavior here is the same as that of the Redis backend, using
either ``cPickle`` or ``pickle`` and specifying ``HIGHEST_PROTOCOL``
upon serialize.
.. versionadded:: 0.5.3
"""
pickle_values = True
@@ -1,62 +0,0 @@
"""
Null Backend
-------------
The Null backend does not do any caching at all. It can be
used to test behavior without caching, or as a means of disabling
caching for a region that is otherwise used normally.
.. versionadded:: 0.5.4
"""
from dogpile.cache.api import CacheBackend, NO_VALUE
__all__ = ['NullBackend']
class NullLock(object):
def acquire(self):
pass
def release(self):
pass
class NullBackend(CacheBackend):
"""A "null" backend that effectively disables all cache operations.
Basic usage::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.null'
)
"""
def __init__(self, arguments):
pass
def get_mutex(self, key):
return NullLock()
def get(self, key):
return NO_VALUE
def get_multi(self, keys):
return [NO_VALUE for k in keys]
def set(self, key, value):
pass
def set_multi(self, mapping):
pass
def delete(self, key):
pass
def delete_multi(self, keys):
pass
@@ -1,181 +0,0 @@
"""
Redis Backends
------------------
Provides backends for talking to `Redis <http://redis.io>`_.
"""
from __future__ import absolute_import
from dogpile.cache.api import CacheBackend, NO_VALUE
from dogpile.cache.compat import pickle, u
redis = None
__all__ = 'RedisBackend',
class RedisBackend(CacheBackend):
"""A `Redis <http://redis.io/>`_ backend, using the
`redis-py <http://pypi.python.org/pypi/redis/>`_ backend.
Example configuration::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.redis',
arguments = {
'host': 'localhost',
'port': 6379,
'db': 0,
'redis_expiration_time': 60*60*2, # 2 hours
'distributed_lock':True
}
)
Arguments accepted in the arguments dictionary:
:param url: string. If provided, will override separate host/port/db
params. The format is that accepted by ``StrictRedis.from_url()``.
.. versionadded:: 0.4.1
:param host: string, default is ``localhost``.
:param password: string, default is no password.
.. versionadded:: 0.4.1
:param port: integer, default is ``6379``.
:param db: integer, default is ``0``.
:param redis_expiration_time: integer, number of seconds after setting
a value that Redis should expire it. This should be larger than dogpile's
cache expiration. By default no expiration is set.
:param distributed_lock: boolean, when True, will use a
redis-lock as the dogpile lock.
Use this when multiple
processes will be talking to the same redis instance.
When left at False, dogpile will coordinate on a regular
threading mutex.
:param lock_timeout: integer, number of seconds after acquiring a lock that
Redis should expire it. This argument is only valid when
``distributed_lock`` is ``True``.
.. versionadded:: 0.5.0
:param socket_timeout: float, seconds for socket timeout.
Default is None (no timeout).
.. versionadded:: 0.5.4
:param lock_sleep: integer, number of seconds to sleep when failed to
acquire a lock. This argument is only valid when
``distributed_lock`` is ``True``.
.. versionadded:: 0.5.0
:param connection_pool: ``redis.ConnectionPool`` object. If provided,
this object supersedes other connection arguments passed to the
``redis.StrictRedis`` instance, including url and/or host as well as
socket_timeout, and will be passed to ``redis.StrictRedis`` as the
source of connectivity.
.. versionadded:: 0.5.4
"""
def __init__(self, arguments):
self._imports()
self.url = arguments.pop('url', None)
self.host = arguments.pop('host', 'localhost')
self.password = arguments.pop('password', None)
self.port = arguments.pop('port', 6379)
self.db = arguments.pop('db', 0)
self.distributed_lock = arguments.get('distributed_lock', False)
self.socket_timeout = arguments.pop('socket_timeout', None)
self.lock_timeout = arguments.get('lock_timeout', None)
self.lock_sleep = arguments.get('lock_sleep', 0.1)
self.redis_expiration_time = arguments.pop('redis_expiration_time', 0)
self.connection_pool = arguments.get('connection_pool', None)
self.client = self._create_client()
def _imports(self):
# defer imports until backend is used
global redis
import redis
def _create_client(self):
if self.connection_pool is not None:
# the connection pool already has all other connection
# options present within, so here we disregard socket_timeout
# and others.
return redis.StrictRedis(connection_pool=self.connection_pool)
args = {}
if self.socket_timeout:
args['socket_timeout'] = self.socket_timeout
if self.url is not None:
args.update(url=self.url)
return redis.StrictRedis.from_url(**args)
else:
args.update(
host=self.host, password=self.password,
port=self.port, db=self.db
)
return redis.StrictRedis(**args)
def get_mutex(self, key):
if self.distributed_lock:
return self.client.lock(u('_lock{0}').format(key),
self.lock_timeout, self.lock_sleep)
else:
return None
def get(self, key):
value = self.client.get(key)
if value is None:
return NO_VALUE
return pickle.loads(value)
def get_multi(self, keys):
values = self.client.mget(keys)
return [pickle.loads(v) if v is not None else NO_VALUE
for v in values]
def set(self, key, value):
if self.redis_expiration_time:
self.client.setex(key, self.redis_expiration_time,
pickle.dumps(value, pickle.HIGHEST_PROTOCOL))
else:
self.client.set(key, pickle.dumps(value, pickle.HIGHEST_PROTOCOL))
def set_multi(self, mapping):
mapping = dict(
(k, pickle.dumps(v, pickle.HIGHEST_PROTOCOL))
for k, v in mapping.items()
)
if not self.redis_expiration_time:
self.client.mset(mapping)
else:
pipe = self.client.pipeline()
for key, value in mapping.items():
pipe.setex(key, self.redis_expiration_time, value)
pipe.execute()
def delete(self, key):
self.client.delete(key)
def delete_multi(self, keys):
self.client.delete(*keys)
-68
View File
@@ -1,68 +0,0 @@
import sys
py2k = sys.version_info < (3, 0)
py3k = sys.version_info >= (3, 0)
py32 = sys.version_info >= (3, 2)
py27 = sys.version_info >= (2, 7)
jython = sys.platform.startswith('java')
win32 = sys.platform.startswith('win')
try:
import threading
except ImportError:
import dummy_threading as threading
if py3k: # pragma: no cover
string_types = str,
text_type = str
string_type = str
if py32:
callable = callable
else:
def callable(fn):
return hasattr(fn, '__call__')
def u(s):
return s
def ue(s):
return s
import configparser
import io
import _thread as thread
else:
string_types = basestring,
text_type = unicode
string_type = str
def u(s):
return unicode(s, "utf-8")
def ue(s):
return unicode(s, "unicode_escape")
import ConfigParser as configparser
import StringIO as io
callable = callable
import thread
if py3k or jython:
import pickle
else:
import cPickle as pickle
def timedelta_total_seconds(td):
if py27:
return td.total_seconds()
else:
return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 1e6) / 1e6
@@ -1,17 +0,0 @@
"""Exception classes for dogpile.cache."""
class DogpileCacheException(Exception):
"""Base Exception for dogpile.cache exceptions to inherit from."""
class RegionAlreadyConfigured(DogpileCacheException):
"""CacheRegion instance is already configured."""
class RegionNotConfigured(DogpileCacheException):
"""CacheRegion instance has not been configured."""
class ValidationError(DogpileCacheException):
"""Error validating a value or option."""
@@ -1,87 +0,0 @@
"""
Mako Integration
----------------
dogpile.cache includes a `Mako <http://www.makotemplates.org>`_ plugin that replaces `Beaker <http://beaker.groovie.org>`_
as the cache backend.
Setup a Mako template lookup using the "dogpile.cache" cache implementation
and a region dictionary::
from dogpile.cache import make_region
from mako.lookup import TemplateLookup
my_regions = {
"local":make_region().configure(
"dogpile.cache.dbm",
expiration_time=360,
arguments={"filename":"file.dbm"}
),
"memcached":make_region().configure(
"dogpile.cache.pylibmc",
expiration_time=3600,
arguments={"url":["127.0.0.1"]}
)
}
mako_lookup = TemplateLookup(
directories=["/myapp/templates"],
cache_impl="dogpile.cache",
cache_args={
'regions':my_regions
}
)
To use the above configuration in a template, use the ``cached=True`` argument on any
Mako tag which accepts it, in conjunction with the name of the desired region
as the ``cache_region`` argument::
<%def name="mysection()" cached="True" cache_region="memcached">
some content that's cached
</%def>
"""
from mako.cache import CacheImpl
class MakoPlugin(CacheImpl):
"""A Mako ``CacheImpl`` which talks to dogpile.cache."""
def __init__(self, cache):
super(MakoPlugin, self).__init__(cache)
try:
self.regions = self.cache.template.cache_args['regions']
except KeyError:
raise KeyError(
"'cache_regions' argument is required on the "
"Mako Lookup or Template object for usage "
"with the dogpile.cache plugin.")
def _get_region(self, **kw):
try:
region = kw['region']
except KeyError:
raise KeyError(
"'cache_region' argument must be specified with 'cache=True'"
"within templates for usage with the dogpile.cache plugin.")
try:
return self.regions[region]
except KeyError:
raise KeyError("No such region '%s'" % region)
def get_and_replace(self, key, creation_function, **kw):
expiration_time = kw.pop("timeout", None)
return self._get_region(**kw).get_or_create(key, creation_function,
expiration_time=expiration_time)
def get_or_create(self, key, creation_function, **kw):
return self.get_and_replace(key, creation_function, **kw)
def put(self, key, value, **kw):
self._get_region(**kw).put(key, value)
def get(self, key, **kw):
expiration_time = kw.pop("timeout", None)
return self._get_region(**kw).get(key, expiration_time=expiration_time)
def invalidate(self, key, **kw):
self._get_region(**kw).delete(key)
-93
View File
@@ -1,93 +0,0 @@
"""
Proxy Backends
------------------
Provides a utility and a decorator class that allow for modifying the behavior
of different backends without altering the class itself or having to extend the
base backend.
.. versionadded:: 0.5.0 Added support for the :class:`.ProxyBackend` class.
"""
from .api import CacheBackend
class ProxyBackend(CacheBackend):
"""A decorator class for altering the functionality of backends.
Basic usage::
from dogpile.cache import make_region
from dogpile.cache.proxy import ProxyBackend
class MyFirstProxy(ProxyBackend):
def get(self, key):
# ... custom code goes here ...
return self.proxied.get(key)
def set(self, key, value):
# ... custom code goes here ...
self.proxied.set(key)
class MySecondProxy(ProxyBackend):
def get(self, key):
# ... custom code goes here ...
return self.proxied.get(key)
region = make_region().configure(
'dogpile.cache.dbm',
expiration_time = 3600,
arguments = {
"filename":"/path/to/cachefile.dbm"
},
wrap = [ MyFirstProxy, MySecondProxy ]
)
Classes that extend :class:`.ProxyBackend` can be stacked
together. The ``.proxied`` property will always
point to either the concrete backend instance or
the next proxy in the chain that a method can be
delegated towards.
.. versionadded:: 0.5.0
"""
def __init__(self, *args, **kwargs):
self.proxied = None
def wrap(self, backend):
''' Take a backend as an argument and setup the self.proxied property.
Return an object that be used as a backend by a :class:`.CacheRegion`
object.
'''
assert(isinstance(backend, CacheBackend) or isinstance(backend, ProxyBackend))
self.proxied = backend
return self
#
# Delegate any functions that are not already overridden to
# the proxies backend
#
def get(self, key):
return self.proxied.get(key)
def set(self, key, value):
self.proxied.set(key, value)
def delete(self, key):
self.proxied.delete(key)
def get_multi(self, keys):
return self.proxied.get_multi(keys)
def set_multi(self, keys):
self.proxied.set_multi(keys)
def delete_multi(self, keys):
self.proxied.delete_multi(keys)
def get_mutex(self, key):
return self.proxied.get_mutex(key)
File diff suppressed because it is too large Load Diff
-189
View File
@@ -1,189 +0,0 @@
from hashlib import sha1
import inspect
import re
import collections
from . import compat
def coerce_string_conf(d):
result = {}
for k, v in d.items():
if not isinstance(v, compat.string_types):
result[k] = v
continue
v = v.strip()
if re.match(r'^[-+]?\d+$', v):
result[k] = int(v)
elif re.match(r'^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?$', v):
result[k] = float(v)
elif v.lower() in ('false', 'true'):
result[k] = v.lower() == 'true'
elif v == 'None':
result[k] = None
else:
result[k] = v
return result
class PluginLoader(object):
def __init__(self, group):
self.group = group
self.impls = {}
def load(self, name):
if name in self.impls:
return self.impls[name]()
else: # pragma NO COVERAGE
import pkg_resources
for impl in pkg_resources.iter_entry_points(
self.group,
name):
self.impls[name] = impl.load
return impl.load()
else:
raise Exception(
"Can't load plugin %s %s" %
(self.group, name))
def register(self, name, modulepath, objname):
def load():
mod = __import__(modulepath)
for token in modulepath.split(".")[1:]:
mod = getattr(mod, token)
return getattr(mod, objname)
self.impls[name] = load
def function_key_generator(namespace, fn, to_str=compat.string_type):
"""Return a function that generates a string
key, based on a given function as well as
arguments to the returned function itself.
This is used by :meth:`.CacheRegion.cache_on_arguments`
to generate a cache key from a decorated function.
It can be replaced using the ``function_key_generator``
argument passed to :func:`.make_region`.
"""
if namespace is None:
namespace = '%s:%s' % (fn.__module__, fn.__name__)
else:
namespace = '%s:%s|%s' % (fn.__module__, fn.__name__, namespace)
args = inspect.getargspec(fn)
has_self = args[0] and args[0][0] in ('self', 'cls')
def generate_key(*args, **kw):
if kw:
raise ValueError(
"dogpile.cache's default key creation "
"function does not accept keyword arguments.")
if has_self:
args = args[1:]
return namespace + "|" + " ".join(map(to_str, args))
return generate_key
def function_multi_key_generator(namespace, fn, to_str=compat.string_type):
if namespace is None:
namespace = '%s:%s' % (fn.__module__, fn.__name__)
else:
namespace = '%s:%s|%s' % (fn.__module__, fn.__name__, namespace)
args = inspect.getargspec(fn)
has_self = args[0] and args[0][0] in ('self', 'cls')
def generate_keys(*args, **kw):
if kw:
raise ValueError(
"dogpile.cache's default key creation "
"function does not accept keyword arguments.")
if has_self:
args = args[1:]
return [namespace + "|" + key for key in map(to_str, args)]
return generate_keys
def sha1_mangle_key(key):
"""a SHA1 key mangler."""
return sha1(key).hexdigest()
def length_conditional_mangler(length, mangler):
"""a key mangler that mangles if the length of the key is
past a certain threshold.
"""
def mangle(key):
if len(key) >= length:
return mangler(key)
else:
return key
return mangle
class memoized_property(object):
"""A read-only @property that is only evaluated once."""
def __init__(self, fget, doc=None):
self.fget = fget
self.__doc__ = doc or fget.__doc__
self.__name__ = fget.__name__
def __get__(self, obj, cls):
if obj is None:
return self
obj.__dict__[self.__name__] = result = self.fget(obj)
return result
def to_list(x, default=None):
"""Coerce to a list."""
if x is None:
return default
if not isinstance(x, (list, tuple)):
return [x]
else:
return x
class KeyReentrantMutex(object):
def __init__(self, key, mutex, keys):
self.key = key
self.mutex = mutex
self.keys = keys
@classmethod
def factory(cls, mutex):
# this collection holds zero or one
# thread idents as the key; a set of
# keynames held as the value.
keystore = collections.defaultdict(set)
def fac(key):
return KeyReentrantMutex(key, mutex, keystore)
return fac
def acquire(self, wait=True):
current_thread = compat.threading.current_thread().ident
keys = self.keys.get(current_thread)
if keys is not None and \
self.key not in keys:
# current lockholder, new key. add it in
keys.add(self.key)
return True
elif self.mutex.acquire(wait=wait):
# after acquire, create new set and add our key
self.keys[current_thread].add(self.key)
return True
else:
return False
def release(self):
current_thread = compat.threading.current_thread().ident
keys = self.keys.get(current_thread)
assert keys is not None, "this thread didn't do the acquire"
assert self.key in keys, "No acquire held for key '%s'" % self.key
keys.remove(self.key)
if not keys:
# when list of keys empty, remove
# the thread ident and unlock.
del self.keys[current_thread]
self.mutex.release()
@@ -1,11 +0,0 @@
from .dogpile import NeedRegenerationException, Lock
from .nameregistry import NameRegistry
from .readwrite_lock import ReadWriteMutex
from .legacy import Dogpile, SyncReaderDogpile
__all__ = [
'Dogpile', 'SyncReaderDogpile', 'NeedRegenerationException',
'NameRegistry', 'ReadWriteMutex', 'Lock']
__version__ = '0.4.1'
@@ -1,162 +0,0 @@
import time
import logging
log = logging.getLogger(__name__)
class NeedRegenerationException(Exception):
"""An exception that when raised in the 'with' block,
forces the 'has_value' flag to False and incurs a
regeneration of the value.
"""
NOT_REGENERATED = object()
class Lock(object):
"""Dogpile lock class.
Provides an interface around an arbitrary mutex
that allows one thread/process to be elected as
the creator of a new value, while other threads/processes
continue to return the previous version
of that value.
.. versionadded:: 0.4.0
The :class:`.Lock` class was added as a single-use object
representing the dogpile API without dependence on
any shared state between multiple instances.
:param mutex: A mutex object that provides ``acquire()``
and ``release()`` methods.
:param creator: Callable which returns a tuple of the form
(new_value, creation_time). "new_value" should be a newly
generated value representing completed state. "creation_time"
should be a floating point time value which is relative
to Python's ``time.time()`` call, representing the time
at which the value was created. This time value should
be associated with the created value.
:param value_and_created_fn: Callable which returns
a tuple of the form (existing_value, creation_time). This
basically should return what the last local call to the ``creator()``
callable has returned, i.e. the value and the creation time,
which would be assumed here to be from a cache. If the
value is not available, the :class:`.NeedRegenerationException`
exception should be thrown.
:param expiretime: Expiration time in seconds. Set to
``None`` for never expires. This timestamp is compared
to the creation_time result and ``time.time()`` to determine if
the value returned by value_and_created_fn is "expired".
:param async_creator: A callable. If specified, this callable will be
passed the mutex as an argument and is responsible for releasing the mutex
after it finishes some asynchronous value creation. The intent is for
this to be used to defer invocation of the creator callable until some
later time.
.. versionadded:: 0.4.1 added the async_creator argument.
"""
def __init__(self,
mutex,
creator,
value_and_created_fn,
expiretime,
async_creator=None,
):
self.mutex = mutex
self.creator = creator
self.value_and_created_fn = value_and_created_fn
self.expiretime = expiretime
self.async_creator = async_creator
def _is_expired(self, createdtime):
"""Return true if the expiration time is reached, or no
value is available."""
return not self._has_value(createdtime) or \
(
self.expiretime is not None and
time.time() - createdtime > self.expiretime
)
def _has_value(self, createdtime):
"""Return true if the creation function has proceeded
at least once."""
return createdtime > 0
def _enter(self):
value_fn = self.value_and_created_fn
try:
value = value_fn()
value, createdtime = value
except NeedRegenerationException:
log.debug("NeedRegenerationException")
value = NOT_REGENERATED
createdtime = -1
generated = self._enter_create(createdtime)
if generated is not NOT_REGENERATED:
generated, createdtime = generated
return generated
elif value is NOT_REGENERATED:
try:
value, createdtime = value_fn()
return value
except NeedRegenerationException:
raise Exception("Generation function should "
"have just been called by a concurrent "
"thread.")
else:
return value
def _enter_create(self, createdtime):
if not self._is_expired(createdtime):
return NOT_REGENERATED
async = False
if self._has_value(createdtime):
if not self.mutex.acquire(False):
log.debug("creation function in progress "
"elsewhere, returning")
return NOT_REGENERATED
else:
log.debug("no value, waiting for create lock")
self.mutex.acquire()
try:
log.debug("value creation lock %r acquired" % self.mutex)
# see if someone created the value already
try:
value, createdtime = self.value_and_created_fn()
except NeedRegenerationException:
pass
else:
if not self._is_expired(createdtime):
log.debug("value already present")
return value, createdtime
elif self.async_creator:
log.debug("Passing creation lock to async runner")
self.async_creator(self.mutex)
async = True
return value, createdtime
log.debug("Calling creation function")
created = self.creator()
return created
finally:
if not async:
self.mutex.release()
log.debug("Released creation lock")
def __enter__(self):
return self._enter()
def __exit__(self, type, value, traceback):
pass
@@ -1,154 +0,0 @@
from __future__ import with_statement
from .util import threading
from .readwrite_lock import ReadWriteMutex
from .dogpile import Lock
import time
import contextlib
class Dogpile(object):
"""Dogpile lock class.
.. deprecated:: 0.4.0
The :class:`.Lock` object specifies the full
API of the :class:`.Dogpile` object in a single way,
rather than providing multiple modes of usage which
don't necessarily work in the majority of cases.
:class:`.Dogpile` is now a wrapper around the :class:`.Lock` object
which provides dogpile.core's original usage pattern.
This usage pattern began as something simple, but was
not of general use in real-world caching environments without
several extra complicating factors; the :class:`.Lock`
object presents the "real-world" API more succinctly,
and also fixes a cross-process concurrency issue.
:param expiretime: Expiration time in seconds. Set to
``None`` for never expires.
:param init: if True, set the 'createdtime' to the
current time.
:param lock: a mutex object that provides
``acquire()`` and ``release()`` methods.
"""
def __init__(self, expiretime, init=False, lock=None):
"""Construct a new :class:`.Dogpile`.
"""
if lock:
self.dogpilelock = lock
else:
self.dogpilelock = threading.Lock()
self.expiretime = expiretime
if init:
self.createdtime = time.time()
createdtime = -1
"""The last known 'creation time' of the value,
stored as an epoch (i.e. from ``time.time()``).
If the value here is -1, it is assumed the value
should recreate immediately.
"""
def acquire(self, creator,
value_fn=None,
value_and_created_fn=None):
"""Acquire the lock, returning a context manager.
:param creator: Creation function, used if this thread
is chosen to create a new value.
:param value_fn: Optional function that returns
the value from some datasource. Will be returned
if regeneration is not needed.
:param value_and_created_fn: Like value_fn, but returns a tuple
of (value, createdtime). The returned createdtime
will replace the "createdtime" value on this dogpile
lock. This option removes the need for the dogpile lock
itself to remain persistent across usages; another
dogpile can come along later and pick up where the
previous one left off.
"""
if value_and_created_fn is None:
if value_fn is None:
def value_and_created_fn():
return None, self.createdtime
else:
def value_and_created_fn():
return value_fn(), self.createdtime
def creator_wrapper():
value = creator()
self.createdtime = time.time()
return value, self.createdtime
else:
def creator_wrapper():
value = creator()
self.createdtime = time.time()
return value
return Lock(
self.dogpilelock,
creator_wrapper,
value_and_created_fn,
self.expiretime
)
@property
def is_expired(self):
"""Return true if the expiration time is reached, or no
value is available."""
return not self.has_value or \
(
self.expiretime is not None and
time.time() - self.createdtime > self.expiretime
)
@property
def has_value(self):
"""Return true if the creation function has proceeded
at least once."""
return self.createdtime > 0
class SyncReaderDogpile(Dogpile):
"""Provide a read-write lock function on top of the :class:`.Dogpile`
class.
.. deprecated:: 0.4.0
The :class:`.ReadWriteMutex` object can be used directly.
"""
def __init__(self, *args, **kw):
super(SyncReaderDogpile, self).__init__(*args, **kw)
self.readwritelock = ReadWriteMutex()
@contextlib.contextmanager
def acquire_write_lock(self):
"""Return the "write" lock context manager.
This will provide a section that is mutexed against
all readers/writers for the dogpile-maintained value.
"""
self.readwritelock.acquire_write_lock()
try:
yield
finally:
self.readwritelock.release_write_lock()
@contextlib.contextmanager
def acquire(self, *arg, **kw):
with super(SyncReaderDogpile, self).acquire(*arg, **kw) as value:
self.readwritelock.acquire_read_lock()
try:
yield value
finally:
self.readwritelock.release_read_lock()
@@ -1,83 +0,0 @@
from .util import threading
import weakref
class NameRegistry(object):
"""Generates and return an object, keeping it as a
singleton for a certain identifier for as long as its
strongly referenced.
e.g.::
class MyFoo(object):
"some important object."
def __init__(self, identifier):
self.identifier = identifier
registry = NameRegistry(MyFoo)
# thread 1:
my_foo = registry.get("foo1")
# thread 2
my_foo = registry.get("foo1")
Above, ``my_foo`` in both thread #1 and #2 will
be *the same object*. The constructor for
``MyFoo`` will be called once, passing the
identifier ``foo1`` as the argument.
When thread 1 and thread 2 both complete or
otherwise delete references to ``my_foo``, the
object is *removed* from the :class:`.NameRegistry` as
a result of Python garbage collection.
:param creator: A function that will create a new
value, given the identifier passed to the :meth:`.NameRegistry.get`
method.
"""
_locks = weakref.WeakValueDictionary()
_mutex = threading.RLock()
def __init__(self, creator):
"""Create a new :class:`.NameRegistry`.
"""
self._values = weakref.WeakValueDictionary()
self._mutex = threading.RLock()
self.creator = creator
def get(self, identifier, *args, **kw):
"""Get and possibly create the value.
:param identifier: Hash key for the value.
If the creation function is called, this identifier
will also be passed to the creation function.
:param \*args, \**kw: Additional arguments which will
also be passed to the creation function if it is
called.
"""
try:
if identifier in self._values:
return self._values[identifier]
else:
return self._sync_get(identifier, *args, **kw)
except KeyError:
return self._sync_get(identifier, *args, **kw)
def _sync_get(self, identifier, *args, **kw):
self._mutex.acquire()
try:
try:
if identifier in self._values:
return self._values[identifier]
else:
self._values[identifier] = value = self.creator(identifier, *args, **kw)
return value
except KeyError:
self._values[identifier] = value = self.creator(identifier, *args, **kw)
return value
finally:
self._mutex.release()
@@ -1,130 +0,0 @@
from .util import threading
import logging
log = logging.getLogger(__name__)
class LockError(Exception):
pass
class ReadWriteMutex(object):
"""A mutex which allows multiple readers, single writer.
:class:`.ReadWriteMutex` uses a Python ``threading.Condition``
to provide this functionality across threads within a process.
The Beaker package also contained a file-lock based version
of this concept, so that readers/writers could be synchronized
across processes with a common filesystem. A future Dogpile
release may include this additional class at some point.
"""
def __init__(self):
# counts how many asynchronous methods are executing
self.async = 0
# pointer to thread that is the current sync operation
self.current_sync_operation = None
# condition object to lock on
self.condition = threading.Condition(threading.Lock())
def acquire_read_lock(self, wait = True):
"""Acquire the 'read' lock."""
self.condition.acquire()
try:
# see if a synchronous operation is waiting to start
# or is already running, in which case we wait (or just
# give up and return)
if wait:
while self.current_sync_operation is not None:
self.condition.wait()
else:
if self.current_sync_operation is not None:
return False
self.async += 1
log.debug("%s acquired read lock", self)
finally:
self.condition.release()
if not wait:
return True
def release_read_lock(self):
"""Release the 'read' lock."""
self.condition.acquire()
try:
self.async -= 1
# check if we are the last asynchronous reader thread
# out the door.
if self.async == 0:
# yes. so if a sync operation is waiting, notifyAll to wake
# it up
if self.current_sync_operation is not None:
self.condition.notifyAll()
elif self.async < 0:
raise LockError("Synchronizer error - too many "
"release_read_locks called")
log.debug("%s released read lock", self)
finally:
self.condition.release()
def acquire_write_lock(self, wait = True):
"""Acquire the 'write' lock."""
self.condition.acquire()
try:
# here, we are not a synchronous reader, and after returning,
# assuming waiting or immediate availability, we will be.
if wait:
# if another sync is working, wait
while self.current_sync_operation is not None:
self.condition.wait()
else:
# if another sync is working,
# we dont want to wait, so forget it
if self.current_sync_operation is not None:
return False
# establish ourselves as the current sync
# this indicates to other read/write operations
# that they should wait until this is None again
self.current_sync_operation = threading.currentThread()
# now wait again for asyncs to finish
if self.async > 0:
if wait:
# wait
self.condition.wait()
else:
# we dont want to wait, so forget it
self.current_sync_operation = None
return False
log.debug("%s acquired write lock", self)
finally:
self.condition.release()
if not wait:
return True
def release_write_lock(self):
"""Release the 'write' lock."""
self.condition.acquire()
try:
if self.current_sync_operation is not threading.currentThread():
raise LockError("Synchronizer error - current thread doesn't "
"have the write lock")
# reset the current sync operation so
# another can get it
self.current_sync_operation = None
# tell everyone to get ready
self.condition.notifyAll()
log.debug("%s released write lock", self)
finally:
# everyone go !!
self.condition.release()
@@ -1,8 +0,0 @@
import sys
py3k = sys.version_info >= (3, 0)
try:
import threading
except ImportError:
import dummy_threading as threading
+249
View File
@@ -0,0 +1,249 @@
"""A dumb and slow but simple dbm clone.
For database spam, spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).
XXX TO DO:
- seems to contain a bug when updating...
- reclaim free space (currently, space once occupied by deleted or expanded
items is never reused)
- support concurrent access (currently, if two processes take turns making
updates, they can mess up the index)
- support efficient access to large databases (currently, the whole index
is read when the database is opened, and some updates rewrite the whole index)
- support opening for read-only (flag = 'm')
"""
import ast as _ast
import os as _os
import __builtin__
import UserDict
_open = __builtin__.open
_BLOCKSIZE = 512
error = IOError # For anydbm
class _Database(UserDict.DictMixin):
# The on-disk directory and data files can remain in mutually
# inconsistent states for an arbitrarily long time (see comments
# at the end of __setitem__). This is only repaired when _commit()
# gets called. One place _commit() gets called is from __del__(),
# and if that occurs at program shutdown time, module globals may
# already have gotten rebound to None. Since it's crucial that
# _commit() finish successfully, we can't ignore shutdown races
# here, and _commit() must not reference any globals.
_os = _os # for _commit()
_open = _open # for _commit()
def __init__(self, filebasename, mode):
self._mode = mode
# The directory file is a text file. Each line looks like
# "%r, (%d, %d)\n" % (key, pos, siz)
# where key is the string key, pos is the offset into the dat
# file of the associated value's first byte, and siz is the number
# of bytes in the associated value.
self._dirfile = filebasename + _os.extsep + 'dir'
# The data file is a binary file pointed into by the directory
# file, and holds the values associated with keys. Each value
# begins at a _BLOCKSIZE-aligned byte offset, and is a raw
# binary 8-bit string value.
self._datfile = filebasename + _os.extsep + 'dat'
self._bakfile = filebasename + _os.extsep + 'bak'
# The index is an in-memory dict, mirroring the directory file.
self._index = None # maps keys to (pos, siz) pairs
# Mod by Jack: create data file if needed
try:
f = _open(self._datfile, 'r')
except IOError:
with _open(self._datfile, 'w') as f:
self._chmod(self._datfile)
else:
f.close()
self._update()
# Read directory file into the in-memory index dict.
def _update(self):
self._index = {}
try:
f = _open(self._dirfile)
except IOError:
pass
else:
with f:
for line in f:
line = line.rstrip()
key, pos_and_siz_pair = _ast.literal_eval(line)
self._index[key] = pos_and_siz_pair
# Write the index dict to the directory file. The original directory
# file (if any) is renamed with a .bak extension first. If a .bak
# file currently exists, it's deleted.
def _commit(self):
# CAUTION: It's vital that _commit() succeed, and _commit() can
# be called from __del__(). Therefore we must never reference a
# global in this routine.
if self._index is None:
return # nothing to do
try:
self._os.unlink(self._bakfile)
except self._os.error:
pass
try:
self._os.rename(self._dirfile, self._bakfile)
except self._os.error:
pass
with self._open(self._dirfile, 'w') as f:
self._chmod(self._dirfile)
for key, pos_and_siz_pair in self._index.iteritems():
f.write("%r, %r\n" % (key, pos_and_siz_pair))
sync = _commit
def __getitem__(self, key):
pos, siz = self._index[key] # may raise KeyError
with _open(self._datfile, 'rb') as f:
f.seek(pos)
dat = f.read(siz)
return dat
# Append val to the data file, starting at a _BLOCKSIZE-aligned
# offset. The data file is first padded with NUL bytes (if needed)
# to get to an aligned offset. Return pair
# (starting offset of val, len(val))
def _addval(self, val):
with _open(self._datfile, 'rb+') as f:
f.seek(0, 2)
pos = int(f.tell())
npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
f.write('\0'*(npos-pos))
pos = npos
f.write(val)
return (pos, len(val))
# Write val to the data file, starting at offset pos. The caller
# is responsible for ensuring that there's enough room starting at
# pos to hold val, without overwriting some other value. Return
# pair (pos, len(val)).
def _setval(self, pos, val):
with _open(self._datfile, 'rb+') as f:
f.seek(pos)
f.write(val)
return (pos, len(val))
# key is a new key whose associated value starts in the data file
# at offset pos and with length siz. Add an index record to
# the in-memory index dict, and append one to the directory file.
def _addkey(self, key, pos_and_siz_pair):
self._index[key] = pos_and_siz_pair
with _open(self._dirfile, 'a') as f:
self._chmod(self._dirfile)
f.write("%r, %r\n" % (key, pos_and_siz_pair))
def __setitem__(self, key, val):
if not type(key) == type('') == type(val):
raise TypeError, "keys and values must be strings"
if key not in self._index:
self._addkey(key, self._addval(val))
else:
# See whether the new value is small enough to fit in the
# (padded) space currently occupied by the old value.
pos, siz = self._index[key]
oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
if newblocks <= oldblocks:
self._index[key] = self._setval(pos, val)
else:
# The new value doesn't fit in the (padded) space used
# by the old value. The blocks used by the old value are
# forever lost.
self._index[key] = self._addval(val)
# Note that _index may be out of synch with the directory
# file now: _setval() and _addval() don't update the directory
# file. This also means that the on-disk directory and data
# files are in a mutually inconsistent state, and they'll
# remain that way until _commit() is called. Note that this
# is a disaster (for the database) if the program crashes
# (so that _commit() never gets called).
def __delitem__(self, key):
# The blocks used by the associated value are lost.
del self._index[key]
# XXX It's unclear why we do a _commit() here (the code always
# XXX has, so I'm not changing it). _setitem__ doesn't try to
# XXX keep the directory file in synch. Why should we? Or
# XXX why shouldn't __setitem__?
self._commit()
def keys(self):
return self._index.keys()
def has_key(self, key):
return key in self._index
def __contains__(self, key):
return key in self._index
def iterkeys(self):
return self._index.iterkeys()
__iter__ = iterkeys
def __len__(self):
return len(self._index)
def close(self):
try:
self._commit()
finally:
self._index = self._datfile = self._dirfile = self._bakfile = None
__del__ = close
def _chmod (self, file):
if hasattr(self._os, 'chmod'):
self._os.chmod(file, self._mode)
def open(file, flag=None, mode=0666):
"""Open the database file, filename, and return corresponding object.
The flag argument, used to control how the database is opened in the
other DBM implementations, is ignored in the dumbdbm module; the
database is always opened for update, and will be created if it does
not exist.
The optional mode argument is the UNIX mode of the file, used only when
the database has to be created. It defaults to octal code 0666 (and
will be modified by the prevailing umask).
"""
# flag argument is currently ignored
# Modify mode depending on the umask
try:
um = _os.umask(0)
_os.umask(um)
except AttributeError:
pass
else:
# Turn off any bits that are set in the umask
mode = mode & (~um)
return _Database(file, mode)
+2 -1
View File
@@ -369,7 +369,8 @@ class Chapter(object):
if chapterdisplays:
string = chapterdisplays[0].get('ChapString')
language = chapterdisplays[0].get('ChapLanguage')
return cls(start, hidden, enabled, end, string, language)
return cls(start, hidden, enabled, end, string, language)
return cls(start, hidden, enabled, end)
def __repr__(self):
return '<%s [%s, enabled=%s]>' % (self.__class__.__name__, self.start, self.enabled)
@@ -168,9 +168,13 @@ def parse(stream, specs, size=None, ignore_element_types=None, ignore_element_na
while size is None or stream.tell() - start < size:
try:
element = parse_element(stream, specs)
if not element or not hasattr(element, "type"):
stream.seek(element.size, 1)
continue
if element.type is None:
logger.error('Element with id 0x%x is not in the specs' % element_id)
stream.seek(element_size, 1)
logger.error('Element with id 0x%x is not in the specs' % element.id)
stream.seek(element.size, 1)
continue
elif element.type in ignore_element_types or element.name in ignore_element_names:
logger.info('%s %s %s ignored', element.__class__.__name__, element.name, element.type)
+411
View File
@@ -0,0 +1,411 @@
# -*- coding: utf-8 -*-
"""
ftfy: fixes text for you
This is a module for making text less broken. See the `fix_text` function
for more information.
"""
from __future__ import unicode_literals
import unicodedata
import ftfy.bad_codecs
from ftfy import fixes
from ftfy.formatting import display_ljust
from ftfy.compatibility import is_printable
__version__ = '4.4.3'
# See the docstring for ftfy.bad_codecs to see what we're doing here.
ftfy.bad_codecs.ok()
def fix_text(text,
fix_entities='auto',
remove_terminal_escapes=True,
fix_encoding=True,
fix_latin_ligatures=True,
fix_character_width=True,
uncurl_quotes=True,
fix_line_breaks=True,
fix_surrogates=True,
remove_control_chars=True,
remove_bom=True,
normalization='NFC',
max_decode_length=10**6):
r"""
Given Unicode text as input, fix inconsistencies and glitches in it,
such as mojibake.
Let's start with some examples:
>>> print(fix_text('ünicode'))
ünicode
>>> print(fix_text('Broken text&hellip; it&#x2019;s flubberific!',
... normalization='NFKC'))
Broken text... it's flubberific!
>>> print(fix_text('HTML entities &lt;3'))
HTML entities <3
>>> print(fix_text('<em>HTML entities &lt;3</em>'))
<em>HTML entities &lt;3</em>
>>> print(fix_text("&macr;\\_(ã\x83\x84)_/&macr;"))
¯\_()_/¯
>>> # This example string starts with a byte-order mark, even if
>>> # you can't see it on the Web.
>>> print(fix_text('\ufeffParty like\nit&rsquo;s 1999!'))
Party like
it's 1999!
>>> print(fix_text('LOUD NOISES'))
LOUD NOISES
>>> len(fix_text('' * 100000))
200000
>>> len(fix_text(''))
0
Based on the options you provide, ftfy applies these steps in order:
- If `remove_terminal_escapes` is True, remove sequences of bytes that are
instructions for Unix terminals, such as the codes that make text appear
in different colors.
- If `fix_encoding` is True, look for common mistakes that come from
encoding or decoding Unicode text incorrectly, and fix them if they are
reasonably fixable. See `fixes.fix_encoding` for details.
- If `fix_entities` is True, replace HTML entities with their equivalent
characters. If it's "auto" (the default), then consider replacing HTML
entities, but don't do so in text where you have seen a pair of actual
angle brackets (that's probably actually HTML and you shouldn't mess
with the entities).
- If `uncurl_quotes` is True, replace various curly quotation marks with
plain-ASCII straight quotes.
- If `fix_latin_ligatures` is True, then ligatures made of Latin letters,
such as ``, will be separated into individual letters. These ligatures
are usually not meaningful outside of font rendering, and often represent
copy-and-paste errors.
- If `fix_character_width` is True, half-width and full-width characters
will be replaced by their standard-width form.
- If `fix_line_breaks` is true, convert all line breaks to Unix style
(CRLF and CR line breaks become LF line breaks).
- If `fix_surrogates` is true, ensure that there are no UTF-16 surrogates
in the resulting string, by converting them to the correct characters
when they're appropriately paired, or replacing them with \ufffd
otherwise.
- If `remove_control_chars` is true, remove control characters that
are not suitable for use in text. This includes most of the ASCII control
characters, plus some Unicode controls such as the byte order mark
(U+FEFF). Useful control characters, such as Tab, Line Feed, and
bidirectional marks, are left as they are.
- If `remove_bom` is True, remove the Byte-Order Mark at the start of the
string if it exists. (This is largely redundant, because it's a special
case of `remove_control_characters`. This option will become deprecated
in a later version.)
- If `normalization` is not None, apply the specified form of Unicode
normalization, which can be one of 'NFC', 'NFKC', 'NFD', and 'NFKD'.
- The default normalization, NFC, combines characters and diacritics that
are written using separate code points, such as converting "e" plus an
acute accent modifier into "é", or converting "ka" () plus a dakuten
into the single character "ga" (). Unicode can be converted to NFC
form without any change in its meaning.
- If you ask for NFKC normalization, it will apply additional
normalizations that can change the meanings of characters. For example,
ellipsis characters will be replaced with three periods, all ligatures
will be replaced with the individual characters that make them up,
and characters that differ in font style will be converted to the same
character.
- If anything was changed, repeat all the steps, so that the function is
idempotent. "&amp;amp;" will become "&", for example, not "&amp;".
`fix_text` will work one line at a time, with the possibility that some
lines are in different encodings, allowing it to fix text that has been
concatenated together from different sources.
When it encounters lines longer than `max_decode_length` (1 million
codepoints by default), it will not run the `fix_encoding` step, to avoid
unbounded slowdowns.
If you're certain that any decoding errors in the text would have affected
the entire text in the same way, and you don't mind operations that scale
with the length of the text, you can use `fix_text_segment` directly to
fix the whole string in one batch.
"""
if isinstance(text, bytes):
raise UnicodeError(fixes.BYTES_ERROR_TEXT)
out = []
pos = 0
while pos < len(text):
textbreak = text.find('\n', pos) + 1
fix_encoding_this_time = fix_encoding
if textbreak == 0:
textbreak = len(text)
if (textbreak - pos) > max_decode_length:
fix_encoding_this_time = False
substring = text[pos:textbreak]
if fix_entities == 'auto' and '<' in substring and '>' in substring:
# we see angle brackets together; this could be HTML
fix_entities = False
out.append(
fix_text_segment(
substring,
fix_entities=fix_entities,
remove_terminal_escapes=remove_terminal_escapes,
fix_encoding=fix_encoding_this_time,
uncurl_quotes=uncurl_quotes,
fix_latin_ligatures=fix_latin_ligatures,
fix_character_width=fix_character_width,
fix_line_breaks=fix_line_breaks,
fix_surrogates=fix_surrogates,
remove_control_chars=remove_control_chars,
remove_bom=remove_bom,
normalization=normalization
)
)
pos = textbreak
return ''.join(out)
# Some alternate names for the main functions
ftfy = fix_text
fix_encoding = fixes.fix_encoding
fix_text_encoding = fixes.fix_text_encoding # deprecated
def fix_file(input_file,
encoding=None,
fix_entities='auto',
remove_terminal_escapes=True,
fix_encoding=True,
fix_latin_ligatures=True,
fix_character_width=True,
uncurl_quotes=True,
fix_line_breaks=True,
fix_surrogates=True,
remove_control_chars=True,
remove_bom=True,
normalization='NFC'):
"""
Fix text that is found in a file.
If the file is being read as Unicode text, use that. If it's being read as
bytes, then we hope an encoding was supplied. If not, unfortunately, we
have to guess what encoding it is. We'll try a few common encodings, but we
make no promises. See the `guess_bytes` function for how this is done.
The output is a stream of fixed lines of text.
"""
entities = fix_entities
for line in input_file:
if isinstance(line, bytes):
if encoding is None:
line, encoding = guess_bytes(line)
else:
line = line.decode(encoding)
if fix_entities == 'auto' and '<' in line and '>' in line:
entities = False
yield fix_text_segment(
line,
fix_entities=entities,
remove_terminal_escapes=remove_terminal_escapes,
fix_encoding=fix_encoding,
fix_latin_ligatures=fix_latin_ligatures,
fix_character_width=fix_character_width,
uncurl_quotes=uncurl_quotes,
fix_line_breaks=fix_line_breaks,
fix_surrogates=fix_surrogates,
remove_control_chars=remove_control_chars,
remove_bom=remove_bom,
normalization=normalization
)
def fix_text_segment(text,
fix_entities='auto',
remove_terminal_escapes=True,
fix_encoding=True,
fix_latin_ligatures=True,
fix_character_width=True,
uncurl_quotes=True,
fix_line_breaks=True,
fix_surrogates=True,
remove_control_chars=True,
remove_bom=True,
normalization='NFC'):
"""
Apply fixes to text in a single chunk. This could be a line of text
within a larger run of `fix_text`, or it could be a larger amount
of text that you are certain is in a consistent encoding.
See `fix_text` for a description of the parameters.
"""
if isinstance(text, bytes):
raise UnicodeError(fixes.BYTES_ERROR_TEXT)
if fix_entities == 'auto' and '<' in text and '>' in text:
fix_entities = False
while True:
origtext = text
if remove_terminal_escapes:
text = fixes.remove_terminal_escapes(text)
if fix_encoding:
text = fixes.fix_encoding(text)
if fix_entities:
text = fixes.unescape_html(text)
if fix_latin_ligatures:
text = fixes.fix_latin_ligatures(text)
if fix_character_width:
text = fixes.fix_character_width(text)
if uncurl_quotes:
text = fixes.uncurl_quotes(text)
if fix_line_breaks:
text = fixes.fix_line_breaks(text)
if fix_surrogates:
text = fixes.fix_surrogates(text)
if remove_control_chars:
text = fixes.remove_control_chars(text)
if remove_bom and not remove_control_chars:
# Skip this step if we've already done `remove_control_chars`,
# because it would be redundant.
text = fixes.remove_bom(text)
if normalization is not None:
text = unicodedata.normalize(normalization, text)
if text == origtext:
return text
def guess_bytes(bstring):
"""
NOTE: Using `guess_bytes` is not the recommended way of using ftfy. ftfy
is not designed to be an encoding detector.
In the unfortunate situation that you have some bytes in an unknown
encoding, ftfy can guess a reasonable strategy for decoding them, by trying
a few common encodings that can be distinguished from each other.
Unlike the rest of ftfy, this may not be accurate, and it may *create*
Unicode problems instead of solving them!
It doesn't try East Asian encodings at all, and if you have East Asian text
that you don't know how to decode, you are somewhat out of luck. East
Asian encodings require some serious statistics to distinguish from each
other, so we can't support them without decreasing the accuracy of ftfy.
If you don't know which encoding you have at all, I recommend
trying the 'chardet' module, and being appropriately skeptical about its
results.
The encodings we try here are:
- UTF-16 with a byte order mark, because a UTF-16 byte order mark looks
like nothing else
- UTF-8, because it's the global standard, which has been used by a
majority of the Web since 2008
- "utf-8-variants", because it's what people actually implement when they
think they're doing UTF-8
- MacRoman, because Microsoft Office thinks it's still a thing, and it
can be distinguished by its line breaks. (If there are no line breaks in
the string, though, you're out of luck.)
- "sloppy-windows-1252", the Latin-1-like encoding that is the most common
single-byte encoding
"""
if type(bstring) == type(''):
raise UnicodeError(
"This string was already decoded as Unicode. You should pass "
"bytes to guess_bytes, not Unicode."
)
if bstring.startswith(b'\xfe\xff') or bstring.startswith(b'\xff\xfe'):
return bstring.decode('utf-16'), 'utf-16'
byteset = set(bytes(bstring))
byte_ed, byte_c0, byte_CR, byte_LF = b'\xed\xc0\r\n'
try:
if byte_ed in byteset or byte_c0 in byteset:
# Byte 0xed can be used to encode a range of codepoints that
# are UTF-16 surrogates. UTF-8 does not use UTF-16 surrogates,
# so when we see 0xed, it's very likely we're being asked to
# decode CESU-8, the variant that encodes UTF-16 surrogates
# instead of the original characters themselves.
#
# This will occasionally trigger on standard UTF-8, as there
# are some Korean characters that also use byte 0xed, but that's
# not harmful.
#
# Byte 0xc0 is impossible because, numerically, it would only
# encode characters lower than U+0040. Those already have
# single-byte representations, and UTF-8 requires using the
# shortest possible representation. However, Java hides the null
# codepoint, U+0000, in a non-standard longer representation -- it
# encodes it as 0xc0 0x80 instead of 0x00, guaranteeing that 0x00
# will never appear in the encoded bytes.
#
# The 'utf-8-variants' decoder can handle both of these cases, as
# well as standard UTF-8, at the cost of a bit of speed.
return bstring.decode('utf-8-variants'), 'utf-8-variants'
else:
return bstring.decode('utf-8'), 'utf-8'
except UnicodeDecodeError:
pass
if byte_CR in bstring and byte_LF not in bstring:
return bstring.decode('macroman'), 'macroman'
else:
return bstring.decode('sloppy-windows-1252'), 'sloppy-windows-1252'
def explain_unicode(text):
"""
A utility method that's useful for debugging mysterious Unicode.
It breaks down a string, showing you for each codepoint its number in
hexadecimal, its glyph, its category in the Unicode standard, and its name
in the Unicode standard.
>>> explain_unicode('(╯°□°)╯︵ ┻━┻')
U+0028 ( [Ps] LEFT PARENTHESIS
U+256F [So] BOX DRAWINGS LIGHT ARC UP AND LEFT
U+00B0 ° [So] DEGREE SIGN
U+25A1 [So] WHITE SQUARE
U+00B0 ° [So] DEGREE SIGN
U+0029 ) [Pe] RIGHT PARENTHESIS
U+256F [So] BOX DRAWINGS LIGHT ARC UP AND LEFT
U+FE35 [Ps] PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
U+0020 [Zs] SPACE
U+253B [So] BOX DRAWINGS HEAVY UP AND HORIZONTAL
U+2501 [So] BOX DRAWINGS HEAVY HORIZONTAL
U+253B [So] BOX DRAWINGS HEAVY UP AND HORIZONTAL
"""
for char in text:
if is_printable(char):
display = char
else:
display = char.encode('unicode-escape').decode('ascii')
print('U+{code:04X} {display} [{category}] {name}'.format(
display=display_ljust(display, 7),
code=ord(char),
category=unicodedata.category(char),
name=unicodedata.name(char, '<unknown>')
))
@@ -0,0 +1,94 @@
# coding: utf-8
r"""
Give Python the ability to decode some common, flawed encodings.
Python does not want you to be sloppy with your text. Its encoders and decoders
("codecs") follow the relevant standards whenever possible, which means that
when you get text that *doesn't* follow those standards, you'll probably fail
to decode it. Or you might succeed at decoding it for implementation-specific
reasons, which is perhaps worse.
There are some encodings out there that Python wishes didn't exist, which are
widely used outside of Python:
- "utf-8-variants", a family of not-quite-UTF-8 encodings, including the
ever-popular CESU-8 and "Java modified UTF-8".
- "Sloppy" versions of character map encodings, where bytes that don't map to
anything will instead map to the Unicode character with the same number.
Simply importing this module, or in fact any part of the `ftfy` package, will
make these new "bad codecs" available to Python through the standard Codecs
API. You never have to actually call any functions inside `ftfy.bad_codecs`.
However, if you want to call something because your code checker insists on it,
you can call ``ftfy.bad_codecs.ok()``.
A quick example of decoding text that's encoded in CESU-8:
>>> import ftfy.bad_codecs
>>> print(b'\xed\xa0\xbd\xed\xb8\x8d'.decode('utf-8-variants'))
😍
"""
from __future__ import unicode_literals
from encodings import normalize_encoding
import codecs
_CACHE = {}
# Define some aliases for 'utf-8-variants'. All hyphens get turned into
# underscores, because of `normalize_encoding`.
UTF8_VAR_NAMES = (
'utf_8_variants', 'utf8_variants',
'utf_8_variant', 'utf8_variant',
'utf_8_var', 'utf8_var',
'cesu_8', 'cesu8',
'java_utf_8', 'java_utf8'
)
def search_function(encoding):
"""
Register our "bad codecs" with Python's codecs API. This involves adding
a search function that takes in an encoding name, and returns a codec
for that encoding if it knows one, or None if it doesn't.
The encodings this will match are:
- Encodings of the form 'sloppy-windows-NNNN' or 'sloppy-iso-8859-N',
where the non-sloppy version is an encoding that leaves some bytes
unmapped to characters.
- The 'utf-8-variants' encoding, which has the several aliases seen
above.
"""
if encoding in _CACHE:
return _CACHE[encoding]
norm_encoding = normalize_encoding(encoding)
codec = None
if norm_encoding in UTF8_VAR_NAMES:
from ftfy.bad_codecs.utf8_variants import CODEC_INFO
codec = CODEC_INFO
elif norm_encoding.startswith('sloppy_'):
from ftfy.bad_codecs.sloppy import CODECS
codec = CODECS.get(norm_encoding)
if codec is not None:
_CACHE[encoding] = codec
return codec
def ok():
"""
A feel-good function that gives you something to call after importing
this package.
Why is this here? Pyflakes. Pyflakes gets upset when you import a module
and appear not to use it. It doesn't know that you're using it when
you use the ``unicode.encode`` and ``bytes.decode`` methods with certain
encodings.
"""
pass
codecs.register(search_function)
@@ -0,0 +1,164 @@
# coding: utf-8
r"""
Decodes single-byte encodings, filling their "holes" in the same messy way that
everyone else does.
A single-byte encoding maps each byte to a Unicode character, except that some
bytes are left unmapped. In the commonly-used Windows-1252 encoding, for
example, bytes 0x81 and 0x8D, among others, have no meaning.
Python, wanting to preserve some sense of decorum, will handle these bytes
as errors. But Windows knows that 0x81 and 0x8D are possible bytes and they're
different from each other. It just hasn't defined what they are in terms of
Unicode.
Software that has to interoperate with Windows-1252 and Unicode -- such as all
the common Web browsers -- will pick some Unicode characters for them to map
to, and the characters they pick are the Unicode characters with the same
numbers: U+0081 and U+008D. This is the same as what Latin-1 does, and the
resulting characters tend to fall into a range of Unicode that's set aside for
obselete Latin-1 control characters anyway.
These sloppy codecs let Python do the same thing, thus interoperating with
other software that works this way. It defines a sloppy version of many
single-byte encodings with holes. (There is no need for a sloppy version of
an encoding without holes: for example, there is no such thing as
sloppy-iso-8859-2 or sloppy-macroman.)
The following encodings will become defined:
- sloppy-windows-1250 (Central European, sort of based on ISO-8859-2)
- sloppy-windows-1251 (Cyrillic)
- sloppy-windows-1252 (Western European, based on Latin-1)
- sloppy-windows-1253 (Greek, sort of based on ISO-8859-7)
- sloppy-windows-1254 (Turkish, based on ISO-8859-9)
- sloppy-windows-1255 (Hebrew, based on ISO-8859-8)
- sloppy-windows-1256 (Arabic)
- sloppy-windows-1257 (Baltic, based on ISO-8859-13)
- sloppy-windows-1258 (Vietnamese)
- sloppy-cp874 (Thai, based on ISO-8859-11)
- sloppy-iso-8859-3 (Maltese and Esperanto, I guess)
- sloppy-iso-8859-6 (different Arabic)
- sloppy-iso-8859-7 (Greek)
- sloppy-iso-8859-8 (Hebrew)
- sloppy-iso-8859-11 (Thai)
Aliases such as "sloppy-cp1252" for "sloppy-windows-1252" will also be
defined.
Only sloppy-windows-1251 and sloppy-windows-1252 are used by the rest of ftfy;
the rest are rather uncommon.
Here are some examples, using `ftfy.explain_unicode` to illustrate how
sloppy-windows-1252 merges Windows-1252 with Latin-1:
>>> from ftfy import explain_unicode
>>> some_bytes = b'\x80\x81\x82'
>>> explain_unicode(some_bytes.decode('latin-1'))
U+0080 \x80 [Cc] <unknown>
U+0081 \x81 [Cc] <unknown>
U+0082 \x82 [Cc] <unknown>
>>> explain_unicode(some_bytes.decode('windows-1252', 'replace'))
U+20AC [Sc] EURO SIGN
U+FFFD [So] REPLACEMENT CHARACTER
U+201A [Ps] SINGLE LOW-9 QUOTATION MARK
>>> explain_unicode(some_bytes.decode('sloppy-windows-1252'))
U+20AC [Sc] EURO SIGN
U+0081 \x81 [Cc] <unknown>
U+201A [Ps] SINGLE LOW-9 QUOTATION MARK
"""
from __future__ import unicode_literals
import codecs
from encodings import normalize_encoding
import sys
REPLACEMENT_CHAR = '\ufffd'
PY26 = sys.version_info[:2] == (2, 6)
def make_sloppy_codec(encoding):
"""
Take a codec name, and return a 'sloppy' version of that codec that can
encode and decode the unassigned bytes in that encoding.
Single-byte encodings in the standard library are defined using some
boilerplate classes surrounding the functions that do the actual work,
`codecs.charmap_decode` and `charmap_encode`. This function, given an
encoding name, *defines* those boilerplate classes.
"""
# Make an array of all 256 possible bytes.
all_bytes = bytearray(range(256))
# Get a list of what they would decode to in Latin-1.
sloppy_chars = list(all_bytes.decode('latin-1'))
# Get a list of what they decode to in the given encoding. Use the
# replacement character for unassigned bytes.
if PY26:
decoded_chars = all_bytes.decode(encoding, 'replace')
else:
decoded_chars = all_bytes.decode(encoding, errors='replace')
# Update the sloppy_chars list. Each byte that was successfully decoded
# gets its decoded value in the list. The unassigned bytes are left as
# they are, which gives their decoding in Latin-1.
for i, char in enumerate(decoded_chars):
if char != REPLACEMENT_CHAR:
sloppy_chars[i] = char
# For ftfy's own purposes, we're going to allow byte 1A, the "Substitute"
# control code, to encode the Unicode replacement character U+FFFD.
sloppy_chars[0x1a] = REPLACEMENT_CHAR
# Create the data structures that tell the charmap methods how to encode
# and decode in this sloppy encoding.
decoding_table = ''.join(sloppy_chars)
encoding_table = codecs.charmap_build(decoding_table)
# Now produce all the class boilerplate. Look at the Python source for
# `encodings.cp1252` for comparison; this is almost exactly the same,
# except I made it follow pep8.
class Codec(codecs.Codec):
def encode(self, input, errors='strict'):
return codecs.charmap_encode(input, errors, encoding_table)
def decode(self, input, errors='strict'):
return codecs.charmap_decode(input, errors, decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input, self.errors, encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input, self.errors, decoding_table)[0]
class StreamWriter(Codec, codecs.StreamWriter):
pass
class StreamReader(Codec, codecs.StreamReader):
pass
return codecs.CodecInfo(
name='sloppy-' + encoding,
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
# Define a codec for each incomplete encoding. The resulting CODECS dictionary
# can be used by the main module of ftfy.bad_codecs.
CODECS = {}
INCOMPLETE_ENCODINGS = (
['windows-%s' % num for num in range(1250, 1259)] +
['iso-8859-%s' % num for num in (3, 6, 7, 8, 11)] +
['cp%s' % num for num in range(1250, 1259)] + ['cp874']
)
for _encoding in INCOMPLETE_ENCODINGS:
_new_name = normalize_encoding('sloppy-' + _encoding)
CODECS[_new_name] = make_sloppy_codec(_encoding)
@@ -0,0 +1,282 @@
r"""
This file defines a codec called "utf-8-variants" (or "utf-8-var"), which can
decode text that's been encoded with a popular non-standard version of UTF-8.
This includes CESU-8, the accidental encoding made by layering UTF-8 on top of
UTF-16, as well as Java's twist on CESU-8 that contains a two-byte encoding for
codepoint 0.
This is particularly relevant in Python 3, which provides no other way of
decoding CESU-8 [1]_.
The easiest way to use the codec is to simply import `ftfy.bad_codecs`:
>>> import ftfy.bad_codecs
>>> result = b'here comes a null! \xc0\x80'.decode('utf-8-var')
>>> print(repr(result).lstrip('u'))
'here comes a null! \x00'
The codec does not at all enforce "correct" CESU-8. For example, the Unicode
Consortium's not-quite-standard describing CESU-8 requires that there is only
one possible encoding of any character, so it does not allow mixing of valid
UTF-8 and CESU-8. This codec *does* allow that, just like Python 2's UTF-8
decoder does.
Characters in the Basic Multilingual Plane still have only one encoding. This
codec still enforces the rule, within the BMP, that characters must appear in
their shortest form. There is one exception: the sequence of bytes `0xc0 0x80`,
instead of just `0x00`, may be used to encode the null character `U+0000`, like
in Java.
If you encode with this codec, you get legitimate UTF-8. Decoding with this
codec and then re-encoding is not idempotent, although encoding and then
decoding is. So this module won't produce CESU-8 for you. Look for that
functionality in the sister module, "Breaks Text For You", coming approximately
never.
.. [1] In a pinch, you can decode CESU-8 in Python 2 using the UTF-8 codec:
first decode the bytes (incorrectly), then encode them, then decode them
again, using UTF-8 as the codec every time.
"""
from __future__ import unicode_literals
import re
import codecs
from encodings.utf_8 import (IncrementalDecoder as UTF8IncrementalDecoder,
IncrementalEncoder as UTF8IncrementalEncoder)
from ftfy.compatibility import bytes_to_ints, unichr, PYTHON2
NAME = 'utf-8-variants'
# This regular expression matches all possible six-byte CESU-8 sequences,
# plus truncations of them at the end of the string. (If any of the
# subgroups matches $, then all the subgroups after it also have to match $,
# as there are no more characters to match.)
CESU8_EXPR = (
b'('
b'\xed'
b'([\xa0-\xaf]|$)'
b'([\x80-\xbf]|$)'
b'(\xed|$)'
b'([\xb0-\xbf]|$)'
b'([\x80-\xbf]|$)'
b')'
)
CESU8_RE = re.compile(CESU8_EXPR)
# This expression matches isolated surrogate characters that aren't
# CESU-8, which have to be handled carefully on Python 2.
SURROGATE_EXPR = (b'(\xed([\xa0-\xbf]|$)([\x80-\xbf]|$))')
# This expression matches the Java encoding of U+0, including if it's
# truncated and we need more bytes.
NULL_EXPR = b'(\xc0(\x80|$))'
# This regex matches cases that we need to decode differently from
# standard UTF-8.
SPECIAL_BYTES_RE = re.compile(b'|'.join([NULL_EXPR, CESU8_EXPR, SURROGATE_EXPR]))
class IncrementalDecoder(UTF8IncrementalDecoder):
"""
An incremental decoder that extends Python's built-in UTF-8 decoder.
This encoder needs to take in bytes, possibly arriving in a stream, and
output the correctly decoded text. The general strategy for doing this
is to fall back on the real UTF-8 decoder whenever possible, because
the real UTF-8 decoder is way optimized, but to call specialized methods
we define here for the cases the real encoder isn't expecting.
"""
def _buffer_decode(self, input, errors, final):
"""
Decode bytes that may be arriving in a stream, following the Codecs
API.
`input` is the incoming sequence of bytes. `errors` tells us how to
handle errors, though we delegate all error-handling cases to the real
UTF-8 decoder to ensure correct behavior. `final` indicates whether
this is the end of the sequence, in which case we should raise an
error given incomplete input.
Returns as much decoded text as possible, and the number of bytes
consumed.
"""
# decoded_segments are the pieces of text we have decoded so far,
# and position is our current position in the byte string. (Bytes
# before this position have been consumed, and bytes after it have
# yet to be decoded.)
decoded_segments = []
position = 0
while True:
# Use _buffer_decode_step to decode a segment of text.
decoded, consumed = self._buffer_decode_step(
input[position:],
errors,
final
)
if consumed == 0:
# Either there's nothing left to decode, or we need to wait
# for more input. Either way, we're done for now.
break
# Append the decoded text to the list, and update our position.
decoded_segments.append(decoded)
position += consumed
if final:
# _buffer_decode_step must consume all the bytes when `final` is
# true.
assert position == len(input)
return ''.join(decoded_segments), position
def _buffer_decode_step(self, input, errors, final):
"""
There are three possibilities for each decoding step:
- Decode as much real UTF-8 as possible.
- Decode a six-byte CESU-8 sequence at the current position.
- Decode a Java-style null at the current position.
This method figures out which step is appropriate, and does it.
"""
# Get a reference to the superclass method that we'll be using for
# most of the real work.
sup = UTF8IncrementalDecoder._buffer_decode
# Find the next byte position that indicates a variant of UTF-8.
match = SPECIAL_BYTES_RE.search(input)
if match is None:
return sup(input, errors, final)
cutoff = match.start()
if cutoff > 0:
return sup(input[:cutoff], errors, True)
# Some byte sequence that we intend to handle specially matches
# at the beginning of the input.
if input.startswith(b'\xc0'):
if len(input) > 1:
# Decode the two-byte sequence 0xc0 0x80.
return '\u0000', 2
else:
if final:
# We hit the end of the stream. Let the superclass method
# handle it.
return sup(input, errors, True)
else:
# Wait to see another byte.
return '', 0
else:
# Decode a possible six-byte sequence starting with 0xed.
return self._buffer_decode_surrogates(sup, input, errors, final)
@staticmethod
def _buffer_decode_surrogates(sup, input, errors, final):
"""
When we have improperly encoded surrogates, we can still see the
bits that they were meant to represent.
The surrogates were meant to encode a 20-bit number, to which we
add 0x10000 to get a codepoint. That 20-bit number now appears in
this form:
11101101 1010abcd 10efghij 11101101 1011klmn 10opqrst
The CESU8_RE above matches byte sequences of this form. Then we need
to extract the bits and assemble a codepoint number from them.
"""
if len(input) < 6:
if final:
# We found 0xed near the end of the stream, and there aren't
# six bytes to decode. Delegate to the superclass method to
# handle it as an error.
if PYTHON2 and len(input) >= 3:
# We can't trust Python 2 to raise an error when it's
# asked to decode a surrogate, so let's force the issue.
input = mangle_surrogates(input)
return sup(input, errors, final)
else:
# We found a surrogate, the stream isn't over yet, and we don't
# know enough of the following bytes to decode anything, so
# consume zero bytes and wait.
return '', 0
else:
if CESU8_RE.match(input):
# Given this is a CESU-8 sequence, do some math to pull out
# the intended 20-bit value, and consume six bytes.
bytenums = bytes_to_ints(input[:6])
codepoint = (
((bytenums[1] & 0x0f) << 16) +
((bytenums[2] & 0x3f) << 10) +
((bytenums[4] & 0x0f) << 6) +
(bytenums[5] & 0x3f) +
0x10000
)
return unichr(codepoint), 6
else:
# This looked like a CESU-8 sequence, but it wasn't one.
# 0xed indicates the start of a three-byte sequence, so give
# three bytes to the superclass to decode as usual -- except
# for working around the Python 2 discrepancy as before.
if PYTHON2:
input = mangle_surrogates(input)
return sup(input[:3], errors, False)
def mangle_surrogates(bytestring):
"""
When Python 3 sees the UTF-8 encoding of a surrogate codepoint, it treats
it as an error (which it is). In 'replace' mode, it will decode as three
replacement characters. But Python 2 will just output the surrogate
codepoint.
To ensure consistency between Python 2 and Python 3, and protect downstream
applications from malformed strings, we turn surrogate sequences at the
start of the string into the bytes `ff ff ff`, which we're *sure* won't
decode, and which turn into three replacement characters in 'replace' mode.
This function does nothing in Python 3, and it will be deprecated in ftfy
5.0.
"""
if PYTHON2:
if bytestring.startswith(b'\xed') and len(bytestring) >= 3:
decoded = bytestring[:3].decode('utf-8', 'replace')
if '\ud800' <= decoded <= '\udfff':
return b'\xff\xff\xff' + mangle_surrogates(bytestring[3:])
return bytestring
else:
# On Python 3, nothing needs to be done.
return bytestring
# The encoder is identical to UTF-8.
IncrementalEncoder = UTF8IncrementalEncoder
# Everything below here is boilerplate that matches the modules in the
# built-in `encodings` package.
def encode(input, errors='strict'):
return IncrementalEncoder(errors).encode(input, final=True), len(input)
def decode(input, errors='strict'):
return IncrementalDecoder(errors).decode(input, final=True), len(input)
class StreamWriter(codecs.StreamWriter):
encode = encode
class StreamReader(codecs.StreamReader):
decode = decode
CODEC_INFO = codecs.CodecInfo(
name=NAME,
encode=encode,
decode=decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
+162
View File
@@ -0,0 +1,162 @@
# -*- coding: utf-8 -*-
"""
Heuristics to determine whether re-encoding text is actually making it
more reasonable.
"""
from __future__ import unicode_literals
import re
import unicodedata
from ftfy.chardata import chars_to_classes
# The following regex uses the mapping of character classes to ASCII
# characters defined in chardata.py and build_data.py:
#
# L = Latin capital letter
# l = Latin lowercase letter
# A = Non-latin capital or title-case letter
# a = Non-latin lowercase letter
# C = Non-cased letter (Lo)
# X = Control character (Cc)
# m = Letter modifier (Lm)
# M = Mark (Mc, Me, Mn)
# N = Miscellaneous numbers (No)
# 1 = Math symbol (Sm) or currency symbol (Sc)
# 2 = Symbol modifier (Sk)
# 3 = Other symbol (So)
# S = UTF-16 surrogate
# _ = Unassigned character
# = Whitespace
# o = Other
def _make_weirdness_regex():
"""
Creates a list of regexes that match 'weird' character sequences.
The more matches there are, the weirder the text is.
"""
groups = []
# Match lowercase letters that are followed by non-ASCII uppercase letters
groups.append('lA')
# Match diacritical marks, except when they modify a non-cased letter or
# another mark.
#
# You wouldn't put a diacritical mark on a digit or a space, for example.
# You might put it on a Latin letter, but in that case there will almost
# always be a pre-composed version, and we normalize to pre-composed
# versions first. The cases that can't be pre-composed tend to be in
# large scripts without case, which are in class C.
groups.append('[^CM]M')
# Match non-Latin characters adjacent to Latin characters.
#
# This is a simplification from ftfy version 2, which compared all
# adjacent scripts. However, the ambiguities we need to resolve come from
# encodings designed to represent Latin characters.
groups.append('[Ll][AaC]')
groups.append('[AaC][Ll]')
# Match IPA letters next to capital letters.
#
# IPA uses lowercase letters only. Some accented capital letters next to
# punctuation can accidentally decode as IPA letters, and an IPA letter
# appearing next to a capital letter is a good sign that this happened.
groups.append('[LA]i')
groups.append('i[LA]')
# Match non-combining diacritics. We've already set aside the common ones
# like ^ (the CIRCUMFLEX ACCENT, repurposed as a caret, exponent sign,
# or happy eye) and assigned them to category 'o'. The remaining ones,
# like the diaeresis (¨), are pretty weird to see on their own instead
# of combined with a letter.
groups.append('2')
# Match C1 control characters, which are almost always the result of
# decoding Latin-1 that was meant to be Windows-1252.
groups.append('X')
# Match private use and unassigned characters.
groups.append('P')
groups.append('_')
# Match adjacent characters from any different pair of these categories:
# - Modifier marks (M)
# - Letter modifiers (m)
# - Miscellaneous numbers (N)
# - Symbols (1 or 3, because 2 is already weird on its own)
exclusive_categories = 'MmN13'
for cat1 in exclusive_categories:
others_range = ''.join(c for c in exclusive_categories if c != cat1)
groups.append('{cat1}[{others_range}]'.format(
cat1=cat1, others_range=others_range
))
regex = '|'.join('({0})'.format(group) for group in groups)
return re.compile(regex)
WEIRDNESS_RE = _make_weirdness_regex()
# These characters appear in mojibake but also appear commonly on their own.
# We have a slight preference to leave them alone.
COMMON_SYMBOL_RE = re.compile(
'['
'\N{HORIZONTAL ELLIPSIS}\N{EM DASH}\N{EN DASH}'
'\N{LEFT SINGLE QUOTATION MARK}\N{LEFT DOUBLE QUOTATION MARK}'
'\N{RIGHT SINGLE QUOTATION MARK}\N{RIGHT DOUBLE QUOTATION MARK}'
'\N{INVERTED EXCLAMATION MARK}\N{INVERTED QUESTION MARK}\N{DEGREE SIGN}'
'\N{TRADE MARK SIGN}'
'\N{REGISTERED SIGN}'
'\N{SINGLE LEFT-POINTING ANGLE QUOTATION MARK}'
'\N{SINGLE RIGHT-POINTING ANGLE QUOTATION MARK}'
'\N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}'
'\N{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}'
'\N{NO-BREAK SPACE}'
'\N{ACUTE ACCENT}\N{MULTIPLICATION SIGN}\N{LATIN SMALL LETTER SHARP S}'
'\ufeff' # The byte-order mark, whose encoding '' looks common
']'
)
def sequence_weirdness(text):
"""
Determine how often a text has unexpected characters or sequences of
characters. This metric is used to disambiguate when text should be
re-decoded or left as is.
We start by normalizing text in NFC form, so that penalties for
diacritical marks don't apply to characters that know what to do with
them.
The following things are deemed weird:
- Lowercase letters followed by non-ASCII uppercase letters
- Non-Latin characters next to Latin characters
- Un-combined diacritical marks, unless they're stacking on non-alphabetic
characters (in languages that do that kind of thing a lot) or other
marks
- C1 control characters
- Adjacent symbols from any different pair of these categories:
- Modifier marks
- Letter modifiers
- Non-digit numbers
- Symbols (including math and currency)
The return value is the number of instances of weirdness.
"""
text2 = unicodedata.normalize('NFC', text)
weirdness = len(WEIRDNESS_RE.findall(chars_to_classes(text2)))
punct_discount = len(COMMON_SYMBOL_RE.findall(text2))
return weirdness * 2 - punct_discount
def text_cost(text):
"""
An overall cost function for text. Weirder is worse, but all else being
equal, shorter strings are better.
The overall cost is measured as the "weirdness" (see
:func:`sequence_weirdness`) plus the length.
"""
return sequence_weirdness(text) + len(text)
@@ -0,0 +1,132 @@
"""
A script to make the char_classes.dat file.
This never needs to run in normal usage. It needs to be run if the character
classes we care about change, or if a new version of Python supports a new
Unicode standard and we want it to affect our string decoding.
The file that we generate is based on Unicode 9.0, as supported by Python 3.6.
You can certainly use it in earlier versions. This simply makes sure that we
get consistent results from running ftfy on different versions of Python.
The file will be written to the current directory.
"""
from __future__ import unicode_literals
import unicodedata
import sys
import zlib
if sys.hexversion >= 0x03000000:
unichr = chr
# L = Latin capital letter
# l = Latin lowercase letter
# A = Non-latin capital or title-case letter
# a = Non-latin lowercase letter
# C = Non-cased letter (Lo)
# X = Control character (Cc)
# m = Letter modifier (Lm)
# M = Mark (Mc, Me, Mn)
# N = Miscellaneous numbers (No)
# P = Private use (Co)
# 1 = Math symbol (Sm) or currency symbol (Sc)
# 2 = Symbol modifier (Sk)
# 3 = Other symbol (So)
# S = UTF-16 surrogate
# _ = Unassigned character
# = Whitespace
# o = Other
def make_char_data_file(do_it_anyway=False):
"""
Build the compressed data file 'char_classes.dat' and write it to the
current directory.
If you run this, run it in Python 3.6 or later. It will run in earlier
versions, but you won't get the Unicode 9 standard, leading to inconsistent
behavior.
To protect against this, running this in the wrong version of Python will
raise an error unless you pass `do_it_anyway=True`.
"""
if sys.hexversion < 0x03060000 and not do_it_anyway:
raise RuntimeError(
"This function should be run in Python 3.6 or later."
)
cclasses = [None] * 0x110000
for codepoint in range(0x0, 0x110000):
char = unichr(codepoint)
category = unicodedata.category(char)
if (0x250 <= codepoint < 0x300) and char != 'ə':
# IPA symbols and modifiers.
#
# This category excludes the schwa (ə), which is used as a normal
# Latin letter in some languages.
cclasses[codepoint] = 'i'
elif category.startswith('L'): # letters
if unicodedata.name(char, '').startswith('LATIN'):
if category == 'Lu':
cclasses[codepoint] = 'L'
else:
cclasses[codepoint] = 'l'
else:
if category == 'Lu' or category == 'Lt':
cclasses[codepoint] = 'A'
elif category == 'Ll':
cclasses[codepoint] = 'a'
elif category == 'Lo':
cclasses[codepoint] = 'C'
elif category == 'Lm':
cclasses[codepoint] = 'm'
else:
raise ValueError('got some weird kind of letter')
elif 0xfe00 <= codepoint <= 0xfe0f or 0x1f3fb <= codepoint <= 0x1f3ff:
# Variation selectors and skin-tone modifiers have the category
# of non-spacing marks, but they act like symbols
cclasses[codepoint] = '3'
elif category.startswith('M'): # marks
cclasses[codepoint] = 'M'
elif category == 'No':
cclasses[codepoint] = 'N'
elif category == 'Sm' or category == 'Sc':
cclasses[codepoint] = '1'
elif category == 'Sk':
cclasses[codepoint] = '2'
elif category == 'So':
cclasses[codepoint] = '3'
elif category == 'Cc':
cclasses[codepoint] = 'X'
elif category == 'Cs':
cclasses[codepoint] = 'S'
elif category == 'Co':
cclasses[codepoint] = 'P'
elif category.startswith('Z'):
cclasses[codepoint] = ' '
elif 0x1f000 <= codepoint <= 0x1ffff:
# This range is rapidly having emoji added to it. Assume that
# an unassigned codepoint in this range is just a symbol we
# don't know yet.
cclasses[codepoint] = '3'
elif category == 'Cn':
cclasses[codepoint] = '_'
else:
cclasses[codepoint] = 'o'
# Mark whitespace control characters as whitespace
cclasses[9] = cclasses[10] = cclasses[12] = cclasses[13] = ' '
# Some other exceptions for characters that are more commonly used as
# punctuation or decoration than for their ostensible purpose.
# For example, tilde is not usually a "math symbol", and the accents
# `´ are much more like quotation marks than modifiers.
for char in "^~`´˝^`":
cclasses[ord(char)] = 'o'
out = open('char_classes.dat', 'wb')
out.write(zlib.compress(''.join(cclasses).encode('ascii')))
out.close()
if __name__ == '__main__':
make_char_data_file()
Binary file not shown.
+215
View File
@@ -0,0 +1,215 @@
# -*- coding: utf-8 -*-
"""
This gives other modules access to the gritty details about characters and the
encodings that use them.
"""
from __future__ import unicode_literals
import re
import zlib
import unicodedata
import itertools
from pkg_resources import resource_string
from ftfy.compatibility import unichr
# These are the encodings we will try to fix in ftfy, in the
# order that they should be tried.
CHARMAP_ENCODINGS = [
'latin-1',
'sloppy-windows-1252',
'sloppy-windows-1250',
'iso-8859-2',
'sloppy-windows-1251',
'macroman',
'cp437',
]
def _build_regexes():
"""
ENCODING_REGEXES contain reasonably fast ways to detect if we
could represent a given string in a given encoding. The simplest one is
the 'ascii' detector, which of course just determines if all characters
are between U+0000 and U+007F.
"""
# Define a regex that matches ASCII text.
encoding_regexes = {'ascii': re.compile('^[\x00-\x7f]*$')}
for encoding in CHARMAP_ENCODINGS:
# Make a sequence of characters that bytes \x80 to \xFF decode to
# in each encoding, as well as byte \x1A, which is used to represent
# the replacement character in the sloppy-* encodings.
latin1table = ''.join(unichr(i) for i in range(128, 256)) + '\x1a'
charlist = latin1table.encode('latin-1').decode(encoding)
# The rest of the ASCII bytes -- bytes \x00 to \x19 and \x1B
# to \x7F -- will decode as those ASCII characters in any encoding we
# support, so we can just include them as ranges. This also lets us
# not worry about escaping regex special characters, because all of
# them are in the \x1B to \x7F range.
regex = '^[\x00-\x19\x1b-\x7f{0}]*$'.format(charlist)
encoding_regexes[encoding] = re.compile(regex)
return encoding_regexes
ENCODING_REGEXES = _build_regexes()
def _build_utf8_punct_regex():
"""
Recognize UTF-8 mojibake that's so blatant that we can fix it even when the
rest of the string doesn't decode as UTF-8 -- namely, UTF-8 sequences for
the 'General Punctuation' characters U+2000 to U+2040, re-encoded in
Windows-1252.
These are recognizable by the distinctive 'â€' ('\xe2\x80') sequence they
all begin with when decoded as Windows-1252.
"""
# We're making a regex that has all the literal bytes from 0x80 to 0xbf in
# a range. "Couldn't this have just said [\x80-\xbf]?", you might ask.
# However, when we decode the regex as Windows-1252, the resulting
# characters won't even be remotely contiguous.
#
# Unrelatedly, the expression that generates these bytes will be so much
# prettier when we deprecate Python 2.
continuation_char_list = ''.join(
unichr(i) for i in range(0x80, 0xc0)
).encode('latin-1')
obvious_utf8 = ('â€['
+ continuation_char_list.decode('sloppy-windows-1252')
+ ']')
return re.compile(obvious_utf8)
PARTIAL_UTF8_PUNCT_RE = _build_utf8_punct_regex()
# Recognize UTF-8 sequences that would be valid if it weren't for a b'\xa0'
# that some Windows-1252 program converted to a plain space.
#
# The smaller values are included on a case-by-case basis, because we don't want
# to decode likely input sequences to unlikely characters. These are the ones
# that *do* form likely characters before 0xa0:
#
# 0xc2 -> U+A0 NO-BREAK SPACE
# 0xc3 -> U+E0 LATIN SMALL LETTER A WITH GRAVE
# 0xc5 -> U+160 LATIN CAPITAL LETTER S WITH CARON
# 0xce -> U+3A0 GREEK CAPITAL LETTER PI
# 0xd0 -> U+420 CYRILLIC CAPITAL LETTER ER
#
# These still need to come with a cost, so that they only get converted when
# there's evidence that it fixes other things. Any of these could represent
# characters that legitimately appear surrounded by spaces, particularly U+C5
# (Å), which is a word in multiple languages!
#
# We should consider checking for b'\x85' being converted to ... in the future.
# I've seen it once, but the text still wasn't recoverable.
ALTERED_UTF8_RE = re.compile(b'[\xc2\xc3\xc5\xce\xd0][ ]'
b'|[\xe0-\xef][ ][\x80-\xbf]'
b'|[\xe0-\xef][\x80-\xbf][ ]'
b'|[\xf0-\xf4][ ][\x80-\xbf][\x80-\xbf]'
b'|[\xf0-\xf4][\x80-\xbf][ ][\x80-\xbf]'
b'|[\xf0-\xf4][\x80-\xbf][\x80-\xbf][ ]')
# This expression matches UTF-8 and CESU-8 sequences where some of the
# continuation bytes have been lost. The byte 0x1a (sometimes written as ^Z) is
# used within ftfy to represent a byte that produced the replacement character
# \ufffd. We don't know which byte it was, but we can at least decode the UTF-8
# sequence as \ufffd instead of failing to re-decode it at all.
LOSSY_UTF8_RE = re.compile(
b'[\xc2-\xdf][\x1a]'
b'|\xed[\xa0-\xaf][\x1a]\xed[\xb0-\xbf][\x1a\x80-\xbf]'
b'|\xed[\xa0-\xaf][\x1a\x80-\xbf]\xed[\xb0-\xbf][\x1a]'
b'|[\xe0-\xef][\x1a][\x1a\x80-\xbf]'
b'|[\xe0-\xef][\x1a\x80-\xbf][\x1a]'
b'|[\xf0-\xf4][\x1a][\x1a\x80-\xbf][\x1a\x80-\xbf]'
b'|[\xf0-\xf4][\x1a\x80-\xbf][\x1a][\x1a\x80-\xbf]'
b'|[\xf0-\xf4][\x1a\x80-\xbf][\x1a\x80-\xbf][\x1a]'
b'|\x1a'
)
# These regexes match various Unicode variations on single and double quotes.
SINGLE_QUOTE_RE = re.compile('[\u2018-\u201b]')
DOUBLE_QUOTE_RE = re.compile('[\u201c-\u201f]')
def possible_encoding(text, encoding):
"""
Given text and a single-byte encoding, check whether that text could have
been decoded from that single-byte encoding.
In other words, check whether it can be encoded in that encoding, possibly
sloppily.
"""
return bool(ENCODING_REGEXES[encoding].match(text))
CHAR_CLASS_STRING = zlib.decompress(
resource_string(__name__, 'char_classes.dat')
).decode('ascii')
def chars_to_classes(string):
"""
Convert each Unicode character to a letter indicating which of many
classes it's in.
See build_data.py for where this data comes from and what it means.
"""
return string.translate(CHAR_CLASS_STRING)
def _build_control_char_mapping():
"""
Build a translate mapping that strips likely-unintended control characters.
See :func:`ftfy.fixes.remove_control_chars` for a description of these
codepoint ranges and why they should be removed.
"""
control_chars = {}
for i in itertools.chain(
range(0x00, 0x09), [0x0b],
range(0x0e, 0x20), [0x7f],
range(0x206a, 0x2070),
[0xfeff],
range(0xfff9, 0xfffd),
range(0x1d173, 0x1d17b),
range(0xe0000, 0xe0080)
):
control_chars[i] = None
return control_chars
CONTROL_CHARS = _build_control_char_mapping()
# A translate mapping that breaks ligatures made of Latin letters. While
# ligatures may be important to the representation of other languages, in
# Latin letters they tend to represent a copy/paste error.
#
# Ligatures may also be separated by NFKC normalization, but that is sometimes
# more normalization than you want.
LIGATURES = {
ord('IJ'): 'IJ',
ord('ij'): 'ij',
ord(''): 'ff',
ord(''): 'fi',
ord(''): 'fl',
ord(''): 'ffi',
ord(''): 'ffl',
ord(''): 'ſt',
ord(''): 'st'
}
def _build_width_map():
"""
Build a translate mapping that replaces halfwidth and fullwidth forms
with their standard-width forms.
"""
# Though it's not listed as a fullwidth character, we'll want to convert
# U+3000 IDEOGRAPHIC SPACE to U+20 SPACE on the same principle, so start
# with that in the dictionary.
width_map = {0x3000: ' '}
for i in range(0xff01, 0xfff0):
char = unichr(i)
alternate = unicodedata.normalize('NFKC', char)
if alternate != char:
width_map[i] = alternate
return width_map
WIDTH_MAP = _build_width_map()
+122
View File
@@ -0,0 +1,122 @@
"""
A command-line utility for fixing text found in a file.
"""
import sys
import io
import codecs
from ftfy import fix_file, __version__
from ftfy.compatibility import PYTHON2
ENCODE_ERROR_TEXT_UNIX = """ftfy error:
Unfortunately, this output stream does not support Unicode.
Your system locale may be very old or misconfigured. You should use a locale
that supports UTF-8. One way to do this is to `export LANG=C.UTF-8`.
"""
ENCODE_ERROR_TEXT_WINDOWS = """ftfy error:
Unfortunately, this output stream does not support Unicode.
You might be trying to output to the Windows Command Prompt (cmd.exe), which
does not fully support Unicode for historical reasons. In general, we recommend
finding a way to run Python without using cmd.exe.
You can work around this problem by using the '-o filename' option in ftfy to
output to a file instead.
"""
DECODE_ERROR_TEXT = """ftfy error:
This input couldn't be decoded as %r. We got the following error:
%s
ftfy works best when its input is in a known encoding. You can use `ftfy -g`
to guess, if you're desperate. Otherwise, give the encoding name with the
`-e` option, such as `ftfy -e latin-1`.
"""
def main():
"""
Run ftfy as a command-line utility.
"""
import argparse
parser = argparse.ArgumentParser(
description="ftfy (fixes text for you), version %s" % __version__
)
parser.add_argument('filename', default='-', nargs='?',
help='The file whose Unicode is to be fixed. Defaults '
'to -, meaning standard input.')
parser.add_argument('-o', '--output', type=str, default='-',
help='The file to output to. Defaults to -, meaning '
'standard output.')
parser.add_argument('-g', '--guess', action='store_true',
help="Ask ftfy to guess the encoding of your input. "
"This is risky. Overrides -e.")
parser.add_argument('-e', '--encoding', type=str, default='utf-8',
help='The encoding of the input. Defaults to UTF-8.')
parser.add_argument('-n', '--normalization', type=str, default='NFC',
help='The normalization of Unicode to apply. '
'Defaults to NFC. Can be "none".')
parser.add_argument('--preserve-entities', action='store_true',
help="Leave HTML entities as they are. The default "
"is to decode them, as long as no HTML tags "
"have appeared in the file.")
args = parser.parse_args()
encoding = args.encoding
if args.guess:
encoding = None
if args.filename == '-':
# Get a standard input stream made of bytes, so we can decode it as
# whatever encoding is necessary.
if PYTHON2:
file = sys.stdin
else:
file = sys.stdin.buffer
else:
file = open(args.filename, 'rb')
if args.output == '-':
encode_output = PYTHON2
outfile = sys.stdout
else:
encode_output = False
outfile = io.open(args.output, 'w', encoding='utf-8')
normalization = args.normalization
if normalization.lower() == 'none':
normalization = None
if args.preserve_entities:
fix_entities = False
else:
fix_entities = 'auto'
try:
for line in fix_file(file, encoding=encoding,
fix_entities=fix_entities,
normalization=normalization):
if encode_output:
outfile.write(line.encode('utf-8'))
else:
try:
outfile.write(line)
except UnicodeEncodeError:
if sys.platform == 'win32':
sys.stderr.write(ENCODE_ERROR_TEXT_WINDOWS)
else:
sys.stderr.write(ENCODE_ERROR_TEXT_UNIX)
sys.exit(1)
except UnicodeDecodeError as err:
sys.stderr.write(DECODE_ERROR_TEXT % (encoding, err))
sys.exit(1)
if __name__ == '__main__':
main()
@@ -0,0 +1,55 @@
"""
Makes some function names and behavior consistent between Python 2 and
Python 3, and also between narrow and wide builds.
"""
from __future__ import unicode_literals
import sys
import unicodedata
if sys.hexversion >= 0x03000000:
unichr = chr
xrange = range
PYTHON2 = False
else:
unichr = unichr
xrange = xrange
PYTHON2 = True
PYTHON34_OR_LATER = (sys.hexversion >= 0x03040000)
def _narrow_unichr_workaround(codepoint):
"""
A replacement for unichr() on narrow builds of Python. This will get
us the narrow representation of an astral character, which will be
a string of length two, containing two UTF-16 surrogates.
"""
escaped = b'\\U%08x' % codepoint
return escaped.decode('unicode-escape')
if sys.maxunicode < 0x10000:
unichr = _narrow_unichr_workaround
def bytes_to_ints(bytestring):
"""
No matter what version of Python this is, make a sequence of integers from
a bytestring. On Python 3, this is easy, because a 'bytes' object _is_ a
sequence of integers.
"""
if PYTHON2:
return [ord(b) for b in bytestring]
else:
return bytestring
def is_printable(char):
"""
str.isprintable() is new in Python 3. It's useful in `explain_unicode`, so
let's make a crude approximation in Python 2.
"""
if PYTHON2:
return not unicodedata.category(char).startswith('C')
else:
return char.isprintable()
+664
View File
@@ -0,0 +1,664 @@
# -*- coding: utf-8 -*-
"""
This module contains the individual fixes that the main fix_text function
can perform.
"""
from __future__ import unicode_literals
import re
import sys
import codecs
import warnings
from ftfy.chardata import (possible_encoding, CHARMAP_ENCODINGS,
CONTROL_CHARS, LIGATURES, WIDTH_MAP,
PARTIAL_UTF8_PUNCT_RE, ALTERED_UTF8_RE,
LOSSY_UTF8_RE, SINGLE_QUOTE_RE, DOUBLE_QUOTE_RE)
from ftfy.badness import text_cost
from ftfy.compatibility import unichr
from html5lib.constants import entities
BYTES_ERROR_TEXT = """Hey wait, this isn't Unicode.
ftfy is designed to fix problems that were introduced by handling Unicode
incorrectly. It might be able to fix the bytes you just handed it, but the
fact that you just gave a pile of bytes to a function that fixes text means
that your code is *also* handling Unicode incorrectly.
ftfy takes Unicode text as input. You should take these bytes and decode
them from the encoding you think they are in. If you're not sure what encoding
they're in:
- First, try to find out. 'utf-8' is a good assumption.
- If the encoding is simply unknowable, try running your bytes through
ftfy.guess_bytes. As the name implies, this may not always be accurate.
If you're confused by this, please read the Python Unicode HOWTO:
http://docs.python.org/%d/howto/unicode.html
""" % sys.version_info[0]
def fix_encoding(text):
r"""
Fix text with incorrectly-decoded garbage ("mojibake") whenever possible.
This function looks for the evidence of mojibake, formulates a plan to fix
it, and applies the plan. It determines whether it should replace nonsense
sequences of single-byte characters that were really meant to be UTF-8
characters, and if so, turns them into the correctly-encoded Unicode
character that they were meant to represent.
The input to the function must be Unicode. If you don't have Unicode text,
you're not using the right tool to solve your problem.
`fix_encoding` decodes text that looks like it was decoded incorrectly. It
leaves alone text that doesn't.
>>> print(fix_encoding('único'))
único
>>> print(fix_encoding('This text is fine already :þ'))
This text is fine already :þ
Because these characters often come from Microsoft products, we allow
for the possibility that we get not just Unicode characters 128-255, but
also Windows's conflicting idea of what characters 128-160 are.
>>> print(fix_encoding('This — should be an em dash'))
This should be an em dash
We might have to deal with both Windows characters and raw control
characters at the same time, especially when dealing with characters like
0x81 that have no mapping in Windows. This is a string that Python's
standard `.encode` and `.decode` methods cannot correct.
>>> print(fix_encoding('This text is sad .â\x81”.'))
This text is sad ..
However, it has safeguards against fixing sequences of letters and
punctuation that can occur in valid text. In the following example,
the last three characters are not replaced with a Korean character,
even though they could be.
>>> print(fix_encoding('not such a fan of Charlotte Brontë…”'))
not such a fan of Charlotte Brontë
This function can now recover some complex manglings of text, such as when
UTF-8 mojibake has been normalized in a way that replaces U+A0 with a
space:
>>> print(fix_encoding('The more you know 🌠'))
The more you know 🌠
Cases of genuine ambiguity can sometimes be addressed by finding other
characters that are not double-encoded, and expecting the encoding to
be consistent:
>>> print(fix_encoding('AHÅ™, the new sofa from IKEA®'))
AHÅ, the new sofa from IKEA®
Finally, we handle the case where the text is in a single-byte encoding
that was intended as Windows-1252 all along but read as Latin-1:
>>> print(fix_encoding('This text was never UTF-8 at all\x85'))
This text was never UTF-8 at all
The best version of the text is found using
:func:`ftfy.badness.text_cost`.
"""
text, _ = fix_encoding_and_explain(text)
return text
def fix_text_encoding(text):
"""
A deprecated name for :func:`ftfy.fixes.fix_encoding`.
"""
warnings.warn('fix_text_encoding is now known as fix_encoding',
DeprecationWarning)
return fix_encoding(text)
# When we support discovering mojibake in more encodings, we run the risk
# of more false positives. We can mitigate false positives by assigning an
# additional cost to using encodings that are rarer than Windows-1252, so
# that these encodings will only be used if they fix multiple problems.
ENCODING_COSTS = {
'macroman': 2,
'iso-8859-2': 2,
'sloppy-windows-1250': 2,
'sloppy-windows-1251': 3,
'cp437': 3,
}
def fix_encoding_and_explain(text):
"""
Re-decodes text that has been decoded incorrectly, and also return a
"plan" indicating all the steps required to fix it.
The resulting plan could be used with :func:`ftfy.fixes.apply_plan`
to fix additional strings that are broken in the same way.
"""
best_version = text
best_cost = text_cost(text)
best_plan = []
plan_so_far = []
while True:
prevtext = text
text, plan = fix_one_step_and_explain(text)
plan_so_far.extend(plan)
cost = text_cost(text)
for _, _, step_cost in plan_so_far:
cost += step_cost
if cost < best_cost:
best_cost = cost
best_version = text
best_plan = list(plan_so_far)
if text == prevtext:
return best_version, best_plan
def fix_one_step_and_explain(text):
"""
Performs a single step of re-decoding text that's been decoded incorrectly.
Returns the decoded text, plus a "plan" for how to reproduce what it did.
"""
if isinstance(text, bytes):
raise UnicodeError(BYTES_ERROR_TEXT)
if len(text) == 0:
return text, []
# The first plan is to return ASCII text unchanged.
if possible_encoding(text, 'ascii'):
return text, []
# As we go through the next step, remember the possible encodings
# that we encounter but don't successfully fix yet. We may need them
# later.
possible_1byte_encodings = []
# Suppose the text was supposed to be UTF-8, but it was decoded using
# a single-byte encoding instead. When these cases can be fixed, they
# are usually the correct thing to do, so try them next.
for encoding in CHARMAP_ENCODINGS:
if possible_encoding(text, encoding):
encoded_bytes = text.encode(encoding)
encode_step = ('encode', encoding, ENCODING_COSTS.get(encoding, 0))
transcode_steps = []
# Now, find out if it's UTF-8 (or close enough). Otherwise,
# remember the encoding for later.
try:
decoding = 'utf-8'
# Check encoded_bytes for sequences that would be UTF-8,
# except they have b' ' where b'\xa0' would belong.
if ALTERED_UTF8_RE.search(encoded_bytes):
encoded_bytes = restore_byte_a0(encoded_bytes)
cost = encoded_bytes.count(b'\xa0') * 2
transcode_steps.append(('transcode', 'restore_byte_a0', cost))
# Check for the byte 0x1a, which indicates where one of our
# 'sloppy' codecs found a replacement character.
if encoding.startswith('sloppy') and b'\x1a' in encoded_bytes:
encoded_bytes = replace_lossy_sequences(encoded_bytes)
transcode_steps.append(('transcode', 'replace_lossy_sequences', 0))
if b'\xed' in encoded_bytes or b'\xc0' in encoded_bytes:
decoding = 'utf-8-variants'
decode_step = ('decode', decoding, 0)
steps = [encode_step] + transcode_steps + [decode_step]
fixed = encoded_bytes.decode(decoding)
return fixed, steps
except UnicodeDecodeError:
possible_1byte_encodings.append(encoding)
# Look for a-hat-euro sequences that remain, and fix them in isolation.
if PARTIAL_UTF8_PUNCT_RE.search(text):
steps = [('transcode', 'fix_partial_utf8_punct_in_1252', 1)]
fixed = fix_partial_utf8_punct_in_1252(text)
return fixed, steps
# The next most likely case is that this is Latin-1 that was intended to
# be read as Windows-1252, because those two encodings in particular are
# easily confused.
if 'latin-1' in possible_1byte_encodings:
if 'windows-1252' in possible_1byte_encodings:
# This text is in the intersection of Latin-1 and
# Windows-1252, so it's probably legit.
return text, []
else:
# Otherwise, it means we have characters that are in Latin-1 but
# not in Windows-1252. Those are C1 control characters. Nobody
# wants those. Assume they were meant to be Windows-1252. Don't
# use the sloppy codec, because bad Windows-1252 characters are
# a bad sign.
encoded = text.encode('latin-1')
try:
fixed = encoded.decode('windows-1252')
steps = []
if fixed != text:
steps = [('encode', 'latin-1', 0),
('decode', 'windows-1252', 1)]
return fixed, steps
except UnicodeDecodeError:
# This text contained characters that don't even make sense
# if you assume they were supposed to be Windows-1252. In
# that case, let's not assume anything.
pass
# The cases that remain are mixups between two different single-byte
# encodings, and not the common case of Latin-1 vs. Windows-1252.
#
# These cases may be unsolvable without adding false positives, though
# I have vague ideas about how to optionally address them in the future.
# Return the text unchanged; the plan is empty.
return text, []
def apply_plan(text, plan):
"""
Apply a plan for fixing the encoding of text.
The plan is a list of tuples of the form (operation, encoding, cost):
- `operation` is 'encode' if it turns a string into bytes, 'decode' if it
turns bytes into a string, and 'transcode' if it keeps the type the same.
- `encoding` is the name of the encoding to use, such as 'utf-8' or
'latin-1', or the function name in the case of 'transcode'.
- The `cost` does not affect how the plan itself works. It's used by other
users of plans, namely `fix_encoding_and_explain`, which has to decide
*which* plan to use.
"""
obj = text
for operation, encoding, _ in plan:
if operation == 'encode':
obj = obj.encode(encoding)
elif operation == 'decode':
obj = obj.decode(encoding)
elif operation == 'transcode':
if encoding in TRANSCODERS:
obj = TRANSCODERS[encoding](obj)
else:
raise ValueError("Unknown transcode operation: %s" % encoding)
else:
raise ValueError("Unknown plan step: %s" % operation)
return obj
HTML_ENTITY_RE = re.compile(r"&#?\w{0,8};")
def unescape_html(text):
"""
Decode all three types of HTML entities/character references.
Code by Fredrik Lundh of effbot.org. Rob Speer made a slight change
to it for efficiency: it won't match entities longer than 8 characters,
because there are no valid entities like that.
>>> print(unescape_html('&lt;tag&gt;'))
<tag>
"""
def fixup(match):
"""
Replace one matched HTML entity with the character it represents,
if possible.
"""
text = match.group(0)
if text[:2] == "&#":
# character reference
try:
if text[:3] == "&#x":
codept = int(text[3:-1], 16)
else:
codept = int(text[2:-1])
if 0x80 <= codept < 0xa0:
# Decode this range of characters as Windows-1252, as Web
# browsers do in practice.
return unichr(codept).encode('latin-1').decode('sloppy-windows-1252')
else:
return unichr(codept)
except ValueError:
pass
else:
# named entity
try:
text = entities[text[1:]]
except KeyError:
pass
return text # leave as is
return HTML_ENTITY_RE.sub(fixup, text)
ANSI_RE = re.compile('\033\\[((?:\\d|;)*)([a-zA-Z])')
def remove_terminal_escapes(text):
r"""
Strip out "ANSI" terminal escape sequences, such as those that produce
colored text on Unix.
>>> print(remove_terminal_escapes(
... "\033[36;44mI'm blue, da ba dee da ba doo...\033[0m"
... ))
I'm blue, da ba dee da ba doo...
"""
return ANSI_RE.sub('', text)
def uncurl_quotes(text):
r"""
Replace curly quotation marks with straight equivalents.
>>> print(uncurl_quotes('\u201chere\u2019s a test\u201d'))
"here's a test"
"""
return SINGLE_QUOTE_RE.sub("'", DOUBLE_QUOTE_RE.sub('"', text))
def fix_latin_ligatures(text):
"""
Replace single-character ligatures of Latin letters, such as '', with the
characters that they contain, as in 'fi'. Latin ligatures are usually not
intended in text strings (though they're lovely in *rendered* text). If
you have such a ligature in your string, it is probably a result of a
copy-and-paste glitch.
We leave ligatures in other scripts alone to be safe. They may be intended,
and removing them may lose information. If you want to take apart nearly
all ligatures, use NFKC normalization.
>>> print(fix_latin_ligatures("fluffiest"))
fluffiest
"""
return text.translate(LIGATURES)
def fix_character_width(text):
"""
The ASCII characters, katakana, and Hangul characters have alternate
"halfwidth" or "fullwidth" forms that help text line up in a grid.
If you don't need these width properties, you probably want to replace
these characters with their standard form, which is what this function
does.
Note that this replaces the ideographic space, U+3000, with the ASCII
space, U+20.
>>> print(fix_character_width("LOUD NOISES"))
LOUD NOISES
>>> print(fix_character_width("Uターン")) # this means "U-turn"
Uターン
"""
return text.translate(WIDTH_MAP)
def fix_line_breaks(text):
r"""
Convert all line breaks to Unix style.
This will convert the following sequences into the standard \\n
line break:
- CRLF (\\r\\n), used on Windows and in some communication
protocols
- CR (\\r), once used on Mac OS Classic, and now kept alive
by misguided software such as Microsoft Office for Mac
- LINE SEPARATOR (\\u2028) and PARAGRAPH SEPARATOR (\\u2029),
defined by Unicode and used to sow confusion and discord
- NEXT LINE (\\x85), a C1 control character that is certainly
not what you meant
The NEXT LINE character is a bit of an odd case, because it
usually won't show up if `fix_encoding` is also being run.
\\x85 is very common mojibake for \\u2026, HORIZONTAL ELLIPSIS.
>>> print(fix_line_breaks(
... "This string is made of two things:\u2029"
... "1. Unicode\u2028"
... "2. Spite"
... ))
This string is made of two things:
1. Unicode
2. Spite
For further testing and examples, let's define a function to make sure
we can see the control characters in their escaped form:
>>> def eprint(text):
... print(text.encode('unicode-escape').decode('ascii'))
>>> eprint(fix_line_breaks("Content-type: text/plain\r\n\r\nHi."))
Content-type: text/plain\n\nHi.
>>> eprint(fix_line_breaks("This is how Microsoft \r trolls Mac users"))
This is how Microsoft \n trolls Mac users
>>> eprint(fix_line_breaks("What is this \x85 I don't even"))
What is this \n I don't even
"""
return text.replace('\r\n', '\n').replace('\r', '\n')\
.replace('\u2028', '\n').replace('\u2029', '\n')\
.replace('\u0085', '\n')
SURROGATE_RE = re.compile('[\ud800-\udfff]')
SURROGATE_PAIR_RE = re.compile('[\ud800-\udbff][\udc00-\udfff]')
def convert_surrogate_pair(match):
"""
Convert a surrogate pair to the single codepoint it represents.
This implements the formula described at:
http://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates
"""
pair = match.group(0)
codept = 0x10000 + (ord(pair[0]) - 0xd800) * 0x400 + (ord(pair[1]) - 0xdc00)
return unichr(codept)
def fix_surrogates(text):
"""
Replace 16-bit surrogate codepoints with the characters they represent
(when properly paired), or with \ufffd otherwise.
>>> high_surrogate = unichr(0xd83d)
>>> low_surrogate = unichr(0xdca9)
>>> print(fix_surrogates(high_surrogate + low_surrogate))
💩
>>> print(fix_surrogates(low_surrogate + high_surrogate))
The above doctest had to be very carefully written, because even putting
the Unicode escapes of the surrogates in the docstring was causing
various tools to fail, which I think just goes to show why this fixer is
necessary.
"""
if SURROGATE_RE.search(text):
text = SURROGATE_PAIR_RE.sub(convert_surrogate_pair, text)
text = SURROGATE_RE.sub('\ufffd', text)
return text
def remove_control_chars(text):
"""
Remove various control characters that you probably didn't intend to be in
your text. Many of these characters appear in the table of "Characters not
suitable for use with markup" at
http://www.unicode.org/reports/tr20/tr20-9.html.
This includes:
- ASCII control characters, except for the important whitespace characters
(U+00 to U+08, U+0B, U+0E to U+1F, U+7F)
- Deprecated Arabic control characters (U+206A to U+206F)
- Interlinear annotation characters (U+FFF9 to U+FFFB)
- The Object Replacement Character (U+FFFC)
- The byte order mark (U+FEFF)
- Musical notation control characters (U+1D173 to U+1D17A)
- Tag characters (U+E0000 to U+E007F)
However, these similar characters are left alone:
- Control characters that produce whitespace (U+09, U+0A, U+0C, U+0D,
U+2028, and U+2029)
- C1 control characters (U+80 to U+9F) -- even though they are basically
never used intentionally, they are important clues about what mojibake
has happened
- Control characters that affect glyph rendering, such as joiners and
right-to-left marks (U+200C to U+200F, U+202A to U+202E)
"""
return text.translate(CONTROL_CHARS)
def remove_bom(text):
r"""
Remove a byte-order mark that was accidentally decoded as if it were part
of the text.
>>> print(remove_bom("\ufeffWhere do you want to go today?"))
Where do you want to go today?
"""
return text.lstrip(unichr(0xfeff))
# Define a regex to match valid escape sequences in Python string literals.
ESCAPE_SEQUENCE_RE = re.compile(r'''
( \\U........ # 8-digit hex escapes
| \\u.... # 4-digit hex escapes
| \\x.. # 2-digit hex escapes
| \\[0-7]{1,3} # Octal escapes
| \\N\{[^}]+\} # Unicode characters by name
| \\[\\'"abfnrtv] # Single-character escapes
)''', re.UNICODE | re.VERBOSE)
def decode_escapes(text):
r"""
Decode backslashed escape sequences, including \\x, \\u, and \\U character
references, even in the presence of other Unicode.
This is what Python's "string-escape" and "unicode-escape" codecs were
meant to do, but in contrast, this actually works. It will decode the
string exactly the same way that the Python interpreter decodes its string
literals.
>>> factoid = '\\u20a1 is the currency symbol for the colón.'
>>> print(factoid[1:])
u20a1 is the currency symbol for the colón.
>>> print(decode_escapes(factoid))
is the currency symbol for the colón.
Even though Python itself can read string literals with a combination of
escapes and literal Unicode -- you're looking at one right now -- the
"unicode-escape" codec doesn't work on literal Unicode. (See
http://stackoverflow.com/a/24519338/773754 for more details.)
Instead, this function searches for just the parts of a string that
represent escape sequences, and decodes them, leaving the rest alone. All
valid escape sequences are made of ASCII characters, and this allows
"unicode-escape" to work correctly.
This fix cannot be automatically applied by the `ftfy.fix_text` function,
because escaped text is not necessarily a mistake, and there is no way
to distinguish text that's supposed to be escaped from text that isn't.
"""
def decode_match(match):
"Given a regex match, decode the escape sequence it contains."
return codecs.decode(match.group(0), 'unicode-escape')
return ESCAPE_SEQUENCE_RE.sub(decode_match, text)
def restore_byte_a0(byts):
"""
Some mojibake has been additionally altered by a process that said "hmm,
byte A0, that's basically a space!" and replaced it with an ASCII space.
When the A0 is part of a sequence that we intend to decode as UTF-8,
changing byte A0 to 20 would make it fail to decode.
This process finds sequences that would convincingly decode as UTF-8 if
byte 20 were changed to A0, and puts back the A0. For the purpose of
deciding whether this is a good idea, this step gets a cost of twice
the number of bytes that are changed.
This is used as a step within `fix_encoding`.
"""
def replacement(match):
"The function to apply when this regex matches."
return match.group(0).replace(b'\x20', b'\xa0')
return ALTERED_UTF8_RE.sub(replacement, byts)
def replace_lossy_sequences(byts):
"""
This function identifies sequences where information has been lost in
a "sloppy" codec, indicated by byte 1A, and if they would otherwise look
like a UTF-8 sequence, it replaces them with the UTF-8 sequence for U+FFFD.
A further explanation:
ftfy can now fix text in a few cases that it would previously fix
incompletely, because of the fact that it can't successfully apply the fix
to the entire string. A very common case of this is when characters have
been erroneously decoded as windows-1252, but instead of the "sloppy"
windows-1252 that passes through unassigned bytes, the unassigned bytes get
turned into U+FFFD (), so we can't tell what they were.
This most commonly happens with curly quotation marks that appear
``✠like this â``.
We can do better by building on ftfy's "sloppy codecs" to let them handle
less-sloppy but more-lossy text. When they encounter the character ````,
instead of refusing to encode it, they encode it as byte 1A -- an
ASCII control code called SUBSTITUTE that once was meant for about the same
purpose. We can then apply a fixer that looks for UTF-8 sequences where
some continuation bytes have been replaced by byte 1A, and decode the whole
sequence as ; if that doesn't work, it'll just turn the byte back into
itself.
As a result, the above text ``✠like this â`` will decode as
`` like this ``.
If U+1A was actually in the original string, then the sloppy codecs will
not be used, and this function will not be run, so your weird control
character will be left alone but wacky fixes like this won't be possible.
This is used as a step within `fix_encoding`.
"""
return LOSSY_UTF8_RE.sub('\ufffd'.encode('utf-8'), byts)
def fix_partial_utf8_punct_in_1252(text):
"""
Fix particular characters that seem to be found in the wild encoded in
UTF-8 and decoded in Latin-1 or Windows-1252, even when this fix can't be
consistently applied.
For this function, we assume the text has been decoded in Windows-1252.
If it was decoded in Latin-1, we'll call this right after it goes through
the Latin-1-to-Windows-1252 fixer.
This is used as a step within `fix_encoding`.
"""
def replacement(match):
"The function to apply when this regex matches."
return match.group(0).encode('sloppy-windows-1252').decode('utf-8')
return PARTIAL_UTF8_PUNCT_RE.sub(replacement, text)
TRANSCODERS = {
'restore_byte_a0': restore_byte_a0,
'replace_lossy_sequences': replace_lossy_sequences,
'fix_partial_utf8_punct_in_1252': fix_partial_utf8_punct_in_1252
}
@@ -0,0 +1,154 @@
# coding: utf-8
"""
This module provides functions for justifying Unicode text in a monospaced
display such as a terminal.
We used to have our own implementation here, but now we mostly rely on
the 'wcwidth' library.
"""
from __future__ import unicode_literals, division
from unicodedata import normalize
from wcwidth import wcwidth, wcswidth
def character_width(char):
r"""
Determine the width that a character is likely to be displayed as in
a monospaced terminal. The width for a printable character will
always be 0, 1, or 2.
Nonprintable or control characters will return -1, a convention that comes
from wcwidth.
>>> character_width('')
2
>>> character_width('A')
1
>>> character_width('\N{ZERO WIDTH JOINER}')
0
>>> character_width('\n')
-1
"""
return wcwidth(char)
def monospaced_width(text):
"""
Return the number of character cells that this string is likely to occupy
when displayed in a monospaced, modern, Unicode-aware terminal emulator.
We refer to this as the "display width" of the string.
This can be useful for formatting text that may contain non-spacing
characters, or CJK characters that take up two character cells.
Returns -1 if the string contains a non-printable or control character.
>>> monospaced_width('ちゃぶ台返し')
12
>>> len('ちゃぶ台返し')
6
>>> monospaced_width('owl\N{SOFT HYPHEN}flavored')
12
>>> monospaced_width('example\x80')
-1
# The Korean word 'ibnida' can be written with 3 characters or 7 jamo.
# Either way, it *looks* the same and takes up 6 character cells.
>>> monospaced_width('입니다')
6
>>> monospaced_width('\u110b\u1175\u11b8\u1102\u1175\u1103\u1161')
6
"""
# NFC-normalize the text first, so that we don't need special cases for
# Hangul jamo.
return wcswidth(normalize('NFC', text))
def display_ljust(text, width, fillchar=' '):
"""
Return `text` left-justified in a Unicode string whose display width,
in a monospaced terminal, should be at least `width` character cells.
The rest of the string will be padded with `fillchar`, which must be
a width-1 character.
"Left" here means toward the beginning of the string, which may actually
appear on the right in an RTL context. This is similar to the use of the
word "left" in "left parenthesis".
>>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
>>> for line in lines:
... print(display_ljust(line, 20, ''))
Table flip
(°°)
ちゃぶ台返し
This example, and the similar ones that follow, should come out justified
correctly when viewed in a monospaced terminal. It will probably not look
correct if you're viewing this code or documentation in a Web browser.
"""
if character_width(fillchar) != 1:
raise ValueError("The padding character must have display width 1")
text_width = monospaced_width(text)
if text_width == -1:
# There's a control character here, so just don't add padding
return text
padding = max(0, width - text_width)
return text + fillchar * padding
def display_rjust(text, width, fillchar=' '):
"""
Return `text` right-justified in a Unicode string whose display width,
in a monospaced terminal, should be at least `width` character cells.
The rest of the string will be padded with `fillchar`, which must be
a width-1 character.
"Right" here means toward the end of the string, which may actually be on
the left in an RTL context. This is similar to the use of the word "right"
in "right parenthesis".
>>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
>>> for line in lines:
... print(display_rjust(line, 20, ''))
Table flip
(°°)
ちゃぶ台返し
"""
if character_width(fillchar) != 1:
raise ValueError("The padding character must have display width 1")
text_width = monospaced_width(text)
if text_width == -1:
return text
padding = max(0, width - text_width)
return fillchar * padding + text
def display_center(text, width, fillchar=' '):
"""
Return `text` centered in a Unicode string whose display width, in a
monospaced terminal, should be at least `width` character cells. The rest
of the string will be padded with `fillchar`, which must be a width-1
character.
>>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
>>> for line in lines:
... print(display_center(line, 20, ''))
Table flip
(°°)
ちゃぶ台返し
"""
if character_width(fillchar) != 1:
raise ValueError("The padding character must have display width 1")
text_width = monospaced_width(text)
if text_width == -1:
return text
padding = max(0, width - text_width)
left_padding = padding // 2
right_padding = padding - left_padding
return fillchar * left_padding + text + fillchar * right_padding
@@ -0,0 +1,47 @@
# coding: utf-8
"""
This file defines a general method for evaluating ftfy using data that arrives
in a stream. A concrete implementation of it is found in `twitter_tester.py`.
"""
from __future__ import print_function, unicode_literals
from ftfy import fix_text
from ftfy.fixes import fix_encoding, unescape_html
from ftfy.chardata import possible_encoding
class StreamTester:
"""
Take in a sequence of texts, and show the ones that will be changed by
ftfy. This will also periodically show updates, such as the proportion of
texts that changed.
"""
def __init__(self):
self.num_fixed = 0
self.count = 0
def check_ftfy(self, text, encoding_only=True):
"""
Given a single text input, check whether `ftfy.fix_text_encoding`
would change it. If so, display the change.
"""
self.count += 1
text = unescape_html(text)
if not possible_encoding(text, 'ascii'):
if encoding_only:
fixed = fix_encoding(text)
else:
fixed = fix_text(text, uncurl_quotes=False, fix_character_width=False)
if text != fixed:
# possibly filter common bots before printing
print('\nText:\t{text!r}\nFixed:\t{fixed!r}\n'.format(
text=text, fixed=fixed
))
self.num_fixed += 1
elif 'â€' in text or '\x80' in text:
print('\nNot fixed:\t{text!r}'.format(text=text))
# Print status updates once in a while
if self.count % 100 == 0:
print('.', end='', flush=True)
if self.count % 10000 == 0:
print('\n%d/%d fixed' % (self.num_fixed, self.count))
@@ -0,0 +1,72 @@
# coding: utf-8
"""
Do what is necessary to authenticate this tester as a Twitter "app", using
somebody's Twitter account.
"""
from __future__ import unicode_literals
import os
AUTH_TOKEN_PATH = os.path.expanduser('~/.cache/oauth/twitter_ftfy.auth')
def get_auth():
"""
Twitter has some bizarre requirements about how to authorize an "app" to
use its API.
The user of the app has to log in to get a secret token. That's fine. But
the app itself has its own "consumer secret" token. The app has to know it,
and the user of the app has to not know it.
This is, of course, impossible. It's equivalent to DRM. Your computer can't
*really* make use of secret information while hiding the same information
from you.
The threat appears to be that, if you have this super-sekrit token, you can
impersonate the app while doing something different. Well, of course you
can do that, because you *have the source code* and you can change it to do
what you want. You still have to log in as a particular user who has a
token that's actually secret, you know.
Even developers of closed-source applications that use the Twitter API are
unsure what to do, for good reason. These "secrets" are not secret in any
cryptographic sense. A bit of Googling shows that the secret tokens for
every popular Twitter app are already posted on the Web.
Twitter wants us to pretend this string can be kept secret, and hide this
secret behind a fig leaf like everybody else does. So that's what we've
done.
"""
from twitter.oauth import OAuth
from twitter import oauth_dance, read_token_file
def unhide(secret):
"""
Do something mysterious and exactly as secure as every other Twitter
app.
"""
return ''.join([chr(ord(c) - 0x2800) for c in secret])
fig_leaf = '⠴⡹⠹⡩⠶⠴⡶⡅⡂⡩⡅⠳⡏⡉⡈⠰⠰⡹⡥⡶⡈⡐⡍⡂⡫⡍⡗⡬⡒⡧⡶⡣⡰⡄⡧⡸⡑⡣⠵⡓⠶⠴⡁'
consumer_key = 'OFhyNd2Zt4Ba6gJGJXfbsw'
if os.path.exists(AUTH_TOKEN_PATH):
token, token_secret = read_token_file(AUTH_TOKEN_PATH)
else:
authdir = os.path.dirname(AUTH_TOKEN_PATH)
if not os.path.exists(authdir):
os.makedirs(authdir)
token, token_secret = oauth_dance(
app_name='ftfy-tester',
consumer_key=consumer_key,
consumer_secret=unhide(fig_leaf),
token_filename=AUTH_TOKEN_PATH
)
return OAuth(
token=token,
token_secret=token_secret,
consumer_key=consumer_key,
consumer_secret=unhide(fig_leaf)
)
@@ -0,0 +1,88 @@
"""
Implements a StreamTester that runs over Twitter data. See the class
docstring.
This module is written for Python 3 only. The __future__ imports you see here
are just to let Python 2 scan the file without crashing with a SyntaxError.
"""
from __future__ import print_function, unicode_literals
import os
from collections import defaultdict
from ftfy.streamtester import StreamTester
class TwitterTester(StreamTester):
"""
This class uses the StreamTester code (defined in `__init__.py`) to
evaluate ftfy's real-world performance, by feeding it live data from
Twitter.
This is a semi-manual evaluation. It requires a human to look at the
results and determine if they are good. The three possible cases we
can see here are:
- Success: the process takes in mojibake and outputs correct text.
- False positive: the process takes in correct text, and outputs
mojibake. Every false positive should be considered a bug, and
reported on GitHub if it isn't already.
- Confusion: the process takes in mojibake and outputs different
mojibake. Not a great outcome, but not as dire as a false
positive.
This tester cannot reveal false negatives. So far, that can only be
done by the unit tests.
"""
OUTPUT_DIR = './twitterlogs'
def __init__(self):
self.lines_by_lang = defaultdict(list)
super().__init__()
def save_files(self):
"""
When processing data from live Twitter, save it to log files so that
it can be replayed later.
"""
if not os.path.exists(self.OUTPUT_DIR):
os.makedirs(self.OUTPUT_DIR)
for lang, lines in self.lines_by_lang.items():
filename = 'tweets.{}.txt'.format(lang)
fullname = os.path.join(self.OUTPUT_DIR, filename)
langfile = open(fullname, 'a', encoding='utf-8')
for line in lines:
print(line.replace('\n', ' '), file=langfile)
langfile.close()
self.lines_by_lang = defaultdict(list)
def run_sample(self):
"""
Listen to live data from Twitter, and pass on the fully-formed tweets
to `check_ftfy`. This requires the `twitter` Python package as a
dependency.
"""
from twitter import TwitterStream
from ftfy.streamtester.oauth import get_auth
twitter_stream = TwitterStream(auth=get_auth())
iterator = twitter_stream.statuses.sample()
for tweet in iterator:
if 'text' in tweet:
self.check_ftfy(tweet['text'])
if 'user' in tweet:
lang = tweet['user'].get('lang', 'NONE')
self.lines_by_lang[lang].append(tweet['text'])
if self.count % 10000 == 100:
self.save_files()
def main():
"""
When run from the command line, this script connects to the Twitter stream
and runs the TwitterTester on it forever. Or at least until the stream
drops.
"""
tester = TwitterTester()
tester.run_sample()
if __name__ == '__main__':
main()
@@ -33,8 +33,6 @@ def guess_filename(filename, options):
if not options.get('yaml') and not options.get('json') and not options.get('show_property'):
print('For:', filename)
options['implicit'] = True # Force implicit option in CLI
guess = api.guessit(filename, options)
if options.get('show_property'):
@@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '2.1.3.dev0'
__version__ = '3.0.0.dev0'
+2 -1
View File
@@ -126,7 +126,8 @@ class GuessItApi(object):
for match in matches:
if isinstance(match.value, six.text_type):
match.value = match.value.encode("ascii")
return matches.to_dict(options.get('advanced', False), options.get('implicit', False))
return matches.to_dict(options.get('advanced', False), options.get('single_value', False),
options.get('enforce_list', False))
except:
raise GuessitException(string, options)
@@ -54,6 +54,10 @@ def build_argument_parser():
help='Display the value of a single property (title, series, video_codec, year, ...)')
output_opts.add_argument('-a', '--advanced', dest='advanced', action='store_true', default=None,
help='Display advanced information for filename guesses, as json output')
output_opts.add_argument('-s', '--single-value', dest='single_value', action='store_true', default=None,
help='Keep only first value found for each property')
output_opts.add_argument('-l', '--enforce-list', dest='enforce_list', action='store_true', default=None,
help='Wrap each found value in a list even when property has a single value')
output_opts.add_argument('-j', '--json', dest='json', action='store_true', default=None,
help='Display information for filename guesses as json output')
output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=None,
@@ -39,12 +39,12 @@ def audio_codec():
rebulk.defaults(name="audio_codec", conflict_solver=audio_codec_priority)
rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
rebulk.regex("Dolby", "DolbyDigital", "Dolby-Digital", "DDP?", value="DolbyDigital")
rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='AC3')
rebulk.regex("DolbyAtmos", "Dolby-Atmos", "Atmos", value="DolbyAtmos")
rebulk.regex("AAC", value="AAC")
rebulk.regex("AC3D?", value="AC3")
rebulk.regex("Flac", value="FLAC")
rebulk.regex("DTS", value="DTS")
rebulk.string("AAC", value="AAC")
rebulk.string('EAC3', 'DDP', 'DD+', value="EAC3")
rebulk.string("Flac", value="FLAC")
rebulk.string("DTS", value="DTS")
rebulk.regex("True-?HD", value="TrueHD")
rebulk.defaults(name="audio_profile")
@@ -34,15 +34,17 @@ def container():
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
'iso', 'vob']
torrent = ['torrent']
nzb = ['nzb']
rebulk.regex(r'\.'+build_or_pattern(subtitles)+'$', exts=subtitles, tags=['extension', 'subtitle'])
rebulk.regex(r'\.'+build_or_pattern(info)+'$', exts=info, tags=['extension', 'info'])
rebulk.regex(r'\.'+build_or_pattern(videos)+'$', exts=videos, tags=['extension', 'video'])
rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])
rebulk.defaults(name='container',
validator=seps_surround,
formatter=lambda s: s.upper(),
formatter=lambda s: s.lower(),
conflict_solver=lambda match, other: match
if other.name in ['format',
'video_codec'] or other.name == 'container' and 'extension' in other.tags
@@ -51,5 +53,6 @@ def container():
rebulk.string(*[sub for sub in subtitles if sub not in ['sub']], tags=['subtitle'])
rebulk.string(*videos, tags=['video'])
rebulk.string(*torrent, tags=['torrent'])
rebulk.string(*nzb, tags=['nzb'])
return rebulk
@@ -24,12 +24,18 @@ def edition():
conflict_solver=lambda match, other: other
if other.name == 'episode_details' and other.value == 'Special'
else '__default__')
rebulk.string('SE', value='Special Edition', tags='has-neighbor')
rebulk.string('se', value='Special Edition', tags='has-neighbor')
rebulk.regex('criterion-edition', 'edition-criterion', value='Criterion Edition')
rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe Edition')
rebulk.regex('limited', 'limited-edition', value='Limited Edition')
rebulk.regex('limited', 'limited-edition', value='Limited Edition', tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical Edition')
rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
value="Director's cut")
value="Director's Cut")
rebulk.regex('extended', 'extended-?cut', 'extended-?version',
value='Extended', tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=['has-neighbor', 'release-group-prefix'])
for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
return rebulk
@@ -5,7 +5,7 @@ Episode title
"""
from collections import defaultdict
from rebulk import Rebulk, Rule, AppendMatch, RenameMatch, POST_PROCESS
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PROCESS
from ..common import seps, title_seps
from ..common.formatters import cleanup
@@ -19,8 +19,12 @@ def episode_title():
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().rules(EpisodeTitleFromPosition,
AlternativeTitleReplace,
previous_names = ('episode', 'episode_details', 'episode_count',
'season', 'season_count', 'date', 'title', 'year')
rebulk = Rebulk().rules(RemoveConflictsWithEpisodeTitle(previous_names),
EpisodeTitleFromPosition(previous_names),
AlternativeTitleReplace(previous_names),
TitleToEpisodeTitle,
Filepart3EpisodeTitle,
Filepart2EpisodeTitle,
@@ -28,6 +32,62 @@ def episode_title():
return rebulk
class RemoveConflictsWithEpisodeTitle(Rule):
"""
Remove conflicting matches that might lead to wrong episode_title parsing.
"""
priority = 64
consequence = RemoveMatch
def __init__(self, previous_names):
super(RemoveConflictsWithEpisodeTitle, self).__init__()
self.previous_names = previous_names
self.next_names = ('streaming_service', 'screen_size', 'format',
'video_codec', 'audio_codec', 'other', 'container')
self.affected_if_holes_after = ('part', )
self.affected_names = ('part', 'year')
def when(self, matches, context):
to_remove = []
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end,
predicate=lambda m: m.name in self.affected_names):
before = matches.previous(match, index=0,
predicate=lambda m, fp=filepart: not m.private and m.start >= fp.start)
if not before or before.name not in self.previous_names:
continue
after = matches.next(match, index=0,
predicate=lambda m, fp=filepart: not m.private and m.end <= fp.end)
if not after or after.name not in self.next_names:
continue
group = matches.markers.at_match(match, predicate=lambda m: m.name == 'group', index=0)
def has_value_in_same_group(current_match, current_group=group):
"""Return true if current match has value and belongs to the current group."""
return current_match.value.strip(seps) and (
current_group == matches.markers.at_match(current_match,
predicate=lambda mm: mm.name == 'group', index=0)
)
holes_before = matches.holes(before.end, match.start, predicate=has_value_in_same_group)
holes_after = matches.holes(match.end, after.start, predicate=has_value_in_same_group)
if not holes_before and not holes_after:
continue
if match.name in self.affected_if_holes_after and not holes_after:
continue
to_remove.append(match)
if match.parent:
to_remove.append(match.parent)
return to_remove
class TitleToEpisodeTitle(Rule):
"""
If multiple different title are found, convert the one following episode number to episode_title.
@@ -65,12 +125,14 @@ class EpisodeTitleFromPosition(TitleBaseRule):
"""
dependency = TitleToEpisodeTitle
def __init__(self, previous_names):
super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
self.previous_names = previous_names
def hole_filter(self, hole, matches):
episode = matches.previous(hole,
lambda previous: any(name in previous.names
for name in ['episode', 'episode_details',
'episode_count', 'season', 'season_count',
'date', 'title', 'year']),
for name in self.previous_names),
0)
crc32 = matches.named('crc32')
@@ -88,9 +150,6 @@ class EpisodeTitleFromPosition(TitleBaseRule):
return False
return super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context)
def __init__(self):
super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
def when(self, matches, context):
if matches.named('episode_title'):
return
@@ -104,6 +163,10 @@ class AlternativeTitleReplace(Rule):
dependency = EpisodeTitleFromPosition
consequence = RenameMatch
def __init__(self, previous_names):
super(AlternativeTitleReplace, self).__init__()
self.previous_names = previous_names
def when(self, matches, context):
if matches.named('episode_title'):
return
@@ -115,10 +178,7 @@ class AlternativeTitleReplace(Rule):
if main_title:
episode = matches.previous(main_title,
lambda previous: any(name in previous.names
for name in ['episode', 'episode_details',
'episode_count', 'season',
'season_count',
'date', 'title', 'year']),
for name in self.previous_names),
0)
crc32 = matches.named('crc32')
@@ -98,7 +98,7 @@ def episodes():
episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)
"""
values = match.children.to_dict(implicit=True)
values = match.children.to_dict()
if 'season' in values and is_iterable(values['season']):
# Season numbers must be in natural order to be validated.
if not list(sorted(values['season'])) == values['season']:
@@ -231,14 +231,16 @@ def episodes():
formatter={'season': int, 'other': lambda match: 'Complete'})
# 12, 13
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int}) \
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') == 'movie') \
.defaults(validator=None) \
.regex(r'(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')
# 012, 013
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int}) \
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') == 'movie') \
.defaults(validator=None) \
.regex(r'0(?P<episode>\d{1,2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
@@ -246,7 +248,8 @@ def episodes():
# 112, 113
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: not context.get('episode_prefer_number', False)) \
disabled=lambda context: (not context.get('episode_prefer_number', False) or
context.get('type') == 'movie')) \
.defaults(validator=None) \
.regex(r'(?P<episode>\d{3,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
@@ -287,7 +290,8 @@ def episodes():
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode', 'weak-duplicate'],
formatter={'season': int, 'episode': int, 'version': int},
conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
disabled=lambda context: context.get('episode_prefer_number', False)) \
disabled=lambda context: (context.get('episode_prefer_number', False) or
context.get('type') == 'movie')) \
.defaults(validator=None) \
.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
@@ -460,8 +464,21 @@ class RemoveWeakIfMovie(Rule):
return context.get('type') != 'episode'
def when(self, matches, context):
if matches.named('year'):
return matches.tagged('weak-movie')
to_remove = []
to_ignore = set()
remove = False
for filepart in matches.markers.named('path'):
year = matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'year', index=0)
if year:
remove = True
next_match = matches.next(year, predicate=lambda m, fp=filepart: m.private and m.end <= fp.end, index=0)
if next_match and not matches.at_match(next_match, predicate=lambda m: m.name == 'year'):
to_ignore.add(next_match.initiator)
if remove:
to_remove.extend(matches.tagged('weak-movie', predicate=lambda m: m.initiator not in to_ignore))
return to_remove
class RemoveWeakIfSxxExx(Rule):
@@ -39,8 +39,7 @@ COMMON_WORDS_STRICT = frozenset(['brazil'])
UNDETERMINED = babelfish.Language('und')
SYN = {('und', None): ['unknown', 'inconnu', 'unk'],
('ell', None): ['gr', 'greek'],
SYN = {('ell', None): ['gr', 'greek'],
('spa', None): ['esp', 'español', 'espanol'],
('fra', None): ['français', 'vf', 'vff', 'vfi', 'vfq'],
('swe', None): ['se'],

Some files were not shown because too many files have changed in this diff Show More