Compare commits
606 Commits
2.0.25.1635
...
wiki
| Author | SHA1 | Date | |
|---|---|---|---|
| 5a05c0f858 | |||
| cebe92bd8f | |||
| 6f8cfc7914 | |||
| e7e98b83d2 | |||
| 4b72bb9d28 | |||
| 221068874b | |||
| 6028d8b2f1 | |||
| ddaafe9310 | |||
| 139e38731a | |||
| d25056cb35 | |||
| 5c80a7091b | |||
| 5faf190202 | |||
| 169b114ff6 | |||
| bc67326573 | |||
| a32543533d | |||
| 6b6e40ef96 | |||
| 8127b7ecf0 | |||
| 09425ccbe0 | |||
| 61fbc4e3b5 | |||
| 158e4f85da | |||
| 8b1107d2e1 | |||
| 59ffa9084f | |||
| 19df673c50 | |||
| 5f20894413 | |||
| 7349874804 | |||
| fda5dc7e89 | |||
| d60b45a667 | |||
| ab2e69a76e | |||
| 6a836338a5 | |||
| 5a02365605 | |||
| 26b38c4f64 | |||
| 9b7edf2960 | |||
| 7050f64fae | |||
| 4623a989d8 | |||
| 87b942bd6d | |||
| 87ee5cc627 | |||
| bff8fe8b70 | |||
| 1495882dc7 | |||
| 2e50d84f2a | |||
| d32716f4c5 | |||
| 876aa4eda0 | |||
| 3673aee8e9 | |||
| a758191ee0 | |||
| 99410249c7 | |||
| a705f2ad30 | |||
| 33223dedc1 | |||
| bd8e8ef346 | |||
| c75e7bf656 | |||
| cb4117376a | |||
| 0d37920aad | |||
| 0da6e76200 | |||
| 5f5934a6ee | |||
| 85b7a2f4f5 | |||
| 3dcfd30a04 | |||
| b5a0f65783 | |||
| 3862e6f3a4 | |||
| 1d4e2ec50b | |||
| 8b85485510 | |||
| 722ce3ac8b | |||
| 1e132f2808 | |||
| d007e0a172 | |||
| 3ddd722cc1 | |||
| 82d8189966 | |||
| 2d533eb004 | |||
| f9c899701f | |||
| e9f62fbb09 | |||
| 5b2f09318a | |||
| 8c260c43a8 | |||
| eee793302c | |||
| 0d1fdf6e60 | |||
| 64398d8f30 | |||
| cab736b573 | |||
| 93071dd81e | |||
| e8fcb8f91a | |||
| 33cacfe884 | |||
| f624f7f05a | |||
| 624195d870 | |||
| ab2ef66263 | |||
| 4ea0372212 | |||
| ff31912e8a | |||
| dcefed2e4c | |||
| 55bbc4f585 | |||
| 0f2bb99b39 | |||
| 85342eeed3 | |||
| 374a6a668a | |||
| e3be3195ee | |||
| 503279f3c2 | |||
| f8bb54024c | |||
| 6e53fc606a | |||
| ab810c48af | |||
| 13bb9183af | |||
| 2c5b6ea690 | |||
| a8efa2e266 | |||
| e73eb2fd86 | |||
| d38fa26e13 | |||
| 716f4493e8 | |||
| 3220974a4a | |||
| 6732272047 | |||
| 547f038139 | |||
| 3b0ee60eaa | |||
| a869281de7 | |||
| a4ed77c7bb | |||
| 81718e64d3 | |||
| dee0daf8aa | |||
| 8e599fb22a | |||
| acb5589af1 | |||
| 6db2771cd6 | |||
| 06d4e0a19a | |||
| 3b18c6c14f | |||
| 300359acf2 | |||
| 5456d0200a | |||
| 9890f66443 | |||
| aba863bc84 | |||
| ade416f5c8 | |||
| 7097267f7c | |||
| b0d8d1a86d | |||
| 2c8296ba85 | |||
| 4dd17de146 | |||
| 3a281b0b57 | |||
| 04ed625f1a | |||
| 1cddfb1b2d | |||
| 796b64d83e | |||
| 240a3687d7 | |||
| 9ed4764ab2 | |||
| f253a13297 | |||
| 744cd57dd5 | |||
| e2a5647363 | |||
| a1f324c105 | |||
| 767e0f8ac7 | |||
| 0c0ad02234 | |||
| c09973ec56 | |||
| 03a72e1917 | |||
| f9e0eaaf83 | |||
| 985f75f7da | |||
| 171cbd6c53 | |||
| 9875bc5c5b | |||
| 882509f891 | |||
| 3396502334 | |||
| b7fb99c3d4 | |||
| c82307a710 | |||
| 309a99d183 | |||
| 09a6ef0194 | |||
| 43afcb4239 | |||
| 7a78f33ac3 | |||
| d5fb538630 | |||
| a22cdf5d5b | |||
| fe0636bbbf | |||
| 13859cfbd7 | |||
| 0adadc59ac | |||
| d65ba19c6c | |||
| 5cedbd2fa0 | |||
| 735fb09762 | |||
| 79d61419b0 | |||
| 248b93e5c6 | |||
| d8eff1adb5 | |||
| c911620254 | |||
| c68a32b889 | |||
| 788819a900 | |||
| 27c94af980 | |||
| 81122665a0 | |||
| 1856e687eb | |||
| 6055793d46 | |||
| 99b670ff10 | |||
| 7a09218cc0 | |||
| a34d0523b5 | |||
| f06e900bab | |||
| 7da15a2d44 | |||
| e999cc53d0 | |||
| b7d4bd00a5 | |||
| 8c2aa849d7 | |||
| 01a759fff8 | |||
| cb0008b59e | |||
| 9cd825aff1 | |||
| 8ad52d2979 | |||
| efd6143498 | |||
| 157fae5f83 | |||
| 6d63301b63 | |||
| 9801c8c6b3 | |||
| e04f4c0bd0 | |||
| b501578584 | |||
| 308f429c91 | |||
| 1d45172475 | |||
| 085a4f30db | |||
| 7a600dc2b6 | |||
| c0c2891d8d | |||
| 06b269a2ba | |||
| f3a4db0d87 | |||
| bcd99d18c4 | |||
| c05c400c6f | |||
| 0f081d8d7b | |||
| 833dc5e3ae | |||
| 0be3df435b | |||
| f4446af57e | |||
| 253aa664a8 | |||
| 0df037a295 | |||
| ed49d743f9 | |||
| 203cc392c0 | |||
| 52ba5a7f24 | |||
| 8aa0576bbc | |||
| 5ce9cc79c8 | |||
| 1a596dfdea | |||
| aeecb3ff59 | |||
| 85c8d2d558 | |||
| 2cf4e7ac59 | |||
| e7412a91f9 | |||
| 9888d03982 | |||
| 765cc39553 | |||
| 6e58c2f984 | |||
| 295542ff18 | |||
| 9d72d9c647 | |||
| 853897ec3e | |||
| 9cf8ad7399 | |||
| fdf974c5e3 | |||
| 2920dbfe8d | |||
| 77d05f7697 | |||
| 3ffeaeffb6 | |||
| db2755675c | |||
| 7ca090f73c | |||
| bb251ad29e | |||
| 75d770e019 | |||
| 49bf116c18 | |||
| b7d227fe0f | |||
| 83f59935f2 | |||
| 37b794fa14 | |||
| 1f5c45df91 | |||
| 62e3020234 | |||
| 895d457500 | |||
| 586269efd3 | |||
| 576718fc03 | |||
| 648dd4147a | |||
| c4df743c3e | |||
| b98fead37e | |||
| 6522094164 | |||
| fcd3dfe75c | |||
| ec9a798590 | |||
| 5825443d4d | |||
| 9768b3fadd | |||
| 77a72d6663 | |||
| 08d647c024 | |||
| a77ef040be | |||
| 13e581b953 | |||
| 1cc18617c5 | |||
| 2642f65614 | |||
| 4abb2aacf9 | |||
| 904daaf2b3 | |||
| 3044f2b1fb | |||
| 826accb2d1 | |||
| d5cb35ed95 | |||
| 24c7e4be8c | |||
| abbd7283b2 | |||
| 2980aa08d7 | |||
| e2344abbc4 | |||
| 80097c3500 | |||
| 714f36caee | |||
| fb1860d78b | |||
| ce7acd278e | |||
| b8d9899796 | |||
| e58fa1964d | |||
| 1627dee77e | |||
| bbac0c033f | |||
| 6437e1dbad | |||
| 48a9e998ff | |||
| 6b6ca461f0 | |||
| 7960952a30 | |||
| 5ec64efb75 | |||
| 2440b2eae4 | |||
| 54db2857c9 | |||
| 5b8f0b7361 | |||
| 053ebe3963 | |||
| 661b0367f5 | |||
| 01da0697a0 | |||
| a3d3b670ae | |||
| 5c64a332f8 | |||
| 6fcd9b645a | |||
| 78da16654a | |||
| da20d4882b | |||
| 1f31c38d24 | |||
| 5f2fd9733b | |||
| 8a225b4e09 | |||
| af05b41937 | |||
| d618da457e | |||
| d16bdad782 | |||
| f6d33e73a0 | |||
| 7b48e445f5 | |||
| 2390f904bd | |||
| 3bee3631a3 | |||
| 9da0b2d3c1 | |||
| 7a092e4585 | |||
| 196fb6b4f6 | |||
| 9507002961 | |||
| 943ed38c2f | |||
| 496619b492 | |||
| 4772b42d64 | |||
| 5bc10953cc | |||
| 18deca202d | |||
| 84bc4b018d | |||
| 1a0598a47a | |||
| 973d117887 | |||
| c284c8f336 | |||
| df69cbc84c | |||
| 646453887f | |||
| 189d617005 | |||
| 554cd8bfe7 | |||
| 79505dea20 | |||
| 5358a46b7e | |||
| aff1599ce7 | |||
| bc7df1c8a1 | |||
| f1df1d25a8 | |||
| 47d9b472ed | |||
| 89ab8c34d8 | |||
| 600498f9c1 | |||
| 845fbcd2ac | |||
| 3cc9f19b8f | |||
| e68c642005 | |||
| 81ae950577 | |||
| 62b4496cd6 | |||
| 29b7292d15 | |||
| 791058a2d2 | |||
| b6c108faef | |||
| 72d592866a | |||
| 4052993246 | |||
| a24f6e7789 | |||
| 0d0fd49924 | |||
| 139dcb409e | |||
| 707e6e7d13 | |||
| 36abb29ddd | |||
| a700fe761e | |||
| 7577164471 | |||
| 1bce743ea3 | |||
| f85ab0364a | |||
| eb3a0d52fd | |||
| b8cd295a12 | |||
| d3ff49ee0c | |||
| d4833f1e6e | |||
| 548483ed2f | |||
| f6f39b97c8 | |||
| 21ea5e0df9 | |||
| 3cbab6a5c7 | |||
| f19f39ba16 | |||
| b9c0fd9a1c | |||
| ce520e6944 | |||
| 0ad62a95e2 | |||
| 8f62a69e06 | |||
| 34bbb98f7f | |||
| 26cd6bb955 | |||
| 97534c633d | |||
| 0a9a2963c2 | |||
| 05afc39a35 | |||
| 84fdc1f55f | |||
| 3b03c3c2bb | |||
| 980f62686d | |||
| 202f2532a6 | |||
| 78d193a2fd | |||
| 0c109b0f27 | |||
| e33c0ab86c | |||
| 3a0189069d | |||
| 2688bd9edd | |||
| 889f7bd2d7 | |||
| 0561c2d640 | |||
| b76f1ad004 | |||
| cde6153f64 | |||
| 12bdaa510b | |||
| 0e6a4acf80 | |||
| e7785f7094 | |||
| 2dcf39eff8 | |||
| 1125c5c133 | |||
| faf7cedfe2 | |||
| 52a6127625 | |||
| b552f6f9fa | |||
| 9b558fcce2 | |||
| c8eae6df6c | |||
| 5f50bd7095 | |||
| c8617218dc | |||
| a8ceae993e | |||
| a72a8854c9 | |||
| dc658db9ba | |||
| 8d8ecfe9e1 | |||
| 4b77e63857 | |||
| 19aa800324 | |||
| 85adb6b0e3 | |||
| bd2523821d | |||
| c1838a3c84 | |||
| d836f8f5d0 | |||
| 37491c134e | |||
| aa6efb7e5c | |||
| e4d990c06d | |||
| 01288afac0 | |||
| 579e3ca3ab | |||
| f61bc3ce7c | |||
| cc6004e981 | |||
| 35eb037d05 | |||
| 1eb0e4419d | |||
| 7b5ca875dc | |||
| 2d22a6c383 | |||
| f4884f1c18 | |||
| 27cc3bd185 | |||
| 9b894c2ea7 | |||
| a341808873 | |||
| 8927513f8e | |||
| 84436dfa94 | |||
| 2b73f633e0 | |||
| 3d7a452141 | |||
| 38a8557311 | |||
| 79672923c5 | |||
| 3842182a83 | |||
| 8b0d359e0b | |||
| db2903edfd | |||
| 18d22a72bd | |||
| 402cfc1632 | |||
| 9dec7e4971 | |||
| 931c224247 | |||
| f6ee6d4027 | |||
| 332d41fb25 | |||
| 8303af25fb | |||
| ee02bdb19a | |||
| e674132d5a | |||
| c9eb8bc7be | |||
| 2076a2c6d0 | |||
| 32c0f09b16 | |||
| 1264cabb3f | |||
| fb722d0581 | |||
| cb00ab9610 | |||
| 4102a1c8fd | |||
| af6d7a1ae2 | |||
| 36cae6311a | |||
| 327bb31daa | |||
| 8c2effe337 | |||
| da59adddf4 | |||
| 6f3c806a21 | |||
| 3d119bcd98 | |||
| 6264c21e23 | |||
| d5d6aa0bd5 | |||
| 7ad49fa65a | |||
| 5b8dfb48c3 | |||
| 4d557be99a | |||
| a7e022c6f4 | |||
| fc3f5dad4f | |||
| fa42669580 | |||
| 0c73de726a | |||
| ea87d21977 | |||
| a9e9e8cf44 | |||
| 9905cd307f | |||
| 92ea32b52c | |||
| 4c56f7583a | |||
| fc3050ef3d | |||
| 29c63e11bd | |||
| 64cbe21f6e | |||
| a56bb97d45 | |||
| 6edc6a1c6d | |||
| 01c656ffb2 | |||
| 078c6d0c21 | |||
| 580a8c0f3e | |||
| f0258349bf | |||
| d9080eeb80 | |||
| b504744876 | |||
| 638e8b5b47 | |||
| 9b9c40f310 | |||
| cc3a1db879 | |||
| a16312803e | |||
| 206f9fa5ad | |||
| f20e97574a | |||
| 51764f0ce0 | |||
| e698b9d608 | |||
| e2a7cc6b45 | |||
| 6eaf307be9 | |||
| 9743af5db0 | |||
| 07d02ad75e | |||
| 91f51a27af | |||
| a60318260a | |||
| c3e7e336b5 | |||
| 0b1037b497 | |||
| 7da48b7dc5 | |||
| 73bcfc6151 | |||
| dfe1a16aa0 | |||
| 4f0e685feb | |||
| fca052b308 | |||
| c449f42444 | |||
| 5ec956943c | |||
| 1ad696be6d | |||
| 92b3b762b2 | |||
| 0b29a57079 | |||
| 0dee015181 | |||
| 2f1294a119 | |||
| e609e55710 | |||
| b752ce8572 | |||
| de59c68328 | |||
| f92e78e8be | |||
| 9abc611f1e | |||
| 8e42f61a52 | |||
| 48fd3f977d | |||
| 451636e0b3 | |||
| 1fc810470b | |||
| 1c96efdafa | |||
| 8fb0711973 | |||
| aabb4f2c13 | |||
| eb1c5d976f | |||
| fd89533903 | |||
| d5ec60f0f6 | |||
| 18b896ec0b | |||
| af93e1edec | |||
| a8a5b4ad16 | |||
| 0d40883929 | |||
| 3b6645156d | |||
| 7596346fcd | |||
| 877ff60077 | |||
| 928da6e679 | |||
| c1a9ccef3c | |||
| 5f41c85281 | |||
| 18ef38b90b | |||
| 7b155e6b31 | |||
| ba4d7b2199 | |||
| 869387af34 | |||
| 5b16a80730 | |||
| adf1190584 | |||
| 1c16cf5926 | |||
| a833cf7b0b | |||
| 62a35e7ced | |||
| 7b005760c1 | |||
| b07631f0b5 | |||
| 595d8a8f53 | |||
| 35321b00cd | |||
| 8928f19818 | |||
| 76cc8fad47 | |||
| cb851d8519 | |||
| af0aff3aee | |||
| 6d4099c79c | |||
| d9672e179c | |||
| 1e291343fe | |||
| a5d0bf68fd | |||
| b8e2b524e1 | |||
| 6abd062477 | |||
| fbcc2644bf | |||
| 34b05c8c17 | |||
| e3dce02716 | |||
| ed8a70b5c8 | |||
| 35944b0776 | |||
| 2f80ee5b39 | |||
| 280eb71ae4 | |||
| 9462b1b175 | |||
| 874204838d | |||
| 0e4a936176 | |||
| 5089708e2d | |||
| e17367aa13 | |||
| 26be0978ee | |||
| de1aea9dd2 | |||
| 4c143be906 | |||
| b83cea1073 | |||
| 2418b67089 | |||
| 7e550cf916 | |||
| dce72fcb08 | |||
| adede7bb2e | |||
| 377799ace3 | |||
| 02a822c630 | |||
| 8101bca753 | |||
| 40e177ded0 | |||
| 13f732d733 | |||
| fbca4cbf8c | |||
| 45c8cd1536 | |||
| da293bbc2f | |||
| 7991568d6d | |||
| 5fc1c8cbb1 | |||
| 596981aca2 | |||
| 6d55197218 | |||
| 85cb813a75 | |||
| 5f99319985 | |||
| f34c76eb90 | |||
| adb08aff75 | |||
| 93f8bf561b | |||
| 52e391aa83 | |||
| 751e9fc0c5 | |||
| 77b0b9dc6b | |||
| 5729552206 | |||
| 929f53ac13 | |||
| c6b983ea6c | |||
| 419bee76e2 | |||
| 2f3180cc07 | |||
| b5eb917e10 | |||
| 9fed8d6335 | |||
| becbdba56e | |||
| 85b9373760 | |||
| c069541cee | |||
| 4c0f20694d | |||
| a99175d46c | |||
| 4bab9b9f5b | |||
| a5ea603116 | |||
| 8be6d9bd77 | |||
| 9a9043aa67 | |||
| 7ed58386e5 | |||
| 51660449a8 | |||
| af1a8d13f1 | |||
| 8e13e6c181 | |||
| de915ba840 | |||
| 834922aa35 | |||
| 2d4e67c268 | |||
| 48a036a2bb | |||
| 140fb72aeb | |||
| 2d4c3790a6 | |||
| 74860fe2ee | |||
| aab69705b6 | |||
| d6c88621f6 | |||
| bd275601aa | |||
| d6dd93b9d0 | |||
| 47d61bb83a | |||
| d5850afcc2 | |||
| 0c48b0799e | |||
| 96a8c33767 |
+2
-1
@@ -55,4 +55,5 @@ docs/_build/
|
||||
# pycharm
|
||||
.idea
|
||||
|
||||
icon.psd
|
||||
icon.psd
|
||||
main-icon.psd
|
||||
+118
@@ -1,4 +1,122 @@
|
||||
|
||||
2.0.33.1871
|
||||
- core: normalize line endings in subtitles to LF (\n)
|
||||
- core: add subtitle storage lock to avoid race condition
|
||||
- core: be more verbose about subtitle storage addition
|
||||
- core: fix MPL2 newline parsing, which resulted in broken subtitles
|
||||
- core: encoding change: reduce log spam
|
||||
- submod: common: fix CM_starting_spacedots
|
||||
- opensubtitles: fix request/response handling
|
||||
|
||||
|
||||
|
||||
2.0.33.1849
|
||||
- opensubtitles: add VIP server handling + preference; VIP benefits: 10€/year, ad-free subs, 1000 subs/day, no-cache VIP server, help SZ and subscribe via http://v.ht/osvip
|
||||
- opensubtitles: try to reuse previous token instead of logging in every time
|
||||
- core: add throttling between searches (10 seconds)
|
||||
- core: fix IETF handling for good
|
||||
- core: fix no subtitles being searched in certain situations (when an external subtitle without special tag exists)
|
||||
- core: add subtitle blacklist
|
||||
- core: fixes
|
||||
- core: fix detection of certain PMS media stream language tags ("FR" for example)
|
||||
- core: missing subtitles: correctly skip unwanted subtitle extensions
|
||||
- core: missing subtitles: honor "treat undefined as first language" option correctly
|
||||
- api: add blacklisting endpoints for quickly searching for new subtitles via bookmarklet
|
||||
- submod: colors: apply color mods at the end of processing modifications; fix color mods
|
||||
- submod: new remove_tags modification to remove all styling tags from subtitles
|
||||
- submod: HI: be more aggressive at handling brackets
|
||||
- submod: OCR: update en and hrv
|
||||
- submod: common: remove "torrent downloaded from ..." lines
|
||||
- submod: OCR: fix WholeWord handling, improving modification
|
||||
- submod: apply OCR fixes before HI
|
||||
- submod: OCR: fix broken HI tag colons (ANNOUNCER'. instead of ANNOUNCER:)
|
||||
- menu: advanced: speed up batch modifications
|
||||
- menu: add subtitle blacklist
|
||||
- menu: recently played: show only TV episodes and movies (music tracks were listed here as well)
|
||||
|
||||
|
||||
2.0.29.1767
|
||||
- core: fix internal subtitle storage issues
|
||||
- core: handle "embedded-forced" tag (futureproofing)
|
||||
- core: remove more garbage tags from release groups (nzbgeek, chamele0n, buymore, xpost, postbot)
|
||||
- submod: OCR fix: fix music icon = paragraph
|
||||
|
||||
|
||||
2.0.29.1756
|
||||
- core: don't fail on uppercase file extensions
|
||||
- core: don't re-download a subtitle if we already downloaded one, it still physically exists and external subtitles are configured to be ignored
|
||||
- core: fix VTT subtitle duplication
|
||||
- core: if forced subtitles not explicitly wanted, ignore existing forced subtitles when searching
|
||||
- core: add full IETF language support for `Treat languages with country attribute as ISO 639-1 (e.g. don't download pt-BR if pt subtitle exists)`-setting for embedded subtitles
|
||||
- menu: remove buggy dynamic permission-based channel icon introduced in 1715
|
||||
- menu: improve `Items with missing subtitles` menu usage and item display
|
||||
- menu: `Advanced -> Get my logs` handle custom domains without port
|
||||
- menu: correctly show country/script part of languages with such attributes (e.g. pt-BR)
|
||||
- config: rename `Scan:` settings; make them easier to understand and translate
|
||||
- config: rephrase IETF options as "languages with country attribute" (e.g. pt-BR)
|
||||
- config: separate IETF options into how to display languages with country attribute and how they should be handled when searching/scanning (e.g. pt-BR)
|
||||
- config: `Scheduler: Item age to be considered recent` now can go up to 12 weeks
|
||||
- config: `Scheduler: Periodically search for recent items with missing subtitles` added `every 2 hours`
|
||||
- submod: swe: add Ĺ to Å
|
||||
|
||||
|
||||
2.0.26.1715
|
||||
- core: submod: OCR fixes: swe: replace ĺ with å inside words
|
||||
- core: fix handling of non-existent PMS audio_codec info
|
||||
- core: filename matching ignored the strictness setting in certain global directory configurations (thanks @raduc)
|
||||
- core: don't fail on migration errors
|
||||
- provider titlovi: handle multiple subtitles per archive
|
||||
- provider addic7ed: reset default boost to 19 (was 21)
|
||||
- menu: add warning icon on missing permissions
|
||||
- menu: manual subtitle list sometimes listed duplicates (thanks @andreashoyer)
|
||||
- menu: don't request PMS metadata in item details menu twice
|
||||
- menu: don't fail badly on non-existent PMS metadata in item details menu
|
||||
|
||||
|
||||
2.0.26.1695
|
||||
## ATTENTION: THIS RELEASE RESETS YOUR CONFIGURED LANGUAGES TO DEFAULT!
|
||||
- core: fix bug that caused SZ not to work for Windows users with special characters in their username
|
||||
- core: fix issues when logging failed manual download actions
|
||||
- core: update guessit to 2.1.4
|
||||
- core: fix issue causing the background task scheduler to stop after changing preferences
|
||||
- core: fix Polish encoding (try Windows-1250 first, then ISO 8859-2)
|
||||
- core: remove subscenter provider as it now uses captchas
|
||||
- core: add titlovi as default provider (thanks viking!)
|
||||
- core: increase default PMS API request timeout to 15 (old: 10, max: 45); add preference for that
|
||||
- core: re-add separate legacy FindMissingSubtitles task and run it on the first run to prime SZ's internal subtitle storage
|
||||
- core: add "low impact mode" for people with remote filesystems (currently enabled for List LANGUAGE subtitles in detail menu); alleviates certain plexweb timeout issues
|
||||
- menu: change naming of find missing subtitles menu item
|
||||
- legendastv: fix multi value guessit issues
|
||||
- submod: OCR: update eng and hrv OCR replace dictionaries; fix ". L am huge"
|
||||
|
||||
|
||||
2.0.25.1635
|
||||
- core: update memory handling, possibly reduce memory problems of 2.0
|
||||
- core: support for MPL2 subtitle format
|
||||
- core: update task handling
|
||||
- core: re-enable NVIDIA SHIELD support by fixing rarfile behaviour
|
||||
- core: add SZ_UNRAR_TOOL environment variable for custom unrar location
|
||||
- core: disable SZ when no providers are enabled
|
||||
- core: only start activity monitor if channel or agent are enabled
|
||||
- core: improve custom provider integration
|
||||
- core: update eastern european encoding detection (especially Romanian)
|
||||
- tasks: reduce provider stress by introducing wait times between searches/downloads
|
||||
- windows: correctly ship UnRAR.exe
|
||||
- windows: skip DBM checks
|
||||
- addic7ed: fix Nip/Tuck
|
||||
- subscenter: use new domain
|
||||
|
||||
|
||||
2.0.24.1581
|
||||
- legendastv: ship unrar.exe for Windows users (fixes unrar issues)
|
||||
- addic7ed: fix TooManyRequests error
|
||||
- submod: OCR fixes NL: add custom dictionary data for malformed characters
|
||||
- submod: OCR fixes: update hrv/NL dictionaries
|
||||
- submod: common: remove spaces before punctuation
|
||||
- podnapisi: now returns more subtitles again
|
||||
ATTENTION: Sub-Zero is still broken on PMS for SHIELD. Help needed!
|
||||
|
||||
|
||||
2.0.24.1565
|
||||
- core: fix searchallrecentlymissing task erroring if item not found
|
||||
- core: fix non-plex-items appearing in and crashing the recently played list
|
||||
|
||||
+98
-21
@@ -1,7 +1,6 @@
|
||||
# coding=utf-8
|
||||
import sys
|
||||
import datetime
|
||||
import os
|
||||
|
||||
from subzero.sandbox import restore_builtins
|
||||
|
||||
@@ -24,8 +23,9 @@ sys.modules["interface"] = interface
|
||||
|
||||
from subzero.constants import OS_PLEX_USERAGENT, PERSONAL_MEDIA_IDENTIFIER
|
||||
from interface.menu import *
|
||||
from support.plex_media import media_to_videos, get_media_item_ids, scan_videos
|
||||
from support.storage import save_subtitles, store_subtitle_info
|
||||
from support.plex_media import media_to_videos, get_media_item_ids
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles, store_subtitle_info, get_subtitle_storage
|
||||
from support.items import is_ignored
|
||||
from support.config import config
|
||||
from support.lib import get_intent
|
||||
@@ -114,12 +114,43 @@ def update_local_media(metadata, media, media_type="movies"):
|
||||
pass
|
||||
|
||||
|
||||
def agent_extract_embedded(videos):
|
||||
try:
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
|
||||
for video in videos:
|
||||
item = video["item"]
|
||||
stored_subs = subtitle_storage.load_or_new(item)
|
||||
|
||||
for part in get_all_parts(item):
|
||||
for requested_language in config.lang_list:
|
||||
embedded_subs = stored_subs.get_by_provider(part.id, requested_language, "embedded")
|
||||
current = stored_subs.get_any(part.id, requested_language)
|
||||
if not embedded_subs:
|
||||
stream_data = get_embedded_subtitle_streams(part, requested_language=requested_language,
|
||||
get_forced=config.forced_only)
|
||||
|
||||
if stream_data:
|
||||
stream = stream_data[0]["stream"]
|
||||
|
||||
extract_embedded_sub(rating_key=item.rating_key, part_id=part.id,
|
||||
stream_index=str(stream.index),
|
||||
language=str(requested_language), with_mods=True, refresh=False,
|
||||
set_current=not current)
|
||||
else:
|
||||
Log.Debug("Skipping embedded subtitle extraction for %s, already got %r from %s",
|
||||
item.rating_key, requested_language, embedded_subs[0].id)
|
||||
except:
|
||||
Log.Error("Something went wrong when auto-extracting subtitles, continuing: %s", traceback.format_exc())
|
||||
|
||||
|
||||
class SubZeroAgent(object):
|
||||
agent_type = None
|
||||
agent_type_verbose = None
|
||||
languages = [Locale.Language.English]
|
||||
primary_provider = False
|
||||
score_prefs_key = None
|
||||
debounce = 10
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(SubZeroAgent, self).__init__(*args, **kwargs)
|
||||
@@ -130,7 +161,14 @@ class SubZeroAgent(object):
|
||||
Log.Debug("Sub-Zero %s, %s search" % (config.version, self.agent_type))
|
||||
results.Append(MetadataSearchResult(id='null', score=100))
|
||||
|
||||
def store_blank_subtitle_metadata(self, video_part_map):
|
||||
store_subtitle_info(video_part_map, dict((k, []) for k in video_part_map.keys()), None, mode="a")
|
||||
|
||||
def update(self, metadata, media, lang):
|
||||
if not config.enable_agent:
|
||||
Log.Debug("Skipping Sub-Zero agent(s)")
|
||||
return
|
||||
|
||||
Log.Debug("Sub-Zero %s, %s update called" % (config.version, self.agent_type))
|
||||
intent = get_intent()
|
||||
|
||||
@@ -167,36 +205,71 @@ class SubZeroAgent(object):
|
||||
set_refresh_menu_state(media, media_type=self.agent_type)
|
||||
|
||||
# scanned_video_part_map = {subliminal.Video: plex_part, ...}
|
||||
scanned_video_part_map = scan_videos(videos, kind=self.agent_type)
|
||||
providers = config.get_providers(media_type=self.agent_type)
|
||||
scanned_video_part_map = scan_videos(videos, providers=providers)
|
||||
|
||||
# auto extract embedded
|
||||
if config.embedded_auto_extract:
|
||||
agent_extract_embedded(videos)
|
||||
|
||||
# clear missing subtitles menu data
|
||||
if not scheduler.is_task_running("MissingSubtitles"):
|
||||
scheduler.clear_task_data("MissingSubtitles")
|
||||
|
||||
downloaded_subtitles = None
|
||||
if not config.enable_agent:
|
||||
Log.Debug("Skipping Sub-Zero agent(s)")
|
||||
|
||||
else:
|
||||
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
|
||||
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score)
|
||||
item_ids = get_media_item_ids(media, kind=self.agent_type)
|
||||
# debounce for self.debounce seconds
|
||||
now = datetime.datetime.now()
|
||||
if "last_call" in Dict:
|
||||
last_call = Dict["last_call"]
|
||||
if last_call + datetime.timedelta(seconds=self.debounce) > now:
|
||||
wait = self.debounce - (now - last_call).seconds
|
||||
if wait >= 1:
|
||||
Log.Debug("Waiting %s seconds until continuing", wait)
|
||||
Thread.Sleep(wait)
|
||||
|
||||
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
|
||||
try:
|
||||
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score,
|
||||
throttle_time=self.debounce, providers=providers)
|
||||
except:
|
||||
Log.Exception("Something went wrong when downloading subtitles")
|
||||
|
||||
if downloaded_subtitles is not None:
|
||||
Dict["last_call"] = datetime.datetime.now()
|
||||
|
||||
item_ids = get_media_item_ids(media, kind=self.agent_type)
|
||||
|
||||
downloaded_any = False
|
||||
if downloaded_subtitles:
|
||||
downloaded_any = any(downloaded_subtitles.values())
|
||||
|
||||
if downloaded_any:
|
||||
save_subtitles(scanned_video_part_map, downloaded_subtitles, mods=config.default_mods)
|
||||
save_successful = False
|
||||
try:
|
||||
save_successful = save_subtitles(scanned_video_part_map, downloaded_subtitles,
|
||||
mods=config.default_mods)
|
||||
except:
|
||||
Log.Exception("Something went wrong when saving subtitles")
|
||||
|
||||
track_usage("Subtitle", "refreshed", "download", 1)
|
||||
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
# store item(s) in history
|
||||
for subtitle in video_subtitles:
|
||||
item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
|
||||
history = get_history()
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle)
|
||||
# store SZ meta info even if download wasn't successful
|
||||
if not save_successful:
|
||||
self.store_blank_subtitle_metadata(scanned_video_part_map)
|
||||
|
||||
else:
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
# store item(s) in history
|
||||
for subtitle in video_subtitles:
|
||||
item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
|
||||
history = get_history()
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle)
|
||||
history.destroy()
|
||||
else:
|
||||
# store subtitle info even if we've downloaded none
|
||||
store_subtitle_info(scanned_video_part_map, dict((k, []) for k in scanned_video_part_map.keys()),
|
||||
None, mode="a")
|
||||
# store SZ meta info even if we've downloaded none
|
||||
self.store_blank_subtitle_metadata(scanned_video_part_map)
|
||||
|
||||
update_local_media(metadata, media, media_type=self.agent_type)
|
||||
|
||||
@@ -213,6 +286,10 @@ class SubZeroAgent(object):
|
||||
|
||||
Dict.Save()
|
||||
|
||||
# fsync cache
|
||||
if config.new_style_cache:
|
||||
config.sync_cache()
|
||||
|
||||
|
||||
class SubZeroSubtitlesAgentMovies(SubZeroAgent, Agent.Movies):
|
||||
contributes_to = ['com.plexapp.agents.imdb', 'com.plexapp.agents.xbmcnfo', 'com.plexapp.agents.themoviedb', 'com.plexapp.agents.hama']
|
||||
|
||||
@@ -8,7 +8,7 @@ import urlparse
|
||||
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
|
||||
from subzero.lib.io import FileIO
|
||||
from subzero.constants import PREFIX, PLUGIN_IDENTIFIER
|
||||
@@ -49,6 +49,10 @@ def AdvancedMenu(randomize=None, header=None, message=None):
|
||||
key=Callback(TriggerBetterSubtitles, randomize=timestamp()),
|
||||
title=pad_title("Trigger find better subtitles"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SkipFindBetterSubtitles, randomize=timestamp()),
|
||||
title=pad_title("Skip next find better subtitles (sets last run to now)"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerStorageMaintenance, randomize=timestamp()),
|
||||
title=pad_title("Trigger subtitle storage maintenance"),
|
||||
@@ -57,6 +61,10 @@ def AdvancedMenu(randomize=None, header=None, message=None):
|
||||
key=Callback(TriggerStorageMigration, randomize=timestamp()),
|
||||
title=pad_title("Trigger subtitle storage migration (expensive)"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerCacheMaintenance, randomize=timestamp()),
|
||||
title=pad_title("Trigger cache maintenance (refiners, providers and packs/archives)"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ApplyDefaultMods, randomize=timestamp()),
|
||||
title=pad_title("Apply configured default subtitle mods to all (active) stored subtitles"),
|
||||
@@ -89,6 +97,10 @@ def AdvancedMenu(randomize=None, header=None, message=None):
|
||||
key=Callback(InvalidateCache, randomize=timestamp()),
|
||||
title=pad_title("Invalidate Sub-Zero metadata caches (subliminal)"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetProviderThrottle, randomize=timestamp()),
|
||||
title=pad_title("Reset provider throttle states"),
|
||||
))
|
||||
return oc
|
||||
|
||||
|
||||
@@ -158,6 +170,20 @@ def TriggerBetterSubtitles(randomize=None):
|
||||
)
|
||||
|
||||
|
||||
|
||||
@route(PREFIX + '/skipbetter')
|
||||
@debounce
|
||||
def SkipFindBetterSubtitles(randomize=None):
|
||||
task = scheduler.task("FindBetterSubtitles")
|
||||
task.last_run = datetime.datetime.now()
|
||||
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='FindBetterSubtitles skipped'
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/triggermaintenance')
|
||||
@debounce
|
||||
def TriggerStorageMaintenance(randomize=None):
|
||||
@@ -180,6 +206,17 @@ def TriggerStorageMigration(randomize=None):
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/triggercachemaintenance')
|
||||
@debounce
|
||||
def TriggerCacheMaintenance(randomize=None):
|
||||
scheduler.dispatch_task("CacheMaintenance")
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='TriggerCacheMaintenance triggered'
|
||||
)
|
||||
|
||||
|
||||
def apply_default_mods(reapply_current=False):
|
||||
storage = get_subtitle_storage()
|
||||
subs_applied = 0
|
||||
@@ -264,7 +301,7 @@ def GetLogsLink():
|
||||
|
||||
elif "Referer" in req_headers:
|
||||
parsed = urlparse.urlparse(req_headers["Referer"])
|
||||
link_base = "%s://%s:%s" % (parsed.scheme, parsed.hostname, parsed.port)
|
||||
link_base = "%s://%s%s" % (parsed.scheme, parsed.hostname, (":%s" % parsed.port) if parsed.port else "")
|
||||
Log.Debug("Using referer-based link_base")
|
||||
get_external_ip = False
|
||||
|
||||
@@ -300,7 +337,10 @@ def DownloadLogs():
|
||||
@debounce
|
||||
def InvalidateCache(randomize=None):
|
||||
from subliminal.cache import region
|
||||
region.invalidate()
|
||||
if config.new_style_cache:
|
||||
region.backend.clear()
|
||||
else:
|
||||
region.invalidate()
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
@@ -338,3 +378,14 @@ def ClearPin(randomize=None):
|
||||
Dict["pin_correct_time"] = None
|
||||
config.locked = True
|
||||
return fatality(force_title="Menu locked", header=" ", no_history=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/reset_throttle')
|
||||
def ResetProviderThrottle(randomize=None):
|
||||
Dict["provider_throttle"] = {}
|
||||
Dict.Save()
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='Provider throttles reset'
|
||||
)
|
||||
@@ -1,17 +1,21 @@
|
||||
# coding=utf-8
|
||||
import os
|
||||
import subprocess
|
||||
import traceback
|
||||
|
||||
from subzero.language import Language
|
||||
|
||||
from sub_mod import SubtitleModificationsMenu
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, add_ignore_options, get_item_task_data, \
|
||||
set_refresh_menu_state, route
|
||||
set_refresh_menu_state, route, extract_embedded_sub
|
||||
|
||||
from refresh_item import RefreshItem
|
||||
from subzero.constants import PREFIX
|
||||
from support.config import config
|
||||
from support.helpers import timestamp, cast_bool, df, get_language
|
||||
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub
|
||||
from support.lib import Plex
|
||||
from support.plex_media import get_plex_metadata, scan_videos, PMSMediaProxy
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.helpers import timestamp, df, get_language, display_language, quote_args, get_language_from_stream
|
||||
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub, get_item_title, save_stored_sub
|
||||
from support.plex_media import get_plex_metadata, get_part, get_embedded_subtitle_streams
|
||||
from support.scanning import scan_videos
|
||||
from support.scheduler import scheduler
|
||||
from support.storage import get_subtitle_storage
|
||||
|
||||
@@ -20,7 +24,7 @@ from support.storage import get_subtitle_storage
|
||||
|
||||
@route(PREFIX + '/item/{rating_key}/actions')
|
||||
@debounce
|
||||
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None):
|
||||
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None, header=None):
|
||||
"""
|
||||
displays the item details menu of an item that doesn't contain any deeper tree, such as a movie or an episode
|
||||
:param rating_key:
|
||||
@@ -33,12 +37,22 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
from interface.main import IgnoreMenu
|
||||
|
||||
title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)
|
||||
item = get_item(rating_key)
|
||||
item = plex_item = get_item(rating_key)
|
||||
current_kind = get_item_kind_from_rating_key(rating_key)
|
||||
|
||||
timeout = 30
|
||||
|
||||
oc = SubFolderObjectContainer(title2=title, replace_parent=True)
|
||||
oc = SubFolderObjectContainer(title2=title, replace_parent=True, header=header)
|
||||
|
||||
if not item:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, rating_key=rating_key, title=title, base_title=base_title,
|
||||
item_title=item_title, randomize=timestamp()),
|
||||
title=u"Item not found: %s!" % item_title,
|
||||
summary="Plex didn't return any information about the item, please refresh it and come back later",
|
||||
thumb=default_thumb
|
||||
))
|
||||
return oc
|
||||
|
||||
# add back to season for episode
|
||||
if current_kind == "episode":
|
||||
@@ -74,9 +88,6 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load_or_new(item)
|
||||
|
||||
# get the plex item
|
||||
plex_item = get_item(rating_key)
|
||||
|
||||
# look for subtitles for all available media parts and all of their languages
|
||||
has_multiple_parts = len(plex_item.media) > 1
|
||||
part_index = 0
|
||||
@@ -89,6 +100,12 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
part_id = str(part.id)
|
||||
part_index += 1
|
||||
|
||||
part_index_addon = ""
|
||||
part_summary_addon = ""
|
||||
if has_multiple_parts:
|
||||
part_index_addon = u"File %s: " % part_index
|
||||
part_summary_addon = "%s " % filename
|
||||
|
||||
# iterate through all configured languages
|
||||
for lang in config.lang_list:
|
||||
# get corresponding stored subtitle data for that media part (physical media item), for language
|
||||
@@ -96,12 +113,6 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
current_sub_id = None
|
||||
current_sub_provider_name = None
|
||||
|
||||
part_index_addon = ""
|
||||
part_summary_addon = ""
|
||||
if has_multiple_parts:
|
||||
part_index_addon = u"File %s: " % part_index
|
||||
part_summary_addon = "%s " % filename
|
||||
|
||||
summary = u"%sNo current subtitle in storage" % part_summary_addon
|
||||
current_score = None
|
||||
if current_sub:
|
||||
@@ -111,45 +122,77 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
|
||||
summary = u"%sCurrent subtitle: %s (added: %s, %s), Language: %s, Score: %i, Storage: %s" % \
|
||||
(part_summary_addon, current_sub.provider_name, df(current_sub.date_added),
|
||||
current_sub.mode_verbose, lang, current_sub.score, current_sub.storage_type)
|
||||
current_sub.mode_verbose, display_language(lang), current_sub.score,
|
||||
current_sub.storage_type)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_title=item_title, language=lang, language_name=lang.name,
|
||||
item_title=item_title, language=lang, language_name=display_language(lang),
|
||||
current_id=current_sub_id,
|
||||
item_type=plex_item.type, filename=filename, current_data=summary,
|
||||
randomize=timestamp(), current_provider=current_sub_provider_name,
|
||||
current_score=current_score),
|
||||
title=u"%sActions for %s subtitle" % (part_index_addon, lang.name),
|
||||
title=u"%sManage %s subtitle" % (part_index_addon, display_language(lang)),
|
||||
summary=summary
|
||||
))
|
||||
else:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_title=item_title, language=lang, language_name=lang.name,
|
||||
item_title=item_title, language=lang, language_name=display_language(lang),
|
||||
current_id=current_sub_id,
|
||||
item_type=plex_item.type, filename=filename, current_data=summary,
|
||||
randomize=timestamp(), current_provider=current_sub_provider_name,
|
||||
current_score=current_score),
|
||||
title=u"%sList %s subtitles" % (part_index_addon, lang.name),
|
||||
title=u"%sList %s subtitles" % (part_index_addon, display_language(lang)),
|
||||
summary=summary
|
||||
))
|
||||
|
||||
add_ignore_options(oc, "videos", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
if config.plex_transcoder:
|
||||
# embedded subtitles
|
||||
embedded_count = 0
|
||||
embedded_langs = []
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
|
||||
lang = get_language_from_stream(stream.language_code)
|
||||
|
||||
if not lang and config.treat_und_as_first:
|
||||
lang = list(config.lang_list)[0]
|
||||
|
||||
if lang:
|
||||
embedded_langs.append(lang)
|
||||
embedded_count += 1
|
||||
|
||||
if embedded_count:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListEmbeddedSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_type=plex_item.type, item_title=item_title, base_title=base_title,
|
||||
randomize=timestamp()),
|
||||
title=u"%sEmbedded subtitles (%s)" % (part_index_addon, ", ".join(display_language(l) for l in
|
||||
set(embedded_langs))),
|
||||
summary=u"Extract and activate embedded subtitle streams"
|
||||
))
|
||||
|
||||
ignore_title = item_title
|
||||
if current_kind == "episode":
|
||||
ignore_title = get_item_title(item)
|
||||
add_ignore_options(oc, "videos", title=ignore_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
subtitle_storage.destroy()
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/current_sub/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
@route(PREFIX + '/item/current_sub/{rating_key}/{part_id}')
|
||||
def SubtitleOptionsMenu(**kwargs):
|
||||
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
|
||||
oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True, header=kwargs.get("header"),
|
||||
message=kwargs.get("message"))
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
language = kwargs["language"]
|
||||
current_data = kwargs["current_data"]
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
subs_count = stored_subs.count(part_id, language)
|
||||
kwargs.pop("randomize")
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
@@ -159,22 +202,235 @@ def SubtitleOptionsMenu(**kwargs):
|
||||
summary=kwargs["current_data"],
|
||||
thumb=default_thumb
|
||||
))
|
||||
if subs_count:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListStoredSubsForItemMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"Select active %s subtitle" % kwargs["language_name"],
|
||||
summary=u"%d subtitles in storage" % subs_count
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"List %s subtitles" % kwargs["language_name"],
|
||||
title=u"List available %s subtitles" % kwargs["language_name"],
|
||||
summary=kwargs["current_data"]
|
||||
))
|
||||
if current_sub:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"Modify %s subtitle" % kwargs["language_name"],
|
||||
title=u"Modify current %s subtitle" % kwargs["language_name"],
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
))
|
||||
|
||||
if current_sub.provider_name != "embedded":
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(BlacklistSubtitleMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"Blacklist current %s subtitle and search for a new one" % kwargs["language_name"],
|
||||
summary=current_data
|
||||
))
|
||||
|
||||
current_bl, subs = stored_subs.get_blacklist(part_id, language)
|
||||
if current_bl:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ManageBlacklistMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"Manage blacklist (%s contained)" % len(current_bl),
|
||||
summary=u"Inspect currently blacklisted subtitles"
|
||||
))
|
||||
|
||||
storage.destroy()
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/list_stored_subs/{rating_key}/{part_id}')
def ListStoredSubsForItemMenu(**kwargs):
    """
    lists every stored subtitle of one media part and language, newest first, so one of them can be selected
    as the active subtitle
    :param kwargs: menu state; "title", "rating_key", "part_id", "language" (parsed with Language.fromietf) and
                   "randomize" are required; everything except "randomize" is handed on to
                   SelectStoredSubForItemMenu
    :return: the populated SubFolderObjectContainer
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    all_subs = stored_subs.get_all(part_id, language)

    # every entry below gets its own fresh randomize value, so drop the incoming one
    kwargs.pop("randomize")

    # the "current" entry only holds the key of the active subtitle, it is not a subtitle itself
    # NOTE(review): if get_all() can contain further bookkeeping entries besides "current", they would have to
    # be filtered here as well - confirm against the storage implementation
    for key, subtitle in sorted(filter(lambda x: x[0] != "current", all_subs.items()),
                                key=lambda x: x[1].date_added, reverse=True):
        is_current = key == all_subs["current"]

        summary = u"added: %s, %s, Language: %s, Score: %i, Storage: %s" % \
                  (df(subtitle.date_added),
                   subtitle.mode_verbose, display_language(language), subtitle.score,
                   subtitle.storage_type)

        sub_name = subtitle.provider_name
        if sub_name == "embedded":
            # embedded subtitles all share one provider name, the id tells them apart
            sub_name += " (%s)" % subtitle.id

        oc.add(DirectoryObject(
            key=Callback(SelectStoredSubForItemMenu, randomize=timestamp(), sub_key="__".join(key), **kwargs),
            title=u"%s%s, Score: %s" % ("Current: " if is_current else "Stored: ", sub_name,
                                        subtitle.score),
            summary=summary
        ))

    # release the storage handle, as the other menus using get_current_sub do; it was never released here
    storage.destroy()

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/set_current_sub/{rating_key}/{part_id}')
@debounce
def SelectStoredSubForItemMenu(**kwargs):
    """
    marks one stored subtitle as the current one for a media part/language, saves it through save_stored_sub
    and returns to the subtitle options menu with a success notice
    :param kwargs: menu state; reads "rating_key", "part_id", "language" (IETF code), "item_type" and consumes
                   "sub_key" (provider and id joined by "__") as well as "randomize"
    :return: the result of SubtitleOptionsMenu
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])
    item_type = kwargs["item_type"]

    # the key travels through the URL as "a__b" and is stored as a tuple
    joined_key = kwargs.pop("sub_key")
    sub_key = tuple(joined_key.split("__"))

    plex_item = get_item(rating_key)
    storage = get_subtitle_storage()
    stored_subs = storage.load(plex_item.rating_key)

    candidates = stored_subs.get_all(part_id, language)
    chosen = candidates[sub_key]

    # point the "current" marker at the chosen subtitle before saving
    candidates["current"] = sub_key

    save_stored_sub(chosen, rating_key, part_id, language, item_type,
                    plex_item=plex_item, storage=storage, stored_subs=stored_subs)
    storage.destroy()

    # hand the remaining state back to the options menu, with a fresh randomize value
    kwargs.pop("randomize")
    kwargs.update({"header": 'Success', "message": 'Subtitle saved to disk'})

    return SubtitleOptionsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/blacklist_recent/{language}')
@route(PREFIX + '/item/blacklist_recent')
def BlacklistRecentSubtitleMenu(**kwargs):
    """
    blacklists the current subtitles of the first entry of Dict["last_played_items"] by delegating to
    BlacklistAllPartsSubtitleMenu
    :param kwargs: passed through unchanged, with "rating_key" set to that entry
    :return: the result of BlacklistAllPartsSubtitleMenu, or None if there is no last played item
    """
    has_played_items = "last_played_items" in Dict and Dict["last_played_items"]
    if not has_played_items:
        return

    kwargs["rating_key"] = Dict["last_played_items"][0]
    return BlacklistAllPartsSubtitleMenu(**kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/blacklist_all/{rating_key}/{language}')
@route(PREFIX + '/item/blacklist_all/{rating_key}')
def BlacklistAllPartsSubtitleMenu(**kwargs):
    """
    blacklists the current subtitle of every stored part of an item, optionally limited to one language, and
    triggers a forced refresh of the item afterwards
    :param kwargs: reads "rating_key" and the optional "language" (IETF code); without a language the current
                   subtitles of all stored languages are blacklisted
    :return: the result of RefreshItem, or None if the item could not be retrieved
    """
    rating_key = kwargs.get("rating_key")
    wanted_language = kwargs.get("language")
    if wanted_language:
        wanted_language = Language.fromietf(wanted_language)

    item = get_item(rating_key)

    if not item:
        return

    item_title = get_item_title(item)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)
    for part_id, languages in stored_subs.parts.iteritems():
        sub_dict = languages
        if wanted_language:
            key = str(wanted_language)
            if key not in sub_dict:
                continue

            sub_dict = {key: sub_dict[key]}

        # the loop variable must not reuse the filter's name: a for-loop target stays bound after the loop, so
        # reusing it silently turned "all languages" into "last language of the previous part" for every
        # following part
        for lang_key, subs in sub_dict.iteritems():
            if "current" in subs:
                stored_subs.blacklist(part_id, lang_key, subs["current"])
                Log.Info("Added %s to blacklist", subs["current"])

    subtitle_storage.save(stored_subs)
    subtitle_storage.destroy()

    return RefreshItem(rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(), timeout=30000)
|
||||
|
||||
|
||||
def blacklist(rating_key, part_id, language):
    """
    adds the current subtitle of the given part/language to the blacklist and saves the storage
    :param rating_key: rating key of the item, passed to get_current_sub
    :param part_id: id of the media part
    :param language: language of the subtitle, passed to get_current_sub and the blacklist call
    :return: True if a subtitle was blacklisted, None if there is no current subtitle
    """
    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    if not current_sub:
        # nothing to blacklist; still release the storage handle, which the early return used to skip
        storage.destroy()
        return

    stored_subs.blacklist(part_id, language, current_sub.key)
    storage.save(stored_subs)
    storage.destroy()

    Log.Info("Added %s to blacklist", current_sub.key)

    return True
|
||||
|
||||
|
||||
@route(PREFIX + '/item/blacklist/{rating_key}/{part_id}')
@debounce
def BlacklistSubtitleMenu(**kwargs):
    """
    blacklists the current subtitle of one part/language and triggers a forced refresh of the item
    :param kwargs: menu state; "rating_key", "part_id", "language", "item_title" and "randomize" are required
    :return: the result of RefreshItem
    """
    # read all required values up front, so a missing one fails before anything is blacklisted
    rating_key, part_id = kwargs["rating_key"], kwargs["part_id"]
    language, item_title = kwargs["language"], kwargs["item_title"]

    blacklist(rating_key, part_id, language)
    kwargs.pop("randomize")

    refresh_args = dict(rating_key=rating_key, item_title=item_title, force=True)
    return RefreshItem(randomize=timestamp(), timeout=30000, **refresh_args)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/manage_blacklist/{rating_key}/{part_id}', force=bool)
@debounce
def ManageBlacklistMenu(**kwargs):
    """
    shows the blacklisted subtitles of one part/language, newest first; selecting an entry calls this menu
    again with "remove_sub_key" set, which removes that entry from the blacklist
    :param kwargs: menu state; reads "title", "rating_key", "part_id", "language", "item_title",
                   "current_data" and consumes "randomize" plus the optional "remove_sub_key"
                   (provider and id joined by "__")
    :return: the populated SubFolderObjectContainer
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    remove_sub_key = kwargs.pop("remove_sub_key", None)

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    current_bl, subs = stored_subs.get_blacklist(part_id, language)

    if remove_sub_key:
        # the key arrives as "provider__id"; the blacklist is keyed by tuples
        remove_sub_key = tuple(remove_sub_key.split("__"))
        stored_subs.blacklist(part_id, language, remove_sub_key, add=False)
        storage.save(stored_subs)
        Log.Info("Removed %s from blacklist", remove_sub_key)

    kwargs.pop("randomize")

    back_target = Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
                           title=kwargs["title"], randomize=timestamp())
    back_entry = DirectoryObject(
        key=back_target,
        title=u"< Back to %s" % kwargs["title"],
        summary=kwargs["current_data"],
        thumb=default_thumb
    )
    oc.add(back_entry)

    def by_date_added(entry):
        # a named function taking the whole pair is used instead of a tuple-unpacking lambda, which the
        # restricted parser does not handle
        return entry[1]["date_added"]

    blacklisted = sorted(current_bl.iteritems(), key=by_date_added, reverse=True)
    for sub_key, data in blacklisted:
        provider_name, subtitle_id = sub_key
        # NOTE(review): current_sub is used unconditionally here - confirm it cannot be empty in this menu
        entry_title = u"%s, %s (added: %s, %s), Language: " \
                      u"%s, Score: %i, Storage: %s" % (provider_name, subtitle_id, df(data["date_added"]),
                                                       current_sub.get_mode_verbose(data["mode"]),
                                                       display_language(Language.fromietf(language)),
                                                       data["score"], data["storage_type"])
        remove_target = Callback(ManageBlacklistMenu, remove_sub_key="__".join(sub_key), randomize=timestamp(),
                                 **kwargs)
        oc.add(DirectoryObject(
            key=remove_target,
            title=entry_title,
            summary=u"Remove subtitle from blacklist"
        ))

    storage.destroy()

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/search/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item_title=None, filename=None,
|
||||
@@ -200,18 +456,22 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
))
|
||||
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
plex_part = None
|
||||
if not config.low_impact_mode:
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True)
|
||||
|
||||
if not scanned_parts:
|
||||
Log.Error("Couldn't list available subtitles for %s", rating_key)
|
||||
return oc
|
||||
if not scanned_parts:
|
||||
Log.Error("Couldn't list available subtitles for %s", rating_key)
|
||||
return oc
|
||||
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
video_display_data = [video.format] if video.format else []
|
||||
if video.release_group:
|
||||
video_display_data.append(u"by %s" % video.release_group)
|
||||
video_display_data = " ".join(video_display_data)
|
||||
video_display_data = [video.format] if video.format else []
|
||||
if video.release_group:
|
||||
video_display_data.append(u"by %s" % video.release_group)
|
||||
video_display_data = " ".join(video_display_data)
|
||||
else:
|
||||
video_display_data = metadata["filename"]
|
||||
|
||||
current_display = (u"Current: %s (%s) " % (current_provider, current_score) if current_provider else "")
|
||||
if not running:
|
||||
@@ -243,7 +503,8 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
part_id=part_id, title=title, current_id=current_id, item_type=item_type,
|
||||
current_provider=current_provider, current_score=current_score,
|
||||
randomize=timestamp()),
|
||||
title=u"Searching for %s subs (%s), refresh here ..." % (get_language(language).name, video_display_data),
|
||||
title=u"Searching for %s subs (%s), refresh here ..." % (display_language(get_language(language)),
|
||||
video_display_data),
|
||||
summary=u"%sFilename: %s" % (current_display, filename),
|
||||
thumb=default_thumb
|
||||
))
|
||||
@@ -251,25 +512,35 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
if not search_results or search_results == "found_none":
|
||||
return oc
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
current_bl, subs = stored_subs.get_blacklist(part_id, language)
|
||||
|
||||
seen = []
|
||||
for subtitle in search_results:
|
||||
if subtitle.id in seen:
|
||||
continue
|
||||
|
||||
bl_addon = ""
|
||||
if (str(subtitle.provider_name), str(subtitle.id)) in current_bl:
|
||||
bl_addon = "Blacklisted "
|
||||
|
||||
wrong_fps_addon = ""
|
||||
if subtitle.wrong_fps:
|
||||
wrong_fps_addon = " (wrong FPS, sub: %s, media: %s)" % (subtitle.fps, plex_part.fps)
|
||||
if plex_part:
|
||||
wrong_fps_addon = " (wrong FPS, sub: %s, media: %s)" % (subtitle.fps, plex_part.fps)
|
||||
else:
|
||||
wrong_fps_addon = " (wrong FPS, sub: %s, media: unknown, low impact mode)" % subtitle.fps
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerDownloadSubtitle, rating_key=rating_key, randomize=timestamp(), item_title=item_title,
|
||||
subtitle_id=str(subtitle.id), language=language),
|
||||
title=u"%s: %s, score: %s%s" % ("Available" if current_id != subtitle.id else "Current",
|
||||
subtitle.provider_name, subtitle.score, wrong_fps_addon),
|
||||
title=u"%s%s: %s, score: %s%s" % (bl_addon, "Available" if current_id != subtitle.id else "Current",
|
||||
subtitle.provider_name, subtitle.score, wrong_fps_addon),
|
||||
summary=u"Release: %s, Matches: %s" % (subtitle.release_info, ", ".join(subtitle.matches)),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
seen.append(current_id)
|
||||
seen.append(subtitle.id)
|
||||
|
||||
return oc
|
||||
|
||||
@@ -296,3 +567,74 @@ def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None,
|
||||
scheduler.clear_task_data("AvailableSubsForItem")
|
||||
|
||||
return fatality(randomize=timestamp(), header=" ", replace_parent=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/embedded/{rating_key}/{part_id}')
def ListEmbeddedSubsForItemMenu(**kwargs):
    """
    lists the embedded subtitle streams of one media part; every stream with a language gets two entries,
    extraction with default mods and plain extraction
    :param kwargs: menu state; reads "rating_key", "part_id", "title", "item_title", "base_title" and consumes
                   "randomize"; the remaining state is passed on to TriggerExtractEmbeddedSubForItemMenu
    :return: the populated SubFolderObjectContainer
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    title = kwargs["title"]
    kwargs.pop("randomize")

    oc = SubFolderObjectContainer(title2=title, replace_parent=True)

    back_target = Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
                           base_title=kwargs["base_title"], title=kwargs["item_title"], randomize=timestamp())
    oc.add(DirectoryObject(
        key=back_target,
        title=u"< Back to %s" % kwargs["title"],
        thumb=default_thumb
    ))

    plex_item = get_item(rating_key)
    part = get_part(plex_item, part_id)

    if not part:
        # only the back entry can be offered without a part
        return oc

    for stream_data in get_embedded_subtitle_streams(part, skip_duplicate_unknown=False):
        language = stream_data["language"]
        is_unknown = stream_data["is_unknown"]
        stream = stream_data["stream"]

        if not language:
            continue

        forced = stream.forced

        # optional markers appended to the language in both entry titles
        unknown_tag = " (unknown)" if is_unknown else ""
        forced_tag = " (forced)" if forced else ""
        title_tag = " (\"%s\")" % stream.title if stream.title else ""

        with_mods_target = Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
                                    stream_index=str(stream.index), language=language, with_mods=True, **kwargs)
        oc.add(DirectoryObject(
            key=with_mods_target,
            title=u"Extract stream %s, "
                  u"%s%s%s%s with default mods" % (stream.index, display_language(language),
                                                   unknown_tag, forced_tag, title_tag),
        ))

        plain_target = Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
                                stream_index=str(stream.index), language=language, **kwargs)
        oc.add(DirectoryObject(
            key=plain_target,
            title=u"Extract stream %s, %s%s%s%s" % (stream.index, display_language(language),
                                                    unknown_tag, forced_tag, title_tag),
        ))

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/extract_embedded/{rating_key}/{part_id}/{stream_index}')
@debounce
def TriggerExtractEmbeddedSubForItemMenu(**kwargs):
    """
    starts extract_embedded_sub in a thread with the given arguments and returns to the item details menu,
    showing a notice that the extraction was triggered
    :param kwargs: menu state; "rating_key", "randomize", "item_type", "stream_index", "part_id", "language"
                   and "item_title" are required, "with_mods" is optional
    :return: the result of ItemDetailsMenu
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs.get("part_id")
    stream_index = kwargs.get("stream_index")

    # the worker receives the complete, still unmodified state
    Thread.Create(extract_embedded_sub, **kwargs)
    header = u"Extracting of embedded subtitle %s of part %s:%s triggered" % (stream_index, rating_key, part_id)

    # strip the arguments that are not handed on to ItemDetailsMenu
    for consumed in ("randomize", "item_type", "stream_index", "part_id"):
        kwargs.pop(consumed)
    kwargs.pop("with_mods", False)
    kwargs.pop("language")

    kwargs.update({"title": kwargs["item_title"], "header": header})

    return ItemDetailsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
|
||||
@@ -2,11 +2,10 @@
|
||||
|
||||
from subzero.constants import PREFIX, TITLE, ART
|
||||
from support.config import config
|
||||
from support.helpers import pad_title, timestamp, df, get_plex_item_display_title
|
||||
from support.helpers import pad_title, timestamp, df, display_language
|
||||
from support.scheduler import scheduler
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, \
|
||||
get_item_kind_from_item
|
||||
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, get_item_title
|
||||
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_ignore_options, \
|
||||
ObjectContainer, route, handler
|
||||
from item_details import ItemDetailsMenu
|
||||
@@ -92,10 +91,9 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentMissingSubtitlesMenu, randomize=timestamp()),
|
||||
title="Items with missing subtitles",
|
||||
summary="Shows the items honoring the configured 'Item age to be considered recent'-setting (%s)"
|
||||
" and allowing you to individually (force-) refresh their metadata/subtitles. " %
|
||||
Prefs["scheduler.item_is_recent_age"],
|
||||
title="Show recently added items with missing subtitles",
|
||||
summary="Lists items with missing subtitles. Click on \"Find recent items with missing subs\" "
|
||||
"to update list",
|
||||
thumb=R("icon-missing.jpg")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
@@ -112,9 +110,11 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
if task.ready_for_display:
|
||||
task_state = "Running: %s/%s (%s%%)" % (task.items_done, task.items_searching, task.percentage)
|
||||
else:
|
||||
task_state = "Last scheduler run: %s; Next scheduled run: %s; Last runtime: %s" % (
|
||||
df(scheduler.last_run(task_name)) or "never",
|
||||
df(scheduler.next_run(task_name)) or "never",
|
||||
lr = scheduler.last_run(task_name)
|
||||
nr = scheduler.next_run(task_name)
|
||||
task_state = "Last run: %s; Next scheduled run: %s; Last runtime: %s" % (
|
||||
df(scheduler.last_run(task_name)) if lr else "never",
|
||||
df(scheduler.next_run(task_name)) if nr else "never",
|
||||
str(task.last_run_time).split(".")[0])
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
@@ -158,6 +158,19 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
))
|
||||
|
||||
if not only_refresh:
|
||||
if "provider_throttle" in Dict and Dict["provider_throttle"].keys():
|
||||
summary_data = []
|
||||
for provider, data in Dict["provider_throttle"].iteritems():
|
||||
reason, until, desc = data
|
||||
summary_data.append("%s until %s (%s)" % (provider, until.strftime("%y/%m/%d %H:%M"), reason))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, force_title=" ", randomize=timestamp()),
|
||||
title=pad_title("Throttled providers: %s" % ", ".join(Dict["provider_throttle"].keys())),
|
||||
summary=", ".join(summary_data),
|
||||
thumb=R("icon-throttled.jpg")
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(AdvancedMenu),
|
||||
title=pad_title("Advanced functions"),
|
||||
@@ -187,15 +200,10 @@ def RecentlyPlayedMenu():
|
||||
if not item:
|
||||
continue
|
||||
|
||||
kind = get_item_kind_from_item(item)
|
||||
if kind not in ("episode", "movie"):
|
||||
if getattr(getattr(item, "__class__"), "__name__") not in ("Episode", "Movie"):
|
||||
continue
|
||||
|
||||
if kind == "episode":
|
||||
item_title = get_plex_item_display_title(item, "show", parent=item.season, section_title=None,
|
||||
parent_title=item.show.title)
|
||||
else:
|
||||
item_title = get_plex_item_display_title(item, kind, section_title=None)
|
||||
item_title = get_item_title(item)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
title=item_title,
|
||||
@@ -233,7 +241,7 @@ def RecentMissingSubtitlesMenu(force=False, randomize=None):
|
||||
if not running:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentMissingSubtitlesMenu, force=True, randomize=timestamp()),
|
||||
title=u"Get items with missing subtitles",
|
||||
title=u"Find recent items with missing subtitles",
|
||||
thumb=default_thumb
|
||||
))
|
||||
else:
|
||||
@@ -249,7 +257,7 @@ def RecentMissingSubtitlesMenu(force=False, randomize=None):
|
||||
key=Callback(ItemDetailsMenu, title=title + " > " + item_title, item_title=item_title,
|
||||
rating_key=item_id),
|
||||
title=item_title,
|
||||
summary="Missing: %s" % ", ".join(l.name for l in missing_languages),
|
||||
summary="Missing: %s" % ", ".join(display_language(l) for l in missing_languages),
|
||||
thumb=get_item_thumb(item) or default_thumb
|
||||
))
|
||||
|
||||
|
||||
+150
-25
@@ -2,22 +2,28 @@
|
||||
import locale
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import traceback
|
||||
|
||||
import logger
|
||||
import copy
|
||||
|
||||
from requests import HTTPError
|
||||
from item_details import ItemDetailsMenu
|
||||
from refresh_item import RefreshItem
|
||||
from menu_helpers import add_ignore_options, dig_tree, set_refresh_menu_state, \
|
||||
should_display_ignore, default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route
|
||||
default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route, \
|
||||
extract_embedded_sub
|
||||
from main import fatality, IgnoreMenu
|
||||
from advanced import DispatchRestart
|
||||
from subzero.constants import ART, PREFIX, DEPENDENCY_MODULE_NAMES
|
||||
from support.plex_media import get_all_parts, get_embedded_subtitle_streams
|
||||
from support.scheduler import scheduler
|
||||
from support.config import config
|
||||
from support.helpers import timestamp, df
|
||||
from support.helpers import timestamp, df, display_language
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_all_items, get_items_info, \
|
||||
get_item_kind_from_rating_key, get_item
|
||||
from support.items import get_all_items, get_items_info, get_item_kind_from_rating_key, get_item, MI_KEY, get_item_title
|
||||
from support.storage import get_subtitle_storage
|
||||
|
||||
# init GUI
|
||||
ObjectContainer.art = R(ART)
|
||||
@@ -25,6 +31,7 @@ ObjectContainer.no_cache = True
|
||||
|
||||
# default thumb for DirectoryObjects
|
||||
DirectoryObject.thumb = default_thumb
|
||||
Plugin.AddViewGroup("full_details", viewMode="InfoList", mediaType="items", type="list", summary=2)
|
||||
|
||||
|
||||
@route(PREFIX + '/section/firstLetter/key', deeper=bool)
|
||||
@@ -51,7 +58,7 @@ def FirstLetterMetadataMenu(rating_key, key, title=None, base_title=None, displa
|
||||
|
||||
@route(PREFIX + '/section/contents', display_items=bool)
|
||||
def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, previous_item_type=None,
|
||||
previous_rating_key=None, randomize=None):
|
||||
previous_rating_key=None, header=None, randomize=None):
|
||||
"""
|
||||
displays the contents of a section based on whether it has a deeper tree or not (movies->movie (item) list; series->series list)
|
||||
:param rating_key:
|
||||
@@ -65,16 +72,18 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
title = unicode(title)
|
||||
item_title = title
|
||||
title = base_title + " > " + title
|
||||
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
|
||||
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True, header=header,
|
||||
view_group="full_details")
|
||||
|
||||
current_kind = get_item_kind_from_rating_key(rating_key)
|
||||
|
||||
if display_items:
|
||||
timeout = 30
|
||||
show = None
|
||||
|
||||
# add back to series for season
|
||||
if current_kind == "season":
|
||||
timeout = 360
|
||||
timeout = 720
|
||||
|
||||
show = get_item(previous_rating_key)
|
||||
oc.add(DirectoryObject(
|
||||
@@ -84,16 +93,43 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
thumb=show.thumb or default_thumb
|
||||
))
|
||||
elif current_kind == "series":
|
||||
timeout = 1800
|
||||
# it shouldn't take more than 6 minutes to scan all of a series' files and determine the force refresh
|
||||
timeout = 3600
|
||||
|
||||
items = get_all_items(key="children", value=rating_key, base="library/metadata")
|
||||
kind, deeper = get_items_info(items)
|
||||
dig_tree(oc, items, MetadataMenu,
|
||||
pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": kind,
|
||||
"previous_rating_key": rating_key})
|
||||
|
||||
# we don't know exactly where we are here, only add ignore option to series
|
||||
if should_display_ignore(items, previous=previous_item_type):
|
||||
add_ignore_options(oc, "series", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
if current_kind in ("series", "season"):
|
||||
item = get_item(rating_key)
|
||||
sub_title = get_item_title(item)
|
||||
add_ignore_options(oc, current_kind, title=sub_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
|
||||
# mass-extract embedded
|
||||
if current_kind == "season" and config.plex_transcoder:
|
||||
for lang in config.lang_list:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=show.section.title, display_items=display_items, item_title=item_title,
|
||||
title=title,
|
||||
previous_item_type=previous_item_type, with_mods=True,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=u"Extract missing %s embedded subtitles with default mods" % display_language(lang),
|
||||
summary="Extracts the not yet extracted embedded subtitles of all episodes for the current season "
|
||||
"with all configured default modifications"
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=show.section.title, display_items=display_items, item_title=item_title,
|
||||
title=title,
|
||||
previous_item_type=previous_item_type, with_mods=False,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=u"Extract missing %s embedded subtitles" % display_language(lang),
|
||||
summary="Extracts the not yet extracted embedded subtitles of all episodes for the current season"
|
||||
))
|
||||
|
||||
# add refresh
|
||||
oc.add(DirectoryObject(
|
||||
@@ -115,6 +151,48 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/season/extract_embedded/{rating_key}/{language}')
def SeasonExtractEmbedded(**kwargs):
    """
    starts season_extract_embedded in a background thread and re-renders the MetadataMenu with a
    success header/message
    :param kwargs: must contain language, with_mods, item_title, title and randomize; rating_key is read if present;
                   everything left over is handed on to MetadataMenu
    :return: the result of MetadataMenu
    """
    rating_key = kwargs.get("rating_key")

    # these are consumed here and must not be passed on to MetadataMenu
    requested_language = kwargs.pop("language")
    with_mods = kwargs.pop("with_mods")
    item_title = kwargs.pop("item_title")
    title = kwargs.pop("title")

    # NOTE(review): with_mods has no type conversion on the route; confirm it arrives as a real bool
    Thread.Create(season_extract_embedded, rating_key=rating_key, requested_language=requested_language,
                  with_mods=with_mods)

    kwargs.update({
        "header": 'Success',
        "message": u"Extracting of embedded subtitles for %s triggered" % title,
    })

    # drop the incoming randomize value; a fresh one is generated for the menu call
    del kwargs["randomize"]
    return MetadataMenu(randomize=timestamp(), title=item_title, **kwargs)
|
||||
|
||||
|
||||
def season_extract_embedded(rating_key, requested_language, with_mods=False):
    """
    walks the children of the given rating_key and extracts the first embedded subtitle stream matching
    requested_language for every part that has no stored subtitle of provider "embedded" yet
    :param rating_key: key whose children are iterated
    :param requested_language: language handed to the storage lookup, the stream lookup and the extraction
    :param with_mods: passed through to extract_embedded_sub
    """
    # get stored subtitle info for item id
    storage = get_subtitle_storage()

    try:
        children = get_all_items(key="children", value=rating_key, base="library/metadata")
        for child in children:
            item = get_item(child[MI_KEY])
            if not item:
                continue

            stored = storage.load_or_new(item)
            for part in get_all_parts(item):
                # skip parts that already have an embedded subtitle stored for this language
                if stored.get_by_provider(part.id, requested_language, "embedded"):
                    continue

                streams = get_embedded_subtitle_streams(part, requested_language=requested_language,
                                                        get_forced=config.forced_only)
                if not streams:
                    continue

                # only the first matching stream is extracted
                first_stream = streams[0]["stream"]
                extract_embedded_sub(rating_key=item.rating_key, part_id=part.id,
                                     stream_index=str(first_stream.index),
                                     language=requested_language, with_mods=with_mods)
    finally:
        # always release the storage, even if an extraction raised
        storage.destroy()
|
||||
|
||||
|
||||
@route(PREFIX + '/ignore_list')
|
||||
def IgnoreListMenu():
|
||||
oc = SubFolderObjectContainer(title2="Ignore list", replace_parent=True)
|
||||
@@ -132,15 +210,20 @@ def HistoryMenu():
|
||||
history = get_history()
|
||||
oc = SubFolderObjectContainer(title2="History", replace_parent=True)
|
||||
|
||||
for item in history.history_items:
|
||||
for item in history.items:
|
||||
possible_language = item.language
|
||||
language_display = item.lang_name if not possible_language else display_language(possible_language)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, title=item.title, item_title=item.item_title,
|
||||
rating_key=item.rating_key),
|
||||
title=u"%s (%s)" % (item.item_title, item.mode_verbose),
|
||||
summary=u"%s in %s (%s, score: %s), %s" % (item.lang_name, item.section_title,
|
||||
summary=u"%s in %s (%s, score: %s), %s" % (language_display, item.section_title,
|
||||
item.provider_name, item.score, df(item.time))
|
||||
))
|
||||
|
||||
history.destroy()
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@@ -152,6 +235,15 @@ def RefreshMissing(randomize=None):
|
||||
return fatality(header=header, replace_parent=True)
|
||||
|
||||
|
||||
def replace_item(obj, key, replace_value):
    """
    recursively replaces the value of every occurrence of key inside a (nested) dict, in place
    nested dicts are followed directly and also when they sit inside lists/tuples, so a matching key
    buried in a list of dicts is replaced as well
    :param obj: dict to modify; it is mutated, pass a copy if the original must stay untouched
    :param key: dict key whose value gets replaced wherever it occurs
    :param replace_value: value to store under key
    :return: obj (the same, modified object)
    """
    # snapshot the items so reassigning values can't interfere with the iteration
    for k, v in list(obj.items()):
        if isinstance(v, dict):
            obj[k] = replace_item(v, key, replace_value)

        elif isinstance(v, (list, tuple)):
            # dicts inside sequences are mutated in place; the sequence itself stays as it is
            pending = list(v)
            while pending:
                entry = pending.pop()
                if isinstance(entry, dict):
                    replace_item(entry, key, replace_value)
                elif isinstance(entry, (list, tuple)):
                    pending.extend(entry)

    if key in obj:
        obj[key] = replace_value
    return obj
|
||||
|
||||
|
||||
@route(PREFIX + '/ValidatePrefs', enforce_route=True)
|
||||
def ValidatePrefs():
|
||||
Core.log.setLevel(logging.DEBUG)
|
||||
@@ -196,8 +288,8 @@ def ValidatePrefs():
|
||||
DispatchRestart()
|
||||
return
|
||||
|
||||
scheduler.stop()
|
||||
scheduler.setup_tasks()
|
||||
scheduler.clear_task_data("MissingSubtitles")
|
||||
set_refresh_menu_state(None)
|
||||
|
||||
Log.Debug("Validate Prefs called.")
|
||||
@@ -205,21 +297,31 @@ def ValidatePrefs():
|
||||
# SZ config debug
|
||||
Log.Debug("--- SZ Config-Debug ---")
|
||||
for attr in [
|
||||
"app_support_path", "data_path", "data_items_path", "enable_agent",
|
||||
"version", "app_support_path", "data_path", "data_items_path", "enable_agent",
|
||||
"enable_channel", "permissions_ok", "missing_permissions", "fs_encoding",
|
||||
"subtitle_destination_folder", "dbm_supported", "lang_list", "providers"]:
|
||||
Log.Debug("config.%s: %s", attr, getattr(config, attr))
|
||||
"subtitle_destination_folder", "new_style_cache", "dbm_supported", "lang_list", "providers",
|
||||
"plex_transcoder", "refiner_settings"]:
|
||||
|
||||
value = getattr(config, attr)
|
||||
if isinstance(value, dict):
|
||||
d = replace_item(copy.deepcopy(value), "api_key", "xxxxxxxxxxxxxxxxxxxxxxxxx")
|
||||
Log.Debug("config.%s: %s", attr, d)
|
||||
continue
|
||||
|
||||
Log.Debug("config.%s: %s", attr, value)
|
||||
|
||||
for attr in ["plugin_log_path", "server_log_path"]:
|
||||
value = getattr(config, attr)
|
||||
access = os.access(value, os.R_OK)
|
||||
if Core.runtime.os == "Windows":
|
||||
try:
|
||||
f = open(value, "r")
|
||||
f.read(1)
|
||||
f.close()
|
||||
except:
|
||||
access = False
|
||||
|
||||
if value:
|
||||
access = os.access(value, os.R_OK)
|
||||
if Core.runtime.os == "Windows":
|
||||
try:
|
||||
f = open(value, "r")
|
||||
f.read(1)
|
||||
f.close()
|
||||
except:
|
||||
access = False
|
||||
|
||||
Log.Debug("config.%s: %s (accessible: %s)", attr, value, access)
|
||||
|
||||
@@ -227,10 +329,33 @@ def ValidatePrefs():
|
||||
"subtitles.save.filesystem", ]:
|
||||
Log.Debug("Pref.%s: %s", attr, Prefs[attr])
|
||||
|
||||
# debug drone
|
||||
if "sonarr" in config.refiner_settings or "radarr" in config.refiner_settings:
|
||||
Log.Debug("----- Connections -----")
|
||||
from subliminal_patch.refiners.drone import SonarrClient, RadarrClient
|
||||
for key, cls in [("sonarr", SonarrClient), ("radarr", RadarrClient)]:
|
||||
if key in config.refiner_settings:
|
||||
cname = key.capitalize()
|
||||
try:
|
||||
status = cls(**config.refiner_settings[key]).status()
|
||||
except HTTPError, e:
|
||||
if e.response.status_code == 401:
|
||||
Log.Debug("%s: NOT WORKING - BAD API KEY", cname)
|
||||
else:
|
||||
Log.Debug("%s: NOT WORKING - %s", cname, traceback.format_exc())
|
||||
except:
|
||||
Log.Debug("%s: NOT WORKING - %s", cname, traceback.format_exc())
|
||||
else:
|
||||
if status["version"]:
|
||||
Log.Debug("%s: OK - %s", cname, status["version"])
|
||||
else:
|
||||
Log.Debug("%s: NOT WORKING - %s", cname)
|
||||
|
||||
# fixme: check existence of and OS access to logs
|
||||
Log.Debug("----- Environment -----")
|
||||
Log.Debug("Platform: %s", Core.runtime.platform)
|
||||
Log.Debug("OS: %s", Core.runtime.os)
|
||||
Log.Debug("----- Environment -----")
|
||||
Log.Debug("Python: %s", platform.python_version())
|
||||
for key, value in os.environ.iteritems():
|
||||
if key.startswith("PLEX") or key.startswith("SZ_"):
|
||||
if "TOKEN" in key:
|
||||
|
||||
@@ -1,15 +1,24 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
import types
|
||||
import datetime
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
from func import enable_channel_wrapper
|
||||
from support.items import get_kind, get_item_thumb
|
||||
from support.helpers import get_video_display_title
|
||||
from subzero.language import Language
|
||||
from support.items import get_kind, get_item_thumb, get_item, get_item_kind_from_item, refresh_item
|
||||
from support.helpers import get_video_display_title, pad_title, display_language, quote_args
|
||||
from support.ignore import ignore_list
|
||||
from support.lib import get_intent
|
||||
from support.config import config
|
||||
from subzero.constants import ICON_SUB, ICON
|
||||
from support.plex_media import get_part, get_plex_metadata
|
||||
from support.scheduler import scheduler
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles
|
||||
|
||||
from subliminal_patch.subtitle import ModifiedSubtitle
|
||||
|
||||
default_thumb = R(ICON_SUB)
|
||||
main_icon = ICON if not config.is_development else "icon-dev.jpg"
|
||||
@@ -20,14 +29,6 @@ route = enable_channel_wrapper(route)
|
||||
handler = enable_channel_wrapper(handler)
|
||||
|
||||
|
||||
def should_display_ignore(items, previous=None):
    """
    decides whether the ignore option should be shown for the given items
    :param items: items whose kind is determined via get_kind; a falsy value is returned unchanged
    :param previous: kind of the previous menu level; episodes reached from a "season" are excluded
    :return: items itself if it is falsy, otherwise a bool
    """
    kind = get_kind(items)
    if not items:
        return items

    if kind in ("show", "season"):
        return True

    return kind == "episode" and previous != "season"
|
||||
|
||||
|
||||
def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None, add_kind=True):
|
||||
"""
|
||||
|
||||
@@ -72,7 +73,7 @@ def dig_tree(oc, items, menu_callback, menu_determination_callback=None, force_r
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(menu_callback or menu_determination_callback(kind, item, pass_kwargs=pass_kwargs), title=title,
|
||||
rating_key=force_rating_key or key, **add_kwargs),
|
||||
title=title, thumb=thumb, summary=summary
|
||||
title=pad_title(title) if kind in ("show", "season") else title, thumb=thumb, summary=summary
|
||||
))
|
||||
return oc
|
||||
|
||||
@@ -150,6 +151,57 @@ def debounce(func):
|
||||
return wrap
|
||||
|
||||
|
||||
def extract_embedded_sub(**kwargs):
    """
    extracts one embedded subtitle stream of a media part by running config.plex_transcoder with srt output
    on stdout, stores the result as a subtitle of provider "embedded" and optionally refreshes the item
    :param rating_key: key of the item the part belongs to (required)
    :param part_id: id of the part to extract from (required)
    :param stream_index: index of the stream to extract, compared as string against str(stream.index) (required)
    :param language: IETF language code of the stream (required)
    :param with_mods: apply config.default_mods to the extracted subtitle (default: False)
    :param refresh: refresh the item after a successful save (default: True)
    :param set_current: passed through to save_subtitles (default: True)
    :return: None
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs.pop("part_id")
    stream_index = kwargs.pop("stream_index")
    with_mods = kwargs.pop("with_mods", False)
    language = Language.fromietf(kwargs.pop("language"))
    refresh = kwargs.pop("refresh", True)
    set_current = kwargs.pop("set_current", True)

    plex_item = get_item(rating_key)
    item_type = get_item_kind_from_item(plex_item)
    part = get_part(plex_item, part_id)

    if not part:
        return

    metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
    scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)

    for stream in part.streams:
        # subtitle stream
        if str(stream.index) != stream_index:
            continue

        bn = os.path.basename(part.file)

        set_refresh_menu_state(u"Extracting subtitle %s of %s" % (stream_index, bn))
        Log.Info(u"Extracting stream %s (%s) of %s", stream_index, display_language(language), bn)

        args = [
            config.plex_transcoder, "-i", part.file, "-map", "0:%s" % stream_index, "-f", "srt", "-"
        ]
        output = None
        save_successful = False
        try:
            try:
                # NOTE(review): runs through the shell; safety depends on quote_args escaping part.file correctly
                output = subprocess.check_output(quote_args(args), stderr=subprocess.PIPE, shell=True)
            except Exception:
                # best effort: a failed extraction is logged and skipped, not raised
                Log.Error("Extraction failed: %s", traceback.format_exc())

            if output:
                subtitle = ModifiedSubtitle(language, mods=config.default_mods if with_mods else None)
                subtitle.content = output
                subtitle.provider_name = "embedded"
                subtitle.id = "stream_%s" % stream_index
                subtitle.score = 0
                subtitle.set_encoding("utf-8")

                # fixme: speedup video; only video.name is needed
                save_successful = save_subtitles(scanned_parts, {scanned_parts.keys()[0]: [subtitle]}, mode="m",
                                                 set_current=set_current)
        finally:
            # always clear the status, otherwise a failed extraction leaves a stale "Extracting ..." state
            set_refresh_menu_state(None)

        if save_successful and refresh:
            refresh_item(rating_key)
|
||||
|
||||
|
||||
class SZObjectContainer(ObjectContainer):
|
||||
def __init__(self, *args, **kwargs):
|
||||
skip_pin_lock = kwargs.pop("skip_pin_lock", False)
|
||||
|
||||
@@ -6,7 +6,8 @@ from support.items import refresh_item
|
||||
from support.helpers import timestamp
|
||||
|
||||
|
||||
@route(PREFIX + '/item/{rating_key}')
|
||||
@route(PREFIX + '/item/refresh/{rating_key}/force', force=True)
|
||||
@route(PREFIX + '/item/refresh/{rating_key}')
|
||||
@debounce
|
||||
def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=False, refresh_kind=None,
|
||||
previous_rating_key=None, timeout=8000, randomize=None, trigger=True):
|
||||
|
||||
@@ -3,12 +3,13 @@
|
||||
import traceback
|
||||
import types
|
||||
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, route
|
||||
from subzero.modification import registry as mod_registry, SubtitleModifications
|
||||
from subzero.constants import PREFIX
|
||||
from support.plex_media import get_plex_metadata, scan_videos
|
||||
from support.plex_media import get_plex_metadata
|
||||
from support.scanning import scan_videos
|
||||
from support.helpers import timestamp, pad_title
|
||||
from support.items import get_current_sub, set_mods_for_part
|
||||
|
||||
@@ -75,6 +76,11 @@ def SubtitleModificationsMenu(**kwargs):
|
||||
title=pad_title("Manage applied mods"),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_mods))
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleReapplyMods, randomize=timestamp(), **kwargs),
|
||||
title=pad_title("Reapply applied mods"),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_mods) if current_mods else "none")
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=None, mode="clear", randomize=timestamp(), **kwargs),
|
||||
@@ -103,12 +109,12 @@ def SubtitleFPSModMenu(**kwargs):
|
||||
))
|
||||
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
target_fps = plex_part.fps
|
||||
|
||||
for fps in ["23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]:
|
||||
for fps in ["23.980", "23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]:
|
||||
if float(fps) == float(target_fps):
|
||||
continue
|
||||
|
||||
@@ -227,6 +233,22 @@ def SubtitleSetMods(mods=None, mode=None, **kwargs):
|
||||
return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_reapply_mods/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleReapplyMods(**kwargs):
    """
    calls set_mods_for_part with an empty mod list in "add" mode for the given part/language and
    returns to the SubtitleModificationsMenu
    :param kwargs: must contain rating_key, part_id, language (IETF code), item_type and randomize;
                   all of them except randomize are handed on to SubtitleModificationsMenu unchanged
    :return: the result of SubtitleModificationsMenu
    """
    required = ("rating_key", "part_id", "language", "item_type")
    rating_key, part_id, lang_a2, item_type = [kwargs[name] for name in required]

    # no new mods are added (empty list) in "add" mode
    set_mods_for_part(rating_key, part_id, Language.fromietf(lang_a2), item_type, [], mode="add")

    # drop the incoming randomize value; a fresh one is generated for the menu call
    del kwargs["randomize"]
    return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_list_mods/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleListMods(**kwargs):
|
||||
|
||||
@@ -28,22 +28,25 @@ import items
|
||||
|
||||
sys.modules["support.items"] = items
|
||||
|
||||
import missing_subtitles
|
||||
|
||||
sys.modules["support.missing_subtitles"] = missing_subtitles
|
||||
|
||||
import scheduler
|
||||
|
||||
sys.modules["support.scheduler"] = scheduler
|
||||
|
||||
import tasks
|
||||
|
||||
sys.modules["support.tasks"] = tasks
|
||||
|
||||
import storage
|
||||
|
||||
sys.modules["support.storage"] = storage
|
||||
|
||||
import scanning
|
||||
sys.modules["support.scanning"] = scanning
|
||||
|
||||
import missing_subtitles
|
||||
|
||||
sys.modules["support.missing_subtitles"] = missing_subtitles
|
||||
|
||||
import tasks
|
||||
|
||||
sys.modules["support.tasks"] = tasks
|
||||
|
||||
import ignore
|
||||
|
||||
sys.modules["support.ignore"] = ignore
|
||||
@@ -60,4 +63,4 @@ import activities
|
||||
sys.modules["support.activities"] = activities
|
||||
|
||||
import download
|
||||
sys.modules["support.download"] = download
|
||||
sys.modules["support.download"] = download
|
||||
@@ -3,14 +3,20 @@ from wraptor.decorators import throttle
|
||||
from config import config
|
||||
from items import get_item, get_item_kind_from_item, refresh_item
|
||||
|
||||
from plex_activity import Activity
|
||||
from plex_activity.sources.s_logging.main import Logging as Activity_Logging
|
||||
Activity = None
|
||||
try:
|
||||
from plex_activity import Activity
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
class PlexActivityManager(object):
|
||||
def start(self):
|
||||
activity_sources_enabled = None
|
||||
|
||||
if not Activity:
|
||||
return
|
||||
|
||||
if config.plex_token:
|
||||
from plex import Plex
|
||||
Plex.configuration.defaults.authentication(config.plex_token)
|
||||
|
||||
+308
-37
@@ -1,27 +1,41 @@
|
||||
# coding=utf-8
|
||||
|
||||
import copy
|
||||
import os
|
||||
import re
|
||||
import inspect
|
||||
import sys
|
||||
import rarfile
|
||||
|
||||
import jstyleson
|
||||
import datetime
|
||||
|
||||
import subliminal
|
||||
import subliminal_patch
|
||||
import subzero.constants
|
||||
import lib
|
||||
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded
|
||||
|
||||
from subliminal_patch.core import is_windows_special_path
|
||||
from whichdb import whichdb
|
||||
from babelfish import Language
|
||||
|
||||
from subliminal_patch.exceptions import TooManyRequests
|
||||
from subzero.language import Language
|
||||
from subliminal.cli import MutexLock
|
||||
from subzero.lib.io import FileIO, get_viable_encoding
|
||||
from subzero.lib.dict import Dicked
|
||||
from subzero.util import get_root_path
|
||||
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW, MEDIA_TYPE_TO_STRING
|
||||
from dogpile.cache.region import register_backend as register_cache_backend
|
||||
from lib import Plex
|
||||
from helpers import check_write_permissions, cast_bool
|
||||
from helpers import check_write_permissions, cast_bool, cast_int, mswindows
|
||||
|
||||
SUBTITLE_EXTS = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'txt', 'psb',
|
||||
'vtt']
|
||||
register_cache_backend(
|
||||
"subzero.cache.file", "subzero.cache_backends.file", "SZFileBackend")
|
||||
|
||||
SUBTITLE_EXTS_BASE = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'psb',
|
||||
'vtt']
|
||||
SUBTITLE_EXTS = SUBTITLE_EXTS_BASE + ["txt"]
|
||||
|
||||
TEXT_SUBTITLE_EXTS = ("srt", "ass", "ssa", "vtt")
|
||||
VIDEO_EXTS = ['3g2', '3gp', 'asf', 'asx', 'avc', 'avi', 'avs', 'bivx', 'bup', 'divx', 'dv', 'dvr-ms', 'evo', 'fli',
|
||||
'flv',
|
||||
'm2t', 'm2ts', 'm2v', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'mts', 'nsv', 'nuv', 'ogm', 'ogv', 'tp',
|
||||
@@ -42,6 +56,24 @@ def int_or_default(s, default):
|
||||
return default
|
||||
|
||||
|
||||
VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable)
|
||||
|
||||
PROVIDER_THROTTLE_MAP = {
|
||||
"default": {
|
||||
TooManyRequests: (datetime.timedelta(hours=1), "1 hour"),
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
|
||||
ServiceUnavailable: (datetime.timedelta(minutes=20), "20 minutes"),
|
||||
},
|
||||
"opensubtitles": {
|
||||
TooManyRequests: (datetime.timedelta(hours=3), "3 hours"),
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"),
|
||||
},
|
||||
"addic7ed": {
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=24), "24 hours"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class Config(object):
|
||||
libraries_root = None
|
||||
plugin_info = ""
|
||||
@@ -56,6 +88,11 @@ class Config(object):
|
||||
plex_token = None
|
||||
is_development = False
|
||||
dbm_supported = False
|
||||
pms_request_timeout = 15
|
||||
low_impact_mode = False
|
||||
new_style_cache = False
|
||||
pack_cache_dir = None
|
||||
advanced = None
|
||||
|
||||
enable_channel = True
|
||||
enable_agent = True
|
||||
@@ -64,11 +101,8 @@ class Config(object):
|
||||
lock_advanced_menu = False
|
||||
locked = False
|
||||
pin_valid_minutes = 10
|
||||
lang_list = None
|
||||
subtitle_destination_folder = None
|
||||
subtitle_formats = None
|
||||
providers = None
|
||||
provider_settings = None
|
||||
max_recent_items_per_library = 200
|
||||
permissions_ok = False
|
||||
missing_permissions = None
|
||||
@@ -79,6 +113,7 @@ class Config(object):
|
||||
sections = None
|
||||
enabled_sections = None
|
||||
remove_hi = False
|
||||
remove_tags = False
|
||||
fix_ocr = False
|
||||
fix_common = False
|
||||
colors = ""
|
||||
@@ -86,15 +121,21 @@ class Config(object):
|
||||
forced_only = False
|
||||
exotic_ext = False
|
||||
treat_und_as_first = False
|
||||
subtitle_sub_dir = None, None
|
||||
ext_match_strictness = False
|
||||
default_mods = None
|
||||
debug_mods = False
|
||||
react_to_activities = False
|
||||
activity_mode = None
|
||||
subtitles_save_to = None
|
||||
no_refresh = False
|
||||
plex_transcoder = None
|
||||
refiner_settings = None
|
||||
exact_filenames = False
|
||||
only_one = False
|
||||
embedded_auto_extract = False
|
||||
ietf_as_alpha3 = False
|
||||
|
||||
store_recently_played_amount = 20
|
||||
store_recently_played_amount = 40
|
||||
|
||||
initialized = False
|
||||
|
||||
@@ -102,6 +143,10 @@ class Config(object):
|
||||
self.libraries_root = os.path.abspath(os.path.join(get_root_path(), ".."))
|
||||
self.init_libraries()
|
||||
|
||||
if is_windows_special_path:
|
||||
Log.Warn("The Plex metadata folder is residing inside a folder with special characters. "
|
||||
"Multithreading and playback activities will be disabled.")
|
||||
|
||||
self.fs_encoding = get_viable_encoding()
|
||||
self.plugin_info = self.get_plugin_info()
|
||||
self.is_development = self.get_dev_mode()
|
||||
@@ -113,20 +158,24 @@ class Config(object):
|
||||
self.data_items_path = os.path.join(self.data_path, "DataItems")
|
||||
self.universal_plex_token = self.get_universal_plex_token()
|
||||
self.plex_token = os.environ.get("PLEXTOKEN", self.universal_plex_token)
|
||||
subzero.constants.DEFAULT_TIMEOUT = lib.DEFAULT_TIMEOUT = self.pms_request_timeout = \
|
||||
min(cast_int(Prefs['pms_request_timeout'], 15), 45)
|
||||
self.low_impact_mode = cast_bool(Prefs['low_impact_mode'])
|
||||
self.new_style_cache = cast_bool(Prefs['new_style_cache'])
|
||||
self.pack_cache_dir = self.get_pack_cache_dir()
|
||||
self.advanced = self.get_advanced_config()
|
||||
|
||||
os.environ["SZ_USER_AGENT"] = self.get_user_agent()
|
||||
|
||||
self.providers = self.get_providers()
|
||||
|
||||
self.setup_proxies()
|
||||
self.set_plugin_mode()
|
||||
self.set_plugin_lock()
|
||||
self.set_activity_modes()
|
||||
self.parse_rename_mode()
|
||||
|
||||
self.lang_list = self.get_lang_list()
|
||||
self.subtitle_destination_folder = self.get_subtitle_destination_folder()
|
||||
self.subtitle_formats = self.get_subtitle_formats()
|
||||
self.forced_only = cast_bool(Prefs["subtitles.only_foreign"])
|
||||
self.provider_settings = self.get_provider_settings()
|
||||
self.max_recent_items_per_library = int_or_default(Prefs["scheduler.max_recent_items_per_library"], 2000)
|
||||
self.sections = list(Plex["library"].sections())
|
||||
self.missing_permissions = []
|
||||
@@ -136,17 +185,22 @@ class Config(object):
|
||||
self.permissions_ok = self.check_permissions()
|
||||
self.notify_executable = self.check_notify_executable()
|
||||
self.remove_hi = cast_bool(Prefs['subtitles.remove_hi'])
|
||||
self.remove_tags = cast_bool(Prefs['subtitles.remove_tags'])
|
||||
self.fix_ocr = cast_bool(Prefs['subtitles.fix_ocr'])
|
||||
self.fix_common = cast_bool(Prefs['subtitles.fix_common'])
|
||||
self.colors = Prefs['subtitles.colors'] if Prefs['subtitles.colors'] != "don't change" else None
|
||||
self.chmod = self.check_chmod()
|
||||
self.exotic_ext = cast_bool(Prefs["subtitles.scan.exotic_ext"])
|
||||
self.treat_und_as_first = cast_bool(Prefs["subtitles.language.treat_und_as_first"])
|
||||
self.subtitle_sub_dir = self.get_subtitle_sub_dir()
|
||||
self.ext_match_strictness = self.determine_ext_sub_strictness()
|
||||
self.default_mods = self.get_default_mods()
|
||||
self.debug_mods = cast_bool(Prefs['log_debug_mods'])
|
||||
self.subtitles_save_to = Prefs['subtitles.save.filesystem']
|
||||
self.no_refresh = os.environ.get("SZ_NO_REFRESH", False)
|
||||
self.plex_transcoder = self.get_plex_transcoder()
|
||||
self.only_one = cast_bool(Prefs['subtitles.only_one'])
|
||||
self.embedded_auto_extract = cast_bool(Prefs["subtitles.embedded.autoextract"])
|
||||
self.ietf_as_alpha3 = cast_bool(Prefs["subtitles.language.ietf_normalize"])
|
||||
self.initialized = True
|
||||
|
||||
def init_libraries(self):
|
||||
@@ -162,6 +216,13 @@ class Config(object):
|
||||
Log.Info("Using UnRAR from: %s", custom_unrar)
|
||||
|
||||
def init_cache(self):
|
||||
if self.new_style_cache:
|
||||
subliminal.region.configure('subzero.cache.file', expiration_time=datetime.timedelta(days=30),
|
||||
arguments={'appname': "sz_cache",
|
||||
'app_cache_dir': self.data_path})
|
||||
Log.Info("Using new style file based cache!")
|
||||
return
|
||||
|
||||
names = ['dbhash', 'gdbm', 'dbm']
|
||||
dbfn = None
|
||||
self.dbm_supported = False
|
||||
@@ -207,12 +268,37 @@ class Config(object):
|
||||
Log.Warn("Not using file based cache!")
|
||||
subliminal.region.configure('dogpile.cache.memory')
|
||||
|
||||
def sync_cache(self):
    """
    calls sync() on the backend of the subliminal cache region; does nothing unless new_style_cache is set
    """
    if self.new_style_cache:
        Log.Debug("Syncing cache")
        subliminal.region.backend.sync()
|
||||
|
||||
def get_pack_cache_dir(self):
    """
    returns the path of the "pack_cache" folder inside this config's data_path, creating it if it is missing
    :return: path of the pack cache folder
    """
    # use our own data_path instead of the module-level config instance; init_cache reads self.data_path too
    pack_cache_dir = os.path.join(self.data_path, "pack_cache")
    if not os.path.isdir(pack_cache_dir):
        os.makedirs(pack_cache_dir)

    return pack_cache_dir
|
||||
|
||||
def get_advanced_config(self):
    """
    loads "advanced_settings.json" from this config's data_path, if that file exists
    the file content is parsed with jstyleson
    :return: Dicked built from the parsed file, or an empty Dicked if the file doesn't exist
    """
    # use our own data_path instead of the module-level config instance; init_cache reads self.data_path too
    path = os.path.join(self.data_path, "advanced_settings.json")
    if os.path.isfile(path):
        data = FileIO.read(path, "r")

        return Dicked(**jstyleson.loads(data))

    return Dicked()
|
||||
|
||||
def set_log_paths(self):
|
||||
# find log handler
|
||||
for handler in Core.log.handlers:
|
||||
if getattr(getattr(handler, "__class__"), "__name__") in (
|
||||
'FileHandler', 'RotatingFileHandler', 'TimedRotatingFileHandler'):
|
||||
cls_name = getattr(getattr(handler, "__class__"), "__name__")
|
||||
if cls_name in ('FileHandler', 'RotatingFileHandler', 'TimedRotatingFileHandler'):
|
||||
plugin_log_file = handler.baseFilename
|
||||
if cls_name in ("RotatingFileHandler", "TimedRotatingFileHandler"):
|
||||
handler.backupCount = int_or_default(Prefs['log_rotate_keep'], 5)
|
||||
|
||||
if os.path.isfile(os.path.realpath(plugin_log_file)):
|
||||
self.plugin_log_path = plugin_log_file
|
||||
|
||||
@@ -284,7 +370,7 @@ class Config(object):
|
||||
self.permissions_ok = self.check_permissions()
|
||||
|
||||
def check_permissions(self):
|
||||
if not Prefs["subtitles.save.filesystem"] or not Prefs["check_permissions"]:
|
||||
if not cast_bool(Prefs["subtitles.save.filesystem"]) or not cast_bool(Prefs["check_permissions"]):
|
||||
return True
|
||||
|
||||
self.missing_permissions = []
|
||||
@@ -300,6 +386,9 @@ class Config(object):
|
||||
if isinstance(path_str, unicode):
|
||||
path_str = path_str.encode(self.fs_encoding)
|
||||
|
||||
if not os.path.exists(path_str):
|
||||
continue
|
||||
|
||||
if use_ignore_fs:
|
||||
# check whether we've got an ignore file inside the section path
|
||||
if self.is_physically_ignored(path_str):
|
||||
@@ -405,18 +494,44 @@ class Config(object):
|
||||
return enabled_sections
|
||||
|
||||
# Prepare a list of languages we want subs for
|
||||
def get_lang_list(self):
|
||||
l = {Language.fromietf(Prefs["langPref1"])}
|
||||
def get_lang_list(self, provider=None):
|
||||
# advanced settings
|
||||
if provider and self.advanced.providers and provider in self.advanced.providers:
|
||||
adv_languages = self.advanced.providers[provider].get("languages", None)
|
||||
if adv_languages:
|
||||
adv_out = set()
|
||||
for adv_lang in adv_languages:
|
||||
adv_lang = adv_lang.strip()
|
||||
try:
|
||||
real_lang = Language.fromietf(adv_lang)
|
||||
except:
|
||||
try:
|
||||
real_lang = Language.fromname(adv_lang)
|
||||
except:
|
||||
continue
|
||||
adv_out.update({real_lang})
|
||||
|
||||
# fallback to default languages if no valid language was found in advanced settings
|
||||
if adv_out:
|
||||
return adv_out
|
||||
|
||||
l = {Language.fromietf(Prefs["langPref1a"])}
|
||||
lang_custom = Prefs["langPrefCustom"].strip()
|
||||
|
||||
if Prefs['subtitles.only_one']:
|
||||
return l
|
||||
|
||||
if Prefs["langPref2"] != "None":
|
||||
l.update({Language.fromietf(Prefs["langPref2"])})
|
||||
if Prefs["langPref2a"] != "None":
|
||||
try:
|
||||
l.update({Language.fromietf(Prefs["langPref2a"])})
|
||||
except:
|
||||
pass
|
||||
|
||||
if Prefs["langPref3"] != "None":
|
||||
l.update({Language.fromietf(Prefs["langPref3"])})
|
||||
if Prefs["langPref3a"] != "None":
|
||||
try:
|
||||
l.update({Language.fromietf(Prefs["langPref3a"])})
|
||||
except:
|
||||
pass
|
||||
|
||||
if len(lang_custom) and lang_custom != "None":
|
||||
for lang in lang_custom.split(u","):
|
||||
@@ -432,6 +547,8 @@ class Config(object):
|
||||
|
||||
return l
|
||||
|
||||
lang_list = property(get_lang_list)
|
||||
|
||||
def get_subtitle_destination_folder(self):
|
||||
if not Prefs["subtitles.save.filesystem"]:
|
||||
return
|
||||
@@ -450,18 +567,26 @@ class Config(object):
|
||||
out.append("vtt")
|
||||
return out
|
||||
|
||||
def get_providers(self):
|
||||
def get_providers(self, media_type="series"):
|
||||
providers = {'opensubtitles': cast_bool(Prefs['provider.opensubtitles.enabled']),
|
||||
# 'thesubdb': Prefs['provider.thesubdb.enabled'],
|
||||
'podnapisi': cast_bool(Prefs['provider.podnapisi.enabled']),
|
||||
'titlovi': cast_bool(Prefs['provider.titlovi.enabled']),
|
||||
'addic7ed': cast_bool(Prefs['provider.addic7ed.enabled']),
|
||||
'tvsubtitles': cast_bool(Prefs['provider.tvsubtitles.enabled']),
|
||||
'legendastv': cast_bool(Prefs['provider.legendastv.enabled']),
|
||||
'napiprojekt': cast_bool(Prefs['provider.napiprojekt.enabled']),
|
||||
'shooter': cast_bool(Prefs['provider.shooter.enabled']),
|
||||
'subscenter': cast_bool(Prefs['provider.subscenter.enabled']),
|
||||
'shooter': False,
|
||||
'subscene': cast_bool(Prefs['provider.subscene.enabled']),
|
||||
'subscenter': False,
|
||||
}
|
||||
|
||||
providers_by_prefs = copy.deepcopy(providers)
|
||||
|
||||
# disable subscene for movies by default
|
||||
if media_type == "movies":
|
||||
providers["subscene"] = False
|
||||
|
||||
# ditch non-forced-subtitles-reporting providers
|
||||
if self.forced_only:
|
||||
providers["addic7ed"] = False
|
||||
@@ -469,10 +594,41 @@ class Config(object):
|
||||
providers["legendastv"] = False
|
||||
providers["napiprojekt"] = False
|
||||
providers["shooter"] = False
|
||||
providers["subscenter"] = False
|
||||
providers["titlovi"] = False
|
||||
|
||||
# advanced settings
|
||||
if media_type and self.advanced.providers:
|
||||
for provider, data in self.advanced.providers.iteritems():
|
||||
if provider not in providers or not providers_by_prefs[provider]:
|
||||
continue
|
||||
|
||||
if data["enabled_for"] is not None:
|
||||
providers[provider] = media_type in data["enabled_for"]
|
||||
|
||||
if "provider_throttle" not in Dict:
|
||||
Dict["provider_throttle"] = {}
|
||||
|
||||
changed = False
|
||||
for provider, enabled in dict(providers).iteritems():
|
||||
reason, until, throttle_desc = Dict["provider_throttle"].get(provider, (None, None, None))
|
||||
if reason:
|
||||
now = datetime.datetime.now()
|
||||
if now < until:
|
||||
Log.Info("Not using %s until %s, because of: %s", provider,
|
||||
until.strftime("%y/%m/%d %H:%M"), reason)
|
||||
providers[provider] = False
|
||||
else:
|
||||
Log.Info("Using %s again after %s, (disabled because: %s)", provider, throttle_desc, reason)
|
||||
del Dict["provider_throttle"][provider]
|
||||
changed = True
|
||||
|
||||
if changed:
|
||||
Dict.Save()
|
||||
|
||||
return filter(lambda prov: providers[prov], providers)
|
||||
|
||||
providers = property(get_providers)
|
||||
|
||||
def get_provider_settings(self):
|
||||
provider_settings = {'addic7ed': {'username': Prefs['provider.addic7ed.username'],
|
||||
'password': Prefs['provider.addic7ed.password'],
|
||||
@@ -480,22 +636,54 @@ class Config(object):
|
||||
},
|
||||
'opensubtitles': {'username': Prefs['provider.opensubtitles.username'],
|
||||
'password': Prefs['provider.opensubtitles.password'],
|
||||
'use_tag_search': cast_bool(Prefs['provider.opensubtitles.use_tags']),
|
||||
'only_foreign': cast_bool(Prefs['subtitles.only_foreign'])
|
||||
'use_tag_search': self.exact_filenames,
|
||||
'only_foreign': self.forced_only,
|
||||
'is_vip': cast_bool(Prefs['provider.opensubtitles.is_vip'])
|
||||
},
|
||||
'podnapisi': {
|
||||
'only_foreign': cast_bool(Prefs['subtitles.only_foreign'])
|
||||
'only_foreign': self.forced_only,
|
||||
},
|
||||
'legendastv': {'username': Prefs['provider.legendastv.username'],
|
||||
'password': Prefs['provider.legendastv.password'],
|
||||
},
|
||||
'subscenter': {'username': Prefs['provider.subscenter.username'],
|
||||
'password': Prefs['provider.subscenter.password'],
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
return provider_settings
|
||||
|
||||
provider_settings = property(get_provider_settings)
|
||||
|
||||
def provider_throttle(self, name, exception):
    """
    throttle a provider :name: for a period chosen by the :exception: type

    The (delta, description) pair is looked up in PROVIDER_THROTTLE_MAP, first in the provider's own entry
    and then in the "default" entry. If neither has an entry for the exception class, nothing is throttled.
    The result is stored in Dict["provider_throttle"][name] as (exception class name, until, description).

    :param name: provider name; key into PROVIDER_THROTTLE_MAP and Dict["provider_throttle"]
    :param exception: exception instance raised while using the provider
    :return: None
    """
    cls = getattr(exception, "__class__")
    # the logged/stored name is always the name of the exception that was actually raised
    cls_name = getattr(cls, "__name__")
    if cls not in VALID_THROTTLE_EXCEPTIONS:
        # map subclasses onto the known exception they derive from; this must test the exception
        # instance, because isinstance() on the class object itself never matches an exception type
        for valid_cls in VALID_THROTTLE_EXCEPTIONS:
            if isinstance(exception, valid_cls):
                cls = valid_cls
                break

    throttle_data = PROVIDER_THROTTLE_MAP.get(name, PROVIDER_THROTTLE_MAP["default"]).get(cls, None) or \
        PROVIDER_THROTTLE_MAP["default"].get(cls, None)

    if not throttle_data:
        return

    # NOTE(review): throttle_delta is added to a datetime below, so presumably a timedelta - confirm
    throttle_delta, throttle_description = throttle_data

    if "provider_throttle" not in Dict:
        Dict["provider_throttle"] = {}

    throttle_until = datetime.datetime.now() + throttle_delta
    Dict["provider_throttle"][name] = (cls_name, throttle_until, throttle_description)

    Log.Info("Throttling %s for %s, until %s, because of: %s", name, throttle_description,
             throttle_until.strftime("%y/%m/%d %H:%M"), cls_name)
    Dict.Save()
|
||||
|
||||
@property
|
||||
def provider_pool(self):
|
||||
if cast_bool(Prefs['providers.multithreading']):
|
||||
@@ -519,6 +707,22 @@ class Config(object):
|
||||
if wrong_chmod:
|
||||
Log.Warn("Chmod setting ignored, please use only 4-digit integers with leading 0 (e.g.: 775)")
|
||||
|
||||
def get_subtitle_sub_dir(self):
    """
    Resolve the configured subtitle folder for filesystem storage.

    :return: folder, is_absolute; (None, None) when saving to the filesystem is disabled
    """
    if not cast_bool(Prefs['subtitles.save.filesystem']):
        return None, None

    # a custom folder takes precedence over the selectable sub folder
    custom_folder = Prefs["subtitles.save.subFolder.Custom"]
    if custom_folder:
        return custom_folder, os.path.isabs(custom_folder)

    sub_folder = Prefs["subtitles.save.subFolder"]
    if sub_folder == "current folder":
        sub_folder = "."

    return sub_folder, False
|
||||
|
||||
def determine_ext_sub_strictness(self):
|
||||
val = Prefs["subtitles.scan.filename_strictness"]
|
||||
if val == "any":
|
||||
@@ -531,6 +735,8 @@ class Config(object):
|
||||
mods = []
|
||||
if self.remove_hi:
|
||||
mods.append("remove_HI")
|
||||
if self.remove_tags:
|
||||
mods.append("remove_tags")
|
||||
if self.fix_ocr:
|
||||
mods.append("OCR_fixes")
|
||||
if self.fix_common:
|
||||
@@ -540,6 +746,12 @@ class Config(object):
|
||||
|
||||
return mods
|
||||
|
||||
def setup_proxies(self):
    """
    Export the "proxy" preference, stripped of surrounding whitespace, as the SZ_HTTP_PROXY
    environment variable. Does nothing when the preference is empty.
    """
    configured_proxy = Prefs["proxy"]
    if not configured_proxy:
        return

    os.environ["SZ_HTTP_PROXY"] = configured_proxy.strip()
    Log.Debug("Using HTTP Proxy: %s", configured_proxy)
|
||||
|
||||
def set_activity_modes(self):
|
||||
val = Prefs["activity.on_playback"]
|
||||
if val == "never":
|
||||
@@ -556,6 +768,65 @@ class Config(object):
|
||||
else:
|
||||
self.activity_mode = "next_episode"
|
||||
|
||||
def get_plex_transcoder(self):
    """
    Locate the Plex Transcoder executable.

    The base directory is taken from the PLEX_MEDIA_SERVER_HOME environment variable; if that is unset,
    it is derived from PLEXBUNDLEDPLUGINSPATH by going two directories up.

    :return: path to the transcoder binary, or None if no base directory is known or the file doesn't exist
    """
    base_path = os.environ.get("PLEX_MEDIA_SERVER_HOME", None)
    if not base_path:
        # fall back to bundled plugins path
        bundle_path = os.environ.get("PLEXBUNDLEDPLUGINSPATH", None)
        if bundle_path:
            base_path = os.path.normpath(os.path.join(bundle_path, "..", ".."))

    if not base_path:
        # neither variable is set; os.path.join(None, ...) below would raise
        return None

    if sys.platform == "darwin":
        fn = os.path.join(base_path, "MacOS", "Plex Transcoder")
    elif mswindows:
        fn = os.path.join(base_path, "plextranscoder.exe")
    else:
        fn = os.path.join(base_path, "Plex Transcoder")

    if os.path.isfile(fn):
        return fn
    return None
|
||||
|
||||
def parse_rename_mode(self):
    """
    Translate the "media_rename1" preference (and related ones) into self.refiner_settings and
    set self.exact_filenames to True whenever a source of original filenames is configured.
    """
    # fixme: exact_filenames should be determined via callback combined with info about the current video
    # (original_name)

    self.refiner_settings = {}
    mode = str(Prefs["media_rename1"])

    if cast_bool(Prefs['use_file_info_file']):
        self.refiner_settings["file_info_file"] = True
        self.exact_filenames = True

    if mode == "none of the above":
        return

    if mode == "Symlink to original file":
        self.refiner_settings["symlinks"] = True

    # both of these modes imply the original filenames are available; nothing else to configure
    if mode in ("Symlink to original file", "I keep the original filenames"):
        self.exact_filenames = True
        return

    if mode in ("Filebot", "Sonarr/Radarr/Filebot"):
        self.refiner_settings["filebot"] = True

    if mode not in ("Sonarr/Radarr (fill api info below)", "Sonarr/Radarr/Filebot"):
        return

    # each drone API is only enabled when both its URL and its API key are set
    sonarr_url = Prefs["drone_api.sonarr.url"]
    sonarr_key = Prefs["drone_api.sonarr.api_key"]
    if sonarr_url and sonarr_key:
        self.refiner_settings["sonarr"] = {"base_url": sonarr_url, "api_key": sonarr_key}
        self.exact_filenames = True

    radarr_url = Prefs["drone_api.radarr.url"]
    radarr_key = Prefs["drone_api.radarr.api_key"]
    if radarr_url and radarr_key:
        self.refiner_settings["radarr"] = {"base_url": radarr_url, "api_key": radarr_key}
        self.exact_filenames = True
|
||||
|
||||
def init_subliminal_patches(self):
|
||||
# configure custom subtitle destination folders for scanning pre-existing subs
|
||||
Log.Debug("Patching subliminal ...")
|
||||
@@ -564,7 +835,7 @@ class Config(object):
|
||||
subliminal_patch.core.INCLUDE_EXOTIC_SUBS = self.exotic_ext
|
||||
|
||||
subliminal_patch.core.DOWNLOAD_TRIES = int(Prefs['subtitles.try_downloads'])
|
||||
subliminal.score.episode_scores["addic7ed_boost"] = int(Prefs['provider.addic7ed.boost_by1'])
|
||||
subliminal.score.episode_scores["addic7ed_boost"] = int(Prefs['provider.addic7ed.boost_by2'])
|
||||
|
||||
|
||||
config = Config()
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
|
||||
|
||||
def dispatch_migrate():
|
||||
@@ -6,6 +7,8 @@ def dispatch_migrate():
|
||||
migrate()
|
||||
except:
|
||||
Log.Error("Migration failed: %s" % traceback.format_exc())
|
||||
del Dict["subs"]
|
||||
Dict.Save()
|
||||
|
||||
|
||||
def migrate():
|
||||
@@ -25,6 +28,7 @@ def migrate():
|
||||
time=item.time)
|
||||
|
||||
del Dict["history"]
|
||||
history.destroy()
|
||||
Dict.Save()
|
||||
|
||||
# migrate subtitle storage from Dict to Data
|
||||
|
||||
@@ -1,46 +1,120 @@
|
||||
# coding=utf-8
|
||||
import os
|
||||
|
||||
from subzero.language import Language
|
||||
|
||||
import subliminal_patch as subliminal
|
||||
|
||||
from support.config import config
|
||||
from support.helpers import cast_bool
|
||||
from subtitlehelpers import get_subtitles_from_metadata
|
||||
from subliminal_patch import compute_score
|
||||
from support.plex_media import get_blacklist_from_part_map
|
||||
from subzero.video import refine_video
|
||||
from support.storage import get_pack_data, store_pack_data
|
||||
|
||||
|
||||
def download_best_subtitles(video_part_map, min_score=0):
|
||||
def get_missing_languages(video, part):
    """
    Determine which of the configured languages :video: doesn't have a subtitle for.

    :param video: video whose subtitle_languages set is inspected (and extended with metadata subtitles
                  when subtitles are not saved to the filesystem)
    :param part: part passed on to get_subtitles_from_metadata
    :return: False if nothing is missing or a single existing subtitle is sufficient, otherwise a set of
             the missing languages as strings
    """
    wanted = {Language.fromietf(str(l)) for l in config.lang_list}

    # should we treat IETF as alpha3? (ditch the country part)
    alpha3_map = {}

    def drop_countries(langs):
        # remember each language's country by alpha3 code, then clear it on the object itself
        for lang in langs:
            if lang.country:
                alpha3_map[lang.alpha3] = lang.country
                lang.country = None

    if config.ietf_as_alpha3:
        drop_countries(wanted)

    if not Prefs['subtitles.save.filesystem']:
        # scan for existing metadata subtitles
        for language, subList in get_subtitles_from_metadata(part).iteritems():
            if subList:
                video.subtitle_languages.add(language)
                Log.Debug("Found metadata subtitle %s for %s", language, video)

    existing = video.subtitle_languages.copy()
    if config.ietf_as_alpha3:
        drop_countries(existing)

    # comparison happens on the string form of the languages
    missing_languages = set(str(l) for l in wanted) - set(str(l) for l in existing)

    # all languages are found if we either really have subs for all languages or we only want to have exactly
    # one language and we've only found one (the case for a selected language, Prefs['subtitles.only_one']
    # (one found sub matches any language))
    found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
    if found_one_which_is_enough:
        Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
        return False

    if not missing_languages:
        Log.Debug('All languages %r exist for %s', wanted, video)
        return False

    # re-add country codes to the wanted languages, in case we've removed them above
    if config.ietf_as_alpha3:
        for lang in wanted:
            lang.country = alpha3_map.get(lang.alpha3, None)

    return missing_languages
|
||||
|
||||
|
||||
def pre_download_hook(subtitle):
    """
    For pack subtitles, attach cached pack data (if get_pack_data returns any) to the subtitle.

    :param subtitle: subtitle about to be downloaded
    """
    if not subtitle.is_pack:
        return

    # try retrieving the subtitle from a cached pack archive
    cached_pack = get_pack_data(subtitle)
    if cached_pack:
        subtitle.pack_data = cached_pack
|
||||
|
||||
|
||||
def post_download_hook(subtitle):
    """
    Store freshly downloaded pack data in the cache.

    Providers' download method is responsible for setting subtitle.pack_data to None in case the cached
    pack data we provided was successfully used, so any pack data still present here is new.

    :param subtitle: subtitle that has just been downloaded
    """
    has_new_pack = subtitle.is_pack and subtitle.pack_data
    if not has_new_pack:
        return

    # store pack data in cache
    store_pack_data(subtitle, subtitle.pack_data)

    # may be redundant
    subtitle.pack_data = None
|
||||
|
||||
|
||||
def language_hook(provider):
    """
    Return the language list to use for :provider:, as resolved by config.get_lang_list.

    :param provider: provider name
    """
    provider_languages = config.get_lang_list(provider=provider)
    return provider_languages
|
||||
|
||||
|
||||
def download_best_subtitles(video_part_map, min_score=0, throttle_time=None, providers=None):
|
||||
hearing_impaired = Prefs['subtitles.search.hearingImpaired']
|
||||
languages = config.lang_list
|
||||
languages = set([Language.fromietf(str(l)) for l in config.lang_list])
|
||||
if not languages:
|
||||
return
|
||||
|
||||
missing_languages = False
|
||||
use_videos = []
|
||||
for video, part in video_part_map.iteritems():
|
||||
if not Prefs['subtitles.save.filesystem']:
|
||||
# scan for existing metadata subtitles
|
||||
meta_subs = get_subtitles_from_metadata(part)
|
||||
for language, subList in meta_subs.iteritems():
|
||||
if subList:
|
||||
video.subtitle_languages.add(language)
|
||||
Log.Debug("Found metadata subtitle %s for %s", language, video)
|
||||
if not video.ignore_all:
|
||||
missing_languages = get_missing_languages(video, part)
|
||||
else:
|
||||
missing_languages = languages
|
||||
|
||||
missing_subs = (languages - video.subtitle_languages)
|
||||
if missing_languages:
|
||||
Log.Info(u"%s has missing languages: %s", os.path.basename(video.name), missing_languages)
|
||||
refine_video(video, refiner_settings=config.refiner_settings)
|
||||
use_videos.append(video)
|
||||
|
||||
# all languages are found if we either really have subs for all languages or we only want to have exactly one language
|
||||
# and we've only found one (the case for a selected language, Prefs['subtitles.only_one'] (one found sub matches any language))
|
||||
found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
|
||||
if not missing_subs or found_one_which_is_enough:
|
||||
if found_one_which_is_enough:
|
||||
Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
|
||||
else:
|
||||
Log.Debug('All languages %r exist for %s', languages, video)
|
||||
continue
|
||||
missing_languages = True
|
||||
break
|
||||
# prepare blacklist
|
||||
blacklist = get_blacklist_from_part_map(video_part_map, languages)
|
||||
|
||||
if missing_languages:
|
||||
Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s" % (min_score, hearing_impaired))
|
||||
if use_videos:
|
||||
Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s, languages: %s" %
|
||||
(min_score, hearing_impaired, languages))
|
||||
|
||||
return subliminal.download_best_subtitles(video_part_map.keys(), languages, min_score, hearing_impaired, providers=config.providers,
|
||||
provider_configs=config.provider_settings, pool_class=config.provider_pool,
|
||||
compute_score=compute_score)
|
||||
return subliminal.download_best_subtitles(set(use_videos), languages, min_score, hearing_impaired,
|
||||
providers=providers or config.providers,
|
||||
provider_configs=config.provider_settings,
|
||||
pool_class=config.provider_pool,
|
||||
compute_score=compute_score, throttle_time=throttle_time,
|
||||
blacklist=blacklist, throttle_callback=config.provider_throttle,
|
||||
pre_download_hook=pre_download_hook,
|
||||
post_download_hook=post_download_hook,
|
||||
language_hook=language_hook)
|
||||
Log.Debug("All languages for all requested videos exist. Doing nothing.")
|
||||
@@ -15,7 +15,7 @@ from collections import OrderedDict
|
||||
import chardet
|
||||
|
||||
from bs4 import UnicodeDammit
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
from subzero.analytics import track_event
|
||||
|
||||
mswindows = (sys.platform == "win32")
|
||||
@@ -44,6 +44,13 @@ def cast_bool(value):
|
||||
return str(value).strip() in ("true", "True")
|
||||
|
||||
|
||||
def cast_int(value, default=None):
    """
    Convert :value: to int, returning :default: if that isn't possible.

    :param value: anything int() accepts, e.g. a numeric string or a number
    :param default: returned when the conversion fails
    :return: int(value) or :default:
    """
    try:
        return int(value)
    except (TypeError, ValueError):
        # int() raises TypeError (not ValueError) for None and other non-numeric types
        return default
|
||||
|
||||
|
||||
# A platform independent way to split paths which might come in with different separators.
|
||||
def split_path(str):
|
||||
if str.find('\\') != -1:
|
||||
@@ -151,10 +158,11 @@ def get_video_display_title(kind, title, section_title=None, parent_title=None,
|
||||
if add_section_title:
|
||||
section_add = ("%s: " % section_title) if section_title else ""
|
||||
|
||||
if kind == "show" and parent_title:
|
||||
if kind in ("season", "show") and parent_title:
|
||||
if season and episode:
|
||||
return '%s%s S%02dE%02d%s' % (section_add, parent_title, season or 0, episode or 0,
|
||||
(", %s" % title if title else ""))
|
||||
|
||||
return '%s%s%s' % (section_add, parent_title, (", %s" % title if title else ""))
|
||||
return "%s%s" % (section_add, title)
|
||||
|
||||
@@ -202,7 +210,7 @@ def decode_message(s):
|
||||
|
||||
|
||||
def timestamp():
|
||||
return int(time.time())
|
||||
return int(time.time()*1000)
|
||||
|
||||
|
||||
def df(d):
|
||||
@@ -329,9 +337,12 @@ def track_usage(category=None, action=None, label=None, value=None):
|
||||
except:
|
||||
pass
|
||||
|
||||
Thread.Create(dispatch_track_usage, category, action, label, value,
|
||||
identifier=Dict["anon_id"], first_use=Dict["first_use"],
|
||||
add=Network.PublicAddress)
|
||||
try:
|
||||
Thread.Create(dispatch_track_usage, category, action, label, value,
|
||||
identifier=Dict["anon_id"], first_use=Dict["first_use"],
|
||||
add=Network.PublicAddress)
|
||||
except:
|
||||
Log.Debug("Something went wrong when reporting anonymous user statistics: %s", traceback.format_exc())
|
||||
|
||||
|
||||
def dispatch_track_usage(*args, **kwargs):
|
||||
@@ -344,9 +355,27 @@ def dispatch_track_usage(*args, **kwargs):
|
||||
Log.Debug("Something went wrong when reporting anonymous user statistics: %s", traceback.format_exc())
|
||||
|
||||
|
||||
def get_language_from_stream(lang_code):
    """
    Map a stream's language code to a Language via Locale.Language.Match.

    :param lang_code: language code of the stream, may be empty
    :return: Language, or None when no code was given or it matched nothing ("xx" counts as no match)
    """
    if not lang_code:
        return

    matched = Locale.Language.Match(lang_code)
    if not matched or matched == "xx":
        return

    # Log.Debug("Found language: %r", lang)
    return Language.fromietf(matched)
|
||||
|
||||
|
||||
def get_language(lang_short):
    """
    Build a Language from its IETF code.

    :param lang_short: IETF language code
    :return: result of Language.fromietf
    """
    language = Language.fromietf(lang_short)
    return language
|
||||
|
||||
|
||||
def display_language(l):
    """
    Human readable name of a language, with country and script appended in parentheses when set.

    :param l: object with name, country (having alpha2) and script (having code) attributes
    :return: "<name>" or "<name> (<country alpha2>, <script code>)"
    """
    extras = []
    if l.country:
        extras += [l.country.alpha2]
    if l.script:
        extras += [l.script.code]

    if extras:
        return "%s (%s)" % (l.name, ", ".join(extras))
    return l.name
|
||||
|
||||
|
||||
class PartUnknownException(Exception):
    """Plain Exception subclass without behavior of its own; callers catch it by type."""
|
||||
@@ -1,4 +1,4 @@
|
||||
# coding=utf-8
|
||||
from subzero.history_storage import SubtitleHistory
|
||||
|
||||
get_history = lambda: SubtitleHistory(Data, int(Prefs["history_size"]))
|
||||
get_history = lambda: SubtitleHistory(Data, Thread, int(Prefs["history_size"]))
|
||||
|
||||
@@ -11,7 +11,8 @@ class IgnoreDict(DictProxy):
|
||||
"section": "sections",
|
||||
"show": "series",
|
||||
"movie": "videos",
|
||||
"episode": "videos"
|
||||
"episode": "videos",
|
||||
"season": "seasons",
|
||||
}
|
||||
|
||||
# getItems types mapped to their verbose names
|
||||
@@ -19,9 +20,10 @@ class IgnoreDict(DictProxy):
|
||||
"sections": "Section",
|
||||
"series": "Series",
|
||||
"videos": "Item",
|
||||
"seasons": "Season",
|
||||
}
|
||||
|
||||
key_order = ("sections", "series", "videos")
|
||||
key_order = ("sections", "series", "videos", "seasons")
|
||||
|
||||
def __len__(self):
|
||||
try:
|
||||
@@ -35,7 +37,7 @@ class IgnoreDict(DictProxy):
|
||||
return self.translate_keys.get(name)
|
||||
|
||||
def verbose(self, name):
|
||||
return self.keys_verbose.get(name)
|
||||
return self.keys_verbose.get(self.translate_key(name) or name)
|
||||
|
||||
def get_title_key(self, kind, key):
|
||||
return "%s_%s" % (kind, key)
|
||||
@@ -57,6 +59,7 @@ class IgnoreDict(DictProxy):
|
||||
Dict.Save()
|
||||
|
||||
def setup_defaults(self):
|
||||
return {"sections": [], "series": [], "videos": [], "titles": {}}
|
||||
return {"sections": [], "series": [], "videos": [], "titles": {}, "seasons": []}
|
||||
|
||||
|
||||
ignore_list = IgnoreDict(Dict)
|
||||
|
||||
@@ -5,6 +5,11 @@ import re
|
||||
import traceback
|
||||
import types
|
||||
import os
|
||||
|
||||
import time
|
||||
|
||||
import datetime
|
||||
|
||||
from ignore import ignore_list
|
||||
from helpers import is_recent, get_plex_item_display_title, query_plex, PartUnknownException
|
||||
from lib import Plex, get_intent
|
||||
@@ -54,6 +59,21 @@ def get_item_kind_from_item(item):
|
||||
return PLEX_API_TYPE_MAP.get(get_item_kind(item))
|
||||
|
||||
|
||||
def get_item_title(item):
    """
    Build the display title for an episode, movie, season or series item.

    :param item: plex item
    :return: title from get_plex_item_display_title, or None for any other kind of item
    """
    kind = get_item_kind_from_item(item)

    if kind == "episode":
        # episodes are titled as part of their show, with the season as parent
        return get_plex_item_display_title(item, "show", parent=item.season, section_title=None,
                                           parent_title=item.show.title)

    if kind == "season":
        return get_plex_item_display_title(item, "season", parent=item.show, section_title="Season",
                                           parent_title=item.show.title)

    if kind in ("movie", "series"):
        return get_plex_item_display_title(item, kind, section_title=None)

    return
|
||||
|
||||
|
||||
def get_item_thumb(item):
|
||||
kind = get_item_kind(item)
|
||||
if kind == "Episode":
|
||||
@@ -240,7 +260,7 @@ def is_ignored(rating_key, item=None):
|
||||
:return:
|
||||
"""
|
||||
# item in soft ignore list
|
||||
if rating_key in ignore_list["videos"]:
|
||||
if ignore_list["videos"] and rating_key in ignore_list["videos"]:
|
||||
Log.Debug("Item %s is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
@@ -248,12 +268,17 @@ def is_ignored(rating_key, item=None):
|
||||
kind = get_item_kind(item)
|
||||
|
||||
# show in soft ignore list
|
||||
if kind == "Episode" and item.show.rating_key in ignore_list["series"]:
|
||||
if kind == "Episode" and ignore_list["series"] and item.show.rating_key in ignore_list["series"]:
|
||||
Log.Debug("Item %s's show is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
# season in soft ignore list
|
||||
if kind == "Episode" and ignore_list["seasons"] and item.season.rating_key in ignore_list["seasons"]:
|
||||
Log.Debug("Item %s's season is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
# section in soft ignore list
|
||||
if item.section.key in ignore_list["sections"]:
|
||||
if ignore_list["sections"] and item.section.key in ignore_list["sections"]:
|
||||
Log.Debug("Item %s's section is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
@@ -303,26 +328,87 @@ def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, paren
|
||||
# season refresh, needs explicit per-episode refresh
|
||||
refresh = [item.rating_key for item in list(Plex["library/metadata"].children(int(rating_key)))]
|
||||
|
||||
multiple = len(refresh) > 1
|
||||
for key in refresh:
|
||||
Log.Info("%s item %s", "Refreshing" if not force else "Forced-refreshing", key)
|
||||
Plex["library/metadata"].refresh(key)
|
||||
if multiple:
|
||||
Thread.Sleep(10.0)
|
||||
|
||||
|
||||
def get_current_sub(rating_key, part_id, language):
|
||||
def get_current_sub(rating_key, part_id, language, plex_item=None):
|
||||
from support.storage import get_subtitle_storage
|
||||
|
||||
item = get_item(rating_key)
|
||||
item = plex_item or get_item(rating_key)
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load_or_new(item)
|
||||
current_sub = stored_subs.get_any(part_id, language)
|
||||
return current_sub, stored_subs, subtitle_storage
|
||||
|
||||
|
||||
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
|
||||
from support.plex_media import get_plex_metadata, scan_videos
|
||||
from support.storage import save_subtitles
|
||||
def save_stored_sub(stored_subtitle, rating_key, part_id, language, item_type, plex_item=None, storage=None,
                    stored_subs=None):
    """
    Write a stored subtitle (with its current mods applied) back out via save_subtitles.

    :param stored_subtitle: stored subtitle providing mods, content, encoding and id
    :param rating_key: rating key of the item the subtitle belongs to
    :param part_id: id of the part the subtitle belongs to
    :param language: language of the subtitle
    :param item_type: item type passed on to get_plex_metadata
    :param plex_item: optional, already loaded plex item; loaded via get_item if omitted
    :param storage: optional subtitle storage; if omitted one is created here and destroyed before returning
    :param stored_subs: optional, already loaded stored subtitles of the item; loaded from storage if omitted
    :return: None
    """
    from support.plex_media import get_plex_metadata
    from support.scanning import scan_videos
    from support.storage import save_subtitles, get_subtitle_storage

    # decide whether we own the storage *before* falling back to a fresh one; afterwards storage is always
    # set and the check could never be true
    cleanup = not storage
    storage = storage or get_subtitle_storage()

    try:
        plex_item = plex_item or get_item(rating_key)
        if not plex_item:
            return

        stored_subs = stored_subs or storage.load(plex_item.rating_key)
        if not stored_subs:
            return

        try:
            metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
        except PartUnknownException:
            return

        scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
        video, plex_part = scanned_parts.items()[0]

        subtitle = ModifiedSubtitle(language, mods=stored_subtitle.mods)
        subtitle.content = stored_subtitle.content
        if stored_subtitle.encoding:
            # thanks plex
            setattr(subtitle, "_guessed_encoding", stored_subtitle.encoding)

            if stored_subtitle.encoding != "utf-8":
                # persist the normalized content so the stored copy is utf-8 from now on
                subtitle.normalize()
                stored_subtitle.content = subtitle.content
                stored_subtitle.encoding = "utf-8"
                storage.save(stored_subs)

        subtitle.plex_media_fps = plex_part.fps
        subtitle.page_link = stored_subtitle.id
        subtitle.language = language
        subtitle.id = stored_subtitle.id

        try:
            save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
            Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
                      ", ".join(stored_subtitle.mods) if stored_subtitle.mods else "none")
        except Exception:
            # best effort: a failed save is logged, not propagated
            Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())

        if subtitle.storage_path:
            stored_subtitle.last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(subtitle.storage_path))
            storage.save(stored_subs)
    finally:
        # only dispose of a storage we created ourselves, and do so on every exit path
        if cleanup:
            storage.destroy()
|
||||
|
||||
|
||||
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
|
||||
plex_item = get_item(rating_key)
|
||||
|
||||
if not plex_item:
|
||||
return
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language, plex_item=plex_item)
|
||||
if mode == "add":
|
||||
for mod in mods:
|
||||
identifier, args = SubtitleModifications.parse_identifier(mod)
|
||||
@@ -352,37 +438,7 @@ def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"
|
||||
raise NotImplementedError("Wrong mode given")
|
||||
storage.save(stored_subs)
|
||||
|
||||
try:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
except PartUnknownException:
|
||||
return
|
||||
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True,
|
||||
no_refining=True)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
subtitle = ModifiedSubtitle(language, mods=current_sub.mods)
|
||||
subtitle.content = current_sub.content
|
||||
if current_sub.encoding:
|
||||
# thanks plex
|
||||
setattr(subtitle, "_guessed_encoding", current_sub.encoding)
|
||||
|
||||
if current_sub.encoding != "utf-8":
|
||||
subtitle.set_encoding("utf-8")
|
||||
current_sub.content = subtitle.content
|
||||
current_sub.encoding = "utf-8"
|
||||
storage.save(stored_subs)
|
||||
save_stored_sub(current_sub, rating_key, part_id, language, item_type, plex_item=plex_item, storage=storage,
|
||||
stored_subs=stored_subs)
|
||||
|
||||
storage.destroy()
|
||||
|
||||
subtitle.plex_media_fps = plex_part.fps
|
||||
subtitle.page_link = "modify subtitles with: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
subtitle.language = language
|
||||
subtitle.id = current_sub.id
|
||||
|
||||
try:
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
|
||||
Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
|
||||
", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
except:
|
||||
Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())
|
||||
|
||||
@@ -9,29 +9,33 @@ import subtitlehelpers
|
||||
from config import config as sz_config
|
||||
|
||||
|
||||
SECONDARY_TAGS = ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom', 'hi', 'cc', 'sdh']
|
||||
|
||||
|
||||
def find_subtitles(part):
|
||||
lang_sub_map = {}
|
||||
part_filename = helpers.unicodize(part.file)
|
||||
part_basename = os.path.splitext(os.path.basename(part_filename))[0]
|
||||
use_filesystem = helpers.cast_bool(Prefs["subtitles.save.filesystem"])
|
||||
paths = [os.path.dirname(part_filename)] if use_filesystem else []
|
||||
sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
|
||||
if Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
|
||||
global_subtitle_folder = None
|
||||
use_sub_subfolder = Prefs["subtitles.save.subFolder"] != "current folder" and not sub_dir_custom
|
||||
sub_subfolder = None
|
||||
paths = [os.path.dirname(part_filename)] if use_filesystem else []
|
||||
|
||||
global_folders = []
|
||||
|
||||
if use_filesystem:
|
||||
# Check for local subtitles subdirectory
|
||||
sub_dir_base = paths[0]
|
||||
|
||||
sub_dir_list = []
|
||||
|
||||
if Prefs["subtitles.save.subFolder"] != "current folder":
|
||||
if use_sub_subfolder:
|
||||
# got selected subfolder
|
||||
sub_dir_list.append(os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"]))
|
||||
|
||||
sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
|
||||
if Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
sub_subfolder = os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"])
|
||||
sub_dir_list.append(sub_subfolder)
|
||||
sub_subfolder = os.path.normpath(helpers.unicodize(sub_subfolder))
|
||||
|
||||
if sub_dir_custom:
|
||||
# got custom subfolder
|
||||
@@ -84,8 +88,12 @@ def find_subtitles(part):
|
||||
media_files.append(root)
|
||||
|
||||
# cleanup any leftover subtitle if no associated media file was found
|
||||
if helpers.cast_bool(Prefs["subtitles.autoclean"]):
|
||||
if use_filesystem and helpers.cast_bool(Prefs["subtitles.autoclean"]):
|
||||
for path in paths:
|
||||
# only housekeep in sub_subfolder if sub_subfolder is used
|
||||
if use_sub_subfolder and path != sub_subfolder and not sz_config.advanced.thorough_cleaning:
|
||||
continue
|
||||
|
||||
# we can't housekeep the global subtitle folders as we don't know about *all* media files
|
||||
# in a library; skip them
|
||||
skip_path = False
|
||||
@@ -105,11 +113,10 @@ def find_subtitles(part):
|
||||
if os.path.isfile(enc_fn):
|
||||
(root, ext) = os.path.splitext(file_path_listing)
|
||||
# it's a subtitle file
|
||||
if ext.lower()[1:] in config.SUBTITLE_EXTS:
|
||||
if ext.lower()[1:] in config.SUBTITLE_EXTS_BASE:
|
||||
# get fn without forced/default/normal tag
|
||||
split_tag = root.rsplit(".", 1)
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded',
|
||||
'custom']:
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
|
||||
root = split_tag[0]
|
||||
|
||||
# get associated media file name without language
|
||||
@@ -135,7 +142,7 @@ def find_subtitles(part):
|
||||
# get fn without forced/default/normal tag
|
||||
split_tag = local_basename.rsplit(".", 1)
|
||||
has_additional_tag = False
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'custom']:
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
|
||||
local_basename = split_tag[0]
|
||||
has_additional_tag = True
|
||||
|
||||
@@ -159,7 +166,7 @@ def find_subtitles(part):
|
||||
continue
|
||||
|
||||
# determine whether to pick up the subtitle based on our match strictness
|
||||
elif not filename_matches_part:
|
||||
if not filename_matches_part:
|
||||
if sz_config.ext_match_strictness == "strict" or (
|
||||
sz_config.ext_match_strictness == "loose" and not filename_contains_part):
|
||||
# Log.Debug("%s doesn't match %s, skipping" % (helpers.unicodize(local_filename),
|
||||
|
||||
@@ -2,10 +2,17 @@
|
||||
import traceback
|
||||
import time
|
||||
|
||||
from support.config import config
|
||||
from support.helpers import get_plex_item_display_title, cast_bool
|
||||
import os
|
||||
|
||||
from babelfish import LanguageReverseError
|
||||
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.helpers import get_plex_item_display_title, cast_bool, get_language_from_stream
|
||||
from support.items import get_item
|
||||
from support.lib import Plex
|
||||
from support.storage import get_subtitle_storage
|
||||
from subzero.video import has_external_subtitle
|
||||
from subzero.language import Language
|
||||
|
||||
|
||||
def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_title=None, internal=False, external=True, languages=()):
|
||||
@@ -17,11 +24,59 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
else:
|
||||
item_title = get_plex_item_display_title(item, kind, section_title=section_title)
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load(rating_key)
|
||||
subtitle_storage.destroy()
|
||||
|
||||
subtitle_target_dir, tdir_is_absolute = config.subtitle_sub_dir
|
||||
|
||||
missing = set()
|
||||
languages_set = set(languages)
|
||||
languages_set = set([Language.fromietf(str(l)) for l in languages])
|
||||
for media in item.media:
|
||||
existing_subs = {"internal": [], "external": [], "count": 0}
|
||||
existing_subs = {"internal": [], "external": [], "own_external": [], "count": 0}
|
||||
for part in media.parts:
|
||||
|
||||
# did we already download an external subtitle before?
|
||||
if subtitle_target_dir and stored_subs:
|
||||
for language in languages_set:
|
||||
if has_external_subtitle(part.id, stored_subs, language):
|
||||
# check the existence of the actual subtitle file
|
||||
|
||||
# get media filename without extension
|
||||
part_basename = os.path.splitext(os.path.basename(part.file))[0]
|
||||
|
||||
# compute target directory for subtitle
|
||||
# fixme: move to central location
|
||||
if tdir_is_absolute:
|
||||
possible_subtitle_path_base = subtitle_target_dir
|
||||
else:
|
||||
possible_subtitle_path_base = os.path.join(os.path.dirname(part.file), subtitle_target_dir)
|
||||
|
||||
possible_subtitle_path_base = os.path.realpath(possible_subtitle_path_base)
|
||||
|
||||
# folder actually exists?
|
||||
if not os.path.isdir(possible_subtitle_path_base):
|
||||
continue
|
||||
|
||||
found_any = False
|
||||
for ext in config.subtitle_formats:
|
||||
if cast_bool(Prefs['subtitles.only_one']):
|
||||
possible_subtitle_path = os.path.join(possible_subtitle_path_base,
|
||||
u"%s.%s" % (part_basename, ext))
|
||||
else:
|
||||
possible_subtitle_path = os.path.join(possible_subtitle_path_base,
|
||||
u"%s.%s.%s" % (part_basename, language, ext))
|
||||
|
||||
# check for subtitle existence
|
||||
if os.path.isfile(possible_subtitle_path):
|
||||
found_any = True
|
||||
Log.Debug(u"Found: %s", possible_subtitle_path)
|
||||
break
|
||||
|
||||
if found_any:
|
||||
existing_subs["own_external"].append(language)
|
||||
existing_subs["count"] = existing_subs["count"] + 1
|
||||
|
||||
for stream in part.streams:
|
||||
if stream.stream_type == 3:
|
||||
if stream.index:
|
||||
@@ -29,18 +84,72 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
else:
|
||||
key = "external"
|
||||
|
||||
existing_subs[key].append(Locale.Language.Match(stream.language_code or ""))
|
||||
existing_subs["count"] = existing_subs["count"] + 1
|
||||
if not config.exotic_ext and stream.codec.lower() not in TEXT_SUBTITLE_EXTS:
|
||||
continue
|
||||
|
||||
missing_from_part = set(languages_set)
|
||||
# treat unknown language as lang1?
|
||||
if not stream.language_code and config.treat_und_as_first:
|
||||
lang = Language.fromietf(str(list(config.lang_list)[0]))
|
||||
|
||||
# we can't parse empty language codes
|
||||
elif not stream.language_code or not stream.codec:
|
||||
continue
|
||||
|
||||
else:
|
||||
# parse with internal language parser first
|
||||
try:
|
||||
lang = get_language_from_stream(stream.language_code)
|
||||
if not lang:
|
||||
if config.treat_und_as_first:
|
||||
lang = Language.fromietf(str(list(config.lang_list)[0]))
|
||||
else:
|
||||
continue
|
||||
|
||||
except (ValueError, LanguageReverseError):
|
||||
continue
|
||||
|
||||
if lang:
|
||||
# Log.Debug("Found babelfish language: %r", lang)
|
||||
existing_subs[key].append(lang)
|
||||
existing_subs["count"] = existing_subs["count"] + 1
|
||||
|
||||
missing_from_part = set([Language.fromietf(str(l)) for l in languages])
|
||||
if existing_subs["count"]:
|
||||
existing_flat = set((existing_subs["internal"] if internal else []) + (existing_subs["external"] if external else []))
|
||||
if languages_set.issubset(existing_flat) or (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
|
||||
|
||||
# fixme: this is actually somewhat broken with IETF, as Plex doesn't store the country portion
|
||||
# (pt instead of pt-BR) inside the database. So it might actually download pt-BR if there's a local pt-BR
|
||||
# subtitle but not our own.
|
||||
existing_flat = set((existing_subs["internal"] if internal else [])
|
||||
+ (existing_subs["external"] if external else [])
|
||||
+ existing_subs["own_external"])
|
||||
|
||||
check_languages = set([Language.fromietf(str(l)) for l in languages])
|
||||
alpha3_map = {}
|
||||
if config.ietf_as_alpha3:
|
||||
for language in existing_flat:
|
||||
if language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
|
||||
for language in check_languages:
|
||||
if language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
|
||||
# compare sets of strings, not sets of different Language instances
|
||||
check_languages_str = set(str(l) for l in check_languages)
|
||||
existing_flat_str = set(str(l) for l in existing_flat)
|
||||
|
||||
if check_languages_str.issubset(existing_flat_str) or \
|
||||
(len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
|
||||
# all subs found
|
||||
#Log.Info(u"All subtitles exist for '%s'", item_title)
|
||||
continue
|
||||
|
||||
missing_from_part = languages_set - existing_flat
|
||||
missing_from_part = set(Language.fromietf(l) for l in check_languages_str - existing_flat_str)
|
||||
if config.ietf_as_alpha3:
|
||||
for language in missing_from_part:
|
||||
language.country = alpha3_map.get(language.alpha3, None)
|
||||
|
||||
if missing_from_part:
|
||||
Log.Info(u"Subs still missing for '%s' (%s: %s): %s", item_title, rating_key, media.id,
|
||||
@@ -48,6 +157,8 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
missing.update(missing_from_part)
|
||||
|
||||
if missing:
|
||||
# deduplicate
|
||||
missing = set(Language.fromietf(la) for la in set(str(l) for l in missing))
|
||||
return added_at, item_id, item_title, item, missing
|
||||
|
||||
|
||||
@@ -60,7 +171,7 @@ def items_get_all_missing_subs(items, sleep_after_request=False):
|
||||
kind=kind,
|
||||
added_at=added_at,
|
||||
section_title=section_title,
|
||||
languages=config.lang_list,
|
||||
languages=config.lang_list.copy(),
|
||||
internal=cast_bool(Prefs["subtitles.scan.embedded"]),
|
||||
external=cast_bool(Prefs["subtitles.scan.external"])
|
||||
)
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
# coding=utf-8
|
||||
|
||||
import os
|
||||
from urllib2 import URLError
|
||||
|
||||
import helpers
|
||||
from config import config
|
||||
from items import get_item
|
||||
from lib import get_intent, Plex
|
||||
from subzero.video import parse_video
|
||||
from lib import Plex
|
||||
from support.config import TEXT_SUBTITLE_EXTS, config
|
||||
|
||||
|
||||
def get_metadata_dict(item, part, add):
|
||||
data = {
|
||||
@@ -45,10 +44,11 @@ def get_plexapi_stream_info(plex_item, part_id=None):
|
||||
return d
|
||||
|
||||
data["video_codec"] = current_media.video_codec
|
||||
data["audio_codec"] = current_media.audio_codec.upper()
|
||||
if current_media.audio_codec:
|
||||
data["audio_codec"] = current_media.audio_codec.upper()
|
||||
|
||||
if data["audio_codec"] == "DCA":
|
||||
data["audio_codec"] = "DTS"
|
||||
if data["audio_codec"] == "DCA":
|
||||
data["audio_codec"] = "DTS"
|
||||
|
||||
if current_media.audio_channels == 8:
|
||||
data["audio_channels"] = "7.1"
|
||||
@@ -153,10 +153,9 @@ def get_stream_fps(streams):
|
||||
|
||||
|
||||
def get_media_item_ids(media, kind="series"):
|
||||
ids = []
|
||||
if kind == "movies":
|
||||
ids.append(media.id)
|
||||
else:
|
||||
# fixme: does this work correctly for full series force-refreshes and its intents?
|
||||
ids = [media.id]
|
||||
if kind == "series":
|
||||
for season in media.seasons:
|
||||
for episode in media.seasons[season].episodes:
|
||||
ids.append(media.seasons[season].episodes[episode].id)
|
||||
@@ -164,98 +163,51 @@ def get_media_item_ids(media, kind="series"):
|
||||
return ids
|
||||
|
||||
|
||||
def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, no_refining=False):
|
||||
"""
|
||||
returnes a subliminal/guessit-refined parsed video
|
||||
:param pms_video_info:
|
||||
:param ignore_all:
|
||||
:param hints:
|
||||
:param rating_key:
|
||||
:return:
|
||||
"""
|
||||
embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
|
||||
external_subtitles = not ignore_all and Prefs['subtitles.scan.external']
|
||||
|
||||
plex_part = pms_video_info["plex_part"]
|
||||
|
||||
if ignore_all:
|
||||
Log.Debug("Force refresh intended.")
|
||||
|
||||
Log.Debug("Scanning video: %s, external_subtitles=%s, embedded_subtitles=%s" % (
|
||||
plex_part.file, external_subtitles, embedded_subtitles))
|
||||
|
||||
known_embedded = []
|
||||
def get_all_parts(plex_item):
|
||||
parts = []
|
||||
for media in list(Plex["library"].metadata(rating_key))[0].media:
|
||||
for media in plex_item.media:
|
||||
parts += media.parts
|
||||
|
||||
plexpy_part = None
|
||||
for part in parts:
|
||||
if int(part.id) == int(plex_part.id):
|
||||
plexpy_part = part
|
||||
|
||||
# embedded subtitles
|
||||
if plexpy_part:
|
||||
for stream in plexpy_part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3:
|
||||
if (config.forced_only and getattr(stream, "forced")) or \
|
||||
(not config.forced_only and not getattr(stream, "forced")):
|
||||
|
||||
# embedded subtitle
|
||||
if not stream.stream_key:
|
||||
if config.exotic_ext or stream.codec in ("srt", "ass", "ssa"):
|
||||
lang_code = stream.language_code
|
||||
|
||||
# treat unknown language as lang1?
|
||||
if not lang_code and config.treat_und_as_first:
|
||||
lang_code = list(config.lang_list)[0].alpha3
|
||||
known_embedded.append(lang_code)
|
||||
else:
|
||||
Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)
|
||||
|
||||
try:
|
||||
# get basic video info scan (filename)
|
||||
video = parse_video(plex_part.file, pms_video_info, hints, external_subtitles=external_subtitles,
|
||||
embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
|
||||
forced_only=config.forced_only, no_refining=no_refining)
|
||||
|
||||
# add video fps info
|
||||
video.fps = plex_part.fps
|
||||
return video
|
||||
|
||||
except ValueError:
|
||||
Log.Warn("File could not be guessed by subliminal: %s" % plex_part.file)
|
||||
return parts
|
||||
|
||||
|
||||
def scan_videos(videos, kind="series", ignore_all=False, no_refining=False):
|
||||
"""
|
||||
receives a list of videos containing dictionaries returned by media_to_videos
|
||||
:param videos:
|
||||
:param kind: series or movies
|
||||
:return: dictionary of subliminal.video.scan_video, key=subliminal scanned video, value=plex file part
|
||||
"""
|
||||
ret = {}
|
||||
for video in videos:
|
||||
intent = get_intent()
|
||||
force_refresh = intent.get("force", video["id"], video["series_id"], video["season_id"])
|
||||
Log.Debug("Determining force-refresh (video: %s, series: %s, season: %s), result: %s"
|
||||
% (video["id"], video["series_id"], video["season_id"], force_refresh))
|
||||
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, get_forced=None):
|
||||
streams = []
|
||||
has_unknown = False
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
|
||||
language = helpers.get_language_from_stream(stream.language_code)
|
||||
is_unknown = False
|
||||
found_requested_language = requested_language and requested_language == language
|
||||
|
||||
hints = helpers.get_item_hints(video)
|
||||
video["plex_part"].fps = get_stream_fps(video["plex_part"].streams)
|
||||
scanned_video = scan_video(video, ignore_all=force_refresh or ignore_all, hints=hints,
|
||||
rating_key=video["id"], no_refining=no_refining)
|
||||
if get_forced is not None:
|
||||
if (get_forced and not stream.forced) or (not get_forced and stream.forced):
|
||||
continue
|
||||
|
||||
if not scanned_video:
|
||||
continue
|
||||
if not language and config.treat_und_as_first:
|
||||
# only consider first unknown subtitle stream
|
||||
if has_unknown and skip_duplicate_unknown:
|
||||
continue
|
||||
|
||||
scanned_video.id = video["id"]
|
||||
part_metadata = video.copy()
|
||||
del part_metadata["plex_part"]
|
||||
scanned_video.plexapi_metadata = part_metadata
|
||||
ret[scanned_video] = video["plex_part"]
|
||||
return ret
|
||||
language = list(config.lang_list)[0]
|
||||
is_unknown = True
|
||||
has_unknown = True
|
||||
|
||||
if not requested_language or found_requested_language:
|
||||
streams.append({"stream": stream, "is_unknown": is_unknown, "language": language})
|
||||
|
||||
if found_requested_language:
|
||||
break
|
||||
|
||||
return streams
|
||||
|
||||
|
||||
def get_part(plex_item, part_id):
    """
    looks up a single part of a plex item by its id
    :param plex_item: item whose media entries (and their parts) are searched
    :param part_id: id of the wanted part; ids are compared as strings, so int and str ids both match
    :return: the first part with a matching id, None if there is none
    """
    wanted = str(part_id)
    candidates = (part for media in plex_item.media for part in media.parts)
    return next((part for part in candidates if str(part.id) == wanted), None)
|
||||
|
||||
|
||||
def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
|
||||
@@ -275,11 +227,7 @@ def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
|
||||
return
|
||||
|
||||
# find current part
|
||||
current_part = None
|
||||
for media in plex_item.media:
|
||||
for part in media.parts:
|
||||
if str(part.id) == str(part_id):
|
||||
current_part = part
|
||||
current_part = get_part(plex_item, part_id)
|
||||
|
||||
if not current_part:
|
||||
raise helpers.PartUnknownException("Part unknown")
|
||||
@@ -334,6 +282,24 @@ def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
|
||||
return metadata
|
||||
|
||||
|
||||
def get_blacklist_from_part_map(video_part_map, languages):
    """
    collects the blacklist entries stored for the given videos/parts in the given languages
    :param video_part_map: dictionary, key=scanned video (carrying plexapi_metadata), value=plex file part
    :param languages: iterable of languages to look up blacklist entries for
    :return: list of (str, str) tuples, one per key of every non-empty blacklist found
    """
    # NOTE(review): function-level import kept as in the original; presumably avoids an import cycle - confirm
    from support.storage import get_subtitle_storage

    subtitle_storage = get_subtitle_storage()
    blacklist = []
    try:
        for video, part in video_part_map.iteritems():
            stored_subs = subtitle_storage.load_or_new(video.plexapi_metadata["item"])
            for language in languages:
                # get_blacklist returns a pair; only its first element (the blacklist mapping) is used here
                current_bl, _ = stored_subs.get_blacklist(part.id, language)
                if not current_bl:
                    continue

                blacklist.extend((str(a), str(b)) for a, b in current_bl.keys())
    finally:
        # always release the storage, even if loading or reading a blacklist failed
        subtitle_storage.destroy()

    return blacklist
|
||||
|
||||
|
||||
class PMSMediaProxy(object):
|
||||
"""
|
||||
Proxy object for getting data from a mediatree items "internally" via the PMS
|
||||
|
||||
@@ -0,0 +1,124 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
import helpers
|
||||
|
||||
from support.lib import Plex, get_intent
|
||||
from support.plex_media import get_stream_fps
|
||||
from support.storage import get_subtitle_storage
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
|
||||
from subzero.video import parse_video, set_existing_languages
|
||||
|
||||
|
||||
def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, providers=None, skip_hashing=False):
    """
    returns a parsed video for the given plex part, with its existing subtitle languages set
    :param pms_video_info: dictionary describing the video; its "plex_part" entry is the part to scan
    :param ignore_all: force refresh; existing embedded/external subtitles are not taken into account
    :param hints: passed through to parse_video
    :param rating_key: rating key of the item; used to look up its parts and its stored subtitles
    :param providers: passed through to parse_video
    :param skip_hashing: passed to parse_video as skip_hashing; config.low_impact_mode enables it as well
    :return: the parsed video, None if parse_video/set_existing_languages raised a ValueError
    """
    embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
    external_subtitles = not ignore_all and Prefs['subtitles.scan.external']

    plex_part = pms_video_info["plex_part"]

    if ignore_all:
        Log.Debug("Force refresh intended.")

    Log.Debug("Scanning video: %s, external_subtitles=%s, embedded_subtitles=%s" % (
        plex_part.file, external_subtitles, embedded_subtitles))

    # find the library's representation of our part; ids are compared as integers
    plexpy_part = None
    for media in list(Plex["library"].metadata(rating_key))[0].media:
        for part in media.parts:
            if int(part.id) == int(plex_part.id):
                plexpy_part = part

    # embedded subtitles
    known_embedded = []
    if plexpy_part:
        for stream in plexpy_part.streams:
            # subtitle stream
            if stream.stream_type != 3:
                continue

            # with forced_only set only forced streams count, otherwise only non-forced ones
            if bool(config.forced_only) != bool(stream.forced):
                continue

            # embedded subtitle: streams with a stream_key or without a codec are not considered
            # fixme: tap into external subtitles here instead of scanning for ourselves later?
            if stream.stream_key or not stream.codec:
                continue

            if not (config.exotic_ext or stream.codec.lower() in TEXT_SUBTITLE_EXTS):
                continue

            lang = helpers.get_language_from_stream(stream.language_code)

            # treat unknown language as lang1?
            if not lang and config.treat_und_as_first:
                lang = list(config.lang_list)[0]

            if lang:
                known_embedded.append(lang.alpha3)
    else:
        Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)

    subtitle_storage = get_subtitle_storage()
    try:
        stored_subs = subtitle_storage.load(rating_key)
    finally:
        # release the storage even if loading failed
        subtitle_storage.destroy()

    try:
        # get basic video info scan (filename)
        video = parse_video(plex_part.file, hints, skip_hashing=config.low_impact_mode or skip_hashing,
                            providers=providers)

        if not ignore_all:
            set_existing_languages(video, pms_video_info, external_subtitles=external_subtitles,
                                   embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
                                   forced_only=config.forced_only, stored_subs=stored_subs,
                                   languages=config.lang_list, only_one=config.only_one)

        # add video fps info
        video.fps = plex_part.fps
        return video

    except ValueError:
        Log.Warn("File could not be guessed: %s: %s", plex_part.file, traceback.format_exc())
        return None
|
||||
|
||||
|
||||
def scan_videos(videos, ignore_all=False, providers=None, skip_hashing=False):
    """
    scans every given video and maps the scan result to its plex file part
    :param videos: list of video dictionaries as returned by media_to_videos
    :param ignore_all: treat every video as force-refreshed, regardless of its intent
    :param providers: providers handed to scan_video; when empty they are taken from the config per media type
    :param skip_hashing: handed through to scan_video
    :return: dictionary, key=scanned video, value=plex file part; videos that couldn't be scanned are left out
    """
    scanned = {}
    for info in videos:
        intent = get_intent()
        video_id = info["id"]
        series_id = info["series_id"]
        season_id = info["season_id"]

        force_refresh = intent.get("force", video_id, series_id, season_id)
        Log.Debug("Determining force-refresh (video: %s, series: %s, season: %s), result: %s"
                  % (video_id, series_id, season_id, force_refresh))

        hints = helpers.get_item_hints(info)
        plex_part = info["plex_part"]
        plex_part.fps = get_stream_fps(plex_part.streams)

        if providers:
            video_providers = providers
        else:
            # episodes use the series providers, everything else the movie providers
            media_type = "series" if info["type"] == "episode" else "movies"
            video_providers = config.get_providers(media_type=media_type)

        result = scan_video(info, ignore_all=force_refresh or ignore_all, hints=hints, rating_key=info["id"],
                            providers=video_providers, skip_hashing=skip_hashing)
        if not result:
            continue

        # attach everything but the part itself as metadata
        metadata = info.copy()
        metadata.pop("plex_part")

        result.id = info["id"]
        result.plexapi_metadata = metadata
        result.ignore_all = force_refresh
        scanned[result] = info["plex_part"]

    return scanned
|
||||
@@ -4,21 +4,24 @@ import datetime
|
||||
import logging
|
||||
import traceback
|
||||
|
||||
from config import config
|
||||
|
||||
def parse_frequency(s):
    """
    parses a frequency string consisting of three whitespace-separated tokens
    :param s: "never", None, or a string of three tokens of which the second is an integer and the third a unit
    :return: (number, unit); (None, None) for "never" or None
    :raises ValueError: if s doesn't consist of exactly three tokens or the second one isn't an integer
    """
    if s is None or s == "never":
        return None, None

    # the leading token isn't used
    _, num, unit = s.split()
    return int(num), unit
|
||||
|
||||
|
||||
class DefaultScheduler(object):
|
||||
thread = None
|
||||
queue_thread = None
|
||||
scheduler_thread = None
|
||||
running = False
|
||||
registry = None
|
||||
|
||||
def __init__(self):
|
||||
self.thread = None
|
||||
self.queue_thread = None
|
||||
self.scheduler_thread = None
|
||||
self.running = False
|
||||
self.registry = []
|
||||
|
||||
@@ -47,6 +50,7 @@ class DefaultScheduler(object):
|
||||
if Dict["tasks"]:
|
||||
for task_name in Dict["tasks"].keys():
|
||||
if task_name == "queue":
|
||||
Dict["tasks"][task_name] = []
|
||||
continue
|
||||
|
||||
Dict["tasks"][task_name]["data"] = {}
|
||||
@@ -58,6 +62,7 @@ class DefaultScheduler(object):
|
||||
raise NotImplementedError("Task missing! %s" % name)
|
||||
|
||||
Dict["tasks"][name]["data"] = {}
|
||||
Dict["tasks"][name]["running"] = False
|
||||
Dict.Save()
|
||||
Log.Debug("Task data cleared: %s", name)
|
||||
|
||||
@@ -78,7 +83,8 @@ class DefaultScheduler(object):
|
||||
|
||||
def run(self):
|
||||
self.running = True
|
||||
self.thread = Thread.Create(self.worker)
|
||||
self.scheduler_thread = Thread.Create(self.scheduler_worker)
|
||||
self.queue_thread = Thread.Create(self.queue_worker)
|
||||
|
||||
def stop(self):
|
||||
self.running = False
|
||||
@@ -113,6 +119,7 @@ class DefaultScheduler(object):
|
||||
|
||||
def run_task(self, name, *args, **kwargs):
|
||||
task = self.tasks[name]["task"]
|
||||
|
||||
if task.running:
|
||||
Log.Debug("Scheduler: Not running %s, as it's currently running.", name)
|
||||
return False
|
||||
@@ -124,8 +131,12 @@ class DefaultScheduler(object):
|
||||
except Exception, e:
|
||||
Log.Error("Scheduler: Something went wrong when running %s: %s", name, traceback.format_exc())
|
||||
finally:
|
||||
task.post_run(Dict["tasks"][name]["data"])
|
||||
try:
|
||||
task.post_run(Dict["tasks"][name]["data"])
|
||||
except:
|
||||
Log.Error("Scheduler: task.post_run failed for %s: %s", name, traceback.format_exc())
|
||||
Dict.Save()
|
||||
config.sync_cache()
|
||||
|
||||
def dispatch_task(self, *args, **kwargs):
|
||||
if "queue" not in Dict["tasks"]:
|
||||
@@ -157,7 +168,7 @@ class DefaultScheduler(object):
|
||||
continue
|
||||
Log.Debug("Scheduler: Not sending signal %s to task %s, because: not running", name, task_name)
|
||||
|
||||
def worker(self):
|
||||
def queue_worker(self):
|
||||
Thread.Sleep(10.0)
|
||||
while 1:
|
||||
if not self.running:
|
||||
@@ -170,10 +181,18 @@ class DefaultScheduler(object):
|
||||
Dict["tasks"]["queue"] = []
|
||||
Dict.Save()
|
||||
for args, kwargs in queue:
|
||||
Log.Debug("Dispatching single task: %s, %s", args, kwargs)
|
||||
Log.Debug("Queue: Dispatching single task: %s, %s", args, kwargs)
|
||||
Thread.Create(self.run_task, True, *args, **kwargs)
|
||||
Thread.Sleep(5.0)
|
||||
|
||||
Thread.Sleep(1)
|
||||
|
||||
def scheduler_worker(self):
|
||||
Thread.Sleep(10.0)
|
||||
while 1:
|
||||
if not self.running:
|
||||
break
|
||||
|
||||
# scheduled tasks
|
||||
for name in self.tasks.keys():
|
||||
now = datetime.datetime.now()
|
||||
@@ -193,6 +212,14 @@ class DefaultScheduler(object):
|
||||
if not frequency_num:
|
||||
continue
|
||||
|
||||
# run legacy SARAM once
|
||||
if name == "SearchAllRecentlyAddedMissing" and ("hasRunLSARAM" not in Dict or not Dict["hasRunLSARAM"]):
|
||||
task = self.tasks["LegacySearchAllRecentlyAddedMissing"]["task"]
|
||||
task.last_run = None
|
||||
name = "LegacySearchAllRecentlyAddedMissing"
|
||||
Dict["hasRunLSARAM"] = True
|
||||
Dict.Save()
|
||||
|
||||
if not task.last_run or (task.last_run + datetime.timedelta(**{frequency_key: frequency_num}) <= now):
|
||||
# fixme: scheduled tasks run synchronously. is this the best idea?
|
||||
Thread.Create(self.run_task, True, name)
|
||||
|
||||
@@ -4,9 +4,12 @@ import datetime
|
||||
import os
|
||||
import pprint
|
||||
import copy
|
||||
import traceback
|
||||
import types
|
||||
|
||||
from subliminal_patch.core import save_subtitles as subliminal_save_subtitles
|
||||
from subzero.subtitle_storage import StoredSubtitlesManager
|
||||
from subzero.lib.io import FileIO
|
||||
|
||||
from subtitlehelpers import force_utf8
|
||||
from config import config
|
||||
@@ -16,13 +19,14 @@ from support.items import get_item
|
||||
|
||||
|
||||
def get_subtitle_storage():
|
||||
return StoredSubtitlesManager(Data, get_item)
|
||||
return StoredSubtitlesManager(Data, Thread, get_item)
|
||||
|
||||
|
||||
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a"):
|
||||
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a", set_current=True):
|
||||
"""
|
||||
stores information about downloaded subtitles in plex's Dict()
|
||||
"""
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
part = scanned_video_part_map[video]
|
||||
part_id = str(part.id)
|
||||
@@ -31,15 +35,25 @@ def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_ty
|
||||
metadata = video.plexapi_metadata
|
||||
title = get_title_for_video_metadata(metadata)
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load_or_new(plex_item)
|
||||
stored_subs = subtitle_storage.load(video_id)
|
||||
is_new = False
|
||||
if not stored_subs:
|
||||
is_new = True
|
||||
Log.Debug(u"Creating new subtitle storage: %s, %s", video_id, part_id)
|
||||
stored_subs = subtitle_storage.new(plex_item)
|
||||
|
||||
for subtitle in video_subtitles:
|
||||
lang = str(subtitle.language)
|
||||
subtitle.set_encoding("utf-8")
|
||||
Log.Debug(u"Adding subtitle to storage: %s, %s, %s, %s" % (video_id, part_id, title,
|
||||
subtitle.guess_encoding()))
|
||||
ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode)
|
||||
subtitle.normalize()
|
||||
Log.Debug(u"Adding subtitle to storage: %s, %s, %s, %s, %s" % (video_id, part_id, lang, title,
|
||||
subtitle.guess_encoding()))
|
||||
|
||||
last_mod = None
|
||||
if subtitle.storage_path:
|
||||
last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(subtitle.storage_path))
|
||||
|
||||
ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode, last_mod=last_mod,
|
||||
set_current=set_current)
|
||||
|
||||
if ret_val:
|
||||
Log.Debug("Subtitle stored")
|
||||
@@ -47,9 +61,11 @@ def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_ty
|
||||
else:
|
||||
Log.Debug("Subtitle already existing in storage")
|
||||
|
||||
Log.Debug("Saving subtitle storage for %s" % video_id)
|
||||
subtitle_storage.save(stored_subs)
|
||||
subtitle_storage.destroy()
|
||||
if is_new or video_subtitles:
|
||||
Log.Debug("Saving subtitle storage for %s" % video_id)
|
||||
subtitle_storage.save(stored_subs)
|
||||
|
||||
subtitle_storage.destroy()
|
||||
|
||||
|
||||
def reset_storage(key):
|
||||
@@ -71,32 +87,43 @@ def log_storage(key):
|
||||
Log.Debug(pprint.pformat(Dict[key]))
|
||||
|
||||
|
||||
def save_subtitles_to_file(subtitles):
|
||||
def get_target_folder(file_path):
    """
    determines the folder subtitles for the given media file are saved to, creating it if it doesn't exist
    :param file_path: path of the media file
    :return: the subtitle folder; None if no custom folder is set and "subtitles.save.subFolder" is
             "current folder"
    :raises OSError: if the folder can't be created
    """
    import errno

    fld_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
        if Prefs["subtitles.save.subFolder.Custom"] else None

    if not fld_custom and Prefs["subtitles.save.subFolder"] == "current folder":
        # no specific subFolder requested
        return None

    # specific subFolder requested, create it if it doesn't exist
    fld_base = os.path.split(file_path)[0]
    if not fld_custom:
        fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
    elif fld_custom.startswith("/"):
        # absolute folder
        fld = fld_custom
    else:
        fld = os.path.join(fld_base, fld_custom)

    fld = force_unicode(fld)
    if not os.path.exists(fld):
        try:
            os.makedirs(fld)
        except OSError as e:
            # the folder may have been created by someone else between the check and makedirs;
            # that is fine, anything else is a real error
            if e.errno != errno.EEXIST or not os.path.isdir(fld):
                raise
    return fld
|
||||
|
||||
|
||||
def save_subtitles_to_file(subtitles, tags=None, forced_tag=None):
|
||||
forced_tag = forced_tag or config.forced_only
|
||||
for video, video_subtitles in subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
|
||||
fld = None
|
||||
if fld_custom or Prefs["subtitles.save.subFolder"] != "current folder":
|
||||
# specific subFolder requested, create it if it doesn't exist
|
||||
fld_base = os.path.split(video.name)[0]
|
||||
if fld_custom:
|
||||
if fld_custom.startswith("/"):
|
||||
# absolute folder
|
||||
fld = fld_custom
|
||||
else:
|
||||
fld = os.path.join(fld_base, fld_custom)
|
||||
else:
|
||||
fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
|
||||
fld = force_unicode(fld)
|
||||
if not os.path.exists(fld):
|
||||
os.makedirs(fld)
|
||||
subliminal_save_subtitles(video, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
|
||||
chmod=config.chmod, forced_tag=config.forced_only, path_decoder=force_unicode,
|
||||
debug_mods=config.debug_mods, formats=config.subtitle_formats)
|
||||
if not isinstance(video, types.StringTypes):
|
||||
file_path = video.name
|
||||
else:
|
||||
file_path = video
|
||||
|
||||
fld = get_target_folder(file_path)
|
||||
subliminal_save_subtitles(file_path, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
|
||||
chmod=config.chmod, forced_tag=forced_tag, path_decoder=force_unicode,
|
||||
debug_mods=config.debug_mods, formats=config.subtitle_formats, tags=tags)
|
||||
return True
|
||||
|
||||
|
||||
@@ -116,10 +143,12 @@ def save_subtitles_to_metadata(videos, subtitles):
|
||||
return True
|
||||
|
||||
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None):
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None,
|
||||
set_current=True):
|
||||
"""
|
||||
|
||||
:param scanned_video_part_map:
|
||||
:param set_current: save the subtitle as the current one
|
||||
:param scanned_video_part_map:
|
||||
:param downloaded_subtitles:
|
||||
:param mode:
|
||||
:param bare_save: don't trigger anything; don't store information
|
||||
@@ -129,6 +158,8 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_
|
||||
meta_fallback = False
|
||||
save_successful = False
|
||||
|
||||
# big fixme: scanned_video_part_map isn't needed to the current extent. rewrite.
|
||||
|
||||
if mods:
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
if not video_subtitles:
|
||||
@@ -140,31 +171,66 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_
|
||||
subtitle.plex_media_fps = video.fps
|
||||
|
||||
storage = "metadata"
|
||||
if Prefs['subtitles.save.filesystem']:
|
||||
save_to_fs = cast_bool(Prefs['subtitles.save.filesystem'])
|
||||
if save_to_fs:
|
||||
storage = "filesystem"
|
||||
try:
|
||||
Log.Debug("Using filesystem as subtitle storage")
|
||||
save_subtitles_to_file(downloaded_subtitles)
|
||||
except OSError:
|
||||
if Prefs["subtitles.save.metadata_fallback"]:
|
||||
meta_fallback = True
|
||||
|
||||
if set_current:
|
||||
if save_to_fs:
|
||||
try:
|
||||
Log.Debug("Using filesystem as subtitle storage")
|
||||
save_subtitles_to_file(downloaded_subtitles)
|
||||
except OSError:
|
||||
if cast_bool(Prefs["subtitles.save.metadata_fallback"]):
|
||||
meta_fallback = True
|
||||
storage = "metadata"
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
save_successful = True
|
||||
save_successful = True
|
||||
|
||||
if not Prefs['subtitles.save.filesystem'] or meta_fallback:
|
||||
if meta_fallback:
|
||||
Log.Debug("Using metadata as subtitle storage, because filesystem storage failed")
|
||||
else:
|
||||
Log.Debug("Using metadata as subtitle storage")
|
||||
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles)
|
||||
if not save_to_fs or meta_fallback:
|
||||
if meta_fallback:
|
||||
Log.Debug("Using metadata as subtitle storage, because filesystem storage failed")
|
||||
else:
|
||||
Log.Debug("Using metadata as subtitle storage")
|
||||
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles)
|
||||
|
||||
if not bare_save and save_successful and config.notify_executable:
|
||||
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
|
||||
if not bare_save and save_successful and config.notify_executable:
|
||||
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
|
||||
|
||||
if not bare_save and save_successful:
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode)
|
||||
if not bare_save and (save_successful or not set_current):
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode, set_current=set_current)
|
||||
|
||||
return save_successful
|
||||
|
||||
|
||||
def get_pack_id(subtitle):
|
||||
return "%s_%s" % (subtitle.provider_name, subtitle.numeric_id)
|
||||
|
||||
|
||||
def get_pack_data(subtitle):
|
||||
subtitle_id = get_pack_id(subtitle)
|
||||
|
||||
archive = os.path.join(config.pack_cache_dir, subtitle_id + ".archive")
|
||||
if os.path.isfile(archive):
|
||||
Log.Info("Loading archive from pack cache: %s", subtitle_id)
|
||||
try:
|
||||
data = FileIO.read(archive, 'rb')
|
||||
|
||||
return data
|
||||
except:
|
||||
Log.Error("Couldn't load archive from pack cache: %s: %s", subtitle_id, traceback.format_exc())
|
||||
|
||||
|
||||
def store_pack_data(subtitle, data):
|
||||
subtitle_id = get_pack_id(subtitle)
|
||||
|
||||
archive = os.path.join(config.pack_cache_dir, subtitle_id + ".archive")
|
||||
|
||||
Log.Info("Storing archive in pack cache: %s", subtitle_id)
|
||||
try:
|
||||
FileIO.write(archive, data, 'wb')
|
||||
|
||||
except:
|
||||
Log.Error("Couldn't store archive in pack cache: %s: %s", subtitle_id, traceback.format_exc())
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# coding=utf-8
|
||||
|
||||
import re, os
|
||||
import config
|
||||
import helpers
|
||||
|
||||
from config import config, SUBTITLE_EXTS, TEXT_SUBTITLE_EXTS
|
||||
from bs4 import UnicodeDammit
|
||||
|
||||
|
||||
@@ -90,7 +90,7 @@ ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
|
||||
|
||||
|
||||
def match_ietf_language(s):
|
||||
language_match = re.match(".+\.([^\.]+)$" if not helpers.cast_bool(Prefs["subtitles.language.ietf"])
|
||||
language_match = re.match(".+\.([^\.]+)$" if not helpers.cast_bool(Prefs["subtitles.language.ietf_display"])
|
||||
else IETF_MATCH, s)
|
||||
if language_match and len(language_match.groups()) == 1:
|
||||
language = language_match.groups()[0]
|
||||
@@ -102,7 +102,7 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
@classmethod
|
||||
def is_helper_for(cls, filename):
|
||||
(file, file_extension) = os.path.splitext(filename)
|
||||
return file_extension.lower()[1:] in config.SUBTITLE_EXTS
|
||||
return file_extension.lower()[1:] in SUBTITLE_EXTS
|
||||
|
||||
def process_subtitles(self, part):
|
||||
|
||||
@@ -120,21 +120,29 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
forced = ''
|
||||
default = ''
|
||||
split_tag = file.rsplit('.', 1)
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'custom']:
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'embedded-forced',
|
||||
'custom']:
|
||||
file = split_tag[0]
|
||||
sub_tag = split_tag[1].lower()
|
||||
# don't do anything with 'normal', we don't need it
|
||||
if 'forced' == split_tag[1].lower():
|
||||
if 'forced' in sub_tag:
|
||||
forced = '1'
|
||||
if 'default' == split_tag[1].lower():
|
||||
elif 'default' == sub_tag:
|
||||
default = '1'
|
||||
|
||||
# Attempt to extract the language from the filename (e.g. Avatar (2009).eng)
|
||||
# IETF support thanks to
|
||||
# https://github.com/hpsbranco/LocalMedia.bundle/commit/4fad9aefedece78a1fa96401304351347f644369
|
||||
language = Locale.Language.Match(match_ietf_language(file))
|
||||
lang_part = match_ietf_language(file)
|
||||
if lang_part != file:
|
||||
language = Locale.Language.Match(lang_part)
|
||||
elif config.only_one:
|
||||
language = Locale.Language.Match(list(config.lang_list)[0].alpha2)
|
||||
else:
|
||||
language = Locale.Language.Match("xx")
|
||||
|
||||
# skip non-SRT if wanted
|
||||
if not helpers.cast_bool(Prefs["subtitles.scan.exotic_ext"]) and ext not in ["srt", "ass", "ssa", "vtt"]:
|
||||
if not config.exotic_ext and ext not in TEXT_SUBTITLE_EXTS:
|
||||
return lang_sub_map
|
||||
|
||||
codec = None
|
||||
@@ -157,7 +165,8 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
Log("An error occurred while attempting to parse the subtitle file, skipping... : " + self.filename)
|
||||
return lang_sub_map
|
||||
|
||||
if codec is None and ext in ['ass', 'ssa', 'smi', 'srt', 'psb', 'vtt']:
|
||||
# fixme: re-add vtt once Plex Inc. fixes this line in LocalMedia.bundle
|
||||
if codec is None and ext in ['ass', 'ssa', 'smi', 'srt', 'psb']:
|
||||
codec = ext.replace('ass', 'ssa')
|
||||
|
||||
if format is None:
|
||||
|
||||
+437
-228
@@ -1,24 +1,26 @@
|
||||
# coding=utf-8
|
||||
|
||||
import glob
|
||||
import os
|
||||
import datetime
|
||||
import time
|
||||
import operator
|
||||
import traceback
|
||||
from urllib2 import URLError
|
||||
|
||||
from subliminal_patch.score import compute_score
|
||||
from subliminal_patch.core import download_subtitles
|
||||
from subliminal import list_subtitles as list_all_subtitles
|
||||
from babelfish import Language
|
||||
from subliminal import list_subtitles as list_all_subtitles, region as subliminal_cache_region
|
||||
from subzero.language import Language
|
||||
from subzero.video import refine_video
|
||||
|
||||
from missing_subtitles import items_get_all_missing_subs, refresh_item
|
||||
from scheduler import scheduler
|
||||
from storage import save_subtitles, get_subtitle_storage
|
||||
from support.config import config
|
||||
from support.items import get_recent_items, get_item, is_ignored
|
||||
from support.items import get_recent_items, get_item, is_ignored, get_item_title
|
||||
from support.helpers import track_usage, get_title_for_video_metadata, cast_bool, PartUnknownException
|
||||
from support.plex_media import scan_videos, get_plex_metadata
|
||||
from download import download_best_subtitles
|
||||
|
||||
from support.plex_media import get_plex_metadata
|
||||
from support.scanning import scan_videos
|
||||
from download import download_best_subtitles, pre_download_hook, post_download_hook, language_hook
|
||||
|
||||
PROVIDER_SLACK = 30
|
||||
DL_PROVIDER_SLACK = 30
|
||||
@@ -97,7 +99,7 @@ class Task(object):
|
||||
|
||||
class SubtitleListingMixin(object):
|
||||
def list_subtitles(self, rating_key, item_type, part_id, language, skip_wrong_fps=True, metadata=None,
|
||||
scanned_parts=None):
|
||||
scanned_parts=None, air_date_cutoff=None):
|
||||
|
||||
if not metadata:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
@@ -105,18 +107,26 @@ class SubtitleListingMixin(object):
|
||||
if not metadata:
|
||||
return
|
||||
|
||||
providers = config.get_providers(media_type="series" if item_type == "episode" else "movies")
|
||||
if not scanned_parts:
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True, providers=providers)
|
||||
if not scanned_parts:
|
||||
Log.Error(u"%s: Couldn't list available subtitles for %s", self.name, rating_key)
|
||||
return
|
||||
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
refine_video(video, refiner_settings=config.refiner_settings)
|
||||
|
||||
if air_date_cutoff is not None and metadata["item"].year and \
|
||||
metadata["item"].year + air_date_cutoff < datetime.date.today().year:
|
||||
Log.Debug("Skipping searching for subtitles: %s, it aired over %s year(s) ago.", rating_key,
|
||||
air_date_cutoff)
|
||||
return
|
||||
|
||||
config.init_subliminal_patches()
|
||||
|
||||
provider_settings = config.provider_settings.copy()
|
||||
provider_settings = config.provider_settings
|
||||
if not skip_wrong_fps:
|
||||
provider_settings = config.provider_settings.copy()
|
||||
provider_settings["opensubtitles"]["skip_wrong_fps"] = False
|
||||
|
||||
if item_type == "episode":
|
||||
@@ -126,10 +136,14 @@ class SubtitleListingMixin(object):
|
||||
else:
|
||||
min_score = 60
|
||||
|
||||
available_subs = list_all_subtitles(scanned_parts, {Language.fromietf(language)},
|
||||
providers=config.providers,
|
||||
languages = {Language.fromietf(language)}
|
||||
|
||||
available_subs = list_all_subtitles([video], languages,
|
||||
providers=providers,
|
||||
provider_configs=provider_settings,
|
||||
pool_class=config.provider_pool)
|
||||
pool_class=config.provider_pool,
|
||||
throttle_callback=config.provider_throttle,
|
||||
language_hook=language_hook)
|
||||
|
||||
use_hearing_impaired = Prefs['subtitles.search.hearingImpaired'] in ("prefer", "force HI")
|
||||
|
||||
@@ -143,6 +157,11 @@ class SubtitleListingMixin(object):
|
||||
Log.Error(u"%s: Match computation failed for %s: %s", self.name, s, traceback.format_exc())
|
||||
continue
|
||||
|
||||
# skip wrong season/episodes
|
||||
if item_type == "episode" and not {"series", "season", "episode"}.issubset(matches):
|
||||
Log.Debug(u"%s: Skipping %s, because it doesn't match our series/episode", self.name, s)
|
||||
continue
|
||||
|
||||
unsorted_subtitles.append(
|
||||
(s, compute_score(matches, s, video, hearing_impaired=use_hearing_impaired), matches))
|
||||
scored_subtitles = sorted(unsorted_subtitles, key=operator.itemgetter(1), reverse=True)
|
||||
@@ -168,21 +187,36 @@ class DownloadSubtitleMixin(object):
|
||||
item_type = subtitle.item_type
|
||||
part_id = subtitle.part_id
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
providers = config.get_providers(media_type="series" if item_type == "episode" else "movies")
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True, providers=providers)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
pre_download_hook(subtitle)
|
||||
|
||||
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
|
||||
download_subtitles([subtitle], providers=config.providers, provider_configs=config.provider_settings,
|
||||
pool_class=config.provider_pool)
|
||||
download_subtitles([subtitle], providers=providers,
|
||||
provider_configs=config.provider_settings,
|
||||
pool_class=config.provider_pool, throttle_callback=config.provider_throttle)
|
||||
|
||||
post_download_hook(subtitle)
|
||||
|
||||
# may be redundant
|
||||
subtitle.pack_data = None
|
||||
|
||||
download_successful = False
|
||||
|
||||
if subtitle.content:
|
||||
try:
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode=mode, mods=config.default_mods)
|
||||
Log.Debug(u"%s: Manually downloaded subtitle for: %s", self.name, rating_key)
|
||||
if mode == "m":
|
||||
Log.Debug(u"%s: Manually downloaded subtitle for: %s", self.name, rating_key)
|
||||
track_usage("Subtitle", "manual", "download", 1)
|
||||
elif mode == "b":
|
||||
Log.Debug(u"%s: Downloaded better subtitle for: %s", self.name, rating_key)
|
||||
track_usage("Subtitle", "better", "download", 1)
|
||||
download_successful = True
|
||||
refresh_item(rating_key)
|
||||
track_usage("Subtitle", "manual", "download", 1)
|
||||
|
||||
except:
|
||||
Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s",
|
||||
self.name, traceback.format_exc())
|
||||
@@ -197,8 +231,13 @@ class DownloadSubtitleMixin(object):
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle,
|
||||
mode=mode)
|
||||
history.destroy()
|
||||
|
||||
# clear missing subtitles menu data
|
||||
if not scheduler.is_task_running("MissingSubtitles"):
|
||||
scheduler.clear_task_data("MissingSubtitles")
|
||||
else:
|
||||
set_refresh_menu_state(u"%s: Subtitle download failed (%s)", self.name, rating_key)
|
||||
set_refresh_menu_state(u"%s: Subtitle download failed (%s)" % (self.name, rating_key))
|
||||
return download_successful
|
||||
|
||||
|
||||
@@ -223,7 +262,12 @@ class AvailableSubsForItem(SubtitleListingMixin, Task):
|
||||
def run(self):
|
||||
super(AvailableSubsForItem, self).run()
|
||||
self.running = True
|
||||
track_usage("Subtitle", "manual", "list", 1)
|
||||
try:
|
||||
track_usage("Subtitle", "manual", "list", 1)
|
||||
except:
|
||||
Log.Error("Something went wrong with track_usage: %s", traceback.format_exc())
|
||||
|
||||
Log.Debug("Listing available subtitles for: %s", self.rating_key)
|
||||
subs = self.list_subtitles(self.rating_key, self.item_type, self.part_id, self.language, skip_wrong_fps=False)
|
||||
if not subs:
|
||||
self.data = "found_none"
|
||||
@@ -309,6 +353,8 @@ class SearchAllRecentlyAddedMissing(Task):
|
||||
now = datetime.datetime.now()
|
||||
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
|
||||
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
|
||||
series_providers = config.get_providers(media_type="series")
|
||||
movie_providers = config.get_providers(media_type="movies")
|
||||
|
||||
is_recent_str = Prefs["scheduler.item_is_recent_age"]
|
||||
num, ident = is_recent_str.split()
|
||||
@@ -320,24 +366,9 @@ class SearchAllRecentlyAddedMissing(Task):
|
||||
max_search_days = int(num) * 7
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
recent_sub_fns = subtitle_storage.get_recent_files(age_days=max_search_days)
|
||||
viable_items = {}
|
||||
recent_files = subtitle_storage.get_recent_files(age_days=max_search_days)
|
||||
|
||||
# determine viable items
|
||||
for fn in recent_sub_fns:
|
||||
# added_date <= max_search_days?
|
||||
stored_subs = subtitle_storage.load(filename=fn)
|
||||
if not stored_subs:
|
||||
continue
|
||||
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
continue
|
||||
|
||||
viable_items[fn] = stored_subs
|
||||
|
||||
subtitle_storage.destroy()
|
||||
|
||||
self.items_searching = len(viable_items)
|
||||
self.items_searching = len(recent_files)
|
||||
|
||||
download_count = 0
|
||||
videos_with_downloads = 0
|
||||
@@ -346,98 +377,129 @@ class SearchAllRecentlyAddedMissing(Task):
|
||||
|
||||
Log.Info(u"%s: Searching for subtitles for %s items", self.name, self.items_searching)
|
||||
|
||||
# search for subtitles in viable items
|
||||
for fn, stored_subs in viable_items.iteritems():
|
||||
video_id = stored_subs.video_id
|
||||
|
||||
if stored_subs.item_type == "episode":
|
||||
min_score = min_score_series
|
||||
else:
|
||||
min_score = min_score_movies
|
||||
|
||||
parts = []
|
||||
plex_item = get_item(video_id)
|
||||
|
||||
if not plex_item:
|
||||
Log.Info(u"%s: Item %s unknown, skipping", self.name, video_id)
|
||||
continue
|
||||
|
||||
if is_ignored(video_id, item=plex_item):
|
||||
continue
|
||||
|
||||
for media in plex_item.media:
|
||||
parts += media.parts
|
||||
|
||||
downloads_per_video = 0
|
||||
hit_providers = False
|
||||
for part in parts:
|
||||
part_id = part.id
|
||||
|
||||
try:
|
||||
metadata = get_plex_metadata(video_id, part_id, stored_subs.item_type)
|
||||
except PartUnknownException:
|
||||
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
|
||||
continue
|
||||
|
||||
if not metadata:
|
||||
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
|
||||
continue
|
||||
|
||||
Log.Debug(u"%s: Looking for missing subtitles: %s:%s", self.name, video_id, part_id)
|
||||
scanned_parts = scan_videos([metadata], kind="series"
|
||||
if stored_subs.item_type == "episode" else "movie")
|
||||
|
||||
downloaded_subtitles = download_best_subtitles(scanned_parts, min_score=min_score)
|
||||
hit_providers = downloaded_subtitles is not None
|
||||
download_successful = False
|
||||
|
||||
if downloaded_subtitles:
|
||||
downloaded_any = any(downloaded_subtitles.values())
|
||||
if not downloaded_any:
|
||||
continue
|
||||
|
||||
try:
|
||||
save_subtitles(scanned_parts, downloaded_subtitles, mode="a", mods=config.default_mods)
|
||||
Log.Debug(u"%s: Downloaded subtitle for item with missing subs: %s", self.name, video_id)
|
||||
download_successful = True
|
||||
refresh_item(video_id)
|
||||
track_usage("Subtitle", "manual", "download", 1)
|
||||
except:
|
||||
Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s", self.name,
|
||||
traceback.format_exc())
|
||||
finally:
|
||||
item_title = get_title_for_video_metadata(metadata, add_section_title=False)
|
||||
if download_successful:
|
||||
# store item in history
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
|
||||
for subtitle in video_subtitles:
|
||||
downloads_per_video += 1
|
||||
history.add(item_title, video.id, section_title=metadata["section"],
|
||||
subtitle=subtitle,
|
||||
mode="a")
|
||||
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
time.sleep(PROVIDER_SLACK)
|
||||
|
||||
download_count += downloads_per_video
|
||||
|
||||
if downloads_per_video:
|
||||
videos_with_downloads += 1
|
||||
|
||||
self.items_done = self.items_done + 1
|
||||
def skip_item():
|
||||
self.items_searching = self.items_searching - 1
|
||||
self.percentage = int(self.items_done * 100 / self.items_searching)
|
||||
|
||||
if downloads_per_video:
|
||||
Log.Debug(u"%s: Subtitles have been downloaded, "
|
||||
u"waiting %s seconds before continuing", self.name, DL_PROVIDER_SLACK)
|
||||
time.sleep(DL_PROVIDER_SLACK)
|
||||
else:
|
||||
if hit_providers:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
time.sleep(PROVIDER_SLACK)
|
||||
# search for subtitles in viable items
|
||||
try:
|
||||
for fn in recent_files:
|
||||
stored_subs = subtitle_storage.load(filename=fn)
|
||||
video_id = stored_subs.video_id
|
||||
if not stored_subs:
|
||||
Log.Debug("Skipping item %s because storage is empty", video_id)
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
# added_date <= max_search_days?
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
Log.Debug("Skipping item %s because it's too old", video_id)
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
if stored_subs.item_type == "episode":
|
||||
min_score = min_score_series
|
||||
providers = series_providers
|
||||
else:
|
||||
min_score = min_score_movies
|
||||
providers = movie_providers
|
||||
|
||||
parts = []
|
||||
plex_item = get_item(video_id)
|
||||
|
||||
if not plex_item:
|
||||
Log.Info(u"%s: Item %s unknown, skipping", self.name, video_id)
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
if is_ignored(video_id, item=plex_item):
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
for media in plex_item.media:
|
||||
parts += media.parts
|
||||
|
||||
downloads_per_video = 0
|
||||
hit_providers = False
|
||||
for part in parts:
|
||||
part_id = part.id
|
||||
|
||||
try:
|
||||
metadata = get_plex_metadata(video_id, part_id, stored_subs.item_type)
|
||||
except PartUnknownException:
|
||||
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
|
||||
continue
|
||||
|
||||
if not metadata:
|
||||
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
|
||||
continue
|
||||
|
||||
Log.Debug(u"%s: Looking for missing subtitles: %s", self.name, get_item_title(plex_item))
|
||||
scanned_parts = scan_videos([metadata], providers=providers)
|
||||
|
||||
downloaded_subtitles = download_best_subtitles(scanned_parts, min_score=min_score,
|
||||
providers=providers)
|
||||
hit_providers = downloaded_subtitles is not None
|
||||
download_successful = False
|
||||
|
||||
if downloaded_subtitles:
|
||||
downloaded_any = any(downloaded_subtitles.values())
|
||||
if not downloaded_any:
|
||||
continue
|
||||
|
||||
try:
|
||||
save_subtitles(scanned_parts, downloaded_subtitles, mode="a", mods=config.default_mods)
|
||||
Log.Debug(u"%s: Downloaded subtitle for item with missing subs: %s", self.name, video_id)
|
||||
download_successful = True
|
||||
refresh_item(video_id)
|
||||
track_usage("Subtitle", "manual", "download", 1)
|
||||
except:
|
||||
Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s", self.name,
|
||||
traceback.format_exc())
|
||||
finally:
|
||||
scanned_parts = None
|
||||
try:
|
||||
item_title = get_title_for_video_metadata(metadata, add_section_title=False)
|
||||
if download_successful:
|
||||
# store item in history
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
|
||||
for subtitle in video_subtitles:
|
||||
downloads_per_video += 1
|
||||
history.add(item_title, video.id, section_title=metadata["section"],
|
||||
subtitle=subtitle,
|
||||
mode="a")
|
||||
|
||||
downloaded_subtitles = None
|
||||
except:
|
||||
Log.Error(u"%s: DEBUG HIT: %s", self.name, traceback.format_exc())
|
||||
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
Thread.Sleep(PROVIDER_SLACK)
|
||||
|
||||
download_count += downloads_per_video
|
||||
|
||||
if downloads_per_video:
|
||||
videos_with_downloads += 1
|
||||
|
||||
self.items_done = self.items_done + 1
|
||||
self.percentage = int(self.items_done * 100 / self.items_searching)
|
||||
|
||||
stored_subs = None
|
||||
|
||||
if downloads_per_video:
|
||||
Log.Debug(u"%s: Subtitles have been downloaded, "
|
||||
u"waiting %s seconds before continuing", self.name, DL_PROVIDER_SLACK)
|
||||
Thread.Sleep(DL_PROVIDER_SLACK)
|
||||
else:
|
||||
if hit_providers:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
Thread.Sleep(PROVIDER_SLACK)
|
||||
finally:
|
||||
subtitle_storage.destroy()
|
||||
history.destroy()
|
||||
|
||||
if download_count:
|
||||
Log.Debug(u"%s: done. Missing subtitles found for %s/%s items (%s subs downloaded)", self.name,
|
||||
@@ -453,6 +515,103 @@ class SearchAllRecentlyAddedMissing(Task):
|
||||
self.items_searching = None
|
||||
|
||||
|
||||
class LegacySearchAllRecentlyAddedMissing(Task):
|
||||
periodic = True
|
||||
frequency = "never"
|
||||
items_done = None
|
||||
items_searching = None
|
||||
items_searching_ids = None
|
||||
items_failed = None
|
||||
percentage = 0
|
||||
|
||||
stall_time = 30
|
||||
|
||||
def __init__(self):
|
||||
super(LegacySearchAllRecentlyAddedMissing, self).__init__()
|
||||
self.items_done = None
|
||||
self.items_searching = None
|
||||
self.items_searching_ids = None
|
||||
self.items_failed = None
|
||||
self.percentage = 0
|
||||
|
||||
def signal(self, signal_name, *args, **kwargs):
|
||||
handler = getattr(self, "signal_%s" % signal_name)
|
||||
return handler(*args, **kwargs) if handler else None
|
||||
|
||||
def signal_updated_metadata(self, *args, **kwargs):
|
||||
item_id = int(args[0])
|
||||
|
||||
if self.items_searching_ids is not None and item_id in self.items_searching_ids:
|
||||
self.items_done.append(item_id)
|
||||
return True
|
||||
|
||||
def prepare(self, *args, **kwargs):
|
||||
self.items_done = []
|
||||
recent_items = get_recent_items()
|
||||
missing = items_get_all_missing_subs(recent_items, sleep_after_request=0.2)
|
||||
ids = set([id for added_at, id, title, item, missing_languages in missing if not is_ignored(id, item=item)])
|
||||
self.items_searching = missing
|
||||
self.items_searching_ids = ids
|
||||
self.items_failed = []
|
||||
self.percentage = 0
|
||||
self.ready_for_display = True
|
||||
|
||||
def run(self):
|
||||
super(LegacySearchAllRecentlyAddedMissing, self).run()
|
||||
self.running = True
|
||||
missing_count = len(self.items_searching)
|
||||
items_done_count = 0
|
||||
|
||||
for added_at, item_id, title, item, missing_languages in self.items_searching:
|
||||
Log.Debug(u"Task: %s, triggering refresh for %s (%s)", self.name, title, item_id)
|
||||
try:
|
||||
refresh_item(item_id)
|
||||
except URLError:
|
||||
# timeout
|
||||
pass
|
||||
search_started = datetime.datetime.now()
|
||||
tries = 1
|
||||
while 1:
|
||||
if item_id in self.items_done:
|
||||
items_done_count += 1
|
||||
self.percentage = int(items_done_count * 100 / missing_count)
|
||||
Log.Debug(u"Task: %s, item %s done (%s%%, %s/%s)", self.name, item_id, self.percentage,
|
||||
items_done_count, missing_count)
|
||||
break
|
||||
|
||||
# item considered stalled after self.stall_time seconds passed after last refresh
|
||||
if (datetime.datetime.now() - search_started).total_seconds() > self.stall_time:
|
||||
if tries > 3:
|
||||
self.items_failed.append(item_id)
|
||||
Log.Debug(u"Task: %s, item stalled for %s times: %s, skipping", self.name, tries, item_id)
|
||||
break
|
||||
|
||||
Log.Debug(u"Task: %s, item stalled for %s seconds: %s, retrying", self.name, self.stall_time,
|
||||
item_id)
|
||||
tries += 1
|
||||
try:
|
||||
refresh_item(item_id)
|
||||
except URLError:
|
||||
pass
|
||||
search_started = datetime.datetime.now()
|
||||
Thread.Sleep(1)
|
||||
Thread.Sleep(0.1)
|
||||
# we can't hammer the PMS, otherwise requests will be stalled
|
||||
Thread.Sleep(5)
|
||||
|
||||
Log.Debug("Task: %s, done (%s%%, %s/%s). Failed items: %s", self.name, self.percentage,
|
||||
items_done_count, missing_count, self.items_failed)
|
||||
|
||||
def post_run(self, task_data):
|
||||
super(LegacySearchAllRecentlyAddedMissing, self).post_run(task_data)
|
||||
self.ready_for_display = False
|
||||
self.percentage = 0
|
||||
self.items_done = None
|
||||
self.items_failed = None
|
||||
self.items_searching = None
|
||||
self.items_searching_ids = None
|
||||
|
||||
|
||||
class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
periodic = True
|
||||
|
||||
@@ -487,129 +646,146 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
overwrite_manually_selected = cast_bool(
|
||||
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected"])
|
||||
|
||||
air_date_cutoff_pref = Prefs["scheduler.tasks.FindBetterSubtitles.air_date_cutoff"]
|
||||
if air_date_cutoff_pref == "don't limit":
|
||||
air_date_cutoff = None
|
||||
else:
|
||||
air_date_cutoff = int(air_date_cutoff_pref.split()[0])
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
recent_subs = subtitle_storage.load_recent_files(age_days=max_search_days)
|
||||
viable_item_count = 0
|
||||
|
||||
for fn, stored_subs in recent_subs.iteritems():
|
||||
video_id = stored_subs.video_id
|
||||
try:
|
||||
for fn in subtitle_storage.get_recent_files(age_days=max_search_days):
|
||||
stored_subs = subtitle_storage.load(filename=fn)
|
||||
if not stored_subs:
|
||||
continue
|
||||
|
||||
if stored_subs.item_type == "episode":
|
||||
cutoff = self.series_cutoff
|
||||
min_score = min_score_series
|
||||
else:
|
||||
cutoff = self.movies_cutoff
|
||||
min_score = min_score_movies
|
||||
video_id = stored_subs.video_id
|
||||
|
||||
# don't search for better subtitles until at least 30 minutes have passed
|
||||
if stored_subs.added_at + datetime.timedelta(minutes=30) > now:
|
||||
Log.Debug(u"%s: Item %s too new, skipping", self.name, video_id)
|
||||
continue
|
||||
if stored_subs.item_type == "episode":
|
||||
cutoff = self.series_cutoff
|
||||
min_score = min_score_series
|
||||
else:
|
||||
cutoff = self.movies_cutoff
|
||||
min_score = min_score_movies
|
||||
|
||||
# added_date <= max_search_days?
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
continue
|
||||
# don't search for better subtitles until at least 30 minutes have passed
|
||||
if stored_subs.added_at + datetime.timedelta(minutes=30) > now:
|
||||
Log.Debug(u"%s: Item %s too new, skipping", self.name, video_id)
|
||||
continue
|
||||
|
||||
viable_item_count += 1
|
||||
ditch_parts = []
|
||||
# added_date <= max_search_days?
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
continue
|
||||
|
||||
# look through all stored subtitle data
|
||||
for part_id, languages in stored_subs.parts.iteritems():
|
||||
part_id = str(part_id)
|
||||
viable_item_count += 1
|
||||
ditch_parts = []
|
||||
|
||||
# all languages
|
||||
for language, current_subs in languages.iteritems():
|
||||
current_key = current_subs.get("current")
|
||||
current = current_subs.get(current_key)
|
||||
# look through all stored subtitle data
|
||||
for part_id, languages in stored_subs.parts.iteritems():
|
||||
part_id = str(part_id)
|
||||
|
||||
# currently got subtitle?
|
||||
if not current:
|
||||
continue
|
||||
current_score = current.score
|
||||
current_mode = current.mode
|
||||
# all languages
|
||||
for language, current_subs in languages.iteritems():
|
||||
current_key = current_subs.get("current")
|
||||
current = current_subs.get(current_key)
|
||||
|
||||
# late cutoff met? skip
|
||||
if current_score >= cutoff:
|
||||
Log.Debug(u"%s: Skipping finding better subs, "
|
||||
u"cutoff met (current: %s, cutoff: %s): %s (%s)",
|
||||
self.name, current_score, cutoff, stored_subs.title, video_id)
|
||||
continue
|
||||
# currently got subtitle?
|
||||
# fixme: check for existence
|
||||
if not current:
|
||||
continue
|
||||
current_score = current.score
|
||||
current_mode = current.mode
|
||||
|
||||
# got manual subtitle but don't want to touch those?
|
||||
if current_mode == "m" and not overwrite_manually_selected:
|
||||
Log.Debug(u"%s: Skipping finding better subs, "
|
||||
u"had manual: %s (%s)", self.name, stored_subs.title, video_id)
|
||||
continue
|
||||
# late cutoff met? skip
|
||||
if current_score >= cutoff:
|
||||
Log.Debug(u"%s: Skipping finding better subs, "
|
||||
u"cutoff met (current: %s, cutoff: %s): %s (%s)",
|
||||
self.name, current_score, cutoff, stored_subs.title, video_id)
|
||||
continue
|
||||
|
||||
# subtitle modifications different from default
|
||||
if not overwrite_manually_modified and current.mods \
|
||||
and set(current.mods).difference(set(config.default_mods)):
|
||||
Log.Debug(u"%s: Skipping finding better subs, it has manual modifications: %s (%s)",
|
||||
self.name, stored_subs.title, video_id)
|
||||
continue
|
||||
# got manual subtitle but don't want to touch those?
|
||||
if current_mode == "m" and not overwrite_manually_selected:
|
||||
Log.Debug(u"%s: Skipping finding better subs, "
|
||||
u"had manual: %s (%s)", self.name, stored_subs.title, video_id)
|
||||
continue
|
||||
|
||||
try:
|
||||
subs = self.list_subtitles(video_id, stored_subs.item_type, part_id, language)
|
||||
except PartUnknownException:
|
||||
Log.Info(u"%s: Part %s unknown/gone; ditching subtitle info", self.name, part_id)
|
||||
ditch_parts.append(part_id)
|
||||
continue
|
||||
# subtitle modifications different from default
|
||||
if not overwrite_manually_modified and current.mods \
|
||||
and set(current.mods).difference(set(config.default_mods)):
|
||||
Log.Debug(u"%s: Skipping finding better subs, it has manual modifications: %s (%s)",
|
||||
self.name, stored_subs.title, video_id)
|
||||
continue
|
||||
|
||||
hit_providers = subs is not None
|
||||
try:
|
||||
subs = self.list_subtitles(video_id, stored_subs.item_type, part_id, language,
|
||||
air_date_cutoff=air_date_cutoff)
|
||||
except PartUnknownException:
|
||||
Log.Info(u"%s: Part %s unknown/gone; ditching subtitle info", self.name, part_id)
|
||||
ditch_parts.append(part_id)
|
||||
continue
|
||||
|
||||
if subs:
|
||||
# subs are already sorted by score
|
||||
better_downloaded = False
|
||||
better_tried_download = 0
|
||||
better_visited = 0
|
||||
for sub in subs:
|
||||
if sub.score > current_score and sub.score > min_score:
|
||||
Log.Debug(u"%s: Better subtitle found for %s, downloading", self.name, video_id)
|
||||
better_tried_download += 1
|
||||
ret = self.download_subtitle(sub, video_id, mode="b")
|
||||
if ret:
|
||||
better_found += 1
|
||||
better_downloaded = True
|
||||
break
|
||||
else:
|
||||
Log.Debug(u"%s: Couldn't download/save subtitle. "
|
||||
u"Continuing to the next one", self.name)
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing",
|
||||
self.name, DL_PROVIDER_SLACK)
|
||||
time.sleep(DL_PROVIDER_SLACK)
|
||||
better_visited += 1
|
||||
hit_providers = subs is not None
|
||||
|
||||
if better_tried_download and not better_downloaded:
|
||||
Log.Debug(u"%s: Tried downloading better subtitle for %s, "
|
||||
u"but every try failed.", self.name, video_id)
|
||||
if subs:
|
||||
# subs are already sorted by score
|
||||
better_downloaded = False
|
||||
better_tried_download = 0
|
||||
better_visited = 0
|
||||
for sub in subs:
|
||||
if sub.score > current_score and sub.score > min_score:
|
||||
Log.Debug(u"%s: Better subtitle found for %s, downloading", self.name, video_id)
|
||||
better_tried_download += 1
|
||||
ret = self.download_subtitle(sub, video_id, mode="b")
|
||||
if ret:
|
||||
better_found += 1
|
||||
better_downloaded = True
|
||||
break
|
||||
else:
|
||||
Log.Debug(u"%s: Couldn't download/save subtitle. "
|
||||
u"Continuing to the next one", self.name)
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing",
|
||||
self.name, DL_PROVIDER_SLACK)
|
||||
Thread.Sleep(DL_PROVIDER_SLACK)
|
||||
better_visited += 1
|
||||
|
||||
elif better_downloaded:
|
||||
Log.Debug(u"%s: Better subtitle downloaded for %s", self.name, video_id)
|
||||
if better_tried_download and not better_downloaded:
|
||||
Log.Debug(u"%s: Tried downloading better subtitle for %s, "
|
||||
u"but every try failed.", self.name, video_id)
|
||||
|
||||
if better_tried_download or better_downloaded:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, DL_PROVIDER_SLACK)
|
||||
time.sleep(DL_PROVIDER_SLACK)
|
||||
elif better_downloaded:
|
||||
Log.Debug(u"%s: Better subtitle downloaded for %s", self.name, video_id)
|
||||
|
||||
elif better_visited:
|
||||
if better_tried_download or better_downloaded:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, DL_PROVIDER_SLACK)
|
||||
Thread.Sleep(DL_PROVIDER_SLACK)
|
||||
|
||||
elif better_visited:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
Thread.Sleep(PROVIDER_SLACK)
|
||||
|
||||
subs = None
|
||||
|
||||
elif hit_providers:
|
||||
# hit the providers but didn't try downloading? wait.
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
time.sleep(PROVIDER_SLACK)
|
||||
Thread.Sleep(PROVIDER_SLACK)
|
||||
|
||||
elif hit_providers:
|
||||
# hit the providers but didn't try downloading? wait.
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
time.sleep(PROVIDER_SLACK)
|
||||
if ditch_parts:
|
||||
for part_id in ditch_parts:
|
||||
try:
|
||||
del stored_subs.parts[part_id]
|
||||
except KeyError:
|
||||
pass
|
||||
subtitle_storage.save(stored_subs)
|
||||
ditch_parts = None
|
||||
|
||||
if ditch_parts:
|
||||
for part_id in ditch_parts:
|
||||
try:
|
||||
del stored_subs.parts[part_id]
|
||||
except KeyError:
|
||||
pass
|
||||
subtitle_storage.save(stored_subs)
|
||||
stored_subs = None
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
subtitle_storage.destroy()
|
||||
Thread.Sleep(1)
|
||||
finally:
|
||||
subtitle_storage.destroy()
|
||||
|
||||
if better_found:
|
||||
Log.Debug(u"%s: done. Better subtitles found for %s/%s items", self.name, better_found,
|
||||
@@ -674,6 +850,38 @@ class MigrateSubtitleStorage(Task):
|
||||
storage.destroy()
|
||||
|
||||
|
||||
class CacheMaintenance(Task):
|
||||
periodic = True
|
||||
frequency = "every 1 days"
|
||||
|
||||
main_cache_validity = 14 # days
|
||||
pack_cache_validity = 4 # days
|
||||
|
||||
def run(self):
|
||||
super(CacheMaintenance, self).run()
|
||||
self.running = True
|
||||
Log.Info(u"%s: Running cache maintenance", self.name)
|
||||
now = datetime.datetime.now()
|
||||
|
||||
def remove_expired(path, expiry):
|
||||
mtime = datetime.datetime.fromtimestamp(os.path.getmtime(path))
|
||||
if mtime + datetime.timedelta(days=expiry) < now:
|
||||
try:
|
||||
os.remove(path)
|
||||
except (IOError, OSError):
|
||||
Log.Debug("Couldn't remove cache file: %s", os.path.basename(path))
|
||||
|
||||
# main cache
|
||||
if config.new_style_cache:
|
||||
for fn in subliminal_cache_region.backend.all_filenames:
|
||||
remove_expired(fn, self.main_cache_validity)
|
||||
|
||||
# archive cache
|
||||
for fn in glob.iglob(os.path.join(config.pack_cache_dir, "*.archive")):
|
||||
remove_expired(fn, self.pack_cache_validity)
|
||||
|
||||
|
||||
scheduler.register(LegacySearchAllRecentlyAddedMissing)
|
||||
scheduler.register(SearchAllRecentlyAddedMissing)
|
||||
scheduler.register(AvailableSubsForItem)
|
||||
scheduler.register(DownloadSubtitleForItem)
|
||||
@@ -682,3 +890,4 @@ scheduler.register(FindBetterSubtitles)
|
||||
scheduler.register(SubtitleStorageMaintenance)
|
||||
scheduler.register(MigrateSubtitleStorage)
|
||||
scheduler.register(MenuHistoryMaintenance)
|
||||
scheduler.register(CacheMaintenance)
|
||||
|
||||
+143
-46
@@ -1,6 +1,6 @@
|
||||
[
|
||||
{
|
||||
"id": "langPref1",
|
||||
"id": "langPref1a",
|
||||
"label": "Subtitle Language (1)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
@@ -55,7 +55,7 @@
|
||||
"default": "en"
|
||||
},
|
||||
{
|
||||
"id": "langPref2",
|
||||
"id": "langPref2a",
|
||||
"label": "Subtitle Language (2)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
@@ -111,7 +111,7 @@
|
||||
"default": "None"
|
||||
},
|
||||
{
|
||||
"id": "langPref3",
|
||||
"id": "langPref3a",
|
||||
"label": "Subtitle Language (3)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
@@ -179,11 +179,17 @@
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.language.ietf",
|
||||
"label": "Treat IETF language tags as ISO 639-1 (e.g. pt-BR = pt)",
|
||||
"id": "subtitles.language.ietf_display",
|
||||
"label": "Display languages with country attribute as ISO 639-1 (e.g. pt-BR = pt)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.language.ietf_normalize",
|
||||
"label": "Treat languages with country attribute as ISO 639-1 (e.g. don't download pt-BR if pt subtitle exists)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.only_one",
|
||||
"label": "Restrict to one language (skips adding \".lang.\" to the subtitle filename; only uses \"Subtitle Language (1)\")",
|
||||
@@ -196,6 +202,50 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "media_rename1",
|
||||
"label": "I rename my files using",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"Sonarr/Radarr (fill api info below)",
|
||||
"Filebot",
|
||||
"Sonarr/Radarr/Filebot",
|
||||
"Symlink to original file",
|
||||
"I keep the original filenames",
|
||||
"none of the above"
|
||||
],
|
||||
"default": "I keep the original filenames"
|
||||
},
|
||||
{
|
||||
"id": "use_file_info_file",
|
||||
"label": "Retrieve original filename from .file_info/file_info index files (see wiki)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "drone_api.sonarr.url",
|
||||
"label": "Sonarr URL (add URL base if configured)",
|
||||
"type": "text",
|
||||
"default": "http://127.0.0.1:8989"
|
||||
},
|
||||
{
|
||||
"id": "drone_api.sonarr.api_key",
|
||||
"label": "Sonarr API key",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "drone_api.radarr.url",
|
||||
"label": "Radarr URL (add URL base if configured, min. version: 0.2.0.897)",
|
||||
"type": "text",
|
||||
"default": "http://127.0.0.1:7878"
|
||||
},
|
||||
{
|
||||
"id": "drone_api.radarr.api_key",
|
||||
"label": "Radarr API key",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.enabled",
|
||||
"label": "Provider: Enable OpenSubtitles",
|
||||
@@ -204,7 +254,7 @@
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.username",
|
||||
"label": "Opensubtitles Username (VIP)",
|
||||
"label": "Opensubtitles Username",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
@@ -216,12 +266,24 @@
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.is_vip",
|
||||
"label": "OpenSubtitles VIP? (ad-free subs, 1000 subs/day, no-cache VIP server: http://v.ht/osvip)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.podnapisi.enabled",
|
||||
"label": "Provider: Enable Podnapisi.NET",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.titlovi.enabled",
|
||||
"label": "Provider: Enable Titlovi.com",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.enabled",
|
||||
"label": "Provider: Enable Addic7ed",
|
||||
@@ -243,7 +305,7 @@
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.boost_by1",
|
||||
"id": "provider.addic7ed.boost_by2",
|
||||
"label": "Addic7ed: boost score (if requirements met)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
@@ -266,12 +328,13 @@
|
||||
"25",
|
||||
"21",
|
||||
"20",
|
||||
"19",
|
||||
"15",
|
||||
"10",
|
||||
"5",
|
||||
"0"
|
||||
],
|
||||
"default": "21"
|
||||
"default": "19"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.use_random_agents",
|
||||
@@ -312,31 +375,11 @@
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.shooter.enabled",
|
||||
"label": "Provider: Enable Shooter.cn (Chinese)",
|
||||
"id": "provider.subscene.enabled",
|
||||
"label": "Provider: Enable SubScene",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.subscenter.enabled",
|
||||
"label": "Provider: Enable SubsCenter (Hebrew)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.subscenter.username",
|
||||
"label": "SubsCenter Username",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.subscenter.password",
|
||||
"label": "SubsCenter Password",
|
||||
"type": "text",
|
||||
"option": "hidden",
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "providers.multithreading",
|
||||
"label": "Search enabled providers simultaneously (multithreading)",
|
||||
@@ -344,32 +387,26 @@
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.use_tags",
|
||||
"label": "I keep the exact (release-) filename of my media files",
|
||||
"id": "subtitles.embedded.autoextract",
|
||||
"label": "Automatically extract and use embedded subtitles upon media addition (with configured default mods)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.embedded",
|
||||
"label": "Scan: include embedded subtitles (in the media file (MKV/MP4), don't download if existing)",
|
||||
"label": "Don't search for subtitles of a language if there are embedded subtitles inside the media file (MKV/MP4)?",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.external",
|
||||
"label": "Scan: include external subtitles (metadata/filesystem, don't download if existing)",
|
||||
"label": "Don't search for subtitles of a language if they already exist on the filesystem (metadata/filesystem)?",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.exotic_ext",
|
||||
"label": "Scan: include \"exotic\" subtitle formats (anything else than .srt/.ssa/.ass/.vtt; embedded or external)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.filename_strictness",
|
||||
"label": "Scan: which external subtitles should be picked up?",
|
||||
"label": "How strict should these subtitles existing on the filesystem be detected?",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"exact: media filename match",
|
||||
@@ -378,6 +415,12 @@
|
||||
],
|
||||
"default": "loose: filename contains media filename"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.exotic_ext",
|
||||
"label": "Include non-text subtitle formats (anything else than .srt/.ssa/.ass/.vtt; embedded or external) in the above?",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.search.minimumTVScore2",
|
||||
"label": "Minimum score for TV (min: 240, def/sane: 337, min-ideal: 352; see http://v.ht/szscores)",
|
||||
@@ -408,6 +451,12 @@
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.remove_tags",
|
||||
"label": "Remove style tags from downloaded subtitles (bold, italic, underline, colors, ...)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.fix_common",
|
||||
"label": "Fix common whitespace/punctuation issues in subtitles",
|
||||
@@ -518,8 +567,6 @@
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"never",
|
||||
"every 1 hours",
|
||||
"every 3 hours",
|
||||
"every 6 hours",
|
||||
"every 12 hours",
|
||||
"every 24 hours"
|
||||
@@ -540,7 +587,8 @@
|
||||
"3 weeks",
|
||||
"4 weeks",
|
||||
"5 weeks",
|
||||
"6 weeks"
|
||||
"6 weeks",
|
||||
"12 weeks"
|
||||
],
|
||||
"default": "2 weeks"
|
||||
},
|
||||
@@ -568,11 +616,30 @@
|
||||
"type": "text",
|
||||
"default": "7"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.air_date_cutoff",
|
||||
"label": "Scheduler: Don't search for better subtitles if the item's air date is older than",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"don't limit",
|
||||
"1 year",
|
||||
"2 years",
|
||||
"3 years",
|
||||
"4 years",
|
||||
"5 years",
|
||||
"6 years",
|
||||
"7 years",
|
||||
"8 years",
|
||||
"9 years",
|
||||
"10 years"
|
||||
],
|
||||
"default": "1 year"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected",
|
||||
"label": "Scheduler: Overwrite manually selected subtitles when better found",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_modified",
|
||||
@@ -665,6 +732,30 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "new_style_cache",
|
||||
"label": "Use new style caching (for subliminal)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "low_impact_mode",
|
||||
"label": "Low impact mode (for remote filesystems)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "pms_request_timeout",
|
||||
"label": "Timeout for API requests sent to the PMS",
|
||||
"type": "text",
|
||||
"default": "15"
|
||||
},
|
||||
{
|
||||
"id": "proxy",
|
||||
"label": "HTTP proxy to use for providers (supports credentials)",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "log_level",
|
||||
"label": "How verbose should the logging be?",
|
||||
@@ -678,6 +769,12 @@
|
||||
],
|
||||
"default": "WARNING"
|
||||
},
|
||||
{
|
||||
"id": "log_rotate_keep",
|
||||
"label": "How many log backups to keep?",
|
||||
"type": "text",
|
||||
"default": "5"
|
||||
},
|
||||
{
|
||||
"id": "log_debug_mods",
|
||||
"label": "Log subtitle modification (debug)",
|
||||
|
||||
+3
-3
@@ -9,11 +9,11 @@
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>2.0.25</string>
|
||||
<string>2.5.0</string>
|
||||
<key>CFBundleSignature</key>
|
||||
<string>????</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>2.0.25.1635</string>
|
||||
<string>2.5.0.2241</string>
|
||||
<key>PlexFrameworkVersion</key>
|
||||
<string>2</string>
|
||||
<key>PlexPluginClass</key>
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
<h1>Sub-Zero for Plex</h1><i>Subtitles done right</i>
|
||||
|
||||
Version 2.0.25.1635
|
||||
Version 2.5.0.2241
|
||||
|
||||
Originally based on @bramwalet's awesome <a href="https://github.com/bramwalet/Subliminal.bundle">Subliminal.bundle</a>
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -4,7 +4,6 @@
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from . import basestr
|
||||
|
||||
@@ -5,26 +5,31 @@ http://www.crummy.com/software/BeautifulSoup/
|
||||
|
||||
Beautiful Soup uses a pluggable XML or HTML parser to parse a
|
||||
(possibly invalid) document into a tree representation. Beautiful Soup
|
||||
provides provides methods and Pythonic idioms that make it easy to
|
||||
navigate, search, and modify the parse tree.
|
||||
provides methods and Pythonic idioms that make it easy to navigate,
|
||||
search, and modify the parse tree.
|
||||
|
||||
Beautiful Soup works with Python 2.6 and up. It works better if lxml
|
||||
Beautiful Soup works with Python 2.7 and up. It works better if lxml
|
||||
and/or html5lib is installed.
|
||||
|
||||
For more than you ever wanted to know about Beautiful Soup, see the
|
||||
documentation:
|
||||
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
|
||||
"""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__author__ = "Leonard Richardson (leonardr@segfault.org)"
|
||||
__version__ = "4.4.1"
|
||||
__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson"
|
||||
__version__ = "4.6.0"
|
||||
__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
|
||||
__license__ = "MIT"
|
||||
|
||||
__all__ = ['BeautifulSoup']
|
||||
|
||||
import os
|
||||
import re
|
||||
import traceback
|
||||
import warnings
|
||||
|
||||
from .builder import builder_registry, ParserRejectedMarkup
|
||||
@@ -77,7 +82,7 @@ class BeautifulSoup(Tag):
|
||||
|
||||
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
|
||||
|
||||
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
|
||||
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
|
||||
|
||||
def __init__(self, markup="", features=None, builder=None,
|
||||
parse_only=None, from_encoding=None, exclude_encodings=None,
|
||||
@@ -137,6 +142,10 @@ class BeautifulSoup(Tag):
|
||||
from_encoding = from_encoding or deprecated_argument(
|
||||
"fromEncoding", "from_encoding")
|
||||
|
||||
if from_encoding and isinstance(markup, unicode):
|
||||
warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
|
||||
from_encoding = None
|
||||
|
||||
if len(kwargs) > 0:
|
||||
arg = kwargs.keys().pop()
|
||||
raise TypeError(
|
||||
@@ -161,19 +170,29 @@ class BeautifulSoup(Tag):
|
||||
markup_type = "XML"
|
||||
else:
|
||||
markup_type = "HTML"
|
||||
|
||||
caller = traceback.extract_stack()[0]
|
||||
filename = caller[0]
|
||||
line_number = caller[1]
|
||||
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
|
||||
filename=filename,
|
||||
line_number=line_number,
|
||||
parser=builder.NAME,
|
||||
markup_type=markup_type))
|
||||
|
||||
self.builder = builder
|
||||
self.is_xml = builder.is_xml
|
||||
self.known_xml = self.is_xml
|
||||
self.builder.soup = self
|
||||
|
||||
self.parse_only = parse_only
|
||||
|
||||
if hasattr(markup, 'read'): # It's a file-type object.
|
||||
markup = markup.read()
|
||||
elif len(markup) <= 256:
|
||||
elif len(markup) <= 256 and (
|
||||
(isinstance(markup, bytes) and not b'<' in markup)
|
||||
or (isinstance(markup, unicode) and not u'<' in markup)
|
||||
):
|
||||
# Print out warnings for a couple beginner problems
|
||||
# involving passing non-markup to Beautiful Soup.
|
||||
# Beautiful Soup will still parse the input as markup,
|
||||
@@ -195,16 +214,10 @@ class BeautifulSoup(Tag):
|
||||
if isinstance(markup, unicode):
|
||||
markup = markup.encode("utf8")
|
||||
warnings.warn(
|
||||
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
|
||||
if markup[:5] == "http:" or markup[:6] == "https:":
|
||||
# TODO: This is ugly but I couldn't get it to work in
|
||||
# Python 3 otherwise.
|
||||
if ((isinstance(markup, bytes) and not b' ' in markup)
|
||||
or (isinstance(markup, unicode) and not u' ' in markup)):
|
||||
if isinstance(markup, unicode):
|
||||
markup = markup.encode("utf8")
|
||||
warnings.warn(
|
||||
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
|
||||
'"%s" looks like a filename, not markup. You should'
|
||||
' probably open this file and pass the filehandle into'
|
||||
' Beautiful Soup.' % markup)
|
||||
self._check_markup_is_url(markup)
|
||||
|
||||
for (self.markup, self.original_encoding, self.declared_html_encoding,
|
||||
self.contains_replacement_characters) in (
|
||||
@@ -223,15 +236,52 @@ class BeautifulSoup(Tag):
|
||||
self.builder.soup = None
|
||||
|
||||
def __copy__(self):
|
||||
return type(self)(self.encode(), builder=self.builder)
|
||||
copy = type(self)(
|
||||
self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
|
||||
)
|
||||
|
||||
# Although we encoded the tree to UTF-8, that may not have
|
||||
# been the encoding of the original markup. Set the copy's
|
||||
# .original_encoding to reflect the original object's
|
||||
# .original_encoding.
|
||||
copy.original_encoding = self.original_encoding
|
||||
return copy
|
||||
|
||||
def __getstate__(self):
|
||||
# Frequently a tree builder can't be pickled.
|
||||
d = dict(self.__dict__)
|
||||
if 'builder' in d and not self.builder.picklable:
|
||||
del d['builder']
|
||||
d['builder'] = None
|
||||
return d
|
||||
|
||||
@staticmethod
|
||||
def _check_markup_is_url(markup):
|
||||
"""
|
||||
Check if markup looks like it's actually a url and raise a warning
|
||||
if so. Markup can be unicode or str (py2) / bytes (py3).
|
||||
"""
|
||||
if isinstance(markup, bytes):
|
||||
space = b' '
|
||||
cant_start_with = (b"http:", b"https:")
|
||||
elif isinstance(markup, unicode):
|
||||
space = u' '
|
||||
cant_start_with = (u"http:", u"https:")
|
||||
else:
|
||||
return
|
||||
|
||||
if any(markup.startswith(prefix) for prefix in cant_start_with):
|
||||
if not space in markup:
|
||||
if isinstance(markup, bytes):
|
||||
decoded_markup = markup.decode('utf-8', 'replace')
|
||||
else:
|
||||
decoded_markup = markup
|
||||
warnings.warn(
|
||||
'"%s" looks like a URL. Beautiful Soup is not an'
|
||||
' HTTP client. You should probably use an HTTP client like'
|
||||
' requests to get the document behind the URL, and feed'
|
||||
' that document to Beautiful Soup.' % decoded_markup
|
||||
)
|
||||
|
||||
def _feed(self):
|
||||
# Convert the document to Unicode.
|
||||
self.builder.reset()
|
||||
@@ -335,7 +385,18 @@ class BeautifulSoup(Tag):
|
||||
if parent.next_sibling:
|
||||
# This node is being inserted into an element that has
|
||||
# already been parsed. Deal with any dangling references.
|
||||
index = parent.contents.index(o)
|
||||
index = len(parent.contents)-1
|
||||
while index >= 0:
|
||||
if parent.contents[index] is o:
|
||||
break
|
||||
index -= 1
|
||||
else:
|
||||
raise ValueError(
|
||||
"Error building tree: supposedly %r was inserted "
|
||||
"into %r after the fact, but I don't see it!" % (
|
||||
o, parent
|
||||
)
|
||||
)
|
||||
if index == 0:
|
||||
previous_element = parent
|
||||
previous_sibling = None
|
||||
@@ -387,7 +448,7 @@ class BeautifulSoup(Tag):
|
||||
"""Push a start tag on to the stack.
|
||||
|
||||
If this method returns None, the tag was rejected by the
|
||||
SoupStrainer. You should proceed as if the tag had not occured
|
||||
SoupStrainer. You should proceed as if the tag had not occurred
|
||||
in the document. For instance, if this was a self-closing tag,
|
||||
don't call handle_endtag.
|
||||
"""
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
from collections import defaultdict
|
||||
import itertools
|
||||
import sys
|
||||
from bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
ContentMetaAttributeValue,
|
||||
HTMLAwareEntitySubstitution,
|
||||
whitespace_re
|
||||
)
|
||||
|
||||
@@ -227,9 +231,14 @@ class HTMLTreeBuilder(TreeBuilder):
|
||||
Such as which tags are empty-element tags.
|
||||
"""
|
||||
|
||||
preserve_whitespace_tags = set(['pre', 'textarea'])
|
||||
empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
|
||||
'spacer', 'link', 'frame', 'base'])
|
||||
preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
|
||||
empty_element_tags = set([
|
||||
# These are from HTML5.
|
||||
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
|
||||
|
||||
# These are from HTML4, removed in HTML5.
|
||||
'spacer', 'frame'
|
||||
])
|
||||
|
||||
# The HTML standard defines these attributes as containing a
|
||||
# space-separated list of values, not a single value. That is,
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__all__ = [
|
||||
'HTML5TreeBuilder',
|
||||
]
|
||||
|
||||
from pdb import set_trace
|
||||
import warnings
|
||||
import re
|
||||
from bs4.builder import (
|
||||
PERMISSIVE,
|
||||
HTML,
|
||||
@@ -15,7 +18,10 @@ from bs4.element import (
|
||||
whitespace_re,
|
||||
)
|
||||
import html5lib
|
||||
from html5lib.constants import namespaces
|
||||
from html5lib.constants import (
|
||||
namespaces,
|
||||
prefixes,
|
||||
)
|
||||
from bs4.element import (
|
||||
Comment,
|
||||
Doctype,
|
||||
@@ -23,6 +29,15 @@ from bs4.element import (
|
||||
Tag,
|
||||
)
|
||||
|
||||
try:
|
||||
# Pre-0.99999999
|
||||
from html5lib.treebuilders import _base as treebuilder_base
|
||||
new_html5lib = False
|
||||
except ImportError, e:
|
||||
# 0.99999999 and up
|
||||
from html5lib.treebuilders import base as treebuilder_base
|
||||
new_html5lib = True
|
||||
|
||||
class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
"""Use html5lib to build a tree."""
|
||||
|
||||
@@ -47,7 +62,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
if self.soup.parse_only is not None:
|
||||
warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
|
||||
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
|
||||
doc = parser.parse(markup, encoding=self.user_specified_encoding)
|
||||
|
||||
extra_kwargs = dict()
|
||||
if not isinstance(markup, unicode):
|
||||
if new_html5lib:
|
||||
extra_kwargs['override_encoding'] = self.user_specified_encoding
|
||||
else:
|
||||
extra_kwargs['encoding'] = self.user_specified_encoding
|
||||
doc = parser.parse(markup, **extra_kwargs)
|
||||
|
||||
# Set the character encoding detected by the tokenizer.
|
||||
if isinstance(markup, unicode):
|
||||
@@ -55,11 +77,17 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
# charEncoding to UTF-8 if it gets Unicode input.
|
||||
doc.original_encoding = None
|
||||
else:
|
||||
doc.original_encoding = parser.tokenizer.stream.charEncoding[0]
|
||||
original_encoding = parser.tokenizer.stream.charEncoding[0]
|
||||
if not isinstance(original_encoding, basestring):
|
||||
# In 0.99999999 and up, the encoding is an html5lib
|
||||
# Encoding object. We want to use a string for compatibility
|
||||
# with other tree builders.
|
||||
original_encoding = original_encoding.name
|
||||
doc.original_encoding = original_encoding
|
||||
|
||||
def create_treebuilder(self, namespaceHTMLElements):
|
||||
self.underlying_builder = TreeBuilderForHtml5lib(
|
||||
self.soup, namespaceHTMLElements)
|
||||
namespaceHTMLElements, self.soup)
|
||||
return self.underlying_builder
|
||||
|
||||
def test_fragment_to_document(self, fragment):
|
||||
@@ -67,10 +95,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
return u'<html><head></head><body>%s</body></html>' % fragment
|
||||
|
||||
|
||||
class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
|
||||
|
||||
def __init__(self, soup, namespaceHTMLElements):
|
||||
self.soup = soup
|
||||
def __init__(self, namespaceHTMLElements, soup=None):
|
||||
if soup:
|
||||
self.soup = soup
|
||||
else:
|
||||
from bs4 import BeautifulSoup
|
||||
self.soup = BeautifulSoup("", "html.parser")
|
||||
super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
|
||||
|
||||
def documentClass(self):
|
||||
@@ -93,7 +125,8 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
return TextNode(Comment(data), self.soup)
|
||||
|
||||
def fragmentClass(self):
|
||||
self.soup = BeautifulSoup("")
|
||||
from bs4 import BeautifulSoup
|
||||
self.soup = BeautifulSoup("", "html.parser")
|
||||
self.soup.name = "[document_fragment]"
|
||||
return Element(self.soup, self.soup, None)
|
||||
|
||||
@@ -105,7 +138,57 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
return self.soup
|
||||
|
||||
def getFragment(self):
|
||||
return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
|
||||
return treebuilder_base.TreeBuilder.getFragment(self).element
|
||||
|
||||
def testSerializer(self, element):
|
||||
from bs4 import BeautifulSoup
|
||||
rv = []
|
||||
doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')
|
||||
|
||||
def serializeElement(element, indent=0):
|
||||
if isinstance(element, BeautifulSoup):
|
||||
pass
|
||||
if isinstance(element, Doctype):
|
||||
m = doctype_re.match(element)
|
||||
if m:
|
||||
name = m.group(1)
|
||||
if m.lastindex > 1:
|
||||
publicId = m.group(2) or ""
|
||||
systemId = m.group(3) or m.group(4) or ""
|
||||
rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
|
||||
(' ' * indent, name, publicId, systemId))
|
||||
else:
|
||||
rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, name))
|
||||
else:
|
||||
rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
|
||||
elif isinstance(element, Comment):
|
||||
rv.append("|%s<!-- %s -->" % (' ' * indent, element))
|
||||
elif isinstance(element, NavigableString):
|
||||
rv.append("|%s\"%s\"" % (' ' * indent, element))
|
||||
else:
|
||||
if element.namespace:
|
||||
name = "%s %s" % (prefixes[element.namespace],
|
||||
element.name)
|
||||
else:
|
||||
name = element.name
|
||||
rv.append("|%s<%s>" % (' ' * indent, name))
|
||||
if element.attrs:
|
||||
attributes = []
|
||||
for name, value in element.attrs.items():
|
||||
if isinstance(name, NamespacedAttribute):
|
||||
name = "%s %s" % (prefixes[name.namespace], name.name)
|
||||
if isinstance(value, list):
|
||||
value = " ".join(value)
|
||||
attributes.append((name, value))
|
||||
|
||||
for name, value in sorted(attributes):
|
||||
rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
|
||||
indent += 2
|
||||
for child in element.children:
|
||||
serializeElement(child, indent)
|
||||
serializeElement(element, 0)
|
||||
|
||||
return "\n".join(rv)
|
||||
|
||||
class AttrList(object):
|
||||
def __init__(self, element):
|
||||
@@ -137,9 +220,9 @@ class AttrList(object):
|
||||
return name in list(self.attrs.keys())
|
||||
|
||||
|
||||
class Element(html5lib.treebuilders._base.Node):
|
||||
class Element(treebuilder_base.Node):
|
||||
def __init__(self, element, soup, namespace):
|
||||
html5lib.treebuilders._base.Node.__init__(self, element.name)
|
||||
treebuilder_base.Node.__init__(self, element.name)
|
||||
self.element = element
|
||||
self.soup = soup
|
||||
self.namespace = namespace
|
||||
@@ -158,8 +241,10 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
child = node
|
||||
elif node.element.__class__ == NavigableString:
|
||||
string_child = child = node.element
|
||||
node.parent = self
|
||||
else:
|
||||
child = node.element
|
||||
node.parent = self
|
||||
|
||||
if not isinstance(child, basestring) and child.parent is not None:
|
||||
node.element.extract()
|
||||
@@ -197,6 +282,8 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
most_recent_element=most_recent_element)
|
||||
|
||||
def getAttributes(self):
|
||||
if isinstance(self.element, Comment):
|
||||
return {}
|
||||
return AttrList(self.element)
|
||||
|
||||
def setAttributes(self, attributes):
|
||||
@@ -224,11 +311,11 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
attributes = property(getAttributes, setAttributes)
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
text = TextNode(self.soup.new_string(data), self.soup)
|
||||
if insertBefore:
|
||||
text = TextNode(self.soup.new_string(data), self.soup)
|
||||
self.insertBefore(data, insertBefore)
|
||||
self.insertBefore(text, insertBefore)
|
||||
else:
|
||||
self.appendChild(data)
|
||||
self.appendChild(text)
|
||||
|
||||
def insertBefore(self, node, refNode):
|
||||
index = self.element.index(refNode.element)
|
||||
@@ -250,6 +337,7 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
# print "MOVE", self.element.contents
|
||||
# print "FROM", self.element
|
||||
# print "TO", new_parent.element
|
||||
|
||||
element = self.element
|
||||
new_parent_element = new_parent.element
|
||||
# Determine what this tag's next_element will be once all the children
|
||||
@@ -268,7 +356,6 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
new_parents_last_descendant_next_element = new_parent_element.next_element
|
||||
|
||||
to_append = element.contents
|
||||
append_after = new_parent_element.contents
|
||||
if len(to_append) > 0:
|
||||
# Set the first child's previous_element and previous_sibling
|
||||
# to elements within the new parent
|
||||
@@ -285,12 +372,19 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
if new_parents_last_child:
|
||||
new_parents_last_child.next_sibling = first_child
|
||||
|
||||
# Fix the last child's next_element and next_sibling
|
||||
last_child = to_append[-1]
|
||||
last_child.next_element = new_parents_last_descendant_next_element
|
||||
# Find the very last element being moved. It is now the
|
||||
# parent's last descendant. It has no .next_sibling and
|
||||
# its .next_element is whatever the previous last
|
||||
# descendant had.
|
||||
last_childs_last_descendant = to_append[-1]._last_descendant(False, True)
|
||||
|
||||
last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
|
||||
if new_parents_last_descendant_next_element:
|
||||
new_parents_last_descendant_next_element.previous_element = last_child
|
||||
last_child.next_sibling = None
|
||||
# TODO: This code has no test coverage and I'm not sure
|
||||
# how to get html5lib to go through this path, but it's
|
||||
# just the other side of the previous line.
|
||||
new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant
|
||||
last_childs_last_descendant.next_sibling = None
|
||||
|
||||
for child in to_append:
|
||||
child.parent = new_parent_element
|
||||
@@ -324,7 +418,7 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
|
||||
class TextNode(Element):
|
||||
def __init__(self, element, soup):
|
||||
html5lib.treebuilders._base.Node.__init__(self, None)
|
||||
treebuilder_base.Node.__init__(self, None)
|
||||
self.element = element
|
||||
self.soup = soup
|
||||
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__all__ = [
|
||||
'HTMLParserTreeBuilder',
|
||||
]
|
||||
@@ -49,7 +52,31 @@ from bs4.builder import (
|
||||
HTMLPARSER = 'html.parser'
|
||||
|
||||
class BeautifulSoupHTMLParser(HTMLParser):
|
||||
def handle_starttag(self, name, attrs):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
HTMLParser.__init__(self, *args, **kwargs)
|
||||
|
||||
# Keep a list of empty-element tags that were encountered
|
||||
# without an explicit closing tag. If we encounter a closing tag
|
||||
# of this type, we'll associate it with one of those entries.
|
||||
#
|
||||
# This isn't a stack because we don't care about the
|
||||
# order. It's a list of closing tags we've already handled and
|
||||
# will ignore, assuming they ever show up.
|
||||
self.already_closed_empty_element = []
|
||||
|
||||
def handle_startendtag(self, name, attrs):
|
||||
# This is only called when the markup looks like
|
||||
# <tag/>.
|
||||
|
||||
# is_startend() tells handle_starttag not to close the tag
|
||||
# just because its name matches a known empty-element tag. We
|
||||
# know that this is an empty-element tag and we want to call
|
||||
# handle_endtag ourselves.
|
||||
tag = self.handle_starttag(name, attrs, handle_empty_element=False)
|
||||
self.handle_endtag(name)
|
||||
|
||||
def handle_starttag(self, name, attrs, handle_empty_element=True):
|
||||
# XXX namespace
|
||||
attr_dict = {}
|
||||
for key, value in attrs:
|
||||
@@ -59,10 +86,34 @@ class BeautifulSoupHTMLParser(HTMLParser):
|
||||
value = ''
|
||||
attr_dict[key] = value
|
||||
attrvalue = '""'
|
||||
self.soup.handle_starttag(name, None, None, attr_dict)
|
||||
#print "START", name
|
||||
tag = self.soup.handle_starttag(name, None, None, attr_dict)
|
||||
if tag and tag.is_empty_element and handle_empty_element:
|
||||
# Unlike other parsers, html.parser doesn't send separate end tag
|
||||
# events for empty-element tags. (It's handled in
|
||||
# handle_startendtag, but only if the original markup looked like
|
||||
# <tag/>.)
|
||||
#
|
||||
# So we need to call handle_endtag() ourselves. Since we
|
||||
# know the start event is identical to the end event, we
|
||||
# don't want handle_endtag() to cross off any previous end
|
||||
# events for tags of this name.
|
||||
self.handle_endtag(name, check_already_closed=False)
|
||||
|
||||
def handle_endtag(self, name):
|
||||
self.soup.handle_endtag(name)
|
||||
# But we might encounter an explicit closing tag for this tag
|
||||
# later on. If so, we want to ignore it.
|
||||
self.already_closed_empty_element.append(name)
|
||||
|
||||
def handle_endtag(self, name, check_already_closed=True):
|
||||
#print "END", name
|
||||
if check_already_closed and name in self.already_closed_empty_element:
|
||||
# This is a redundant end tag for an empty-element tag.
|
||||
# We've already called handle_endtag() for it, so just
|
||||
# check it off the list.
|
||||
# print "ALREADY CLOSED", name
|
||||
self.already_closed_empty_element.remove(name)
|
||||
else:
|
||||
self.soup.handle_endtag(name)
|
||||
|
||||
def handle_data(self, data):
|
||||
self.soup.handle_data(data)
|
||||
@@ -166,6 +217,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
|
||||
warnings.warn(RuntimeWarning(
|
||||
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
|
||||
raise e
|
||||
parser.already_closed_empty_element = []
|
||||
|
||||
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
|
||||
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__all__ = [
|
||||
'LXMLTreeBuilderForXML',
|
||||
'LXMLTreeBuilder',
|
||||
@@ -12,6 +14,7 @@ from bs4.element import (
|
||||
Doctype,
|
||||
NamespacedAttribute,
|
||||
ProcessingInstruction,
|
||||
XMLProcessingInstruction,
|
||||
)
|
||||
from bs4.builder import (
|
||||
FAST,
|
||||
@@ -29,6 +32,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
DEFAULT_PARSER_CLASS = etree.XMLParser
|
||||
|
||||
is_xml = True
|
||||
processing_instruction_class = XMLProcessingInstruction
|
||||
|
||||
NAME = "lxml-xml"
|
||||
ALTERNATE_NAMES = ["xml"]
|
||||
@@ -87,6 +91,16 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
|
||||
Each 4-tuple represents a strategy for parsing the document.
|
||||
"""
|
||||
# Instead of using UnicodeDammit to convert the bytestring to
|
||||
# Unicode using different encodings, use EncodingDetector to
|
||||
# iterate over the encodings, and tell lxml to try to parse
|
||||
# the document as each one in turn.
|
||||
is_html = not self.is_xml
|
||||
if is_html:
|
||||
self.processing_instruction_class = ProcessingInstruction
|
||||
else:
|
||||
self.processing_instruction_class = XMLProcessingInstruction
|
||||
|
||||
if isinstance(markup, unicode):
|
||||
# We were given Unicode. Maybe lxml can parse Unicode on
|
||||
# this system?
|
||||
@@ -98,11 +112,6 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
yield (markup.encode("utf8"), "utf8",
|
||||
document_declared_encoding, False)
|
||||
|
||||
# Instead of using UnicodeDammit to convert the bytestring to
|
||||
# Unicode using different encodings, use EncodingDetector to
|
||||
# iterate over the encodings, and tell lxml to try to parse
|
||||
# the document as each one in turn.
|
||||
is_html = not self.is_xml
|
||||
try_encodings = [user_specified_encoding, document_declared_encoding]
|
||||
detector = EncodingDetector(
|
||||
markup, try_encodings, is_html, exclude_encodings)
|
||||
@@ -201,7 +210,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
def pi(self, target, data):
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(target + ' ' + data)
|
||||
self.soup.endData(ProcessingInstruction)
|
||||
self.soup.endData(self.processing_instruction_class)
|
||||
|
||||
def data(self, content):
|
||||
self.soup.handle_data(content)
|
||||
@@ -229,6 +238,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
|
||||
|
||||
features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
|
||||
is_xml = False
|
||||
processing_instruction_class = ProcessingInstruction
|
||||
|
||||
def default_parser(self, encoding):
|
||||
return etree.HTMLParser
|
||||
|
||||
@@ -6,9 +6,10 @@ necessary. It is heavily based on code from Mark Pilgrim's Universal
|
||||
Feed Parser. It works best on XML and HTML, but it does not rewrite the
|
||||
XML or HTML to reflect a new encoding; that's the tree builder's job.
|
||||
"""
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
from pdb import set_trace
|
||||
import codecs
|
||||
from htmlentitydefs import codepoint2name
|
||||
import re
|
||||
@@ -309,7 +310,7 @@ class EncodingDetector:
|
||||
else:
|
||||
xml_endpos = 1024
|
||||
html_endpos = max(2048, int(len(markup) * 0.05))
|
||||
|
||||
|
||||
declared_encoding = None
|
||||
declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
|
||||
if not declared_encoding_match and is_html:
|
||||
@@ -346,7 +347,7 @@ class UnicodeDammit:
|
||||
self.tried_encodings = []
|
||||
self.contains_replacement_characters = False
|
||||
self.is_html = is_html
|
||||
|
||||
self.log = logging.getLogger(__name__)
|
||||
self.detector = EncodingDetector(
|
||||
markup, override_encodings, is_html, exclude_encodings)
|
||||
|
||||
@@ -376,9 +377,10 @@ class UnicodeDammit:
|
||||
if encoding != "ascii":
|
||||
u = self._convert_from(encoding, "replace")
|
||||
if u is not None:
|
||||
logging.warning(
|
||||
self.log.warning(
|
||||
"Some characters could not be decoded, and were "
|
||||
"replaced with REPLACEMENT CHARACTER.")
|
||||
"replaced with REPLACEMENT CHARACTER."
|
||||
)
|
||||
self.contains_replacement_characters = True
|
||||
break
|
||||
|
||||
@@ -734,7 +736,7 @@ class UnicodeDammit:
|
||||
0xde : b'\xc3\x9e', # Þ
|
||||
0xdf : b'\xc3\x9f', # ß
|
||||
0xe0 : b'\xc3\xa0', # à
|
||||
0xe1 : b'\xa1', # á
|
||||
0xe1 : b'\xa1', # á
|
||||
0xe2 : b'\xc3\xa2', # â
|
||||
0xe3 : b'\xc3\xa3', # ã
|
||||
0xe4 : b'\xc3\xa4', # ä
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""Diagnostic functions, mainly for use when doing tech support."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
import cProfile
|
||||
@@ -56,7 +58,8 @@ def diagnose(data):
|
||||
data = data.read()
|
||||
elif os.path.exists(data):
|
||||
print '"%s" looks like a filename. Reading data from the file.' % data
|
||||
data = open(data).read()
|
||||
with open(data) as fp:
|
||||
data = fp.read()
|
||||
elif data.startswith("http:") or data.startswith("https:"):
|
||||
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
|
||||
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
from pdb import set_trace
|
||||
import collections
|
||||
import re
|
||||
import shlex
|
||||
import sys
|
||||
import warnings
|
||||
from bs4.dammit import EntitySubstitution
|
||||
@@ -99,6 +101,8 @@ class HTMLAwareEntitySubstitution(EntitySubstitution):
|
||||
|
||||
preformatted_tags = set(["pre"])
|
||||
|
||||
preserve_whitespace_tags = set(['pre', 'textarea'])
|
||||
|
||||
@classmethod
|
||||
def _substitute_if_appropriate(cls, ns, f):
|
||||
if (isinstance(ns, NavigableString)
|
||||
@@ -127,8 +131,8 @@ class PageElement(object):
|
||||
# to methods like encode() and prettify():
|
||||
#
|
||||
# "html" - All Unicode characters with corresponding HTML entities
|
||||
# are converted to those entities on output.
|
||||
# "minimal" - Bare ampersands and angle brackets are converted to
|
||||
# are converted to those entities on output.
|
||||
# "minimal" - Bare ampersands and angle brackets are converted to
|
||||
# XML entities: & < >
|
||||
# None - The null formatter. Unicode characters are never
|
||||
# converted to entities. This is not recommended, but it's
|
||||
@@ -169,11 +173,19 @@ class PageElement(object):
|
||||
|
||||
This is used when mapping a formatter name ("minimal") to an
|
||||
appropriate function (one that performs entity-substitution on
|
||||
the contents of <script> and <style> tags, or not). It's
|
||||
the contents of <script> and <style> tags, or not). It can be
|
||||
inefficient, but it should be called very rarely.
|
||||
"""
|
||||
if self.known_xml is not None:
|
||||
# Most of the time we will have determined this when the
|
||||
# document is parsed.
|
||||
return self.known_xml
|
||||
|
||||
# Otherwise, it's likely that this element was created by
|
||||
# direct invocation of the constructor from within the user's
|
||||
# Python code.
|
||||
if self.parent is None:
|
||||
# This is the top-level object. It should have .is_xml set
|
||||
# This is the top-level object. It should have .known_xml set
|
||||
# from tree creation. If not, take a guess--BS is usually
|
||||
# used on HTML markup.
|
||||
return getattr(self, 'is_xml', False)
|
||||
@@ -523,9 +535,16 @@ class PageElement(object):
|
||||
return ResultSet(strainer, result)
|
||||
elif isinstance(name, basestring):
|
||||
# Optimization to find all tags with a given name.
|
||||
if name.count(':') == 1:
|
||||
# This is a name with a prefix.
|
||||
prefix, name = name.split(':', 1)
|
||||
else:
|
||||
prefix = None
|
||||
result = (element for element in generator
|
||||
if isinstance(element, Tag)
|
||||
and element.name == name)
|
||||
and element.name == name
|
||||
and (prefix is None or element.prefix == prefix)
|
||||
)
|
||||
return ResultSet(strainer, result)
|
||||
results = ResultSet(strainer)
|
||||
while True:
|
||||
@@ -637,7 +656,7 @@ class PageElement(object):
|
||||
return lambda el: el._attr_value_as_string(
|
||||
attribute, '').startswith(value)
|
||||
elif operator == '$':
|
||||
# string represenation of `attribute` ends with `value`
|
||||
# string representation of `attribute` ends with `value`
|
||||
return lambda el: el._attr_value_as_string(
|
||||
attribute, '').endswith(value)
|
||||
elif operator == '*':
|
||||
@@ -677,6 +696,11 @@ class NavigableString(unicode, PageElement):
|
||||
PREFIX = ''
|
||||
SUFFIX = ''
|
||||
|
||||
# We can't tell just by looking at a string whether it's contained
|
||||
# in an XML document or an HTML document.
|
||||
|
||||
known_xml = None
|
||||
|
||||
def __new__(cls, value):
|
||||
"""Create a new NavigableString.
|
||||
|
||||
@@ -743,10 +767,16 @@ class CData(PreformattedString):
|
||||
SUFFIX = u']]>'
|
||||
|
||||
class ProcessingInstruction(PreformattedString):
|
||||
"""A SGML processing instruction."""
|
||||
|
||||
PREFIX = u'<?'
|
||||
SUFFIX = u'>'
|
||||
|
||||
class XMLProcessingInstruction(ProcessingInstruction):
|
||||
"""An XML processing instruction."""
|
||||
PREFIX = u'<?'
|
||||
SUFFIX = u'?>'
|
||||
|
||||
class Comment(PreformattedString):
|
||||
|
||||
PREFIX = u'<!--'
|
||||
@@ -781,7 +811,8 @@ class Tag(PageElement):
|
||||
"""Represents a found HTML tag with its attributes and contents."""
|
||||
|
||||
def __init__(self, parser=None, builder=None, name=None, namespace=None,
|
||||
prefix=None, attrs=None, parent=None, previous=None):
|
||||
prefix=None, attrs=None, parent=None, previous=None,
|
||||
is_xml=None):
|
||||
"Basic constructor."
|
||||
|
||||
if parser is None:
|
||||
@@ -795,6 +826,14 @@ class Tag(PageElement):
|
||||
self.name = name
|
||||
self.namespace = namespace
|
||||
self.prefix = prefix
|
||||
if builder is not None:
|
||||
preserve_whitespace_tags = builder.preserve_whitespace_tags
|
||||
else:
|
||||
if is_xml:
|
||||
preserve_whitespace_tags = []
|
||||
else:
|
||||
preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
|
||||
self.preserve_whitespace_tags = preserve_whitespace_tags
|
||||
if attrs is None:
|
||||
attrs = {}
|
||||
elif attrs:
|
||||
@@ -805,6 +844,13 @@ class Tag(PageElement):
|
||||
attrs = dict(attrs)
|
||||
else:
|
||||
attrs = dict(attrs)
|
||||
|
||||
# If possible, determine ahead of time whether this tag is an
|
||||
# XML tag.
|
||||
if builder:
|
||||
self.known_xml = builder.is_xml
|
||||
else:
|
||||
self.known_xml = is_xml
|
||||
self.attrs = attrs
|
||||
self.contents = []
|
||||
self.setup(parent, previous)
|
||||
@@ -824,7 +870,7 @@ class Tag(PageElement):
|
||||
Its contents are a copy of the old Tag's contents.
|
||||
"""
|
||||
clone = type(self)(None, self.builder, self.name, self.namespace,
|
||||
self.nsprefix, self.attrs)
|
||||
self.prefix, self.attrs, is_xml=self._is_xml)
|
||||
for attr in ('can_be_empty_element', 'hidden'):
|
||||
setattr(clone, attr, getattr(self, attr))
|
||||
for child in self.contents:
|
||||
@@ -946,6 +992,13 @@ class Tag(PageElement):
|
||||
attribute."""
|
||||
return self.attrs.get(key, default)
|
||||
|
||||
def get_attribute_list(self, key, default=None):
|
||||
"""The same as get(), but always returns a list."""
|
||||
value = self.get(key, default)
|
||||
if not isinstance(value, list):
|
||||
value = [value]
|
||||
return value
|
||||
|
||||
def has_attr(self, key):
|
||||
return key in self.attrs
|
||||
|
||||
@@ -997,7 +1050,7 @@ class Tag(PageElement):
|
||||
tag_name, tag_name))
|
||||
return self.find(tag_name)
|
||||
# We special case contents to avoid recursion.
|
||||
elif not tag.startswith("__") and not tag=="contents":
|
||||
elif not tag.startswith("__") and not tag == "contents":
|
||||
return self.find(tag)
|
||||
raise AttributeError(
|
||||
"'%s' object has no attribute '%s'" % (self.__class__, tag))
|
||||
@@ -1057,10 +1110,11 @@ class Tag(PageElement):
|
||||
|
||||
def _should_pretty_print(self, indent_level):
|
||||
"""Should this tag be pretty-printed?"""
|
||||
|
||||
return (
|
||||
indent_level is not None and
|
||||
(self.name not in HTMLAwareEntitySubstitution.preformatted_tags
|
||||
or self._is_xml))
|
||||
indent_level is not None
|
||||
and self.name not in self.preserve_whitespace_tags
|
||||
)
|
||||
|
||||
def decode(self, indent_level=None,
|
||||
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
|
||||
@@ -1280,6 +1334,7 @@ class Tag(PageElement):
|
||||
|
||||
_selector_combinators = ['>', '+', '~']
|
||||
_select_debug = False
|
||||
quoted_colon = re.compile('"[^"]*:[^"]*"')
|
||||
def select_one(self, selector):
|
||||
"""Perform a CSS selection operation on the current element."""
|
||||
value = self.select(selector, limit=1)
|
||||
@@ -1305,8 +1360,7 @@ class Tag(PageElement):
|
||||
if limit and len(context) >= limit:
|
||||
break
|
||||
return context
|
||||
|
||||
tokens = selector.split()
|
||||
tokens = shlex.split(selector)
|
||||
current_context = [self]
|
||||
|
||||
if tokens[-1] in self._selector_combinators:
|
||||
@@ -1358,7 +1412,7 @@ class Tag(PageElement):
|
||||
return classes.issubset(candidate.get('class', []))
|
||||
checker = classes_match
|
||||
|
||||
elif ':' in token:
|
||||
elif ':' in token and not self.quoted_colon.search(token):
|
||||
# Pseudo-class
|
||||
tag_name, pseudo = token.split(':', 1)
|
||||
if tag_name == '':
|
||||
@@ -1389,11 +1443,8 @@ class Tag(PageElement):
|
||||
self.count += 1
|
||||
if self.count == self.destination:
|
||||
return True
|
||||
if self.count > self.destination:
|
||||
# Stop the generator that's sending us
|
||||
# these things.
|
||||
raise StopIteration()
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
checker = Counter(pseudo_value).nth_child_of_type
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
@@ -1498,13 +1549,12 @@ class Tag(PageElement):
|
||||
# don't include it in the context more than once.
|
||||
new_context.append(candidate)
|
||||
new_context_ids.add(id(candidate))
|
||||
if limit and len(new_context) >= limit:
|
||||
break
|
||||
elif self._select_debug:
|
||||
print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
|
||||
|
||||
|
||||
current_context = new_context
|
||||
if limit and len(current_context) >= limit:
|
||||
current_context = current_context[:limit]
|
||||
|
||||
if self._select_debug:
|
||||
print "Final verdict:"
|
||||
@@ -1662,28 +1712,22 @@ class SoupStrainer(object):
|
||||
"I don't know how to match against a %s" % markup.__class__)
|
||||
return found
|
||||
|
||||
def _matches(self, markup, match_against):
|
||||
def _matches(self, markup, match_against, already_tried=None):
|
||||
# print u"Matching %s against %s" % (markup, match_against)
|
||||
result = False
|
||||
if isinstance(markup, list) or isinstance(markup, tuple):
|
||||
# This should only happen when searching a multi-valued attribute
|
||||
# like 'class'.
|
||||
if (isinstance(match_against, unicode)
|
||||
and ' ' in match_against):
|
||||
# A bit of a special case. If they try to match "foo
|
||||
# bar" on a multivalue attribute's value, only accept
|
||||
# the literal value "foo bar"
|
||||
#
|
||||
# XXX This is going to be pretty slow because we keep
|
||||
# splitting match_against. But it shouldn't come up
|
||||
# too often.
|
||||
return (whitespace_re.split(match_against) == markup)
|
||||
else:
|
||||
for item in markup:
|
||||
if self._matches(item, match_against):
|
||||
return True
|
||||
return False
|
||||
|
||||
for item in markup:
|
||||
if self._matches(item, match_against):
|
||||
return True
|
||||
# We didn't match any particular value of the multivalue
|
||||
# attribute, but maybe we match the attribute value when
|
||||
# considered as a string.
|
||||
if self._matches(' '.join(markup), match_against):
|
||||
return True
|
||||
return False
|
||||
|
||||
if match_against is True:
|
||||
# True matches any non-None value.
|
||||
return markup is not None
|
||||
@@ -1693,6 +1737,7 @@ class SoupStrainer(object):
|
||||
|
||||
# Custom callables take the tag as an argument, but all
|
||||
# other ways of matching match the tag name as a string.
|
||||
original_markup = markup
|
||||
if isinstance(markup, Tag):
|
||||
markup = markup.name
|
||||
|
||||
@@ -1703,18 +1748,51 @@ class SoupStrainer(object):
|
||||
# None matches None, False, an empty string, an empty list, and so on.
|
||||
return not match_against
|
||||
|
||||
if isinstance(match_against, unicode):
|
||||
if (hasattr(match_against, '__iter__')
|
||||
and not isinstance(match_against, basestring)):
|
||||
# We're asked to match against an iterable of items.
|
||||
# The markup must match at least one item in the
|
||||
# iterable. We'll try each one in turn.
|
||||
#
|
||||
# To avoid infinite recursion we need to keep track of
|
||||
# items we've already seen.
|
||||
if not already_tried:
|
||||
already_tried = set()
|
||||
for item in match_against:
|
||||
if item.__hash__:
|
||||
key = item
|
||||
else:
|
||||
key = id(item)
|
||||
if key in already_tried:
|
||||
continue
|
||||
else:
|
||||
already_tried.add(key)
|
||||
if self._matches(original_markup, item, already_tried):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
# Beyond this point we might need to run the test twice: once against
|
||||
# the tag's name and once against its prefixed name.
|
||||
match = False
|
||||
|
||||
if not match and isinstance(match_against, unicode):
|
||||
# Exact string match
|
||||
return markup == match_against
|
||||
match = markup == match_against
|
||||
|
||||
if hasattr(match_against, 'match'):
|
||||
if not match and hasattr(match_against, 'search'):
|
||||
# Regexp match
|
||||
return match_against.search(markup)
|
||||
|
||||
if hasattr(match_against, '__iter__'):
|
||||
# The markup must be an exact match against something
|
||||
# in the iterable.
|
||||
return markup in match_against
|
||||
if (not match
|
||||
and isinstance(original_markup, Tag)
|
||||
and original_markup.prefix):
|
||||
# Try the whole thing again with the prefixed tag name.
|
||||
return self._matches(
|
||||
original_markup.prefix + ':' + original_markup.name, match_against
|
||||
)
|
||||
|
||||
return match
|
||||
|
||||
|
||||
class ResultSet(list):
|
||||
@@ -1723,3 +1801,8 @@ class ResultSet(list):
|
||||
def __init__(self, source, result=()):
|
||||
super(ResultSet, self).__init__(result)
|
||||
self.source = source
|
||||
|
||||
def __getattr__(self, key):
|
||||
raise AttributeError(
|
||||
"ResultSet object has no attribute '%s'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?" % key
|
||||
)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""Helper classes for tests."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
import pickle
|
||||
@@ -67,6 +69,18 @@ class HTMLTreeBuilderSmokeTest(object):
|
||||
markup in these tests, there's not much room for interpretation.
|
||||
"""
|
||||
|
||||
def test_empty_element_tags(self):
|
||||
"""Verify that all HTML4 and HTML5 empty element (aka void element) tags
|
||||
are handled correctly.
|
||||
"""
|
||||
for name in [
|
||||
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
|
||||
'spacer', 'frame'
|
||||
]:
|
||||
soup = self.soup("")
|
||||
new_tag = soup.new_tag(name)
|
||||
self.assertEqual(True, new_tag.is_empty_element)
|
||||
|
||||
def test_pickle_and_unpickle_identity(self):
|
||||
# Pickling a tree, then unpickling it, yields a tree identical
|
||||
# to the original.
|
||||
@@ -137,6 +151,14 @@ class HTMLTreeBuilderSmokeTest(object):
|
||||
markup.replace(b"\n", b""))
|
||||
|
||||
def test_processing_instruction(self):
|
||||
# We test both Unicode and bytestring to verify that
|
||||
# process_markup correctly sets processing_instruction_class
|
||||
# even when the markup is already Unicode and there is no
|
||||
# need to process anything.
|
||||
markup = u"""<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.decode())
|
||||
|
||||
markup = b"""<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
@@ -215,9 +237,22 @@ Hello, world!
|
||||
self.assertEqual(comment, baz.previous_element)
|
||||
|
||||
def test_preserved_whitespace_in_pre_and_textarea(self):
|
||||
"""Whitespace must be preserved in <pre> and <textarea> tags."""
|
||||
self.assertSoupEquals("<pre> </pre>")
|
||||
self.assertSoupEquals("<textarea> woo </textarea>")
|
||||
"""Whitespace must be preserved in <pre> and <textarea> tags,
|
||||
even if that would mean not prettifying the markup.
|
||||
"""
|
||||
pre_markup = "<pre> </pre>"
|
||||
textarea_markup = "<textarea> woo\nwoo </textarea>"
|
||||
self.assertSoupEquals(pre_markup)
|
||||
self.assertSoupEquals(textarea_markup)
|
||||
|
||||
soup = self.soup(pre_markup)
|
||||
self.assertEqual(soup.pre.prettify(), pre_markup)
|
||||
|
||||
soup = self.soup(textarea_markup)
|
||||
self.assertEqual(soup.textarea.prettify(), textarea_markup)
|
||||
|
||||
soup = self.soup("<textarea></textarea>")
|
||||
self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>")
|
||||
|
||||
def test_nested_inline_elements(self):
|
||||
"""Inline elements can be nested indefinitely."""
|
||||
@@ -307,6 +342,13 @@ Hello, world!
|
||||
self.assertEqual("p", soup.p.name)
|
||||
self.assertConnectedness(soup)
|
||||
|
||||
def test_empty_element_tags(self):
|
||||
"""Verify consistent handling of empty-element tags,
|
||||
no matter how they come in through the markup.
|
||||
"""
|
||||
self.assertSoupEquals('<br/><br/><br/>', "<br/><br/><br/>")
|
||||
self.assertSoupEquals('<br /><br /><br />', "<br/><br/><br/>")
|
||||
|
||||
def test_head_tag_between_head_and_body(self):
|
||||
"Prevent recurrence of a bug in the html5lib treebuilder."
|
||||
content = """<html><head></head>
|
||||
@@ -480,7 +522,9 @@ Hello, world!
|
||||
hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
|
||||
soup = self.soup(
|
||||
hebrew_document, from_encoding="iso8859-8")
|
||||
self.assertEqual(soup.original_encoding, 'iso8859-8')
|
||||
# Some tree builders call it iso8859-8, others call it iso-8859-8.
|
||||
# That's not a difference we really care about.
|
||||
assert soup.original_encoding in ('iso8859-8', 'iso-8859-8')
|
||||
self.assertEqual(
|
||||
soup.encode('utf-8'),
|
||||
hebrew_document.decode("iso8859-8").encode("utf-8"))
|
||||
@@ -563,6 +607,11 @@ class XMLTreeBuilderSmokeTest(object):
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
|
||||
def test_processing_instruction(self):
|
||||
markup = b"""<?xml version="1.0" encoding="utf8"?>\n<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
|
||||
def test_real_xhtml_document(self):
|
||||
"""A real XHTML document should come out *exactly* the same as it went in."""
|
||||
markup = b"""<?xml version="1.0" encoding="utf-8"?>
|
||||
@@ -639,6 +688,40 @@ class XMLTreeBuilderSmokeTest(object):
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(unicode(soup.foo), markup)
|
||||
|
||||
def test_find_by_prefixed_name(self):
|
||||
doc = """<?xml version="1.0" encoding="utf-8"?>
|
||||
<Document xmlns="http://example.com/ns0"
|
||||
xmlns:ns1="http://example.com/ns1"
|
||||
xmlns:ns2="http://example.com/ns2"
|
||||
<ns1:tag>foo</ns1:tag>
|
||||
<ns1:tag>bar</ns1:tag>
|
||||
<ns2:tag key="value">baz</ns2:tag>
|
||||
</Document>
|
||||
"""
|
||||
soup = self.soup(doc)
|
||||
|
||||
# There are three <tag> tags.
|
||||
self.assertEqual(3, len(soup.find_all('tag')))
|
||||
|
||||
# But two of them are ns1:tag and one of them is ns2:tag.
|
||||
self.assertEqual(2, len(soup.find_all('ns1:tag')))
|
||||
self.assertEqual(1, len(soup.find_all('ns2:tag')))
|
||||
|
||||
self.assertEqual(1, len(soup.find_all('ns2:tag', key='value')))
|
||||
self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag'])))
|
||||
|
||||
def test_copy_tag_preserves_namespace(self):
|
||||
xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<w:document xmlns:w="http://example.com/ns0"/>"""
|
||||
|
||||
soup = self.soup(xml)
|
||||
tag = soup.document
|
||||
duplicate = copy.copy(tag)
|
||||
|
||||
# The two tags have the same namespace prefix.
|
||||
self.assertEqual(tag.prefix, duplicate.prefix)
|
||||
|
||||
|
||||
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
|
||||
"""Smoke test for a tree builder that supports HTML5."""
|
||||
|
||||
|
||||
@@ -84,6 +84,33 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
|
||||
self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
|
||||
self.assertEqual(2, len(soup.find_all('p')))
|
||||
|
||||
def test_reparented_markup_containing_identical_whitespace_nodes(self):
|
||||
"""Verify that we keep the two whitespace nodes in this
|
||||
document distinct when reparenting the adjacent <tbody> tags.
|
||||
"""
|
||||
markup = '<table> <tbody><tbody><ims></tbody> </table>'
|
||||
soup = self.soup(markup)
|
||||
space1, space2 = soup.find_all(string=' ')
|
||||
tbody1, tbody2 = soup.find_all('tbody')
|
||||
assert space1.next_element is tbody1
|
||||
assert tbody2.next_element is space2
|
||||
|
||||
def test_reparented_markup_containing_children(self):
|
||||
markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>'
|
||||
soup = self.soup(markup)
|
||||
noscript = soup.noscript
|
||||
self.assertEqual("target", noscript.next_element)
|
||||
target = soup.find(string='target')
|
||||
|
||||
# The 'aftermath' string was duplicated; we want the second one.
|
||||
final_aftermath = soup.find_all(string='aftermath')[-1]
|
||||
|
||||
# The <noscript> tag was moved beneath a copy of the <a> tag,
|
||||
# but the 'target' string within is still connected to the
|
||||
# (second) 'aftermath' string.
|
||||
self.assertEqual(final_aftermath, target.next_element)
|
||||
self.assertEqual(target, final_aftermath.previous_element)
|
||||
|
||||
def test_processing_instruction(self):
|
||||
"""Processing instructions become comments."""
|
||||
markup = b"""<?PITarget PIContent?>"""
|
||||
@@ -96,3 +123,8 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
|
||||
a1, a2 = soup.find_all('a')
|
||||
self.assertEqual(a1, a2)
|
||||
assert a1 is not a2
|
||||
|
||||
def test_foster_parenting(self):
|
||||
markup = b"""<table><td></tbody>A"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
|
||||
|
||||
@@ -29,4 +29,6 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||
loaded = pickle.loads(dumped)
|
||||
self.assertTrue(isinstance(loaded.builder, type(tree.builder)))
|
||||
|
||||
|
||||
def test_redundant_empty_element_closing_tags(self):
|
||||
self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
|
||||
self.assertSoupEquals('</br></br></br>', "")
|
||||
|
||||
@@ -35,7 +35,6 @@ try:
|
||||
except ImportError, e:
|
||||
LXML_PRESENT = False
|
||||
|
||||
PYTHON_2_PRE_2_7 = (sys.version_info < (2,7))
|
||||
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
|
||||
|
||||
class TestConstructor(SoupTest):
|
||||
@@ -77,7 +76,7 @@ class TestWarnings(SoupTest):
|
||||
def test_no_warning_if_explicit_parser_specified(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("<a><b></b></a>", "html.parser")
|
||||
self.assertEquals([], w)
|
||||
self.assertEqual([], w)
|
||||
|
||||
def test_parseOnlyThese_renamed_to_parse_only(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
@@ -118,15 +117,34 @@ class TestWarnings(SoupTest):
|
||||
soup = self.soup(filename)
|
||||
self.assertEqual(0, len(w))
|
||||
|
||||
def test_url_warning(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("http://www.crummy.com/")
|
||||
msg = str(w[0].message)
|
||||
self.assertTrue("looks like a URL" in msg)
|
||||
def test_url_warning_with_bytes_url(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(b"http://www.crummybytes.com/")
|
||||
# Be aware this isn't the only warning that can be raised during
|
||||
# execution..
|
||||
self.assertTrue(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_unicode_url(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
# note - this url must differ from the bytes one otherwise
|
||||
# python's warnings system swallows the second warning
|
||||
soup = self.soup(u"http://www.crummyunicode.com/")
|
||||
self.assertTrue(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_bytes_and_space(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(b"http://www.crummybytes.com/ is great")
|
||||
self.assertFalse(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_unicode_and_space(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(u"http://www.crummyuncode.com/ is great")
|
||||
self.assertFalse(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("http://www.crummy.com/ is great")
|
||||
self.assertEqual(0, len(w))
|
||||
|
||||
class TestSelectiveParsing(SoupTest):
|
||||
|
||||
@@ -260,7 +278,7 @@ class TestEncodingConversion(SoupTest):
|
||||
self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
|
||||
|
||||
@skipIf(
|
||||
PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2,
|
||||
PYTHON_3_PRE_3_2,
|
||||
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
|
||||
def test_attribute_name_containing_unicode_characters(self):
|
||||
markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Tests for Beautiful Soup's tree traversal methods.
|
||||
|
||||
@@ -222,7 +223,19 @@ class TestFindAllByName(TreeTest):
|
||||
self.assertSelects(
|
||||
tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
|
||||
|
||||
def test_find_with_multi_valued_attribute(self):
|
||||
soup = self.soup(
|
||||
"<div class='a b'>1</div><div class='a c'>2</div><div class='a d'>3</div>"
|
||||
)
|
||||
r1 = soup.find('div', 'a d');
|
||||
r2 = soup.find('div', re.compile(r'a d'));
|
||||
r3, r4 = soup.find_all('div', ['a b', 'a d']);
|
||||
self.assertEqual('3', r1.string)
|
||||
self.assertEqual('3', r2.string)
|
||||
self.assertEqual('1', r3.string)
|
||||
self.assertEqual('3', r4.string)
|
||||
|
||||
|
||||
class TestFindAllByAttribute(TreeTest):
|
||||
|
||||
def test_find_all_by_attribute_name(self):
|
||||
@@ -294,10 +307,10 @@ class TestFindAllByAttribute(TreeTest):
|
||||
f = tree.find_all("gar", class_=re.compile("a"))
|
||||
self.assertSelects(f, ["Found it"])
|
||||
|
||||
# Since the class is not the string "foo bar", but the two
|
||||
# strings "foo" and "bar", this will not find anything.
|
||||
# If the search fails to match the individual strings "foo" and "bar",
|
||||
# it will be tried against the combined string "foo bar".
|
||||
f = tree.find_all("gar", class_=re.compile("o b"))
|
||||
self.assertSelects(f, [])
|
||||
self.assertSelects(f, ["Found it"])
|
||||
|
||||
def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
|
||||
soup = self.soup("<a class='bar'>Found it</a>")
|
||||
@@ -335,7 +348,7 @@ class TestFindAllByAttribute(TreeTest):
|
||||
strainer = SoupStrainer(attrs={'id' : 'first'})
|
||||
self.assertSelects(tree.find_all(strainer), ['Match.'])
|
||||
|
||||
def test_find_all_with_missing_atribute(self):
|
||||
def test_find_all_with_missing_attribute(self):
|
||||
# You can pass in None as the value of an attribute to find_all.
|
||||
# This will match tags that do not have that attribute set.
|
||||
tree = self.soup("""<a id="1">ID present.</a>
|
||||
@@ -1273,6 +1286,10 @@ class TestCDAtaListAttributes(SoupTest):
|
||||
soup = self.soup("<a class='foo\tbar'>")
|
||||
self.assertEqual(b'<a class="foo bar"></a>', soup.a.encode())
|
||||
|
||||
def test_get_attribute_list(self):
|
||||
soup = self.soup("<a id='abc def'>")
|
||||
self.assertEqual(['abc def'], soup.a.get_attribute_list('id'))
|
||||
|
||||
def test_accept_charset(self):
|
||||
soup = self.soup('<form accept-charset="ISO-8859-1 UTF-8">')
|
||||
self.assertEqual(['ISO-8859-1', 'UTF-8'], soup.form['accept-charset'])
|
||||
@@ -1328,6 +1345,13 @@ class TestPersistence(SoupTest):
|
||||
copied = copy.deepcopy(self.tree)
|
||||
self.assertEqual(copied.decode(), self.tree.decode())
|
||||
|
||||
def test_copy_preserves_encoding(self):
|
||||
soup = BeautifulSoup(b'<p> </p>', 'html.parser')
|
||||
encoding = soup.original_encoding
|
||||
copy = soup.__copy__()
|
||||
self.assertEqual(u"<p> </p>", unicode(copy))
|
||||
self.assertEqual(encoding, copy.original_encoding)
|
||||
|
||||
def test_unicode_pickle(self):
|
||||
# A tree containing Unicode characters can be pickled.
|
||||
html = u"<b>\N{SNOWMAN}</b>"
|
||||
@@ -1676,8 +1700,8 @@ class TestSoupSelector(TreeTest):
|
||||
def setUp(self):
|
||||
self.soup = BeautifulSoup(self.HTML, 'html.parser')
|
||||
|
||||
def assertSelects(self, selector, expected_ids):
|
||||
el_ids = [el['id'] for el in self.soup.select(selector)]
|
||||
def assertSelects(self, selector, expected_ids, **kwargs):
|
||||
el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)]
|
||||
el_ids.sort()
|
||||
expected_ids.sort()
|
||||
self.assertEqual(expected_ids, el_ids,
|
||||
@@ -1720,6 +1744,13 @@ class TestSoupSelector(TreeTest):
|
||||
for selector in ('html div', 'html body div', 'body div'):
|
||||
self.assertSelects(selector, ['data1', 'main', 'inner', 'footer'])
|
||||
|
||||
|
||||
def test_limit(self):
|
||||
self.assertSelects('html div', ['main'], limit=1)
|
||||
self.assertSelects('html body div', ['inner', 'main'], limit=2)
|
||||
self.assertSelects('body div', ['data1', 'main', 'inner', 'footer'],
|
||||
limit=10)
|
||||
|
||||
def test_tag_no_match(self):
|
||||
self.assertEqual(len(self.soup.select('del')), 0)
|
||||
|
||||
@@ -1902,6 +1933,14 @@ class TestSoupSelector(TreeTest):
|
||||
('div[data-tag]', ['data1'])
|
||||
)
|
||||
|
||||
def test_quoted_space_in_selector_name(self):
|
||||
html = """<div style="display: wrong">nope</div>
|
||||
<div style="display: right">yes</div>
|
||||
"""
|
||||
soup = BeautifulSoup(html, 'html.parser')
|
||||
[chosen] = soup.select('div[style="display: right"]')
|
||||
self.assertEqual("yes", chosen.string)
|
||||
|
||||
def test_unsupported_pseudoclass(self):
|
||||
self.assertRaises(
|
||||
NotImplementedError, self.soup.select, "a:no-such-pseudoclass")
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from .core import where, old_where
|
||||
|
||||
__version__ = "2017.04.17"
|
||||
__version__ = "2017.11.05"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -19,17 +19,18 @@ class DeprecatedBundleWarning(DeprecationWarning):
|
||||
|
||||
|
||||
def where():
|
||||
f = os.path.split(__file__)[0]
|
||||
f = os.path.dirname(__file__)
|
||||
|
||||
return os.path.join(f, 'cacert.pem')
|
||||
|
||||
|
||||
def old_where():
|
||||
warnings.warn(
|
||||
"The weak security bundle is being deprecated.",
|
||||
"The weak security bundle is being deprecated. It will be removed in "
|
||||
"2018.",
|
||||
DeprecatedBundleWarning
|
||||
)
|
||||
f = os.path.split(__file__)[0]
|
||||
f = os.path.dirname(__file__)
|
||||
return os.path.join(f, 'weak.pem')
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,436 @@
|
||||
"""contextlib2 - backports and enhancements to the contextlib module"""
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
from collections import deque
|
||||
from functools import wraps
|
||||
|
||||
__all__ = ["contextmanager", "closing", "ContextDecorator", "ExitStack",
|
||||
"redirect_stdout", "redirect_stderr", "suppress"]
|
||||
|
||||
# Backwards compatibility
|
||||
__all__ += ["ContextStack"]
|
||||
|
||||
class ContextDecorator(object):
    "A base class or mixin that enables context managers to work as decorators."

    def refresh_cm(self):
        """Returns the context manager used to actually wrap the call to the
        decorated function.

        The default implementation just returns *self*.

        Overriding this method allows otherwise one-shot context managers
        like _GeneratorContextManager to support use as decorators via
        implicit recreation.

        DEPRECATED: refresh_cm was never added to the standard library's
                    ContextDecorator API
        """
        # stacklevel=2 attributes the deprecation to the code that called
        # refresh_cm() rather than to this module.
        warnings.warn("refresh_cm was never added to the standard library",
                      DeprecationWarning, stacklevel=2)
        return self._recreate_cm()

    def _recreate_cm(self):
        """Return a recreated instance of self.

        Allows an otherwise one-shot context manager like
        _GeneratorContextManager to support use as
        a decorator via implicit recreation.

        This is a private interface just for _GeneratorContextManager.
        See issue #11647 for details.
        """
        return self

    def __call__(self, func):
        """Decorate *func* so that every call runs inside this context manager.

        The context manager entered for each call is whatever
        ``_recreate_cm()`` returns at call time; the wrapper returns the
        wrapped function's result.
        """
        @wraps(func)
        def inner(*args, **kwds):
            with self._recreate_cm():
                return func(*args, **kwds)
        return inner
|
||||
|
||||
|
||||
class _GeneratorContextManager(ContextDecorator):
    """Helper for @contextmanager decorator."""

    def __init__(self, func, args, kwds):
        """Create the generator by calling ``func(*args, **kwds)``.

        The function and its arguments are kept so that ``_recreate_cm``
        can build a fresh instance later.
        """
        self.gen = func(*args, **kwds)
        self.func, self.args, self.kwds = func, args, kwds
        # Issue 19330: ensure context manager instances have good docstrings
        doc = getattr(func, "__doc__", None)
        if doc is None:
            doc = type(self).__doc__
        self.__doc__ = doc
        # Unfortunately, this still doesn't provide good help output when
        # inspecting the created context manager instances, since pydoc
        # currently bypasses the instance docstring and shows the docstring
        # for the class instead.
        # See http://bugs.python.org/issue19404 for more details.

    def _recreate_cm(self):
        # _GCM instances are one-shot context managers, so the
        # CM must be recreated each time a decorated function is
        # called
        return self.__class__(self.func, self.args, self.kwds)

    def __enter__(self):
        """Advance the generator to its first yield and return that value.

        Raises RuntimeError if the generator finishes without yielding.
        """
        try:
            return next(self.gen)
        except StopIteration:
            raise RuntimeError("generator didn't yield")

    def __exit__(self, type, value, traceback):
        """Resume the generator, throwing the body's exception into it if any.

        Returns a true value only when the generator swallowed the
        exception; raises RuntimeError if the generator yields again.
        """
        if type is None:
            try:
                next(self.gen)
            except StopIteration:
                return
            else:
                raise RuntimeError("generator didn't stop")
        else:
            if value is None:
                # Need to force instantiation so we can reliably
                # tell if we get the same exception back
                value = type()
            try:
                self.gen.throw(type, value, traceback)
                raise RuntimeError("generator didn't stop after throw()")
            except StopIteration as exc:
                # Suppress StopIteration *unless* it's the same exception that
                # was passed to throw().  This prevents a StopIteration
                # raised inside the "with" statement from being suppressed.
                return exc is not value
            except RuntimeError as exc:
                # Don't re-raise the passed in exception
                if exc is value:
                    return False
                # Likewise, avoid suppressing if a StopIteration exception
                # was passed to throw() and later wrapped into a RuntimeError
                # (see PEP 479).
                if _HAVE_EXCEPTION_CHAINING and exc.__cause__ is value:
                    return False
                raise
            except:
                # only re-raise if it's *not* the exception that was
                # passed to throw(), because __exit__() must not raise
                # an exception unless __exit__() itself failed.  But throw()
                # has to raise the exception to signal propagation, so this
                # fixes the impedance mismatch between the throw() protocol
                # and the __exit__() protocol.
                #
                if sys.exc_info()[1] is not value:
                    raise
|
||||
|
||||
|
||||
def contextmanager(func):
    """@contextmanager decorator.

    Typical usage:

        @contextmanager
        def some_generator(<arguments>):
            <setup>
            try:
                yield <value>
            finally:
                <cleanup>

    This makes this:

        with some_generator(<arguments>) as <variable>:
            <body>

    equivalent to this:

        <setup>
        try:
            <variable> = <value>
            <body>
        finally:
            <cleanup>

    """
    # Each call of the decorated function builds a new one-shot context
    # manager around a fresh invocation of the generator function.
    @wraps(func)
    def helper(*args, **kwds):
        return _GeneratorContextManager(func, args, kwds)
    return helper
|
||||
|
||||
|
||||
class closing(object):
    """Context to automatically close something at the end of a block.

    Code like this:

        with closing(<module>.open(<arguments>)) as f:
            <block>

    is equivalent to this:

        f = <module>.open(<arguments>)
        try:
            <block>
        finally:
            f.close()

    """
    def __init__(self, thing):
        self.thing = thing

    def __enter__(self):
        return self.thing

    def __exit__(self, *exc_info):
        # Returns None, so an exception raised in the block still propagates
        # after the wrapped object has been closed.
        self.thing.close()
|
||||
|
||||
|
||||
class _RedirectStream(object):
    """Base for context managers that temporarily rebind a ``sys`` stream.

    Subclasses set ``_stream`` to the name of the ``sys`` attribute to
    replace.
    """

    _stream = None

    def __init__(self, new_target):
        self._new_target = new_target
        # We use a list of old targets to make this CM re-entrant
        self._old_targets = []

    def __enter__(self):
        # Remember the current stream, then install the replacement.
        self._old_targets.append(getattr(sys, self._stream))
        setattr(sys, self._stream, self._new_target)
        return self._new_target

    def __exit__(self, exctype, excinst, exctb):
        # Restore whichever stream was active when the matching __enter__ ran.
        setattr(sys, self._stream, self._old_targets.pop())


class redirect_stdout(_RedirectStream):
    """Context manager for temporarily redirecting stdout to another file.

        # How to send help() to stderr
        with redirect_stdout(sys.stderr):
            help(dir)

        # How to write help() to a file
        with open('help.txt', 'w') as f:
            with redirect_stdout(f):
                help(pow)
    """

    _stream = "stdout"


class redirect_stderr(_RedirectStream):
    """Context manager for temporarily redirecting stderr to another file."""

    _stream = "stderr"
|
||||
|
||||
|
||||
class suppress(object):
    """Context manager to suppress specified exceptions

    After the exception is suppressed, execution proceeds with the next
    statement following the with statement.

         with suppress(FileNotFoundError):
             os.remove(somefile)
         # Execution still resumes here if the file was already removed
    """

    def __init__(self, *exceptions):
        self._exceptions = exceptions

    def __enter__(self):
        pass

    def __exit__(self, exctype, excinst, exctb):
        # Unlike isinstance and issubclass, CPython exception handling
        # currently only looks at the concrete type hierarchy (ignoring
        # the instance and subclass checking hooks). While Guido considers
        # that a bug rather than a feature, it's a fairly hard one to fix
        # due to various internal implementation details. suppress provides
        # the simpler issubclass based semantics, rather than trying to
        # exactly reproduce the limitations of the CPython interpreter.
        #
        # See http://bugs.python.org/issue12029 for more details
        return exctype is not None and issubclass(exctype, self._exceptions)
|
||||
|
||||
|
||||
# Context manipulation is Python 3 only
_HAVE_EXCEPTION_CHAINING = sys.version_info[0] >= 3
if _HAVE_EXCEPTION_CHAINING:
    def _make_context_fixer(frame_exc):
        """Return a function that repairs the __context__ chain of new_exc.

        *frame_exc* marks where the walk along the chain stops.
        """
        def _fix_exception_context(new_exc, old_exc):
            # Context may not be correct, so find the end of the chain
            while True:
                exc_context = new_exc.__context__
                if exc_context is old_exc:
                    # Context is already set correctly (see issue 20317)
                    return
                if exc_context is None or exc_context is frame_exc:
                    break
                new_exc = exc_context
            # Change the end of the chain to point to the exception
            # we expect it to reference
            new_exc.__context__ = old_exc
        return _fix_exception_context

    def _reraise_with_existing_context(exc_details):
        """Re-raise exc_details[1] while keeping its current __context__."""
        try:
            # bare "raise exc_details[1]" replaces our carefully
            # set-up context
            fixed_ctx = exc_details[1].__context__
            raise exc_details[1]
        except BaseException:
            exc_details[1].__context__ = fixed_ctx
            raise
else:
    # No exception context in Python 2
    def _make_context_fixer(frame_exc):
        return lambda new_exc, old_exc: None

    # Use 3 argument raise in Python 2,
    # but use exec to avoid SyntaxError in Python 3
    def _reraise_with_existing_context(exc_details):
        exc_type, exc_value, exc_tb = exc_details
        exec("raise exc_type, exc_value, exc_tb")
|
||||
|
||||
# Handle old-style classes if they exist
try:
    from types import InstanceType
except ImportError:
    # Python 3 doesn't have old-style classes
    _get_type = type
else:
    # Need to handle old-style context managers on Python 2
    def _get_type(obj):
        """Return the class of *obj*, including for old-style instances."""
        obj_type = type(obj)
        if obj_type is InstanceType:
            return obj.__class__  # Old-style class
        return obj_type  # New-style class
|
||||
|
||||
# Inspired by discussions on http://bugs.python.org/issue13585
|
||||
class ExitStack(object):
    """Context manager maintaining a dynamic stack of exit callbacks.

    Registered callbacks are invoked in LIFO order when the stack is closed
    or when its ``with`` block ends.

    Example::

        with ExitStack() as stack:
            files = [stack.enter_context(open(fname)) for fname in filenames]
            # Every file opened so far is closed when the with statement
            # ends, even if opening a later file in the list raises.
    """

    def __init__(self):
        # Most recently registered callback sits at the right-hand end.
        self._exit_callbacks = deque()

    def pop_all(self):
        """Move all registered callbacks to a new stack and return it.

        The new stack is created with ``type(self)()``; this stack is left
        with a fresh, empty callback queue.
        """
        transferred = type(self)()
        transferred._exit_callbacks, self._exit_callbacks = (
            self._exit_callbacks, deque())
        return transferred

    def _push_cm_exit(self, cm, cm_exit):
        """Register ``cm_exit(cm, *exc_details)`` as an exit callback."""
        def _exit_wrapper(*exc_details):
            return cm_exit(cm, *exc_details)
        # Expose the wrapped context manager the way a bound method would.
        setattr(_exit_wrapper, '__self__', cm)
        self.push(_exit_wrapper)

    def push(self, exit):
        """Register a callback that uses the ``__exit__`` signature.

        The callback may suppress exceptions by returning a true value, just
        as an ``__exit__`` method can.  If *exit* is an object whose type
        defines ``__exit__``, a call to that method is registered instead of
        the object itself.

        Returns *exit* unchanged so this method can be used as a decorator.
        """
        # Special methods are looked up on the type, not on the instance.
        owner = _get_type(exit)
        try:
            exit_method = owner.__exit__
        except AttributeError:
            # Not a context manager, so assume it's a plain callable.
            self._exit_callbacks.append(exit)
            return exit
        self._push_cm_exit(exit, exit_method)
        return exit

    def callback(self, callback, *args, **kwds):
        """Register ``callback(*args, **kwds)`` to run when the stack unwinds.

        The callback's return value is discarded, so it cannot suppress
        exceptions.  Returns *callback* so this can be used as a decorator.
        """
        def _exit_wrapper(exc_type, exc, tb):
            callback(*args, **kwds)
        # The signature differs from the callback's, so @wraps would be
        # misleading; __wrapped__ alone still helps introspection.
        setattr(_exit_wrapper, '__wrapped__', callback)
        self.push(_exit_wrapper)
        return callback

    def enter_context(self, cm):
        """Enter *cm*, register its ``__exit__`` and return the enter result.

        ``__exit__`` is fetched before ``__enter__`` is called, and it is only
        registered once ``__enter__`` has returned.
        """
        # Like the with statement, look both special methods up on the type.
        owner = _get_type(cm)
        exit_method = owner.__exit__
        entered = owner.__enter__(cm)
        self._push_cm_exit(cm, exit_method)
        return entered

    def close(self):
        """Unwind the callback stack immediately."""
        self.__exit__(None, None, None)

    def __enter__(self):
        return self

    def __exit__(self, *exc_details):
        """Run the registered callbacks in LIFO order.

        Each callback receives the currently active exception details.  A
        true return value clears them; an exception raised by a callback
        replaces them.  If an exception is still pending at the end it is
        re-raised; otherwise the result is true only when an exception was
        passed in and some callback suppressed it.
        """
        received_exc = exc_details[0] is not None

        # Exception state is adjusted so the outcome matches what nested
        # with statements would have produced.
        fix_context = _make_context_fixer(sys.exc_info()[1])

        suppressed_exc = False
        pending_raise = False
        # Re-read the attribute each time round: a callback may swap it.
        while self._exit_callbacks:
            exit_cb = self._exit_callbacks.pop()
            try:
                handled = bool(exit_cb(*exc_details))
            except:
                raised = sys.exc_info()
                # Chain the new exception onto the one it replaced.
                fix_context(raised[1], exc_details[1])
                pending_raise = True
                exc_details = raised
            else:
                if handled:
                    suppressed_exc = True
                    pending_raise = False
                    exc_details = (None, None, None)
        if pending_raise:
            _reraise_with_existing_context(exc_details)
        return received_exc and suppressed_exc
|
||||
|
||||
# Preserve backwards compatibility
|
||||
class ContextStack(ExitStack):
    """Backwards compatibility alias for ExitStack.

    Instantiating it emits a DeprecationWarning; the extra methods simply
    forward to the ExitStack methods that replaced them.
    """

    def __init__(self):
        # stacklevel=2 attributes the warning to the code that instantiated
        # ContextStack rather than to this line inside the library.
        warnings.warn("ContextStack has been renamed to ExitStack",
                      DeprecationWarning, stacklevel=2)
        super(ContextStack, self).__init__()

    def register_exit(self, callback):
        """Old name for :meth:`push`."""
        return self.push(callback)

    def register(self, callback, *args, **kwds):
        """Old name for :meth:`callback`."""
        return self.callback(callback, *args, **kwds)

    def preserve(self):
        """Old name for :meth:`pop_all`."""
        return self.pop_all()
|
||||
@@ -1,4 +1,4 @@
|
||||
__version__ = '0.6.2'
|
||||
__version__ = '0.6.5'
|
||||
|
||||
from .lock import Lock # noqa
|
||||
from .lock import NeedRegenerationException # noqa
|
||||
from .lock import NeedRegenerationException # noqa
|
||||
|
||||
@@ -13,6 +13,13 @@ class NoValue(object):
|
||||
def payload(self):
|
||||
return self
|
||||
|
||||
def __repr__(self):
|
||||
"""Ensure __repr__ is a consistent value in case NoValue is used to
|
||||
fill another cache key.
|
||||
|
||||
"""
|
||||
return '<dogpile.cache.api.NoValue object>'
|
||||
|
||||
if py3k:
|
||||
def __bool__(self): # pragma NO COVERAGE
|
||||
return False
|
||||
@@ -20,6 +27,7 @@ class NoValue(object):
|
||||
def __nonzero__(self): # pragma NO COVERAGE
|
||||
return False
|
||||
|
||||
|
||||
NO_VALUE = NoValue()
|
||||
"""Value returned from ``get()`` that describes
|
||||
a key not present."""
|
||||
|
||||
@@ -15,3 +15,11 @@ class RegionNotConfigured(DogpileCacheException):
|
||||
|
||||
class ValidationError(DogpileCacheException):
|
||||
"""Error validating a value or option."""
|
||||
|
||||
|
||||
class PluginNotFound(DogpileCacheException):
|
||||
"""The specified plugin could not be found.
|
||||
|
||||
.. versionadded:: 0.6.4
|
||||
|
||||
"""
|
||||
|
||||
+35
-5
@@ -410,7 +410,13 @@ class CacheRegion(object):
|
||||
"configured with backend: %s. "
|
||||
"Specify replace_existing_backend=True to replace."
|
||||
% self.backend)
|
||||
backend_cls = _backend_loader.load(backend)
|
||||
|
||||
try:
|
||||
backend_cls = _backend_loader.load(backend)
|
||||
except PluginLoader.NotFound:
|
||||
raise exception.PluginNotFound(
|
||||
"Couldn't find cache plugin to load: %s" % backend)
|
||||
|
||||
if _config_argument_dict:
|
||||
self.backend = backend_cls.from_config_dict(
|
||||
_config_argument_dict,
|
||||
@@ -487,8 +493,19 @@ class CacheRegion(object):
|
||||
a value. Any retrieved value whose creation
|
||||
time is prior to this timestamp
|
||||
is considered to be stale. It does not
|
||||
affect the data in the cache in any way, and is also
|
||||
local to this instance of :class:`.CacheRegion`.
|
||||
affect the data in the cache in any way, and is
|
||||
**local to this instance of :class:`.CacheRegion`.**
|
||||
|
||||
.. warning::
|
||||
|
||||
The :meth:`.CacheRegion.invalidate` method's default mode of
|
||||
operation is to set a timestamp **local to this CacheRegion
|
||||
in this Python process only**. It does not impact other Python
|
||||
processes or regions as the timestamp is **only stored locally in
|
||||
memory**. To implement invalidation where the
|
||||
timestamp is stored in the cache or similar so that all Python
|
||||
processes can be affected by an invalidation timestamp, implement a
|
||||
custom :class:`.RegionInvalidationStrategy`.
|
||||
|
||||
Once set, the invalidation time is honored by
|
||||
the :meth:`.CacheRegion.get_or_create`,
|
||||
@@ -550,6 +567,8 @@ class CacheRegion(object):
|
||||
_config_prefix="%sarguments." % prefix,
|
||||
wrap=config_dict.get(
|
||||
"%swrap" % prefix, None),
|
||||
replace_existing_backend=config_dict.get(
|
||||
"%sreplace_existing_backend" % prefix, False),
|
||||
)
|
||||
|
||||
@memoized_property
|
||||
@@ -944,11 +963,14 @@ class CacheRegion(object):
|
||||
if not should_cache_fn:
|
||||
self.backend.set_multi(values_w_created)
|
||||
else:
|
||||
self.backend.set_multi(dict(
|
||||
values_to_cache = dict(
|
||||
(k, v)
|
||||
for k, v in values_w_created.items()
|
||||
if should_cache_fn(v[0])
|
||||
))
|
||||
)
|
||||
|
||||
if values_to_cache:
|
||||
self.backend.set_multi(values_to_cache)
|
||||
|
||||
values.update(values_w_created)
|
||||
return [values[orig_to_mangled[k]].payload for k in keys]
|
||||
@@ -1075,6 +1097,14 @@ class CacheRegion(object):
|
||||
.. versionadded:: 0.5.0 Added ``refresh()`` method to decorated
|
||||
function.
|
||||
|
||||
``original()`` on other hand will invoke the decorated function
|
||||
without any caching::
|
||||
|
||||
newvalue = generate_something.original(5, 6)
|
||||
|
||||
.. versionadded:: 0.6.0 Added ``original()`` method to decorated
|
||||
function.
|
||||
|
||||
Lastly, the ``get()`` method returns either the value cached
|
||||
for the given key, or the token ``NO_VALUE`` if no such key
|
||||
exists::
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from .nameregistry import NameRegistry # noqa
|
||||
from .readwrite_lock import ReadWriteMutex # noqa
|
||||
from .langhelpers import PluginLoader, memoized_property, \
|
||||
coerce_string_conf, to_list, KeyReentrantMutex # noqa
|
||||
coerce_string_conf, to_list, KeyReentrantMutex # noqa
|
||||
|
||||
@@ -39,9 +39,9 @@ class PluginLoader(object):
|
||||
self.impls[name] = impl.load
|
||||
return impl.load()
|
||||
else:
|
||||
raise Exception(
|
||||
"Can't load plugin %s %s" %
|
||||
(self.group, name))
|
||||
raise self.NotFound(
|
||||
"Can't load plugin %s %s" % (self.group, name)
|
||||
)
|
||||
|
||||
def register(self, name, modulepath, objname):
|
||||
def load():
|
||||
@@ -49,6 +49,9 @@ class PluginLoader(object):
|
||||
return getattr(mod, objname)
|
||||
self.impls[name] = load
|
||||
|
||||
class NotFound(Exception):
|
||||
"""The specified plugin could not be found."""
|
||||
|
||||
|
||||
class memoized_property(object):
|
||||
"""A read-only @property that is only evaluated once."""
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
Copyright (c) 2013, Ethan Furman.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
Redistributions of source code must retain the above
|
||||
copyright notice, this list of conditions and the
|
||||
following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
|
||||
Neither the name Ethan Furman nor the names of any
|
||||
contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -0,0 +1,3 @@
|
||||
enum34 is the new Python stdlib enum module available in Python 3.4
|
||||
backported for previous versions of Python from 2.4 to 3.3.
|
||||
Tested on 2.6, 2.7, and 3.3+.
|
||||
@@ -0,0 +1,837 @@
|
||||
"""Python Enumerations"""
|
||||
|
||||
import sys as _sys
|
||||
|
||||
__all__ = ['Enum', 'IntEnum', 'unique']
|
||||
|
||||
version = 1, 1, 6
|
||||
|
||||
pyver = float('%s.%s' % _sys.version_info[:2])
|
||||
|
||||
# Fall back to a pure-Python any() on interpreters without the builtin.
try:
    any
except NameError:
    def any(iterable):
        """Return True as soon as one element of *iterable* is truthy."""
        for candidate in iterable:
            if not candidate:
                continue
            return True
        return False

# OrderedDict is optional; None signals that a plain dict must be used.
try:
    from collections import OrderedDict
except ImportError:
    OrderedDict = None

try:
    basestring
except NameError:
    # Python 2 has basestring as the common ancestor of str and unicode;
    # on Python 3 only str remains (the name was also missing in 3.1).
    basestring = str

try:
    unicode
except NameError:
    # Python 3 dropped the separate unicode type: str fills that role.
    unicode = str
|
||||
|
||||
class _RouteClassAttributeToGetattr(object):
|
||||
"""Route attribute access on a class to __getattr__.
|
||||
|
||||
This is a descriptor, used to define attributes that act differently when
|
||||
accessed through an instance and through a class. Instance access remains
|
||||
normal, but access to an attribute through a class will be routed to the
|
||||
class's __getattr__ method; this is done by raising AttributeError.
|
||||
|
||||
"""
|
||||
def __init__(self, fget=None):
|
||||
self.fget = fget
|
||||
|
||||
def __get__(self, instance, ownerclass=None):
|
||||
if instance is None:
|
||||
raise AttributeError()
|
||||
return self.fget(instance)
|
||||
|
||||
def __set__(self, instance, value):
|
||||
raise AttributeError("can't set attribute")
|
||||
|
||||
def __delete__(self, instance):
|
||||
raise AttributeError("can't delete attribute")
|
||||
|
||||
|
||||
def _is_descriptor(obj):
|
||||
"""Returns True if obj is a descriptor, False otherwise."""
|
||||
return (
|
||||
hasattr(obj, '__get__') or
|
||||
hasattr(obj, '__set__') or
|
||||
hasattr(obj, '__delete__'))
|
||||
|
||||
|
||||
def _is_dunder(name):
|
||||
"""Returns True if a __dunder__ name, False otherwise."""
|
||||
return (name[:2] == name[-2:] == '__' and
|
||||
name[2:3] != '_' and
|
||||
name[-3:-2] != '_' and
|
||||
len(name) > 4)
|
||||
|
||||
|
||||
def _is_sunder(name):
|
||||
"""Returns True if a _sunder_ name, False otherwise."""
|
||||
return (name[0] == name[-1] == '_' and
|
||||
name[1:2] != '_' and
|
||||
name[-2:-1] != '_' and
|
||||
len(name) > 2)
|
||||
|
||||
|
||||
def _make_class_unpicklable(cls):
|
||||
"""Make the given class un-picklable."""
|
||||
def _break_on_call_reduce(self, protocol=None):
|
||||
raise TypeError('%r cannot be pickled' % self)
|
||||
cls.__reduce_ex__ = _break_on_call_reduce
|
||||
cls.__module__ = '<unknown>'
|
||||
|
||||
|
||||
class _EnumDict(dict):
    """Class namespace that records enum member names in definition order.

    EnumMeta reads ``self._member_names`` to learn which keys are members;
    the same member name may not be assigned twice.
    """

    def __init__(self):
        super(_EnumDict, self).__init__()
        self._member_names = []

    def __setitem__(self, key, value):
        """Store *key*, recording it as a member name when appropriate.

        * ``_order_`` / ``__order__`` are dropped entirely on Python 3; on
          Python 2 ``__order__`` is stored under the key ``_order_``.
        * Any other single-underscore (sunder) name is reserved: ValueError.
        * Dunder names are stored without further checks.
        * Re-using an existing member name raises TypeError.
        * A non-descriptor value becomes a member, unless the key is already
          present in the namespace, which raises TypeError.  Duplicate
          values are not checked for.
        """
        if key in ('_order_', '__order__'):
            if pyver >= 3.0:
                # Definition order is already known on 3.x; discard.
                return
            key = '_order_'

        if _is_sunder(key):
            if key != '_order_':
                raise ValueError('_names_ are reserved for future Enum use')
        elif not _is_dunder(key):
            if key in self._member_names:
                # descriptor overwriting an enum?
                raise TypeError('Attempted to reuse key: %r' % key)
            if not _is_descriptor(value):
                if key in self:
                    # enum overwriting a descriptor?
                    raise TypeError('Key already defined as: %r' % self[key])
                self._member_names.append(key)

        super(_EnumDict, self).__setitem__(key, value)
|
||||
|
||||
|
||||
# Dummy value for Enum as EnumMeta explicitly checks for it, but of course until
|
||||
# EnumMeta finishes running the first time the Enum class doesn't exist. This
|
||||
# is also why there are checks in EnumMeta like `if Enum is not None`
|
||||
Enum = None
|
||||
|
||||
|
||||
class EnumMeta(type):
    """Metaclass for Enum.

    Creates the member objects when an enum class is defined, provides the
    class-level container behaviour (iteration, ``len()``, lookup by name or
    value) and implements the functional API.
    """

    @classmethod
    def __prepare__(metacls, cls, bases):
        """Return the namespace used while the class body is executed."""
        return _EnumDict()

    def __new__(metacls, cls, bases, classdict):
        """Create a new enum class.

        metacls: this metaclass
        cls: name of the class being created
        bases: tuple of base classes
        classdict: the class namespace; a plain dict is copied into an
            _EnumDict first so member names get recorded
        """
        # an Enum class is final once enumeration items have been defined; it
        # cannot be mixed with other types (int, float, etc.) if it has an
        # inherited __new__ unless a new __new__ is defined (or the resulting
        # class will fail).
        if type(classdict) is dict:
            original_dict = classdict
            classdict = _EnumDict()
            for k, v in original_dict.items():
                classdict[k] = v

        member_type, first_enum = metacls._get_mixins_(bases)
        __new__, save_new, use_args = metacls._find_new_(classdict, member_type,
                                                        first_enum)
        # save enum items into separate mapping so they don't get baked into
        # the new class
        members = dict((k, classdict[k]) for k in classdict._member_names)
        for name in classdict._member_names:
            del classdict[name]

        # py2 support for definition order
        _order_ = classdict.get('_order_')
        if _order_ is None:
            if pyver < 3.0:
                # no recorded order: sort by value, or by name when the
                # values cannot be compared with each other
                try:
                    _order_ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])]
                except TypeError:
                    _order_ = [name for name in sorted(members.keys())]
            else:
                _order_ = classdict._member_names
        else:
            del classdict['_order_']
            if pyver < 3.0:
                _order_ = _order_.replace(',', ' ').split()
                # members missing from the explicit order go last
                aliases = [name for name in members if name not in _order_]
                _order_ += aliases

        # check for illegal enum names (any others?)
        invalid_names = set(members) & set(['mro'])
        if invalid_names:
            raise ValueError('Invalid enum member name(s): %s' % (
                ', '.join(invalid_names), ))

        # save attributes from super classes so we know if we can take
        # the shortcut of storing members in the class dict
        base_attributes = set([a for b in bases for a in b.__dict__])
        # create our new Enum type
        enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict)
        enum_class._member_names_ = []               # canonical names, in _order_ order
        if OrderedDict is not None:
            enum_class._member_map_ = OrderedDict()
        else:
            enum_class._member_map_ = {}             # name->value map
        enum_class._member_type_ = member_type

        # Reverse value->name map for hashable values.
        enum_class._value2member_map_ = {}

        # instantiate them, checking for duplicates as we go
        # we instantiate first instead of checking for duplicates first in case
        # a custom __new__ is doing something funky with the values -- such as
        # auto-numbering ;)
        if __new__ is None:
            __new__ = enum_class.__new__
        for member_name in _order_:
            value = members[member_name]
            if not isinstance(value, tuple):
                args = (value, )
            else:
                args = value
            if member_type is tuple:   # special case for tuple enums
                args = (args, )        # wrap it one more time
            if not use_args or not args:
                enum_member = __new__(enum_class)
                if not hasattr(enum_member, '_value_'):
                    enum_member._value_ = value
            else:
                enum_member = __new__(enum_class, *args)
                if not hasattr(enum_member, '_value_'):
                    enum_member._value_ = member_type(*args)
            value = enum_member._value_
            enum_member._name_ = member_name
            enum_member.__objclass__ = enum_class
            enum_member.__init__(*args)
            # If another member with the same value was already defined, the
            # new member becomes an alias to the existing one.
            for name, canonical_member in enum_class._member_map_.items():
                if canonical_member.value == enum_member._value_:
                    enum_member = canonical_member
                    break
            else:
                # Aliases don't appear in member names (only in __members__).
                enum_class._member_names_.append(member_name)
            # performance boost for any member that would not shadow
            # a DynamicClassAttribute (aka _RouteClassAttributeToGetattr)
            if member_name not in base_attributes:
                setattr(enum_class, member_name, enum_member)
            # now add to _member_map_
            enum_class._member_map_[member_name] = enum_member
            try:
                # This may fail if value is not hashable. We can't add the value
                # to the map, and by-value lookups for this value will be
                # linear.
                enum_class._value2member_map_[value] = enum_member
            except TypeError:
                pass

        # If a custom type is mixed into the Enum, and it does not know how
        # to pickle itself, pickle.dumps will succeed but pickle.loads will
        # fail.  Rather than have the error show up later and possibly far
        # from the source, sabotage the pickle protocol for this class so
        # that pickle.dumps also fails.
        #
        # However, if the new class implements its own __reduce_ex__, do not
        # sabotage -- it's on them to make sure it works correctly.  We use
        # __reduce_ex__ instead of any of the others as it is preferred by
        # pickle over __reduce__, and it handles all pickle protocols.
        unpicklable = False
        if '__reduce_ex__' not in classdict:
            if member_type is not object:
                methods = ('__getnewargs_ex__', '__getnewargs__',
                           '__reduce_ex__', '__reduce__')
                if not any(m in member_type.__dict__ for m in methods):
                    _make_class_unpicklable(enum_class)
                    unpicklable = True

        # double check that repr and friends are not the mixin's or various
        # things break (such as pickle)
        for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'):
            class_method = getattr(enum_class, name)
            enum_method = getattr(first_enum, name, None)
            if name not in classdict and class_method is not enum_method:
                if name == '__reduce_ex__' and unpicklable:
                    continue
                setattr(enum_class, name, enum_method)

        # method resolution and ints are not playing nice
        # Pythons older than 2.6 use __cmp__
        if pyver < 2.6:
            if issubclass(enum_class, int):
                setattr(enum_class, '__cmp__', getattr(int, '__cmp__'))
        elif pyver < 3.0:
            if issubclass(enum_class, int):
                for method in (
                        '__le__',
                        '__lt__',
                        '__gt__',
                        '__ge__',
                        '__eq__',
                        '__ne__',
                        '__hash__',
                        ):
                    setattr(enum_class, method, getattr(int, method))

        # replace any other __new__ with our own (as long as Enum is not None,
        # anyway) -- again, this is to support pickle
        if Enum is not None:
            # if the user defined their own __new__, save it before it gets
            # clobbered in case they subclass later
            if save_new:
                setattr(enum_class, '__member_new__', enum_class.__dict__['__new__'])
            setattr(enum_class, '__new__', Enum.__dict__['__new__'])
        return enum_class

    def __bool__(cls):
        """
        classes/types should always be True.
        """
        return True

    def __call__(cls, value, names=None, module=None, type=None, start=1):
        """Either returns an existing member, or creates a new enum class.

        This method is used both when an enum class is given a value to match
        to an enumeration member (i.e. Color(3)) and for the functional API
        (i.e. Color = Enum('Color', names='red green blue')).

        When used for the functional API: `module`, if set, will be stored in
        the new class' __module__ attribute; `type`, if set, will be mixed in
        as the first base class; `start` is the first auto-assigned value.

        Note: if `module` is not set this routine will attempt to discover the
        calling module by walking the frame stack; if this is unsuccessful
        the resulting class will not be pickleable.

        """
        if names is None:  # simple value lookup
            return cls.__new__(cls, value)
        # otherwise, functional API: we're creating a new Enum type
        return cls._create_(value, names, module=module, type=type, start=start)

    def __contains__(cls, member):
        """Return True if `member` is an instance of this enum class whose
        name is in the member map."""
        return isinstance(member, cls) and member.name in cls._member_map_

    def __delattr__(cls, attr):
        # nicer error message when someone tries to delete an attribute
        # (see issue19025).
        if attr in cls._member_map_:
            raise AttributeError(
                    "%s: cannot delete Enum member." % cls.__name__)
        super(EnumMeta, cls).__delattr__(attr)

    def __dir__(self):
        return (['__class__', '__doc__', '__members__', '__module__'] +
                self._member_names_)

    @property
    def __members__(cls):
        """Returns a mapping of member name->value.

        This mapping lists all enum members, including aliases. Note that this
        is a copy of the internal mapping.

        """
        return cls._member_map_.copy()

    def __getattr__(cls, name):
        """Return the enum member matching `name`

        We use __getattr__ instead of descriptors or inserting into the enum
        class' __dict__ in order to support `name` and `value` being both
        properties for enum members (which live in the class' __dict__) and
        enum members themselves.

        """
        if _is_dunder(name):
            raise AttributeError(name)
        try:
            return cls._member_map_[name]
        except KeyError:
            raise AttributeError(name)

    def __getitem__(cls, name):
        """Return the member called `name`; raises KeyError if unknown."""
        return cls._member_map_[name]

    def __iter__(cls):
        """Iterate over the canonical members (aliases are skipped)."""
        return (cls._member_map_[name] for name in cls._member_names_)

    def __reversed__(cls):
        """Iterate over the canonical members in reverse order."""
        return (cls._member_map_[name] for name in reversed(cls._member_names_))

    def __len__(cls):
        """Return the number of canonical members (aliases not counted)."""
        return len(cls._member_names_)

    # Python 2 spelling of __bool__
    __nonzero__ = __bool__

    def __repr__(cls):
        return "<enum %r>" % cls.__name__

    def __setattr__(cls, name, value):
        """Block attempts to reassign Enum members.

        A simple assignment to the class namespace only changes one of the
        several possible ways to get an Enum member from the Enum class,
        resulting in an inconsistent Enumeration.

        """
        member_map = cls.__dict__.get('_member_map_', {})
        if name in member_map:
            raise AttributeError('Cannot reassign members.')
        super(EnumMeta, cls).__setattr__(name, value)

    def _create_(cls, class_name, names=None, module=None, type=None, start=1):
        """Convenience method to create a new Enum class.

        `names` can be:

        * A string containing member names, separated either with spaces or
          commas.  Values are auto-numbered from `start`.
        * An iterable of member names.  Values are auto-numbered from `start`.
        * An iterable of (member name, value) pairs.
        * A mapping of member name -> value.

        An empty string or empty sequence produces an enum with no members.

        """
        if pyver < 3.0:
            # if class_name is unicode, attempt a conversion to ASCII
            if isinstance(class_name, unicode):
                try:
                    class_name = class_name.encode('ascii')
                except UnicodeEncodeError:
                    raise TypeError('%r is not representable in ASCII' % class_name)
        metacls = cls.__class__
        if type is None:
            bases = (cls, )
        else:
            bases = (type, cls)
        classdict = metacls.__prepare__(class_name, bases)
        _order_ = []

        # special processing needed for names?
        if isinstance(names, basestring):
            names = names.replace(',', ' ').split()
        # `names and` guards the names[0] probe against an empty sequence
        if (isinstance(names, (tuple, list)) and names
                and isinstance(names[0], basestring)):
            names = [(e, i+start) for (i, e) in enumerate(names)]

        # Here, names is either an iterable of (name, value) or a mapping.
        item = None  # in case names is empty
        for item in names:
            if isinstance(item, basestring):
                member_name, member_value = item, names[item]
            else:
                member_name, member_value = item
            classdict[member_name] = member_value
            _order_.append(member_name)
        # only set _order_ in classdict if name/value was not from a mapping
        if not isinstance(item, basestring):
            classdict['_order_'] = ' '.join(_order_)
        enum_class = metacls.__new__(metacls, class_name, bases, classdict)

        # TODO: replace the frame hack if a blessed way to know the calling
        # module is ever developed
        if module is None:
            try:
                module = _sys._getframe(2).f_globals['__name__']
            except (AttributeError, ValueError):
                pass
        if module is None:
            _make_class_unpicklable(enum_class)
        else:
            enum_class.__module__ = module

        return enum_class

    @staticmethod
    def _get_mixins_(bases):
        """Returns the type for creating enum members, and the first inherited
        enum class.

        bases: the tuple of bases that was given to __new__

        """
        if not bases or Enum is None:
            return object, Enum

        # double check that we are not subclassing a class with existing
        # enumeration members; while we're at it, see if any other data
        # type has been mixed in so we can use the correct __new__
        member_type = first_enum = None
        for base in bases:
            if (base is not Enum and
                    issubclass(base, Enum) and
                    base._member_names_):
                raise TypeError("Cannot extend enumerations")
        # base is now the last base in bases
        if not issubclass(base, Enum):
            raise TypeError("new enumerations must be created as "
                            "`ClassName([mixin_type,] enum_type)`")

        # get correct mix-in type (either mix-in type of Enum subclass, or
        # first base if last base is Enum)
        if not issubclass(bases[0], Enum):
            member_type = bases[0]     # first data type
            first_enum = bases[-1]     # enum type
        else:
            for base in bases[0].__mro__:
                # most common: (IntEnum, int, Enum, object)
                # possible:    (<Enum 'AutoIntEnum'>, <Enum 'IntEnum'>,
                #               <class 'int'>, <Enum 'Enum'>,
                #               <class 'object'>)
                if issubclass(base, Enum):
                    if first_enum is None:
                        first_enum = base
                else:
                    if member_type is None:
                        member_type = base

        return member_type, first_enum

    if pyver < 3.0:
        @staticmethod
        def _find_new_(classdict, member_type, first_enum):
            """Returns the __new__ to be used for creating the enum members.

            classdict: the class dictionary given to __new__
            member_type: the data type whose __new__ will be used by default
            first_enum: enumeration to check for an overriding __new__

            Returns a ``(__new__, save_new, use_args)`` tuple.

            """
            # now find the correct __new__, checking to see if one was defined
            # by the user; also check earlier enum classes in case a __new__ was
            # saved as __member_new__
            __new__ = classdict.get('__new__', None)
            if __new__:
                return None, True, True      # __new__, save_new, use_args

            N__new__ = getattr(None, '__new__')
            O__new__ = getattr(object, '__new__')
            if Enum is None:
                E__new__ = N__new__
            else:
                E__new__ = Enum.__dict__['__new__']
            # check all possibles for __member_new__ before falling back to
            # __new__
            for method in ('__member_new__', '__new__'):
                for possible in (member_type, first_enum):
                    try:
                        target = possible.__dict__[method]
                    except (AttributeError, KeyError):
                        target = getattr(possible, method, None)
                    if target not in [
                            None,
                            N__new__,
                            O__new__,
                            E__new__,
                            ]:
                        if method == '__member_new__':
                            classdict['__new__'] = target
                            return None, False, True
                        if isinstance(target, staticmethod):
                            target = target.__get__(member_type)
                        __new__ = target
                        break
                if __new__ is not None:
                    break
            else:
                __new__ = object.__new__

            # if a non-object.__new__ is used then whatever value/tuple was
            # assigned to the enum member name will be passed to __new__ and to the
            # new enum member's __init__
            if __new__ is object.__new__:
                use_args = False
            else:
                use_args = True

            return __new__, False, use_args
    else:
        @staticmethod
        def _find_new_(classdict, member_type, first_enum):
            """Returns the __new__ to be used for creating the enum members.

            classdict: the class dictionary given to __new__
            member_type: the data type whose __new__ will be used by default
            first_enum: enumeration to check for an overriding __new__

            Returns a ``(__new__, save_new, use_args)`` tuple.

            """
            # now find the correct __new__, checking to see if one was defined
            # by the user; also check earlier enum classes in case a __new__ was
            # saved as __member_new__
            __new__ = classdict.get('__new__', None)

            # should __new__ be saved as __member_new__ later?
            save_new = __new__ is not None

            if __new__ is None:
                # check all possibles for __member_new__ before falling back to
                # __new__
                for method in ('__member_new__', '__new__'):
                    for possible in (member_type, first_enum):
                        target = getattr(possible, method, None)
                        if target not in (
                                None,
                                None.__new__,
                                object.__new__,
                                Enum.__new__,
                                ):
                            __new__ = target
                            break
                    if __new__ is not None:
                        break
                else:
                    __new__ = object.__new__

            # if a non-object.__new__ is used then whatever value/tuple was
            # assigned to the enum member name will be passed to __new__ and to the
            # new enum member's __init__
            if __new__ is object.__new__:
                use_args = False
            else:
                use_args = True

            return __new__, save_new, use_args
|
||||
|
||||
|
||||
########################################################
|
||||
# In order to support Python 2 and 3 with a single
|
||||
# codebase we have to create the Enum methods separately
|
||||
# and then use the `type(name, bases, dict)` method to
|
||||
# create the class.
|
||||
########################################################
|
||||
temp_enum_dict = {}
|
||||
temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n"
|
||||
|
||||
def __new__(cls, value):
    """Return the existing member of *cls* whose value is *value*.

    A member of *cls* passed as *value* is first reduced to its own value
    (lookups like ``Color(Color.red)``).  Raises ValueError when no member
    matches.
    """
    # all enum instances are actually created during class construction
    # without calling this method; this method is called by the metaclass'
    # __call__ (i.e. Color(3) ), and by pickle
    if type(value) is cls:
        value = value.value
    try:
        # reverse mapping first; only usable for hashable values
        reverse_map = cls._value2member_map_
        if value in reverse_map:
            return reverse_map[value]
    except TypeError:
        # the reverse-map lookup raised TypeError: long search -- O(n)
        for candidate in cls._member_map_.values():
            if candidate.value == value:
                return candidate
    raise ValueError("%s is not a valid %s" % (value, cls.__name__))
|
||||
temp_enum_dict['__new__'] = __new__
|
||||
del __new__
|
||||
|
||||
def __repr__(self):
    """Return ``<ClassName.member_name: repr-of-value>`` for this member."""
    class_name = self.__class__.__name__
    return "<%s.%s: %r>" % (class_name, self._name_, self._value_)
|
||||
temp_enum_dict['__repr__'] = __repr__
|
||||
del __repr__
|
||||
|
||||
def __str__(self):
    """Return ``ClassName.member_name`` for this member."""
    class_name = self.__class__.__name__
    return "%s.%s" % (class_name, self._name_)
|
||||
temp_enum_dict['__str__'] = __str__
|
||||
del __str__
|
||||
|
||||
if pyver >= 3.0:
    def __dir__(self):
        """List the attributes reported for a member.

        The result is ``__class__``, ``__doc__`` and ``__module__`` followed
        by every name in the ``__dict__`` of each class in the MRO that does
        not start with an underscore and is not a key of ``_member_map_``.
        """
        listing = ['__class__', '__doc__', '__module__', ]
        for klass in self.__class__.mro():
            for attr in klass.__dict__:
                if attr[0] == '_' or attr in self._member_map_:
                    continue
                listing.append(attr)
        return listing
    temp_enum_dict['__dir__'] = __dir__
    del __dir__
|
||||
|
||||
def __format__(self, format_spec):
    """Format the member according to *format_spec*.

    A pure Enum (member type ``object``) is formatted as its ``str()``;
    a mixed-in Enum is formatted as its value by the mixed-in type.
    """
    # mixed-in Enums should use the mixed-in type's __format__, otherwise
    # we can get strange results with the Enum name showing up instead of
    # the value
    member_type = self._member_type_
    if member_type is object:
        # pure Enum branch
        return str.__format__(str(self), format_spec)
    # mix-in branch
    return member_type.__format__(self.value, format_spec)
|
||||
temp_enum_dict['__format__'] = __format__
|
||||
del __format__
|
||||
|
||||
|
||||
####################################
# Pythons older than 2.6 use __cmp__

if pyver < 2.6:

    def __cmp__(self, other):
        """Three-way comparison used when rich comparisons are unavailable.

        Returns 0 when *other* is this very member, -1 when it is a
        different member of the same class, and NotImplemented for any
        other type.
        """
        if type(other) is self.__class__:
            if self is other:
                return 0
            return -1
        # the `raise TypeError` that used to follow this return could never
        # execute and has been dropped
        return NotImplemented
    temp_enum_dict['__cmp__'] = __cmp__
    del __cmp__

else:

    # ordering is not defined for enum members: each rich ordering method
    # raises TypeError naming the two classes involved

    def __le__(self, other):
        """Always raise TypeError: members do not support ``<=``."""
        raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__le__'] = __le__
    del __le__

    def __lt__(self, other):
        """Always raise TypeError: members do not support ``<``."""
        raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__lt__'] = __lt__
    del __lt__

    def __ge__(self, other):
        """Always raise TypeError: members do not support ``>=``."""
        raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__ge__'] = __ge__
    del __ge__

    def __gt__(self, other):
        """Always raise TypeError: members do not support ``>``."""
        raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__gt__'] = __gt__
    del __gt__
|
||||
|
||||
|
||||
def __eq__(self, other):
    """Identity-based equality, restricted to objects of exactly this class.

    Returns NotImplemented when *other* is of any other type.
    """
    if type(other) is not self.__class__:
        return NotImplemented
    return self is other
|
||||
temp_enum_dict['__eq__'] = __eq__
|
||||
del __eq__
|
||||
|
||||
def __ne__(self, other):
    """Identity-based inequality, restricted to objects of exactly this class.

    Returns NotImplemented when *other* is of any other type.
    """
    if type(other) is not self.__class__:
        return NotImplemented
    return self is not other
|
||||
temp_enum_dict['__ne__'] = __ne__
|
||||
del __ne__
|
||||
|
||||
def __hash__(self):
    """Hash a member by its name (``_name_``)."""
    member_name = self._name_
    return hash(member_name)
|
||||
temp_enum_dict['__hash__'] = __hash__
|
||||
del __hash__
|
||||
|
||||
def __reduce_ex__(self, proto):
    """Pickle support: return ``(class, (value,))``; *proto* is not used."""
    call_args = (self._value_, )
    return self.__class__, call_args
|
||||
temp_enum_dict['__reduce_ex__'] = __reduce_ex__
|
||||
del __reduce_ex__
|
||||
|
||||
# _RouteClassAttributeToGetattr is used to provide access to the `name`
|
||||
# and `value` properties of enum members while keeping some measure of
|
||||
# protection from modification, while still allowing for an enumeration
|
||||
# to have members named `name` and `value`. This works because enumeration
|
||||
# members are not set directly on the enum class -- __getattr__ is
|
||||
# used to look them up.
|
||||
|
||||
@_RouteClassAttributeToGetattr
def name(self):
    """Return the member's name as stored in ``_name_``."""
    member_name = self._name_
    return member_name
|
||||
temp_enum_dict['name'] = name
|
||||
del name
|
||||
|
||||
@_RouteClassAttributeToGetattr
def value(self):
    """Return the member's value as stored in ``_value_``."""
    member_value = self._value_
    return member_value
|
||||
temp_enum_dict['value'] = value
|
||||
del value
|
||||
|
||||
@classmethod
def _convert(cls, name, module, filter, source=None):
    """
    Create a new Enum subclass that replaces a collection of global constants

    name: name of the enumeration to create
    module: name of the module (a key of sys.modules) receiving the result
    filter: callable given each constant name; truthy results are kept
    source: optional object whose vars() supply the constants; when falsy
            the module's own globals are used
    """
    # convert all constants from source (or module) that pass filter() to
    # a new Enum called name, and export the enum and its members back to
    # module;
    # also, replace the __reduce_ex__ method so unpickling works in
    # previous Python versions
    namespace = vars(_sys.modules[module])
    constants = namespace
    if source:
        constants = vars(source)
    selected = dict(
        (key, val) for key, val in constants.items() if filter(key)
    )
    new_enum = cls(name, selected, module=module)
    new_enum.__reduce_ex__ = _reduce_ex_by_name
    namespace.update(new_enum.__members__)
    namespace[name] = new_enum
    return new_enum
|
||||
temp_enum_dict['_convert'] = _convert
|
||||
del _convert
|
||||
|
||||
Enum = EnumMeta('Enum', (object, ), temp_enum_dict)
|
||||
del temp_enum_dict
|
||||
|
||||
# Enum has now been created
|
||||
###########################
|
||||
|
||||
# `int` is listed before `Enum`: mix-in types must appear before Enum itself
# in the sequence of bases.
class IntEnum(int, Enum):
    """Enum where members are also (and must be) ints"""
|
||||
|
||||
def _reduce_ex_by_name(self, proto):
    """Alternative ``__reduce_ex__``: return the member's name; *proto* is not used."""
    member_name = self.name
    return member_name
|
||||
|
||||
def unique(enumeration):
    """Class decorator that rejects enumerations containing aliases.

    Returns *enumeration* unchanged when every key of ``__members__`` maps
    to a member carrying that same name; otherwise raises ValueError listing
    each ``alias -> member name`` pair.
    """
    aliases = [
        (label, member.name)
        for label, member in enumeration.__members__.items()
        if label != member.name
    ]
    if not aliases:
        return enumeration
    listing = ', '.join(["%s -> %s" % pair for pair in aliases])
    raise ValueError(
        'duplicate names found in %r: %s' % (enumeration, listing)
    )
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,735 @@
|
||||
``enum`` --- support for enumerations
|
||||
========================================
|
||||
|
||||
.. :synopsis: enumerations are sets of symbolic names bound to unique, constant
|
||||
values.
|
||||
.. :moduleauthor:: Ethan Furman <ethan@stoneleaf.us>
|
||||
.. :sectionauthor:: Barry Warsaw <barry@python.org>,
|
||||
.. :sectionauthor:: Eli Bendersky <eliben@gmail.com>,
|
||||
.. :sectionauthor:: Ethan Furman <ethan@stoneleaf.us>
|
||||
|
||||
----------------
|
||||
|
||||
An enumeration is a set of symbolic names (members) bound to unique, constant
|
||||
values. Within an enumeration, the members can be compared by identity, and
|
||||
the enumeration itself can be iterated over.
|
||||
|
||||
|
||||
Module Contents
|
||||
---------------
|
||||
|
||||
This module defines two enumeration classes that can be used to define unique
|
||||
sets of names and values: ``Enum`` and ``IntEnum``. It also defines
|
||||
one decorator, ``unique``.
|
||||
|
||||
``Enum``
|
||||
|
||||
Base class for creating enumerated constants. See section `Functional API`_
|
||||
for an alternate construction syntax.
|
||||
|
||||
``IntEnum``
|
||||
|
||||
Base class for creating enumerated constants that are also subclasses of ``int``.
|
||||
|
||||
``unique``
|
||||
|
||||
Enum class decorator that ensures only one name is bound to any one value.
|
||||
|
||||
|
||||
Creating an Enum
|
||||
----------------
|
||||
|
||||
Enumerations are created using the ``class`` syntax, which makes them
|
||||
easy to read and write. An alternative creation method is described in
|
||||
`Functional API`_. To define an enumeration, subclass ``Enum`` as
|
||||
follows::
|
||||
|
||||
>>> from enum import Enum
|
||||
>>> class Color(Enum):
|
||||
... red = 1
|
||||
... green = 2
|
||||
... blue = 3
|
||||
|
||||
Note: Nomenclature
|
||||
|
||||
- The class ``Color`` is an *enumeration* (or *enum*)
|
||||
- The attributes ``Color.red``, ``Color.green``, etc., are
|
||||
*enumeration members* (or *enum members*).
|
||||
- The enum members have *names* and *values* (the name of
|
||||
``Color.red`` is ``red``, the value of ``Color.blue`` is
|
||||
``3``, etc.)
|
||||
|
||||
Note:
|
||||
|
||||
Even though we use the ``class`` syntax to create Enums, Enums
|
||||
are not normal Python classes. See `How are Enums different?`_ for
|
||||
more details.
|
||||
|
||||
Enumeration members have human readable string representations::
|
||||
|
||||
>>> print(Color.red)
|
||||
Color.red
|
||||
|
||||
...while their ``repr`` has more information::
|
||||
|
||||
>>> print(repr(Color.red))
|
||||
<Color.red: 1>
|
||||
|
||||
The *type* of an enumeration member is the enumeration it belongs to::
|
||||
|
||||
>>> type(Color.red)
|
||||
<enum 'Color'>
|
||||
>>> isinstance(Color.green, Color)
|
||||
True
|
||||
>>>
|
||||
|
||||
Enum members also have a property that contains just their item name::
|
||||
|
||||
>>> print(Color.red.name)
|
||||
red
|
||||
|
||||
Enumerations support iteration. In Python 3.x definition order is used; in
|
||||
Python 2.x the definition order is not available, but class attribute
|
||||
``__order__`` is supported; otherwise, value order is used::
|
||||
|
||||
>>> class Shake(Enum):
|
||||
... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x
|
||||
... vanilla = 7
|
||||
... chocolate = 4
|
||||
... cookies = 9
|
||||
... mint = 3
|
||||
...
|
||||
>>> for shake in Shake:
|
||||
... print(shake)
|
||||
...
|
||||
Shake.vanilla
|
||||
Shake.chocolate
|
||||
Shake.cookies
|
||||
Shake.mint
|
||||
|
||||
The ``__order__`` attribute is always removed, and in 3.x it is also ignored
|
||||
(order is definition order); however, in the stdlib version it will be ignored
|
||||
but not removed.
|
||||
|
||||
Enumeration members are hashable, so they can be used in dictionaries and sets::
|
||||
|
||||
>>> apples = {}
|
||||
>>> apples[Color.red] = 'red delicious'
|
||||
>>> apples[Color.green] = 'granny smith'
|
||||
>>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'}
|
||||
True
|
||||
|
||||
|
||||
Programmatic access to enumeration members and their attributes
|
||||
---------------------------------------------------------------
|
||||
|
||||
Sometimes it's useful to access members in enumerations programmatically (i.e.
|
||||
situations where ``Color.red`` won't do because the exact color is not known
|
||||
at program-writing time). ``Enum`` allows such access::
|
||||
|
||||
>>> Color(1)
|
||||
<Color.red: 1>
|
||||
>>> Color(3)
|
||||
<Color.blue: 3>
|
||||
|
||||
If you want to access enum members by *name*, use item access::
|
||||
|
||||
>>> Color['red']
|
||||
<Color.red: 1>
|
||||
>>> Color['green']
|
||||
<Color.green: 2>
|
||||
|
||||
If you have an enum member and need its ``name`` or ``value``::
|
||||
|
||||
>>> member = Color.red
|
||||
>>> member.name
|
||||
'red'
|
||||
>>> member.value
|
||||
1
|
||||
|
||||
|
||||
Duplicating enum members and values
|
||||
-----------------------------------
|
||||
|
||||
Having two enum members (or any other attribute) with the same name is invalid;
|
||||
in Python 3.x this would raise an error, but in Python 2.x the second member
|
||||
simply overwrites the first::
|
||||
|
||||
>>> # python 2.x
|
||||
>>> class Shape(Enum):
|
||||
... square = 2
|
||||
... square = 3
|
||||
...
|
||||
>>> Shape.square
|
||||
<Shape.square: 3>
|
||||
|
||||
>>> # python 3.x
|
||||
>>> class Shape(Enum):
|
||||
... square = 2
|
||||
... square = 3
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: Attempted to reuse key: 'square'
|
||||
|
||||
However, two enum members are allowed to have the same value. Given two members
|
||||
A and B with the same value (and A defined first), B is an alias to A. By-value
|
||||
lookup of the value of A and B will return A. By-name lookup of B will also
|
||||
return A::
|
||||
|
||||
>>> class Shape(Enum):
|
||||
... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x
|
||||
... square = 2
|
||||
... diamond = 1
|
||||
... circle = 3
|
||||
... alias_for_square = 2
|
||||
...
|
||||
>>> Shape.square
|
||||
<Shape.square: 2>
|
||||
>>> Shape.alias_for_square
|
||||
<Shape.square: 2>
|
||||
>>> Shape(2)
|
||||
<Shape.square: 2>
|
||||
|
||||
|
||||
Allowing aliases is not always desirable. ``unique`` can be used to ensure
|
||||
that none exist in a particular enumeration::
|
||||
|
||||
>>> from enum import unique
|
||||
>>> @unique
|
||||
... class Mistake(Enum):
|
||||
... __order__ = 'one two three four' # only needed in 2.x
|
||||
... one = 1
|
||||
... two = 2
|
||||
... three = 3
|
||||
... four = 3
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: duplicate names found in <enum 'Mistake'>: four -> three
|
||||
|
||||
Iterating over the members of an enum does not provide the aliases::
|
||||
|
||||
>>> list(Shape)
|
||||
[<Shape.square: 2>, <Shape.diamond: 1>, <Shape.circle: 3>]
|
||||
|
||||
The special attribute ``__members__`` is a dictionary mapping names to members.
|
||||
It includes all names defined in the enumeration, including the aliases::
|
||||
|
||||
>>> for name, member in sorted(Shape.__members__.items()):
|
||||
... name, member
|
||||
...
|
||||
('alias_for_square', <Shape.square: 2>)
|
||||
('circle', <Shape.circle: 3>)
|
||||
('diamond', <Shape.diamond: 1>)
|
||||
('square', <Shape.square: 2>)
|
||||
|
||||
The ``__members__`` attribute can be used for detailed programmatic access to
|
||||
the enumeration members. For example, finding all the aliases::
|
||||
|
||||
>>> [name for name, member in Shape.__members__.items() if member.name != name]
|
||||
['alias_for_square']
|
||||
|
||||
Comparisons
|
||||
-----------
|
||||
|
||||
Enumeration members are compared by identity::
|
||||
|
||||
>>> Color.red is Color.red
|
||||
True
|
||||
>>> Color.red is Color.blue
|
||||
False
|
||||
>>> Color.red is not Color.blue
|
||||
True
|
||||
|
||||
Ordered comparisons between enumeration values are *not* supported. Enum
|
||||
members are not integers (but see `IntEnum`_ below)::
|
||||
|
||||
>>> Color.red < Color.blue
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
TypeError: unorderable types: Color() < Color()
|
||||
|
||||
.. warning::
|
||||
|
||||
In Python 2 *everything* is ordered, even though the ordering may not
|
||||
make sense. If you want your enumerations to have a sensible ordering
|
||||
check out the `OrderedEnum`_ recipe below.
|
||||
|
||||
|
||||
Equality comparisons are defined though::
|
||||
|
||||
>>> Color.blue == Color.red
|
||||
False
|
||||
>>> Color.blue != Color.red
|
||||
True
|
||||
>>> Color.blue == Color.blue
|
||||
True
|
||||
|
||||
Comparisons against non-enumeration values will always compare not equal
|
||||
(again, ``IntEnum`` was explicitly designed to behave differently, see
|
||||
below)::
|
||||
|
||||
>>> Color.blue == 2
|
||||
False
|
||||
|
||||
|
||||
Allowed members and attributes of enumerations
|
||||
----------------------------------------------
|
||||
|
||||
The examples above use integers for enumeration values. Using integers is
|
||||
short and handy (and provided by default by the `Functional API`_), but not
|
||||
strictly enforced. In the vast majority of use-cases, one doesn't care what
|
||||
the actual value of an enumeration is. But if the value *is* important,
|
||||
enumerations can have arbitrary values.
|
||||
|
||||
Enumerations are Python classes, and can have methods and special methods as
|
||||
usual. If we have this enumeration::
|
||||
|
||||
>>> class Mood(Enum):
|
||||
... funky = 1
|
||||
... happy = 3
|
||||
...
|
||||
... def describe(self):
|
||||
... # self is the member here
|
||||
... return self.name, self.value
|
||||
...
|
||||
... def __str__(self):
|
||||
... return 'my custom str! {0}'.format(self.value)
|
||||
...
|
||||
... @classmethod
|
||||
... def favorite_mood(cls):
|
||||
... # cls here is the enumeration
|
||||
... return cls.happy
|
||||
|
||||
Then::
|
||||
|
||||
>>> Mood.favorite_mood()
|
||||
<Mood.happy: 3>
|
||||
>>> Mood.happy.describe()
|
||||
('happy', 3)
|
||||
>>> str(Mood.funky)
|
||||
'my custom str! 1'
|
||||
|
||||
The rules for what is allowed are as follows: _sunder_ names (starting and
|
||||
ending with a single underscore) are reserved by enum and cannot be used;
|
||||
all other attributes defined within an enumeration will become members of this
|
||||
enumeration, with the exception of *__dunder__* names and descriptors (methods
|
||||
are also descriptors).
|
||||
|
||||
Note:
|
||||
|
||||
If your enumeration defines ``__new__`` and/or ``__init__`` then
|
||||
whatever value(s) were given to the enum member will be passed into
|
||||
those methods. See `Planet`_ for an example.
|
||||
|
||||
|
||||
Restricted subclassing of enumerations
|
||||
--------------------------------------
|
||||
|
||||
Subclassing an enumeration is allowed only if the enumeration does not define
|
||||
any members. So this is forbidden::
|
||||
|
||||
>>> class MoreColor(Color):
|
||||
... pink = 17
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: Cannot extend enumerations
|
||||
|
||||
But this is allowed::
|
||||
|
||||
>>> class Foo(Enum):
|
||||
... def some_behavior(self):
|
||||
... pass
|
||||
...
|
||||
>>> class Bar(Foo):
|
||||
... happy = 1
|
||||
... sad = 2
|
||||
...
|
||||
|
||||
Allowing subclassing of enums that define members would lead to a violation of
|
||||
some important invariants of types and instances. On the other hand, it makes
|
||||
sense to allow sharing some common behavior between a group of enumerations.
|
||||
(See `OrderedEnum`_ for an example.)
|
||||
|
||||
|
||||
Pickling
|
||||
--------
|
||||
|
||||
Enumerations can be pickled and unpickled::
|
||||
|
||||
>>> from enum.test_enum import Fruit
|
||||
>>> from pickle import dumps, loads
|
||||
>>> Fruit.tomato is loads(dumps(Fruit.tomato, 2))
|
||||
True
|
||||
|
||||
The usual restrictions for pickling apply: picklable enums must be defined in
|
||||
the top level of a module, since unpickling requires them to be importable
|
||||
from that module.
|
||||
|
||||
Note:
|
||||
|
||||
With pickle protocol version 4 (introduced in Python 3.4) it is possible
|
||||
to easily pickle enums nested in other classes.
|
||||
|
||||
|
||||
|
||||
Functional API
|
||||
--------------
|
||||
|
||||
The ``Enum`` class is callable, providing the following functional API::
|
||||
|
||||
>>> Animal = Enum('Animal', 'ant bee cat dog')
|
||||
>>> Animal
|
||||
<enum 'Animal'>
|
||||
>>> Animal.ant
|
||||
<Animal.ant: 1>
|
||||
>>> Animal.ant.value
|
||||
1
|
||||
>>> list(Animal)
|
||||
[<Animal.ant: 1>, <Animal.bee: 2>, <Animal.cat: 3>, <Animal.dog: 4>]
|
||||
|
||||
The semantics of this API resemble ``namedtuple``. The first argument
|
||||
of the call to ``Enum`` is the name of the enumeration.
|
||||
|
||||
The second argument is the *source* of enumeration member names. It can be a
|
||||
whitespace-separated string of names, a sequence of names, a sequence of
|
||||
2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to
|
||||
values. The last two options enable assigning arbitrary values to
|
||||
enumerations; the others auto-assign increasing integers starting with 1. A
|
||||
new class derived from ``Enum`` is returned. In other words, the above
|
||||
assignment to ``Animal`` is equivalent to::
|
||||
|
||||
>>> class Animal(Enum):
|
||||
... ant = 1
|
||||
... bee = 2
|
||||
... cat = 3
|
||||
... dog = 4
|
||||
|
||||
Pickling enums created with the functional API can be tricky as frame stack
|
||||
implementation details are used to try and figure out which module the
|
||||
enumeration is being created in (e.g. it will fail if you use a utility
|
||||
function in separate module, and also may not work on IronPython or Jython).
|
||||
The solution is to specify the module name explicitly as follows::
|
||||
|
||||
>>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__)
|
||||
|
||||
Derived Enumerations
|
||||
--------------------
|
||||
|
||||
IntEnum
|
||||
^^^^^^^
|
||||
|
||||
A variation of ``Enum`` is provided which is also a subclass of
|
||||
``int``. Members of an ``IntEnum`` can be compared to integers;
|
||||
by extension, integer enumerations of different types can also be compared
|
||||
to each other::
|
||||
|
||||
>>> from enum import IntEnum
|
||||
>>> class Shape(IntEnum):
|
||||
... circle = 1
|
||||
... square = 2
|
||||
...
|
||||
>>> class Request(IntEnum):
|
||||
... post = 1
|
||||
... get = 2
|
||||
...
|
||||
>>> Shape == 1
|
||||
False
|
||||
>>> Shape.circle == 1
|
||||
True
|
||||
>>> Shape.circle == Request.post
|
||||
True
|
||||
|
||||
However, they still can't be compared to standard ``Enum`` enumerations::
|
||||
|
||||
>>> class Shape(IntEnum):
|
||||
... circle = 1
|
||||
... square = 2
|
||||
...
|
||||
>>> class Color(Enum):
|
||||
... red = 1
|
||||
... green = 2
|
||||
...
|
||||
>>> Shape.circle == Color.red
|
||||
False
|
||||
|
||||
``IntEnum`` values behave like integers in other ways you'd expect::
|
||||
|
||||
>>> int(Shape.circle)
|
||||
1
|
||||
>>> ['a', 'b', 'c'][Shape.circle]
|
||||
'b'
|
||||
>>> [i for i in range(Shape.square)]
|
||||
[0, 1]
|
||||
|
||||
For the vast majority of code, ``Enum`` is strongly recommended,
|
||||
since ``IntEnum`` breaks some semantic promises of an enumeration (by
|
||||
being comparable to integers, and thus by transitivity to other
|
||||
unrelated enumerations). It should be used only in special cases where
|
||||
there's no other choice; for example, when integer constants are
|
||||
replaced with enumerations and backwards compatibility is required with code
|
||||
that still expects integers.
|
||||
|
||||
|
||||
Others
|
||||
^^^^^^
|
||||
|
||||
While ``IntEnum`` is part of the ``enum`` module, it would be very
|
||||
simple to implement independently::
|
||||
|
||||
class IntEnum(int, Enum):
|
||||
pass
|
||||
|
||||
This demonstrates how similar derived enumerations can be defined; for example
|
||||
a ``StrEnum`` that mixes in ``str`` instead of ``int``.
|
||||
|
||||
Some rules:
|
||||
|
||||
1. When subclassing ``Enum``, mix-in types must appear before
|
||||
``Enum`` itself in the sequence of bases, as in the ``IntEnum``
|
||||
example above.
|
||||
2. While ``Enum`` can have members of any type, once you mix in an
|
||||
additional type, all the members must have values of that type, e.g.
|
||||
``int`` above. This restriction does not apply to mix-ins which only
|
||||
add methods and don't specify another data type such as ``int`` or
|
||||
``str``.
|
||||
3. When another data type is mixed in, the ``value`` attribute is *not the
|
||||
same* as the enum member itself, although it is equivalent and will compare
|
||||
equal.
|
||||
4. %-style formatting: ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and
|
||||
``__repr__`` respectively; other codes (such as ``%i`` or ``%h`` for
|
||||
IntEnum) treat the enum member as its mixed-in type.
|
||||
|
||||
Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int``
|
||||
subclasses are printed as strings and not numbers when the ``%d``, ``%i``,
|
||||
or ``%u`` codes are used.
|
||||
5. ``str.__format__`` (or ``format``) will use the mixed-in
|
||||
type's ``__format__``. If the ``Enum``'s ``str`` or
|
||||
``repr`` is desired use the ``!s`` or ``!r`` ``str`` format codes.
|
||||
|
||||
|
||||
Decorators
|
||||
----------
|
||||
|
||||
unique
|
||||
^^^^^^
|
||||
|
||||
A ``class`` decorator specifically for enumerations. It searches an
|
||||
enumeration's ``__members__`` gathering any aliases it finds; if any are
|
||||
found ``ValueError`` is raised with the details::
|
||||
|
||||
>>> @unique
|
||||
... class NoDupes(Enum):
|
||||
... first = 'one'
|
||||
... second = 'two'
|
||||
... third = 'two'
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: duplicate names found in <enum 'NoDupes'>: third -> second
|
||||
|
||||
|
||||
Interesting examples
|
||||
--------------------
|
||||
|
||||
While ``Enum`` and ``IntEnum`` are expected to cover the majority of
|
||||
use-cases, they cannot cover them all. Here are recipes for some different
|
||||
types of enumerations that can be used directly, or as examples for creating
|
||||
one's own.
|
||||
|
||||
|
||||
AutoNumber
|
||||
^^^^^^^^^^
|
||||
|
||||
Avoids having to specify the value for each enumeration member::
|
||||
|
||||
>>> class AutoNumber(Enum):
|
||||
... def __new__(cls):
|
||||
... value = len(cls.__members__) + 1
|
||||
... obj = object.__new__(cls)
|
||||
... obj._value_ = value
|
||||
... return obj
|
||||
...
|
||||
>>> class Color(AutoNumber):
|
||||
... __order__ = "red green blue" # only needed in 2.x
|
||||
... red = ()
|
||||
... green = ()
|
||||
... blue = ()
|
||||
...
|
||||
>>> Color.green.value == 2
|
||||
True
|
||||
|
||||
Note:
|
||||
|
||||
The `__new__` method, if defined, is used during creation of the Enum
|
||||
members; it is then replaced by Enum's `__new__` which is used after
|
||||
class creation for lookup of existing members. Due to the way Enums are
|
||||
supposed to behave, there is no way to customize Enum's `__new__`.
|
||||
|
||||
|
||||
UniqueEnum
|
||||
^^^^^^^^^^
|
||||
|
||||
Raises an error if a duplicate member value is found instead of creating an
|
||||
alias::
|
||||
|
||||
>>> class UniqueEnum(Enum):
|
||||
... def __init__(self, *args):
|
||||
... cls = self.__class__
|
||||
... if any(self.value == e.value for e in cls):
|
||||
... a = self.name
|
||||
... e = cls(self.value).name
|
||||
... raise ValueError(
|
||||
... "aliases not allowed in UniqueEnum: %r --> %r"
|
||||
... % (a, e))
|
||||
...
|
||||
>>> class Color(UniqueEnum):
|
||||
... red = 1
|
||||
... green = 2
|
||||
... blue = 3
|
||||
... grene = 2
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: aliases not allowed in UniqueEnum: 'grene' --> 'green'
|
||||
|
||||
|
||||
OrderedEnum
|
||||
^^^^^^^^^^^
|
||||
|
||||
An ordered enumeration that is not based on ``IntEnum`` and so maintains
|
||||
the normal ``Enum`` invariants (such as not being comparable to other
|
||||
enumerations)::
|
||||
|
||||
>>> class OrderedEnum(Enum):
|
||||
... def __ge__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ >= other._value_
|
||||
... return NotImplemented
|
||||
... def __gt__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ > other._value_
|
||||
... return NotImplemented
|
||||
... def __le__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ <= other._value_
|
||||
... return NotImplemented
|
||||
... def __lt__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ < other._value_
|
||||
... return NotImplemented
|
||||
...
|
||||
>>> class Grade(OrderedEnum):
|
||||
... __order__ = 'A B C D F' # only needed in 2.x
|
||||
... A = 5
|
||||
... B = 4
|
||||
... C = 3
|
||||
... D = 2
|
||||
... F = 1
|
||||
...
|
||||
>>> Grade.C < Grade.A
|
||||
True
|
||||
|
||||
|
||||
Planet
|
||||
^^^^^^
|
||||
|
||||
If ``__new__`` or ``__init__`` is defined the value of the enum member
|
||||
will be passed to those methods::
|
||||
|
||||
>>> class Planet(Enum):
|
||||
... MERCURY = (3.303e+23, 2.4397e6)
|
||||
... VENUS = (4.869e+24, 6.0518e6)
|
||||
... EARTH = (5.976e+24, 6.37814e6)
|
||||
... MARS = (6.421e+23, 3.3972e6)
|
||||
... JUPITER = (1.9e+27, 7.1492e7)
|
||||
... SATURN = (5.688e+26, 6.0268e7)
|
||||
... URANUS = (8.686e+25, 2.5559e7)
|
||||
... NEPTUNE = (1.024e+26, 2.4746e7)
|
||||
... def __init__(self, mass, radius):
|
||||
... self.mass = mass # in kilograms
|
||||
... self.radius = radius # in meters
|
||||
... @property
|
||||
... def surface_gravity(self):
|
||||
... # universal gravitational constant (m3 kg-1 s-2)
|
||||
... G = 6.67300E-11
|
||||
... return G * self.mass / (self.radius * self.radius)
|
||||
...
|
||||
>>> Planet.EARTH.value
|
||||
(5.976e+24, 6378140.0)
|
||||
>>> Planet.EARTH.surface_gravity
|
||||
9.802652743337129
|
||||
|
||||
|
||||
How are Enums different?
|
||||
------------------------
|
||||
|
||||
Enums have a custom metaclass that affects many aspects of both derived Enum
|
||||
classes and their instances (members).
|
||||
|
||||
|
||||
Enum Classes
|
||||
^^^^^^^^^^^^
|
||||
|
||||
The ``EnumMeta`` metaclass is responsible for providing the
|
||||
``__contains__``, ``__dir__``, ``__iter__`` and other methods that
|
||||
allow one to do things with an ``Enum`` class that fail on a typical
|
||||
class, such as ``list(Color)`` or ``some_var in Color``. ``EnumMeta`` is
|
||||
responsible for ensuring that various other methods on the final ``Enum``
|
||||
class are correct (such as ``__new__``, ``__getnewargs__``,
|
||||
``__str__`` and ``__repr__``).
|
||||
|
||||
.. note::
|
||||
|
||||
``__dir__`` is not changed in the Python 2 line as it messes up some
|
||||
of the decorators included in the stdlib.
|
||||
|
||||
|
||||
Enum Members (aka instances)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The most interesting thing about Enum members is that they are singletons.
|
||||
``EnumMeta`` creates them all while it is creating the ``Enum``
|
||||
class itself, and then puts a custom ``__new__`` in place to ensure
|
||||
that no new ones are ever instantiated by returning only the existing
|
||||
member instances.
|
||||
|
||||
|
||||
Finer Points
|
||||
^^^^^^^^^^^^
|
||||
|
||||
``Enum`` members are instances of an ``Enum`` class, and even though they
|
||||
are accessible as `EnumClass.member1.member2`, they should not be
|
||||
accessed directly from the member as that lookup may fail or, worse,
|
||||
return something besides the ``Enum`` member you were looking for
|
||||
(changed in version 1.1.1)::
|
||||
|
||||
>>> class FieldTypes(Enum):
|
||||
... name = 1
|
||||
... value = 2
|
||||
... size = 3
|
||||
...
|
||||
>>> FieldTypes.value.size
|
||||
<FieldTypes.size: 3>
|
||||
>>> FieldTypes.size.value
|
||||
3
|
||||
|
||||
The ``__members__`` attribute is only available on the class.
|
||||
|
||||
In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being
|
||||
the definition order. In Python 2.7 ``__members__`` is an ``OrderedDict`` if
|
||||
``__order__`` was specified, and a plain ``dict`` otherwise. In all other Python
|
||||
2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was specified
|
||||
as the ``OrderedDict`` type didn't exist yet.
|
||||
|
||||
If you give your ``Enum`` subclass extra methods, like the `Planet`_
|
||||
class above, those methods will show up in a `dir` of the member,
|
||||
but not of the class::
|
||||
|
||||
>>> dir(Planet)
|
||||
['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS',
|
||||
'VENUS', '__class__', '__doc__', '__members__', '__module__']
|
||||
>>> dir(Planet.EARTH)
|
||||
['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value']
|
||||
|
||||
A ``__new__`` method will only be used for the creation of the
|
||||
``Enum`` members -- after that it is replaced. This means if you wish to
|
||||
change how ``Enum`` members are looked up you either have to write a
|
||||
helper function or a ``classmethod``.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,312 @@
|
||||
import codecs
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import shutil
|
||||
import tempfile
|
||||
import traceback
|
||||
|
||||
import appdirs
|
||||
|
||||
from scandir import scandir
|
||||
|
||||
try:
|
||||
from collections.abc import MutableMapping
|
||||
unicode = str
|
||||
except ImportError:
|
||||
# Python 2 imports
|
||||
from collections import MutableMapping
|
||||
FileNotFoundError = IOError
|
||||
|
||||
from .posixemulation import rename
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FileCache(MutableMapping):
    """A persistent file cache that is dictionary-like and has a write buffer.

    *appname* is passed to `appdirs <https://pypi.python.org/pypi/appdirs/>`_
    to determine a system-appropriate location for the cache files. The cache
    directory used is available via :data:`cache_dir`.

    By default, a write buffer is used, so writing to cache files is not done
    until :meth:`sync` is explicitly called. This behavior can be changed using
    the optional *flag* argument.

    .. NOTE::
        Keys and values are always stored as :class:`bytes` objects. If data
        serialization is enabled, keys are returned as :class:`str` or
        :class:`unicode` objects.
        If data serialization is disabled, keys are returned as a
        :class:`bytes` object.

    :param str appname: The app/script the cache should be associated with.
    :param str flag: How the cache should be opened. See below for details.
    :param mode: The Unix mode for the cache files.
    :param str keyencoding: The encoding the keys use, defaults to 'utf-8'.
        This is used if *serialize* is ``False``; the keys are treated as
        :class:`bytes` objects.
    :param bool serialize: Whether or not to (de)serialize the values. If a
        cache is used with a :class:`~shelve.Shelf`, set this to ``False``.
    :param str app_cache_dir: absolute path to root cache directory to be
        used in place of system-appropriate location determined by appdirs

    The optional *flag* argument can be:

    +---------+-------------------------------------------+
    | Value   | Meaning                                   |
    +=========+===========================================+
    | ``'r'`` | Open existing cache for reading only      |
    +---------+-------------------------------------------+
    | ``'w'`` | Open existing cache for reading and       |
    |         | writing                                   |
    +---------+-------------------------------------------+
    | ``'c'`` | Open cache for reading and writing,       |
    |         | creating it if it doesn't exist (default) |
    +---------+-------------------------------------------+
    | ``'n'`` | Always create a new, empty cache, open    |
    |         | for reading and writing                   |
    +---------+-------------------------------------------+

    If a ``'s'`` is appended to the *flag* argument, the cache will be opened
    in sync mode. Writing to the cache will happen immediately and will not be
    buffered.

    If an application needs to use more than one cache, then it should use
    subcaches. To create a subcache, append a series of one or more names
    separated by periods to the application name when creating a
    :class:`FileCache` object (e.g. ``'appname.subcache'`` or
    ``'appname.subcache.subcache'``).
    Subcaches are a way for an application to use more than one cache without
    polluting a user's cache directory. All caches -- main caches or subcaches
    -- are totally independent. The only aspect in which they are linked is
    that all of an application's caches exist in the same system directory.
    Because each cache is independent of every other cache, calling
    :meth:`delete` on an application's main cache will not delete data in
    its subcaches.

    """

    def __init__(self, appname, flag='c', mode=0o666, keyencoding='utf-8',
                 serialize=True, app_cache_dir=None):
        """Initialize a :class:`FileCache` object.

        :raises TypeError: if *flag* is not a :class:`str`.
        :raises ValueError: if *flag* is empty or malformed, or if one of the
            subcache components is named ``'cache'``.
        :raises FileNotFoundError: if the cache directory does not exist and
            *flag* does not allow creating it (``'r'`` or ``'w'``).
        """
        if not isinstance(flag, str):
            raise TypeError("flag must be str not '{}'".format(type(flag)))
        elif not flag or flag[0] not in 'rwcn':
            # An empty flag used to escape as IndexError from flag[0]; report
            # it like any other invalid flag instead.
            raise ValueError("invalid flag: '{}', first flag must be one of "
                             "'r', 'w', 'c' or 'n'".format(flag))
        elif len(flag) > 1 and flag[1] != 's':
            raise ValueError("invalid flag: '{}', second flag must be "
                             "'s'".format(flag))

        appname, subcache = self._parse_appname(appname)
        # 'cache' is the name of the directory holding the entries themselves,
        # so it cannot double as a subcache directory name.
        if 'cache' in subcache:
            raise ValueError("invalid subcache name: 'cache'.")
        self._is_subcache = bool(subcache)

        if not app_cache_dir:
            app_cache_dir = appdirs.user_cache_dir(appname, appname)
        subcache_dir = os.path.join(app_cache_dir, *subcache)
        self.cache_dir = os.path.join(subcache_dir, 'cache')
        exists = os.path.exists(self.cache_dir)

        if len(flag) > 1 and flag[1] == 's':
            self._sync = True
        else:
            self._sync = False
            self._buffer = {}

        if exists and 'n' in flag:
            self.clear()
            self.create()
        elif not exists and ('c' in flag or 'n' in flag):
            self.create()
        elif not exists:
            raise FileNotFoundError("no such directory: '{}'".format(
                self.cache_dir))

        # The mode used to open the temporary file in _write_to_file(); a
        # read-only cache gets 'rb', which makes every write attempt fail.
        self._flag = 'rb' if 'r' in flag else 'wb'
        self._mode = mode
        self._keyencoding = keyencoding
        self._serialize = serialize

    def _parse_appname(self, appname):
        """Splits an appname into the appname and subcache components."""
        components = appname.split('.')
        return components[0], components[1:]

    def create(self):
        """Create the write buffer and cache directory."""
        if not self._sync and not hasattr(self, '_buffer'):
            self._buffer = {}
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)

    def clear(self):
        """Remove all items from the write buffer and cache.

        The write buffer object and cache directory are not deleted.

        """
        self.delete()
        self.create()

    def delete(self):
        """Delete the write buffer and cache directory."""
        if not self._sync:
            del self._buffer
        shutil.rmtree(self.cache_dir)

    def close(self):
        """Sync the write buffer, then close the cache.

        If a closed :class:`FileCache` object's methods are called, a
        :exc:`ValueError` will be raised.

        """
        self.sync()
        self.sync = self.create = self.delete = self._closed
        # Fixed: this used to assign the non-existent ``_read_to_file``, so
        # reading from a closed cache was never blocked.
        self._write_to_file = self._read_from_file = self._closed
        self._key_to_filename = self._filename_to_key = self._closed
        # NOTE(review): implicit special-method lookup (``cache[key]``,
        # ``len(cache)``, ``key in cache``) goes through the type, not the
        # instance, so these overrides only affect explicit calls such as
        # ``cache.__getitem__(key)``. Most operators still fail once they
        # reach one of the helpers replaced above.
        self.__getitem__ = self.__setitem__ = self.__delitem__ = self._closed
        self.__iter__ = self.__len__ = self.__contains__ = self._closed

    def sync(self):
        """Sync the write buffer with the cache files and clear the buffer.

        If the :class:`FileCache` object was opened with the optional ``'s'``
        *flag* argument, then calling :meth:`sync` will do nothing.

        Entries that cannot be written are logged and dropped with the rest
        of the buffer; the failure is not propagated to the caller.
        """
        if self._sync:
            return  # opened in sync mode, so skip the manual sync
        self._sync = True
        try:
            for ekey in self._buffer:
                filename = self._key_to_filename(ekey)
                try:
                    self._write_to_file(filename, self._buffer[ekey])
                except Exception:
                    # Best effort: one bad entry must not abort the flush.
                    logger.error("Couldn't write content from %r to cache file: %r: %s", ekey, filename,
                                 traceback.format_exc())
            self._buffer.clear()
        finally:
            # Always return to buffered mode, even when something that is not
            # an ``Exception`` (e.g. KeyboardInterrupt) interrupts the flush.
            self._sync = False

    def _closed(self, *args, **kwargs):
        """Filler method for closed cache methods."""
        raise ValueError("invalid operation on closed cache")

    def _encode_key(self, key):
        """Encode key using *hex_codec* for constructing a cache filename.

        Keys are implicitly converted to :class:`bytes` if passed as
        :class:`str`.

        :raises TypeError: if *key* is neither text nor :class:`bytes`.
        """
        # ``unicode`` is a module-level alias (``str`` on Python 3); the
        # ``str`` check comes first so it is only consulted for other types.
        if isinstance(key, str) or isinstance(key, unicode):
            key = key.encode(self._keyencoding)
        elif not isinstance(key, bytes):
            raise TypeError("key must be bytes or str")
        return codecs.encode(key, 'hex_codec').decode(self._keyencoding)

    def _decode_key(self, key):
        """Decode key using hex_codec to retrieve the original key.

        Keys are returned as :class:`str` if serialization is enabled.
        Keys are returned as :class:`bytes` if serialization is disabled.

        """
        bkey = codecs.decode(key.encode(self._keyencoding), 'hex_codec')
        return bkey.decode(self._keyencoding) if self._serialize else bkey

    def _dumps(self, value):
        """Pickle *value* if serialization is enabled, else return it as is."""
        return value if not self._serialize else pickle.dumps(value)

    def _loads(self, value):
        """Unpickle *value* if serialization is enabled, else return it as is."""
        # NOTE(review): pickle.loads() can execute arbitrary code; this is
        # only safe if the cache directory is not writable by untrusted parties.
        return value if not self._serialize else pickle.loads(value)

    def _key_to_filename(self, key):
        """Convert an encoded key to an absolute cache filename."""
        return os.path.join(self.cache_dir, key)

    def _filename_to_key(self, absfilename):
        """Convert an absolute cache filename to a key name."""
        return os.path.split(absfilename)[1]

    def _all_filenames(self):
        """Yield the absolute filename of every regular file in the cache.

        Symbolic links are skipped. If the cache directory cannot be scanned
        the generator simply yields nothing.
        """
        try:
            for entry in scandir(self.cache_dir):
                if entry.is_file(follow_symlinks=False):
                    yield os.path.join(self.cache_dir, entry.name)
        except (FileNotFoundError, OSError):
            # A plain ``return`` ends the generator. ``raise StopIteration``
            # inside a generator is turned into RuntimeError by PEP 479.
            return

    def _all_keys(self):
        """Return the set of all encoded key names (files plus write buffer)."""
        file_keys = [self._filename_to_key(fn) for fn in self._all_filenames()]
        if self._sync:
            return set(file_keys)
        else:
            return set(file_keys + list(self._buffer))

    def _write_to_file(self, filename, bytesvalue):
        """Write bytesvalue to filename.

        The data is written to a temporary file first and then moved over
        *filename*, so readers never see a half-written entry. The temporary
        file is removed again if the write or the move fails.
        """
        fh, tmp = tempfile.mkstemp()
        moved = False
        try:
            with os.fdopen(fh, self._flag) as f:
                f.write(self._dumps(bytesvalue))
            rename(tmp, filename)
            moved = True
        finally:
            if not moved:
                # Do not leave orphaned temp files behind, e.g. on every
                # write attempt against a read-only ('r') cache.
                try:
                    os.remove(tmp)
                except OSError:
                    pass
        os.chmod(filename, self._mode)

    def _read_from_file(self, filename):
        """Read data from filename.

        Returns ``None`` (after logging a warning) if the file cannot be read.
        """
        try:
            with open(filename, 'rb') as f:
                return self._loads(f.read())
        except (IOError, OSError):
            logger.warning('Error opening file: {}'.format(filename))
            return None

    def __setitem__(self, key, value):
        ekey = self._encode_key(key)
        if not self._sync:
            self._buffer[ekey] = value
        else:
            filename = self._key_to_filename(ekey)
            self._write_to_file(filename, value)

    def __getitem__(self, key):
        ekey = self._encode_key(key)
        if not self._sync:
            # Pending (unsynced) writes take precedence over what is on disk.
            try:
                return self._buffer[ekey]
            except KeyError:
                pass
        filename = self._key_to_filename(ekey)
        if filename not in self._all_filenames():
            raise KeyError(key)
        return self._read_from_file(filename)

    def __delitem__(self, key):
        ekey = self._encode_key(key)
        filename = self._key_to_filename(ekey)
        if not self._sync:
            try:
                del self._buffer[ekey]
            except KeyError:
                if filename not in self._all_filenames():
                    raise KeyError(key)
        # The key may exist both in the buffer and on disk, so always try to
        # remove the file; a missing file is not an error at this point.
        try:
            os.remove(filename)
        except (IOError, OSError):
            pass

    def __iter__(self):
        for key in self._all_keys():
            yield self._decode_key(key)

    def __len__(self):
        return len(self._all_keys())

    def __contains__(self, key):
        ekey = self._encode_key(key)
        return ekey in self._all_keys()
|
||||
@@ -0,0 +1,113 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
r"""
|
||||
werkzeug.posixemulation
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Provides a POSIX emulation for some features that are relevant to
|
||||
web applications. The main purpose is to simplify support for
|
||||
systems such as Windows NT that are not 100% POSIX compatible.
|
||||
|
||||
Currently this only implements a :func:`rename` function that
|
||||
follows POSIX semantics. Eg: if the target file already exists it
|
||||
will be replaced without asking.
|
||||
|
||||
This module was introduced in 0.6.1 and is not a public interface.
|
||||
It might become one in later versions of Werkzeug.
|
||||
|
||||
:copyright: (c) 2013 by the Werkzeug Team, see AUTHORS for more details.
|
||||
:license: BSD, see LICENSE for more details.
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import errno
|
||||
import time
|
||||
import random
|
||||
import shutil
|
||||
|
||||
|
||||
can_rename_open_file = False
|
||||
if os.name == 'nt': # pragma: no cover
|
||||
_rename = lambda src, dst: False
|
||||
_rename_atomic = lambda src, dst: False
|
||||
if sys.version_info >= (3, 0):
|
||||
unicode = str
|
||||
|
||||
try:
|
||||
import ctypes
|
||||
|
||||
_MOVEFILE_REPLACE_EXISTING = 0x1
|
||||
_MOVEFILE_WRITE_THROUGH = 0x8
|
||||
_MoveFileEx = ctypes.windll.kernel32.MoveFileExW
|
||||
|
||||
def _rename(src, dst):
    """Move *src* over *dst* via the Win32 API, replacing an existing *dst*.

    Both paths are first coerced to text using the filesystem encoding. The
    transactional rename is attempted first; if it does not succeed,
    ``MoveFileExW`` is tried up to 100 times with a 1 ms pause after each
    failed attempt.

    Returns a truthy value on success and a falsy value otherwise.
    """
    if not isinstance(src, unicode):
        src = unicode(src, sys.getfilesystemencoding())
    if not isinstance(dst, unicode):
        dst = unicode(dst, sys.getfilesystemencoding())

    if _rename_atomic(src, dst):
        return True

    moved = False
    for _attempt in range(100):
        moved = _MoveFileEx(src, dst, _MOVEFILE_REPLACE_EXISTING |
                            _MOVEFILE_WRITE_THROUGH)
        if moved:
            break
        # Failed attempt: pause briefly before the next one.
        time.sleep(0.001)
    return moved
|
||||
|
||||
# new in Vista and Windows Server 2008
|
||||
_CreateTransaction = ctypes.windll.ktmw32.CreateTransaction
|
||||
_CommitTransaction = ctypes.windll.ktmw32.CommitTransaction
|
||||
_MoveFileTransacted = ctypes.windll.kernel32.MoveFileTransactedW
|
||||
_CloseHandle = ctypes.windll.kernel32.CloseHandle
|
||||
can_rename_open_file = True
|
||||
|
||||
def _rename_atomic(src, dst):
    """Move *src* over *dst* inside a kernel transaction.

    Returns ``False`` if no transaction could be created. Otherwise
    ``MoveFileTransactedW`` is tried up to 100 times (1 ms pause after each
    failure); once a move succeeds the result of committing the transaction
    is returned. The transaction handle is always closed.
    """
    ta = _CreateTransaction(None, 0, 0, 0, 0, 1000, 'Werkzeug rename')
    if ta == -1:
        return False
    try:
        outcome = False
        for _attempt in range(100):
            outcome = _MoveFileTransacted(src, dst, None, None,
                                          _MOVEFILE_REPLACE_EXISTING |
                                          _MOVEFILE_WRITE_THROUGH, ta)
            if outcome:
                # The move is only effective once the transaction commits.
                outcome = _CommitTransaction(ta)
                break
            time.sleep(0.001)
        return outcome
    finally:
        _CloseHandle(ta)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def rename(src, dst):
    """Rename *src* to *dst*, replacing *dst* if it already exists.

    The Win32 based ``_rename`` is tried first. If it reports failure, the
    function falls back to ``os.rename``; when that fails because *dst*
    exists, *dst* is moved aside under a random suffix, *src* is moved into
    place and the old file is removed on a best-effort basis.

    :raises OSError: if the fallback rename fails for a reason other than
        the destination already existing.
    """
    # Try atomic or pseudo-atomic rename
    if _rename(src, dst):
        return
    # Fall back to "move away and replace"
    try:
        os.rename(src, dst)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
        # ``sys.maxint`` no longer exists on Python 3 and made this fallback
        # die with AttributeError; ``sys.maxsize`` exists on 2.6+ and 3.x.
        old = "%s-%08x" % (dst, random.randint(0, sys.maxsize))
        os.rename(dst, old)
        os.rename(src, dst)
        try:
            os.unlink(old)
        except Exception:
            # Leaving the moved-away copy behind is harmless; ignore.
            pass
|
||||
else:
|
||||
"""
|
||||
If dst on current filesystem then use
|
||||
atomic rename. Otherwise, fall back to a
|
||||
non-atomic copy and remove.
|
||||
"""
|
||||
rename = shutil.move
|
||||
can_rename_open_file = True
|
||||
@@ -4,7 +4,6 @@ This gives other modules access to the gritty details about characters and the
|
||||
encodings that use them.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import zlib
|
||||
import unicodedata
|
||||
@@ -15,13 +14,13 @@ from ftfy.compatibility import unichr
|
||||
# These are the encodings we will try to fix in ftfy, in the
|
||||
# order that they should be tried.
|
||||
CHARMAP_ENCODINGS = [
|
||||
'latin-1',
|
||||
'sloppy-windows-1252',
|
||||
'sloppy-windows-1250',
|
||||
'iso-8859-2',
|
||||
'sloppy-windows-1251',
|
||||
'macroman',
|
||||
'cp437',
|
||||
u'latin-1',
|
||||
u'sloppy-windows-1252',
|
||||
u'sloppy-windows-1250',
|
||||
u'iso-8859-2',
|
||||
u'sloppy-windows-1251',
|
||||
u'macroman',
|
||||
u'cp437',
|
||||
]
|
||||
|
||||
|
||||
@@ -29,25 +28,25 @@ def _build_regexes():
|
||||
"""
|
||||
ENCODING_REGEXES contain reasonably fast ways to detect if we
|
||||
could represent a given string in a given encoding. The simplest one is
|
||||
the 'ascii' detector, which of course just determines if all characters
|
||||
the u'ascii' detector, which of course just determines if all characters
|
||||
are between U+0000 and U+007F.
|
||||
"""
|
||||
# Define a regex that matches ASCII text.
|
||||
encoding_regexes = {'ascii': re.compile('^[\x00-\x7f]*$')}
|
||||
encoding_regexes = {u'ascii': re.compile('^[\x00-\x7f]*$')}
|
||||
|
||||
for encoding in CHARMAP_ENCODINGS:
|
||||
# Make a sequence of characters that bytes \x80 to \xFF decode to
|
||||
# in each encoding, as well as byte \x1A, which is used to represent
|
||||
# the replacement character � in the sloppy-* encodings.
|
||||
latin1table = ''.join(unichr(i) for i in range(128, 256)) + '\x1a'
|
||||
charlist = latin1table.encode('latin-1').decode(encoding)
|
||||
latin1table = u''.join(unichr(i) for i in range(128, 256)) + '\x1a'
|
||||
charlist = latin1table.encode(u'latin-1').decode(encoding)
|
||||
|
||||
# The rest of the ASCII bytes -- bytes \x00 to \x19 and \x1B
|
||||
# to \x7F -- will decode as those ASCII characters in any encoding we
|
||||
# support, so we can just include them as ranges. This also lets us
|
||||
# not worry about escaping regex special characters, because all of
|
||||
# them are in the \x1B to \x7F range.
|
||||
regex = '^[\x00-\x19\x1b-\x7f{0}]*$'.format(charlist)
|
||||
regex = u'^[\x00-\x19\x1b-\x7f{0}]*$'.format(charlist)
|
||||
encoding_regexes[encoding] = re.compile(regex)
|
||||
return encoding_regexes
|
||||
ENCODING_REGEXES = _build_regexes()
|
||||
@@ -57,10 +56,10 @@ def _build_utf8_punct_regex():
|
||||
"""
|
||||
Recognize UTF-8 mojibake that's so blatant that we can fix it even when the
|
||||
rest of the string doesn't decode as UTF-8 -- namely, UTF-8 sequences for
|
||||
the 'General Punctuation' characters U+2000 to U+2040, re-encoded in
|
||||
the u'General Punctuation' characters U+2000 to U+2040, re-encoded in
|
||||
Windows-1252.
|
||||
|
||||
These are recognizable by the distinctive 'â€' ('\xe2\x80') sequence they
|
||||
These are recognizable by the distinctive 'â€' ('\xe2\x80') sequence they
|
||||
all begin with when decoded as Windows-1252.
|
||||
"""
|
||||
# We're making a regex that has all the literal bytes from 0x80 to 0xbf in
|
||||
@@ -72,10 +71,10 @@ def _build_utf8_punct_regex():
|
||||
# prettier when we deprecate Python 2.
|
||||
continuation_char_list = ''.join(
|
||||
unichr(i) for i in range(0x80, 0xc0)
|
||||
).encode('latin-1')
|
||||
obvious_utf8 = ('â€['
|
||||
+ continuation_char_list.decode('sloppy-windows-1252')
|
||||
+ ']')
|
||||
).encode(u'latin-1')
|
||||
obvious_utf8 = (u'â€['
|
||||
+ continuation_char_list.decode(u'sloppy-windows-1252')
|
||||
+ u']')
|
||||
return re.compile(obvious_utf8)
|
||||
PARTIAL_UTF8_PUNCT_RE = _build_utf8_punct_regex()
|
||||
|
||||
@@ -126,8 +125,8 @@ LOSSY_UTF8_RE = re.compile(
|
||||
)
|
||||
|
||||
# These regexes match various Unicode variations on single and double quotes.
|
||||
SINGLE_QUOTE_RE = re.compile('[\u2018-\u201b]')
|
||||
DOUBLE_QUOTE_RE = re.compile('[\u201c-\u201f]')
|
||||
SINGLE_QUOTE_RE = re.compile(u'[\u2018-\u201b]')
|
||||
DOUBLE_QUOTE_RE = re.compile(u'[\u201c-\u201f]')
|
||||
|
||||
|
||||
def possible_encoding(text, encoding):
|
||||
@@ -143,7 +142,7 @@ def possible_encoding(text, encoding):
|
||||
|
||||
CHAR_CLASS_STRING = zlib.decompress(
|
||||
resource_string(__name__, 'char_classes.dat')
|
||||
).decode('ascii')
|
||||
).decode(u'ascii')
|
||||
|
||||
def chars_to_classes(string):
|
||||
"""
|
||||
@@ -185,15 +184,15 @@ CONTROL_CHARS = _build_control_char_mapping()
|
||||
# Ligatures may also be separated by NFKC normalization, but that is sometimes
|
||||
# more normalization than you want.
|
||||
LIGATURES = {
|
||||
ord('IJ'): 'IJ',
|
||||
ord('ij'): 'ij',
|
||||
ord('ff'): 'ff',
|
||||
ord('fi'): 'fi',
|
||||
ord('fl'): 'fl',
|
||||
ord('ffi'): 'ffi',
|
||||
ord('ffl'): 'ffl',
|
||||
ord('ſt'): 'ſt',
|
||||
ord('st'): 'st'
|
||||
ord(u'IJ'): u'IJ',
|
||||
ord(u'ij'): u'ij',
|
||||
ord(u'ff'): u'ff',
|
||||
ord(u'fi'): u'fi',
|
||||
ord(u'fl'): u'fl',
|
||||
ord(u'ffi'): u'ffi',
|
||||
ord(u'ffl'): u'ffl',
|
||||
ord(u'ſt'): u'ſt',
|
||||
ord(u'st'): u'st'
|
||||
}
|
||||
|
||||
|
||||
@@ -205,10 +204,10 @@ def _build_width_map():
|
||||
# Though it's not listed as a fullwidth character, we'll want to convert
|
||||
# U+3000 IDEOGRAPHIC SPACE to U+20 SPACE on the same principle, so start
|
||||
# with that in the dictionary.
|
||||
width_map = {0x3000: ' '}
|
||||
width_map = {0x3000: u' '}
|
||||
for i in range(0xff01, 0xfff0):
|
||||
char = unichr(i)
|
||||
alternate = unicodedata.normalize('NFKC', char)
|
||||
alternate = unicodedata.normalize(u'NFKC', char)
|
||||
if alternate != char:
|
||||
width_map[i] = alternate
|
||||
return width_map
|
||||
|
||||
@@ -4,4 +4,4 @@
|
||||
Version module
|
||||
"""
|
||||
# pragma: no cover
|
||||
__version__ = '3.0.0.dev0'
|
||||
__version__ = '2.1.4'
|
||||
|
||||
@@ -133,7 +133,6 @@ class ValidateHasNeighbor(Rule):
|
||||
Validate tag has-neighbor
|
||||
"""
|
||||
consequence = RemoveMatch
|
||||
priority = 64
|
||||
|
||||
def when(self, matches, context):
|
||||
ret = []
|
||||
@@ -159,7 +158,6 @@ class ValidateHasNeighborBefore(Rule):
|
||||
Validate tag has-neighbor-before that previous match exists.
|
||||
"""
|
||||
consequence = RemoveMatch
|
||||
priority = 64
|
||||
|
||||
def when(self, matches, context):
|
||||
ret = []
|
||||
@@ -179,7 +177,6 @@ class ValidateHasNeighborAfter(Rule):
|
||||
Validate tag has-neighbor-after that next match exists.
|
||||
"""
|
||||
consequence = RemoveMatch
|
||||
priority = 64
|
||||
|
||||
def when(self, matches, context):
|
||||
ret = []
|
||||
|
||||
@@ -3895,6 +3895,7 @@
|
||||
season: 7
|
||||
episode: 22
|
||||
episode_title: 2000 Light Years from Home
|
||||
other: Classic
|
||||
container: mkv
|
||||
mimetype: video/x-matroska
|
||||
type: episode
|
||||
@@ -3962,15 +3963,3 @@
|
||||
subtitle_language: fr
|
||||
other: FullHD
|
||||
type: episode
|
||||
|
||||
? Whose Line is it anyway/Season 01/Whose.Line.is.it.Anyway.US.S13E01.720p.WEB.x264-TBS.mkv
|
||||
: title: Whose Line is it Anyway
|
||||
season: 13
|
||||
episode: 1
|
||||
country: US
|
||||
screen_size: 720p
|
||||
format: WEB-DL
|
||||
video_codec: h264
|
||||
release_group: TBS
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
@@ -1,163 +0,0 @@
|
||||
Change Log
|
||||
----------
|
||||
|
||||
0.999
|
||||
~~~~~
|
||||
|
||||
Released on December 23, 2013
|
||||
|
||||
* Fix #127: add work-around for CPython issue #20007: .read(0) on
|
||||
http.client.HTTPResponse drops the rest of the content.
|
||||
|
||||
* Fix #115: lxml treewalker can now deal with fragments containing, at
|
||||
their root level, text nodes with non-ASCII characters on Python 2.
|
||||
|
||||
|
||||
0.99
|
||||
~~~~
|
||||
|
||||
Released on September 10, 2013
|
||||
|
||||
* No library changes from 1.0b3; released as 0.99 as pip has changed
|
||||
behaviour from 1.4 to avoid installing pre-release versions per
|
||||
PEP 440.
|
||||
|
||||
|
||||
1.0b3
|
||||
~~~~~
|
||||
|
||||
Released on July 24, 2013
|
||||
|
||||
* Removed ``RecursiveTreeWalker`` from ``treewalkers._base``. Any
|
||||
implementation using it should be moved to
|
||||
``NonRecursiveTreeWalker``, as everything bundled with html5lib has
|
||||
for years.
|
||||
|
||||
* Fix #67 so that ``BufferedStream`` correctly returns a bytes
|
||||
object, thereby fixing any case where html5lib is passed a
|
||||
non-seekable RawIOBase-like object.
|
||||
|
||||
|
||||
1.0b2
|
||||
~~~~~
|
||||
|
||||
Released on June 27, 2013
|
||||
|
||||
* Removed reordering of attributes within the serializer. There is now
|
||||
an ``alphabetical_attributes`` option which preserves the previous
|
||||
behaviour through a new filter. This allows attribute order to be
|
||||
preserved through html5lib if the tree builder preserves order.
|
||||
|
||||
* Removed ``dom2sax`` from DOM treebuilders. It has been replaced by
|
||||
``treeadapters.sax.to_sax`` which is generic and supports any
|
||||
treewalker; it also resolves all known bugs with ``dom2sax``.
|
||||
|
||||
* Fix treewalker assertions on hitting bytes strings on
|
||||
Python 2. Previous to 1.0b1, treewalkers coped with mixed
|
||||
bytes/unicode data on Python 2; this reintroduces this prior
|
||||
behaviour on Python 2. Behaviour is unchanged on Python 3.
|
||||
|
||||
|
||||
1.0b1
|
||||
~~~~~
|
||||
|
||||
Released on May 17, 2013
|
||||
|
||||
* Implementation updated to implement the `HTML specification
|
||||
<http://www.whatwg.org/specs/web-apps/current-work/>`_ as of 5th May
|
||||
2013 (`SVN <http://svn.whatwg.org/webapps/>`_ revision r7867).
|
||||
|
||||
* Python 3.2+ supported in a single codebase using the ``six`` library.
|
||||
|
||||
* Removed support for Python 2.5 and older.
|
||||
|
||||
* Removed the deprecated Beautiful Soup 3 treebuilder.
|
||||
``beautifulsoup4`` can use ``html5lib`` as a parser instead. Note that
|
||||
since it doesn't support namespaces, foreign content like SVG and
|
||||
MathML is parsed incorrectly.
|
||||
|
||||
* Removed ``simpletree`` from the package. The default tree builder is
|
||||
now ``etree`` (using the ``xml.etree.cElementTree`` implementation if
|
||||
available, and ``xml.etree.ElementTree`` otherwise).
|
||||
|
||||
* Removed the ``XHTMLSerializer`` as it never actually guaranteed its
|
||||
output was well-formed XML, and hence provided little of use.
|
||||
|
||||
* Removed default DOM treebuilder, so ``html5lib.treebuilders.dom`` is no
|
||||
longer supported. ``html5lib.treebuilders.getTreeBuilder("dom")`` will
|
||||
return the default DOM treebuilder, which uses ``xml.dom.minidom``.
|
||||
|
||||
* Optional heuristic character encoding detection now based on
|
||||
``charade`` for Python 2.6 - 3.3 compatibility.
|
||||
|
||||
* Optional ``Genshi`` treewalker support fixed.
|
||||
|
||||
* Many bugfixes, including:
|
||||
|
||||
* #33: null in attribute value breaks XML AttValue;
|
||||
|
||||
* #4: nested, indirect descendant, <button> causes infinite loop;
|
||||
|
||||
* `Google Code 215
|
||||
<http://code.google.com/p/html5lib/issues/detail?id=215>`_: Properly
|
||||
detect seekable streams;
|
||||
|
||||
* `Google Code 206
|
||||
<http://code.google.com/p/html5lib/issues/detail?id=206>`_: add
|
||||
support for <video preload=...>, <audio preload=...>;
|
||||
|
||||
* `Google Code 205
|
||||
<http://code.google.com/p/html5lib/issues/detail?id=205>`_: add
|
||||
support for <video poster=...>;
|
||||
|
||||
* `Google Code 202
|
||||
<http://code.google.com/p/html5lib/issues/detail?id=202>`_: Unicode
|
||||
file breaks InputStream.
|
||||
|
||||
* Source code is now mostly PEP 8 compliant.
|
||||
|
||||
* Test harness has been improved and now depends on ``nose``.
|
||||
|
||||
* Documentation updated and moved to http://html5lib.readthedocs.org/.
|
||||
|
||||
|
||||
0.95
|
||||
~~~~
|
||||
|
||||
Released on February 11, 2012
|
||||
|
||||
|
||||
0.90
|
||||
~~~~
|
||||
|
||||
Released on January 17, 2010
|
||||
|
||||
|
||||
0.11.1
|
||||
~~~~~~
|
||||
|
||||
Released on June 12, 2008
|
||||
|
||||
|
||||
0.11
|
||||
~~~~
|
||||
|
||||
Released on June 10, 2008
|
||||
|
||||
|
||||
0.10
|
||||
~~~~
|
||||
|
||||
Released on October 7, 2007
|
||||
|
||||
|
||||
0.9
|
||||
~~~
|
||||
|
||||
Released on March 11, 2007
|
||||
|
||||
|
||||
0.2
|
||||
~~~
|
||||
|
||||
Released on January 8, 2007
|
||||
@@ -1,157 +0,0 @@
|
||||
html5lib
|
||||
========
|
||||
|
||||
.. image:: https://travis-ci.org/html5lib/html5lib-python.png?branch=master
|
||||
:target: https://travis-ci.org/html5lib/html5lib-python
|
||||
|
||||
html5lib is a pure-python library for parsing HTML. It is designed to
|
||||
conform to the WHATWG HTML specification, as is implemented by all major
|
||||
web browsers.
|
||||
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
Simple usage follows this pattern:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
with open("mydocument.html", "rb") as f:
|
||||
document = html5lib.parse(f)
|
||||
|
||||
or:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
document = html5lib.parse("<p>Hello World!")
|
||||
|
||||
By default, the ``document`` will be an ``xml.etree`` element instance.
|
||||
Whenever possible, html5lib chooses the accelerated ``ElementTree``
|
||||
implementation (i.e. ``xml.etree.cElementTree`` on Python 2.x).
|
||||
|
||||
Two other tree types are supported: ``xml.dom.minidom`` and
|
||||
``lxml.etree``. To use an alternative format, specify the name of
|
||||
a treebuilder:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
with open("mydocument.html", "rb") as f:
|
||||
lxml_etree_document = html5lib.parse(f, treebuilder="lxml")
|
||||
|
||||
When using with ``urllib2`` (Python 2), the charset from HTTP should be
|
||||
passed into html5lib as follows:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from contextlib import closing
|
||||
from urllib2 import urlopen
|
||||
import html5lib
|
||||
|
||||
with closing(urlopen("http://example.com/")) as f:
|
||||
document = html5lib.parse(f, encoding=f.info().getparam("charset"))
|
||||
|
||||
When using with ``urllib.request`` (Python 3), the charset from HTTP
|
||||
should be passed into html5lib as follows:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from urllib.request import urlopen
|
||||
import html5lib
|
||||
|
||||
with urlopen("http://example.com/") as f:
|
||||
document = html5lib.parse(f, encoding=f.info().get_content_charset())
|
||||
|
||||
To have more control over the parser, create a parser object explicitly.
|
||||
For instance, to make the parser raise exceptions on parse errors, use:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
with open("mydocument.html", "rb") as f:
|
||||
parser = html5lib.HTMLParser(strict=True)
|
||||
document = parser.parse(f)
|
||||
|
||||
When you're instantiating parser objects explicitly, pass a treebuilder
|
||||
class as the ``tree`` keyword argument to use an alternative document
|
||||
format:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
|
||||
minidom_document = parser.parse("<p>Hello World!")
|
||||
|
||||
More documentation is available at http://html5lib.readthedocs.org/.
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
html5lib works on CPython 2.6+, CPython 3.2+ and PyPy. To install it,
|
||||
use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ pip install html5lib
|
||||
|
||||
|
||||
Optional Dependencies
|
||||
---------------------
|
||||
|
||||
The following third-party libraries may be used for additional
|
||||
functionality:
|
||||
|
||||
- ``datrie`` can be used to improve parsing performance (though in
|
||||
almost all cases the improvement is marginal);
|
||||
|
||||
- ``lxml`` is supported as a tree format (for both building and
|
||||
walking) under CPython (but *not* PyPy where it is known to cause
|
||||
segfaults);
|
||||
|
||||
- ``genshi`` has a treewalker (but not builder); and
|
||||
|
||||
- ``charade`` can be used as a fallback when character encoding cannot
|
||||
be determined; ``chardet``, from which it was forked, can also be used
|
||||
on Python 2.
|
||||
|
||||
- ``ordereddict`` can be used under Python 2.6
|
||||
(``collections.OrderedDict`` is used instead on later versions) to
|
||||
serialize attributes in alphabetical order.
|
||||
|
||||
|
||||
Bugs
|
||||
----
|
||||
|
||||
Please report any bugs on the `issue tracker
|
||||
<https://github.com/html5lib/html5lib-python/issues>`_.
|
||||
|
||||
|
||||
Tests
|
||||
-----
|
||||
|
||||
Unit tests require the ``nose`` library and can be run using the
|
||||
``nosetests`` command in the root directory; ``ordereddict`` is
|
||||
required under Python 2.6. All should pass.
|
||||
|
||||
Test data are contained in a separate `html5lib-tests
|
||||
<https://github.com/html5lib/html5lib-tests>`_ repository and included
|
||||
as a submodule, thus for git checkouts they must be initialized::
|
||||
|
||||
$ git submodule init
|
||||
$ git submodule update
|
||||
|
||||
If you have all compatible Python implementations available on your
|
||||
system, you can run tests on all of them using the ``tox`` utility,
|
||||
which can be found on PyPI.
|
||||
|
||||
|
||||
Questions?
|
||||
----------
|
||||
|
||||
There's a mailing list available for support on Google Groups,
|
||||
`html5lib-discuss <http://groups.google.com/group/html5lib-discuss>`_,
|
||||
though you may get a quicker response asking on IRC in `#whatwg on
|
||||
irc.freenode.net <http://wiki.whatwg.org/wiki/IRC>`_.
|
||||
@@ -1,14 +1,23 @@
|
||||
"""
|
||||
HTML parsing library based on the WHATWG "HTML5"
|
||||
specification. The parser is designed to be compatible with existing
|
||||
HTML found in the wild and implements well-defined error recovery that
|
||||
HTML parsing library based on the `WHATWG HTML specification
|
||||
<https://whatwg.org/html>`_. The parser is designed to be compatible with
|
||||
existing HTML found in the wild and implements well-defined error recovery that
|
||||
is largely compatible with modern desktop web browsers.
|
||||
|
||||
Example usage:
|
||||
Example usage::
|
||||
|
||||
import html5lib
|
||||
f = open("my_document.html")
|
||||
tree = html5lib.parse(f)
|
||||
import html5lib
|
||||
with open("my_document.html", "rb") as f:
|
||||
tree = html5lib.parse(f)
|
||||
|
||||
For convenience, this module re-exports the following names:
|
||||
|
||||
* :func:`~.html5parser.parse`
|
||||
* :func:`~.html5parser.parseFragment`
|
||||
* :class:`~.html5parser.HTMLParser`
|
||||
* :func:`~.treebuilders.getTreeBuilder`
|
||||
* :func:`~.treewalkers.getTreeWalker`
|
||||
* :func:`~.serializer.serialize`
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
@@ -20,4 +29,7 @@ from .serializer import serialize
|
||||
|
||||
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
|
||||
"getTreeWalker", "serialize"]
|
||||
__version__ = "0.999"
|
||||
|
||||
# this has to be at the top level, see how setup.py parses this
|
||||
#: Distribution version number.
|
||||
__version__ = "1.0.1"
|
||||
|
||||
+9
-6
@@ -175,18 +175,18 @@ def escapeRegexp(string):
|
||||
return string
|
||||
|
||||
# output from the above
|
||||
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0
f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
|
||||
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0
f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
|
||||
|
||||
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3
040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
|
||||
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3
040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
|
||||
|
||||
# Simpler things
|
||||
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
|
||||
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
|
||||
|
||||
|
||||
class InfosetFilter(object):
|
||||
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
|
||||
|
||||
def __init__(self, replaceChars=None,
|
||||
def __init__(self,
|
||||
dropXmlnsLocalName=False,
|
||||
dropXmlnsAttrNs=False,
|
||||
preventDoubleDashComments=False,
|
||||
@@ -217,7 +217,7 @@ class InfosetFilter(object):
|
||||
else:
|
||||
return self.toXmlName(name)
|
||||
|
||||
def coerceElement(self, name, namespace=None):
|
||||
def coerceElement(self, name):
|
||||
return self.toXmlName(name)
|
||||
|
||||
def coerceComment(self, data):
|
||||
@@ -225,11 +225,14 @@ class InfosetFilter(object):
|
||||
while "--" in data:
|
||||
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
|
||||
data = data.replace("--", "- -")
|
||||
if data.endswith("-"):
|
||||
warnings.warn("Comments cannot end in a dash", DataLossWarning)
|
||||
data += " "
|
||||
return data
|
||||
|
||||
def coerceCharacters(self, data):
|
||||
if self.replaceFormFeedCharacters:
|
||||
for i in range(data.count("\x0C")):
|
||||
for _ in range(data.count("\x0C")):
|
||||
warnings.warn("Text cannot contain U+000C", DataLossWarning)
|
||||
data = data.replace("\x0C", " ")
|
||||
# Other non-xml characters
|
||||
+134
-97
@@ -1,13 +1,16 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
from six import text_type
|
||||
from six.moves import http_client
|
||||
|
||||
from six import text_type, binary_type
|
||||
from six.moves import http_client, urllib
|
||||
|
||||
import codecs
|
||||
import re
|
||||
|
||||
import webencodings
|
||||
|
||||
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
|
||||
from .constants import encodings, ReparseException
|
||||
from . import utils
|
||||
from .constants import _ReparseException
|
||||
from . import _utils
|
||||
|
||||
from io import StringIO
|
||||
|
||||
@@ -16,19 +19,26 @@ try:
|
||||
except ImportError:
|
||||
BytesIO = StringIO
|
||||
|
||||
try:
|
||||
from io import BufferedIOBase
|
||||
except ImportError:
|
||||
class BufferedIOBase(object):
|
||||
pass
|
||||
|
||||
# Non-unicode versions of constants for use in the pre-parser
|
||||
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
|
||||
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
|
||||
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
|
||||
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
|
||||
|
||||
invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]")
|
||||
|
||||
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa
|
||||
|
||||
if _utils.supports_lone_surrogates:
|
||||
# Use one extra step of indirection and create surrogates with
|
||||
# eval. Not using this indirection would introduce an illegal
|
||||
# unicode literal on platforms not supporting such lone
|
||||
# surrogates.
|
||||
assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
|
||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
|
||||
eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used
|
||||
"]")
|
||||
else:
|
||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
|
||||
|
||||
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
||||
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
||||
@@ -38,7 +48,7 @@ non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
||||
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
|
||||
0x10FFFE, 0x10FFFF])
|
||||
|
||||
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
|
||||
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
|
||||
|
||||
# Cache for charsUntil()
|
||||
charsUntilRegEx = {}
|
||||
@@ -118,10 +128,13 @@ class BufferedStream(object):
|
||||
return b"".join(rv)
|
||||
|
||||
|
||||
def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
|
||||
if isinstance(source, http_client.HTTPResponse):
|
||||
# Work around Python bug #20007: read(0) closes the connection.
|
||||
# http://bugs.python.org/issue20007
|
||||
def HTMLInputStream(source, **kwargs):
|
||||
# Work around Python bug #20007: read(0) closes the connection.
|
||||
# http://bugs.python.org/issue20007
|
||||
if (isinstance(source, http_client.HTTPResponse) or
|
||||
# Also check for addinfourl wrapping HTTPResponse
|
||||
(isinstance(source, urllib.response.addbase) and
|
||||
isinstance(source.fp, http_client.HTTPResponse))):
|
||||
isUnicode = False
|
||||
elif hasattr(source, "read"):
|
||||
isUnicode = isinstance(source.read(0), text_type)
|
||||
@@ -129,12 +142,13 @@ def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
|
||||
isUnicode = isinstance(source, text_type)
|
||||
|
||||
if isUnicode:
|
||||
if encoding is not None:
|
||||
raise TypeError("Cannot explicitly set an encoding with a unicode string")
|
||||
encodings = [x for x in kwargs if x.endswith("_encoding")]
|
||||
if encodings:
|
||||
raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
|
||||
|
||||
return HTMLUnicodeInputStream(source)
|
||||
return HTMLUnicodeInputStream(source, **kwargs)
|
||||
else:
|
||||
return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)
|
||||
return HTMLBinaryInputStream(source, **kwargs)
|
||||
|
||||
|
||||
class HTMLUnicodeInputStream(object):
|
||||
@@ -160,22 +174,21 @@ class HTMLUnicodeInputStream(object):
|
||||
regardless of any BOM or later declaration (such as in a meta
|
||||
element)
|
||||
|
||||
parseMeta - Look for a <meta> element containing encoding information
|
||||
|
||||
"""
|
||||
|
||||
# Craziness
|
||||
if len("\U0010FFFF") == 1:
|
||||
if not _utils.supports_lone_surrogates:
|
||||
# Such platforms will have already checked for such
|
||||
# surrogate errors, so no need to do this checking.
|
||||
self.reportCharacterErrors = None
|
||||
elif len("\U0010FFFF") == 1:
|
||||
self.reportCharacterErrors = self.characterErrorsUCS4
|
||||
self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]")
|
||||
else:
|
||||
self.reportCharacterErrors = self.characterErrorsUCS2
|
||||
self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])")
|
||||
|
||||
# List of where new lines occur
|
||||
self.newLines = [0]
|
||||
|
||||
self.charEncoding = ("utf-8", "certain")
|
||||
self.charEncoding = (lookupEncoding("utf-8"), "certain")
|
||||
self.dataStream = self.openStream(source)
|
||||
|
||||
self.reset()
|
||||
@@ -265,12 +278,10 @@ class HTMLUnicodeInputStream(object):
|
||||
self._bufferedCharacter = data[-1]
|
||||
data = data[:-1]
|
||||
|
||||
self.reportCharacterErrors(data)
|
||||
if self.reportCharacterErrors:
|
||||
self.reportCharacterErrors(data)
|
||||
|
||||
# Replace invalid characters
|
||||
# Note U+0000 is dealt with in the tokenizer
|
||||
data = self.replaceCharactersRegexp.sub("\ufffd", data)
|
||||
|
||||
data = data.replace("\r\n", "\n")
|
||||
data = data.replace("\r", "\n")
|
||||
|
||||
@@ -280,7 +291,7 @@ class HTMLUnicodeInputStream(object):
|
||||
return True
|
||||
|
||||
def characterErrorsUCS4(self, data):
|
||||
for i in range(len(invalid_unicode_re.findall(data))):
|
||||
for _ in range(len(invalid_unicode_re.findall(data))):
|
||||
self.errors.append("invalid-codepoint")
|
||||
|
||||
def characterErrorsUCS2(self, data):
|
||||
@@ -293,9 +304,9 @@ class HTMLUnicodeInputStream(object):
|
||||
codepoint = ord(match.group())
|
||||
pos = match.start()
|
||||
# Pretty sure there should be endianness issues here
|
||||
if utils.isSurrogatePair(data[pos:pos + 2]):
|
||||
if _utils.isSurrogatePair(data[pos:pos + 2]):
|
||||
# We have a surrogate pair!
|
||||
char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
|
||||
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
|
||||
if char_val in non_bmp_invalid_codepoints:
|
||||
self.errors.append("invalid-codepoint")
|
||||
skip = True
|
||||
@@ -378,7 +389,9 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
|
||||
def __init__(self, source, override_encoding=None, transport_encoding=None,
|
||||
same_origin_parent_encoding=None, likely_encoding=None,
|
||||
default_encoding="windows-1252", useChardet=True):
|
||||
"""Initialises the HTMLInputStream.
|
||||
|
||||
HTMLInputStream(source, [encoding]) -> Normalized stream from source
|
||||
@@ -391,8 +404,6 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
regardless of any BOM or later declaration (such as in a meta
|
||||
element)
|
||||
|
||||
parseMeta - Look for a <meta> element containing encoding information
|
||||
|
||||
"""
|
||||
# Raw Stream - for unicode objects this will encode to utf-8 and set
|
||||
# self.charEncoding as appropriate
|
||||
@@ -400,27 +411,28 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
|
||||
HTMLUnicodeInputStream.__init__(self, self.rawStream)
|
||||
|
||||
self.charEncoding = (codecName(encoding), "certain")
|
||||
|
||||
# Encoding Information
|
||||
# Number of bytes to use when looking for a meta element with
|
||||
# encoding information
|
||||
self.numBytesMeta = 512
|
||||
self.numBytesMeta = 1024
|
||||
# Number of bytes to use when detecting encoding using chardet
|
||||
self.numBytesChardet = 100
|
||||
# Encoding to use if no other information can be found
|
||||
self.defaultEncoding = "windows-1252"
|
||||
# Things from args
|
||||
self.override_encoding = override_encoding
|
||||
self.transport_encoding = transport_encoding
|
||||
self.same_origin_parent_encoding = same_origin_parent_encoding
|
||||
self.likely_encoding = likely_encoding
|
||||
self.default_encoding = default_encoding
|
||||
|
||||
# Detect encoding iff no explicit "transport level" encoding is supplied
|
||||
if (self.charEncoding[0] is None):
|
||||
self.charEncoding = self.detectEncoding(parseMeta, chardet)
|
||||
# Determine encoding
|
||||
self.charEncoding = self.determineEncoding(useChardet)
|
||||
assert self.charEncoding[0] is not None
|
||||
|
||||
# Call superclass
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
|
||||
'replace')
|
||||
self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
|
||||
HTMLUnicodeInputStream.reset(self)
|
||||
|
||||
def openStream(self, source):
|
||||
@@ -437,29 +449,50 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
|
||||
try:
|
||||
stream.seek(stream.tell())
|
||||
except:
|
||||
except: # pylint:disable=bare-except
|
||||
stream = BufferedStream(stream)
|
||||
|
||||
return stream
|
||||
|
||||
def detectEncoding(self, parseMeta=True, chardet=True):
|
||||
# First look for a BOM
|
||||
def determineEncoding(self, chardet=True):
|
||||
# BOMs take precedence over everything
|
||||
# This will also read past the BOM if present
|
||||
encoding = self.detectBOM()
|
||||
confidence = "certain"
|
||||
# If there is no BOM need to look for meta elements with encoding
|
||||
# information
|
||||
if encoding is None and parseMeta:
|
||||
encoding = self.detectEncodingMeta()
|
||||
confidence = "tentative"
|
||||
# Guess with chardet, if avaliable
|
||||
if encoding is None and chardet:
|
||||
confidence = "tentative"
|
||||
charEncoding = self.detectBOM(), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# If we've been overridden, we've been overridden
|
||||
charEncoding = lookupEncoding(self.override_encoding), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Now check the transport layer
|
||||
charEncoding = lookupEncoding(self.transport_encoding), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Look for meta elements with encoding information
|
||||
charEncoding = self.detectEncodingMeta(), "tentative"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Parent document encoding
|
||||
charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
|
||||
if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
|
||||
return charEncoding
|
||||
|
||||
# "likely" encoding
|
||||
charEncoding = lookupEncoding(self.likely_encoding), "tentative"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Guess with chardet, if available
|
||||
if chardet:
|
||||
try:
|
||||
try:
|
||||
from charade.universaldetector import UniversalDetector
|
||||
except ImportError:
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
buffers = []
|
||||
detector = UniversalDetector()
|
||||
while not detector.done:
|
||||
@@ -470,37 +503,34 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
buffers.append(buffer)
|
||||
detector.feed(buffer)
|
||||
detector.close()
|
||||
encoding = detector.result['encoding']
|
||||
encoding = lookupEncoding(detector.result['encoding'])
|
||||
self.rawStream.seek(0)
|
||||
except ImportError:
|
||||
pass
|
||||
# If all else fails use the default encoding
|
||||
if encoding is None:
|
||||
confidence = "tentative"
|
||||
encoding = self.defaultEncoding
|
||||
if encoding is not None:
|
||||
return encoding, "tentative"
|
||||
|
||||
# Substitute for equivalent encodings:
|
||||
encodingSub = {"iso-8859-1": "windows-1252"}
|
||||
# Try the default encoding
|
||||
charEncoding = lookupEncoding(self.default_encoding), "tentative"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
if encoding.lower() in encodingSub:
|
||||
encoding = encodingSub[encoding.lower()]
|
||||
|
||||
return encoding, confidence
|
||||
# Fall back to html5lib's default if even that hasn't worked
|
||||
return lookupEncoding("windows-1252"), "tentative"
|
||||
|
||||
def changeEncoding(self, newEncoding):
|
||||
assert self.charEncoding[1] != "certain"
|
||||
newEncoding = codecName(newEncoding)
|
||||
if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
|
||||
newEncoding = "utf-8"
|
||||
newEncoding = lookupEncoding(newEncoding)
|
||||
if newEncoding is None:
|
||||
return
|
||||
if newEncoding.name in ("utf-16be", "utf-16le"):
|
||||
newEncoding = lookupEncoding("utf-8")
|
||||
assert newEncoding is not None
|
||||
elif newEncoding == self.charEncoding[0]:
|
||||
self.charEncoding = (self.charEncoding[0], "certain")
|
||||
else:
|
||||
self.rawStream.seek(0)
|
||||
self.reset()
|
||||
self.charEncoding = (newEncoding, "certain")
|
||||
raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
|
||||
self.reset()
|
||||
raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
|
||||
|
||||
def detectBOM(self):
|
||||
"""Attempts to detect at BOM at the start of the stream. If
|
||||
@@ -508,8 +538,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
encoding otherwise return None"""
|
||||
bomDict = {
|
||||
codecs.BOM_UTF8: 'utf-8',
|
||||
codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
|
||||
codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
|
||||
codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
|
||||
codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
|
||||
}
|
||||
|
||||
# Go to beginning of file and read in 4 bytes
|
||||
@@ -529,9 +559,12 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
|
||||
# Set the read position past the BOM if one was found, otherwise
|
||||
# set it to the start of the stream
|
||||
self.rawStream.seek(encoding and seek or 0)
|
||||
|
||||
return encoding
|
||||
if encoding:
|
||||
self.rawStream.seek(seek)
|
||||
return lookupEncoding(encoding)
|
||||
else:
|
||||
self.rawStream.seek(0)
|
||||
return None
|
||||
|
||||
def detectEncodingMeta(self):
|
||||
"""Report the encoding declared by the meta element
|
||||
@@ -542,8 +575,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
self.rawStream.seek(0)
|
||||
encoding = parser.getEncoding()
|
||||
|
||||
if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
|
||||
encoding = "utf-8"
|
||||
if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
|
||||
encoding = lookupEncoding("utf-8")
|
||||
|
||||
return encoding
|
||||
|
||||
@@ -557,6 +590,7 @@ class EncodingBytes(bytes):
|
||||
return bytes.__new__(self, value.lower())
|
||||
|
||||
def __init__(self, value):
|
||||
# pylint:disable=unused-argument
|
||||
self._position = -1
|
||||
|
||||
def __iter__(self):
|
||||
@@ -667,7 +701,7 @@ class EncodingParser(object):
|
||||
(b"<!", self.handleOther),
|
||||
(b"<?", self.handleOther),
|
||||
(b"<", self.handlePossibleStartTag))
|
||||
for byte in self.data:
|
||||
for _ in self.data:
|
||||
keepParsing = True
|
||||
for key, method in methodDispatch:
|
||||
if self.data.matchBytes(key):
|
||||
@@ -706,7 +740,7 @@ class EncodingParser(object):
|
||||
return False
|
||||
elif attr[0] == b"charset":
|
||||
tentativeEncoding = attr[1]
|
||||
codec = codecName(tentativeEncoding)
|
||||
codec = lookupEncoding(tentativeEncoding)
|
||||
if codec is not None:
|
||||
self.encoding = codec
|
||||
return False
|
||||
@@ -714,7 +748,7 @@ class EncodingParser(object):
|
||||
contentParser = ContentAttrParser(EncodingBytes(attr[1]))
|
||||
tentativeEncoding = contentParser.parse()
|
||||
if tentativeEncoding is not None:
|
||||
codec = codecName(tentativeEncoding)
|
||||
codec = lookupEncoding(tentativeEncoding)
|
||||
if codec is not None:
|
||||
if hasPragma:
|
||||
self.encoding = codec
|
||||
@@ -871,16 +905,19 @@ class ContentAttrParser(object):
|
||||
return None
|
||||
|
||||
|
||||
def codecName(encoding):
|
||||
def lookupEncoding(encoding):
|
||||
"""Return the python codec name corresponding to an encoding or None if the
|
||||
string doesn't correspond to a valid encoding."""
|
||||
if isinstance(encoding, bytes):
|
||||
if isinstance(encoding, binary_type):
|
||||
try:
|
||||
encoding = encoding.decode("ascii")
|
||||
except UnicodeDecodeError:
|
||||
return None
|
||||
if encoding:
|
||||
canonicalName = ascii_punctuation_re.sub("", encoding).lower()
|
||||
return encodings.get(canonicalName, None)
|
||||
|
||||
if encoding is not None:
|
||||
try:
|
||||
return webencodings.lookup(encoding)
|
||||
except AttributeError:
|
||||
return None
|
||||
else:
|
||||
return None
|
||||
+13
-23
@@ -1,9 +1,6 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
try:
|
||||
chr = unichr # flake8: noqa
|
||||
except NameError:
|
||||
pass
|
||||
from six import unichr as chr
|
||||
|
||||
from collections import deque
|
||||
|
||||
@@ -14,9 +11,9 @@ from .constants import digits, hexDigits, EOF
|
||||
from .constants import tokenTypes, tagTokenTypes
|
||||
from .constants import replacementCharacters
|
||||
|
||||
from .inputstream import HTMLInputStream
|
||||
from ._inputstream import HTMLInputStream
|
||||
|
||||
from .trie import Trie
|
||||
from ._trie import Trie
|
||||
|
||||
entitiesTrie = Trie(entities)
|
||||
|
||||
@@ -34,16 +31,11 @@ class HTMLTokenizer(object):
|
||||
Points to HTMLInputStream object.
|
||||
"""
|
||||
|
||||
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
|
||||
lowercaseElementName=True, lowercaseAttrName=True, parser=None):
|
||||
def __init__(self, stream, parser=None, **kwargs):
|
||||
|
||||
self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet)
|
||||
self.stream = HTMLInputStream(stream, **kwargs)
|
||||
self.parser = parser
|
||||
|
||||
# Perform case conversions?
|
||||
self.lowercaseElementName = lowercaseElementName
|
||||
self.lowercaseAttrName = lowercaseAttrName
|
||||
|
||||
# Setup the initial tokenizer state
|
||||
self.escapeFlag = False
|
||||
self.lastFourChars = []
|
||||
@@ -147,8 +139,8 @@ class HTMLTokenizer(object):
|
||||
output = "&"
|
||||
|
||||
charStack = [self.stream.char()]
|
||||
if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&")
|
||||
or (allowedChar is not None and allowedChar == charStack[0])):
|
||||
if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or
|
||||
(allowedChar is not None and allowedChar == charStack[0])):
|
||||
self.stream.unget(charStack[0])
|
||||
|
||||
elif charStack[0] == "#":
|
||||
@@ -235,8 +227,7 @@ class HTMLTokenizer(object):
|
||||
token = self.currentToken
|
||||
# Add token to the queue to be yielded
|
||||
if (token["type"] in tagTokenTypes):
|
||||
if self.lowercaseElementName:
|
||||
token["name"] = token["name"].translate(asciiUpper2Lower)
|
||||
token["name"] = token["name"].translate(asciiUpper2Lower)
|
||||
if token["type"] == tokenTypes["EndTag"]:
|
||||
if token["data"]:
|
||||
self.tokenQueue.append({"type": tokenTypes["ParseError"],
|
||||
@@ -921,10 +912,9 @@ class HTMLTokenizer(object):
|
||||
# Attributes are not dropped at this stage. That happens when the
|
||||
# start tag token is emitted so values can still be safely appended
|
||||
# to attributes, but we do want to report the parse error in time.
|
||||
if self.lowercaseAttrName:
|
||||
self.currentToken["data"][-1][0] = (
|
||||
self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
|
||||
for name, value in self.currentToken["data"][:-1]:
|
||||
self.currentToken["data"][-1][0] = (
|
||||
self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
|
||||
for name, _ in self.currentToken["data"][:-1]:
|
||||
if self.currentToken["data"][-1][0] == name:
|
||||
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
|
||||
"duplicate-attribute"})
|
||||
@@ -1716,11 +1706,11 @@ class HTMLTokenizer(object):
|
||||
else:
|
||||
data.append(char)
|
||||
|
||||
data = "".join(data)
|
||||
data = "".join(data) # pylint:disable=redefined-variable-type
|
||||
# Deal with null here rather than in the parser
|
||||
nullCount = data.count("\u0000")
|
||||
if nullCount > 0:
|
||||
for i in range(nullCount):
|
||||
for _ in range(nullCount):
|
||||
self.tokenQueue.append({"type": tokenTypes["ParseError"],
|
||||
"data": "invalid-codepoint"})
|
||||
data = data.replace("\u0000", "\uFFFD")
|
||||
+2
@@ -4,9 +4,11 @@ from .py import Trie as PyTrie
|
||||
|
||||
Trie = PyTrie
|
||||
|
||||
# pylint:disable=wrong-import-position
|
||||
try:
|
||||
from .datrie import Trie as DATrie
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
Trie = DATrie
|
||||
# pylint:enable=wrong-import-position
|
||||
+3
-3
@@ -7,13 +7,13 @@ class Trie(Mapping):
|
||||
"""Abstract base class for tries"""
|
||||
|
||||
def keys(self, prefix=None):
|
||||
keys = super().keys()
|
||||
# pylint:disable=arguments-differ
|
||||
keys = super(Trie, self).keys()
|
||||
|
||||
if prefix is None:
|
||||
return set(keys)
|
||||
|
||||
# Python 2.6: no set comprehensions
|
||||
return set([x for x in keys if x.startswith(prefix)])
|
||||
return {x for x in keys if x.startswith(prefix)}
|
||||
|
||||
def has_keys_with_prefix(self, prefix):
|
||||
for key in self.keys():
|
||||
+49
-7
@@ -2,6 +2,8 @@ from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from types import ModuleType
|
||||
|
||||
from six import text_type
|
||||
|
||||
try:
|
||||
import xml.etree.cElementTree as default_etree
|
||||
except ImportError:
|
||||
@@ -9,7 +11,26 @@ except ImportError:
|
||||
|
||||
|
||||
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
|
||||
"surrogatePairToCodepoint", "moduleFactoryFactory"]
|
||||
"surrogatePairToCodepoint", "moduleFactoryFactory",
|
||||
"supports_lone_surrogates"]
|
||||
|
||||
|
||||
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
|
||||
# caught by the below test. In general this would be any platform
|
||||
# using UTF-16 as its encoding of unicode strings, such as
|
||||
# Jython. This is because UTF-16 itself is based on the use of such
|
||||
# surrogates, and there is no mechanism to further escape such
|
||||
# escapes.
|
||||
try:
|
||||
_x = eval('"\\uD800"') # pylint:disable=eval-used
|
||||
if not isinstance(_x, text_type):
|
||||
# We need this with u"" because of http://bugs.jython.org/issue2039
|
||||
_x = eval('u"\\uD800"') # pylint:disable=eval-used
|
||||
assert isinstance(_x, text_type)
|
||||
except: # pylint:disable=bare-except
|
||||
supports_lone_surrogates = False
|
||||
else:
|
||||
supports_lone_surrogates = True
|
||||
|
||||
|
||||
class MethodDispatcher(dict):
|
||||
@@ -31,19 +52,20 @@ class MethodDispatcher(dict):
|
||||
# anything here.
|
||||
_dictEntries = []
|
||||
for name, value in items:
|
||||
if type(name) in (list, tuple, frozenset, set):
|
||||
if isinstance(name, (list, tuple, frozenset, set)):
|
||||
for item in name:
|
||||
_dictEntries.append((item, value))
|
||||
else:
|
||||
_dictEntries.append((name, value))
|
||||
dict.__init__(self, _dictEntries)
|
||||
assert len(self) == len(_dictEntries)
|
||||
self.default = None
|
||||
|
||||
def __getitem__(self, key):
|
||||
return dict.get(self, key, self.default)
|
||||
|
||||
|
||||
# Some utility functions to dal with weirdness around UCS2 vs UCS4
|
||||
# Some utility functions to deal with weirdness around UCS2 vs UCS4
|
||||
# python builds
|
||||
|
||||
def isSurrogatePair(data):
|
||||
@@ -70,13 +92,33 @@ def moduleFactoryFactory(factory):
|
||||
else:
|
||||
name = b"_%s_factory" % baseModule.__name__
|
||||
|
||||
if name in moduleCache:
|
||||
return moduleCache[name]
|
||||
else:
|
||||
kwargs_tuple = tuple(kwargs.items())
|
||||
|
||||
try:
|
||||
return moduleCache[name][args][kwargs_tuple]
|
||||
except KeyError:
|
||||
mod = ModuleType(name)
|
||||
objs = factory(baseModule, *args, **kwargs)
|
||||
mod.__dict__.update(objs)
|
||||
moduleCache[name] = mod
|
||||
if "name" not in moduleCache:
|
||||
moduleCache[name] = {}
|
||||
if "args" not in moduleCache[name]:
|
||||
moduleCache[name][args] = {}
|
||||
if "kwargs" not in moduleCache[name][args]:
|
||||
moduleCache[name][args][kwargs_tuple] = {}
|
||||
moduleCache[name][args][kwargs_tuple] = mod
|
||||
return mod
|
||||
|
||||
return moduleFactory
|
||||
|
||||
|
||||
def memoize(func):
|
||||
cache = {}
|
||||
|
||||
def wrapped(*args, **kwargs):
|
||||
key = (tuple(args), tuple(kwargs.items()))
|
||||
if key not in cache:
|
||||
cache[key] = func(*args, **kwargs)
|
||||
return cache[key]
|
||||
|
||||
return wrapped
|
||||
@@ -1,292 +1,296 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import string
|
||||
import gettext
|
||||
_ = gettext.gettext
|
||||
|
||||
EOF = None
|
||||
|
||||
E = {
|
||||
"null-character":
|
||||
_("Null character in input stream, replaced with U+FFFD."),
|
||||
"Null character in input stream, replaced with U+FFFD.",
|
||||
"invalid-codepoint":
|
||||
_("Invalid codepoint in stream."),
|
||||
"Invalid codepoint in stream.",
|
||||
"incorrectly-placed-solidus":
|
||||
_("Solidus (/) incorrectly placed in tag."),
|
||||
"Solidus (/) incorrectly placed in tag.",
|
||||
"incorrect-cr-newline-entity":
|
||||
_("Incorrect CR newline entity, replaced with LF."),
|
||||
"Incorrect CR newline entity, replaced with LF.",
|
||||
"illegal-windows-1252-entity":
|
||||
_("Entity used with illegal number (windows-1252 reference)."),
|
||||
"Entity used with illegal number (windows-1252 reference).",
|
||||
"cant-convert-numeric-entity":
|
||||
_("Numeric entity couldn't be converted to character "
|
||||
"(codepoint U+%(charAsInt)08x)."),
|
||||
"Numeric entity couldn't be converted to character "
|
||||
"(codepoint U+%(charAsInt)08x).",
|
||||
"illegal-codepoint-for-numeric-entity":
|
||||
_("Numeric entity represents an illegal codepoint: "
|
||||
"U+%(charAsInt)08x."),
|
||||
"Numeric entity represents an illegal codepoint: "
|
||||
"U+%(charAsInt)08x.",
|
||||
"numeric-entity-without-semicolon":
|
||||
_("Numeric entity didn't end with ';'."),
|
||||
"Numeric entity didn't end with ';'.",
|
||||
"expected-numeric-entity-but-got-eof":
|
||||
_("Numeric entity expected. Got end of file instead."),
|
||||
"Numeric entity expected. Got end of file instead.",
|
||||
"expected-numeric-entity":
|
||||
_("Numeric entity expected but none found."),
|
||||
"Numeric entity expected but none found.",
|
||||
"named-entity-without-semicolon":
|
||||
_("Named entity didn't end with ';'."),
|
||||
"Named entity didn't end with ';'.",
|
||||
"expected-named-entity":
|
||||
_("Named entity expected. Got none."),
|
||||
"Named entity expected. Got none.",
|
||||
"attributes-in-end-tag":
|
||||
_("End tag contains unexpected attributes."),
|
||||
"End tag contains unexpected attributes.",
|
||||
'self-closing-flag-on-end-tag':
|
||||
_("End tag contains unexpected self-closing flag."),
|
||||
"End tag contains unexpected self-closing flag.",
|
||||
"expected-tag-name-but-got-right-bracket":
|
||||
_("Expected tag name. Got '>' instead."),
|
||||
"Expected tag name. Got '>' instead.",
|
||||
"expected-tag-name-but-got-question-mark":
|
||||
_("Expected tag name. Got '?' instead. (HTML doesn't "
|
||||
"support processing instructions.)"),
|
||||
"Expected tag name. Got '?' instead. (HTML doesn't "
|
||||
"support processing instructions.)",
|
||||
"expected-tag-name":
|
||||
_("Expected tag name. Got something else instead"),
|
||||
"Expected tag name. Got something else instead",
|
||||
"expected-closing-tag-but-got-right-bracket":
|
||||
_("Expected closing tag. Got '>' instead. Ignoring '</>'."),
|
||||
"Expected closing tag. Got '>' instead. Ignoring '</>'.",
|
||||
"expected-closing-tag-but-got-eof":
|
||||
_("Expected closing tag. Unexpected end of file."),
|
||||
"Expected closing tag. Unexpected end of file.",
|
||||
"expected-closing-tag-but-got-char":
|
||||
_("Expected closing tag. Unexpected character '%(data)s' found."),
|
||||
"Expected closing tag. Unexpected character '%(data)s' found.",
|
||||
"eof-in-tag-name":
|
||||
_("Unexpected end of file in the tag name."),
|
||||
"Unexpected end of file in the tag name.",
|
||||
"expected-attribute-name-but-got-eof":
|
||||
_("Unexpected end of file. Expected attribute name instead."),
|
||||
"Unexpected end of file. Expected attribute name instead.",
|
||||
"eof-in-attribute-name":
|
||||
_("Unexpected end of file in attribute name."),
|
||||
"Unexpected end of file in attribute name.",
|
||||
"invalid-character-in-attribute-name":
|
||||
_("Invalid character in attribute name"),
|
||||
"Invalid character in attribute name",
|
||||
"duplicate-attribute":
|
||||
_("Dropped duplicate attribute on tag."),
|
||||
"Dropped duplicate attribute on tag.",
|
||||
"expected-end-of-tag-name-but-got-eof":
|
||||
_("Unexpected end of file. Expected = or end of tag."),
|
||||
"Unexpected end of file. Expected = or end of tag.",
|
||||
"expected-attribute-value-but-got-eof":
|
||||
_("Unexpected end of file. Expected attribute value."),
|
||||
"Unexpected end of file. Expected attribute value.",
|
||||
"expected-attribute-value-but-got-right-bracket":
|
||||
_("Expected attribute value. Got '>' instead."),
|
||||
"Expected attribute value. Got '>' instead.",
|
||||
'equals-in-unquoted-attribute-value':
|
||||
_("Unexpected = in unquoted attribute"),
|
||||
"Unexpected = in unquoted attribute",
|
||||
'unexpected-character-in-unquoted-attribute-value':
|
||||
_("Unexpected character in unquoted attribute"),
|
||||
"Unexpected character in unquoted attribute",
|
||||
"invalid-character-after-attribute-name":
|
||||
_("Unexpected character after attribute name."),
|
||||
"Unexpected character after attribute name.",
|
||||
"unexpected-character-after-attribute-value":
|
||||
_("Unexpected character after attribute value."),
|
||||
"Unexpected character after attribute value.",
|
||||
"eof-in-attribute-value-double-quote":
|
||||
_("Unexpected end of file in attribute value (\")."),
|
||||
"Unexpected end of file in attribute value (\").",
|
||||
"eof-in-attribute-value-single-quote":
|
||||
_("Unexpected end of file in attribute value (')."),
|
||||
"Unexpected end of file in attribute value (').",
|
||||
"eof-in-attribute-value-no-quotes":
|
||||
_("Unexpected end of file in attribute value."),
|
||||
"Unexpected end of file in attribute value.",
|
||||
"unexpected-EOF-after-solidus-in-tag":
|
||||
_("Unexpected end of file in tag. Expected >"),
|
||||
"Unexpected end of file in tag. Expected >",
|
||||
"unexpected-character-after-solidus-in-tag":
|
||||
_("Unexpected character after / in tag. Expected >"),
|
||||
"Unexpected character after / in tag. Expected >",
|
||||
"expected-dashes-or-doctype":
|
||||
_("Expected '--' or 'DOCTYPE'. Not found."),
|
||||
"Expected '--' or 'DOCTYPE'. Not found.",
|
||||
"unexpected-bang-after-double-dash-in-comment":
|
||||
_("Unexpected ! after -- in comment"),
|
||||
"Unexpected ! after -- in comment",
|
||||
"unexpected-space-after-double-dash-in-comment":
|
||||
_("Unexpected space after -- in comment"),
|
||||
"Unexpected space after -- in comment",
|
||||
"incorrect-comment":
|
||||
_("Incorrect comment."),
|
||||
"Incorrect comment.",
|
||||
"eof-in-comment":
|
||||
_("Unexpected end of file in comment."),
|
||||
"Unexpected end of file in comment.",
|
||||
"eof-in-comment-end-dash":
|
||||
_("Unexpected end of file in comment (-)"),
|
||||
"Unexpected end of file in comment (-)",
|
||||
"unexpected-dash-after-double-dash-in-comment":
|
||||
_("Unexpected '-' after '--' found in comment."),
|
||||
"Unexpected '-' after '--' found in comment.",
|
||||
"eof-in-comment-double-dash":
|
||||
_("Unexpected end of file in comment (--)."),
|
||||
"Unexpected end of file in comment (--).",
|
||||
"eof-in-comment-end-space-state":
|
||||
_("Unexpected end of file in comment."),
|
||||
"Unexpected end of file in comment.",
|
||||
"eof-in-comment-end-bang-state":
|
||||
_("Unexpected end of file in comment."),
|
||||
"Unexpected end of file in comment.",
|
||||
"unexpected-char-in-comment":
|
||||
_("Unexpected character in comment found."),
|
||||
"Unexpected character in comment found.",
|
||||
"need-space-after-doctype":
|
||||
_("No space after literal string 'DOCTYPE'."),
|
||||
"No space after literal string 'DOCTYPE'.",
|
||||
"expected-doctype-name-but-got-right-bracket":
|
||||
_("Unexpected > character. Expected DOCTYPE name."),
|
||||
"Unexpected > character. Expected DOCTYPE name.",
|
||||
"expected-doctype-name-but-got-eof":
|
||||
_("Unexpected end of file. Expected DOCTYPE name."),
|
||||
"Unexpected end of file. Expected DOCTYPE name.",
|
||||
"eof-in-doctype-name":
|
||||
_("Unexpected end of file in DOCTYPE name."),
|
||||
"Unexpected end of file in DOCTYPE name.",
|
||||
"eof-in-doctype":
|
||||
_("Unexpected end of file in DOCTYPE."),
|
||||
"Unexpected end of file in DOCTYPE.",
|
||||
"expected-space-or-right-bracket-in-doctype":
|
||||
_("Expected space or '>'. Got '%(data)s'"),
|
||||
"Expected space or '>'. Got '%(data)s'",
|
||||
"unexpected-end-of-doctype":
|
||||
_("Unexpected end of DOCTYPE."),
|
||||
"Unexpected end of DOCTYPE.",
|
||||
"unexpected-char-in-doctype":
|
||||
_("Unexpected character in DOCTYPE."),
|
||||
"Unexpected character in DOCTYPE.",
|
||||
"eof-in-innerhtml":
|
||||
_("XXX innerHTML EOF"),
|
||||
"XXX innerHTML EOF",
|
||||
"unexpected-doctype":
|
||||
_("Unexpected DOCTYPE. Ignored."),
|
||||
"Unexpected DOCTYPE. Ignored.",
|
||||
"non-html-root":
|
||||
_("html needs to be the first start tag."),
|
||||
"html needs to be the first start tag.",
|
||||
"expected-doctype-but-got-eof":
|
||||
_("Unexpected End of file. Expected DOCTYPE."),
|
||||
"Unexpected End of file. Expected DOCTYPE.",
|
||||
"unknown-doctype":
|
||||
_("Erroneous DOCTYPE."),
|
||||
"Erroneous DOCTYPE.",
|
||||
"expected-doctype-but-got-chars":
|
||||
_("Unexpected non-space characters. Expected DOCTYPE."),
|
||||
"Unexpected non-space characters. Expected DOCTYPE.",
|
||||
"expected-doctype-but-got-start-tag":
|
||||
_("Unexpected start tag (%(name)s). Expected DOCTYPE."),
|
||||
"Unexpected start tag (%(name)s). Expected DOCTYPE.",
|
||||
"expected-doctype-but-got-end-tag":
|
||||
_("Unexpected end tag (%(name)s). Expected DOCTYPE."),
|
||||
"Unexpected end tag (%(name)s). Expected DOCTYPE.",
|
||||
"end-tag-after-implied-root":
|
||||
_("Unexpected end tag (%(name)s) after the (implied) root element."),
|
||||
"Unexpected end tag (%(name)s) after the (implied) root element.",
|
||||
"expected-named-closing-tag-but-got-eof":
|
||||
_("Unexpected end of file. Expected end tag (%(name)s)."),
|
||||
"Unexpected end of file. Expected end tag (%(name)s).",
|
||||
"two-heads-are-not-better-than-one":
|
||||
_("Unexpected start tag head in existing head. Ignored."),
|
||||
"Unexpected start tag head in existing head. Ignored.",
|
||||
"unexpected-end-tag":
|
||||
_("Unexpected end tag (%(name)s). Ignored."),
|
||||
"Unexpected end tag (%(name)s). Ignored.",
|
||||
"unexpected-start-tag-out-of-my-head":
|
||||
_("Unexpected start tag (%(name)s) that can be in head. Moved."),
|
||||
"Unexpected start tag (%(name)s) that can be in head. Moved.",
|
||||
"unexpected-start-tag":
|
||||
_("Unexpected start tag (%(name)s)."),
|
||||
"Unexpected start tag (%(name)s).",
|
||||
"missing-end-tag":
|
||||
_("Missing end tag (%(name)s)."),
|
||||
"Missing end tag (%(name)s).",
|
||||
"missing-end-tags":
|
||||
_("Missing end tags (%(name)s)."),
|
||||
"Missing end tags (%(name)s).",
|
||||
"unexpected-start-tag-implies-end-tag":
|
||||
_("Unexpected start tag (%(startName)s) "
|
||||
"implies end tag (%(endName)s)."),
|
||||
"Unexpected start tag (%(startName)s) "
|
||||
"implies end tag (%(endName)s).",
|
||||
"unexpected-start-tag-treated-as":
|
||||
_("Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
|
||||
"Unexpected start tag (%(originalName)s). Treated as %(newName)s.",
|
||||
"deprecated-tag":
|
||||
_("Unexpected start tag %(name)s. Don't use it!"),
|
||||
"Unexpected start tag %(name)s. Don't use it!",
|
||||
"unexpected-start-tag-ignored":
|
||||
_("Unexpected start tag %(name)s. Ignored."),
|
||||
"Unexpected start tag %(name)s. Ignored.",
|
||||
"expected-one-end-tag-but-got-another":
|
||||
_("Unexpected end tag (%(gotName)s). "
|
||||
"Missing end tag (%(expectedName)s)."),
|
||||
"Unexpected end tag (%(gotName)s). "
|
||||
"Missing end tag (%(expectedName)s).",
|
||||
"end-tag-too-early":
|
||||
_("End tag (%(name)s) seen too early. Expected other end tag."),
|
||||
"End tag (%(name)s) seen too early. Expected other end tag.",
|
||||
"end-tag-too-early-named":
|
||||
_("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
|
||||
"Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).",
|
||||
"end-tag-too-early-ignored":
|
||||
_("End tag (%(name)s) seen too early. Ignored."),
|
||||
"End tag (%(name)s) seen too early. Ignored.",
|
||||
"adoption-agency-1.1":
|
||||
_("End tag (%(name)s) violates step 1, "
|
||||
"paragraph 1 of the adoption agency algorithm."),
|
||||
"End tag (%(name)s) violates step 1, "
|
||||
"paragraph 1 of the adoption agency algorithm.",
|
||||
"adoption-agency-1.2":
|
||||
_("End tag (%(name)s) violates step 1, "
|
||||
"paragraph 2 of the adoption agency algorithm."),
|
||||
"End tag (%(name)s) violates step 1, "
|
||||
"paragraph 2 of the adoption agency algorithm.",
|
||||
"adoption-agency-1.3":
|
||||
_("End tag (%(name)s) violates step 1, "
|
||||
"paragraph 3 of the adoption agency algorithm."),
|
||||
"End tag (%(name)s) violates step 1, "
|
||||
"paragraph 3 of the adoption agency algorithm.",
|
||||
"adoption-agency-4.4":
|
||||
_("End tag (%(name)s) violates step 4, "
|
||||
"paragraph 4 of the adoption agency algorithm."),
|
||||
"End tag (%(name)s) violates step 4, "
|
||||
"paragraph 4 of the adoption agency algorithm.",
|
||||
"unexpected-end-tag-treated-as":
|
||||
_("Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
|
||||
"Unexpected end tag (%(originalName)s). Treated as %(newName)s.",
|
||||
"no-end-tag":
|
||||
_("This element (%(name)s) has no end tag."),
|
||||
"This element (%(name)s) has no end tag.",
|
||||
"unexpected-implied-end-tag-in-table":
|
||||
_("Unexpected implied end tag (%(name)s) in the table phase."),
|
||||
"Unexpected implied end tag (%(name)s) in the table phase.",
|
||||
"unexpected-implied-end-tag-in-table-body":
|
||||
_("Unexpected implied end tag (%(name)s) in the table body phase."),
|
||||
"Unexpected implied end tag (%(name)s) in the table body phase.",
|
||||
"unexpected-char-implies-table-voodoo":
|
||||
_("Unexpected non-space characters in "
|
||||
"table context caused voodoo mode."),
|
||||
"Unexpected non-space characters in "
|
||||
"table context caused voodoo mode.",
|
||||
"unexpected-hidden-input-in-table":
|
||||
_("Unexpected input with type hidden in table context."),
|
||||
"Unexpected input with type hidden in table context.",
|
||||
"unexpected-form-in-table":
|
||||
_("Unexpected form in table context."),
|
||||
"Unexpected form in table context.",
|
||||
"unexpected-start-tag-implies-table-voodoo":
|
||||
_("Unexpected start tag (%(name)s) in "
|
||||
"table context caused voodoo mode."),
|
||||
"Unexpected start tag (%(name)s) in "
|
||||
"table context caused voodoo mode.",
|
||||
"unexpected-end-tag-implies-table-voodoo":
|
||||
_("Unexpected end tag (%(name)s) in "
|
||||
"table context caused voodoo mode."),
|
||||
"Unexpected end tag (%(name)s) in "
|
||||
"table context caused voodoo mode.",
|
||||
"unexpected-cell-in-table-body":
|
||||
_("Unexpected table cell start tag (%(name)s) "
|
||||
"in the table body phase."),
|
||||
"Unexpected table cell start tag (%(name)s) "
|
||||
"in the table body phase.",
|
||||
"unexpected-cell-end-tag":
|
||||
_("Got table cell end tag (%(name)s) "
|
||||
"while required end tags are missing."),
|
||||
"Got table cell end tag (%(name)s) "
|
||||
"while required end tags are missing.",
|
||||
"unexpected-end-tag-in-table-body":
|
||||
_("Unexpected end tag (%(name)s) in the table body phase. Ignored."),
|
||||
"Unexpected end tag (%(name)s) in the table body phase. Ignored.",
|
||||
"unexpected-implied-end-tag-in-table-row":
|
||||
_("Unexpected implied end tag (%(name)s) in the table row phase."),
|
||||
"Unexpected implied end tag (%(name)s) in the table row phase.",
|
||||
"unexpected-end-tag-in-table-row":
|
||||
_("Unexpected end tag (%(name)s) in the table row phase. Ignored."),
|
||||
"Unexpected end tag (%(name)s) in the table row phase. Ignored.",
|
||||
"unexpected-select-in-select":
|
||||
_("Unexpected select start tag in the select phase "
|
||||
"treated as select end tag."),
|
||||
"Unexpected select start tag in the select phase "
|
||||
"treated as select end tag.",
|
||||
"unexpected-input-in-select":
|
||||
_("Unexpected input start tag in the select phase."),
|
||||
"Unexpected input start tag in the select phase.",
|
||||
"unexpected-start-tag-in-select":
|
||||
_("Unexpected start tag token (%(name)s in the select phase. "
|
||||
"Ignored."),
|
||||
"Unexpected start tag token (%(name)s in the select phase. "
|
||||
"Ignored.",
|
||||
"unexpected-end-tag-in-select":
|
||||
_("Unexpected end tag (%(name)s) in the select phase. Ignored."),
|
||||
"Unexpected end tag (%(name)s) in the select phase. Ignored.",
|
||||
"unexpected-table-element-start-tag-in-select-in-table":
|
||||
_("Unexpected table element start tag (%(name)s) in the select in table phase."),
|
||||
"Unexpected table element start tag (%(name)s) in the select in table phase.",
|
||||
"unexpected-table-element-end-tag-in-select-in-table":
|
||||
_("Unexpected table element end tag (%(name)s) in the select in table phase."),
|
||||
"Unexpected table element end tag (%(name)s) in the select in table phase.",
|
||||
"unexpected-char-after-body":
|
||||
_("Unexpected non-space characters in the after body phase."),
|
||||
"Unexpected non-space characters in the after body phase.",
|
||||
"unexpected-start-tag-after-body":
|
||||
_("Unexpected start tag token (%(name)s)"
|
||||
" in the after body phase."),
|
||||
"Unexpected start tag token (%(name)s)"
|
||||
" in the after body phase.",
|
||||
"unexpected-end-tag-after-body":
|
||||
_("Unexpected end tag token (%(name)s)"
|
||||
" in the after body phase."),
|
||||
"Unexpected end tag token (%(name)s)"
|
||||
" in the after body phase.",
|
||||
"unexpected-char-in-frameset":
|
||||
_("Unexpected characters in the frameset phase. Characters ignored."),
|
||||
"Unexpected characters in the frameset phase. Characters ignored.",
|
||||
"unexpected-start-tag-in-frameset":
|
||||
_("Unexpected start tag token (%(name)s)"
|
||||
" in the frameset phase. Ignored."),
|
||||
"Unexpected start tag token (%(name)s)"
|
||||
" in the frameset phase. Ignored.",
|
||||
"unexpected-frameset-in-frameset-innerhtml":
|
||||
_("Unexpected end tag token (frameset) "
|
||||
"in the frameset phase (innerHTML)."),
|
||||
"Unexpected end tag token (frameset) "
|
||||
"in the frameset phase (innerHTML).",
|
||||
"unexpected-end-tag-in-frameset":
|
||||
_("Unexpected end tag token (%(name)s)"
|
||||
" in the frameset phase. Ignored."),
|
||||
"Unexpected end tag token (%(name)s)"
|
||||
" in the frameset phase. Ignored.",
|
||||
"unexpected-char-after-frameset":
|
||||
_("Unexpected non-space characters in the "
|
||||
"after frameset phase. Ignored."),
|
||||
"Unexpected non-space characters in the "
|
||||
"after frameset phase. Ignored.",
|
||||
"unexpected-start-tag-after-frameset":
|
||||
_("Unexpected start tag (%(name)s)"
|
||||
" in the after frameset phase. Ignored."),
|
||||
"Unexpected start tag (%(name)s)"
|
||||
" in the after frameset phase. Ignored.",
|
||||
"unexpected-end-tag-after-frameset":
|
||||
_("Unexpected end tag (%(name)s)"
|
||||
" in the after frameset phase. Ignored."),
|
||||
"Unexpected end tag (%(name)s)"
|
||||
" in the after frameset phase. Ignored.",
|
||||
"unexpected-end-tag-after-body-innerhtml":
|
||||
_("Unexpected end tag after body(innerHtml)"),
|
||||
"Unexpected end tag after body(innerHtml)",
|
||||
"expected-eof-but-got-char":
|
||||
_("Unexpected non-space characters. Expected end of file."),
|
||||
"Unexpected non-space characters. Expected end of file.",
|
||||
"expected-eof-but-got-start-tag":
|
||||
_("Unexpected start tag (%(name)s)"
|
||||
". Expected end of file."),
|
||||
"Unexpected start tag (%(name)s)"
|
||||
". Expected end of file.",
|
||||
"expected-eof-but-got-end-tag":
|
||||
_("Unexpected end tag (%(name)s)"
|
||||
". Expected end of file."),
|
||||
"Unexpected end tag (%(name)s)"
|
||||
". Expected end of file.",
|
||||
"eof-in-table":
|
||||
_("Unexpected end of file. Expected table content."),
|
||||
"Unexpected end of file. Expected table content.",
|
||||
"eof-in-select":
|
||||
_("Unexpected end of file. Expected select content."),
|
||||
"Unexpected end of file. Expected select content.",
|
||||
"eof-in-frameset":
|
||||
_("Unexpected end of file. Expected frameset content."),
|
||||
"Unexpected end of file. Expected frameset content.",
|
||||
"eof-in-script-in-script":
|
||||
_("Unexpected end of file. Expected script content."),
|
||||
"Unexpected end of file. Expected script content.",
|
||||
"eof-in-foreign-lands":
|
||||
_("Unexpected end of file. Expected foreign content"),
|
||||
"Unexpected end of file. Expected foreign content",
|
||||
"non-void-element-with-trailing-solidus":
|
||||
_("Trailing solidus not allowed on element %(name)s"),
|
||||
"Trailing solidus not allowed on element %(name)s",
|
||||
"unexpected-html-element-in-foreign-content":
|
||||
_("Element %(name)s not allowed in a non-html context"),
|
||||
"Element %(name)s not allowed in a non-html context",
|
||||
"unexpected-end-tag-before-html":
|
||||
_("Unexpected end tag (%(name)s) before html."),
|
||||
"Unexpected end tag (%(name)s) before html.",
|
||||
"unexpected-inhead-noscript-tag":
|
||||
"Element %(name)s not allowed in a inhead-noscript context",
|
||||
"eof-in-head-noscript":
|
||||
"Unexpected end of file. Expected inhead-noscript content",
|
||||
"char-in-head-noscript":
|
||||
"Unexpected non-space character. Expected inhead-noscript content",
|
||||
"XXX-undefined-error":
|
||||
_("Undefined error (this sucks and should be fixed)"),
|
||||
"Undefined error (this sucks and should be fixed)",
|
||||
}
|
||||
|
||||
namespaces = {
|
||||
@@ -298,7 +302,7 @@ namespaces = {
|
||||
"xmlns": "http://www.w3.org/2000/xmlns/"
|
||||
}
|
||||
|
||||
scopingElements = frozenset((
|
||||
scopingElements = frozenset([
|
||||
(namespaces["html"], "applet"),
|
||||
(namespaces["html"], "caption"),
|
||||
(namespaces["html"], "html"),
|
||||
@@ -316,9 +320,9 @@ scopingElements = frozenset((
|
||||
(namespaces["svg"], "foreignObject"),
|
||||
(namespaces["svg"], "desc"),
|
||||
(namespaces["svg"], "title"),
|
||||
))
|
||||
])
|
||||
|
||||
formattingElements = frozenset((
|
||||
formattingElements = frozenset([
|
||||
(namespaces["html"], "a"),
|
||||
(namespaces["html"], "b"),
|
||||
(namespaces["html"], "big"),
|
||||
@@ -333,9 +337,9 @@ formattingElements = frozenset((
|
||||
(namespaces["html"], "strong"),
|
||||
(namespaces["html"], "tt"),
|
||||
(namespaces["html"], "u")
|
||||
))
|
||||
])
|
||||
|
||||
specialElements = frozenset((
|
||||
specialElements = frozenset([
|
||||
(namespaces["html"], "address"),
|
||||
(namespaces["html"], "applet"),
|
||||
(namespaces["html"], "area"),
|
||||
@@ -416,22 +420,89 @@ specialElements = frozenset((
|
||||
(namespaces["html"], "wbr"),
|
||||
(namespaces["html"], "xmp"),
|
||||
(namespaces["svg"], "foreignObject")
|
||||
))
|
||||
])
|
||||
|
||||
htmlIntegrationPointElements = frozenset((
|
||||
(namespaces["mathml"], "annotaion-xml"),
|
||||
htmlIntegrationPointElements = frozenset([
|
||||
(namespaces["mathml"], "annotation-xml"),
|
||||
(namespaces["svg"], "foreignObject"),
|
||||
(namespaces["svg"], "desc"),
|
||||
(namespaces["svg"], "title")
|
||||
))
|
||||
])
|
||||
|
||||
mathmlTextIntegrationPointElements = frozenset((
|
||||
mathmlTextIntegrationPointElements = frozenset([
|
||||
(namespaces["mathml"], "mi"),
|
||||
(namespaces["mathml"], "mo"),
|
||||
(namespaces["mathml"], "mn"),
|
||||
(namespaces["mathml"], "ms"),
|
||||
(namespaces["mathml"], "mtext")
|
||||
))
|
||||
])
|
||||
|
||||
adjustSVGAttributes = {
|
||||
"attributename": "attributeName",
|
||||
"attributetype": "attributeType",
|
||||
"basefrequency": "baseFrequency",
|
||||
"baseprofile": "baseProfile",
|
||||
"calcmode": "calcMode",
|
||||
"clippathunits": "clipPathUnits",
|
||||
"contentscripttype": "contentScriptType",
|
||||
"contentstyletype": "contentStyleType",
|
||||
"diffuseconstant": "diffuseConstant",
|
||||
"edgemode": "edgeMode",
|
||||
"externalresourcesrequired": "externalResourcesRequired",
|
||||
"filterres": "filterRes",
|
||||
"filterunits": "filterUnits",
|
||||
"glyphref": "glyphRef",
|
||||
"gradienttransform": "gradientTransform",
|
||||
"gradientunits": "gradientUnits",
|
||||
"kernelmatrix": "kernelMatrix",
|
||||
"kernelunitlength": "kernelUnitLength",
|
||||
"keypoints": "keyPoints",
|
||||
"keysplines": "keySplines",
|
||||
"keytimes": "keyTimes",
|
||||
"lengthadjust": "lengthAdjust",
|
||||
"limitingconeangle": "limitingConeAngle",
|
||||
"markerheight": "markerHeight",
|
||||
"markerunits": "markerUnits",
|
||||
"markerwidth": "markerWidth",
|
||||
"maskcontentunits": "maskContentUnits",
|
||||
"maskunits": "maskUnits",
|
||||
"numoctaves": "numOctaves",
|
||||
"pathlength": "pathLength",
|
||||
"patterncontentunits": "patternContentUnits",
|
||||
"patterntransform": "patternTransform",
|
||||
"patternunits": "patternUnits",
|
||||
"pointsatx": "pointsAtX",
|
||||
"pointsaty": "pointsAtY",
|
||||
"pointsatz": "pointsAtZ",
|
||||
"preservealpha": "preserveAlpha",
|
||||
"preserveaspectratio": "preserveAspectRatio",
|
||||
"primitiveunits": "primitiveUnits",
|
||||
"refx": "refX",
|
||||
"refy": "refY",
|
||||
"repeatcount": "repeatCount",
|
||||
"repeatdur": "repeatDur",
|
||||
"requiredextensions": "requiredExtensions",
|
||||
"requiredfeatures": "requiredFeatures",
|
||||
"specularconstant": "specularConstant",
|
||||
"specularexponent": "specularExponent",
|
||||
"spreadmethod": "spreadMethod",
|
||||
"startoffset": "startOffset",
|
||||
"stddeviation": "stdDeviation",
|
||||
"stitchtiles": "stitchTiles",
|
||||
"surfacescale": "surfaceScale",
|
||||
"systemlanguage": "systemLanguage",
|
||||
"tablevalues": "tableValues",
|
||||
"targetx": "targetX",
|
||||
"targety": "targetY",
|
||||
"textlength": "textLength",
|
||||
"viewbox": "viewBox",
|
||||
"viewtarget": "viewTarget",
|
||||
"xchannelselector": "xChannelSelector",
|
||||
"ychannelselector": "yChannelSelector",
|
||||
"zoomandpan": "zoomAndPan"
|
||||
}
|
||||
|
||||
adjustMathMLAttributes = {"definitionurl": "definitionURL"}
|
||||
|
||||
adjustForeignAttributes = {
|
||||
"xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
|
||||
@@ -451,21 +522,21 @@ adjustForeignAttributes = {
|
||||
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
|
||||
adjustForeignAttributes.items()])
|
||||
|
||||
spaceCharacters = frozenset((
|
||||
spaceCharacters = frozenset([
|
||||
"\t",
|
||||
"\n",
|
||||
"\u000C",
|
||||
" ",
|
||||
"\r"
|
||||
))
|
||||
])
|
||||
|
||||
tableInsertModeElements = frozenset((
|
||||
tableInsertModeElements = frozenset([
|
||||
"table",
|
||||
"tbody",
|
||||
"tfoot",
|
||||
"thead",
|
||||
"tr"
|
||||
))
|
||||
])
|
||||
|
||||
asciiLowercase = frozenset(string.ascii_lowercase)
|
||||
asciiUppercase = frozenset(string.ascii_uppercase)
|
||||
@@ -486,7 +557,7 @@ headingElements = (
|
||||
"h6"
|
||||
)
|
||||
|
||||
voidElements = frozenset((
|
||||
voidElements = frozenset([
|
||||
"base",
|
||||
"command",
|
||||
"event-source",
|
||||
@@ -502,11 +573,11 @@ voidElements = frozenset((
|
||||
"input",
|
||||
"source",
|
||||
"track"
|
||||
))
|
||||
])
|
||||
|
||||
cdataElements = frozenset(('title', 'textarea'))
|
||||
cdataElements = frozenset(['title', 'textarea'])
|
||||
|
||||
rcdataElements = frozenset((
|
||||
rcdataElements = frozenset([
|
||||
'style',
|
||||
'script',
|
||||
'xmp',
|
||||
@@ -514,27 +585,28 @@ rcdataElements = frozenset((
|
||||
'noembed',
|
||||
'noframes',
|
||||
'noscript'
|
||||
))
|
||||
])
|
||||
|
||||
booleanAttributes = {
|
||||
"": frozenset(("irrelevant",)),
|
||||
"style": frozenset(("scoped",)),
|
||||
"img": frozenset(("ismap",)),
|
||||
"audio": frozenset(("autoplay", "controls")),
|
||||
"video": frozenset(("autoplay", "controls")),
|
||||
"script": frozenset(("defer", "async")),
|
||||
"details": frozenset(("open",)),
|
||||
"datagrid": frozenset(("multiple", "disabled")),
|
||||
"command": frozenset(("hidden", "disabled", "checked", "default")),
|
||||
"hr": frozenset(("noshade")),
|
||||
"menu": frozenset(("autosubmit",)),
|
||||
"fieldset": frozenset(("disabled", "readonly")),
|
||||
"option": frozenset(("disabled", "readonly", "selected")),
|
||||
"optgroup": frozenset(("disabled", "readonly")),
|
||||
"button": frozenset(("disabled", "autofocus")),
|
||||
"input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")),
|
||||
"select": frozenset(("disabled", "readonly", "autofocus", "multiple")),
|
||||
"output": frozenset(("disabled", "readonly")),
|
||||
"": frozenset(["irrelevant", "itemscope"]),
|
||||
"style": frozenset(["scoped"]),
|
||||
"img": frozenset(["ismap"]),
|
||||
"audio": frozenset(["autoplay", "controls"]),
|
||||
"video": frozenset(["autoplay", "controls"]),
|
||||
"script": frozenset(["defer", "async"]),
|
||||
"details": frozenset(["open"]),
|
||||
"datagrid": frozenset(["multiple", "disabled"]),
|
||||
"command": frozenset(["hidden", "disabled", "checked", "default"]),
|
||||
"hr": frozenset(["noshade"]),
|
||||
"menu": frozenset(["autosubmit"]),
|
||||
"fieldset": frozenset(["disabled", "readonly"]),
|
||||
"option": frozenset(["disabled", "readonly", "selected"]),
|
||||
"optgroup": frozenset(["disabled", "readonly"]),
|
||||
"button": frozenset(["disabled", "autofocus"]),
|
||||
"input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
|
||||
"select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
|
||||
"output": frozenset(["disabled", "readonly"]),
|
||||
"iframe": frozenset(["seamless"]),
|
||||
}
|
||||
|
||||
# entitiesWindows1252 has to be _ordered_ and needs to have an index. It
|
||||
@@ -574,7 +646,7 @@ entitiesWindows1252 = (
|
||||
376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
)
|
||||
|
||||
xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;'))
|
||||
xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;'])
|
||||
|
||||
entities = {
|
||||
"AElig": "\xc6",
|
||||
@@ -2815,7 +2887,6 @@ replacementCharacters = {
|
||||
0x0d: "\u000D",
|
||||
0x80: "\u20AC",
|
||||
0x81: "\u0081",
|
||||
0x81: "\u0081",
|
||||
0x82: "\u201A",
|
||||
0x83: "\u0192",
|
||||
0x84: "\u201E",
|
||||
@@ -2848,235 +2919,6 @@ replacementCharacters = {
|
||||
0x9F: "\u0178",
|
||||
}
|
||||
|
||||
encodings = {
|
||||
'437': 'cp437',
|
||||
'850': 'cp850',
|
||||
'852': 'cp852',
|
||||
'855': 'cp855',
|
||||
'857': 'cp857',
|
||||
'860': 'cp860',
|
||||
'861': 'cp861',
|
||||
'862': 'cp862',
|
||||
'863': 'cp863',
|
||||
'865': 'cp865',
|
||||
'866': 'cp866',
|
||||
'869': 'cp869',
|
||||
'ansix341968': 'ascii',
|
||||
'ansix341986': 'ascii',
|
||||
'arabic': 'iso8859-6',
|
||||
'ascii': 'ascii',
|
||||
'asmo708': 'iso8859-6',
|
||||
'big5': 'big5',
|
||||
'big5hkscs': 'big5hkscs',
|
||||
'chinese': 'gbk',
|
||||
'cp037': 'cp037',
|
||||
'cp1026': 'cp1026',
|
||||
'cp154': 'ptcp154',
|
||||
'cp367': 'ascii',
|
||||
'cp424': 'cp424',
|
||||
'cp437': 'cp437',
|
||||
'cp500': 'cp500',
|
||||
'cp775': 'cp775',
|
||||
'cp819': 'windows-1252',
|
||||
'cp850': 'cp850',
|
||||
'cp852': 'cp852',
|
||||
'cp855': 'cp855',
|
||||
'cp857': 'cp857',
|
||||
'cp860': 'cp860',
|
||||
'cp861': 'cp861',
|
||||
'cp862': 'cp862',
|
||||
'cp863': 'cp863',
|
||||
'cp864': 'cp864',
|
||||
'cp865': 'cp865',
|
||||
'cp866': 'cp866',
|
||||
'cp869': 'cp869',
|
||||
'cp936': 'gbk',
|
||||
'cpgr': 'cp869',
|
||||
'cpis': 'cp861',
|
||||
'csascii': 'ascii',
|
||||
'csbig5': 'big5',
|
||||
'cseuckr': 'cp949',
|
||||
'cseucpkdfmtjapanese': 'euc_jp',
|
||||
'csgb2312': 'gbk',
|
||||
'cshproman8': 'hp-roman8',
|
||||
'csibm037': 'cp037',
|
||||
'csibm1026': 'cp1026',
|
||||
'csibm424': 'cp424',
|
||||
'csibm500': 'cp500',
|
||||
'csibm855': 'cp855',
|
||||
'csibm857': 'cp857',
|
||||
'csibm860': 'cp860',
|
||||
'csibm861': 'cp861',
|
||||
'csibm863': 'cp863',
|
||||
'csibm864': 'cp864',
|
||||
'csibm865': 'cp865',
|
||||
'csibm866': 'cp866',
|
||||
'csibm869': 'cp869',
|
||||
'csiso2022jp': 'iso2022_jp',
|
||||
'csiso2022jp2': 'iso2022_jp_2',
|
||||
'csiso2022kr': 'iso2022_kr',
|
||||
'csiso58gb231280': 'gbk',
|
||||
'csisolatin1': 'windows-1252',
|
||||
'csisolatin2': 'iso8859-2',
|
||||
'csisolatin3': 'iso8859-3',
|
||||
'csisolatin4': 'iso8859-4',
|
||||
'csisolatin5': 'windows-1254',
|
||||
'csisolatin6': 'iso8859-10',
|
||||
'csisolatinarabic': 'iso8859-6',
|
||||
'csisolatincyrillic': 'iso8859-5',
|
||||
'csisolatingreek': 'iso8859-7',
|
||||
'csisolatinhebrew': 'iso8859-8',
|
||||
'cskoi8r': 'koi8-r',
|
||||
'csksc56011987': 'cp949',
|
||||
'cspc775baltic': 'cp775',
|
||||
'cspc850multilingual': 'cp850',
|
||||
'cspc862latinhebrew': 'cp862',
|
||||
'cspc8codepage437': 'cp437',
|
||||
'cspcp852': 'cp852',
|
||||
'csptcp154': 'ptcp154',
|
||||
'csshiftjis': 'shift_jis',
|
||||
'csunicode11utf7': 'utf-7',
|
||||
'cyrillic': 'iso8859-5',
|
||||
'cyrillicasian': 'ptcp154',
|
||||
'ebcdiccpbe': 'cp500',
|
||||
'ebcdiccpca': 'cp037',
|
||||
'ebcdiccpch': 'cp500',
|
||||
'ebcdiccphe': 'cp424',
|
||||
'ebcdiccpnl': 'cp037',
|
||||
'ebcdiccpus': 'cp037',
|
||||
'ebcdiccpwt': 'cp037',
|
||||
'ecma114': 'iso8859-6',
|
||||
'ecma118': 'iso8859-7',
|
||||
'elot928': 'iso8859-7',
|
||||
'eucjp': 'euc_jp',
|
||||
'euckr': 'cp949',
|
||||
'extendedunixcodepackedformatforjapanese': 'euc_jp',
|
||||
'gb18030': 'gb18030',
|
||||
'gb2312': 'gbk',
|
||||
'gb231280': 'gbk',
|
||||
'gbk': 'gbk',
|
||||
'greek': 'iso8859-7',
|
||||
'greek8': 'iso8859-7',
|
||||
'hebrew': 'iso8859-8',
|
||||
'hproman8': 'hp-roman8',
|
||||
'hzgb2312': 'hz',
|
||||
'ibm037': 'cp037',
|
||||
'ibm1026': 'cp1026',
|
||||
'ibm367': 'ascii',
|
||||
'ibm424': 'cp424',
|
||||
'ibm437': 'cp437',
|
||||
'ibm500': 'cp500',
|
||||
'ibm775': 'cp775',
|
||||
'ibm819': 'windows-1252',
|
||||
'ibm850': 'cp850',
|
||||
'ibm852': 'cp852',
|
||||
'ibm855': 'cp855',
|
||||
'ibm857': 'cp857',
|
||||
'ibm860': 'cp860',
|
||||
'ibm861': 'cp861',
|
||||
'ibm862': 'cp862',
|
||||
'ibm863': 'cp863',
|
||||
'ibm864': 'cp864',
|
||||
'ibm865': 'cp865',
|
||||
'ibm866': 'cp866',
|
||||
'ibm869': 'cp869',
|
||||
'iso2022jp': 'iso2022_jp',
|
||||
'iso2022jp2': 'iso2022_jp_2',
|
||||
'iso2022kr': 'iso2022_kr',
|
||||
'iso646irv1991': 'ascii',
|
||||
'iso646us': 'ascii',
|
||||
'iso88591': 'windows-1252',
|
||||
'iso885910': 'iso8859-10',
|
||||
'iso8859101992': 'iso8859-10',
|
||||
'iso885911987': 'windows-1252',
|
||||
'iso885913': 'iso8859-13',
|
||||
'iso885914': 'iso8859-14',
|
||||
'iso8859141998': 'iso8859-14',
|
||||
'iso885915': 'iso8859-15',
|
||||
'iso885916': 'iso8859-16',
|
||||
'iso8859162001': 'iso8859-16',
|
||||
'iso88592': 'iso8859-2',
|
||||
'iso885921987': 'iso8859-2',
|
||||
'iso88593': 'iso8859-3',
|
||||
'iso885931988': 'iso8859-3',
|
||||
'iso88594': 'iso8859-4',
|
||||
'iso885941988': 'iso8859-4',
|
||||
'iso88595': 'iso8859-5',
|
||||
'iso885951988': 'iso8859-5',
|
||||
'iso88596': 'iso8859-6',
|
||||
'iso885961987': 'iso8859-6',
|
||||
'iso88597': 'iso8859-7',
|
||||
'iso885971987': 'iso8859-7',
|
||||
'iso88598': 'iso8859-8',
|
||||
'iso885981988': 'iso8859-8',
|
||||
'iso88599': 'windows-1254',
|
||||
'iso885991989': 'windows-1254',
|
||||
'isoceltic': 'iso8859-14',
|
||||
'isoir100': 'windows-1252',
|
||||
'isoir101': 'iso8859-2',
|
||||
'isoir109': 'iso8859-3',
|
||||
'isoir110': 'iso8859-4',
|
||||
'isoir126': 'iso8859-7',
|
||||
'isoir127': 'iso8859-6',
|
||||
'isoir138': 'iso8859-8',
|
||||
'isoir144': 'iso8859-5',
|
||||
'isoir148': 'windows-1254',
|
||||
'isoir149': 'cp949',
|
||||
'isoir157': 'iso8859-10',
|
||||
'isoir199': 'iso8859-14',
|
||||
'isoir226': 'iso8859-16',
|
||||
'isoir58': 'gbk',
|
||||
'isoir6': 'ascii',
|
||||
'koi8r': 'koi8-r',
|
||||
'koi8u': 'koi8-u',
|
||||
'korean': 'cp949',
|
||||
'ksc5601': 'cp949',
|
||||
'ksc56011987': 'cp949',
|
||||
'ksc56011989': 'cp949',
|
||||
'l1': 'windows-1252',
|
||||
'l10': 'iso8859-16',
|
||||
'l2': 'iso8859-2',
|
||||
'l3': 'iso8859-3',
|
||||
'l4': 'iso8859-4',
|
||||
'l5': 'windows-1254',
|
||||
'l6': 'iso8859-10',
|
||||
'l8': 'iso8859-14',
|
||||
'latin1': 'windows-1252',
|
||||
'latin10': 'iso8859-16',
|
||||
'latin2': 'iso8859-2',
|
||||
'latin3': 'iso8859-3',
|
||||
'latin4': 'iso8859-4',
|
||||
'latin5': 'windows-1254',
|
||||
'latin6': 'iso8859-10',
|
||||
'latin8': 'iso8859-14',
|
||||
'latin9': 'iso8859-15',
|
||||
'ms936': 'gbk',
|
||||
'mskanji': 'shift_jis',
|
||||
'pt154': 'ptcp154',
|
||||
'ptcp154': 'ptcp154',
|
||||
'r8': 'hp-roman8',
|
||||
'roman8': 'hp-roman8',
|
||||
'shiftjis': 'shift_jis',
|
||||
'tis620': 'cp874',
|
||||
'unicode11utf7': 'utf-7',
|
||||
'us': 'ascii',
|
||||
'usascii': 'ascii',
|
||||
'utf16': 'utf-16',
|
||||
'utf16be': 'utf-16-be',
|
||||
'utf16le': 'utf-16-le',
|
||||
'utf8': 'utf-8',
|
||||
'windows1250': 'cp1250',
|
||||
'windows1251': 'cp1251',
|
||||
'windows1252': 'cp1252',
|
||||
'windows1253': 'cp1253',
|
||||
'windows1254': 'cp1254',
|
||||
'windows1255': 'cp1255',
|
||||
'windows1256': 'cp1256',
|
||||
'windows1257': 'cp1257',
|
||||
'windows1258': 'cp1258',
|
||||
'windows936': 'gbk',
|
||||
'x-x-big5': 'big5'}
|
||||
|
||||
tokenTypes = {
|
||||
"Doctype": 0,
|
||||
"Characters": 1,
|
||||
@@ -3088,8 +2930,8 @@ tokenTypes = {
|
||||
"ParseError": 7
|
||||
}
|
||||
|
||||
tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],
|
||||
tokenTypes["EmptyTag"]))
|
||||
tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
|
||||
tokenTypes["EmptyTag"]])
|
||||
|
||||
|
||||
prefixes = dict([(v, k) for k, v in namespaces.items()])
|
||||
@@ -3097,8 +2939,9 @@ prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
|
||||
|
||||
|
||||
class DataLossWarning(UserWarning):
|
||||
"""Raised when the current tree is unable to represent the input data"""
|
||||
pass
|
||||
|
||||
|
||||
class ReparseException(Exception):
|
||||
class _ReparseException(Exception):
|
||||
pass
|
||||
|
||||
@@ -1,20 +1,29 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
|
||||
try:
|
||||
from collections import OrderedDict
|
||||
except ImportError:
|
||||
from ordereddict import OrderedDict
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class Filter(_base.Filter):
|
||||
def _attr_key(attr):
|
||||
"""Return an appropriate key for an attribute for sorting
|
||||
|
||||
Attributes have a namespace that can be either ``None`` or a string. We
|
||||
can't compare the two because they're different types, so we convert
|
||||
``None`` to an empty string first.
|
||||
|
||||
"""
|
||||
return (attr[0][0] or ''), attr[0][1]
|
||||
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Alphabetizes attributes for elements"""
|
||||
def __iter__(self):
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
if token["type"] in ("StartTag", "EmptyTag"):
|
||||
attrs = OrderedDict()
|
||||
for name, value in sorted(token["data"].items(),
|
||||
key=lambda x: x[0]):
|
||||
key=_attr_key):
|
||||
attrs[name] = value
|
||||
token["data"] = attrs
|
||||
yield token
|
||||
|
||||
@@ -1,11 +1,19 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
|
||||
|
||||
class Filter(_base.Filter):
|
||||
class Filter(base.Filter):
|
||||
"""Injects ``<meta charset=ENCODING>`` tag into head of document"""
|
||||
def __init__(self, source, encoding):
|
||||
_base.Filter.__init__(self, source)
|
||||
"""Creates a Filter
|
||||
|
||||
:arg source: the source token stream
|
||||
|
||||
:arg encoding: the encoding to set
|
||||
|
||||
"""
|
||||
base.Filter.__init__(self, source)
|
||||
self.encoding = encoding
|
||||
|
||||
def __iter__(self):
|
||||
@@ -13,7 +21,7 @@ class Filter(_base.Filter):
|
||||
meta_found = (self.encoding is None)
|
||||
pending = []
|
||||
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
type = token["type"]
|
||||
if type == "StartTag":
|
||||
if token["name"].lower() == "head":
|
||||
|
||||
@@ -1,93 +1,93 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from gettext import gettext
|
||||
_ = gettext
|
||||
from six import text_type
|
||||
|
||||
from . import _base
|
||||
from ..constants import cdataElements, rcdataElements, voidElements
|
||||
from . import base
|
||||
from ..constants import namespaces, voidElements
|
||||
|
||||
from ..constants import spaceCharacters
|
||||
spaceCharacters = "".join(spaceCharacters)
|
||||
|
||||
|
||||
class LintError(Exception):
|
||||
pass
|
||||
class Filter(base.Filter):
|
||||
"""Lints the token stream for errors
|
||||
|
||||
If it finds any errors, it'll raise an ``AssertionError``.
|
||||
|
||||
"""
|
||||
def __init__(self, source, require_matching_tags=True):
|
||||
"""Creates a Filter
|
||||
|
||||
:arg source: the source token stream
|
||||
|
||||
:arg require_matching_tags: whether or not to require matching tags
|
||||
|
||||
"""
|
||||
super(Filter, self).__init__(source)
|
||||
self.require_matching_tags = require_matching_tags
|
||||
|
||||
class Filter(_base.Filter):
|
||||
def __iter__(self):
|
||||
open_elements = []
|
||||
contentModelFlag = "PCDATA"
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
type = token["type"]
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
namespace = token["namespace"]
|
||||
name = token["name"]
|
||||
if contentModelFlag != "PCDATA":
|
||||
raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
|
||||
if not isinstance(name, str):
|
||||
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
|
||||
if not name:
|
||||
raise LintError(_("Empty tag name"))
|
||||
if type == "StartTag" and name in voidElements:
|
||||
raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
|
||||
elif type == "EmptyTag" and name not in voidElements:
|
||||
raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
|
||||
if type == "StartTag":
|
||||
open_elements.append(name)
|
||||
for name, value in token["data"]:
|
||||
if not isinstance(name, str):
|
||||
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
|
||||
if not name:
|
||||
raise LintError(_("Empty attribute name"))
|
||||
if not isinstance(value, str):
|
||||
raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
|
||||
if name in cdataElements:
|
||||
contentModelFlag = "CDATA"
|
||||
elif name in rcdataElements:
|
||||
contentModelFlag = "RCDATA"
|
||||
elif name == "plaintext":
|
||||
contentModelFlag = "PLAINTEXT"
|
||||
assert namespace is None or isinstance(namespace, text_type)
|
||||
assert namespace != ""
|
||||
assert isinstance(name, text_type)
|
||||
assert name != ""
|
||||
assert isinstance(token["data"], dict)
|
||||
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
|
||||
assert type == "EmptyTag"
|
||||
else:
|
||||
assert type == "StartTag"
|
||||
if type == "StartTag" and self.require_matching_tags:
|
||||
open_elements.append((namespace, name))
|
||||
for (namespace, name), value in token["data"].items():
|
||||
assert namespace is None or isinstance(namespace, text_type)
|
||||
assert namespace != ""
|
||||
assert isinstance(name, text_type)
|
||||
assert name != ""
|
||||
assert isinstance(value, text_type)
|
||||
|
||||
elif type == "EndTag":
|
||||
namespace = token["namespace"]
|
||||
name = token["name"]
|
||||
if not isinstance(name, str):
|
||||
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
|
||||
if not name:
|
||||
raise LintError(_("Empty tag name"))
|
||||
if name in voidElements:
|
||||
raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
|
||||
start_name = open_elements.pop()
|
||||
if start_name != name:
|
||||
raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
|
||||
contentModelFlag = "PCDATA"
|
||||
assert namespace is None or isinstance(namespace, text_type)
|
||||
assert namespace != ""
|
||||
assert isinstance(name, text_type)
|
||||
assert name != ""
|
||||
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
|
||||
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
|
||||
elif self.require_matching_tags:
|
||||
start = open_elements.pop()
|
||||
assert start == (namespace, name)
|
||||
|
||||
elif type == "Comment":
|
||||
if contentModelFlag != "PCDATA":
|
||||
raise LintError(_("Comment not in PCDATA content model flag"))
|
||||
data = token["data"]
|
||||
assert isinstance(data, text_type)
|
||||
|
||||
elif type in ("Characters", "SpaceCharacters"):
|
||||
data = token["data"]
|
||||
if not isinstance(data, str):
|
||||
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data})
|
||||
if not data:
|
||||
raise LintError(_("%(type)s token with empty data") % {"type": type})
|
||||
assert isinstance(data, text_type)
|
||||
assert data != ""
|
||||
if type == "SpaceCharacters":
|
||||
data = data.strip(spaceCharacters)
|
||||
if data:
|
||||
raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})
|
||||
assert data.strip(spaceCharacters) == ""
|
||||
|
||||
elif type == "Doctype":
|
||||
name = token["name"]
|
||||
if contentModelFlag != "PCDATA":
|
||||
raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
|
||||
if not isinstance(name, str):
|
||||
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
|
||||
# XXX: what to do with token["data"] ?
|
||||
assert name is None or isinstance(name, text_type)
|
||||
assert token["publicId"] is None or isinstance(name, text_type)
|
||||
assert token["systemId"] is None or isinstance(name, text_type)
|
||||
|
||||
elif type in ("ParseError", "SerializeError"):
|
||||
pass
|
||||
elif type == "Entity":
|
||||
assert isinstance(token["name"], text_type)
|
||||
|
||||
elif type == "SerializerError":
|
||||
assert isinstance(token["data"], text_type)
|
||||
|
||||
else:
|
||||
raise LintError(_("Unknown token type: %(type)s") % {"type": type})
|
||||
assert False, "Unknown token type: %(type)s" % {"type": type}
|
||||
|
||||
yield token
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
|
||||
|
||||
class Filter(_base.Filter):
|
||||
class Filter(base.Filter):
|
||||
"""Removes optional tags from the token stream"""
|
||||
def slider(self):
|
||||
previous1 = previous2 = None
|
||||
for token in self.source:
|
||||
@@ -11,7 +12,8 @@ class Filter(_base.Filter):
|
||||
yield previous2, previous1, token
|
||||
previous2 = previous1
|
||||
previous1 = token
|
||||
yield previous2, previous1, None
|
||||
if previous1 is not None:
|
||||
yield previous2, previous1, None
|
||||
|
||||
def __iter__(self):
|
||||
for previous, token, next in self.slider():
|
||||
@@ -58,7 +60,7 @@ class Filter(_base.Filter):
|
||||
elif tagname == 'colgroup':
|
||||
# A colgroup element's start tag may be omitted if the first thing
|
||||
# inside the colgroup element is a col element, and if the element
|
||||
# is not immediately preceeded by another colgroup element whose
|
||||
# is not immediately preceded by another colgroup element whose
|
||||
# end tag has been omitted.
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
# XXX: we do not look at the preceding event, so instead we never
|
||||
@@ -70,7 +72,7 @@ class Filter(_base.Filter):
|
||||
elif tagname == 'tbody':
|
||||
# A tbody element's start tag may be omitted if the first thing
|
||||
# inside the tbody element is a tr element, and if the element is
|
||||
# not immediately preceeded by a tbody, thead, or tfoot element
|
||||
# not immediately preceded by a tbody, thead, or tfoot element
|
||||
# whose end tag has been omitted.
|
||||
if type == "StartTag":
|
||||
# omit the thead and tfoot elements' end tag when they are
|
||||
|
||||
@@ -1,12 +1,896 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import _base
|
||||
from ..sanitizer import HTMLSanitizerMixin
|
||||
import re
|
||||
from xml.sax.saxutils import escape, unescape
|
||||
|
||||
from six.moves import urllib_parse as urlparse
|
||||
|
||||
from . import base
|
||||
from ..constants import namespaces, prefixes
|
||||
|
||||
__all__ = ["Filter"]
|
||||
|
||||
|
||||
class Filter(_base.Filter, HTMLSanitizerMixin):
|
||||
allowed_elements = frozenset((
|
||||
(namespaces['html'], 'a'),
|
||||
(namespaces['html'], 'abbr'),
|
||||
(namespaces['html'], 'acronym'),
|
||||
(namespaces['html'], 'address'),
|
||||
(namespaces['html'], 'area'),
|
||||
(namespaces['html'], 'article'),
|
||||
(namespaces['html'], 'aside'),
|
||||
(namespaces['html'], 'audio'),
|
||||
(namespaces['html'], 'b'),
|
||||
(namespaces['html'], 'big'),
|
||||
(namespaces['html'], 'blockquote'),
|
||||
(namespaces['html'], 'br'),
|
||||
(namespaces['html'], 'button'),
|
||||
(namespaces['html'], 'canvas'),
|
||||
(namespaces['html'], 'caption'),
|
||||
(namespaces['html'], 'center'),
|
||||
(namespaces['html'], 'cite'),
|
||||
(namespaces['html'], 'code'),
|
||||
(namespaces['html'], 'col'),
|
||||
(namespaces['html'], 'colgroup'),
|
||||
(namespaces['html'], 'command'),
|
||||
(namespaces['html'], 'datagrid'),
|
||||
(namespaces['html'], 'datalist'),
|
||||
(namespaces['html'], 'dd'),
|
||||
(namespaces['html'], 'del'),
|
||||
(namespaces['html'], 'details'),
|
||||
(namespaces['html'], 'dfn'),
|
||||
(namespaces['html'], 'dialog'),
|
||||
(namespaces['html'], 'dir'),
|
||||
(namespaces['html'], 'div'),
|
||||
(namespaces['html'], 'dl'),
|
||||
(namespaces['html'], 'dt'),
|
||||
(namespaces['html'], 'em'),
|
||||
(namespaces['html'], 'event-source'),
|
||||
(namespaces['html'], 'fieldset'),
|
||||
(namespaces['html'], 'figcaption'),
|
||||
(namespaces['html'], 'figure'),
|
||||
(namespaces['html'], 'footer'),
|
||||
(namespaces['html'], 'font'),
|
||||
(namespaces['html'], 'form'),
|
||||
(namespaces['html'], 'header'),
|
||||
(namespaces['html'], 'h1'),
|
||||
(namespaces['html'], 'h2'),
|
||||
(namespaces['html'], 'h3'),
|
||||
(namespaces['html'], 'h4'),
|
||||
(namespaces['html'], 'h5'),
|
||||
(namespaces['html'], 'h6'),
|
||||
(namespaces['html'], 'hr'),
|
||||
(namespaces['html'], 'i'),
|
||||
(namespaces['html'], 'img'),
|
||||
(namespaces['html'], 'input'),
|
||||
(namespaces['html'], 'ins'),
|
||||
(namespaces['html'], 'keygen'),
|
||||
(namespaces['html'], 'kbd'),
|
||||
(namespaces['html'], 'label'),
|
||||
(namespaces['html'], 'legend'),
|
||||
(namespaces['html'], 'li'),
|
||||
(namespaces['html'], 'm'),
|
||||
(namespaces['html'], 'map'),
|
||||
(namespaces['html'], 'menu'),
|
||||
(namespaces['html'], 'meter'),
|
||||
(namespaces['html'], 'multicol'),
|
||||
(namespaces['html'], 'nav'),
|
||||
(namespaces['html'], 'nextid'),
|
||||
(namespaces['html'], 'ol'),
|
||||
(namespaces['html'], 'output'),
|
||||
(namespaces['html'], 'optgroup'),
|
||||
(namespaces['html'], 'option'),
|
||||
(namespaces['html'], 'p'),
|
||||
(namespaces['html'], 'pre'),
|
||||
(namespaces['html'], 'progress'),
|
||||
(namespaces['html'], 'q'),
|
||||
(namespaces['html'], 's'),
|
||||
(namespaces['html'], 'samp'),
|
||||
(namespaces['html'], 'section'),
|
||||
(namespaces['html'], 'select'),
|
||||
(namespaces['html'], 'small'),
|
||||
(namespaces['html'], 'sound'),
|
||||
(namespaces['html'], 'source'),
|
||||
(namespaces['html'], 'spacer'),
|
||||
(namespaces['html'], 'span'),
|
||||
(namespaces['html'], 'strike'),
|
||||
(namespaces['html'], 'strong'),
|
||||
(namespaces['html'], 'sub'),
|
||||
(namespaces['html'], 'sup'),
|
||||
(namespaces['html'], 'table'),
|
||||
(namespaces['html'], 'tbody'),
|
||||
(namespaces['html'], 'td'),
|
||||
(namespaces['html'], 'textarea'),
|
||||
(namespaces['html'], 'time'),
|
||||
(namespaces['html'], 'tfoot'),
|
||||
(namespaces['html'], 'th'),
|
||||
(namespaces['html'], 'thead'),
|
||||
(namespaces['html'], 'tr'),
|
||||
(namespaces['html'], 'tt'),
|
||||
(namespaces['html'], 'u'),
|
||||
(namespaces['html'], 'ul'),
|
||||
(namespaces['html'], 'var'),
|
||||
(namespaces['html'], 'video'),
|
||||
(namespaces['mathml'], 'maction'),
|
||||
(namespaces['mathml'], 'math'),
|
||||
(namespaces['mathml'], 'merror'),
|
||||
(namespaces['mathml'], 'mfrac'),
|
||||
(namespaces['mathml'], 'mi'),
|
||||
(namespaces['mathml'], 'mmultiscripts'),
|
||||
(namespaces['mathml'], 'mn'),
|
||||
(namespaces['mathml'], 'mo'),
|
||||
(namespaces['mathml'], 'mover'),
|
||||
(namespaces['mathml'], 'mpadded'),
|
||||
(namespaces['mathml'], 'mphantom'),
|
||||
(namespaces['mathml'], 'mprescripts'),
|
||||
(namespaces['mathml'], 'mroot'),
|
||||
(namespaces['mathml'], 'mrow'),
|
||||
(namespaces['mathml'], 'mspace'),
|
||||
(namespaces['mathml'], 'msqrt'),
|
||||
(namespaces['mathml'], 'mstyle'),
|
||||
(namespaces['mathml'], 'msub'),
|
||||
(namespaces['mathml'], 'msubsup'),
|
||||
(namespaces['mathml'], 'msup'),
|
||||
(namespaces['mathml'], 'mtable'),
|
||||
(namespaces['mathml'], 'mtd'),
|
||||
(namespaces['mathml'], 'mtext'),
|
||||
(namespaces['mathml'], 'mtr'),
|
||||
(namespaces['mathml'], 'munder'),
|
||||
(namespaces['mathml'], 'munderover'),
|
||||
(namespaces['mathml'], 'none'),
|
||||
(namespaces['svg'], 'a'),
|
||||
(namespaces['svg'], 'animate'),
|
||||
(namespaces['svg'], 'animateColor'),
|
||||
(namespaces['svg'], 'animateMotion'),
|
||||
(namespaces['svg'], 'animateTransform'),
|
||||
(namespaces['svg'], 'clipPath'),
|
||||
(namespaces['svg'], 'circle'),
|
||||
(namespaces['svg'], 'defs'),
|
||||
(namespaces['svg'], 'desc'),
|
||||
(namespaces['svg'], 'ellipse'),
|
||||
(namespaces['svg'], 'font-face'),
|
||||
(namespaces['svg'], 'font-face-name'),
|
||||
(namespaces['svg'], 'font-face-src'),
|
||||
(namespaces['svg'], 'g'),
|
||||
(namespaces['svg'], 'glyph'),
|
||||
(namespaces['svg'], 'hkern'),
|
||||
(namespaces['svg'], 'linearGradient'),
|
||||
(namespaces['svg'], 'line'),
|
||||
(namespaces['svg'], 'marker'),
|
||||
(namespaces['svg'], 'metadata'),
|
||||
(namespaces['svg'], 'missing-glyph'),
|
||||
(namespaces['svg'], 'mpath'),
|
||||
(namespaces['svg'], 'path'),
|
||||
(namespaces['svg'], 'polygon'),
|
||||
(namespaces['svg'], 'polyline'),
|
||||
(namespaces['svg'], 'radialGradient'),
|
||||
(namespaces['svg'], 'rect'),
|
||||
(namespaces['svg'], 'set'),
|
||||
(namespaces['svg'], 'stop'),
|
||||
(namespaces['svg'], 'svg'),
|
||||
(namespaces['svg'], 'switch'),
|
||||
(namespaces['svg'], 'text'),
|
||||
(namespaces['svg'], 'title'),
|
||||
(namespaces['svg'], 'tspan'),
|
||||
(namespaces['svg'], 'use'),
|
||||
))
|
||||
|
||||
allowed_attributes = frozenset((
|
||||
# HTML attributes
|
||||
(None, 'abbr'),
|
||||
(None, 'accept'),
|
||||
(None, 'accept-charset'),
|
||||
(None, 'accesskey'),
|
||||
(None, 'action'),
|
||||
(None, 'align'),
|
||||
(None, 'alt'),
|
||||
(None, 'autocomplete'),
|
||||
(None, 'autofocus'),
|
||||
(None, 'axis'),
|
||||
(None, 'background'),
|
||||
(None, 'balance'),
|
||||
(None, 'bgcolor'),
|
||||
(None, 'bgproperties'),
|
||||
(None, 'border'),
|
||||
(None, 'bordercolor'),
|
||||
(None, 'bordercolordark'),
|
||||
(None, 'bordercolorlight'),
|
||||
(None, 'bottompadding'),
|
||||
(None, 'cellpadding'),
|
||||
(None, 'cellspacing'),
|
||||
(None, 'ch'),
|
||||
(None, 'challenge'),
|
||||
(None, 'char'),
|
||||
(None, 'charoff'),
|
||||
(None, 'choff'),
|
||||
(None, 'charset'),
|
||||
(None, 'checked'),
|
||||
(None, 'cite'),
|
||||
(None, 'class'),
|
||||
(None, 'clear'),
|
||||
(None, 'color'),
|
||||
(None, 'cols'),
|
||||
(None, 'colspan'),
|
||||
(None, 'compact'),
|
||||
(None, 'contenteditable'),
|
||||
(None, 'controls'),
|
||||
(None, 'coords'),
|
||||
(None, 'data'),
|
||||
(None, 'datafld'),
|
||||
(None, 'datapagesize'),
|
||||
(None, 'datasrc'),
|
||||
(None, 'datetime'),
|
||||
(None, 'default'),
|
||||
(None, 'delay'),
|
||||
(None, 'dir'),
|
||||
(None, 'disabled'),
|
||||
(None, 'draggable'),
|
||||
(None, 'dynsrc'),
|
||||
(None, 'enctype'),
|
||||
(None, 'end'),
|
||||
(None, 'face'),
|
||||
(None, 'for'),
|
||||
(None, 'form'),
|
||||
(None, 'frame'),
|
||||
(None, 'galleryimg'),
|
||||
(None, 'gutter'),
|
||||
(None, 'headers'),
|
||||
(None, 'height'),
|
||||
(None, 'hidefocus'),
|
||||
(None, 'hidden'),
|
||||
(None, 'high'),
|
||||
(None, 'href'),
|
||||
(None, 'hreflang'),
|
||||
(None, 'hspace'),
|
||||
(None, 'icon'),
|
||||
(None, 'id'),
|
||||
(None, 'inputmode'),
|
||||
(None, 'ismap'),
|
||||
(None, 'keytype'),
|
||||
(None, 'label'),
|
||||
(None, 'leftspacing'),
|
||||
(None, 'lang'),
|
||||
(None, 'list'),
|
||||
(None, 'longdesc'),
|
||||
(None, 'loop'),
|
||||
(None, 'loopcount'),
|
||||
(None, 'loopend'),
|
||||
(None, 'loopstart'),
|
||||
(None, 'low'),
|
||||
(None, 'lowsrc'),
|
||||
(None, 'max'),
|
||||
(None, 'maxlength'),
|
||||
(None, 'media'),
|
||||
(None, 'method'),
|
||||
(None, 'min'),
|
||||
(None, 'multiple'),
|
||||
(None, 'name'),
|
||||
(None, 'nohref'),
|
||||
(None, 'noshade'),
|
||||
(None, 'nowrap'),
|
||||
(None, 'open'),
|
||||
(None, 'optimum'),
|
||||
(None, 'pattern'),
|
||||
(None, 'ping'),
|
||||
(None, 'point-size'),
|
||||
(None, 'poster'),
|
||||
(None, 'pqg'),
|
||||
(None, 'preload'),
|
||||
(None, 'prompt'),
|
||||
(None, 'radiogroup'),
|
||||
(None, 'readonly'),
|
||||
(None, 'rel'),
|
||||
(None, 'repeat-max'),
|
||||
(None, 'repeat-min'),
|
||||
(None, 'replace'),
|
||||
(None, 'required'),
|
||||
(None, 'rev'),
|
||||
(None, 'rightspacing'),
|
||||
(None, 'rows'),
|
||||
(None, 'rowspan'),
|
||||
(None, 'rules'),
|
||||
(None, 'scope'),
|
||||
(None, 'selected'),
|
||||
(None, 'shape'),
|
||||
(None, 'size'),
|
||||
(None, 'span'),
|
||||
(None, 'src'),
|
||||
(None, 'start'),
|
||||
(None, 'step'),
|
||||
(None, 'style'),
|
||||
(None, 'summary'),
|
||||
(None, 'suppress'),
|
||||
(None, 'tabindex'),
|
||||
(None, 'target'),
|
||||
(None, 'template'),
|
||||
(None, 'title'),
|
||||
(None, 'toppadding'),
|
||||
(None, 'type'),
|
||||
(None, 'unselectable'),
|
||||
(None, 'usemap'),
|
||||
(None, 'urn'),
|
||||
(None, 'valign'),
|
||||
(None, 'value'),
|
||||
(None, 'variable'),
|
||||
(None, 'volume'),
|
||||
(None, 'vspace'),
|
||||
(None, 'vrml'),
|
||||
(None, 'width'),
|
||||
(None, 'wrap'),
|
||||
(namespaces['xml'], 'lang'),
|
||||
# MathML attributes
|
||||
(None, 'actiontype'),
|
||||
(None, 'align'),
|
||||
(None, 'columnalign'),
|
||||
(None, 'columnalign'),
|
||||
(None, 'columnalign'),
|
||||
(None, 'columnlines'),
|
||||
(None, 'columnspacing'),
|
||||
(None, 'columnspan'),
|
||||
(None, 'depth'),
|
||||
(None, 'display'),
|
||||
(None, 'displaystyle'),
|
||||
(None, 'equalcolumns'),
|
||||
(None, 'equalrows'),
|
||||
(None, 'fence'),
|
||||
(None, 'fontstyle'),
|
||||
(None, 'fontweight'),
|
||||
(None, 'frame'),
|
||||
(None, 'height'),
|
||||
(None, 'linethickness'),
|
||||
(None, 'lspace'),
|
||||
(None, 'mathbackground'),
|
||||
(None, 'mathcolor'),
|
||||
(None, 'mathvariant'),
|
||||
(None, 'mathvariant'),
|
||||
(None, 'maxsize'),
|
||||
(None, 'minsize'),
|
||||
(None, 'other'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowlines'),
|
||||
(None, 'rowspacing'),
|
||||
(None, 'rowspan'),
|
||||
(None, 'rspace'),
|
||||
(None, 'scriptlevel'),
|
||||
(None, 'selection'),
|
||||
(None, 'separator'),
|
||||
(None, 'stretchy'),
|
||||
(None, 'width'),
|
||||
(None, 'width'),
|
||||
(namespaces['xlink'], 'href'),
|
||||
(namespaces['xlink'], 'show'),
|
||||
(namespaces['xlink'], 'type'),
|
||||
# SVG attributes
|
||||
(None, 'accent-height'),
|
||||
(None, 'accumulate'),
|
||||
(None, 'additive'),
|
||||
(None, 'alphabetic'),
|
||||
(None, 'arabic-form'),
|
||||
(None, 'ascent'),
|
||||
(None, 'attributeName'),
|
||||
(None, 'attributeType'),
|
||||
(None, 'baseProfile'),
|
||||
(None, 'bbox'),
|
||||
(None, 'begin'),
|
||||
(None, 'by'),
|
||||
(None, 'calcMode'),
|
||||
(None, 'cap-height'),
|
||||
(None, 'class'),
|
||||
(None, 'clip-path'),
|
||||
(None, 'color'),
|
||||
(None, 'color-rendering'),
|
||||
(None, 'content'),
|
||||
(None, 'cx'),
|
||||
(None, 'cy'),
|
||||
(None, 'd'),
|
||||
(None, 'dx'),
|
||||
(None, 'dy'),
|
||||
(None, 'descent'),
|
||||
(None, 'display'),
|
||||
(None, 'dur'),
|
||||
(None, 'end'),
|
||||
(None, 'fill'),
|
||||
(None, 'fill-opacity'),
|
||||
(None, 'fill-rule'),
|
||||
(None, 'font-family'),
|
||||
(None, 'font-size'),
|
||||
(None, 'font-stretch'),
|
||||
(None, 'font-style'),
|
||||
(None, 'font-variant'),
|
||||
(None, 'font-weight'),
|
||||
(None, 'from'),
|
||||
(None, 'fx'),
|
||||
(None, 'fy'),
|
||||
(None, 'g1'),
|
||||
(None, 'g2'),
|
||||
(None, 'glyph-name'),
|
||||
(None, 'gradientUnits'),
|
||||
(None, 'hanging'),
|
||||
(None, 'height'),
|
||||
(None, 'horiz-adv-x'),
|
||||
(None, 'horiz-origin-x'),
|
||||
(None, 'id'),
|
||||
(None, 'ideographic'),
|
||||
(None, 'k'),
|
||||
(None, 'keyPoints'),
|
||||
(None, 'keySplines'),
|
||||
(None, 'keyTimes'),
|
||||
(None, 'lang'),
|
||||
(None, 'marker-end'),
|
||||
(None, 'marker-mid'),
|
||||
(None, 'marker-start'),
|
||||
(None, 'markerHeight'),
|
||||
(None, 'markerUnits'),
|
||||
(None, 'markerWidth'),
|
||||
(None, 'mathematical'),
|
||||
(None, 'max'),
|
||||
(None, 'min'),
|
||||
(None, 'name'),
|
||||
(None, 'offset'),
|
||||
(None, 'opacity'),
|
||||
(None, 'orient'),
|
||||
(None, 'origin'),
|
||||
(None, 'overline-position'),
|
||||
(None, 'overline-thickness'),
|
||||
(None, 'panose-1'),
|
||||
(None, 'path'),
|
||||
(None, 'pathLength'),
|
||||
(None, 'points'),
|
||||
(None, 'preserveAspectRatio'),
|
||||
(None, 'r'),
|
||||
(None, 'refX'),
|
||||
(None, 'refY'),
|
||||
(None, 'repeatCount'),
|
||||
(None, 'repeatDur'),
|
||||
(None, 'requiredExtensions'),
|
||||
(None, 'requiredFeatures'),
|
||||
(None, 'restart'),
|
||||
(None, 'rotate'),
|
||||
(None, 'rx'),
|
||||
(None, 'ry'),
|
||||
(None, 'slope'),
|
||||
(None, 'stemh'),
|
||||
(None, 'stemv'),
|
||||
(None, 'stop-color'),
|
||||
(None, 'stop-opacity'),
|
||||
(None, 'strikethrough-position'),
|
||||
(None, 'strikethrough-thickness'),
|
||||
(None, 'stroke'),
|
||||
(None, 'stroke-dasharray'),
|
||||
(None, 'stroke-dashoffset'),
|
||||
(None, 'stroke-linecap'),
|
||||
(None, 'stroke-linejoin'),
|
||||
(None, 'stroke-miterlimit'),
|
||||
(None, 'stroke-opacity'),
|
||||
(None, 'stroke-width'),
|
||||
(None, 'systemLanguage'),
|
||||
(None, 'target'),
|
||||
(None, 'text-anchor'),
|
||||
(None, 'to'),
|
||||
(None, 'transform'),
|
||||
(None, 'type'),
|
||||
(None, 'u1'),
|
||||
(None, 'u2'),
|
||||
(None, 'underline-position'),
|
||||
(None, 'underline-thickness'),
|
||||
(None, 'unicode'),
|
||||
(None, 'unicode-range'),
|
||||
(None, 'units-per-em'),
|
||||
(None, 'values'),
|
||||
(None, 'version'),
|
||||
(None, 'viewBox'),
|
||||
(None, 'visibility'),
|
||||
(None, 'width'),
|
||||
(None, 'widths'),
|
||||
(None, 'x'),
|
||||
(None, 'x-height'),
|
||||
(None, 'x1'),
|
||||
(None, 'x2'),
|
||||
(namespaces['xlink'], 'actuate'),
|
||||
(namespaces['xlink'], 'arcrole'),
|
||||
(namespaces['xlink'], 'href'),
|
||||
(namespaces['xlink'], 'role'),
|
||||
(namespaces['xlink'], 'show'),
|
||||
(namespaces['xlink'], 'title'),
|
||||
(namespaces['xlink'], 'type'),
|
||||
(namespaces['xml'], 'base'),
|
||||
(namespaces['xml'], 'lang'),
|
||||
(namespaces['xml'], 'space'),
|
||||
(None, 'y'),
|
||||
(None, 'y1'),
|
||||
(None, 'y2'),
|
||||
(None, 'zoomAndPan'),
|
||||
))
|
||||
|
||||
attr_val_is_uri = frozenset((
|
||||
(None, 'href'),
|
||||
(None, 'src'),
|
||||
(None, 'cite'),
|
||||
(None, 'action'),
|
||||
(None, 'longdesc'),
|
||||
(None, 'poster'),
|
||||
(None, 'background'),
|
||||
(None, 'datasrc'),
|
||||
(None, 'dynsrc'),
|
||||
(None, 'lowsrc'),
|
||||
(None, 'ping'),
|
||||
(namespaces['xlink'], 'href'),
|
||||
(namespaces['xml'], 'base'),
|
||||
))
|
||||
|
||||
svg_attr_val_allows_ref = frozenset((
|
||||
(None, 'clip-path'),
|
||||
(None, 'color-profile'),
|
||||
(None, 'cursor'),
|
||||
(None, 'fill'),
|
||||
(None, 'filter'),
|
||||
(None, 'marker'),
|
||||
(None, 'marker-start'),
|
||||
(None, 'marker-mid'),
|
||||
(None, 'marker-end'),
|
||||
(None, 'mask'),
|
||||
(None, 'stroke'),
|
||||
))
|
||||
|
||||
svg_allow_local_href = frozenset((
|
||||
(None, 'altGlyph'),
|
||||
(None, 'animate'),
|
||||
(None, 'animateColor'),
|
||||
(None, 'animateMotion'),
|
||||
(None, 'animateTransform'),
|
||||
(None, 'cursor'),
|
||||
(None, 'feImage'),
|
||||
(None, 'filter'),
|
||||
(None, 'linearGradient'),
|
||||
(None, 'pattern'),
|
||||
(None, 'radialGradient'),
|
||||
(None, 'textpath'),
|
||||
(None, 'tref'),
|
||||
(None, 'set'),
|
||||
(None, 'use')
|
||||
))
|
||||
|
||||
allowed_css_properties = frozenset((
|
||||
'azimuth',
|
||||
'background-color',
|
||||
'border-bottom-color',
|
||||
'border-collapse',
|
||||
'border-color',
|
||||
'border-left-color',
|
||||
'border-right-color',
|
||||
'border-top-color',
|
||||
'clear',
|
||||
'color',
|
||||
'cursor',
|
||||
'direction',
|
||||
'display',
|
||||
'elevation',
|
||||
'float',
|
||||
'font',
|
||||
'font-family',
|
||||
'font-size',
|
||||
'font-style',
|
||||
'font-variant',
|
||||
'font-weight',
|
||||
'height',
|
||||
'letter-spacing',
|
||||
'line-height',
|
||||
'overflow',
|
||||
'pause',
|
||||
'pause-after',
|
||||
'pause-before',
|
||||
'pitch',
|
||||
'pitch-range',
|
||||
'richness',
|
||||
'speak',
|
||||
'speak-header',
|
||||
'speak-numeral',
|
||||
'speak-punctuation',
|
||||
'speech-rate',
|
||||
'stress',
|
||||
'text-align',
|
||||
'text-decoration',
|
||||
'text-indent',
|
||||
'unicode-bidi',
|
||||
'vertical-align',
|
||||
'voice-family',
|
||||
'volume',
|
||||
'white-space',
|
||||
'width',
|
||||
))
|
||||
|
||||
allowed_css_keywords = frozenset((
|
||||
'auto',
|
||||
'aqua',
|
||||
'black',
|
||||
'block',
|
||||
'blue',
|
||||
'bold',
|
||||
'both',
|
||||
'bottom',
|
||||
'brown',
|
||||
'center',
|
||||
'collapse',
|
||||
'dashed',
|
||||
'dotted',
|
||||
'fuchsia',
|
||||
'gray',
|
||||
'green',
|
||||
'!important',
|
||||
'italic',
|
||||
'left',
|
||||
'lime',
|
||||
'maroon',
|
||||
'medium',
|
||||
'none',
|
||||
'navy',
|
||||
'normal',
|
||||
'nowrap',
|
||||
'olive',
|
||||
'pointer',
|
||||
'purple',
|
||||
'red',
|
||||
'right',
|
||||
'solid',
|
||||
'silver',
|
||||
'teal',
|
||||
'top',
|
||||
'transparent',
|
||||
'underline',
|
||||
'white',
|
||||
'yellow',
|
||||
))
|
||||
|
||||
allowed_svg_properties = frozenset((
|
||||
'fill',
|
||||
'fill-opacity',
|
||||
'fill-rule',
|
||||
'stroke',
|
||||
'stroke-width',
|
||||
'stroke-linecap',
|
||||
'stroke-linejoin',
|
||||
'stroke-opacity',
|
||||
))
|
||||
|
||||
allowed_protocols = frozenset((
|
||||
'ed2k',
|
||||
'ftp',
|
||||
'http',
|
||||
'https',
|
||||
'irc',
|
||||
'mailto',
|
||||
'news',
|
||||
'gopher',
|
||||
'nntp',
|
||||
'telnet',
|
||||
'webcal',
|
||||
'xmpp',
|
||||
'callto',
|
||||
'feed',
|
||||
'urn',
|
||||
'aim',
|
||||
'rsync',
|
||||
'tag',
|
||||
'ssh',
|
||||
'sftp',
|
||||
'rtsp',
|
||||
'afs',
|
||||
'data',
|
||||
))
|
||||
|
||||
allowed_content_types = frozenset((
|
||||
'image/png',
|
||||
'image/jpeg',
|
||||
'image/gif',
|
||||
'image/webp',
|
||||
'image/bmp',
|
||||
'text/plain',
|
||||
))
|
||||
|
||||
|
||||
data_content_type = re.compile(r'''
|
||||
^
|
||||
# Match a content type <application>/<type>
|
||||
(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
|
||||
# Match any character set and encoding
|
||||
(?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
|
||||
|(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
|
||||
# Assume the rest is data
|
||||
,.*
|
||||
$
|
||||
''',
|
||||
re.VERBOSE)
|
||||
|
||||
|
||||
class Filter(base.Filter):
    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""

    def __init__(self,
                 source,
                 allowed_elements=allowed_elements,
                 allowed_attributes=allowed_attributes,
                 allowed_css_properties=allowed_css_properties,
                 allowed_css_keywords=allowed_css_keywords,
                 allowed_svg_properties=allowed_svg_properties,
                 allowed_protocols=allowed_protocols,
                 allowed_content_types=allowed_content_types,
                 attr_val_is_uri=attr_val_is_uri,
                 svg_attr_val_allows_ref=svg_attr_val_allows_ref,
                 svg_allow_local_href=svg_allow_local_href):
        """Creates a Filter

        :arg source: the token stream to sanitize (passed to the base filter)

        :arg allowed_elements: set of elements to allow--everything else will
            be escaped

        :arg allowed_attributes: set of attributes to allow in
            elements--everything else will be stripped

        :arg allowed_css_properties: set of CSS properties to allow--everything
            else will be stripped

        :arg allowed_css_keywords: set of CSS keywords to allow--everything
            else will be stripped

        :arg allowed_svg_properties: set of SVG properties to allow--everything
            else will be removed

        :arg allowed_protocols: set of allowed protocols for URIs

        :arg allowed_content_types: set of allowed content types for ``data`` URIs.

        :arg attr_val_is_uri: set of attributes that have URI values--values
            that have a scheme not listed in ``allowed_protocols`` are removed

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements that can have local
            hrefs--non-local ``xlink:href`` values on these are removed

        """
        super(Filter, self).__init__(source)
        self.allowed_elements = allowed_elements
        self.allowed_attributes = allowed_attributes
        self.allowed_css_properties = allowed_css_properties
        self.allowed_css_keywords = allowed_css_keywords
        self.allowed_svg_properties = allowed_svg_properties
        self.allowed_protocols = allowed_protocols
        self.allowed_content_types = allowed_content_types
        self.attr_val_is_uri = attr_val_is_uri
        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
        self.svg_allow_local_href = svg_allow_local_href

    def __iter__(self):
        """Yield each sanitized token; tokens sanitized to a falsy value are dropped."""
        for token in base.Filter.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token

    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
    # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
    # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
    # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
    # allowed.
    #
    #   sanitize_html('<script> do_nasty_stuff() </script>')
    #    => &lt;script> do_nasty_stuff() &lt;/script>
    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
    #    => <a>Click here for $100</a>
    def sanitize_token(self, token):
        """Return the sanitized form of ``token``, or None to drop it.

        Tag tokens are passed through ``allowed_token`` when their
        ``(namespace, name)`` is in ``allowed_elements`` (a tag without a
        namespace is looked up in the HTML namespace) and through
        ``disallowed_token`` otherwise. Comments are dropped; every other
        token type is returned unchanged.
        """
        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in ("StartTag", "EndTag", "EmptyTag"):
            name = token["name"]
            namespace = token["namespace"]
            if ((namespace, name) in self.allowed_elements or
                (namespace is None and
                 (namespaces["html"], name) in self.allowed_elements)):
                return self.allowed_token(token)
            else:
                return self.disallowed_token(token)
        elif token_type == "Comment":
            # Comments are removed from the stream entirely.
            return None
        else:
            return token

    def _uri_value_allowed(self, value):
        """Return True if a URI attribute value may be kept.

        The value is rejected when it cannot be parsed, when its scheme is not
        in ``allowed_protocols``, or when it is a ``data`` URI whose content
        type is missing, malformed or not in ``allowed_content_types``.
        Values without a scheme (relative references) are accepted.
        """
        # I don't have a clue where this regexp comes from or why it matches those
        # characters, nor why we call unescape. I just know it's always been here.
        # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
        # this will do is remove *more* than it otherwise would.
        val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                               unescape(value)).lower()
        # remove replacement characters from unescaped characters
        val_unescaped = val_unescaped.replace("\ufffd", "")
        try:
            uri = urlparse.urlparse(val_unescaped)
        except ValueError:
            return False
        if not uri.scheme:
            return True
        if uri.scheme not in self.allowed_protocols:
            return False
        if uri.scheme == 'data':
            m = data_content_type.match(uri.path)
            if not m:
                return False
            return m.group('content_type') in self.allowed_content_types
        return True

    def allowed_token(self, token):
        """Strip unsafe attributes from an allowed tag token and return it.

        The token's attribute dict (``token["data"]``) is modified in place.
        """
        if "data" in token:
            attrs = token["data"]
            attr_names = set(attrs.keys())

            # Remove forbidden attributes
            for to_remove in (attr_names - self.allowed_attributes):
                del token["data"][to_remove]
                attr_names.remove(to_remove)

            # Remove attributes with disallowed URL values. The decision is
            # made once per attribute so that an attribute is never deleted
            # twice (which used to raise KeyError for ``data:`` URIs when
            # 'data' was not in ``allowed_protocols``).
            for attr in (attr_names & self.attr_val_is_uri):
                if not self._uri_value_allowed(attrs[attr]):
                    del attrs[attr]

            # Blank out url(...) references that do not point at a local
            # fragment (i.e. do not start with '#').
            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))

            # ``svg_allow_local_href`` holds (namespace, name) tuples by
            # default, so a bare-name membership test alone never matches;
            # plain names are still accepted for caller-supplied sets.
            name = token["name"]
            xlink_href = (namespaces['xlink'], 'href')
            if ((name in self.svg_allow_local_href or
                 (None, name) in self.svg_allow_local_href) and
                    xlink_href in attrs and
                    re.search(r'^\s*[^#\s].*', attrs[xlink_href])):
                del attrs[xlink_href]

            if (None, 'style') in attrs:
                attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
            token["data"] = attrs
        return token

    def disallowed_token(self, token):
        """Turn a disallowed tag token into a Characters token holding its markup.

        The tag is re-serialized as text (attribute values passed through
        ``escape``) and stored in ``token["data"]``; the token's type becomes
        "Characters" and its "name" key is removed.
        """
        token_type = token["type"]
        if token_type == "EndTag":
            token["data"] = "</%s>" % token["name"]
        elif token["data"]:
            assert token_type in ("StartTag", "EmptyTag")
            attrs = []
            for (ns, name), v in token["data"].items():
                attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
            token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            token["data"] = token["data"][:-1] + "/>"

        token["type"] = "Characters"

        del token["name"]
        return token

    def sanitize_css(self, style):
        """Return ``style`` reduced to the allowed CSS declarations.

        ``url(...)`` values are blanked first. If the remaining text fails
        either of the two structural checks, an empty string is returned.
        Otherwise each ``property: value`` pair is kept when the property is
        in ``allowed_css_properties`` or ``allowed_svg_properties``, or when it
        is a background/border/margin/padding property whose value consists
        only of allowed keywords, colours and simple lengths.
        """
        # disallow urls
        style = re.sub(r'url\s*\(\s*[^\s)]+?\s*\)\s*', ' ', style)

        # gauntlet
        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
            return ''
        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ': ' + value + ';')
            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
                                                'padding']:
                for keyword in value.split():
                    if keyword not in self.allowed_css_keywords and \
                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
                        break
                else:
                    # Only reached when every keyword passed the check above.
                    clean.append(prop + ': ' + value + ';')
            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)
|
||||
|
||||
@@ -2,20 +2,20 @@ from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
from ..constants import rcdataElements, spaceCharacters
|
||||
spaceCharacters = "".join(spaceCharacters)
|
||||
|
||||
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
|
||||
|
||||
|
||||
class Filter(_base.Filter):
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Collapses whitespace except in pre, textarea, and script elements"""
|
||||
spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
|
||||
|
||||
def __iter__(self):
|
||||
preserve = 0
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
type = token["type"]
|
||||
if type == "StartTag" \
|
||||
and (preserve or token["name"] in self.spacePreserveElements):
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,271 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import re
|
||||
from xml.sax.saxutils import escape, unescape
|
||||
|
||||
from .tokenizer import HTMLTokenizer
|
||||
from .constants import tokenTypes
|
||||
|
||||
|
||||
class HTMLSanitizerMixin(object):
    """Sanitization of XHTML+MathML+SVG and of inline style attributes.

    The ``acceptable_*`` / ``mathml_*`` / ``svg_*`` lists are the stock
    whitelists.  The ``allowed_*`` attributes are what the methods actually
    consult; subclasses may rebind them to tighten or loosen the policy.
    """

    acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
                           'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
                           'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
                           'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
                           'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
                           'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
                           'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
                           'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
                           'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
                           'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
                           'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
                           'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
                           'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']

    mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
                       'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
                       'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
                       'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
                       'munderover', 'none']

    svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
                    'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
                    'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
                    'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
                    'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
                    'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']

    acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
                             'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
                             'background', 'balance', 'bgcolor', 'bgproperties', 'border',
                             'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
                             'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
                             'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
                             'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
                             'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
                             'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
                             'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
                             'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
                             'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
                             'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
                             'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
                             'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
                             'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
                             'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
                             'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
                             'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
                             'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
                             'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
                             'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
                             'width', 'wrap', 'xml:lang']

    mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
                         'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
                         'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
                         'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
                         'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
                         'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
                         'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
                         'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
                         'xlink:type', 'xmlns', 'xmlns:xlink']

    svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
                      'arabic-form', 'ascent', 'attributeName', 'attributeType',
                      'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
                      'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
                      'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
                      'fill-opacity', 'fill-rule', 'font-family', 'font-size',
                      'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
                      'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
                      'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
                      'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
                      'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
                      'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
                      'opacity', 'orient', 'origin', 'overline-position',
                      'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
                      'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
                      'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
                      'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
                      'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
                      'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
                      'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
                      'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
                      'transform', 'type', 'u1', 'u2', 'underline-position',
                      'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
                      'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
                      'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
                      'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
                      'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
                      'y1', 'y2', 'zoomAndPan']

    attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
                       'xlink:href', 'xml:base']

    svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
                               'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
                               'mask', 'stroke']

    svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
                            'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
                            'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
                            'set', 'use']

    acceptable_css_properties = ['azimuth', 'background-color',
                                 'border-bottom-color', 'border-collapse', 'border-color',
                                 'border-left-color', 'border-right-color', 'border-top-color', 'clear',
                                 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
                                 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
                                 'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
                                 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
                                 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
                                 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
                                 'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
                                 'white-space', 'width']

    acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
                               'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
                               'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
                               'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
                               'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
                               'transparent', 'underline', 'white', 'yellow']

    acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
                                 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
                                 'stroke-opacity']

    acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
                            'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
                            'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
                            'ssh', 'sftp', 'rtsp', 'afs']

    # subclasses may define their own versions of these constants
    allowed_elements = acceptable_elements + mathml_elements + svg_elements
    allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
    allowed_css_properties = acceptable_css_properties
    allowed_css_keywords = acceptable_css_keywords
    allowed_svg_properties = acceptable_svg_properties
    allowed_protocols = acceptable_protocols

    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
    # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
    # attributes are parsed, and a restricted set, # specified by
    # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
    # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
    # in ALLOWED_PROTOCOLS are allowed.
    #
    #   sanitize_html('<script> do_nasty_stuff() </script>')
    #    => &lt;script> do_nasty_stuff() &lt;/script>
    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
    #    => <a>Click here for $100</a>
    def sanitize_token(self, token):
        """Return the sanitized form of ``token``, or ``None`` to drop it.

        Start/end/empty tags are passed to ``allowed_token`` when the element
        name is in ``allowed_elements`` and to ``disallowed_token`` otherwise.
        Comment tokens are dropped; every other token is returned unchanged.
        """
        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in tokenTypes:
            token_type = tokenTypes[token_type]

        if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
                          tokenTypes["EmptyTag"]):
            if token["name"] in self.allowed_elements:
                return self.allowed_token(token, token_type)
            return self.disallowed_token(token, token_type)
        if token_type == tokenTypes["Comment"]:
            # Comments are removed from the stream.
            return None
        return token

    def allowed_token(self, token, token_type):
        """Filter the attributes of a whitelisted tag token in place.

        Attributes not in ``allowed_attributes`` are removed, URI-valued
        attributes with a scheme outside ``allowed_protocols`` are removed,
        non-local ``url(...)`` references in SVG attributes are blanked, and
        any ``style`` attribute is run through ``sanitize_css``.
        """
        if "data" in token:
            # Walk the pairs in reverse so that, when a name is duplicated,
            # the first occurrence is the one left in the dict.
            attrs = dict((name, val) for name, val in token["data"][::-1]
                         if name in self.allowed_attributes)
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                # Strip backticks, control characters and whitespace before
                # looking for a scheme, so obfuscated schemes are still seen.
                val_unescaped = re.sub(r"[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                if (re.match(r"^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
                        (val_unescaped.split(':')[0] not in
                         self.allowed_protocols)):
                    del attrs[attr]
            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                    'xlink:href' in attrs and
                    re.search(r'^\s*[^#\s].*', attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

    def disallowed_token(self, token, token_type):
        """Turn a non-whitelisted tag token into a Characters token.

        The tag is re-rendered as text (attribute values escaped) and stored
        in ``token["data"]``; ``token["name"]`` is removed.
        """
        if token_type == tokenTypes["EndTag"]:
            token["data"] = "</%s>" % token["name"]
        elif token["data"]:
            attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
            token["data"] = "<%s%s>" % (token["name"], attrs)
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            token["data"] = token["data"][:-1] + "/>"

        # Keep the same representation (name or numeric id) the token
        # arrived with.
        if token["type"] in tokenTypes:
            token["type"] = "Characters"
        else:
            token["type"] = tokenTypes["Characters"]

        del token["name"]
        return token

    def sanitize_css(self, style):
        """Return ``style`` reduced to whitelisted CSS declarations.

        Returns an empty string when the input fails either structural check;
        otherwise the surviving declarations joined by single spaces.
        """
        # disallow urls
        style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)

        # gauntlet
        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
            return ''
        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ': ' + value + ';')
            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
                                                'padding']:
                for keyword in value.split():
                    # Consult the overridable allowed_css_keywords, like every
                    # other check in this class, so subclass policy applies.
                    if keyword not in self.allowed_css_keywords and \
                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
                        break
                else:
                    clean.append(prop + ': ' + value + ';')
            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)
|
||||
|
||||
|
||||
class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
    """Tokenizer whose output is passed through ``sanitize_token``."""

    def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
                 lowercaseElementName=False, lowercaseAttrName=False, parser=None):
        """Forward all arguments to ``HTMLTokenizer.__init__``.

        Only the defaults of ``lowercaseElementName`` and
        ``lowercaseAttrName`` are chosen here (both ``False``).
        """
        # Change case matching defaults as we only output lowercase html anyway
        # This solution doesn't seem ideal...
        HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
                               lowercaseElementName, lowercaseAttrName, parser=parser)

    def __iter__(self):
        """Yield sanitized tokens, skipping any the sanitizer returns falsy."""
        for token in HTMLTokenizer.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token
|
||||
@@ -0,0 +1,409 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
from six import text_type
|
||||
|
||||
import re
|
||||
|
||||
from codecs import register_error, xmlcharrefreplace_errors
|
||||
|
||||
from .constants import voidElements, booleanAttributes, spaceCharacters
|
||||
from .constants import rcdataElements, entities, xmlEntities
|
||||
from . import treewalkers, _utils
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
# Characters whose presence makes an attribute value need quoting; used when
# quote_attr_values is "spec".
_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`"
_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]")
# Wider character set used when quote_attr_values is "legacy".
_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars +
                                   "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
                                   "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
                                   "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
                                   "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
                                   "\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
                                   "\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
                                   "\u3000]")


# Reverse lookup: code point -> entity name, for entities that expand to a
# single character.
_encode_entity_map = {}
# True when one str element can hold a non-BMP code point (no surrogate pairs).
_is_ucs4 = len("\U0010FFFF") == 1
for k, v in list(entities.items()):
    # skip multi-character entities
    if ((_is_ucs4 and len(v) > 1) or
            (not _is_ucs4 and len(v) > 2)):
        continue
    if v != "&":
        if len(v) == 2:
            v = _utils.surrogatePairToCodepoint(v)
        else:
            v = ord(v)
        if v not in _encode_entity_map or k.islower():
            # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
            _encode_entity_map[v] = k
|
||||
|
||||
|
||||
def htmlentityreplace_errors(exc):
    """Codec error handler that emits HTML character references.

    For ``UnicodeEncodeError`` / ``UnicodeTranslateError`` every code point in
    the failing range is replaced by ``&name;`` when ``_encode_entity_map``
    has a name for it, else by a hexadecimal ``&#x...;`` reference.  Any other
    exception is delegated to ``xmlcharrefreplace_errors``.

    :arg exc: the exception passed in by the codec machinery

    :returns: ``(replacement_text, position_to_resume_at)``
    """
    if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
        res = []
        codepoints = []
        skip = False
        for i, c in enumerate(exc.object[exc.start:exc.end]):
            if skip:
                # Second half of a surrogate pair handled on the previous pass.
                skip = False
                continue
            index = i + exc.start
            if _utils.isSurrogatePair(exc.object[index:min(exc.end, index + 2)]):
                codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
                skip = True
            else:
                codepoint = ord(c)
            codepoints.append(codepoint)
        for cp in codepoints:
            e = _encode_entity_map.get(cp)
            if e:
                res.append("&")
                res.append(e)
                if not e.endswith(";"):
                    res.append(";")
            else:
                res.append("&#x%x;" % cp)
        return ("".join(res), exc.end)
    else:
        return xmlcharrefreplace_errors(exc)


register_error("htmlentityreplace", htmlentityreplace_errors)
|
||||
|
||||
|
||||
def serialize(input, tree="etree", encoding=None, **serializer_opts):
    """Serializes the input token stream using the specified treewalker

    :arg input: the token stream to serialize

    :arg tree: the treewalker to use

    :arg encoding: the encoding to use

    :arg serializer_opts: any options to pass to the
        :py:class:`html5lib.serializer.HTMLSerializer` that gets created

    :returns: the tree serialized as a string

    Example:

    >>> from html5lib.html5parser import parse
    >>> from html5lib.serializer import serialize
    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
    >>> serialize(token_stream, omit_optional_tags=False)
    '<html><head></head><body><p>Hi!</p></body></html>'

    """
    # XXX: Should we cache this?
    walker = treewalkers.getTreeWalker(tree)
    s = HTMLSerializer(**serializer_opts)
    return s.render(walker(input), encoding)
|
||||
|
||||
|
||||
class HTMLSerializer(object):
    """Serialize a treewalker token stream to HTML text or bytes."""

    # attribute quoting options
    quote_attr_values = "legacy"  # be secure by default
    quote_char = '"'
    use_best_quote_char = True

    # tag syntax options
    omit_optional_tags = True
    minimize_boolean_attributes = True
    use_trailing_solidus = False
    space_before_trailing_solidus = True

    # escaping options
    escape_lt_in_attrs = False
    escape_rcdata = False
    resolve_entities = True

    # miscellaneous options
    alphabetical_attributes = False
    inject_meta_charset = True
    strip_whitespace = False
    sanitize = False

    options = ("quote_attr_values", "quote_char", "use_best_quote_char",
               "omit_optional_tags", "minimize_boolean_attributes",
               "use_trailing_solidus", "space_before_trailing_solidus",
               "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
               "alphabetical_attributes", "inject_meta_charset",
               "strip_whitespace", "sanitize")

    def __init__(self, **kwargs):
        """Initialize HTMLSerializer

        :arg inject_meta_charset: Whether or not to inject the meta charset.

            Defaults to ``True``.

        :arg quote_attr_values: Whether to quote attribute values that don't
            require quoting per legacy browser behavior (``"legacy"``), when
            required by the standard (``"spec"``), or always (``"always"``).

            Defaults to ``"legacy"``.

        :arg quote_char: Use given quote character for attribute quoting.

            Defaults to ``"`` which will use double quotes unless attribute
            value contains a double quote, in which case single quotes are
            used.

        :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
            values.

            Defaults to ``False``.

        :arg escape_rcdata: Whether to escape characters that need to be
            escaped within normal elements within rcdata elements such as
            style.

            Defaults to ``False``.

        :arg resolve_entities: Whether to resolve named character entities that
            appear in the source tree. The XML predefined entities &lt; &gt;
            &amp; &quot; &apos; are unaffected by this setting.

            Defaults to ``True``.

        :arg strip_whitespace: Whether to remove semantically meaningless
            whitespace. (This compresses all whitespace to a single space
            except within ``pre``.)

            Defaults to ``False``.

        :arg minimize_boolean_attributes: Shortens boolean attributes to give
            just the attribute value, for example::

              <input disabled="disabled">

            becomes::

              <input disabled>

            Defaults to ``True``.

        :arg use_trailing_solidus: Includes a close-tag slash at the end of the
            start tag of void elements (empty elements whose end tag is
            forbidden). E.g. ``<hr/>``.

            Defaults to ``False``.

        :arg space_before_trailing_solidus: Places a space immediately before
            the closing slash in a tag using a trailing solidus. E.g.
            ``<hr />``. Requires ``use_trailing_solidus=True``.

            Defaults to ``True``.

        :arg sanitize: Strip all unsafe or unknown constructs from output.
            See :py:class:`html5lib.filters.sanitizer.Filter`.

            Defaults to ``False``.

        :arg omit_optional_tags: Omit start/end tags that are optional.

            Defaults to ``True``.

        :arg alphabetical_attributes: Reorder attributes to be in alphabetical order.

            Defaults to ``False``.

        """
        unexpected_args = frozenset(kwargs) - frozenset(self.options)
        if len(unexpected_args) > 0:
            raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args)))
        if 'quote_char' in kwargs:
            # An explicit quote character turns off automatic selection
            # unless use_best_quote_char is also passed explicitly.
            self.use_best_quote_char = False
        for attr in self.options:
            setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
        self.errors = []
        self.strict = False

    def encode(self, string):
        """Encode ``string`` with the active encoding, replacing unencodable
        characters via the ``htmlentityreplace`` error handler; return it
        unchanged when no encoding is set."""
        assert(isinstance(string, text_type))
        if self.encoding:
            return string.encode(self.encoding, "htmlentityreplace")
        else:
            return string

    def encodeStrict(self, string):
        """Encode ``string`` with the active encoding using ``strict`` error
        handling; return it unchanged when no encoding is set."""
        assert(isinstance(string, text_type))
        if self.encoding:
            return string.encode(self.encoding, "strict")
        else:
            return string

    def serialize(self, treewalker, encoding=None):
        """Yield the serialized output for ``treewalker`` piece by piece.

        :arg treewalker: iterable of token dicts

        :arg encoding: if given, pieces are encoded to bytes with it
        """
        # pylint:disable=too-many-nested-blocks
        self.encoding = encoding
        in_cdata = False
        self.errors = []

        if encoding and self.inject_meta_charset:
            from .filters.inject_meta_charset import Filter
            treewalker = Filter(treewalker, encoding)
        # Alphabetical attributes is here under the assumption that none of
        # the later filters add or change order of attributes; it needs to be
        # before the sanitizer so escaped elements come out correctly
        if self.alphabetical_attributes:
            from .filters.alphabeticalattributes import Filter
            treewalker = Filter(treewalker)
        # WhitespaceFilter should be used before OptionalTagFilter
        # for maximum efficiently of this latter filter
        if self.strip_whitespace:
            from .filters.whitespace import Filter
            treewalker = Filter(treewalker)
        if self.sanitize:
            from .filters.sanitizer import Filter
            treewalker = Filter(treewalker)
        if self.omit_optional_tags:
            from .filters.optionaltags import Filter
            treewalker = Filter(treewalker)

        for token in treewalker:
            token_type = token["type"]
            if token_type == "Doctype":
                doctype = "<!DOCTYPE %s" % token["name"]

                if token["publicId"]:
                    doctype += ' PUBLIC "%s"' % token["publicId"]
                elif token["systemId"]:
                    doctype += " SYSTEM"
                if token["systemId"]:
                    if token["systemId"].find('"') >= 0:
                        if token["systemId"].find("'") >= 0:
                            self.serializeError("System identifer contains both single and double quote characters")
                        quote_char = "'"
                    else:
                        quote_char = '"'
                    doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)

                doctype += ">"
                yield self.encodeStrict(doctype)

            elif token_type in ("Characters", "SpaceCharacters"):
                if token_type == "SpaceCharacters" or in_cdata:
                    if in_cdata and token["data"].find("</") >= 0:
                        self.serializeError("Unexpected </ in CDATA")
                    yield self.encode(token["data"])
                else:
                    yield self.encode(escape(token["data"]))

            elif token_type in ("StartTag", "EmptyTag"):
                name = token["name"]
                yield self.encodeStrict("<%s" % name)
                if name in rcdataElements and not self.escape_rcdata:
                    in_cdata = True
                elif in_cdata:
                    self.serializeError("Unexpected child element of a CDATA element")
                for (_, attr_name), attr_value in token["data"].items():
                    # TODO: Add namespace support here
                    k = attr_name
                    v = attr_value
                    yield self.encodeStrict(' ')

                    yield self.encodeStrict(k)
                    if not self.minimize_boolean_attributes or \
                        (k not in booleanAttributes.get(name, tuple()) and
                         k not in booleanAttributes.get("", tuple())):
                        yield self.encodeStrict("=")
                        # Decide on quoting from the raw value, before any
                        # escaping changes which characters it contains.
                        if self.quote_attr_values == "always" or len(v) == 0:
                            quote_attr = True
                        elif self.quote_attr_values == "spec":
                            quote_attr = _quoteAttributeSpec.search(v) is not None
                        elif self.quote_attr_values == "legacy":
                            quote_attr = _quoteAttributeLegacy.search(v) is not None
                        else:
                            raise ValueError("quote_attr_values must be one of: "
                                             "'always', 'spec', or 'legacy'")
                        v = v.replace("&", "&amp;")
                        if self.escape_lt_in_attrs:
                            v = v.replace("<", "&lt;")
                        if quote_attr:
                            quote_char = self.quote_char
                            if self.use_best_quote_char:
                                if "'" in v and '"' not in v:
                                    quote_char = '"'
                                elif '"' in v and "'" not in v:
                                    quote_char = "'"
                            # Escape only the quote character actually used
                            # as the delimiter.
                            if quote_char == "'":
                                v = v.replace("'", "&#39;")
                            else:
                                v = v.replace('"', "&quot;")
                            yield self.encodeStrict(quote_char)
                            yield self.encode(v)
                            yield self.encodeStrict(quote_char)
                        else:
                            yield self.encode(v)
                if name in voidElements and self.use_trailing_solidus:
                    if self.space_before_trailing_solidus:
                        yield self.encodeStrict(" /")
                    else:
                        yield self.encodeStrict("/")
                yield self.encode(">")

            elif token_type == "EndTag":
                name = token["name"]
                if name in rcdataElements:
                    in_cdata = False
                elif in_cdata:
                    self.serializeError("Unexpected child element of a CDATA element")
                yield self.encodeStrict("</%s>" % name)

            elif token_type == "Comment":
                data = token["data"]
                if data.find("--") >= 0:
                    self.serializeError("Comment contains --")
                yield self.encodeStrict("<!--%s-->" % token["data"])

            elif token_type == "Entity":
                name = token["name"]
                key = name + ";"
                if key not in entities:
                    self.serializeError("Entity %s not recognized" % name)
                if self.resolve_entities and key not in xmlEntities:
                    data = entities[key]
                else:
                    data = "&%s;" % name
                yield self.encodeStrict(data)

            else:
                self.serializeError(token["data"])

    def render(self, treewalker, encoding=None):
        """Serializes the stream from the treewalker into a string

        :arg treewalker: the treewalker to serialize

        :arg encoding: the string encoding to use

        :returns: the serialized tree

        Example:

        >>> from html5lib import parse, getTreeWalker
        >>> from html5lib.serializer import HTMLSerializer
        >>> token_stream = parse('<html><body>Hi!</body></html>')
        >>> walker = getTreeWalker('etree')
        >>> serializer = HTMLSerializer(omit_optional_tags=False)
        >>> serializer.render(walker(token_stream))
        '<html><head></head><body>Hi!</body></html>'

        """
        if encoding:
            return b"".join(list(self.serialize(treewalker, encoding)))
        else:
            return "".join(list(self.serialize(treewalker)))

    def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
        """Record ``data`` in ``self.errors``; raise ``SerializeError`` when
        ``self.strict`` is set."""
        # XXX The idea is to make data mandatory.
        self.errors.append(data)
        if self.strict:
            raise SerializeError
|
||||
|
||||
|
||||
class SerializeError(Exception):
    """Error in serialized tree"""
|
||||
@@ -1,16 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from .. import treewalkers
|
||||
|
||||
from .htmlserializer import HTMLSerializer
|
||||
|
||||
|
||||
def serialize(input, tree="etree", format="html", encoding=None,
              **serializer_opts):
    """Serialize ``input`` with an ``HTMLSerializer``.

    :arg input: the tree handed to the treewalker
    :arg tree: name passed to ``treewalkers.getTreeWalker``
    :arg format: output format; only ``"html"`` is accepted
    :arg encoding: passed through to ``HTMLSerializer.render``
    :arg serializer_opts: keyword options for ``HTMLSerializer``

    :raises ValueError: if ``format`` is not ``"html"``
    """
    # Reject an unsupported format before looking up a treewalker.
    if format != "html":
        raise ValueError("type must be html")
    # XXX: Should we cache this?
    walker = treewalkers.getTreeWalker(tree)
    s = HTMLSerializer(**serializer_opts)
    return s.render(walker(input), encoding)
|
||||
@@ -1,320 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
from six import text_type
|
||||
|
||||
import gettext
|
||||
_ = gettext.gettext
|
||||
|
||||
try:
|
||||
from functools import reduce
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
from ..constants import voidElements, booleanAttributes, spaceCharacters
|
||||
from ..constants import rcdataElements, entities, xmlEntities
|
||||
from .. import utils
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
spaceCharacters = "".join(spaceCharacters)
|
||||
|
||||
try:
|
||||
from codecs import register_error, xmlcharrefreplace_errors
|
||||
except ImportError:
|
||||
unicode_encode_errors = "strict"
|
||||
else:
|
||||
unicode_encode_errors = "htmlentityreplace"
|
||||
|
||||
# Reverse lookup: code point -> entity name, for entities that expand to a
# single character.
encode_entity_map = {}
# True when one str element can hold a non-BMP code point (no surrogate pairs).
is_ucs4 = len("\U0010FFFF") == 1
for k, v in list(entities.items()):
    # skip multi-character entities
    if ((is_ucs4 and len(v) > 1) or
            (not is_ucs4 and len(v) > 2)):
        continue
    if v != "&":
        if len(v) == 2:
            v = utils.surrogatePairToCodepoint(v)
        else:
            v = ord(v)
        if v not in encode_entity_map or k.islower():
            # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
            encode_entity_map[v] = k
|
||||
|
||||
def htmlentityreplace_errors(exc):
    """Codec error handler that emits HTML character references.

    For ``UnicodeEncodeError`` / ``UnicodeTranslateError`` every code point in
    the failing range is replaced by ``&name;`` when ``encode_entity_map`` has
    a name for it, else by a hexadecimal ``&#x...;`` reference.  Any other
    exception is delegated to ``xmlcharrefreplace_errors``.

    :returns: ``(replacement_text, position_to_resume_at)``
    """
    if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
        res = []
        codepoints = []
        skip = False
        for i, c in enumerate(exc.object[exc.start:exc.end]):
            if skip:
                # Second half of a surrogate pair handled on the previous pass.
                skip = False
                continue
            index = i + exc.start
            if utils.isSurrogatePair(exc.object[index:min(exc.end, index + 2)]):
                codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
                skip = True
            else:
                codepoint = ord(c)
            codepoints.append(codepoint)
        for cp in codepoints:
            e = encode_entity_map.get(cp)
            if e:
                res.append("&")
                res.append(e)
                if not e.endswith(";"):
                    res.append(";")
            else:
                res.append("&#x%x;" % cp)
        return ("".join(res), exc.end)
    else:
        return xmlcharrefreplace_errors(exc)


register_error(unicode_encode_errors, htmlentityreplace_errors)

del register_error
|
||||
|
||||
|
||||
class HTMLSerializer(object):
    """Turn a stream of treewalker tokens into serialized HTML.

    Behaviour is controlled by the option attributes below; each can be
    overridden per instance through keyword arguments to ``__init__``.
    Problems found while serializing are collected in ``self.errors`` and,
    when ``self.strict`` is true, also raised as ``SerializeError``.
    """

    # attribute quoting options
    quote_attr_values = False
    quote_char = '"'
    use_best_quote_char = True

    # tag syntax options
    omit_optional_tags = True
    minimize_boolean_attributes = True
    use_trailing_solidus = False
    space_before_trailing_solidus = True

    # escaping options
    escape_lt_in_attrs = False
    escape_rcdata = False
    resolve_entities = True

    # miscellaneous options
    alphabetical_attributes = False
    inject_meta_charset = True
    strip_whitespace = False
    sanitize = False

    # Names accepted as keyword arguments by __init__.
    options = ("quote_attr_values", "quote_char", "use_best_quote_char",
               "omit_optional_tags", "minimize_boolean_attributes",
               "use_trailing_solidus", "space_before_trailing_solidus",
               "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
               "alphabetical_attributes", "inject_meta_charset",
               "strip_whitespace", "sanitize")

    def __init__(self, **kwargs):
        """Initialize HTMLSerializer.

        Keyword options (default given first unless specified) include:

        inject_meta_charset=True|False
          Whether to insert a meta element to define the character set of
          the document.
        quote_attr_values=True|False
          Whether to quote attribute values that don't require quoting
          per HTML5 parsing rules.
        quote_char=u'"'|u"'"
          Use given quote character for attribute quoting. Default is to
          use double quote unless attribute value contains a double quote,
          in which case single quotes are used instead.
        escape_lt_in_attrs=False|True
          Whether to escape < in attribute values.
        escape_rcdata=False|True
          Whether to escape characters that need to be escaped within normal
          elements within rcdata elements such as style.
        resolve_entities=True|False
          Whether to resolve named character entities that appear in the
          source tree. The XML predefined entities &lt; &gt; &amp; &quot;
          &apos; are unaffected by this setting.
        strip_whitespace=False|True
          Whether to remove semantically meaningless whitespace. (This
          compresses all whitespace to a single space except within pre.)
        minimize_boolean_attributes=True|False
          Shortens boolean attributes to give just the attribute value,
          for example <input disabled="disabled"> becomes <input disabled>.
        use_trailing_solidus=False|True
          Includes a close-tag slash at the end of the start tag of void
          elements (empty elements whose end tag is forbidden). E.g. <hr/>.
        space_before_trailing_solidus=True|False
          Places a space immediately before the closing slash in a tag
          using a trailing solidus. E.g. <hr />. Requires
          use_trailing_solidus.
        sanitize=False|True
          Strip all unsafe or unknown constructs from output.
          See `html5lib user documentation`_
        omit_optional_tags=True|False
          Omit start/end tags that are optional.
        alphabetical_attributes=False|True
          Reorder attributes to be in alphabetical order.

        .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
        """
        # An explicit quote_char means the caller does not want the
        # serializer to pick the "best" quote per attribute value.
        if 'quote_char' in kwargs:
            self.use_best_quote_char = False
        for attr in self.options:
            setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
        self.errors = []
        self.strict = False

    def encode(self, string):
        """Encode ``string`` to ``self.encoding`` using the error handler
        named by ``unicode_encode_errors``; return it unchanged when no
        encoding is set."""
        assert isinstance(string, text_type)
        if self.encoding:
            return string.encode(self.encoding, unicode_encode_errors)
        else:
            return string

    def encodeStrict(self, string):
        """Encode ``string`` to ``self.encoding`` with the ``"strict"``
        error handler; return it unchanged when no encoding is set."""
        assert isinstance(string, text_type)
        if self.encoding:
            return string.encode(self.encoding, "strict")
        else:
            return string

    def serialize(self, treewalker, encoding=None):
        """Yield the serialized output for ``treewalker`` piece by piece.

        ``treewalker`` is an iterable of token dicts keyed by ``"type"``.
        When ``encoding`` is given every yielded piece is passed through
        ``encode``/``encodeStrict``; otherwise text is yielded as is.
        ``self.errors`` is reset at the start of every call.
        """
        self.encoding = encoding
        in_cdata = False
        self.errors = []

        if encoding and self.inject_meta_charset:
            from ..filters.inject_meta_charset import Filter
            treewalker = Filter(treewalker, encoding)
        # WhitespaceFilter should be used before OptionalTagFilter
        # for maximum efficiency of the latter filter
        if self.strip_whitespace:
            from ..filters.whitespace import Filter
            treewalker = Filter(treewalker)
        if self.sanitize:
            from ..filters.sanitizer import Filter
            treewalker = Filter(treewalker)
        if self.omit_optional_tags:
            from ..filters.optionaltags import Filter
            treewalker = Filter(treewalker)
        # Alphabetical attributes must be last, as other filters
        # could add attributes and alter the order
        if self.alphabetical_attributes:
            from ..filters.alphabeticalattributes import Filter
            treewalker = Filter(treewalker)

        for token in treewalker:
            token_type = token["type"]
            if token_type == "Doctype":
                doctype = "<!DOCTYPE %s" % token["name"]

                if token["publicId"]:
                    doctype += ' PUBLIC "%s"' % token["publicId"]
                elif token["systemId"]:
                    doctype += " SYSTEM"
                if token["systemId"]:
                    if token["systemId"].find('"') >= 0:
                        if token["systemId"].find("'") >= 0:
                            self.serializeError(_("System identifer contains both single and double quote characters"))
                        quote_char = "'"
                    else:
                        quote_char = '"'
                    doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)

                doctype += ">"
                yield self.encodeStrict(doctype)

            elif token_type in ("Characters", "SpaceCharacters"):
                if token_type == "SpaceCharacters" or in_cdata:
                    if in_cdata and token["data"].find("</") >= 0:
                        self.serializeError(_("Unexpected </ in CDATA"))
                    yield self.encode(token["data"])
                else:
                    yield self.encode(escape(token["data"]))

            elif token_type in ("StartTag", "EmptyTag"):
                name = token["name"]
                yield self.encodeStrict("<%s" % name)
                if name in rcdataElements and not self.escape_rcdata:
                    in_cdata = True
                elif in_cdata:
                    self.serializeError(_("Unexpected child element of a CDATA element"))
                for (attr_namespace, attr_name), attr_value in token["data"].items():
                    # TODO: Add namespace support here
                    k = attr_name
                    v = attr_value
                    yield self.encodeStrict(' ')

                    yield self.encodeStrict(k)
                    if not self.minimize_boolean_attributes or \
                        (k not in booleanAttributes.get(name, tuple())
                         and k not in booleanAttributes.get("", tuple())):
                        yield self.encodeStrict("=")
                        if self.quote_attr_values or not v:
                            quote_attr = True
                        else:
                            # Quote as soon as the value contains any
                            # character that would end an unquoted value.
                            quote_attr = any(ch in v for ch in
                                             spaceCharacters + ">\"'=")
                        # Ampersands must be escaped first so the entity
                        # references added below are not escaped again.
                        v = v.replace("&", "&amp;")
                        if self.escape_lt_in_attrs:
                            v = v.replace("<", "&lt;")
                        if quote_attr:
                            quote_char = self.quote_char
                            if self.use_best_quote_char:
                                if "'" in v and '"' not in v:
                                    quote_char = '"'
                                elif '"' in v and "'" not in v:
                                    quote_char = "'"
                            # Escape whichever quote delimits the value.
                            if quote_char == "'":
                                v = v.replace("'", "&#39;")
                            else:
                                v = v.replace('"', "&quot;")
                            yield self.encodeStrict(quote_char)
                            yield self.encode(v)
                            yield self.encodeStrict(quote_char)
                        else:
                            yield self.encode(v)
                if name in voidElements and self.use_trailing_solidus:
                    if self.space_before_trailing_solidus:
                        yield self.encodeStrict(" /")
                    else:
                        yield self.encodeStrict("/")
                yield self.encode(">")

            elif token_type == "EndTag":
                name = token["name"]
                if name in rcdataElements:
                    in_cdata = False
                elif in_cdata:
                    self.serializeError(_("Unexpected child element of a CDATA element"))
                yield self.encodeStrict("</%s>" % name)

            elif token_type == "Comment":
                data = token["data"]
                if data.find("--") >= 0:
                    self.serializeError(_("Comment contains --"))
                yield self.encodeStrict("<!--%s-->" % token["data"])

            elif token_type == "Entity":
                name = token["name"]
                key = name + ";"
                known = key in entities
                if not known:
                    # Translate the template first, then interpolate.
                    self.serializeError(_("Entity %s not recognized") % name)
                # An unknown entity cannot be resolved; once the error has
                # been recorded it is written out as a plain reference
                # instead of failing with KeyError.
                if self.resolve_entities and known and key not in xmlEntities:
                    data = entities[key]
                else:
                    data = "&%s;" % name
                yield self.encodeStrict(data)

            else:
                self.serializeError(token["data"])

    def render(self, treewalker, encoding=None):
        """Serialize ``treewalker`` and return the joined result: bytes
        when ``encoding`` is given, text otherwise."""
        if encoding:
            return b"".join(self.serialize(treewalker, encoding))
        else:
            return "".join(self.serialize(treewalker))

    def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
        """Record ``data`` in ``self.errors``; raise ``SerializeError``
        when ``self.strict`` is true."""
        # XXX The idea is to make data mandatory.
        self.errors.append(data)
        if self.strict:
            raise SerializeError
|
||||
|
||||
|
||||
class SerializeError(Exception):
    """Error in serialized tree"""
    # Declared as a class (not a function) so that it can be raised and
    # caught like any other exception.
    pass
|
||||
@@ -0,0 +1,108 @@
|
||||
from __future__ import print_function
|
||||
import os.path
|
||||
import sys
|
||||
|
||||
import pkg_resources
|
||||
import pytest
|
||||
|
||||
from .tree_construction import TreeConstructionFile
|
||||
from .tokenizer import TokenizerFile
|
||||
from .sanitizer import SanitizerFile
|
||||
|
||||
# Directory holding this file; every other location is derived from it.
_dir = os.path.abspath(os.path.dirname(__file__))
# Two levels above this file (kept un-normalized, with the ".." parts).
_root = os.path.join(_dir, os.pardir, os.pardir)
# Shared test data and the two suites inside it.
_testdata = os.path.join(_dir, "testdata")
_tree_construction = os.path.join(_dir, "testdata", "tree-construction")
_tokenizer = os.path.join(_dir, "testdata", "tokenizer")
# Sanitizer fixtures sit next to, not inside, the shared test data.
_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")
|
||||
|
||||
|
||||
def fail_if_missing_pytest_expect():
    """Re-raise ImportError, after printing a banner to stderr, when
    ``pytest_expect`` cannot be imported; otherwise do nothing."""
    try:
        from pytest_expect import expect  # noqa
    except ImportError:
        rule = '*' * 78
        banner = "".join([
            '\n',
            rule, '\n',
            'ERROR: Either pytest-expect or its dependency u-msgpack-python is not\n',
            'installed. Please install them both before running pytest.\n',
            rule, '\n',
        ])
        print(banner, file=sys.stderr)
        # Propagate the original ImportError so the caller stops.
        raise
|
||||
|
||||
|
||||
fail_if_missing_pytest_expect()
|
||||
|
||||
|
||||
def pytest_configure(config):
    """Collect environment problems and stop pytest if there are any.

    Always checks that the test data directory exists.  When
    ``config.option.update_xfail`` is set, additionally checks every
    requirement listed in ``requirements-optional.txt`` and that
    cElementTree is importable and distinct from ElementTree.
    """
    problems = []

    if not os.path.exists(_testdata):
        if os.path.exists(os.path.join(_root, ".git")):
            hint = ("Please run git submodule update --init --recursive " +
                    "and then run tests again.")
        else:
            hint = ("The testdata doesn't appear to be included with this package, " +
                    "so finding the right version will be hard. :(")
        problems.append("testdata not available! " + hint)

    if config.option.update_xfail:
        # Check for optional requirements
        req_file = os.path.join(_root, "requirements-optional.txt")
        if os.path.exists(req_file):
            with open(req_file, "r") as fp:
                for line in fp:
                    stripped = line.strip()
                    # Skip blank lines, "-r" includes and comments.
                    if not stripped or line.startswith(("-r", "#")):
                        continue

                    if ";" in line:
                        spec, marker = stripped.split(";", 1)
                    else:
                        spec, marker = stripped, None
                    req = pkg_resources.Requirement.parse(spec)

                    if marker and not pkg_resources.evaluate_marker(marker):
                        problems.append("%s not available in this environment" % spec)
                        continue

                    try:
                        installed = pkg_resources.working_set.find(req)
                    except pkg_resources.VersionConflict:
                        problems.append("Outdated version of %s installed, need %s" % (req.name, spec))
                    else:
                        if not installed:
                            problems.append("Need %s" % spec)

        # Check cElementTree
        import xml.etree.ElementTree as ElementTree

        try:
            import xml.etree.cElementTree as cElementTree
        except ImportError:
            problems.append("cElementTree unable to be imported")
        else:
            if cElementTree.Element is ElementTree.Element:
                problems.append("cElementTree is just an alias for ElementTree")

    if problems:
        pytest.exit("\n".join(problems))
|
||||
|
||||
|
||||
def pytest_collect_file(path, parent):
    """Return the collector for ``path`` based on which test data
    directory it lives under and its extension, or None."""
    # Gather the file's directory and every ancestor; the walk ends when
    # dirname() stops producing a new value.
    ancestors = set()
    current = os.path.abspath(path.dirname)
    while current not in ancestors:
        ancestors.add(current)
        current = os.path.dirname(current)

    # Checked in order; only the first matching directory is considered.
    routes = (
        (_tree_construction, ".dat", TreeConstructionFile),
        (_tokenizer, ".test", TokenizerFile),
        (_sanitizer_testdata, ".dat", SanitizerFile),
    )
    for root, extension, collector in routes:
        if root in ancestors:
            if path.ext == extension:
                return collector(path, parent)
            return None
    return None
|
||||
@@ -1,41 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Allow us to import from the src directory
|
||||
os.chdir(os.path.split(os.path.abspath(__file__))[0])
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
|
||||
|
||||
from html5lib.tokenizer import HTMLTokenizer
|
||||
|
||||
|
||||
class HTMLParser(object):
    """ Fake parser to test tokenizer output """

    def parse(self, stream, output=True):
        """Run the tokenizer over ``stream`` to exhaustion, printing each
        token unless ``output`` is false."""
        for token in HTMLTokenizer(stream):
            if not output:
                # Still consume the token so the tokenizer does its work.
                continue
            print(token)
|
||||
|
||||
if __name__ == "__main__":
    # Usage: mockParser.py filename [stats]
    x = HTMLParser()
    if len(sys.argv) > 2:
        # Any second argument switches to profiling.  cProfile is used
        # because hotshot does not exist on Python 3.
        import cProfile
        import pstats
        prof = cProfile.Profile()
        prof.runcall(x.parse, sys.argv[1], False)
        prof.dump_stats('stats.prof')
        stats = pstats.Stats('stats.prof')
        stats.strip_dirs()
        stats.sort_stats('time')
        stats.print_stats()
    elif len(sys.argv) > 1:
        x.parse(sys.argv[1])
    else:
        print("""Usage: python mockParser.py filename [stats]
If stats is specified the cProfile profiler will run and output the
stats instead.
""")
|
||||
@@ -1,36 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
|
||||
def f1():
    """Benchmark case: concatenate with augmented assignment (``+=``)."""
    head = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    middle = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    tail = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    head += middle + tail
|
||||
|
||||
|
||||
def f2():
    """Benchmark case: concatenate with the ``+`` operator."""
    head = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    middle = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    tail = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    head = head + middle + tail
|
||||
|
||||
|
||||
def f3():
    """Benchmark case: concatenate with ``"".join``."""
    head = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    middle = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    tail = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    head = "".join((head, middle, tail))
|
||||
|
||||
|
||||
def f4():
    """Benchmark case: concatenate with ``%`` string formatting."""
    head = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    middle = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    tail = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    head = "%s%s%s" % (head, middle, tail)
|
||||
|
||||
import timeit

# The setup string imports from __main__, so the benchmark can only work
# when this file is executed as a script.
if __name__ == "__main__":
    for index in range(4):
        func_name = "f%s" % (index + 1)
        # Time the call "f1()"; timing the bare name "f1" would only
        # measure a name lookup and never run the function body.
        timer = timeit.Timer(func_name + "()",
                             "from __main__ import " + func_name)
        results = timer.repeat(3, 1000000)
        print(results, min(results))
|
||||
@@ -0,0 +1,50 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import codecs
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from html5lib import parseFragment, serialize
|
||||
|
||||
|
||||
class SanitizerFile(pytest.File):
    """Collector for a JSON file holding a list of sanitizer test cases."""

    def collect(self):
        """Yield one SanitizerTest per entry, named by its list index."""
        with codecs.open(str(self.fspath), "r", encoding="utf-8") as handle:
            cases = json.load(handle)
        for position, case in enumerate(cases):
            yield SanitizerTest(str(position), self, test=case)
|
||||
|
||||
|
||||
class SanitizerTest(pytest.Item):
    """A single sanitizer case: parse the input, serialize it with
    sanitizing enabled and compare with the expected output."""

    def __init__(self, name, parent, test):
        super(SanitizerTest, self).__init__(name, parent)
        self.obj = lambda: 1  # this is to hack around skipif needing a function!
        self.test = test

    def runtest(self):
        """Assert that the sanitized serialization equals the expectation."""
        source = self.test["input"]
        wanted = self.test["output"]

        fragment = parseFragment(source)
        serializer_options = dict(
            sanitize=True,
            omit_optional_tags=False,
            use_trailing_solidus=True,
            space_before_trailing_solidus=False,
            quote_attr_values="always",
            quote_char="'",
            alphabetical_attributes=True,
        )
        actual = serialize(fragment, **serializer_options)

        report = "\n".join(("\n\nInput:", source,
                            "\nExpected:", wanted,
                            "\nReceived:", actual))
        assert wanted == actual, report

    def repr_failure(self, excinfo):
        """Return a short failure report with the traceback cut to start
        at this file and then filtered."""
        trimmed = excinfo.traceback.cut(path=__file__)
        excinfo.traceback = trimmed.filter()

        return excinfo.getrepr(funcargs=True,
                               showlocals=False,
                               style="short",
                               tbfilter=False)
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
# pylint:disable=wrong-import-position
|
||||
|
||||
import os
|
||||
import sys
|
||||
import codecs
|
||||
@@ -13,44 +15,66 @@ sys.path.insert(0, os.path.abspath(os.path.join(base_path,
|
||||
os.path.pardir,
|
||||
os.path.pardir)))
|
||||
|
||||
from html5lib import treebuilders
|
||||
from html5lib import treebuilders, treewalkers, treeadapters # noqa
|
||||
del base_path
|
||||
|
||||
# Build a dict of avaliable trees
|
||||
treeTypes = {"DOM": treebuilders.getTreeBuilder("dom")}
|
||||
# Build a dict of available trees
|
||||
treeTypes = {}
|
||||
|
||||
# Try whatever etree implementations are avaliable from a list that are
|
||||
#"supposed" to work
|
||||
try:
|
||||
import xml.etree.ElementTree as ElementTree
|
||||
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
|
||||
except ImportError:
|
||||
try:
|
||||
import elementtree.ElementTree as ElementTree
|
||||
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
|
||||
except ImportError:
|
||||
pass
|
||||
# DOM impls
|
||||
treeTypes["DOM"] = {
|
||||
"builder": treebuilders.getTreeBuilder("dom"),
|
||||
"walker": treewalkers.getTreeWalker("dom")
|
||||
}
|
||||
|
||||
# ElementTree impls
|
||||
import xml.etree.ElementTree as ElementTree # noqa
|
||||
treeTypes['ElementTree'] = {
|
||||
"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
|
||||
"walker": treewalkers.getTreeWalker("etree", ElementTree)
|
||||
}
|
||||
|
||||
try:
|
||||
import xml.etree.cElementTree as cElementTree
|
||||
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
|
||||
import xml.etree.cElementTree as cElementTree # noqa
|
||||
except ImportError:
|
||||
try:
|
||||
import cElementTree
|
||||
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import lxml.etree as lxml # flake8: noqa
|
||||
except ImportError:
|
||||
pass
|
||||
treeTypes['cElementTree'] = None
|
||||
else:
|
||||
treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
|
||||
# On Python 3.3 and above cElementTree is an alias, don't run them twice.
|
||||
if cElementTree.Element is ElementTree.Element:
|
||||
treeTypes['cElementTree'] = None
|
||||
else:
|
||||
treeTypes['cElementTree'] = {
|
||||
"builder": treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True),
|
||||
"walker": treewalkers.getTreeWalker("etree", cElementTree)
|
||||
}
|
||||
|
||||
try:
|
||||
import lxml.etree as lxml # noqa
|
||||
except ImportError:
|
||||
treeTypes['lxml'] = None
|
||||
else:
|
||||
treeTypes['lxml'] = {
|
||||
"builder": treebuilders.getTreeBuilder("lxml"),
|
||||
"walker": treewalkers.getTreeWalker("lxml")
|
||||
}
|
||||
|
||||
# Genshi impls
|
||||
try:
|
||||
import genshi # noqa
|
||||
except ImportError:
|
||||
treeTypes["genshi"] = None
|
||||
else:
|
||||
treeTypes["genshi"] = {
|
||||
"builder": treebuilders.getTreeBuilder("dom"),
|
||||
"adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
|
||||
"walker": treewalkers.getTreeWalker("genshi")
|
||||
}
|
||||
|
||||
# pylint:enable=wrong-import-position
|
||||
|
||||
|
||||
def get_data_files(subdirectory, files='*.dat'):
|
||||
return glob.glob(os.path.join(test_dir, subdirectory, files))
|
||||
def get_data_files(subdirectory, files='*.dat', search_dir=test_dir):
|
||||
return sorted(glob.glob(os.path.join(search_dir, subdirectory, files)))
|
||||
|
||||
|
||||
class DefaultDict(dict):
|
||||
@@ -71,9 +95,6 @@ class TestData(object):
|
||||
self.encoding = encoding
|
||||
self.newTestHeading = newTestHeading
|
||||
|
||||
def __del__(self):
|
||||
self.f.close()
|
||||
|
||||
def __iter__(self):
|
||||
data = DefaultDict(None)
|
||||
key = None
|
||||
@@ -128,7 +149,7 @@ convertExpected = convert(2)
|
||||
def errorMessage(input, expected, actual):
|
||||
msg = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
|
||||
(repr(input), repr(expected), repr(actual)))
|
||||
if sys.version_info.major == 2:
|
||||
if sys.version_info[0] == 2:
|
||||
msg = msg.encode("ascii", "backslashreplace")
|
||||
return msg
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user