Compare commits
911 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b4eda8bbff | |||
| 8ef44c3520 | |||
| 449de57fc7 | |||
| cbe29e233d | |||
| bef56ff124 | |||
| 5a05c0f858 | |||
| c1e13e520b | |||
| cebe92bd8f | |||
| 6f8cfc7914 | |||
| e7e98b83d2 | |||
| 4b72bb9d28 | |||
| 221068874b | |||
| 6028d8b2f1 | |||
| ddaafe9310 | |||
| 139e38731a | |||
| d25056cb35 | |||
| 5c80a7091b | |||
| 5faf190202 | |||
| 169b114ff6 | |||
| bc67326573 | |||
| a32543533d | |||
| 6b6e40ef96 | |||
| 8127b7ecf0 | |||
| 09425ccbe0 | |||
| 61fbc4e3b5 | |||
| 158e4f85da | |||
| 8b1107d2e1 | |||
| 59ffa9084f | |||
| 19df673c50 | |||
| 5f20894413 | |||
| 7349874804 | |||
| fda5dc7e89 | |||
| d60b45a667 | |||
| ab2e69a76e | |||
| 6a836338a5 | |||
| 5a02365605 | |||
| 26b38c4f64 | |||
| 9b7edf2960 | |||
| 7050f64fae | |||
| 4623a989d8 | |||
| 87b942bd6d | |||
| 87ee5cc627 | |||
| bff8fe8b70 | |||
| 1495882dc7 | |||
| 2e50d84f2a | |||
| d32716f4c5 | |||
| 876aa4eda0 | |||
| 3673aee8e9 | |||
| a758191ee0 | |||
| 99410249c7 | |||
| a705f2ad30 | |||
| 33223dedc1 | |||
| bd8e8ef346 | |||
| c75e7bf656 | |||
| cb4117376a | |||
| 0d37920aad | |||
| 0da6e76200 | |||
| 5f5934a6ee | |||
| 85b7a2f4f5 | |||
| 3dcfd30a04 | |||
| b5a0f65783 | |||
| 3862e6f3a4 | |||
| 1d4e2ec50b | |||
| 8b85485510 | |||
| 722ce3ac8b | |||
| 1e132f2808 | |||
| d007e0a172 | |||
| 3ddd722cc1 | |||
| 82d8189966 | |||
| 2d533eb004 | |||
| f9c899701f | |||
| e9f62fbb09 | |||
| 5b2f09318a | |||
| 8c260c43a8 | |||
| eee793302c | |||
| 0d1fdf6e60 | |||
| 64398d8f30 | |||
| cab736b573 | |||
| 93071dd81e | |||
| e8fcb8f91a | |||
| 33cacfe884 | |||
| f624f7f05a | |||
| 624195d870 | |||
| ab2ef66263 | |||
| 4ea0372212 | |||
| ff31912e8a | |||
| dcefed2e4c | |||
| 55bbc4f585 | |||
| 0f2bb99b39 | |||
| 85342eeed3 | |||
| 374a6a668a | |||
| e3be3195ee | |||
| 503279f3c2 | |||
| f8bb54024c | |||
| 6e53fc606a | |||
| ab810c48af | |||
| 13bb9183af | |||
| 2c5b6ea690 | |||
| a8efa2e266 | |||
| e73eb2fd86 | |||
| d38fa26e13 | |||
| 716f4493e8 | |||
| 3220974a4a | |||
| 6732272047 | |||
| 547f038139 | |||
| 3b0ee60eaa | |||
| a869281de7 | |||
| a4ed77c7bb | |||
| 81718e64d3 | |||
| dee0daf8aa | |||
| 8e599fb22a | |||
| acb5589af1 | |||
| 6db2771cd6 | |||
| 06d4e0a19a | |||
| 3b18c6c14f | |||
| 300359acf2 | |||
| 5456d0200a | |||
| 9890f66443 | |||
| aba863bc84 | |||
| ade416f5c8 | |||
| 7097267f7c | |||
| b0d8d1a86d | |||
| 2c8296ba85 | |||
| 4dd17de146 | |||
| 3a281b0b57 | |||
| 04ed625f1a | |||
| 1cddfb1b2d | |||
| 796b64d83e | |||
| 240a3687d7 | |||
| 9ed4764ab2 | |||
| f253a13297 | |||
| 744cd57dd5 | |||
| e2a5647363 | |||
| a1f324c105 | |||
| 767e0f8ac7 | |||
| 0c0ad02234 | |||
| c09973ec56 | |||
| 03a72e1917 | |||
| f9e0eaaf83 | |||
| 985f75f7da | |||
| 171cbd6c53 | |||
| 9875bc5c5b | |||
| 882509f891 | |||
| 3396502334 | |||
| b7fb99c3d4 | |||
| c82307a710 | |||
| 309a99d183 | |||
| 09a6ef0194 | |||
| 43afcb4239 | |||
| 7a78f33ac3 | |||
| d5fb538630 | |||
| a22cdf5d5b | |||
| fe0636bbbf | |||
| 13859cfbd7 | |||
| 0adadc59ac | |||
| d65ba19c6c | |||
| 5cedbd2fa0 | |||
| 735fb09762 | |||
| 79d61419b0 | |||
| 248b93e5c6 | |||
| d8eff1adb5 | |||
| c911620254 | |||
| c68a32b889 | |||
| 788819a900 | |||
| 27c94af980 | |||
| 81122665a0 | |||
| 1856e687eb | |||
| 6055793d46 | |||
| 99b670ff10 | |||
| 7a09218cc0 | |||
| a34d0523b5 | |||
| f06e900bab | |||
| 7da15a2d44 | |||
| e999cc53d0 | |||
| b7d4bd00a5 | |||
| 8c2aa849d7 | |||
| 01a759fff8 | |||
| cb0008b59e | |||
| 9cd825aff1 | |||
| 8ad52d2979 | |||
| efd6143498 | |||
| 157fae5f83 | |||
| 6d63301b63 | |||
| 9801c8c6b3 | |||
| e04f4c0bd0 | |||
| b501578584 | |||
| 308f429c91 | |||
| 1d45172475 | |||
| 085a4f30db | |||
| 7a600dc2b6 | |||
| c0c2891d8d | |||
| 06b269a2ba | |||
| f3a4db0d87 | |||
| bcd99d18c4 | |||
| c05c400c6f | |||
| 0f081d8d7b | |||
| 833dc5e3ae | |||
| 0be3df435b | |||
| f4446af57e | |||
| 253aa664a8 | |||
| 0df037a295 | |||
| ed49d743f9 | |||
| 203cc392c0 | |||
| 52ba5a7f24 | |||
| 8aa0576bbc | |||
| 5ce9cc79c8 | |||
| 1a596dfdea | |||
| aeecb3ff59 | |||
| 85c8d2d558 | |||
| 2cf4e7ac59 | |||
| e7412a91f9 | |||
| 9888d03982 | |||
| 765cc39553 | |||
| 6e58c2f984 | |||
| 295542ff18 | |||
| 9d72d9c647 | |||
| 853897ec3e | |||
| 9cf8ad7399 | |||
| fdf974c5e3 | |||
| 2920dbfe8d | |||
| 77d05f7697 | |||
| 3ffeaeffb6 | |||
| db2755675c | |||
| 7ca090f73c | |||
| bb251ad29e | |||
| 75d770e019 | |||
| 49bf116c18 | |||
| b7d227fe0f | |||
| 83f59935f2 | |||
| 37b794fa14 | |||
| 1f5c45df91 | |||
| 62e3020234 | |||
| 895d457500 | |||
| 586269efd3 | |||
| 576718fc03 | |||
| 648dd4147a | |||
| c4df743c3e | |||
| b98fead37e | |||
| 6522094164 | |||
| fcd3dfe75c | |||
| ec9a798590 | |||
| 5825443d4d | |||
| 9768b3fadd | |||
| 77a72d6663 | |||
| 08d647c024 | |||
| a77ef040be | |||
| 13e581b953 | |||
| 1cc18617c5 | |||
| 2642f65614 | |||
| 4abb2aacf9 | |||
| 904daaf2b3 | |||
| 3044f2b1fb | |||
| 826accb2d1 | |||
| d5cb35ed95 | |||
| 24c7e4be8c | |||
| abbd7283b2 | |||
| 2980aa08d7 | |||
| e2344abbc4 | |||
| 80097c3500 | |||
| 714f36caee | |||
| fb1860d78b | |||
| ce7acd278e | |||
| b8d9899796 | |||
| e58fa1964d | |||
| 1627dee77e | |||
| bbac0c033f | |||
| 6437e1dbad | |||
| 48a9e998ff | |||
| 6b6ca461f0 | |||
| 7960952a30 | |||
| 5ec64efb75 | |||
| 2440b2eae4 | |||
| 54db2857c9 | |||
| 5b8f0b7361 | |||
| 053ebe3963 | |||
| 661b0367f5 | |||
| 01da0697a0 | |||
| a3d3b670ae | |||
| 5c64a332f8 | |||
| 6fcd9b645a | |||
| 78da16654a | |||
| da20d4882b | |||
| 1f31c38d24 | |||
| 5f2fd9733b | |||
| 8a225b4e09 | |||
| af05b41937 | |||
| d618da457e | |||
| d16bdad782 | |||
| f6d33e73a0 | |||
| 7b48e445f5 | |||
| 2390f904bd | |||
| 3bee3631a3 | |||
| 9da0b2d3c1 | |||
| 7a092e4585 | |||
| 196fb6b4f6 | |||
| 9507002961 | |||
| 943ed38c2f | |||
| 496619b492 | |||
| 4772b42d64 | |||
| 5bc10953cc | |||
| 18deca202d | |||
| 84bc4b018d | |||
| 1a0598a47a | |||
| 973d117887 | |||
| c284c8f336 | |||
| df69cbc84c | |||
| 646453887f | |||
| 189d617005 | |||
| 554cd8bfe7 | |||
| 79505dea20 | |||
| 5358a46b7e | |||
| aff1599ce7 | |||
| bc7df1c8a1 | |||
| f1df1d25a8 | |||
| 47d9b472ed | |||
| 89ab8c34d8 | |||
| 600498f9c1 | |||
| 845fbcd2ac | |||
| 3cc9f19b8f | |||
| e68c642005 | |||
| 81ae950577 | |||
| 62b4496cd6 | |||
| 29b7292d15 | |||
| 791058a2d2 | |||
| b6c108faef | |||
| 72d592866a | |||
| 4052993246 | |||
| a24f6e7789 | |||
| 0d0fd49924 | |||
| 139dcb409e | |||
| 707e6e7d13 | |||
| 36abb29ddd | |||
| a700fe761e | |||
| 7577164471 | |||
| 1bce743ea3 | |||
| f85ab0364a | |||
| eb3a0d52fd | |||
| b8cd295a12 | |||
| d3ff49ee0c | |||
| d4833f1e6e | |||
| 548483ed2f | |||
| f6f39b97c8 | |||
| 21ea5e0df9 | |||
| 3cbab6a5c7 | |||
| f19f39ba16 | |||
| b9c0fd9a1c | |||
| ce520e6944 | |||
| 0ad62a95e2 | |||
| 8f62a69e06 | |||
| 34bbb98f7f | |||
| 26cd6bb955 | |||
| 97534c633d | |||
| 0a9a2963c2 | |||
| 05afc39a35 | |||
| 84fdc1f55f | |||
| 3b03c3c2bb | |||
| 980f62686d | |||
| 202f2532a6 | |||
| 78d193a2fd | |||
| 0c109b0f27 | |||
| e33c0ab86c | |||
| 3a0189069d | |||
| 2688bd9edd | |||
| 889f7bd2d7 | |||
| 0561c2d640 | |||
| b76f1ad004 | |||
| cde6153f64 | |||
| 12bdaa510b | |||
| 0e6a4acf80 | |||
| e7785f7094 | |||
| 2dcf39eff8 | |||
| 1125c5c133 | |||
| faf7cedfe2 | |||
| 52a6127625 | |||
| b552f6f9fa | |||
| 9b558fcce2 | |||
| c8eae6df6c | |||
| 5f50bd7095 | |||
| c8617218dc | |||
| a8ceae993e | |||
| a72a8854c9 | |||
| dc658db9ba | |||
| 8d8ecfe9e1 | |||
| 4b77e63857 | |||
| 19aa800324 | |||
| 85adb6b0e3 | |||
| bd2523821d | |||
| c1838a3c84 | |||
| d836f8f5d0 | |||
| 37491c134e | |||
| aa6efb7e5c | |||
| e4d990c06d | |||
| 01288afac0 | |||
| 579e3ca3ab | |||
| f61bc3ce7c | |||
| cc6004e981 | |||
| 35eb037d05 | |||
| 1eb0e4419d | |||
| 7b5ca875dc | |||
| 2d22a6c383 | |||
| f4884f1c18 | |||
| 27cc3bd185 | |||
| 9b894c2ea7 | |||
| a341808873 | |||
| 8927513f8e | |||
| 84436dfa94 | |||
| 2b73f633e0 | |||
| 3d7a452141 | |||
| 38a8557311 | |||
| 79672923c5 | |||
| 3842182a83 | |||
| 8b0d359e0b | |||
| db2903edfd | |||
| 18d22a72bd | |||
| 402cfc1632 | |||
| 9dec7e4971 | |||
| 931c224247 | |||
| f6ee6d4027 | |||
| 332d41fb25 | |||
| 8303af25fb | |||
| ee02bdb19a | |||
| e674132d5a | |||
| c9eb8bc7be | |||
| 2076a2c6d0 | |||
| 32c0f09b16 | |||
| 1264cabb3f | |||
| fb722d0581 | |||
| cb00ab9610 | |||
| 4102a1c8fd | |||
| af6d7a1ae2 | |||
| 36cae6311a | |||
| 327bb31daa | |||
| 8c2effe337 | |||
| da59adddf4 | |||
| 6f3c806a21 | |||
| 3d119bcd98 | |||
| 6264c21e23 | |||
| d5d6aa0bd5 | |||
| 7ad49fa65a | |||
| 5b8dfb48c3 | |||
| 4d557be99a | |||
| a7e022c6f4 | |||
| fc3f5dad4f | |||
| fa42669580 | |||
| 0c73de726a | |||
| ea87d21977 | |||
| a9e9e8cf44 | |||
| 9905cd307f | |||
| 92ea32b52c | |||
| 4c56f7583a | |||
| fc3050ef3d | |||
| 29c63e11bd | |||
| 64cbe21f6e | |||
| a56bb97d45 | |||
| 6edc6a1c6d | |||
| 01c656ffb2 | |||
| 078c6d0c21 | |||
| 580a8c0f3e | |||
| f0258349bf | |||
| d9080eeb80 | |||
| b504744876 | |||
| 638e8b5b47 | |||
| 9b9c40f310 | |||
| cc3a1db879 | |||
| a16312803e | |||
| 206f9fa5ad | |||
| f20e97574a | |||
| 51764f0ce0 | |||
| e698b9d608 | |||
| e2a7cc6b45 | |||
| 6eaf307be9 | |||
| 9743af5db0 | |||
| 07d02ad75e | |||
| 91f51a27af | |||
| a60318260a | |||
| c3e7e336b5 | |||
| 0b1037b497 | |||
| 7da48b7dc5 | |||
| 73bcfc6151 | |||
| dfe1a16aa0 | |||
| 4f0e685feb | |||
| fca052b308 | |||
| c449f42444 | |||
| 5ec956943c | |||
| 1ad696be6d | |||
| 92b3b762b2 | |||
| 0b29a57079 | |||
| 0dee015181 | |||
| 2f1294a119 | |||
| e609e55710 | |||
| b752ce8572 | |||
| de59c68328 | |||
| f92e78e8be | |||
| 9abc611f1e | |||
| 8e42f61a52 | |||
| 48fd3f977d | |||
| 451636e0b3 | |||
| 1fc810470b | |||
| 1c96efdafa | |||
| 8fb0711973 | |||
| aabb4f2c13 | |||
| eb1c5d976f | |||
| fd89533903 | |||
| d5ec60f0f6 | |||
| 18b896ec0b | |||
| af93e1edec | |||
| a8a5b4ad16 | |||
| 0d40883929 | |||
| 3b6645156d | |||
| 7596346fcd | |||
| 877ff60077 | |||
| 928da6e679 | |||
| c1a9ccef3c | |||
| 5f41c85281 | |||
| 18ef38b90b | |||
| 7b155e6b31 | |||
| ba4d7b2199 | |||
| 869387af34 | |||
| 5b16a80730 | |||
| adf1190584 | |||
| 1c16cf5926 | |||
| a833cf7b0b | |||
| 62a35e7ced | |||
| 7b005760c1 | |||
| b07631f0b5 | |||
| 595d8a8f53 | |||
| 35321b00cd | |||
| 8928f19818 | |||
| 76cc8fad47 | |||
| cb851d8519 | |||
| af0aff3aee | |||
| 6d4099c79c | |||
| d9672e179c | |||
| 1e291343fe | |||
| a5d0bf68fd | |||
| b8e2b524e1 | |||
| 6abd062477 | |||
| fbcc2644bf | |||
| 34b05c8c17 | |||
| e3dce02716 | |||
| ed8a70b5c8 | |||
| 35944b0776 | |||
| 2f80ee5b39 | |||
| 280eb71ae4 | |||
| 9462b1b175 | |||
| 874204838d | |||
| 0e4a936176 | |||
| 5089708e2d | |||
| e17367aa13 | |||
| 26be0978ee | |||
| de1aea9dd2 | |||
| 4c143be906 | |||
| b83cea1073 | |||
| 2418b67089 | |||
| 7e550cf916 | |||
| dce72fcb08 | |||
| adede7bb2e | |||
| 377799ace3 | |||
| 02a822c630 | |||
| 8101bca753 | |||
| 40e177ded0 | |||
| 13f732d733 | |||
| fbca4cbf8c | |||
| 45c8cd1536 | |||
| da293bbc2f | |||
| 7991568d6d | |||
| 5fc1c8cbb1 | |||
| 596981aca2 | |||
| 6d55197218 | |||
| 85cb813a75 | |||
| 5f99319985 | |||
| f34c76eb90 | |||
| adb08aff75 | |||
| 93f8bf561b | |||
| 52e391aa83 | |||
| 751e9fc0c5 | |||
| 77b0b9dc6b | |||
| 5729552206 | |||
| 929f53ac13 | |||
| c6b983ea6c | |||
| 419bee76e2 | |||
| 2f3180cc07 | |||
| b5eb917e10 | |||
| 9fed8d6335 | |||
| becbdba56e | |||
| 85b9373760 | |||
| c069541cee | |||
| 4c0f20694d | |||
| a99175d46c | |||
| 4bab9b9f5b | |||
| a5ea603116 | |||
| 8be6d9bd77 | |||
| 9a9043aa67 | |||
| 7ed58386e5 | |||
| 51660449a8 | |||
| af1a8d13f1 | |||
| 8e13e6c181 | |||
| de915ba840 | |||
| 834922aa35 | |||
| 2d4e67c268 | |||
| 48a036a2bb | |||
| 140fb72aeb | |||
| 2d4c3790a6 | |||
| 74860fe2ee | |||
| aab69705b6 | |||
| d6c88621f6 | |||
| bd275601aa | |||
| 72c04e7b43 | |||
| f281d6bfce | |||
| 62fc223d7b | |||
| e274a542c1 | |||
| cd3b453bbb | |||
| 84bc6c95be | |||
| 3862447fa1 | |||
| f85224258b | |||
| 11d5edcc5e | |||
| 4df519e67a | |||
| e9afcaa9e6 | |||
| 672403ef92 | |||
| 4fbdd67255 | |||
| d6dd93b9d0 | |||
| 80f223e706 | |||
| 0f0d709975 | |||
| 8db5e100b8 | |||
| ebc984d371 | |||
| 80c73e5871 | |||
| 5de4d29dd8 | |||
| fad95a0b22 | |||
| ebd3867c5f | |||
| 0781265baa | |||
| 9b8798d534 | |||
| 190724360c | |||
| 93acb7fbc1 | |||
| 90cc235d23 | |||
| 515698fd95 | |||
| 2596d0a4bc | |||
| ef8f9f7816 | |||
| 276ecf262f | |||
| 5c8d083038 | |||
| a2c399b4b7 | |||
| 4ecec2e362 | |||
| e072cb4123 | |||
| e44cdd4191 | |||
| 43d60b20ca | |||
| 6a61c0e722 | |||
| 9de9428825 | |||
| 13cb31d2db | |||
| 211c687609 | |||
| 3151df31f8 | |||
| db30396c26 | |||
| efed67f6e4 | |||
| 3c0d0a7d60 | |||
| 0f0254675e | |||
| 068cf1a2fd | |||
| 5f3d2904aa | |||
| e81d3a43b8 | |||
| 7006687292 | |||
| d044c65d2c | |||
| d3ae88f5fe | |||
| a598104778 | |||
| c7099f1a7b | |||
| 3955a27594 | |||
| 597ecd8c0b | |||
| 006505bf22 | |||
| 3b0102c5a8 | |||
| 5d8c49c537 | |||
| b276b6eda9 | |||
| dc502c95b2 | |||
| 1b29e4eae5 | |||
| cb5cf573e5 | |||
| 0f5dd3a722 | |||
| ea4a77dbcc | |||
| 02a6de68b8 | |||
| 1dbb7373c6 | |||
| 4bbd2aa56a | |||
| 169fca23a9 | |||
| f4058b7981 | |||
| baffc7a775 | |||
| 0140d20793 | |||
| 8ced7206f0 | |||
| fa4274f2e3 | |||
| 64d0d211b1 | |||
| aaaa6aa731 | |||
| 47d61bb83a | |||
| d5850afcc2 | |||
| 0c48b0799e | |||
| 1b96dbae3d | |||
| 244e183a2b | |||
| 5cb00a0532 | |||
| 09ce46f46a | |||
| 881a23ec7f | |||
| d53da82ddf | |||
| 177d95128f | |||
| 867a162fcf | |||
| fe0291ef55 | |||
| 1a21ab513d | |||
| 1a275e9501 | |||
| 96a8c33767 | |||
| 084284d1ee | |||
| 13b087e44b | |||
| 22b318f05e | |||
| a575e40859 | |||
| ef044e4937 | |||
| 1e1f8e7ca0 | |||
| 814395b58e | |||
| 5ac5c3c595 | |||
| 64a8daab76 | |||
| 3fb6017976 | |||
| 9379e84ba2 | |||
| 8eaa468b1c | |||
| a1c3e64bf3 | |||
| e90e1bd0c5 | |||
| 30cec00f0e | |||
| 2a0c1a13ad | |||
| 072aa0883b | |||
| 2e22c585d0 | |||
| 3240b19649 | |||
| 2f4b47e456 | |||
| f735c9128c | |||
| 56e8cb0f44 | |||
| d5253f130c | |||
| 261c6f3c7e | |||
| 2ad59e6592 | |||
| f5cf977788 | |||
| d392707ecf | |||
| cbc57fbc0b | |||
| b32a2ded77 | |||
| e7ee9ae747 | |||
| 97acfb6845 | |||
| 709197a957 | |||
| 7d003cdc3b | |||
| c0266a5b84 | |||
| 5b61c71cdd | |||
| 3423b42a8a | |||
| 942124ac67 | |||
| 58d4534176 | |||
| 93517582d1 | |||
| 75c60c2b60 | |||
| 1fbd9cfd50 | |||
| 2e6843fd78 | |||
| c073de4acd | |||
| dcd85c85d0 | |||
| 6e5bfd162a | |||
| b579fa7804 | |||
| f356313e67 | |||
| 4055debc6f | |||
| fcc907c507 | |||
| 8a90a51182 | |||
| 4c42b3090a | |||
| 626d519c81 | |||
| dae3672a9a | |||
| 640bf5515f | |||
| 476fd09397 | |||
| bfbf12914f | |||
| 91eae536ae | |||
| 404becadba | |||
| d71d33d899 | |||
| 65e72da01e | |||
| 8556bebb1f | |||
| dc5c353b8d | |||
| 9f7f877cf2 | |||
| 9a827b783a | |||
| d2641f045e | |||
| e4ef6dc604 | |||
| c8cc9bb188 | |||
| a21dd3d0c0 | |||
| b16d6658f8 | |||
| 01aab808c3 | |||
| eb1ae54739 | |||
| 5483d02a6f | |||
| 9d434eb1e9 | |||
| 43269befd6 | |||
| d8d2b06c6c | |||
| 1f9a2f6554 | |||
| 940162a8b5 | |||
| 3c2b39453a | |||
| 459cd92017 | |||
| a5aa0a773d | |||
| d1b569fbbe | |||
| 6d609f628b | |||
| 8d5eaf0f8d | |||
| de93b439ca | |||
| d11d9ef03c | |||
| f1fc8e1d82 | |||
| 9a44c37cab | |||
| 25a9e5efdf | |||
| 9352193986 | |||
| 61436ca278 | |||
| 17b6fcc48a | |||
| 9f9c5cf27a | |||
| 8fd38fbb40 | |||
| ac2c9fff38 | |||
| 8dc4877379 | |||
| d22a3a3953 | |||
| 182538d2a7 | |||
| 997c0bc297 | |||
| f9099cd680 | |||
| e8b47c33b6 | |||
| 6618fdd86b | |||
| 0b5ef5e257 | |||
| 4f36e6119c | |||
| 24b58d9615 | |||
| 4621c21907 | |||
| a53f6005b3 | |||
| 8bad1b2dfc | |||
| 856ec02083 | |||
| 45c63bdac7 | |||
| a5202b8eb8 | |||
| 766e47a757 | |||
| 0026ef7db7 | |||
| 368c7927ff | |||
| 1dd1ec3a0d | |||
| 6ed5c83b05 | |||
| 3efd1e56c4 | |||
| 1e18c9e309 | |||
| c79048027c | |||
| b2c981fca1 | |||
| 88af4d608d | |||
| 2008b35e8e | |||
| a082714ad5 | |||
| 2f28fde4e6 | |||
| e3004b9db7 | |||
| b192f4f80d | |||
| 809331b9fd | |||
| 3828c8bf89 | |||
| 4731750684 | |||
| 54f2308944 | |||
| afdd44323e | |||
| 9b88d5814c | |||
| 02a924e97d | |||
| e167439ed0 | |||
| 9f26d5a401 | |||
| d7f72470ec | |||
| abc45b1a2f | |||
| 5bc530deb2 | |||
| 6a206b0c5e | |||
| 2485639e11 | |||
| d056c14b91 | |||
| 834a8dd0a8 | |||
| ea5e4d48d3 | |||
| 2b08a8958a | |||
| 759b09c8d6 | |||
| 0266afe9ab | |||
| 109c5e0703 | |||
| 40a79c2cc4 | |||
| debc425f99 | |||
| 602a1cc8a3 | |||
| d080eae809 | |||
| 631b5033fe | |||
| af8ea6934b | |||
| 19740ae6c2 | |||
| 7b78b71487 | |||
| 86a43a79c8 | |||
| 6035a1bde4 | |||
| a32e952323 | |||
| d55b1c67df | |||
| 103f7bc18b | |||
| e857c223d4 | |||
| ea07997522 | |||
| d492c73f94 | |||
| 3b836d29a2 | |||
| 9248916527 | |||
| 2006ebb244 | |||
| 58c852cdba | |||
| 9e77a8e304 | |||
| e9817f1e0d | |||
| 123dde7b8f | |||
| c1b84eabdb | |||
| c7ececde77 | |||
| 6f305d636e | |||
| d25990895c | |||
| d406ced759 | |||
| b858b56120 | |||
| c94fe81dbf | |||
| a67bbebb84 | |||
| cf577c81e1 | |||
| ad236be02c | |||
| 3412e379d6 | |||
| 95f240ab07 | |||
| 0c8ae3f45b | |||
| fe87944049 | |||
| 2cbe290916 | |||
| a85321a1a9 | |||
| c55071d157 | |||
| 86eac774e7 | |||
| dac6df4282 | |||
| d7918b1714 | |||
| c4de84a23a | |||
| c147c29756 | |||
| 5a4a50bc9d | |||
| 55ea4009c9 | |||
| 536fd7dfe4 | |||
| a1f6568b84 | |||
| 6a9112f03c | |||
| 89b4305ccb | |||
| 8643e6a055 | |||
| 12509eb93a | |||
| 621623bdb6 | |||
| a2f5ce797d | |||
| 6b17825fa2 | |||
| d20e0bd2c2 | |||
| 6e2f2fb9d2 | |||
| fc00566469 | |||
| 7587860c12 | |||
| fabb5dd003 | |||
| 314da8b50f | |||
| c8fbfcbc24 | |||
| a922961621 | |||
| 2995eb1cac | |||
| 758b732142 | |||
| 50b80f3267 |
@@ -0,0 +1,3 @@
|
||||
.gitattributes export-ignore
|
||||
/Wiki export-ignore
|
||||
.gitignore export-ignore
|
||||
+2
-1
@@ -55,4 +55,5 @@ docs/_build/
|
||||
# pycharm
|
||||
.idea
|
||||
|
||||
icon.psd
|
||||
icon.psd
|
||||
main-icon.psd
|
||||
+210
@@ -1,3 +1,213 @@
|
||||
|
||||
2.0.33.1871
|
||||
- core: normalize line endings in subtitles to LF (\n)
|
||||
- core: add subtitle storage lock to avoid race condition
|
||||
- core: be more verbose about subtitle storage addition
|
||||
- core: fix MPL2 newline parsing, which resulted in broken subtitles
|
||||
- core: encoding change: reduce log spam
|
||||
- submod: common: fix CM_starting_spacedots
|
||||
- opensubtitles: fix request/response handling
|
||||
|
||||
|
||||
|
||||
2.0.33.1849
|
||||
- opensubtitles: add VIP server handling + preference; VIP benefits: 10€/year, ad-free subs, 1000 subs/day, no-cache VIP server, help SZ and subscribe via http://v.ht/osvip
|
||||
- opensubtitles: try to reuse previous token instead of logging in every time
|
||||
- core: add throttling between searches (10 seconds)
|
||||
- core: fix IETF handling for good
|
||||
- core: fix no subtitles being searched in certain situations (when an external subtitle without special tag exists)
|
||||
- core: add subtitle blacklist
|
||||
- core: fixes
|
||||
- core: fix detection of certain PMS media stream language tags ("FR" for example)
|
||||
- core: missing subtitles: correctly skip unwanted subtitle extensions
|
||||
- core: missing subtitles: honor "treat undefined as first language" option correctly
|
||||
- api: add blacklisting endpoints for quickly searching for new subtitles via bookmarklet
|
||||
- submod: colors: apply color mods at the end of processing modifications; fix color mods
|
||||
- submod: new remove_tags modification to remove all styling tags from subtitles
|
||||
- submod: HI: be more aggressive at handling brackets
|
||||
- submod: OCR: update en and hrv
|
||||
- submod: common: remove "torrent downloaded from ..." lines
|
||||
- submod: OCR: fix WholeWord handling, improving modification
|
||||
- submod: apply OCR fixes before HI
|
||||
- submod: OCR: fix broken HI tag colons (ANNOUNCER'. instead of ANNOUNCER:)
|
||||
- menu: advanced: speed up batch modifications
|
||||
- menu: add subtitle blacklist
|
||||
- menu: recently played: show only TV episodes and movies (music tracks were listed here as well)
|
||||
|
||||
|
||||
2.0.29.1767
|
||||
- core: fix internal subtitle storage issues
|
||||
- core: handle "embedded-forced" tag (futureproofing)
|
||||
- core: remove more garbage tags from release groups (nzbgeek, chamele0n, buymore, xpost, postbot)
|
||||
- submod: OCR fix: fix music icon = paragraph
|
||||
|
||||
|
||||
2.0.29.1756
|
||||
- core: don't fail on uppercase file extensions
|
||||
- core: don't re-download a subtitle if we already downloaded one, it still physically exists and external subtitles are configured to be ignored
|
||||
- core: fix VTT subtitle duplication
|
||||
- core: if forced subtitles not explicitly wanted, ignore existing forced subtitles when searching
|
||||
- core: add full IETF language support for `Treat languages with country attribute as ISO 639-1 (e.g. don't download pt-BR if pt subtitle exists)`-setting for embedded subtitles
|
||||
- menu: remove buggy dynamic permission-based channel icon introduced in 1715
|
||||
- menu: improve `Items with missing subtitles` menu usage and item display
|
||||
- menu: `Advanced -> Get my logs` handle custom domains without port
|
||||
- menu: correctly show country/script part of languages with such attributes (e.g. pt-BR)
|
||||
- config: rename `Scan:` settings; make them easier to understand and translate
|
||||
- config: rephrase IETF options as "languages with country attribute" (e.g. pt-BR)
|
||||
- config: separate IETF options into how to display languages with country attribute and how they should be handled when searching/scanning (e.g. pt-BR)
|
||||
- config: `Scheduler: Item age to be considered recent` now can go up to 12 weeks
|
||||
- config: `Scheduler: Periodically search for recent items with missing subtitles` added `every 2 hours`
|
||||
- submod: swe: add Ĺ to Å
|
||||
|
||||
|
||||
2.0.26.1715
|
||||
- core: submod: OCR fixes: swe: replace ĺ with å inside words
|
||||
- core: fix handling of non-existent PMS audio_codec info
|
||||
- core: filename matching ignored the strictness setting in certain global directory configurations (thanks @raduc)
|
||||
- core: don't fail on migration errors
|
||||
- provider titlovi: handle multiple subtitles per archive
|
||||
- provider addic7ed: reset default boost to 19 (was 21)
|
||||
- menu: add warning icon on missing permissions
|
||||
- menu: manual subtitle list sometimes listed duplicates (thanks @andreashoyer)
|
||||
- menu: don't request PMS metadata in item details menu twice
|
||||
- menu: don't fail badly on non-existent PMS metadata in item details menu
|
||||
|
||||
|
||||
2.0.26.1695
|
||||
## ATTENTION: THIS RELEASE RESETS YOUR CONFIGURED LANGUAGES TO DEFAULT!
|
||||
- core: fix bug that caused SZ not to work for Windows users with special characters in their username
|
||||
- core: fix issues when logging failed manual download actions
|
||||
- core: update guessit to 2.1.4
|
||||
- core: fix issue causing the background task scheduler to stop after changing preferences
|
||||
- core: fix Polish encoding (try Windows-1250 first, then ISO 8859-2)
|
||||
- core: remove subscenter provider as it now uses captchas
|
||||
- core: add titlovi as default provider (thanks viking!)
|
||||
- core: increase default PMS API request timeout to 15 (old: 10, max: 45); add preference for that
|
||||
- core: re-add separate legacy FindMissingSubtitles task and run it on the first run to prime SZ's internal subtitle storage
|
||||
- core: add "low impact mode" for people with remote filesystems (currently enabled for List LANGUAGE subtitles in detail menu); alleviates certain plexweb timeout issues
|
||||
- menu: change naming of find missing subtitles menu item
|
||||
- legendastv: fix multi value guessit issues
|
||||
- submod: OCR: update eng and hrv OCR replace dictionaries; fix ". L am huge"
|
||||
|
||||
|
||||
2.0.25.1635
|
||||
- core: update memory handling, possibly reduce memory problems of 2.0
|
||||
- core: support for MPL2 subtitle format
|
||||
- core: update task handling
|
||||
- core: re-enable NVIDIA SHIELD support by fixing rarfile behaviour
|
||||
- core: add SZ_UNRAR_TOOL environment variable for custom unrar location
|
||||
- core: disable SZ when no providers are enabled
|
||||
- core: only start activity monitor if channel or agent are enabled
|
||||
- core: improve custom provider integration
|
||||
- core: update eastern european encoding detection (especially Romanian)
|
||||
- tasks: reduce provider stress by introducing wait times between searches/downloads
|
||||
- windows: correctly ship UnRAR.exe
|
||||
- windows: skip DBM checks
|
||||
- addic7ed: fix Nip/Tuck
|
||||
- subscenter: use new domain
|
||||
|
||||
|
||||
2.0.24.1581
|
||||
- legendastv: ship unrar.exe for Windows users (fixes unrar issues)
|
||||
- addic7ed: fix TooManyRequests error
|
||||
- submod: OCR fixes NL: add custom dictionary data for malformed characters
|
||||
- submod: OCR fixes: update hrv/NL dictionaries
|
||||
- submod: common: remove spaces before punctuation
|
||||
- podnapisi: now returns more subtitles again
|
||||
ATTENTION: Sub-Zero is still broken on PMS for SHIELD. Help needed!
|
||||
|
||||
|
||||
2.0.24.1565
|
||||
- core: fix searchallrecentlymissing task erroring if item not found
|
||||
- core: fix non-plex-items appearing in and crashing the recently played list
|
||||
- core: add hybrid-plus activity setting (current media file and next episode)
|
||||
- podnapisi: fix by using correct guessit parameters
|
||||
|
||||
|
||||
2.0.24.1558
|
||||
- core: fix handling of broken RAR files from legendas
|
||||
|
||||
|
||||
2.0.24.1555
|
||||
- core: fix rare microdvd issue from OpenSubtitles by generally providing FPS info when encountering a microdvd subtitle
|
||||
|
||||
|
||||
2.0.24.1549
|
||||
Changes from 1.4
|
||||
- wiki: new wiki! (thanks @dane22!)
|
||||
- core: update subliminal to version 2
|
||||
- core: update all dependencies
|
||||
- core: add new providers: legendastv (pt-BR), napiprojekt (pl), shooter (cn), subscenter (heb)
|
||||
- core: rewritten all subliminal patches for version 2
|
||||
- core: use SSL again for opensubtitles
|
||||
- core: improved matching due to subliminal 2 (and SZ custom) tvdb/omdb refiners
|
||||
- core: improved matching by relying on existing metadata provided by the PMS
|
||||
- core: improved performance due to multithreaded provider-querying
|
||||
- core: improved performance due to less physical media file access (no more MKV metadata scanning)
|
||||
- core: VTT subtitle format output supported (for Chromecast)
|
||||
- core: rewrote and streamlined internal subtitle data storage format
|
||||
- core: support Cyrillic and Latin variants of Serbian language
|
||||
- core: simplified (custom) provider registration; add own provider registry
|
||||
- core: rewrote recently added missing task
|
||||
- core: automatically fix badly (re-) encoded unicode entities in subtitles
|
||||
- core: always store subtitles in proper UTF-8 encoding
|
||||
- core: add periodic internal subtitle data storage cleanup task
|
||||
- core: on non-windows systems, utilize a file-based cache database for provider media lists and subliminal refiner results
|
||||
- core: add manual and automatic subtitle modification framework (fix common OCR issues, remove hearing impaired etc.)
|
||||
- core: relieve some stress on providers by providing better fine-grained retry handling
|
||||
- menu: add icons for menu items; update main channel icon
|
||||
- menu: add subtitle modifications (subtitle content fixes, offset-based shifting, framerate conversion)
|
||||
- menu: add recently played menu
|
||||
- menu: add "Get my logs" function to the advanced menu, which zips up all necessary logs suitable for posting in the forums
|
||||
- menu: add generic "back to season" and "back to series" entries to item detail views to make navigation easier
|
||||
- config: all scores changed (defaults updated)
|
||||
- config: remove "Force UTF-8 when storing subtitles" (it's now always implied)
|
||||
- improve almost everything Sub-Zero did in 1.4 :)
|
||||
|
||||
|
||||
2.0.23.1464 RC10.1
|
||||
- core: huge bugfix; please check `Library/Application Support/Plex Media\ Server/Plug-in Support/Data/com.plexapp.agents.subzero/DataItems`
|
||||
for any `subs_XXXXX.json.gz` file bigger than 500kb and delete them
|
||||
|
||||
|
||||
2.0.23.1456 RC10
|
||||
- core: findBetterSubtitles: increase series cutoff by 2 (resolution match)
|
||||
- core: add VTT format
|
||||
- core: fix crashes regarding DBM/cache management
|
||||
- core: update rarfile.py
|
||||
- core: add missing encodings
|
||||
- core: full support for Serbian subtitles (Cyrillic and Latin)
|
||||
- podnapisi: fix pt-BR, srp-cyrl and srp-latn
|
||||
- core: implement own provider registry and ditch the subliminal one
|
||||
- core: use ftfy library to fix re-encoding errors inside subtitles introduced by the subtitle author
|
||||
- core: always store and save subtitles normalized to UTF-8
|
||||
- core: replace spaced dashes in movie/series names before re-refining with plex metadata info
|
||||
- submod: remove_HI: handle multiline brackets correctly
|
||||
|
||||
|
||||
2.0.20.1364 RC9
|
||||
- core: performance improvements
|
||||
- core: if info couldn't be guessed from the filename, fill missing info from PMS #270
|
||||
- submod: OCR: add more to the eng dictionary
|
||||
- submod: HI: fixed some issues with font style tags
|
||||
- core: don't ignore subtitles from providers that don't have hearing impaired info, when hearing impaired mode is set to "force non-HI"
|
||||
- legendastv/menu: fix manual subtitle selection issues in menu
|
||||
- core: improve specials matching on OpenSubtitles
|
||||
- core: update guessit
|
||||
|
||||
|
||||
2.0.19.1337 RC8
|
||||
- napiprojekt: fixed: couldn't convert microdvd to SRT on certain occasions
|
||||
- core: when normalize to UTF-8 is enabled, also store the subtitle in UTF-8 encoding in the internal storage
|
||||
- core: add more encodings for western/eastern/northern europe
|
||||
- submod: OCR: update dictionaries from SubtitleEdit
|
||||
- submod: common: be smarter about uppercase i's in words that should have lowercase L's
|
||||
- submod: fix unopened/unclosed font style tags after modification
|
||||
- core: re-enable OMDB support
|
||||
- core: update guessit for better matching
|
||||
- core: fix SearchAllRecentlyMissing (was broken since RC3)
|
||||
|
||||
|
||||
2.0.19.1299 RC7
|
||||
- submod: offset mods now get merged internally when applied multiple times (to avoid errors and increase performance)
|
||||
- submod: improve performance
|
||||
|
||||
+108
-61
@@ -1,9 +1,7 @@
|
||||
# coding=utf-8
|
||||
import sys
|
||||
import datetime
|
||||
import os
|
||||
|
||||
from subliminal_patch import compute_score
|
||||
from subzero.sandbox import restore_builtins
|
||||
|
||||
module = sys.modules['__main__']
|
||||
@@ -18,7 +16,6 @@ import logger
|
||||
|
||||
sys.modules["logger"] = logger
|
||||
|
||||
import subliminal
|
||||
import support
|
||||
|
||||
import interface
|
||||
@@ -26,9 +23,9 @@ sys.modules["interface"] = interface
|
||||
|
||||
from subzero.constants import OS_PLEX_USERAGENT, PERSONAL_MEDIA_IDENTIFIER
|
||||
from interface.menu import *
|
||||
from support.plex_media import media_to_videos, get_media_item_ids, scan_videos
|
||||
from support.subtitlehelpers import get_subtitles_from_metadata
|
||||
from support.storage import whack_missing_parts, save_subtitles
|
||||
from support.plex_media import media_to_videos, get_media_item_ids
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles, store_subtitle_info, get_subtitle_storage
|
||||
from support.items import is_ignored
|
||||
from support.config import config
|
||||
from support.lib import get_intent
|
||||
@@ -36,6 +33,7 @@ from support.helpers import track_usage, get_title_for_video_metadata, get_ident
|
||||
from support.history import get_history
|
||||
from support.data import dispatch_migrate
|
||||
from support.activities import activity
|
||||
from support.download import download_best_subtitles
|
||||
|
||||
|
||||
def Start():
|
||||
@@ -51,9 +49,12 @@ def Start():
|
||||
# clear expired menu history items
|
||||
now = datetime.datetime.now()
|
||||
if "menu_history" in Dict:
|
||||
for key, timeout in Dict["menu_history"].items():
|
||||
for key, timeout in Dict["menu_history"].copy().items():
|
||||
if now > timeout:
|
||||
del Dict["menu_history"][key]
|
||||
try:
|
||||
del Dict["menu_history"][key]
|
||||
except:
|
||||
pass
|
||||
|
||||
# run migrations
|
||||
if "subs" in Dict or "history" in Dict:
|
||||
@@ -75,7 +76,8 @@ def Start():
|
||||
scheduler.run()
|
||||
|
||||
# bind activities
|
||||
Thread.Create(activity.start)
|
||||
if config.enable_channel:
|
||||
Thread.Create(activity.start)
|
||||
|
||||
if "anon_id" not in Dict:
|
||||
Dict["anon_id"] = get_identifier()
|
||||
@@ -89,45 +91,6 @@ def Start():
|
||||
track_usage("General", "plugin", "start", config.version)
|
||||
|
||||
|
||||
def download_best_subtitles(video_part_map, min_score=0):
|
||||
hearing_impaired = Prefs['subtitles.search.hearingImpaired']
|
||||
languages = config.lang_list
|
||||
if not languages:
|
||||
return
|
||||
|
||||
missing_languages = False
|
||||
for video, part in video_part_map.iteritems():
|
||||
if not Prefs['subtitles.save.filesystem']:
|
||||
# scan for existing metadata subtitles
|
||||
meta_subs = get_subtitles_from_metadata(part)
|
||||
for language, subList in meta_subs.iteritems():
|
||||
if subList:
|
||||
video.subtitle_languages.add(language)
|
||||
Log.Debug("Found metadata subtitle %s for %s", language, video)
|
||||
|
||||
missing_subs = (languages - video.subtitle_languages)
|
||||
|
||||
# all languages are found if we either really have subs for all languages or we only want to have exactly one language
|
||||
# and we've only found one (the case for a selected language, Prefs['subtitles.only_one'] (one found sub matches any language))
|
||||
found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
|
||||
if not missing_subs or found_one_which_is_enough:
|
||||
if found_one_which_is_enough:
|
||||
Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
|
||||
else:
|
||||
Log.Debug('All languages %r exist for %s', languages, video)
|
||||
continue
|
||||
missing_languages = True
|
||||
break
|
||||
|
||||
if missing_languages:
|
||||
Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s" % (min_score, hearing_impaired))
|
||||
|
||||
return subliminal.download_best_subtitles(video_part_map.keys(), languages, min_score, hearing_impaired, providers=config.providers,
|
||||
provider_configs=config.provider_settings, pool_class=config.provider_pool,
|
||||
compute_score=compute_score)
|
||||
Log.Debug("All languages for all requested videos exist. Doing nothing.")
|
||||
|
||||
|
||||
def update_local_media(metadata, media, media_type="movies"):
|
||||
# Look for subtitles
|
||||
if media_type == "movies":
|
||||
@@ -151,12 +114,43 @@ def update_local_media(metadata, media, media_type="movies"):
|
||||
pass
|
||||
|
||||
|
||||
def agent_extract_embedded(videos):
    """
    Extract embedded subtitle streams for each configured language that has no stored
    "embedded" subtitle yet.

    :param videos: iterable of mappings; the "item" value of each entry is the media item to process
    :return: None; any error raised on the way is logged and swallowed
    """
    try:
        storage = get_subtitle_storage()

        for entry in videos:
            media_item = entry["item"]
            known_subs = storage.load_or_new(media_item)

            for media_part in get_all_parts(media_item):
                for wanted_language in config.lang_list:
                    already_extracted = known_subs.get_by_provider(media_part.id, wanted_language, "embedded")
                    active_sub = known_subs.get_any(media_part.id, wanted_language)

                    if already_extracted:
                        Log.Debug("Skipping embedded subtitle extraction for %s, already got %r from %s",
                                  media_item.rating_key, wanted_language, already_extracted[0].id)
                        continue

                    candidates = get_embedded_subtitle_streams(media_part, requested_language=wanted_language,
                                                               get_forced=config.forced_only)
                    if not candidates:
                        continue

                    # only the first returned stream gets extracted; it becomes the current
                    # subtitle only when nothing is stored for this part/language yet
                    first_stream = candidates[0]["stream"]
                    extract_embedded_sub(rating_key=media_item.rating_key, part_id=media_part.id,
                                         stream_index=str(first_stream.index),
                                         language=str(wanted_language), with_mods=True, refresh=False,
                                         set_current=not active_sub)
    except:
        Log.Error("Something went wrong when auto-extracting subtitles, continuing: %s", traceback.format_exc())
|
||||
|
||||
|
||||
class SubZeroAgent(object):
|
||||
agent_type = None
|
||||
agent_type_verbose = None
|
||||
languages = [Locale.Language.English]
|
||||
primary_provider = False
|
||||
score_prefs_key = None
|
||||
debounce = 10
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(SubZeroAgent, self).__init__(*args, **kwargs)
|
||||
@@ -167,6 +161,9 @@ class SubZeroAgent(object):
|
||||
Log.Debug("Sub-Zero %s, %s search" % (config.version, self.agent_type))
|
||||
results.Append(MetadataSearchResult(id='null', score=100))
|
||||
|
||||
def store_blank_subtitle_metadata(self, video_part_map):
    """
    Store subtitle info with an empty subtitle list for every video in the map.

    :param video_part_map: mapping whose keys are the scanned videos
    """
    no_subtitles = dict((video, []) for video in video_part_map.keys())
    store_subtitle_info(video_part_map, no_subtitles, None, mode="a")
|
||||
|
||||
def update(self, metadata, media, lang):
|
||||
if not config.enable_agent:
|
||||
Log.Debug("Skipping Sub-Zero agent(s)")
|
||||
@@ -208,25 +205,71 @@ class SubZeroAgent(object):
|
||||
set_refresh_menu_state(media, media_type=self.agent_type)
|
||||
|
||||
# scanned_video_part_map = {subliminal.Video: plex_part, ...}
|
||||
scanned_video_part_map = scan_videos(videos, kind=self.agent_type)
|
||||
providers = config.get_providers(media_type=self.agent_type)
|
||||
scanned_video_part_map = scan_videos(videos, providers=providers)
|
||||
|
||||
# auto extract embedded
|
||||
if config.embedded_auto_extract:
|
||||
agent_extract_embedded(videos)
|
||||
|
||||
# clear missing subtitles menu data
|
||||
if not scheduler.is_task_running("MissingSubtitles"):
|
||||
scheduler.clear_task_data("MissingSubtitles")
|
||||
|
||||
downloaded_subtitles = None
|
||||
|
||||
# debounce for self.debounce seconds
|
||||
now = datetime.datetime.now()
|
||||
if "last_call" in Dict:
|
||||
last_call = Dict["last_call"]
|
||||
if last_call + datetime.timedelta(seconds=self.debounce) > now:
|
||||
wait = self.debounce - (now - last_call).seconds
|
||||
if wait >= 1:
|
||||
Log.Debug("Waiting %s seconds until continuing", wait)
|
||||
Thread.Sleep(wait)
|
||||
|
||||
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
|
||||
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score)
|
||||
try:
|
||||
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score,
|
||||
throttle_time=self.debounce, providers=providers)
|
||||
except:
|
||||
Log.Exception("Something went wrong when downloading subtitles")
|
||||
|
||||
if downloaded_subtitles is not None:
|
||||
Dict["last_call"] = datetime.datetime.now()
|
||||
|
||||
item_ids = get_media_item_ids(media, kind=self.agent_type)
|
||||
|
||||
whack_missing_parts(scanned_video_part_map)
|
||||
|
||||
downloaded_any = False
|
||||
if downloaded_subtitles:
|
||||
save_subtitles(scanned_video_part_map, downloaded_subtitles, mods=config.default_mods)
|
||||
downloaded_any = any(downloaded_subtitles.values())
|
||||
|
||||
if downloaded_any:
|
||||
save_successful = False
|
||||
try:
|
||||
save_successful = save_subtitles(scanned_video_part_map, downloaded_subtitles,
|
||||
mods=config.default_mods)
|
||||
except:
|
||||
Log.Exception("Something went wrong when saving subtitles")
|
||||
|
||||
track_usage("Subtitle", "refreshed", "download", 1)
|
||||
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
# store item(s) in history
|
||||
for subtitle in video_subtitles:
|
||||
item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
|
||||
history = get_history()
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle)
|
||||
# store SZ meta info even if download wasn't successful
|
||||
if not save_successful:
|
||||
self.store_blank_subtitle_metadata(scanned_video_part_map)
|
||||
|
||||
else:
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
# store item(s) in history
|
||||
for subtitle in video_subtitles:
|
||||
item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
|
||||
history = get_history()
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle)
|
||||
history.destroy()
|
||||
else:
|
||||
# store SZ meta info even if we've downloaded none
|
||||
self.store_blank_subtitle_metadata(scanned_video_part_map)
|
||||
|
||||
update_local_media(metadata, media, media_type=self.agent_type)
|
||||
|
||||
@@ -236,13 +279,17 @@ class SubZeroAgent(object):
|
||||
|
||||
# notify any running tasks about our finished update
|
||||
for item_id in item_ids:
|
||||
scheduler.signal("updated_metadata", item_id)
|
||||
#scheduler.signal("updated_metadata", item_id)
|
||||
|
||||
# resolve existing intent for that id
|
||||
intent.resolve("force", item_id)
|
||||
|
||||
Dict.Save()
|
||||
|
||||
# fsync cache
|
||||
if config.new_style_cache:
|
||||
config.sync_cache()
|
||||
|
||||
|
||||
class SubZeroSubtitlesAgentMovies(SubZeroAgent, Agent.Movies):
|
||||
contributes_to = ['com.plexapp.agents.imdb', 'com.plexapp.agents.xbmcnfo', 'com.plexapp.agents.themoviedb', 'com.plexapp.agents.hama']
|
||||
|
||||
@@ -8,11 +8,11 @@ import urlparse
|
||||
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
|
||||
from subzero.lib.io import FileIO
|
||||
from subzero.constants import PREFIX, PLUGIN_IDENTIFIER
|
||||
from menu_helpers import SubFolderObjectContainer, debounce, set_refresh_menu_state, ZipObject, ObjectContainer
|
||||
from menu_helpers import SubFolderObjectContainer, debounce, set_refresh_menu_state, ZipObject, ObjectContainer, route
|
||||
from main import fatality
|
||||
from support.helpers import timestamp, pad_title
|
||||
from support.config import config
|
||||
@@ -49,6 +49,10 @@ def AdvancedMenu(randomize=None, header=None, message=None):
|
||||
key=Callback(TriggerBetterSubtitles, randomize=timestamp()),
|
||||
title=pad_title("Trigger find better subtitles"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SkipFindBetterSubtitles, randomize=timestamp()),
|
||||
title=pad_title("Skip next find better subtitles (sets last run to now)"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerStorageMaintenance, randomize=timestamp()),
|
||||
title=pad_title("Trigger subtitle storage maintenance"),
|
||||
@@ -57,6 +61,10 @@ def AdvancedMenu(randomize=None, header=None, message=None):
|
||||
key=Callback(TriggerStorageMigration, randomize=timestamp()),
|
||||
title=pad_title("Trigger subtitle storage migration (expensive)"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerCacheMaintenance, randomize=timestamp()),
|
||||
title=pad_title("Trigger cache maintenance (refiners, providers and packs/archives)"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ApplyDefaultMods, randomize=timestamp()),
|
||||
title=pad_title("Apply configured default subtitle mods to all (active) stored subtitles"),
|
||||
@@ -73,6 +81,10 @@ def AdvancedMenu(randomize=None, header=None, message=None):
|
||||
key=Callback(LogStorage, key="ignore", randomize=timestamp()),
|
||||
title=pad_title("Log the plugin's internal ignorelist storage"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(LogStorage, key=None, randomize=timestamp()),
|
||||
title=pad_title("Log the plugin's complete state storage"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetStorage, key="tasks", randomize=timestamp()),
|
||||
title=pad_title("Reset the plugin's scheduled tasks state storage"),
|
||||
@@ -85,6 +97,10 @@ def AdvancedMenu(randomize=None, header=None, message=None):
|
||||
key=Callback(InvalidateCache, randomize=timestamp()),
|
||||
title=pad_title("Invalidate Sub-Zero metadata caches (subliminal)"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetProviderThrottle, randomize=timestamp()),
|
||||
title=pad_title("Reset provider throttle states"),
|
||||
))
|
||||
return oc
|
||||
|
||||
|
||||
@@ -154,6 +170,20 @@ def TriggerBetterSubtitles(randomize=None):
|
||||
)
|
||||
|
||||
|
||||
|
||||
@route(PREFIX + '/skipbetter')
|
||||
@debounce
|
||||
def SkipFindBetterSubtitles(randomize=None):
    """
    Set the last-run time of the "FindBetterSubtitles" task to now.

    :param randomize: not read by this function body
    :return: the advanced menu, carrying a success header and message
    """
    better_subs_task = scheduler.task("FindBetterSubtitles")
    better_subs_task.last_run = datetime.datetime.now()

    return AdvancedMenu(randomize=timestamp(), header='Success', message='FindBetterSubtitles skipped')
|
||||
|
||||
|
||||
@route(PREFIX + '/triggermaintenance')
|
||||
@debounce
|
||||
def TriggerStorageMaintenance(randomize=None):
|
||||
@@ -176,6 +206,17 @@ def TriggerStorageMigration(randomize=None):
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/triggercachemaintenance')
|
||||
@debounce
|
||||
def TriggerCacheMaintenance(randomize=None):
    """
    Dispatch the "CacheMaintenance" scheduler task.

    :param randomize: not read by this function body
    :return: the advanced menu, carrying a success header and message
    """
    scheduler.dispatch_task("CacheMaintenance")

    menu_args = dict(randomize=timestamp(), header='Success', message='TriggerCacheMaintenance triggered')
    return AdvancedMenu(**menu_args)
|
||||
|
||||
|
||||
def apply_default_mods(reapply_current=False):
|
||||
storage = get_subtitle_storage()
|
||||
subs_applied = 0
|
||||
@@ -214,6 +255,7 @@ def apply_default_mods(reapply_current=False):
|
||||
continue
|
||||
|
||||
subs_applied += 1
|
||||
storage.destroy()
|
||||
Log.Debug("Applied mods to %i items" % subs_applied)
|
||||
|
||||
|
||||
@@ -259,7 +301,7 @@ def GetLogsLink():
|
||||
|
||||
elif "Referer" in req_headers:
|
||||
parsed = urlparse.urlparse(req_headers["Referer"])
|
||||
link_base = "%s://%s:%s" % (parsed.scheme, parsed.hostname, parsed.port)
|
||||
link_base = "%s://%s%s" % (parsed.scheme, parsed.hostname, (":%s" % parsed.port) if parsed.port else "")
|
||||
Log.Debug("Using referer-based link_base")
|
||||
get_external_ip = False
|
||||
|
||||
@@ -295,7 +337,10 @@ def DownloadLogs():
|
||||
@debounce
|
||||
def InvalidateCache(randomize=None):
|
||||
from subliminal.cache import region
|
||||
region.invalidate()
|
||||
if config.new_style_cache:
|
||||
region.backend.clear()
|
||||
else:
|
||||
region.invalidate()
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
@@ -333,3 +378,14 @@ def ClearPin(randomize=None):
|
||||
Dict["pin_correct_time"] = None
|
||||
config.locked = True
|
||||
return fatality(force_title="Menu locked", header=" ", no_history=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/reset_throttle')
|
||||
def ResetProviderThrottle(randomize=None):
    """
    Replace the stored "provider_throttle" state with an empty dict and save the plugin Dict.

    :param randomize: not read by this function body
    :return: the advanced menu, carrying a success header and message
    """
    Dict["provider_throttle"] = {}
    Dict.Save()

    menu_args = dict(randomize=timestamp(), header='Success', message='Provider throttles reset')
    return AdvancedMenu(**menu_args)
|
||||
@@ -0,0 +1,27 @@
|
||||
# coding=utf-8
|
||||
|
||||
from support.config import config
|
||||
|
||||
|
||||
def enable_channel_wrapper(func):
    """
    Make a decorator factory (route or handler) conditional on the channel being enabled.

    :param func: original decorator factory
    :return: callable that behaves like :func: when config.enable_channel is set or
             enforce_route=True is passed, and otherwise yields a decorator that hands back
             the to-be-wrapped function unchanged
    """
    def passthrough_factory(*args, **kwargs):
        # ignores the factory arguments and builds the do-nothing decorator
        def passthrough(*a, **k):
            # a[0] is the originally to-be-wrapped function
            return a[0]

        return passthrough

    def wrap(*args, **kwargs):
        # enforce_route is consumed here and never forwarded to the chosen factory
        enforce_route = kwargs.pop("enforce_route", None)
        if config.enable_channel or enforce_route:
            factory = func
        else:
            factory = passthrough_factory
        return factory(*args, **kwargs)

    return wrap
|
||||
@@ -1,24 +1,30 @@
|
||||
# coding=utf-8
|
||||
import os
|
||||
import subprocess
|
||||
import traceback
|
||||
|
||||
from subzero.language import Language
|
||||
|
||||
from sub_mod import SubtitleModificationsMenu
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, add_ignore_options, get_item_task_data, \
|
||||
set_refresh_menu_state
|
||||
set_refresh_menu_state, route, extract_embedded_sub
|
||||
|
||||
from refresh_item import RefreshItem
|
||||
from subzero.constants import PREFIX
|
||||
from support.config import config
|
||||
from support.helpers import timestamp, cast_bool, df, get_language
|
||||
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub
|
||||
from support.lib import Plex
|
||||
from support.plex_media import get_plex_metadata, scan_videos, PMSMediaProxy
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.helpers import timestamp, df, get_language, display_language, quote_args, get_language_from_stream
|
||||
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub, get_item_title, save_stored_sub
|
||||
from support.plex_media import get_plex_metadata, get_part, get_embedded_subtitle_streams
|
||||
from support.scanning import scan_videos
|
||||
from support.scheduler import scheduler
|
||||
from support.storage import get_subtitle_storage
|
||||
|
||||
|
||||
# fixme: needs kwargs cleanup
|
||||
|
||||
@route(PREFIX + '/item/{rating_key}/actions')
|
||||
@debounce
|
||||
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None):
|
||||
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None, header=None):
|
||||
"""
|
||||
displays the item details menu of an item that doesn't contain any deeper tree, such as a movie or an episode
|
||||
:param rating_key:
|
||||
@@ -31,12 +37,22 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
from interface.main import IgnoreMenu
|
||||
|
||||
title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)
|
||||
item = get_item(rating_key)
|
||||
item = plex_item = get_item(rating_key)
|
||||
current_kind = get_item_kind_from_rating_key(rating_key)
|
||||
|
||||
timeout = 30
|
||||
|
||||
oc = SubFolderObjectContainer(title2=title, replace_parent=True)
|
||||
oc = SubFolderObjectContainer(title2=title, replace_parent=True, header=header)
|
||||
|
||||
if not item:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, rating_key=rating_key, title=title, base_title=base_title,
|
||||
item_title=item_title, randomize=timestamp()),
|
||||
title=u"Item not found: %s!" % item_title,
|
||||
summary="Plex didn't return any information about the item, please refresh it and come back later",
|
||||
thumb=default_thumb
|
||||
))
|
||||
return oc
|
||||
|
||||
# add back to season for episode
|
||||
if current_kind == "episode":
|
||||
@@ -53,13 +69,6 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
thumb=season.thumb or default_thumb
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(UpdateLocalMedia, rating_key=rating_key, title=title, item_title=item_title, base_title=base_title,
|
||||
randomize=timestamp()),
|
||||
title=u"Find local subtitles (doesn't refresh metadata)",
|
||||
summary="Searches for locally available subtitles",
|
||||
thumb=item.thumb or default_thumb
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, randomize=timestamp(),
|
||||
timeout=timeout * 1000),
|
||||
@@ -79,9 +88,6 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load_or_new(item)
|
||||
|
||||
# get the plex item
|
||||
plex_item = list(Plex["library"].metadata(rating_key))[0]
|
||||
|
||||
# look for subtitles for all available media parts and all of their languages
|
||||
has_multiple_parts = len(plex_item.media) > 1
|
||||
part_index = 0
|
||||
@@ -94,24 +100,19 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
part_id = str(part.id)
|
||||
part_index += 1
|
||||
|
||||
part_index_addon = ""
|
||||
part_summary_addon = ""
|
||||
if has_multiple_parts:
|
||||
part_index_addon = u"File %s: " % part_index
|
||||
part_summary_addon = "%s " % filename
|
||||
|
||||
# iterate through all configured languages
|
||||
for lang in config.lang_list:
|
||||
lang_a2 = lang.alpha2
|
||||
# ietf lang?
|
||||
if cast_bool(Prefs["subtitles.language.ietf"]) and "-" in lang_a2:
|
||||
lang_a2 = lang_a2.split("-")[0]
|
||||
|
||||
# get corresponding stored subtitle data for that media part (physical media item), for language
|
||||
current_sub = stored_subs.get_any(part_id, lang_a2)
|
||||
current_sub = stored_subs.get_any(part_id, lang)
|
||||
current_sub_id = None
|
||||
current_sub_provider_name = None
|
||||
|
||||
part_index_addon = ""
|
||||
part_summary_addon = ""
|
||||
if has_multiple_parts:
|
||||
part_index_addon = u"File %s: " % part_index
|
||||
part_summary_addon = "%s " % filename
|
||||
|
||||
summary = u"%sNo current subtitle in storage" % part_summary_addon
|
||||
current_score = None
|
||||
if current_sub:
|
||||
@@ -121,46 +122,77 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
|
||||
summary = u"%sCurrent subtitle: %s (added: %s, %s), Language: %s, Score: %i, Storage: %s" % \
|
||||
(part_summary_addon, current_sub.provider_name, df(current_sub.date_added),
|
||||
current_sub.mode_verbose, lang, current_sub.score, current_sub.storage_type)
|
||||
current_sub.mode_verbose, display_language(lang), current_sub.score,
|
||||
current_sub.storage_type)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_title=item_title, language=lang, language_name=lang.name, current_id=current_sub_id,
|
||||
item_type=plex_item.type, filename=filename, current_data=summary,
|
||||
randomize=timestamp(), current_provider=current_sub_provider_name,
|
||||
current_score=current_score),
|
||||
title=u"%sActions for %s subtitle" % (part_index_addon, lang.name),
|
||||
summary=summary
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_title=item_title, language=lang, language_name=display_language(lang),
|
||||
current_id=current_sub_id,
|
||||
item_type=plex_item.type, filename=filename, current_data=summary,
|
||||
randomize=timestamp(), current_provider=current_sub_provider_name,
|
||||
current_score=current_score),
|
||||
title=u"%sManage %s subtitle" % (part_index_addon, display_language(lang)),
|
||||
summary=summary
|
||||
))
|
||||
else:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_title=item_title, language=lang, language_name=display_language(lang),
|
||||
current_id=current_sub_id,
|
||||
item_type=plex_item.type, filename=filename, current_data=summary,
|
||||
randomize=timestamp(), current_provider=current_sub_provider_name,
|
||||
current_score=current_score),
|
||||
title=u"%sList %s subtitles" % (part_index_addon, display_language(lang)),
|
||||
summary=summary
|
||||
))
|
||||
|
||||
add_ignore_options(oc, "videos", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
if config.plex_transcoder:
|
||||
# embedded subtitles
|
||||
embedded_count = 0
|
||||
embedded_langs = []
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
|
||||
lang = get_language_from_stream(stream.language_code)
|
||||
|
||||
if not lang and config.treat_und_as_first:
|
||||
lang = list(config.lang_list)[0]
|
||||
|
||||
if lang:
|
||||
embedded_langs.append(lang)
|
||||
embedded_count += 1
|
||||
|
||||
if embedded_count:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListEmbeddedSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_type=plex_item.type, item_title=item_title, base_title=base_title,
|
||||
randomize=timestamp()),
|
||||
title=u"%sEmbedded subtitles (%s)" % (part_index_addon, ", ".join(display_language(l) for l in
|
||||
set(embedded_langs))),
|
||||
summary=u"Extract and activate embedded subtitle streams"
|
||||
))
|
||||
|
||||
ignore_title = item_title
|
||||
if current_kind == "episode":
|
||||
ignore_title = get_item_title(item)
|
||||
add_ignore_options(oc, "videos", title=ignore_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
subtitle_storage.destroy()
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/update_local_media/{rating_key}', force=bool)
|
||||
@debounce
|
||||
def UpdateLocalMedia(**kwargs):
|
||||
from support.localmedia import find_subtitles
|
||||
rating_key = kwargs["rating_key"]
|
||||
parts = PMSMediaProxy(rating_key).get_all_parts()
|
||||
for part in parts:
|
||||
find_subtitles(part)
|
||||
|
||||
kwargs.pop("randomize")
|
||||
|
||||
return ItemDetailsMenu(**kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/current_sub/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
@route(PREFIX + '/item/current_sub/{rating_key}/{part_id}')
|
||||
def SubtitleOptionsMenu(**kwargs):
|
||||
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
|
||||
oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True, header=kwargs.get("header"),
|
||||
message=kwargs.get("message"))
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
language = kwargs["language"]
|
||||
current_data = kwargs["current_data"]
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
subs_count = stored_subs.count(part_id, language)
|
||||
kwargs.pop("randomize")
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
@@ -170,17 +202,232 @@ def SubtitleOptionsMenu(**kwargs):
|
||||
summary=kwargs["current_data"],
|
||||
thumb=default_thumb
|
||||
))
|
||||
if subs_count:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListStoredSubsForItemMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"Select active %s subtitle" % kwargs["language_name"],
|
||||
summary=u"%d subtitles in storage" % subs_count
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"List %s subtitles" % kwargs["language_name"],
|
||||
title=u"List available %s subtitles" % kwargs["language_name"],
|
||||
summary=kwargs["current_data"]
|
||||
))
|
||||
if current_sub:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"Modify %s subtitle" % kwargs["language_name"],
|
||||
title=u"Modify current %s subtitle" % kwargs["language_name"],
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
))
|
||||
|
||||
if current_sub.provider_name != "embedded":
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(BlacklistSubtitleMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"Blacklist current %s subtitle and search for a new one" % kwargs["language_name"],
|
||||
summary=current_data
|
||||
))
|
||||
|
||||
current_bl, subs = stored_subs.get_blacklist(part_id, language)
|
||||
if current_bl:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ManageBlacklistMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"Manage blacklist (%s contained)" % len(current_bl),
|
||||
summary=u"Inspect currently blacklisted subtitles"
|
||||
))
|
||||
|
||||
storage.destroy()
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/list_stored_subs/{rating_key}/{part_id}')
|
||||
def ListStoredSubsForItemMenu(**kwargs):
    """
    List every subtitle stored for one media part and language, newest first, so that one of
    them can be selected as the active subtitle.

    :param kwargs: menu state; "title", "rating_key", "part_id", "language" (IETF code) and
                   "randomize" are read here, the remaining entries are forwarded to the
                   callback of each menu entry
    :return: object container holding one entry per stored subtitle
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])

    _, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    all_subs = stored_subs.get_all(part_id, language)
    kwargs.pop("randomize")

    # the "current" entry only holds the key of the active subtitle, it is not a subtitle itself
    stored_entries = [entry for entry in all_subs.items() if entry[0] != "current"]

    for key, subtitle in sorted(stored_entries, key=lambda entry: entry[1].date_added, reverse=True):
        is_current = key == all_subs["current"]

        summary = u"added: %s, %s, Language: %s, Score: %i, Storage: %s" % \
                  (df(subtitle.date_added),
                   subtitle.mode_verbose, display_language(language), subtitle.score,
                   subtitle.storage_type)

        sub_name = subtitle.provider_name
        if sub_name == "embedded":
            sub_name += " (%s)" % subtitle.id

        oc.add(DirectoryObject(
            # subtitle keys are tuples; they travel through the callback as a "__"-joined string
            key=Callback(SelectStoredSubForItemMenu, randomize=timestamp(), sub_key="__".join(key), **kwargs),
            title=u"%s%s, Score: %s" % ("Current: " if is_current else "Stored: ", sub_name,
                                        subtitle.score),
            summary=summary
        ))

    # release the storage handle, as the other menus using get_current_sub do
    storage.destroy()

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/set_current_sub/{rating_key}/{part_id}')
|
||||
@debounce
|
||||
def SelectStoredSubForItemMenu(**kwargs):
    """
    Make one of the stored subtitles the current one for a part/language and save it.

    :param kwargs: menu state; "rating_key", "part_id", "language" (IETF code), "item_type",
                   "sub_key" ("__"-joined subtitle key) and "randomize" are read here
    :return: the subtitle options menu with a success header and message
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])
    item_type = kwargs["item_type"]
    # the key arrives as a "__"-joined string and is turned back into the tuple used in storage
    chosen_key = tuple(kwargs.pop("sub_key").split("__"))

    plex_item = get_item(rating_key)
    storage = get_subtitle_storage()
    stored_subs = storage.load(plex_item.rating_key)

    language_subs = stored_subs.get_all(part_id, language)
    chosen_sub = language_subs[chosen_key]

    # point the "current" marker at the chosen subtitle
    language_subs["current"] = chosen_key

    save_stored_sub(chosen_sub, rating_key, part_id, language, item_type, plex_item=plex_item, storage=storage,
                    stored_subs=stored_subs)

    storage.destroy()

    kwargs.pop("randomize")
    kwargs.update(header='Success', message='Subtitle saved to disk')

    return SubtitleOptionsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/blacklist_recent/{language}')
|
||||
@route(PREFIX + '/item/blacklist_recent')
|
||||
def BlacklistRecentSubtitleMenu(**kwargs):
    """
    Run BlacklistAllPartsSubtitleMenu for the first entry of Dict["last_played_items"].

    :param kwargs: forwarded to BlacklistAllPartsSubtitleMenu; "rating_key" is set here
    :return: result of BlacklistAllPartsSubtitleMenu, or None when no played item is recorded
    """
    recently_played = Dict["last_played_items"] if "last_played_items" in Dict else None
    if not recently_played:
        return

    kwargs["rating_key"] = recently_played[0]
    return BlacklistAllPartsSubtitleMenu(**kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/blacklist_all/{rating_key}/{language}')
|
||||
@route(PREFIX + '/item/blacklist_all/{rating_key}')
|
||||
def BlacklistAllPartsSubtitleMenu(**kwargs):
    """
    Blacklist the current subtitle of every stored part of an item, then refresh the item.

    :param kwargs: "rating_key" identifies the item; the optional "language" (IETF code) limits
                   blacklisting to that language, otherwise every stored language is processed
    :return: result of RefreshItem, or None when the item can't be resolved
    """
    rating_key = kwargs.get("rating_key")
    language = kwargs.get("language")
    if language:
        language = Language.fromietf(language)

    item = get_item(rating_key)

    if not item:
        return

    item_title = get_item_title(item)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)
    for part_id, languages in stored_subs.parts.iteritems():
        sub_dict = languages
        if language:
            key = str(language)
            if key not in sub_dict:
                continue

            sub_dict = {key: sub_dict[key]}

        # Use a dedicated loop variable: rebinding `language` here would leak the last iterated
        # language into the filter above and silently restrict every following part to it.
        for sub_language, subs in sub_dict.iteritems():
            if "current" in subs:
                stored_subs.blacklist(part_id, sub_language, subs["current"])
                Log.Info("Added %s to blacklist", subs["current"])

    subtitle_storage.save(stored_subs)
    subtitle_storage.destroy()

    return RefreshItem(rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(), timeout=30000)
|
||||
|
||||
|
||||
def blacklist(rating_key, part_id, language):
    """
    Add the current subtitle of the given part/language to the blacklist and persist the change.

    :param rating_key: rating key of the item
    :param part_id: id of the part the subtitle belongs to
    :param language: language of the subtitle
    :return: True when a subtitle was blacklisted, None when there is no current subtitle
    """
    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    try:
        if not current_sub:
            return

        stored_subs.blacklist(part_id, language, current_sub.key)
        storage.save(stored_subs)
    finally:
        # the storage is handed back together with the subtitle; release it on every path, including the
        # early return above
        if storage is not None:
            storage.destroy()

    Log.Info("Added %s to blacklist", current_sub.key)

    return True
|
||||
|
||||
|
||||
@route(PREFIX + '/item/blacklist/{rating_key}/{part_id}')
@debounce
def BlacklistSubtitleMenu(**kwargs):
    """
    Blacklist the current subtitle of one part/language and trigger a forced refresh of the item.

    :param kwargs: needs ``rating_key``, ``part_id``, ``language``, ``item_title`` and ``randomize``
        (a missing one raises KeyError)
    :return: the result of RefreshItem
    """
    item_key = kwargs["rating_key"]
    part = kwargs["part_id"]
    lang = kwargs["language"]
    display_title = kwargs["item_title"]

    blacklist(item_key, part, lang)
    kwargs.pop("randomize")

    return RefreshItem(rating_key=item_key, item_title=display_title, force=True, randomize=timestamp(),
                       timeout=30000)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/manage_blacklist/{rating_key}/{part_id}', force=bool)
@debounce
def ManageBlacklistMenu(**kwargs):
    """
    Menu listing the blacklisted subtitles of one part/language; each entry calls this menu again with
    ``remove_sub_key`` set, which removes that entry from the blacklist.

    Reads ``title``, ``rating_key``, ``part_id``, ``language``, ``item_title``, ``current_data`` and ``randomize``
    from kwargs (a missing one raises KeyError); ``remove_sub_key`` is optional.

    :param kwargs: menu arguments, see above
    :return: the populated SubFolderObjectContainer
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    # popped (not just read) so it is not carried over into the **kwargs of the callbacks built below
    remove_sub_key = kwargs.pop("remove_sub_key", None)

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    # NOTE(review): the blacklist is fetched before the removal below; whether the listing reflects the removal
    # depends on get_blacklist returning a live reference - confirm
    current_bl, subs = stored_subs.get_blacklist(part_id, language)

    if remove_sub_key:
        # blacklist keys are (provider_name, subtitle_id) tuples; they travel through the Callback joined by "__"
        remove_sub_key = tuple(remove_sub_key.split("__"))
        stored_subs.blacklist(part_id, language, remove_sub_key, add=False)
        storage.save(stored_subs)
        Log.Info("Removed %s from blacklist", remove_sub_key)

    # removed so that passing randomize=timestamp() next to **kwargs below does not clash
    kwargs.pop("randomize")

    # back button
    oc.add(DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
                     title=kwargs["title"], randomize=timestamp()),
        title=u"< Back to %s" % kwargs["title"],
        summary=kwargs["current_data"],
        thumb=default_thumb
    ))

    def sorter(pair):
        # thanks RestrictedModule parser for messing with lambda (x, y)
        return pair[1]["date_added"]

    # newest blacklist entries first
    for sub_key, data in sorted(current_bl.iteritems(), key=sorter, reverse=True):
        provider_name, subtitle_id = sub_key
        # NOTE(review): current_sub is used unchecked here, while blacklist() treats it as possibly empty - confirm
        title = u"%s, %s (added: %s, %s), Language: " \
                u"%s, Score: %i, Storage: %s" % (provider_name, subtitle_id, df(data["date_added"]),
                                                 current_sub.get_mode_verbose(data["mode"]),
                                                 display_language(Language.fromietf(language)), data["score"],
                                                 data["storage_type"])
        oc.add(DirectoryObject(
            key=Callback(ManageBlacklistMenu, remove_sub_key="__".join(sub_key), randomize=timestamp(), **kwargs),
            title=title,
            summary=u"Remove subtitle from blacklist"
        ))

    storage.destroy()

    return oc
|
||||
|
||||
|
||||
@@ -209,18 +456,22 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
))
|
||||
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
plex_part = None
|
||||
if not config.low_impact_mode:
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True)
|
||||
|
||||
if not scanned_parts:
|
||||
Log.Error("Couldn't list available subtitles for %s", rating_key)
|
||||
return oc
|
||||
if not scanned_parts:
|
||||
Log.Error("Couldn't list available subtitles for %s", rating_key)
|
||||
return oc
|
||||
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
video_display_data = [video.format] if video.format else []
|
||||
if video.release_group:
|
||||
video_display_data.append(u"by %s" % video.release_group)
|
||||
video_display_data = " ".join(video_display_data)
|
||||
video_display_data = [video.format] if video.format else []
|
||||
if video.release_group:
|
||||
video_display_data.append(u"by %s" % video.release_group)
|
||||
video_display_data = " ".join(video_display_data)
|
||||
else:
|
||||
video_display_data = metadata["filename"]
|
||||
|
||||
current_display = (u"Current: %s (%s) " % (current_provider, current_score) if current_provider else "")
|
||||
if not running:
|
||||
@@ -233,6 +484,18 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
summary=u"%sFilename: %s" % (current_display, filename),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
if search_results == "found_none":
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title,
|
||||
language=language, filename=filename, current_data=current_data, force=True,
|
||||
part_id=part_id, title=title, current_id=current_id, item_type=item_type,
|
||||
current_provider=current_provider, current_score=current_score,
|
||||
randomize=timestamp()),
|
||||
title=u"No subtitles found",
|
||||
summary=u"%sFilename: %s" % (current_display, filename),
|
||||
thumb=default_thumb
|
||||
))
|
||||
else:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title,
|
||||
@@ -240,28 +503,45 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
part_id=part_id, title=title, current_id=current_id, item_type=item_type,
|
||||
current_provider=current_provider, current_score=current_score,
|
||||
randomize=timestamp()),
|
||||
title=u"Searching for %s subs (%s), refresh here ..." % (get_language(language).name, video_display_data),
|
||||
title=u"Searching for %s subs (%s), refresh here ..." % (display_language(get_language(language)),
|
||||
video_display_data),
|
||||
summary=u"%sFilename: %s" % (current_display, filename),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
if not search_results:
|
||||
if not search_results or search_results == "found_none":
|
||||
return oc
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
current_bl, subs = stored_subs.get_blacklist(part_id, language)
|
||||
|
||||
seen = []
|
||||
for subtitle in search_results:
|
||||
if subtitle.id in seen:
|
||||
continue
|
||||
|
||||
bl_addon = ""
|
||||
if (str(subtitle.provider_name), str(subtitle.id)) in current_bl:
|
||||
bl_addon = "Blacklisted "
|
||||
|
||||
wrong_fps_addon = ""
|
||||
if subtitle.wrong_fps:
|
||||
wrong_fps_addon = " (wrong FPS, sub: %s, media: %s)" % (subtitle.fps, plex_part.fps)
|
||||
if plex_part:
|
||||
wrong_fps_addon = " (wrong FPS, sub: %s, media: %s)" % (subtitle.fps, plex_part.fps)
|
||||
else:
|
||||
wrong_fps_addon = " (wrong FPS, sub: %s, media: unknown, low impact mode)" % subtitle.fps
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerDownloadSubtitle, rating_key=rating_key, randomize=timestamp(), item_title=item_title,
|
||||
subtitle_id=str(subtitle.id), language=language),
|
||||
title=u"%s: %s, score: %s%s" % ("Available" if current_id != subtitle.id else "Current",
|
||||
subtitle.provider_name, subtitle.score, wrong_fps_addon),
|
||||
title=u"%s%s: %s, score: %s%s" % (bl_addon, "Available" if current_id != subtitle.id else "Current",
|
||||
subtitle.provider_name, subtitle.score, wrong_fps_addon),
|
||||
summary=u"Release: %s, Matches: %s" % (subtitle.release_info, ", ".join(subtitle.matches)),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
seen.append(subtitle.id)
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@@ -284,4 +564,77 @@ def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None,
|
||||
else:
|
||||
scheduler.dispatch_task("DownloadSubtitleForItem", rating_key=rating_key, subtitle=download_subtitle)
|
||||
|
||||
scheduler.clear_task_data("AvailableSubsForItem")
|
||||
|
||||
return fatality(randomize=timestamp(), header=" ", replace_parent=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/embedded/{rating_key}/{part_id}')
def ListEmbeddedSubsForItemMenu(**kwargs):
    """
    Menu offering, for every embedded subtitle stream of a part that has a language, one entry to extract it
    with default mods and one to extract it as is.

    :param kwargs: needs ``rating_key``, ``part_id``, ``title``, ``item_title``, ``base_title`` and ``randomize``
        (a missing one raises KeyError)
    :return: the populated SubFolderObjectContainer
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    title = kwargs["title"]
    # removed so the callbacks below can pass a fresh randomize next to **kwargs
    kwargs.pop("randomize")

    oc = SubFolderObjectContainer(title2=title, replace_parent=True)

    # back button
    back_callback = Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
                             base_title=kwargs["base_title"], title=kwargs["item_title"], randomize=timestamp())
    oc.add(DirectoryObject(
        key=back_callback,
        title=u"< Back to %s" % kwargs["title"],
        thumb=default_thumb
    ))

    part = get_part(get_item(rating_key), part_id)
    if not part:
        return oc

    for stream_data in get_embedded_subtitle_streams(part, skip_duplicate_unknown=False):
        language = stream_data["language"]
        is_unknown = stream_data["is_unknown"]
        stream = stream_data["stream"]

        # streams without a language get no menu entries
        if not language:
            continue

        forced = stream.forced
        # both entries share the same description parts
        description = (stream.index, display_language(language),
                       " (unknown)" if is_unknown else "",
                       " (forced)" if forced else "",
                       (" (\"%s\")" % stream.title) if stream.title else "")

        oc.add(DirectoryObject(
            key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
                         stream_index=str(stream.index), language=language, with_mods=True, **kwargs),
            title=u"Extract stream %s, %s%s%s%s with default mods" % description,
        ))
        oc.add(DirectoryObject(
            key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
                         stream_index=str(stream.index), language=language, **kwargs),
            title=u"Extract stream %s, %s%s%s%s" % description,
        ))

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/extract_embedded/{rating_key}/{part_id}/{stream_index}')
@debounce
def TriggerExtractEmbeddedSubForItemMenu(**kwargs):
    """
    Start the extraction of one embedded subtitle stream in a thread and return to the item details menu.

    :param kwargs: needs ``rating_key``, ``randomize``, ``item_type``, ``stream_index``, ``part_id``,
        ``language`` and ``item_title``; ``with_mods`` is optional. All of them are handed to
        extract_embedded_sub before the menu-only cleanup below.
    :return: the result of ItemDetailsMenu, carrying a header describing the triggered extraction
    """
    item_key = kwargs["rating_key"]
    part = kwargs.get("part_id")
    index = kwargs.get("stream_index")

    # the worker receives the complete, still unmodified kwargs
    Thread.Create(extract_embedded_sub, **kwargs)
    header = u"Extracting of embedded subtitle %s of part %s:%s triggered" % (index, item_key, part)

    # strip everything that is not forwarded to ItemDetailsMenu
    for consumed in ("randomize", "item_type", "stream_index", "part_id"):
        kwargs.pop(consumed)
    kwargs.pop("with_mods", False)
    kwargs.pop("language")

    kwargs.update(title=kwargs["item_title"], header=header)

    return ItemDetailsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
|
||||
@@ -2,13 +2,12 @@
|
||||
|
||||
from subzero.constants import PREFIX, TITLE, ART
|
||||
from support.config import config
|
||||
from support.helpers import pad_title, timestamp, df, get_plex_item_display_title
|
||||
from support.helpers import pad_title, timestamp, df, display_language
|
||||
from support.scheduler import scheduler
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, \
|
||||
get_item_kind_from_item
|
||||
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_ignore_options,\
|
||||
ObjectContainer
|
||||
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, get_item_title
|
||||
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_ignore_options, \
|
||||
ObjectContainer, route, handler
|
||||
from item_details import ItemDetailsMenu
|
||||
|
||||
|
||||
@@ -92,10 +91,9 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentMissingSubtitlesMenu, randomize=timestamp()),
|
||||
title="Items with missing subtitles",
|
||||
summary="Shows the items honoring the configured 'Item age to be considered recent'-setting (%s)"
|
||||
" and allowing you to individually (force-) refresh their metadata/subtitles. " %
|
||||
Prefs["scheduler.item_is_recent_age"],
|
||||
title="Show recently added items with missing subtitles",
|
||||
summary="Lists items with missing subtitles. Click on \"Find recent items with missing subs\" "
|
||||
"to update list",
|
||||
thumb=R("icon-missing.jpg")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
@@ -110,11 +108,13 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
task = scheduler.task(task_name)
|
||||
|
||||
if task.ready_for_display:
|
||||
task_state = "Running: %s/%s (%s%%)" % (len(task.items_done), len(task.items_searching), task.percentage)
|
||||
task_state = "Running: %s/%s (%s%%)" % (task.items_done, task.items_searching, task.percentage)
|
||||
else:
|
||||
task_state = "Last scheduler run: %s; Next scheduled run: %s; Last runtime: %s" % (
|
||||
df(scheduler.last_run(task_name)) or "never",
|
||||
df(scheduler.next_run(task_name)) or "never",
|
||||
lr = scheduler.last_run(task_name)
|
||||
nr = scheduler.next_run(task_name)
|
||||
task_state = "Last run: %s; Next scheduled run: %s; Last runtime: %s" % (
|
||||
df(scheduler.last_run(task_name)) if lr else "never",
|
||||
df(scheduler.next_run(task_name)) if nr else "never",
|
||||
str(task.last_run_time).split(".")[0])
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
@@ -158,6 +158,19 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
))
|
||||
|
||||
if not only_refresh:
|
||||
if "provider_throttle" in Dict and Dict["provider_throttle"].keys():
|
||||
summary_data = []
|
||||
for provider, data in Dict["provider_throttle"].iteritems():
|
||||
reason, until, desc = data
|
||||
summary_data.append("%s until %s (%s)" % (provider, until.strftime("%y/%m/%d %H:%M"), reason))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, force_title=" ", randomize=timestamp()),
|
||||
title=pad_title("Throttled providers: %s" % ", ".join(Dict["provider_throttle"].keys())),
|
||||
summary=", ".join(summary_data),
|
||||
thumb=R("icon-throttled.jpg")
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(AdvancedMenu),
|
||||
title=pad_title("Advanced functions"),
|
||||
@@ -184,15 +197,13 @@ def RecentlyPlayedMenu():
|
||||
oc = SubFolderObjectContainer(title2=base_title, replace_parent=True)
|
||||
|
||||
for item in [get_item(rating_key) for rating_key in Dict["last_played_items"]]:
|
||||
kind = get_item_kind_from_item(item)
|
||||
if kind not in ("episode", "movie"):
|
||||
if not item:
|
||||
continue
|
||||
|
||||
if kind == "episode":
|
||||
item_title = get_plex_item_display_title(item, "show", parent=item.season, section_title=None,
|
||||
parent_title=item.show.title)
|
||||
else:
|
||||
item_title = get_plex_item_display_title(item, kind, section_title=None)
|
||||
if getattr(getattr(item, "__class__"), "__name__") not in ("Episode", "Movie"):
|
||||
continue
|
||||
|
||||
item_title = get_item_title(item)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
title=item_title,
|
||||
@@ -230,7 +241,7 @@ def RecentMissingSubtitlesMenu(force=False, randomize=None):
|
||||
if not running:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentMissingSubtitlesMenu, force=True, randomize=timestamp()),
|
||||
title=u"Get items with missing subtitles",
|
||||
title=u"Find recent items with missing subtitles",
|
||||
thumb=default_thumb
|
||||
))
|
||||
else:
|
||||
@@ -246,7 +257,7 @@ def RecentMissingSubtitlesMenu(force=False, randomize=None):
|
||||
key=Callback(ItemDetailsMenu, title=title + " > " + item_title, item_title=item_title,
|
||||
rating_key=item_id),
|
||||
title=item_title,
|
||||
summary="Missing: %s" % ", ".join(l.name for l in missing_languages),
|
||||
summary="Missing: %s" % ", ".join(display_language(l) for l in missing_languages),
|
||||
thumb=get_item_thumb(item) or default_thumb
|
||||
))
|
||||
|
||||
|
||||
+171
-37
@@ -2,22 +2,28 @@
|
||||
import locale
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import traceback
|
||||
|
||||
import logger
|
||||
import copy
|
||||
|
||||
from requests import HTTPError
|
||||
from item_details import ItemDetailsMenu
|
||||
from refresh_item import RefreshItem
|
||||
from menu_helpers import add_ignore_options, dig_tree, set_refresh_menu_state, \
|
||||
should_display_ignore, enable_channel_wrapper, default_thumb, debounce, ObjectContainer, SubFolderObjectContainer
|
||||
default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route, \
|
||||
extract_embedded_sub
|
||||
from main import fatality, IgnoreMenu
|
||||
from advanced import DispatchRestart
|
||||
from subzero.constants import ART, PREFIX, DEPENDENCY_MODULE_NAMES
|
||||
from support.plex_media import get_all_parts, get_embedded_subtitle_streams
|
||||
from support.scheduler import scheduler
|
||||
from support.config import config
|
||||
from support.helpers import timestamp, df
|
||||
from support.helpers import timestamp, df, display_language
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_all_items, get_items_info, \
|
||||
get_item_kind_from_rating_key, get_item
|
||||
from support.items import get_all_items, get_items_info, get_item_kind_from_rating_key, get_item, MI_KEY, get_item_title
|
||||
from support.storage import get_subtitle_storage
|
||||
|
||||
# init GUI
|
||||
ObjectContainer.art = R(ART)
|
||||
@@ -25,11 +31,7 @@ ObjectContainer.no_cache = True
|
||||
|
||||
# default thumb for DirectoryObjects
|
||||
DirectoryObject.thumb = default_thumb
|
||||
|
||||
# noinspection PyUnboundLocalVariable
|
||||
route = enable_channel_wrapper(route)
|
||||
# noinspection PyUnboundLocalVariable
|
||||
handler = enable_channel_wrapper(handler)
|
||||
Plugin.AddViewGroup("full_details", viewMode="InfoList", mediaType="items", type="list", summary=2)
|
||||
|
||||
|
||||
@route(PREFIX + '/section/firstLetter/key', deeper=bool)
|
||||
@@ -56,7 +58,7 @@ def FirstLetterMetadataMenu(rating_key, key, title=None, base_title=None, displa
|
||||
|
||||
@route(PREFIX + '/section/contents', display_items=bool)
|
||||
def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, previous_item_type=None,
|
||||
previous_rating_key=None, randomize=None):
|
||||
previous_rating_key=None, header=None, randomize=None):
|
||||
"""
|
||||
displays the contents of a section based on whether it has a deeper tree or not (movies->movie (item) list; series->series list)
|
||||
:param rating_key:
|
||||
@@ -70,16 +72,18 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
title = unicode(title)
|
||||
item_title = title
|
||||
title = base_title + " > " + title
|
||||
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
|
||||
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True, header=header,
|
||||
view_group="full_details")
|
||||
|
||||
current_kind = get_item_kind_from_rating_key(rating_key)
|
||||
|
||||
if display_items:
|
||||
timeout = 30
|
||||
show = None
|
||||
|
||||
# add back to series for season
|
||||
if current_kind == "season":
|
||||
timeout = 360
|
||||
timeout = 720
|
||||
|
||||
show = get_item(previous_rating_key)
|
||||
oc.add(DirectoryObject(
|
||||
@@ -89,16 +93,43 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
thumb=show.thumb or default_thumb
|
||||
))
|
||||
elif current_kind == "series":
|
||||
timeout = 1800
|
||||
# it shouldn't take more than 6 minutes to scan all of a series' files and determine the force refresh
|
||||
timeout = 3600
|
||||
|
||||
items = get_all_items(key="children", value=rating_key, base="library/metadata")
|
||||
kind, deeper = get_items_info(items)
|
||||
dig_tree(oc, items, MetadataMenu,
|
||||
pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": kind,
|
||||
"previous_rating_key": rating_key})
|
||||
|
||||
# we don't know exactly where we are here, only add ignore option to series
|
||||
if should_display_ignore(items, previous=previous_item_type):
|
||||
add_ignore_options(oc, "series", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
if current_kind in ("series", "season"):
|
||||
item = get_item(rating_key)
|
||||
sub_title = get_item_title(item)
|
||||
add_ignore_options(oc, current_kind, title=sub_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
|
||||
# mass-extract embedded
|
||||
if current_kind == "season" and config.plex_transcoder:
|
||||
for lang in config.lang_list:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=show.section.title, display_items=display_items, item_title=item_title,
|
||||
title=title,
|
||||
previous_item_type=previous_item_type, with_mods=True,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=u"Extract missing %s embedded subtitles with default mods" % display_language(lang),
|
||||
summary="Extracts the not yet extracted embedded subtitles of all episodes for the current season "
|
||||
"with all configured default modifications"
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=show.section.title, display_items=display_items, item_title=item_title,
|
||||
title=title,
|
||||
previous_item_type=previous_item_type, with_mods=False,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=u"Extract missing %s embedded subtitles" % display_language(lang),
|
||||
summary="Extracts the not yet extracted embedded subtitles of all episodes for the current season"
|
||||
))
|
||||
|
||||
# add refresh
|
||||
oc.add(DirectoryObject(
|
||||
@@ -120,6 +151,48 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/season/extract_embedded/{rating_key}/{language}')
def SeasonExtractEmbedded(**kwargs):
    """
    Start the extraction of the embedded subtitles of a season in a thread and return to the season's menu.

    :param kwargs: needs ``language``, ``with_mods``, ``item_title``, ``title`` and ``randomize`` (a missing one
        raises KeyError); ``rating_key`` and everything left over are forwarded to MetadataMenu
    :return: the result of MetadataMenu, carrying a header describing the triggered extraction
    """
    rating_key = kwargs.get("rating_key")
    requested_language = kwargs.pop("language")
    # NOTE(review): the route declares no type for with_mods; confirm it arrives as a bool rather than a string
    with_mods = kwargs.pop("with_mods")
    item_title = kwargs.pop("item_title")
    title = kwargs.pop("title")

    Thread.Create(season_extract_embedded, **{"rating_key": rating_key, "requested_language": requested_language,
                                              "with_mods": with_mods})

    # MetadataMenu takes `header` but has no `message` parameter; forwarding a `message` keyword to it would
    # raise a TypeError, so the feedback text is carried by the header
    kwargs["header"] = u"Extracting of embedded subtitles for %s triggered" % title
    kwargs.pop("message", None)

    kwargs.pop("randomize")
    return MetadataMenu(randomize=timestamp(), title=item_title, **kwargs)
|
||||
|
||||
|
||||
def season_extract_embedded(rating_key, requested_language, with_mods=False):
    """
    Extract the embedded subtitle of the requested language for every part of every child item of
    :rating_key: that has no stored subtitle from the "embedded" provider yet.

    :param rating_key: rating key whose children are processed
    :param requested_language: language to extract
    :param with_mods: handed through to extract_embedded_sub
    """
    storage = get_subtitle_storage()

    try:
        for child in get_all_items(key="children", value=rating_key, base="library/metadata"):
            item = get_item(child[MI_KEY])
            if not item:
                continue

            stored_subs = storage.load_or_new(item)
            for part in get_all_parts(item):
                # parts with an already stored embedded subtitle are left alone
                if stored_subs.get_by_provider(part.id, requested_language, "embedded"):
                    continue

                streams = get_embedded_subtitle_streams(part, requested_language=requested_language,
                                                        get_forced=config.forced_only)
                if not streams:
                    continue

                # only the first matching stream is extracted
                first_stream = streams[0]["stream"]
                extract_embedded_sub(rating_key=item.rating_key, part_id=part.id,
                                     stream_index=str(first_stream.index),
                                     language=requested_language, with_mods=with_mods)
    finally:
        storage.destroy()
|
||||
|
||||
|
||||
@route(PREFIX + '/ignore_list')
|
||||
def IgnoreListMenu():
|
||||
oc = SubFolderObjectContainer(title2="Ignore list", replace_parent=True)
|
||||
@@ -137,15 +210,20 @@ def HistoryMenu():
|
||||
history = get_history()
|
||||
oc = SubFolderObjectContainer(title2="History", replace_parent=True)
|
||||
|
||||
for item in history.history_items:
|
||||
for item in history.items:
|
||||
possible_language = item.language
|
||||
language_display = item.lang_name if not possible_language else display_language(possible_language)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, title=item.title, item_title=item.item_title,
|
||||
rating_key=item.rating_key),
|
||||
title=u"%s (%s)" % (item.item_title, item.mode_verbose),
|
||||
summary=u"%s in %s (%s, score: %s), %s" % (item.lang_name, item.section_title,
|
||||
summary=u"%s in %s (%s, score: %s), %s" % (language_display, item.section_title,
|
||||
item.provider_name, item.score, df(item.time))
|
||||
))
|
||||
|
||||
history.destroy()
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@@ -157,10 +235,26 @@ def RefreshMissing(randomize=None):
|
||||
return fatality(header=header, replace_parent=True)
|
||||
|
||||
|
||||
def replace_item(obj, key, replace_value):
    """
    Replace the value stored under :key: everywhere in a nested structure, in place.

    Dicts are searched recursively, including dicts that sit inside (nested) lists, so a value such as an API
    key is also masked when it is part of a list of settings.

    :param obj: dict to process; it is modified in place
    :param key: key whose value gets replaced wherever it occurs
    :param replace_value: value to store under :key:
    :return: the same (modified) :obj:
    """
    def descend(value):
        # only dicts can hold the key; lists are merely walked through
        if isinstance(value, dict):
            replace_item(value, key, replace_value)
        elif isinstance(value, list):
            for element in value:
                descend(element)

    for value in obj.values():
        descend(value)

    if key in obj:
        obj[key] = replace_value
    return obj
|
||||
|
||||
|
||||
@route(PREFIX + '/ValidatePrefs', enforce_route=True)
|
||||
def ValidatePrefs():
|
||||
Core.log.setLevel(logging.DEBUG)
|
||||
|
||||
if Prefs["log_console"]:
|
||||
Core.log.addHandler(logger.console_handler)
|
||||
Log.Debug("Logging to console from now on")
|
||||
else:
|
||||
Core.log.removeHandler(logger.console_handler)
|
||||
Log.Debug("Stop logging to console")
|
||||
|
||||
# cache the channel state
|
||||
update_dict = False
|
||||
restart = False
|
||||
@@ -177,43 +271,57 @@ def ValidatePrefs():
|
||||
update_dict = True
|
||||
restart = True
|
||||
|
||||
if "plugin_pin_mode" not in Dict:
|
||||
update_dict = True
|
||||
|
||||
elif Dict["plugin_pin_mode"] != Prefs["plugin_pin_mode"]:
|
||||
update_dict = True
|
||||
restart = True
|
||||
|
||||
if update_dict:
|
||||
Dict["channel_enabled"] = config.enable_channel
|
||||
Dict["plugin_pin_mode"] = Prefs["plugin_pin_mode"]
|
||||
Dict.Save()
|
||||
|
||||
if restart:
|
||||
scheduler.stop()
|
||||
DispatchRestart()
|
||||
return
|
||||
|
||||
scheduler.setup_tasks()
|
||||
scheduler.clear_task_data("MissingSubtitles")
|
||||
set_refresh_menu_state(None)
|
||||
|
||||
if Prefs["log_console"]:
|
||||
Core.log.addHandler(logger.console_handler)
|
||||
Log.Debug("Logging to console from now on")
|
||||
else:
|
||||
Core.log.removeHandler(logger.console_handler)
|
||||
Log.Debug("Stop logging to console")
|
||||
|
||||
Log.Debug("Validate Prefs called.")
|
||||
|
||||
# SZ config debug
|
||||
Log.Debug("--- SZ Config-Debug ---")
|
||||
for attr in [
|
||||
"app_support_path", "data_path", "data_items_path", "enable_agent",
|
||||
"enable_channel", "permissions_ok", "missing_permissions", "fs_encoding", "enforce_encoding",
|
||||
"subtitle_destination_folder"]:
|
||||
Log.Debug("config.%s: %s", attr, getattr(config, attr))
|
||||
"version", "app_support_path", "data_path", "data_items_path", "enable_agent",
|
||||
"enable_channel", "permissions_ok", "missing_permissions", "fs_encoding",
|
||||
"subtitle_destination_folder", "new_style_cache", "dbm_supported", "lang_list", "providers",
|
||||
"plex_transcoder", "refiner_settings"]:
|
||||
|
||||
value = getattr(config, attr)
|
||||
if isinstance(value, dict):
|
||||
d = replace_item(copy.deepcopy(value), "api_key", "xxxxxxxxxxxxxxxxxxxxxxxxx")
|
||||
Log.Debug("config.%s: %s", attr, d)
|
||||
continue
|
||||
|
||||
Log.Debug("config.%s: %s", attr, value)
|
||||
|
||||
for attr in ["plugin_log_path", "server_log_path"]:
|
||||
value = getattr(config, attr)
|
||||
access = os.access(value, os.R_OK)
|
||||
if Core.runtime.os == "Windows":
|
||||
try:
|
||||
f = open(value, "r")
|
||||
f.read(1)
|
||||
f.close()
|
||||
except:
|
||||
access = False
|
||||
|
||||
if value:
|
||||
access = os.access(value, os.R_OK)
|
||||
if Core.runtime.os == "Windows":
|
||||
try:
|
||||
f = open(value, "r")
|
||||
f.read(1)
|
||||
f.close()
|
||||
except:
|
||||
access = False
|
||||
|
||||
Log.Debug("config.%s: %s (accessible: %s)", attr, value, access)
|
||||
|
||||
@@ -221,10 +329,36 @@ def ValidatePrefs():
|
||||
"subtitles.save.filesystem", ]:
|
||||
Log.Debug("Pref.%s: %s", attr, Prefs[attr])
|
||||
|
||||
# debug drone
|
||||
if "sonarr" in config.refiner_settings or "radarr" in config.refiner_settings:
|
||||
Log.Debug("----- Connections -----")
|
||||
try:
|
||||
from subliminal_patch.refiners.drone import SonarrClient, RadarrClient
|
||||
for key, cls in [("sonarr", SonarrClient), ("radarr", RadarrClient)]:
|
||||
if key in config.refiner_settings:
|
||||
cname = key.capitalize()
|
||||
try:
|
||||
status = cls(**config.refiner_settings[key]).status()
|
||||
except HTTPError, e:
|
||||
if e.response.status_code == 401:
|
||||
Log.Debug("%s: NOT WORKING - BAD API KEY", cname)
|
||||
else:
|
||||
Log.Debug("%s: NOT WORKING - %s", cname, traceback.format_exc())
|
||||
except:
|
||||
Log.Debug("%s: NOT WORKING - %s", cname, traceback.format_exc())
|
||||
else:
|
||||
if status and status["version"]:
|
||||
Log.Debug("%s: OK - %s", cname, status["version"])
|
||||
else:
|
||||
Log.Debug("%s: NOT WORKING - %s", cname)
|
||||
except:
|
||||
Log.Debug("Something went really wrong when evaluating Sonarr/Radarr: %s", traceback.format_exc())
|
||||
|
||||
# fixme: check existence of and OS access to the logs
|
||||
Log.Debug("----- Environment -----")
|
||||
Log.Debug("Platform: %s", Core.runtime.platform)
|
||||
Log.Debug("OS: %s", Core.runtime.os)
|
||||
Log.Debug("----- Environment -----")
|
||||
Log.Debug("Python: %s", platform.python_version())
|
||||
for key, value in os.environ.iteritems():
|
||||
if key.startswith("PLEX") or key.startswith("SZ_"):
|
||||
if "TOKEN" in key:
|
||||
|
||||
@@ -1,25 +1,32 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
import types
|
||||
import datetime
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
from support.items import get_kind, get_item_thumb
|
||||
from support.helpers import get_video_display_title
|
||||
from func import enable_channel_wrapper
|
||||
from subzero.language import Language
|
||||
from support.items import get_kind, get_item_thumb, get_item, get_item_kind_from_item, refresh_item
|
||||
from support.helpers import get_video_display_title, pad_title, display_language, quote_args
|
||||
from support.ignore import ignore_list
|
||||
from support.lib import get_intent
|
||||
from support.config import config
|
||||
from subzero.constants import ICON_SUB, ICON
|
||||
from support.plex_media import get_part, get_plex_metadata
|
||||
from support.scheduler import scheduler
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles
|
||||
|
||||
from subliminal_patch.subtitle import ModifiedSubtitle
|
||||
|
||||
default_thumb = R(ICON_SUB)
|
||||
main_icon = ICON if not config.is_development else "icon-dev.jpg"
|
||||
|
||||
|
||||
def should_display_ignore(items, previous=None):
|
||||
kind = get_kind(items)
|
||||
return items and (
|
||||
(kind in ("show", "season")) or
|
||||
(kind == "episode" and previous != "season")
|
||||
)
|
||||
# noinspection PyUnboundLocalVariable
|
||||
route = enable_channel_wrapper(route)
|
||||
# noinspection PyUnboundLocalVariable
|
||||
handler = enable_channel_wrapper(handler)
|
||||
|
||||
|
||||
def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None, add_kind=True):
|
||||
@@ -66,7 +73,7 @@ def dig_tree(oc, items, menu_callback, menu_determination_callback=None, force_r
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(menu_callback or menu_determination_callback(kind, item, pass_kwargs=pass_kwargs), title=title,
|
||||
rating_key=force_rating_key or key, **add_kwargs),
|
||||
title=title, thumb=thumb, summary=summary
|
||||
title=pad_title(title) if kind in ("show", "season") else title, thumb=thumb, summary=summary
|
||||
))
|
||||
return oc
|
||||
|
||||
@@ -112,30 +119,6 @@ def get_item_task_data(task_name, rating_key, language):
|
||||
return search_results.get(language)
|
||||
|
||||
|
||||
def enable_channel_wrapper(func):
|
||||
"""
|
||||
returns the original wrapper :func: (route or handler) if applicable, else the plain to-be-wrapped function
|
||||
:param func: original wrapper
|
||||
:return: original wrapper or wrapped function
|
||||
"""
|
||||
def noop(*args, **kwargs):
|
||||
def inner(*a, **k):
|
||||
"""
|
||||
:param a: args
|
||||
:param k: kwargs
|
||||
:return: originally to-be-wrapped function
|
||||
"""
|
||||
return a[0]
|
||||
|
||||
return inner
|
||||
|
||||
def wrap(*args, **kwargs):
|
||||
enforce_route = kwargs.pop("enforce_route", None)
|
||||
return (func if config.enable_channel or enforce_route else noop)(*args, **kwargs)
|
||||
|
||||
return wrap
|
||||
|
||||
|
||||
def debounce(func):
|
||||
"""
|
||||
prevent func from being called twice with the same arguments
|
||||
@@ -148,7 +131,7 @@ def debounce(func):
|
||||
|
||||
def wrap(*args, **kwargs):
|
||||
if "randomize" in kwargs:
|
||||
if not "menu_history" in Dict:
|
||||
if "menu_history" not in Dict:
|
||||
Dict["menu_history"] = {}
|
||||
|
||||
key = get_lookup_key([func] + list(args), kwargs)
|
||||
@@ -156,13 +139,69 @@ def debounce(func):
|
||||
Log.Debug("not triggering %s twice with %s, %s" % (func, args, kwargs))
|
||||
return ObjectContainer()
|
||||
else:
|
||||
Dict["menu_history"][key] = datetime.datetime.now() + datetime.timedelta(days=1)
|
||||
Dict.Save()
|
||||
Dict["menu_history"][key] = datetime.datetime.now() + datetime.timedelta(hours=6)
|
||||
try:
|
||||
Dict.Save()
|
||||
except TypeError:
|
||||
Log.Error("Can't save menu history for: %r", key)
|
||||
del Dict["menu_history"][key]
|
||||
|
||||
return func(*args, **kwargs)
|
||||
|
||||
return wrap
|
||||
|
||||
|
||||
def extract_embedded_sub(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs.pop("part_id")
|
||||
stream_index = kwargs.pop("stream_index")
|
||||
with_mods = kwargs.pop("with_mods", False)
|
||||
language = Language.fromietf(kwargs.pop("language"))
|
||||
refresh = kwargs.pop("refresh", True)
|
||||
set_current = kwargs.pop("set_current", True)
|
||||
|
||||
plex_item = get_item(rating_key)
|
||||
item_type = get_item_kind_from_item(plex_item)
|
||||
part = get_part(plex_item, part_id)
|
||||
|
||||
if part:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if str(stream.index) == stream_index:
|
||||
forced = stream.forced
|
||||
bn = os.path.basename(part.file)
|
||||
|
||||
set_refresh_menu_state(u"Extracting subtitle %s of %s" % (stream_index, bn))
|
||||
Log.Info(u"Extracting stream %s (%s) of %s", stream_index, display_language(language), bn)
|
||||
|
||||
args = [
|
||||
config.plex_transcoder, "-i", part.file, "-map", "0:%s" % stream_index, "-f", "srt", "-"
|
||||
]
|
||||
output = None
|
||||
try:
|
||||
output = subprocess.check_output(quote_args(args), stderr=subprocess.PIPE, shell=True)
|
||||
except:
|
||||
Log.Error("Extraction failed: %s", traceback.format_exc())
|
||||
|
||||
if output:
|
||||
subtitle = ModifiedSubtitle(language, mods=config.default_mods if with_mods else None)
|
||||
subtitle.content = output
|
||||
subtitle.provider_name = "embedded"
|
||||
subtitle.id = "stream_%s" % stream_index
|
||||
subtitle.score = 0
|
||||
subtitle.set_encoding("utf-8")
|
||||
|
||||
# fixme: speedup video; only video.name is needed
|
||||
save_successful = save_subtitles(scanned_parts, {scanned_parts.keys()[0]: [subtitle]}, mode="m",
|
||||
set_current=set_current)
|
||||
set_refresh_menu_state(None)
|
||||
|
||||
if save_successful and refresh:
|
||||
refresh_item(rating_key)
|
||||
|
||||
|
||||
class SZObjectContainer(ObjectContainer):
|
||||
def __init__(self, *args, **kwargs):
|
||||
skip_pin_lock = kwargs.pop("skip_pin_lock", False)
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
# coding=utf-8
|
||||
|
||||
from subzero.constants import PREFIX
|
||||
from menu_helpers import debounce, set_refresh_menu_state
|
||||
from menu_helpers import debounce, set_refresh_menu_state, route
|
||||
from support.items import refresh_item
|
||||
from support.helpers import timestamp
|
||||
|
||||
|
||||
@route(PREFIX + '/item/{rating_key}')
|
||||
@route(PREFIX + '/item/refresh/{rating_key}/force', force=True)
|
||||
@route(PREFIX + '/item/refresh/{rating_key}')
|
||||
@debounce
|
||||
def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=False, refresh_kind=None,
|
||||
previous_rating_key=None, timeout=8000, randomize=None, trigger=True):
|
||||
|
||||
@@ -3,12 +3,13 @@
|
||||
import traceback
|
||||
import types
|
||||
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, route
|
||||
from subzero.modification import registry as mod_registry, SubtitleModifications
|
||||
from subzero.constants import PREFIX
|
||||
from support.plex_media import get_plex_metadata, scan_videos
|
||||
from support.plex_media import get_plex_metadata
|
||||
from support.scanning import scan_videos
|
||||
from support.helpers import timestamp, pad_title
|
||||
from support.items import get_current_sub, set_mods_for_part
|
||||
|
||||
@@ -75,6 +76,11 @@ def SubtitleModificationsMenu(**kwargs):
|
||||
title=pad_title("Manage applied mods"),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_mods))
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleReapplyMods, randomize=timestamp(), **kwargs),
|
||||
title=pad_title("Reapply applied mods"),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_mods) if current_mods else "none")
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=None, mode="clear", randomize=timestamp(), **kwargs),
|
||||
@@ -82,6 +88,8 @@ def SubtitleModificationsMenu(**kwargs):
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_mods) if current_mods else "none")
|
||||
))
|
||||
|
||||
storage.destroy()
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@@ -101,12 +109,12 @@ def SubtitleFPSModMenu(**kwargs):
|
||||
))
|
||||
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
target_fps = plex_part.fps
|
||||
|
||||
for fps in ["23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]:
|
||||
for fps in ["23.980", "23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]:
|
||||
if float(fps) == float(target_fps):
|
||||
continue
|
||||
|
||||
@@ -225,6 +233,22 @@ def SubtitleSetMods(mods=None, mode=None, **kwargs):
|
||||
return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_reapply_mods/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleReapplyMods(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
lang_a2 = kwargs["language"]
|
||||
item_type = kwargs["item_type"]
|
||||
|
||||
language = Language.fromietf(lang_a2)
|
||||
|
||||
set_mods_for_part(rating_key, part_id, language, item_type, [], mode="add")
|
||||
|
||||
kwargs.pop("randomize")
|
||||
return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_list_mods/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleListMods(**kwargs):
|
||||
@@ -248,4 +272,6 @@ def SubtitleListMods(**kwargs):
|
||||
title="Remove: %s" % identifier
|
||||
))
|
||||
|
||||
storage.destroy()
|
||||
|
||||
return oc
|
||||
@@ -28,22 +28,25 @@ import items
|
||||
|
||||
sys.modules["support.items"] = items
|
||||
|
||||
import missing_subtitles
|
||||
|
||||
sys.modules["support.missing_subtitles"] = missing_subtitles
|
||||
|
||||
import scheduler
|
||||
|
||||
sys.modules["support.scheduler"] = scheduler
|
||||
|
||||
import tasks
|
||||
|
||||
sys.modules["support.tasks"] = tasks
|
||||
|
||||
import storage
|
||||
|
||||
sys.modules["support.storage"] = storage
|
||||
|
||||
import scanning
|
||||
sys.modules["support.scanning"] = scanning
|
||||
|
||||
import missing_subtitles
|
||||
|
||||
sys.modules["support.missing_subtitles"] = missing_subtitles
|
||||
|
||||
import tasks
|
||||
|
||||
sys.modules["support.tasks"] = tasks
|
||||
|
||||
import ignore
|
||||
|
||||
sys.modules["support.ignore"] = ignore
|
||||
@@ -58,3 +61,6 @@ sys.modules["support.data"] = data
|
||||
|
||||
import activities
|
||||
sys.modules["support.activities"] = activities
|
||||
|
||||
import download
|
||||
sys.modules["support.download"] = download
|
||||
@@ -3,25 +3,26 @@ from wraptor.decorators import throttle
|
||||
from config import config
|
||||
from items import get_item, get_item_kind_from_item, refresh_item
|
||||
|
||||
from plex_activity import Activity
|
||||
from plex_activity.sources.s_logging.main import Logging as Activity_Logging
|
||||
Activity = None
|
||||
try:
|
||||
from plex_activity import Activity
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
class PlexActivityManager(object):
|
||||
def start(self):
|
||||
activity_sources_enabled = None
|
||||
|
||||
if not Activity:
|
||||
return
|
||||
|
||||
if config.plex_token:
|
||||
from plex import Plex
|
||||
Plex.configuration.defaults.authentication(config.plex_token)
|
||||
activity_sources_enabled = ["websocket"]
|
||||
Activity.on('websocket.playing', self.on_playing)
|
||||
|
||||
elif config.server_log_path:
|
||||
Activity_Logging.add_hint(config.server_log_path, None)
|
||||
activity_sources_enabled = ["logging"]
|
||||
Activity.on('logging.playing', self.on_playing)
|
||||
|
||||
if activity_sources_enabled:
|
||||
Activity.start(activity_sources_enabled)
|
||||
|
||||
@@ -38,6 +39,13 @@ class PlexActivityManager(object):
|
||||
return
|
||||
|
||||
rating_key = info["ratingKey"]
|
||||
|
||||
# only use integer based rating keys
|
||||
try:
|
||||
int(rating_key)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
if rating_key in Dict["last_played_items"] and rating_key != Dict["last_played_items"][0]:
|
||||
# shift last played
|
||||
Dict["last_played_items"].insert(0,
|
||||
@@ -56,10 +64,12 @@ class PlexActivityManager(object):
|
||||
|
||||
debug_msg = "Started playing %s. Refreshing it." % rating_key
|
||||
|
||||
key_to_refresh = None
|
||||
if config.activity_mode in ["refresh", "next_episode", "hybrid"]:
|
||||
# todo: cleanup debug messages for hybrid-plus
|
||||
|
||||
keys_to_refresh = []
|
||||
if config.activity_mode in ["refresh", "next_episode", "hybrid", "hybrid-plus"]:
|
||||
# next episode or next episode and current movie
|
||||
if config.activity_mode in ["next_episode", "hybrid"]:
|
||||
if config.activity_mode in ["next_episode", "hybrid", "hybrid-plus"]:
|
||||
plex_item = get_item(rating_key)
|
||||
if not plex_item:
|
||||
Log.Warn("Can't determine media type of %s, skipping" % rating_key)
|
||||
@@ -67,20 +77,24 @@ class PlexActivityManager(object):
|
||||
|
||||
if get_item_kind_from_item(plex_item) == "episode":
|
||||
next_ep = self.get_next_episode(rating_key)
|
||||
if config.activity_mode == "hybrid-plus":
|
||||
keys_to_refresh.append(rating_key)
|
||||
if next_ep:
|
||||
key_to_refresh = next_ep.rating_key
|
||||
keys_to_refresh.append(next_ep.rating_key)
|
||||
debug_msg = "Started playing %s. Refreshing next episode (%s, S%02iE%02i)." % \
|
||||
(rating_key, next_ep.rating_key, int(next_ep.season.index), int(next_ep.index))
|
||||
|
||||
else:
|
||||
if config.activity_mode == "hybrid":
|
||||
key_to_refresh = rating_key
|
||||
keys_to_refresh.append(rating_key)
|
||||
elif config.activity_mode == "refresh":
|
||||
key_to_refresh = rating_key
|
||||
keys_to_refresh.append(rating_key)
|
||||
|
||||
if key_to_refresh:
|
||||
if keys_to_refresh:
|
||||
Log.Debug(debug_msg)
|
||||
refresh_item(key_to_refresh)
|
||||
Log.Debug("Refreshing %s", keys_to_refresh)
|
||||
for key in keys_to_refresh:
|
||||
refresh_item(key)
|
||||
|
||||
def get_next_episode(self, rating_key):
|
||||
plex_item = get_item(rating_key)
|
||||
|
||||
+399
-53
@@ -1,21 +1,41 @@
|
||||
# coding=utf-8
|
||||
|
||||
import copy
|
||||
import os
|
||||
import re
|
||||
import inspect
|
||||
|
||||
import sys
|
||||
import rarfile
|
||||
import jstyleson
|
||||
import datetime
|
||||
|
||||
import subliminal
|
||||
import subliminal_patch
|
||||
from babelfish import Language
|
||||
import subzero.constants
|
||||
import lib
|
||||
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded
|
||||
|
||||
from subliminal_patch.core import is_windows_special_path
|
||||
from whichdb import whichdb
|
||||
|
||||
from subliminal_patch.exceptions import TooManyRequests
|
||||
from subzero.language import Language
|
||||
from subliminal.cli import MutexLock
|
||||
from subzero.lib.io import FileIO, get_viable_encoding
|
||||
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW
|
||||
from subzero.lib.dict import Dicked
|
||||
from subzero.util import get_root_path
|
||||
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW, MEDIA_TYPE_TO_STRING
|
||||
from dogpile.cache.region import register_backend as register_cache_backend
|
||||
from lib import Plex
|
||||
from helpers import check_write_permissions, cast_bool
|
||||
from helpers import check_write_permissions, cast_bool, cast_int, mswindows
|
||||
|
||||
SUBTITLE_EXTS = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'txt', 'psb']
|
||||
register_cache_backend(
|
||||
"subzero.cache.file", "subzero.cache_backends.file", "SZFileBackend")
|
||||
|
||||
SUBTITLE_EXTS_BASE = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'psb',
|
||||
'vtt']
|
||||
SUBTITLE_EXTS = SUBTITLE_EXTS_BASE + ["txt"]
|
||||
|
||||
TEXT_SUBTITLE_EXTS = ("srt", "ass", "ssa", "vtt")
|
||||
VIDEO_EXTS = ['3g2', '3gp', 'asf', 'asx', 'avc', 'avi', 'avs', 'bivx', 'bup', 'divx', 'dv', 'dvr-ms', 'evo', 'fli',
|
||||
'flv',
|
||||
'm2t', 'm2ts', 'm2v', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'mts', 'nsv', 'nuv', 'ogm', 'ogv', 'tp',
|
||||
@@ -36,7 +56,26 @@ def int_or_default(s, default):
|
||||
return default
|
||||
|
||||
|
||||
VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable)
|
||||
|
||||
PROVIDER_THROTTLE_MAP = {
|
||||
"default": {
|
||||
TooManyRequests: (datetime.timedelta(hours=1), "1 hour"),
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
|
||||
ServiceUnavailable: (datetime.timedelta(minutes=20), "20 minutes"),
|
||||
},
|
||||
"opensubtitles": {
|
||||
TooManyRequests: (datetime.timedelta(hours=3), "3 hours"),
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"),
|
||||
},
|
||||
"addic7ed": {
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=24), "24 hours"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class Config(object):
|
||||
libraries_root = None
|
||||
plugin_info = ""
|
||||
version = None
|
||||
full_version = None
|
||||
@@ -48,6 +87,12 @@ class Config(object):
|
||||
universal_plex_token = None
|
||||
plex_token = None
|
||||
is_development = False
|
||||
dbm_supported = False
|
||||
pms_request_timeout = 15
|
||||
low_impact_mode = False
|
||||
new_style_cache = False
|
||||
pack_cache_dir = None
|
||||
advanced = None
|
||||
|
||||
enable_channel = True
|
||||
enable_agent = True
|
||||
@@ -56,10 +101,8 @@ class Config(object):
|
||||
lock_advanced_menu = False
|
||||
locked = False
|
||||
pin_valid_minutes = 10
|
||||
lang_list = None
|
||||
subtitle_destination_folder = None
|
||||
providers = None
|
||||
provider_settings = None
|
||||
subtitle_formats = None
|
||||
max_recent_items_per_library = 200
|
||||
permissions_ok = False
|
||||
missing_permissions = None
|
||||
@@ -70,26 +113,40 @@ class Config(object):
|
||||
sections = None
|
||||
enabled_sections = None
|
||||
remove_hi = False
|
||||
remove_tags = False
|
||||
fix_ocr = False
|
||||
fix_common = False
|
||||
colors = ""
|
||||
enforce_encoding = False
|
||||
chmod = None
|
||||
forced_only = False
|
||||
exotic_ext = False
|
||||
treat_und_as_first = False
|
||||
subtitle_sub_dir = None, None
|
||||
ext_match_strictness = False
|
||||
default_mods = None
|
||||
debug_mods = False
|
||||
react_to_activities = False
|
||||
activity_mode = None
|
||||
subtitles_save_to = None
|
||||
no_refresh = False
|
||||
plex_transcoder = None
|
||||
refiner_settings = None
|
||||
exact_filenames = False
|
||||
only_one = False
|
||||
embedded_auto_extract = False
|
||||
ietf_as_alpha3 = False
|
||||
|
||||
store_recently_played_amount = 20
|
||||
store_recently_played_amount = 40
|
||||
|
||||
initialized = False
|
||||
|
||||
def initialize(self):
|
||||
self.libraries_root = os.path.abspath(os.path.join(get_root_path(), ".."))
|
||||
self.init_libraries()
|
||||
|
||||
if is_windows_special_path:
|
||||
Log.Warn("The Plex metadata folder is residing inside a folder with special characters. "
|
||||
"Multithreading and playback activities will be disabled.")
|
||||
|
||||
self.fs_encoding = get_viable_encoding()
|
||||
self.plugin_info = self.get_plugin_info()
|
||||
self.is_development = self.get_dev_mode()
|
||||
@@ -101,18 +158,24 @@ class Config(object):
|
||||
self.data_items_path = os.path.join(self.data_path, "DataItems")
|
||||
self.universal_plex_token = self.get_universal_plex_token()
|
||||
self.plex_token = os.environ.get("PLEXTOKEN", self.universal_plex_token)
|
||||
subzero.constants.DEFAULT_TIMEOUT = lib.DEFAULT_TIMEOUT = self.pms_request_timeout = \
|
||||
min(cast_int(Prefs['pms_request_timeout'], 15), 45)
|
||||
self.low_impact_mode = cast_bool(Prefs['low_impact_mode'])
|
||||
self.new_style_cache = cast_bool(Prefs['new_style_cache'])
|
||||
self.pack_cache_dir = self.get_pack_cache_dir()
|
||||
self.advanced = self.get_advanced_config()
|
||||
|
||||
os.environ["SZ_USER_AGENT"] = self.get_user_agent()
|
||||
|
||||
self.setup_proxies()
|
||||
self.set_plugin_mode()
|
||||
self.set_plugin_lock()
|
||||
self.set_activity_modes()
|
||||
self.parse_rename_mode()
|
||||
|
||||
self.lang_list = self.get_lang_list()
|
||||
self.subtitle_destination_folder = self.get_subtitle_destination_folder()
|
||||
self.subtitle_formats = self.get_subtitle_formats()
|
||||
self.forced_only = cast_bool(Prefs["subtitles.only_foreign"])
|
||||
self.providers = self.get_providers()
|
||||
self.provider_settings = self.get_provider_settings()
|
||||
self.max_recent_items_per_library = int_or_default(Prefs["scheduler.max_recent_items_per_library"], 2000)
|
||||
self.sections = list(Plex["library"].sections())
|
||||
self.missing_permissions = []
|
||||
@@ -122,43 +185,120 @@ class Config(object):
|
||||
self.permissions_ok = self.check_permissions()
|
||||
self.notify_executable = self.check_notify_executable()
|
||||
self.remove_hi = cast_bool(Prefs['subtitles.remove_hi'])
|
||||
self.remove_tags = cast_bool(Prefs['subtitles.remove_tags'])
|
||||
self.fix_ocr = cast_bool(Prefs['subtitles.fix_ocr'])
|
||||
self.fix_common = cast_bool(Prefs['subtitles.fix_common'])
|
||||
self.colors = Prefs['subtitles.colors'] if Prefs['subtitles.colors'] != "don't change" else None
|
||||
self.enforce_encoding = cast_bool(Prefs['subtitles.enforce_encoding'])
|
||||
|
||||
os.environ["SZ_ENFORCE_ENCODING"] = str(self.enforce_encoding)
|
||||
|
||||
self.chmod = self.check_chmod()
|
||||
self.exotic_ext = cast_bool(Prefs["subtitles.scan.exotic_ext"])
|
||||
self.treat_und_as_first = cast_bool(Prefs["subtitles.language.treat_und_as_first"])
|
||||
self.subtitle_sub_dir = self.get_subtitle_sub_dir()
|
||||
self.ext_match_strictness = self.determine_ext_sub_strictness()
|
||||
self.default_mods = self.get_default_mods()
|
||||
self.debug_mods = cast_bool(Prefs['log_debug_mods'])
|
||||
self.subtitles_save_to = Prefs['subtitles.save.filesystem']
|
||||
self.no_refresh = os.environ.get("SZ_NO_REFRESH", False)
|
||||
self.plex_transcoder = self.get_plex_transcoder()
|
||||
self.only_one = cast_bool(Prefs['subtitles.only_one'])
|
||||
self.embedded_auto_extract = cast_bool(Prefs["subtitles.embedded.autoextract"])
|
||||
self.ietf_as_alpha3 = cast_bool(Prefs["subtitles.language.ietf_normalize"])
|
||||
self.initialized = True
|
||||
|
||||
def init_libraries(self):
|
||||
if Core.runtime.os == "Windows":
|
||||
unrar_exe = os.path.abspath(os.path.join(self.libraries_root, "Windows", "i386", "UnRAR", "UnRAR.exe"))
|
||||
if os.path.isfile(unrar_exe):
|
||||
rarfile.UNRAR_TOOL = unrar_exe
|
||||
Log.Info("Using UnRAR from: %s", unrar_exe)
|
||||
|
||||
custom_unrar = os.environ.get("SZ_UNRAR_TOOL")
|
||||
if custom_unrar and os.path.isfile(custom_unrar):
|
||||
rarfile.UNRAR_TOOL = custom_unrar
|
||||
Log.Info("Using UnRAR from: %s", custom_unrar)
|
||||
|
||||
def init_cache(self):
|
||||
use_fallback_cache = True
|
||||
if self.new_style_cache:
|
||||
subliminal.region.configure('subzero.cache.file', expiration_time=datetime.timedelta(days=30),
|
||||
arguments={'appname': "sz_cache",
|
||||
'app_cache_dir': self.data_path})
|
||||
Log.Info("Using new style file based cache!")
|
||||
return
|
||||
|
||||
names = ['dbhash', 'gdbm', 'dbm']
|
||||
dbfn = None
|
||||
self.dbm_supported = False
|
||||
|
||||
# try importing dbm modules
|
||||
if Core.runtime.os != "Windows":
|
||||
impawrt = None
|
||||
try:
|
||||
subliminal.region.configure('dogpile.cache.dbm', expiration_time=datetime.timedelta(days=30),
|
||||
arguments={'filename': os.path.join(config.data_items_path, 'subzero.dbm'),
|
||||
'lock_factory': MutexLock})
|
||||
use_fallback_cache = False
|
||||
impawrt = getattr(sys.modules['__main__'], "__builtins__").get("__import__")
|
||||
except:
|
||||
pass
|
||||
|
||||
if use_fallback_cache:
|
||||
Log.Warn("Not using file based cache!")
|
||||
subliminal.region.configure('dogpile.cache.memory')
|
||||
if impawrt:
|
||||
for name in names:
|
||||
try:
|
||||
impawrt(name)
|
||||
except:
|
||||
continue
|
||||
if not self.dbm_supported:
|
||||
self.dbm_supported = name
|
||||
break
|
||||
|
||||
if self.dbm_supported:
|
||||
# anydbm checks; try guessing the format and importing the correct module
|
||||
dbfn = os.path.join(config.data_items_path, 'subzero.dbm')
|
||||
db_which = whichdb(dbfn)
|
||||
if db_which is not None and db_which != "":
|
||||
try:
|
||||
impawrt(db_which)
|
||||
except ImportError:
|
||||
self.dbm_supported = False
|
||||
|
||||
if self.dbm_supported:
|
||||
try:
|
||||
subliminal.region.configure('dogpile.cache.dbm', expiration_time=datetime.timedelta(days=30),
|
||||
arguments={'filename': dbfn,
|
||||
'lock_factory': MutexLock})
|
||||
Log.Info("Using file based cache!")
|
||||
return
|
||||
except:
|
||||
self.dbm_supported = False
|
||||
|
||||
Log.Warn("Not using file based cache!")
|
||||
subliminal.region.configure('dogpile.cache.memory')
|
||||
|
||||
def sync_cache(self):
|
||||
if not self.new_style_cache:
|
||||
return
|
||||
Log.Debug("Syncing cache")
|
||||
subliminal.region.backend.sync()
|
||||
|
||||
def get_pack_cache_dir(self):
|
||||
pack_cache_dir = os.path.join(config.data_path, "pack_cache")
|
||||
if not os.path.isdir(pack_cache_dir):
|
||||
os.makedirs(pack_cache_dir)
|
||||
|
||||
return pack_cache_dir
|
||||
|
||||
def get_advanced_config(self):
|
||||
path = os.path.join(config.data_path, "advanced_settings.json")
|
||||
if os.path.isfile(path):
|
||||
data = FileIO.read(path, "r")
|
||||
|
||||
return Dicked(**jstyleson.loads(data))
|
||||
|
||||
return Dicked()
|
||||
|
||||
def set_log_paths(self):
|
||||
# find log handler
|
||||
for handler in Core.log.handlers:
|
||||
if getattr(getattr(handler, "__class__"), "__name__") in (
|
||||
'FileHandler', 'RotatingFileHandler', 'TimedRotatingFileHandler'):
|
||||
cls_name = getattr(getattr(handler, "__class__"), "__name__")
|
||||
if cls_name in ('FileHandler', 'RotatingFileHandler', 'TimedRotatingFileHandler'):
|
||||
plugin_log_file = handler.baseFilename
|
||||
if cls_name in ("RotatingFileHandler", "TimedRotatingFileHandler"):
|
||||
handler.backupCount = int_or_default(Prefs['log_rotate_keep'], 5)
|
||||
|
||||
if os.path.isfile(os.path.realpath(plugin_log_file)):
|
||||
self.plugin_log_path = plugin_log_file
|
||||
|
||||
@@ -177,11 +317,21 @@ class Config(object):
|
||||
except:
|
||||
Log.Warn("Couldn't determine Plex Token")
|
||||
else:
|
||||
Log("Did NOT find Preferences file - most likely Windows OS. Otherwise please check logfile and hierarchy.")
|
||||
Log.Warn("Did NOT find Preferences file - most likely Windows OS. Otherwise please check logfile and hierarchy.")
|
||||
|
||||
# fixme: windows
|
||||
|
||||
def set_plugin_mode(self):
|
||||
self.enable_agent = True
|
||||
self.enable_channel = True
|
||||
|
||||
# any provider enabled?
|
||||
if not self.providers:
|
||||
self.enable_agent = False
|
||||
self.enable_channel = False
|
||||
Log.Warn("No providers enabled, disabling agent and channel!")
|
||||
return
|
||||
|
||||
if Prefs["plugin_mode"] == "only agent":
|
||||
self.enable_channel = False
|
||||
elif Prefs["plugin_mode"] == "only channel":
|
||||
@@ -220,7 +370,7 @@ class Config(object):
|
||||
self.permissions_ok = self.check_permissions()
|
||||
|
||||
def check_permissions(self):
|
||||
if not Prefs["subtitles.save.filesystem"] or not Prefs["check_permissions"]:
|
||||
if not cast_bool(Prefs["subtitles.save.filesystem"]) or not cast_bool(Prefs["check_permissions"]):
|
||||
return True
|
||||
|
||||
self.missing_permissions = []
|
||||
@@ -236,6 +386,9 @@ class Config(object):
|
||||
if isinstance(path_str, unicode):
|
||||
path_str = path_str.encode(self.fs_encoding)
|
||||
|
||||
if not os.path.exists(path_str):
|
||||
continue
|
||||
|
||||
if use_ignore_fs:
|
||||
# check whether we've got an ignore file inside the section path
|
||||
if self.is_physically_ignored(path_str):
|
||||
@@ -317,7 +470,7 @@ class Config(object):
|
||||
self.enabled_sections = self.check_enabled_sections()
|
||||
|
||||
def check_enabled_sections(self):
|
||||
enabled_for_primary_agents = []
|
||||
enabled_for_primary_agents = {"movie": [], "show": []}
|
||||
enabled_sections = {}
|
||||
|
||||
# find which agents we're enabled for
|
||||
@@ -330,29 +483,55 @@ class Config(object):
|
||||
related_agents = Plex.primary_agent(agent.identifier, t.media_type)
|
||||
for a in related_agents:
|
||||
if a.identifier == PLUGIN_IDENTIFIER and a.enabled:
|
||||
enabled_for_primary_agents.append(agent.identifier)
|
||||
enabled_for_primary_agents[MEDIA_TYPE_TO_STRING[t.media_type]].append(agent.identifier)
|
||||
|
||||
# find the libraries that use them
|
||||
for library in self.sections:
|
||||
if library.agent in enabled_for_primary_agents:
|
||||
if library.agent in enabled_for_primary_agents.get(library.type, []):
|
||||
enabled_sections[library.key] = library
|
||||
|
||||
Log.Debug(u"I'm enabled for: %s" % [lib.title for key, lib in enabled_sections.iteritems()])
|
||||
return enabled_sections
|
||||
|
||||
# Prepare a list of languages we want subs for
|
||||
def get_lang_list(self):
|
||||
l = {Language.fromietf(Prefs["langPref1"])}
|
||||
def get_lang_list(self, provider=None):
|
||||
# advanced settings
|
||||
if provider and self.advanced.providers and provider in self.advanced.providers:
|
||||
adv_languages = self.advanced.providers[provider].get("languages", None)
|
||||
if adv_languages:
|
||||
adv_out = set()
|
||||
for adv_lang in adv_languages:
|
||||
adv_lang = adv_lang.strip()
|
||||
try:
|
||||
real_lang = Language.fromietf(adv_lang)
|
||||
except:
|
||||
try:
|
||||
real_lang = Language.fromname(adv_lang)
|
||||
except:
|
||||
continue
|
||||
adv_out.update({real_lang})
|
||||
|
||||
# fallback to default languages if no valid language was found in advanced settings
|
||||
if adv_out:
|
||||
return adv_out
|
||||
|
||||
l = {Language.fromietf(Prefs["langPref1a"])}
|
||||
lang_custom = Prefs["langPrefCustom"].strip()
|
||||
|
||||
if Prefs['subtitles.only_one']:
|
||||
return l
|
||||
|
||||
if Prefs["langPref2"] != "None":
|
||||
l.update({Language.fromietf(Prefs["langPref2"])})
|
||||
if Prefs["langPref2a"] != "None":
|
||||
try:
|
||||
l.update({Language.fromietf(Prefs["langPref2a"])})
|
||||
except:
|
||||
pass
|
||||
|
||||
if Prefs["langPref3"] != "None":
|
||||
l.update({Language.fromietf(Prefs["langPref3"])})
|
||||
if Prefs["langPref3a"] != "None":
|
||||
try:
|
||||
l.update({Language.fromietf(Prefs["langPref3a"])})
|
||||
except:
|
||||
pass
|
||||
|
||||
if len(lang_custom) and lang_custom != "None":
|
||||
for lang in lang_custom.split(u","):
|
||||
@@ -368,6 +547,8 @@ class Config(object):
|
||||
|
||||
return l
|
||||
|
||||
lang_list = property(get_lang_list)
|
||||
|
||||
def get_subtitle_destination_folder(self):
|
||||
if not Prefs["subtitles.save.filesystem"]:
|
||||
return
|
||||
@@ -377,18 +558,35 @@ class Config(object):
|
||||
return fld_custom or (
|
||||
Prefs["subtitles.save.subFolder"] if Prefs["subtitles.save.subFolder"] != "current folder" else None)
|
||||
|
||||
def get_providers(self):
|
||||
def get_subtitle_formats(self):
|
||||
formats = Prefs["subtitles.save.formats"]
|
||||
out = []
|
||||
if "SRT" in formats:
|
||||
out.append("srt")
|
||||
if "VTT" in formats:
|
||||
out.append("vtt")
|
||||
return out
|
||||
|
||||
def get_providers(self, media_type="series"):
|
||||
providers = {'opensubtitles': cast_bool(Prefs['provider.opensubtitles.enabled']),
|
||||
# 'thesubdb': Prefs['provider.thesubdb.enabled'],
|
||||
'podnapisi': cast_bool(Prefs['provider.podnapisi.enabled']),
|
||||
'titlovi': cast_bool(Prefs['provider.titlovi.enabled']),
|
||||
'addic7ed': cast_bool(Prefs['provider.addic7ed.enabled']),
|
||||
'tvsubtitles': cast_bool(Prefs['provider.tvsubtitles.enabled']),
|
||||
'legendastv': cast_bool(Prefs['provider.legendastv.enabled']),
|
||||
'napiprojekt': cast_bool(Prefs['provider.napiprojekt.enabled']),
|
||||
'shooter': cast_bool(Prefs['provider.shooter.enabled']),
|
||||
'subscenter': cast_bool(Prefs['provider.subscenter.enabled']),
|
||||
'shooter': False,
|
||||
'subscene': cast_bool(Prefs['provider.subscene.enabled']),
|
||||
'subscenter': False,
|
||||
}
|
||||
|
||||
providers_by_prefs = copy.deepcopy(providers)
|
||||
|
||||
# disable subscene for movies by default
|
||||
if media_type == "movies":
|
||||
providers["subscene"] = False
|
||||
|
||||
# ditch non-forced-subtitles-reporting providers
|
||||
if self.forced_only:
|
||||
providers["addic7ed"] = False
|
||||
@@ -396,10 +594,41 @@ class Config(object):
|
||||
providers["legendastv"] = False
|
||||
providers["napiprojekt"] = False
|
||||
providers["shooter"] = False
|
||||
providers["subscenter"] = False
|
||||
providers["titlovi"] = False
|
||||
|
||||
# advanced settings
|
||||
if media_type and self.advanced.providers:
|
||||
for provider, data in self.advanced.providers.iteritems():
|
||||
if provider not in providers or not providers_by_prefs[provider]:
|
||||
continue
|
||||
|
||||
if data["enabled_for"] is not None:
|
||||
providers[provider] = media_type in data["enabled_for"]
|
||||
|
||||
if "provider_throttle" not in Dict:
|
||||
Dict["provider_throttle"] = {}
|
||||
|
||||
changed = False
|
||||
for provider, enabled in dict(providers).iteritems():
|
||||
reason, until, throttle_desc = Dict["provider_throttle"].get(provider, (None, None, None))
|
||||
if reason:
|
||||
now = datetime.datetime.now()
|
||||
if now < until:
|
||||
Log.Info("Not using %s until %s, because of: %s", provider,
|
||||
until.strftime("%y/%m/%d %H:%M"), reason)
|
||||
providers[provider] = False
|
||||
else:
|
||||
Log.Info("Using %s again after %s, (disabled because: %s)", provider, throttle_desc, reason)
|
||||
del Dict["provider_throttle"][provider]
|
||||
changed = True
|
||||
|
||||
if changed:
|
||||
Dict.Save()
|
||||
|
||||
return filter(lambda prov: providers[prov], providers)
|
||||
|
||||
providers = property(get_providers)
|
||||
|
||||
def get_provider_settings(self):
|
||||
provider_settings = {'addic7ed': {'username': Prefs['provider.addic7ed.username'],
|
||||
'password': Prefs['provider.addic7ed.password'],
|
||||
@@ -407,22 +636,54 @@ class Config(object):
|
||||
},
|
||||
'opensubtitles': {'username': Prefs['provider.opensubtitles.username'],
|
||||
'password': Prefs['provider.opensubtitles.password'],
|
||||
'use_tag_search': cast_bool(Prefs['provider.opensubtitles.use_tags']),
|
||||
'only_foreign': cast_bool(Prefs['subtitles.only_foreign'])
|
||||
'use_tag_search': self.exact_filenames,
|
||||
'only_foreign': self.forced_only,
|
||||
'is_vip': cast_bool(Prefs['provider.opensubtitles.is_vip'])
|
||||
},
|
||||
'podnapisi': {
|
||||
'only_foreign': cast_bool(Prefs['subtitles.only_foreign'])
|
||||
'only_foreign': self.forced_only,
|
||||
},
|
||||
'legendastv': {'username': Prefs['provider.legendastv.username'],
|
||||
'password': Prefs['provider.legendastv.password'],
|
||||
},
|
||||
'subscenter': {'username': Prefs['provider.subscenter.username'],
|
||||
'password': Prefs['provider.subscenter.password'],
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
return provider_settings
|
||||
|
||||
provider_settings = property(get_provider_settings)
|
||||
|
||||
def provider_throttle(self, name, exception):
    """
    Throttle provider :name: for a period that depends on the type of :exception:.

    The throttle entry is stored in Dict["provider_throttle"][name] as
    (exception class name, throttle_until, throttle_description) and persisted via Dict.Save().

    :param name: provider name; used as key into PROVIDER_THROTTLE_MAP (falls back to its "default" entry)
    :param exception: the exception instance raised while using the provider
    :return: None; nothing is stored when no throttle data is configured for the exception type
    """
    cls = getattr(exception, "__class__")
    cls_name = getattr(cls, "__name__")
    if cls not in VALID_THROTTLE_EXCEPTIONS:
        # the exception may be a subclass of one of the known throttle exceptions; map it to the first
        # matching base class. (This has to test the exception instance: isinstance() on the class object
        # itself never matches an exception type.)
        for valid_cls in VALID_THROTTLE_EXCEPTIONS:
            if isinstance(exception, valid_cls):
                cls = valid_cls
                break

    # provider specific setting first, then the default one
    throttle_data = PROVIDER_THROTTLE_MAP.get(name, PROVIDER_THROTTLE_MAP["default"]).get(cls, None) or \
        PROVIDER_THROTTLE_MAP["default"].get(cls, None)

    if not throttle_data:
        return

    # throttle_delta is added to a datetime below; throttle_description is only used for display/logging
    throttle_delta, throttle_description = throttle_data

    if "provider_throttle" not in Dict:
        Dict["provider_throttle"] = {}

    throttle_until = datetime.datetime.now() + throttle_delta
    Dict["provider_throttle"][name] = (cls_name, throttle_until, throttle_description)

    Log.Info("Throttling %s for %s, until %s, because of: %s", name, throttle_description,
             throttle_until.strftime("%y/%m/%d %H:%M"), cls_name)
    Dict.Save()
|
||||
|
||||
@property
|
||||
def provider_pool(self):
|
||||
if cast_bool(Prefs['providers.multithreading']):
|
||||
@@ -446,6 +707,22 @@ class Config(object):
|
||||
if wrong_chmod:
|
||||
Log.Warn("Chmod setting ignored, please use only 4-digit integers with leading 0 (e.g.: 775)")
|
||||
|
||||
def get_subtitle_sub_dir(self):
    """
    Resolve the folder subtitles are stored in, based on the "subtitles.save.*" preferences.

    :return: folder, is_absolute -- (None, None) if subtitles aren't saved to the filesystem; the custom
             folder (with os.path.isabs() of it) if one is set; otherwise the selected sub folder, where
             "current folder" is reported as "."
    """
    if not cast_bool(Prefs['subtitles.save.filesystem']):
        return None, None

    custom_folder = Prefs["subtitles.save.subFolder.Custom"]
    if custom_folder:
        return custom_folder, os.path.isabs(custom_folder)

    sub_folder = Prefs["subtitles.save.subFolder"]
    return ("." if sub_folder == "current folder" else sub_folder), False
|
||||
|
||||
def determine_ext_sub_strictness(self):
|
||||
val = Prefs["subtitles.scan.filename_strictness"]
|
||||
if val == "any":
|
||||
@@ -458,6 +735,8 @@ class Config(object):
|
||||
mods = []
|
||||
if self.remove_hi:
|
||||
mods.append("remove_HI")
|
||||
if self.remove_tags:
|
||||
mods.append("remove_tags")
|
||||
if self.fix_ocr:
|
||||
mods.append("OCR_fixes")
|
||||
if self.fix_common:
|
||||
@@ -467,6 +746,12 @@ class Config(object):
|
||||
|
||||
return mods
|
||||
|
||||
def setup_proxies(self):
    """
    Export the "proxy" preference (whitespace-stripped) as the SZ_HTTP_PROXY environment variable.

    Does nothing when the preference is empty.
    """
    configured = Prefs["proxy"]
    if not configured:
        return

    os.environ["SZ_HTTP_PROXY"] = configured.strip()
    Log.Debug("Using HTTP Proxy: %s", configured)
|
||||
|
||||
def set_activity_modes(self):
|
||||
val = Prefs["activity.on_playback"]
|
||||
if val == "never":
|
||||
@@ -478,9 +763,70 @@ class Config(object):
|
||||
self.activity_mode = "refresh"
|
||||
elif val == "hybrid: current item or next episode":
|
||||
self.activity_mode = "hybrid"
|
||||
elif val == "hybrid-plus: current item and next episode":
|
||||
self.activity_mode = "hybrid-plus"
|
||||
else:
|
||||
self.activity_mode = "next_episode"
|
||||
|
||||
def get_plex_transcoder(self):
    """
    Locate the Plex Transcoder executable.

    The base path is taken from the PLEX_MEDIA_SERVER_HOME environment variable, falling back to two
    levels above PLEXBUNDLEDPLUGINSPATH.

    :return: path to the transcoder binary, or None if no base path is known or the file doesn't exist
    """
    base_path = os.environ.get("PLEX_MEDIA_SERVER_HOME", None)
    if not base_path:
        # fall back to bundled plugins path
        bundle_path = os.environ.get("PLEXBUNDLEDPLUGINSPATH", None)
        if bundle_path:
            base_path = os.path.normpath(os.path.join(bundle_path, "..", ".."))

    if not base_path:
        # neither environment variable is set; os.path.join() would fail on None
        return None

    if sys.platform == "darwin":
        fn = os.path.join(base_path, "MacOS", "Plex Transcoder")
    elif mswindows:
        fn = os.path.join(base_path, "plextranscoder.exe")
    else:
        fn = os.path.join(base_path, "Plex Transcoder")

    if os.path.isfile(fn):
        return fn
|
||||
|
||||
def parse_rename_mode(self):
    """
    Derive self.refiner_settings and self.exact_filenames from the rename related preferences
    ("media_rename1", "use_file_info_file" and the "drone_api.*" entries).

    self.refiner_settings is always reset to a fresh dict; self.exact_filenames is only ever set to True
    here, never reset.
    """
    # fixme: exact_filenames should be determined via callback combined with info about the current video
    # (original_name)

    rename_mode = str(Prefs["media_rename1"])
    settings = self.refiner_settings = {}

    if cast_bool(Prefs['use_file_info_file']):
        settings["file_info_file"] = True
        self.exact_filenames = True

    if rename_mode == "none of the above":
        return

    if rename_mode in ("Symlink to original file", "I keep the original filenames"):
        if rename_mode == "Symlink to original file":
            settings["symlinks"] = True
        self.exact_filenames = True
        return

    if rename_mode in ("Filebot", "Sonarr/Radarr/Filebot"):
        settings["filebot"] = True

    if rename_mode not in ("Sonarr/Radarr (fill api info below)", "Sonarr/Radarr/Filebot"):
        return

    # a drone API is only used when both its URL and its API key are filled in
    if Prefs["drone_api.sonarr.url"] and Prefs["drone_api.sonarr.api_key"]:
        settings["sonarr"] = {
            "base_url": Prefs["drone_api.sonarr.url"],
            "api_key": Prefs["drone_api.sonarr.api_key"]
        }
        self.exact_filenames = True

    if Prefs["drone_api.radarr.url"] and Prefs["drone_api.radarr.api_key"]:
        settings["radarr"] = {
            "base_url": Prefs["drone_api.radarr.url"],
            "api_key": Prefs["drone_api.radarr.api_key"]
        }
        self.exact_filenames = True
|
||||
|
||||
def init_subliminal_patches(self):
|
||||
# configure custom subtitle destination folders for scanning pre-existing subs
|
||||
Log.Debug("Patching subliminal ...")
|
||||
@@ -489,7 +835,7 @@ class Config(object):
|
||||
subliminal_patch.core.INCLUDE_EXOTIC_SUBS = self.exotic_ext
|
||||
|
||||
subliminal_patch.core.DOWNLOAD_TRIES = int(Prefs['subtitles.try_downloads'])
|
||||
subliminal.score.episode_scores["addic7ed_boost"] = int(Prefs['provider.addic7ed.boost_by1'])
|
||||
subliminal.score.episode_scores["addic7ed_boost"] = int(Prefs['provider.addic7ed.boost_by2'])
|
||||
|
||||
|
||||
config = Config()
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
|
||||
|
||||
def dispatch_migrate():
|
||||
@@ -6,6 +7,8 @@ def dispatch_migrate():
|
||||
migrate()
|
||||
except:
|
||||
Log.Error("Migration failed: %s" % traceback.format_exc())
|
||||
del Dict["subs"]
|
||||
Dict.Save()
|
||||
|
||||
|
||||
def migrate():
|
||||
@@ -25,6 +28,7 @@ def migrate():
|
||||
time=item.time)
|
||||
|
||||
del Dict["history"]
|
||||
history.destroy()
|
||||
Dict.Save()
|
||||
|
||||
# migrate subtitle storage from Dict to Data
|
||||
@@ -80,5 +84,6 @@ def migrate():
|
||||
if stored_any:
|
||||
subtitle_storage.save(stored_subs)
|
||||
|
||||
subtitle_storage.destroy()
|
||||
del Dict["subs"]
|
||||
Dict.Save()
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
# coding=utf-8
|
||||
import os
|
||||
|
||||
from subzero.language import Language
|
||||
|
||||
import subliminal_patch as subliminal
|
||||
|
||||
from support.config import config
|
||||
from support.helpers import cast_bool
|
||||
from subtitlehelpers import get_subtitles_from_metadata
|
||||
from subliminal_patch import compute_score
|
||||
from support.plex_media import get_blacklist_from_part_map
|
||||
from subzero.video import refine_video
|
||||
from support.storage import get_pack_data, store_pack_data
|
||||
|
||||
|
||||
def get_missing_languages(video, part):
    """
    Determine which of the configured languages have no subtitle yet for :video:.

    Side effects: languages found in metadata subtitles are added to video.subtitle_languages, and with
    config.ietf_as_alpha3 the country of the compared language objects is cleared in place.

    :param video: object carrying a subtitle_languages set
    :param part: passed to get_subtitles_from_metadata() when subtitles aren't saved to the filesystem
    :return: False if nothing is missing (or one subtitle exists and Prefs['subtitles.only_one'] is set),
             otherwise a set of the missing languages as strings
    """
    wanted = set([Language.fromietf(str(code)) for code in config.lang_list])

    # alpha3 code -> country, remembered so the country can be re-added at the end
    alpha3_map = {}

    def drop_countries(langs):
        # should we treat IETF as alpha3? (ditch the country part)
        for lang in langs:
            if lang.country:
                alpha3_map[lang.alpha3] = lang.country
                lang.country = None

    if config.ietf_as_alpha3:
        drop_countries(wanted)

    if not Prefs['subtitles.save.filesystem']:
        # scan for existing metadata subtitles
        for meta_language, sub_list in get_subtitles_from_metadata(part).iteritems():
            if not sub_list:
                continue
            video.subtitle_languages.add(meta_language)
            Log.Debug("Found metadata subtitle %s for %s", meta_language, video)

    have_languages = video.subtitle_languages.copy()
    if config.ietf_as_alpha3:
        drop_countries(have_languages)

    # compare by string representation
    missing_languages = set(str(lang) for lang in wanted).difference(str(lang) for lang in have_languages)

    # all languages are found if we either really have subs for all languages or we only want to have
    # exactly one language and we've only found one (the case for a selected language,
    # Prefs['subtitles.only_one'] (one found sub matches any language))
    found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
    if found_one_which_is_enough:
        Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
        return False

    if not missing_languages:
        Log.Debug('All languages %r exist for %s', wanted, video)
        return False

    # re-add country codes to the configured languages, in case we've removed them above
    if config.ietf_as_alpha3:
        for lang in wanted:
            lang.country = alpha3_map.get(lang.alpha3, None)

    return missing_languages
|
||||
|
||||
|
||||
def pre_download_hook(subtitle):
    """
    For pack subtitles, attach pack data returned by get_pack_data() (if any) to subtitle.pack_data.

    :param subtitle: object with is_pack / pack_data attributes
    """
    if not subtitle.is_pack:
        return

    # try retrieving the subtitle from a cached pack archive
    cached_pack = get_pack_data(subtitle)
    if cached_pack:
        subtitle.pack_data = cached_pack
|
||||
|
||||
|
||||
def post_download_hook(subtitle):
    """
    Store freshly downloaded pack data via store_pack_data() and drop it from the subtitle afterwards.

    Providers' download methods are responsible for setting subtitle.pack_data to None in case the cached
    pack data we provided was successfully used, so anything still present here is a new pack.

    :param subtitle: object with is_pack / pack_data attributes
    """
    if not (subtitle.is_pack and subtitle.pack_data):
        return

    # store pack data in cache
    store_pack_data(subtitle, subtitle.pack_data)

    # may be redundant
    subtitle.pack_data = None
|
||||
|
||||
|
||||
def language_hook(provider):
    """
    Return config.get_lang_list() for the given provider.

    :param provider: forwarded as the `provider` keyword argument
    """
    provider_languages = config.get_lang_list(provider=provider)
    return provider_languages
|
||||
|
||||
|
||||
def download_best_subtitles(video_part_map, min_score=0, throttle_time=None, providers=None):
    """
    Download the best subtitles for every video in :video_part_map: that is missing languages.

    Videos flagged with ignore_all are treated as missing all configured languages; the others are checked
    with get_missing_languages(). Videos with missing languages are refined before downloading.

    :param video_part_map: dict mapping video -> part
    :param min_score: passed through to subliminal.download_best_subtitles
    :param throttle_time: passed through to subliminal.download_best_subtitles
    :param providers: providers to use; falls back to config.providers when empty
    :return: result of subliminal.download_best_subtitles, or None if no languages are configured or
             no video needs subtitles
    """
    hearing_impaired = Prefs['subtitles.search.hearingImpaired']
    languages = set([Language.fromietf(str(code)) for code in config.lang_list])
    if not languages:
        return

    use_videos = []
    for video, part in video_part_map.iteritems():
        missing_languages = languages if video.ignore_all else get_missing_languages(video, part)
        if not missing_languages:
            continue

        Log.Info(u"%s has missing languages: %s", os.path.basename(video.name), missing_languages)
        refine_video(video, refiner_settings=config.refiner_settings)
        use_videos.append(video)

    # prepare blacklist
    blacklist = get_blacklist_from_part_map(video_part_map, languages)

    if not use_videos:
        Log.Debug("All languages for all requested videos exist. Doing nothing.")
        return

    Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s, languages: %s" %
              (min_score, hearing_impaired, languages))

    return subliminal.download_best_subtitles(set(use_videos), languages, min_score, hearing_impaired,
                                              providers=providers or config.providers,
                                              provider_configs=config.provider_settings,
                                              pool_class=config.provider_pool,
                                              compute_score=compute_score, throttle_time=throttle_time,
                                              blacklist=blacklist, throttle_callback=config.provider_throttle,
                                              pre_download_hook=pre_download_hook,
                                              post_download_hook=post_download_hook,
                                              language_hook=language_hook)
|
||||
@@ -10,10 +10,12 @@ import re
|
||||
import platform
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import OrderedDict
|
||||
|
||||
import chardet
|
||||
|
||||
from bs4 import UnicodeDammit
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
from subzero.analytics import track_event
|
||||
|
||||
mswindows = (sys.platform == "win32")
|
||||
@@ -42,6 +44,13 @@ def cast_bool(value):
|
||||
return str(value).strip() in ("true", "True")
|
||||
|
||||
|
||||
def cast_int(value, default=None):
    """
    Convert :value: to an int, returning :default: when that isn't possible.

    :param value: anything int() accepts
    :param default: returned when int() rejects the value
    :return: int(value) or default
    """
    try:
        return int(value)
    except (ValueError, TypeError):
        # ValueError: unparsable string; TypeError: unsupported type such as None
        return default
|
||||
|
||||
|
||||
# A platform independent way to split paths which might come in with different separators.
|
||||
def split_path(str):
|
||||
if str.find('\\') != -1:
|
||||
@@ -149,10 +158,11 @@ def get_video_display_title(kind, title, section_title=None, parent_title=None,
|
||||
if add_section_title:
|
||||
section_add = ("%s: " % section_title) if section_title else ""
|
||||
|
||||
if kind == "show" and parent_title:
|
||||
if kind in ("season", "show") and parent_title:
|
||||
if season and episode:
|
||||
return '%s%s S%02dE%02d%s' % (section_add, parent_title, season or 0, episode or 0,
|
||||
(", %s" % title if title else ""))
|
||||
|
||||
return '%s%s%s' % (section_add, parent_title, (", %s" % title if title else ""))
|
||||
return "%s%s" % (section_add, title)
|
||||
|
||||
@@ -200,7 +210,7 @@ def decode_message(s):
|
||||
|
||||
|
||||
def timestamp():
|
||||
return int(time.time())
|
||||
return int(time.time()*1000)
|
||||
|
||||
|
||||
def df(d):
|
||||
@@ -265,7 +275,7 @@ def notify_executable(exe_info, videos, subtitles, storage):
|
||||
exe, arguments = exe_info
|
||||
for video, video_subtitles in subtitles.items():
|
||||
for subtitle in video_subtitles:
|
||||
lang = Locale.Language.Match(subtitle.language.alpha2)
|
||||
lang = str(subtitle.language)
|
||||
data = video.plexapi_metadata.copy()
|
||||
data.update({
|
||||
"subtitle_language": lang,
|
||||
@@ -307,9 +317,32 @@ def track_usage(category=None, action=None, label=None, value=None):
|
||||
if not cast_bool(Prefs["track_usage"]):
|
||||
return
|
||||
|
||||
Thread.Create(dispatch_track_usage, category, action, label, value,
|
||||
identifier=Dict["anon_id"], first_use=Dict["first_use"],
|
||||
add=Network.PublicAddress)
|
||||
if "last_tracked" not in Dict:
|
||||
Dict["last_tracked"] = OrderedDict()
|
||||
Dict.Save()
|
||||
|
||||
event_key = (category, action, label, value)
|
||||
now = datetime.datetime.now()
|
||||
if event_key in Dict["last_tracked"] and (Dict["last_tracked"][event_key] + datetime.timedelta(minutes=30)) < now:
|
||||
return
|
||||
|
||||
Dict["last_tracked"][event_key] = now
|
||||
|
||||
# maintenance
|
||||
for key, value in Dict["last_tracked"].copy().iteritems():
|
||||
# kill day old values
|
||||
if value < now - datetime.timedelta(days=1):
|
||||
try:
|
||||
del Dict["last_tracked"][key]
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
Thread.Create(dispatch_track_usage, category, action, label, value,
|
||||
identifier=Dict["anon_id"], first_use=Dict["first_use"],
|
||||
add=Network.PublicAddress)
|
||||
except:
|
||||
Log.Debug("Something went wrong when reporting anonymous user statistics: %s", traceback.format_exc())
|
||||
|
||||
|
||||
def dispatch_track_usage(*args, **kwargs):
|
||||
@@ -322,9 +355,27 @@ def dispatch_track_usage(*args, **kwargs):
|
||||
Log.Debug("Something went wrong when reporting anonymous user statistics: %s", traceback.format_exc())
|
||||
|
||||
|
||||
def get_language_from_stream(lang_code):
    """
    Turn a stream's language code into a Language object.

    :param lang_code: language code; matched with Locale.Language.Match()
    :return: Language.fromietf() of the matched code, or None if the code is empty, unmatched or "xx"
    """
    if not lang_code:
        return

    matched = Locale.Language.Match(lang_code)
    if not matched or matched == "xx":
        return

    # Log.Debug("Found language: %r", matched)
    return Language.fromietf(matched)
|
||||
|
||||
|
||||
def get_language(lang_short):
    """
    Build a Language object from :lang_short: via Language.fromietf().
    """
    language = Language.fromietf(lang_short)
    return language
|
||||
|
||||
|
||||
def display_language(l):
    """
    Build a display name for a language: its name, followed by the country's alpha2 code and/or the
    script's code in parentheses when those are set.

    :param l: object with name, country (having alpha2) and script (having code) attributes
    :return: e.g. "<name>" or "<name> (<alpha2>, <script code>)"
    """
    qualifiers = []

    country = l.country
    if country:
        qualifiers.append(country.alpha2)

    script = l.script
    if script:
        qualifiers.append(script.code)

    if not qualifiers:
        return l.name

    return "%s (%s)" % (l.name, ", ".join(qualifiers))
|
||||
|
||||
|
||||
class PartUnknownException(Exception):
|
||||
pass
|
||||
@@ -1,4 +1,4 @@
|
||||
# coding=utf-8
|
||||
from subzero.history_storage import SubtitleHistory
|
||||
|
||||
get_history = lambda: SubtitleHistory(Data, int(Prefs["history_size"]))
|
||||
get_history = lambda: SubtitleHistory(Data, Thread, int(Prefs["history_size"]))
|
||||
|
||||
@@ -11,7 +11,8 @@ class IgnoreDict(DictProxy):
|
||||
"section": "sections",
|
||||
"show": "series",
|
||||
"movie": "videos",
|
||||
"episode": "videos"
|
||||
"episode": "videos",
|
||||
"season": "seasons",
|
||||
}
|
||||
|
||||
# getItems types mapped to their verbose names
|
||||
@@ -19,9 +20,10 @@ class IgnoreDict(DictProxy):
|
||||
"sections": "Section",
|
||||
"series": "Series",
|
||||
"videos": "Item",
|
||||
"seasons": "Season",
|
||||
}
|
||||
|
||||
key_order = ("sections", "series", "videos")
|
||||
key_order = ("sections", "series", "videos", "seasons")
|
||||
|
||||
def __len__(self):
|
||||
try:
|
||||
@@ -35,7 +37,7 @@ class IgnoreDict(DictProxy):
|
||||
return self.translate_keys.get(name)
|
||||
|
||||
def verbose(self, name):
|
||||
return self.keys_verbose.get(name)
|
||||
return self.keys_verbose.get(self.translate_key(name) or name)
|
||||
|
||||
def get_title_key(self, kind, key):
|
||||
return "%s_%s" % (kind, key)
|
||||
@@ -57,6 +59,7 @@ class IgnoreDict(DictProxy):
|
||||
Dict.Save()
|
||||
|
||||
def setup_defaults(self):
|
||||
return {"sections": [], "series": [], "videos": [], "titles": {}}
|
||||
return {"sections": [], "series": [], "videos": [], "titles": {}, "seasons": []}
|
||||
|
||||
|
||||
ignore_list = IgnoreDict(Dict)
|
||||
|
||||
+104
-35
@@ -5,6 +5,11 @@ import re
|
||||
import traceback
|
||||
import types
|
||||
import os
|
||||
|
||||
import time
|
||||
|
||||
import datetime
|
||||
|
||||
from ignore import ignore_list
|
||||
from helpers import is_recent, get_plex_item_display_title, query_plex, PartUnknownException
|
||||
from lib import Plex, get_intent
|
||||
@@ -20,12 +25,16 @@ container_size_re = re.compile(ur'totalSize="(\d+)"')
|
||||
|
||||
|
||||
def get_item(key):
|
||||
item_id = int(key)
|
||||
try:
|
||||
item_id = int(key)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
item_container = Plex["library"].metadata(item_id)
|
||||
|
||||
try:
|
||||
return list(item_container)[0]
|
||||
except IndexError:
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
@@ -50,6 +59,21 @@ def get_item_kind_from_item(item):
|
||||
return PLEX_API_TYPE_MAP.get(get_item_kind(item))
|
||||
|
||||
|
||||
def get_item_title(item):
    """
    Build the display title for :item: using get_plex_item_display_title().

    :param item: item whose kind is resolved with get_item_kind_from_item()
    :return: display title, or None for kinds other than "episode", "movie", "season" and "series"
    """
    kind = get_item_kind_from_item(item)

    if kind == "episode":
        return get_plex_item_display_title(item, "show", parent=item.season, section_title=None,
                                           parent_title=item.show.title)

    if kind == "season":
        return get_plex_item_display_title(item, "season", parent=item.show, section_title="Season",
                                           parent_title=item.show.title)

    if kind in ("movie", "series"):
        return get_plex_item_display_title(item, kind, section_title=None)

    return
|
||||
|
||||
|
||||
def get_item_thumb(item):
|
||||
kind = get_item_kind(item)
|
||||
if kind == "Episode":
|
||||
@@ -236,7 +260,7 @@ def is_ignored(rating_key, item=None):
|
||||
:return:
|
||||
"""
|
||||
# item in soft ignore list
|
||||
if rating_key in ignore_list["videos"]:
|
||||
if ignore_list["videos"] and rating_key in ignore_list["videos"]:
|
||||
Log.Debug("Item %s is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
@@ -244,12 +268,17 @@ def is_ignored(rating_key, item=None):
|
||||
kind = get_item_kind(item)
|
||||
|
||||
# show in soft ignore list
|
||||
if kind == "Episode" and item.show.rating_key in ignore_list["series"]:
|
||||
if kind == "Episode" and ignore_list["series"] and item.show.rating_key in ignore_list["series"]:
|
||||
Log.Debug("Item %s's show is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
# season in soft ignore list
|
||||
if kind == "Episode" and ignore_list["seasons"] and item.season.rating_key in ignore_list["seasons"]:
|
||||
Log.Debug("Item %s's season is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
# section in soft ignore list
|
||||
if item.section.key in ignore_list["sections"]:
|
||||
if ignore_list["sections"] and item.section.key in ignore_list["sections"]:
|
||||
Log.Debug("Item %s's section is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
@@ -299,26 +328,87 @@ def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, paren
|
||||
# season refresh, needs explicit per-episode refresh
|
||||
refresh = [item.rating_key for item in list(Plex["library/metadata"].children(int(rating_key)))]
|
||||
|
||||
multiple = len(refresh) > 1
|
||||
for key in refresh:
|
||||
Log.Info("%s item %s", "Refreshing" if not force else "Forced-refreshing", key)
|
||||
Plex["library/metadata"].refresh(key)
|
||||
if multiple:
|
||||
Thread.Sleep(10.0)
|
||||
|
||||
|
||||
def get_current_sub(rating_key, part_id, language):
|
||||
def get_current_sub(rating_key, part_id, language, plex_item=None):
|
||||
from support.storage import get_subtitle_storage
|
||||
|
||||
item = get_item(rating_key)
|
||||
item = plex_item or get_item(rating_key)
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load_or_new(item)
|
||||
current_sub = stored_subs.get_any(part_id, language)
|
||||
return current_sub, stored_subs, subtitle_storage
|
||||
|
||||
|
||||
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
|
||||
from support.plex_media import get_plex_metadata, scan_videos
|
||||
from support.storage import save_subtitles
|
||||
def save_stored_sub(stored_subtitle, rating_key, part_id, language, item_type, plex_item=None, storage=None,
                    stored_subs=None):
    """
    Write :stored_subtitle: (with its current mods) out again for the given item part.

    :param stored_subtitle: stored subtitle entry; its mods, content, encoding and id are used, and
                            content/encoding/last_mod may be updated
    :param rating_key: rating key of the item
    :param part_id: id of the part the subtitle belongs to
    :param language: language of the subtitle
    :param item_type: passed to get_plex_metadata()
    :param plex_item: already fetched item; fetched with get_item(rating_key) if omitted
    :param storage: subtitle storage to use; if omitted, one is created here and destroyed before returning
    :param stored_subs: already loaded stored subtitles; loaded from storage if omitted
    :return: None
    """
    from support.plex_media import get_plex_metadata
    from support.scanning import scan_videos
    from support.storage import save_subtitles, get_subtitle_storage

    plex_item = plex_item or get_item(rating_key)
    if not plex_item:
        # checked before plex_item.rating_key is dereferenced below
        return

    # only a storage we create ourselves is ours to destroy; this has to be decided *before* the fallback
    # assignment, afterwards `storage` is always set
    cleanup = not storage
    storage = storage or get_subtitle_storage()

    try:
        stored_subs = stored_subs or storage.load(plex_item.rating_key)
        if not stored_subs:
            return

        try:
            metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
        except PartUnknownException:
            return

        scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
        video, plex_part = scanned_parts.items()[0]

        subtitle = ModifiedSubtitle(language, mods=stored_subtitle.mods)
        subtitle.content = stored_subtitle.content
        if stored_subtitle.encoding:
            # thanks plex
            setattr(subtitle, "_guessed_encoding", stored_subtitle.encoding)

            if stored_subtitle.encoding != "utf-8":
                # store the normalized content back, marked as utf-8
                subtitle.normalize()
                stored_subtitle.content = subtitle.content
                stored_subtitle.encoding = "utf-8"
                storage.save(stored_subs)

        subtitle.plex_media_fps = plex_part.fps
        subtitle.page_link = stored_subtitle.id
        subtitle.language = language
        subtitle.id = stored_subtitle.id

        try:
            save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
            Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
                      ", ".join(stored_subtitle.mods) if stored_subtitle.mods else "none")
        except:
            # deliberate best-effort: a failed save is logged, not raised
            Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())

        if subtitle.storage_path:
            stored_subtitle.last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(subtitle.storage_path))
            storage.save(stored_subs)
    finally:
        # also reached on the early returns above
        if cleanup:
            storage.destroy()
|
||||
|
||||
|
||||
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
|
||||
plex_item = get_item(rating_key)
|
||||
|
||||
if not plex_item:
|
||||
return
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language, plex_item=plex_item)
|
||||
if mode == "add":
|
||||
for mod in mods:
|
||||
identifier, args = SubtitleModifications.parse_identifier(mod)
|
||||
@@ -348,28 +438,7 @@ def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"
|
||||
raise NotImplementedError("Wrong mode given")
|
||||
storage.save(stored_subs)
|
||||
|
||||
try:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
except PartUnknownException:
|
||||
return
|
||||
save_stored_sub(current_sub, rating_key, part_id, language, item_type, plex_item=plex_item, storage=storage,
|
||||
stored_subs=stored_subs)
|
||||
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
subtitle = ModifiedSubtitle(language, mods=current_sub.mods)
|
||||
subtitle.content = current_sub.content
|
||||
if current_sub.encoding:
|
||||
# thanks plex
|
||||
setattr(subtitle, "_guessed_encoding", current_sub.encoding)
|
||||
|
||||
subtitle.plex_media_fps = plex_part.fps
|
||||
subtitle.page_link = "modify subtitles with: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
subtitle.language = language
|
||||
subtitle.id = current_sub.id
|
||||
|
||||
try:
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
|
||||
Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
|
||||
", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
except:
|
||||
Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())
|
||||
storage.destroy()
|
||||
|
||||
@@ -9,29 +9,33 @@ import subtitlehelpers
|
||||
from config import config as sz_config
|
||||
|
||||
|
||||
SECONDARY_TAGS = ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom', 'hi', 'cc', 'sdh']
|
||||
|
||||
|
||||
def find_subtitles(part):
|
||||
lang_sub_map = {}
|
||||
part_filename = helpers.unicodize(part.file)
|
||||
part_basename = os.path.splitext(os.path.basename(part_filename))[0]
|
||||
use_filesystem = helpers.cast_bool(Prefs["subtitles.save.filesystem"])
|
||||
paths = [os.path.dirname(part_filename)] if use_filesystem else []
|
||||
sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
|
||||
if Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
|
||||
global_subtitle_folder = None
|
||||
use_sub_subfolder = Prefs["subtitles.save.subFolder"] != "current folder" and not sub_dir_custom
|
||||
sub_subfolder = None
|
||||
paths = [os.path.dirname(part_filename)] if use_filesystem else []
|
||||
|
||||
global_folders = []
|
||||
|
||||
if use_filesystem:
|
||||
# Check for local subtitles subdirectory
|
||||
sub_dir_base = paths[0]
|
||||
|
||||
sub_dir_list = []
|
||||
|
||||
if Prefs["subtitles.save.subFolder"] != "current folder":
|
||||
if use_sub_subfolder:
|
||||
# got selected subfolder
|
||||
sub_dir_list.append(os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"]))
|
||||
|
||||
sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
|
||||
if Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
sub_subfolder = os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"])
|
||||
sub_dir_list.append(sub_subfolder)
|
||||
sub_subfolder = os.path.normpath(helpers.unicodize(sub_subfolder))
|
||||
|
||||
if sub_dir_custom:
|
||||
# got custom subfolder
|
||||
@@ -84,8 +88,12 @@ def find_subtitles(part):
|
||||
media_files.append(root)
|
||||
|
||||
# cleanup any leftover subtitle if no associated media file was found
|
||||
if helpers.cast_bool(Prefs["subtitles.autoclean"]):
|
||||
if use_filesystem and helpers.cast_bool(Prefs["subtitles.autoclean"]):
|
||||
for path in paths:
|
||||
# only housekeep in sub_subfolder if sub_subfolder is used
|
||||
if use_sub_subfolder and path != sub_subfolder and not sz_config.advanced.thorough_cleaning:
|
||||
continue
|
||||
|
||||
# we can't housekeep the global subtitle folders as we don't know about *all* media files
|
||||
# in a library; skip them
|
||||
skip_path = False
|
||||
@@ -105,11 +113,10 @@ def find_subtitles(part):
|
||||
if os.path.isfile(enc_fn):
|
||||
(root, ext) = os.path.splitext(file_path_listing)
|
||||
# it's a subtitle file
|
||||
if ext.lower()[1:] in config.SUBTITLE_EXTS:
|
||||
if ext.lower()[1:] in config.SUBTITLE_EXTS_BASE:
|
||||
# get fn without forced/default/normal tag
|
||||
split_tag = root.rsplit(".", 1)
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded',
|
||||
'custom']:
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
|
||||
root = split_tag[0]
|
||||
|
||||
# get associated media file name without language
|
||||
@@ -135,7 +142,7 @@ def find_subtitles(part):
|
||||
# get fn without forced/default/normal tag
|
||||
split_tag = local_basename.rsplit(".", 1)
|
||||
has_additional_tag = False
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'custom']:
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
|
||||
local_basename = split_tag[0]
|
||||
has_additional_tag = True
|
||||
|
||||
@@ -159,7 +166,7 @@ def find_subtitles(part):
|
||||
continue
|
||||
|
||||
# determine whether to pick up the subtitle based on our match strictness
|
||||
elif not filename_matches_part:
|
||||
if not filename_matches_part:
|
||||
if sz_config.ext_match_strictness == "strict" or (
|
||||
sz_config.ext_match_strictness == "loose" and not filename_contains_part):
|
||||
# Log.Debug("%s doesn't match %s, skipping" % (helpers.unicodize(local_filename),
|
||||
|
||||
@@ -2,10 +2,17 @@
|
||||
import traceback
|
||||
import time
|
||||
|
||||
from support.config import config
|
||||
from support.helpers import get_plex_item_display_title, cast_bool
|
||||
import os
|
||||
|
||||
from babelfish import LanguageReverseError
|
||||
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.helpers import get_plex_item_display_title, cast_bool, get_language_from_stream
|
||||
from support.items import get_item
|
||||
from support.lib import Plex
|
||||
from support.storage import get_subtitle_storage
|
||||
from subzero.video import has_external_subtitle
|
||||
from subzero.language import Language
|
||||
|
||||
|
||||
def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_title=None, internal=False, external=True, languages=()):
|
||||
@@ -17,11 +24,59 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
else:
|
||||
item_title = get_plex_item_display_title(item, kind, section_title=section_title)
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load(rating_key)
|
||||
subtitle_storage.destroy()
|
||||
|
||||
subtitle_target_dir, tdir_is_absolute = config.subtitle_sub_dir
|
||||
|
||||
missing = set()
|
||||
languages_set = set(languages)
|
||||
languages_set = set([Language.fromietf(str(l)) for l in languages])
|
||||
for media in item.media:
|
||||
existing_subs = {"internal": [], "external": [], "count": 0}
|
||||
existing_subs = {"internal": [], "external": [], "own_external": [], "count": 0}
|
||||
for part in media.parts:
|
||||
|
||||
# did we already download an external subtitle before?
|
||||
if subtitle_target_dir and stored_subs:
|
||||
for language in languages_set:
|
||||
if has_external_subtitle(part.id, stored_subs, language):
|
||||
# check the existence of the actual subtitle file
|
||||
|
||||
# get media filename without extension
|
||||
part_basename = os.path.splitext(os.path.basename(part.file))[0]
|
||||
|
||||
# compute target directory for subtitle
|
||||
# fixme: move to central location
|
||||
if tdir_is_absolute:
|
||||
possible_subtitle_path_base = subtitle_target_dir
|
||||
else:
|
||||
possible_subtitle_path_base = os.path.join(os.path.dirname(part.file), subtitle_target_dir)
|
||||
|
||||
possible_subtitle_path_base = os.path.realpath(possible_subtitle_path_base)
|
||||
|
||||
# folder actually exists?
|
||||
if not os.path.isdir(possible_subtitle_path_base):
|
||||
continue
|
||||
|
||||
found_any = False
|
||||
for ext in config.subtitle_formats:
|
||||
if cast_bool(Prefs['subtitles.only_one']):
|
||||
possible_subtitle_path = os.path.join(possible_subtitle_path_base,
|
||||
u"%s.%s" % (part_basename, ext))
|
||||
else:
|
||||
possible_subtitle_path = os.path.join(possible_subtitle_path_base,
|
||||
u"%s.%s.%s" % (part_basename, language, ext))
|
||||
|
||||
# check for subtitle existence
|
||||
if os.path.isfile(possible_subtitle_path):
|
||||
found_any = True
|
||||
Log.Debug(u"Found: %s", possible_subtitle_path)
|
||||
break
|
||||
|
||||
if found_any:
|
||||
existing_subs["own_external"].append(language)
|
||||
existing_subs["count"] = existing_subs["count"] + 1
|
||||
|
||||
for stream in part.streams:
|
||||
if stream.stream_type == 3:
|
||||
if stream.index:
|
||||
@@ -29,18 +84,72 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
else:
|
||||
key = "external"
|
||||
|
||||
existing_subs[key].append(Locale.Language.Match(stream.language_code or ""))
|
||||
existing_subs["count"] = existing_subs["count"] + 1
|
||||
if not config.exotic_ext and stream.codec.lower() not in TEXT_SUBTITLE_EXTS:
|
||||
continue
|
||||
|
||||
missing_from_part = set(languages_set)
|
||||
# treat unknown language as lang1?
|
||||
if not stream.language_code and config.treat_und_as_first:
|
||||
lang = Language.fromietf(str(list(config.lang_list)[0]))
|
||||
|
||||
# we can't parse empty language codes
|
||||
elif not stream.language_code or not stream.codec:
|
||||
continue
|
||||
|
||||
else:
|
||||
# parse with internal language parser first
|
||||
try:
|
||||
lang = get_language_from_stream(stream.language_code)
|
||||
if not lang:
|
||||
if config.treat_und_as_first:
|
||||
lang = Language.fromietf(str(list(config.lang_list)[0]))
|
||||
else:
|
||||
continue
|
||||
|
||||
except (ValueError, LanguageReverseError):
|
||||
continue
|
||||
|
||||
if lang:
|
||||
# Log.Debug("Found babelfish language: %r", lang)
|
||||
existing_subs[key].append(lang)
|
||||
existing_subs["count"] = existing_subs["count"] + 1
|
||||
|
||||
missing_from_part = set([Language.fromietf(str(l)) for l in languages])
|
||||
if existing_subs["count"]:
|
||||
existing_flat = set((existing_subs["internal"] if internal else []) + (existing_subs["external"] if external else []))
|
||||
if languages_set.issubset(existing_flat) or (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
|
||||
|
||||
# fixme: this is actually somewhat broken with IETF, as Plex doesn't store the country portion
|
||||
# (pt instead of pt-BR) inside the database. So it might actually download pt-BR if there's a local pt-BR
|
||||
# subtitle but not our own.
|
||||
existing_flat = set((existing_subs["internal"] if internal else [])
|
||||
+ (existing_subs["external"] if external else [])
|
||||
+ existing_subs["own_external"])
|
||||
|
||||
check_languages = set([Language.fromietf(str(l)) for l in languages])
|
||||
alpha3_map = {}
|
||||
if config.ietf_as_alpha3:
|
||||
for language in existing_flat:
|
||||
if language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
|
||||
for language in check_languages:
|
||||
if language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
|
||||
# compare sets of strings, not sets of different Language instances
|
||||
check_languages_str = set(str(l) for l in check_languages)
|
||||
existing_flat_str = set(str(l) for l in existing_flat)
|
||||
|
||||
if check_languages_str.issubset(existing_flat_str) or \
|
||||
(len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
|
||||
# all subs found
|
||||
#Log.Info(u"All subtitles exist for '%s'", item_title)
|
||||
continue
|
||||
|
||||
missing_from_part = languages_set - existing_flat
|
||||
missing_from_part = set(Language.fromietf(l) for l in check_languages_str - existing_flat_str)
|
||||
if config.ietf_as_alpha3:
|
||||
for language in missing_from_part:
|
||||
language.country = alpha3_map.get(language.alpha3, None)
|
||||
|
||||
if missing_from_part:
|
||||
Log.Info(u"Subs still missing for '%s' (%s: %s): %s", item_title, rating_key, media.id,
|
||||
@@ -48,6 +157,8 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
missing.update(missing_from_part)
|
||||
|
||||
if missing:
|
||||
# deduplicate
|
||||
missing = set(Language.fromietf(la) for la in set(str(l) for l in missing))
|
||||
return added_at, item_id, item_title, item, missing
|
||||
|
||||
|
||||
@@ -60,7 +171,7 @@ def items_get_all_missing_subs(items, sleep_after_request=False):
|
||||
kind=kind,
|
||||
added_at=added_at,
|
||||
section_title=section_title,
|
||||
languages=config.lang_list,
|
||||
languages=config.lang_list.copy(),
|
||||
internal=cast_bool(Prefs["subtitles.scan.embedded"]),
|
||||
external=cast_bool(Prefs["subtitles.scan.external"])
|
||||
)
|
||||
@@ -75,9 +186,7 @@ def items_get_all_missing_subs(items, sleep_after_request=False):
|
||||
|
||||
|
||||
def refresh_item(item):
    """
    Ask Plex to refresh the metadata of a single item.

    Nothing is done when ``config.no_refresh`` is truthy.

    :param item: value handed to ``Plex["library/metadata"].refresh``
    """
    # honor the no_refresh setting; the refresh must not run unconditionally
    if not config.no_refresh:
        Plex["library/metadata"].refresh(item)
|
||||
|
||||
|
||||
def refresh_items(items):
    """
    Call refresh_item for the first element of every ``(item, title)`` pair in ``items``.

    :param items: iterable of two-element ``(item, title)`` pairs; the title is not used
    """
    for pair in items:
        item, _title = pair
        refresh_item(item)
|
||||
|
||||
+171
-144
@@ -1,13 +1,12 @@
|
||||
# coding=utf-8
|
||||
|
||||
import os
|
||||
from urllib2 import URLError
|
||||
|
||||
import helpers
|
||||
from config import config
|
||||
from items import get_item
|
||||
from lib import get_intent, Plex
|
||||
from subzero.video import parse_video
|
||||
from lib import Plex
|
||||
from support.config import TEXT_SUBTITLE_EXTS, config
|
||||
|
||||
|
||||
def get_metadata_dict(item, part, add):
|
||||
data = {
|
||||
@@ -25,6 +24,51 @@ imdb_guid_identifier = "com.plexapp.agents.imdb://"
|
||||
tvdb_guid_identifier = "com.plexapp.agents.thetvdb://"
|
||||
|
||||
|
||||
def get_plexapi_stream_info(plex_item, part_id=None):
    """
    Collect basic codec, channel and resolution info for one part of a Plex item.

    :param plex_item: object exposing ``media``; every media exposes ``parts``, ``video_codec``,
                      ``audio_codec``, ``audio_channels`` and ``video_resolution``; every part exposes
                      ``id`` and ``streams``
    :param part_id: id of the wanted part, given as string or integer; if falsy, the first part found is used
    :return: ``{"stream": {...}}``; the inner dict stays empty when no part matched
    """
    d = {"stream": {}}
    data = d["stream"]

    # compare ids as strings on both sides, so an integer part_id matches as well
    wanted_id = str(part_id) if part_id else None

    # find current part
    current_part = None
    current_media = None
    for media in plex_item.media:
        for part in media.parts:
            if wanted_id is None or str(part.id) == wanted_id:
                current_part = part
                current_media = media
                break
        if current_part:
            break

    if not current_part:
        return d

    data["video_codec"] = current_media.video_codec
    if current_media.audio_codec:
        audio_codec = current_media.audio_codec.upper()

        # report DCA as DTS; done inside the guard, as the key only exists when a codec is known
        if audio_codec == "DCA":
            audio_codec = "DTS"
        data["audio_codec"] = audio_codec

    if current_media.audio_channels == 8:
        data["audio_channels"] = "7.1"

    elif current_media.audio_channels == 6:
        data["audio_channels"] = "5.1"
    else:
        data["audio_channels"] = "%s.0" % str(current_media.audio_channels)

    # iter streams
    for stream in current_part.streams:
        if stream.stream_type == 1:
            # video stream; only the first one is considered
            data["resolution"] = "%s%s" % (current_media.video_resolution,
                                           "i" if stream.scan_type != "progressive" else "p")
            break

    return d
|
||||
|
||||
|
||||
def media_to_videos(media, kind="series"):
|
||||
"""
|
||||
iterates through media and returns the associated parts (videos)
|
||||
@@ -34,9 +78,10 @@ def media_to_videos(media, kind="series"):
|
||||
"""
|
||||
videos = []
|
||||
|
||||
item = get_item(media.id)
|
||||
year = item.year
|
||||
original_title = item.title_original
|
||||
# this is a Show or a Movie object
|
||||
plex_item = get_item(media.id)
|
||||
year = plex_item.year
|
||||
original_title = plex_item.title_original
|
||||
|
||||
if kind == "series":
|
||||
for season in media.seasons:
|
||||
@@ -52,35 +97,42 @@ def media_to_videos(media, kind="series"):
|
||||
|
||||
# get plex item via API for additional metadata
|
||||
plex_episode = get_item(ep.id)
|
||||
stream_info = get_plexapi_stream_info(plex_episode)
|
||||
|
||||
for item in media.seasons[season].episodes[episode].items:
|
||||
for part in item.parts:
|
||||
videos.append(
|
||||
get_metadata_dict(plex_episode, part,
|
||||
{"plex_part": part, "type": "episode", "title": ep.title,
|
||||
"series": media.title, "id": ep.id, "year": year,
|
||||
"series_id": media.id, "season_id": season_object.id,
|
||||
"imdb_id": None, "series_tvdb_id": series_tvdb_id, "tvdb_id": tvdb_id,
|
||||
"original_title": original_title,
|
||||
"episode": plex_episode.index, "season": plex_episode.season.index,
|
||||
"section": plex_episode.section.title
|
||||
})
|
||||
dict(stream_info, **{"plex_part": part, "type": "episode",
|
||||
"title": ep.title,
|
||||
"series": media.title, "id": ep.id, "year": year,
|
||||
"series_id": media.id,
|
||||
"season_id": season_object.id,
|
||||
"imdb_id": None, "series_tvdb_id": series_tvdb_id,
|
||||
"tvdb_id": tvdb_id,
|
||||
"original_title": original_title,
|
||||
"episode": plex_episode.index,
|
||||
"season": plex_episode.season.index,
|
||||
"section": plex_episode.section.title
|
||||
})
|
||||
)
|
||||
)
|
||||
else:
|
||||
plex_item = get_item(media.id)
|
||||
stream_info = get_plexapi_stream_info(plex_item)
|
||||
imdb_id = None
|
||||
if imdb_guid_identifier in media.guid:
|
||||
imdb_id = media.guid[len(imdb_guid_identifier):].split("?")[0]
|
||||
for item in media.items:
|
||||
for part in item.parts:
|
||||
videos.append(
|
||||
get_metadata_dict(plex_item, part, {"plex_part": part, "type": "movie",
|
||||
"title": media.title, "id": media.id,
|
||||
"series_id": None, "year": year,
|
||||
"season_id": None, "imdb_id": imdb_id,
|
||||
"original_title": original_title,
|
||||
"series_tvdb_id": None, "tvdb_id": None,
|
||||
"section": plex_item.section.title})
|
||||
get_metadata_dict(plex_item, part, dict(stream_info, **{"plex_part": part, "type": "movie",
|
||||
"title": media.title, "id": media.id,
|
||||
"series_id": None, "year": year,
|
||||
"season_id": None, "imdb_id": imdb_id,
|
||||
"original_title": original_title,
|
||||
"series_tvdb_id": None, "tvdb_id": None,
|
||||
"section": plex_item.section.title})
|
||||
)
|
||||
)
|
||||
return videos
|
||||
|
||||
@@ -101,10 +153,9 @@ def get_stream_fps(streams):
|
||||
|
||||
|
||||
def get_media_item_ids(media, kind="series"):
|
||||
ids = []
|
||||
if kind == "movies":
|
||||
ids.append(media.id)
|
||||
else:
|
||||
# fixme: does this work correctly for full series force-refreshes and its intents?
|
||||
ids = [media.id]
|
||||
if kind == "series":
|
||||
for season in media.seasons:
|
||||
for episode in media.seasons[season].episodes:
|
||||
ids.append(media.seasons[season].episodes[episode].id)
|
||||
@@ -112,123 +163,77 @@ def get_media_item_ids(media, kind="series"):
|
||||
return ids
|
||||
|
||||
|
||||
def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None):
|
||||
"""
|
||||
returnes a subliminal/guessit-refined parsed video
|
||||
:param pms_video_info:
|
||||
:param ignore_all:
|
||||
:param hints:
|
||||
:param rating_key:
|
||||
:return:
|
||||
"""
|
||||
embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
|
||||
external_subtitles = not ignore_all and Prefs['subtitles.scan.external']
|
||||
|
||||
plex_part = pms_video_info["plex_part"]
|
||||
|
||||
if ignore_all:
|
||||
Log.Debug("Force refresh intended.")
|
||||
|
||||
Log.Debug("Scanning video: %s, subtitles=%s, embedded_subtitles=%s" % (
|
||||
plex_part.file, external_subtitles, embedded_subtitles))
|
||||
|
||||
known_embedded = []
|
||||
def get_all_parts(plex_item):
|
||||
parts = []
|
||||
for media in list(Plex["library"].metadata(rating_key))[0].media:
|
||||
for media in plex_item.media:
|
||||
parts += media.parts
|
||||
|
||||
plexpy_part = None
|
||||
for part in parts:
|
||||
if int(part.id) == int(plex_part.id):
|
||||
plexpy_part = part
|
||||
|
||||
# embedded subtitles
|
||||
if plexpy_part:
|
||||
for stream in plexpy_part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3:
|
||||
if (config.forced_only and getattr(stream, "forced")) or \
|
||||
(not config.forced_only and not getattr(stream, "forced")):
|
||||
|
||||
# embedded subtitle
|
||||
if not stream.stream_key:
|
||||
if config.exotic_ext or stream.codec in ("srt", "ass", "ssa"):
|
||||
lang_code = stream.language_code
|
||||
|
||||
# treat unknown language as lang1?
|
||||
if not lang_code and config.treat_und_as_first:
|
||||
lang_code = list(config.lang_list)[0].alpha3
|
||||
known_embedded.append(lang_code)
|
||||
else:
|
||||
Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)
|
||||
|
||||
try:
|
||||
# get basic video info scan (filename)
|
||||
video = parse_video(plex_part.file, pms_video_info, hints, external_subtitles=external_subtitles,
|
||||
embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
|
||||
forced_only=config.forced_only, video_fps=plex_part.fps)
|
||||
|
||||
return video
|
||||
|
||||
except ValueError:
|
||||
Log.Warn("File could not be guessed by subliminal: %s" % plex_part.file)
|
||||
return parts
|
||||
|
||||
|
||||
def scan_videos(videos, kind="series", ignore_all=False):
|
||||
"""
|
||||
receives a list of videos containing dictionaries returned by media_to_videos
|
||||
:param videos:
|
||||
:param kind: series or movies
|
||||
:return: dictionary of subliminal.video.scan_video, key=subliminal scanned video, value=plex file part
|
||||
"""
|
||||
ret = {}
|
||||
for video in videos:
|
||||
intent = get_intent()
|
||||
force_refresh = intent.get("force", video["id"], video["series_id"], video["season_id"])
|
||||
Log.Debug("Determining force-refresh (video: %s, series: %s, season: %s), result: %s"
|
||||
% (video["id"], video["series_id"], video["season_id"], force_refresh))
|
||||
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, get_forced=None):
|
||||
streams = []
|
||||
has_unknown = False
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
|
||||
language = helpers.get_language_from_stream(stream.language_code)
|
||||
is_unknown = False
|
||||
found_requested_language = requested_language and requested_language == language
|
||||
|
||||
hints = helpers.get_item_hints(video)
|
||||
video["plex_part"].fps = get_stream_fps(video["plex_part"].streams)
|
||||
scanned_video = scan_video(video, ignore_all=force_refresh or ignore_all, hints=hints,
|
||||
rating_key=video["id"])
|
||||
if get_forced is not None:
|
||||
if (get_forced and not stream.forced) or (not get_forced and stream.forced):
|
||||
continue
|
||||
|
||||
if not scanned_video:
|
||||
continue
|
||||
if not language and config.treat_und_as_first:
|
||||
# only consider first unknown subtitle stream
|
||||
if has_unknown and skip_duplicate_unknown:
|
||||
continue
|
||||
|
||||
scanned_video.id = video["id"]
|
||||
part_metadata = video.copy()
|
||||
del part_metadata["plex_part"]
|
||||
scanned_video.plexapi_metadata = part_metadata
|
||||
ret[scanned_video] = video["plex_part"]
|
||||
return ret
|
||||
language = list(config.lang_list)[0]
|
||||
is_unknown = True
|
||||
has_unknown = True
|
||||
|
||||
if not requested_language or found_requested_language:
|
||||
streams.append({"stream": stream, "is_unknown": is_unknown, "language": language})
|
||||
|
||||
if found_requested_language:
|
||||
break
|
||||
|
||||
return streams
|
||||
|
||||
|
||||
def get_plex_metadata(rating_key, part_id, item_type):
|
||||
def get_part(plex_item, part_id):
    """
    Return the first part of ``plex_item`` whose id equals ``part_id``, both compared as strings.

    :param plex_item: object exposing ``media``, each media exposing ``parts``
    :param part_id: id of the wanted part
    :return: the matching part, or None when there is no match
    """
    matches = (
        candidate
        for media in plex_item.media
        for candidate in media.parts
        if str(candidate.id) == str(part_id)
    )
    return next(matches, None)
|
||||
|
||||
|
||||
def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
|
||||
"""
|
||||
uses the Plex 3rd party API accessor to get metadata information
|
||||
|
||||
:param rating_key:
|
||||
:param rating_key: movie or episode
|
||||
:param part_id:
|
||||
:param item_type:
|
||||
:return:
|
||||
"""
|
||||
|
||||
try:
|
||||
plex_item = list(Plex["library"].metadata(rating_key))[0]
|
||||
except URLError:
|
||||
return None
|
||||
if not plex_item:
|
||||
plex_item = get_item(rating_key)
|
||||
|
||||
if not plex_item:
|
||||
return
|
||||
|
||||
# find current part
|
||||
current_part = None
|
||||
for media in plex_item.media:
|
||||
for part in media.parts:
|
||||
if str(part.id) == part_id:
|
||||
current_part = part
|
||||
current_part = get_part(plex_item, part_id)
|
||||
|
||||
if not current_part:
|
||||
raise helpers.PartUnknownException("Part unknown")
|
||||
|
||||
stream_info = get_plexapi_stream_info(plex_item, part_id)
|
||||
|
||||
# get normalized metadata
|
||||
# fixme: duplicated logic of media_to_videos
|
||||
if item_type == "episode":
|
||||
@@ -241,38 +246,60 @@ def get_plex_metadata(rating_key, part_id, item_type):
|
||||
tvdb_id = plex_item.guid[len(tvdb_guid_identifier):].split("?")[0]
|
||||
series_tvdb_id = tvdb_id.split("/")[0]
|
||||
metadata = get_metadata_dict(plex_item, current_part,
|
||||
{"plex_part": current_part, "type": "episode", "title": plex_item.title,
|
||||
"series": plex_item.show.title, "id": plex_item.rating_key,
|
||||
"series_id": plex_item.show.rating_key,
|
||||
"season_id": plex_item.season.rating_key,
|
||||
"imdb_id": None,
|
||||
"year": year,
|
||||
"tvdb_id": tvdb_id,
|
||||
"series_tvdb_id": series_tvdb_id,
|
||||
"original_title": original_title,
|
||||
"season": plex_item.season.index,
|
||||
"episode": plex_item.index
|
||||
})
|
||||
dict(stream_info,
|
||||
**{"plex_part": current_part, "type": "episode", "title": plex_item.title,
|
||||
"series": plex_item.show.title, "id": plex_item.rating_key,
|
||||
"series_id": plex_item.show.rating_key,
|
||||
"season_id": plex_item.season.rating_key,
|
||||
"imdb_id": None,
|
||||
"year": year,
|
||||
"tvdb_id": tvdb_id,
|
||||
"series_tvdb_id": series_tvdb_id,
|
||||
"original_title": original_title,
|
||||
"season": plex_item.season.index,
|
||||
"episode": plex_item.index
|
||||
})
|
||||
)
|
||||
else:
|
||||
imdb_id = None
|
||||
original_title = plex_item.title_original
|
||||
if imdb_guid_identifier in plex_item.guid:
|
||||
imdb_id = plex_item.guid[len(imdb_guid_identifier):].split("?")[0]
|
||||
metadata = get_metadata_dict(plex_item, current_part, {"plex_part": current_part, "type": "movie",
|
||||
"title": plex_item.title, "id": plex_item.rating_key,
|
||||
"series_id": None,
|
||||
"season_id": None,
|
||||
"imdb_id": imdb_id,
|
||||
"year": plex_item.year,
|
||||
"tvdb_id": None,
|
||||
"series_tvdb_id": None,
|
||||
"original_title": original_title,
|
||||
"season": None,
|
||||
"episode": None,
|
||||
"section": plex_item.section.title})
|
||||
metadata = get_metadata_dict(plex_item, current_part,
|
||||
dict(stream_info, **{"plex_part": current_part, "type": "movie",
|
||||
"title": plex_item.title, "id": plex_item.rating_key,
|
||||
"series_id": None,
|
||||
"season_id": None,
|
||||
"imdb_id": imdb_id,
|
||||
"year": plex_item.year,
|
||||
"tvdb_id": None,
|
||||
"series_tvdb_id": None,
|
||||
"original_title": original_title,
|
||||
"season": None,
|
||||
"episode": None,
|
||||
"section": plex_item.section.title})
|
||||
)
|
||||
return metadata
|
||||
|
||||
|
||||
def get_blacklist_from_part_map(video_part_map, languages):
    """
    Collect the blacklist entries of every video/part pair for every given language.

    :param video_part_map: mapping of scanned video -> Plex part; every video carries
                           ``plexapi_metadata["item"]``, every part an ``id``
    :param languages: iterable of languages to look up
    :return: list of ``(str, str)`` tuples built from the keys of each non-empty blacklist
    """
    from support.storage import get_subtitle_storage
    subtitle_storage = get_subtitle_storage()
    blacklist = []
    try:
        for video, part in video_part_map.iteritems():
            stored_subs = subtitle_storage.load_or_new(video.plexapi_metadata["item"])
            for language in languages:
                # get_blacklist returns a pair; only its first element is needed here
                current_bl, _ = stored_subs.get_blacklist(part.id, language)
                if not current_bl:
                    continue

                blacklist.extend((str(a), str(b)) for a, b in current_bl.keys())
    finally:
        # always call destroy(), even if loading or a lookup raised
        subtitle_storage.destroy()

    return blacklist
|
||||
|
||||
|
||||
class PMSMediaProxy(object):
|
||||
"""
|
||||
Proxy object for getting data from a mediatree items "internally" via the PMS
|
||||
|
||||
@@ -0,0 +1,124 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
import helpers
|
||||
|
||||
from support.lib import Plex, get_intent
|
||||
from support.plex_media import get_stream_fps
|
||||
from support.storage import get_subtitle_storage
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
|
||||
from subzero.video import parse_video, set_existing_languages
|
||||
|
||||
|
||||
def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, providers=None, skip_hashing=False):
    """
    Parse the file behind a Plex part into a video object and attach the known subtitle languages.

    :param pms_video_info: dict holding at least "plex_part"; also handed to set_existing_languages
    :param ignore_all: if truthy, existing embedded/external subtitles are not applied to the video
    :param hints: forwarded to parse_video
    :param rating_key: key used for the Plex metadata lookup and for loading the stored subtitles
    :param providers: forwarded to parse_video
    :param skip_hashing: forwarded to parse_video as skip_hashing; config.low_impact_mode enables it as well
    :return: the parsed video; nothing (None) when parsing raised ValueError
    """
    embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
    external_subtitles = not ignore_all and Prefs['subtitles.scan.external']

    plex_part = pms_video_info["plex_part"]

    if ignore_all:
        Log.Debug("Force refresh intended.")

    Log.Debug("Scanning video: %s, external_subtitles=%s, embedded_subtitles=%s" % (
        plex_part.file, external_subtitles, embedded_subtitles))

    # gather all parts of the first metadata result
    plex_item = list(Plex["library"].metadata(rating_key))[0]
    all_parts = [candidate for media in plex_item.media for candidate in media.parts]

    # the last part with a matching id wins
    plexpy_part = None
    for candidate in all_parts:
        if int(candidate.id) == int(plex_part.id):
            plexpy_part = candidate

    known_embedded = []
    if not plexpy_part:
        Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)
    else:
        for stream in plexpy_part.streams:
            # subtitle streams only
            if stream.stream_type != 3:
                continue

            # the stream's forced flag has to agree with the forced_only setting
            if bool(config.forced_only) != bool(getattr(stream, "forced")):
                continue

            # embedded subtitle: no stream key, but a codec
            # fixme: tap into external subtitles here instead of scanning for ourselves later?
            if stream.stream_key or not stream.codec:
                continue

            if not (config.exotic_ext or stream.codec.lower() in TEXT_SUBTITLE_EXTS):
                continue

            lang = helpers.get_language_from_stream(stream.language_code)

            # treat unknown language as lang1?
            if not lang and config.treat_und_as_first:
                lang = list(config.lang_list)[0]

            if lang:
                known_embedded.append(lang.alpha3)

    storage = get_subtitle_storage()
    stored_subs = storage.load(rating_key)
    storage.destroy()

    try:
        # get basic video info scan (filename)
        video = parse_video(plex_part.file, hints, skip_hashing=config.low_impact_mode or skip_hashing,
                            providers=providers)

        if not ignore_all:
            set_existing_languages(video, pms_video_info, external_subtitles=external_subtitles,
                                   embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
                                   forced_only=config.forced_only, stored_subs=stored_subs,
                                   languages=config.lang_list, only_one=config.only_one)

        # add video fps info
        video.fps = plex_part.fps
        return video

    except ValueError:
        Log.Warn("File could not be guessed: %s: %s", plex_part.file, traceback.format_exc())
|
||||
|
||||
|
||||
def scan_videos(videos, ignore_all=False, providers=None, skip_hashing=False):
    """
    Run scan_video on every entry of a list of video dictionaries (as returned by media_to_videos).

    :param videos: list of dicts; the keys "id", "series_id", "season_id", "type" and "plex_part" are read
    :param ignore_all: forwarded to scan_video; a "force" intent for the video enables it as well
    :param providers: providers to use; if falsy, config.get_providers is asked per video
    :param skip_hashing: forwarded to scan_video
    :return: dictionary, key=scanned video, value=plex file part; videos that could not be scanned are left out
    """
    scanned = {}
    for info in videos:
        video_id = info["id"]
        series_id = info["series_id"]
        season_id = info["season_id"]

        intent = get_intent()
        force_refresh = intent.get("force", video_id, series_id, season_id)
        Log.Debug("Determining force-refresh (video: %s, series: %s, season: %s), result: %s"
                  % (video_id, series_id, season_id, force_refresh))

        hints = helpers.get_item_hints(info)
        plex_part = info["plex_part"]
        plex_part.fps = get_stream_fps(plex_part.streams)

        media_type = "series" if info["type"] == "episode" else "movies"
        p = providers or config.get_providers(media_type=media_type)
        scanned_video = scan_video(info, ignore_all=force_refresh or ignore_all, hints=hints,
                                   rating_key=video_id, providers=p,
                                   skip_hashing=skip_hashing)

        if not scanned_video:
            continue

        # keep a copy of the metadata without the part object on the scanned video
        part_metadata = info.copy()
        part_metadata.pop("plex_part")

        scanned_video.id = video_id
        scanned_video.plexapi_metadata = part_metadata
        scanned_video.ignore_all = force_refresh
        scanned[scanned_video] = plex_part

    return scanned
|
||||
@@ -4,21 +4,24 @@ import datetime
|
||||
import logging
|
||||
import traceback
|
||||
|
||||
from config import config
|
||||
|
||||
def parse_frequency(s):
    """
    Split a frequency string into its number and its unit.

    :param s: ``None``, ``"never"``, or a string of exactly three whitespace-separated tokens
              whose second token is an integer (e.g. ``"every 6 hours"``)
    :return: ``(None, None)`` for ``None``/``"never"``, otherwise ``(int(number), unit)``
    :raises ValueError: if the string does not have exactly three tokens or the number is not an integer
    """
    # identity comparison for None
    if s == "never" or s is None:
        return None, None

    # the leading token is not needed
    _, num, unit = s.split()
    return int(num), unit
|
||||
|
||||
|
||||
class DefaultScheduler(object):
|
||||
thread = None
|
||||
queue_thread = None
|
||||
scheduler_thread = None
|
||||
running = False
|
||||
registry = None
|
||||
|
||||
def __init__(self):
|
||||
self.thread = None
|
||||
self.queue_thread = None
|
||||
self.scheduler_thread = None
|
||||
self.running = False
|
||||
self.registry = []
|
||||
|
||||
@@ -47,6 +50,7 @@ class DefaultScheduler(object):
|
||||
if Dict["tasks"]:
|
||||
for task_name in Dict["tasks"].keys():
|
||||
if task_name == "queue":
|
||||
Dict["tasks"][task_name] = []
|
||||
continue
|
||||
|
||||
Dict["tasks"][task_name]["data"] = {}
|
||||
@@ -58,6 +62,7 @@ class DefaultScheduler(object):
|
||||
raise NotImplementedError("Task missing! %s" % name)
|
||||
|
||||
Dict["tasks"][name]["data"] = {}
|
||||
Dict["tasks"][name]["running"] = False
|
||||
Dict.Save()
|
||||
Log.Debug("Task data cleared: %s", name)
|
||||
|
||||
@@ -68,7 +73,7 @@ class DefaultScheduler(object):
|
||||
# discover tasks;
|
||||
self.tasks = {}
|
||||
for cls in self.registry:
|
||||
task = cls(self)
|
||||
task = cls()
|
||||
try:
|
||||
task_frequency = Prefs["scheduler.tasks.%s.frequency" % task.name]
|
||||
except KeyError:
|
||||
@@ -78,7 +83,8 @@ class DefaultScheduler(object):
|
||||
|
||||
def run(self):
|
||||
self.running = True
|
||||
self.thread = Thread.Create(self.worker)
|
||||
self.scheduler_thread = Thread.Create(self.scheduler_worker)
|
||||
self.queue_thread = Thread.Create(self.queue_worker)
|
||||
|
||||
def stop(self):
|
||||
self.running = False
|
||||
@@ -113,6 +119,7 @@ class DefaultScheduler(object):
|
||||
|
||||
def run_task(self, name, *args, **kwargs):
|
||||
task = self.tasks[name]["task"]
|
||||
|
||||
if task.running:
|
||||
Log.Debug("Scheduler: Not running %s, as it's currently running.", name)
|
||||
return False
|
||||
@@ -124,8 +131,12 @@ class DefaultScheduler(object):
|
||||
except Exception, e:
|
||||
Log.Error("Scheduler: Something went wrong when running %s: %s", name, traceback.format_exc())
|
||||
finally:
|
||||
task.post_run(Dict["tasks"][name]["data"])
|
||||
try:
|
||||
task.post_run(Dict["tasks"][name]["data"])
|
||||
except:
|
||||
Log.Error("Scheduler: task.post_run failed for %s: %s", name, traceback.format_exc())
|
||||
Dict.Save()
|
||||
config.sync_cache()
|
||||
|
||||
def dispatch_task(self, *args, **kwargs):
|
||||
if "queue" not in Dict["tasks"]:
|
||||
@@ -134,8 +145,12 @@ class DefaultScheduler(object):
|
||||
Dict["tasks"]["queue"].append((args, kwargs))
|
||||
|
||||
def signal(self, name, *args, **kwargs):
|
||||
for task_name, info in self.tasks.iteritems():
|
||||
task = info["task"]
|
||||
for task_name in self.tasks.keys():
|
||||
task = self.task(task_name)
|
||||
if not task:
|
||||
Log.Error("Scheduler: Task %s not found (?!)" % task_name)
|
||||
continue
|
||||
|
||||
if not task.periodic:
|
||||
continue
|
||||
|
||||
@@ -153,7 +168,7 @@ class DefaultScheduler(object):
|
||||
continue
|
||||
Log.Debug("Scheduler: Not sending signal %s to task %s, because: not running", name, task_name)
|
||||
|
||||
def worker(self):
|
||||
def queue_worker(self):
|
||||
Thread.Sleep(10.0)
|
||||
while 1:
|
||||
if not self.running:
|
||||
@@ -166,13 +181,25 @@ class DefaultScheduler(object):
|
||||
Dict["tasks"]["queue"] = []
|
||||
Dict.Save()
|
||||
for args, kwargs in queue:
|
||||
Log.Debug("Dispatching single task: %s, %s", args, kwargs)
|
||||
Log.Debug("Queue: Dispatching single task: %s, %s", args, kwargs)
|
||||
Thread.Create(self.run_task, True, *args, **kwargs)
|
||||
Thread.Sleep(5.0)
|
||||
|
||||
Thread.Sleep(1)
|
||||
|
||||
def scheduler_worker(self):
|
||||
Thread.Sleep(10.0)
|
||||
while 1:
|
||||
if not self.running:
|
||||
break
|
||||
|
||||
# scheduled tasks
|
||||
for name, info in self.tasks.iteritems():
|
||||
for name in self.tasks.keys():
|
||||
now = datetime.datetime.now()
|
||||
info = self.tasks.get(name)
|
||||
if not info:
|
||||
Log.Error("Scheduler: Task %s not found (?!)" % name)
|
||||
continue
|
||||
task = info["task"]
|
||||
|
||||
if name not in Dict["tasks"] or not task.periodic:
|
||||
@@ -185,11 +212,19 @@ class DefaultScheduler(object):
|
||||
if not frequency_num:
|
||||
continue
|
||||
|
||||
# run legacy SARAM once
|
||||
if name == "SearchAllRecentlyAddedMissing" and ("hasRunLSARAM" not in Dict or not Dict["hasRunLSARAM"]):
|
||||
task = self.tasks["LegacySearchAllRecentlyAddedMissing"]["task"]
|
||||
task.last_run = None
|
||||
name = "LegacySearchAllRecentlyAddedMissing"
|
||||
Dict["hasRunLSARAM"] = True
|
||||
Dict.Save()
|
||||
|
||||
if not task.last_run or (task.last_run + datetime.timedelta(**{frequency_key: frequency_num}) <= now):
|
||||
# fixme: scheduled tasks run synchronously. is this the best idea?
|
||||
#Thread.Create(self.run_task, True, name)
|
||||
Thread.Create(self.run_task, True, name)
|
||||
#Thread.Sleep(5.0)
|
||||
self.run_task(name)
|
||||
#self.run_task(name)
|
||||
Thread.Sleep(5.0)
|
||||
|
||||
Thread.Sleep(1)
|
||||
|
||||
@@ -4,9 +4,12 @@ import datetime
|
||||
import os
|
||||
import pprint
|
||||
import copy
|
||||
import traceback
|
||||
import types
|
||||
|
||||
import subliminal
|
||||
from subliminal_patch.core import save_subtitles as subliminal_save_subtitles
|
||||
from subzero.subtitle_storage import StoredSubtitlesManager
|
||||
from subzero.lib.io import FileIO
|
||||
|
||||
from subtitlehelpers import force_utf8
|
||||
from config import config
|
||||
@@ -16,51 +19,14 @@ from support.items import get_item
|
||||
|
||||
|
||||
def get_subtitle_storage():
|
||||
return StoredSubtitlesManager(Data, get_item)
|
||||
return StoredSubtitlesManager(Data, Thread, get_item)
|
||||
|
||||
|
||||
def whack_missing_parts(scanned_video_part_map, existing_parts=None):
|
||||
"""
|
||||
cleans out our internal storage's video parts (parts may get updated/deleted/whatever)
|
||||
:param existing_parts: optional list of part ids known
|
||||
:param scanned_video_part_map: videos to check for
|
||||
:return:
|
||||
"""
|
||||
# shortcut
|
||||
|
||||
if "subs" not in Dict:
|
||||
return
|
||||
|
||||
if not existing_parts:
|
||||
existing_parts = []
|
||||
for part in scanned_video_part_map.viewvalues():
|
||||
existing_parts.append(str(part.id))
|
||||
|
||||
whacked_parts = False
|
||||
for video in scanned_video_part_map.keys():
|
||||
video_id = str(video.id)
|
||||
if video_id not in Dict["subs"]:
|
||||
continue
|
||||
|
||||
parts = Dict["subs"][video_id].keys()
|
||||
|
||||
for part_id in parts:
|
||||
part_id = str(part_id)
|
||||
if part_id not in existing_parts:
|
||||
Log.Info("Whacking part %s in internal storage of video %s (%s, %s)", part_id, video_id,
|
||||
repr(existing_parts), repr(parts))
|
||||
del Dict["subs"][video_id][part_id]
|
||||
whacked_parts = True
|
||||
|
||||
if whacked_parts:
|
||||
Dict.Save()
|
||||
|
||||
|
||||
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a"):
|
||||
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a", set_current=True):
|
||||
"""
|
||||
stores information about downloaded subtitles in plex's Dict()
|
||||
"""
|
||||
existing_parts = []
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
part = scanned_video_part_map[video]
|
||||
part_id = str(part.id)
|
||||
@@ -69,30 +35,37 @@ def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_ty
|
||||
metadata = video.plexapi_metadata
|
||||
title = get_title_for_video_metadata(metadata)
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load_or_new(plex_item)
|
||||
stored_subs = subtitle_storage.load(video_id)
|
||||
is_new = False
|
||||
if not stored_subs:
|
||||
is_new = True
|
||||
Log.Debug(u"Creating new subtitle storage: %s, %s", video_id, part_id)
|
||||
stored_subs = subtitle_storage.new(plex_item)
|
||||
|
||||
existing_parts.append(part_id)
|
||||
|
||||
stored_any = False
|
||||
for subtitle in video_subtitles:
|
||||
lang = Locale.Language.Match(subtitle.language.alpha2)
|
||||
Log.Debug(u"Adding subtitle to storage: %s, %s, %s" % (video_id, part_id, title))
|
||||
ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode)
|
||||
lang = str(subtitle.language)
|
||||
subtitle.normalize()
|
||||
Log.Debug(u"Adding subtitle to storage: %s, %s, %s, %s, %s" % (video_id, part_id, lang, title,
|
||||
subtitle.guess_encoding()))
|
||||
|
||||
last_mod = None
|
||||
if subtitle.storage_path:
|
||||
last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(subtitle.storage_path))
|
||||
|
||||
ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode, last_mod=last_mod,
|
||||
set_current=set_current)
|
||||
|
||||
if ret_val:
|
||||
Log.Debug("Subtitle stored")
|
||||
stored_any = True
|
||||
|
||||
else:
|
||||
Log.Debug("Subtitle already existing in storage")
|
||||
|
||||
if stored_any:
|
||||
if is_new or video_subtitles:
|
||||
Log.Debug("Saving subtitle storage for %s" % video_id)
|
||||
subtitle_storage.save(stored_subs)
|
||||
|
||||
#if existing_parts:
|
||||
# whack_missing_parts(scanned_video_part_map, existing_parts=existing_parts)
|
||||
subtitle_storage.destroy()
|
||||
|
||||
|
||||
def reset_storage(key):
|
||||
@@ -108,37 +81,49 @@ def reset_storage(key):
|
||||
|
||||
|
||||
def log_storage(key):
|
||||
if not key:
|
||||
Log.Debug(pprint.pformat(getattr(Dict, "_dict")))
|
||||
if key in Dict:
|
||||
Log.Debug(pprint.pformat(Dict[key]))
|
||||
|
||||
|
||||
def save_subtitles_to_file(subtitles):
|
||||
def get_target_folder(file_path):
|
||||
fld = None
|
||||
fld_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
|
||||
if Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
|
||||
if fld_custom or Prefs["subtitles.save.subFolder"] != "current folder":
|
||||
# specific subFolder requested, create it if it doesn't exist
|
||||
fld_base = os.path.split(file_path)[0]
|
||||
if fld_custom:
|
||||
if fld_custom.startswith("/"):
|
||||
# absolute folder
|
||||
fld = fld_custom
|
||||
else:
|
||||
fld = os.path.join(fld_base, fld_custom)
|
||||
else:
|
||||
fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
|
||||
fld = force_unicode(fld)
|
||||
if not os.path.exists(fld):
|
||||
os.makedirs(fld)
|
||||
return fld
|
||||
|
||||
|
||||
def save_subtitles_to_file(subtitles, tags=None, forced_tag=None):
|
||||
forced_tag = forced_tag or config.forced_only
|
||||
for video, video_subtitles in subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
|
||||
fld = None
|
||||
if fld_custom or Prefs["subtitles.save.subFolder"] != "current folder":
|
||||
# specific subFolder requested, create it if it doesn't exist
|
||||
fld_base = os.path.split(video.name)[0]
|
||||
if fld_custom:
|
||||
if fld_custom.startswith("/"):
|
||||
# absolute folder
|
||||
fld = fld_custom
|
||||
else:
|
||||
fld = os.path.join(fld_base, fld_custom)
|
||||
else:
|
||||
fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
|
||||
fld = force_unicode(fld)
|
||||
if not os.path.exists(fld):
|
||||
os.makedirs(fld)
|
||||
subliminal.save_subtitles(video, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
|
||||
encode_with=force_utf8 if config.enforce_encoding else None,
|
||||
chmod=config.chmod, forced_tag=config.forced_only, path_decoder=force_unicode,
|
||||
debug_mods=config.debug_mods)
|
||||
if not isinstance(video, types.StringTypes):
|
||||
file_path = video.name
|
||||
else:
|
||||
file_path = video
|
||||
|
||||
fld = get_target_folder(file_path)
|
||||
subliminal_save_subtitles(file_path, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
|
||||
chmod=config.chmod, forced_tag=forced_tag, path_decoder=force_unicode,
|
||||
debug_mods=config.debug_mods, formats=config.subtitle_formats, tags=tags)
|
||||
return True
|
||||
|
||||
|
||||
@@ -146,8 +131,7 @@ def save_subtitles_to_metadata(videos, subtitles):
|
||||
for video, video_subtitles in subtitles.items():
|
||||
mediaPart = videos[video]
|
||||
for subtitle in video_subtitles:
|
||||
content = force_utf8(subtitle.get_modified_text(debug=config.debug_mods)) if config.enforce_encoding else \
|
||||
subtitle.get_modified_content(debug=config.debug_mods)
|
||||
content = subtitle.get_modified_content(debug=config.debug_mods)
|
||||
|
||||
if not isinstance(mediaPart, Framework.api.agentkit.MediaPart):
|
||||
# we're being handed a Plex.py model instance here, not an internal PMS MediaPart object.
|
||||
@@ -159,10 +143,12 @@ def save_subtitles_to_metadata(videos, subtitles):
|
||||
return True
|
||||
|
||||
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None):
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None,
|
||||
set_current=True):
|
||||
"""
|
||||
|
||||
:param scanned_video_part_map:
|
||||
:param set_current: save the subtitle as the current one
|
||||
:param scanned_video_part_map:
|
||||
:param downloaded_subtitles:
|
||||
:param mode:
|
||||
:param bare_save: don't trigger anything; don't store information
|
||||
@@ -172,6 +158,8 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_
|
||||
meta_fallback = False
|
||||
save_successful = False
|
||||
|
||||
# big fixme: scanned_video_part_map isn't needed to the current extent. rewrite.
|
||||
|
||||
if mods:
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
if not video_subtitles:
|
||||
@@ -183,31 +171,66 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_
|
||||
subtitle.plex_media_fps = video.fps
|
||||
|
||||
storage = "metadata"
|
||||
if Prefs['subtitles.save.filesystem']:
|
||||
save_to_fs = cast_bool(Prefs['subtitles.save.filesystem'])
|
||||
if save_to_fs:
|
||||
storage = "filesystem"
|
||||
try:
|
||||
Log.Debug("Using filesystem as subtitle storage")
|
||||
save_subtitles_to_file(downloaded_subtitles)
|
||||
except OSError:
|
||||
if Prefs["subtitles.save.metadata_fallback"]:
|
||||
meta_fallback = True
|
||||
|
||||
if set_current:
|
||||
if save_to_fs:
|
||||
try:
|
||||
Log.Debug("Using filesystem as subtitle storage")
|
||||
save_subtitles_to_file(downloaded_subtitles)
|
||||
except OSError:
|
||||
if cast_bool(Prefs["subtitles.save.metadata_fallback"]):
|
||||
meta_fallback = True
|
||||
storage = "metadata"
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
save_successful = True
|
||||
save_successful = True
|
||||
|
||||
if not Prefs['subtitles.save.filesystem'] or meta_fallback:
|
||||
if meta_fallback:
|
||||
Log.Debug("Using metadata as subtitle storage, because filesystem storage failed")
|
||||
else:
|
||||
Log.Debug("Using metadata as subtitle storage")
|
||||
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles)
|
||||
if not save_to_fs or meta_fallback:
|
||||
if meta_fallback:
|
||||
Log.Debug("Using metadata as subtitle storage, because filesystem storage failed")
|
||||
else:
|
||||
Log.Debug("Using metadata as subtitle storage")
|
||||
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles)
|
||||
|
||||
if not bare_save and save_successful and config.notify_executable:
|
||||
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
|
||||
if not bare_save and save_successful and config.notify_executable:
|
||||
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
|
||||
|
||||
if not bare_save and save_successful:
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode)
|
||||
if not bare_save and (save_successful or not set_current):
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode, set_current=set_current)
|
||||
|
||||
return save_successful
|
||||
|
||||
|
||||
def get_pack_id(subtitle):
|
||||
return "%s_%s" % (subtitle.provider_name, subtitle.numeric_id)
|
||||
|
||||
|
||||
def get_pack_data(subtitle):
|
||||
subtitle_id = get_pack_id(subtitle)
|
||||
|
||||
archive = os.path.join(config.pack_cache_dir, subtitle_id + ".archive")
|
||||
if os.path.isfile(archive):
|
||||
Log.Info("Loading archive from pack cache: %s", subtitle_id)
|
||||
try:
|
||||
data = FileIO.read(archive, 'rb')
|
||||
|
||||
return data
|
||||
except:
|
||||
Log.Error("Couldn't load archive from pack cache: %s: %s", subtitle_id, traceback.format_exc())
|
||||
|
||||
|
||||
def store_pack_data(subtitle, data):
|
||||
subtitle_id = get_pack_id(subtitle)
|
||||
|
||||
archive = os.path.join(config.pack_cache_dir, subtitle_id + ".archive")
|
||||
|
||||
Log.Info("Storing archive in pack cache: %s", subtitle_id)
|
||||
try:
|
||||
FileIO.write(archive, data, 'wb')
|
||||
|
||||
except:
|
||||
Log.Error("Couldn't store archive in pack cache: %s: %s", subtitle_id, traceback.format_exc())
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# coding=utf-8
|
||||
|
||||
import re, os
|
||||
import config
|
||||
import helpers
|
||||
|
||||
from config import config, SUBTITLE_EXTS, TEXT_SUBTITLE_EXTS
|
||||
from bs4 import UnicodeDammit
|
||||
|
||||
|
||||
@@ -86,11 +86,11 @@ class VobSubSubtitleHelper(SubtitleHelper):
|
||||
|
||||
|
||||
IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$"
|
||||
ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2})?$")
|
||||
ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
|
||||
|
||||
|
||||
def match_ietf_language(s):
|
||||
language_match = re.match(".+\.([^\.]+)$" if not helpers.cast_bool(Prefs["subtitles.language.ietf"])
|
||||
language_match = re.match(".+\.([^\.]+)$" if not helpers.cast_bool(Prefs["subtitles.language.ietf_display"])
|
||||
else IETF_MATCH, s)
|
||||
if language_match and len(language_match.groups()) == 1:
|
||||
language = language_match.groups()[0]
|
||||
@@ -102,7 +102,7 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
@classmethod
|
||||
def is_helper_for(cls, filename):
|
||||
(file, file_extension) = os.path.splitext(filename)
|
||||
return file_extension.lower()[1:] in config.SUBTITLE_EXTS
|
||||
return file_extension.lower()[1:] in SUBTITLE_EXTS
|
||||
|
||||
def process_subtitles(self, part):
|
||||
|
||||
@@ -120,21 +120,29 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
forced = ''
|
||||
default = ''
|
||||
split_tag = file.rsplit('.', 1)
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'custom']:
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'embedded-forced',
|
||||
'custom']:
|
||||
file = split_tag[0]
|
||||
sub_tag = split_tag[1].lower()
|
||||
# don't do anything with 'normal', we don't need it
|
||||
if 'forced' == split_tag[1].lower():
|
||||
if 'forced' in sub_tag:
|
||||
forced = '1'
|
||||
if 'default' == split_tag[1].lower():
|
||||
elif 'default' == sub_tag:
|
||||
default = '1'
|
||||
|
||||
# Attempt to extract the language from the filename (e.g. Avatar (2009).eng)
|
||||
# IETF support thanks to
|
||||
# https://github.com/hpsbranco/LocalMedia.bundle/commit/4fad9aefedece78a1fa96401304351347f644369
|
||||
language = Locale.Language.Match(match_ietf_language(file))
|
||||
lang_part = match_ietf_language(file)
|
||||
if lang_part != file:
|
||||
language = Locale.Language.Match(lang_part)
|
||||
elif config.only_one:
|
||||
language = Locale.Language.Match(list(config.lang_list)[0].alpha2)
|
||||
else:
|
||||
language = Locale.Language.Match("xx")
|
||||
|
||||
# skip non-SRT if wanted
|
||||
if not helpers.cast_bool(Prefs["subtitles.scan.exotic_ext"]) and ext not in ["srt", "ass", "ssa"]:
|
||||
if not config.exotic_ext and ext not in TEXT_SUBTITLE_EXTS:
|
||||
return lang_sub_map
|
||||
|
||||
codec = None
|
||||
@@ -157,6 +165,7 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
Log("An error occurred while attempting to parse the subtitle file, skipping... : " + self.filename)
|
||||
return lang_sub_map
|
||||
|
||||
# fixme: re-add vtt once Plex Inc. fixes this line in LocalMedia.bundle
|
||||
if codec is None and ext in ['ass', 'ssa', 'smi', 'srt', 'psb']:
|
||||
codec = ext.replace('ass', 'ssa')
|
||||
|
||||
@@ -174,19 +183,20 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
|
||||
def get_subtitles_from_metadata(part):
|
||||
subs = {}
|
||||
for language in part.subtitles:
|
||||
subs[language] = []
|
||||
for key, proxy in getattr(part.subtitles[language], "_proxies").iteritems():
|
||||
if not proxy or not len(proxy) >= 5:
|
||||
Log.Debug("Can't parse metadata: %s" % repr(proxy))
|
||||
continue
|
||||
if hasattr(part, "subtitles") and part.subtitles:
|
||||
for language in part.subtitles:
|
||||
subs[language] = []
|
||||
for key, proxy in getattr(part.subtitles[language], "_proxies").iteritems():
|
||||
if not proxy or not len(proxy) >= 5:
|
||||
Log.Debug("Can't parse metadata: %s" % repr(proxy))
|
||||
continue
|
||||
|
||||
p_type = proxy[0]
|
||||
p_type = proxy[0]
|
||||
|
||||
if p_type == "Media":
|
||||
# metadata subtitle
|
||||
Log.Debug(u"Found metadata subtitle: %s, %s" % (language, repr(proxy)))
|
||||
subs[language].append(key)
|
||||
if p_type == "Media":
|
||||
# metadata subtitle
|
||||
Log.Debug(u"Found metadata subtitle: %s, %s" % (language, repr(proxy)))
|
||||
subs[language].append(key)
|
||||
return subs
|
||||
|
||||
|
||||
|
||||
+589
-233
@@ -1,24 +1,29 @@
|
||||
# coding=utf-8
|
||||
|
||||
import glob
|
||||
import os
|
||||
import datetime
|
||||
import time
|
||||
import operator
|
||||
import traceback
|
||||
from urllib2 import URLError
|
||||
|
||||
from subliminal_patch.score import compute_score
|
||||
from subliminal_patch.core import download_subtitles
|
||||
from subliminal import list_subtitles as list_all_subtitles
|
||||
from babelfish import Language
|
||||
from subliminal import list_subtitles as list_all_subtitles, region as subliminal_cache_region
|
||||
from subzero.language import Language
|
||||
from subzero.video import refine_video
|
||||
|
||||
from missing_subtitles import items_get_all_missing_subs, refresh_item
|
||||
from scheduler import scheduler
|
||||
from storage import save_subtitles, whack_missing_parts, get_subtitle_storage
|
||||
from storage import save_subtitles, get_subtitle_storage
|
||||
from support.config import config
|
||||
from support.items import get_recent_items, is_ignored, get_item
|
||||
from support.lib import Plex
|
||||
from support.items import get_recent_items, get_item, is_ignored, get_item_title
|
||||
from support.helpers import track_usage, get_title_for_video_metadata, cast_bool, PartUnknownException
|
||||
from support.plex_media import scan_videos, get_plex_metadata
|
||||
from support.plex_media import get_plex_metadata
|
||||
from support.scanning import scan_videos
|
||||
from download import download_best_subtitles, pre_download_hook, post_download_hook, language_hook
|
||||
|
||||
PROVIDER_SLACK = 30
|
||||
DL_PROVIDER_SLACK = 30
|
||||
|
||||
|
||||
class Task(object):
|
||||
@@ -35,11 +40,10 @@ class Task(object):
|
||||
# task ready for being status-displayed?
|
||||
ready_for_display = False
|
||||
|
||||
def __init__(self, scheduler):
|
||||
def __init__(self):
|
||||
self.name = self.get_class_name()
|
||||
self.ready_for_display = False
|
||||
self.time_start = None
|
||||
self.scheduler = scheduler
|
||||
self.setup_defaults()
|
||||
|
||||
self.running = False
|
||||
@@ -93,143 +97,69 @@ class Task(object):
|
||||
Log.Info(u"Task: ran: %s", self.name)
|
||||
|
||||
|
||||
class SearchAllRecentlyAddedMissing(Task):
|
||||
periodic = True
|
||||
items_done = None
|
||||
items_searching = None
|
||||
items_searching_ids = None
|
||||
items_failed = None
|
||||
percentage = 0
|
||||
|
||||
stall_time = 30
|
||||
|
||||
def __init__(self, scheduler):
|
||||
super(SearchAllRecentlyAddedMissing, self).__init__(scheduler)
|
||||
self.items_done = None
|
||||
self.items_searching = None
|
||||
self.items_searching_ids = None
|
||||
self.items_failed = None
|
||||
self.percentage = 0
|
||||
|
||||
def signal(self, signal_name, *args, **kwargs):
|
||||
handler = getattr(self, "signal_%s" % signal_name)
|
||||
return handler(*args, **kwargs) if handler else None
|
||||
|
||||
def signal_updated_metadata(self, *args, **kwargs):
|
||||
item_id = int(args[0])
|
||||
|
||||
if self.items_searching_ids is not None and item_id in self.items_searching_ids:
|
||||
self.items_done.append(item_id)
|
||||
return True
|
||||
|
||||
def prepare(self, *args, **kwargs):
|
||||
self.items_done = []
|
||||
recent_items = get_recent_items()
|
||||
missing = items_get_all_missing_subs(recent_items, sleep_after_request=0.2)
|
||||
ids = set([id for added_at, id, title, item, missing_languages in missing if not is_ignored(id, item=item)])
|
||||
self.items_searching = missing
|
||||
self.items_searching_ids = ids
|
||||
self.items_failed = []
|
||||
self.percentage = 0
|
||||
self.ready_for_display = True
|
||||
|
||||
def run(self):
|
||||
super(SearchAllRecentlyAddedMissing, self).run()
|
||||
self.running = True
|
||||
missing_count = len(self.items_searching)
|
||||
items_done_count = 0
|
||||
|
||||
for added_at, item_id, title, item, missing_languages in self.items_searching:
|
||||
Log.Debug(u"Task: %s, triggering refresh for %s (%s)", self.name, title, item_id)
|
||||
try:
|
||||
refresh_item(item_id)
|
||||
except URLError:
|
||||
# timeout
|
||||
pass
|
||||
search_started = datetime.datetime.now()
|
||||
tries = 1
|
||||
while 1:
|
||||
if item_id in self.items_done:
|
||||
items_done_count += 1
|
||||
self.percentage = int(items_done_count * 100 / missing_count)
|
||||
Log.Debug(u"Task: %s, item %s done (%s%%, %s/%s)", self.name, item_id, self.percentage,
|
||||
items_done_count, missing_count)
|
||||
break
|
||||
|
||||
# item considered stalled after self.stall_time seconds passed after last refresh
|
||||
if (datetime.datetime.now() - search_started).total_seconds() > self.stall_time:
|
||||
if tries > 3:
|
||||
self.items_failed.append(item_id)
|
||||
Log.Debug(u"Task: %s, item stalled for %s times: %s, skipping", self.name, tries, item_id)
|
||||
break
|
||||
|
||||
Log.Debug(u"Task: %s, item stalled for %s seconds: %s, retrying", self.name, self.stall_time,
|
||||
item_id)
|
||||
tries += 1
|
||||
try:
|
||||
refresh_item(item_id)
|
||||
except URLError:
|
||||
pass
|
||||
search_started = datetime.datetime.now()
|
||||
time.sleep(1)
|
||||
time.sleep(0.1)
|
||||
# we can't hammer the PMS, otherwise requests will be stalled
|
||||
time.sleep(5)
|
||||
|
||||
Log.Debug("Task: %s, done (%s%%, %s/%s). Failed items: %s", self.name, self.percentage,
|
||||
items_done_count, missing_count, self.items_failed)
|
||||
self.running = False
|
||||
|
||||
def post_run(self, task_data):
|
||||
super(SearchAllRecentlyAddedMissing, self).post_run(task_data)
|
||||
self.ready_for_display = False
|
||||
self.percentage = 0
|
||||
self.items_done = None
|
||||
self.items_failed = None
|
||||
self.items_searching = None
|
||||
self.items_searching_ids = None
|
||||
|
||||
|
||||
class SubtitleListingMixin(object):
|
||||
def list_subtitles(self, rating_key, item_type, part_id, language, skip_wrong_fps=True):
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
def list_subtitles(self, rating_key, item_type, part_id, language, skip_wrong_fps=True, metadata=None,
|
||||
scanned_parts=None, air_date_cutoff=None):
|
||||
|
||||
if not metadata:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
|
||||
if not metadata:
|
||||
return
|
||||
|
||||
providers = config.get_providers(media_type="series" if item_type == "episode" else "movies")
|
||||
if not scanned_parts:
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True, providers=providers)
|
||||
if not scanned_parts:
|
||||
Log.Error(u"%s: Couldn't list available subtitles for %s", self.name, rating_key)
|
||||
return
|
||||
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
refine_video(video, refiner_settings=config.refiner_settings)
|
||||
|
||||
if air_date_cutoff is not None and metadata["item"].year and \
|
||||
metadata["item"].year + air_date_cutoff < datetime.date.today().year:
|
||||
Log.Debug("Skipping searching for subtitles: %s, it aired over %s year(s) ago.", rating_key,
|
||||
air_date_cutoff)
|
||||
return
|
||||
|
||||
config.init_subliminal_patches()
|
||||
|
||||
provider_settings = config.provider_settings
|
||||
if not skip_wrong_fps:
|
||||
provider_settings["opensubtitles"]["skip_wrong_fps"] = False
|
||||
|
||||
if item_type == "episode":
|
||||
min_score = 240
|
||||
if video.is_special:
|
||||
min_score = 180
|
||||
else:
|
||||
min_score = 60
|
||||
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
if not scanned_parts:
|
||||
Log.Error("Couldn't list available subtitles for %s", rating_key)
|
||||
return
|
||||
languages = {Language.fromietf(language)}
|
||||
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
config.init_subliminal_patches()
|
||||
|
||||
provider_settings = config.provider_settings.copy()
|
||||
if not skip_wrong_fps:
|
||||
provider_settings = config.provider_settings.copy()
|
||||
provider_settings["opensubtitles"]["skip_wrong_fps"] = False
|
||||
|
||||
available_subs = list_all_subtitles(scanned_parts, {Language.fromietf(language)},
|
||||
providers=config.providers,
|
||||
available_subs = list_all_subtitles([video], languages,
|
||||
providers=providers,
|
||||
provider_configs=provider_settings,
|
||||
pool_class=config.provider_pool)
|
||||
pool_class=config.provider_pool,
|
||||
throttle_callback=config.provider_throttle,
|
||||
language_hook=language_hook)
|
||||
|
||||
use_hearing_impaired = Prefs['subtitles.search.hearingImpaired'] in ("prefer", "force HI")
|
||||
|
||||
# sort subtitles by score
|
||||
unsorted_subtitles = []
|
||||
for s in available_subs[video]:
|
||||
Log.Debug("Starting score computation for %s", s)
|
||||
Log.Debug(u"%s: Starting score computation for %s", self.name, s)
|
||||
try:
|
||||
matches = s.get_matches(video)
|
||||
except AttributeError:
|
||||
Log.Error("Match computation failed for %s: %s", s, traceback.format_exc())
|
||||
Log.Error(u"%s: Match computation failed for %s: %s", self.name, s, traceback.format_exc())
|
||||
continue
|
||||
|
||||
# skip wrong season/episodes
|
||||
if item_type == "episode" and not {"series", "season", "episode"}.issubset(matches):
|
||||
Log.Debug(u"%s: Skipping %s, because it doesn't match our series/episode", self.name, s)
|
||||
continue
|
||||
|
||||
unsorted_subtitles.append(
|
||||
@@ -240,7 +170,7 @@ class SubtitleListingMixin(object):
|
||||
for subtitle, score, matches in scored_subtitles:
|
||||
# check score
|
||||
if score < min_score:
|
||||
Log.Info('Score %d is below min_score (%d)', score, min_score)
|
||||
Log.Info(u'%s: Score %d is below min_score (%d)', self.name, score, min_score)
|
||||
continue
|
||||
subtitle.score = score
|
||||
subtitle.matches = matches
|
||||
@@ -257,24 +187,39 @@ class DownloadSubtitleMixin(object):
|
||||
item_type = subtitle.item_type
|
||||
part_id = subtitle.part_id
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
providers = config.get_providers(media_type="series" if item_type == "episode" else "movies")
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True, providers=providers)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
pre_download_hook(subtitle)
|
||||
|
||||
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
|
||||
download_subtitles([subtitle], providers=config.providers, provider_configs=config.provider_settings,
|
||||
pool_class=config.provider_pool)
|
||||
download_subtitles([subtitle], providers=providers,
|
||||
provider_configs=config.provider_settings,
|
||||
pool_class=config.provider_pool, throttle_callback=config.provider_throttle)
|
||||
|
||||
post_download_hook(subtitle)
|
||||
|
||||
# may be redundant
|
||||
subtitle.pack_data = None
|
||||
|
||||
download_successful = False
|
||||
|
||||
if subtitle.content:
|
||||
try:
|
||||
whack_missing_parts(scanned_parts)
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode=mode, mods=config.default_mods)
|
||||
Log.Debug("Manually downloaded subtitle for: %s", rating_key)
|
||||
if mode == "m":
|
||||
Log.Debug(u"%s: Manually downloaded subtitle for: %s", self.name, rating_key)
|
||||
track_usage("Subtitle", "manual", "download", 1)
|
||||
elif mode == "b":
|
||||
Log.Debug(u"%s: Downloaded better subtitle for: %s", self.name, rating_key)
|
||||
track_usage("Subtitle", "better", "download", 1)
|
||||
download_successful = True
|
||||
refresh_item(rating_key)
|
||||
track_usage("Subtitle", "manual", "download", 1)
|
||||
|
||||
except:
|
||||
Log.Error("Something went wrong when downloading specific subtitle: %s", traceback.format_exc())
|
||||
Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s",
|
||||
self.name, traceback.format_exc())
|
||||
finally:
|
||||
set_refresh_menu_state(None)
|
||||
|
||||
@@ -286,6 +231,13 @@ class DownloadSubtitleMixin(object):
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle,
|
||||
mode=mode)
|
||||
history.destroy()
|
||||
|
||||
# clear missing subtitles menu data
|
||||
if not scheduler.is_task_running("MissingSubtitles"):
|
||||
scheduler.clear_task_data("MissingSubtitles")
|
||||
else:
|
||||
set_refresh_menu_state(u"%s: Subtitle download failed (%s)" % (self.name, rating_key))
|
||||
return download_successful
|
||||
|
||||
|
||||
@@ -310,16 +262,27 @@ class AvailableSubsForItem(SubtitleListingMixin, Task):
|
||||
def run(self):
|
||||
super(AvailableSubsForItem, self).run()
|
||||
self.running = True
|
||||
track_usage("Subtitle", "manual", "list", 1)
|
||||
self.data = self.list_subtitles(self.rating_key, self.item_type, self.part_id, self.language,
|
||||
skip_wrong_fps=False)
|
||||
try:
|
||||
track_usage("Subtitle", "manual", "list", 1)
|
||||
except:
|
||||
Log.Error("Something went wrong with track_usage: %s", traceback.format_exc())
|
||||
|
||||
Log.Debug("Listing available subtitles for: %s", self.rating_key)
|
||||
subs = self.list_subtitles(self.rating_key, self.item_type, self.part_id, self.language, skip_wrong_fps=False)
|
||||
if not subs:
|
||||
self.data = "found_none"
|
||||
return
|
||||
|
||||
# we can't have nasty unpicklable stuff like ZipFile, BytesIO etc in self.data
|
||||
self.data = [s.make_picklable() for s in subs]
|
||||
|
||||
def post_run(self, task_data):
|
||||
super(AvailableSubsForItem, self).post_run(task_data)
|
||||
if self.rating_key not in task_data:
|
||||
task_data[self.rating_key] = {}
|
||||
|
||||
task_data[self.rating_key][self.language] = self.data
|
||||
# clean old data
|
||||
for key in task_data.keys():
|
||||
if key != self.rating_key:
|
||||
del task_data[key]
|
||||
task_data.update({self.rating_key: {self.language: self.data}})
|
||||
|
||||
|
||||
class DownloadSubtitleForItem(DownloadSubtitleMixin, Task):
|
||||
@@ -356,11 +319,304 @@ class MissingSubtitles(Task):
|
||||
task_data["missing_subtitles"] = self.data
|
||||
|
||||
|
||||
class SearchAllRecentlyAddedMissing(Task):
|
||||
periodic = True
|
||||
|
||||
items_done = None
|
||||
items_searching = None
|
||||
percentage = 0
|
||||
|
||||
def __init__(self):
|
||||
super(SearchAllRecentlyAddedMissing, self).__init__()
|
||||
self.items_done = None
|
||||
self.items_searching = None
|
||||
self.percentage = 0
|
||||
|
||||
def signal_updated_metadata(self, *args, **kwargs):
|
||||
return True
|
||||
|
||||
def prepare(self):
|
||||
self.items_done = 0
|
||||
self.items_searching = 0
|
||||
self.percentage = 0
|
||||
self.ready_for_display = True
|
||||
|
||||
def run(self):
|
||||
super(SearchAllRecentlyAddedMissing, self).run()
|
||||
|
||||
self.running = True
|
||||
self.prepare()
|
||||
|
||||
from support.history import get_history
|
||||
history = get_history()
|
||||
|
||||
now = datetime.datetime.now()
|
||||
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
|
||||
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
|
||||
series_providers = config.get_providers(media_type="series")
|
||||
movie_providers = config.get_providers(media_type="movies")
|
||||
|
||||
is_recent_str = Prefs["scheduler.item_is_recent_age"]
|
||||
num, ident = is_recent_str.split()
|
||||
|
||||
max_search_days = 0
|
||||
if ident == "days":
|
||||
max_search_days = int(num)
|
||||
elif ident == "weeks":
|
||||
max_search_days = int(num) * 7
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
recent_files = subtitle_storage.get_recent_files(age_days=max_search_days)
|
||||
|
||||
self.items_searching = len(recent_files)
|
||||
|
||||
download_count = 0
|
||||
videos_with_downloads = 0
|
||||
|
||||
config.init_subliminal_patches()
|
||||
|
||||
Log.Info(u"%s: Searching for subtitles for %s items", self.name, self.items_searching)
|
||||
|
||||
def skip_item():
|
||||
self.items_searching = self.items_searching - 1
|
||||
self.percentage = int(self.items_done * 100 / self.items_searching)
|
||||
|
||||
# search for subtitles in viable items
|
||||
try:
|
||||
for fn in recent_files:
|
||||
stored_subs = subtitle_storage.load(filename=fn)
|
||||
video_id = stored_subs.video_id
|
||||
if not stored_subs:
|
||||
Log.Debug("Skipping item %s because storage is empty", video_id)
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
# added_date <= max_search_days?
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
Log.Debug("Skipping item %s because it's too old", video_id)
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
if stored_subs.item_type == "episode":
|
||||
min_score = min_score_series
|
||||
providers = series_providers
|
||||
else:
|
||||
min_score = min_score_movies
|
||||
providers = movie_providers
|
||||
|
||||
parts = []
|
||||
plex_item = get_item(video_id)
|
||||
|
||||
if not plex_item:
|
||||
Log.Info(u"%s: Item %s unknown, skipping", self.name, video_id)
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
if is_ignored(video_id, item=plex_item):
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
for media in plex_item.media:
|
||||
parts += media.parts
|
||||
|
||||
downloads_per_video = 0
|
||||
hit_providers = False
|
||||
for part in parts:
|
||||
part_id = part.id
|
||||
|
||||
try:
|
||||
metadata = get_plex_metadata(video_id, part_id, stored_subs.item_type)
|
||||
except PartUnknownException:
|
||||
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
|
||||
continue
|
||||
|
||||
if not metadata:
|
||||
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
|
||||
continue
|
||||
|
||||
Log.Debug(u"%s: Looking for missing subtitles: %s", self.name, get_item_title(plex_item))
|
||||
scanned_parts = scan_videos([metadata], providers=providers)
|
||||
|
||||
downloaded_subtitles = download_best_subtitles(scanned_parts, min_score=min_score,
|
||||
providers=providers)
|
||||
hit_providers = downloaded_subtitles is not None
|
||||
download_successful = False
|
||||
|
||||
if downloaded_subtitles:
|
||||
downloaded_any = any(downloaded_subtitles.values())
|
||||
if not downloaded_any:
|
||||
continue
|
||||
|
||||
try:
|
||||
save_subtitles(scanned_parts, downloaded_subtitles, mode="a", mods=config.default_mods)
|
||||
Log.Debug(u"%s: Downloaded subtitle for item with missing subs: %s", self.name, video_id)
|
||||
download_successful = True
|
||||
refresh_item(video_id)
|
||||
track_usage("Subtitle", "manual", "download", 1)
|
||||
except:
|
||||
Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s", self.name,
|
||||
traceback.format_exc())
|
||||
finally:
|
||||
scanned_parts = None
|
||||
try:
|
||||
item_title = get_title_for_video_metadata(metadata, add_section_title=False)
|
||||
if download_successful:
|
||||
# store item in history
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
|
||||
for subtitle in video_subtitles:
|
||||
downloads_per_video += 1
|
||||
history.add(item_title, video.id, section_title=metadata["section"],
|
||||
subtitle=subtitle,
|
||||
mode="a")
|
||||
|
||||
downloaded_subtitles = None
|
||||
except:
|
||||
Log.Error(u"%s: DEBUG HIT: %s", self.name, traceback.format_exc())
|
||||
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
Thread.Sleep(PROVIDER_SLACK)
|
||||
|
||||
download_count += downloads_per_video
|
||||
|
||||
if downloads_per_video:
|
||||
videos_with_downloads += 1
|
||||
|
||||
self.items_done = self.items_done + 1
|
||||
self.percentage = int(self.items_done * 100 / self.items_searching)
|
||||
|
||||
stored_subs = None
|
||||
|
||||
if downloads_per_video:
|
||||
Log.Debug(u"%s: Subtitles have been downloaded, "
|
||||
u"waiting %s seconds before continuing", self.name, DL_PROVIDER_SLACK)
|
||||
Thread.Sleep(DL_PROVIDER_SLACK)
|
||||
else:
|
||||
if hit_providers:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
Thread.Sleep(PROVIDER_SLACK)
|
||||
finally:
|
||||
subtitle_storage.destroy()
|
||||
history.destroy()
|
||||
|
||||
if download_count:
|
||||
Log.Debug(u"%s: done. Missing subtitles found for %s/%s items (%s subs downloaded)", self.name,
|
||||
videos_with_downloads, self.items_searching, download_count)
|
||||
else:
|
||||
Log.Debug(u"%s: done. No subtitles found for %s items", self.name, self.items_searching)
|
||||
|
||||
def post_run(self, task_data):
|
||||
super(SearchAllRecentlyAddedMissing, self).post_run(task_data)
|
||||
self.ready_for_display = False
|
||||
self.percentage = 0
|
||||
self.items_done = None
|
||||
self.items_searching = None
|
||||
|
||||
|
||||
class LegacySearchAllRecentlyAddedMissing(Task):
|
||||
periodic = True
|
||||
frequency = "never"
|
||||
items_done = None
|
||||
items_searching = None
|
||||
items_searching_ids = None
|
||||
items_failed = None
|
||||
percentage = 0
|
||||
|
||||
stall_time = 30
|
||||
|
||||
def __init__(self):
|
||||
super(LegacySearchAllRecentlyAddedMissing, self).__init__()
|
||||
self.items_done = None
|
||||
self.items_searching = None
|
||||
self.items_searching_ids = None
|
||||
self.items_failed = None
|
||||
self.percentage = 0
|
||||
|
||||
def signal(self, signal_name, *args, **kwargs):
|
||||
handler = getattr(self, "signal_%s" % signal_name)
|
||||
return handler(*args, **kwargs) if handler else None
|
||||
|
||||
def signal_updated_metadata(self, *args, **kwargs):
|
||||
item_id = int(args[0])
|
||||
|
||||
if self.items_searching_ids is not None and item_id in self.items_searching_ids:
|
||||
self.items_done.append(item_id)
|
||||
return True
|
||||
|
||||
def prepare(self, *args, **kwargs):
|
||||
self.items_done = []
|
||||
recent_items = get_recent_items()
|
||||
missing = items_get_all_missing_subs(recent_items, sleep_after_request=0.2)
|
||||
ids = set([id for added_at, id, title, item, missing_languages in missing if not is_ignored(id, item=item)])
|
||||
self.items_searching = missing
|
||||
self.items_searching_ids = ids
|
||||
self.items_failed = []
|
||||
self.percentage = 0
|
||||
self.ready_for_display = True
|
||||
|
||||
def run(self):
|
||||
super(LegacySearchAllRecentlyAddedMissing, self).run()
|
||||
self.running = True
|
||||
missing_count = len(self.items_searching)
|
||||
items_done_count = 0
|
||||
|
||||
for added_at, item_id, title, item, missing_languages in self.items_searching:
|
||||
Log.Debug(u"Task: %s, triggering refresh for %s (%s)", self.name, title, item_id)
|
||||
try:
|
||||
refresh_item(item_id)
|
||||
except URLError:
|
||||
# timeout
|
||||
pass
|
||||
search_started = datetime.datetime.now()
|
||||
tries = 1
|
||||
while 1:
|
||||
if item_id in self.items_done:
|
||||
items_done_count += 1
|
||||
self.percentage = int(items_done_count * 100 / missing_count)
|
||||
Log.Debug(u"Task: %s, item %s done (%s%%, %s/%s)", self.name, item_id, self.percentage,
|
||||
items_done_count, missing_count)
|
||||
break
|
||||
|
||||
# item considered stalled after self.stall_time seconds passed after last refresh
|
||||
if (datetime.datetime.now() - search_started).total_seconds() > self.stall_time:
|
||||
if tries > 3:
|
||||
self.items_failed.append(item_id)
|
||||
Log.Debug(u"Task: %s, item stalled for %s times: %s, skipping", self.name, tries, item_id)
|
||||
break
|
||||
|
||||
Log.Debug(u"Task: %s, item stalled for %s seconds: %s, retrying", self.name, self.stall_time,
|
||||
item_id)
|
||||
tries += 1
|
||||
try:
|
||||
refresh_item(item_id)
|
||||
except URLError:
|
||||
pass
|
||||
search_started = datetime.datetime.now()
|
||||
Thread.Sleep(1)
|
||||
Thread.Sleep(0.1)
|
||||
# we can't hammer the PMS, otherwise requests will be stalled
|
||||
Thread.Sleep(5)
|
||||
|
||||
Log.Debug("Task: %s, done (%s%%, %s/%s). Failed items: %s", self.name, self.percentage,
|
||||
items_done_count, missing_count, self.items_failed)
|
||||
|
||||
def post_run(self, task_data):
|
||||
super(LegacySearchAllRecentlyAddedMissing, self).post_run(task_data)
|
||||
self.ready_for_display = False
|
||||
self.percentage = 0
|
||||
self.items_done = None
|
||||
self.items_failed = None
|
||||
self.items_searching = None
|
||||
self.items_searching_ids = None
|
||||
|
||||
|
||||
class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
periodic = True
|
||||
|
||||
# TV: episode, format, series, year, season, video_codec, release_group, hearing_impaired
|
||||
series_cutoff = 355
|
||||
# TV: episode, format, series, year, season, video_codec, release_group, hearing_impaired, resolution
|
||||
series_cutoff = 357
|
||||
|
||||
# movies: format, title, release_group, year, video_codec, resolution, hearing_impaired
|
||||
movies_cutoff = 117
|
||||
@@ -375,11 +631,11 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
try:
|
||||
max_search_days = int(Prefs["scheduler.tasks.FindBetterSubtitles.max_days_after_added"].strip())
|
||||
except ValueError:
|
||||
Log.Error("Please only put numbers into the FindBetterSubtitles.max_days_after_added setting. Exiting")
|
||||
Log.Error(u"Please only put numbers into the FindBetterSubtitles.max_days_after_added setting. Exiting")
|
||||
return
|
||||
else:
|
||||
if max_search_days > 30:
|
||||
Log.Error("FindBetterSubtitles.max_days_after_added is too big. Max is 30 days.")
|
||||
Log.Error(u"%s: FindBetterSubtitles.max_days_after_added is too big. Max is 30 days.", self.name)
|
||||
return
|
||||
|
||||
now = datetime.datetime.now()
|
||||
@@ -390,108 +646,152 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
overwrite_manually_selected = cast_bool(
|
||||
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected"])
|
||||
|
||||
air_date_cutoff_pref = Prefs["scheduler.tasks.FindBetterSubtitles.air_date_cutoff"]
|
||||
if air_date_cutoff_pref == "don't limit":
|
||||
air_date_cutoff = None
|
||||
else:
|
||||
air_date_cutoff = int(air_date_cutoff_pref.split()[0])
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
recent_subs = subtitle_storage.load_recent_files(age_days=max_search_days)
|
||||
viable_item_count = 0
|
||||
|
||||
for fn, stored_subs in recent_subs.iteritems():
|
||||
video_id = stored_subs.video_id
|
||||
try:
|
||||
for fn in subtitle_storage.get_recent_files(age_days=max_search_days):
|
||||
stored_subs = subtitle_storage.load(filename=fn)
|
||||
if not stored_subs:
|
||||
continue
|
||||
|
||||
if stored_subs.item_type == "episode":
|
||||
cutoff = self.series_cutoff
|
||||
min_score = min_score_series
|
||||
else:
|
||||
cutoff = self.movies_cutoff
|
||||
min_score = min_score_movies
|
||||
video_id = stored_subs.video_id
|
||||
|
||||
# don't search for better subtitles until at least 30 minutes have passed
|
||||
if stored_subs.added_at + datetime.timedelta(minutes=30) > now:
|
||||
Log.Debug("Item %s too new, skipping", video_id)
|
||||
continue
|
||||
if stored_subs.item_type == "episode":
|
||||
cutoff = self.series_cutoff
|
||||
min_score = min_score_series
|
||||
else:
|
||||
cutoff = self.movies_cutoff
|
||||
min_score = min_score_movies
|
||||
|
||||
# added_date <= max_search_days?
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
continue
|
||||
# don't search for better subtitles until at least 30 minutes have passed
|
||||
if stored_subs.added_at + datetime.timedelta(minutes=30) > now:
|
||||
Log.Debug(u"%s: Item %s too new, skipping", self.name, video_id)
|
||||
continue
|
||||
|
||||
viable_item_count += 1
|
||||
ditch_parts = []
|
||||
# added_date <= max_search_days?
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
continue
|
||||
|
||||
# look through all stored subtitle data
|
||||
for part_id, languages in stored_subs.parts.iteritems():
|
||||
part_id = str(part_id)
|
||||
viable_item_count += 1
|
||||
ditch_parts = []
|
||||
|
||||
# all languages
|
||||
for language, current_subs in languages.iteritems():
|
||||
current_key = current_subs.get("current")
|
||||
current = current_subs.get(current_key)
|
||||
# look through all stored subtitle data
|
||||
for part_id, languages in stored_subs.parts.iteritems():
|
||||
part_id = str(part_id)
|
||||
|
||||
# currently got subtitle?
|
||||
if not current:
|
||||
continue
|
||||
current_score = current.score
|
||||
current_mode = current.mode
|
||||
# all languages
|
||||
for language, current_subs in languages.iteritems():
|
||||
current_key = current_subs.get("current")
|
||||
current = current_subs.get(current_key)
|
||||
|
||||
# late cutoff met? skip
|
||||
if current_score >= cutoff:
|
||||
Log.Debug(u"Skipping finding better subs, cutoff met (current: %s, cutoff: %s): %s (%s)",
|
||||
current_score, cutoff, stored_subs.title, video_id)
|
||||
continue
|
||||
# currently got subtitle?
|
||||
# fixme: check for existence
|
||||
if not current:
|
||||
continue
|
||||
current_score = current.score
|
||||
current_mode = current.mode
|
||||
|
||||
# got manual subtitle but don't want to touch those?
|
||||
if current_mode == "m" and not overwrite_manually_selected:
|
||||
Log.Debug(u"Skipping finding better subs, had manual: %s (%s)", stored_subs.title, video_id)
|
||||
continue
|
||||
# late cutoff met? skip
|
||||
if current_score >= cutoff:
|
||||
Log.Debug(u"%s: Skipping finding better subs, "
|
||||
u"cutoff met (current: %s, cutoff: %s): %s (%s)",
|
||||
self.name, current_score, cutoff, stored_subs.title, video_id)
|
||||
continue
|
||||
|
||||
# subtitle modifications different from default
|
||||
if not overwrite_manually_modified and current.mods \
|
||||
and set(current.mods).difference(set(config.default_mods)):
|
||||
Log.Debug(u"Skipping finding better subs, it has manual modifications: %s (%s)",
|
||||
stored_subs.title, video_id)
|
||||
continue
|
||||
# got manual subtitle but don't want to touch those?
|
||||
if current_mode == "m" and not overwrite_manually_selected:
|
||||
Log.Debug(u"%s: Skipping finding better subs, "
|
||||
u"had manual: %s (%s)", self.name, stored_subs.title, video_id)
|
||||
continue
|
||||
|
||||
try:
|
||||
subs = self.list_subtitles(video_id, stored_subs.item_type, part_id, language)
|
||||
except PartUnknownException:
|
||||
Log.Info("Part %s unknown/gone; ditching subtitle info", part_id)
|
||||
ditch_parts.append(part_id)
|
||||
continue
|
||||
# subtitle modifications different from default
|
||||
if not overwrite_manually_modified and current.mods \
|
||||
and set(current.mods).difference(set(config.default_mods)):
|
||||
Log.Debug(u"%s: Skipping finding better subs, it has manual modifications: %s (%s)",
|
||||
self.name, stored_subs.title, video_id)
|
||||
continue
|
||||
|
||||
if subs:
|
||||
# subs are already sorted by score
|
||||
better_downloaded = False
|
||||
better_tried_download = 0
|
||||
for sub in subs:
|
||||
if sub.score > current_score and sub.score > min_score:
|
||||
Log.Debug("Better subtitle found for %s, downloading", video_id)
|
||||
better_tried_download += 1
|
||||
ret = self.download_subtitle(sub, video_id, mode="b")
|
||||
if ret:
|
||||
better_found += 1
|
||||
better_downloaded = True
|
||||
break
|
||||
else:
|
||||
Log.Debug("Couldn't download/save subtitle. Continuing to the next one")
|
||||
if better_tried_download and not better_downloaded:
|
||||
Log.Debug("Tried downloading better subtitle for %s, but every try failed.", video_id)
|
||||
try:
|
||||
subs = self.list_subtitles(video_id, stored_subs.item_type, part_id, language,
|
||||
air_date_cutoff=air_date_cutoff)
|
||||
except PartUnknownException:
|
||||
Log.Info(u"%s: Part %s unknown/gone; ditching subtitle info", self.name, part_id)
|
||||
ditch_parts.append(part_id)
|
||||
continue
|
||||
|
||||
elif better_downloaded:
|
||||
Log.Debug("Better subtitle downloaded for %s", video_id)
|
||||
hit_providers = subs is not None
|
||||
|
||||
if ditch_parts:
|
||||
for part_id in ditch_parts:
|
||||
try:
|
||||
del stored_subs.parts[part_id]
|
||||
except KeyError:
|
||||
pass
|
||||
subtitle_storage.save(stored_subs)
|
||||
if subs:
|
||||
# subs are already sorted by score
|
||||
better_downloaded = False
|
||||
better_tried_download = 0
|
||||
better_visited = 0
|
||||
for sub in subs:
|
||||
if sub.score > current_score and sub.score > min_score:
|
||||
Log.Debug(u"%s: Better subtitle found for %s, downloading", self.name, video_id)
|
||||
better_tried_download += 1
|
||||
ret = self.download_subtitle(sub, video_id, mode="b")
|
||||
if ret:
|
||||
better_found += 1
|
||||
better_downloaded = True
|
||||
break
|
||||
else:
|
||||
Log.Debug(u"%s: Couldn't download/save subtitle. "
|
||||
u"Continuing to the next one", self.name)
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing",
|
||||
self.name, DL_PROVIDER_SLACK)
|
||||
Thread.Sleep(DL_PROVIDER_SLACK)
|
||||
better_visited += 1
|
||||
|
||||
time.sleep(1)
|
||||
if better_tried_download and not better_downloaded:
|
||||
Log.Debug(u"%s: Tried downloading better subtitle for %s, "
|
||||
u"but every try failed.", self.name, video_id)
|
||||
|
||||
elif better_downloaded:
|
||||
Log.Debug(u"%s: Better subtitle downloaded for %s", self.name, video_id)
|
||||
|
||||
if better_tried_download or better_downloaded:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, DL_PROVIDER_SLACK)
|
||||
Thread.Sleep(DL_PROVIDER_SLACK)
|
||||
|
||||
elif better_visited:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
Thread.Sleep(PROVIDER_SLACK)
|
||||
|
||||
subs = None
|
||||
|
||||
elif hit_providers:
|
||||
# hit the providers but didn't try downloading? wait.
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
Thread.Sleep(PROVIDER_SLACK)
|
||||
|
||||
if ditch_parts:
|
||||
for part_id in ditch_parts:
|
||||
try:
|
||||
del stored_subs.parts[part_id]
|
||||
except KeyError:
|
||||
pass
|
||||
subtitle_storage.save(stored_subs)
|
||||
ditch_parts = None
|
||||
|
||||
stored_subs = None
|
||||
|
||||
Thread.Sleep(1)
|
||||
finally:
|
||||
subtitle_storage.destroy()
|
||||
|
||||
if better_found:
|
||||
Log.Debug("Task: %s, done. Better subtitles found for %s/%s items", self.name, better_found,
|
||||
Log.Debug(u"%s: done. Better subtitles found for %s/%s items", self.name, better_found,
|
||||
viable_item_count)
|
||||
else:
|
||||
Log.Debug("Task: %s, done. No better subtitles found for %s items", self.name, viable_item_count)
|
||||
Log.Debug(u"%s: done. No better subtitles found for %s items", self.name, viable_item_count)
|
||||
|
||||
|
||||
class SubtitleStorageMaintenance(Task):
|
||||
@@ -501,14 +801,35 @@ class SubtitleStorageMaintenance(Task):
|
||||
def run(self):
|
||||
super(SubtitleStorageMaintenance, self).run()
|
||||
self.running = True
|
||||
Log.Info("Running subtitle storage maintenance")
|
||||
Log.Info(u"%s: Running subtitle storage maintenance", self.name)
|
||||
storage = get_subtitle_storage()
|
||||
deleted_items = storage.delete_missing_files()
|
||||
deleted_items = storage.delete_missing(wanted_languages=set(str(l) for l in config.lang_list))
|
||||
if deleted_items:
|
||||
Log.Info("Subtitle information for %d non-existant videos have been cleaned up" % len(deleted_items))
|
||||
Log.Debug("Videos: %s" % deleted_items)
|
||||
Log.Info(u"%s: Subtitle information for %d non-existant videos have been cleaned up",
|
||||
self.name, len(deleted_items))
|
||||
Log.Debug(u"%s: Videos: %s", self.name, deleted_items)
|
||||
else:
|
||||
Log.Info("Nothing to do")
|
||||
Log.Info(u"%s: Nothing to do", self.name)
|
||||
|
||||
storage.destroy()
|
||||
|
||||
|
||||
class MenuHistoryMaintenance(Task):
|
||||
periodic = True
|
||||
frequency = "every 7 days"
|
||||
|
||||
def run(self):
|
||||
super(MenuHistoryMaintenance, self).run()
|
||||
self.running = True
|
||||
Log.Info(u"%s: Running menu history maintenance", self.name)
|
||||
now = datetime.datetime.now()
|
||||
if "menu_history" in Dict:
|
||||
for key, timeout in Dict["menu_history"].copy().items():
|
||||
if now > timeout:
|
||||
try:
|
||||
del Dict["menu_history"][key]
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
class MigrateSubtitleStorage(Task):
|
||||
@@ -518,15 +839,49 @@ class MigrateSubtitleStorage(Task):
|
||||
def run(self):
|
||||
super(MigrateSubtitleStorage, self).run()
|
||||
self.running = True
|
||||
Log.Info("Running subtitle storage migration")
|
||||
Log.Info(u"%s: Running subtitle storage migration", self.name)
|
||||
storage = get_subtitle_storage()
|
||||
for fn in storage.get_all_files():
|
||||
if fn.endswith(".json.gz"):
|
||||
continue
|
||||
Log.Debug("Migrating %s", fn)
|
||||
Log.Debug(u"%s: Migrating %s", self.name, fn)
|
||||
storage.load(None, fn)
|
||||
|
||||
storage.destroy()
|
||||
|
||||
|
||||
class CacheMaintenance(Task):
|
||||
periodic = True
|
||||
frequency = "every 1 days"
|
||||
|
||||
main_cache_validity = 14 # days
|
||||
pack_cache_validity = 4 # days
|
||||
|
||||
def run(self):
|
||||
super(CacheMaintenance, self).run()
|
||||
self.running = True
|
||||
Log.Info(u"%s: Running cache maintenance", self.name)
|
||||
now = datetime.datetime.now()
|
||||
|
||||
def remove_expired(path, expiry):
|
||||
mtime = datetime.datetime.fromtimestamp(os.path.getmtime(path))
|
||||
if mtime + datetime.timedelta(days=expiry) < now:
|
||||
try:
|
||||
os.remove(path)
|
||||
except (IOError, OSError):
|
||||
Log.Debug("Couldn't remove cache file: %s", os.path.basename(path))
|
||||
|
||||
# main cache
|
||||
if config.new_style_cache:
|
||||
for fn in subliminal_cache_region.backend.all_filenames:
|
||||
remove_expired(fn, self.main_cache_validity)
|
||||
|
||||
# archive cache
|
||||
for fn in glob.iglob(os.path.join(config.pack_cache_dir, "*.archive")):
|
||||
remove_expired(fn, self.pack_cache_validity)
|
||||
|
||||
|
||||
scheduler.register(LegacySearchAllRecentlyAddedMissing)
|
||||
scheduler.register(SearchAllRecentlyAddedMissing)
|
||||
scheduler.register(AvailableSubsForItem)
|
||||
scheduler.register(DownloadSubtitleForItem)
|
||||
@@ -534,4 +889,5 @@ scheduler.register(MissingSubtitles)
|
||||
scheduler.register(FindBetterSubtitles)
|
||||
scheduler.register(SubtitleStorageMaintenance)
|
||||
scheduler.register(MigrateSubtitleStorage)
|
||||
|
||||
scheduler.register(MenuHistoryMaintenance)
|
||||
scheduler.register(CacheMaintenance)
|
||||
|
||||
+163
-54
@@ -1,6 +1,6 @@
|
||||
[
|
||||
{
|
||||
"id": "langPref1",
|
||||
"id": "langPref1a",
|
||||
"label": "Subtitle Language (1)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
@@ -40,6 +40,8 @@
|
||||
"ro",
|
||||
"ru",
|
||||
"sr",
|
||||
"sr-cyrl",
|
||||
"sr-latn",
|
||||
"sk",
|
||||
"sl",
|
||||
"es",
|
||||
@@ -53,7 +55,7 @@
|
||||
"default": "en"
|
||||
},
|
||||
{
|
||||
"id": "langPref2",
|
||||
"id": "langPref2a",
|
||||
"label": "Subtitle Language (2)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
@@ -94,6 +96,8 @@
|
||||
"ro",
|
||||
"ru",
|
||||
"sr",
|
||||
"sr-cyrl",
|
||||
"sr-latn",
|
||||
"sk",
|
||||
"sl",
|
||||
"es",
|
||||
@@ -107,7 +111,7 @@
|
||||
"default": "None"
|
||||
},
|
||||
{
|
||||
"id": "langPref3",
|
||||
"id": "langPref3a",
|
||||
"label": "Subtitle Language (3)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
@@ -148,6 +152,8 @@
|
||||
"ro",
|
||||
"ru",
|
||||
"sr",
|
||||
"sr-cyrl",
|
||||
"sr-latn",
|
||||
"sk",
|
||||
"sl",
|
||||
"es",
|
||||
@@ -173,11 +179,17 @@
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.language.ietf",
|
||||
"label": "Treat IETF language tags as ISO 639-1 (e.g. pt-BR = pt)",
|
||||
"id": "subtitles.language.ietf_display",
|
||||
"label": "Display languages with country attribute as ISO 639-1 (e.g. pt-BR = pt)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.language.ietf_normalize",
|
||||
"label": "Treat languages with country attribute as ISO 639-1 (e.g. don't download pt-BR if pt subtitle exists)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.only_one",
|
||||
"label": "Restrict to one language (skips adding \".lang.\" to the subtitle filename; only uses \"Subtitle Language (1)\")",
|
||||
@@ -190,6 +202,50 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "media_rename1",
|
||||
"label": "I rename my files using",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"Sonarr/Radarr (fill api info below)",
|
||||
"Filebot",
|
||||
"Sonarr/Radarr/Filebot",
|
||||
"Symlink to original file",
|
||||
"I keep the original filenames",
|
||||
"none of the above"
|
||||
],
|
||||
"default": "I keep the original filenames"
|
||||
},
|
||||
{
|
||||
"id": "use_file_info_file",
|
||||
"label": "Retrieve original filename from .file_info/file_info index files (see wiki)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "drone_api.sonarr.url",
|
||||
"label": "Sonarr URL (add URL base if configured)",
|
||||
"type": "text",
|
||||
"default": "http://127.0.0.1:8989"
|
||||
},
|
||||
{
|
||||
"id": "drone_api.sonarr.api_key",
|
||||
"label": "Sonarr API key",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "drone_api.radarr.url",
|
||||
"label": "Radarr URL (add URL base if configured, min. version: 0.2.0.897)",
|
||||
"type": "text",
|
||||
"default": "http://127.0.0.1:7878"
|
||||
},
|
||||
{
|
||||
"id": "drone_api.radarr.api_key",
|
||||
"label": "Radarr API key",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.enabled",
|
||||
"label": "Provider: Enable OpenSubtitles",
|
||||
@@ -198,7 +254,7 @@
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.username",
|
||||
"label": "Opensubtitles Username (VIP)",
|
||||
"label": "Opensubtitles Username",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
@@ -210,12 +266,24 @@
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.is_vip",
|
||||
"label": "OpenSubtitles VIP? (ad-free subs, 1000 subs/day, no-cache VIP server: http://v.ht/osvip)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.podnapisi.enabled",
|
||||
"label": "Provider: Enable Podnapisi.NET",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.titlovi.enabled",
|
||||
"label": "Provider: Enable Titlovi.com",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.enabled",
|
||||
"label": "Provider: Enable Addic7ed",
|
||||
@@ -237,7 +305,7 @@
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.boost_by1",
|
||||
"id": "provider.addic7ed.boost_by2",
|
||||
"label": "Addic7ed: boost score (if requirements met)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
@@ -260,12 +328,13 @@
|
||||
"25",
|
||||
"21",
|
||||
"20",
|
||||
"19",
|
||||
"15",
|
||||
"10",
|
||||
"5",
|
||||
"0"
|
||||
],
|
||||
"default": "21"
|
||||
"default": "19"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.use_random_agents",
|
||||
@@ -275,7 +344,7 @@
|
||||
},
|
||||
{
|
||||
"id": "provider.legendastv.enabled",
|
||||
"label": "Provider: Enable Legendas TV (mostly pt-BR)",
|
||||
"label": "Provider: Enable Legendas TV (mostly pt-BR; UNRAR NEEDED)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
@@ -306,31 +375,11 @@
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.shooter.enabled",
|
||||
"label": "Provider: Enable Shooter.cn (Chinese)",
|
||||
"id": "provider.subscene.enabled",
|
||||
"label": "Provider: Enable SubScene",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.subscenter.enabled",
|
||||
"label": "Provider: Enable SubsCenter (Hebrew)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.subscenter.username",
|
||||
"label": "SubsCenter Username",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.subscenter.password",
|
||||
"label": "SubsCenter Password",
|
||||
"type": "text",
|
||||
"option": "hidden",
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "providers.multithreading",
|
||||
"label": "Search enabled providers simultaneously (multithreading)",
|
||||
@@ -338,32 +387,26 @@
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.use_tags",
|
||||
"label": "I keep the exact (release-) filename of my media files",
|
||||
"id": "subtitles.embedded.autoextract",
|
||||
"label": "Automatically extract and use embedded subtitles upon media addition (with configured default mods)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.embedded",
|
||||
"label": "Scan: include embedded subtitles (in the media file (MKV/MP4), don't download if existing)",
|
||||
"label": "Don't search for subtitles of a language if there are embedded subtitles inside the media file (MKV/MP4)?",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.external",
|
||||
"label": "Scan: include external subtitles (metadata/filesystem, don't download if existing)",
|
||||
"label": "Don't search for subtitles of a language if they already exist on the filesystem (metadata/filesystem)?",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.exotic_ext",
|
||||
"label": "Scan: include \"exotic\" external subtitle formats (anything else than .srt/.ssa/.ass)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.filename_strictness",
|
||||
"label": "Scan: which external subtitles should be picked up?",
|
||||
"label": "How strict should these subtitles existing on the filesystem be detected?",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"exact: media filename match",
|
||||
@@ -372,6 +415,12 @@
|
||||
],
|
||||
"default": "loose: filename contains media filename"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.exotic_ext",
|
||||
"label": "Include non-text subtitle formats (anything else than .srt/.ssa/.ass/.vtt; embedded or external) in the above?",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.search.minimumTVScore2",
|
||||
"label": "Minimum score for TV (min: 240, def/sane: 337, min-ideal: 352; see http://v.ht/szscores)",
|
||||
@@ -402,6 +451,12 @@
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.remove_tags",
|
||||
"label": "Remove style tags from downloaded subtitles (bold, italic, underline, colors, ...)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.fix_common",
|
||||
"label": "Fix common whitespace/punctuation issues in subtitles",
|
||||
@@ -414,12 +469,6 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.enforce_encoding",
|
||||
"label": "Normalize subtitle encoding to UTF-8",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.colors",
|
||||
"label": "Change colors of subtitles to",
|
||||
@@ -451,6 +500,17 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.save.formats",
|
||||
"label": "Subtitle formats to save (non-SRT only works if the previous option is enabled)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"SRT",
|
||||
"VTT",
|
||||
"SRT+VTT"
|
||||
],
|
||||
"default": "SRT"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.save.subFolder",
|
||||
"label": "Subtitle Folder (\"current folder\" is the folder the current media file lives in)",
|
||||
@@ -496,7 +556,8 @@
|
||||
"never",
|
||||
"current media item",
|
||||
"next episode (series)",
|
||||
"hybrid: current item or next episode"
|
||||
"hybrid: current item or next episode",
|
||||
"hybrid-plus: current item and next episode"
|
||||
],
|
||||
"default": "never"
|
||||
},
|
||||
@@ -506,8 +567,6 @@
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"never",
|
||||
"every 1 hours",
|
||||
"every 3 hours",
|
||||
"every 6 hours",
|
||||
"every 12 hours",
|
||||
"every 24 hours"
|
||||
@@ -528,7 +587,8 @@
|
||||
"3 weeks",
|
||||
"4 weeks",
|
||||
"5 weeks",
|
||||
"6 weeks"
|
||||
"6 weeks",
|
||||
"12 weeks"
|
||||
],
|
||||
"default": "2 weeks"
|
||||
},
|
||||
@@ -556,6 +616,25 @@
|
||||
"type": "text",
|
||||
"default": "7"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.air_date_cutoff",
|
||||
"label": "Scheduler: Don't search for better subtitles if the item's air date is older than",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"don't limit",
|
||||
"1 year",
|
||||
"2 years",
|
||||
"3 years",
|
||||
"4 years",
|
||||
"5 years",
|
||||
"6 years",
|
||||
"7 years",
|
||||
"8 years",
|
||||
"9 years",
|
||||
"10 years"
|
||||
],
|
||||
"default": "1 year"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected",
|
||||
"label": "Scheduler: Overwrite manually selected subtitles when better found",
|
||||
@@ -643,7 +722,7 @@
|
||||
},
|
||||
{
|
||||
"id": "notify_executable",
|
||||
"label": "Call this executable upon successful subtitle download",
|
||||
"label": "Call this executable upon successful subtitle download (see Wiki for details)",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
@@ -653,6 +732,30 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "new_style_cache",
|
||||
"label": "Use new style caching (for subliminal)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "low_impact_mode",
|
||||
"label": "Low impact mode (for remote filesystems)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "pms_request_timeout",
|
||||
"label": "Timeout for API requests sent to the PMS",
|
||||
"type": "text",
|
||||
"default": "15"
|
||||
},
|
||||
{
|
||||
"id": "proxy",
|
||||
"label": "HTTP proxy to use for providers (supports credentials)",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "log_level",
|
||||
"label": "How verbose should the logging be?",
|
||||
@@ -666,6 +769,12 @@
|
||||
],
|
||||
"default": "WARNING"
|
||||
},
|
||||
{
|
||||
"id": "log_rotate_keep",
|
||||
"label": "How many log backups to keep?",
|
||||
"type": "text",
|
||||
"default": "5"
|
||||
},
|
||||
{
|
||||
"id": "log_debug_mods",
|
||||
"label": "Log subtitle modification (debug)",
|
||||
|
||||
+4
-4
@@ -9,11 +9,11 @@
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>2.0.19</string>
|
||||
<string>2.5.0</string>
|
||||
<key>CFBundleSignature</key>
|
||||
<string>????</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>2.0.19.1337</string>
|
||||
<string>2.5.0.2247</string>
|
||||
<key>PlexFrameworkVersion</key>
|
||||
<string>2</string>
|
||||
<key>PlexPluginClass</key>
|
||||
@@ -23,7 +23,7 @@
|
||||
<key>PlexPluginConsoleLogging</key>
|
||||
<string>0</string>
|
||||
<key>PlexPluginDevMode</key>
|
||||
<string>1</string>
|
||||
<string>0</string>
|
||||
<key>PlexPluginCodePolicy</key>
|
||||
<!-- this allows channels to access some python methods which are otherwise blocked, as well as import external code libraries, and interact with the PMS HTTP API -->
|
||||
<string>Elevated</string>
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
<h1>Sub-Zero for Plex</h1><i>Subtitles done right</i>
|
||||
|
||||
Version 2.0.19.1337 RC8
|
||||
Version 2.5.0.2247
|
||||
|
||||
Originally based on @bramwalet's awesome <a href="https://github.com/bramwalet/Subliminal.bundle">Subliminal.bundle</a>
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,85 @@
|
||||
"""Generic interface to all dbm clones.
|
||||
|
||||
Instead of
|
||||
|
||||
import dbm
|
||||
d = dbm.open(file, 'w', 0666)
|
||||
|
||||
use
|
||||
|
||||
import anydbm
|
||||
d = anydbm.open(file, 'w')
|
||||
|
||||
The returned object is a dbhash, gdbm, dbm or dumbdbm object,
|
||||
dependent on the type of database being opened (determined by whichdb
|
||||
module) in the case of an existing dbm. If the dbm does not exist and
|
||||
the create or new flag ('c' or 'n') was specified, the dbm type will
|
||||
be determined by the availability of the modules (tested in the above
|
||||
order).
|
||||
|
||||
It has the following interface (key and data are strings):
|
||||
|
||||
d[key] = data # store data at key (may override data at
|
||||
# existing key)
|
||||
data = d[key] # retrieve data at key (raise KeyError if no
|
||||
# such key)
|
||||
del d[key] # delete data stored at key (raises KeyError
|
||||
# if no such key)
|
||||
flag = key in d # true if the key exists
|
||||
list = d.keys() # return a list of all existing keys (slow!)
|
||||
|
||||
Future versions may change the order in which implementations are
|
||||
tested for existence, and add interfaces to other dbm-like
|
||||
implementations.
|
||||
"""
|
||||
|
||||
class error(Exception):
|
||||
pass
|
||||
|
||||
_names = ['dbhash', 'gdbm', 'dbm', 'dumbdbm']
|
||||
_errors = [error]
|
||||
_defaultmod = None
|
||||
|
||||
for _name in _names:
|
||||
try:
|
||||
_mod = __import__(_name)
|
||||
except ImportError:
|
||||
continue
|
||||
if not _defaultmod:
|
||||
_defaultmod = _mod
|
||||
_errors.append(_mod.error)
|
||||
|
||||
if not _defaultmod:
|
||||
raise ImportError, "no dbm clone found; tried %s" % _names
|
||||
|
||||
error = tuple(_errors)
|
||||
|
||||
def open(file, flag='r', mode=0666):
|
||||
"""Open or create database at path given by *file*.
|
||||
|
||||
Optional argument *flag* can be 'r' (default) for read-only access, 'w'
|
||||
for read-write access of an existing database, 'c' for read-write access
|
||||
to a new or existing database, and 'n' for read-write access to a new
|
||||
database.
|
||||
|
||||
Note: 'r' and 'w' fail if the database doesn't exist; 'c' creates it
|
||||
only if it doesn't exist; and 'n' always creates a new database.
|
||||
"""
|
||||
|
||||
# guess the type of an existing database
|
||||
from whichdb import whichdb
|
||||
result=whichdb(file)
|
||||
if result is None:
|
||||
# db doesn't exist
|
||||
if 'c' in flag or 'n' in flag:
|
||||
# file doesn't exist and the new
|
||||
# flag was used so use default type
|
||||
mod = _defaultmod
|
||||
else:
|
||||
raise error, "need 'c' or 'n' flag to open new db"
|
||||
elif result == "":
|
||||
# db type cannot be determined
|
||||
raise error, "db type could not be determined"
|
||||
else:
|
||||
mod = __import__(result)
|
||||
return mod.open(file, flag, mode)
|
||||
@@ -4,7 +4,6 @@
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from . import basestr
|
||||
|
||||
@@ -5,26 +5,31 @@ http://www.crummy.com/software/BeautifulSoup/
|
||||
|
||||
Beautiful Soup uses a pluggable XML or HTML parser to parse a
|
||||
(possibly invalid) document into a tree representation. Beautiful Soup
|
||||
provides provides methods and Pythonic idioms that make it easy to
|
||||
navigate, search, and modify the parse tree.
|
||||
provides methods and Pythonic idioms that make it easy to navigate,
|
||||
search, and modify the parse tree.
|
||||
|
||||
Beautiful Soup works with Python 2.6 and up. It works better if lxml
|
||||
Beautiful Soup works with Python 2.7 and up. It works better if lxml
|
||||
and/or html5lib is installed.
|
||||
|
||||
For more than you ever wanted to know about Beautiful Soup, see the
|
||||
documentation:
|
||||
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
|
||||
"""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__author__ = "Leonard Richardson (leonardr@segfault.org)"
|
||||
__version__ = "4.4.1"
|
||||
__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson"
|
||||
__version__ = "4.6.0"
|
||||
__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
|
||||
__license__ = "MIT"
|
||||
|
||||
__all__ = ['BeautifulSoup']
|
||||
|
||||
import os
|
||||
import re
|
||||
import traceback
|
||||
import warnings
|
||||
|
||||
from .builder import builder_registry, ParserRejectedMarkup
|
||||
@@ -77,7 +82,7 @@ class BeautifulSoup(Tag):
|
||||
|
||||
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
|
||||
|
||||
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
|
||||
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
|
||||
|
||||
def __init__(self, markup="", features=None, builder=None,
|
||||
parse_only=None, from_encoding=None, exclude_encodings=None,
|
||||
@@ -137,6 +142,10 @@ class BeautifulSoup(Tag):
|
||||
from_encoding = from_encoding or deprecated_argument(
|
||||
"fromEncoding", "from_encoding")
|
||||
|
||||
if from_encoding and isinstance(markup, unicode):
|
||||
warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
|
||||
from_encoding = None
|
||||
|
||||
if len(kwargs) > 0:
|
||||
arg = kwargs.keys().pop()
|
||||
raise TypeError(
|
||||
@@ -161,19 +170,29 @@ class BeautifulSoup(Tag):
|
||||
markup_type = "XML"
|
||||
else:
|
||||
markup_type = "HTML"
|
||||
|
||||
caller = traceback.extract_stack()[0]
|
||||
filename = caller[0]
|
||||
line_number = caller[1]
|
||||
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
|
||||
filename=filename,
|
||||
line_number=line_number,
|
||||
parser=builder.NAME,
|
||||
markup_type=markup_type))
|
||||
|
||||
self.builder = builder
|
||||
self.is_xml = builder.is_xml
|
||||
self.known_xml = self.is_xml
|
||||
self.builder.soup = self
|
||||
|
||||
self.parse_only = parse_only
|
||||
|
||||
if hasattr(markup, 'read'): # It's a file-type object.
|
||||
markup = markup.read()
|
||||
elif len(markup) <= 256:
|
||||
elif len(markup) <= 256 and (
|
||||
(isinstance(markup, bytes) and not b'<' in markup)
|
||||
or (isinstance(markup, unicode) and not u'<' in markup)
|
||||
):
|
||||
# Print out warnings for a couple beginner problems
|
||||
# involving passing non-markup to Beautiful Soup.
|
||||
# Beautiful Soup will still parse the input as markup,
|
||||
@@ -195,16 +214,10 @@ class BeautifulSoup(Tag):
|
||||
if isinstance(markup, unicode):
|
||||
markup = markup.encode("utf8")
|
||||
warnings.warn(
|
||||
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
|
||||
if markup[:5] == "http:" or markup[:6] == "https:":
|
||||
# TODO: This is ugly but I couldn't get it to work in
|
||||
# Python 3 otherwise.
|
||||
if ((isinstance(markup, bytes) and not b' ' in markup)
|
||||
or (isinstance(markup, unicode) and not u' ' in markup)):
|
||||
if isinstance(markup, unicode):
|
||||
markup = markup.encode("utf8")
|
||||
warnings.warn(
|
||||
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
|
||||
'"%s" looks like a filename, not markup. You should'
|
||||
' probably open this file and pass the filehandle into'
|
||||
' Beautiful Soup.' % markup)
|
||||
self._check_markup_is_url(markup)
|
||||
|
||||
for (self.markup, self.original_encoding, self.declared_html_encoding,
|
||||
self.contains_replacement_characters) in (
|
||||
@@ -223,15 +236,52 @@ class BeautifulSoup(Tag):
|
||||
self.builder.soup = None
|
||||
|
||||
def __copy__(self):
|
||||
return type(self)(self.encode(), builder=self.builder)
|
||||
copy = type(self)(
|
||||
self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
|
||||
)
|
||||
|
||||
# Although we encoded the tree to UTF-8, that may not have
|
||||
# been the encoding of the original markup. Set the copy's
|
||||
# .original_encoding to reflect the original object's
|
||||
# .original_encoding.
|
||||
copy.original_encoding = self.original_encoding
|
||||
return copy
|
||||
|
||||
def __getstate__(self):
|
||||
# Frequently a tree builder can't be pickled.
|
||||
d = dict(self.__dict__)
|
||||
if 'builder' in d and not self.builder.picklable:
|
||||
del d['builder']
|
||||
d['builder'] = None
|
||||
return d
|
||||
|
||||
@staticmethod
|
||||
def _check_markup_is_url(markup):
|
||||
"""
|
||||
Check if markup looks like it's actually a url and raise a warning
|
||||
if so. Markup can be unicode or str (py2) / bytes (py3).
|
||||
"""
|
||||
if isinstance(markup, bytes):
|
||||
space = b' '
|
||||
cant_start_with = (b"http:", b"https:")
|
||||
elif isinstance(markup, unicode):
|
||||
space = u' '
|
||||
cant_start_with = (u"http:", u"https:")
|
||||
else:
|
||||
return
|
||||
|
||||
if any(markup.startswith(prefix) for prefix in cant_start_with):
|
||||
if not space in markup:
|
||||
if isinstance(markup, bytes):
|
||||
decoded_markup = markup.decode('utf-8', 'replace')
|
||||
else:
|
||||
decoded_markup = markup
|
||||
warnings.warn(
|
||||
'"%s" looks like a URL. Beautiful Soup is not an'
|
||||
' HTTP client. You should probably use an HTTP client like'
|
||||
' requests to get the document behind the URL, and feed'
|
||||
' that document to Beautiful Soup.' % decoded_markup
|
||||
)
|
||||
|
||||
def _feed(self):
|
||||
# Convert the document to Unicode.
|
||||
self.builder.reset()
|
||||
@@ -335,7 +385,18 @@ class BeautifulSoup(Tag):
|
||||
if parent.next_sibling:
|
||||
# This node is being inserted into an element that has
|
||||
# already been parsed. Deal with any dangling references.
|
||||
index = parent.contents.index(o)
|
||||
index = len(parent.contents)-1
|
||||
while index >= 0:
|
||||
if parent.contents[index] is o:
|
||||
break
|
||||
index -= 1
|
||||
else:
|
||||
raise ValueError(
|
||||
"Error building tree: supposedly %r was inserted "
|
||||
"into %r after the fact, but I don't see it!" % (
|
||||
o, parent
|
||||
)
|
||||
)
|
||||
if index == 0:
|
||||
previous_element = parent
|
||||
previous_sibling = None
|
||||
@@ -387,7 +448,7 @@ class BeautifulSoup(Tag):
|
||||
"""Push a start tag on to the stack.
|
||||
|
||||
If this method returns None, the tag was rejected by the
|
||||
SoupStrainer. You should proceed as if the tag had not occured
|
||||
SoupStrainer. You should proceed as if the tag had not occurred
|
||||
in the document. For instance, if this was a self-closing tag,
|
||||
don't call handle_endtag.
|
||||
"""
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
from collections import defaultdict
|
||||
import itertools
|
||||
import sys
|
||||
from bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
ContentMetaAttributeValue,
|
||||
HTMLAwareEntitySubstitution,
|
||||
whitespace_re
|
||||
)
|
||||
|
||||
@@ -227,9 +231,14 @@ class HTMLTreeBuilder(TreeBuilder):
|
||||
Such as which tags are empty-element tags.
|
||||
"""
|
||||
|
||||
preserve_whitespace_tags = set(['pre', 'textarea'])
|
||||
empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
|
||||
'spacer', 'link', 'frame', 'base'])
|
||||
preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
|
||||
empty_element_tags = set([
|
||||
# These are from HTML5.
|
||||
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
|
||||
|
||||
# These are from HTML4, removed in HTML5.
|
||||
'spacer', 'frame'
|
||||
])
|
||||
|
||||
# The HTML standard defines these attributes as containing a
|
||||
# space-separated list of values, not a single value. That is,
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__all__ = [
|
||||
'HTML5TreeBuilder',
|
||||
]
|
||||
|
||||
from pdb import set_trace
|
||||
import warnings
|
||||
import re
|
||||
from bs4.builder import (
|
||||
PERMISSIVE,
|
||||
HTML,
|
||||
@@ -15,7 +18,10 @@ from bs4.element import (
|
||||
whitespace_re,
|
||||
)
|
||||
import html5lib
|
||||
from html5lib.constants import namespaces
|
||||
from html5lib.constants import (
|
||||
namespaces,
|
||||
prefixes,
|
||||
)
|
||||
from bs4.element import (
|
||||
Comment,
|
||||
Doctype,
|
||||
@@ -23,6 +29,15 @@ from bs4.element import (
|
||||
Tag,
|
||||
)
|
||||
|
||||
try:
|
||||
# Pre-0.99999999
|
||||
from html5lib.treebuilders import _base as treebuilder_base
|
||||
new_html5lib = False
|
||||
except ImportError, e:
|
||||
# 0.99999999 and up
|
||||
from html5lib.treebuilders import base as treebuilder_base
|
||||
new_html5lib = True
|
||||
|
||||
class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
"""Use html5lib to build a tree."""
|
||||
|
||||
@@ -47,7 +62,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
if self.soup.parse_only is not None:
|
||||
warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
|
||||
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
|
||||
doc = parser.parse(markup, encoding=self.user_specified_encoding)
|
||||
|
||||
extra_kwargs = dict()
|
||||
if not isinstance(markup, unicode):
|
||||
if new_html5lib:
|
||||
extra_kwargs['override_encoding'] = self.user_specified_encoding
|
||||
else:
|
||||
extra_kwargs['encoding'] = self.user_specified_encoding
|
||||
doc = parser.parse(markup, **extra_kwargs)
|
||||
|
||||
# Set the character encoding detected by the tokenizer.
|
||||
if isinstance(markup, unicode):
|
||||
@@ -55,11 +77,17 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
# charEncoding to UTF-8 if it gets Unicode input.
|
||||
doc.original_encoding = None
|
||||
else:
|
||||
doc.original_encoding = parser.tokenizer.stream.charEncoding[0]
|
||||
original_encoding = parser.tokenizer.stream.charEncoding[0]
|
||||
if not isinstance(original_encoding, basestring):
|
||||
# In 0.99999999 and up, the encoding is an html5lib
|
||||
# Encoding object. We want to use a string for compatibility
|
||||
# with other tree builders.
|
||||
original_encoding = original_encoding.name
|
||||
doc.original_encoding = original_encoding
|
||||
|
||||
def create_treebuilder(self, namespaceHTMLElements):
|
||||
self.underlying_builder = TreeBuilderForHtml5lib(
|
||||
self.soup, namespaceHTMLElements)
|
||||
namespaceHTMLElements, self.soup)
|
||||
return self.underlying_builder
|
||||
|
||||
def test_fragment_to_document(self, fragment):
|
||||
@@ -67,10 +95,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
return u'<html><head></head><body>%s</body></html>' % fragment
|
||||
|
||||
|
||||
class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
|
||||
|
||||
def __init__(self, soup, namespaceHTMLElements):
|
||||
self.soup = soup
|
||||
def __init__(self, namespaceHTMLElements, soup=None):
|
||||
if soup:
|
||||
self.soup = soup
|
||||
else:
|
||||
from bs4 import BeautifulSoup
|
||||
self.soup = BeautifulSoup("", "html.parser")
|
||||
super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
|
||||
|
||||
def documentClass(self):
|
||||
@@ -93,7 +125,8 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
return TextNode(Comment(data), self.soup)
|
||||
|
||||
def fragmentClass(self):
|
||||
self.soup = BeautifulSoup("")
|
||||
from bs4 import BeautifulSoup
|
||||
self.soup = BeautifulSoup("", "html.parser")
|
||||
self.soup.name = "[document_fragment]"
|
||||
return Element(self.soup, self.soup, None)
|
||||
|
||||
@@ -105,7 +138,57 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
return self.soup
|
||||
|
||||
def getFragment(self):
|
||||
return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
|
||||
return treebuilder_base.TreeBuilder.getFragment(self).element
|
||||
|
||||
def testSerializer(self, element):
|
||||
from bs4 import BeautifulSoup
|
||||
rv = []
|
||||
doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')
|
||||
|
||||
def serializeElement(element, indent=0):
|
||||
if isinstance(element, BeautifulSoup):
|
||||
pass
|
||||
if isinstance(element, Doctype):
|
||||
m = doctype_re.match(element)
|
||||
if m:
|
||||
name = m.group(1)
|
||||
if m.lastindex > 1:
|
||||
publicId = m.group(2) or ""
|
||||
systemId = m.group(3) or m.group(4) or ""
|
||||
rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
|
||||
(' ' * indent, name, publicId, systemId))
|
||||
else:
|
||||
rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, name))
|
||||
else:
|
||||
rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
|
||||
elif isinstance(element, Comment):
|
||||
rv.append("|%s<!-- %s -->" % (' ' * indent, element))
|
||||
elif isinstance(element, NavigableString):
|
||||
rv.append("|%s\"%s\"" % (' ' * indent, element))
|
||||
else:
|
||||
if element.namespace:
|
||||
name = "%s %s" % (prefixes[element.namespace],
|
||||
element.name)
|
||||
else:
|
||||
name = element.name
|
||||
rv.append("|%s<%s>" % (' ' * indent, name))
|
||||
if element.attrs:
|
||||
attributes = []
|
||||
for name, value in element.attrs.items():
|
||||
if isinstance(name, NamespacedAttribute):
|
||||
name = "%s %s" % (prefixes[name.namespace], name.name)
|
||||
if isinstance(value, list):
|
||||
value = " ".join(value)
|
||||
attributes.append((name, value))
|
||||
|
||||
for name, value in sorted(attributes):
|
||||
rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
|
||||
indent += 2
|
||||
for child in element.children:
|
||||
serializeElement(child, indent)
|
||||
serializeElement(element, 0)
|
||||
|
||||
return "\n".join(rv)
|
||||
|
||||
class AttrList(object):
|
||||
def __init__(self, element):
|
||||
@@ -137,9 +220,9 @@ class AttrList(object):
|
||||
return name in list(self.attrs.keys())
|
||||
|
||||
|
||||
class Element(html5lib.treebuilders._base.Node):
|
||||
class Element(treebuilder_base.Node):
|
||||
def __init__(self, element, soup, namespace):
|
||||
html5lib.treebuilders._base.Node.__init__(self, element.name)
|
||||
treebuilder_base.Node.__init__(self, element.name)
|
||||
self.element = element
|
||||
self.soup = soup
|
||||
self.namespace = namespace
|
||||
@@ -158,8 +241,10 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
child = node
|
||||
elif node.element.__class__ == NavigableString:
|
||||
string_child = child = node.element
|
||||
node.parent = self
|
||||
else:
|
||||
child = node.element
|
||||
node.parent = self
|
||||
|
||||
if not isinstance(child, basestring) and child.parent is not None:
|
||||
node.element.extract()
|
||||
@@ -197,6 +282,8 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
most_recent_element=most_recent_element)
|
||||
|
||||
def getAttributes(self):
|
||||
if isinstance(self.element, Comment):
|
||||
return {}
|
||||
return AttrList(self.element)
|
||||
|
||||
def setAttributes(self, attributes):
|
||||
@@ -224,11 +311,11 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
attributes = property(getAttributes, setAttributes)
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
text = TextNode(self.soup.new_string(data), self.soup)
|
||||
if insertBefore:
|
||||
text = TextNode(self.soup.new_string(data), self.soup)
|
||||
self.insertBefore(data, insertBefore)
|
||||
self.insertBefore(text, insertBefore)
|
||||
else:
|
||||
self.appendChild(data)
|
||||
self.appendChild(text)
|
||||
|
||||
def insertBefore(self, node, refNode):
|
||||
index = self.element.index(refNode.element)
|
||||
@@ -250,6 +337,7 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
# print "MOVE", self.element.contents
|
||||
# print "FROM", self.element
|
||||
# print "TO", new_parent.element
|
||||
|
||||
element = self.element
|
||||
new_parent_element = new_parent.element
|
||||
# Determine what this tag's next_element will be once all the children
|
||||
@@ -268,7 +356,6 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
new_parents_last_descendant_next_element = new_parent_element.next_element
|
||||
|
||||
to_append = element.contents
|
||||
append_after = new_parent_element.contents
|
||||
if len(to_append) > 0:
|
||||
# Set the first child's previous_element and previous_sibling
|
||||
# to elements within the new parent
|
||||
@@ -285,12 +372,19 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
if new_parents_last_child:
|
||||
new_parents_last_child.next_sibling = first_child
|
||||
|
||||
# Fix the last child's next_element and next_sibling
|
||||
last_child = to_append[-1]
|
||||
last_child.next_element = new_parents_last_descendant_next_element
|
||||
# Find the very last element being moved. It is now the
|
||||
# parent's last descendant. It has no .next_sibling and
|
||||
# its .next_element is whatever the previous last
|
||||
# descendant had.
|
||||
last_childs_last_descendant = to_append[-1]._last_descendant(False, True)
|
||||
|
||||
last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
|
||||
if new_parents_last_descendant_next_element:
|
||||
new_parents_last_descendant_next_element.previous_element = last_child
|
||||
last_child.next_sibling = None
|
||||
# TODO: This code has no test coverage and I'm not sure
|
||||
# how to get html5lib to go through this path, but it's
|
||||
# just the other side of the previous line.
|
||||
new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant
|
||||
last_childs_last_descendant.next_sibling = None
|
||||
|
||||
for child in to_append:
|
||||
child.parent = new_parent_element
|
||||
@@ -324,7 +418,7 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
|
||||
class TextNode(Element):
|
||||
def __init__(self, element, soup):
|
||||
html5lib.treebuilders._base.Node.__init__(self, None)
|
||||
treebuilder_base.Node.__init__(self, None)
|
||||
self.element = element
|
||||
self.soup = soup
|
||||
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__all__ = [
|
||||
'HTMLParserTreeBuilder',
|
||||
]
|
||||
@@ -49,7 +52,31 @@ from bs4.builder import (
|
||||
HTMLPARSER = 'html.parser'
|
||||
|
||||
class BeautifulSoupHTMLParser(HTMLParser):
|
||||
def handle_starttag(self, name, attrs):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
HTMLParser.__init__(self, *args, **kwargs)
|
||||
|
||||
# Keep a list of empty-element tags that were encountered
|
||||
# without an explicit closing tag. If we encounter a closing tag
|
||||
# of this type, we'll associate it with one of those entries.
|
||||
#
|
||||
# This isn't a stack because we don't care about the
|
||||
# order. It's a list of closing tags we've already handled and
|
||||
# will ignore, assuming they ever show up.
|
||||
self.already_closed_empty_element = []
|
||||
|
||||
def handle_startendtag(self, name, attrs):
|
||||
# This is only called when the markup looks like
|
||||
# <tag/>.
|
||||
|
||||
# is_startend() tells handle_starttag not to close the tag
|
||||
# just because its name matches a known empty-element tag. We
|
||||
# know that this is an empty-element tag and we want to call
|
||||
# handle_endtag ourselves.
|
||||
tag = self.handle_starttag(name, attrs, handle_empty_element=False)
|
||||
self.handle_endtag(name)
|
||||
|
||||
def handle_starttag(self, name, attrs, handle_empty_element=True):
|
||||
# XXX namespace
|
||||
attr_dict = {}
|
||||
for key, value in attrs:
|
||||
@@ -59,10 +86,34 @@ class BeautifulSoupHTMLParser(HTMLParser):
|
||||
value = ''
|
||||
attr_dict[key] = value
|
||||
attrvalue = '""'
|
||||
self.soup.handle_starttag(name, None, None, attr_dict)
|
||||
#print "START", name
|
||||
tag = self.soup.handle_starttag(name, None, None, attr_dict)
|
||||
if tag and tag.is_empty_element and handle_empty_element:
|
||||
# Unlike other parsers, html.parser doesn't send separate end tag
|
||||
# events for empty-element tags. (It's handled in
|
||||
# handle_startendtag, but only if the original markup looked like
|
||||
# <tag/>.)
|
||||
#
|
||||
# So we need to call handle_endtag() ourselves. Since we
|
||||
# know the start event is identical to the end event, we
|
||||
# don't want handle_endtag() to cross off any previous end
|
||||
# events for tags of this name.
|
||||
self.handle_endtag(name, check_already_closed=False)
|
||||
|
||||
def handle_endtag(self, name):
|
||||
self.soup.handle_endtag(name)
|
||||
# But we might encounter an explicit closing tag for this tag
|
||||
# later on. If so, we want to ignore it.
|
||||
self.already_closed_empty_element.append(name)
|
||||
|
||||
def handle_endtag(self, name, check_already_closed=True):
|
||||
#print "END", name
|
||||
if check_already_closed and name in self.already_closed_empty_element:
|
||||
# This is a redundant end tag for an empty-element tag.
|
||||
# We've already called handle_endtag() for it, so just
|
||||
# check it off the list.
|
||||
# print "ALREADY CLOSED", name
|
||||
self.already_closed_empty_element.remove(name)
|
||||
else:
|
||||
self.soup.handle_endtag(name)
|
||||
|
||||
def handle_data(self, data):
|
||||
self.soup.handle_data(data)
|
||||
@@ -166,6 +217,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
|
||||
warnings.warn(RuntimeWarning(
|
||||
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
|
||||
raise e
|
||||
parser.already_closed_empty_element = []
|
||||
|
||||
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
|
||||
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__all__ = [
|
||||
'LXMLTreeBuilderForXML',
|
||||
'LXMLTreeBuilder',
|
||||
@@ -12,6 +14,7 @@ from bs4.element import (
|
||||
Doctype,
|
||||
NamespacedAttribute,
|
||||
ProcessingInstruction,
|
||||
XMLProcessingInstruction,
|
||||
)
|
||||
from bs4.builder import (
|
||||
FAST,
|
||||
@@ -29,6 +32,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
DEFAULT_PARSER_CLASS = etree.XMLParser
|
||||
|
||||
is_xml = True
|
||||
processing_instruction_class = XMLProcessingInstruction
|
||||
|
||||
NAME = "lxml-xml"
|
||||
ALTERNATE_NAMES = ["xml"]
|
||||
@@ -87,6 +91,16 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
|
||||
Each 4-tuple represents a strategy for parsing the document.
|
||||
"""
|
||||
# Instead of using UnicodeDammit to convert the bytestring to
|
||||
# Unicode using different encodings, use EncodingDetector to
|
||||
# iterate over the encodings, and tell lxml to try to parse
|
||||
# the document as each one in turn.
|
||||
is_html = not self.is_xml
|
||||
if is_html:
|
||||
self.processing_instruction_class = ProcessingInstruction
|
||||
else:
|
||||
self.processing_instruction_class = XMLProcessingInstruction
|
||||
|
||||
if isinstance(markup, unicode):
|
||||
# We were given Unicode. Maybe lxml can parse Unicode on
|
||||
# this system?
|
||||
@@ -98,11 +112,6 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
yield (markup.encode("utf8"), "utf8",
|
||||
document_declared_encoding, False)
|
||||
|
||||
# Instead of using UnicodeDammit to convert the bytestring to
|
||||
# Unicode using different encodings, use EncodingDetector to
|
||||
# iterate over the encodings, and tell lxml to try to parse
|
||||
# the document as each one in turn.
|
||||
is_html = not self.is_xml
|
||||
try_encodings = [user_specified_encoding, document_declared_encoding]
|
||||
detector = EncodingDetector(
|
||||
markup, try_encodings, is_html, exclude_encodings)
|
||||
@@ -201,7 +210,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
def pi(self, target, data):
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(target + ' ' + data)
|
||||
self.soup.endData(ProcessingInstruction)
|
||||
self.soup.endData(self.processing_instruction_class)
|
||||
|
||||
def data(self, content):
|
||||
self.soup.handle_data(content)
|
||||
@@ -229,6 +238,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
|
||||
|
||||
features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
|
||||
is_xml = False
|
||||
processing_instruction_class = ProcessingInstruction
|
||||
|
||||
def default_parser(self, encoding):
|
||||
return etree.HTMLParser
|
||||
|
||||
@@ -6,9 +6,10 @@ necessary. It is heavily based on code from Mark Pilgrim's Universal
|
||||
Feed Parser. It works best on XML and HTML, but it does not rewrite the
|
||||
XML or HTML to reflect a new encoding; that's the tree builder's job.
|
||||
"""
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
from pdb import set_trace
|
||||
import codecs
|
||||
from htmlentitydefs import codepoint2name
|
||||
import re
|
||||
@@ -309,7 +310,7 @@ class EncodingDetector:
|
||||
else:
|
||||
xml_endpos = 1024
|
||||
html_endpos = max(2048, int(len(markup) * 0.05))
|
||||
|
||||
|
||||
declared_encoding = None
|
||||
declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
|
||||
if not declared_encoding_match and is_html:
|
||||
@@ -346,7 +347,7 @@ class UnicodeDammit:
|
||||
self.tried_encodings = []
|
||||
self.contains_replacement_characters = False
|
||||
self.is_html = is_html
|
||||
|
||||
self.log = logging.getLogger(__name__)
|
||||
self.detector = EncodingDetector(
|
||||
markup, override_encodings, is_html, exclude_encodings)
|
||||
|
||||
@@ -376,9 +377,10 @@ class UnicodeDammit:
|
||||
if encoding != "ascii":
|
||||
u = self._convert_from(encoding, "replace")
|
||||
if u is not None:
|
||||
logging.warning(
|
||||
self.log.warning(
|
||||
"Some characters could not be decoded, and were "
|
||||
"replaced with REPLACEMENT CHARACTER.")
|
||||
"replaced with REPLACEMENT CHARACTER."
|
||||
)
|
||||
self.contains_replacement_characters = True
|
||||
break
|
||||
|
||||
@@ -734,7 +736,7 @@ class UnicodeDammit:
|
||||
0xde : b'\xc3\x9e', # Þ
|
||||
0xdf : b'\xc3\x9f', # ß
|
||||
0xe0 : b'\xc3\xa0', # à
|
||||
0xe1 : b'\xa1', # á
|
||||
0xe1 : b'\xa1', # á
|
||||
0xe2 : b'\xc3\xa2', # â
|
||||
0xe3 : b'\xc3\xa3', # ã
|
||||
0xe4 : b'\xc3\xa4', # ä
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""Diagnostic functions, mainly for use when doing tech support."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
import cProfile
|
||||
@@ -56,7 +58,8 @@ def diagnose(data):
|
||||
data = data.read()
|
||||
elif os.path.exists(data):
|
||||
print '"%s" looks like a filename. Reading data from the file.' % data
|
||||
data = open(data).read()
|
||||
with open(data) as fp:
|
||||
data = fp.read()
|
||||
elif data.startswith("http:") or data.startswith("https:"):
|
||||
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
|
||||
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
from pdb import set_trace
|
||||
import collections
|
||||
import re
|
||||
import shlex
|
||||
import sys
|
||||
import warnings
|
||||
from bs4.dammit import EntitySubstitution
|
||||
@@ -99,6 +101,8 @@ class HTMLAwareEntitySubstitution(EntitySubstitution):
|
||||
|
||||
preformatted_tags = set(["pre"])
|
||||
|
||||
preserve_whitespace_tags = set(['pre', 'textarea'])
|
||||
|
||||
@classmethod
|
||||
def _substitute_if_appropriate(cls, ns, f):
|
||||
if (isinstance(ns, NavigableString)
|
||||
@@ -127,8 +131,8 @@ class PageElement(object):
|
||||
# to methods like encode() and prettify():
|
||||
#
|
||||
# "html" - All Unicode characters with corresponding HTML entities
|
||||
# are converted to those entities on output.
|
||||
# "minimal" - Bare ampersands and angle brackets are converted to
|
||||
# are converted to those entities on output.
|
||||
# "minimal" - Bare ampersands and angle brackets are converted to
|
||||
# XML entities: &amp; &lt; &gt;
|
||||
# None - The null formatter. Unicode characters are never
|
||||
# converted to entities. This is not recommended, but it's
|
||||
@@ -169,11 +173,19 @@ class PageElement(object):
|
||||
|
||||
This is used when mapping a formatter name ("minimal") to an
|
||||
appropriate function (one that performs entity-substitution on
|
||||
the contents of <script> and <style> tags, or not). It's
|
||||
the contents of <script> and <style> tags, or not). It can be
|
||||
inefficient, but it should be called very rarely.
|
||||
"""
|
||||
if self.known_xml is not None:
|
||||
# Most of the time we will have determined this when the
|
||||
# document is parsed.
|
||||
return self.known_xml
|
||||
|
||||
# Otherwise, it's likely that this element was created by
|
||||
# direct invocation of the constructor from within the user's
|
||||
# Python code.
|
||||
if self.parent is None:
|
||||
# This is the top-level object. It should have .is_xml set
|
||||
# This is the top-level object. It should have .known_xml set
|
||||
# from tree creation. If not, take a guess--BS is usually
|
||||
# used on HTML markup.
|
||||
return getattr(self, 'is_xml', False)
|
||||
@@ -523,9 +535,16 @@ class PageElement(object):
|
||||
return ResultSet(strainer, result)
|
||||
elif isinstance(name, basestring):
|
||||
# Optimization to find all tags with a given name.
|
||||
if name.count(':') == 1:
|
||||
# This is a name with a prefix.
|
||||
prefix, name = name.split(':', 1)
|
||||
else:
|
||||
prefix = None
|
||||
result = (element for element in generator
|
||||
if isinstance(element, Tag)
|
||||
and element.name == name)
|
||||
and element.name == name
|
||||
and (prefix is None or element.prefix == prefix)
|
||||
)
|
||||
return ResultSet(strainer, result)
|
||||
results = ResultSet(strainer)
|
||||
while True:
|
||||
@@ -637,7 +656,7 @@ class PageElement(object):
|
||||
return lambda el: el._attr_value_as_string(
|
||||
attribute, '').startswith(value)
|
||||
elif operator == '$':
|
||||
# string represenation of `attribute` ends with `value`
|
||||
# string representation of `attribute` ends with `value`
|
||||
return lambda el: el._attr_value_as_string(
|
||||
attribute, '').endswith(value)
|
||||
elif operator == '*':
|
||||
@@ -677,6 +696,11 @@ class NavigableString(unicode, PageElement):
|
||||
PREFIX = ''
|
||||
SUFFIX = ''
|
||||
|
||||
# We can't tell just by looking at a string whether it's contained
|
||||
# in an XML document or an HTML document.
|
||||
|
||||
known_xml = None
|
||||
|
||||
def __new__(cls, value):
|
||||
"""Create a new NavigableString.
|
||||
|
||||
@@ -743,10 +767,16 @@ class CData(PreformattedString):
|
||||
SUFFIX = u']]>'
|
||||
|
||||
class ProcessingInstruction(PreformattedString):
|
||||
"""A SGML processing instruction."""
|
||||
|
||||
PREFIX = u'<?'
|
||||
SUFFIX = u'>'
|
||||
|
||||
class XMLProcessingInstruction(ProcessingInstruction):
|
||||
"""An XML processing instruction."""
|
||||
PREFIX = u'<?'
|
||||
SUFFIX = u'?>'
|
||||
|
||||
class Comment(PreformattedString):
|
||||
|
||||
PREFIX = u'<!--'
|
||||
@@ -781,7 +811,8 @@ class Tag(PageElement):
|
||||
"""Represents a found HTML tag with its attributes and contents."""
|
||||
|
||||
def __init__(self, parser=None, builder=None, name=None, namespace=None,
|
||||
prefix=None, attrs=None, parent=None, previous=None):
|
||||
prefix=None, attrs=None, parent=None, previous=None,
|
||||
is_xml=None):
|
||||
"Basic constructor."
|
||||
|
||||
if parser is None:
|
||||
@@ -795,6 +826,14 @@ class Tag(PageElement):
|
||||
self.name = name
|
||||
self.namespace = namespace
|
||||
self.prefix = prefix
|
||||
if builder is not None:
|
||||
preserve_whitespace_tags = builder.preserve_whitespace_tags
|
||||
else:
|
||||
if is_xml:
|
||||
preserve_whitespace_tags = []
|
||||
else:
|
||||
preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
|
||||
self.preserve_whitespace_tags = preserve_whitespace_tags
|
||||
if attrs is None:
|
||||
attrs = {}
|
||||
elif attrs:
|
||||
@@ -805,6 +844,13 @@ class Tag(PageElement):
|
||||
attrs = dict(attrs)
|
||||
else:
|
||||
attrs = dict(attrs)
|
||||
|
||||
# If possible, determine ahead of time whether this tag is an
|
||||
# XML tag.
|
||||
if builder:
|
||||
self.known_xml = builder.is_xml
|
||||
else:
|
||||
self.known_xml = is_xml
|
||||
self.attrs = attrs
|
||||
self.contents = []
|
||||
self.setup(parent, previous)
|
||||
@@ -824,7 +870,7 @@ class Tag(PageElement):
|
||||
Its contents are a copy of the old Tag's contents.
|
||||
"""
|
||||
clone = type(self)(None, self.builder, self.name, self.namespace,
|
||||
self.nsprefix, self.attrs)
|
||||
self.prefix, self.attrs, is_xml=self._is_xml)
|
||||
for attr in ('can_be_empty_element', 'hidden'):
|
||||
setattr(clone, attr, getattr(self, attr))
|
||||
for child in self.contents:
|
||||
@@ -946,6 +992,13 @@ class Tag(PageElement):
|
||||
attribute."""
|
||||
return self.attrs.get(key, default)
|
||||
|
||||
def get_attribute_list(self, key, default=None):
|
||||
"""The same as get(), but always returns a list."""
|
||||
value = self.get(key, default)
|
||||
if not isinstance(value, list):
|
||||
value = [value]
|
||||
return value
|
||||
|
||||
def has_attr(self, key):
|
||||
return key in self.attrs
|
||||
|
||||
@@ -997,7 +1050,7 @@ class Tag(PageElement):
|
||||
tag_name, tag_name))
|
||||
return self.find(tag_name)
|
||||
# We special case contents to avoid recursion.
|
||||
elif not tag.startswith("__") and not tag=="contents":
|
||||
elif not tag.startswith("__") and not tag == "contents":
|
||||
return self.find(tag)
|
||||
raise AttributeError(
|
||||
"'%s' object has no attribute '%s'" % (self.__class__, tag))
|
||||
@@ -1057,10 +1110,11 @@ class Tag(PageElement):
|
||||
|
||||
def _should_pretty_print(self, indent_level):
|
||||
"""Should this tag be pretty-printed?"""
|
||||
|
||||
return (
|
||||
indent_level is not None and
|
||||
(self.name not in HTMLAwareEntitySubstitution.preformatted_tags
|
||||
or self._is_xml))
|
||||
indent_level is not None
|
||||
and self.name not in self.preserve_whitespace_tags
|
||||
)
|
||||
|
||||
def decode(self, indent_level=None,
|
||||
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
|
||||
@@ -1280,6 +1334,7 @@ class Tag(PageElement):
|
||||
|
||||
_selector_combinators = ['>', '+', '~']
|
||||
_select_debug = False
|
||||
quoted_colon = re.compile('"[^"]*:[^"]*"')
|
||||
def select_one(self, selector):
|
||||
"""Perform a CSS selection operation on the current element."""
|
||||
value = self.select(selector, limit=1)
|
||||
@@ -1305,8 +1360,7 @@ class Tag(PageElement):
|
||||
if limit and len(context) >= limit:
|
||||
break
|
||||
return context
|
||||
|
||||
tokens = selector.split()
|
||||
tokens = shlex.split(selector)
|
||||
current_context = [self]
|
||||
|
||||
if tokens[-1] in self._selector_combinators:
|
||||
@@ -1358,7 +1412,7 @@ class Tag(PageElement):
|
||||
return classes.issubset(candidate.get('class', []))
|
||||
checker = classes_match
|
||||
|
||||
elif ':' in token:
|
||||
elif ':' in token and not self.quoted_colon.search(token):
|
||||
# Pseudo-class
|
||||
tag_name, pseudo = token.split(':', 1)
|
||||
if tag_name == '':
|
||||
@@ -1389,11 +1443,8 @@ class Tag(PageElement):
|
||||
self.count += 1
|
||||
if self.count == self.destination:
|
||||
return True
|
||||
if self.count > self.destination:
|
||||
# Stop the generator that's sending us
|
||||
# these things.
|
||||
raise StopIteration()
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
checker = Counter(pseudo_value).nth_child_of_type
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
@@ -1498,13 +1549,12 @@ class Tag(PageElement):
|
||||
# don't include it in the context more than once.
|
||||
new_context.append(candidate)
|
||||
new_context_ids.add(id(candidate))
|
||||
if limit and len(new_context) >= limit:
|
||||
break
|
||||
elif self._select_debug:
|
||||
print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
|
||||
|
||||
|
||||
current_context = new_context
|
||||
if limit and len(current_context) >= limit:
|
||||
current_context = current_context[:limit]
|
||||
|
||||
if self._select_debug:
|
||||
print "Final verdict:"
|
||||
@@ -1662,28 +1712,22 @@ class SoupStrainer(object):
|
||||
"I don't know how to match against a %s" % markup.__class__)
|
||||
return found
|
||||
|
||||
def _matches(self, markup, match_against):
|
||||
def _matches(self, markup, match_against, already_tried=None):
|
||||
# print u"Matching %s against %s" % (markup, match_against)
|
||||
result = False
|
||||
if isinstance(markup, list) or isinstance(markup, tuple):
|
||||
# This should only happen when searching a multi-valued attribute
|
||||
# like 'class'.
|
||||
if (isinstance(match_against, unicode)
|
||||
and ' ' in match_against):
|
||||
# A bit of a special case. If they try to match "foo
|
||||
# bar" on a multivalue attribute's value, only accept
|
||||
# the literal value "foo bar"
|
||||
#
|
||||
# XXX This is going to be pretty slow because we keep
|
||||
# splitting match_against. But it shouldn't come up
|
||||
# too often.
|
||||
return (whitespace_re.split(match_against) == markup)
|
||||
else:
|
||||
for item in markup:
|
||||
if self._matches(item, match_against):
|
||||
return True
|
||||
return False
|
||||
|
||||
for item in markup:
|
||||
if self._matches(item, match_against):
|
||||
return True
|
||||
# We didn't match any particular value of the multivalue
|
||||
# attribute, but maybe we match the attribute value when
|
||||
# considered as a string.
|
||||
if self._matches(' '.join(markup), match_against):
|
||||
return True
|
||||
return False
|
||||
|
||||
if match_against is True:
|
||||
# True matches any non-None value.
|
||||
return markup is not None
|
||||
@@ -1693,6 +1737,7 @@ class SoupStrainer(object):
|
||||
|
||||
# Custom callables take the tag as an argument, but all
|
||||
# other ways of matching match the tag name as a string.
|
||||
original_markup = markup
|
||||
if isinstance(markup, Tag):
|
||||
markup = markup.name
|
||||
|
||||
@@ -1703,18 +1748,51 @@ class SoupStrainer(object):
|
||||
# None matches None, False, an empty string, an empty list, and so on.
|
||||
return not match_against
|
||||
|
||||
if isinstance(match_against, unicode):
|
||||
if (hasattr(match_against, '__iter__')
|
||||
and not isinstance(match_against, basestring)):
|
||||
# We're asked to match against an iterable of items.
|
||||
# The markup must match at least one item in the
|
||||
# iterable. We'll try each one in turn.
|
||||
#
|
||||
# To avoid infinite recursion we need to keep track of
|
||||
# items we've already seen.
|
||||
if not already_tried:
|
||||
already_tried = set()
|
||||
for item in match_against:
|
||||
if item.__hash__:
|
||||
key = item
|
||||
else:
|
||||
key = id(item)
|
||||
if key in already_tried:
|
||||
continue
|
||||
else:
|
||||
already_tried.add(key)
|
||||
if self._matches(original_markup, item, already_tried):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
# Beyond this point we might need to run the test twice: once against
|
||||
# the tag's name and once against its prefixed name.
|
||||
match = False
|
||||
|
||||
if not match and isinstance(match_against, unicode):
|
||||
# Exact string match
|
||||
return markup == match_against
|
||||
match = markup == match_against
|
||||
|
||||
if hasattr(match_against, 'match'):
|
||||
if not match and hasattr(match_against, 'search'):
|
||||
# Regexp match
|
||||
return match_against.search(markup)
|
||||
|
||||
if hasattr(match_against, '__iter__'):
|
||||
# The markup must be an exact match against something
|
||||
# in the iterable.
|
||||
return markup in match_against
|
||||
if (not match
|
||||
and isinstance(original_markup, Tag)
|
||||
and original_markup.prefix):
|
||||
# Try the whole thing again with the prefixed tag name.
|
||||
return self._matches(
|
||||
original_markup.prefix + ':' + original_markup.name, match_against
|
||||
)
|
||||
|
||||
return match
|
||||
|
||||
|
||||
class ResultSet(list):
|
||||
@@ -1723,3 +1801,8 @@ class ResultSet(list):
|
||||
def __init__(self, source, result=()):
|
||||
super(ResultSet, self).__init__(result)
|
||||
self.source = source
|
||||
|
||||
def __getattr__(self, key):
|
||||
raise AttributeError(
|
||||
"ResultSet object has no attribute '%s'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?" % key
|
||||
)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""Helper classes for tests."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
import pickle
|
||||
@@ -67,6 +69,18 @@ class HTMLTreeBuilderSmokeTest(object):
|
||||
markup in these tests, there's not much room for interpretation.
|
||||
"""
|
||||
|
||||
def test_empty_element_tags(self):
|
||||
"""Verify that all HTML4 and HTML5 empty element (aka void element) tags
|
||||
are handled correctly.
|
||||
"""
|
||||
for name in [
|
||||
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
|
||||
'spacer', 'frame'
|
||||
]:
|
||||
soup = self.soup("")
|
||||
new_tag = soup.new_tag(name)
|
||||
self.assertEqual(True, new_tag.is_empty_element)
|
||||
|
||||
def test_pickle_and_unpickle_identity(self):
|
||||
# Pickling a tree, then unpickling it, yields a tree identical
|
||||
# to the original.
|
||||
@@ -137,6 +151,14 @@ class HTMLTreeBuilderSmokeTest(object):
|
||||
markup.replace(b"\n", b""))
|
||||
|
||||
def test_processing_instruction(self):
|
||||
# We test both Unicode and bytestring to verify that
|
||||
# process_markup correctly sets processing_instruction_class
|
||||
# even when the markup is already Unicode and there is no
|
||||
# need to process anything.
|
||||
markup = u"""<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.decode())
|
||||
|
||||
markup = b"""<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
@@ -215,9 +237,22 @@ Hello, world!
|
||||
self.assertEqual(comment, baz.previous_element)
|
||||
|
||||
def test_preserved_whitespace_in_pre_and_textarea(self):
|
||||
"""Whitespace must be preserved in <pre> and <textarea> tags."""
|
||||
self.assertSoupEquals("<pre> </pre>")
|
||||
self.assertSoupEquals("<textarea> woo </textarea>")
|
||||
"""Whitespace must be preserved in <pre> and <textarea> tags,
|
||||
even if that would mean not prettifying the markup.
|
||||
"""
|
||||
pre_markup = "<pre> </pre>"
|
||||
textarea_markup = "<textarea> woo\nwoo </textarea>"
|
||||
self.assertSoupEquals(pre_markup)
|
||||
self.assertSoupEquals(textarea_markup)
|
||||
|
||||
soup = self.soup(pre_markup)
|
||||
self.assertEqual(soup.pre.prettify(), pre_markup)
|
||||
|
||||
soup = self.soup(textarea_markup)
|
||||
self.assertEqual(soup.textarea.prettify(), textarea_markup)
|
||||
|
||||
soup = self.soup("<textarea></textarea>")
|
||||
self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>")
|
||||
|
||||
def test_nested_inline_elements(self):
|
||||
"""Inline elements can be nested indefinitely."""
|
||||
@@ -307,6 +342,13 @@ Hello, world!
|
||||
self.assertEqual("p", soup.p.name)
|
||||
self.assertConnectedness(soup)
|
||||
|
||||
def test_empty_element_tags(self):
|
||||
"""Verify consistent handling of empty-element tags,
|
||||
no matter how they come in through the markup.
|
||||
"""
|
||||
self.assertSoupEquals('<br/><br/><br/>', "<br/><br/><br/>")
|
||||
self.assertSoupEquals('<br /><br /><br />', "<br/><br/><br/>")
|
||||
|
||||
def test_head_tag_between_head_and_body(self):
|
||||
"Prevent recurrence of a bug in the html5lib treebuilder."
|
||||
content = """<html><head></head>
|
||||
@@ -480,7 +522,9 @@ Hello, world!
|
||||
hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
|
||||
soup = self.soup(
|
||||
hebrew_document, from_encoding="iso8859-8")
|
||||
self.assertEqual(soup.original_encoding, 'iso8859-8')
|
||||
# Some tree builders call it iso8859-8, others call it iso-8859-8.
|
||||
# That's not a difference we really care about.
|
||||
assert soup.original_encoding in ('iso8859-8', 'iso-8859-8')
|
||||
self.assertEqual(
|
||||
soup.encode('utf-8'),
|
||||
hebrew_document.decode("iso8859-8").encode("utf-8"))
|
||||
@@ -563,6 +607,11 @@ class XMLTreeBuilderSmokeTest(object):
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
|
||||
def test_processing_instruction(self):
|
||||
markup = b"""<?xml version="1.0" encoding="utf8"?>\n<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
|
||||
def test_real_xhtml_document(self):
|
||||
"""A real XHTML document should come out *exactly* the same as it went in."""
|
||||
markup = b"""<?xml version="1.0" encoding="utf-8"?>
|
||||
@@ -639,6 +688,40 @@ class XMLTreeBuilderSmokeTest(object):
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(unicode(soup.foo), markup)
|
||||
|
||||
def test_find_by_prefixed_name(self):
|
||||
doc = """<?xml version="1.0" encoding="utf-8"?>
|
||||
<Document xmlns="http://example.com/ns0"
|
||||
xmlns:ns1="http://example.com/ns1"
|
||||
xmlns:ns2="http://example.com/ns2"
|
||||
<ns1:tag>foo</ns1:tag>
|
||||
<ns1:tag>bar</ns1:tag>
|
||||
<ns2:tag key="value">baz</ns2:tag>
|
||||
</Document>
|
||||
"""
|
||||
soup = self.soup(doc)
|
||||
|
||||
# There are three <tag> tags.
|
||||
self.assertEqual(3, len(soup.find_all('tag')))
|
||||
|
||||
# But two of them are ns1:tag and one of them is ns2:tag.
|
||||
self.assertEqual(2, len(soup.find_all('ns1:tag')))
|
||||
self.assertEqual(1, len(soup.find_all('ns2:tag')))
|
||||
|
||||
self.assertEqual(1, len(soup.find_all('ns2:tag', key='value')))
|
||||
self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag'])))
|
||||
|
||||
def test_copy_tag_preserves_namespace(self):
|
||||
xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<w:document xmlns:w="http://example.com/ns0"/>"""
|
||||
|
||||
soup = self.soup(xml)
|
||||
tag = soup.document
|
||||
duplicate = copy.copy(tag)
|
||||
|
||||
# The two tags have the same namespace prefix.
|
||||
self.assertEqual(tag.prefix, duplicate.prefix)
|
||||
|
||||
|
||||
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
|
||||
"""Smoke test for a tree builder that supports HTML5."""
|
||||
|
||||
|
||||
@@ -84,6 +84,33 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
|
||||
self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
|
||||
self.assertEqual(2, len(soup.find_all('p')))
|
||||
|
||||
def test_reparented_markup_containing_identical_whitespace_nodes(self):
|
||||
"""Verify that we keep the two whitespace nodes in this
|
||||
document distinct when reparenting the adjacent <tbody> tags.
|
||||
"""
|
||||
markup = '<table> <tbody><tbody><ims></tbody> </table>'
|
||||
soup = self.soup(markup)
|
||||
space1, space2 = soup.find_all(string=' ')
|
||||
tbody1, tbody2 = soup.find_all('tbody')
|
||||
assert space1.next_element is tbody1
|
||||
assert tbody2.next_element is space2
|
||||
|
||||
def test_reparented_markup_containing_children(self):
|
||||
markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>'
|
||||
soup = self.soup(markup)
|
||||
noscript = soup.noscript
|
||||
self.assertEqual("target", noscript.next_element)
|
||||
target = soup.find(string='target')
|
||||
|
||||
# The 'aftermath' string was duplicated; we want the second one.
|
||||
final_aftermath = soup.find_all(string='aftermath')[-1]
|
||||
|
||||
# The <noscript> tag was moved beneath a copy of the <a> tag,
|
||||
# but the 'target' string within is still connected to the
|
||||
# (second) 'aftermath' string.
|
||||
self.assertEqual(final_aftermath, target.next_element)
|
||||
self.assertEqual(target, final_aftermath.previous_element)
|
||||
|
||||
def test_processing_instruction(self):
|
||||
"""Processing instructions become comments."""
|
||||
markup = b"""<?PITarget PIContent?>"""
|
||||
@@ -96,3 +123,8 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
|
||||
a1, a2 = soup.find_all('a')
|
||||
self.assertEqual(a1, a2)
|
||||
assert a1 is not a2
|
||||
|
||||
def test_foster_parenting(self):
|
||||
markup = b"""<table><td></tbody>A"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
|
||||
|
||||
@@ -29,4 +29,6 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||
loaded = pickle.loads(dumped)
|
||||
self.assertTrue(isinstance(loaded.builder, type(tree.builder)))
|
||||
|
||||
|
||||
def test_redundant_empty_element_closing_tags(self):
|
||||
self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
|
||||
self.assertSoupEquals('</br></br></br>', "")
|
||||
|
||||
@@ -35,7 +35,6 @@ try:
|
||||
except ImportError, e:
|
||||
LXML_PRESENT = False
|
||||
|
||||
PYTHON_2_PRE_2_7 = (sys.version_info < (2,7))
|
||||
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
|
||||
|
||||
class TestConstructor(SoupTest):
|
||||
@@ -77,7 +76,7 @@ class TestWarnings(SoupTest):
|
||||
def test_no_warning_if_explicit_parser_specified(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("<a><b></b></a>", "html.parser")
|
||||
self.assertEquals([], w)
|
||||
self.assertEqual([], w)
|
||||
|
||||
def test_parseOnlyThese_renamed_to_parse_only(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
@@ -118,15 +117,34 @@ class TestWarnings(SoupTest):
|
||||
soup = self.soup(filename)
|
||||
self.assertEqual(0, len(w))
|
||||
|
||||
def test_url_warning(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("http://www.crummy.com/")
|
||||
msg = str(w[0].message)
|
||||
self.assertTrue("looks like a URL" in msg)
|
||||
def test_url_warning_with_bytes_url(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(b"http://www.crummybytes.com/")
|
||||
# Be aware this isn't the only warning that can be raised during
|
||||
# execution.
|
||||
self.assertTrue(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_unicode_url(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
# note - this url must differ from the bytes one otherwise
|
||||
# python's warnings system swallows the second warning
|
||||
soup = self.soup(u"http://www.crummyunicode.com/")
|
||||
self.assertTrue(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_bytes_and_space(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(b"http://www.crummybytes.com/ is great")
|
||||
self.assertFalse(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_unicode_and_space(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(u"http://www.crummyuncode.com/ is great")
|
||||
self.assertFalse(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("http://www.crummy.com/ is great")
|
||||
self.assertEqual(0, len(w))
|
||||
|
||||
class TestSelectiveParsing(SoupTest):
|
||||
|
||||
@@ -260,7 +278,7 @@ class TestEncodingConversion(SoupTest):
|
||||
self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
|
||||
|
||||
@skipIf(
|
||||
PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2,
|
||||
PYTHON_3_PRE_3_2,
|
||||
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
|
||||
def test_attribute_name_containing_unicode_characters(self):
|
||||
markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Tests for Beautiful Soup's tree traversal methods.
|
||||
|
||||
@@ -222,7 +223,19 @@ class TestFindAllByName(TreeTest):
|
||||
self.assertSelects(
|
||||
tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
|
||||
|
||||
def test_find_with_multi_valued_attribute(self):
|
||||
soup = self.soup(
|
||||
"<div class='a b'>1</div><div class='a c'>2</div><div class='a d'>3</div>"
|
||||
)
|
||||
r1 = soup.find('div', 'a d');
|
||||
r2 = soup.find('div', re.compile(r'a d'));
|
||||
r3, r4 = soup.find_all('div', ['a b', 'a d']);
|
||||
self.assertEqual('3', r1.string)
|
||||
self.assertEqual('3', r2.string)
|
||||
self.assertEqual('1', r3.string)
|
||||
self.assertEqual('3', r4.string)
|
||||
|
||||
|
||||
class TestFindAllByAttribute(TreeTest):
|
||||
|
||||
def test_find_all_by_attribute_name(self):
|
||||
@@ -294,10 +307,10 @@ class TestFindAllByAttribute(TreeTest):
|
||||
f = tree.find_all("gar", class_=re.compile("a"))
|
||||
self.assertSelects(f, ["Found it"])
|
||||
|
||||
# Since the class is not the string "foo bar", but the two
|
||||
# strings "foo" and "bar", this will not find anything.
|
||||
# If the search fails to match the individual strings "foo" and "bar",
|
||||
# it will be tried against the combined string "foo bar".
|
||||
f = tree.find_all("gar", class_=re.compile("o b"))
|
||||
self.assertSelects(f, [])
|
||||
self.assertSelects(f, ["Found it"])
|
||||
|
||||
def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
|
||||
soup = self.soup("<a class='bar'>Found it</a>")
|
||||
@@ -335,7 +348,7 @@ class TestFindAllByAttribute(TreeTest):
|
||||
strainer = SoupStrainer(attrs={'id' : 'first'})
|
||||
self.assertSelects(tree.find_all(strainer), ['Match.'])
|
||||
|
||||
def test_find_all_with_missing_atribute(self):
|
||||
def test_find_all_with_missing_attribute(self):
|
||||
# You can pass in None as the value of an attribute to find_all.
|
||||
# This will match tags that do not have that attribute set.
|
||||
tree = self.soup("""<a id="1">ID present.</a>
|
||||
@@ -1273,6 +1286,10 @@ class TestCDAtaListAttributes(SoupTest):
|
||||
soup = self.soup("<a class='foo\tbar'>")
|
||||
self.assertEqual(b'<a class="foo bar"></a>', soup.a.encode())
|
||||
|
||||
def test_get_attribute_list(self):
|
||||
soup = self.soup("<a id='abc def'>")
|
||||
self.assertEqual(['abc def'], soup.a.get_attribute_list('id'))
|
||||
|
||||
def test_accept_charset(self):
|
||||
soup = self.soup('<form accept-charset="ISO-8859-1 UTF-8">')
|
||||
self.assertEqual(['ISO-8859-1', 'UTF-8'], soup.form['accept-charset'])
|
||||
@@ -1328,6 +1345,13 @@ class TestPersistence(SoupTest):
|
||||
copied = copy.deepcopy(self.tree)
|
||||
self.assertEqual(copied.decode(), self.tree.decode())
|
||||
|
||||
def test_copy_preserves_encoding(self):
|
||||
soup = BeautifulSoup(b'<p> </p>', 'html.parser')
|
||||
encoding = soup.original_encoding
|
||||
copy = soup.__copy__()
|
||||
self.assertEqual(u"<p> </p>", unicode(copy))
|
||||
self.assertEqual(encoding, copy.original_encoding)
|
||||
|
||||
def test_unicode_pickle(self):
|
||||
# A tree containing Unicode characters can be pickled.
|
||||
html = u"<b>\N{SNOWMAN}</b>"
|
||||
@@ -1676,8 +1700,8 @@ class TestSoupSelector(TreeTest):
|
||||
def setUp(self):
|
||||
self.soup = BeautifulSoup(self.HTML, 'html.parser')
|
||||
|
||||
def assertSelects(self, selector, expected_ids):
|
||||
el_ids = [el['id'] for el in self.soup.select(selector)]
|
||||
def assertSelects(self, selector, expected_ids, **kwargs):
|
||||
el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)]
|
||||
el_ids.sort()
|
||||
expected_ids.sort()
|
||||
self.assertEqual(expected_ids, el_ids,
|
||||
@@ -1720,6 +1744,13 @@ class TestSoupSelector(TreeTest):
|
||||
for selector in ('html div', 'html body div', 'body div'):
|
||||
self.assertSelects(selector, ['data1', 'main', 'inner', 'footer'])
|
||||
|
||||
|
||||
def test_limit(self):
|
||||
self.assertSelects('html div', ['main'], limit=1)
|
||||
self.assertSelects('html body div', ['inner', 'main'], limit=2)
|
||||
self.assertSelects('body div', ['data1', 'main', 'inner', 'footer'],
|
||||
limit=10)
|
||||
|
||||
def test_tag_no_match(self):
|
||||
self.assertEqual(len(self.soup.select('del')), 0)
|
||||
|
||||
@@ -1902,6 +1933,14 @@ class TestSoupSelector(TreeTest):
|
||||
('div[data-tag]', ['data1'])
|
||||
)
|
||||
|
||||
def test_quoted_space_in_selector_name(self):
|
||||
html = """<div style="display: wrong">nope</div>
|
||||
<div style="display: right">yes</div>
|
||||
"""
|
||||
soup = BeautifulSoup(html, 'html.parser')
|
||||
[chosen] = soup.select('div[style="display: right"]')
|
||||
self.assertEqual("yes", chosen.string)
|
||||
|
||||
def test_unsupported_pseudoclass(self):
|
||||
self.assertRaises(
|
||||
NotImplementedError, self.soup.select, "a:no-such-pseudoclass")
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from .core import where, old_where
|
||||
|
||||
__version__ = "2017.04.17"
|
||||
__version__ = "2017.11.05"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -19,17 +19,18 @@ class DeprecatedBundleWarning(DeprecationWarning):
|
||||
|
||||
|
||||
def where():
|
||||
f = os.path.split(__file__)[0]
|
||||
f = os.path.dirname(__file__)
|
||||
|
||||
return os.path.join(f, 'cacert.pem')
|
||||
|
||||
|
||||
def old_where():
|
||||
warnings.warn(
|
||||
"The weak security bundle is being deprecated.",
|
||||
"The weak security bundle is being deprecated. It will be removed in "
|
||||
"2018.",
|
||||
DeprecatedBundleWarning
|
||||
)
|
||||
f = os.path.split(__file__)[0]
|
||||
f = os.path.dirname(__file__)
|
||||
return os.path.join(f, 'weak.pem')
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,436 @@
|
||||
"""contextlib2 - backports and enhancements to the contextlib module"""
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
from collections import deque
|
||||
from functools import wraps
|
||||
|
||||
__all__ = ["contextmanager", "closing", "ContextDecorator", "ExitStack",
|
||||
"redirect_stdout", "redirect_stderr", "suppress"]
|
||||
|
||||
# Backwards compatibility
|
||||
__all__ += ["ContextStack"]
|
||||
|
||||
class ContextDecorator(object):
    """Mixin that lets a context manager double as a function decorator.

    Decorating a function with an instance wraps every call to that
    function in a ``with`` block driven by the context manager.
    """

    def refresh_cm(self):
        """Return the context manager that will wrap the decorated call.

        DEPRECATED: this hook never made it into the standard library's
        ContextDecorator API.  It emits a DeprecationWarning and then
        delegates to ``_recreate_cm()``.
        """
        warnings.warn("refresh_cm was never added to the standard library",
                      DeprecationWarning)
        manager = self._recreate_cm()
        return manager

    def _recreate_cm(self):
        """Return the instance to use for the next decorated call.

        The base implementation hands back *self*.  One-shot context
        managers (such as _GeneratorContextManager) override this to
        build a fresh instance per call.  Private interface; see
        CPython issue #11647.
        """
        return self

    def __call__(self, func):
        """Wrap *func* so each invocation runs inside this context manager."""
        @wraps(func)
        def wrapped(*args, **kwds):
            # Ask for a (possibly fresh) manager on every call so that
            # single-use managers still work as decorators.
            with self._recreate_cm():
                return func(*args, **kwds)
        return wrapped
|
||||
|
||||
|
||||
class _GeneratorContextManager(ContextDecorator):
    """Helper for @contextmanager decorator."""

    def __init__(self, func, args, kwds):
        """Start the generator and remember how to rebuild it later."""
        self.gen = func(*args, **kwds)
        self.func = func
        self.args = args
        self.kwds = kwds
        # Issue 19330: give instances the wrapped function's docstring,
        # falling back to the class docstring when the function has none.
        # pydoc still shows the class docstring (bugs.python.org/issue19404).
        func_doc = getattr(func, "__doc__", None)
        self.__doc__ = type(self).__doc__ if func_doc is None else func_doc

    def _recreate_cm(self):
        """Build a fresh instance from the stored function and arguments.

        Instances are single-use, so a decorated function needs a new
        one on every call.
        """
        factory = self.__class__
        return factory(self.func, self.args, self.kwds)

    def __enter__(self):
        """Advance the generator to its yield and return the yielded value."""
        try:
            return next(self.gen)
        except StopIteration:
            raise RuntimeError("generator didn't yield")

    def __exit__(self, type, value, traceback):
        """Resume the generator, throwing in the body's exception if any.

        Returns a true value only when the generator swallowed the
        exception raised inside the ``with`` block.
        """
        if type is None:
            # Normal exit: the generator must finish without yielding again.
            try:
                next(self.gen)
            except StopIteration:
                return None
            raise RuntimeError("generator didn't stop")

        if value is None:
            # Instantiate now so identity checks against the exception
            # that comes back out of throw() are reliable.
            value = type()
        try:
            self.gen.throw(type, value, traceback)
            raise RuntimeError("generator didn't stop after throw()")
        except StopIteration as stop:
            # A StopIteration means "suppressed" unless it is the very
            # exception we threw in, which must keep propagating.
            return stop is not value
        except RuntimeError as err:
            # Same exception coming back: do not re-raise it from here.
            # Also cover a thrown StopIteration that PEP 479 wrapped in
            # a RuntimeError (only detectable with exception chaining).
            if err is value or (_HAVE_EXCEPTION_CHAINING
                                and err.__cause__ is value):
                return False
            raise
        except:
            # __exit__ must only raise when it fails itself, whereas
            # throw() re-raises to signal propagation.  Re-raise only a
            # *different* exception than the one passed in.
            escaped = sys.exc_info()[1]
            if escaped is not value:
                raise
|
||||
|
||||
|
||||
def contextmanager(func):
    """@contextmanager decorator.

    Turns a generator function that yields exactly once into a factory
    for context managers.  Typical usage:

        @contextmanager
        def some_generator(ARGUMENTS):
            SETUP
            try:
                yield VALUE
            finally:
                CLEANUP

    after which

        with some_generator(ARGUMENTS) as VARIABLE:
            BODY

    behaves like

        SETUP
        try:
            VARIABLE = VALUE
            BODY
        finally:
            CLEANUP
    """
    @wraps(func)
    def make_manager(*args, **kwds):
        manager = _GeneratorContextManager(func, args, kwds)
        return manager
    return make_manager
|
||||
|
||||
|
||||
class closing(object):
    """Context manager that calls ``close()`` on an object at block exit.

    Writing

        with closing(MODULE.open(ARGUMENTS)) as f:
            BLOCK

    is the same as

        f = MODULE.open(ARGUMENTS)
        try:
            BLOCK
        finally:
            f.close()
    """

    def __init__(self, thing):
        self.thing = thing

    def __enter__(self):
        resource = self.thing
        return resource

    def __exit__(self, *exc_info):
        # Always close; the exception details are deliberately ignored
        # and nothing is suppressed.
        resource = self.thing
        resource.close()
|
||||
|
||||
|
||||
class _RedirectStream(object):
|
||||
|
||||
_stream = None
|
||||
|
||||
def __init__(self, new_target):
|
||||
self._new_target = new_target
|
||||
# We use a list of old targets to make this CM re-entrant
|
||||
self._old_targets = []
|
||||
|
||||
def __enter__(self):
|
||||
self._old_targets.append(getattr(sys, self._stream))
|
||||
setattr(sys, self._stream, self._new_target)
|
||||
return self._new_target
|
||||
|
||||
def __exit__(self, exctype, excinst, exctb):
|
||||
setattr(sys, self._stream, self._old_targets.pop())
|
||||
|
||||
|
||||
class redirect_stdout(_RedirectStream):
    """Temporarily point ``sys.stdout`` at another file-like object.

    Send help() output to stderr:

        with redirect_stdout(sys.stderr):
            help(dir)

    Capture help() output in a file:

        with open('help.txt', 'w') as f:
            with redirect_stdout(f):
                help(pow)
    """

    # Name of the ``sys`` attribute swapped by the base class.
    _stream = "stdout"
|
||||
|
||||
|
||||
class redirect_stderr(_RedirectStream):
    """Temporarily point ``sys.stderr`` at another file-like object."""

    # Name of the ``sys`` attribute swapped by the base class.
    _stream = "stderr"
|
||||
|
||||
|
||||
class suppress(object):
    """Context manager that swallows the listed exception types.

    When a matching exception escapes the block it is discarded and
    execution continues after the ``with`` statement:

        with suppress(FileNotFoundError):
            os.remove(somefile)
        # Reached whether or not the file existed
    """

    def __init__(self, *exceptions):
        self._exceptions = exceptions

    def __enter__(self):
        return None

    def __exit__(self, exctype, excinst, exctb):
        # CPython's own exception matching walks only the concrete type
        # hierarchy and ignores the instance/subclass checking hooks
        # (see http://bugs.python.org/issue12029).  Here the plain
        # issubclass() semantics are used instead of copying that
        # interpreter limitation.
        if exctype is None:
            return False
        return issubclass(exctype, self._exceptions)
|
||||
|
||||
|
||||
# Context manipulation is Python 3 only
|
||||
_HAVE_EXCEPTION_CHAINING = sys.version_info[0] >= 3
|
||||
if _HAVE_EXCEPTION_CHAINING:
|
||||
def _make_context_fixer(frame_exc):
|
||||
def _fix_exception_context(new_exc, old_exc):
|
||||
# Context may not be correct, so find the end of the chain
|
||||
while 1:
|
||||
exc_context = new_exc.__context__
|
||||
if exc_context is old_exc:
|
||||
# Context is already set correctly (see issue 20317)
|
||||
return
|
||||
if exc_context is None or exc_context is frame_exc:
|
||||
break
|
||||
new_exc = exc_context
|
||||
# Change the end of the chain to point to the exception
|
||||
# we expect it to reference
|
||||
new_exc.__context__ = old_exc
|
||||
return _fix_exception_context
|
||||
|
||||
def _reraise_with_existing_context(exc_details):
|
||||
try:
|
||||
# bare "raise exc_details[1]" replaces our carefully
|
||||
# set-up context
|
||||
fixed_ctx = exc_details[1].__context__
|
||||
raise exc_details[1]
|
||||
except BaseException:
|
||||
exc_details[1].__context__ = fixed_ctx
|
||||
raise
|
||||
else:
|
||||
# No exception context in Python 2
|
||||
def _make_context_fixer(frame_exc):
|
||||
return lambda new_exc, old_exc: None
|
||||
|
||||
# Use 3 argument raise in Python 2,
|
||||
# but use exec to avoid SyntaxError in Python 3
|
||||
def _reraise_with_existing_context(exc_details):
|
||||
exc_type, exc_value, exc_tb = exc_details
|
||||
exec ("raise exc_type, exc_value, exc_tb")
|
||||
|
||||
# Handle old-style classes if they exist
|
||||
try:
|
||||
from types import InstanceType
|
||||
except ImportError:
|
||||
# Python 3 doesn't have old-style classes
|
||||
_get_type = type
|
||||
else:
|
||||
# Need to handle old-style context managers on Python 2
|
||||
def _get_type(obj):
|
||||
obj_type = type(obj)
|
||||
if obj_type is InstanceType:
|
||||
return obj.__class__ # Old-style class
|
||||
return obj_type # New-style class
|
||||
|
||||
# Inspired by discussions on http://bugs.python.org/issue13585
|
||||
class ExitStack(object):
    """Context manager that manages a dynamic stack of exit callbacks.

    Example:

        with ExitStack() as stack:
            files = [stack.enter_context(open(fname)) for fname in filenames]
            # Every file opened so far is closed when the with block
            # ends, even if opening a later file raises.
    """

    def __init__(self):
        self._exit_callbacks = deque()

    def pop_all(self):
        """Move all registered callbacks to a new instance and return it."""
        fresh = type(self)()
        fresh._exit_callbacks = self._exit_callbacks
        self._exit_callbacks = deque()
        return fresh

    def _push_cm_exit(self, cm, cm_exit):
        """Register *cm_exit* (an unbound __exit__) to be called on *cm*."""
        def _exit_wrapper(*exc_details):
            return cm_exit(cm, *exc_details)
        _exit_wrapper.__self__ = cm
        self.push(_exit_wrapper)

    def push(self, exit):
        """Register a callback with the standard __exit__ signature.

        The callback may suppress exceptions exactly as an __exit__
        method can.  An object that has an __exit__ method is accepted
        too; in that case a call to the method is registered rather
        than the object itself.  Returns *exit* so this can be used as
        a decorator.
        """
        # Special methods are looked up on the type, not the instance,
        # hence the unbound method.
        owner = _get_type(exit)
        try:
            unbound_exit = owner.__exit__
        except AttributeError:
            # Not a context manager, so assume it's a callable
            self._exit_callbacks.append(exit)
        else:
            self._push_cm_exit(exit, unbound_exit)
        return exit

    def callback(self, callback, *args, **kwds):
        """Register an arbitrary callable with its arguments.

        The callable's return value is ignored, so it cannot suppress
        exceptions.  Returns *callback* so this can be used as a
        decorator.
        """
        def _exit_wrapper(exc_type, exc, tb):
            callback(*args, **kwds)
        # The signature differs from the callable's, so @wraps would be
        # misleading; __wrapped__ alone still helps introspection.
        _exit_wrapper.__wrapped__ = callback
        self.push(_exit_wrapper)
        return callback

    def enter_context(self, cm):
        """Enter *cm* and register its __exit__ on the stack.

        Returns whatever the context manager's __enter__ returns.
        """
        # Both special methods are fetched from the type, mirroring the
        # with statement; __exit__ is looked up before __enter__ runs.
        owner = _get_type(cm)
        unbound_exit = owner.__exit__
        entered = owner.__enter__(cm)
        self._push_cm_exit(cm, unbound_exit)
        return entered

    def close(self):
        """Unwind the callback stack right now."""
        self.__exit__(None, None, None)

    def __enter__(self):
        return self

    def __exit__(self, *exc_details):
        """Run the callbacks in LIFO order, as nested with blocks would."""
        had_incoming_exc = exc_details[0] is not None

        # Exception state is adjusted so the outcome looks like a set
        # of truly nested with statements.
        active_exc = sys.exc_info()[1]
        relink_context = _make_context_fixer(active_exc)

        was_suppressed = False
        must_raise = False
        while self._exit_callbacks:
            exit_cb = self._exit_callbacks.pop()
            try:
                if exit_cb(*exc_details):
                    was_suppressed = True
                    must_raise = False
                    exc_details = (None, None, None)
            except:
                raised = sys.exc_info()
                # Chain the new exception onto the one it replaced
                relink_context(raised[1], exc_details[1])
                must_raise = True
                exc_details = raised
        if must_raise:
            _reraise_with_existing_context(exc_details)
        return had_incoming_exc and was_suppressed
|
||||
|
||||
# Preserve backwards compatibility
|
||||
class ContextStack(ExitStack):
    """Deprecated former name of ExitStack, kept for old callers.

    Creating an instance emits a DeprecationWarning.  The extra methods
    are the old names of ``push``, ``callback`` and ``pop_all``.
    """

    def __init__(self):
        warnings.warn("ContextStack has been renamed to ExitStack",
                      DeprecationWarning)
        super(ContextStack, self).__init__()

    def register_exit(self, callback):
        """Old name for ``push``."""
        registered = self.push(callback)
        return registered

    def register(self, callback, *args, **kwds):
        """Old name for ``callback``."""
        registered = self.callback(callback, *args, **kwds)
        return registered

    def preserve(self):
        """Old name for ``pop_all``."""
        transferred = self.pop_all()
        return transferred
|
||||
@@ -0,0 +1,18 @@
|
||||
"""Provide a (g)dbm-compatible interface to bsddb.hashopen."""
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
warnings.warnpy3k("in 3.x, the dbhash module has been removed", stacklevel=2)
|
||||
try:
|
||||
import bsddb
|
||||
except ImportError:
|
||||
# prevent a second import of this module from spuriously succeeding
|
||||
del sys.modules[__name__]
|
||||
raise
|
||||
|
||||
__all__ = ["error","open"]
|
||||
|
||||
error = bsddb.error # Exported for anydbm
|
||||
|
||||
def open(file, flag='r', mode=0o666):
    """Open a bsddb hash database through a (g)dbm-style interface.

    All three arguments are passed straight through to
    ``bsddb.hashopen``, whose result is returned.  *flag* defaults to
    'r' and *mode* to octal 666.
    """
    # 0o666 is the same value as the legacy literal 0666.  The 0o form
    # is accepted from Python 2.6 on (already required here because of
    # warnings.warnpy3k) and does not break Python 3 parsers and tools.
    return bsddb.hashopen(file, flag, mode)
|
||||
@@ -1,4 +1,4 @@
|
||||
__version__ = '0.6.2'
|
||||
__version__ = '0.6.5'
|
||||
|
||||
from .lock import Lock # noqa
|
||||
from .lock import NeedRegenerationException # noqa
|
||||
from .lock import NeedRegenerationException # noqa
|
||||
|
||||
@@ -13,6 +13,13 @@ class NoValue(object):
|
||||
def payload(self):
|
||||
return self
|
||||
|
||||
def __repr__(self):
|
||||
"""Ensure __repr__ is a consistent value in case NoValue is used to
|
||||
fill another cache key.
|
||||
|
||||
"""
|
||||
return '<dogpile.cache.api.NoValue object>'
|
||||
|
||||
if py3k:
|
||||
def __bool__(self): # pragma NO COVERAGE
|
||||
return False
|
||||
@@ -20,6 +27,7 @@ class NoValue(object):
|
||||
def __nonzero__(self): # pragma NO COVERAGE
|
||||
return False
|
||||
|
||||
|
||||
NO_VALUE = NoValue()
|
||||
"""Value returned from ``get()`` that describes
|
||||
a key not present."""
|
||||
|
||||
@@ -15,3 +15,11 @@ class RegionNotConfigured(DogpileCacheException):
|
||||
|
||||
class ValidationError(DogpileCacheException):
|
||||
"""Error validating a value or option."""
|
||||
|
||||
|
||||
class PluginNotFound(DogpileCacheException):
|
||||
"""The specified plugin could not be found.
|
||||
|
||||
.. versionadded:: 0.6.4
|
||||
|
||||
"""
|
||||
|
||||
+35
-5
@@ -410,7 +410,13 @@ class CacheRegion(object):
|
||||
"configured with backend: %s. "
|
||||
"Specify replace_existing_backend=True to replace."
|
||||
% self.backend)
|
||||
backend_cls = _backend_loader.load(backend)
|
||||
|
||||
try:
|
||||
backend_cls = _backend_loader.load(backend)
|
||||
except PluginLoader.NotFound:
|
||||
raise exception.PluginNotFound(
|
||||
"Couldn't find cache plugin to load: %s" % backend)
|
||||
|
||||
if _config_argument_dict:
|
||||
self.backend = backend_cls.from_config_dict(
|
||||
_config_argument_dict,
|
||||
@@ -487,8 +493,19 @@ class CacheRegion(object):
|
||||
a value. Any retrieved value whose creation
|
||||
time is prior to this timestamp
|
||||
is considered to be stale. It does not
|
||||
affect the data in the cache in any way, and is also
|
||||
local to this instance of :class:`.CacheRegion`.
|
||||
affect the data in the cache in any way, and is
|
||||
**local to this instance of :class:`.CacheRegion`.**
|
||||
|
||||
.. warning::
|
||||
|
||||
The :meth:`.CacheRegion.invalidate` method's default mode of
|
||||
operation is to set a timestamp **local to this CacheRegion
|
||||
in this Python process only**. It does not impact other Python
|
||||
processes or regions as the timestamp is **only stored locally in
|
||||
memory**. To implement invalidation where the
|
||||
timestamp is stored in the cache or similar so that all Python
|
||||
processes can be affected by an invalidation timestamp, implement a
|
||||
custom :class:`.RegionInvalidationStrategy`.
|
||||
|
||||
Once set, the invalidation time is honored by
|
||||
the :meth:`.CacheRegion.get_or_create`,
|
||||
@@ -550,6 +567,8 @@ class CacheRegion(object):
|
||||
_config_prefix="%sarguments." % prefix,
|
||||
wrap=config_dict.get(
|
||||
"%swrap" % prefix, None),
|
||||
replace_existing_backend=config_dict.get(
|
||||
"%sreplace_existing_backend" % prefix, False),
|
||||
)
|
||||
|
||||
@memoized_property
|
||||
@@ -944,11 +963,14 @@ class CacheRegion(object):
|
||||
if not should_cache_fn:
|
||||
self.backend.set_multi(values_w_created)
|
||||
else:
|
||||
self.backend.set_multi(dict(
|
||||
values_to_cache = dict(
|
||||
(k, v)
|
||||
for k, v in values_w_created.items()
|
||||
if should_cache_fn(v[0])
|
||||
))
|
||||
)
|
||||
|
||||
if values_to_cache:
|
||||
self.backend.set_multi(values_to_cache)
|
||||
|
||||
values.update(values_w_created)
|
||||
return [values[orig_to_mangled[k]].payload for k in keys]
|
||||
@@ -1075,6 +1097,14 @@ class CacheRegion(object):
|
||||
.. versionadded:: 0.5.0 Added ``refresh()`` method to decorated
|
||||
function.
|
||||
|
||||
``original()``, on the other hand, will invoke the decorated function
|
||||
without any caching::
|
||||
|
||||
newvalue = generate_something.original(5, 6)
|
||||
|
||||
.. versionadded:: 0.6.0 Added ``original()`` method to decorated
|
||||
function.
|
||||
|
||||
Lastly, the ``get()`` method returns either the value cached
|
||||
for the given key, or the token ``NO_VALUE`` if no such key
|
||||
exists::
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from .nameregistry import NameRegistry # noqa
|
||||
from .readwrite_lock import ReadWriteMutex # noqa
|
||||
from .langhelpers import PluginLoader, memoized_property, \
|
||||
coerce_string_conf, to_list, KeyReentrantMutex # noqa
|
||||
coerce_string_conf, to_list, KeyReentrantMutex # noqa
|
||||
|
||||
@@ -39,9 +39,9 @@ class PluginLoader(object):
|
||||
self.impls[name] = impl.load
|
||||
return impl.load()
|
||||
else:
|
||||
raise Exception(
|
||||
"Can't load plugin %s %s" %
|
||||
(self.group, name))
|
||||
raise self.NotFound(
|
||||
"Can't load plugin %s %s" % (self.group, name)
|
||||
)
|
||||
|
||||
def register(self, name, modulepath, objname):
|
||||
def load():
|
||||
@@ -49,6 +49,9 @@ class PluginLoader(object):
|
||||
return getattr(mod, objname)
|
||||
self.impls[name] = load
|
||||
|
||||
class NotFound(Exception):
|
||||
"""The specified plugin could not be found."""
|
||||
|
||||
|
||||
class memoized_property(object):
|
||||
"""A read-only @property that is only evaluated once."""
|
||||
|
||||
@@ -0,0 +1,249 @@
|
||||
"""A dumb and slow but simple dbm clone.
|
||||
|
||||
For database spam, spam.dir contains the index (a text file),
|
||||
spam.bak *may* contain a backup of the index (also a text file),
|
||||
while spam.dat contains the data (a binary file).
|
||||
|
||||
XXX TO DO:
|
||||
|
||||
- seems to contain a bug when updating...
|
||||
|
||||
- reclaim free space (currently, space once occupied by deleted or expanded
|
||||
items is never reused)
|
||||
|
||||
- support concurrent access (currently, if two processes take turns making
|
||||
updates, they can mess up the index)
|
||||
|
||||
- support efficient access to large databases (currently, the whole index
|
||||
is read when the database is opened, and some updates rewrite the whole index)
|
||||
|
||||
- support opening for read-only (flag = 'm')
|
||||
|
||||
"""
|
||||
|
||||
import ast as _ast
|
||||
import os as _os
|
||||
import __builtin__
|
||||
import UserDict
|
||||
|
||||
_open = __builtin__.open
|
||||
|
||||
_BLOCKSIZE = 512
|
||||
|
||||
error = IOError # For anydbm
|
||||
|
||||
class _Database(UserDict.DictMixin):
|
||||
|
||||
# The on-disk directory and data files can remain in mutually
|
||||
# inconsistent states for an arbitrarily long time (see comments
|
||||
# at the end of __setitem__). This is only repaired when _commit()
|
||||
# gets called. One place _commit() gets called is from __del__(),
|
||||
# and if that occurs at program shutdown time, module globals may
|
||||
# already have gotten rebound to None. Since it's crucial that
|
||||
# _commit() finish successfully, we can't ignore shutdown races
|
||||
# here, and _commit() must not reference any globals.
|
||||
_os = _os # for _commit()
|
||||
_open = _open # for _commit()
|
||||
|
||||
def __init__(self, filebasename, mode):
|
||||
self._mode = mode
|
||||
|
||||
# The directory file is a text file. Each line looks like
|
||||
# "%r, (%d, %d)\n" % (key, pos, siz)
|
||||
# where key is the string key, pos is the offset into the dat
|
||||
# file of the associated value's first byte, and siz is the number
|
||||
# of bytes in the associated value.
|
||||
self._dirfile = filebasename + _os.extsep + 'dir'
|
||||
|
||||
# The data file is a binary file pointed into by the directory
|
||||
# file, and holds the values associated with keys. Each value
|
||||
# begins at a _BLOCKSIZE-aligned byte offset, and is a raw
|
||||
# binary 8-bit string value.
|
||||
self._datfile = filebasename + _os.extsep + 'dat'
|
||||
self._bakfile = filebasename + _os.extsep + 'bak'
|
||||
|
||||
# The index is an in-memory dict, mirroring the directory file.
|
||||
self._index = None # maps keys to (pos, siz) pairs
|
||||
|
||||
# Mod by Jack: create data file if needed
|
||||
try:
|
||||
f = _open(self._datfile, 'r')
|
||||
except IOError:
|
||||
with _open(self._datfile, 'w') as f:
|
||||
self._chmod(self._datfile)
|
||||
else:
|
||||
f.close()
|
||||
self._update()
|
||||
|
||||
# Read directory file into the in-memory index dict.
|
||||
def _update(self):
|
||||
self._index = {}
|
||||
try:
|
||||
f = _open(self._dirfile)
|
||||
except IOError:
|
||||
pass
|
||||
else:
|
||||
with f:
|
||||
for line in f:
|
||||
line = line.rstrip()
|
||||
key, pos_and_siz_pair = _ast.literal_eval(line)
|
||||
self._index[key] = pos_and_siz_pair
|
||||
|
||||
# Write the index dict to the directory file. The original directory
|
||||
# file (if any) is renamed with a .bak extension first. If a .bak
|
||||
# file currently exists, it's deleted.
|
||||
def _commit(self):
|
||||
# CAUTION: It's vital that _commit() succeed, and _commit() can
|
||||
# be called from __del__(). Therefore we must never reference a
|
||||
# global in this routine.
|
||||
if self._index is None:
|
||||
return # nothing to do
|
||||
|
||||
try:
|
||||
self._os.unlink(self._bakfile)
|
||||
except self._os.error:
|
||||
pass
|
||||
|
||||
try:
|
||||
self._os.rename(self._dirfile, self._bakfile)
|
||||
except self._os.error:
|
||||
pass
|
||||
|
||||
with self._open(self._dirfile, 'w') as f:
|
||||
self._chmod(self._dirfile)
|
||||
for key, pos_and_siz_pair in self._index.iteritems():
|
||||
f.write("%r, %r\n" % (key, pos_and_siz_pair))
|
||||
|
||||
sync = _commit
|
||||
|
||||
def __getitem__(self, key):
|
||||
pos, siz = self._index[key] # may raise KeyError
|
||||
with _open(self._datfile, 'rb') as f:
|
||||
f.seek(pos)
|
||||
dat = f.read(siz)
|
||||
return dat
|
||||
|
||||
# Append val to the data file, starting at a _BLOCKSIZE-aligned
|
||||
# offset. The data file is first padded with NUL bytes (if needed)
|
||||
# to get to an aligned offset. Return pair
|
||||
# (starting offset of val, len(val))
|
||||
def _addval(self, val):
    """Append `val` to the data file at the next _BLOCKSIZE-aligned offset.

    The file is padded with NUL bytes up to the aligned offset before the
    value is written.  Returns ``(offset where val starts, len(val))``.
    """
    with _open(self._datfile, 'rb+') as datafile:
        datafile.seek(0, 2)                  # jump to end of file
        end = int(datafile.tell())
        # Number of NUL bytes needed to reach the next block boundary.
        padding = (-end) % _BLOCKSIZE
        datafile.write('\0' * padding)
        start = end + padding
        datafile.write(val)
    return (start, len(val))
|
||||
|
||||
# Write val to the data file, starting at offset pos. The caller
|
||||
# is responsible for ensuring that there's enough room starting at
|
||||
# pos to hold val, without overwriting some other value. Return
|
||||
# pair (pos, len(val)).
|
||||
def _setval(self, pos, val):
    """Overwrite the data file with `val` starting at offset `pos`.

    No bounds checking is done here; the caller must make sure the value
    fits without clobbering a neighbouring value.  Returns
    ``(pos, len(val))``.
    """
    datafile = _open(self._datfile, 'rb+')
    with datafile:
        datafile.seek(pos)
        datafile.write(val)
    return (pos, len(val))
|
||||
|
||||
# key is a new key whose associated value starts in the data file
|
||||
# at offset pos and with length siz. Add an index record to
|
||||
# the in-memory index dict, and append one to the directory file.
|
||||
def _addkey(self, key, pos_and_siz_pair):
    """Record a new key in the index and append it to the directory file.

    `pos_and_siz_pair` is stored in the in-memory index under `key`, and a
    ``"%r, %r"`` line for the pair is appended to the directory file.
    """
    self._index[key] = pos_and_siz_pair
    record = "%r, %r\n" % (key, pos_and_siz_pair)
    with _open(self._dirfile, 'a') as dirfile:
        self._chmod(self._dirfile)
        dirfile.write(record)
|
||||
|
||||
def __setitem__(self, key, val):
|
||||
if not type(key) == type('') == type(val):
|
||||
raise TypeError, "keys and values must be strings"
|
||||
if key not in self._index:
|
||||
self._addkey(key, self._addval(val))
|
||||
else:
|
||||
# See whether the new value is small enough to fit in the
|
||||
# (padded) space currently occupied by the old value.
|
||||
pos, siz = self._index[key]
|
||||
oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
|
||||
newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
|
||||
if newblocks <= oldblocks:
|
||||
self._index[key] = self._setval(pos, val)
|
||||
else:
|
||||
# The new value doesn't fit in the (padded) space used
|
||||
# by the old value. The blocks used by the old value are
|
||||
# forever lost.
|
||||
self._index[key] = self._addval(val)
|
||||
|
||||
# Note that _index may be out of synch with the directory
|
||||
# file now: _setval() and _addval() don't update the directory
|
||||
# file. This also means that the on-disk directory and data
|
||||
# files are in a mutually inconsistent state, and they'll
|
||||
# remain that way until _commit() is called. Note that this
|
||||
# is a disaster (for the database) if the program crashes
|
||||
# (so that _commit() never gets called).
|
||||
|
||||
def __delitem__(self, key):
|
||||
# The blocks used by the associated value are lost.
|
||||
del self._index[key]
|
||||
# XXX It's unclear why we do a _commit() here (the code always
|
||||
# XXX has, so I'm not changing it). _setitem__ doesn't try to
|
||||
# XXX keep the directory file in synch. Why should we? Or
|
||||
# XXX why shouldn't __setitem__?
|
||||
self._commit()
|
||||
|
||||
def keys(self):
    """Return the keys of the in-memory index."""
    index = self._index
    return index.keys()
|
||||
|
||||
def has_key(self, key):
    """Return True if `key` is in the in-memory index, else False."""
    index = self._index
    return key in index
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self._index
|
||||
|
||||
def iterkeys(self):
    """Return the index's own key iterator (``self._index.iterkeys()``)."""
    index = self._index
    return index.iterkeys()
|
||||
__iter__ = iterkeys
|
||||
|
||||
def __len__(self):
|
||||
return len(self._index)
|
||||
|
||||
def close(self):
    """Commit the index, then forget the index and all file names.

    The attributes are reset to None even when `_commit()` raises.
    """
    try:
        self._commit()
    finally:
        self._index = None
        self._datfile = None
        self._dirfile = None
        self._bakfile = None
|
||||
|
||||
__del__ = close
|
||||
|
||||
def _chmod (self, file):
|
||||
if hasattr(self._os, 'chmod'):
|
||||
self._os.chmod(file, self._mode)
|
||||
|
||||
|
||||
def open(file, flag=None, mode=0666):
|
||||
"""Open the database file, filename, and return corresponding object.
|
||||
|
||||
The flag argument, used to control how the database is opened in the
|
||||
other DBM implementations, is ignored in the dumbdbm module; the
|
||||
database is always opened for update, and will be created if it does
|
||||
not exist.
|
||||
|
||||
The optional mode argument is the UNIX mode of the file, used only when
|
||||
the database has to be created. It defaults to octal code 0666 (and
|
||||
will be modified by the prevailing umask).
|
||||
|
||||
"""
|
||||
# flag argument is currently ignored
|
||||
|
||||
# Modify mode depending on the umask
|
||||
try:
|
||||
um = _os.umask(0)
|
||||
_os.umask(um)
|
||||
except AttributeError:
|
||||
pass
|
||||
else:
|
||||
# Turn off any bits that are set in the umask
|
||||
mode = mode & (~um)
|
||||
|
||||
return _Database(file, mode)
|
||||
@@ -0,0 +1,32 @@
|
||||
Copyright (c) 2013, Ethan Furman.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
Redistributions of source code must retain the above
|
||||
copyright notice, this list of conditions and the
|
||||
following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
|
||||
Neither the name Ethan Furman nor the names of any
|
||||
contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -0,0 +1,3 @@
|
||||
enum34 is the new Python stdlib enum module available in Python 3.4
|
||||
backported for previous versions of Python from 2.4 to 3.3.
|
||||
Tested on 2.6, 2.7, and 3.3+.
|
||||
@@ -0,0 +1,837 @@
|
||||
"""Python Enumerations"""
|
||||
|
||||
import sys as _sys
|
||||
|
||||
__all__ = ['Enum', 'IntEnum', 'unique']
|
||||
|
||||
version = 1, 1, 6
|
||||
|
||||
pyver = float('%s.%s' % _sys.version_info[:2])
|
||||
|
||||
try:
    any
except NameError:
    # No builtin any() available: provide an equivalent fallback.
    def any(iterable):
        """Return True if at least one element of `iterable` is truthy."""
        for item in iterable:
            if not item:
                continue
            return True
        return False
|
||||
|
||||
try:
|
||||
from collections import OrderedDict
|
||||
except ImportError:
|
||||
OrderedDict = None
|
||||
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
# In Python 2 basestring is the ancestor of both str and unicode
|
||||
# in Python 3 it's just str, but was missing in 3.1
|
||||
basestring = str
|
||||
|
||||
try:
|
||||
unicode
|
||||
except NameError:
|
||||
# In Python 3 unicode no longer exists (it's just str)
|
||||
unicode = str
|
||||
|
||||
class _RouteClassAttributeToGetattr(object):
|
||||
"""Route attribute access on a class to __getattr__.
|
||||
|
||||
This is a descriptor, used to define attributes that act differently when
|
||||
accessed through an instance and through a class. Instance access remains
|
||||
normal, but access to an attribute through a class will be routed to the
|
||||
class's __getattr__ method; this is done by raising AttributeError.
|
||||
|
||||
"""
|
||||
def __init__(self, fget=None):
|
||||
self.fget = fget
|
||||
|
||||
def __get__(self, instance, ownerclass=None):
|
||||
if instance is None:
|
||||
raise AttributeError()
|
||||
return self.fget(instance)
|
||||
|
||||
def __set__(self, instance, value):
|
||||
raise AttributeError("can't set attribute")
|
||||
|
||||
def __delete__(self, instance):
|
||||
raise AttributeError("can't delete attribute")
|
||||
|
||||
|
||||
def _is_descriptor(obj):
|
||||
"""Returns True if obj is a descriptor, False otherwise."""
|
||||
return (
|
||||
hasattr(obj, '__get__') or
|
||||
hasattr(obj, '__set__') or
|
||||
hasattr(obj, '__delete__'))
|
||||
|
||||
|
||||
def _is_dunder(name):
|
||||
"""Returns True if a __dunder__ name, False otherwise."""
|
||||
return (name[:2] == name[-2:] == '__' and
|
||||
name[2:3] != '_' and
|
||||
name[-3:-2] != '_' and
|
||||
len(name) > 4)
|
||||
|
||||
|
||||
def _is_sunder(name):
|
||||
"""Returns True if a _sunder_ name, False otherwise."""
|
||||
return (name[0] == name[-1] == '_' and
|
||||
name[1:2] != '_' and
|
||||
name[-2:-1] != '_' and
|
||||
len(name) > 2)
|
||||
|
||||
|
||||
def _make_class_unpicklable(cls):
|
||||
"""Make the given class un-picklable."""
|
||||
def _break_on_call_reduce(self, protocol=None):
|
||||
raise TypeError('%r cannot be pickled' % self)
|
||||
cls.__reduce_ex__ = _break_on_call_reduce
|
||||
cls.__module__ = '<unknown>'
|
||||
|
||||
|
||||
class _EnumDict(dict):
    """Class namespace that records enum member names in assignment order.

    EnumMeta will use the names found in self._member_names as the
    enumeration member names.

    """
    def __init__(self):
        super(_EnumDict, self).__init__()
        self._member_names = []

    def __setitem__(self, key, value):
        """Store `key`, recording it as a member name when appropriate.

        * On Python 3 ``_order_`` / ``__order__`` are silently discarded;
          on Python 2 ``__order__`` is stored as ``_order_``.
        * Any other single-underscore (sunder) name raises ValueError.
        * Dunder names are stored without further checks.
        * Re-using a name already recorded as a member raises TypeError.
        * A non-descriptor value is recorded as a member; if the name is
          already present in the namespace TypeError is raised instead.
        * Descriptors are stored without being recorded as members.

        Duplicate values are not checked for.

        """
        if key in ('_order_', '__order__') and pyver >= 3.0:
            return
        if key == '__order__':
            key = '_order_'

        if _is_sunder(key):
            if key != '_order_':
                raise ValueError('_names_ are reserved for future Enum use')
        elif not _is_dunder(key):
            if key in self._member_names:
                # descriptor overwriting an enum?
                raise TypeError('Attempted to reuse key: %r' % key)
            if not _is_descriptor(value):
                if key in self:
                    # enum overwriting a descriptor?
                    raise TypeError('Key already defined as: %r' % self[key])
                self._member_names.append(key)
        super(_EnumDict, self).__setitem__(key, value)
|
||||
|
||||
|
||||
# Dummy value for Enum as EnumMeta explicitly checks for it, but of course until
|
||||
# EnumMeta finishes running the first time the Enum class doesn't exist. This
|
||||
# is also why there are checks in EnumMeta like `if Enum is not None`
|
||||
Enum = None
|
||||
|
||||
|
||||
class EnumMeta(type):
|
||||
"""Metaclass for Enum"""
|
||||
@classmethod
def __prepare__(metacls, cls, bases):
    """Return a fresh `_EnumDict` to serve as the class namespace."""
    namespace = _EnumDict()
    return namespace
|
||||
|
||||
def __new__(metacls, cls, bases, classdict):
    """Create a new enumeration class and instantiate its members.

    metacls:   the metaclass (EnumMeta or a subclass)
    cls:       name of the class being created
    bases:     tuple of base classes
    classdict: class namespace; a plain dict is first copied into an
               `_EnumDict` so that member names get recorded

    Members are removed from the namespace, the class is created, and then
    each member is instantiated in `_order_` and registered in
    `_member_names_`, `_member_map_` and (when hashable)
    `_value2member_map_`.  A member whose value equals that of an earlier
    member becomes an alias for it.
    """
    # an Enum class is final once enumeration items have been defined; it
    # cannot be mixed with other types (int, float, etc.) if it has an
    # inherited __new__ unless a new __new__ is defined (or the resulting
    # class will fail).
    if type(classdict) is dict:
        original_dict = classdict
        classdict = _EnumDict()
        for k, v in original_dict.items():
            classdict[k] = v

    member_type, first_enum = metacls._get_mixins_(bases)
    __new__, save_new, use_args = metacls._find_new_(classdict, member_type,
                                                    first_enum)
    # save enum items into separate mapping so they don't get baked into
    # the new class
    members = dict((k, classdict[k]) for k in classdict._member_names)
    for name in classdict._member_names:
        del classdict[name]

    # py2 support for definition order
    _order_ = classdict.get('_order_')
    if _order_ is None:
        if pyver < 3.0:
            # no order given: sort by value, falling back to sorting by
            # name when the values cannot be compared
            try:
                _order_ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])]
            except TypeError:
                _order_ = [name for name in sorted(members.keys())]
        else:
            _order_ = classdict._member_names
    else:
        del classdict['_order_']
        if pyver < 3.0:
            _order_ = _order_.replace(',', ' ').split()
            # members not listed in _order_ are appended after the listed ones
            aliases = [name for name in members if name not in _order_]
            _order_ += aliases

    # check for illegal enum names (any others?)
    invalid_names = set(members) & set(['mro'])
    if invalid_names:
        raise ValueError('Invalid enum member name(s): %s' % (
            ', '.join(invalid_names), ))

    # save attributes from super classes so we know if we can take
    # the shortcut of storing members in the class dict
    base_attributes = set([a for b in bases for a in b.__dict__])
    # create our new Enum type
    enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict)
    enum_class._member_names_ = []               # non-alias names, in _order_
    if OrderedDict is not None:
        enum_class._member_map_ = OrderedDict()
    else:
        enum_class._member_map_ = {}             # name->value map
    enum_class._member_type_ = member_type

    # Reverse value->name map for hashable values.
    enum_class._value2member_map_ = {}

    # instantiate them, checking for duplicates as we go
    # we instantiate first instead of checking for duplicates first in case
    # a custom __new__ is doing something funky with the values -- such as
    # auto-numbering ;)
    if __new__ is None:
        __new__ = enum_class.__new__
    for member_name in _order_:
        value = members[member_name]
        if not isinstance(value, tuple):
            args = (value, )
        else:
            args = value
        if member_type is tuple:   # special case for tuple enums
            args = (args, )     # wrap it one more time
        if not use_args or not args:
            enum_member = __new__(enum_class)
            if not hasattr(enum_member, '_value_'):
                enum_member._value_ = value
        else:
            enum_member = __new__(enum_class, *args)
            if not hasattr(enum_member, '_value_'):
                enum_member._value_ = member_type(*args)
        value = enum_member._value_
        enum_member._name_ = member_name
        enum_member.__objclass__ = enum_class
        enum_member.__init__(*args)
        # If another member with the same value was already defined, the
        # new member becomes an alias to the existing one.
        for name, canonical_member in enum_class._member_map_.items():
            if canonical_member.value == enum_member._value_:
                enum_member = canonical_member
                break
        else:
            # Aliases don't appear in member names (only in __members__).
            enum_class._member_names_.append(member_name)
        # performance boost for any member that would not shadow
        # a DynamicClassAttribute (aka _RouteClassAttributeToGetattr)
        if member_name not in base_attributes:
            setattr(enum_class, member_name, enum_member)
        # now add to _member_map_
        enum_class._member_map_[member_name] = enum_member
        try:
            # This may fail if value is not hashable. We can't add the value
            # to the map, and by-value lookups for this value will be
            # linear.
            enum_class._value2member_map_[value] = enum_member
        except TypeError:
            pass

    # If a custom type is mixed into the Enum, and it does not know how
    # to pickle itself, pickle.dumps will succeed but pickle.loads will
    # fail.  Rather than have the error show up later and possibly far
    # from the source, sabotage the pickle protocol for this class so
    # that pickle.dumps also fails.
    #
    # However, if the new class implements its own __reduce_ex__, do not
    # sabotage -- it's on them to make sure it works correctly.  We use
    # __reduce_ex__ instead of any of the others as it is preferred by
    # pickle over __reduce__, and it handles all pickle protocols.
    unpicklable = False
    if '__reduce_ex__' not in classdict:
        if member_type is not object:
            methods = ('__getnewargs_ex__', '__getnewargs__',
                    '__reduce_ex__', '__reduce__')
            if not any(m in member_type.__dict__ for m in methods):
                _make_class_unpicklable(enum_class)
                unpicklable = True

    # double check that repr and friends are not the mixin's or various
    # things break (such as pickle)
    for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'):
        class_method = getattr(enum_class, name)
        enum_method = getattr(first_enum, name, None)
        if name not in classdict and class_method is not enum_method:
            if name == '__reduce_ex__' and unpicklable:
                # keep the sabotaged __reduce_ex__ installed above
                continue
            setattr(enum_class, name, enum_method)

    # method resolution and int's are not playing nice
    # Pythons older than 2.6 use __cmp__

    if pyver < 2.6:

        if issubclass(enum_class, int):
            setattr(enum_class, '__cmp__', getattr(int, '__cmp__'))

    elif pyver < 3.0:

        if issubclass(enum_class, int):
            for method in (
                    '__le__',
                    '__lt__',
                    '__gt__',
                    '__ge__',
                    '__eq__',
                    '__ne__',
                    '__hash__',
                    ):
                setattr(enum_class, method, getattr(int, method))

    # replace any other __new__ with our own (as long as Enum is not None,
    # anyway) -- again, this is to support pickle
    if Enum is not None:
        # if the user defined their own __new__, save it before it gets
        # clobbered in case they subclass later
        if save_new:
            setattr(enum_class, '__member_new__', enum_class.__dict__['__new__'])
        setattr(enum_class, '__new__', Enum.__dict__['__new__'])
    return enum_class
|
||||
|
||||
def __bool__(cls):
|
||||
"""
|
||||
classes/types should always be True.
|
||||
"""
|
||||
return True
|
||||
|
||||
def __call__(cls, value, names=None, module=None, type=None, start=1):
|
||||
"""Either returns an existing member, or creates a new enum class.
|
||||
|
||||
This method is used both when an enum class is given a value to match
|
||||
to an enumeration member (i.e. Color(3)) and for the functional API
|
||||
(i.e. Color = Enum('Color', names='red green blue')).
|
||||
|
||||
When used for the functional API: `module`, if set, will be stored in
|
||||
the new class' __module__ attribute; `type`, if set, will be mixed in
|
||||
as the first base class.
|
||||
|
||||
Note: if `module` is not set this routine will attempt to discover the
|
||||
calling module by walking the frame stack; if this is unsuccessful
|
||||
the resulting class will not be pickleable.
|
||||
|
||||
"""
|
||||
if names is None: # simple value lookup
|
||||
return cls.__new__(cls, value)
|
||||
# otherwise, functional API: we're creating a new Enum type
|
||||
return cls._create_(value, names, module=module, type=type, start=start)
|
||||
|
||||
def __contains__(cls, member):
|
||||
return isinstance(member, cls) and member.name in cls._member_map_
|
||||
|
||||
def __delattr__(cls, attr):
|
||||
# nicer error message when someone tries to delete an attribute
|
||||
# (see issue19025).
|
||||
if attr in cls._member_map_:
|
||||
raise AttributeError(
|
||||
"%s: cannot delete Enum member." % cls.__name__)
|
||||
super(EnumMeta, cls).__delattr__(attr)
|
||||
|
||||
def __dir__(self):
|
||||
return (['__class__', '__doc__', '__members__', '__module__'] +
|
||||
self._member_names_)
|
||||
|
||||
@property
|
||||
def __members__(cls):
|
||||
"""Returns a mapping of member name->value.
|
||||
|
||||
This mapping lists all enum members, including aliases. Note that this
|
||||
is a copy of the internal mapping.
|
||||
|
||||
"""
|
||||
return cls._member_map_.copy()
|
||||
|
||||
def __getattr__(cls, name):
    """Return the enum member matching `name`

    We use __getattr__ instead of descriptors or inserting into the enum
    class' __dict__ in order to support `name` and `value` being both
    properties for enum members (which live in the class' __dict__) and
    enum members themselves.

    Dunder names, and names that are not members, raise AttributeError.

    """
    if _is_dunder(name):
        raise AttributeError(name)
    member_map = cls._member_map_
    try:
        member = member_map[name]
    except KeyError:
        raise AttributeError(name)
    return member
|
||||
|
||||
def __getitem__(cls, name):
|
||||
return cls._member_map_[name]
|
||||
|
||||
def __iter__(cls):
|
||||
return (cls._member_map_[name] for name in cls._member_names_)
|
||||
|
||||
def __reversed__(cls):
|
||||
return (cls._member_map_[name] for name in reversed(cls._member_names_))
|
||||
|
||||
def __len__(cls):
|
||||
return len(cls._member_names_)
|
||||
|
||||
__nonzero__ = __bool__
|
||||
|
||||
def __repr__(cls):
|
||||
return "<enum %r>" % cls.__name__
|
||||
|
||||
def __setattr__(cls, name, value):
|
||||
"""Block attempts to reassign Enum members.
|
||||
|
||||
A simple assignment to the class namespace only changes one of the
|
||||
several possible ways to get an Enum member from the Enum class,
|
||||
resulting in an inconsistent Enumeration.
|
||||
|
||||
"""
|
||||
member_map = cls.__dict__.get('_member_map_', {})
|
||||
if name in member_map:
|
||||
raise AttributeError('Cannot reassign members.')
|
||||
super(EnumMeta, cls).__setattr__(name, value)
|
||||
|
||||
def _create_(cls, class_name, names=None, module=None, type=None, start=1):
    """Convenience method to create a new Enum class.

    `names` can be:

    * A string containing member names, separated either with spaces or
      commas.  Values are auto-numbered from `start`.
    * An iterable of member names.  Values are auto-numbered from `start`.
    * An iterable of (member name, value) pairs.
    * A mapping of member name -> value.

    An empty string or empty sequence of names produces an enumeration
    with no members.

    `type`, if given, is mixed in as the first base class.  `module`, if
    given, becomes the new class' __module__; otherwise the caller's
    module is looked up through the frame stack, and if that fails the
    class is made unpicklable.

    Raises TypeError on Python 2 when a unicode `class_name` cannot be
    encoded as ASCII.

    """
    if pyver < 3.0:
        # if class_name is unicode, attempt a conversion to ASCII
        if isinstance(class_name, unicode):
            try:
                class_name = class_name.encode('ascii')
            except UnicodeEncodeError:
                raise TypeError('%r is not representable in ASCII' % class_name)
    metacls = cls.__class__
    if type is None:
        bases = (cls, )
    else:
        bases = (type, cls)
    classdict = metacls.__prepare__(class_name, bases)
    _order_ = []

    # special processing needed for names?
    if isinstance(names, basestring):
        names = names.replace(',', ' ').split()
    # `names and ...` guards names[0] against an empty list/tuple
    if isinstance(names, (tuple, list)) and names and isinstance(names[0], basestring):
        names = [(e, i+start) for (i, e) in enumerate(names)]

    # Here, names is either an iterable of (name, value) or a mapping.
    item = None  # in case names is empty
    for item in names:
        if isinstance(item, basestring):
            member_name, member_value = item, names[item]
        else:
            member_name, member_value = item
        classdict[member_name] = member_value
        _order_.append(member_name)
    # only set _order_ in classdict if name/value was not from a mapping
    if not isinstance(item, basestring):
        classdict['_order_'] = ' '.join(_order_)
    enum_class = metacls.__new__(metacls, class_name, bases, classdict)

    # TODO: replace the frame hack if a blessed way to know the calling
    # module is ever developed
    if module is None:
        try:
            module = _sys._getframe(2).f_globals['__name__']
        except (AttributeError, ValueError, KeyError):
            # no _getframe, stack too shallow, or caller globals have
            # no '__name__': leave module unset
            pass
    if module is None:
        _make_class_unpicklable(enum_class)
    else:
        enum_class.__module__ = module

    return enum_class
|
||||
|
||||
@staticmethod
def _get_mixins_(bases):
    """Returns the type for creating enum members, and the first inherited
    enum class.

    bases: the tuple of bases that was given to __new__

    Returns ``(object, Enum)`` when there are no bases or Enum itself does
    not exist yet.  Raises TypeError when a base already defines members,
    or when the last base is not an Enum subclass.

    """
    if Enum is None or not bases:
        return object, Enum

    # double check that we are not subclassing a class with existing
    # enumeration members
    for candidate in bases:
        if candidate is Enum or not issubclass(candidate, Enum):
            continue
        if candidate._member_names_:
            raise TypeError("Cannot extend enumerations")

    last_base = bases[-1]
    if not issubclass(last_base, Enum):
        raise TypeError("new enumerations must be created as "
                "`ClassName([mixin_type,] enum_type)`")

    # get correct mix-in type (either mix-in type of Enum subclass, or
    # first base if last base is Enum)
    leading_base = bases[0]
    if not issubclass(leading_base, Enum):
        # first data type, enum type
        return leading_base, last_base

    member_type = first_enum = None
    for klass in leading_base.__mro__:
        # most common: (IntEnum, int, Enum, object)
        # possible:    (<Enum 'AutoIntEnum'>, <Enum 'IntEnum'>,
        #               <class 'int'>, <Enum 'Enum'>,
        #               <class 'object'>)
        if issubclass(klass, Enum):
            if first_enum is None:
                first_enum = klass
        elif member_type is None:
            member_type = klass

    return member_type, first_enum
|
||||
|
||||
if pyver < 3.0:
    @staticmethod
    def _find_new_(classdict, member_type, first_enum):
        """Returns the __new__ to be used for creating the enum members.

        classdict: the class dictionary given to __new__
        member_type: the data type whose __new__ will be used by default
        first_enum: enumeration to check for an overriding __new__

        The result is a ``(__new__, save_new, use_args)`` triple.

        """
        # now find the correct __new__, checking to see if one was defined
        # by the user; also check earlier enum classes in case a __new__ was
        # saved as __member_new__
        chosen = classdict.get('__new__', None)
        if chosen:
            return None, True, True      # __new__, save_new, use_args

        none_new = getattr(None, '__new__')
        object_new = getattr(object, '__new__')
        enum_new = none_new
        if Enum is not None:
            enum_new = Enum.__dict__['__new__']
        # check all possibles for __member_new__ before falling back to
        # __new__
        for attr in ('__member_new__', '__new__'):
            for source in (member_type, first_enum):
                # prefer the raw entry from the class __dict__
                try:
                    found = source.__dict__[attr]
                except (AttributeError, KeyError):
                    found = getattr(source, attr, None)
                if found not in [
                        None,
                        none_new,
                        object_new,
                        enum_new,
                        ]:
                    if attr == '__member_new__':
                        classdict['__new__'] = found
                        return None, False, True
                    if isinstance(found, staticmethod):
                        found = found.__get__(member_type)
                    chosen = found
                    break
            if chosen is not None:
                break
        else:
            chosen = object.__new__

        # if a non-object.__new__ is used then whatever value/tuple was
        # assigned to the enum member name will be passed to __new__ and to the
        # new enum member's __init__
        use_args = chosen is not object.__new__

        return chosen, False, use_args
else:
    @staticmethod
    def _find_new_(classdict, member_type, first_enum):
        """Returns the __new__ to be used for creating the enum members.

        classdict: the class dictionary given to __new__
        member_type: the data type whose __new__ will be used by default
        first_enum: enumeration to check for an overriding __new__

        The result is a ``(__new__, save_new, use_args)`` triple.

        """
        # now find the correct __new__, checking to see if one was defined
        # by the user; also check earlier enum classes in case a __new__ was
        # saved as __member_new__
        chosen = classdict.get('__new__', None)

        # should __new__ be saved as __member_new__ later?
        save_new = chosen is not None

        if not save_new:
            # check all possibles for __member_new__ before falling back to
            # __new__
            for attr in ('__member_new__', '__new__'):
                for source in (member_type, first_enum):
                    found = getattr(source, attr, None)
                    if found not in (
                            None,
                            None.__new__,
                            object.__new__,
                            Enum.__new__,
                            ):
                        chosen = found
                        break
                if chosen is not None:
                    break
            else:
                chosen = object.__new__

        # if a non-object.__new__ is used then whatever value/tuple was
        # assigned to the enum member name will be passed to __new__ and to the
        # new enum member's __init__
        use_args = chosen is not object.__new__

        return chosen, save_new, use_args
|
||||
|
||||
|
||||
########################################################
|
||||
# In order to support Python 2 and 3 with a single
|
||||
# codebase we have to create the Enum methods separately
|
||||
# and then use the `type(name, bases, dict)` method to
|
||||
# create the class.
|
||||
########################################################
|
||||
temp_enum_dict = {}
|
||||
temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n"
|
||||
|
||||
def __new__(cls, value):
|
||||
# all enum instances are actually created during class construction
|
||||
# without calling this method; this method is called by the metaclass'
|
||||
# __call__ (i.e. Color(3) ), and by pickle
|
||||
if type(value) is cls:
|
||||
# For lookups like Color(Color.red)
|
||||
value = value.value
|
||||
#return value
|
||||
# by-value search for a matching enum member
|
||||
# see if it's in the reverse mapping (for hashable values)
|
||||
try:
|
||||
if value in cls._value2member_map_:
|
||||
return cls._value2member_map_[value]
|
||||
except TypeError:
|
||||
# not there, now do long search -- O(n) behavior
|
||||
for member in cls._member_map_.values():
|
||||
if member.value == value:
|
||||
return member
|
||||
raise ValueError("%s is not a valid %s" % (value, cls.__name__))
|
||||
temp_enum_dict['__new__'] = __new__
|
||||
del __new__
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s.%s: %r>" % (
|
||||
self.__class__.__name__, self._name_, self._value_)
|
||||
temp_enum_dict['__repr__'] = __repr__
|
||||
del __repr__
|
||||
|
||||
def __str__(self):
|
||||
return "%s.%s" % (self.__class__.__name__, self._name_)
|
||||
temp_enum_dict['__str__'] = __str__
|
||||
del __str__
|
||||
|
||||
if pyver >= 3.0:
    def __dir__(self):
        """List a fixed set of dunders plus the public non-member attributes.

        Walks every class in the member's MRO and collects each __dict__
        key that does not start with an underscore and is not a member
        name.
        """
        added_behavior = []
        for klass in self.__class__.mro():
            for attr in klass.__dict__:
                if attr[0] != '_' and attr not in self._member_map_:
                    added_behavior.append(attr)
        return ['__class__', '__doc__', '__module__', ] + added_behavior
    temp_enum_dict['__dir__'] = __dir__
    del __dir__
|
||||
|
||||
def __format__(self, format_spec):
    """Format the member according to ``format_spec``.

    A pure Enum (member type is ``object``) is formatted as the string
    ``str(self)``.  A mixed-in Enum delegates to the mixed-in type's
    ``__format__`` applied to the member's value, so the value rather than
    the Enum name shows up in the result.
    """
    if self._member_type_ is object:
        # pure Enum branch
        formatter = str
        target = str(self)
    else:
        # mix-in branch
        formatter = self._member_type_
        target = self.value
    return formatter.__format__(target, format_spec)
temp_enum_dict['__format__'] = __format__
del __format__
|
||||
|
||||
|
||||
####################################
|
||||
# Python versions older than 2.6 use __cmp__
|
||||
|
||||
if pyver < 2.6:

    def __cmp__(self, other):
        """Comparison hook registered only when ``pyver < 2.6``.

        Returns 0 when ``other`` is this very member, -1 when it is a
        different member of the same class, and ``NotImplemented`` when
        ``other`` is of any other type.
        """
        if type(other) is self.__class__:
            if self is other:
                return 0
            return -1
        # A ``raise TypeError("unorderable types ...")`` used to follow this
        # return; it could never execute and has been removed.
        return NotImplemented
    temp_enum_dict['__cmp__'] = __cmp__
    del __cmp__

else:

    # Ordering comparisons on plain Enum members always raise TypeError.

    def __le__(self, other):
        """Reject ``self <= other`` with TypeError."""
        raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__le__'] = __le__
    del __le__

    def __lt__(self, other):
        """Reject ``self < other`` with TypeError."""
        raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__lt__'] = __lt__
    del __lt__

    def __ge__(self, other):
        """Reject ``self >= other`` with TypeError."""
        raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__ge__'] = __ge__
    del __ge__

    def __gt__(self, other):
        """Reject ``self > other`` with TypeError."""
        raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__gt__'] = __gt__
    del __gt__
|
||||
|
||||
|
||||
def __eq__(self, other):
    """Members are equal only to themselves; other types get NotImplemented."""
    if type(other) is not self.__class__:
        return NotImplemented
    return self is other
temp_enum_dict['__eq__'] = __eq__
del __eq__


def __ne__(self, other):
    """Inverse of the identity test in ``__eq__``; other types get NotImplemented."""
    if type(other) is not self.__class__:
        return NotImplemented
    return self is not other
temp_enum_dict['__ne__'] = __ne__
del __ne__
|
||||
|
||||
def __hash__(self):
    """Hash a member by its name."""
    member_name = self._name_
    return hash(member_name)
temp_enum_dict['__hash__'] = __hash__
del __hash__
|
||||
|
||||
def __reduce_ex__(self, proto):
    """Pickle support: return ``(class, (value,))``; ``proto`` is not used."""
    rebuild_args = (self._value_, )
    return self.__class__, rebuild_args
temp_enum_dict['__reduce_ex__'] = __reduce_ex__
del __reduce_ex__
|
||||
|
||||
# _RouteClassAttributeToGetattr is used to provide access to the `name`
|
||||
# and `value` properties of enum members while keeping some measure of
|
||||
# protection from modification, while still allowing for an enumeration
|
||||
# to have members named `name` and `value`. This works because enumeration
|
||||
# members are not set directly on the enum class -- __getattr__ is
|
||||
# used to look them up.
|
||||
|
||||
@_RouteClassAttributeToGetattr
def name(self):
    """The member's name (stored in ``_name_``)."""
    return self._name_
temp_enum_dict['name'] = name
del name


@_RouteClassAttributeToGetattr
def value(self):
    """The member's value (stored in ``_value_``)."""
    return self._value_
temp_enum_dict['value'] = value
del value
|
||||
|
||||
@classmethod
def _convert(cls, name, module, filter, source=None):
    """
    Create a new Enum subclass that replaces a collection of global constants

    Every constant in ``source`` (or, when ``source`` is falsy, in the
    globals of the module named ``module``) whose name passes ``filter()``
    becomes a member of a new Enum called ``name``.  The new enum and its
    members are written back into that module's globals, and the enum is
    returned.
    """
    target_globals = vars(_sys.modules[module])
    if source:
        constants = vars(source)
    else:
        constants = target_globals
    selected = {}
    for const_name, const_value in constants.items():
        if filter(const_name):
            selected[const_name] = const_value
    new_enum = cls(name, selected, module=module)
    # swap in the by-name __reduce_ex__ so unpickling works in previous
    # Python versions
    new_enum.__reduce_ex__ = _reduce_ex_by_name
    target_globals.update(new_enum.__members__)
    target_globals[name] = new_enum
    return new_enum
temp_enum_dict['_convert'] = _convert
del _convert
|
||||
|
||||
Enum = EnumMeta('Enum', (object, ), temp_enum_dict)
|
||||
del temp_enum_dict
|
||||
|
||||
# Enum has now been created
|
||||
###########################
|
||||
|
||||
class IntEnum(int, Enum):
    """Enum where members are also (and must be) ints.

    ``int`` is listed before ``Enum`` in the bases, so members are ``int``
    instances as well as enum members.
    """
|
||||
|
||||
def _reduce_ex_by_name(self, proto):
    """Alternate ``__reduce_ex__``: reduce a member to its name; ``proto`` is not used."""
    member_name = self.name
    return member_name


def unique(enumeration):
    """Class decorator that ensures only unique members exist in an enumeration.

    An entry of ``enumeration.__members__`` whose key differs from the name
    of the member it maps to is an alias.  If any alias exists, ``ValueError``
    is raised listing each one as ``alias -> name``; otherwise the
    enumeration is returned unchanged.
    """
    aliases = [
        (label, member.name)
        for label, member in enumeration.__members__.items()
        if label != member.name
    ]
    if aliases:
        listing = ', '.join(["%s -> %s" % pair for pair in aliases])
        raise ValueError('duplicate names found in %r: %s' % (enumeration, listing))
    return enumeration
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,735 @@
|
||||
``enum`` --- support for enumerations
|
||||
========================================
|
||||
|
||||
.. :synopsis: enumerations are sets of symbolic names bound to unique, constant
|
||||
values.
|
||||
.. :moduleauthor:: Ethan Furman <ethan@stoneleaf.us>
|
||||
.. :sectionauthor:: Barry Warsaw <barry@python.org>,
|
||||
.. :sectionauthor:: Eli Bendersky <eliben@gmail.com>,
|
||||
.. :sectionauthor:: Ethan Furman <ethan@stoneleaf.us>
|
||||
|
||||
----------------
|
||||
|
||||
An enumeration is a set of symbolic names (members) bound to unique, constant
|
||||
values. Within an enumeration, the members can be compared by identity, and
|
||||
the enumeration itself can be iterated over.
|
||||
|
||||
|
||||
Module Contents
|
||||
---------------
|
||||
|
||||
This module defines two enumeration classes that can be used to define unique
|
||||
sets of names and values: ``Enum`` and ``IntEnum``. It also defines
|
||||
one decorator, ``unique``.
|
||||
|
||||
``Enum``
|
||||
|
||||
Base class for creating enumerated constants. See section `Functional API`_
|
||||
for an alternate construction syntax.
|
||||
|
||||
``IntEnum``
|
||||
|
||||
Base class for creating enumerated constants that are also subclasses of ``int``.
|
||||
|
||||
``unique``
|
||||
|
||||
Enum class decorator that ensures only one name is bound to any one value.
|
||||
|
||||
|
||||
Creating an Enum
|
||||
----------------
|
||||
|
||||
Enumerations are created using the ``class`` syntax, which makes them
|
||||
easy to read and write. An alternative creation method is described in
|
||||
`Functional API`_. To define an enumeration, subclass ``Enum`` as
|
||||
follows::
|
||||
|
||||
>>> from enum import Enum
|
||||
>>> class Color(Enum):
|
||||
... red = 1
|
||||
... green = 2
|
||||
... blue = 3
|
||||
|
||||
Note: Nomenclature
|
||||
|
||||
- The class ``Color`` is an *enumeration* (or *enum*)
|
||||
- The attributes ``Color.red``, ``Color.green``, etc., are
|
||||
*enumeration members* (or *enum members*).
|
||||
- The enum members have *names* and *values* (the name of
|
||||
``Color.red`` is ``red``, the value of ``Color.blue`` is
|
||||
``3``, etc.)
|
||||
|
||||
Note:
|
||||
|
||||
Even though we use the ``class`` syntax to create Enums, Enums
|
||||
are not normal Python classes. See `How are Enums different?`_ for
|
||||
more details.
|
||||
|
||||
Enumeration members have human readable string representations::
|
||||
|
||||
>>> print(Color.red)
|
||||
Color.red
|
||||
|
||||
...while their ``repr`` has more information::
|
||||
|
||||
>>> print(repr(Color.red))
|
||||
<Color.red: 1>
|
||||
|
||||
The *type* of an enumeration member is the enumeration it belongs to::
|
||||
|
||||
>>> type(Color.red)
|
||||
<enum 'Color'>
|
||||
>>> isinstance(Color.green, Color)
|
||||
True
|
||||
>>>
|
||||
|
||||
Enum members also have a property that contains just their item name::
|
||||
|
||||
>>> print(Color.red.name)
|
||||
red
|
||||
|
||||
Enumerations support iteration. In Python 3.x definition order is used; in
|
||||
Python 2.x the definition order is not available, but class attribute
|
||||
``__order__`` is supported; otherwise, value order is used::
|
||||
|
||||
>>> class Shake(Enum):
|
||||
... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x
|
||||
... vanilla = 7
|
||||
... chocolate = 4
|
||||
... cookies = 9
|
||||
... mint = 3
|
||||
...
|
||||
>>> for shake in Shake:
|
||||
... print(shake)
|
||||
...
|
||||
Shake.vanilla
|
||||
Shake.chocolate
|
||||
Shake.cookies
|
||||
Shake.mint
|
||||
|
||||
The ``__order__`` attribute is always removed, and in 3.x it is also ignored
|
||||
(order is definition order); however, in the stdlib version it will be ignored
|
||||
but not removed.
|
||||
|
||||
Enumeration members are hashable, so they can be used in dictionaries and sets::
|
||||
|
||||
>>> apples = {}
|
||||
>>> apples[Color.red] = 'red delicious'
|
||||
>>> apples[Color.green] = 'granny smith'
|
||||
>>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'}
|
||||
True
|
||||
|
||||
|
||||
Programmatic access to enumeration members and their attributes
|
||||
---------------------------------------------------------------
|
||||
|
||||
Sometimes it's useful to access members in enumerations programmatically (i.e.
|
||||
situations where ``Color.red`` won't do because the exact color is not known
|
||||
at program-writing time). ``Enum`` allows such access::
|
||||
|
||||
>>> Color(1)
|
||||
<Color.red: 1>
|
||||
>>> Color(3)
|
||||
<Color.blue: 3>
|
||||
|
||||
If you want to access enum members by *name*, use item access::
|
||||
|
||||
>>> Color['red']
|
||||
<Color.red: 1>
|
||||
>>> Color['green']
|
||||
<Color.green: 2>
|
||||
|
||||
If you have an enum member and need its ``name`` or ``value``::
|
||||
|
||||
>>> member = Color.red
|
||||
>>> member.name
|
||||
'red'
|
||||
>>> member.value
|
||||
1
|
||||
|
||||
|
||||
Duplicating enum members and values
|
||||
-----------------------------------
|
||||
|
||||
Having two enum members (or any other attribute) with the same name is invalid;
|
||||
in Python 3.x this would raise an error, but in Python 2.x the second member
|
||||
simply overwrites the first::
|
||||
|
||||
>>> # python 2.x
|
||||
>>> class Shape(Enum):
|
||||
... square = 2
|
||||
... square = 3
|
||||
...
|
||||
>>> Shape.square
|
||||
<Shape.square: 3>
|
||||
|
||||
>>> # python 3.x
|
||||
>>> class Shape(Enum):
|
||||
... square = 2
|
||||
... square = 3
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: Attempted to reuse key: 'square'
|
||||
|
||||
However, two enum members are allowed to have the same value. Given two members
|
||||
A and B with the same value (and A defined first), B is an alias to A. By-value
|
||||
lookup of the value of A and B will return A. By-name lookup of B will also
|
||||
return A::
|
||||
|
||||
>>> class Shape(Enum):
|
||||
... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x
|
||||
... square = 2
|
||||
... diamond = 1
|
||||
... circle = 3
|
||||
... alias_for_square = 2
|
||||
...
|
||||
>>> Shape.square
|
||||
<Shape.square: 2>
|
||||
>>> Shape.alias_for_square
|
||||
<Shape.square: 2>
|
||||
>>> Shape(2)
|
||||
<Shape.square: 2>
|
||||
|
||||
|
||||
Allowing aliases is not always desirable. ``unique`` can be used to ensure
|
||||
that none exist in a particular enumeration::
|
||||
|
||||
>>> from enum import unique
|
||||
>>> @unique
|
||||
... class Mistake(Enum):
|
||||
... __order__ = 'one two three four' # only needed in 2.x
|
||||
... one = 1
|
||||
... two = 2
|
||||
... three = 3
|
||||
... four = 3
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: duplicate names found in <enum 'Mistake'>: four -> three
|
||||
|
||||
Iterating over the members of an enum does not provide the aliases::
|
||||
|
||||
>>> list(Shape)
|
||||
[<Shape.square: 2>, <Shape.diamond: 1>, <Shape.circle: 3>]
|
||||
|
||||
The special attribute ``__members__`` is a dictionary mapping names to members.
|
||||
It includes all names defined in the enumeration, including the aliases::
|
||||
|
||||
>>> for name, member in sorted(Shape.__members__.items()):
|
||||
... name, member
|
||||
...
|
||||
('alias_for_square', <Shape.square: 2>)
|
||||
('circle', <Shape.circle: 3>)
|
||||
('diamond', <Shape.diamond: 1>)
|
||||
('square', <Shape.square: 2>)
|
||||
|
||||
The ``__members__`` attribute can be used for detailed programmatic access to
|
||||
the enumeration members. For example, finding all the aliases::
|
||||
|
||||
>>> [name for name, member in Shape.__members__.items() if member.name != name]
|
||||
['alias_for_square']
|
||||
|
||||
Comparisons
|
||||
-----------
|
||||
|
||||
Enumeration members are compared by identity::
|
||||
|
||||
>>> Color.red is Color.red
|
||||
True
|
||||
>>> Color.red is Color.blue
|
||||
False
|
||||
>>> Color.red is not Color.blue
|
||||
True
|
||||
|
||||
Ordered comparisons between enumeration values are *not* supported. Enum
|
||||
members are not integers (but see `IntEnum`_ below)::
|
||||
|
||||
>>> Color.red < Color.blue
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
TypeError: unorderable types: Color() < Color()
|
||||
|
||||
.. warning::
|
||||
|
||||
In Python 2 *everything* is ordered, even though the ordering may not
|
||||
make sense. If you want your enumerations to have a sensible ordering
|
||||
check out the `OrderedEnum`_ recipe below.
|
||||
|
||||
|
||||
Equality comparisons are defined though::
|
||||
|
||||
>>> Color.blue == Color.red
|
||||
False
|
||||
>>> Color.blue != Color.red
|
||||
True
|
||||
>>> Color.blue == Color.blue
|
||||
True
|
||||
|
||||
Comparisons against non-enumeration values will always compare not equal
|
||||
(again, ``IntEnum`` was explicitly designed to behave differently, see
|
||||
below)::
|
||||
|
||||
>>> Color.blue == 2
|
||||
False
|
||||
|
||||
|
||||
Allowed members and attributes of enumerations
|
||||
----------------------------------------------
|
||||
|
||||
The examples above use integers for enumeration values. Using integers is
|
||||
short and handy (and provided by default by the `Functional API`_), but not
|
||||
strictly enforced. In the vast majority of use-cases, one doesn't care what
|
||||
the actual value of an enumeration is. But if the value *is* important,
|
||||
enumerations can have arbitrary values.
|
||||
|
||||
Enumerations are Python classes, and can have methods and special methods as
|
||||
usual. If we have this enumeration::
|
||||
|
||||
>>> class Mood(Enum):
|
||||
... funky = 1
|
||||
... happy = 3
|
||||
...
|
||||
... def describe(self):
|
||||
... # self is the member here
|
||||
... return self.name, self.value
|
||||
...
|
||||
... def __str__(self):
|
||||
... return 'my custom str! {0}'.format(self.value)
|
||||
...
|
||||
... @classmethod
|
||||
... def favorite_mood(cls):
|
||||
... # cls here is the enumeration
|
||||
... return cls.happy
|
||||
|
||||
Then::
|
||||
|
||||
>>> Mood.favorite_mood()
|
||||
<Mood.happy: 3>
|
||||
>>> Mood.happy.describe()
|
||||
('happy', 3)
|
||||
>>> str(Mood.funky)
|
||||
'my custom str! 1'
|
||||
|
||||
The rules for what is allowed are as follows: _sunder_ names (starting and
|
||||
ending with a single underscore) are reserved by enum and cannot be used;
|
||||
all other attributes defined within an enumeration will become members of this
|
||||
enumeration, with the exception of *__dunder__* names and descriptors (methods
|
||||
are also descriptors).
|
||||
|
||||
Note:
|
||||
|
||||
If your enumeration defines ``__new__`` and/or ``__init__`` then
|
||||
whatever value(s) were given to the enum member will be passed into
|
||||
those methods. See `Planet`_ for an example.
|
||||
|
||||
|
||||
Restricted subclassing of enumerations
|
||||
--------------------------------------
|
||||
|
||||
Subclassing an enumeration is allowed only if the enumeration does not define
|
||||
any members. So this is forbidden::
|
||||
|
||||
>>> class MoreColor(Color):
|
||||
... pink = 17
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: Cannot extend enumerations
|
||||
|
||||
But this is allowed::
|
||||
|
||||
>>> class Foo(Enum):
|
||||
... def some_behavior(self):
|
||||
... pass
|
||||
...
|
||||
>>> class Bar(Foo):
|
||||
... happy = 1
|
||||
... sad = 2
|
||||
...
|
||||
|
||||
Allowing subclassing of enums that define members would lead to a violation of
|
||||
some important invariants of types and instances. On the other hand, it makes
|
||||
sense to allow sharing some common behavior between a group of enumerations.
|
||||
(See `OrderedEnum`_ for an example.)
|
||||
|
||||
|
||||
Pickling
|
||||
--------
|
||||
|
||||
Enumerations can be pickled and unpickled::
|
||||
|
||||
>>> from enum.test_enum import Fruit
|
||||
>>> from pickle import dumps, loads
|
||||
>>> Fruit.tomato is loads(dumps(Fruit.tomato, 2))
|
||||
True
|
||||
|
||||
The usual restrictions for pickling apply: picklable enums must be defined in
|
||||
the top level of a module, since unpickling requires them to be importable
|
||||
from that module.
|
||||
|
||||
Note:
|
||||
|
||||
With pickle protocol version 4 (introduced in Python 3.4) it is possible
|
||||
to easily pickle enums nested in other classes.
|
||||
|
||||
|
||||
|
||||
Functional API
|
||||
--------------
|
||||
|
||||
The ``Enum`` class is callable, providing the following functional API::
|
||||
|
||||
>>> Animal = Enum('Animal', 'ant bee cat dog')
|
||||
>>> Animal
|
||||
<enum 'Animal'>
|
||||
>>> Animal.ant
|
||||
<Animal.ant: 1>
|
||||
>>> Animal.ant.value
|
||||
1
|
||||
>>> list(Animal)
|
||||
[<Animal.ant: 1>, <Animal.bee: 2>, <Animal.cat: 3>, <Animal.dog: 4>]
|
||||
|
||||
The semantics of this API resemble ``namedtuple``. The first argument
|
||||
of the call to ``Enum`` is the name of the enumeration.
|
||||
|
||||
The second argument is the *source* of enumeration member names. It can be a
|
||||
whitespace-separated string of names, a sequence of names, a sequence of
|
||||
2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to
|
||||
values. The last two options enable assigning arbitrary values to
|
||||
enumerations; the others auto-assign increasing integers starting with 1. A
|
||||
new class derived from ``Enum`` is returned. In other words, the above
|
||||
assignment to ``Animal`` is equivalent to::
|
||||
|
||||
>>> class Animal(Enum):
|
||||
... ant = 1
|
||||
... bee = 2
|
||||
... cat = 3
|
||||
... dog = 4
|
||||
|
||||
Pickling enums created with the functional API can be tricky as frame stack
|
||||
implementation details are used to try and figure out which module the
|
||||
enumeration is being created in (e.g. it will fail if you use a utility
|
||||
function in separate module, and also may not work on IronPython or Jython).
|
||||
The solution is to specify the module name explicitly as follows::
|
||||
|
||||
>>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__)
|
||||
|
||||
Derived Enumerations
|
||||
--------------------
|
||||
|
||||
IntEnum
|
||||
^^^^^^^
|
||||
|
||||
A variation of ``Enum`` is provided which is also a subclass of
|
||||
``int``. Members of an ``IntEnum`` can be compared to integers;
|
||||
by extension, integer enumerations of different types can also be compared
|
||||
to each other::
|
||||
|
||||
>>> from enum import IntEnum
|
||||
>>> class Shape(IntEnum):
|
||||
... circle = 1
|
||||
... square = 2
|
||||
...
|
||||
>>> class Request(IntEnum):
|
||||
... post = 1
|
||||
... get = 2
|
||||
...
|
||||
>>> Shape == 1
|
||||
False
|
||||
>>> Shape.circle == 1
|
||||
True
|
||||
>>> Shape.circle == Request.post
|
||||
True
|
||||
|
||||
However, they still can't be compared to standard ``Enum`` enumerations::
|
||||
|
||||
>>> class Shape(IntEnum):
|
||||
... circle = 1
|
||||
... square = 2
|
||||
...
|
||||
>>> class Color(Enum):
|
||||
... red = 1
|
||||
... green = 2
|
||||
...
|
||||
>>> Shape.circle == Color.red
|
||||
False
|
||||
|
||||
``IntEnum`` values behave like integers in other ways you'd expect::
|
||||
|
||||
>>> int(Shape.circle)
|
||||
1
|
||||
>>> ['a', 'b', 'c'][Shape.circle]
|
||||
'b'
|
||||
>>> [i for i in range(Shape.square)]
|
||||
[0, 1]
|
||||
|
||||
For the vast majority of code, ``Enum`` is strongly recommended,
|
||||
since ``IntEnum`` breaks some semantic promises of an enumeration (by
|
||||
being comparable to integers, and thus by transitivity to other
|
||||
unrelated enumerations). It should be used only in special cases where
|
||||
there's no other choice; for example, when integer constants are
|
||||
replaced with enumerations and backwards compatibility is required with code
|
||||
that still expects integers.
|
||||
|
||||
|
||||
Others
|
||||
^^^^^^
|
||||
|
||||
While ``IntEnum`` is part of the ``enum`` module, it would be very
|
||||
simple to implement independently::
|
||||
|
||||
class IntEnum(int, Enum):
|
||||
pass
|
||||
|
||||
This demonstrates how similar derived enumerations can be defined; for example
|
||||
a ``StrEnum`` that mixes in ``str`` instead of ``int``.
|
||||
|
||||
Some rules:
|
||||
|
||||
1. When subclassing ``Enum``, mix-in types must appear before
|
||||
``Enum`` itself in the sequence of bases, as in the ``IntEnum``
|
||||
example above.
|
||||
2. While ``Enum`` can have members of any type, once you mix in an
|
||||
additional type, all the members must have values of that type, e.g.
|
||||
``int`` above. This restriction does not apply to mix-ins which only
|
||||
add methods and don't specify another data type such as ``int`` or
|
||||
``str``.
|
||||
3. When another data type is mixed in, the ``value`` attribute is *not the
|
||||
same* as the enum member itself, although it is equivalent and will compare
|
||||
equal.
|
||||
4. %-style formatting: ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and
|
||||
``__repr__`` respectively; other codes (such as ``%i`` or ``%h`` for
|
||||
IntEnum) treat the enum member as its mixed-in type.
|
||||
|
||||
Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int``
|
||||
subclasses are printed as strings and not numbers when the ``%d``, ``%i``,
|
||||
or ``%u`` codes are used.
|
||||
5. ``str.__format__`` (or ``format``) will use the mixed-in
|
||||
type's ``__format__``. If the ``Enum``'s ``str`` or
|
||||
``repr`` is desired use the ``!s`` or ``!r`` ``str`` format codes.
|
||||
|
||||
|
||||
Decorators
|
||||
----------
|
||||
|
||||
unique
|
||||
^^^^^^
|
||||
|
||||
A ``class`` decorator specifically for enumerations. It searches an
|
||||
enumeration's ``__members__`` gathering any aliases it finds; if any are
|
||||
found ``ValueError`` is raised with the details::
|
||||
|
||||
>>> @unique
|
||||
... class NoDupes(Enum):
|
||||
... first = 'one'
|
||||
... second = 'two'
|
||||
... third = 'two'
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: duplicate names found in <enum 'NoDupes'>: third -> second
|
||||
|
||||
|
||||
Interesting examples
|
||||
--------------------
|
||||
|
||||
While ``Enum`` and ``IntEnum`` are expected to cover the majority of
|
||||
use-cases, they cannot cover them all. Here are recipes for some different
|
||||
types of enumerations that can be used directly, or as examples for creating
|
||||
one's own.
|
||||
|
||||
|
||||
AutoNumber
|
||||
^^^^^^^^^^
|
||||
|
||||
Avoids having to specify the value for each enumeration member::
|
||||
|
||||
>>> class AutoNumber(Enum):
|
||||
... def __new__(cls):
|
||||
... value = len(cls.__members__) + 1
|
||||
... obj = object.__new__(cls)
|
||||
... obj._value_ = value
|
||||
... return obj
|
||||
...
|
||||
>>> class Color(AutoNumber):
|
||||
... __order__ = "red green blue" # only needed in 2.x
|
||||
... red = ()
|
||||
... green = ()
|
||||
... blue = ()
|
||||
...
|
||||
>>> Color.green.value == 2
|
||||
True
|
||||
|
||||
Note:
|
||||
|
||||
The `__new__` method, if defined, is used during creation of the Enum
|
||||
members; it is then replaced by Enum's `__new__` which is used after
|
||||
class creation for lookup of existing members. Due to the way Enums are
|
||||
supposed to behave, there is no way to customize Enum's `__new__`.
|
||||
|
||||
|
||||
UniqueEnum
|
||||
^^^^^^^^^^
|
||||
|
||||
Raises an error if a duplicate member value is found instead of creating an
|
||||
alias::
|
||||
|
||||
>>> class UniqueEnum(Enum):
|
||||
... def __init__(self, *args):
|
||||
... cls = self.__class__
|
||||
... if any(self.value == e.value for e in cls):
|
||||
... a = self.name
|
||||
... e = cls(self.value).name
|
||||
... raise ValueError(
|
||||
... "aliases not allowed in UniqueEnum: %r --> %r"
|
||||
... % (a, e))
|
||||
...
|
||||
>>> class Color(UniqueEnum):
|
||||
... red = 1
|
||||
... green = 2
|
||||
... blue = 3
|
||||
... grene = 2
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: aliases not allowed in UniqueEnum: 'grene' --> 'green'
|
||||
|
||||
|
||||
OrderedEnum
|
||||
^^^^^^^^^^^
|
||||
|
||||
An ordered enumeration that is not based on ``IntEnum`` and so maintains
|
||||
the normal ``Enum`` invariants (such as not being comparable to other
|
||||
enumerations)::
|
||||
|
||||
>>> class OrderedEnum(Enum):
|
||||
... def __ge__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ >= other._value_
|
||||
... return NotImplemented
|
||||
... def __gt__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ > other._value_
|
||||
... return NotImplemented
|
||||
... def __le__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ <= other._value_
|
||||
... return NotImplemented
|
||||
... def __lt__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ < other._value_
|
||||
... return NotImplemented
|
||||
...
|
||||
>>> class Grade(OrderedEnum):
|
||||
... __order__ = 'A B C D F' # only needed in 2.x
|
||||
... A = 5
|
||||
... B = 4
|
||||
... C = 3
|
||||
... D = 2
|
||||
... F = 1
|
||||
...
|
||||
>>> Grade.C < Grade.A
|
||||
True
|
||||
|
||||
|
||||
Planet
|
||||
^^^^^^
|
||||
|
||||
If ``__new__`` or ``__init__`` is defined the value of the enum member
|
||||
will be passed to those methods::
|
||||
|
||||
>>> class Planet(Enum):
|
||||
... MERCURY = (3.303e+23, 2.4397e6)
|
||||
... VENUS = (4.869e+24, 6.0518e6)
|
||||
... EARTH = (5.976e+24, 6.37814e6)
|
||||
... MARS = (6.421e+23, 3.3972e6)
|
||||
... JUPITER = (1.9e+27, 7.1492e7)
|
||||
... SATURN = (5.688e+26, 6.0268e7)
|
||||
... URANUS = (8.686e+25, 2.5559e7)
|
||||
... NEPTUNE = (1.024e+26, 2.4746e7)
|
||||
... def __init__(self, mass, radius):
|
||||
... self.mass = mass # in kilograms
|
||||
... self.radius = radius # in meters
|
||||
... @property
|
||||
... def surface_gravity(self):
|
||||
... # universal gravitational constant (m3 kg-1 s-2)
|
||||
... G = 6.67300E-11
|
||||
... return G * self.mass / (self.radius * self.radius)
|
||||
...
|
||||
>>> Planet.EARTH.value
|
||||
(5.976e+24, 6378140.0)
|
||||
>>> Planet.EARTH.surface_gravity
|
||||
9.802652743337129
|
||||
|
||||
|
||||
How are Enums different?
|
||||
------------------------
|
||||
|
||||
Enums have a custom metaclass that affects many aspects of both derived Enum
|
||||
classes and their instances (members).
|
||||
|
||||
|
||||
Enum Classes
|
||||
^^^^^^^^^^^^
|
||||
|
||||
The ``EnumMeta`` metaclass is responsible for providing the
|
||||
``__contains__``, ``__dir__``, ``__iter__`` and other methods that
|
||||
allow one to do things with an ``Enum`` class that fail on a typical
|
||||
class, such as ``list(Color)`` or ``some_var in Color``. ``EnumMeta`` is
|
||||
responsible for ensuring that various other methods on the final ``Enum``
|
||||
class are correct (such as ``__new__``, ``__getnewargs__``,
|
||||
``__str__`` and ``__repr__``).
|
||||
|
||||
.. note::
|
||||
|
||||
``__dir__`` is not changed in the Python 2 line as it messes up some
|
||||
of the decorators included in the stdlib.
|
||||
|
||||
|
||||
Enum Members (aka instances)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The most interesting thing about Enum members is that they are singletons.
|
||||
``EnumMeta`` creates them all while it is creating the ``Enum``
|
||||
class itself, and then puts a custom ``__new__`` in place to ensure
|
||||
that no new ones are ever instantiated by returning only the existing
|
||||
member instances.
|
||||
|
||||
|
||||
Finer Points
|
||||
^^^^^^^^^^^^
|
||||
|
||||
``Enum`` members are instances of an ``Enum`` class, and even though they
|
||||
are accessible as `EnumClass.member1.member2`, they should not be
|
||||
accessed directly from the member as that lookup may fail or, worse,
|
||||
return something besides the ``Enum`` member you were looking for
|
||||
(changed in version 1.1.1)::
|
||||
|
||||
>>> class FieldTypes(Enum):
|
||||
... name = 1
|
||||
... value = 2
|
||||
... size = 3
|
||||
...
|
||||
>>> FieldTypes.value.size
|
||||
<FieldTypes.size: 3>
|
||||
>>> FieldTypes.size.value
|
||||
3
|
||||
|
||||
The ``__members__`` attribute is only available on the class.
|
||||
|
||||
In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being
|
||||
the definition order. In Python 2.7 ``__members__`` is an ``OrderedDict`` if
|
||||
``__order__`` was specified, and a plain ``dict`` otherwise. In all other Python
|
||||
2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was specified
|
||||
as the ``OrderedDict`` type didn't exist yet.
|
||||
|
||||
If you give your ``Enum`` subclass extra methods, like the `Planet`_
|
||||
class above, those methods will show up in a `dir` of the member,
|
||||
but not of the class::
|
||||
|
||||
>>> dir(Planet)
|
||||
['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS',
|
||||
'VENUS', '__class__', '__doc__', '__members__', '__module__']
|
||||
>>> dir(Planet.EARTH)
|
||||
['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value']
|
||||
|
||||
A ``__new__`` method will only be used for the creation of the
|
||||
``Enum`` members -- after that it is replaced. This means if you wish to
|
||||
change how ``Enum`` members are looked up you either have to write a
|
||||
helper function or a ``classmethod``.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,312 @@
|
||||
import codecs
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import shutil
|
||||
import tempfile
|
||||
import traceback
|
||||
|
||||
import appdirs
|
||||
|
||||
from scandir import scandir
|
||||
|
||||
try:
|
||||
from collections.abc import MutableMapping
|
||||
unicode = str
|
||||
except ImportError:
|
||||
# Python 2 imports
|
||||
from collections import MutableMapping
|
||||
FileNotFoundError = IOError
|
||||
|
||||
from .posixemulation import rename
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FileCache(MutableMapping):
    """A persistent file cache that is dictionary-like and has a write buffer.

    *appname* is passed to `appdirs <https://pypi.python.org/pypi/appdirs/>`_
    to determine a system-appropriate location for the cache files. The cache
    directory used is available via :data:`cache_dir`.

    By default, a write buffer is used, so writing to cache files is not done
    until :meth:`sync` is explicitly called. This behavior can be changed using
    the optional *flag* argument.

    .. NOTE::
        Keys and values are always stored as :class:`bytes` objects. If data
        serialization is enabled, keys are returned as :class:`str` or
        :class:`unicode` objects.
        If data serialization is disabled, keys are returned as a
        :class:`bytes` object.

    :param str appname: The app/script the cache should be associated with.
    :param str flag: How the cache should be opened. See below for details.
    :param mode: The Unix mode for the cache files.
    :param str keyencoding: The encoding the keys use, defaults to 'utf-8'.
        This is used if *serialize* is ``False``; the keys are treated as
        :class:`bytes` objects.
    :param bool serialize: Whether or not to (de)serialize the values. If a
        cache is used with a :class:`~shelve.Shelf`, set this to ``False``.
    :param str app_cache_dir: absolute path to root cache directory to be
        used in place of system-appropriate location determined by appdirs

    The optional *flag* argument can be:

    +---------+-------------------------------------------+
    | Value   | Meaning                                   |
    +=========+===========================================+
    | ``'r'`` | Open existing cache for reading only      |
    +---------+-------------------------------------------+
    | ``'w'`` | Open existing cache for reading and       |
    |         | writing                                   |
    +---------+-------------------------------------------+
    | ``'c'`` | Open cache for reading and writing,       |
    |         | creating it if it doesn't exist (default) |
    +---------+-------------------------------------------+
    | ``'n'`` | Always create a new, empty cache, open    |
    |         | for reading and writing                   |
    +---------+-------------------------------------------+

    If a ``'s'`` is appended to the *flag* argument, the cache will be opened
    in sync mode. Writing to the cache will happen immediately and will not be
    buffered.

    If an application needs to use more than one cache, then it should use
    subcaches. To create a subcache, append a series of one or more names
    separated by periods to the application name when creating a
    :class:`FileCache` object (e.g. ``'appname.subcache'`` or
    ``'appname.subcache.subcache'``).
    Subcaches are a way for an application to use more than one cache without
    polluting a user's cache directory. All caches -- main caches or subcaches
    -- are totally independent. The only aspect in which they are linked is
    that all of an application's caches exist in the same system directory.
    Because each cache is independent of every other cache, calling
    :meth:`delete` on an application's main cache will not delete data in
    its subcaches.

    """

    def __init__(self, appname, flag='c', mode=0o666, keyencoding='utf-8',
                 serialize=True, app_cache_dir=None):
        """Initialize a :class:`FileCache` object.

        :raises TypeError: if *flag* is not a :class:`str`.
        :raises ValueError: if *flag* is empty or malformed, or if one of the
            subcache components is named ``'cache'``.
        :raises FileNotFoundError: if the cache directory does not exist and
            *flag* does not permit creating it (``'r'`` or ``'w'``).
        """
        if not isinstance(flag, str):
            raise TypeError("flag must be str not '{}'".format(type(flag)))
        elif not flag or flag[0] not in 'rwcn':
            # An empty flag is rejected here as well; indexing flag[0] on ''
            # would otherwise surface as an unrelated IndexError.
            raise ValueError("invalid flag: '{}', first flag must be one of "
                             "'r', 'w', 'c' or 'n'".format(flag))
        elif len(flag) > 1 and flag[1] != 's':
            raise ValueError("invalid flag: '{}', second flag must be "
                             "'s'".format(flag))

        appname, subcache = self._parse_appname(appname)
        # 'cache' is the name of the directory holding the cache files, so it
        # cannot double as a subcache directory name.
        if 'cache' in subcache:
            raise ValueError("invalid subcache name: 'cache'.")
        self._is_subcache = bool(subcache)

        if not app_cache_dir:
            app_cache_dir = appdirs.user_cache_dir(appname, appname)
        subcache_dir = os.path.join(app_cache_dir, *subcache)
        self.cache_dir = os.path.join(subcache_dir, 'cache')
        exists = os.path.exists(self.cache_dir)

        if len(flag) > 1 and flag[1] == 's':
            self._sync = True
        else:
            # Buffered mode: writes are collected here until sync() is called.
            self._sync = False
            self._buffer = {}

        if exists and 'n' in flag:
            self.clear()
            self.create()
        elif not exists and ('c' in flag or 'n' in flag):
            self.create()
        elif not exists:
            raise FileNotFoundError("no such directory: '{}'".format(
                self.cache_dir))

        # Mode passed to os.fdopen() when writing cache files; a cache opened
        # with 'r' therefore gets a read-only handle and writes fail.
        self._flag = 'rb' if 'r' in flag else 'wb'
        self._mode = mode
        self._keyencoding = keyencoding
        self._serialize = serialize

    def _parse_appname(self, appname):
        """Splits an appname into the appname and subcache components."""
        components = appname.split('.')
        return components[0], components[1:]

    def create(self):
        """Create the write buffer and cache directory."""
        if not self._sync and not hasattr(self, '_buffer'):
            self._buffer = {}
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)

    def clear(self):
        """Remove all items from the write buffer and cache.

        The write buffer object and cache directory are not deleted.

        """
        self.delete()
        self.create()

    def delete(self):
        """Delete the write buffer and cache directory."""
        if not self._sync:
            del self._buffer
        shutil.rmtree(self.cache_dir)

    def close(self):
        """Sync the write buffer, then close the cache.

        If a closed :class:`FileCache` object's methods are called, a
        :exc:`ValueError` will be raised.

        """
        self.sync()
        self.sync = self.create = self.delete = self._closed
        self._write_to_file = self._read_from_file = self._closed
        self._key_to_filename = self._filename_to_key = self._closed
        # Python looks special methods up on the type, so the dunder
        # rebinding below only affects explicit calls such as
        # ``cache.__getitem__(key)``. Disabling the helpers that every
        # mapping operation goes through makes ``cache[key]``, ``len(cache)``
        # and iteration raise ValueError as well.
        self._encode_key = self._all_keys = self._closed
        self.__getitem__ = self.__setitem__ = self.__delitem__ = self._closed
        self.__iter__ = self.__len__ = self.__contains__ = self._closed

    def sync(self):
        """Sync the write buffer with the cache files and clear the buffer.

        If the :class:`FileCache` object was opened with the optional ``'s'``
        *flag* argument, then calling :meth:`sync` will do nothing.

        A failure to write an individual entry is logged and that entry is
        dropped together with the rest of the buffer.
        """
        if self._sync:
            return  # opened in sync mode, so skip the manual sync
        self._sync = True
        try:
            for ekey in self._buffer:
                filename = self._key_to_filename(ekey)
                try:
                    self._write_to_file(filename, self._buffer[ekey])
                except Exception:
                    # Best effort: keep flushing the remaining entries, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    logger.error("Couldn't write content from %r to cache file: %r: %s", ekey, filename,
                                 traceback.format_exc())
            self._buffer.clear()
        finally:
            # Always return to buffered mode, even if the flush was aborted.
            self._sync = False

    def _closed(self, *args, **kwargs):
        """Filler method for closed cache methods."""
        raise ValueError("invalid operation on closed cache")

    def _encode_key(self, key):
        """Encode key using *hex_codec* for constructing a cache filename.

        Keys are implicitly converted to :class:`bytes` if passed as
        :class:`str`.

        :raises TypeError: if *key* is neither text nor :class:`bytes`.
        """
        if isinstance(key, (str, unicode)):
            key = key.encode(self._keyencoding)
        elif not isinstance(key, bytes):
            raise TypeError("key must be bytes or str")
        return codecs.encode(key, 'hex_codec').decode(self._keyencoding)

    def _decode_key(self, key):
        """Decode key using hex_codec to retrieve the original key.

        Keys are returned as :class:`str` if serialization is enabled.
        Keys are returned as :class:`bytes` if serialization is disabled.

        """
        bkey = codecs.decode(key.encode(self._keyencoding), 'hex_codec')
        return bkey.decode(self._keyencoding) if self._serialize else bkey

    def _dumps(self, value):
        """Pickle *value* when serialization is enabled, else pass it through."""
        return value if not self._serialize else pickle.dumps(value)

    def _loads(self, value):
        """Unpickle *value* when serialization is enabled, else pass it through."""
        # SECURITY: pickle.loads() can execute arbitrary code. Only use a
        # serializing cache on directories that untrusted parties cannot
        # write to.
        return value if not self._serialize else pickle.loads(value)

    def _key_to_filename(self, key):
        """Convert an encoded key to an absolute cache filename."""
        return os.path.join(self.cache_dir, key)

    def _filename_to_key(self, absfilename):
        """Convert an absolute cache filename to a key name."""
        return os.path.split(absfilename)[1]

    def _all_filenames(self):
        """Yield the absolute filename of every regular file in the cache.

        Yields nothing if the cache directory cannot be scanned.
        """
        try:
            for entry in scandir(self.cache_dir):
                if entry.is_file(follow_symlinks=False):
                    yield os.path.join(self.cache_dir, entry.name)
        except (FileNotFoundError, OSError):
            # End the generator normally. Raising StopIteration inside a
            # generator is turned into RuntimeError by PEP 479.
            return

    def _all_keys(self):
        """Return a set of all encoded key names (files plus write buffer)."""
        file_keys = [self._filename_to_key(fn) for fn in self._all_filenames()]
        if self._sync:
            return set(file_keys)
        else:
            return set(file_keys + list(self._buffer))

    def _write_to_file(self, filename, bytesvalue):
        """Write bytesvalue to filename.

        The data is written to a temporary file first and then moved over
        *filename*; the temporary file is removed again if that fails.
        """
        fh, tmp = tempfile.mkstemp()
        moved = False
        try:
            with os.fdopen(fh, self._flag) as f:
                f.write(self._dumps(bytesvalue))
            rename(tmp, filename)
            moved = True
        finally:
            if not moved:
                # Don't leave an orphaned temporary file behind.
                try:
                    os.remove(tmp)
                except OSError:
                    pass
        os.chmod(filename, self._mode)

    def _read_from_file(self, filename):
        """Read data from filename.

        Returns ``None`` (after logging a warning) if the file can't be read.
        """
        try:
            with open(filename, 'rb') as f:
                return self._loads(f.read())
        except (IOError, OSError):
            logger.warning('Error opening file: {}'.format(filename))
            return None

    def __setitem__(self, key, value):
        ekey = self._encode_key(key)
        if not self._sync:
            self._buffer[ekey] = value
        else:
            filename = self._key_to_filename(ekey)
            self._write_to_file(filename, value)

    def __getitem__(self, key):
        ekey = self._encode_key(key)
        if not self._sync:
            # Pending writes take precedence over what is on disk.
            try:
                return self._buffer[ekey]
            except KeyError:
                pass
        filename = self._key_to_filename(ekey)
        if filename not in self._all_filenames():
            raise KeyError(key)
        return self._read_from_file(filename)

    def __delitem__(self, key):
        ekey = self._encode_key(key)
        filename = self._key_to_filename(ekey)
        if not self._sync:
            try:
                del self._buffer[ekey]
            except KeyError:
                if filename not in self._all_filenames():
                    raise KeyError(key)
        # The key may exist in the buffer only, so a missing file is fine.
        try:
            os.remove(filename)
        except (IOError, OSError):
            pass

    def __iter__(self):
        for key in self._all_keys():
            yield self._decode_key(key)

    def __len__(self):
        return len(self._all_keys())

    def __contains__(self, key):
        ekey = self._encode_key(key)
        return ekey in self._all_keys()
|
||||
@@ -0,0 +1,113 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
r"""
|
||||
werkzeug.posixemulation
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Provides a POSIX emulation for some features that are relevant to
|
||||
web applications. The main purpose is to simplify support for
|
||||
systems such as Windows NT that are not 100% POSIX compatible.
|
||||
|
||||
Currently this only implements a :func:`rename` function that
|
||||
follows POSIX semantics. Eg: if the target file already exists it
|
||||
will be replaced without asking.
|
||||
|
||||
This module was introduced in 0.6.1 and is not a public interface.
|
||||
It might become one in later versions of Werkzeug.
|
||||
|
||||
:copyright: (c) 2013 by the Werkzeug Team, see AUTHORS for more details.
|
||||
:license: BSD, see LICENSE for more details.
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import errno
|
||||
import time
|
||||
import random
|
||||
import shutil
|
||||
|
||||
|
||||
# Whether the platform's rename() can replace a file that is currently open.
can_rename_open_file = False
if os.name == 'nt':  # pragma: no cover
    # Fallbacks used when the ctypes-based implementations below cannot be
    # set up; returning False makes rename() use plain os.rename().
    _rename = lambda src, dst: False
    _rename_atomic = lambda src, dst: False
    if sys.version_info >= (3, 0):
        unicode = str

    try:
        import ctypes

        _MOVEFILE_REPLACE_EXISTING = 0x1
        _MOVEFILE_WRITE_THROUGH = 0x8
        _MoveFileEx = ctypes.windll.kernel32.MoveFileExW

        def _rename(src, dst):
            """Move src over dst, retrying up to 100 times; truthy on success."""
            if not isinstance(src, unicode):
                src = unicode(src, sys.getfilesystemencoding())
            if not isinstance(dst, unicode):
                dst = unicode(dst, sys.getfilesystemencoding())
            if _rename_atomic(src, dst):
                return True
            retry = 0
            rv = False
            while not rv and retry < 100:
                rv = _MoveFileEx(src, dst, _MOVEFILE_REPLACE_EXISTING |
                                 _MOVEFILE_WRITE_THROUGH)
                if not rv:
                    time.sleep(0.001)
                    retry += 1
            return rv

        # new in Vista and Windows Server 2008
        _CreateTransaction = ctypes.windll.ktmw32.CreateTransaction
        _CommitTransaction = ctypes.windll.ktmw32.CommitTransaction
        _MoveFileTransacted = ctypes.windll.kernel32.MoveFileTransactedW
        _CloseHandle = ctypes.windll.kernel32.CloseHandle
        can_rename_open_file = True

        def _rename_atomic(src, dst):
            """Move src over dst inside a transaction; truthy on success."""
            ta = _CreateTransaction(None, 0, 0, 0, 0, 1000, 'Werkzeug rename')
            if ta == -1:
                return False
            try:
                retry = 0
                rv = False
                while not rv and retry < 100:
                    rv = _MoveFileTransacted(src, dst, None, None,
                                             _MOVEFILE_REPLACE_EXISTING |
                                             _MOVEFILE_WRITE_THROUGH, ta)
                    if rv:
                        rv = _CommitTransaction(ta)
                        break
                    else:
                        time.sleep(0.001)
                        retry += 1
                return rv
            finally:
                _CloseHandle(ta)
    except Exception:
        # Any failure while binding the Win32 functions leaves whichever
        # implementations were defined so far (at worst the lambdas) in place.
        pass

    def rename(src, dst):
        """Rename src to dst, replacing dst if it already exists."""
        # Try atomic or pseudo-atomic rename
        if _rename(src, dst):
            return
        # Fall back to "move away and replace"
        try:
            os.rename(src, dst)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            # sys.maxint does not exist on Python 3; sys.maxsize is
            # available on both Python 2.6+ and Python 3.
            old = "%s-%08x" % (dst, random.randint(0, sys.maxsize))
            os.rename(dst, old)
            os.rename(src, dst)
            try:
                os.unlink(old)
            except Exception:
                pass
else:
    # If dst on current filesystem then use
    # atomic rename. Otherwise, fall back to a
    # non-atomic copy and remove.
    rename = shutil.move
    can_rename_open_file = True
|
||||
@@ -0,0 +1,411 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
ftfy: fixes text for you
|
||||
|
||||
This is a module for making text less broken. See the `fix_text` function
|
||||
for more information.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import unicodedata
|
||||
import ftfy.bad_codecs
|
||||
from ftfy import fixes
|
||||
from ftfy.formatting import display_ljust
|
||||
from ftfy.compatibility import is_printable
|
||||
|
||||
# Version string of this copy of the package.
__version__ = '4.4.3'


# See the docstring for ftfy.bad_codecs to see what we're doing here.
# (Per that docstring, importing the package is what makes the extra codecs
# available; this call only exists so that code checkers see the import
# being used.)
ftfy.bad_codecs.ok()
|
||||
|
||||
|
||||
def fix_text(text,
             fix_entities='auto',
             remove_terminal_escapes=True,
             fix_encoding=True,
             fix_latin_ligatures=True,
             fix_character_width=True,
             uncurl_quotes=True,
             fix_line_breaks=True,
             fix_surrogates=True,
             remove_control_chars=True,
             remove_bom=True,
             normalization='NFC',
             max_decode_length=10**6):
    r"""
    Given Unicode text as input, fix inconsistencies and glitches in it,
    such as mojibake.

    Let's start with some examples:

        >>> print(fix_text('\xc3\xbcnicode'))
        ünicode

        >>> print(fix_text('Broken text… it’s flubberific!',
        ...                normalization='NFKC'))
        Broken text... it's flubberific!

        >>> print(fix_text('HTML entities &lt;3'))
        HTML entities <3

        >>> print(fix_text('<em>HTML entities &lt;3</em>'))
        <em>HTML entities &lt;3</em>

        >>> print(fix_text("¯\\_(ã\x83\x84)_/¯"))
        ¯\_(ツ)_/¯

        >>> # This example string starts with a byte-order mark, even if
        >>> # you can't see it on the Web.
        >>> print(fix_text('\ufeffParty like\nit’s 1999!'))
        Party like
        it's 1999!

        >>> print(fix_text('\uff2c\uff2f\uff35\uff24\u3000\uff2e\uff2f\uff29\uff33\uff25\uff33'))
        LOUD NOISES

        >>> len(fix_text('\ufb01' * 100000))
        200000

        >>> len(fix_text(''))
        0

    Based on the options you provide, ftfy applies these steps in order:

    - If `remove_terminal_escapes` is True, remove sequences of bytes that are
      instructions for Unix terminals, such as the codes that make text appear
      in different colors.

    - If `fix_encoding` is True, look for common mistakes that come from
      encoding or decoding Unicode text incorrectly, and fix them if they are
      reasonably fixable. See `fixes.fix_encoding` for details.

    - If `fix_entities` is True, replace HTML entities with their equivalent
      characters. If it's "auto" (the default), then consider replacing HTML
      entities, but don't do so in text where you have seen a pair of actual
      angle brackets (that's probably actually HTML and you shouldn't mess
      with the entities).

    - If `uncurl_quotes` is True, replace various curly quotation marks with
      plain-ASCII straight quotes.

    - If `fix_latin_ligatures` is True, then ligatures made of Latin letters,
      such as the single-character form of "fi" (U+FB01), will be separated
      into individual letters. These ligatures are usually not meaningful
      outside of font rendering, and often represent copy-and-paste errors.

    - If `fix_character_width` is True, half-width and full-width characters
      will be replaced by their standard-width form.

    - If `fix_line_breaks` is true, convert all line breaks to Unix style
      (CRLF and CR line breaks become LF line breaks).

    - If `fix_surrogates` is true, ensure that there are no UTF-16 surrogates
      in the resulting string, by converting them to the correct characters
      when they're appropriately paired, or replacing them with \ufffd
      otherwise.

    - If `remove_control_chars` is true, remove control characters that
      are not suitable for use in text. This includes most of the ASCII control
      characters, plus some Unicode controls such as the byte order mark
      (U+FEFF). Useful control characters, such as Tab, Line Feed, and
      bidirectional marks, are left as they are.

    - If `remove_bom` is True, remove the Byte-Order Mark at the start of the
      string if it exists. (This is largely redundant, because it's a special
      case of `remove_control_chars`. This option will become deprecated
      in a later version.)

    - If `normalization` is not None, apply the specified form of Unicode
      normalization, which can be one of 'NFC', 'NFKC', 'NFD', and 'NFKD'.

      - The default normalization, NFC, combines characters and diacritics that
        are written using separate code points, such as converting "e" plus an
        acute accent modifier into "é", or converting "ka" (か) plus a dakuten
        into the single character "ga" (が). Unicode can be converted to NFC
        form without any change in its meaning.

      - If you ask for NFKC normalization, it will apply additional
        normalizations that can change the meanings of characters. For example,
        ellipsis characters will be replaced with three periods, all ligatures
        will be replaced with the individual characters that make them up,
        and characters that differ in font style will be converted to the same
        character.

    - If anything was changed, repeat all the steps, so that the function is
      idempotent. "&amp;amp;" will become "&", for example, not "&amp;".

    `fix_text` will work one line at a time, with the possibility that some
    lines are in different encodings, allowing it to fix text that has been
    concatenated together from different sources.

    When it encounters lines longer than `max_decode_length` (1 million
    codepoints by default), it will not run the `fix_encoding` step, to avoid
    unbounded slowdowns.

    If you're certain that any decoding errors in the text would have affected
    the entire text in the same way, and you don't mind operations that scale
    with the length of the text, you can use `fix_text_segment` directly to
    fix the whole string in one batch.
    """
    if isinstance(text, bytes):
        raise UnicodeError(fixes.BYTES_ERROR_TEXT)

    fixed_pieces = []
    total = len(text)
    start = 0
    while start < total:
        # A piece runs through its terminating newline, or to the end of the
        # text when no newline is left.
        newline_at = text.find('\n', start)
        end = total if newline_at < 0 else newline_at + 1
        piece = text[start:end]

        # Over-long pieces skip the encoding fix to bound the running time.
        if (end - start) > max_decode_length:
            decode_this_piece = False
        else:
            decode_this_piece = fix_encoding

        if fix_entities == 'auto' and '<' in piece and '>' in piece:
            # we see angle brackets together; this could be HTML, so entity
            # replacement stays off for the rest of the text
            fix_entities = False

        fixed = fix_text_segment(
            piece,
            fix_entities=fix_entities,
            remove_terminal_escapes=remove_terminal_escapes,
            fix_encoding=decode_this_piece,
            uncurl_quotes=uncurl_quotes,
            fix_latin_ligatures=fix_latin_ligatures,
            fix_character_width=fix_character_width,
            fix_line_breaks=fix_line_breaks,
            fix_surrogates=fix_surrogates,
            remove_control_chars=remove_control_chars,
            remove_bom=remove_bom,
            normalization=normalization
        )
        fixed_pieces.append(fixed)
        start = end

    return ''.join(fixed_pieces)
|
||||
|
||||
# Some alternate names for the main functions
# `ftfy` is another name for `fix_text`.
ftfy = fix_text
# Re-export the encoding fixers defined in `ftfy.fixes` at package level.
fix_encoding = fixes.fix_encoding
fix_text_encoding = fixes.fix_text_encoding   # deprecated
|
||||
|
||||
|
||||
def fix_file(input_file,
             encoding=None,
             fix_entities='auto',
             remove_terminal_escapes=True,
             fix_encoding=True,
             fix_latin_ligatures=True,
             fix_character_width=True,
             uncurl_quotes=True,
             fix_line_breaks=True,
             fix_surrogates=True,
             remove_control_chars=True,
             remove_bom=True,
             normalization='NFC'):
    """
    Fix the text of a file, lazily, one line at a time.

    Lines that arrive as Unicode text are used as they are. Lines that arrive
    as bytes are decoded with `encoding` when one was supplied; otherwise the
    encoding is guessed from the first bytes line (see `guess_bytes` for how,
    and for why that is not reliable) and reused for the lines after it.

    This is a generator: it yields each fixed line of text in turn.
    """
    entity_mode = fix_entities
    for raw_line in input_file:
        line = raw_line
        if isinstance(line, bytes):
            if encoding is not None:
                line = line.decode(encoding)
            else:
                # Remember the guess so later lines are decoded the same way.
                line, encoding = guess_bytes(line)

        # Once a line looks like HTML, stop unescaping entities for good.
        if fix_entities == 'auto' and '<' in line and '>' in line:
            entity_mode = False

        fixed_line = fix_text_segment(
            line,
            fix_entities=entity_mode,
            remove_terminal_escapes=remove_terminal_escapes,
            fix_encoding=fix_encoding,
            fix_latin_ligatures=fix_latin_ligatures,
            fix_character_width=fix_character_width,
            uncurl_quotes=uncurl_quotes,
            fix_line_breaks=fix_line_breaks,
            fix_surrogates=fix_surrogates,
            remove_control_chars=remove_control_chars,
            remove_bom=remove_bom,
            normalization=normalization
        )
        yield fixed_line
|
||||
|
||||
|
||||
def fix_text_segment(text,
                     fix_entities='auto',
                     remove_terminal_escapes=True,
                     fix_encoding=True,
                     fix_latin_ligatures=True,
                     fix_character_width=True,
                     uncurl_quotes=True,
                     fix_line_breaks=True,
                     fix_surrogates=True,
                     remove_control_chars=True,
                     remove_bom=True,
                     normalization='NFC'):
    """
    Fix one chunk of text in a single batch. The chunk may be one line
    handed over by `fix_text`, or any larger amount of text that you are
    certain is in a consistent encoding.

    The enabled fixes are applied in a fixed order, over and over, until a
    full pass leaves the text unchanged.

    See `fix_text` for a description of the parameters.
    """
    if isinstance(text, bytes):
        raise UnicodeError(fixes.BYTES_ERROR_TEXT)

    if fix_entities == 'auto' and '<' in text and '>' in text:
        fix_entities = False

    # (enabled?, name of the function in `fixes`), in application order.
    pipeline = (
        (remove_terminal_escapes, 'remove_terminal_escapes'),
        (fix_encoding, 'fix_encoding'),
        (fix_entities, 'unescape_html'),
        (fix_latin_ligatures, 'fix_latin_ligatures'),
        (fix_character_width, 'fix_character_width'),
        (uncurl_quotes, 'uncurl_quotes'),
        (fix_line_breaks, 'fix_line_breaks'),
        (fix_surrogates, 'fix_surrogates'),
        (remove_control_chars, 'remove_control_chars'),
        # Removing the BOM would be redundant after `remove_control_chars`,
        # so it only runs when that step is switched off.
        (remove_bom and not remove_control_chars, 'remove_bom'),
    )

    while True:
        before = text
        for enabled, fixer_name in pipeline:
            if enabled:
                text = getattr(fixes, fixer_name)(text)
        if normalization is not None:
            text = unicodedata.normalize(normalization, text)
        if text == before:
            return text
|
||||
|
||||
|
||||
def guess_bytes(bstring):
    """
    NOTE: Using `guess_bytes` is not the recommended way of using ftfy. ftfy
    is not designed to be an encoding detector.

    In the unfortunate situation that you have some bytes in an unknown
    encoding, ftfy can guess a reasonable strategy for decoding them, by trying
    a few common encodings that can be distinguished from each other.

    Unlike the rest of ftfy, this may not be accurate, and it may *create*
    Unicode problems instead of solving them!

    It doesn't try East Asian encodings at all, and if you have East Asian text
    that you don't know how to decode, you are somewhat out of luck. East
    Asian encodings require some serious statistics to distinguish from each
    other, so we can't support them without decreasing the accuracy of ftfy.

    If you don't know which encoding you have at all, I recommend
    trying the 'chardet' module, and being appropriately skeptical about its
    results.

    The encodings we try here are:

    - UTF-16 with a byte order mark, because a UTF-16 byte order mark looks
      like nothing else
    - UTF-8, because it's the global standard, which has been used by a
      majority of the Web since 2008
    - "utf-8-variants", because it's what people actually implement when they
      think they're doing UTF-8
    - MacRoman, because Microsoft Office thinks it's still a thing, and it
      can be distinguished by its line breaks. (If there are no line breaks in
      the string, though, you're out of luck.)
    - "sloppy-windows-1252", the Latin-1-like encoding that is the most common
      single-byte encoding

    Returns a pair of (decoded text, name of the encoding that was used).
    Raises UnicodeError if the input is already a Unicode string.
    """
    if type(bstring) == type(''):
        raise UnicodeError(
            "This string was already decoded as Unicode. You should pass "
            "bytes to guess_bytes, not Unicode."
        )

    if bstring.startswith((b'\xfe\xff', b'\xff\xfe')):
        return bstring.decode('utf-16'), 'utf-16'

    bytes_present = set(bytes(bstring))
    marker_ed, marker_c0, carriage_return, line_feed = b'\xed\xc0\r\n'

    # Byte 0xed can be used to encode a range of codepoints that are UTF-16
    # surrogates. UTF-8 does not use UTF-16 surrogates, so when we see 0xed,
    # it's very likely we're being asked to decode CESU-8, the variant that
    # encodes UTF-16 surrogates instead of the original characters
    # themselves.
    #
    # This will occasionally trigger on standard UTF-8, as there are some
    # Korean characters that also use byte 0xed, but that's not harmful.
    #
    # Byte 0xc0 is impossible because, numerically, it would only encode
    # characters lower than U+0040. Those already have single-byte
    # representations, and UTF-8 requires using the shortest possible
    # representation. However, Java hides the null codepoint, U+0000, in a
    # non-standard longer representation -- it encodes it as 0xc0 0x80
    # instead of 0x00, guaranteeing that 0x00 will never appear in the
    # encoded bytes.
    #
    # The 'utf-8-variants' decoder can handle both of these cases, as well
    # as standard UTF-8, at the cost of a bit of speed.
    if marker_ed in bytes_present or marker_c0 in bytes_present:
        utf8_flavor = 'utf-8-variants'
    else:
        utf8_flavor = 'utf-8'

    try:
        return bstring.decode(utf8_flavor), utf8_flavor
    except UnicodeDecodeError:
        # Not any kind of UTF-8; fall through to the single-byte encodings.
        pass

    # Carriage returns without any line feed are taken as the sign of
    # MacRoman text.
    if carriage_return in bstring and line_feed not in bstring:
        single_byte = 'macroman'
    else:
        single_byte = 'sloppy-windows-1252'
    return bstring.decode(single_byte), single_byte
|
||||
|
||||
|
||||
def explain_unicode(text):
    """
    A debugging aid for mysterious Unicode.

    For every codepoint of `text`, one line is printed showing its number in
    hexadecimal, its glyph (or an escape sequence when `is_printable` says
    it is not printable), its category in the Unicode standard, and its name
    in the Unicode standard ('<unknown>' when it has none).

        >>> explain_unicode('(╯°□°)╯︵ ┻━┻')
        U+0028 ( [Ps] LEFT PARENTHESIS
        U+256F ╯ [So] BOX DRAWINGS LIGHT ARC UP AND LEFT
        U+00B0 ° [So] DEGREE SIGN
        U+25A1 □ [So] WHITE SQUARE
        U+00B0 ° [So] DEGREE SIGN
        U+0029 ) [Pe] RIGHT PARENTHESIS
        U+256F ╯ [So] BOX DRAWINGS LIGHT ARC UP AND LEFT
        U+FE35 ︵ [Ps] PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
        U+0020 [Zs] SPACE
        U+253B ┻ [So] BOX DRAWINGS HEAVY UP AND HORIZONTAL
        U+2501 ━ [So] BOX DRAWINGS HEAVY HORIZONTAL
        U+253B ┻ [So] BOX DRAWINGS HEAVY UP AND HORIZONTAL
    """
    row_template = 'U+{code:04X} {display} [{category}] {name}'
    for char in text:
        # Unprintable characters are shown as their escape sequence instead.
        glyph = (char if is_printable(char)
                 else char.encode('unicode-escape').decode('ascii'))
        row = row_template.format(
            display=display_ljust(glyph, 7),
            code=ord(char),
            category=unicodedata.category(char),
            name=unicodedata.name(char, '<unknown>')
        )
        print(row)
|
||||
@@ -0,0 +1,94 @@
|
||||
# coding: utf-8
|
||||
r"""
|
||||
Give Python the ability to decode some common, flawed encodings.
|
||||
|
||||
Python does not want you to be sloppy with your text. Its encoders and decoders
|
||||
("codecs") follow the relevant standards whenever possible, which means that
|
||||
when you get text that *doesn't* follow those standards, you'll probably fail
|
||||
to decode it. Or you might succeed at decoding it for implementation-specific
|
||||
reasons, which is perhaps worse.
|
||||
|
||||
There are some encodings out there that Python wishes didn't exist, which are
|
||||
widely used outside of Python:
|
||||
|
||||
- "utf-8-variants", a family of not-quite-UTF-8 encodings, including the
|
||||
ever-popular CESU-8 and "Java modified UTF-8".
|
||||
- "Sloppy" versions of character map encodings, where bytes that don't map to
|
||||
anything will instead map to the Unicode character with the same number.
|
||||
|
||||
Simply importing this module, or in fact any part of the `ftfy` package, will
|
||||
make these new "bad codecs" available to Python through the standard Codecs
|
||||
API. You never have to actually call any functions inside `ftfy.bad_codecs`.
|
||||
|
||||
However, if you want to call something because your code checker insists on it,
|
||||
you can call ``ftfy.bad_codecs.ok()``.
|
||||
|
||||
A quick example of decoding text that's encoded in CESU-8:
|
||||
|
||||
>>> import ftfy.bad_codecs
|
||||
>>> print(b'\xed\xa0\xbd\xed\xb8\x8d'.decode('utf-8-variants'))
|
||||
😍
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
from encodings import normalize_encoding
|
||||
import codecs
|
||||
|
||||
_CACHE = {}
|
||||
|
||||
# Define some aliases for 'utf-8-variants'. All hyphens get turned into
|
||||
# underscores, because of `normalize_encoding`.
|
||||
UTF8_VAR_NAMES = (
|
||||
'utf_8_variants', 'utf8_variants',
|
||||
'utf_8_variant', 'utf8_variant',
|
||||
'utf_8_var', 'utf8_var',
|
||||
'cesu_8', 'cesu8',
|
||||
'java_utf_8', 'java_utf8'
|
||||
)
|
||||
|
||||
|
||||
def search_function(encoding):
    """
    Codec search hook for the "bad codecs" (see `codecs.register`).

    Given an encoding name, return the matching codec object, or None when
    this package does not provide that encoding.

    Two families of names are recognised, after the name has been passed
    through `normalize_encoding`:

    - Any alias listed in `UTF8_VAR_NAMES`, which resolves to the
      'utf-8-variants' codec.
    - Names beginning with 'sloppy_' (that is, 'sloppy-windows-NNNN',
      'sloppy-iso-8859-N' and friends), which are looked up in the table of
      sloppy single-byte codecs.

    Successful lookups are remembered in `_CACHE`, keyed by the name exactly
    as it was passed in. Failed lookups are not cached.
    """
    # Only non-None codecs are ever stored, so a None result from .get()
    # always means "not cached yet".
    remembered = _CACHE.get(encoding)
    if remembered is not None:
        return remembered

    canonical = normalize_encoding(encoding)
    if canonical in UTF8_VAR_NAMES:
        # Imported lazily, so the codec module is only loaded on first use.
        from ftfy.bad_codecs.utf8_variants import CODEC_INFO as found
    elif canonical.startswith('sloppy_'):
        from ftfy.bad_codecs.sloppy import CODECS
        found = CODECS.get(canonical)
    else:
        return None

    if found is None:
        return None

    _CACHE[encoding] = found
    return found
|
||||
|
||||
|
||||
def ok():
    """
    Do nothing, on purpose.

    Importing this package is what registers the codecs; nothing here ever
    needs to be called. Linters such as Pyflakes, however, complain about a
    module that is imported and then apparently never used, because they
    cannot see that ``unicode.encode`` and ``bytes.decode`` depend on it.
    Calling this function gives such tools a visible use of the import.

    Always returns None.
    """
    return None
|
||||
|
||||
|
||||
codecs.register(search_function)
|
||||
@@ -0,0 +1,164 @@
|
||||
# coding: utf-8
|
||||
r"""
|
||||
Decodes single-byte encodings, filling their "holes" in the same messy way that
|
||||
everyone else does.
|
||||
|
||||
A single-byte encoding maps each byte to a Unicode character, except that some
|
||||
bytes are left unmapped. In the commonly-used Windows-1252 encoding, for
|
||||
example, bytes 0x81 and 0x8D, among others, have no meaning.
|
||||
|
||||
Python, wanting to preserve some sense of decorum, will handle these bytes
|
||||
as errors. But Windows knows that 0x81 and 0x8D are possible bytes and they're
|
||||
different from each other. It just hasn't defined what they are in terms of
|
||||
Unicode.
|
||||
|
||||
Software that has to interoperate with Windows-1252 and Unicode -- such as all
|
||||
the common Web browsers -- will pick some Unicode characters for them to map
|
||||
to, and the characters they pick are the Unicode characters with the same
|
||||
numbers: U+0081 and U+008D. This is the same as what Latin-1 does, and the
|
||||
resulting characters tend to fall into a range of Unicode that's set aside for
|
||||
obsolete Latin-1 control characters anyway.
|
||||
|
||||
These sloppy codecs let Python do the same thing, thus interoperating with
|
||||
other software that works this way. This module defines a sloppy version of many
|
||||
single-byte encodings with holes. (There is no need for a sloppy version of
|
||||
an encoding without holes: for example, there is no such thing as
|
||||
sloppy-iso-8859-2 or sloppy-macroman.)
|
||||
|
||||
The following encodings will become defined:
|
||||
|
||||
- sloppy-windows-1250 (Central European, sort of based on ISO-8859-2)
|
||||
- sloppy-windows-1251 (Cyrillic)
|
||||
- sloppy-windows-1252 (Western European, based on Latin-1)
|
||||
- sloppy-windows-1253 (Greek, sort of based on ISO-8859-7)
|
||||
- sloppy-windows-1254 (Turkish, based on ISO-8859-9)
|
||||
- sloppy-windows-1255 (Hebrew, based on ISO-8859-8)
|
||||
- sloppy-windows-1256 (Arabic)
|
||||
- sloppy-windows-1257 (Baltic, based on ISO-8859-13)
|
||||
- sloppy-windows-1258 (Vietnamese)
|
||||
- sloppy-cp874 (Thai, based on ISO-8859-11)
|
||||
- sloppy-iso-8859-3 (Maltese and Esperanto, I guess)
|
||||
- sloppy-iso-8859-6 (different Arabic)
|
||||
- sloppy-iso-8859-7 (Greek)
|
||||
- sloppy-iso-8859-8 (Hebrew)
|
||||
- sloppy-iso-8859-11 (Thai)
|
||||
|
||||
Aliases such as "sloppy-cp1252" for "sloppy-windows-1252" will also be
|
||||
defined.
|
||||
|
||||
Only sloppy-windows-1251 and sloppy-windows-1252 are used by the rest of ftfy;
|
||||
the rest are rather uncommon.
|
||||
|
||||
Here are some examples, using `ftfy.explain_unicode` to illustrate how
|
||||
sloppy-windows-1252 merges Windows-1252 with Latin-1:
|
||||
|
||||
>>> from ftfy import explain_unicode
|
||||
>>> some_bytes = b'\x80\x81\x82'
|
||||
>>> explain_unicode(some_bytes.decode('latin-1'))
|
||||
U+0080 \x80 [Cc] <unknown>
|
||||
U+0081 \x81 [Cc] <unknown>
|
||||
U+0082 \x82 [Cc] <unknown>
|
||||
|
||||
>>> explain_unicode(some_bytes.decode('windows-1252', 'replace'))
|
||||
U+20AC € [Sc] EURO SIGN
|
||||
U+FFFD � [So] REPLACEMENT CHARACTER
|
||||
U+201A ‚ [Ps] SINGLE LOW-9 QUOTATION MARK
|
||||
|
||||
>>> explain_unicode(some_bytes.decode('sloppy-windows-1252'))
|
||||
U+20AC € [Sc] EURO SIGN
|
||||
U+0081 \x81 [Cc] <unknown>
|
||||
U+201A ‚ [Ps] SINGLE LOW-9 QUOTATION MARK
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import codecs
|
||||
from encodings import normalize_encoding
|
||||
import sys
|
||||
|
||||
REPLACEMENT_CHAR = '\ufffd'
|
||||
PY26 = sys.version_info[:2] == (2, 6)
|
||||
|
||||
def make_sloppy_codec(encoding):
    """
    Build the 'sloppy' counterpart of a single-byte codec.

    `encoding` is the name of an existing single-byte codec. The returned
    `codecs.CodecInfo`, named 'sloppy-' + `encoding`, decodes every byte the
    way the original codec does where the original assigns a character, and
    falls back to the Latin-1 meaning of the byte (U+00NN for byte 0xNN)
    where it does not. Encoding uses the inverse of that table.

    The standard library defines its charmap codecs as a handful of small
    boilerplate classes wrapped around `codecs.charmap_decode` and
    `codecs.charmap_encode`; this function generates the same set of classes
    on the fly for the requested encoding.
    """
    # Every possible byte value, in order.
    byte_values = bytearray(range(256))

    # Start from the Latin-1 reading of each byte; this is what the
    # unassigned bytes will keep.
    table = list(byte_values.decode('latin-1'))

    # Decode the same bytes with the real codec, marking unassigned bytes
    # with the replacement character.
    if not PY26:
        strict_decoding = byte_values.decode(encoding, errors='replace')
    else:
        # NOTE(review): presumably Python 2.6 does not accept `errors` as a
        # keyword argument here, hence the positional form -- confirm.
        strict_decoding = byte_values.decode(encoding, 'replace')

    # Wherever the real codec produced an actual character, it wins over
    # the Latin-1 fallback.
    for index, decoded in enumerate(strict_decoding):
        if decoded != REPLACEMENT_CHAR:
            table[index] = decoded

    # ftfy-specific convention: byte 0x1A (the "Substitute" control code)
    # stands for U+FFFD, so the replacement character itself is encodable.
    table[0x1a] = REPLACEMENT_CHAR

    # The two lookup structures the charmap functions operate on.
    decoding_table = ''.join(table)
    encoding_table = codecs.charmap_build(decoding_table)

    # What follows mirrors the class layout of modules such as
    # `encodings.cp1252` in the standard library.
    class Codec(codecs.Codec):
        def encode(self, input, errors='strict'):
            return codecs.charmap_encode(input, errors, encoding_table)

        def decode(self, input, errors='strict'):
            return codecs.charmap_decode(input, errors, decoding_table)

    class IncrementalEncoder(codecs.IncrementalEncoder):
        def encode(self, input, final=False):
            encoded, _length = codecs.charmap_encode(
                input, self.errors, encoding_table
            )
            return encoded

    class IncrementalDecoder(codecs.IncrementalDecoder):
        def decode(self, input, final=False):
            decoded, _length = codecs.charmap_decode(
                input, self.errors, decoding_table
            )
            return decoded

    class StreamWriter(Codec, codecs.StreamWriter):
        pass

    class StreamReader(Codec, codecs.StreamReader):
        pass

    return codecs.CodecInfo(
        Codec().encode,
        Codec().decode,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        name='sloppy-' + encoding,
    )
|
||||
|
||||
# The single-byte encodings that leave some bytes unassigned, under both
# their 'windows-NNNN' / 'iso-8859-N' names and their 'cpNNNN' aliases.
INCOMPLETE_ENCODINGS = []
INCOMPLETE_ENCODINGS += ['windows-%s' % num for num in range(1250, 1259)]
INCOMPLETE_ENCODINGS += ['iso-8859-%s' % num for num in (3, 6, 7, 8, 11)]
INCOMPLETE_ENCODINGS += ['cp%s' % num for num in range(1250, 1259)]
INCOMPLETE_ENCODINGS += ['cp874']

# Map each normalized 'sloppy_*' name to its generated codec. The main
# module of ftfy.bad_codecs looks codecs up in this dictionary.
CODECS = {}
for _encoding in INCOMPLETE_ENCODINGS:
    _new_name = normalize_encoding('sloppy-' + _encoding)
    CODECS[_new_name] = make_sloppy_codec(_encoding)
|
||||
@@ -0,0 +1,282 @@
|
||||
r"""
|
||||
This file defines a codec called "utf-8-variants" (or "utf-8-var"), which can
|
||||
decode text that's been encoded with a popular non-standard version of UTF-8.
|
||||
This includes CESU-8, the accidental encoding made by layering UTF-8 on top of
|
||||
UTF-16, as well as Java's twist on CESU-8 that contains a two-byte encoding for
|
||||
codepoint 0.
|
||||
|
||||
This is particularly relevant in Python 3, which provides no other way of
|
||||
decoding CESU-8 [1]_.
|
||||
|
||||
The easiest way to use the codec is to simply import `ftfy.bad_codecs`:
|
||||
|
||||
>>> import ftfy.bad_codecs
|
||||
>>> result = b'here comes a null! \xc0\x80'.decode('utf-8-var')
|
||||
>>> print(repr(result).lstrip('u'))
|
||||
'here comes a null! \x00'
|
||||
|
||||
The codec does not at all enforce "correct" CESU-8. For example, the Unicode
|
||||
Consortium's not-quite-standard describing CESU-8 requires that there is only
|
||||
one possible encoding of any character, so it does not allow mixing of valid
|
||||
UTF-8 and CESU-8. This codec *does* allow that, just like Python 2's UTF-8
|
||||
decoder does.
|
||||
|
||||
Characters in the Basic Multilingual Plane still have only one encoding. This
|
||||
codec still enforces the rule, within the BMP, that characters must appear in
|
||||
their shortest form. There is one exception: the sequence of bytes `0xc0 0x80`,
|
||||
instead of just `0x00`, may be used to encode the null character `U+0000`, like
|
||||
in Java.
|
||||
|
||||
If you encode with this codec, you get legitimate UTF-8. Decoding with this
|
||||
codec and then re-encoding is not idempotent, although encoding and then
|
||||
decoding is. So this module won't produce CESU-8 for you. Look for that
|
||||
functionality in the sister module, "Breaks Text For You", coming approximately
|
||||
never.
|
||||
|
||||
.. [1] In a pinch, you can decode CESU-8 in Python 2 using the UTF-8 codec:
|
||||
first decode the bytes (incorrectly), then encode them, then decode them
|
||||
again, using UTF-8 as the codec every time.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import codecs
|
||||
from encodings.utf_8 import (IncrementalDecoder as UTF8IncrementalDecoder,
|
||||
IncrementalEncoder as UTF8IncrementalEncoder)
|
||||
from ftfy.compatibility import bytes_to_ints, unichr, PYTHON2
|
||||
|
||||
NAME = 'utf-8-variants'
|
||||
|
||||
# This regular expression matches all possible six-byte CESU-8 sequences,
# plus truncations of them at the end of the string. (If any of the
# subgroups matches the end of the string, then all the subgroups after it
# also have to match the end, as there are no more bytes to match.)
#
# The end of the string is written as \Z rather than $. In Python, $ also
# matches just *before* a newline that ends the string, which would make a
# sequence followed by b'\n' look like a truncated sequence at the end of
# the input.
CESU8_EXPR = (
    b'('
    b'\xed'
    b'([\xa0-\xaf]|\\Z)'
    b'([\x80-\xbf]|\\Z)'
    b'(\xed|\\Z)'
    b'([\xb0-\xbf]|\\Z)'
    b'([\x80-\xbf]|\\Z)'
    b')'
)

CESU8_RE = re.compile(CESU8_EXPR)

# This expression matches isolated surrogate characters that aren't
# CESU-8, which have to be handled carefully on Python 2.
SURROGATE_EXPR = (b'(\xed([\xa0-\xbf]|\\Z)([\x80-\xbf]|\\Z))')

# This expression matches the Java encoding of U+0, including if it's
# truncated and we need more bytes.
NULL_EXPR = b'(\xc0(\x80|\\Z))'

# This regex matches cases that we need to decode differently from
# standard UTF-8.
SPECIAL_BYTES_RE = re.compile(b'|'.join([NULL_EXPR, CESU8_EXPR, SURROGATE_EXPR]))
|
||||
|
||||
|
||||
class IncrementalDecoder(UTF8IncrementalDecoder):
    """
    An incremental decoder that extends Python's built-in UTF-8 decoder.

    This decoder needs to take in bytes, possibly arriving in a stream, and
    output the correctly decoded text. The general strategy for doing this
    is to fall back on the real UTF-8 decoder whenever possible, because
    the real UTF-8 decoder is way optimized, but to call specialized methods
    we define here for the cases the real decoder isn't expecting.
    """
    def _buffer_decode(self, input, errors, final):
        """
        Decode bytes that may be arriving in a stream, following the Codecs
        API.

        `input` is the incoming sequence of bytes. `errors` tells us how to
        handle errors, though we delegate all error-handling cases to the real
        UTF-8 decoder to ensure correct behavior. `final` indicates whether
        this is the end of the sequence, in which case we should raise an
        error given incomplete input.

        Returns as much decoded text as possible, and the number of bytes
        consumed.
        """
        # decoded_segments are the pieces of text we have decoded so far,
        # and position is our current position in the byte string. (Bytes
        # before this position have been consumed, and bytes after it have
        # yet to be decoded.)
        decoded_segments = []
        position = 0
        while True:
            # Use _buffer_decode_step to decode a segment of text.
            decoded, consumed = self._buffer_decode_step(
                input[position:],
                errors,
                final
            )
            if consumed == 0:
                # Either there's nothing left to decode, or we need to wait
                # for more input. Either way, we're done for now.
                break

            # Append the decoded text to the list, and update our position.
            decoded_segments.append(decoded)
            position += consumed

        if final:
            # _buffer_decode_step must consume all the bytes when `final` is
            # true.
            assert position == len(input)

        return ''.join(decoded_segments), position

    def _buffer_decode_step(self, input, errors, final):
        """
        There are three possibilities for each decoding step:

        - Decode as much real UTF-8 as possible.
        - Decode a six-byte CESU-8 sequence at the current position.
        - Decode a Java-style null at the current position.

        This method figures out which step is appropriate, and does it.
        Returns a (decoded text, bytes consumed) pair.
        """
        # Get a reference to the superclass method that we'll be using for
        # most of the real work.
        sup = UTF8IncrementalDecoder._buffer_decode

        # Find the next byte position that indicates a variant of UTF-8.
        match = SPECIAL_BYTES_RE.search(input)
        if match is None:
            return sup(input, errors, final)

        cutoff = match.start()
        if cutoff > 0:
            # Everything before the special sequence is ordinary UTF-8 and
            # is complete, so it is decoded as if it were final.
            return sup(input[:cutoff], errors, True)

        # Some byte sequence that we intend to handle specially matches
        # at the beginning of the input.
        if input.startswith(b'\xc0'):
            if input.startswith(b'\xc0\x80'):
                # Decode the two-byte sequence 0xc0 0x80.
                return '\u0000', 2
            elif len(input) > 1:
                # 0xc0 is followed by some byte other than 0x80, so this is
                # not a Java-style null. (The pattern can report a match
                # here when its end-of-input anchor matches just before a
                # trailing newline.) Checking the byte itself, instead of
                # only the length, keeps us from decoding b'\xc0\n' as a
                # null and swallowing the newline. Hand the stray 0xc0 to
                # the real decoder so it is treated as the error it is.
                return sup(input[:1], errors, True)
            elif final:
                # We hit the end of the stream. Let the superclass method
                # handle it.
                return sup(input, errors, True)
            else:
                # Wait to see another byte.
                return '', 0
        else:
            # Decode a possible six-byte sequence starting with 0xed.
            return self._buffer_decode_surrogates(sup, input, errors, final)

    @staticmethod
    def _buffer_decode_surrogates(sup, input, errors, final):
        """
        When we have improperly encoded surrogates, we can still see the
        bits that they were meant to represent.

        The surrogates were meant to encode a 20-bit number, to which we
        add 0x10000 to get a codepoint. That 20-bit number now appears in
        this form:

          11101101 1010abcd 10efghij 11101101 1011klmn 10opqrst

        The CESU8_RE above matches byte sequences of this form. Then we need
        to extract the bits and assemble a codepoint number from them.

        `sup` is the real UTF-8 decoding function to delegate to; `input`,
        `errors` and `final` have the same meaning as in `_buffer_decode`.
        Returns a (decoded text, bytes consumed) pair.
        """
        if len(input) < 6:
            if final:
                # We found 0xed near the end of the stream, and there aren't
                # six bytes to decode. Delegate to the superclass method to
                # handle it as an error.
                if PYTHON2 and len(input) >= 3:
                    # We can't trust Python 2 to raise an error when it's
                    # asked to decode a surrogate, so let's force the issue.
                    input = mangle_surrogates(input)
                return sup(input, errors, final)
            else:
                # We found a surrogate, the stream isn't over yet, and we don't
                # know enough of the following bytes to decode anything, so
                # consume zero bytes and wait.
                return '', 0
        else:
            cesu8_match = CESU8_RE.match(input)
            # Only trust the match if it really covers six bytes. A shorter
            # match means the pattern's end-of-input alternative fired (for
            # example just before a trailing newline), and the sixth byte is
            # not actually a continuation byte.
            if cesu8_match is not None and cesu8_match.end() == 6:
                # Given this is a CESU-8 sequence, do some math to pull out
                # the intended 20-bit value, and consume six bytes.
                bytenums = bytes_to_ints(input[:6])
                codepoint = (
                    ((bytenums[1] & 0x0f) << 16) +
                    ((bytenums[2] & 0x3f) << 10) +
                    ((bytenums[4] & 0x0f) << 6) +
                    (bytenums[5] & 0x3f) +
                    0x10000
                )
                return unichr(codepoint), 6
            else:
                # This looked like a CESU-8 sequence, but it wasn't one.
                # 0xed indicates the start of a three-byte sequence, so give
                # three bytes to the superclass to decode as usual -- except
                # for working around the Python 2 discrepancy as before.
                if PYTHON2:
                    input = mangle_surrogates(input)
                return sup(input[:3], errors, False)
|
||||
|
||||
|
||||
def mangle_surrogates(bytestring):
    """
    When Python 3 sees the UTF-8 encoding of a surrogate codepoint, it treats
    it as an error (which it is). In 'replace' mode, it will decode as three
    replacement characters. But Python 2 will just output the surrogate
    codepoint.

    To ensure consistency between Python 2 and Python 3, and protect downstream
    applications from malformed strings, we turn surrogate sequences at the
    start of the string into the bytes `ff ff ff`, which we're *sure* won't
    decode, and which turn into three replacement characters in 'replace' mode.

    Only the unbroken run of encoded surrogates at the very start of
    `bytestring` is rewritten; everything from the first non-surrogate
    onward is returned untouched. The run is scanned with a loop rather than
    one recursive call per surrogate, so a long run cannot exhaust the
    recursion limit.

    This function does nothing in Python 3, and it will be deprecated in ftfy
    5.0.
    """
    if not PYTHON2:
        # On Python 3, nothing needs to be done.
        return bytestring

    # `position` counts the bytes of leading surrogate encodings found so
    # far; it always advances three bytes at a time.
    position = 0
    while (bytestring.startswith(b'\xed', position) and
           len(bytestring) - position >= 3):
        decoded = bytestring[position:position + 3].decode('utf-8', 'replace')
        if not ('\ud800' <= decoded <= '\udfff'):
            break
        position += 3

    if position == 0:
        return bytestring
    return b'\xff' * position + bytestring[position:]
|
||||
|
||||
# The encoder is identical to UTF-8.
IncrementalEncoder = UTF8IncrementalEncoder


# Everything below here is boilerplate that matches the modules in the
# built-in `encodings` package.
def encode(input, errors='strict'):
    """
    Stateless encoder: encode `input` as standard UTF-8.

    Returns a (bytes, length consumed) pair, where the length is
    `len(input)`.
    """
    return IncrementalEncoder(errors).encode(input, final=True), len(input)


def decode(input, errors='strict'):
    """
    Stateless decoder: decode all of `input` as 'utf-8-variants'.

    Returns a (text, length consumed) pair, where the length is
    `len(input)`.
    """
    return IncrementalDecoder(errors).decode(input, final=True), len(input)


class StreamWriter(codecs.StreamWriter):
    # `codecs.StreamWriter.write` calls `self.encode(object, self.errors)`.
    # A plain Python function stored on the class would be bound as a method
    # and receive `self` as an extra first argument, so it is wrapped in
    # `staticmethod`. (The standard library's own codec modules can skip
    # this because they store built-in functions, which are never bound.)
    encode = staticmethod(encode)


class StreamReader(codecs.StreamReader):
    # Wrapped in `staticmethod` for the same reason as StreamWriter.encode:
    # `codecs.StreamReader.read` calls `self.decode(data, self.errors)`.
    decode = staticmethod(decode)
|
||||
|
||||
|
||||
# The codec record for 'utf-8-variants': it bundles the stateless
# functions with the incremental and stream classes defined in this module.
CODEC_INFO = codecs.CodecInfo(
    encode,
    decode,
    streamreader=StreamReader,
    streamwriter=StreamWriter,
    incrementalencoder=IncrementalEncoder,
    incrementaldecoder=IncrementalDecoder,
    name=NAME,
)
|
||||
@@ -0,0 +1,162 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Heuristics to determine whether re-encoding text is actually making it
|
||||
more reasonable.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import unicodedata
|
||||
from ftfy.chardata import chars_to_classes
|
||||
|
||||
# The following regex uses the mapping of character classes to ASCII
|
||||
# characters defined in chardata.py and build_data.py:
|
||||
#
|
||||
# L = Latin capital letter
|
||||
# l = Latin lowercase letter
|
||||
# A = Non-latin capital or title-case letter
|
||||
# a = Non-latin lowercase letter
|
||||
# C = Non-cased letter (Lo)
|
||||
# X = Control character (Cc)
|
||||
# m = Letter modifier (Lm)
|
||||
# M = Mark (Mc, Me, Mn)
|
||||
# N = Miscellaneous numbers (No)
|
||||
# 1 = Math symbol (Sm) or currency symbol (Sc)
|
||||
# 2 = Symbol modifier (Sk)
|
||||
# 3 = Other symbol (So)
|
||||
# S = UTF-16 surrogate
|
||||
# _ = Unassigned character
|
||||
# ' ' (space) = Whitespace
|
||||
# o = Other
|
||||
|
||||
|
||||
def _make_weirdness_regex():
|
||||
"""
|
||||
Creates a list of regexes that match 'weird' character sequences.
|
||||
The more matches there are, the weirder the text is.
|
||||
"""
|
||||
groups = []
|
||||
|
||||
# Match lowercase letters that are followed by non-ASCII uppercase letters
|
||||
groups.append('lA')
|
||||
|
||||
# Match diacritical marks, except when they modify a non-cased letter or
|
||||
# another mark.
|
||||
#
|
||||
# You wouldn't put a diacritical mark on a digit or a space, for example.
|
||||
# You might put it on a Latin letter, but in that case there will almost
|
||||
# always be a pre-composed version, and we normalize to pre-composed
|
||||
# versions first. The cases that can't be pre-composed tend to be in
|
||||
# large scripts without case, which are in class C.
|
||||
groups.append('[^CM]M')
|
||||
|
||||
# Match non-Latin characters adjacent to Latin characters.
|
||||
#
|
||||
# This is a simplification from ftfy version 2, which compared all
|
||||
# adjacent scripts. However, the ambiguities we need to resolve come from
|
||||
# encodings designed to represent Latin characters.
|
||||
groups.append('[Ll][AaC]')
|
||||
groups.append('[AaC][Ll]')
|
||||
|
||||
# Match IPA letters next to capital letters.
|
||||
#
|
||||
# IPA uses lowercase letters only. Some accented capital letters next to
|
||||
# punctuation can accidentally decode as IPA letters, and an IPA letter
|
||||
# appearing next to a capital letter is a good sign that this happened.
|
||||
groups.append('[LA]i')
|
||||
groups.append('i[LA]')
|
||||
|
||||
# Match non-combining diacritics. We've already set aside the common ones
|
||||
# like ^ (the CIRCUMFLEX ACCENT, repurposed as a caret, exponent sign,
|
||||
# or happy eye) and assigned them to category 'o'. The remaining ones,
|
||||
# like the diaeresis (¨), are pretty weird to see on their own instead
|
||||
# of combined with a letter.
|
||||
groups.append('2')
|
||||
|
||||
# Match C1 control characters, which are almost always the result of
|
||||
# decoding Latin-1 that was meant to be Windows-1252.
|
||||
groups.append('X')
|
||||
|
||||
# Match private use and unassigned characters.
|
||||
groups.append('P')
|
||||
groups.append('_')
|
||||
|
||||
# Match adjacent characters from any different pair of these categories:
|
||||
# - Modifier marks (M)
|
||||
# - Letter modifiers (m)
|
||||
# - Miscellaneous numbers (N)
|
||||
# - Symbols (1 or 3, because 2 is already weird on its own)
|
||||
|
||||
exclusive_categories = 'MmN13'
|
||||
for cat1 in exclusive_categories:
|
||||
others_range = ''.join(c for c in exclusive_categories if c != cat1)
|
||||
groups.append('{cat1}[{others_range}]'.format(
|
||||
cat1=cat1, others_range=others_range
|
||||
))
|
||||
regex = '|'.join('({0})'.format(group) for group in groups)
|
||||
return re.compile(regex)
|
||||
|
||||
WEIRDNESS_RE = _make_weirdness_regex()
|
||||
|
||||
# Symbols that show up inside mojibake but are also perfectly common in
# ordinary text. Each occurrence earns a small discount, expressing a slight
# preference for leaving such text alone.
COMMON_SYMBOL_RE = re.compile(
    '[' + ''.join((
        '\N{HORIZONTAL ELLIPSIS}',
        '\N{EM DASH}',
        '\N{EN DASH}',
        '\N{LEFT SINGLE QUOTATION MARK}',
        '\N{LEFT DOUBLE QUOTATION MARK}',
        '\N{RIGHT SINGLE QUOTATION MARK}',
        '\N{RIGHT DOUBLE QUOTATION MARK}',
        '\N{INVERTED EXCLAMATION MARK}',
        '\N{INVERTED QUESTION MARK}',
        '\N{DEGREE SIGN}',
        '\N{TRADE MARK SIGN}',
        '\N{REGISTERED SIGN}',
        '\N{SINGLE LEFT-POINTING ANGLE QUOTATION MARK}',
        '\N{SINGLE RIGHT-POINTING ANGLE QUOTATION MARK}',
        '\N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}',
        '\N{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}',
        '\N{NO-BREAK SPACE}',
        '\N{ACUTE ACCENT}',
        '\N{MULTIPLICATION SIGN}',
        '\N{LATIN SMALL LETTER SHARP S}',
        # The byte-order mark. Its UTF-8 bytes (EF BB BF), misread as
        # Windows-1252, come out as 'ï»¿', which is made of common-looking
        # characters.
        '\ufeff',
    )) + ']'
)
|
||||
|
||||
def sequence_weirdness(text):
    """
    Score how many unexpected characters or character sequences `text`
    contains. The score is used to decide whether a piece of text should be
    re-decoded or left alone.

    The text is first normalized to NFC, so that a diacritical mark that
    combines cleanly with its base letter is not counted against the text.
    It is then translated to a string of character-class codes, and every
    match of `WEIRDNESS_RE` in that string counts as one instance of
    weirdness. The things that regex looks for include:

    - Lowercase letters followed by non-ASCII uppercase letters
    - Non-Latin characters next to Latin characters
    - Un-combined diacritical marks, unless they stack on non-alphabetic
      characters (in scripts that do that a lot) or on other marks
    - C1 control characters
    - Adjacent symbols from two different categories among: modifier marks,
      letter modifiers, non-digit numbers, and symbols (including math and
      currency)

    Each instance of weirdness adds 2 to the score, and each character
    matched by `COMMON_SYMBOL_RE` subtracts 1, so the result can be
    negative.
    """
    normalized = unicodedata.normalize('NFC', text)
    weird_hits = WEIRDNESS_RE.findall(chars_to_classes(normalized))
    common_hits = COMMON_SYMBOL_RE.findall(normalized)
    return 2 * len(weird_hits) - len(common_hits)
|
||||
|
||||
|
||||
def text_cost(text):
    """
    Overall cost of a piece of text: lower is better.

    The cost is the text's "weirdness" (see :func:`sequence_weirdness`)
    plus its length, so weirder text costs more and, between two equally
    weird candidates, the shorter one wins.
    """
    weirdness = sequence_weirdness(text)
    return weirdness + len(text)
|
||||
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
A script to make the char_classes.dat file.
|
||||
|
||||
This never needs to run in normal usage. It needs to be run if the character
|
||||
classes we care about change, or if a new version of Python supports a new
|
||||
Unicode standard and we want it to affect our string decoding.
|
||||
|
||||
The file that we generate is based on Unicode 9.0, as supported by Python 3.6.
|
||||
You can certainly use it in earlier versions. This simply makes sure that we
|
||||
get consistent results from running ftfy on different versions of Python.
|
||||
|
||||
The file will be written to the current directory.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import unicodedata
|
||||
import sys
|
||||
import zlib
|
||||
if sys.hexversion >= 0x03000000:
|
||||
unichr = chr
|
||||
|
||||
# L = Latin capital letter
|
||||
# l = Latin lowercase letter
|
||||
# A = Non-latin capital or title-case letter
|
||||
# a = Non-latin lowercase letter
|
||||
# C = Non-cased letter (Lo)
|
||||
# X = Control character (Cc)
|
||||
# m = Letter modifier (Lm)
|
||||
# M = Mark (Mc, Me, Mn)
|
||||
# N = Miscellaneous numbers (No)
|
||||
# P = Private use (Co)
|
||||
# 1 = Math symbol (Sm) or currency symbol (Sc)
|
||||
# 2 = Symbol modifier (Sk)
|
||||
# 3 = Other symbol (So)
|
||||
# S = UTF-16 surrogate
|
||||
# _ = Unassigned character
|
||||
# ' ' (space) = Whitespace
|
||||
# o = Other
|
||||
|
||||
|
||||
def make_char_data_file(do_it_anyway=False):
    """
    Build the compressed data file 'char_classes.dat' and write it to the
    current directory.

    The file holds one ASCII class code for every codepoint from U+0000 to
    U+10FFFF, concatenated in codepoint order and compressed with zlib.

    If you run this, run it in Python 3.6 or later. It will run in earlier
    versions, but you won't get the Unicode 9 standard, leading to inconsistent
    behavior.

    To protect against this, running this in the wrong version of Python will
    raise a RuntimeError unless you pass `do_it_anyway=True`.
    """
    if sys.hexversion < 0x03060000 and not do_it_anyway:
        raise RuntimeError(
            "This function should be run in Python 3.6 or later."
        )

    cclasses = [None] * 0x110000
    for codepoint in range(0x0, 0x110000):
        char = unichr(codepoint)
        category = unicodedata.category(char)

        # The order of these tests matters: the earlier, more specific
        # ranges take priority over the general-category tests below them.
        if (0x250 <= codepoint < 0x300) and char != 'ə':
            # IPA symbols and modifiers.
            #
            # This category excludes the schwa (ə), which is used as a normal
            # Latin letter in some languages.
            cclasses[codepoint] = 'i'
        elif category.startswith('L'):  # letters
            if unicodedata.name(char, '').startswith('LATIN'):
                if category == 'Lu':
                    cclasses[codepoint] = 'L'
                else:
                    cclasses[codepoint] = 'l'
            else:
                if category == 'Lu' or category == 'Lt':
                    cclasses[codepoint] = 'A'
                elif category == 'Ll':
                    cclasses[codepoint] = 'a'
                elif category == 'Lo':
                    cclasses[codepoint] = 'C'
                elif category == 'Lm':
                    cclasses[codepoint] = 'm'
                else:
                    raise ValueError('got some weird kind of letter')
        elif 0xfe00 <= codepoint <= 0xfe0f or 0x1f3fb <= codepoint <= 0x1f3ff:
            # Variation selectors and skin-tone modifiers have the category
            # of non-spacing marks, but they act like symbols
            cclasses[codepoint] = '3'
        elif category.startswith('M'):  # marks
            cclasses[codepoint] = 'M'
        elif category == 'No':
            cclasses[codepoint] = 'N'
        elif category == 'Sm' or category == 'Sc':
            cclasses[codepoint] = '1'
        elif category == 'Sk':
            cclasses[codepoint] = '2'
        elif category == 'So':
            cclasses[codepoint] = '3'
        elif category == 'Cc':
            cclasses[codepoint] = 'X'
        elif category == 'Cs':
            cclasses[codepoint] = 'S'
        elif category == 'Co':
            cclasses[codepoint] = 'P'
        elif category.startswith('Z'):
            cclasses[codepoint] = ' '
        elif 0x1f000 <= codepoint <= 0x1ffff:
            # This range is rapidly having emoji added to it. Assume that
            # an unassigned codepoint in this range is just a symbol we
            # don't know yet.
            cclasses[codepoint] = '3'
        elif category == 'Cn':
            cclasses[codepoint] = '_'
        else:
            cclasses[codepoint] = 'o'

    # Mark whitespace control characters as whitespace
    cclasses[9] = cclasses[10] = cclasses[12] = cclasses[13] = ' '

    # Some other exceptions for characters that are more commonly used as
    # punctuation or decoration than for their ostensible purpose.
    # For example, tilde is not usually a "math symbol", and the accents
    # `´ are much more like quotation marks than modifiers.
    # NOTE(review): in this copy the last two characters of the string repeat
    # '^' and '`'; they may have been different (e.g. fullwidth) characters
    # before this text was extracted -- confirm against the repository.
    for char in "^~`´˝^`":
        cclasses[ord(char)] = 'o'

    # Build the whole payload before touching the file, so that a failure
    # here cannot leave a truncated 'char_classes.dat' behind, and use a
    # context manager so the file is closed even if the write fails.
    payload = zlib.compress(''.join(cclasses).encode('ascii'))
    with open('char_classes.dat', 'wb') as out:
        out.write(payload)
|
||||
|
||||
if __name__ == '__main__':
|
||||
make_char_data_file()
|
||||
Binary file not shown.
@@ -0,0 +1,214 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
This gives other modules access to the gritty details about characters and the
|
||||
encodings that use them.
|
||||
"""
|
||||
|
||||
import re
|
||||
import zlib
|
||||
import unicodedata
|
||||
import itertools
|
||||
from pkg_resources import resource_string
|
||||
from ftfy.compatibility import unichr
|
||||
|
||||
# The single-byte ("charmap") encodings that ftfy will try when looking for
# mojibake. Order matters: they are attempted from first to last.
CHARMAP_ENCODINGS = [
    u'latin-1',
    u'sloppy-windows-1252',
    u'sloppy-windows-1250',
    u'iso-8859-2',
    u'sloppy-windows-1251',
    u'macroman',
    u'cp437',
]
|
||||
|
||||
|
||||
def _build_regexes():
    """
    Build ENCODING_REGEXES: a dict mapping an encoding name to a compiled
    regex that matches exactly those strings which could be represented in
    that encoding. This is a reasonably fast way to test encodability.

    The simplest entry is the 'ascii' detector, which just checks that every
    character is between U+0000 and U+007F. One further entry is built for
    each name in CHARMAP_ENCODINGS.
    """
    # The bytes \x80 to \xFF, followed by byte \x1A, which is used to
    # represent the replacement character U+FFFD in the sloppy-* encodings.
    # This byte string is the same for every encoding, so build it once.
    latin1table = u''.join(unichr(i) for i in range(128, 256)) + '\x1a'
    high_bytes = latin1table.encode(u'latin-1')

    # Start with a regex that matches ASCII text.
    encoding_regexes = {u'ascii': re.compile('^[\x00-\x7f]*$')}

    for encoding in CHARMAP_ENCODINGS:
        # The characters that those bytes decode to in this encoding.
        charlist = high_bytes.decode(encoding)

        # The remaining ASCII bytes -- \x00 to \x19 and \x1B to \x7F -- are
        # included as plain ranges, on the basis that they decode to the
        # same ASCII characters in every encoding we support. Regex special
        # characters all fall inside the \x1B to \x7F range, so nothing in
        # `charlist` needs escaping.
        regex = u'^[\x00-\x19\x1b-\x7f{0}]*$'.format(charlist)
        encoding_regexes[encoding] = re.compile(regex)
    return encoding_regexes
ENCODING_REGEXES = _build_regexes()
|
||||
|
||||
|
||||
def _build_utf8_punct_regex():
    """
    Compile a regex for UTF-8 mojibake that is blatant enough to fix even
    when the rest of the string doesn't decode as UTF-8: the UTF-8 sequences
    for 'General Punctuation' characters (those encoded as b'\\xe2\\x80'
    plus one continuation byte), re-decoded as Windows-1252.

    They are recognizable by the distinctive 'â€' sequence that all of them
    begin with once decoded as Windows-1252.
    """
    # We need every byte from 0x80 to 0xbf inside a character class.
    # "Couldn't this have just said [\x80-\xbf]?", you might ask. It can't:
    # once those bytes are decoded as Windows-1252, the resulting characters
    # are nowhere near contiguous, so they have to be listed individually.
    #
    # Unrelatedly, the expression that generates these bytes will be so much
    # prettier when we deprecate Python 2.
    continuation_bytes = ''.join(
        unichr(i) for i in range(0x80, 0xc0)
    ).encode(u'latin-1')
    continuation_chars = continuation_bytes.decode(u'sloppy-windows-1252')
    obvious_utf8 = u'â€[' + continuation_chars + u']'
    return re.compile(obvious_utf8)

PARTIAL_UTF8_PUNCT_RE = _build_utf8_punct_regex()
|
||||
|
||||
|
||||
# ALTERED_UTF8_RE finds byte sequences that would be valid UTF-8 if it
# weren't for a b'\xa0' that some Windows-1252 program turned into a plain
# space.
#
# Two-byte sequences are only included case by case, because we don't want
# to decode likely input sequences to unlikely characters. These lead bytes
# *do* form a likely character when followed by 0xa0:
#
#   0xc2 -> U+A0 NO-BREAK SPACE
#   0xc3 -> U+E0 LATIN SMALL LETTER A WITH GRAVE
#   0xc5 -> U+160 LATIN CAPITAL LETTER S WITH CARON
#   0xce -> U+3A0 GREEK CAPITAL LETTER PI
#   0xd0 -> U+420 CYRILLIC CAPITAL LETTER ER
#
# Even these need to come with a cost, so that they are only converted when
# there's evidence that doing so fixes other things. Any of them could be a
# character that legitimately appears surrounded by spaces, particularly
# U+C5 (Å), which is a word in multiple languages!
#
# We should consider checking for b'\x85' being converted to ... in the
# future. I've seen it once, but the text still wasn't recoverable.
ALTERED_UTF8_RE = re.compile(
    b'[\xc2\xc3\xc5\xce\xd0][ ]'
    b'|[\xe0-\xef][ ][\x80-\xbf]'
    b'|[\xe0-\xef][\x80-\xbf][ ]'
    b'|[\xf0-\xf4][ ][\x80-\xbf][\x80-\xbf]'
    b'|[\xf0-\xf4][\x80-\xbf][ ][\x80-\xbf]'
    b'|[\xf0-\xf4][\x80-\xbf][\x80-\xbf][ ]'
)
|
||||
|
||||
# LOSSY_UTF8_RE matches UTF-8 and CESU-8 sequences in which some of the
# continuation bytes have been lost. Within ftfy, the byte 0x1a (sometimes
# written as ^Z) stands for a byte that produced the replacement character
# \ufffd. We can't know which byte it was, but we can at least decode the
# whole UTF-8 sequence as \ufffd instead of failing to re-decode it at all.
# The final alternative matches a stray 0x1a on its own.
LOSSY_UTF8_RE = re.compile(
    b'[\xc2-\xdf][\x1a]'
    b'|\xed[\xa0-\xaf][\x1a]\xed[\xb0-\xbf][\x1a\x80-\xbf]'
    b'|\xed[\xa0-\xaf][\x1a\x80-\xbf]\xed[\xb0-\xbf][\x1a]'
    b'|[\xe0-\xef][\x1a][\x1a\x80-\xbf]'
    b'|[\xe0-\xef][\x1a\x80-\xbf][\x1a]'
    b'|[\xf0-\xf4][\x1a][\x1a\x80-\xbf][\x1a\x80-\xbf]'
    b'|[\xf0-\xf4][\x1a\x80-\xbf][\x1a][\x1a\x80-\xbf]'
    b'|[\xf0-\xf4][\x1a\x80-\xbf][\x1a\x80-\xbf][\x1a]'
    b'|\x1a'
)
|
||||
|
||||
# Character classes covering the Unicode variants of single quotes
# (U+2018 to U+201B) and of double quotes (U+201C to U+201F).
SINGLE_QUOTE_RE = re.compile(u'[\u2018-\u201b]')
DOUBLE_QUOTE_RE = re.compile(u'[\u201c-\u201f]')
|
||||
|
||||
|
||||
def possible_encoding(text, encoding):
    """
    Report whether `text` could have been decoded from the given
    single-byte encoding -- that is, whether it can be encoded in that
    encoding, possibly sloppily.

    `encoding` must be a key of ENCODING_REGEXES; the answer comes from
    matching `text` against the regex stored there. Returns a bool.
    """
    detector = ENCODING_REGEXES[encoding]
    return detector.match(text) is not None
|
||||
|
||||
|
||||
# Load the packaged 'char_classes.dat' resource, which holds a
# zlib-compressed ASCII string, and unpack it into a text string.
CHAR_CLASS_STRING = zlib.decompress(
    resource_string(__name__, 'char_classes.dat')
).decode(u'ascii')
|
||||
|
||||
def chars_to_classes(string):
    """
    Replace every character of `string` with the letter that names its
    character class, using CHAR_CLASS_STRING as the translation table.

    See build_data.py for where this data comes from and what it means.
    """
    class_table = CHAR_CLASS_STRING
    return string.translate(class_table)
|
||||
|
||||
|
||||
def _build_control_char_mapping():
    """
    Build a translate mapping that strips likely-unintended control characters.
    See :func:`ftfy.fixes.remove_control_chars` for a description of these
    codepoint ranges and why they should be removed.

    Every listed codepoint maps to None, which makes `str.translate` delete
    that character.
    """
    unwanted_codepoints = itertools.chain(
        range(0x00, 0x09), [0x0b],
        range(0x0e, 0x20), [0x7f],
        range(0x206a, 0x2070),
        [0xfeff],
        range(0xfff9, 0xfffd),
        range(0x1d173, 0x1d17b),
        range(0xe0000, 0xe0080)
    )
    return dict.fromkeys(unwanted_codepoints)

CONTROL_CHARS = _build_control_char_mapping()
|
||||
|
||||
|
||||
# A translate mapping that breaks ligatures made of Latin letters. While
# ligatures may be important to the representation of other languages, in
# Latin letters they tend to represent a copy/paste error.
#
# Ligatures may also be separated by NFKC normalization, but that is sometimes
# more normalization than you want.
#
# The keys are written as \u escapes rather than as literal ligature
# characters. A literal ligature is easily flattened into its component
# letters by any tool that normalizes this file, and ord() raises TypeError
# when handed a multi-character string.
LIGATURES = {
    ord(u'\u0132'): u'IJ',    # LATIN CAPITAL LIGATURE IJ
    ord(u'\u0133'): u'ij',    # LATIN SMALL LIGATURE IJ
    ord(u'\ufb00'): u'ff',    # LATIN SMALL LIGATURE FF
    ord(u'\ufb01'): u'fi',    # LATIN SMALL LIGATURE FI
    ord(u'\ufb02'): u'fl',    # LATIN SMALL LIGATURE FL
    ord(u'\ufb03'): u'ffi',   # LATIN SMALL LIGATURE FFI
    ord(u'\ufb04'): u'ffl',   # LATIN SMALL LIGATURE FFL
    ord(u'\ufb05'): u'ſt',    # LATIN SMALL LIGATURE LONG S T
    ord(u'\ufb06'): u'st'     # LATIN SMALL LIGATURE ST
}
|
||||
|
||||
|
||||
def _build_width_map():
    """
    Build a translate mapping that replaces halfwidth and fullwidth forms
    with their standard-width forms.

    The mapping goes from codepoint to replacement string. It covers U+3000
    plus every codepoint from U+FF01 up to (not including) U+FFF0 whose NFKC
    normalization differs from the character itself.
    """
    # U+3000 IDEOGRAPHIC SPACE isn't listed as a fullwidth character, but we
    # want to turn it into U+20 SPACE on the same principle, so it seeds the
    # dictionary.
    width_map = {0x3000: u' '}
    for codepoint in range(0xff01, 0xfff0):
        wide_char = unichr(codepoint)
        standard_form = unicodedata.normalize(u'NFKC', wide_char)
        if standard_form != wide_char:
            width_map[codepoint] = standard_form
    return width_map

WIDTH_MAP = _build_width_map()
|
||||
@@ -0,0 +1,122 @@
|
||||
"""
|
||||
A command-line utility for fixing text found in a file.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import io
|
||||
import codecs
|
||||
from ftfy import fix_file, __version__
|
||||
from ftfy.compatibility import PYTHON2
|
||||
|
||||
|
||||
# Message for a UnicodeEncodeError while writing output on a non-Windows
# platform.
ENCODE_ERROR_TEXT_UNIX = """ftfy error:
Unfortunately, this output stream does not support Unicode.

Your system locale may be very old or misconfigured. You should use a locale
that supports UTF-8. One way to do this is to `export LANG=C.UTF-8`.
"""

# Message for a UnicodeEncodeError while writing output on Windows.
ENCODE_ERROR_TEXT_WINDOWS = """ftfy error:
Unfortunately, this output stream does not support Unicode.

You might be trying to output to the Windows Command Prompt (cmd.exe), which
does not fully support Unicode for historical reasons. In general, we recommend
finding a way to run Python without using cmd.exe.

You can work around this problem by using the '-o filename' option in ftfy to
output to a file instead.
"""

# Message for input that can't be decoded. It is a %-format template that
# takes two values: the encoding (shown with %r) and the error (shown with %s).
DECODE_ERROR_TEXT = """ftfy error:
This input couldn't be decoded as %r. We got the following error:

    %s

ftfy works best when its input is in a known encoding. You can use `ftfy -g`
to guess, if you're desperate. Otherwise, give the encoding name with the
`-e` option, such as `ftfy -e latin-1`.
"""
|
||||
|
||||
|
||||
def main():
    """
    Run ftfy as a command-line utility.

    Parses the command line, reads the input (a named file, or standard
    input when the filename is '-'), passes it through `fix_file`, and
    writes each fixed line to the output (a named file written as UTF-8, or
    standard output when the output is '-').

    If the output stream can't encode a line, or the input can't be decoded,
    an explanation is written to standard error and the process exits with
    status 1. Any file that this function opened itself is closed before it
    returns or exits.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="ftfy (fixes text for you), version %s" % __version__
    )
    parser.add_argument('filename', default='-', nargs='?',
                        help='The file whose Unicode is to be fixed. Defaults '
                             'to -, meaning standard input.')
    parser.add_argument('-o', '--output', type=str, default='-',
                        help='The file to output to. Defaults to -, meaning '
                             'standard output.')
    parser.add_argument('-g', '--guess', action='store_true',
                        help="Ask ftfy to guess the encoding of your input. "
                             "This is risky. Overrides -e.")
    parser.add_argument('-e', '--encoding', type=str, default='utf-8',
                        help='The encoding of the input. Defaults to UTF-8.')
    parser.add_argument('-n', '--normalization', type=str, default='NFC',
                        help='The normalization of Unicode to apply. '
                             'Defaults to NFC. Can be "none".')
    parser.add_argument('--preserve-entities', action='store_true',
                        help="Leave HTML entities as they are. The default "
                             "is to decode them, as long as no HTML tags "
                             "have appeared in the file.")

    args = parser.parse_args()

    # An encoding of None asks fix_file to guess; -g overrides -e.
    encoding = args.encoding
    if args.guess:
        encoding = None

    normalization = args.normalization
    if normalization.lower() == 'none':
        normalization = None

    if args.preserve_entities:
        fix_entities = False
    else:
        fix_entities = 'auto'

    # Only the files we open ourselves get closed at the end; the standard
    # streams are left alone.
    opened_files = []
    try:
        if args.filename == '-':
            # Get a standard input stream made of bytes, so we can decode it
            # as whatever encoding is necessary.
            if PYTHON2:
                file = sys.stdin
            else:
                file = sys.stdin.buffer
        else:
            file = open(args.filename, 'rb')
            opened_files.append(file)

        if args.output == '-':
            # On Python 2, sys.stdout takes bytes, so lines are encoded as
            # UTF-8 before being written.
            encode_output = PYTHON2
            outfile = sys.stdout
        else:
            encode_output = False
            outfile = io.open(args.output, 'w', encoding='utf-8')
            opened_files.append(outfile)

        try:
            for line in fix_file(file, encoding=encoding,
                                 fix_entities=fix_entities,
                                 normalization=normalization):
                if encode_output:
                    outfile.write(line.encode('utf-8'))
                else:
                    try:
                        outfile.write(line)
                    except UnicodeEncodeError:
                        if sys.platform == 'win32':
                            sys.stderr.write(ENCODE_ERROR_TEXT_WINDOWS)
                        else:
                            sys.stderr.write(ENCODE_ERROR_TEXT_UNIX)
                        sys.exit(1)
        except UnicodeDecodeError as err:
            sys.stderr.write(DECODE_ERROR_TEXT % (encoding, err))
            sys.exit(1)
    finally:
        # Runs on normal completion and on sys.exit() alike, so the output
        # file is flushed and closed in every case.
        for handle in opened_files:
            handle.close()


if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,55 @@
|
||||
"""
|
||||
Makes some function names and behavior consistent between Python 2 and
|
||||
Python 3, and also between narrow and wide builds.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import sys
|
||||
import unicodedata
|
||||
|
||||
# Give `unichr` and `xrange` the same meaning on both major versions of
# Python, and record which major version is running.
PYTHON2 = not (sys.hexversion >= 0x03000000)
if PYTHON2:
    unichr = unichr
    xrange = xrange
else:
    unichr = chr
    xrange = range

# True when running on Python 3.4 or any later version.
PYTHON34_OR_LATER = (sys.hexversion >= 0x03040000)
|
||||
|
||||
|
||||
def _narrow_unichr_workaround(codepoint):
    """
    A replacement for unichr() on narrow builds of Python. This will get
    us the narrow representation of an astral character, which will be
    a string of length two, containing two UTF-16 surrogates.

    It works by spelling the codepoint as a ``\\UXXXXXXXX`` escape sequence
    and decoding that with the 'unicode-escape' codec.
    """
    return (b'\\U%08x' % codepoint).decode('unicode-escape')


# Narrow builds can't go through the built-in for astral codepoints, so
# switch `unichr` to the workaround there.
if sys.maxunicode < 0x10000:
    unichr = _narrow_unichr_workaround
|
||||
|
||||
|
||||
def bytes_to_ints(bytestring):
    """
    Turn a bytestring into a sequence of integers, whatever version of
    Python this is.

    On Python 2 this builds a list of each byte's ordinal. On Python 3 the
    'bytes' object already _is_ a sequence of integers, so it is returned
    unchanged.
    """
    if not PYTHON2:
        return bytestring
    return [ord(b) for b in bytestring]
|
||||
|
||||
|
||||
def is_printable(char):
    """
    Report whether `char` is printable.

    str.isprintable() is new in Python 3. It's useful in `explain_unicode`,
    so Python 2 gets a crude approximation: a character counts as printable
    unless its Unicode category starts with 'C'.
    """
    if not PYTHON2:
        return char.isprintable()
    category = unicodedata.category(char)
    return not category.startswith('C')
|
||||
@@ -0,0 +1,664 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
This module contains the individual fixes that the main fix_text function
|
||||
can perform.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import sys
|
||||
import codecs
|
||||
import warnings
|
||||
from ftfy.chardata import (possible_encoding, CHARMAP_ENCODINGS,
|
||||
CONTROL_CHARS, LIGATURES, WIDTH_MAP,
|
||||
PARTIAL_UTF8_PUNCT_RE, ALTERED_UTF8_RE,
|
||||
LOSSY_UTF8_RE, SINGLE_QUOTE_RE, DOUBLE_QUOTE_RE)
|
||||
from ftfy.badness import text_cost
|
||||
from ftfy.compatibility import unichr
|
||||
from html5lib.constants import entities
|
||||
|
||||
|
||||
# Explanation used when a text-fixing function is handed bytes instead of
# Unicode text. The %d in the URL is filled in with the major version of
# the running Python, so the link points at the matching Unicode HOWTO.
BYTES_ERROR_TEXT = """Hey wait, this isn't Unicode.

ftfy is designed to fix problems that were introduced by handling Unicode
incorrectly. It might be able to fix the bytes you just handed it, but the
fact that you just gave a pile of bytes to a function that fixes text means
that your code is *also* handling Unicode incorrectly.

ftfy takes Unicode text as input. You should take these bytes and decode
them from the encoding you think they are in. If you're not sure what encoding
they're in:

- First, try to find out. 'utf-8' is a good assumption.
- If the encoding is simply unknowable, try running your bytes through
  ftfy.guess_bytes. As the name implies, this may not always be accurate.

If you're confused by this, please read the Python Unicode HOWTO:

    http://docs.python.org/%d/howto/unicode.html
""" % sys.version_info[0]
|
||||
|
||||
|
||||
def fix_encoding(text):
    r"""
    Fix text with incorrectly-decoded garbage ("mojibake") whenever possible.

    This function looks for the evidence of mojibake, formulates a plan to
    fix it, and applies the plan. It decides whether nonsense sequences of
    single-byte characters were really meant to be UTF-8 characters, and if
    so, turns them into the Unicode characters they were meant to represent.

    The input to the function must be Unicode. If you don't have Unicode
    text, you're not using the right tool to solve your problem.

    `fix_encoding` decodes text that looks like it was decoded incorrectly.
    It leaves alone text that doesn't.

        >>> print(fix_encoding('Ãºnico'))
        único

        >>> print(fix_encoding('This text is fine already :þ'))
        This text is fine already :þ

    Because these characters often come from Microsoft products, we allow
    for the possibility that we get not just Unicode characters 128-255, but
    also Windows's conflicting idea of what characters 128-160 are.

        >>> print(fix_encoding('This â€” should be an em dash'))
        This — should be an em dash

    We might have to deal with both Windows characters and raw control
    characters at the same time, especially when dealing with characters
    like 0x81 that have no mapping in Windows. This is a string that
    Python's standard `.encode` and `.decode` methods cannot correct.

        >>> print(fix_encoding('This text is sad .â\x81”.'))
        This text is sad .⁔.

    However, it has safeguards against fixing sequences of letters and
    punctuation that can occur in valid text. In the following example,
    the last three characters are not replaced with a Korean character,
    even though they could be.

        >>> print(fix_encoding('not such a fan of Charlotte Brontë…”'))
        not such a fan of Charlotte Brontë…”

    This function can now recover some complex manglings of text, such as
    when UTF-8 mojibake has been normalized in a way that replaces U+A0 with
    a space:

        >>> print(fix_encoding('The more you know ðŸŒ '))
        The more you know 🌠

    Cases of genuine ambiguity can sometimes be addressed by finding other
    characters that are not double-encoded, and expecting the encoding to
    be consistent:

        >>> print(fix_encoding('AHÅ™, the new sofa from IKEA®'))
        AHÅ™, the new sofa from IKEA®

    Finally, we handle the case where the text is in a single-byte encoding
    that was intended as Windows-1252 all along but read as Latin-1:

        >>> print(fix_encoding('This text was never UTF-8 at all\x85'))
        This text was never UTF-8 at all…

    The best version of the text is found using
    :func:`ftfy.badness.text_cost`.
    """
    # fix_encoding_and_explain returns (fixed_text, plan); only the text is
    # wanted here.
    fixed, _plan = fix_encoding_and_explain(text)
    return fixed
|
||||
|
||||
|
||||
def fix_text_encoding(text):
    """
    A deprecated name for :func:`ftfy.fixes.fix_encoding`.

    Emits a DeprecationWarning, then returns `fix_encoding(text)`.
    """
    # stacklevel=2 attributes the warning to the code that called this
    # function, which is the code that needs to change, instead of to this
    # line inside ftfy.
    warnings.warn('fix_text_encoding is now known as fix_encoding',
                  DeprecationWarning, stacklevel=2)
    return fix_encoding(text)
|
||||
|
||||
|
||||
# Supporting mojibake detection in more encodings raises the risk of false
# positives. To mitigate that, encodings that are rarer than Windows-1252
# carry an additional cost, so that they are only chosen when they fix
# multiple problems. Encodings not listed here cost nothing extra.
ENCODING_COSTS = {
    'macroman': 2,
    'iso-8859-2': 2,
    'sloppy-windows-1250': 2,
    'sloppy-windows-1251': 3,
    'cp437': 3,
}
|
||||
|
||||
|
||||
def fix_encoding_and_explain(text):
    """
    Re-decode text that has been decoded incorrectly, and also return a
    "plan" indicating all the steps required to fix it.

    Single fixing steps are applied repeatedly until one leaves the text
    unchanged. After each step the candidate is scored as
    `text_cost(candidate)` plus the cost of every plan step taken so far,
    and the cheapest candidate seen (starting with the input itself) wins.

    Returns a `(best_text, best_plan)` tuple. The plan can be passed to
    :func:`ftfy.fixes.apply_plan` to fix additional strings that are broken
    in the same way.
    """
    best_version = text
    best_cost = text_cost(text)
    best_plan = []
    plan_so_far = []
    while True:
        prevtext = text
        text, plan = fix_one_step_and_explain(text)
        plan_so_far.extend(plan)

        # The accumulated plan is charged against this candidate.
        plan_cost = sum(step_cost for _, _, step_cost in plan_so_far)
        cost = text_cost(text) + plan_cost

        if cost < best_cost:
            best_cost = cost
            best_version = text
            best_plan = list(plan_so_far)

        # A step that changes nothing means there is nothing left to try.
        if text == prevtext:
            return best_version, best_plan
|
||||
|
||||
|
||||
def fix_one_step_and_explain(text):
    """
    Perform a single step of re-decoding text that's been decoded
    incorrectly.

    Returns a `(text, plan)` tuple: the re-decoded text, plus a list of
    `(operation, encoding, cost)` steps that reproduces what was done. The
    plan is empty when the text is returned unchanged.

    Raises UnicodeError if `text` is a bytes object.
    """
    if isinstance(text, bytes):
        raise UnicodeError(BYTES_ERROR_TEXT)
    if len(text) == 0:
        return text, []

    # The first plan is to return ASCII text unchanged.
    if possible_encoding(text, 'ascii'):
        return text, []

    # While working through the next step, remember the encodings the text
    # fits but that didn't lead to a fix. We may need them later.
    possible_1byte_encodings = []

    # Suppose the text was supposed to be UTF-8, but it was decoded using
    # a single-byte encoding instead. When these cases can be fixed, they
    # are usually the correct thing to do, so try them next.
    for encoding in CHARMAP_ENCODINGS:
        if not possible_encoding(text, encoding):
            continue

        encoded_bytes = text.encode(encoding)
        encode_step = ('encode', encoding, ENCODING_COSTS.get(encoding, 0))
        transcode_steps = []

        # Now, find out if it's UTF-8 (or close enough). Otherwise,
        # remember the encoding for later.
        try:
            decoding = 'utf-8'
            # Check encoded_bytes for sequences that would be UTF-8,
            # except they have b' ' where b'\xa0' would belong.
            if ALTERED_UTF8_RE.search(encoded_bytes):
                encoded_bytes = restore_byte_a0(encoded_bytes)
                cost = encoded_bytes.count(b'\xa0') * 2
                transcode_steps.append(('transcode', 'restore_byte_a0', cost))

            # Check for the byte 0x1a, which indicates where one of our
            # 'sloppy' codecs found a replacement character.
            if encoding.startswith('sloppy') and b'\x1a' in encoded_bytes:
                encoded_bytes = replace_lossy_sequences(encoded_bytes)
                transcode_steps.append(('transcode', 'replace_lossy_sequences', 0))

            if b'\xed' in encoded_bytes or b'\xc0' in encoded_bytes:
                decoding = 'utf-8-variants'

            decode_step = ('decode', decoding, 0)
            steps = [encode_step] + transcode_steps + [decode_step]
            fixed = encoded_bytes.decode(decoding)
            return fixed, steps

        except UnicodeDecodeError:
            possible_1byte_encodings.append(encoding)

    # Look for a-hat-euro sequences that remain, and fix them in isolation.
    if PARTIAL_UTF8_PUNCT_RE.search(text):
        steps = [('transcode', 'fix_partial_utf8_punct_in_1252', 1)]
        fixed = fix_partial_utf8_punct_in_1252(text)
        return fixed, steps

    # The next most likely case is that this is Latin-1 that was intended to
    # be read as Windows-1252, because those two encodings in particular are
    # easily confused.
    if 'latin-1' in possible_1byte_encodings:
        # NOTE(review): the list above is only ever filled from
        # CHARMAP_ENCODINGS; if that list spells this encoding
        # 'sloppy-windows-1252', this test can never be true -- confirm.
        if 'windows-1252' in possible_1byte_encodings:
            # This text is in the intersection of Latin-1 and
            # Windows-1252, so it's probably legit.
            return text, []
        else:
            # Otherwise, it means we have characters that are in Latin-1 but
            # not in Windows-1252. Those are C1 control characters. Nobody
            # wants those. Assume they were meant to be Windows-1252. Don't
            # use the sloppy codec, because bad Windows-1252 characters are
            # a bad sign.
            encoded = text.encode('latin-1')
            try:
                fixed = encoded.decode('windows-1252')
                steps = []
                if fixed != text:
                    steps = [('encode', 'latin-1', 0),
                             ('decode', 'windows-1252', 1)]
                return fixed, steps
            except UnicodeDecodeError:
                # This text contained characters that don't even make sense
                # if you assume they were supposed to be Windows-1252. In
                # that case, let's not assume anything.
                pass

    # The cases that remain are mixups between two different single-byte
    # encodings, and not the common case of Latin-1 vs. Windows-1252.
    #
    # These cases may be unsolvable without adding false positives, though
    # I have vague ideas about how to optionally address them in the future.

    # Return the text unchanged; the plan is empty.
    return text, []
|
||||
|
||||
|
||||
def apply_plan(text, plan):
    """
    Apply a plan for fixing the encoding of text, and return the result.

    The plan is a list of tuples of the form (operation, encoding, cost):

    - `operation` is 'encode' if it turns a string into bytes, 'decode' if it
      turns bytes into a string, and 'transcode' if it keeps the type the same.
    - `encoding` is the name of the encoding to use, such as 'utf-8' or
      'latin-1', or the function name in the case of 'transcode'.
    - The `cost` does not affect how the plan itself works. It's used by other
      users of plans, namely `fix_encoding_and_explain`, which has to decide
      *which* plan to use.

    Raises ValueError for an operation that is not one of the three above,
    or for a 'transcode' step whose name is not a key of TRANSCODERS.
    """
    current = text
    for operation, encoding, _cost in plan:
        if operation == 'encode':
            current = current.encode(encoding)
            continue
        if operation == 'decode':
            current = current.decode(encoding)
            continue
        if operation != 'transcode':
            raise ValueError("Unknown plan step: %s" % operation)
        if encoding not in TRANSCODERS:
            raise ValueError("Unknown transcode operation: %s" % encoding)
        current = TRANSCODERS[encoding](current)

    return current
|
||||
|
||||
|
||||
# An ampersand, an optional '#', up to eight word characters, and a
# semicolon.
HTML_ENTITY_RE = re.compile(r"&#?\w{0,8};")


def unescape_html(text):
    """
    Decode all three types of HTML entities/character references: decimal
    (`&#NNN;`), hexadecimal (`&#xHHH;`), and named (`&name;`).

    Code by Fredrik Lundh of effbot.org. Rob Speer made a slight change
    to it for efficiency: it won't match entities longer than 8 characters,
    because there are no valid entities like that.

    Anything that looks like an entity but can't be decoded is left as is.

        >>> print(unescape_html('&lt;tag&gt;'))
        <tag>
    """
    def fixup(match):
        """
        Replace one matched HTML entity with the character it represents,
        if possible.
        """
        entity = match.group(0)

        if entity[:2] != "&#":
            # Named entity: look it up without the leading ampersand.
            try:
                return entities[entity[1:]]
            except KeyError:
                return entity  # leave as is

        # Numeric character reference.
        try:
            if entity[:3] == "&#x":
                codept = int(entity[3:-1], 16)
            else:
                codept = int(entity[2:-1])
            if 0x80 <= codept < 0xa0:
                # Decode this range of characters as Windows-1252, as Web
                # browsers do in practice.
                return unichr(codept).encode('latin-1').decode('sloppy-windows-1252')
            return unichr(codept)
        except ValueError:
            return entity  # leave as is

    return HTML_ENTITY_RE.sub(fixup, text)
|
||||
|
||||
|
||||
# ESC, '[', any run of digits and semicolons, then a single ASCII letter.
ANSI_RE = re.compile('\033\\[((?:\\d|;)*)([a-zA-Z])')


def remove_terminal_escapes(text):
    r"""
    Strip out "ANSI" terminal escape sequences, such as those that produce
    colored text on Unix.

        >>> print(remove_terminal_escapes(
        ...     "\033[36;44mI'm blue, da ba dee da ba doo...\033[0m"
        ... ))
        I'm blue, da ba dee da ba doo...
    """
    stripped = ANSI_RE.sub('', text)
    return stripped
|
||||
|
||||
|
||||
def uncurl_quotes(text):
    r"""
    Replace curly quotation marks with straight equivalents.

    Characters matched by DOUBLE_QUOTE_RE become `"`, then characters
    matched by SINGLE_QUOTE_RE become `'`.

        >>> print(uncurl_quotes('\u201chere\u2019s a test\u201d'))
        "here's a test"
    """
    straight_doubles = DOUBLE_QUOTE_RE.sub('"', text)
    return SINGLE_QUOTE_RE.sub("'", straight_doubles)
|
||||
|
||||
|
||||
def fix_latin_ligatures(text):
    """
    Replace single-character ligatures of Latin letters, such as 'ﬁ', with the
    characters that they contain, as in 'fi'. Latin ligatures are usually not
    intended in text strings (though they're lovely in *rendered* text). If
    you have such a ligature in your string, it is probably a result of a
    copy-and-paste glitch.

    We leave ligatures in other scripts alone to be safe. They may be intended,
    and removing them may lose information. If you want to take apart nearly
    all ligatures, use NFKC normalization.

        >>> print(fix_latin_ligatures("ﬂuﬃest"))
        fluffiest
    """
    # LIGATURES is a translate mapping from ligature to replacement string.
    ligature_table = LIGATURES
    return text.translate(ligature_table)
|
||||
|
||||
|
||||
def fix_character_width(text):
    """
    The ASCII characters, katakana, and Hangul characters have alternate
    "halfwidth" or "fullwidth" forms that help text line up in a grid.

    If you don't need these width properties, you probably want to replace
    these characters with their standard form, which is what this function
    does.

    Note that this replaces the ideographic space, U+3000, with the ASCII
    space, U+20.

        >>> print(fix_character_width("ＬＯＵＤ　ＮＯＩＳＥＳ"))
        LOUD NOISES
        >>> print(fix_character_width("Ｕﾀｰﾝ"))   # this means "U-turn"
        Uターン
    """
    # WIDTH_MAP is a translate mapping from width variant to standard form.
    width_table = WIDTH_MAP
    return text.translate(width_table)
|
||||
|
||||
|
||||
def fix_line_breaks(text):
    r"""
    Convert all line breaks to Unix style.

    This will convert the following sequences into the standard \\n
    line break:

    - CRLF (\\r\\n), used on Windows and in some communication
      protocols
    - CR (\\r), once used on Mac OS Classic, and now kept alive
      by misguided software such as Microsoft Office for Mac
    - LINE SEPARATOR (\\u2028) and PARAGRAPH SEPARATOR (\\u2029),
      defined by Unicode and used to sow confusion and discord
    - NEXT LINE (\\x85), a C1 control character that is certainly
      not what you meant

    The NEXT LINE character is a bit of an odd case, because it
    usually won't show up if `fix_encoding` is also being run.
    \\x85 is very common mojibake for \\u2026, HORIZONTAL ELLIPSIS.

        >>> print(fix_line_breaks(
        ...     "This string is made of two things:\u2029"
        ...     "1. Unicode\u2028"
        ...     "2. Spite"
        ... ))
        This string is made of two things:
        1. Unicode
        2. Spite

    For further testing and examples, let's define a function to make sure
    we can see the control characters in their escaped form:

        >>> def eprint(text):
        ...     print(text.encode('unicode-escape').decode('ascii'))

        >>> eprint(fix_line_breaks("Content-type: text/plain\r\n\r\nHi."))
        Content-type: text/plain\n\nHi.

        >>> eprint(fix_line_breaks("This is how Microsoft \r trolls Mac users"))
        This is how Microsoft \n trolls Mac users

        >>> eprint(fix_line_breaks("What is this \x85 I don't even"))
        What is this \n I don't even
    """
    # CRLF has to be handled before the lone CR, or each CRLF would turn
    # into two line breaks.
    for line_break in ('\r\n', '\r', '\u2028', '\u2029', '\u0085'):
        text = text.replace(line_break, '\n')
    return text
|
||||
|
||||
|
||||
# Any single surrogate codepoint, U+D800 to U+DFFF.
SURROGATE_RE = re.compile('[\ud800-\udfff]')
# A surrogate from U+D800-U+DBFF directly followed by one from
# U+DC00-U+DFFF.
SURROGATE_PAIR_RE = re.compile('[\ud800-\udbff][\udc00-\udfff]')
|
||||
|
||||
|
||||
def convert_surrogate_pair(match):
|
||||
"""
|
||||
Convert a surrogate pair to the single codepoint it represents.
|
||||
|
||||
This implements the formula described at:
|
||||
http://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates
|
||||
"""
|
||||
pair = match.group(0)
|
||||
codept = 0x10000 + (ord(pair[0]) - 0xd800) * 0x400 + (ord(pair[1]) - 0xdc00)
|
||||
return unichr(codept)
|
||||
|
||||
|
||||
def fix_surrogates(text):
    """
    Resolve 16-bit surrogate codepoints found in `text`.

    A high surrogate followed by a low surrogate is replaced by the single
    character the pair stands for. Any surrogate left over after that is
    replaced by U+FFFD, the replacement character.

        >>> high_surrogate = unichr(0xd83d)
        >>> low_surrogate = unichr(0xdca9)
        >>> print(fix_surrogates(high_surrogate + low_surrogate))
        💩
        >>> print(fix_surrogates(low_surrogate + high_surrogate))
        ��

    The above doctest had to be very carefully written, because even putting
    the Unicode escapes of the surrogates in the docstring was causing
    various tools to fail, which I think just goes to show why this fixer is
    necessary.
    """
    if SURROGATE_RE.search(text) is None:
        # No surrogates at all: hand the text back untouched.
        return text
    paired = SURROGATE_PAIR_RE.sub(convert_surrogate_pair, text)
    return SURROGATE_RE.sub('\ufffd', paired)
|
||||
|
||||
|
||||
def remove_control_chars(text):
    """
    Strip control characters that were probably never meant to be in the
    text, by translating it through the module-level `CONTROL_CHARS` table.
    Many of them are listed in the table of "Characters not suitable for use
    with markup" at http://www.unicode.org/reports/tr20/tr20-9.html.

    Characters that are removed:

    - ASCII control characters other than the important whitespace ones
      (U+00 to U+08, U+0B, U+0E to U+1F, U+7F)
    - Deprecated Arabic control characters (U+206A to U+206F)
    - Interlinear annotation characters (U+FFF9 to U+FFFB)
    - The Object Replacement Character (U+FFFC)
    - The byte order mark (U+FEFF)
    - Musical notation control characters (U+1D173 to U+1D17A)
    - Tag characters (U+E0000 to U+E007F)

    Similar characters that are deliberately kept:

    - Control characters that produce whitespace (U+09, U+0A, U+0C, U+0D,
      U+2028, and U+2029)
    - C1 control characters (U+80 to U+9F) -- they are basically never used
      on purpose, but they are important clues about what mojibake has
      happened
    - Control characters that affect glyph rendering, such as joiners and
      right-to-left marks (U+200C to U+200F, U+202A to U+202E)
    """
    cleaned = text.translate(CONTROL_CHARS)
    return cleaned
|
||||
|
||||
|
||||
def remove_bom(text):
    r"""
    Drop any byte-order marks (U+FEFF) from the start of `text`, where they
    end up when a BOM is mistakenly decoded as part of the content.

        >>> print(remove_bom("\ufeffWhere do you want to go today?"))
        Where do you want to go today?
    """
    byte_order_mark = unichr(0xfeff)
    return text.lstrip(byte_order_mark)
|
||||
|
||||
|
||||
# A pattern that recognizes each form of backslash escape sequence that is
# valid inside a Python string literal.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U........      # 8-digit hex escapes
    | \\u....          # 4-digit hex escapes
    | \\x..            # 2-digit hex escapes
    | \\[0-7]{1,3}     # Octal escapes
    | \\N\{[^}]+\}     # Unicode characters by name
    | \\[\\'"abfnrtv]  # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(text):
    r"""
    Interpret the backslash escape sequences in `text` -- including \\x,
    \\u, and \\U character references -- even when the text also contains
    literal non-ASCII characters.

    Python's "string-escape" and "unicode-escape" codecs were meant for this
    job, but unlike them, this actually works. The result is the same string
    the Python interpreter would produce from the equivalent string literal.

        >>> factoid = '\\u20a1 is the currency symbol for the colón.'
        >>> print(factoid[1:])
        u20a1 is the currency symbol for the colón.
        >>> print(decode_escapes(factoid))
        ₡ is the currency symbol for the colón.

    Python itself can read a string literal that mixes escapes with literal
    Unicode -- you're looking at one right now -- but the "unicode-escape"
    codec doesn't work on literal Unicode. (See
    http://stackoverflow.com/a/24519338/773754 for more details.)

    So instead of decoding the whole string, this function finds only the
    substrings that are escape sequences and decodes each one on its own,
    leaving everything else alone. Every valid escape sequence is made of
    ASCII characters, which is what lets "unicode-escape" handle them
    correctly.

    `ftfy.fix_text` cannot apply this fix automatically: escaped text is not
    necessarily a mistake, and there is no way to distinguish text that's
    supposed to be escaped from text that isn't.
    """
    return ESCAPE_SEQUENCE_RE.sub(
        lambda found: codecs.decode(found.group(0), 'unicode-escape'),
        text
    )
|
||||
|
||||
|
||||
def restore_byte_a0(byts):
    """
    Undo a common corruption of mojibake in which some process decided that
    byte A0 was "basically a space" and replaced it with an ASCII space
    (byte 20). When that A0 was part of a sequence we intend to decode as
    UTF-8, the substitution makes the sequence fail to decode.

    Every sequence matched by `ALTERED_UTF8_RE` -- one that would
    convincingly decode as UTF-8 if byte 20 were changed to A0 -- gets its
    byte 20 changed back to A0. For the purpose of deciding whether this is
    a good idea, this step gets a cost of twice the number of bytes that are
    changed.

    This is used as a step within `fix_encoding`.
    """
    return ALTERED_UTF8_RE.sub(
        lambda found: found.group(0).replace(b'\x20', b'\xa0'),
        byts
    )
|
||||
|
||||
|
||||
def replace_lossy_sequences(byts):
    """
    Find sequences where a "sloppy" codec has lost information, marked by
    byte 1A, and replace each one that otherwise looks like a UTF-8 sequence
    with the UTF-8 encoding of U+FFFD.

    Background:

    Some text can only be fixed incompletely because the fix cannot be
    applied to the entire string. A very common case is text that was
    erroneously decoded as windows-1252 by a decoder that, unlike ftfy's
    "sloppy" windows-1252, does not pass unassigned bytes through but turns
    them into U+FFFD (�), so we can't tell what they were.

    This most commonly happens with curly quotation marks that appear
    ``â€œ like this â€�``.

    To handle this, ftfy's "sloppy codecs" accept less-sloppy but more-lossy
    text: when they encounter the character ``�``, instead of refusing to
    encode it, they encode it as byte 1A -- an ASCII control code called
    SUBSTITUTE that once was meant for about the same purpose. This function
    is the fixer that then looks for UTF-8 sequences in which some
    continuation bytes have been replaced by byte 1A, and decodes the whole
    sequence as �; if that doesn't work, the byte just turns back into �
    itself.

    As a result, the above text ``â€œ like this â€�`` will decode as
    ``“ like this �``.

    If U+1A was actually in the original string, then the sloppy codecs will
    not be used and this function will not be run, so the control character
    is left alone, but fixes like this one won't be possible.

    This is used as a step within `fix_encoding`.
    """
    replacement_bytes = '\ufffd'.encode('utf-8')
    return LOSSY_UTF8_RE.sub(replacement_bytes, byts)
|
||||
|
||||
|
||||
def fix_partial_utf8_punct_in_1252(text):
    """
    Repair specific characters that turn up in the wild encoded as UTF-8 but
    decoded as Latin-1 or Windows-1252, even in text where that fix can't be
    applied consistently to the whole string.

    The text is assumed to have been decoded as Windows-1252. Text that was
    decoded as Latin-1 reaches this function right after it has gone through
    the Latin-1-to-Windows-1252 fixer.

    Each span matched by `PARTIAL_UTF8_PUNCT_RE` is encoded with
    'sloppy-windows-1252' and the resulting bytes are decoded as UTF-8.

    This is used as a step within `fix_encoding`.
    """
    return PARTIAL_UTF8_PUNCT_RE.sub(
        lambda found: found.group(0).encode('sloppy-windows-1252').decode('utf-8'),
        text
    )
|
||||
|
||||
|
||||
# Lookup table from the name of each transcoding step to the function that
# implements it.
TRANSCODERS = dict([
    ('restore_byte_a0', restore_byte_a0),
    ('replace_lossy_sequences', replace_lossy_sequences),
    ('fix_partial_utf8_punct_in_1252', fix_partial_utf8_punct_in_1252),
])
|
||||
@@ -0,0 +1,154 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This module provides functions for justifying Unicode text in a monospaced
|
||||
display such as a terminal.
|
||||
|
||||
We used to have our own implementation here, but now we mostly rely on
|
||||
the 'wcwidth' library.
|
||||
"""
|
||||
from __future__ import unicode_literals, division
|
||||
from unicodedata import normalize
|
||||
from wcwidth import wcwidth, wcswidth
|
||||
|
||||
|
||||
def character_width(char):
    r"""
    Report how many cells a single character is likely to take up in a
    monospaced terminal, as computed by `wcwidth`. A printable character
    always has a width of 0, 1, or 2.

    Nonprintable or control characters give -1, a convention that comes
    from wcwidth.

        >>> character_width('車')
        2
        >>> character_width('A')
        1
        >>> character_width('\N{ZERO WIDTH JOINER}')
        0
        >>> character_width('\n')
        -1
    """
    cells = wcwidth(char)
    return cells
|
||||
|
||||
|
||||
def monospaced_width(text):
    """
    Compute the "display width" of a string: the number of character cells
    it is likely to occupy when displayed in a monospaced, modern,
    Unicode-aware terminal emulator.

    This is useful for laying out text that may contain non-spacing
    characters, or CJK characters that take up two character cells.

    The result is -1 if the string contains a non-printable or control
    character.

        >>> monospaced_width('ちゃぶ台返し')
        12
        >>> len('ちゃぶ台返し')
        6
        >>> monospaced_width('owl\N{SOFT HYPHEN}flavored')
        12
        >>> monospaced_width('example\x80')
        -1

        # The Korean word 'ibnida' can be written with 3 characters or 7 jamo.
        # Either way, it *looks* the same and takes up 6 character cells.
        >>> monospaced_width('입니다')
        6
        >>> monospaced_width('\u110b\u1175\u11b8\u1102\u1175\u1103\u1161')
        6
    """
    # Composing the text to NFC first means Hangul jamo need no special
    # handling: they are measured as the syllables they combine into.
    composed = normalize('NFC', text)
    return wcswidth(composed)
|
||||
|
||||
|
||||
def display_ljust(text, width, fillchar=' '):
    """
    Left-justify `text`: append copies of `fillchar` until the string's
    display width in a monospaced terminal is at least `width` character
    cells. `fillchar` must be a width-1 character, or ValueError is raised.

    "Left" here means toward the beginning of the string, which may actually
    appear on the right in an RTL context. This is similar to the use of the
    word "left" in "left parenthesis".

        >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
        >>> for line in lines:
        ...     print(display_ljust(line, 20, '▒'))
        Table flip▒▒▒▒▒▒▒▒▒▒
        (╯°□°)╯︵ ┻━┻▒▒▒▒▒▒▒
        ちゃぶ台返し▒▒▒▒▒▒▒▒

    This example, and the similar ones that follow, should come out justified
    correctly when viewed in a monospaced terminal. It will probably not look
    correct if you're viewing this code or documentation in a Web browser.
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    occupied = monospaced_width(text)
    if occupied == -1:
        # The text has a control character in it, so its width is unknown;
        # return it without padding.
        return text

    shortfall = width - occupied
    if shortfall <= 0:
        return text
    return text + fillchar * shortfall
|
||||
|
||||
|
||||
def display_rjust(text, width, fillchar=' '):
    """
    Right-justify `text`: prepend copies of `fillchar` until the string's
    display width in a monospaced terminal is at least `width` character
    cells. `fillchar` must be a width-1 character, or ValueError is raised.

    "Right" here means toward the end of the string, which may actually be on
    the left in an RTL context. This is similar to the use of the word "right"
    in "right parenthesis".

        >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
        >>> for line in lines:
        ...     print(display_rjust(line, 20, '▒'))
        ▒▒▒▒▒▒▒▒▒▒Table flip
        ▒▒▒▒▒▒▒(╯°□°)╯︵ ┻━┻
        ▒▒▒▒▒▒▒▒ちゃぶ台返し
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    occupied = monospaced_width(text)
    if occupied == -1:
        # Width is unknown because of a control character; add no padding.
        return text

    shortfall = width - occupied
    if shortfall <= 0:
        return text
    return fillchar * shortfall + text
|
||||
|
||||
|
||||
def display_center(text, width, fillchar=' '):
    """
    Center `text`: surround it with copies of `fillchar` until the string's
    display width in a monospaced terminal is at least `width` character
    cells. `fillchar` must be a width-1 character, or ValueError is raised.
    When the padding cannot be split evenly, the extra cell goes after the
    text.

        >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
        >>> for line in lines:
        ...     print(display_center(line, 20, '▒'))
        ▒▒▒▒▒Table flip▒▒▒▒▒
        ▒▒▒(╯°□°)╯︵ ┻━┻▒▒▒▒
        ▒▒▒▒ちゃぶ台返し▒▒▒▒
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    occupied = monospaced_width(text)
    if occupied == -1:
        # Width is unknown because of a control character; add no padding.
        return text

    total_fill = max(0, width - occupied)
    before, leftover = divmod(total_fill, 2)
    after = before + leftover
    return ''.join([fillchar * before, text, fillchar * after])
|
||||
@@ -0,0 +1,47 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This file defines a general method for evaluating ftfy using data that arrives
|
||||
in a stream. A concrete implementation of it is found in `twitter_tester.py`.
|
||||
"""
|
||||
from __future__ import print_function, unicode_literals
|
||||
from ftfy import fix_text
|
||||
from ftfy.fixes import fix_encoding, unescape_html
|
||||
from ftfy.chardata import possible_encoding
|
||||
|
||||
|
||||
class StreamTester:
    """
    Take in a sequence of texts, and show the ones that will be changed by
    ftfy. This will also periodically show updates, such as the proportion of
    texts that changed.

    Attributes:
        num_fixed: how many of the texts seen so far were changed by ftfy.
        count: how many texts have been passed to `check_ftfy` so far.
    """
    def __init__(self):
        self.num_fixed = 0
        self.count = 0

    def check_ftfy(self, text, encoding_only=True):
        """
        Given a single text input, check whether ftfy would change it. If so,
        display the change.

        HTML entities in `text` are unescaped first. Text that could be
        plain ASCII is counted but not examined further.

        Parameters:
            text: the text to examine.
            encoding_only: when true (the default), only `fix_encoding` is
                applied; otherwise the text goes through `fix_text` with
                `uncurl_quotes` and `fix_character_width` turned off.

        Output goes to standard output: the before/after pair for each text
        that changed, a "Not fixed" line for unchanged text that still
        contains telltale mojibake, a progress dot every 100 texts, and a
        summary line every 10000 texts.
        """
        # Imported locally so this method does not depend on the module's
        # import block.
        import sys

        self.count += 1
        text = unescape_html(text)
        if not possible_encoding(text, 'ascii'):
            if encoding_only:
                fixed = fix_encoding(text)
            else:
                fixed = fix_text(text, uncurl_quotes=False, fix_character_width=False)
            if text != fixed:
                # possibly filter common bots before printing
                print('\nText:\t{text!r}\nFixed:\t{fixed!r}\n'.format(
                    text=text, fixed=fixed
                ))
                self.num_fixed += 1
            elif 'â€' in text or '\x80' in text:
                print('\nNot fixed:\t{text!r}'.format(text=text))

        # Print status updates once in a while
        if self.count % 100 == 0:
            # Flush explicitly instead of passing `flush=True`: that keyword
            # was added to print() in Python 3.3 and is a TypeError under
            # Python 2's print_function.
            print('.', end='')
            sys.stdout.flush()
        if self.count % 10000 == 0:
            print('\n%d/%d fixed' % (self.num_fixed, self.count))
|
||||
@@ -0,0 +1,72 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
Do what is necessary to authenticate this tester as a Twitter "app", using
|
||||
somebody's Twitter account.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import os
|
||||
|
||||
|
||||
# Where the user's OAuth token is cached between runs.
AUTH_TOKEN_PATH = os.path.expanduser('~/.cache/oauth/twitter_ftfy.auth')


def get_auth():
    """
    Build the OAuth credentials that let this tester talk to the Twitter API
    as an "app" on behalf of a user.

    If a token file already exists at `AUTH_TOKEN_PATH`, the user's token is
    read from it. Otherwise its directory is created if necessary and the
    interactive `oauth_dance` is run, which stores the new token at that
    path. Either way, the result is an `OAuth` object combining the user's
    token with the app's consumer key and secret.

    About the "consumer secret": Twitter requires that the app know this
    token while the user of the app does not. That is, of course,
    impossible. It's equivalent to DRM: your computer can't *really* make use
    of secret information while hiding the same information from you.

    The apparent threat is that someone with the token could impersonate the
    app while doing something different. But anyone can do that anyway,
    because they *have the source code* and can change it. They still have to
    log in as a particular user, who has a token that's actually secret.

    Even developers of closed-source applications that use the Twitter API
    are unsure what to do, for good reason. These "secrets" are not secret in
    any cryptographic sense, and the secret tokens for every popular Twitter
    app are already posted on the Web.

    Twitter wants us to pretend this string can be kept secret, and hide it
    behind a fig leaf like everybody else does. So that's what we've done.
    """

    from twitter.oauth import OAuth
    from twitter import oauth_dance, read_token_file

    def unhide(secret):
        """
        Do something mysterious and exactly as secure as every other Twitter
        app: shift each character of `secret` down by 0x2800.
        """
        return ''.join(chr(ord(c) - 0x2800) for c in secret)

    fig_leaf = '⠴⡹⠹⡩⠶⠴⡶⡅⡂⡩⡅⠳⡏⡉⡈⠰⠰⡹⡥⡶⡈⡐⡍⡂⡫⡍⡗⡬⡒⡧⡶⡣⡰⡄⡧⡸⡑⡣⠵⡓⠶⠴⡁'
    consumer_key = 'OFhyNd2Zt4Ba6gJGJXfbsw'
    consumer_secret = unhide(fig_leaf)

    if not os.path.exists(AUTH_TOKEN_PATH):
        # First run: make room for the token file, then ask the user to
        # authorize the app.
        authdir = os.path.dirname(AUTH_TOKEN_PATH)
        if not os.path.exists(authdir):
            os.makedirs(authdir)
        token, token_secret = oauth_dance(
            app_name='ftfy-tester',
            consumer_key=consumer_key,
            consumer_secret=consumer_secret,
            token_filename=AUTH_TOKEN_PATH
        )
    else:
        token, token_secret = read_token_file(AUTH_TOKEN_PATH)

    return OAuth(
        token=token,
        token_secret=token_secret,
        consumer_key=consumer_key,
        consumer_secret=consumer_secret
    )
|
||||
@@ -0,0 +1,88 @@
|
||||
"""
|
||||
Implements a StreamTester that runs over Twitter data. See the class
|
||||
docstring.
|
||||
|
||||
This module is written for Python 3 only. The __future__ imports you see here
|
||||
are just to let Python 2 scan the file without crashing with a SyntaxError.
|
||||
"""
|
||||
from __future__ import print_function, unicode_literals
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from ftfy.streamtester import StreamTester
|
||||
|
||||
|
||||
class TwitterTester(StreamTester):
    """
    This class uses the StreamTester code (defined in `__init__.py`) to
    evaluate ftfy's real-world performance, by feeding it live data from
    Twitter.

    This is a semi-manual evaluation. It requires a human to look at the
    results and determine if they are good. The three possible cases we
    can see here are:

        - Success: the process takes in mojibake and outputs correct text.
        - False positive: the process takes in correct text, and outputs
          mojibake. Every false positive should be considered a bug, and
          reported on GitHub if it isn't already.
        - Confusion: the process takes in mojibake and outputs different
          mojibake. Not a great outcome, but not as dire as a false
          positive.

    This tester cannot reveal false negatives. So far, that can only be
    done by the unit tests.
    """
    # Directory that receives the per-language tweet logs.
    OUTPUT_DIR = './twitterlogs'

    def __init__(self):
        # Tweets collected since the last save, keyed by language code.
        self.lines_by_lang = defaultdict(list)
        super().__init__()

    def save_files(self):
        """
        When processing data from live Twitter, save it to log files so that
        it can be replayed later.

        Each language's tweets are appended, one per line (newlines inside a
        tweet become spaces), to `tweets.<lang>.txt` in `OUTPUT_DIR`. The
        in-memory buffer is emptied afterwards.
        """
        # exist_ok avoids the race between checking for the directory and
        # creating it.
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)
        for lang, lines in self.lines_by_lang.items():
            filename = 'tweets.{}.txt'.format(lang)
            fullname = os.path.join(self.OUTPUT_DIR, filename)
            # The context manager closes the file even if a write fails.
            with open(fullname, 'a', encoding='utf-8') as langfile:
                for line in lines:
                    print(line.replace('\n', ' '), file=langfile)
        self.lines_by_lang = defaultdict(list)

    def run_sample(self):
        """
        Listen to live data from Twitter, and pass on the fully-formed tweets
        to `check_ftfy`. This requires the `twitter` Python package as a
        dependency.

        Tweets that carry user information are also buffered by the user's
        language, and the buffers are written out with `save_files` whenever
        the running count is 100 past a multiple of 10000.
        """
        from twitter import TwitterStream
        from ftfy.streamtester.oauth import get_auth
        twitter_stream = TwitterStream(auth=get_auth())
        iterator = twitter_stream.statuses.sample()
        for tweet in iterator:
            # Stream items without a 'text' field are skipped.
            if 'text' in tweet:
                self.check_ftfy(tweet['text'])
                if 'user' in tweet:
                    lang = tweet['user'].get('lang', 'NONE')
                    self.lines_by_lang[lang].append(tweet['text'])
                if self.count % 10000 == 100:
                    self.save_files()
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point: connect to the Twitter stream and run a
    TwitterTester over it forever. Or at least until the stream drops.
    """
    TwitterTester().run_sample()


if __name__ == '__main__':
    main()
|
||||
@@ -33,8 +33,6 @@ def guess_filename(filename, options):
|
||||
if not options.get('yaml') and not options.get('json') and not options.get('show_property'):
|
||||
print('For:', filename)
|
||||
|
||||
options['implicit'] = True # Force implicit option in CLI
|
||||
|
||||
guess = api.guessit(filename, options)
|
||||
|
||||
if options.get('show_property'):
|
||||
|
||||
@@ -4,4 +4,4 @@
|
||||
Version module
|
||||
"""
|
||||
# pragma: no cover
|
||||
__version__ = '2.1.3.dev0'
|
||||
__version__ = '2.1.4'
|
||||
|
||||
@@ -126,7 +126,8 @@ class GuessItApi(object):
|
||||
for match in matches:
|
||||
if isinstance(match.value, six.text_type):
|
||||
match.value = match.value.encode("ascii")
|
||||
return matches.to_dict(options.get('advanced', False), options.get('implicit', False))
|
||||
return matches.to_dict(options.get('advanced', False), options.get('single_value', False),
|
||||
options.get('enforce_list', False))
|
||||
except:
|
||||
raise GuessitException(string, options)
|
||||
|
||||
|
||||
@@ -54,6 +54,10 @@ def build_argument_parser():
|
||||
help='Display the value of a single property (title, series, video_codec, year, ...)')
|
||||
output_opts.add_argument('-a', '--advanced', dest='advanced', action='store_true', default=None,
|
||||
help='Display advanced information for filename guesses, as json output')
|
||||
output_opts.add_argument('-s', '--single-value', dest='single_value', action='store_true', default=None,
|
||||
help='Keep only first value found for each property')
|
||||
output_opts.add_argument('-l', '--enforce-list', dest='enforce_list', action='store_true', default=None,
|
||||
help='Wrap each found value in a list even when property has a single value')
|
||||
output_opts.add_argument('-j', '--json', dest='json', action='store_true', default=None,
|
||||
help='Display information for filename guesses as json output')
|
||||
output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=None,
|
||||
|
||||
@@ -39,12 +39,12 @@ def audio_codec():
|
||||
rebulk.defaults(name="audio_codec", conflict_solver=audio_codec_priority)
|
||||
|
||||
rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
|
||||
rebulk.regex("Dolby", "DolbyDigital", "Dolby-Digital", "DDP?", value="DolbyDigital")
|
||||
rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='AC3')
|
||||
rebulk.regex("DolbyAtmos", "Dolby-Atmos", "Atmos", value="DolbyAtmos")
|
||||
rebulk.regex("AAC", value="AAC")
|
||||
rebulk.regex("AC3D?", value="AC3")
|
||||
rebulk.regex("Flac", value="FLAC")
|
||||
rebulk.regex("DTS", value="DTS")
|
||||
rebulk.string("AAC", value="AAC")
|
||||
rebulk.string('EAC3', 'DDP', 'DD+', value="EAC3")
|
||||
rebulk.string("Flac", value="FLAC")
|
||||
rebulk.string("DTS", value="DTS")
|
||||
rebulk.regex("True-?HD", value="TrueHD")
|
||||
|
||||
rebulk.defaults(name="audio_profile")
|
||||
|
||||
@@ -34,15 +34,17 @@ def container():
|
||||
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
|
||||
'iso', 'vob']
|
||||
torrent = ['torrent']
|
||||
nzb = ['nzb']
|
||||
|
||||
rebulk.regex(r'\.'+build_or_pattern(subtitles)+'$', exts=subtitles, tags=['extension', 'subtitle'])
|
||||
rebulk.regex(r'\.'+build_or_pattern(info)+'$', exts=info, tags=['extension', 'info'])
|
||||
rebulk.regex(r'\.'+build_or_pattern(videos)+'$', exts=videos, tags=['extension', 'video'])
|
||||
rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
|
||||
rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])
|
||||
|
||||
rebulk.defaults(name='container',
|
||||
validator=seps_surround,
|
||||
formatter=lambda s: s.upper(),
|
||||
formatter=lambda s: s.lower(),
|
||||
conflict_solver=lambda match, other: match
|
||||
if other.name in ['format',
|
||||
'video_codec'] or other.name == 'container' and 'extension' in other.tags
|
||||
@@ -51,5 +53,6 @@ def container():
|
||||
rebulk.string(*[sub for sub in subtitles if sub not in ['sub']], tags=['subtitle'])
|
||||
rebulk.string(*videos, tags=['video'])
|
||||
rebulk.string(*torrent, tags=['torrent'])
|
||||
rebulk.string(*nzb, tags=['nzb'])
|
||||
|
||||
return rebulk
|
||||
|
||||
@@ -24,12 +24,18 @@ def edition():
|
||||
conflict_solver=lambda match, other: other
|
||||
if other.name == 'episode_details' and other.value == 'Special'
|
||||
else '__default__')
|
||||
rebulk.string('SE', value='Special Edition', tags='has-neighbor')
|
||||
rebulk.string('se', value='Special Edition', tags='has-neighbor')
|
||||
rebulk.regex('criterion-edition', 'edition-criterion', value='Criterion Edition')
|
||||
rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe Edition')
|
||||
rebulk.regex('limited', 'limited-edition', value='Limited Edition')
|
||||
rebulk.regex('limited', 'limited-edition', value='Limited Edition', tags=['has-neighbor', 'release-group-prefix'])
|
||||
rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical Edition')
|
||||
rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
|
||||
value="Director's cut")
|
||||
value="Director's Cut")
|
||||
rebulk.regex('extended', 'extended-?cut', 'extended-?version',
|
||||
value='Extended', tags=['has-neighbor', 'release-group-prefix'])
|
||||
rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=['has-neighbor', 'release-group-prefix'])
|
||||
for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
|
||||
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
|
||||
rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
|
||||
|
||||
return rebulk
|
||||
|
||||
@@ -98,7 +98,7 @@ def episodes():
|
||||
episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
|
||||
or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)
|
||||
"""
|
||||
values = match.children.to_dict(implicit=True)
|
||||
values = match.children.to_dict()
|
||||
if 'season' in values and is_iterable(values['season']):
|
||||
# Season numbers must be in natural order to be validated.
|
||||
if not list(sorted(values['season'])) == values['season']:
|
||||
@@ -464,8 +464,21 @@ class RemoveWeakIfMovie(Rule):
|
||||
return context.get('type') != 'episode'
|
||||
|
||||
def when(self, matches, context):
|
||||
if matches.named('year'):
|
||||
return matches.tagged('weak-movie')
|
||||
to_remove = []
|
||||
to_ignore = set()
|
||||
remove = False
|
||||
for filepart in matches.markers.named('path'):
|
||||
year = matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'year', index=0)
|
||||
if year:
|
||||
remove = True
|
||||
next_match = matches.next(year, predicate=lambda m, fp=filepart: m.private and m.end <= fp.end, index=0)
|
||||
if next_match and not matches.at_match(next_match, predicate=lambda m: m.name == 'year'):
|
||||
to_ignore.add(next_match.initiator)
|
||||
|
||||
if remove:
|
||||
to_remove.extend(matches.tagged('weak-movie', predicate=lambda m: m.initiator not in to_ignore))
|
||||
|
||||
return to_remove
|
||||
|
||||
|
||||
class RemoveWeakIfSxxExx(Rule):
|
||||
|
||||
@@ -66,28 +66,27 @@ def other():
|
||||
rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
|
||||
|
||||
for value in (
|
||||
'Screener', 'Remux', 'Remastered', '3D', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC',
|
||||
'Screener', 'Remux', '3D', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC',
|
||||
'CC', 'LD', 'MD', 'XXX'):
|
||||
rebulk.string(value, value=value)
|
||||
|
||||
rebulk.string('LDTV', value='LD')
|
||||
rebulk.string('HD', value='HD', validator=None,
|
||||
tags=['streaming_service.prefix', 'streaming_service.suffix'])
|
||||
rebulk.regex('Full-?HD', 'FHD', value='FullHD', validator=None,
|
||||
tags=['streaming_service.prefix', 'streaming_service.suffix'])
|
||||
rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='UltraHD', validator=None,
|
||||
tags=['streaming_service.prefix', 'streaming_service.suffix'])
|
||||
|
||||
for value in ('Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', 'FINAL', 'Retail', 'Uncut',
|
||||
'Extended', 'Extended Cut', 'Colorized', 'Internal', 'Uncensored'):
|
||||
for value in ('Complete', 'Classic', 'LiNE', 'Bonus', 'Trailer', 'FINAL', 'Retail',
|
||||
'Colorized', 'Internal'):
|
||||
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
|
||||
rebulk.regex('Extended-?version', value='Extended', tags=['has-neighbor', 'release-group-prefix'])
|
||||
rebulk.regex('Alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=['has-neighbor', 'release-group-prefix'])
|
||||
rebulk.regex('Read-?NFO', value='Read NFO')
|
||||
rebulk.string('CONVERT', value='Converted', tags='has-neighbor')
|
||||
rebulk.string('DOCU', value='Documentary', tags='has-neighbor')
|
||||
rebulk.string('OM', value='Open Matte', tags='has-neighbor')
|
||||
rebulk.string('STV', value='Straight to Video', tags='has-neighbor')
|
||||
rebulk.string('OAR', value='Original Aspect Ratio', tags='has-neighbor')
|
||||
rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
|
||||
rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])
|
||||
|
||||
for coast in ('East', 'West'):
|
||||
|
||||
@@ -330,7 +330,7 @@
|
||||
screen_size: 720p
|
||||
format: WEB-DL
|
||||
audio_channels: "5.1"
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
video_codec: h264
|
||||
release_group: CtrlHD
|
||||
|
||||
@@ -356,7 +356,7 @@
|
||||
screen_size: 720p
|
||||
format: WEB-DL
|
||||
audio_channels: "5.1"
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
video_codec: h264
|
||||
release_group: CtrlHD
|
||||
|
||||
@@ -388,7 +388,7 @@
|
||||
screen_size: 720p
|
||||
format: WEB-DL
|
||||
audio_channels: "5.1"
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
video_codec: h264
|
||||
|
||||
? Game of Thrones S03E06 1080i HDTV DD5.1 MPEG2-TrollHD.ts
|
||||
@@ -398,7 +398,7 @@
|
||||
screen_size: 1080i
|
||||
format: HDTV
|
||||
audio_channels: "5.1"
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
video_codec: Mpeg2
|
||||
release_group: TrollHD
|
||||
|
||||
@@ -548,7 +548,7 @@
|
||||
screen_size: 720p
|
||||
season: 1
|
||||
video_profile: BP
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
|
||||
? Sleepy.Hollow.S01E09.720p.WEB-DL.DD5.1.H.264-BS.mkv
|
||||
: episode: 9
|
||||
@@ -559,7 +559,7 @@
|
||||
screen_size: 720p
|
||||
season: 1
|
||||
release_group: BS
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
|
||||
? Battlestar.Galactica.S00.Pilot.FRENCH.DVDRip.XviD-NOTAG.avi
|
||||
: title: Battlestar Galactica
|
||||
@@ -621,7 +621,7 @@
|
||||
streaming_service: Netflix
|
||||
format: WEBRip
|
||||
audio_channels: "5.1"
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
video_codec: h264
|
||||
release_group: NTb
|
||||
|
||||
@@ -1130,7 +1130,7 @@
|
||||
episode: 21
|
||||
episode_title: Al Sah-Him
|
||||
screen_size: 1080p
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: "5.1"
|
||||
video_codec: h264
|
||||
release_group: BS
|
||||
@@ -1167,7 +1167,7 @@
|
||||
audio_codec: AAC
|
||||
date: 2015-07-01
|
||||
format: WEBRip
|
||||
other: Extended
|
||||
edition: Extended
|
||||
release_group: BTW
|
||||
screen_size: 720p
|
||||
streaming_service: Comedy Central
|
||||
@@ -1653,7 +1653,7 @@
|
||||
|
||||
? The.Good.Wife.S06E01.E10.720p.WEB-DL.DD5.1.H.264-CtrlHD/The.Good.Wife.S06E09.Trust.Issues.720p.WEB-DL.DD5.1.H.264-CtrlHD.mkv
|
||||
: audio_channels: '5.1'
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
episode: 9
|
||||
format: WEB-DL
|
||||
release_group: CtrlHD
|
||||
@@ -1814,7 +1814,7 @@
|
||||
format: HDTV
|
||||
video_codec: h264
|
||||
audio_codec: AAC
|
||||
container: MP4
|
||||
container: mp4
|
||||
release_group: k3n
|
||||
type: episode
|
||||
|
||||
@@ -1853,7 +1853,7 @@
|
||||
|
||||
? Game.of.Thrones.S6.Ep5.X265.Dolby.2.0.KTM3.mp4
|
||||
: audio_channels: '2.0'
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
container: mp4
|
||||
episode: 5
|
||||
release_group: KTM3
|
||||
@@ -1885,7 +1885,7 @@
|
||||
|
||||
? Breaking.Bad.S01E01.2008.BluRay.VC1.1080P.5.1.WMV-NOVO
|
||||
: audio_channels: '5.1'
|
||||
container: WMV
|
||||
container: wmv
|
||||
episode: 1
|
||||
format: BluRay
|
||||
release_group: NOVO
|
||||
@@ -1922,9 +1922,7 @@
|
||||
|
||||
? Fear.The.Walking.Dead.S02E01.HDTV.x264.AAC.MP4-k3n.mp4
|
||||
: audio_codec: AAC
|
||||
container:
|
||||
- MP4
|
||||
- mp4
|
||||
container: mp4
|
||||
episode: 1
|
||||
format: HDTV
|
||||
mimetype: video/mp4
|
||||
@@ -2063,7 +2061,7 @@
|
||||
|
||||
? The.Walking.Dead.S06E01.FRENCH.1080p.WEB-DL.DD5.1.HEVC.x265-GOLF68
|
||||
: audio_channels: '5.1'
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
episode: 1
|
||||
format: WEB-DL
|
||||
language: fr
|
||||
@@ -2202,7 +2200,7 @@
|
||||
season: 1
|
||||
screen_size: 720p
|
||||
format: HDTV
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
release_group: Group
|
||||
@@ -2242,7 +2240,7 @@
|
||||
screen_size: 1080p
|
||||
streaming_service: Amazon Prime
|
||||
format: WEBRip
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: EAC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
type: episode
|
||||
@@ -2252,7 +2250,7 @@
|
||||
: title: Show Name
|
||||
date: 2016-09-28
|
||||
episode_title: Nice Title
|
||||
other: Extended
|
||||
edition: Extended
|
||||
screen_size: 1080p
|
||||
streaming_service: Comedy Central
|
||||
format: WEBRip
|
||||
@@ -2403,7 +2401,7 @@
|
||||
screen_size: 1080p
|
||||
streaming_service: Netflix
|
||||
format: WEBRip
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
release_group: NTb
|
||||
@@ -2692,7 +2690,7 @@
|
||||
screen_size: 4K
|
||||
streaming_service: Amazon Prime
|
||||
format: WEBRip
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: EAC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
release_group: Group
|
||||
@@ -2909,7 +2907,7 @@
|
||||
episode: 10
|
||||
screen_size: 1080p
|
||||
format: WEB-DL
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
release_group: KINGS
|
||||
@@ -2935,7 +2933,7 @@
|
||||
screen_size: 1080p
|
||||
streaming_service: Netflix
|
||||
format: WEBRip
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
release_group: ViSUM
|
||||
@@ -2956,7 +2954,7 @@
|
||||
episode: 5
|
||||
screen_size: 1080p
|
||||
format: WEB-DL
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
release_group: HKD
|
||||
@@ -2998,7 +2996,7 @@
|
||||
episode_title: The Brain In The Bot
|
||||
screen_size: 1080p
|
||||
format: WEB-DL
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
release_group: R2D2
|
||||
@@ -3012,7 +3010,7 @@
|
||||
episode: 7
|
||||
screen_size: 1080p
|
||||
format: WEB-DL
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
subtitle_language: nl
|
||||
@@ -3040,7 +3038,7 @@
|
||||
episode: 12
|
||||
screen_size: 1080p
|
||||
format: WEB-DL
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
release_group: Het.Robot.Team.OYM
|
||||
type: episode
|
||||
@@ -3311,7 +3309,7 @@
|
||||
screen_size: 720p
|
||||
format: WEBRip
|
||||
video_codec: h264
|
||||
container: MKV
|
||||
container: mkv
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
release_group: Ehhhh
|
||||
@@ -3501,7 +3499,7 @@
|
||||
screen_size: 1080p
|
||||
streaming_service: Amazon Prime
|
||||
format: WEBRip
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
release_group: GROUP
|
||||
@@ -3592,7 +3590,7 @@
|
||||
episode: 13
|
||||
other: FINAL
|
||||
language: mul
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
screen_size: 4K
|
||||
streaming_service: Netflix
|
||||
@@ -3664,7 +3662,7 @@
|
||||
season: 1
|
||||
episode: 1
|
||||
episode_title: Spark of Rebellion
|
||||
other: Alternative Cut
|
||||
edition: Alternative Cut
|
||||
format: HDTV
|
||||
video_codec: h264
|
||||
release_group: W4F
|
||||
@@ -3760,7 +3758,7 @@
|
||||
? Rick and Morty Season 1 [UNCENSORED] [BDRip] [1080p] [HEVC]
|
||||
: title: Rick and Morty
|
||||
season: 1
|
||||
other: Uncensored
|
||||
edition: Uncensored
|
||||
format: BluRay
|
||||
screen_size: 1080p
|
||||
video_codec: h265
|
||||
@@ -3830,7 +3828,7 @@
|
||||
other: East Coast Feed
|
||||
screen_size: 720p
|
||||
format: WEB-DL
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
release_group: NTb
|
||||
@@ -3869,7 +3867,7 @@
|
||||
screen_size: 720p
|
||||
streaming_service: Amazon Prime
|
||||
format: WEBRip
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
release_group: casstudio
|
||||
@@ -3901,3 +3899,67 @@
|
||||
container: mkv
|
||||
mimetype: video/x-matroska
|
||||
type: episode
|
||||
|
||||
? Show.Name.S02E01.Super.Title.720p.WEB-DL.DD5.1.H.264-ABC.nzb
|
||||
: title: Show Name
|
||||
season: 2
|
||||
episode: 1
|
||||
episode_title: Super Title
|
||||
screen_size: 720p
|
||||
format: WEB-DL
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
video_codec: h264
|
||||
release_group: ABC
|
||||
container: nzb
|
||||
type: episode
|
||||
|
||||
? "[SGKK] Bleach 312v1 [720p/mkv]-Group.mkv"
|
||||
: title: Bleach
|
||||
season: 3
|
||||
episode: 12
|
||||
version: 1
|
||||
screen_size: 720p
|
||||
release_group: Group
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? The.Expanse.S02E08.720p.WEBRip.x264.EAC3-KiNGS.mkv
|
||||
: title: The Expanse
|
||||
season: 2
|
||||
episode: 8
|
||||
screen_size: 720p
|
||||
format: WEBRip
|
||||
video_codec: h264
|
||||
audio_codec: EAC3
|
||||
release_group: KiNGS
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? Series_name.2005.211.episode.title.avi
|
||||
: title: Series name
|
||||
year: 2005
|
||||
season: 2
|
||||
episode: 11
|
||||
episode_title: episode title
|
||||
container: avi
|
||||
type: episode
|
||||
|
||||
? the.flash.2014.208.hdtv-lol[ettv].mkv
|
||||
: title: the flash
|
||||
year: 2014
|
||||
season: 2
|
||||
episode: 8
|
||||
format: HDTV
|
||||
release_group: lol[ettv]
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? "[Despair-Paradise].Kono.Subarashii.Sekai.ni.Shukufuku.wo!.2.-..09.vostfr.FHD"
|
||||
: options: -E -t episode
|
||||
release_group: Despair-Paradise
|
||||
title: Kono Subarashii Sekai ni Shukufuku wo! 2
|
||||
episode: 9
|
||||
subtitle_language: fr
|
||||
other: FullHD
|
||||
type: episode
|
||||
|
||||
@@ -111,7 +111,7 @@
|
||||
? Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director's.Cut).CD1.DVDRip.XviD.AC3-WAF.avi
|
||||
: title: Blade Runner
|
||||
year: 1982
|
||||
edition: Director's cut
|
||||
edition: Director's Cut
|
||||
cd: 1
|
||||
format: DVD
|
||||
video_codec: XviD
|
||||
@@ -147,7 +147,8 @@
|
||||
format: DVD
|
||||
video_codec: XviD
|
||||
release_group: ARROW
|
||||
other: ['Proper', 'Limited']
|
||||
other: Proper
|
||||
edition: Limited Edition
|
||||
proper_count: 1
|
||||
|
||||
? Movies/Fr - Paris 2054, Renaissance (2005) - De Christian Volckman - (Film Divx Science Fiction Fantastique Thriller Policier N&B).avi
|
||||
@@ -363,7 +364,7 @@
|
||||
video_codec: h264
|
||||
release_group: AN0NYM0US[bb]
|
||||
format: BluRay
|
||||
other: Limited
|
||||
edition: Limited Edition
|
||||
|
||||
? movies/La Science des Rêves (2006)/La.Science.Des.Reves.FRENCH.DVDRip.XviD-MP-AceBot.avi
|
||||
: title: La Science des Rêves
|
||||
@@ -439,7 +440,7 @@
|
||||
format: BluRay
|
||||
video_codec: XviD
|
||||
audio_channels: "5.1"
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
uuid: XD607ebb-BRc59935-5155473f-1c5f49
|
||||
|
||||
? Pacific.Rim.3D.2013.COMPLETE.BLURAY-PCH.avi
|
||||
@@ -644,7 +645,7 @@
|
||||
- Timsit
|
||||
- Lindon
|
||||
screen_size: 1080p
|
||||
container: MKV
|
||||
container: mkv
|
||||
format: HDTV
|
||||
|
||||
? some.movie.720p.bluray.x264-mind
|
||||
@@ -688,20 +689,18 @@
|
||||
|
||||
? h265 - HEVC Riddick Unrated Director Cut French 1080p DTS.mkv
|
||||
: audio_codec: DTS
|
||||
edition: Director's cut
|
||||
edition: [Unrated, Director's Cut]
|
||||
language: fr
|
||||
screen_size: 1080p
|
||||
title: Riddick
|
||||
other: Unrated
|
||||
video_codec: h265
|
||||
|
||||
? "[h265 - HEVC] Riddick Unrated Director Cut French [1080p DTS].mkv"
|
||||
: audio_codec: DTS
|
||||
edition: Director's cut
|
||||
edition: [Unrated, Director's Cut]
|
||||
language: fr
|
||||
screen_size: 1080p
|
||||
title: Riddick
|
||||
other: Unrated
|
||||
video_codec: h265
|
||||
|
||||
? Barbecue-2014-French-mHD-1080p
|
||||
@@ -892,7 +891,8 @@
|
||||
|
||||
? Suicide Squad EXTENDED (2016) 2160p 4K UltraHD Blu-Ray x265 (HEVC 10bit BT709) Dolby Atmos 7.1 -DDR
|
||||
: title: Suicide Squad
|
||||
other: [Extended, UltraHD]
|
||||
edition: Extended
|
||||
other: UltraHD
|
||||
year: 2016
|
||||
screen_size: 4K
|
||||
format: BluRay
|
||||
@@ -906,7 +906,7 @@
|
||||
? Queen - A Kind of Magic (Alternative Extended Version) 2CD 2014
|
||||
: title: Queen
|
||||
alternative_title: A Kind of Magic
|
||||
other: [Alternative Cut, Extended]
|
||||
edition: [Alternative Cut, Extended]
|
||||
cd_count: 2
|
||||
year: 2014
|
||||
type: movie
|
||||
@@ -914,7 +914,7 @@
|
||||
? Jour.de.Fete.1949.ALTERNATiVE.CUT.1080p.BluRay.x264-SADPANDA[rarbg]
|
||||
: title: Jour de Fete
|
||||
year: 1949
|
||||
other: Alternative Cut
|
||||
edition: Alternative Cut
|
||||
screen_size: 1080p
|
||||
format: BluRay
|
||||
video_codec: h264
|
||||
@@ -941,7 +941,7 @@
|
||||
|
||||
? Alien DC (1979) [1080p]
|
||||
: title: Alien
|
||||
edition: Director's cut
|
||||
edition: Director's Cut
|
||||
year: 1979
|
||||
screen_size: 1080p
|
||||
type: movie
|
||||
@@ -949,7 +949,7 @@
|
||||
? Requiem.For.A.Dream.2000.DC.1080p.BluRay.x264.anoXmous
|
||||
: title: Requiem For A Dream
|
||||
year: 2000
|
||||
edition: Director's cut
|
||||
edition: Director's Cut
|
||||
screen_size: 1080p
|
||||
format: BluRay
|
||||
video_codec: h264
|
||||
@@ -963,7 +963,7 @@
|
||||
screen_size: 1080p
|
||||
format: WEBRip
|
||||
video_codec: h264
|
||||
audio_codec: DolbyDigital
|
||||
audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
release_group: FGT
|
||||
type: movie
|
||||
@@ -980,7 +980,7 @@
|
||||
? Suntan.2016.FESTiVAL.DVDRip.x264-IcHoR
|
||||
: title: Suntan
|
||||
year: 2016
|
||||
other: Festival
|
||||
edition: Festival
|
||||
format: DVD
|
||||
video_codec: h264
|
||||
release_group: IcHoR
|
||||
@@ -1058,7 +1058,7 @@
|
||||
? The Heartbreak Kid (1993) UNCUT 720p WEBRip x264
|
||||
: title: The Heartbreak Kid
|
||||
year: 1993
|
||||
other: Uncut
|
||||
edition: Uncut
|
||||
screen_size: 720p
|
||||
format: WEBRip
|
||||
video_codec: h264
|
||||
@@ -1097,3 +1097,12 @@
|
||||
format: HDTC
|
||||
language: French
|
||||
type: movie
|
||||
|
||||
? We.Are.X.2016.LIMITED.BDRip.x264-BiPOLAR
|
||||
: title: We Are X
|
||||
year: 2016
|
||||
edition: Limited Edition
|
||||
format: BluRay
|
||||
video_codec: h264
|
||||
release_group: BiPOLAR
|
||||
type: movie
|
||||
|
||||
@@ -10,9 +10,14 @@
|
||||
|
||||
? +DolbyDigital
|
||||
? +DD
|
||||
? +DDP
|
||||
? +Dolby Digital
|
||||
: audio_codec: DolbyDigital
|
||||
? +AC3
|
||||
: audio_codec: AC3
|
||||
|
||||
? +DDP
|
||||
? +DD+
|
||||
? +EAC3
|
||||
: audio_codec: EAC3
|
||||
|
||||
? +DolbyAtmos
|
||||
? +Dolby Atmos
|
||||
@@ -23,9 +28,6 @@
|
||||
? +AAC
|
||||
: audio_codec: AAC
|
||||
|
||||
? +AC3
|
||||
: audio_codec: AC3
|
||||
|
||||
? +Flac
|
||||
: audio_codec: FLAC
|
||||
|
||||
@@ -88,7 +90,7 @@
|
||||
|
||||
? DD5.1
|
||||
? DD51
|
||||
: audio_codec: DolbyDigital
|
||||
: audio_codec: AC3
|
||||
audio_channels: '5.1'
|
||||
|
||||
? -51
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Use - marker to check inputs that should not match results.
|
||||
? Director's cut
|
||||
? Edition Director's cut
|
||||
: edition: Director's cut
|
||||
: edition: Director's Cut
|
||||
|
||||
? Collector
|
||||
? Collector Edition
|
||||
@@ -23,3 +23,9 @@
|
||||
? Deluxe Edition
|
||||
? Edition Deluxe
|
||||
: edition: Deluxe Edition
|
||||
|
||||
? Super Movie Alternate XViD
|
||||
? Super Movie Alternative XViD
|
||||
? Super Movie Alternate Cut XViD
|
||||
? Super Movie Alternative Cut XViD
|
||||
: edition: Alternative Cut
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user