Compare commits
1397 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6d444ebe99 | |||
| 237eafed35 | |||
| fbc5069fb8 | |||
| d23c44589e | |||
| 42cc500b05 | |||
| 81760192dc | |||
| 2cb077423d | |||
| de8aaaa5e5 | |||
| b9ebd4e1d6 | |||
| 8fdf1e841c | |||
| 9df92d0262 | |||
| a07d5aa440 | |||
| 54bd222605 | |||
| 6487258136 | |||
| d1935a4439 | |||
| 026c30642e | |||
| 036d036a61 | |||
| 2092d44627 | |||
| c6e7e64ba3 | |||
| a8f5ad6435 | |||
| afa0c3a1b0 | |||
| b3132d57b2 | |||
| 0a2a6b558f | |||
| adb9926928 | |||
| 3ce25007b5 | |||
| 5690ada2a7 | |||
| 76481186e9 | |||
| 8d2d2341c8 | |||
| 4e20d282f7 | |||
| edc3ce1ba4 | |||
| b9249ff09a | |||
| c3b2ffa97d | |||
| 4e3b8ee3c2 | |||
| a749ed4837 | |||
| 67ba6be6e2 | |||
| 7a47e6617d | |||
| 4a4c6e7df2 | |||
| 5661528862 | |||
| 696e9d6b64 | |||
| c0aa465827 | |||
| a6120ae27a | |||
| ba8a165aa5 | |||
| 833d7072ed | |||
| 9829137001 | |||
| c686214f56 | |||
| 2252d7ea6a | |||
| e7fbfca2d7 | |||
| 9ca959a20a | |||
| bd8e26ecab | |||
| 451b34dceb | |||
| 02761db660 | |||
| 42b7e9fa62 | |||
| edf6c25e17 | |||
| e91aac65cc | |||
| 01d5a18af8 | |||
| 70c1142f8d | |||
| 8b6b162073 | |||
| 5199fbe0cb | |||
| 924de62dff | |||
| 4cba7d8684 | |||
| f3f9ab1360 | |||
| 682d1d85ce | |||
| a1cc9a2049 | |||
| a7f7b3e572 | |||
| 7c32a7c2c8 | |||
| e842579f25 | |||
| bdd9134a0e | |||
| a01552e88c | |||
| 824957ae85 | |||
| af335d5565 | |||
| 2f9eb51868 | |||
| aebbc17643 | |||
| 84e78e1e20 | |||
| 89bb747ee3 | |||
| 62e37dbd09 | |||
| edef9cb936 | |||
| 3ae02c3050 | |||
| a4016616a1 | |||
| b4855611c4 | |||
| 1b44f6d220 | |||
| b0f0af087b | |||
| 1344f7255d | |||
| 39fe3b0fd6 | |||
| 0ba676b5e7 | |||
| 4d6897c138 | |||
| c7c6ba09e9 | |||
| c06baa67f1 | |||
| cdb7946c00 | |||
| bdb5da8df0 | |||
| e961c8d3aa | |||
| 3eb1a9eef8 | |||
| 67aead8fcc | |||
| fd764d0576 | |||
| dad55d7922 | |||
| fb32772512 | |||
| 918ce65acd | |||
| 9f03b9ee71 | |||
| 2235de1a2d | |||
| 8804c89f04 | |||
| 2e8805015c | |||
| f435ca2961 | |||
| 71c3761b20 | |||
| e4c441043a | |||
| 8a655a5d6e | |||
| 777c21ce87 | |||
| e22ff09691 | |||
| d0f685e87c | |||
| 8f71c417a9 | |||
| b62977c494 | |||
| 8d11136c1c | |||
| 4a7ea43095 | |||
| 8fe4bd2751 | |||
| 38bb819a24 | |||
| dbe75ad18d | |||
| 760441b45a | |||
| 56645b601b | |||
| 885e4bc99f | |||
| b04e5510fd | |||
| 806000725b | |||
| 71270641d3 | |||
| bf4f2bec91 | |||
| dafad3a7a3 | |||
| 182a1cc3fb | |||
| 4b7664aaa6 | |||
| 2050aef1e5 | |||
| 390af30bf6 | |||
| 698f48b1fd | |||
| 2e5cc61ac6 | |||
| 8d97fb7633 | |||
| 8a41c393bb | |||
| 6ae38359d7 | |||
| 7ddd1e3497 | |||
| 20a0993aa8 | |||
| 57d58056de | |||
| 06c6fa4d01 | |||
| 41f884e129 | |||
| 77a74c8839 | |||
| c198788017 | |||
| 4cbfa21b52 | |||
| f3754de394 | |||
| d47ad013cd | |||
| 8c4372d0d3 | |||
| 1c7b9145c8 | |||
| c477f53ee6 | |||
| f99f03dc33 | |||
| 2ddd786819 | |||
| 6e604f98e3 | |||
| 729404d05f | |||
| de50dfdb7c | |||
| 7bda522f0a | |||
| 6c39fb0649 | |||
| a7342ac77e | |||
| 5d45b8bbdd | |||
| aa0ff38ed7 | |||
| d55aa3b569 | |||
| d86a99fb32 | |||
| c687152724 | |||
| 65ec539875 | |||
| 6dba0792d2 | |||
| df78cecb31 | |||
| 3d8687f69d | |||
| 92196897a9 | |||
| 4206edfb13 | |||
| c08e63ab80 | |||
| 03646b4f87 | |||
| d9fa860b0c | |||
| 93d8494ddc | |||
| bd982958fa | |||
| e280b62f5c | |||
| 2bb050de40 | |||
| f3ed3bf0bf | |||
| 79457536f2 | |||
| 048f930da1 | |||
| 6aa8108fce | |||
| c234f75d7e | |||
| 064b634f77 | |||
| 8d83184cd1 | |||
| 7a5112bee5 | |||
| 0c549c6bda | |||
| c48e704502 | |||
| bec66895d9 | |||
| c9f1e8a8bb | |||
| ac209e7ee2 | |||
| 525256e15c | |||
| 3b8c965f4b | |||
| 8f8da8e6ea | |||
| ac9b81abea | |||
| 1c39c55423 | |||
| ca11273b37 | |||
| b532a60c3d | |||
| 941662e9f2 | |||
| 4d1e4c3ebe | |||
| f66fd9bcae | |||
| f5c5ecd1b9 | |||
| f9b7855d19 | |||
| 418a8af99a | |||
| ce3b4661de | |||
| 4b811f38b0 | |||
| bba2823065 | |||
| 5547e9658d | |||
| e14cbb19f5 | |||
| 0613a001c5 | |||
| 2970ba69f8 | |||
| 2c6b811d4d | |||
| d5a3caf961 | |||
| 7e64778546 | |||
| 1afd0d7c28 | |||
| 3027a3c3e8 | |||
| 3d7df100ff | |||
| 4de5030196 | |||
| e3bfe368db | |||
| e45fe0aaa0 | |||
| 807d758bfa | |||
| 7c5164b9a5 | |||
| 1e15fb8e43 | |||
| ae996b4b9a | |||
| 3259a7eec9 | |||
| 39a5aa1d63 | |||
| dbe378ad82 | |||
| a316c11974 | |||
| 2fd05c2464 | |||
| 8adabb946e | |||
| 3f251b9c0e | |||
| aadd60c3ad | |||
| 99cc994865 | |||
| da0355ca88 | |||
| aaa7c0934a | |||
| 03c70f4dfa | |||
| 0704609fa5 | |||
| d26569b26f | |||
| 007e93e526 | |||
| 8feec0284d | |||
| eaa79fb3bd | |||
| 3af5102e93 | |||
| d936460d83 | |||
| f51649c59f | |||
| be1e33b555 | |||
| 059645dec7 | |||
| 6439becd7d | |||
| 917fbc1ea2 | |||
| c97fee90b7 | |||
| 35d04946b4 | |||
| d0d71d626e | |||
| 5a1b39c67e | |||
| a8cbd37697 | |||
| b2bac94009 | |||
| d88b7e2a17 | |||
| 68bf35d83d | |||
| a78e6587ac | |||
| 21f715a321 | |||
| 18a5dfd81f | |||
| 2a7b5e2efb | |||
| 0d63b0361f | |||
| 4e301ddd24 | |||
| bc182276ac | |||
| 4980523d10 | |||
| 85baf58b55 | |||
| d7a4d02564 | |||
| 0e6f4c45db | |||
| 932cadce3c | |||
| 3926ea9c69 | |||
| dd1495c881 | |||
| 8c27e6aade | |||
| ba2774eeb5 | |||
| 8e854a8d64 | |||
| 86f5ed198f | |||
| cc57520c71 | |||
| 8d9f8960b2 | |||
| f66573620b | |||
| 3544a0e7f8 | |||
| 9c9db90886 | |||
| c4bc4d22e9 | |||
| b107c70a0c | |||
| 084069441f | |||
| 8b01433e61 | |||
| b72902b8f4 | |||
| 354e455ae7 | |||
| 8aaed47e39 | |||
| c7598aaf12 | |||
| cbe2d16d9b | |||
| 953eb97513 | |||
| b340b3b699 | |||
| f9f2579904 | |||
| 3a90653edd | |||
| a8ae18f43c | |||
| c235dd934a | |||
| 3e7c2cb0c2 | |||
| 1c9398b5b9 | |||
| 6a9c818e67 | |||
| 753baf85b6 | |||
| 7685c2a6b7 | |||
| cf1203566e | |||
| 052e6a475b | |||
| 8890acef3a | |||
| 72570ee21b | |||
| 100c94ad83 | |||
| 2ea3bf20a7 | |||
| b1cb7c7259 | |||
| 7510dfc5c5 | |||
| b18bbba23f | |||
| 4e28cea2a3 | |||
| a9bafc5efd | |||
| a04ff3343b | |||
| aa09fb28d2 | |||
| e6900c18b9 | |||
| 221a17a5af | |||
| fc638c608b | |||
| 71d9d96d81 | |||
| 5a8b999509 | |||
| 720d7e9d8d | |||
| c69be5934d | |||
| dae186fb03 | |||
| 076ad78355 | |||
| 421aa3a95c | |||
| 153d186a1c | |||
| 2238835868 | |||
| e0be4542ab | |||
| fab841bc7a | |||
| 789a28a966 | |||
| 7cde652ed1 | |||
| 5359116e72 | |||
| 17edfd215d | |||
| e292b46cca | |||
| d091b20ebe | |||
| 50a53562a1 | |||
| 55a479590b | |||
| 8874bb64fb | |||
| 38afba3075 | |||
| ba48e30128 | |||
| 77397b6877 | |||
| f50fa0554a | |||
| d0dd9f629d | |||
| c82637e760 | |||
| 152cfb3f07 | |||
| 7f579181fd | |||
| 3e0f39b6f1 | |||
| 244d3b1a5b | |||
| 7c24302f7c | |||
| 6cafc3a1e8 | |||
| 1ab0d31baa | |||
| b2fadc5a90 | |||
| 38f3d85909 | |||
| 3694100265 | |||
| af44f271ab | |||
| 9984f6aef9 | |||
| 51a1debc39 | |||
| b8a68f62a0 | |||
| 5ded188f51 | |||
| 12c5dda1fa | |||
| 25146049bf | |||
| 5598ee0c78 | |||
| 6e4b0cbcbf | |||
| 572cf29974 | |||
| 5601d19002 | |||
| e81dd5df76 | |||
| e7919d5a47 | |||
| 6f634fbc21 | |||
| 7478ece1ff | |||
| cd72b6f477 | |||
| fab96de4c7 | |||
| 0ffa17cf67 | |||
| 777549a15f | |||
| c07ded004d | |||
| da3e96a9d8 | |||
| d6e8a03ddf | |||
| b13cbd1e54 | |||
| 6b2e5c154b | |||
| 137a4d1e0d | |||
| 1725550acc | |||
| bd91e173b0 | |||
| 47a11b3e64 | |||
| b5e57519ff | |||
| 20845bbcd4 | |||
| 739c10ade6 | |||
| 14ea2d72a7 | |||
| 4a9ea97ea1 | |||
| b017a94353 | |||
| 15b65dd844 | |||
| 079ea8c39d | |||
| 4b949dcd72 | |||
| 2626cf4253 | |||
| b260c8aaec | |||
| 1ece46473b | |||
| 890c3cc8b0 | |||
| 7b45c9f1c5 | |||
| 58fb2f5ea6 | |||
| a79f3e47ba | |||
| b3b9db9ff6 | |||
| 9aed245241 | |||
| aa03fdb445 | |||
| 7cb8356598 | |||
| ac347755fd | |||
| b16cb15e88 | |||
| 4989c37964 | |||
| 06849c5814 | |||
| 78b67a6f5e | |||
| acf79df4d0 | |||
| bc5a9caf63 | |||
| 7b34b07cdc | |||
| 8df1a1bf17 | |||
| 1143b0f2d2 | |||
| 86883336fd | |||
| 62d77c5811 | |||
| 8397dddbbe | |||
| 47ef94d8c3 | |||
| 8aa4a485ed | |||
| cb4ef9c9ea | |||
| 2f80852a7c | |||
| 190a580642 | |||
| 6ba85f5069 | |||
| 707b5921fb | |||
| 2e25e68444 | |||
| 034260e426 | |||
| b4eda8bbff | |||
| 93a1b7fb52 | |||
| 8ef44c3520 | |||
| 449de57fc7 | |||
| cbe29e233d | |||
| bef56ff124 | |||
| 5a05c0f858 | |||
| c1e13e520b | |||
| cebe92bd8f | |||
| 6f8cfc7914 | |||
| e7e98b83d2 | |||
| 4b72bb9d28 | |||
| 221068874b | |||
| 6028d8b2f1 | |||
| ddaafe9310 | |||
| 139e38731a | |||
| d25056cb35 | |||
| 5c80a7091b | |||
| 5faf190202 | |||
| 169b114ff6 | |||
| bc67326573 | |||
| a32543533d | |||
| 6b6e40ef96 | |||
| 8127b7ecf0 | |||
| 09425ccbe0 | |||
| 61fbc4e3b5 | |||
| 158e4f85da | |||
| 8b1107d2e1 | |||
| 59ffa9084f | |||
| 19df673c50 | |||
| 5f20894413 | |||
| 7349874804 | |||
| fda5dc7e89 | |||
| d60b45a667 | |||
| ab2e69a76e | |||
| 6a836338a5 | |||
| 5a02365605 | |||
| 26b38c4f64 | |||
| 9b7edf2960 | |||
| 7050f64fae | |||
| 4623a989d8 | |||
| 87b942bd6d | |||
| 87ee5cc627 | |||
| bff8fe8b70 | |||
| 1495882dc7 | |||
| 2e50d84f2a | |||
| d32716f4c5 | |||
| 876aa4eda0 | |||
| 3673aee8e9 | |||
| a758191ee0 | |||
| 99410249c7 | |||
| a705f2ad30 | |||
| 33223dedc1 | |||
| bd8e8ef346 | |||
| c75e7bf656 | |||
| cb4117376a | |||
| 0d37920aad | |||
| 0da6e76200 | |||
| 5f5934a6ee | |||
| 85b7a2f4f5 | |||
| 3dcfd30a04 | |||
| b5a0f65783 | |||
| 3862e6f3a4 | |||
| 1d4e2ec50b | |||
| 8b85485510 | |||
| 722ce3ac8b | |||
| 1e132f2808 | |||
| d007e0a172 | |||
| 3ddd722cc1 | |||
| 82d8189966 | |||
| 2d533eb004 | |||
| f9c899701f | |||
| e9f62fbb09 | |||
| 5b2f09318a | |||
| 8c260c43a8 | |||
| eee793302c | |||
| 0d1fdf6e60 | |||
| 64398d8f30 | |||
| cab736b573 | |||
| 93071dd81e | |||
| e8fcb8f91a | |||
| 33cacfe884 | |||
| f624f7f05a | |||
| 624195d870 | |||
| ab2ef66263 | |||
| 4ea0372212 | |||
| ff31912e8a | |||
| dcefed2e4c | |||
| 55bbc4f585 | |||
| 0f2bb99b39 | |||
| 85342eeed3 | |||
| 374a6a668a | |||
| e3be3195ee | |||
| 503279f3c2 | |||
| f8bb54024c | |||
| 6e53fc606a | |||
| ab810c48af | |||
| 13bb9183af | |||
| 2c5b6ea690 | |||
| a8efa2e266 | |||
| e73eb2fd86 | |||
| d38fa26e13 | |||
| 716f4493e8 | |||
| 3220974a4a | |||
| 6732272047 | |||
| 547f038139 | |||
| 3b0ee60eaa | |||
| a869281de7 | |||
| a4ed77c7bb | |||
| 81718e64d3 | |||
| dee0daf8aa | |||
| 8e599fb22a | |||
| acb5589af1 | |||
| 6db2771cd6 | |||
| 06d4e0a19a | |||
| 3b18c6c14f | |||
| 300359acf2 | |||
| 5456d0200a | |||
| 9890f66443 | |||
| aba863bc84 | |||
| ade416f5c8 | |||
| 7097267f7c | |||
| b0d8d1a86d | |||
| 2c8296ba85 | |||
| 4dd17de146 | |||
| 3a281b0b57 | |||
| 04ed625f1a | |||
| 1cddfb1b2d | |||
| 796b64d83e | |||
| 240a3687d7 | |||
| 9ed4764ab2 | |||
| f253a13297 | |||
| 744cd57dd5 | |||
| e2a5647363 | |||
| a1f324c105 | |||
| 767e0f8ac7 | |||
| 0c0ad02234 | |||
| c09973ec56 | |||
| 03a72e1917 | |||
| f9e0eaaf83 | |||
| 985f75f7da | |||
| 171cbd6c53 | |||
| 9875bc5c5b | |||
| 882509f891 | |||
| 3396502334 | |||
| b7fb99c3d4 | |||
| c82307a710 | |||
| 309a99d183 | |||
| 09a6ef0194 | |||
| 43afcb4239 | |||
| 7a78f33ac3 | |||
| d5fb538630 | |||
| a22cdf5d5b | |||
| fe0636bbbf | |||
| 13859cfbd7 | |||
| 0adadc59ac | |||
| d65ba19c6c | |||
| 5cedbd2fa0 | |||
| 735fb09762 | |||
| 79d61419b0 | |||
| 248b93e5c6 | |||
| d8eff1adb5 | |||
| c911620254 | |||
| c68a32b889 | |||
| 788819a900 | |||
| 27c94af980 | |||
| 81122665a0 | |||
| 1856e687eb | |||
| 6055793d46 | |||
| 99b670ff10 | |||
| 7a09218cc0 | |||
| a34d0523b5 | |||
| f06e900bab | |||
| 7da15a2d44 | |||
| e999cc53d0 | |||
| b7d4bd00a5 | |||
| 8c2aa849d7 | |||
| 01a759fff8 | |||
| cb0008b59e | |||
| 9cd825aff1 | |||
| 8ad52d2979 | |||
| efd6143498 | |||
| 157fae5f83 | |||
| 6d63301b63 | |||
| 9801c8c6b3 | |||
| e04f4c0bd0 | |||
| b501578584 | |||
| 308f429c91 | |||
| 1d45172475 | |||
| 085a4f30db | |||
| 7a600dc2b6 | |||
| c0c2891d8d | |||
| 06b269a2ba | |||
| f3a4db0d87 | |||
| bcd99d18c4 | |||
| c05c400c6f | |||
| 0f081d8d7b | |||
| 833dc5e3ae | |||
| 0be3df435b | |||
| f4446af57e | |||
| 253aa664a8 | |||
| 0df037a295 | |||
| ed49d743f9 | |||
| 203cc392c0 | |||
| 52ba5a7f24 | |||
| 8aa0576bbc | |||
| 5ce9cc79c8 | |||
| 1a596dfdea | |||
| aeecb3ff59 | |||
| 85c8d2d558 | |||
| 2cf4e7ac59 | |||
| e7412a91f9 | |||
| 9888d03982 | |||
| 765cc39553 | |||
| 6e58c2f984 | |||
| 295542ff18 | |||
| 9d72d9c647 | |||
| 853897ec3e | |||
| 9cf8ad7399 | |||
| fdf974c5e3 | |||
| 2920dbfe8d | |||
| 77d05f7697 | |||
| 3ffeaeffb6 | |||
| db2755675c | |||
| 7ca090f73c | |||
| bb251ad29e | |||
| 75d770e019 | |||
| 49bf116c18 | |||
| b7d227fe0f | |||
| 83f59935f2 | |||
| 37b794fa14 | |||
| 1f5c45df91 | |||
| 62e3020234 | |||
| 895d457500 | |||
| 586269efd3 | |||
| 576718fc03 | |||
| 648dd4147a | |||
| c4df743c3e | |||
| b98fead37e | |||
| 6522094164 | |||
| fcd3dfe75c | |||
| ec9a798590 | |||
| 5825443d4d | |||
| 9768b3fadd | |||
| 77a72d6663 | |||
| 08d647c024 | |||
| a77ef040be | |||
| 13e581b953 | |||
| 1cc18617c5 | |||
| 2642f65614 | |||
| 4abb2aacf9 | |||
| 904daaf2b3 | |||
| 3044f2b1fb | |||
| 826accb2d1 | |||
| d5cb35ed95 | |||
| 24c7e4be8c | |||
| abbd7283b2 | |||
| 2980aa08d7 | |||
| e2344abbc4 | |||
| 80097c3500 | |||
| 714f36caee | |||
| fb1860d78b | |||
| ce7acd278e | |||
| ae8473183d | |||
| 69fb328b50 | |||
| b8d9899796 | |||
| e58fa1964d | |||
| 1627dee77e | |||
| bbac0c033f | |||
| 6437e1dbad | |||
| 48a9e998ff | |||
| 6b6ca461f0 | |||
| 7960952a30 | |||
| 5ec64efb75 | |||
| 2440b2eae4 | |||
| 54db2857c9 | |||
| 5b8f0b7361 | |||
| 053ebe3963 | |||
| 661b0367f5 | |||
| 01da0697a0 | |||
| a3d3b670ae | |||
| 5c64a332f8 | |||
| 6fcd9b645a | |||
| 78da16654a | |||
| da20d4882b | |||
| 1f31c38d24 | |||
| 5f2fd9733b | |||
| 8a225b4e09 | |||
| af05b41937 | |||
| d618da457e | |||
| d16bdad782 | |||
| f6d33e73a0 | |||
| 7b48e445f5 | |||
| 2390f904bd | |||
| 3bee3631a3 | |||
| 9da0b2d3c1 | |||
| 7a092e4585 | |||
| 196fb6b4f6 | |||
| 9507002961 | |||
| 943ed38c2f | |||
| 496619b492 | |||
| 4772b42d64 | |||
| 5bc10953cc | |||
| 18deca202d | |||
| 84bc4b018d | |||
| 1a0598a47a | |||
| 973d117887 | |||
| c284c8f336 | |||
| df69cbc84c | |||
| 646453887f | |||
| 189d617005 | |||
| 554cd8bfe7 | |||
| 79505dea20 | |||
| 5358a46b7e | |||
| aff1599ce7 | |||
| bc7df1c8a1 | |||
| f1df1d25a8 | |||
| 47d9b472ed | |||
| 89ab8c34d8 | |||
| 600498f9c1 | |||
| 845fbcd2ac | |||
| 3cc9f19b8f | |||
| e68c642005 | |||
| 81ae950577 | |||
| 62b4496cd6 | |||
| 29b7292d15 | |||
| 791058a2d2 | |||
| b6c108faef | |||
| 72d592866a | |||
| 4052993246 | |||
| a24f6e7789 | |||
| 0d0fd49924 | |||
| 139dcb409e | |||
| 707e6e7d13 | |||
| 36abb29ddd | |||
| a700fe761e | |||
| 7577164471 | |||
| 1bce743ea3 | |||
| f85ab0364a | |||
| eb3a0d52fd | |||
| b8cd295a12 | |||
| d3ff49ee0c | |||
| d4833f1e6e | |||
| 548483ed2f | |||
| f6f39b97c8 | |||
| 21ea5e0df9 | |||
| 3cbab6a5c7 | |||
| f19f39ba16 | |||
| b9c0fd9a1c | |||
| ce520e6944 | |||
| 0ad62a95e2 | |||
| 8f62a69e06 | |||
| 34bbb98f7f | |||
| 26cd6bb955 | |||
| 97534c633d | |||
| 0a9a2963c2 | |||
| 05afc39a35 | |||
| 84fdc1f55f | |||
| 3b03c3c2bb | |||
| 980f62686d | |||
| 202f2532a6 | |||
| 78d193a2fd | |||
| 0c109b0f27 | |||
| e33c0ab86c | |||
| 3a0189069d | |||
| 2688bd9edd | |||
| 889f7bd2d7 | |||
| 0561c2d640 | |||
| b76f1ad004 | |||
| cde6153f64 | |||
| 12bdaa510b | |||
| 0e6a4acf80 | |||
| e7785f7094 | |||
| 2dcf39eff8 | |||
| 1125c5c133 | |||
| faf7cedfe2 | |||
| 52a6127625 | |||
| b552f6f9fa | |||
| 9b558fcce2 | |||
| c8eae6df6c | |||
| 5f50bd7095 | |||
| c8617218dc | |||
| a8ceae993e | |||
| a72a8854c9 | |||
| dc658db9ba | |||
| 8d8ecfe9e1 | |||
| 4b77e63857 | |||
| 19aa800324 | |||
| 85adb6b0e3 | |||
| bd2523821d | |||
| c1838a3c84 | |||
| d836f8f5d0 | |||
| 37491c134e | |||
| aa6efb7e5c | |||
| e4d990c06d | |||
| 01288afac0 | |||
| 579e3ca3ab | |||
| f61bc3ce7c | |||
| cc6004e981 | |||
| 35eb037d05 | |||
| 1eb0e4419d | |||
| 7b5ca875dc | |||
| 2d22a6c383 | |||
| f4884f1c18 | |||
| 27cc3bd185 | |||
| 9b894c2ea7 | |||
| a341808873 | |||
| 8927513f8e | |||
| 84436dfa94 | |||
| 2b73f633e0 | |||
| 3d7a452141 | |||
| 38a8557311 | |||
| 79672923c5 | |||
| 3842182a83 | |||
| 8b0d359e0b | |||
| db2903edfd | |||
| 18d22a72bd | |||
| 402cfc1632 | |||
| 9dec7e4971 | |||
| 931c224247 | |||
| f6ee6d4027 | |||
| 332d41fb25 | |||
| 8303af25fb | |||
| ee02bdb19a | |||
| e674132d5a | |||
| c9eb8bc7be | |||
| 2076a2c6d0 | |||
| 32c0f09b16 | |||
| 1264cabb3f | |||
| fb722d0581 | |||
| cb00ab9610 | |||
| 4102a1c8fd | |||
| af6d7a1ae2 | |||
| 36cae6311a | |||
| 327bb31daa | |||
| 8c2effe337 | |||
| da59adddf4 | |||
| 6f3c806a21 | |||
| 3d119bcd98 | |||
| 6264c21e23 | |||
| d5d6aa0bd5 | |||
| 7ad49fa65a | |||
| 5b8dfb48c3 | |||
| 4d557be99a | |||
| a7e022c6f4 | |||
| fc3f5dad4f | |||
| fa42669580 | |||
| 0c73de726a | |||
| ea87d21977 | |||
| a9e9e8cf44 | |||
| 9905cd307f | |||
| 92ea32b52c | |||
| 4c56f7583a | |||
| fc3050ef3d | |||
| 29c63e11bd | |||
| 64cbe21f6e | |||
| a56bb97d45 | |||
| 6edc6a1c6d | |||
| 01c656ffb2 | |||
| 078c6d0c21 | |||
| 580a8c0f3e | |||
| f0258349bf | |||
| d9080eeb80 | |||
| b504744876 | |||
| 638e8b5b47 | |||
| 9b9c40f310 | |||
| cc3a1db879 | |||
| a16312803e | |||
| 206f9fa5ad | |||
| f20e97574a | |||
| 51764f0ce0 | |||
| e698b9d608 | |||
| e2a7cc6b45 | |||
| 6eaf307be9 | |||
| 9743af5db0 | |||
| 07d02ad75e | |||
| 91f51a27af | |||
| a60318260a | |||
| c3e7e336b5 | |||
| 0b1037b497 | |||
| 7da48b7dc5 | |||
| 73bcfc6151 | |||
| dfe1a16aa0 | |||
| 4f0e685feb | |||
| fca052b308 | |||
| c449f42444 | |||
| 5ec956943c | |||
| 1ad696be6d | |||
| 92b3b762b2 | |||
| 0b29a57079 | |||
| 0dee015181 | |||
| 2f1294a119 | |||
| e609e55710 | |||
| b752ce8572 | |||
| de59c68328 | |||
| f92e78e8be | |||
| 9abc611f1e | |||
| 8e42f61a52 | |||
| 48fd3f977d | |||
| 451636e0b3 | |||
| 1fc810470b | |||
| 1c96efdafa | |||
| 8fb0711973 | |||
| aabb4f2c13 | |||
| eb1c5d976f | |||
| fd89533903 | |||
| d5ec60f0f6 | |||
| 18b896ec0b | |||
| af93e1edec | |||
| a8a5b4ad16 | |||
| 0d40883929 | |||
| 3b6645156d | |||
| 7596346fcd | |||
| 877ff60077 | |||
| 928da6e679 | |||
| c1a9ccef3c | |||
| 5f41c85281 | |||
| 18ef38b90b | |||
| 7b155e6b31 | |||
| ba4d7b2199 | |||
| 869387af34 | |||
| 5b16a80730 | |||
| adf1190584 | |||
| 1c16cf5926 | |||
| a833cf7b0b | |||
| 62a35e7ced | |||
| 7b005760c1 | |||
| b07631f0b5 | |||
| 595d8a8f53 | |||
| 35321b00cd | |||
| 8928f19818 | |||
| 76cc8fad47 | |||
| cb851d8519 | |||
| af0aff3aee | |||
| 6d4099c79c | |||
| d9672e179c | |||
| 1e291343fe | |||
| a5d0bf68fd | |||
| b8e2b524e1 | |||
| 6abd062477 | |||
| fbcc2644bf | |||
| 34b05c8c17 | |||
| e3dce02716 | |||
| ed8a70b5c8 | |||
| 35944b0776 | |||
| 2f80ee5b39 | |||
| 280eb71ae4 | |||
| 9462b1b175 | |||
| 874204838d | |||
| 0e4a936176 | |||
| 5089708e2d | |||
| e17367aa13 | |||
| 26be0978ee | |||
| de1aea9dd2 | |||
| 4c143be906 | |||
| b83cea1073 | |||
| 2418b67089 | |||
| 7e550cf916 | |||
| dce72fcb08 | |||
| adede7bb2e | |||
| 377799ace3 | |||
| 02a822c630 | |||
| 8101bca753 | |||
| 40e177ded0 | |||
| 13f732d733 | |||
| fbca4cbf8c | |||
| 45c8cd1536 | |||
| da293bbc2f | |||
| 7991568d6d | |||
| 5fc1c8cbb1 | |||
| 596981aca2 | |||
| 6d55197218 | |||
| 85cb813a75 | |||
| 5f99319985 | |||
| f34c76eb90 | |||
| adb08aff75 | |||
| 93f8bf561b | |||
| 52e391aa83 | |||
| 751e9fc0c5 | |||
| 77b0b9dc6b | |||
| 5729552206 | |||
| 929f53ac13 | |||
| c6b983ea6c | |||
| 419bee76e2 | |||
| 2f3180cc07 | |||
| b5eb917e10 | |||
| 9fed8d6335 | |||
| becbdba56e | |||
| 85b9373760 | |||
| c069541cee | |||
| 4c0f20694d | |||
| a99175d46c | |||
| 4bab9b9f5b | |||
| a5ea603116 | |||
| 8be6d9bd77 | |||
| 9a9043aa67 | |||
| 7ed58386e5 | |||
| 51660449a8 | |||
| af1a8d13f1 | |||
| 8e13e6c181 | |||
| de915ba840 | |||
| 834922aa35 | |||
| 2d4e67c268 | |||
| 48a036a2bb | |||
| 140fb72aeb | |||
| 2d4c3790a6 | |||
| 74860fe2ee | |||
| aab69705b6 | |||
| d6c88621f6 | |||
| bd275601aa | |||
| 72c04e7b43 | |||
| f281d6bfce | |||
| 62fc223d7b | |||
| e274a542c1 | |||
| cd3b453bbb | |||
| 84bc6c95be | |||
| 3862447fa1 | |||
| f85224258b | |||
| 11d5edcc5e | |||
| 4df519e67a | |||
| e9afcaa9e6 | |||
| 672403ef92 | |||
| 4fbdd67255 | |||
| d6dd93b9d0 | |||
| 80f223e706 | |||
| 0f0d709975 | |||
| 8db5e100b8 | |||
| ebc984d371 | |||
| 80c73e5871 | |||
| 5de4d29dd8 | |||
| fad95a0b22 | |||
| ebd3867c5f | |||
| 0781265baa | |||
| 9b8798d534 | |||
| 190724360c | |||
| 93acb7fbc1 | |||
| 90cc235d23 | |||
| 515698fd95 | |||
| 2596d0a4bc | |||
| ef8f9f7816 | |||
| 276ecf262f | |||
| 5c8d083038 | |||
| a2c399b4b7 | |||
| 4ecec2e362 | |||
| e072cb4123 | |||
| e44cdd4191 | |||
| 43d60b20ca | |||
| 6a61c0e722 | |||
| 9de9428825 | |||
| 13cb31d2db | |||
| 211c687609 | |||
| 3151df31f8 | |||
| db30396c26 | |||
| efed67f6e4 | |||
| 3c0d0a7d60 | |||
| 0f0254675e | |||
| 068cf1a2fd | |||
| 5f3d2904aa | |||
| e81d3a43b8 | |||
| 7006687292 | |||
| d044c65d2c | |||
| d3ae88f5fe | |||
| a598104778 | |||
| c7099f1a7b | |||
| 3955a27594 | |||
| 597ecd8c0b | |||
| 006505bf22 | |||
| 3b0102c5a8 | |||
| 5d8c49c537 | |||
| b276b6eda9 | |||
| dc502c95b2 | |||
| 1b29e4eae5 | |||
| cb5cf573e5 | |||
| 0f5dd3a722 | |||
| ea4a77dbcc | |||
| 02a6de68b8 | |||
| 1dbb7373c6 | |||
| 4bbd2aa56a | |||
| 169fca23a9 | |||
| f4058b7981 | |||
| baffc7a775 | |||
| 0140d20793 | |||
| 8ced7206f0 | |||
| fa4274f2e3 | |||
| 64d0d211b1 | |||
| aaaa6aa731 | |||
| 47d61bb83a | |||
| d5850afcc2 | |||
| 0c48b0799e | |||
| 1b96dbae3d | |||
| 244e183a2b | |||
| 5cb00a0532 | |||
| 09ce46f46a | |||
| 881a23ec7f | |||
| d53da82ddf | |||
| 177d95128f | |||
| 867a162fcf | |||
| fe0291ef55 | |||
| 1a21ab513d | |||
| 1a275e9501 | |||
| 96a8c33767 | |||
| 084284d1ee | |||
| 13b087e44b | |||
| 22b318f05e | |||
| a575e40859 | |||
| ef044e4937 | |||
| 1e1f8e7ca0 | |||
| 814395b58e | |||
| 5ac5c3c595 | |||
| 64a8daab76 | |||
| 3fb6017976 | |||
| 9379e84ba2 | |||
| 8eaa468b1c | |||
| a1c3e64bf3 | |||
| e90e1bd0c5 | |||
| 30cec00f0e | |||
| 2a0c1a13ad | |||
| 072aa0883b | |||
| 2e22c585d0 | |||
| 3240b19649 | |||
| 2f4b47e456 | |||
| f735c9128c | |||
| 56e8cb0f44 | |||
| d5253f130c | |||
| 261c6f3c7e | |||
| 2ad59e6592 | |||
| f5cf977788 | |||
| d392707ecf | |||
| cbc57fbc0b | |||
| b32a2ded77 | |||
| e7ee9ae747 | |||
| 97acfb6845 | |||
| 709197a957 | |||
| 7d003cdc3b | |||
| c0266a5b84 | |||
| 5b61c71cdd | |||
| 3423b42a8a | |||
| 942124ac67 | |||
| 58d4534176 | |||
| 93517582d1 | |||
| 75c60c2b60 | |||
| 1fbd9cfd50 | |||
| 2e6843fd78 | |||
| c073de4acd | |||
| dcd85c85d0 | |||
| 6e5bfd162a | |||
| b579fa7804 | |||
| f356313e67 | |||
| 4055debc6f | |||
| fcc907c507 | |||
| 8a90a51182 | |||
| 4c42b3090a | |||
| 626d519c81 | |||
| dae3672a9a | |||
| 640bf5515f | |||
| 476fd09397 | |||
| bfbf12914f | |||
| 91eae536ae | |||
| 404becadba | |||
| d71d33d899 | |||
| 65e72da01e | |||
| 8556bebb1f | |||
| dc5c353b8d | |||
| 9f7f877cf2 | |||
| 9a827b783a | |||
| d2641f045e | |||
| e4ef6dc604 | |||
| c8cc9bb188 | |||
| a21dd3d0c0 | |||
| b16d6658f8 | |||
| 01aab808c3 | |||
| eb1ae54739 | |||
| 5483d02a6f | |||
| 9d434eb1e9 | |||
| 43269befd6 | |||
| d8d2b06c6c | |||
| 1f9a2f6554 | |||
| 940162a8b5 | |||
| 3c2b39453a | |||
| 459cd92017 | |||
| a5aa0a773d | |||
| d1b569fbbe | |||
| 6d609f628b | |||
| 8d5eaf0f8d | |||
| de93b439ca | |||
| d11d9ef03c | |||
| f1fc8e1d82 | |||
| 9a44c37cab | |||
| 25a9e5efdf | |||
| 9352193986 | |||
| 61436ca278 | |||
| 17b6fcc48a | |||
| 9f9c5cf27a | |||
| 8fd38fbb40 | |||
| ac2c9fff38 | |||
| 8dc4877379 | |||
| d22a3a3953 | |||
| 182538d2a7 | |||
| 997c0bc297 | |||
| f9099cd680 | |||
| e8b47c33b6 | |||
| 6618fdd86b | |||
| 0b5ef5e257 | |||
| 4f36e6119c | |||
| 24b58d9615 | |||
| 4621c21907 | |||
| a53f6005b3 | |||
| 8bad1b2dfc | |||
| 856ec02083 | |||
| 45c63bdac7 | |||
| a5202b8eb8 | |||
| 766e47a757 | |||
| 0026ef7db7 | |||
| 368c7927ff | |||
| 1dd1ec3a0d | |||
| 6ed5c83b05 | |||
| 3efd1e56c4 | |||
| 1e18c9e309 | |||
| c79048027c | |||
| b2c981fca1 | |||
| 88af4d608d | |||
| 2008b35e8e | |||
| a082714ad5 | |||
| 2f28fde4e6 | |||
| e3004b9db7 | |||
| b192f4f80d | |||
| 809331b9fd | |||
| 3828c8bf89 | |||
| 4731750684 | |||
| 54f2308944 | |||
| afdd44323e | |||
| 9b88d5814c | |||
| 02a924e97d | |||
| e167439ed0 | |||
| 9f26d5a401 | |||
| d7f72470ec | |||
| abc45b1a2f | |||
| 5bc530deb2 | |||
| 6a206b0c5e | |||
| 2485639e11 | |||
| d056c14b91 | |||
| 834a8dd0a8 | |||
| ea5e4d48d3 | |||
| 2b08a8958a | |||
| 759b09c8d6 | |||
| 0266afe9ab | |||
| 109c5e0703 | |||
| 40a79c2cc4 | |||
| debc425f99 | |||
| 602a1cc8a3 | |||
| d080eae809 | |||
| 631b5033fe | |||
| af8ea6934b | |||
| 19740ae6c2 | |||
| 7b78b71487 | |||
| 86a43a79c8 | |||
| 6035a1bde4 | |||
| a32e952323 | |||
| d55b1c67df | |||
| 103f7bc18b | |||
| e857c223d4 | |||
| ea07997522 | |||
| d492c73f94 | |||
| 3b836d29a2 | |||
| 9248916527 | |||
| 2006ebb244 | |||
| 58c852cdba | |||
| 9e77a8e304 | |||
| e9817f1e0d | |||
| 123dde7b8f | |||
| c1b84eabdb | |||
| c7ececde77 | |||
| 6f305d636e | |||
| d25990895c | |||
| d406ced759 | |||
| b858b56120 | |||
| c94fe81dbf | |||
| a67bbebb84 | |||
| cf577c81e1 | |||
| ad236be02c | |||
| 3412e379d6 | |||
| 95f240ab07 | |||
| 0c8ae3f45b | |||
| fe87944049 | |||
| 2cbe290916 | |||
| a85321a1a9 | |||
| c55071d157 | |||
| 86eac774e7 | |||
| dac6df4282 | |||
| d7918b1714 | |||
| c4de84a23a | |||
| c147c29756 | |||
| 5a4a50bc9d | |||
| 55ea4009c9 | |||
| 536fd7dfe4 | |||
| a1f6568b84 | |||
| 6a9112f03c | |||
| 89b4305ccb | |||
| 8643e6a055 | |||
| e2756e85b7 | |||
| 0f7bc36e86 | |||
| 5e20032976 | |||
| c7dbac05a9 | |||
| a0a5adb807 | |||
| ac6a43f6e5 | |||
| 91f57da735 | |||
| 488ac604f9 | |||
| 70ab3e456f | |||
| d0017d2ab8 | |||
| 9633abc09e | |||
| 8f608acc71 | |||
| dbce582bdf | |||
| 62f03bcf11 | |||
| 530eb9ef66 | |||
| 12509eb93a | |||
| 621623bdb6 | |||
| 497a94e3a5 | |||
| a2f5ce797d | |||
| e17082d27e | |||
| 2eefb8e225 | |||
| 5d9b1a1810 | |||
| f274e76253 | |||
| 3bfef7f67b | |||
| 5d6651e00e | |||
| f0ed0b7c41 | |||
| 0d4bf7b6b3 | |||
| a5c7c656e6 | |||
| fb3a937c81 | |||
| e50820abd0 | |||
| 083084136c | |||
| 0188b81220 | |||
| c7468dbfb5 | |||
| d92ba7125e | |||
| 050d5dd063 | |||
| a860c57bd1 | |||
| 1b0b189c16 | |||
| 7d2b3d6663 | |||
| 2899d68973 | |||
| 0cc8238b1a | |||
| f277751d86 | |||
| 74d63a9144 | |||
| 07f7b4e7fb | |||
| 92fda093f7 | |||
| 714751d2d8 | |||
| 2c949192b2 | |||
| c0e3c6a0eb | |||
| 764484f735 | |||
| 208bd4fcb2 | |||
| 6b17825fa2 | |||
| d20e0bd2c2 | |||
| ba53a5fa93 | |||
| 4d40da5661 | |||
| 4ab157e2a1 | |||
| dbf64d2a2b | |||
| 03d4ee3482 | |||
| 959a061380 | |||
| f5432dfb9e | |||
| 6e2f2fb9d2 | |||
| fb494a911d | |||
| bc9dec659c | |||
| b68cc3f61e | |||
| 0db80add2c | |||
| 2a67632497 | |||
| 5260b28c15 | |||
| 4d365cba22 | |||
| 8174a8efc3 | |||
| a5d8df35b6 | |||
| 0ad429ffaa | |||
| 3108572387 | |||
| 98a406ff9e | |||
| 9257550e56 | |||
| ef19ed0a26 | |||
| 80daa8560d | |||
| 797cc16a91 | |||
| 771e0464d7 | |||
| fc00566469 | |||
| 7587860c12 | |||
| fabb5dd003 | |||
| 314da8b50f | |||
| c8fbfcbc24 | |||
| a922961621 | |||
| 2995eb1cac | |||
| 758b732142 | |||
| 50b80f3267 |
@@ -0,0 +1,3 @@
|
||||
.gitattributes export-ignore
|
||||
/Wiki export-ignore
|
||||
.gitignore export-ignore
|
||||
+2
-1
@@ -55,4 +55,5 @@ docs/_build/
|
||||
# pycharm
|
||||
.idea
|
||||
|
||||
icon.psd
|
||||
icon.psd
|
||||
main-icon.psd
|
||||
+511
@@ -1,3 +1,514 @@
|
||||
|
||||
2.5.4.2541
|
||||
|
||||
- core: try retrieving advanced_settings.json from the path given, which may be a file path or a directory
|
||||
- menu: ignore options: fix plugin not responding, fix unicode strings; resolve #509
|
||||
- providers: addic7ed: fix usage/adapt to new show search method
|
||||
- providers: opensubtitles: properly handle responses again, re-enable automatic throttling based on those (broken since XMLRPC handler rewrite)
|
||||
|
||||
|
||||
2.5.4.2527
|
||||
|
||||
- core: bugfixes
|
||||
- core: get_item: don't fail on socket timeout; fixes #498
|
||||
- core: fix scandir encoding errors; #453 #461 #441
|
||||
- core: clamp menu history to 25 items
|
||||
- add UnRAR for aarch64 (untested), arm (armv5tel, untested), linux/i386, MacOSX/i386; fixes #311
|
||||
- add 3rd party licenses
|
||||
- menu: new debounce/history mechanism; fixes the back button usage
|
||||
- config: add custom path option for advanced_settings.json
|
||||
- providers: opensubtitles: re-add support for throttling based on HTTP response codes, which got ditched due to new connection interface
|
||||
- providers: legendastv: disable if unrar wasn't found
|
||||
- providers: addic7ed: reduce show cache to 1 week
|
||||
- advanced settings: sonarr/radarr: make ssl verification optional
|
||||
- advanced settings: opensubtitles: add configurable connection timeout
|
||||
- refiners: drone: use certifi for HTTPS connections
|
||||
- tasks: SearchAllRecentlyAddedMissing: fix ZeroDivisionError in edge cases; fixes #496
|
||||
|
||||
|
||||
2.5.3.2452
|
||||
|
||||
- core: update certifi to 2018.01.18
|
||||
- core: metadata storage: only allow one subtitle per language
|
||||
- core: metadata storage: only parse latest metadata subtitle in localmedia
|
||||
- core: metadata storage: kill existing metadata subtitles explicitly upon storing a new one
|
||||
- core: metadata storage: fix selecting current subtitle from menu
|
||||
- providers: opensubtitles: use new requests based transport by default, finally fixes ResponseNotReady properly
|
||||
- providers: opensubtitles: mask token in logs
|
||||
- providers: don't check for hash validity if it isn't verifiable (fixes napiprojekt, #478)
|
||||
- submod: common: extend non_word_only matching
|
||||
- submod: common: reduce multi spaces to one
|
||||
- submod: OCR: fix III'll=I'll
|
||||
- advanced settings: add option to use HTTP instead of HTTPS for OpenSubtitles
|
||||
|
||||
|
||||
2.5.3.2422
|
||||
|
||||
- core: don't fail on embedded subtitle streams without language code set, fixes #473
|
||||
- providers: catch ResponseNotReady in list_subtitles_provider as well (partly fixes OpenSubtitles)
|
||||
- providers: don't use retry logic in case of ResponseNotReady
|
||||
- providers: addic7ed: use new search endpoint
|
||||
|
||||
|
||||
2.5.3.2414
|
||||
|
||||
- core: expand user agent list
|
||||
- core: update subliminal to 4ad5d31
|
||||
- core: treat 23.976, 23.98, 24.0 fps as equal
|
||||
- core: correctly skip blacklist entries when iterating through currently known subs
|
||||
- core: fix unpacking of packs without asked-for-release-group
|
||||
- core: fix embedded subtitle language detection; add debug log
|
||||
- core: treat embedded subtitle containing "forced" in its title as forced
|
||||
- core: improve embedded subtitles detection
|
||||
- core: store extracted embedded forced subtitles with the "forced" suffix (e.g.: video.en.forced.srt)
|
||||
- core: don't bother trying to extract embedded subtitle if transcoder wasn't found
|
||||
- core: fix automatic extraction of unknown embedded subtitle streams
|
||||
- core: skip immediately searching for new subtitle after successfully extracting embedded
|
||||
- core: extract embedded ASS: don't transcode to SRT using ffmpeg (Plex Transcoder), do the transcoding later using pysubs2; fixes offset issues
|
||||
- core: extract embedded: let ffmpeg auto convert mov_text/tx3g to srt
|
||||
- core: fix transcoder detection; add fallback #460
|
||||
- core: remove LD_LIBRARY_PATH from environment before calling notification executable
|
||||
- core: auto extract embedded subtitles in a separate thread
|
||||
- core: reduce encoding change log spam
|
||||
- core: only allow one automatic extraction at a time; add optional advanced settings "auto_extract_multithread"
|
||||
- core: add minimum score a subtitle has to have when considered by the find better subtitles task, when the current subtitle is an extracted embedded one; add advanced_settings entries
|
||||
- core/config: automatic extraction: add config setting to indicate whether there should be an immediate search for available subtitles after extraction or not (default: off)
|
||||
- core/menu/submod: add reverse_rtl modification for Hebrew; fixes #409
|
||||
- core: scoring: assume title match on tvdb_id match
|
||||
- tasks: search all recently added missing: fix attribute access on missing stored subtitle info
|
||||
- providers: add hosszupuska (hungarian, thanks morpheus133 for the basic implementation)
|
||||
- providers: add argenteam (spanish, thanks mmiraglia for the basic implementation)
|
||||
- providers: addic7ed: use random user agent by default (enforce for existing configs)
|
||||
- providers: enable subscene by default
|
||||
- providers: opensubtitles: add fallback for dict based query response in contrast to list/array based
|
||||
- advanced settings: make text-based-subtitle-formats configurable
|
||||
- menu: submod: inverse-reverse subtitle timing time-choices for better accessibility
|
||||
- submod: reduce log spam in case of debug logs enabled
|
||||
- submod: style tags could result in no output at all
|
||||
- submod: fix empty content if only non-line-mods were used, no line-mods; fixes #449
|
||||
- submod: HI: correctly handle style tags when checking for brackets
|
||||
- submod: HI: don't remove anything that's surrounded by quotes
|
||||
- submod: HI: double or triple dash is em dash
|
||||
- submod: HI: HI_before_colon_noncaps, don't assume single quotes are sentence enders
|
||||
- submod: common: don't uppercase after abbreviations
|
||||
- submod: common: don't break phone numbers (more than one spaced number pair found)
|
||||
- submod: common: also count lines only consisting of dots as removable
|
||||
- submod: common: replace more than 3 consecutive dots with 3 dots
|
||||
- submod: OCR: "H i." = "Hi."
|
||||
|
||||
|
||||
2.5.0.2287
|
||||
|
||||
- core: reduce main icon size
|
||||
- core: fix usage on NVIDIA SHIELD (hopefully, please report back), #441
|
||||
- core: add scandir fallback to listdir in case of badly configured locale in environment, #441, #440
|
||||
- core: get subtitles from archive: don't assume an episode match
|
||||
- core: get subtitles from archive: don't assume any attributes in guess
|
||||
- core: improve release group detection for drone/filebot/file_info refiners
|
||||
- core: fix language detection for embedded subtitle streams
|
||||
- core: support extraction of embedded mov_text subtitles in mp4 video files
|
||||
- refiners: drone: add http:// to url if not given
|
||||
- providers: opensubtitles: retry/reinitialize request when encountering ResponseNotReady
|
||||
- config: clarify subscene being only enabled for TV series by default
|
||||
- menu: when encountering permission errors when scanning media files, warn in the menu about them
|
||||
- submod: common: don't break -- addic7ed --
|
||||
- submod: common: remove lines that consist only of dash, underscore
|
||||
- submod: OCR: fix Ls = Is
|
||||
- submod: OCR: fix bad HI colons (ANNOUNCER; instead of ANNOUNCER:)
|
||||
- submod: common: fix lines consisting only of bad music symbols (*#¶ = ♪)
|
||||
- submod: HI: remove music-symbol-only-lines
|
||||
- submod: HI: be less aggressive about lines ending with a colon; please re-apply all your mods via advanced menu
|
||||
- submod: OCR: fix it'sjust, isn'tjust, Iam, Ican
|
||||
|
||||
|
||||
2.5.0.2247
|
||||
- fix ignoring by-hash-matched episodes
|
||||
|
||||
|
||||
2.5.0.2241
|
||||
|
||||
- fix issue when removing crap from filenames to not accidentally remove release group #436
|
||||
- fix initialization of soft ignore list after upgrade from 2.0
|
||||
|
||||
|
||||
2.5.0.2221
|
||||
|
||||
- refiners: add support for retrieving original filename from
|
||||
- drone derivatives: sonarr, radarr
|
||||
- filebot
|
||||
- symlinks
|
||||
- file_info meta file lists (see wiki)
|
||||
|
||||
- providers: add subscene (disabled by default to not flood subscene on release)
|
||||
- normal search
|
||||
- season pack search if season has concluded
|
||||
|
||||
- core: add provider subtitle-archive/pack cache for retrieving single subtitles from previously downloaded (season-) packs (subscene)
|
||||
- core/agent: massive performance improvements over 2.0
|
||||
- core/agent/background-tasks: reduce memory usage to a fraction of 2.0
|
||||
- core/providers: add dynamic provider throttling when certain events occur (ServiceUnavailable, too many downloads, ...), to lighten the provider-load
|
||||
- core/agent/config: automatically extract embedded subtitles (and use them if no current subtitle)
|
||||
- core: fix internal subtitle info storage issues
|
||||
- core: always store internal subtitle information even if no subtitle was downloaded (fixes SearchAllRecentlyAddedMissing)
|
||||
- core: fix internal subtitle info storage on windows (gzip handling is broken there)
|
||||
- core: don't fail on missing logfile paths
|
||||
- core: fix default encoding order for non-script-serbian
|
||||
- core: improve logging
|
||||
- core: add AsRequested to cleanup garbage names
|
||||
- core: treat SDTV and HDTV the same when searching for subtitles
|
||||
- core: parse_video: trust PMS season and episode numbers
|
||||
- core: parse_video: add series year information from PMS if none found
|
||||
- core: upgrade dependencies
|
||||
- core: update subliminal to 62cdb3c
|
||||
- core: add new file based cache mechanism, rendering DBM/memory backends obsolete
|
||||
- core: treat 23.980 fps as 23.976 and vice-versa
|
||||
- core: add HTTP proxy support for querying the providers (supports credentials)
|
||||
- core: only compute file hashes for enabled providers
|
||||
- core: massive speedup; refine only when needed, exit early otherwise
|
||||
- core: store last modified timestamp in subtitle info storage
|
||||
- core: only write to subtitle info storage if we haven't had one or any subtitle was downloaded
|
||||
- core: only clean up the sub-folder if a subtitle-sub-folder has been selected, and not the parent one also
|
||||
- core: support for CP437 encoded filenames in ZIP-Archives
|
||||
- core: use scandir library instead of os.listdir if possible, reducing performance-impact
|
||||
- core: archives: support multi-episode subtitles (partly)
|
||||
- core: subtitle cleanup: add support for hi, cc, sdh secondary filename tags; don't autoclean .txt
|
||||
- core: increase request timeout by three times in case a proxy is being used
|
||||
- core: fix language=Unknown in Plex when "Restrict to one language"-setting is set
|
||||
- core: refining: re-add old detected title as alternative title after re-refining with plex metadata's title; fixes #428
|
||||
- core: implement advanced_settings.json (see advanced_settings.json.template for reference, copy to "Plug-in Support/Data/com.plexapp.agents.subzero" to use it)
|
||||
- core/tasks: fix search all recently added missing (the total number of items will change in the menu while running), reduces memory usage
|
||||
- core/menu: add support for extracting embedded subtitles using the builtin plex transcoder
|
||||
- core/menu: skip wrong season or episode in returned subtitle results
|
||||
- core/config: fix language handling if treat undefined as first language is set
|
||||
- providers: remove shooter.cn
|
||||
- providers: add support for zip/rar archives containing more than one subtitle file
|
||||
- submod: common: remove redundant interpunction ("Hello !!!" -> "Hello!")
|
||||
- submod: skip provider hashing when applying mods
|
||||
- submod: correctly drop empty line (fixing broken display)
|
||||
- submod: OCR: fix F'xxxxx -> Fxxxxx
|
||||
- submod: HI: improve bracket matching
|
||||
- submod: OCR: fix l/L instead of I more aggressively
|
||||
- submod: common: fix uppercase I's in lowercase words more aggressively
|
||||
- submod: HI: improve HI_before_colon
|
||||
- submod: common: be more aggressive when fixing numbers; correctly space out spaced ellipses; don't break spaced ellipses; handle multiple spaces in numbers
|
||||
- menu: add support for extracting embedded subtitles for a whole season
|
||||
- menu: add reapply mods to current subtitle
|
||||
- menu: pad titles for more submenus, resulting in detail view in PlexWeb
|
||||
- menu: add subtitle selection submenu (if multiple subtitles are inside the subtitle info storage; e.g. previously downloaded ones or extracted embedded)
|
||||
- menu: advanced: add skip findbettersubtitles menu item, which sets the last_run to now (for debugging purposes)
|
||||
- menu: ignore: add more natural title for seasons and episodes (kills your old ignore lists!)
|
||||
- config: skip provider hashing on low impact mode
|
||||
- config: add limit by air date setting to consider for FindBetterSubtitles task (default: 1 year)
|
||||
- advanced settings: define enabled-for media types per provider
|
||||
- advanced settings: define enabled-for languages per provider
|
||||
- advanced settings: add deep-clean option (clean up the subtitle-sub-folder and the parent one)
|
||||
|
||||
|
||||
|
||||
2.0.33.1871
|
||||
- core: normalize line endings in subtitles to LF (\n)
|
||||
- core: add subtitle storage lock to avoid race condition
|
||||
- core: be more verbose about subtitle storage addition
|
||||
- core: fix MPL2 newline parsing, which resulted in broken subtitles
|
||||
- core: encoding change: reduce log spam
|
||||
- submod: common: fix CM_starting_spacedots
|
||||
- opensubtitles: fix request/response handling
|
||||
|
||||
|
||||
|
||||
2.0.33.1849
|
||||
- opensubtitles: add VIP server handling + preference; VIP benefits: 10€/year, ad-free subs, 1000 subs/day, no-cache VIP server, help SZ and subscribe via http://v.ht/osvip
|
||||
- opensubtitles: try to reuse previous token instead of logging in every time
|
||||
- core: add throttling between searches (10 seconds)
|
||||
- core: fix IETF handling for good
|
||||
- core: fix no subtitles being searched in certain situations (when an external subtitle without special tag exists)
|
||||
- core: add subtitle blacklist
|
||||
- core: fixes
|
||||
- core: fix detection of certain PMS media stream language tags ("FR" for example)
|
||||
- core: missing subtitles: correctly skip unwanted subtitle extensions
|
||||
- core: missing subtitles: honor "treat undefined as first language" option correctly
|
||||
- api: add blacklisting endpoints for quickly searching for new subtitles via bookmarklet
|
||||
- submod: colors: apply color mods at the end of processing modifications; fix color mods
|
||||
- submod: new remove_tags modification to remove all styling tags from subtitles
|
||||
- submod: HI: be more aggressive at handling brackets
|
||||
- submod: OCR: update en and hrv
|
||||
- submod: common: remove "torrent downloaded from ..." lines
|
||||
- submod: OCR: fix WholeWord handling, improving modification
|
||||
- submod: apply OCR fixes before HI
|
||||
- submod: OCR: fix broken HI tag colons (ANNOUNCER'. instead of ANNOUNCER:)
|
||||
- menu: advanced: speed up batch modifications
|
||||
- menu: add subtitle blacklist
|
||||
- menu: recently played: show only TV episodes and movies (music tracks were listed here as well)
|
||||
|
||||
|
||||
2.0.29.1767
|
||||
- core: fix internal subtitle storage issues
|
||||
- core: handle "embedded-forced" tag (futureproofing)
|
||||
- core: remove more garbage tags from release groups (nzbgeek, chamele0n, buymore, xpost, postbot)
|
||||
- submod: OCR fix: fix music icon = paragraph
|
||||
|
||||
|
||||
2.0.29.1756
|
||||
- core: don't fail on uppercase file extensions
|
||||
- core: don't re-download a subtitle if we already downloaded one, it still physically exists and external subtitles are configured to be ignored
|
||||
- core: fix VTT subtitle duplication
|
||||
- core: if forced subtitles not explicitly wanted, ignore existing forced subtitles when searching
|
||||
- core: add full IETF language support for `Treat languages with country attribute as ISO 639-1 (e.g. don't download pt-BR if pt subtitle exists)`-setting for embedded subtitles
|
||||
- menu: remove buggy dynamic permission-based channel icon introduced in 1715
|
||||
- menu: improve `Items with missing subtitles` menu usage and item display
|
||||
- menu: `Advanced -> Get my logs` handle custom domains without port
|
||||
- menu: correctly show country/script part of languages with such attributes (e.g. pt-BR)
|
||||
- config: rename `Scan:` settings; make them better understandable and translatable
|
||||
- config: rephrase IETF options as "languages with country attribute" (e.g. pt-BR)
|
||||
- config: separate IETF options into how to display languages with country attribute and how they should be handled when searching/scanning (e.g. pt-BR)
|
||||
- config: `Scheduler: Item age to be considered recent` now can go up to 12 weeks
|
||||
- config: `Scheduler: Periodically search for recent items with missing subtitles` added `every 2 hours`
|
||||
- submod: swe: add Ĺ to Å
|
||||
|
||||
|
||||
2.0.26.1715
|
||||
- core: submod: OCR fixes: swe: replace ĺ with å inside words
|
||||
- core: fix handling of non-existent PMS audio_codec info
|
||||
- core: filename matching ignored the strictness setting in certain global directory configurations (thanks @raduc)
|
||||
- core: don't fail on migration errors
|
||||
- provider titlovi: handle multiple subtitles per archive
|
||||
- provider addic7ed: reset default boost to 19 (was 21)
|
||||
- menu: add warning icon on missing permissions
|
||||
- menu: manual subtitle list sometimes listed duplicates (thanks @andreashoyer)
|
||||
- menu: don't request PMS metadata in item details menu twice
|
||||
- menu: don't fail badly on non-existent PMS metadata in item details menu
|
||||
|
||||
|
||||
2.0.26.1695
|
||||
## ATTENTION: THIS RELEASE RESETS YOUR CONFIGURED LANGUAGES TO DEFAULT!
|
||||
- core: fix bug that caused SZ not to work for Windows users with special characters in their username
|
||||
- core: fix issues when logging failed manual download actions
|
||||
- core: update guessit to 2.1.4
|
||||
- core: fix issue causing the background task scheduler to stop after changing preferences
|
||||
- core: fix polish encoding (try windows-1250 first, then iso 8859-2)
|
||||
- core: remove subscenter provider as it now uses captchas
|
||||
- core: add titlovi as default provider (thanks viking!)
|
||||
- core: increase default PMS API request timeout to 15 (old: 10, max: 45); add preference for that
|
||||
- core: re-add separate legacy FindMissingSubtitles task and run it on the first run to prime SZ's internal subtitle storage
|
||||
- core: add "low impact mode" for people with remote filesystems (currently enabled for List LANGUAGE subtitles in detail menu); alleviates certain plexweb timeout issues
|
||||
- menu: change naming of find missing subtitles menu item
|
||||
- legendastv: fix multi value guessit issues
|
||||
- submod: OCR: update eng and hrv OCR replace dictionaries; fix ". L am huge"
|
||||
|
||||
|
||||
2.0.25.1635
|
||||
- core: update memory handling, possibly reduce memory problems of 2.0
|
||||
- core: support for MPL2 subtitle format
|
||||
- core: update task handling
|
||||
- core: re-enable NVIDIA SHIELD support by fixing rarfile behaviour
|
||||
- core: add SZ_UNRAR_TOOL environment variable for custom unrar location
|
||||
- core: disable SZ when no providers are enabled
|
||||
- core: only start activity monitor if channel or agent are enabled
|
||||
- core: improve custom provider integration
|
||||
- core: update eastern european encoding detection (especially Romanian)
|
||||
- tasks: reduce provider stress by introducing wait times between searches/downloads
|
||||
- windows: correctly ship UnRAR.exe
|
||||
- windows: skip DBM checks
|
||||
- addic7ed: fix Nip/Tuck
|
||||
- subscenter: use new domain
|
||||
|
||||
|
||||
2.0.24.1581
|
||||
- legendastv: ship unrar.exe for Windows users (fixes unrar issues)
|
||||
- addic7ed: fix TooManyRequests error
|
||||
- submod: OCR fixes NL: add custom dictionary data for malformed characters
|
||||
- submod: OCR fixes: update hrv/NL dictionaries
|
||||
- submod: common: remove spaces before punctuation
|
||||
- podnapisi: now returns more subtitles again
|
||||
ATTENTION: Sub-Zero is still broken on PMS for SHIELD. Help needed!
|
||||
|
||||
|
||||
2.0.24.1565
|
||||
- core: fix searchallrecentlymissing task erroring if item not found
|
||||
- core: fix non-plex-items appearing in and crashing the recently played list
|
||||
- core: add hybrid-plus activity setting (current media file and next episode)
|
||||
- podnapisi: fix by using correct guessit parameters
|
||||
|
||||
|
||||
2.0.24.1558
|
||||
- core: fix handling of broken RAR files from legendas
|
||||
|
||||
|
||||
2.0.24.1555
|
||||
- core: fix rare microdvd issue from OpenSubtitles by generally providing FPS info when encountering a microdvd subtitle
|
||||
|
||||
|
||||
2.0.24.1549
|
||||
Changes from 1.4
|
||||
- wiki: new wiki! (thanks @dane22!)
|
||||
- core: update subliminal to version 2
|
||||
- core: update all dependencies
|
||||
- core: add new providers: legendastv (pt-BR), napiprojekt (pl), shooter (cn), subscenter (heb)
|
||||
- core: rewritten all subliminal patches for version 2
|
||||
- core: use SSL again for opensubtitles
|
||||
- core: improved matching due to subliminal 2 (and SZ custom) tvdb/omdb refiners
|
||||
- core: improved matching by relying on existing metadata provided by the PMS
|
||||
- core: improved performance due to multithreaded provider-querying
|
||||
- core: improved performance due to less physical media file access (no more MKV metadata scanning)
|
||||
- core: VTT subtitle format output supported (for Chromecast)
|
||||
- core: rewrote and streamlined internal subtitle data storage format
|
||||
- core: support Cyrillic and Latin variants of Serbian language
|
||||
- core: simplified (custom) provider registration; add own provider registry
|
||||
- core: rewrote recently added missing task
|
||||
- core: automatically fix badly (re-) encoded unicode entities in subtitles
|
||||
- core: always store subtitles in proper UTF-8 encoding
|
||||
- core: add periodic internal subtitle data storage cleanup task
|
||||
- core: on non-windows systems, utilize a file-based cache database for provider media lists and subliminal refiner results
|
||||
- core: add manual and automatic subtitle modification framework (fix common OCR issues, remove hearing impaired etc.)
|
||||
- core: relieve some stress on providers by providing better fine-grained retry handling
|
||||
- menu: add icons for menu items; update main channel icon
|
||||
- menu: add subtitle modifications (subtitle content fixes, offset-based shifting, framerate conversion)
|
||||
- menu: add recently played menu
|
||||
- menu: add "Get my logs" function to the advanced menu, which zips up all necessary logs suitable for posting in the forums
|
||||
- menu: add generic "back to season" and "back to series" entries to item detail views to make navigation easier
|
||||
- config: all scores changed (defaults updated)
|
||||
- config: remove "Force UTF-8 when storing subtitles" (it's now always implied)
|
||||
- improve almost everything Sub-Zero did in 1.4 :)
|
||||
|
||||
|
||||
2.0.23.1464 RC10.1
|
||||
- core: huge bugfix; please check `Library/Application Support/Plex Media\ Server/Plug-in Support/Data/com.plexapp.agents.subzero/DataItems`
|
||||
for any `subs_XXXXX.json.gz` file bigger than 500kb and delete them
|
||||
|
||||
|
||||
2.0.23.1456 RC10
|
||||
- core: findBetterSubtitles: increase series cutoff by 2 (resolution match)
|
||||
- core: add VTT format
|
||||
- core: fix crashes regarding DBM/cache management
|
||||
- core: update rarfile.py
|
||||
- core: add missing encodings
|
||||
- core: full support for Serbian subtitles (Cyrillic and Latin)
|
||||
- podnapisi: fix pt-BR, srp-cyrl and srp-latn
|
||||
- core: implement own provider registry and ditch the subliminal one
|
||||
- core: use ftfy library to fix re-encoding errors inside subtitles introduced by the subtitle author
|
||||
- core: always store and save subtitles normalized to UTF-8
|
||||
- core: replace spaced dashes in movie/series names before re-refining with plex metadata info
|
||||
- submod: remove_HI: handle multiline brackets correctly
|
||||
|
||||
|
||||
2.0.20.1364 RC9
|
||||
- core: performance improvements
|
||||
- core: if info couldn't be guessed from the filename, fill missing info from PMS #270
|
||||
- submod: OCR: add more to the eng dictionary
|
||||
- submod: HI: fixed some issues with font style tags
|
||||
- core: don't ignore subtitles from providers that don't have hearing impaired info, when hearing impaired mode is set to "force non-HI"
|
||||
- legendastv/menu: fix manual subtitle selection issues in menu
|
||||
- core: improve specials matching on OpenSubtitles
|
||||
- core: update guessit
|
||||
|
||||
|
||||
2.0.19.1337 RC8
|
||||
- napiprojekt: fixed: couldn't convert microdvd to SRT on certain occasions
|
||||
- core: when normalize to UTF-8 is enabled, also store the subtitle in UTF-8 encoding in the internal storage
|
||||
- core: add more encodings for western/eastern/northern europe
|
||||
- submod: OCR: update dictionaries from SubtitleEdit
|
||||
- submod: common: be smarter about uppercase i's in words that should have lowercase L's
|
||||
- submod: fix unopened/unclosed font style tags after modification
|
||||
- core: re-enable OMDB support
|
||||
- core: update guessit for better matching
|
||||
- core: fix SearchAllRecentlyMissing (was broken since RC3)
|
||||
|
||||
|
||||
2.0.19.1299 RC7
|
||||
- submod: offset mods now get merged internally when applied multiple times (to avoid errors and increase performance)
|
||||
- submod: improve performance
|
||||
- submod: core mods (OCR, common, remove_HI) now are always applied in a fixed order internally, regardless of the order they were added in
|
||||
- submod: CM_spaces_in_numbers: don't break up ellipses (30... 29... 28...)
|
||||
- submod: CM_spaces_in_numbers: don't fix countdown numbers (30, 29, 28)
|
||||
- submod: remove_HI: make bracket removal more aggressive
|
||||
- submod: remove_HI: be less aggressive when removing text-before-colon
|
||||
- submod: remove_HI: remove all-uppercase-before-sentence (THIS IS ALL UPPERCASE And here starts a sentence -> And here starts a sentence)
|
||||
- submod: fix all character ranges to include non-ASCII characters
|
||||
- add new README for 2.0
|
||||
|
||||
|
||||
2.0.19.1267 RC6
|
||||
- core: add new SZ subtitle storage format
|
||||
- smaller data files and less cumbersome
|
||||
- it will auto migrate when old data is accessed - to speed this up, use "Trigger subtitle storage migration (expensive)" in advanced menu
|
||||
- core: performance optimizations
|
||||
- addic7ed: when release group matches, assume the format matches, too (leftover change from RC5)
|
||||
- submod: fix patterns for beginlines/endlines
|
||||
- submod: add our own dictionaries to OCR fixes (english)
|
||||
- submod: hearing impaired: also remove full-caps with punctuation inside
|
||||
- submod: correctly handle partiallines
|
||||
- submod: in numbers with spaces (incorrect), also allow for some punctuation (,.:')
|
||||
|
||||
|
||||
2.0.18.1245 RC5
|
||||
- core: add more debug info
|
||||
- core: fix subtitle modifications (was broken in RC4, created non-usable subtitles)
|
||||
- submod: add ANSI colors
|
||||
- menu/submod: add color mod menu
|
||||
- submod: exclusive mods now are mutually exclusive and get cleaned on duplicate
|
||||
- menu/core: naming
|
||||
|
||||
For everyone who runs RC4: your subtitles are broken. Go to the advanced menu and trigger `Re-Apply mods of all stored subtitles` to fix them.
|
||||
|
||||
|
||||
2.0.17.1234 RC4
|
||||
- core: backport provider-download-retry implementation
|
||||
- core: implement custom user agent (for OpenSubtitles)
|
||||
- core/menu: correct handling of media with multiple files
|
||||
- core: fix SearchAllRecentlyMissing; also wait 5 seconds between searches
|
||||
- core: SearchAllRecentlyMissing: honor physical ignores
|
||||
- submod: pattern fixes
|
||||
- submod: better unicode handling
|
||||
- submod: add color mod (only automatic by now)
|
||||
|
||||
|
||||
2.0.15.1216 RC3
|
||||
- core: fixes
|
||||
- scheduler: revert some of the aggressive changes in RC2
|
||||
- submod: be smarter about WholeLine matches
|
||||
|
||||
|
||||
2.0.15.1209 RC2
|
||||
- core: fixes
|
||||
- core: submod-common: fix multiple dots at start of line
|
||||
- core/menu: add subtitle modification debug setting
|
||||
- core/menu: when manually listing available subtitles in menu, display those with wrong FPS also (opensubtitles), because you can fix them later
|
||||
- core/menu: advanced-menu: add apply-all-default-mods menu item; add re-apply all mods menu item
|
||||
- core: always look for currently (not-) existing subtitles when called; hopefully fixes #276
|
||||
- scheduler/menu: be faster; also launch scheduled tasks in threads, not just manually launched ones
|
||||
- core: don't delete subtitles with .custom or .embedded in their filenames when running auto cleanup, if the correct media file exists
|
||||
- menu: add back-to-previous menu items
|
||||
|
||||
|
||||
2.0.12.1180 RC1
|
||||
- core: update subliminal to version 2
|
||||
- core: update all dependencies
|
||||
- core: add new providers: legendastv (pt-BR), napiprojekt (pl), shooter (cn), subscenter (heb)
|
||||
- core: rewritten all subliminal patches for version 2
|
||||
- menu: add icons for menu items; update main channel icon
|
||||
- core: use SSL again for opensubtitles
|
||||
- core: improved matching due to subliminal 2 (and SZ custom) tvdb/omdb refiners
|
||||
- menu: add "Get my logs" function to the advanced menu, which zips up all necessary logs suitable for posting in the forums
|
||||
- core: on non-windows systems, utilize a file-based cache database for provider media lists and subliminal refiner results
|
||||
- core: add manual and automatic subtitle modification framework (fix common OCR issues, remove hearing impaired etc.)
|
||||
- menu: add subtitle modifications (subtitle content fixes, offset-based shifting, framerate conversion)
|
||||
- menu: add recently played menu
|
||||
- improve almost everything Sub-Zero did in 1.4 :)
|
||||
|
||||
|
||||
1.4.27.973
|
||||
- core: ignore "obfuscated" and "scrambled" tags in filenames when searching for subtitles
|
||||
- core: exotic embedded subtitles are now also considered when searching (and when the option is enabled); fixes #264
|
||||
|
||||
|
||||
1.4.27.967
|
||||
- core: remember the last 10 played items; only consider on_playback for "playing" state within the first 60 seconds of an item
|
||||
|
||||
|
||||
1.4.27.965
|
||||
- core: on_playback activity bugfixes
|
||||
|
||||
|
||||
1.4.27.957
|
||||
- core: correctly fall back to the next best subtitle if the current one couldn't be downloaded; hopefully fixes #231
|
||||
- core: add "Scan: which external subtitles should be picked up?"-setting
|
||||
|
||||
+131
-63
@@ -1,13 +1,11 @@
|
||||
# coding=utf-8
|
||||
import sys
|
||||
import datetime
|
||||
import os
|
||||
|
||||
from subliminal_patch import compute_score
|
||||
from subzero.sandbox import restore_builtins
|
||||
from subzero.sandbox import fix_environment_stuff
|
||||
|
||||
module = sys.modules['__main__']
|
||||
restore_builtins(module, {})
|
||||
fix_environment_stuff(module, {})
|
||||
|
||||
globals = getattr(module, "__builtins__")["globals"]
|
||||
for key, value in getattr(module, "__builtins__").iteritems():
|
||||
@@ -18,7 +16,6 @@ import logger
|
||||
|
||||
sys.modules["logger"] = logger
|
||||
|
||||
import subliminal
|
||||
import support
|
||||
|
||||
import interface
|
||||
@@ -26,9 +23,9 @@ sys.modules["interface"] = interface
|
||||
|
||||
from subzero.constants import OS_PLEX_USERAGENT, PERSONAL_MEDIA_IDENTIFIER
|
||||
from interface.menu import *
|
||||
from support.plex_media import media_to_videos, get_media_item_ids, scan_videos
|
||||
from support.subtitlehelpers import get_subtitles_from_metadata
|
||||
from support.storage import whack_missing_parts, save_subtitles
|
||||
from support.plex_media import media_to_videos, get_media_item_ids
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles, store_subtitle_info, get_subtitle_storage
|
||||
from support.items import is_ignored
|
||||
from support.config import config
|
||||
from support.lib import get_intent
|
||||
@@ -36,6 +33,7 @@ from support.helpers import track_usage, get_title_for_video_metadata, get_ident
|
||||
from support.history import get_history
|
||||
from support.data import dispatch_migrate
|
||||
from support.activities import activity
|
||||
from support.download import download_best_subtitles
|
||||
|
||||
|
||||
def Start():
|
||||
@@ -48,12 +46,17 @@ def Start():
|
||||
intent = get_intent()
|
||||
intent.cleanup()
|
||||
|
||||
#Locale.DefaultLocale = "de"
|
||||
|
||||
# clear expired menu history items
|
||||
now = datetime.datetime.now()
|
||||
if "menu_history" in Dict:
|
||||
for key, timeout in Dict["menu_history"].items():
|
||||
for key, timeout in Dict["menu_history"].copy().items():
|
||||
if now > timeout:
|
||||
del Dict["menu_history"][key]
|
||||
try:
|
||||
del Dict["menu_history"][key]
|
||||
except:
|
||||
pass
|
||||
|
||||
# run migrations
|
||||
if "subs" in Dict or "history" in Dict:
|
||||
@@ -75,7 +78,8 @@ def Start():
|
||||
scheduler.run()
|
||||
|
||||
# bind activities
|
||||
Thread.Create(activity.start)
|
||||
if config.enable_channel:
|
||||
Thread.Create(activity.start)
|
||||
|
||||
if "anon_id" not in Dict:
|
||||
Dict["anon_id"] = get_identifier()
|
||||
@@ -89,45 +93,6 @@ def Start():
|
||||
track_usage("General", "plugin", "start", config.version)
|
||||
|
||||
|
||||
def download_best_subtitles(video_part_map, min_score=0):
    """
    Download the best subtitles for the given videos, unless none of them is missing a subtitle.

    :param video_part_map: mapping of scanned video -> part; the part is handed to
                           get_subtitles_from_metadata, the keys are handed to subliminal
    :param min_score: minimum score passed through to subliminal.download_best_subtitles
    :return: the result of subliminal.download_best_subtitles; None if no languages are configured
             or if no video is missing a subtitle
    """
    hearing_impaired = Prefs['subtitles.search.hearingImpaired']
    languages = config.lang_list
    if not languages:
        return

    for video, part in video_part_map.iteritems():
        if not Prefs['subtitles.save.filesystem']:
            # subtitles are not saved to the filesystem: take the ones found in the metadata into account
            for language, sub_list in get_subtitles_from_metadata(part).iteritems():
                if not sub_list:
                    continue
                video.subtitle_languages.add(language)
                Log.Debug("Found metadata subtitle %s for %s", language, video)

        still_missing = (languages - video.subtitle_languages)

        # a video counts as covered if nothing is missing, or if exactly one subtitle is wanted
        # (Prefs['subtitles.only_one']) and at least one, in any language, has been found
        one_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']

        if still_missing and not one_is_enough:
            # the first video that lacks a subtitle is sufficient to trigger the download; stop checking
            break

        if one_is_enough:
            Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
        else:
            Log.Debug('All languages %r exist for %s', languages, video)
    else:
        # the loop ran to completion without finding a video that lacks a subtitle
        Log.Debug("All languages for all requested videos exist. Doing nothing.")
        return

    Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s" % (min_score, hearing_impaired))

    return subliminal.download_best_subtitles(video_part_map.keys(), languages, min_score, hearing_impaired, providers=config.providers,
                                              provider_configs=config.provider_settings, pool_class=config.provider_pool,
                                              compute_score=compute_score)
|
||||
|
||||
|
||||
def update_local_media(metadata, media, media_type="movies"):
|
||||
# Look for subtitles
|
||||
if media_type == "movies":
|
||||
@@ -151,12 +116,52 @@ def update_local_media(metadata, media, media_type="movies"):
|
||||
pass
|
||||
|
||||
|
||||
def agent_extract_embedded(video_part_map):
    """
    Queue the extraction of embedded subtitle streams for every configured language that has no
    stored subtitle from the "embedded" provider yet.

    The extraction itself runs in a separate thread (multi_extract_embedded). Any error raised while
    collecting the streams is logged and not propagated, so the caller can continue.

    :param video_part_map: mapping of scanned video -> part info
    :return: None
    """
    try:
        subtitle_storage = get_subtitle_storage()

        to_extract = []
        # NOTE(review): incremented once per part, although the log message below calls it "items"
        item_count = 0

        for scanned_video, part_info in video_part_map.iteritems():
            plexapi_item = scanned_video.plexapi_metadata["item"]
            stored_subs = subtitle_storage.load_or_new(plexapi_item)

            for plexapi_part in get_all_parts(plexapi_item):
                item_count += 1
                for requested_language in config.lang_list:
                    embedded_subs = stored_subs.get_by_provider(plexapi_part.id, requested_language, "embedded")
                    current = stored_subs.get_any(plexapi_part.id, requested_language)
                    if embedded_subs:
                        Log.Debug("Skipping embedded subtitle extraction for %s, already got %r from %s",
                                  plexapi_item.rating_key, requested_language, embedded_subs[0].id)
                        continue

                    stream_data = get_embedded_subtitle_streams(plexapi_part, requested_language=requested_language,
                                                                get_forced=config.forced_only)
                    if not stream_data:
                        continue

                    # only the first matching stream is used
                    stream = stream_data[0]["stream"]

                    # last element is True when nothing is stored for this part/language yet
                    # NOTE(review): presumably tells the extractor to make the result current - confirm
                    to_extract.append(({scanned_video: part_info}, plexapi_part, str(stream.index),
                                       str(requested_language), not current))

                    if not cast_bool(Prefs["subtitles.search_after_autoextract"]):
                        # treat the language as already present on the scanned video
                        scanned_video.subtitle_languages.update({requested_language})

        if to_extract:
            Log.Info("Triggering extraction of %d embedded subtitles of %d items", len(to_extract), item_count)
            Thread.Create(multi_extract_embedded, stream_list=to_extract, refresh=True, with_mods=True,
                          single_thread=not config.advanced.auto_extract_multithread)
    except Exception:
        # best effort: log and carry on, but let SystemExit/KeyboardInterrupt pass through
        Log.Error("Something went wrong when auto-extracting subtitles, continuing: %s", traceback.format_exc())
|
||||
|
||||
|
||||
class SubZeroAgent(object):
|
||||
agent_type = None
|
||||
agent_type_verbose = None
|
||||
languages = [Locale.Language.English]
|
||||
primary_provider = False
|
||||
score_prefs_key = None
|
||||
debounce = 10
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(SubZeroAgent, self).__init__(*args, **kwargs)
|
||||
@@ -167,6 +172,9 @@ class SubZeroAgent(object):
|
||||
Log.Debug("Sub-Zero %s, %s search" % (config.version, self.agent_type))
|
||||
results.Append(MetadataSearchResult(id='null', score=100))
|
||||
|
||||
def store_blank_subtitle_metadata(self, video_part_map):
|
||||
store_subtitle_info(video_part_map, dict((k, []) for k in video_part_map.keys()), None, mode="a")
|
||||
|
||||
def update(self, metadata, media, lang):
|
||||
if not config.enable_agent:
|
||||
Log.Debug("Skipping Sub-Zero agent(s)")
|
||||
@@ -208,25 +216,81 @@ class SubZeroAgent(object):
|
||||
set_refresh_menu_state(media, media_type=self.agent_type)
|
||||
|
||||
# scanned_video_part_map = {subliminal.Video: plex_part, ...}
|
||||
scanned_video_part_map = scan_videos(videos, kind=self.agent_type)
|
||||
providers = config.get_providers(media_type=self.agent_type)
|
||||
try:
|
||||
scanned_video_part_map = scan_videos(videos, providers=providers)
|
||||
except IOError, e:
|
||||
Log.Exception("Permission error, please check your folder/file permissions. Exiting.")
|
||||
if cast_bool(Prefs["check_permissions"]):
|
||||
config.permissions_ok = False
|
||||
config.missing_permissions = e.message
|
||||
return
|
||||
|
||||
# auto extract embedded
|
||||
if config.embedded_auto_extract:
|
||||
if config.plex_transcoder:
|
||||
agent_extract_embedded(scanned_video_part_map)
|
||||
else:
|
||||
Log.Warning("Plex Transcoder not found, can't auto extract")
|
||||
|
||||
# clear missing subtitles menu data
|
||||
if not scheduler.is_task_running("MissingSubtitles"):
|
||||
scheduler.clear_task_data("MissingSubtitles")
|
||||
|
||||
downloaded_subtitles = None
|
||||
|
||||
# debounce for self.debounce seconds
|
||||
now = datetime.datetime.now()
|
||||
if "last_call" in Dict:
|
||||
last_call = Dict["last_call"]
|
||||
if last_call + datetime.timedelta(seconds=self.debounce) > now:
|
||||
wait = self.debounce - (now - last_call).seconds
|
||||
if wait >= 1:
|
||||
Log.Debug("Waiting %s seconds until continuing", wait)
|
||||
Thread.Sleep(wait)
|
||||
|
||||
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
|
||||
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score)
|
||||
try:
|
||||
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score,
|
||||
throttle_time=self.debounce, providers=providers)
|
||||
except:
|
||||
Log.Exception("Something went wrong when downloading subtitles")
|
||||
|
||||
if downloaded_subtitles is not None:
|
||||
Dict["last_call"] = datetime.datetime.now()
|
||||
|
||||
item_ids = get_media_item_ids(media, kind=self.agent_type)
|
||||
|
||||
whack_missing_parts(scanned_video_part_map)
|
||||
|
||||
downloaded_any = False
|
||||
if downloaded_subtitles:
|
||||
save_subtitles(scanned_video_part_map, downloaded_subtitles, mods=config.default_mods)
|
||||
downloaded_any = any(downloaded_subtitles.values())
|
||||
|
||||
if downloaded_any:
|
||||
save_successful = False
|
||||
try:
|
||||
save_successful = save_subtitles(scanned_video_part_map, downloaded_subtitles,
|
||||
mods=config.default_mods)
|
||||
except:
|
||||
Log.Exception("Something went wrong when saving subtitles")
|
||||
|
||||
track_usage("Subtitle", "refreshed", "download", 1)
|
||||
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
# store item(s) in history
|
||||
for subtitle in video_subtitles:
|
||||
item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
|
||||
history = get_history()
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle)
|
||||
# store SZ meta info even if download wasn't successful
|
||||
if not save_successful:
|
||||
self.store_blank_subtitle_metadata(scanned_video_part_map)
|
||||
|
||||
else:
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
# store item(s) in history
|
||||
for subtitle in video_subtitles:
|
||||
item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
|
||||
history = get_history()
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle)
|
||||
history.destroy()
|
||||
else:
|
||||
# store SZ meta info even if we've downloaded none
|
||||
self.store_blank_subtitle_metadata(scanned_video_part_map)
|
||||
|
||||
update_local_media(metadata, media, media_type=self.agent_type)
|
||||
|
||||
@@ -236,13 +300,17 @@ class SubZeroAgent(object):
|
||||
|
||||
# notify any running tasks about our finished update
|
||||
for item_id in item_ids:
|
||||
scheduler.signal("updated_metadata", item_id)
|
||||
#scheduler.signal("updated_metadata", item_id)
|
||||
|
||||
# resolve existing intent for that id
|
||||
intent.resolve("force", item_id)
|
||||
|
||||
Dict.Save()
|
||||
|
||||
# fsync cache
|
||||
if config.new_style_cache:
|
||||
config.sync_cache()
|
||||
|
||||
|
||||
class SubZeroSubtitlesAgentMovies(SubZeroAgent, Agent.Movies):
|
||||
contributes_to = ['com.plexapp.agents.imdb', 'com.plexapp.agents.xbmcnfo', 'com.plexapp.agents.themoviedb', 'com.plexapp.agents.hama']
|
||||
|
||||
@@ -3,15 +3,16 @@ import datetime
|
||||
import StringIO
|
||||
import glob
|
||||
import os
|
||||
import traceback
|
||||
import urlparse
|
||||
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
|
||||
from subzero.lib.io import FileIO
|
||||
from subzero.constants import PREFIX, PLUGIN_IDENTIFIER
|
||||
from menu_helpers import SubFolderObjectContainer, debounce, set_refresh_menu_state, ZipObject, ObjectContainer
|
||||
from menu_helpers import SubFolderObjectContainer, debounce, set_refresh_menu_state, ZipObject, ObjectContainer, route
|
||||
from main import fatality
|
||||
from support.helpers import timestamp, pad_title
|
||||
from support.config import config
|
||||
@@ -19,70 +20,98 @@ from support.lib import Plex
|
||||
from support.storage import reset_storage, log_storage, get_subtitle_storage
|
||||
from support.scheduler import scheduler
|
||||
from support.items import set_mods_for_part, get_item_kind_from_rating_key
|
||||
from support.i18n import _
|
||||
|
||||
|
||||
@route(PREFIX + '/advanced')
|
||||
def AdvancedMenu(randomize=None, header=None, message=None):
|
||||
oc = SubFolderObjectContainer(header=header or "Internal stuff, pay attention!", message=message, no_cache=True,
|
||||
no_history=True,
|
||||
replace_parent=False, title2="Advanced")
|
||||
oc = SubFolderObjectContainer(
|
||||
header=header or _("Internal stuff, pay attention!"),
|
||||
message=message,
|
||||
no_cache=True,
|
||||
no_history=True,
|
||||
replace_parent=False,
|
||||
title2=_("Advanced"))
|
||||
|
||||
if config.lock_advanced_menu and not config.pin_correct:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(PinMenu, randomize=timestamp(), success_go_to="advanced"),
|
||||
title=pad_title("Enter PIN"),
|
||||
summary="The owner has restricted the access to this menu. Please enter the correct pin",
|
||||
key=Callback(
|
||||
PinMenu,
|
||||
randomize=timestamp(),
|
||||
success_go_to=_("advanced")),
|
||||
title=pad_title(_("Enter PIN")),
|
||||
summary=_("The owner has restricted the access to this menu. Please enter the correct pin"),
|
||||
))
|
||||
return oc
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerRestart, randomize=timestamp()),
|
||||
title=pad_title("Restart the plugin"),
|
||||
title=pad_title(_("Restart the plugin")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(GetLogsLink),
|
||||
title="Get my logs (copy the appearing link and open it in your browser, please)",
|
||||
summary="Copy the appearing link and open it in your browser, please",
|
||||
title=_("Get my logs (copy the appearing link and open it in your browser, please)"),
|
||||
summary=_("Copy the appearing link and open it in your browser, please"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerBetterSubtitles, randomize=timestamp()),
|
||||
title=pad_title("Trigger find better subtitles"),
|
||||
title=pad_title(_("Trigger find better subtitles")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SkipFindBetterSubtitles, randomize=timestamp()),
|
||||
title=pad_title(_("Skip next find better subtitles (sets last run to now)")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerStorageMaintenance, randomize=timestamp()),
|
||||
title=pad_title("Trigger subtitle storage maintenance"),
|
||||
title=pad_title(_("Trigger subtitle storage maintenance")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerStorageMigration, randomize=timestamp()),
|
||||
title=pad_title("Trigger subtitle storage migration (expensive)"),
|
||||
title=pad_title(_("Trigger subtitle storage migration (expensive)")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerCacheMaintenance, randomize=timestamp()),
|
||||
title=pad_title(_("Trigger cache maintenance (refiners, providers and packs/archives)")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ApplyDefaultMods, randomize=timestamp()),
|
||||
title=pad_title("Apply configured default subtitle mods to all (active) stored subtitles"),
|
||||
title=pad_title(_("Apply configured default subtitle mods to all (active) stored subtitles")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ReApplyMods, randomize=timestamp()),
|
||||
title=pad_title("Re-Apply mods of all stored subtitles"),
|
||||
title=pad_title(_("Re-Apply mods of all stored subtitles")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(LogStorage, key="tasks", randomize=timestamp()),
|
||||
title=pad_title("Log the plugin's scheduled tasks state storage"),
|
||||
title=pad_title(_("Log the plugin's scheduled tasks state storage")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(LogStorage, key="ignore", randomize=timestamp()),
|
||||
title=pad_title("Log the plugin's internal ignorelist storage"),
|
||||
title=pad_title(_("Log the plugin's internal ignorelist storage")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(LogStorage, key=None, randomize=timestamp()),
|
||||
title=pad_title(_("Log the plugin's complete state storage")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetStorage, key="tasks", randomize=timestamp()),
|
||||
title=pad_title("Reset the plugin's scheduled tasks state storage"),
|
||||
title=pad_title(_("Reset the plugin's scheduled tasks state storage")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetStorage, key="ignore", randomize=timestamp()),
|
||||
title=pad_title("Reset the plugin's internal ignorelist storage"),
|
||||
title=pad_title(_("Reset the plugin's internal ignorelist storage")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetStorage, key="menu_history", randomize=timestamp()),
|
||||
title=pad_title("Reset the plugin's menu history storage"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(InvalidateCache, randomize=timestamp()),
|
||||
title=pad_title("Invalidate Sub-Zero metadata caches (subliminal)"),
|
||||
title=pad_title(_("Invalidate Sub-Zero metadata caches (subliminal)")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetProviderThrottle, randomize=timestamp()),
|
||||
title=pad_title(_("Reset provider throttle states")),
|
||||
))
|
||||
return oc
|
||||
|
||||
@@ -94,15 +123,20 @@ def DispatchRestart():
|
||||
@route(PREFIX + '/advanced/restart/trigger')
|
||||
@debounce
|
||||
def TriggerRestart(randomize=None):
|
||||
set_refresh_menu_state("Restarting the plugin")
|
||||
set_refresh_menu_state(_("Restarting the plugin"))
|
||||
DispatchRestart()
|
||||
return fatality(header="Restart triggered, please wait about 5 seconds", force_title=" ", only_refresh=True,
|
||||
replace_parent=True,
|
||||
no_history=True, randomize=timestamp())
|
||||
return fatality(
|
||||
header=_("Restart triggered, please wait about 5 seconds"),
|
||||
force_title=" ",
|
||||
only_refresh=True,
|
||||
replace_parent=True,
|
||||
no_history=True,
|
||||
randomize=timestamp())
|
||||
|
||||
|
||||
@route(PREFIX + '/advanced/restart/execute')
|
||||
def Restart():
|
||||
@debounce
|
||||
def Restart(randomize=None):
|
||||
Plex[":/plugins"].restart(PLUGIN_IDENTIFIER)
|
||||
|
||||
|
||||
@@ -110,10 +144,17 @@ def Restart():
|
||||
@debounce
|
||||
def ResetStorage(key, randomize=None, sure=False):
|
||||
if not sure:
|
||||
oc = SubFolderObjectContainer(no_history=True, title1="Reset subtitle storage", title2="Are you sure?")
|
||||
oc = SubFolderObjectContainer(
|
||||
no_history=True,
|
||||
title1=_("Reset subtitle storage"),
|
||||
title2=_("Are you sure?"))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetStorage, key=key, sure=True, randomize=timestamp()),
|
||||
title=pad_title("Are you really sure?"),
|
||||
key=Callback(
|
||||
ResetStorage,
|
||||
key=key,
|
||||
sure=True,
|
||||
randomize=timestamp()),
|
||||
title=pad_title(_("Are you really sure?")),
|
||||
|
||||
))
|
||||
return oc
|
||||
@@ -127,8 +168,8 @@ def ResetStorage(key, randomize=None, sure=False):
|
||||
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='Information Storage (%s) reset' % key
|
||||
header=_("Success"),
|
||||
message=_("Information Storage (%s) reset", key)
|
||||
)
|
||||
|
||||
|
||||
@@ -137,8 +178,8 @@ def LogStorage(key, randomize=None):
|
||||
log_storage(key)
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='Information Storage (%s) logged' % key
|
||||
header=_("Success"),
|
||||
message=_("Information Storage (%s) logged", key)
|
||||
)
|
||||
|
||||
|
||||
@@ -148,8 +189,21 @@ def TriggerBetterSubtitles(randomize=None):
|
||||
scheduler.dispatch_task("FindBetterSubtitles")
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='FindBetterSubtitles triggered'
|
||||
header=_("Success"),
|
||||
message=_("FindBetterSubtitles triggered")
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/skipbetter')
@debounce
def SkipFindBetterSubtitles(randomize=None):
    """
    Set the last run of the "FindBetterSubtitles" task to now, then show the advanced menu.

    :param randomize: not read by the body
    :return: whatever AdvancedMenu returns, called with a success header and message
    """
    find_better_task = scheduler.task("FindBetterSubtitles")
    find_better_task.last_run = datetime.datetime.now()

    menu_arguments = dict(
        randomize=timestamp(),
        header=_("Success"),
        message=_("FindBetterSubtitles skipped"),
    )
    return AdvancedMenu(**menu_arguments)
|
||||
|
||||
|
||||
@@ -159,8 +213,8 @@ def TriggerStorageMaintenance(randomize=None):
|
||||
scheduler.dispatch_task("SubtitleStorageMaintenance")
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='SubtitleStorageMaintenance triggered'
|
||||
header=_("Success"),
|
||||
message=_("SubtitleStorageMaintenance triggered")
|
||||
)
|
||||
|
||||
|
||||
@@ -170,8 +224,19 @@ def TriggerStorageMigration(randomize=None):
|
||||
scheduler.dispatch_task("MigrateSubtitleStorage")
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='MigrateSubtitleStorage triggered'
|
||||
header=_("Success"),
|
||||
message=_("MigrateSubtitleStorage triggered")
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/triggercachemaintenance')
@debounce
def TriggerCacheMaintenance(randomize=None):
    """
    Dispatch the "CacheMaintenance" scheduler task, then show the advanced menu.

    :param randomize: not read by the body
    :return: whatever AdvancedMenu returns, called with a success header and message
    """
    scheduler.dispatch_task("CacheMaintenance")

    # NOTE(review): the message names this handler rather than the dispatched task; it is kept
    # verbatim because the string is also the translation lookup key
    menu_arguments = dict(
        randomize=timestamp(),
        header=_("Success"),
        message=_("TriggerCacheMaintenance triggered"),
    )
    return AdvancedMenu(**menu_arguments)
|
||||
|
||||
|
||||
@@ -206,8 +271,14 @@ def apply_default_mods(reapply_current=False):
|
||||
continue
|
||||
add_mods = []
|
||||
|
||||
set_mods_for_part(video_id, part_id, Language.fromietf(lang), item_type, add_mods, mode="add")
|
||||
try:
|
||||
set_mods_for_part(video_id, part_id, Language.fromietf(lang), item_type, add_mods, mode="add")
|
||||
except:
|
||||
Log.Error("Couldn't set mods for %s:%s: %s", video_id, part_id, traceback.format_exc())
|
||||
continue
|
||||
|
||||
subs_applied += 1
|
||||
storage.destroy()
|
||||
Log.Debug("Applied mods to %i items" % subs_applied)
|
||||
|
||||
|
||||
@@ -217,8 +288,8 @@ def ApplyDefaultMods(randomize=None):
|
||||
Thread.CreateTimer(1.0, apply_default_mods)
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='This may take some time ...'
|
||||
header=_("Success"),
|
||||
message=_("This may take some time ...")
|
||||
)
|
||||
|
||||
|
||||
@@ -228,17 +299,20 @@ def ReApplyMods(randomize=None):
|
||||
Thread.CreateTimer(1.0, apply_default_mods, reapply_current=True)
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='This may take some time ...'
|
||||
header=_("Success"),
|
||||
message=_("This may take some time ...")
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/get_logs_link')
|
||||
def GetLogsLink():
|
||||
if not config.plex_token:
|
||||
oc = ObjectContainer(title2="Download Logs", no_cache=True, no_history=True,
|
||||
header="Sorry, feature unavailable",
|
||||
message="Universal Plex token not available")
|
||||
oc = ObjectContainer(
|
||||
title2=_("Download Logs"),
|
||||
no_cache=True,
|
||||
no_history=True,
|
||||
header=_("Sorry, feature unavailable"),
|
||||
message=_("Universal Plex token not available"))
|
||||
return oc
|
||||
|
||||
# try getting the link base via the request in context, first, otherwise use the public ip
|
||||
@@ -253,7 +327,7 @@ def GetLogsLink():
|
||||
|
||||
elif "Referer" in req_headers:
|
||||
parsed = urlparse.urlparse(req_headers["Referer"])
|
||||
link_base = "%s://%s:%s" % (parsed.scheme, parsed.hostname, parsed.port)
|
||||
link_base = "%s://%s%s" % (parsed.scheme, parsed.hostname, (":%s" % parsed.port) if parsed.port else "")
|
||||
Log.Debug("Using referer-based link_base")
|
||||
get_external_ip = False
|
||||
|
||||
@@ -263,9 +337,12 @@ def GetLogsLink():
|
||||
Log.Debug("Using ip-based fallback link_base")
|
||||
|
||||
logs_link = "%s%s?X-Plex-Token=%s" % (link_base, PREFIX + '/logs', config.plex_token)
|
||||
oc = ObjectContainer(title2="Download Logs", no_cache=True, no_history=True,
|
||||
header="Copy this link and open this in your browser, please",
|
||||
message=logs_link)
|
||||
oc = ObjectContainer(
|
||||
title2=logs_link,
|
||||
no_cache=True,
|
||||
no_history=True,
|
||||
header=_("Copy this link and open this in your browser, please"),
|
||||
message=logs_link)
|
||||
return oc
|
||||
|
||||
|
||||
@@ -289,35 +366,51 @@ def DownloadLogs():
|
||||
@debounce
|
||||
def InvalidateCache(randomize=None):
|
||||
from subliminal.cache import region
|
||||
region.invalidate()
|
||||
if config.new_style_cache:
|
||||
region.backend.clear()
|
||||
else:
|
||||
region.invalidate()
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='Cache invalidated'
|
||||
header=_("Success"),
|
||||
message=_("Cache invalidated")
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/pin')
|
||||
def PinMenu(pin="", randomize=None, success_go_to="channel"):
|
||||
oc = ObjectContainer(title2="Enter PIN number %s" % (len(pin) + 1), no_cache=True, no_history=True,
|
||||
skip_pin_lock=True)
|
||||
oc = ObjectContainer(
|
||||
title2=_("Enter PIN number ") + str(len(pin) + 1),
|
||||
no_cache=True,
|
||||
no_history=True,
|
||||
skip_pin_lock=True)
|
||||
|
||||
if pin == config.pin:
|
||||
Dict["pin_correct_time"] = datetime.datetime.now()
|
||||
config.locked = False
|
||||
if success_go_to == "channel":
|
||||
return fatality(force_title="PIN correct", header="PIN correct", no_history=True)
|
||||
return fatality(
|
||||
force_title=_("PIN correct"),
|
||||
header=_("PIN correct"),
|
||||
no_history=True)
|
||||
elif success_go_to == "advanced":
|
||||
return AdvancedMenu(randomize=timestamp())
|
||||
|
||||
for i in range(10):
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(PinMenu, randomize=timestamp(), pin=pin + str(i), success_go_to=success_go_to),
|
||||
key=Callback(
|
||||
PinMenu,
|
||||
randomize=timestamp(),
|
||||
pin=pin + str(i),
|
||||
success_go_to=success_go_to),
|
||||
title=pad_title(str(i)),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(PinMenu, randomize=timestamp(), success_go_to=success_go_to),
|
||||
title=pad_title("Reset"),
|
||||
key=Callback(
|
||||
PinMenu,
|
||||
randomize=timestamp(),
|
||||
success_go_to=success_go_to),
|
||||
title=pad_title(_("Reset")),
|
||||
))
|
||||
return oc
|
||||
|
||||
@@ -326,4 +419,15 @@ def PinMenu(pin="", randomize=None, success_go_to="channel"):
|
||||
def ClearPin(randomize=None):
|
||||
Dict["pin_correct_time"] = None
|
||||
config.locked = True
|
||||
return fatality(force_title="Menu locked", header=" ", no_history=True)
|
||||
return fatality(force_title=_("Menu locked"), header=" ", no_history=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/reset_throttle')
def ResetProviderThrottle(randomize=None):
    """
    Replace Dict["provider_throttle"] with an empty dict, save the Dict and show the advanced menu.

    :param randomize: not read by the body
    :return: whatever AdvancedMenu returns, called with a success header and message
    """
    Dict["provider_throttle"] = {}
    Dict.Save()

    menu_arguments = dict(
        randomize=timestamp(),
        header=_("Success"),
        message=_("Provider throttles reset"),
    )
    return AdvancedMenu(**menu_arguments)
|
||||
|
||||
@@ -0,0 +1,185 @@
|
||||
# coding=utf-8
|
||||
|
||||
import datetime
|
||||
import operator
|
||||
|
||||
from support.config import config
|
||||
from support.helpers import timestamp
|
||||
|
||||
|
||||
def enable_channel_wrapper(func):
    """
    Make a decorator factory (route or handler) depend on config.enable_channel.

    Calls to the returned function are forwarded to :func: while the channel is enabled or when
    ``enforce_route`` is passed as a truthy keyword argument; otherwise a stand-in factory is used
    whose decorator hands back the function it was applied to, untouched.

    :param func: original decorator factory
    :return: function taking the same arguments as :func:, plus the optional ``enforce_route``
    """
    def passthrough(*args, **kwargs):
        def identity(*a, **k):
            """
            :param a: args, the first one being the to-be-wrapped function
            :param k: kwargs, ignored
            :return: the to-be-wrapped function itself
            """
            return a[0]

        return identity

    def wrap(*args, **kwargs):
        # enforce_route is consumed here and never reaches the chosen factory
        enforce_route = kwargs.pop("enforce_route", None)
        if config.enable_channel or enforce_route:
            factory = func
        else:
            factory = passthrough
        return factory(*args, **kwargs)

    return wrap
|
||||
|
||||
|
||||
# maps a route function's __name__ to the function object; filled by register_route_function
ROUTE_REGISTRY = {}
|
||||
|
||||
|
||||
def get_func_name(args):
    """
    Return the ``__name__`` of the first element of :args:.

    :param args: iterable whose first element has a ``__name__`` attribute
    :return: name of that first element
    """
    # copy into a list first so that any iterable is accepted and :args: itself is left untouched
    first = list(args)[0]
    return first.__name__
|
||||
|
||||
|
||||
def get_lookup_key(f, args, kwargs):
    """
    Build the hashable key under which a call of :f: is tracked (e.g. in the menu history).

    :param f: function being called; only its ``__name__`` goes into the key
    :param args: positional arguments of the call
    :param kwargs: keyword arguments of the call
    :return: tuple of (function name, tuple of args, tuple of (name, value) pairs sorted by name)
    """
    # order the pairs by keyword name so that equal calls always yield the same key, regardless of
    # the iteration order of the kwargs dict
    ordered_kwargs = tuple(sorted(kwargs.items(), key=operator.itemgetter(0)))
    return f.__name__, tuple(args), ordered_kwargs
|
||||
|
||||
|
||||
def should_debounce(f, key, kw):
    """
    Tell whether a call of :f: has to be debounced.

    That is the case if :f: carries a truthy ``debounce`` attribute, the call has a ``randomize``
    keyword argument and :key: is already present in Dict["menu_history"].

    :param f: function about to be called
    :param key: lookup key of the call
    :param kw: keyword arguments of the call
    :return: the falsy ``debounce`` value if :f: isn't marked, else a bool
    """
    marked = getattr(f, "debounce", False)
    if not marked:
        return marked
    if "randomize" not in kw:
        return False
    return key in Dict["menu_history"]
|
||||
|
||||
|
||||
def register_route_function(f):
    """
    Remember :f: in ROUTE_REGISTRY under its ``__name__``.

    "ValidatePrefs" is never registered and an already registered name is not overwritten.

    :param f: function to register
    :return: :f:, unchanged
    """
    name = f.__name__
    if name == "ValidatePrefs":
        return f

    # first registration wins
    ROUTE_REGISTRY.setdefault(name, f)
    return f
|
||||
|
||||
|
||||
def main_menu_fallback():
    """
    Record an argument-less call of the registered "fatality" route as last menu item and in the
    menu history, then call that route.

    :return: result of the "fatality" route, called with a fresh ``randomize`` timestamp
    """
    fatality_route = ROUTE_REGISTRY["fatality"]

    # the recorded key deliberately carries no arguments, unlike the actual call below
    fallback_key = get_lookup_key(fatality_route, [], {})
    Dict["last_menu_item"] = fallback_key
    add_to_menu_history(fallback_key)

    return fatality_route(randomize=timestamp())
|
||||
|
||||
|
||||
def add_to_menu_history(key):
    """
    Store :key: in Dict["menu_history"] with a timestamp six hours from now and save the Dict.

    Only the 25 entries with the latest timestamps are kept. If saving raises a TypeError, the
    error is logged and :key: is removed from the history again.

    :param key: lookup key of the visited route, as built by get_lookup_key
    :return: None
    """
    history = Dict["menu_history"]

    # assigning replaces an existing entry, so no prior delete is necessary
    history[key] = datetime.datetime.now() + datetime.timedelta(hours=6)

    # limit to 25 items, keeping those with the latest timestamps
    latest_first = sorted(history.items(), key=operator.itemgetter(1), reverse=True)
    Dict["menu_history"] = dict(latest_first[:25])

    try:
        Dict.Save()
    except TypeError:
        Log.Error("Can't save menu history for: %r", key)
        # the entry may have been trimmed above already; never raise from within the error handler
        Dict["menu_history"].pop(key, None)
|
||||
|
||||
|
||||
def route_wrapper(*args, **kwargs):
    """
    Decorator factory that wraps ``route`` with menu-history tracking and debouncing.

    The decorated function is registered in ``ROUTE_REGISTRY``. On every call its lookup key is
    computed; if ``should_debounce`` reports that this exact call has been handled before, the main
    menu is returned instead of calling the function again. Otherwise the call is added to the menu
    history, stored as the last menu item and executed.

    :param args: positional arguments passed through to ``route``
    :param kwargs: keyword arguments passed through to ``route``
    :return: the decorator to apply to the route function
    """
    def wrap(f):
        # a function carrying orig_f has already been through this decorator
        already_wrapped = getattr(f, "orig_f", False)

        register_route_function(f)

        def inner(*a, **kw):
            if "menu_history" not in Dict:
                Dict["menu_history"] = {}

            if "last_menu_item" not in Dict:
                Dict["last_menu_item"] = None

            key = get_lookup_key(f, list(a), kw)

            # NOTE: the main menu is the only fallback for a debounced call; an earlier approach
            # that searched the menu history for the latest non-debounced route is not active
            if should_debounce(f, key, kw):
                # special case for TriggerRestart
                if f.__name__ in ("TriggerRestart", "Restart"):
                    Log.Debug("Don't trigger a re-restart, falling back to main menu")
                else:
                    Log.Debug("not triggering %s twice with %s, %s, returning to main menu" %
                              (f.__name__, a, kw))

                return main_menu_fallback()

            add_to_menu_history(key)
            Dict["last_menu_item"] = key
            return f(*a, **kw)

        # @route may be used multiple times; only the first application adds the tracking layer
        if not already_wrapped:
            inner.orig_f = f

            return enable_channel_wrapper(route(*args, **kwargs))(inner)
        return enable_channel_wrapper(route(*args, **kwargs))(f)

    return wrap
|
||||
@@ -1,24 +1,29 @@
|
||||
# coding=utf-8
|
||||
import os
|
||||
|
||||
from subzero.language import Language
|
||||
|
||||
from sub_mod import SubtitleModificationsMenu
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, add_ignore_options, get_item_task_data, \
|
||||
set_refresh_menu_state
|
||||
set_refresh_menu_state, route, extract_embedded_sub
|
||||
|
||||
from refresh_item import RefreshItem
|
||||
from subzero.constants import PREFIX
|
||||
from support.config import config
|
||||
from support.helpers import timestamp, cast_bool, df, get_language
|
||||
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub
|
||||
from support.lib import Plex
|
||||
from support.plex_media import get_plex_metadata, scan_videos, PMSMediaProxy
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.helpers import timestamp, df, get_language, display_language, get_language_from_stream
|
||||
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub, get_item_title, save_stored_sub
|
||||
from support.plex_media import get_plex_metadata, get_part, get_embedded_subtitle_streams
|
||||
from support.scanning import scan_videos
|
||||
from support.scheduler import scheduler
|
||||
from support.storage import get_subtitle_storage
|
||||
from support.i18n import _
|
||||
|
||||
|
||||
# fixme: needs kwargs cleanup
|
||||
|
||||
@route(PREFIX + '/item/{rating_key}/actions')
|
||||
@debounce
|
||||
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None):
|
||||
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None, header=None,
|
||||
message=None):
|
||||
"""
|
||||
displays the item details menu of an item that doesn't contain any deeper tree, such as a movie or an episode
|
||||
:param rating_key:
|
||||
@@ -31,12 +36,31 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
from interface.main import IgnoreMenu
|
||||
|
||||
title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)
|
||||
item = get_item(rating_key)
|
||||
item = plex_item = get_item(rating_key)
|
||||
current_kind = get_item_kind_from_rating_key(rating_key)
|
||||
|
||||
timeout = 30
|
||||
|
||||
oc = SubFolderObjectContainer(title2=title, replace_parent=True)
|
||||
oc = SubFolderObjectContainer(
|
||||
title2=title,
|
||||
replace_parent=True,
|
||||
header=header,
|
||||
message=message)
|
||||
|
||||
if not item:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(
|
||||
ItemDetailsMenu,
|
||||
rating_key=rating_key,
|
||||
title=title,
|
||||
base_title=base_title,
|
||||
item_title=item_title,
|
||||
randomize=timestamp()),
|
||||
title=_(u"Item not found: %s!", item_title),
|
||||
summary=_("Plex didn't return any information about the item, please refresh it and come back later"),
|
||||
thumb=default_thumb
|
||||
))
|
||||
return oc
|
||||
|
||||
# add back to season for episode
|
||||
if current_kind == "episode":
|
||||
@@ -45,33 +69,37 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
season = get_item(item.season.rating_key)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(MetadataMenu, rating_key=season.rating_key, title=season.title, base_title=show.title,
|
||||
previous_item_type="show", previous_rating_key=show.rating_key,
|
||||
display_items=True, randomize=timestamp()),
|
||||
title=u"< Back to %s" % season.title,
|
||||
summary="Back to %s > %s" % (show.title, season.title),
|
||||
key=Callback(
|
||||
MetadataMenu,
|
||||
rating_key=season.rating_key,
|
||||
title=season.title,
|
||||
base_title=show.title,
|
||||
previous_item_type="show",
|
||||
previous_rating_key=show.rating_key,
|
||||
display_items=True,
|
||||
randomize=timestamp()),
|
||||
title=_(u"< Back to %s", season.title),
|
||||
summary=_("Back to %s > %s", show.title, season.title),
|
||||
thumb=season.thumb or default_thumb
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(UpdateLocalMedia, rating_key=rating_key, title=title, item_title=item_title, base_title=base_title,
|
||||
randomize=timestamp()),
|
||||
title=u"Find local subtitles (doesn't refresh metadata)",
|
||||
summary="Searches for locally available subtitles",
|
||||
thumb=item.thumb or default_thumb
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, randomize=timestamp(),
|
||||
timeout=timeout * 1000),
|
||||
title=u"Refresh: %s" % item_title,
|
||||
summary="Refreshes the %s, possibly searching for missing and picking up new subtitles on disk" % current_kind,
|
||||
key=Callback(
|
||||
RefreshItem,
|
||||
rating_key=rating_key,
|
||||
item_title=item_title,
|
||||
randomize=timestamp(),
|
||||
timeout=timeout * 1000),
|
||||
title=_(u"Refresh: %s", item_title),
|
||||
summary=_("Refreshes %(the_movie_series_season_episode)s, possibly searching for missing and picking up "
|
||||
"new subtitles on disk", the_movie_series_season_episode=_(u"the %s" % current_kind)),
|
||||
thumb=item.thumb or default_thumb
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(),
|
||||
timeout=timeout * 1000),
|
||||
title=u"Force-find subtitles: %s" % item_title,
|
||||
summary="Issues a forced refresh, ignoring known subtitles and searching for new ones",
|
||||
title=_(u"Force-find subtitles: %(item_title)s", item_title=item_title),
|
||||
summary=_("Issues a forced refresh, ignoring known subtitles and searching for new ones"),
|
||||
thumb=item.thumb or default_thumb
|
||||
))
|
||||
|
||||
@@ -79,9 +107,6 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load_or_new(item)
|
||||
|
||||
# get the plex item
|
||||
plex_item = list(Plex["library"].metadata(rating_key))[0]
|
||||
|
||||
# look for subtitles for all available media parts and all of their languages
|
||||
has_multiple_parts = len(plex_item.media) > 1
|
||||
part_index = 0
|
||||
@@ -94,98 +119,358 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
part_id = str(part.id)
|
||||
part_index += 1
|
||||
|
||||
part_index_addon = u""
|
||||
part_summary_addon = u""
|
||||
if has_multiple_parts:
|
||||
part_index_addon = _(u"File %(file_part_index)s: ", file_part_index=part_index)
|
||||
part_summary_addon = u"%s " % filename
|
||||
|
||||
# iterate through all configured languages
|
||||
for lang in config.lang_list:
|
||||
lang_a2 = lang.alpha2
|
||||
# ietf lang?
|
||||
if cast_bool(Prefs["subtitles.language.ietf"]) and "-" in lang_a2:
|
||||
lang_a2 = lang_a2.split("-")[0]
|
||||
|
||||
# get corresponding stored subtitle data for that media part (physical media item), for language
|
||||
current_sub = stored_subs.get_any(part_id, lang_a2)
|
||||
current_sub = stored_subs.get_any(part_id, lang)
|
||||
current_sub_id = None
|
||||
current_sub_provider_name = None
|
||||
|
||||
part_index_addon = ""
|
||||
part_summary_addon = ""
|
||||
if has_multiple_parts:
|
||||
part_index_addon = u"File %s: " % part_index
|
||||
part_summary_addon = "%s " % filename
|
||||
|
||||
summary = u"%sNo current subtitle in storage" % part_summary_addon
|
||||
summary = _(u"%(part_summary)sNo current subtitle in storage", part_summary=part_summary_addon)
|
||||
current_score = None
|
||||
if current_sub:
|
||||
current_sub_id = current_sub.id
|
||||
current_sub_provider_name = current_sub.provider_name
|
||||
current_score = current_sub.score
|
||||
|
||||
summary = u"%sCurrent subtitle: %s (added: %s, %s), Language: %s, Score: %i, Storage: %s" % \
|
||||
(part_summary_addon, current_sub.provider_name, df(current_sub.date_added),
|
||||
current_sub.mode_verbose, lang, current_sub.score, current_sub.storage_type)
|
||||
summary = _(u"%(part_summary)sCurrent subtitle: %(provider_name)s (added: %(date_added)s, "
|
||||
u"%(mode)s), Language: %(language)s, Score: %(score)i, Storage: %(storage_type)s",
|
||||
part_summary=part_summary_addon,
|
||||
provider_name=current_sub.provider_name,
|
||||
date_added=df(current_sub.date_added),
|
||||
mode=current_sub.mode_verbose,
|
||||
language=display_language(lang),
|
||||
score=current_sub.score,
|
||||
storage_type=current_sub.storage_type)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_title=item_title, language=lang, language_name=lang.name, current_id=current_sub_id,
|
||||
item_type=plex_item.type, filename=filename, current_data=summary,
|
||||
randomize=timestamp(), current_provider=current_sub_provider_name,
|
||||
current_score=current_score),
|
||||
title=u"%sActions for %s subtitle" % (part_index_addon, lang.name),
|
||||
summary=summary
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_title=item_title, language=lang, language_name=display_language(lang),
|
||||
current_id=current_sub_id,
|
||||
item_type=plex_item.type, filename=filename, current_data=summary,
|
||||
randomize=timestamp(), current_provider=current_sub_provider_name,
|
||||
current_score=current_score),
|
||||
title=_(u"%(part_summary)sManage %(language)s subtitle", part_summary=part_index_addon,
|
||||
language=display_language(lang)),
|
||||
summary=summary
|
||||
))
|
||||
else:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_title=item_title, language=lang, language_name=display_language(lang),
|
||||
current_id=current_sub_id,
|
||||
item_type=plex_item.type, filename=filename, current_data=summary,
|
||||
randomize=timestamp(), current_provider=current_sub_provider_name,
|
||||
current_score=current_score),
|
||||
title=_(u"%(part_summary)sList %(language)s subtitles", part_summary=part_index_addon,
|
||||
language=display_language(lang)),
|
||||
summary=summary
|
||||
))
|
||||
|
||||
add_ignore_options(oc, "videos", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
if config.plex_transcoder:
|
||||
# embedded subtitles
|
||||
embedded_count = 0
|
||||
embedded_langs = []
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
|
||||
lang = get_language_from_stream(stream.language_code)
|
||||
|
||||
if not lang and config.treat_und_as_first:
|
||||
lang = list(config.lang_list)[0]
|
||||
|
||||
if lang:
|
||||
embedded_langs.append(lang)
|
||||
embedded_count += 1
|
||||
|
||||
if embedded_count:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListEmbeddedSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_type=plex_item.type, item_title=item_title, base_title=base_title,
|
||||
randomize=timestamp()),
|
||||
title=_(u"%(part_summary)sEmbedded subtitles (%(languages)s)",
|
||||
part_summary=part_index_addon,
|
||||
languages=", ".join(display_language(l) for l in set(embedded_langs))),
|
||||
summary=_(u"Extract and activate embedded subtitle streams")
|
||||
))
|
||||
|
||||
ignore_title = item_title
|
||||
if current_kind == "episode":
|
||||
ignore_title = get_item_title(item)
|
||||
add_ignore_options(oc, "videos", title=ignore_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
subtitle_storage.destroy()
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/update_local_media/{rating_key}', force=bool)
@debounce
def UpdateLocalMedia(**kwargs):
    """
    Look for subtitles for every media part of an item, then show the item details menu again.

    NOTE(review): in this diff the function appears to belong to the pre-change side (the menu entry
    pointing at it is replaced as well) - confirm it is still wanted before relying on it.

    :param kwargs: must contain ``rating_key`` and ``randomize``; everything except ``randomize`` is
        passed on to ``ItemDetailsMenu``
    :return: the result of ``ItemDetailsMenu``
    """
    from support.localmedia import find_subtitles

    rating_key = kwargs["rating_key"]
    for part in PMSMediaProxy(rating_key).get_all_parts():
        find_subtitles(part)

    # don't hand our own randomize value down to the next menu
    kwargs.pop("randomize")

    return ItemDetailsMenu(**kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/current_sub/{rating_key}/{part_id}')
def SubtitleOptionsMenu(**kwargs):
    """
    Menu with the actions available for the subtitle of one media part in one language.

    Always offers the way back to the item and the list of available subtitles. Depending on the
    stored data it also offers selecting another stored subtitle, modifying or blacklisting the
    current one, and managing the blacklist.

    :param kwargs: must contain ``title``, ``rating_key``, ``part_id``, ``language``,
        ``language_name``, ``item_title``, ``current_data`` and ``randomize``; ``header`` and
        ``message`` are optional and shown on the container
    :return: the object container holding the menu entries
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True, header=kwargs.get("header"),
                                  message=kwargs.get("message"))
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    current_data = unicode(kwargs["current_data"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    subs_count = stored_subs.count(part_id, language)

    # every callback below gets its own fresh randomize value
    kwargs.pop("randomize")

    oc.add(DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
                     title=kwargs["title"], randomize=timestamp()),
        title=_(u"< Back to %s", kwargs["title"]),
        summary=current_data,
        thumb=default_thumb
    ))
    if subs_count:
        oc.add(DirectoryObject(
            key=Callback(ListStoredSubsForItemMenu, randomize=timestamp(), **kwargs),
            title=_(u"Select active %(language)s subtitle", language=kwargs["language_name"]),
            summary=_(u"%(count)d subtitles in storage", count=subs_count)
        ))

    oc.add(DirectoryObject(
        key=Callback(ListAvailableSubsForItemMenu, randomize=timestamp(), **kwargs),
        title=_(u"List available %(language)s subtitles", language=kwargs["language_name"]),
        summary=current_data
    ))
    if current_sub:
        oc.add(DirectoryObject(
            key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
            title=_(u"Modify current %(language)s subtitle", language=kwargs["language_name"]),
            summary=_(u"Currently applied mods: %(mod_list)s",
                      mod_list=(", ".join(current_sub.mods) if current_sub.mods else "none"))
        ))

        if current_sub.provider_name != "embedded":
            oc.add(DirectoryObject(
                key=Callback(BlacklistSubtitleMenu, randomize=timestamp(), **kwargs),
                title=_(u"Blacklist current %(language)s subtitle and search for a new one",
                        language=kwargs["language_name"]),
                summary=current_data
            ))

        # NOTE(review): kept inside the current_sub branch because ManageBlacklistMenu uses
        # current_sub without a guard - confirm the intended nesting
        current_bl, subs = stored_subs.get_blacklist(part_id, language)
        if current_bl:
            oc.add(DirectoryObject(
                key=Callback(ManageBlacklistMenu, randomize=timestamp(), **kwargs),
                title=_(u"Manage blacklist (%(amount)s contained)", amount=len(current_bl)),
                summary=_(u"Inspect currently blacklisted subtitles")
            ))

    storage.destroy()
    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/list_stored_subs/{rating_key}/{part_id}')
def ListStoredSubsForItemMenu(**kwargs):
    """
    List all subtitles held in storage for one media part and language, newest first.

    Every entry leads to ``SelectStoredSubForItemMenu`` for that subtitle; the currently active one
    is labelled accordingly.

    :param kwargs: must contain ``title``, ``rating_key``, ``part_id``, ``language`` (IETF code) and
        ``randomize``; the remaining values are passed on to the callbacks
    :return: the object container holding one entry per stored subtitle
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    all_subs = stored_subs.get_all(part_id, language)
    kwargs.pop("randomize")

    # "current" and "blacklist" are bookkeeping entries, not subtitles
    stored = [pair for pair in all_subs.items() if pair[0] not in ("current", "blacklist")]

    for key, subtitle in sorted(stored, key=lambda x: x[1].date_added, reverse=True):
        is_current = key == all_subs["current"]

        summary = _(u"added: %(date_added)s, %(mode)s, Language: %(language)s, Score: %(score)i, Storage: "
                    u"%(storage_type)s",
                    date_added=df(subtitle.date_added),
                    mode=subtitle.mode_verbose,
                    language=display_language(language),
                    score=subtitle.score,
                    storage_type=subtitle.storage_type)

        sub_name = subtitle.provider_name
        if sub_name == "embedded":
            sub_name += " (%s)" % subtitle.id

        oc.add(DirectoryObject(
            key=Callback(SelectStoredSubForItemMenu, randomize=timestamp(), sub_key="__".join(key), **kwargs),
            title=_(u"%(current_state)s%(subtitle_name)s, Score: %(score)s",
                    current_state=_("Current: ") if is_current else _("Stored: "),
                    subtitle_name=sub_name,
                    score=subtitle.score),
            summary=summary
        ))

    # release the storage like the other menus do once they're done with it
    storage.destroy()

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/set_current_sub/{rating_key}/{part_id}')
@debounce
def SelectStoredSubForItemMenu(**kwargs):
    """
    Make a stored subtitle the current one, save it and return to the subtitle options menu.

    :param kwargs: must contain ``rating_key``, ``part_id``, ``language`` (IETF code), ``item_type``,
        ``sub_key`` (subtitle key joined with ``__``) and ``randomize``; the rest is passed on to
        ``SubtitleOptionsMenu`` together with a success header and message
    :return: the result of ``SubtitleOptionsMenu``
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])
    item_type = kwargs["item_type"]
    sub_key = tuple(kwargs.pop("sub_key").split("__"))

    plex_item = get_item(rating_key)
    storage = get_subtitle_storage()
    try:
        stored_subs = storage.load(plex_item.rating_key)

        subtitles = stored_subs.get_all(part_id, language)
        subtitle = subtitles[sub_key]

        subtitles["current"] = sub_key

        save_stored_sub(subtitle, rating_key, part_id, language, item_type, plex_item=plex_item, storage=storage,
                        stored_subs=stored_subs)
    finally:
        # release the storage even if the subtitle can't be found or saved
        storage.destroy()

    kwargs.pop("randomize")

    kwargs["header"] = _("Success")
    kwargs["message"] = _("Subtitle saved to disk")

    return SubtitleOptionsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/blacklist_recent/{language}')
@route(PREFIX + '/item/blacklist_recent')
def BlacklistRecentSubtitleMenu(**kwargs):
    """
    Blacklist the current subtitles of the first entry in ``Dict["last_played_items"]``.

    :param kwargs: passed on to ``BlacklistAllPartsSubtitleMenu`` with ``rating_key`` set to that
        entry; may contain ``language`` to restrict the blacklisting
    :return: the result of ``BlacklistAllPartsSubtitleMenu``, or None if there are no last played
        items
    """
    if "last_played_items" not in Dict or not Dict["last_played_items"]:
        return

    kwargs["rating_key"] = Dict["last_played_items"][0]
    return BlacklistAllPartsSubtitleMenu(**kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/blacklist_all/{rating_key}/{language}')
@route(PREFIX + '/item/blacklist_all/{rating_key}')
def BlacklistAllPartsSubtitleMenu(**kwargs):
    """
    Blacklist the current subtitle of every media part of an item, then force-refresh the item.

    :param kwargs: ``rating_key`` of the item and, optionally, ``language`` (IETF code) to only
        blacklist the subtitles of that language
    :return: the result of ``RefreshItem``, or None if the item can't be found
    """
    rating_key = kwargs.get("rating_key")
    language = kwargs.get("language")
    if language:
        language = Language.fromietf(language)

    item = get_item(rating_key)

    if not item:
        return

    item_title = get_item_title(item)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)
    for part_id, languages in stored_subs.parts.iteritems():
        sub_dict = languages
        if language:
            key = str(language)
            if key not in sub_dict:
                continue

            sub_dict = {key: sub_dict[key]}

        # use a name of its own for the loop variable: reusing "language" here would overwrite the
        # filter and restrict all following parts to the language iterated last
        for lang_key, subs in sub_dict.iteritems():
            if "current" in subs:
                stored_subs.blacklist(part_id, lang_key, subs["current"])
                Log.Info("Added %s to blacklist", subs["current"])

    subtitle_storage.save(stored_subs)
    subtitle_storage.destroy()

    return RefreshItem(rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(), timeout=30000)
|
||||
|
||||
|
||||
def blacklist(rating_key, part_id, language):
    """
    Put the current subtitle of a media part and language on the blacklist and save the storage.

    :param rating_key: rating key of the item
    :param part_id: id of the media part
    :param language: language of the subtitle
    :return: True if a subtitle has been blacklisted, None if there is no current subtitle
    """
    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    try:
        if not current_sub:
            return

        stored_subs.blacklist(part_id, language, current_sub.key)
        storage.save(stored_subs)
    finally:
        # release the storage on every path, including the early return above
        storage.destroy()

    Log.Info("Added %s to blacklist", current_sub.key)

    return True
|
||||
|
||||
|
||||
@route(PREFIX + '/item/blacklist/{rating_key}/{part_id}')
@debounce
def BlacklistSubtitleMenu(**kwargs):
    """
    Blacklist the current subtitle of one media part and language, then force-refresh the item.

    :param kwargs: must contain ``rating_key``, ``part_id``, ``language``, ``item_title`` and
        ``randomize``
    :return: the result of ``RefreshItem``
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    item_title = kwargs["item_title"]

    blacklist(rating_key, part_id, language)
    kwargs.pop("randomize")

    return RefreshItem(rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(), timeout=30000)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/manage_blacklist/{rating_key}/{part_id}', force=bool)
@debounce
def ManageBlacklistMenu(**kwargs):
    """
    List the blacklisted subtitles of one media part and language, newest first.

    Selecting an entry calls this menu again with ``remove_sub_key`` set, which removes that
    subtitle from the blacklist and saves the storage before the list is built.

    :param kwargs: must contain ``title``, ``rating_key``, ``part_id``, ``language``,
        ``item_title``, ``current_data`` and ``randomize``; ``remove_sub_key`` (subtitle key joined
        with ``__``) is optional
    :return: the object container holding the back entry and one entry per blacklisted subtitle
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    # taken out of kwargs so it isn't handed on to the callbacks created below
    remove_sub_key = kwargs.pop("remove_sub_key", None)
    current_data = unicode(kwargs["current_data"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    current_bl, subs = stored_subs.get_blacklist(part_id, language)

    if remove_sub_key:
        remove_sub_key = tuple(remove_sub_key.split("__"))
        stored_subs.blacklist(part_id, language, remove_sub_key, add=False)
        storage.save(stored_subs)
        Log.Info("Removed %s from blacklist", remove_sub_key)

    kwargs.pop("randomize")

    oc.add(DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
                     title=kwargs["title"], randomize=timestamp()),
        title=_(u"< Back to %s", kwargs["title"]),
        summary=current_data,
        thumb=default_thumb
    ))

    def sorter(pair):
        # thanks RestrictedModule parser for messing with lambda (x, y)
        return pair[1]["date_added"]

    for sub_key, data in sorted(current_bl.iteritems(), key=sorter, reverse=True):
        provider_name, subtitle_id = sub_key
        # NOTE(review): current_sub is used without a guard here - confirm it can't be empty when
        # the blacklist has entries
        title = _(u"%(provider_name)s, %(subtitle_id)s (added: %(date_added)s, %(mode)s), Language: %(language)s, "
                  u"Score: %(score)i, Storage: %(storage_type)s",
                  provider_name=provider_name,
                  subtitle_id=subtitle_id,
                  date_added=df(data["date_added"]),
                  mode=current_sub.get_mode_verbose(data["mode"]),
                  language=display_language(Language.fromietf(language)),
                  score=data["score"],
                  storage_type=data["storage_type"])
        oc.add(DirectoryObject(
            key=Callback(ManageBlacklistMenu, remove_sub_key="__".join(sub_key), randomize=timestamp(), **kwargs),
            title=title,
            summary=_(u"Remove subtitle from blacklist")
        ))

    storage.destroy()

    return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/search/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item_title=None, filename=None,
|
||||
item_type="episode", language=None, language_name=None, force=False, current_id=None,
|
||||
current_data=None,
|
||||
@@ -195,6 +480,8 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
running = scheduler.is_task_running("AvailableSubsForItem")
|
||||
search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)
|
||||
|
||||
current_data = unicode(current_data) if current_data else None
|
||||
|
||||
if (search_results is None or force) and not running:
|
||||
scheduler.dispatch_task("AvailableSubsForItem", rating_key=rating_key, item_type=item_type, part_id=part_id,
|
||||
language=language)
|
||||
@@ -203,36 +490,56 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
oc = SubFolderObjectContainer(title2=unicode(title), replace_parent=True)
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, rating_key=rating_key, item_title=item_title, title=title, randomize=timestamp()),
|
||||
title=u"< Back to %s" % title,
|
||||
title=_(u"< Back to %s", title),
|
||||
summary=current_data,
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
plex_part = None
|
||||
if not config.low_impact_mode:
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True)
|
||||
|
||||
if not scanned_parts:
|
||||
Log.Error("Couldn't list available subtitles for %s", rating_key)
|
||||
return oc
|
||||
if not scanned_parts:
|
||||
Log.Error("Couldn't list available subtitles for %s", rating_key)
|
||||
return oc
|
||||
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
video_display_data = [video.format] if video.format else []
|
||||
if video.release_group:
|
||||
video_display_data.append(u"by %s" % video.release_group)
|
||||
video_display_data = " ".join(video_display_data)
|
||||
video_display_data = [video.format] if video.format else []
|
||||
if video.release_group:
|
||||
video_display_data.append(unicode(_(u"by %(release_group)s", release_group=video.release_group)))
|
||||
video_display_data = " ".join(video_display_data)
|
||||
else:
|
||||
video_display_data = metadata["filename"]
|
||||
|
||||
current_display = (u"Current: %s (%s) " % (current_provider, current_score) if current_provider else "")
|
||||
current_display = (_(u"Current: %(provider_name)s (%(score)s) ",
|
||||
provider_name=current_provider,
|
||||
score=current_score if current_provider else ""))
|
||||
if not running:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title, language=language,
|
||||
filename=filename, part_id=part_id, title=title, current_id=current_id, force=True,
|
||||
current_provider=current_provider, current_score=current_score,
|
||||
current_data=current_data, item_type=item_type, randomize=timestamp()),
|
||||
title=u"Search for %s subs (%s)" % (get_language(language).name, video_display_data),
|
||||
summary=u"%sFilename: %s" % (current_display, filename),
|
||||
title=_(u"Search for %(language)s subs (%(video_data)s)",
|
||||
language=get_language(language).name,
|
||||
video_data=video_display_data),
|
||||
summary=_(u"%(current_info)sFilename: %(filename)s", current_info=current_display, filename=filename),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
if search_results == "found_none":
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title,
|
||||
language=language, filename=filename, current_data=current_data, force=True,
|
||||
part_id=part_id, title=title, current_id=current_id, item_type=item_type,
|
||||
current_provider=current_provider, current_score=current_score,
|
||||
randomize=timestamp()),
|
||||
title=_(u"No subtitles found"),
|
||||
summary=_(u"%(current_info)sFilename: %(filename)s", current_info=current_display, filename=filename),
|
||||
thumb=default_thumb
|
||||
))
|
||||
else:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title,
|
||||
@@ -240,28 +547,55 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
part_id=part_id, title=title, current_id=current_id, item_type=item_type,
|
||||
current_provider=current_provider, current_score=current_score,
|
||||
randomize=timestamp()),
|
||||
title=u"Searching for %s subs (%s), refresh here ..." % (get_language(language).name, video_display_data),
|
||||
summary=u"%sFilename: %s" % (current_display, filename),
|
||||
title=_(u"Searching for %(language)s subs (%(video_data)s), refresh here ...",
|
||||
language=display_language(get_language(language)),
|
||||
video_data=video_display_data),
|
||||
summary=_(u"%(current_info)sFilename: %(filename)s", current_info=current_display, filename=filename),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
if not search_results:
|
||||
if not search_results or search_results == "found_none":
|
||||
return oc
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
current_bl, subs = stored_subs.get_blacklist(part_id, language)
|
||||
|
||||
seen = []
|
||||
for subtitle in search_results:
|
||||
if subtitle.id in seen:
|
||||
continue
|
||||
|
||||
bl_addon = ""
|
||||
if (str(subtitle.provider_name), str(subtitle.id)) in current_bl:
|
||||
bl_addon = "Blacklisted "
|
||||
|
||||
wrong_fps_addon = ""
|
||||
if subtitle.wrong_fps:
|
||||
wrong_fps_addon = " (wrong FPS, sub: %s, media: %s)" % (subtitle.fps, plex_part.fps)
|
||||
if plex_part:
|
||||
wrong_fps_addon = _(" (wrong FPS, sub: %(subtitle_fps)s, media: %(media_fps)s)",
|
||||
subtitle_fps=subtitle.fps,
|
||||
media_fps=plex_part.fps)
|
||||
else:
|
||||
wrong_fps_addon = _(" (wrong FPS, sub: %(subtitle_fps)s, media: unknown, low impact mode)",
|
||||
subtitle_fps=subtitle.fps)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerDownloadSubtitle, rating_key=rating_key, randomize=timestamp(), item_title=item_title,
|
||||
subtitle_id=str(subtitle.id), language=language),
|
||||
title=u"%s: %s, score: %s%s" % ("Available" if current_id != subtitle.id else "Current",
|
||||
subtitle.provider_name, subtitle.score, wrong_fps_addon),
|
||||
summary=u"Release: %s, Matches: %s" % (subtitle.release_info, ", ".join(subtitle.matches)),
|
||||
title=_(u"%(blacklisted_state)s%(current_state)s: %(provider_name)s, score: %(score)s%(wrong_fps_state)s",
|
||||
blacklisted_state=bl_addon,
|
||||
current_state=_("Available") if current_id != subtitle.id else _("Current"),
|
||||
provider_name=subtitle.provider_name,
|
||||
score=subtitle.score,
|
||||
wrong_fps_state=wrong_fps_addon),
|
||||
summary=_(u"Release: %(release_info)s, Matches: %(matches)s",
|
||||
release_info=subtitle.release_info,
|
||||
matches=", ".join(subtitle.matches)),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
seen.append(subtitle.id)
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@@ -270,7 +604,7 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None, language=None, randomize=None):
|
||||
from interface.main import fatality
|
||||
|
||||
set_refresh_menu_state("Downloading subtitle for %s" % item_title or rating_key)
|
||||
set_refresh_menu_state(_("Downloading subtitle for %(title_or_id)s", title_or_id=item_title or rating_key))
|
||||
search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)
|
||||
|
||||
download_subtitle = None
|
||||
@@ -284,4 +618,84 @@ def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None,
|
||||
else:
|
||||
scheduler.dispatch_task("DownloadSubtitleForItem", rating_key=rating_key, subtitle=download_subtitle)
|
||||
|
||||
scheduler.clear_task_data("AvailableSubsForItem")
|
||||
|
||||
return fatality(randomize=timestamp(), header=" ", replace_parent=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/embedded/{rating_key}/{part_id}')
|
||||
def ListEmbeddedSubsForItemMenu(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
title = kwargs["title"]
|
||||
kwargs.pop("randomize")
|
||||
|
||||
oc = SubFolderObjectContainer(title2=title, replace_parent=True)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
|
||||
base_title=kwargs["base_title"], title=kwargs["item_title"], randomize=timestamp()),
|
||||
title=_("< Back to %s", kwargs["title"]),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
plex_item = get_item(rating_key)
|
||||
part = get_part(plex_item, part_id)
|
||||
|
||||
if part:
|
||||
for stream_data in get_embedded_subtitle_streams(part, skip_duplicate_unknown=False):
|
||||
language = stream_data["language"]
|
||||
is_unknown = stream_data["is_unknown"]
|
||||
stream = stream_data["stream"]
|
||||
is_forced = stream_data["is_forced"]
|
||||
|
||||
if language:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
|
||||
stream_index=str(stream.index), language=language, with_mods=True, **kwargs),
|
||||
title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
|
||||
u"%(stream_title)s with default mods",
|
||||
stream_index=stream.index,
|
||||
language=display_language(language),
|
||||
unknown_state=_(" (unknown)") if is_unknown else "",
|
||||
forced_state=_(" (forced)") if is_forced else "",
|
||||
stream_title=" (\"%s\")" % stream.title if stream.title else ""),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
|
||||
stream_index=str(stream.index), language=language, **kwargs),
|
||||
title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
|
||||
u"%(stream_title)s",
|
||||
stream_index=stream.index,
|
||||
language=display_language(language),
|
||||
unknown_state=_(" (unknown)") if is_unknown else "",
|
||||
forced_state=_(" (forced)") if is_forced else "",
|
||||
stream_title=" (\"%s\")" % stream.title if stream.title else ""),
|
||||
))
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/extract_embedded/{rating_key}/{part_id}/{stream_index}')
|
||||
@debounce
|
||||
def TriggerExtractEmbeddedSubForItemMenu(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs.get("part_id")
|
||||
stream_index = kwargs.get("stream_index")
|
||||
|
||||
Thread.Create(extract_embedded_sub, **kwargs)
|
||||
header = _(u"Extracting of embedded subtitle %s of part %s:%s triggered",
|
||||
stream_index, rating_key, part_id)
|
||||
|
||||
kwargs.pop("randomize")
|
||||
kwargs.pop("item_type")
|
||||
kwargs.pop("stream_index")
|
||||
kwargs.pop("part_id")
|
||||
kwargs.pop("with_mods", False)
|
||||
kwargs.pop("language")
|
||||
kwargs["title"] = kwargs["item_title"]
|
||||
kwargs["header"] = header
|
||||
kwargs["message"] = header
|
||||
|
||||
return ItemDetailsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
|
||||
+108
-75
@@ -2,13 +2,13 @@
|
||||
|
||||
from subzero.constants import PREFIX, TITLE, ART
|
||||
from support.config import config
|
||||
from support.helpers import pad_title, timestamp, df, get_plex_item_display_title
|
||||
from support.helpers import pad_title, timestamp, df, display_language
|
||||
from support.scheduler import scheduler
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, \
|
||||
get_item_kind_from_item
|
||||
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_ignore_options,\
|
||||
ObjectContainer
|
||||
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, get_item_title
|
||||
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_ignore_options, \
|
||||
ObjectContainer, route, handler
|
||||
from support.i18n import _
|
||||
from item_details import ItemDetailsMenu
|
||||
|
||||
|
||||
@@ -36,25 +36,34 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
if config.lock_menu and not config.pin_correct:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(PinMenu, randomize=timestamp()),
|
||||
title=pad_title("Enter PIN"),
|
||||
summary="The owner has restricted the access to this menu. Please enter the correct pin",
|
||||
title=pad_title(_("Enter PIN")),
|
||||
summary=_("The owner has restricted the access to this menu. Please enter the correct pin"),
|
||||
))
|
||||
return oc
|
||||
|
||||
if not config.permissions_ok and config.missing_permissions:
|
||||
for title, path in config.missing_permissions:
|
||||
if not isinstance(config.missing_permissions, list):
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, randomize=timestamp()),
|
||||
title=pad_title("Insufficient permissions"),
|
||||
summary="Insufficient permissions on library %s, folder: %s" % (title, path),
|
||||
title=pad_title(_("Insufficient permissions")),
|
||||
summary=config.missing_permissions,
|
||||
))
|
||||
else:
|
||||
for title, path in config.missing_permissions:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, randomize=timestamp()),
|
||||
title=pad_title(_("Insufficient permissions")),
|
||||
summary=_("Insufficient permissions on library %(title)s, folder: %(path)s",
|
||||
title=title,
|
||||
path=path),
|
||||
))
|
||||
return oc
|
||||
|
||||
if not config.enabled_sections:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, randomize=timestamp()),
|
||||
title=pad_title("I'm not enabled!"),
|
||||
summary="Please enable me for some of your libraries in your server settings; currently I do nothing",
|
||||
title=pad_title(_("I'm not enabled!")),
|
||||
summary=_("Please enable me for some of your libraries in your server settings; currently I do nothing"),
|
||||
))
|
||||
return oc
|
||||
|
||||
@@ -62,47 +71,42 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
if Dict["current_refresh_state"]:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, force_title=" ", randomize=timestamp()),
|
||||
title=pad_title("Working ... refresh here"),
|
||||
summary="Current state: %s; Last state: %s" % (
|
||||
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
|
||||
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
|
||||
title=pad_title(_("Working ... refresh here")),
|
||||
summary=_("Current state: %s; Last state: %s",
|
||||
(Dict["current_refresh_state"] or _("Idle")) if "current_refresh_state" in Dict else _("Idle"),
|
||||
(Dict["last_refresh_state"] or _("None")) if "last_refresh_state" in Dict else _("None")
|
||||
)
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(OnDeckMenu),
|
||||
title="On-deck items",
|
||||
summary="Shows the current on deck items and allows you to individually (force-) refresh their metadata/"
|
||||
"subtitles.",
|
||||
title=_("On-deck items"),
|
||||
summary=_("Shows the current on deck items and allows you to individually (force-) refresh their metadata/subtitles."),
|
||||
thumb=R("icon-ondeck.jpg")
|
||||
))
|
||||
if "last_played_items" in Dict and Dict["last_played_items"]:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentlyPlayedMenu),
|
||||
title=pad_title("Recently played items"),
|
||||
summary="Shows the %i recently played items and allows you to individually (force-) refresh their "
|
||||
"metadata/subtitles." % config.store_recently_played_amount,
|
||||
title=pad_title(_("Recently played items")),
|
||||
summary=_("Shows the %s recently played items and allows you to individually (force-) refresh their metadata/subtitles.", config.store_recently_played_amount),
|
||||
thumb=R("icon-played.jpg")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentlyAddedMenu),
|
||||
title="Recently-added items",
|
||||
summary="Shows the recently added items per section.",
|
||||
title=_("Recently-added items"),
|
||||
summary=_("Shows the recently added items per section."),
|
||||
thumb=R("icon-added.jpg")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentMissingSubtitlesMenu, randomize=timestamp()),
|
||||
title="Items with missing subtitles",
|
||||
summary="Shows the items honoring the configured 'Item age to be considered recent'-setting (%s)"
|
||||
" and allowing you to individually (force-) refresh their metadata/subtitles. " %
|
||||
Prefs["scheduler.item_is_recent_age"],
|
||||
title=_("Show recently added items with missing subtitles"),
|
||||
summary=_("Lists items with missing subtitles. Click on Find recent items with missing subs to update list"),
|
||||
thumb=R("icon-missing.jpg")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SectionsMenu),
|
||||
title="Browse all items",
|
||||
summary="Go through your whole library and manage your ignore list. You can also "
|
||||
"(force-) refresh the metadata/subtitles of individual items.",
|
||||
title=_("Browse all items"),
|
||||
summary=_("Go through your whole library and manage your ignore list. You can also (force-) refresh the metadata/subtitles of individual items."),
|
||||
thumb=R("icon-browse.jpg")
|
||||
))
|
||||
|
||||
@@ -110,41 +114,46 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
task = scheduler.task(task_name)
|
||||
|
||||
if task.ready_for_display:
|
||||
task_state = "Running: %s/%s (%s%%)" % (len(task.items_done), len(task.items_searching), task.percentage)
|
||||
task_state = _("Running: %(items_done)s/%(items_searching)s (%(percentage)s%%)",
|
||||
items_done=task.items_done,
|
||||
items_searching=task.items_searching,
|
||||
percentage=task.percentage)
|
||||
else:
|
||||
task_state = "Last scheduler run: %s; Next scheduled run: %s; Last runtime: %s" % (
|
||||
df(scheduler.last_run(task_name)) or "never",
|
||||
df(scheduler.next_run(task_name)) or "never",
|
||||
lr = scheduler.last_run(task_name)
|
||||
nr = scheduler.next_run(task_name)
|
||||
task_state = _("Last run: %s; Next scheduled run: %s; Last runtime: %s",
|
||||
df(scheduler.last_run(task_name)) if lr else "never",
|
||||
df(scheduler.next_run(task_name)) if nr else "never",
|
||||
str(task.last_run_time).split(".")[0])
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RefreshMissing, randomize=timestamp()),
|
||||
title="Search for missing subtitles (in recently-added items, max-age: %s)" % Prefs[
|
||||
"scheduler.item_is_recent_age"],
|
||||
summary="Automatically run periodically by the scheduler, if configured. %s" % task_state,
|
||||
title=_("Search for missing subtitles (in recently-added items, max-age: %s)", Prefs[
|
||||
"scheduler.item_is_recent_age"]),
|
||||
summary=_("Automatically run periodically by the scheduler, if configured. %s", task_state),
|
||||
thumb=R("icon-search.jpg")
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(IgnoreListMenu),
|
||||
title="Display ignore list (%d)" % len(ignore_list),
|
||||
summary="Show the current ignore list (mainly used for the automatic tasks)",
|
||||
title=_("Display ignore list (%(ignored_count)d)", ignored_count=len(ignore_list)),
|
||||
summary=_("Show the current ignore list (mainly used for the automatic tasks)"),
|
||||
thumb=R("icon-ignore.jpg")
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(HistoryMenu),
|
||||
title="History",
|
||||
summary="Show the last %i downloaded subtitles" % int(Prefs["history_size"]),
|
||||
title=_("History"),
|
||||
summary=_("Show the last %i downloaded subtitles", int(Prefs["history_size"])),
|
||||
thumb=R("icon-history.jpg")
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, force_title=" ", randomize=timestamp()),
|
||||
title=pad_title("Refresh"),
|
||||
summary="Current state: %s; Last state: %s" % (
|
||||
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
|
||||
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
|
||||
title=pad_title(_("Refresh")),
|
||||
summary=_("Current state: %s; Last state: %s",
|
||||
(Dict["current_refresh_state"] or _("Idle")) if "current_refresh_state" in Dict else _("Idle"),
|
||||
(Dict["last_refresh_state"] or _("None")) if "last_refresh_state" in Dict else _("None")
|
||||
),
|
||||
thumb=R("icon-refresh.jpg")
|
||||
))
|
||||
@@ -153,15 +162,31 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
if config.pin:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ClearPin, randomize=timestamp()),
|
||||
title=pad_title("Re-lock menu(s)"),
|
||||
summary="Enabled the PIN again for menu(s)"
|
||||
title=pad_title(_("Re-lock menu(s)")),
|
||||
summary=_("Enabled the PIN again for menu(s)")
|
||||
))
|
||||
|
||||
if not only_refresh:
|
||||
if "provider_throttle" in Dict and Dict["provider_throttle"].keys():
|
||||
summary_data = []
|
||||
for provider, data in Dict["provider_throttle"].iteritems():
|
||||
reason, until, desc = data
|
||||
summary_data.append(unicode(_("%(throttled_provider)s until %(until_date)s (%(reason)s)",
|
||||
throttled_provider=provider,
|
||||
until_date=until.strftime("%y/%m/%d %H:%M"),
|
||||
reason=reason)))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, force_title=" ", randomize=timestamp()),
|
||||
title=pad_title(_("Throttled providers: %s", ", ".join(Dict["provider_throttle"].keys()))),
|
||||
summary=", ".join(summary_data),
|
||||
thumb=R("icon-throttled.jpg")
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(AdvancedMenu),
|
||||
title=pad_title("Advanced functions"),
|
||||
summary="Use at your own risk",
|
||||
title=pad_title(_("Advanced functions")),
|
||||
summary=_("Use at your own risk"),
|
||||
thumb=R("icon-advanced.jpg")
|
||||
))
|
||||
|
||||
@@ -175,24 +200,22 @@ def OnDeckMenu(message=None):
|
||||
:param message:
|
||||
:return:
|
||||
"""
|
||||
return mergedItemsMenu(title="Items On Deck", base_title="Items On Deck", itemGetter=get_on_deck_items)
|
||||
return mergedItemsMenu(title=_("Items On Deck"), base_title=_("Items On Deck"), itemGetter=get_on_deck_items)
|
||||
|
||||
|
||||
@route(PREFIX + '/recently_played')
|
||||
def RecentlyPlayedMenu():
|
||||
base_title = "Recently Played"
|
||||
base_title = _("Recently Played")
|
||||
oc = SubFolderObjectContainer(title2=base_title, replace_parent=True)
|
||||
|
||||
for item in [get_item(rating_key) for rating_key in Dict["last_played_items"]]:
|
||||
kind = get_item_kind_from_item(item)
|
||||
if kind not in ("episode", "movie"):
|
||||
if not item:
|
||||
continue
|
||||
|
||||
if kind == "episode":
|
||||
item_title = get_plex_item_display_title(item, "show", parent=item.season, section_title=None,
|
||||
parent_title=item.show.title)
|
||||
else:
|
||||
item_title = get_plex_item_display_title(item, kind, section_title=None)
|
||||
if getattr(getattr(item, "__class__"), "__name__") not in ("Episode", "Movie"):
|
||||
continue
|
||||
|
||||
item_title = get_item_title(item)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
title=item_title,
|
||||
@@ -210,13 +233,13 @@ def RecentlyAddedMenu(message=None):
|
||||
:param message:
|
||||
:return:
|
||||
"""
|
||||
return SectionsMenu(base_title="Recently added", section_items_key="recently_added", ignore_options=False)
|
||||
return SectionsMenu(base_title=_("Recently added"), section_items_key="recently_added", ignore_options=False)
|
||||
|
||||
|
||||
@route(PREFIX + '/recent', force=bool)
|
||||
@debounce
|
||||
def RecentMissingSubtitlesMenu(force=False, randomize=None):
|
||||
title = "Items with missing subtitles"
|
||||
title = _("Items with missing subtitles")
|
||||
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
|
||||
|
||||
running = scheduler.is_task_running("MissingSubtitles")
|
||||
@@ -230,13 +253,13 @@ def RecentMissingSubtitlesMenu(force=False, randomize=None):
|
||||
if not running:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentMissingSubtitlesMenu, force=True, randomize=timestamp()),
|
||||
title=u"Get items with missing subtitles",
|
||||
title=_(u"Find recent items with missing subtitles"),
|
||||
thumb=default_thumb
|
||||
))
|
||||
else:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentMissingSubtitlesMenu, force=False, randomize=timestamp()),
|
||||
title=u"Updating, refresh here ...",
|
||||
title=_(u"Updating, refresh here ..."),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
@@ -246,7 +269,7 @@ def RecentMissingSubtitlesMenu(force=False, randomize=None):
|
||||
key=Callback(ItemDetailsMenu, title=title + " > " + item_title, item_title=item_title,
|
||||
rating_key=item_id),
|
||||
title=item_title,
|
||||
summary="Missing: %s" % ", ".join(l.name for l in missing_languages),
|
||||
summary=_("Missing: %s", ", ".join(display_language(l) for l in missing_languages)),
|
||||
thumb=get_item_thumb(item) or default_thumb
|
||||
))
|
||||
|
||||
@@ -304,18 +327,25 @@ def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
|
||||
"""
|
||||
is_ignored = rating_key in ignore_list[kind]
|
||||
if not sure:
|
||||
oc = SubFolderObjectContainer(no_history=True, replace_parent=True, title1="%s %s %s %s the ignore list" % (
|
||||
"Add" if not is_ignored else "Remove", ignore_list.verbose(kind), title,
|
||||
"to" if not is_ignored else "from"), title2="Are you sure?")
|
||||
t = u"Add %(kind)s %(title)s to the ignore list"
|
||||
if is_ignored:
|
||||
t = u"Remove %(kind)s %(title)s from the ignore list"
|
||||
oc = SubFolderObjectContainer(no_history=True, replace_parent=True,
|
||||
title1=_(t,
|
||||
kind=ignore_list.verbose(kind),
|
||||
title=title
|
||||
),
|
||||
title2=_("Are you sure?"))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(IgnoreMenu, kind=kind, rating_key=rating_key, title=title, sure=True,
|
||||
todo="add" if not is_ignored else "remove"),
|
||||
title=pad_title("Are you sure?"),
|
||||
title=pad_title(_("Are you sure?")),
|
||||
))
|
||||
return oc
|
||||
|
||||
rel = ignore_list[kind]
|
||||
dont_change = False
|
||||
state = None
|
||||
if todo == "remove":
|
||||
if not is_ignored:
|
||||
dont_change = True
|
||||
@@ -324,7 +354,6 @@ def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
|
||||
Log.Info("Removed %s (%s) from the ignore list", title, rating_key)
|
||||
ignore_list.remove_title(kind, rating_key)
|
||||
ignore_list.save()
|
||||
state = "removed from"
|
||||
elif todo == "add":
|
||||
if is_ignored:
|
||||
dont_change = True
|
||||
@@ -333,25 +362,29 @@ def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
|
||||
Log.Info("Added %s (%s) to the ignore list", title, rating_key)
|
||||
ignore_list.add_title(kind, rating_key, title)
|
||||
ignore_list.save()
|
||||
state = "added to"
|
||||
else:
|
||||
dont_change = True
|
||||
|
||||
if dont_change:
|
||||
return fatality(force_title=" ", header="Didn't change the ignore list", no_history=True)
|
||||
return fatality(force_title=" ", header=_("Didn't change the ignore list"), no_history=True)
|
||||
|
||||
return fatality(force_title=" ", header="%s %s the ignore list" % (title, state), no_history=True)
|
||||
t = "%(title)s added to the ignore list"
|
||||
if todo == "remove":
|
||||
t = "%(title)s removed from the ignore list"
|
||||
return fatality(force_title=" ", header=_(t,
|
||||
title=title,),
|
||||
no_history=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/sections')
|
||||
def SectionsMenu(base_title="Sections", section_items_key="all", ignore_options=True):
|
||||
def SectionsMenu(base_title=_("Sections"), section_items_key="all", ignore_options=True):
|
||||
"""
|
||||
displays the menu for all sections
|
||||
:return:
|
||||
"""
|
||||
items = get_all_items("sections")
|
||||
|
||||
return dig_tree(SubFolderObjectContainer(title2="Sections", no_cache=True, no_history=True), items, None,
|
||||
return dig_tree(SubFolderObjectContainer(title2=_("Sections"), no_cache=True, no_history=True), items, None,
|
||||
menu_determination_callback=determine_section_display, pass_kwargs={"base_title": base_title,
|
||||
"section_items_key": section_items_key,
|
||||
"ignore_options": ignore_options},
|
||||
@@ -412,7 +445,7 @@ def SectionFirstLetterMenu(rating_key, title=None, base_title=None, section_titl
|
||||
add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SectionMenu, title="All", base_title=title, rating_key=rating_key, ignore_options=False),
|
||||
key=Callback(SectionMenu, title=_("All"), base_title=title, rating_key=rating_key, ignore_options=False),
|
||||
title="All"
|
||||
)
|
||||
)
|
||||
|
||||
+203
-43
@@ -2,22 +2,29 @@
|
||||
import locale
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import traceback
|
||||
|
||||
import logger
|
||||
import copy
|
||||
|
||||
from requests import HTTPError
|
||||
from item_details import ItemDetailsMenu
|
||||
from refresh_item import RefreshItem
|
||||
from menu_helpers import add_ignore_options, dig_tree, set_refresh_menu_state, \
|
||||
should_display_ignore, enable_channel_wrapper, default_thumb, debounce, ObjectContainer, SubFolderObjectContainer
|
||||
default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route, \
|
||||
extract_embedded_sub
|
||||
from main import fatality, IgnoreMenu
|
||||
from advanced import DispatchRestart
|
||||
from subzero.constants import ART, PREFIX, DEPENDENCY_MODULE_NAMES
|
||||
from support.plex_media import get_all_parts, get_embedded_subtitle_streams
|
||||
from support.scheduler import scheduler
|
||||
from support.config import config
|
||||
from support.helpers import timestamp, df
|
||||
from support.helpers import timestamp, df, display_language
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_all_items, get_items_info, \
|
||||
get_item_kind_from_rating_key, get_item
|
||||
from support.items import get_all_items, get_items_info, get_item_kind_from_rating_key, get_item, MI_KEY, get_item_title
|
||||
from support.storage import get_subtitle_storage
|
||||
from support.i18n import _
|
||||
|
||||
# init GUI
|
||||
ObjectContainer.art = R(ART)
|
||||
@@ -25,11 +32,7 @@ ObjectContainer.no_cache = True
|
||||
|
||||
# default thumb for DirectoryObjects
|
||||
DirectoryObject.thumb = default_thumb
|
||||
|
||||
# noinspection PyUnboundLocalVariable
|
||||
route = enable_channel_wrapper(route)
|
||||
# noinspection PyUnboundLocalVariable
|
||||
handler = enable_channel_wrapper(handler)
|
||||
Plugin.AddViewGroup("full_details", viewMode="InfoList", mediaType="items", type="list", summary=2)
|
||||
|
||||
|
||||
@route(PREFIX + '/section/firstLetter/key', deeper=bool)
|
||||
@@ -56,7 +59,7 @@ def FirstLetterMetadataMenu(rating_key, key, title=None, base_title=None, displa
|
||||
|
||||
@route(PREFIX + '/section/contents', display_items=bool)
|
||||
def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, previous_item_type=None,
|
||||
previous_rating_key=None, randomize=None):
|
||||
previous_rating_key=None, message=None, header=None, randomize=None):
|
||||
"""
|
||||
displays the contents of a section based on whether it has a deeper tree or not (movies->movie (item) list; series->series list)
|
||||
:param rating_key:
|
||||
@@ -70,49 +73,80 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
title = unicode(title)
|
||||
item_title = title
|
||||
title = base_title + " > " + title
|
||||
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
|
||||
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True, header=header, message=message,
|
||||
view_group="full_details")
|
||||
|
||||
current_kind = get_item_kind_from_rating_key(rating_key)
|
||||
|
||||
if display_items:
|
||||
timeout = 30
|
||||
show = None
|
||||
|
||||
# add back to series for season
|
||||
if current_kind == "season":
|
||||
timeout = 360
|
||||
timeout = 720
|
||||
|
||||
show = get_item(previous_rating_key)
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(MetadataMenu, rating_key=show.rating_key, title=show.title, base_title=show.section.title,
|
||||
previous_item_type="section", display_items=True, randomize=timestamp()),
|
||||
title=u"< Back to %s" % show.title,
|
||||
title=_(u"< Back to %s", show.title),
|
||||
thumb=show.thumb or default_thumb
|
||||
))
|
||||
elif current_kind == "series":
|
||||
timeout = 1800
|
||||
# it shouldn't take more than 6 minutes to scan all of a series' files and determine the force refresh
|
||||
timeout = 3600
|
||||
|
||||
items = get_all_items(key="children", value=rating_key, base="library/metadata")
|
||||
kind, deeper = get_items_info(items)
|
||||
dig_tree(oc, items, MetadataMenu,
|
||||
pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": kind,
|
||||
"previous_rating_key": rating_key})
|
||||
|
||||
# we don't know exactly where we are here, only add ignore option to series
|
||||
if should_display_ignore(items, previous=previous_item_type):
|
||||
add_ignore_options(oc, "series", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
if current_kind in ("series", "season"):
|
||||
item = get_item(rating_key)
|
||||
sub_title = get_item_title(item)
|
||||
add_ignore_options(oc, current_kind, title=sub_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
|
||||
# mass-extract embedded
|
||||
if current_kind == "season" and config.plex_transcoder:
|
||||
for lang in config.lang_list:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=show.section.title, display_items=display_items, item_title=item_title,
|
||||
title=title,
|
||||
previous_item_type=previous_item_type, with_mods=True,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=_(u"Extract missing %(language)s embedded subtitles", language=display_language(lang)),
|
||||
summary=_("Extracts the not yet extracted embedded subtitles of all episodes for the current "
|
||||
"season with all configured default modifications")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=show.section.title, display_items=display_items, item_title=item_title,
|
||||
title=title, force=True,
|
||||
previous_item_type=previous_item_type, with_mods=True,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=_(u"Extract and activate %(language)s embedded subtitles", language=display_language(lang)),
|
||||
summary=_("Extracts embedded subtitles of all episodes for the current season "
|
||||
"with all configured default modifications")
|
||||
))
|
||||
|
||||
# add refresh
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RefreshItem, rating_key=rating_key, item_title=title, refresh_kind=current_kind,
|
||||
previous_rating_key=previous_rating_key, timeout=timeout * 1000, randomize=timestamp()),
|
||||
title=u"Refresh: %s" % item_title,
|
||||
summary="Refreshes the %s, possibly searching for missing and picking up new subtitles on disk" % current_kind
|
||||
title=_(u"Refresh: %s", item_title),
|
||||
summary=_("Refreshes %(the_movie_series_season_episode)s, possibly searching for missing and picking up "
|
||||
"new subtitles on disk", the_movie_series_season_episode=_(u"the %s" % current_kind))
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RefreshItem, rating_key=rating_key, item_title=title, force=True,
|
||||
refresh_kind=current_kind, previous_rating_key=previous_rating_key, timeout=timeout * 1000,
|
||||
randomize=timestamp()),
|
||||
title=u"Auto-Find subtitles: %s" % item_title,
|
||||
summary="Issues a forced refresh, ignoring known subtitles and searching for new ones"
|
||||
title=_(u"Auto-Find subtitles: %s", item_title),
|
||||
summary=_("Issues a forced refresh, ignoring known subtitles and searching for new ones")
|
||||
))
|
||||
else:
|
||||
return ItemDetailsMenu(rating_key=rating_key, title=title, item_title=item_title)
|
||||
@@ -120,6 +154,70 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/season/extract_embedded/{rating_key}/{language}')
def SeasonExtractEmbedded(**kwargs):
    """
    Menu endpoint: trigger extraction of embedded subtitles for a season, then re-render the metadata menu.

    Consumes ``language``, ``with_mods``, ``item_title``, ``title``, ``force`` (optional, default False) and
    ``randomize`` from kwargs; everything else (including ``rating_key``) is forwarded to MetadataMenu.

    :param kwargs: route/callback parameters as described above
    :return: whatever MetadataMenu returns
    """
    # rating_key is only read, not removed: MetadataMenu receives it via **kwargs below
    season_key = kwargs.get("rating_key")
    lang = kwargs.pop("language")
    apply_mods = kwargs.pop("with_mods")
    menu_title = kwargs.pop("item_title")
    season_title = kwargs.pop("title")
    forced = kwargs.pop("force", False)

    # hand the actual work to Thread.Create so this request can return right away
    Thread.Create(season_extract_embedded, rating_key=season_key, requested_language=lang,
                  with_mods=apply_mods, force=forced)

    kwargs.update(
        header=_("Success"),
        message=_(u"Extracting of embedded subtitles for %s triggered", season_title),
    )

    # the incoming randomize value is dropped; a fresh timestamp is passed on instead
    del kwargs["randomize"]
    return MetadataMenu(randomize=timestamp(), title=menu_title, **kwargs)
|
||||
|
||||
|
||||
def multi_extract_embedded(stream_list, refresh=False, with_mods=False, single_thread=True):
    """
    Run extract_embedded_sub for every entry of :stream_list:, one after another.

    :param stream_list: iterable of 5-tuples
        ``(video_part_map, plexapi_part, stream_index, language, set_current)``
    :param refresh: forwarded to extract_embedded_sub
    :param with_mods: forwarded to extract_embedded_sub
    :param single_thread: if true, the whole batch runs while holding ``Thread.Lock(key="extract_embedded")``
    :return: None
    """
    def run_batch():
        for entry in stream_list:
            scanned, plex_part, idx, lang, make_current = entry

            # the Plex item is taken from the metadata of the first key of the scanned-video mapping
            first_video = scanned.keys()[0]
            plex_item = first_video.plexapi_metadata["item"]

            extract_embedded_sub(rating_key=plex_item.rating_key, part_id=plex_part.id,
                                 plex_item=plex_item, part=plex_part, scanned_videos=scanned,
                                 stream_index=idx, set_current=make_current,
                                 language=lang, with_mods=with_mods, refresh=refresh)

    if not single_thread:
        run_batch()
        return

    with Thread.Lock(key="extract_embedded"):
        run_batch()
|
||||
|
||||
|
||||
def season_extract_embedded(rating_key, requested_language, with_mods=False, force=False):
    """
    Extract the first matching embedded subtitle stream for every part of every child item of :rating_key:.

    :param rating_key: key whose "children" are fetched via get_all_items
    :param requested_language: language to look for; also forwarded to extract_embedded_sub
    :param with_mods: forwarded to extract_embedded_sub
    :param force: extract even if an "embedded" subtitle is already stored, and make the result current
    :return: None
    """
    storage = get_subtitle_storage()

    try:
        for child in get_all_items(key="children", value=rating_key, base="library/metadata"):
            episode = get_item(child[MI_KEY])
            if not episode:
                continue

            known_subs = storage.load_or_new(episode)
            for part in get_all_parts(episode):
                already_extracted = known_subs.get_by_provider(part.id, requested_language, "embedded")
                active_sub = known_subs.get_any(part.id, requested_language)

                # nothing to do when an embedded subtitle is already stored, unless forced
                if already_extracted and not force:
                    continue

                streams = get_embedded_subtitle_streams(part, requested_language=requested_language,
                                                        get_forced=config.forced_only)
                if not streams:
                    continue

                first_stream = streams[0]["stream"]

                # only take over as current subtitle when none is set yet (or when forced);
                # only refresh the item when no subtitle was set before
                make_current = not active_sub or force
                do_refresh = not active_sub

                extract_embedded_sub(rating_key=episode.rating_key, part_id=part.id,
                                     stream_index=str(first_stream.index), set_current=make_current,
                                     refresh=do_refresh, language=requested_language, with_mods=with_mods)
    finally:
        storage.destroy()
|
||||
|
||||
|
||||
@route(PREFIX + '/ignore_list')
|
||||
def IgnoreListMenu():
|
||||
oc = SubFolderObjectContainer(title2="Ignore list", replace_parent=True)
|
||||
@@ -135,17 +233,22 @@ def IgnoreListMenu():
|
||||
def HistoryMenu():
|
||||
from support.history import get_history
|
||||
history = get_history()
|
||||
oc = SubFolderObjectContainer(title2="History", replace_parent=True)
|
||||
oc = SubFolderObjectContainer(title2=_("History"), replace_parent=True)
|
||||
|
||||
for item in history.items:
|
||||
possible_language = item.language
|
||||
language_display = item.lang_name if not possible_language else display_language(possible_language)
|
||||
|
||||
for item in history.history_items:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, title=item.title, item_title=item.item_title,
|
||||
rating_key=item.rating_key),
|
||||
title=u"%s (%s)" % (item.item_title, item.mode_verbose),
|
||||
summary=u"%s in %s (%s, score: %s), %s" % (item.lang_name, item.section_title,
|
||||
summary=_(u"%s in %s (%s, score: %s), %s", language_display, item.section_title,
|
||||
item.provider_name, item.score, df(item.time))
|
||||
))
|
||||
|
||||
history.destroy()
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@@ -157,10 +260,26 @@ def RefreshMissing(randomize=None):
|
||||
return fatality(header=header, replace_parent=True)
|
||||
|
||||
|
||||
def replace_item(obj, key, replace_value):
    """
    Replace the value stored under :key: everywhere inside a nested dict, in place.

    Nested dicts are searched at any depth. Dicts that sit inside lists (at any nesting level) are searched
    as well, so a secret kept in a list of settings dicts is replaced, too.

    :param obj: dict to modify in place
    :param key: key whose value is replaced wherever it occurs
    :param replace_value: value stored under every occurrence of :key:
    :return: :obj: itself (modified in place)
    """
    def _walk(value):
        # descend into containers; scalars are left alone
        if isinstance(value, dict):
            replace_item(value, key, replace_value)
        elif isinstance(value, list):
            for element in value:
                _walk(element)

    # only values are mutated here, the key set of obj stays unchanged while iterating
    for value in obj.values():
        _walk(value)

    if key in obj:
        obj[key] = replace_value
    return obj
|
||||
|
||||
|
||||
@route(PREFIX + '/ValidatePrefs', enforce_route=True)
|
||||
def ValidatePrefs():
|
||||
Core.log.setLevel(logging.DEBUG)
|
||||
|
||||
if Prefs["log_console"]:
|
||||
Core.log.addHandler(logger.console_handler)
|
||||
Log.Debug("Logging to console from now on")
|
||||
else:
|
||||
Core.log.removeHandler(logger.console_handler)
|
||||
Log.Debug("Stop logging to console")
|
||||
|
||||
# cache the channel state
|
||||
update_dict = False
|
||||
restart = False
|
||||
@@ -173,47 +292,61 @@ def ValidatePrefs():
|
||||
update_dict = True
|
||||
|
||||
elif Dict["channel_enabled"] != config.enable_channel:
|
||||
Log.Debug("Channel features %s, restarting plugin", "enabled" if config.enable_channel else "disabled")
|
||||
Log.Debug("Interface features %s, restarting plugin", "enabled" if config.enable_channel else "disabled")
|
||||
update_dict = True
|
||||
restart = True
|
||||
|
||||
if "plugin_pin_mode2" not in Dict:
|
||||
update_dict = True
|
||||
|
||||
elif Dict["plugin_pin_mode2"] != Prefs["plugin_pin_mode2"]:
|
||||
update_dict = True
|
||||
restart = True
|
||||
|
||||
if update_dict:
|
||||
Dict["channel_enabled"] = config.enable_channel
|
||||
Dict["plugin_pin_mode2"] = Prefs["plugin_pin_mode2"]
|
||||
Dict.Save()
|
||||
|
||||
if restart:
|
||||
scheduler.stop()
|
||||
DispatchRestart()
|
||||
return
|
||||
|
||||
scheduler.setup_tasks()
|
||||
scheduler.clear_task_data("MissingSubtitles")
|
||||
set_refresh_menu_state(None)
|
||||
|
||||
if Prefs["log_console"]:
|
||||
Core.log.addHandler(logger.console_handler)
|
||||
Log.Debug("Logging to console from now on")
|
||||
else:
|
||||
Core.log.removeHandler(logger.console_handler)
|
||||
Log.Debug("Stop logging to console")
|
||||
|
||||
Log.Debug("Validate Prefs called.")
|
||||
|
||||
# SZ config debug
|
||||
Log.Debug("--- SZ Config-Debug ---")
|
||||
for attr in [
|
||||
"app_support_path", "data_path", "data_items_path", "enable_agent",
|
||||
"version", "app_support_path", "data_path", "data_items_path", "enable_agent",
|
||||
"enable_channel", "permissions_ok", "missing_permissions", "fs_encoding",
|
||||
"subtitle_destination_folder"]:
|
||||
Log.Debug("config.%s: %s", attr, getattr(config, attr))
|
||||
"subtitle_destination_folder", "new_style_cache", "dbm_supported", "lang_list", "providers",
|
||||
"plex_transcoder", "refiner_settings", "unrar", "adv_cfg_path"]:
|
||||
|
||||
value = getattr(config, attr)
|
||||
if isinstance(value, dict):
|
||||
d = replace_item(copy.deepcopy(value), "api_key", "xxxxxxxxxxxxxxxxxxxxxxxxx")
|
||||
Log.Debug("config.%s: %s", attr, d)
|
||||
continue
|
||||
|
||||
Log.Debug("config.%s: %s", attr, value)
|
||||
|
||||
for attr in ["plugin_log_path", "server_log_path"]:
|
||||
value = getattr(config, attr)
|
||||
access = os.access(value, os.R_OK)
|
||||
if Core.runtime.os == "Windows":
|
||||
try:
|
||||
f = open(value, "r")
|
||||
f.read(1)
|
||||
f.close()
|
||||
except:
|
||||
access = False
|
||||
|
||||
if value:
|
||||
access = os.access(value, os.R_OK)
|
||||
if Core.runtime.os == "Windows":
|
||||
try:
|
||||
f = open(value, "r")
|
||||
f.read(1)
|
||||
f.close()
|
||||
except:
|
||||
access = False
|
||||
|
||||
Log.Debug("config.%s: %s (accessible: %s)", attr, value, access)
|
||||
|
||||
@@ -221,10 +354,36 @@ def ValidatePrefs():
|
||||
"subtitles.save.filesystem", ]:
|
||||
Log.Debug("Pref.%s: %s", attr, Prefs[attr])
|
||||
|
||||
# debug drone: log whether the configured Sonarr/Radarr instances answer a status request
if "sonarr" in config.refiner_settings or "radarr" in config.refiner_settings:
    Log.Debug("----- Connections -----")
    try:
        from subliminal_patch.refiners.drone import SonarrClient, RadarrClient
        for key, cls in [("sonarr", SonarrClient), ("radarr", RadarrClient)]:
            if key not in config.refiner_settings:
                continue

            cname = key.capitalize()
            try:
                status = cls(**config.refiner_settings[key]).status()
            except HTTPError as e:
                # a 401 is reported as a bad API key; anything else gets the full traceback
                if e.response.status_code == 401:
                    Log.Debug("%s: NOT WORKING - BAD API KEY", cname)
                else:
                    Log.Debug("%s: NOT WORKING - %s", cname, traceback.format_exc())
            except Exception:
                Log.Debug("%s: NOT WORKING - %s", cname, traceback.format_exc())
            else:
                if status and status["version"]:
                    Log.Debug("%s: OK - %s", cname, status["version"])
                else:
                    # the format string has two placeholders; log the unusable status response itself
                    Log.Debug("%s: NOT WORKING - %s", cname, status)
    except Exception:
        Log.Debug("Something went really wrong when evaluating Sonarr/Radarr: %s", traceback.format_exc())
|
||||
|
||||
# fixme: check existence of and OS access to logs
|
||||
Log.Debug("----- Environment -----")
|
||||
Log.Debug("Platform: %s", Core.runtime.platform)
|
||||
Log.Debug("OS: %s", Core.runtime.os)
|
||||
Log.Debug("----- Environment -----")
|
||||
Log.Debug("Python: %s", platform.python_version())
|
||||
for key, value in os.environ.iteritems():
|
||||
if key.startswith("PLEX") or key.startswith("SZ_"):
|
||||
if "TOKEN" in key:
|
||||
@@ -239,5 +398,6 @@ def ValidatePrefs():
|
||||
Log.Debug("Setting log-level to %s", Prefs["log_level"])
|
||||
logger.register_logging_handler(DEPENDENCY_MODULE_NAMES, level=Prefs["log_level"])
|
||||
Core.log.setLevel(logging.getLevelName(Prefs["log_level"]))
|
||||
os.environ['U1pfT01EQl9LRVk'] = '789CF30DAC2C8B0AF433F5C9AD34290A712DF30D7135F12D0FB3E502006FDE081E'
|
||||
|
||||
return
|
||||
|
||||
@@ -1,25 +1,34 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
import types
|
||||
import datetime
|
||||
import subprocess
|
||||
import os
|
||||
import operator
|
||||
|
||||
from support.items import get_kind, get_item_thumb
|
||||
from support.helpers import get_video_display_title
|
||||
from func import enable_channel_wrapper, route_wrapper, register_route_function
|
||||
from subzero.language import Language
|
||||
from support.i18n import is_localized_string, _
|
||||
from support.items import get_kind, get_item_thumb, get_item, get_item_kind_from_item, refresh_item
|
||||
from support.helpers import get_video_display_title, pad_title, display_language, quote_args, is_stream_forced
|
||||
from support.ignore import ignore_list
|
||||
from support.lib import get_intent
|
||||
from support.config import config
|
||||
from subzero.constants import ICON_SUB, ICON
|
||||
from support.plex_media import get_part, get_plex_metadata
|
||||
from support.scheduler import scheduler
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles
|
||||
|
||||
from subliminal_patch.subtitle import ModifiedSubtitle
|
||||
|
||||
default_thumb = R(ICON_SUB)
|
||||
main_icon = ICON if not config.is_development else "icon-dev.jpg"
|
||||
|
||||
|
||||
def should_display_ignore(items, previous=None):
    """
    Decide whether ignore options should be shown for :items:.

    :param items: items to inspect; passed to get_kind to determine their kind
    :param previous: kind of the previous menu level, if any
    :return: :items: itself if it is falsy; otherwise True for shows and seasons, and for episodes only when
        the previous level was not a season; False in all other cases
    """
    kind = get_kind(items)

    # mirror `items and (...)`: a falsy items value is returned as-is
    if not items:
        return items

    if kind in ("show", "season"):
        return True

    return kind == "episode" and previous != "season"
|
||||
# noinspection PyUnboundLocalVariable
|
||||
route = route_wrapper
|
||||
# noinspection PyUnboundLocalVariable
|
||||
handler = enable_channel_wrapper(handler)
|
||||
|
||||
|
||||
def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None, add_kind=True):
|
||||
@@ -41,10 +50,15 @@ def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None
|
||||
|
||||
in_list = rating_key in ignore_list[use_kind]
|
||||
|
||||
t = u"Ignore %(kind)s \"%(title)s\""
|
||||
if in_list:
|
||||
t = u"Un-ignore %(kind)s \"%(title)s\""
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(callback_menu, kind=use_kind, rating_key=rating_key, title=title),
|
||||
title=u"%s %s \"%s\"" % (
|
||||
"Un-Ignore" if in_list else "Ignore", ignore_list.verbose(kind) if add_kind else "", unicode(title))
|
||||
key=Callback(callback_menu, kind=use_kind, sure=False, todo="not_set", rating_key=rating_key, title=title),
|
||||
title=_(t,
|
||||
kind=ignore_list.verbose(kind) if add_kind else "",
|
||||
title=unicode(title))
|
||||
)
|
||||
)
|
||||
|
||||
@@ -66,7 +80,7 @@ def dig_tree(oc, items, menu_callback, menu_determination_callback=None, force_r
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(menu_callback or menu_determination_callback(kind, item, pass_kwargs=pass_kwargs), title=title,
|
||||
rating_key=force_rating_key or key, **add_kwargs),
|
||||
title=title, thumb=thumb, summary=summary
|
||||
title=pad_title(title) if kind in ("show", "season") else title, thumb=thumb, summary=summary
|
||||
))
|
||||
return oc
|
||||
|
||||
@@ -84,8 +98,8 @@ def set_refresh_menu_state(state_or_media, media_type="movies"):
|
||||
Dict["current_refresh_state"] = None
|
||||
return
|
||||
|
||||
if isinstance(state_or_media, types.StringTypes):
|
||||
Dict["current_refresh_state"] = state_or_media
|
||||
if isinstance(state_or_media, types.StringTypes) or is_localized_string(state_or_media):
|
||||
Dict["current_refresh_state"] = unicode(state_or_media)
|
||||
return
|
||||
|
||||
media = state_or_media
|
||||
@@ -96,14 +110,19 @@ def set_refresh_menu_state(state_or_media, media_type="movies"):
|
||||
for episode in media.seasons[season].episodes:
|
||||
ep = media.seasons[season].episodes[episode]
|
||||
media_id = ep.id
|
||||
title = get_video_display_title("show", ep.title, parent_title=media.title, season=int(season), episode=int(episode))
|
||||
title = get_video_display_title(_("show"), ep.title, parent_title=media.title, season=int(season), episode=int(episode))
|
||||
else:
|
||||
title = get_video_display_title("movie", media.title)
|
||||
title = get_video_display_title(_("movie"), media.title)
|
||||
|
||||
intent = get_intent()
|
||||
force_refresh = intent.get("force", media_id)
|
||||
|
||||
Dict["current_refresh_state"] = u"%sRefreshing %s" % ("Force-" if force_refresh else "", unicode(title))
|
||||
t = u"Refreshing %(title)s"
|
||||
if force_refresh:
|
||||
t = u"Force-refreshing %(title)s"
|
||||
|
||||
Dict["current_refresh_state"] = unicode(_(t,
|
||||
title=unicode(title)))
|
||||
|
||||
|
||||
def get_item_task_data(task_name, rating_key, language):
|
||||
@@ -112,55 +131,80 @@ def get_item_task_data(task_name, rating_key, language):
|
||||
return search_results.get(language)
|
||||
|
||||
|
||||
def enable_channel_wrapper(func):
    """
    Make a decorator factory (route or handler) conditional on the channel being enabled.

    The returned callable forwards to :func: when ``config.enable_channel`` is set or when it is called with a
    truthy ``enforce_route`` keyword; otherwise it yields a decorator that returns the decorated function
    unchanged.

    :param func: original wrapper
    :return: callable with the same call signature as :func: (plus the optional ``enforce_route`` keyword)
    """
    def passthrough(*args, **kwargs):
        # ignores the decorator arguments and builds a decorator which gives back
        # its first positional argument, i.e. the originally to-be-wrapped function
        return lambda *a, **k: a[0]

    def wrap(*args, **kwargs):
        # enforce_route is consumed here and never reaches the underlying wrapper
        enforce_route = kwargs.pop("enforce_route", None)
        if config.enable_channel or enforce_route:
            chosen = func
        else:
            chosen = passthrough
        return chosen(*args, **kwargs)

    return wrap
|
||||
|
||||
|
||||
def debounce(func):
|
||||
"""
|
||||
prevent func from being called twice with the same arguments
|
||||
:param func:
|
||||
:return:
|
||||
"""
|
||||
def get_lookup_key(args, kwargs):
|
||||
func_name = list(args).pop(0).__name__
|
||||
return tuple([func_name] + [(key, value) for key, value in kwargs.iteritems()])
|
||||
|
||||
def wrap(*args, **kwargs):
|
||||
if "randomize" in kwargs:
|
||||
if not "menu_history" in Dict:
|
||||
Dict["menu_history"] = {}
|
||||
func.debounce = True
|
||||
|
||||
key = get_lookup_key([func] + list(args), kwargs)
|
||||
if key in Dict["menu_history"]:
|
||||
Log.Debug("not triggering %s twice with %s, %s" % (func, args, kwargs))
|
||||
return ObjectContainer()
|
||||
else:
|
||||
Dict["menu_history"][key] = datetime.datetime.now() + datetime.timedelta(days=1)
|
||||
Dict.Save()
|
||||
return func(*args, **kwargs)
|
||||
return func
|
||||
|
||||
return wrap
|
||||
|
||||
def extract_embedded_sub(**kwargs):
    """
    Extract one embedded subtitle stream of a media part with the Plex transcoder and store the result.

    Required kwargs: ``rating_key`` (read, not removed), ``part_id``, ``stream_index`` (compared against
    ``str(stream.index)``, so pass it as a string), ``language`` (parsed with ``Language.fromietf``).
    Optional kwargs: ``with_mods`` (default False), ``refresh`` (default True), ``set_current`` (default True),
    ``plex_item``, ``part``, ``scanned_videos``. When ``plex_item``/``part`` are given they are used as-is and
    no lookup is performed.

    :param kwargs: see above
    :return: True if at least one stream produced output that was handed to save_subtitles, else False
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs.pop("part_id")
    stream_index = kwargs.pop("stream_index")
    with_mods = kwargs.pop("with_mods", False)
    language = Language.fromietf(kwargs.pop("language"))
    refresh = kwargs.pop("refresh", True)
    set_current = kwargs.pop("set_current", True)

    # look the item/part up only if the caller did not supply them; a default passed to
    # dict.pop() would be evaluated on every call
    if "plex_item" in kwargs:
        plex_item = kwargs.pop("plex_item")
    else:
        plex_item = get_item(rating_key)
    item_type = get_item_kind_from_item(plex_item)

    if "part" in kwargs:
        part = kwargs.pop("part")
    else:
        part = get_part(plex_item, part_id)
    scanned_videos = kwargs.pop("scanned_videos", None)

    any_successful = False

    if not part:
        return any_successful

    if not scanned_videos:
        metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
        scanned_videos = scan_videos([metadata], ignore_all=True, skip_hashing=True)

    for stream in part.streams:
        # subtitle stream
        if str(stream.index) != stream_index:
            continue

        is_forced = is_stream_forced(stream)
        bn = os.path.basename(part.file)

        set_refresh_menu_state(_(u"Extracting subtitle %(stream_index)s of %(filename)s",
                                 stream_index=stream_index,
                                 filename=bn))
        Log.Info(u"Extracting stream %s (%s) of %s", stream_index, display_language(language), bn)

        # mov_text streams are written out as srt; every other codec keeps its own name as output format
        out_codec = stream.codec if stream.codec != "mov_text" else "srt"

        args = [
            config.plex_transcoder, "-i", part.file, "-map", "0:%s" % stream_index, "-f", out_codec, "-"
        ]
        output = None
        try:
            # NOTE(review): runs through the shell; relies on quote_args for correct quoting of part.file
            output = subprocess.check_output(quote_args(args), stderr=subprocess.PIPE, shell=True)
        except Exception:
            Log.Error("Extraction failed: %s", traceback.format_exc())

        if not output:
            # don't leave the "Extracting subtitle ..." state behind when nothing was extracted
            set_refresh_menu_state(None)
            continue

        subtitle = ModifiedSubtitle(language, mods=config.default_mods if with_mods else None)
        subtitle.content = output
        subtitle.provider_name = "embedded"
        subtitle.id = "stream_%s" % stream_index
        subtitle.score = 0
        subtitle.set_encoding("utf-8")

        # fixme: speedup video; only video.name is needed
        save_successful = save_subtitles(scanned_videos, {scanned_videos.keys()[0]: [subtitle]}, mode="m",
                                         set_current=set_current, is_forced=is_forced)
        set_refresh_menu_state(None)

        if save_successful and refresh:
            refresh_item(rating_key)

        any_successful = True

    return any_successful
|
||||
|
||||
|
||||
class SZObjectContainer(ObjectContainer):
|
||||
@@ -194,10 +238,10 @@ class SubFolderObjectContainer(ObjectContainer):
|
||||
from support.helpers import pad_title, timestamp
|
||||
self.add(DirectoryObject(
|
||||
key=Callback(fatality, force_title=" ", randomize=timestamp()),
|
||||
title=pad_title("<< Back to home"),
|
||||
summary="Current state: %s; Last state: %s" % (
|
||||
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
|
||||
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
|
||||
title=pad_title(_("<< Back to home")),
|
||||
summary=_("Current state: %s; Last state: %s",
|
||||
(Dict["current_refresh_state"] or _("Idle")) if "current_refresh_state" in Dict else _("Idle"),
|
||||
(Dict["last_refresh_state"] or _("None")) if "last_refresh_state" in Dict else _("None")
|
||||
)
|
||||
))
|
||||
|
||||
@@ -215,4 +259,4 @@ class ZipObject(ObjectClass):
|
||||
self.SetHeader("Content-Disposition",
|
||||
'attachment; filename="' + datetime.datetime.now().strftime("Logs_%y%m%d_%H-%M-%S.zip")
|
||||
+ '"')
|
||||
return self.zipdata
|
||||
return self.zipdata
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
# coding=utf-8
|
||||
|
||||
from subzero.constants import PREFIX
|
||||
from menu_helpers import debounce, set_refresh_menu_state
|
||||
from menu_helpers import debounce, set_refresh_menu_state, route
|
||||
from support.items import refresh_item
|
||||
from support.helpers import timestamp
|
||||
from support.i18n import _
|
||||
|
||||
|
||||
@route(PREFIX + '/item/{rating_key}')
|
||||
@route(PREFIX + '/item/refresh/{rating_key}/force', force=True)
|
||||
@route(PREFIX + '/item/refresh/{rating_key}')
|
||||
@debounce
|
||||
def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=False, refresh_kind=None,
|
||||
previous_rating_key=None, timeout=8000, randomize=None, trigger=True):
|
||||
@@ -14,9 +16,17 @@ def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=Fal
|
||||
from interface.main import fatality
|
||||
header = " "
|
||||
if trigger:
|
||||
set_refresh_menu_state(u"Triggering %sRefresh for %s" % ("Force-" if force else "", item_title))
|
||||
# pick the status text; the forced variant has to be assigned to t, otherwise the
# plain "refresh" text is shown for forced refreshes as well
t = u"Triggering refresh for %(title)s"
if force:
    t = u"Triggering forced refresh for %(title)s"
set_refresh_menu_state(_(t,
                         title=item_title))
|
||||
Thread.Create(refresh_item, rating_key=rating_key, force=force, refresh_kind=refresh_kind,
|
||||
parent_rating_key=previous_rating_key, timeout=int(timeout))
|
||||
|
||||
header = u"%s of item %s triggered" % ("Refresh" if not force else "Forced-refresh", rating_key)
|
||||
t = u"Refresh of item %(item_id)s triggered"
|
||||
if force:
|
||||
t = u"Forced refresh of item %(item_id)s triggered"
|
||||
header = _(t,
|
||||
item_id=rating_key)
|
||||
return fatality(randomize=timestamp(), header=header, replace_parent=True)
|
||||
|
||||
@@ -3,22 +3,24 @@
|
||||
import traceback
|
||||
import types
|
||||
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, route
|
||||
from subzero.modification import registry as mod_registry, SubtitleModifications
|
||||
from subzero.constants import PREFIX
|
||||
from support.plex_media import get_plex_metadata, scan_videos
|
||||
from support.plex_media import get_plex_metadata
|
||||
from support.scanning import scan_videos
|
||||
from support.helpers import timestamp, pad_title
|
||||
from support.items import get_current_sub, set_mods_for_part
|
||||
from support.i18n import _
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_mods/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleModificationsMenu(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
language = kwargs["language"]
|
||||
lang_instance = Language.fromietf(language)
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
kwargs.pop("randomize")
|
||||
|
||||
@@ -29,8 +31,8 @@ def SubtitleModificationsMenu(**kwargs):
|
||||
from interface.item_details import SubtitleOptionsMenu
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleOptionsMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"< Back to subtitle options for: %s" % kwargs["title"],
|
||||
summary=kwargs["current_data"],
|
||||
title=_(u"< Back to subtitle options for: %s", kwargs["title"]),
|
||||
summary=unicode(kwargs["current_data"]),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
@@ -41,47 +43,57 @@ def SubtitleModificationsMenu(**kwargs):
|
||||
if mod.exclusive and identifier in current_mods:
|
||||
continue
|
||||
|
||||
if mod.languages and lang_instance not in mod.languages:
|
||||
continue
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=identifier, mode="add", randomize=timestamp(), **kwargs),
|
||||
title=pad_title(mod.description), summary=mod.long_description or ""
|
||||
title=pad_title(_(mod.description)), summary=_(mod.long_description) or ""
|
||||
))
|
||||
|
||||
fps_mod = SubtitleModifications.get_mod_class("change_FPS")
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleFPSModMenu, randomize=timestamp(), **kwargs),
|
||||
title=pad_title(fps_mod.description), summary=fps_mod.long_description or ""
|
||||
title=pad_title(_(fps_mod.description)), summary=_(fps_mod.long_description) or ""
|
||||
))
|
||||
|
||||
shift_mod = SubtitleModifications.get_mod_class("shift_offset")
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
|
||||
title=pad_title(shift_mod.description), summary=shift_mod.long_description or ""
|
||||
title=pad_title(_(shift_mod.description)), summary=_(shift_mod.long_description) or ""
|
||||
))
|
||||
|
||||
color_mod = SubtitleModifications.get_mod_class("color")
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleColorModMenu, randomize=timestamp(), **kwargs),
|
||||
title=pad_title(color_mod.description), summary=color_mod.long_description or ""
|
||||
title=pad_title(_(color_mod.description)), summary=_(color_mod.long_description) or ""
|
||||
))
|
||||
|
||||
if current_mods:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=None, mode="remove_last", randomize=timestamp(), **kwargs),
|
||||
title=pad_title("Remove last applied mod (%s)" % current_mods[-1]),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_mods) if current_mods else "none")
|
||||
title=pad_title(_("Remove last applied mod (%s)", current_mods[-1])),
|
||||
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleListMods, randomize=timestamp(), **kwargs),
|
||||
title=pad_title("Manage applied mods"),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_mods))
|
||||
title=pad_title(_("Manage applied mods")),
|
||||
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods))
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleReapplyMods, randomize=timestamp(), **kwargs),
|
||||
title=pad_title(_("Reapply applied mods")),
|
||||
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=None, mode="clear", randomize=timestamp(), **kwargs),
|
||||
title=pad_title("Restore original version"),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_mods) if current_mods else "none")
|
||||
title=pad_title(_("Restore original version")),
|
||||
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
|
||||
))
|
||||
|
||||
storage.destroy()
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@@ -97,28 +109,31 @@ def SubtitleFPSModMenu(**kwargs):
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
|
||||
title="< Back to subtitle modification menu"
|
||||
title=_("< Back to subtitle modification menu")
|
||||
))
|
||||
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
target_fps = plex_part.fps
|
||||
|
||||
for fps in ["23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]:
|
||||
for fps in ["23.980", "23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]:
|
||||
if float(fps) == float(target_fps):
|
||||
continue
|
||||
|
||||
if float(fps) > float(target_fps):
|
||||
indicator = "subs constantly getting faster"
|
||||
indicator = _("subs constantly getting faster")
|
||||
else:
|
||||
indicator = "subs constantly getting slower"
|
||||
indicator = _("subs constantly getting slower")
|
||||
|
||||
mod_ident = SubtitleModifications.get_mod_signature("change_FPS", **{"from": fps, "to": target_fps})
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs),
|
||||
title="%s fps -> %s fps (%s)" % (fps, target_fps, indicator)
|
||||
title=_("%(from_fps)s fps -> %(to_fps)s fps (%(slower_or_faster_indicator)s)",
|
||||
from_fps=fps,
|
||||
to_fps=target_fps,
|
||||
slower_or_faster_indicator=indicator)
|
||||
))
|
||||
|
||||
return oc
|
||||
@@ -136,13 +151,13 @@ def SubtitleShiftModUnitMenu(**kwargs):
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
|
||||
title="< Back to subtitle modifications"
|
||||
title=_("< Back to subtitle modifications")
|
||||
))
|
||||
|
||||
for unit, title in POSSIBLE_UNITS:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleShiftModMenu, unit=unit, randomize=timestamp(), **kwargs),
|
||||
title="Adjust by %s" % title
|
||||
title=_("Adjust by %(time_and_unit)s", time_and_unit=title)
|
||||
))
|
||||
|
||||
return oc
|
||||
@@ -159,16 +174,16 @@ def SubtitleShiftModMenu(unit=None, **kwargs):
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
|
||||
title="< Back to unit selection"
|
||||
title=_("< Back to unit selection")
|
||||
))
|
||||
|
||||
rng = []
|
||||
if unit == "h":
|
||||
rng = range(-10, 11)
|
||||
rng = list(reversed(range(-10, 0))) + list(reversed(range(1, 11)))
|
||||
elif unit in ("m", "s"):
|
||||
rng = range(-15, 15)
|
||||
rng = list(reversed(range(-15, 0))) + list(reversed(range(1, 16)))
|
||||
elif unit == "ms":
|
||||
rng = range(-900, 1000, 100)
|
||||
rng = list(reversed(range(-900, 0, 100))) + list(reversed(range(100, 1000, 100)))
|
||||
|
||||
for i in rng:
|
||||
if i == 0:
|
||||
@@ -193,7 +208,7 @@ def SubtitleColorModMenu(**kwargs):
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
|
||||
title="< Back to subtitle modification menu"
|
||||
title=_("< Back to subtitle modification menu")
|
||||
))
|
||||
|
||||
for color, code in color_mod.colors.iteritems():
|
||||
@@ -225,6 +240,22 @@ def SubtitleSetMods(mods=None, mode=None, **kwargs):
|
||||
return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_reapply_mods/{rating_key}/{part_id}', force=bool)
@debounce
def SubtitleReapplyMods(**kwargs):
    """
    Menu endpoint: call set_mods_for_part with an empty mod list in "add" mode, then show the
    subtitle modifications menu again.

    Reads ``rating_key``, ``part_id``, ``language`` and ``item_type`` from kwargs (they stay in kwargs and are
    forwarded); ``randomize`` is removed and replaced with a fresh timestamp.

    :param kwargs: route/callback parameters as described above
    :return: whatever SubtitleModificationsMenu returns
    """
    item_key, part, lang_code, kind = [
        kwargs[name] for name in ("rating_key", "part_id", "language", "item_type")
    ]

    # presumably an empty list in "add" mode re-runs the mods already stored for the subtitle -
    # verify against set_mods_for_part
    set_mods_for_part(item_key, part, Language.fromietf(lang_code), kind, [], mode="add")

    del kwargs["randomize"]
    return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_list_mods/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleListMods(**kwargs):
|
||||
@@ -239,13 +270,15 @@ def SubtitleListMods(**kwargs):
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
|
||||
title="< Back to subtitle modifications"
|
||||
title=_("< Back to subtitle modifications")
|
||||
))
|
||||
|
||||
for identifier in current_sub.mods:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=identifier, mode="remove", randomize=timestamp(), **kwargs),
|
||||
title="Remove: %s" % identifier
|
||||
title=_("Remove: %(mod_name)s", mod_name=identifier)
|
||||
))
|
||||
|
||||
storage.destroy()
|
||||
|
||||
return oc
|
||||
@@ -13,6 +13,12 @@ import lib
|
||||
|
||||
sys.modules["support.lib"] = lib
|
||||
|
||||
import i18n
|
||||
|
||||
sys.modules["support.i18n"] = i18n
|
||||
|
||||
helpers._ = i18n._
|
||||
|
||||
import plex_media
|
||||
sys.modules["support.plex_media"] = plex_media
|
||||
|
||||
@@ -28,22 +34,25 @@ import items
|
||||
|
||||
sys.modules["support.items"] = items
|
||||
|
||||
import missing_subtitles
|
||||
|
||||
sys.modules["support.missing_subtitles"] = missing_subtitles
|
||||
|
||||
import scheduler
|
||||
|
||||
sys.modules["support.scheduler"] = scheduler
|
||||
|
||||
import tasks
|
||||
|
||||
sys.modules["support.tasks"] = tasks
|
||||
|
||||
import storage
|
||||
|
||||
sys.modules["support.storage"] = storage
|
||||
|
||||
import scanning
|
||||
sys.modules["support.scanning"] = scanning
|
||||
|
||||
import missing_subtitles
|
||||
|
||||
sys.modules["support.missing_subtitles"] = missing_subtitles
|
||||
|
||||
import tasks
|
||||
|
||||
sys.modules["support.tasks"] = tasks
|
||||
|
||||
import ignore
|
||||
|
||||
sys.modules["support.ignore"] = ignore
|
||||
@@ -58,3 +67,6 @@ sys.modules["support.data"] = data
|
||||
|
||||
import activities
|
||||
sys.modules["support.activities"] = activities
|
||||
|
||||
import download
|
||||
sys.modules["support.download"] = download
|
||||
@@ -3,25 +3,26 @@ from wraptor.decorators import throttle
|
||||
from config import config
|
||||
from items import get_item, get_item_kind_from_item, refresh_item
|
||||
|
||||
from plex_activity import Activity
|
||||
from plex_activity.sources.s_logging.main import Logging as Activity_Logging
|
||||
Activity = None
|
||||
try:
|
||||
from plex_activity import Activity
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
class PlexActivityManager(object):
|
||||
def start(self):
|
||||
activity_sources_enabled = None
|
||||
|
||||
if not Activity:
|
||||
return
|
||||
|
||||
if config.plex_token:
|
||||
from plex import Plex
|
||||
Plex.configuration.defaults.authentication(config.plex_token)
|
||||
activity_sources_enabled = ["websocket"]
|
||||
Activity.on('websocket.playing', self.on_playing)
|
||||
|
||||
elif config.server_log_path:
|
||||
Activity_Logging.add_hint(config.server_log_path, None)
|
||||
activity_sources_enabled = ["logging"]
|
||||
Activity.on('logging.playing', self.on_playing)
|
||||
|
||||
if activity_sources_enabled:
|
||||
Activity.start(activity_sources_enabled)
|
||||
|
||||
@@ -38,6 +39,13 @@ class PlexActivityManager(object):
|
||||
return
|
||||
|
||||
rating_key = info["ratingKey"]
|
||||
|
||||
# only use integer based rating keys
|
||||
try:
|
||||
int(rating_key)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
if rating_key in Dict["last_played_items"] and rating_key != Dict["last_played_items"][0]:
|
||||
# shift last played
|
||||
Dict["last_played_items"].insert(0,
|
||||
@@ -56,10 +64,12 @@ class PlexActivityManager(object):
|
||||
|
||||
debug_msg = "Started playing %s. Refreshing it." % rating_key
|
||||
|
||||
key_to_refresh = None
|
||||
if config.activity_mode in ["refresh", "next_episode", "hybrid"]:
|
||||
# todo: cleanup debug messages for hybrid-plus
|
||||
|
||||
keys_to_refresh = []
|
||||
if config.activity_mode in ["refresh", "next_episode", "hybrid", "hybrid-plus"]:
|
||||
# next episode or next episode and current movie
|
||||
if config.activity_mode in ["next_episode", "hybrid"]:
|
||||
if config.activity_mode in ["next_episode", "hybrid", "hybrid-plus"]:
|
||||
plex_item = get_item(rating_key)
|
||||
if not plex_item:
|
||||
Log.Warn("Can't determine media type of %s, skipping" % rating_key)
|
||||
@@ -67,20 +77,24 @@ class PlexActivityManager(object):
|
||||
|
||||
if get_item_kind_from_item(plex_item) == "episode":
|
||||
next_ep = self.get_next_episode(rating_key)
|
||||
if config.activity_mode == "hybrid-plus":
|
||||
keys_to_refresh.append(rating_key)
|
||||
if next_ep:
|
||||
key_to_refresh = next_ep.rating_key
|
||||
keys_to_refresh.append(next_ep.rating_key)
|
||||
debug_msg = "Started playing %s. Refreshing next episode (%s, S%02iE%02i)." % \
|
||||
(rating_key, next_ep.rating_key, int(next_ep.season.index), int(next_ep.index))
|
||||
|
||||
else:
|
||||
if config.activity_mode == "hybrid":
|
||||
key_to_refresh = rating_key
|
||||
keys_to_refresh.append(rating_key)
|
||||
elif config.activity_mode == "refresh":
|
||||
key_to_refresh = rating_key
|
||||
keys_to_refresh.append(rating_key)
|
||||
|
||||
if key_to_refresh:
|
||||
if keys_to_refresh:
|
||||
Log.Debug(debug_msg)
|
||||
refresh_item(key_to_refresh)
|
||||
Log.Debug("Refreshing %s", keys_to_refresh)
|
||||
for key in keys_to_refresh:
|
||||
refresh_item(key)
|
||||
|
||||
def get_next_episode(self, rating_key):
|
||||
plex_item = get_item(rating_key)
|
||||
|
||||
+508
-60
@@ -1,21 +1,41 @@
|
||||
# coding=utf-8
|
||||
|
||||
import copy
|
||||
import os
|
||||
import re
|
||||
import inspect
|
||||
|
||||
import sys
|
||||
import rarfile
|
||||
import jstyleson
|
||||
import datetime
|
||||
|
||||
import subliminal
|
||||
import subliminal_patch
|
||||
from babelfish import Language
|
||||
import subzero.constants
|
||||
import lib
|
||||
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded, AuthenticationError
|
||||
|
||||
from subliminal_patch.core import is_windows_special_path
|
||||
from whichdb import whichdb
|
||||
|
||||
from subliminal_patch.exceptions import TooManyRequests
|
||||
from subzero.language import Language
|
||||
from subliminal.cli import MutexLock
|
||||
from subzero.lib.io import FileIO, get_viable_encoding
|
||||
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW
|
||||
from subzero.lib.dict import Dicked
|
||||
from subzero.util import get_root_path
|
||||
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW, MEDIA_TYPE_TO_STRING
|
||||
from dogpile.cache.region import register_backend as register_cache_backend
|
||||
from lib import Plex
|
||||
from helpers import check_write_permissions, cast_bool
|
||||
from helpers import check_write_permissions, cast_bool, cast_int, mswindows
|
||||
|
||||
SUBTITLE_EXTS = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'txt', 'psb']
|
||||
register_cache_backend(
|
||||
"subzero.cache.file", "subzero.cache_backends.file", "SZFileBackend")
|
||||
|
||||
SUBTITLE_EXTS_BASE = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'psb',
|
||||
'vtt']
|
||||
SUBTITLE_EXTS = SUBTITLE_EXTS_BASE + ["txt"]
|
||||
|
||||
TEXT_SUBTITLE_EXTS = ("srt", "ass", "ssa", "vtt", "mov_text")
|
||||
VIDEO_EXTS = ['3g2', '3gp', 'asf', 'asx', 'avc', 'avi', 'avs', 'bivx', 'bup', 'divx', 'dv', 'dvr-ms', 'evo', 'fli',
|
||||
'flv',
|
||||
'm2t', 'm2ts', 'm2v', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'mts', 'nsv', 'nuv', 'ogm', 'ogv', 'tp',
|
||||
@@ -36,7 +56,27 @@ def int_or_default(s, default):
|
||||
return default
|
||||
|
||||
|
||||
VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable)
|
||||
|
||||
PROVIDER_THROTTLE_MAP = {
|
||||
"default": {
|
||||
TooManyRequests: (datetime.timedelta(hours=1), "1 hour"),
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
|
||||
ServiceUnavailable: (datetime.timedelta(minutes=20), "20 minutes"),
|
||||
},
|
||||
"opensubtitles": {
|
||||
TooManyRequests: (datetime.timedelta(hours=3), "3 hours"),
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"),
|
||||
},
|
||||
"addic7ed": {
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
|
||||
TooManyRequests: (datetime.timedelta(minutes=5), "5 minutes"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class Config(object):
|
||||
libraries_root = None
|
||||
plugin_info = ""
|
||||
version = None
|
||||
full_version = None
|
||||
@@ -48,6 +88,13 @@ class Config(object):
|
||||
universal_plex_token = None
|
||||
plex_token = None
|
||||
is_development = False
|
||||
dbm_supported = False
|
||||
pms_request_timeout = 15
|
||||
low_impact_mode = False
|
||||
new_style_cache = False
|
||||
pack_cache_dir = None
|
||||
advanced = None
|
||||
debug_i18n = False
|
||||
|
||||
enable_channel = True
|
||||
enable_agent = True
|
||||
@@ -56,10 +103,8 @@ class Config(object):
|
||||
lock_advanced_menu = False
|
||||
locked = False
|
||||
pin_valid_minutes = 10
|
||||
lang_list = None
|
||||
subtitle_destination_folder = None
|
||||
providers = None
|
||||
provider_settings = None
|
||||
subtitle_formats = None
|
||||
max_recent_items_per_library = 200
|
||||
permissions_ok = False
|
||||
missing_permissions = None
|
||||
@@ -70,26 +115,43 @@ class Config(object):
|
||||
sections = None
|
||||
enabled_sections = None
|
||||
remove_hi = False
|
||||
remove_tags = False
|
||||
fix_ocr = False
|
||||
fix_common = False
|
||||
reverse_rtl = False
|
||||
colors = ""
|
||||
enforce_encoding = False
|
||||
chmod = None
|
||||
forced_only = False
|
||||
exotic_ext = False
|
||||
treat_und_as_first = False
|
||||
subtitle_sub_dir = None, None
|
||||
ext_match_strictness = False
|
||||
default_mods = None
|
||||
debug_mods = False
|
||||
react_to_activities = False
|
||||
activity_mode = None
|
||||
subtitles_save_to = None
|
||||
no_refresh = False
|
||||
plex_transcoder = None
|
||||
refiner_settings = None
|
||||
exact_filenames = False
|
||||
only_one = False
|
||||
embedded_auto_extract = False
|
||||
ietf_as_alpha3 = False
|
||||
unrar = None
|
||||
adv_cfg_path = None
|
||||
|
||||
store_recently_played_amount = 20
|
||||
store_recently_played_amount = 40
|
||||
|
||||
initialized = False
|
||||
|
||||
def initialize(self):
|
||||
self.libraries_root = os.path.abspath(os.path.join(get_root_path(), ".."))
|
||||
self.init_libraries()
|
||||
|
||||
if is_windows_special_path:
|
||||
Log.Warn("The Plex metadata folder is residing inside a folder with special characters. "
|
||||
"Multithreading and playback activities will be disabled.")
|
||||
|
||||
self.fs_encoding = get_viable_encoding()
|
||||
self.plugin_info = self.get_plugin_info()
|
||||
self.is_development = self.get_dev_mode()
|
||||
@@ -101,18 +163,25 @@ class Config(object):
|
||||
self.data_items_path = os.path.join(self.data_path, "DataItems")
|
||||
self.universal_plex_token = self.get_universal_plex_token()
|
||||
self.plex_token = os.environ.get("PLEXTOKEN", self.universal_plex_token)
|
||||
subzero.constants.DEFAULT_TIMEOUT = lib.DEFAULT_TIMEOUT = self.pms_request_timeout = \
|
||||
min(cast_int(Prefs['pms_request_timeout'], 15), 45)
|
||||
self.low_impact_mode = cast_bool(Prefs['low_impact_mode'])
|
||||
self.new_style_cache = cast_bool(Prefs['new_style_cache'])
|
||||
self.pack_cache_dir = self.get_pack_cache_dir()
|
||||
self.advanced = self.get_advanced_config()
|
||||
self.debug_i18n = self.advanced.debug_i18n
|
||||
|
||||
os.environ["SZ_USER_AGENT"] = self.get_user_agent()
|
||||
|
||||
self.setup_proxies()
|
||||
self.set_plugin_mode()
|
||||
self.set_plugin_lock()
|
||||
self.set_activity_modes()
|
||||
self.parse_rename_mode()
|
||||
|
||||
self.lang_list = self.get_lang_list()
|
||||
self.subtitle_destination_folder = self.get_subtitle_destination_folder()
|
||||
self.subtitle_formats = self.get_subtitle_formats()
|
||||
self.forced_only = cast_bool(Prefs["subtitles.only_foreign"])
|
||||
self.providers = self.get_providers()
|
||||
self.provider_settings = self.get_provider_settings()
|
||||
self.max_recent_items_per_library = int_or_default(Prefs["scheduler.max_recent_items_per_library"], 2000)
|
||||
self.sections = list(Plex["library"].sections())
|
||||
self.missing_permissions = []
|
||||
@@ -122,40 +191,161 @@ class Config(object):
|
||||
self.permissions_ok = self.check_permissions()
|
||||
self.notify_executable = self.check_notify_executable()
|
||||
self.remove_hi = cast_bool(Prefs['subtitles.remove_hi'])
|
||||
self.remove_tags = cast_bool(Prefs['subtitles.remove_tags'])
|
||||
self.fix_ocr = cast_bool(Prefs['subtitles.fix_ocr'])
|
||||
self.fix_common = cast_bool(Prefs['subtitles.fix_common'])
|
||||
self.reverse_rtl = cast_bool(Prefs['subtitles.reverse_rtl'])
|
||||
self.colors = Prefs['subtitles.colors'] if Prefs['subtitles.colors'] != "don't change" else None
|
||||
self.enforce_encoding = cast_bool(Prefs['subtitles.enforce_encoding'])
|
||||
self.chmod = self.check_chmod()
|
||||
self.exotic_ext = cast_bool(Prefs["subtitles.scan.exotic_ext"])
|
||||
self.treat_und_as_first = cast_bool(Prefs["subtitles.language.treat_und_as_first"])
|
||||
self.subtitle_sub_dir = self.get_subtitle_sub_dir()
|
||||
self.ext_match_strictness = self.determine_ext_sub_strictness()
|
||||
self.default_mods = self.get_default_mods()
|
||||
self.debug_mods = cast_bool(Prefs['log_debug_mods'])
|
||||
self.subtitles_save_to = Prefs['subtitles.save.filesystem']
|
||||
self.no_refresh = os.environ.get("SZ_NO_REFRESH", False)
|
||||
self.plex_transcoder = self.get_plex_transcoder()
|
||||
self.only_one = cast_bool(Prefs['subtitles.only_one'])
|
||||
self.embedded_auto_extract = cast_bool(Prefs["subtitles.embedded.autoextract"])
|
||||
self.ietf_as_alpha3 = cast_bool(Prefs["subtitles.language.ietf_normalize"])
|
||||
self.initialized = True
|
||||
|
||||
def init_cache(self):
|
||||
use_fallback_cache = True
|
||||
if Core.runtime.os != "Windows":
|
||||
def init_libraries(self):
|
||||
try_executables = []
|
||||
custom_unrar = os.environ.get("SZ_UNRAR_TOOL")
|
||||
if custom_unrar:
|
||||
if os.path.isfile(custom_unrar):
|
||||
try_executables.append(custom_unrar)
|
||||
|
||||
unrar_exe = None
|
||||
if Core.runtime.os == "Windows":
|
||||
unrar_exe = os.path.abspath(os.path.join(self.libraries_root, "Windows", "i386", "UnRAR", "UnRAR.exe"))
|
||||
|
||||
elif Core.runtime.os == "MacOSX":
|
||||
unrar_exe = os.path.abspath(os.path.join(self.libraries_root, "MacOSX", "i386", "UnRAR", "unrar"))
|
||||
|
||||
elif Core.runtime.os == "Linux":
|
||||
unrar_exe = os.path.abspath(os.path.join(self.libraries_root, "Linux", Core.runtime.cpu, "UnRAR", "unrar"))
|
||||
|
||||
if unrar_exe and os.path.isfile(unrar_exe):
|
||||
try_executables.append(unrar_exe)
|
||||
|
||||
try_executables.append("unrar")
|
||||
|
||||
for exe in try_executables:
|
||||
rarfile.UNRAR_TOOL = exe
|
||||
rarfile.ORIG_UNRAR_TOOL = exe
|
||||
try:
|
||||
subliminal.region.configure('dogpile.cache.dbm', expiration_time=datetime.timedelta(days=30),
|
||||
arguments={'filename': os.path.join(config.data_items_path, 'subzero.dbm'),
|
||||
'lock_factory': MutexLock})
|
||||
use_fallback_cache = False
|
||||
rarfile.custom_check([rarfile.UNRAR_TOOL], True)
|
||||
except:
|
||||
Log.Debug("custom check failed for: %s", exe)
|
||||
continue
|
||||
|
||||
rarfile.OPEN_ARGS = rarfile.ORIG_OPEN_ARGS
|
||||
rarfile.EXTRACT_ARGS = rarfile.ORIG_EXTRACT_ARGS
|
||||
rarfile.TEST_ARGS = rarfile.ORIG_TEST_ARGS
|
||||
Log.Info("Using UnRAR from: %s", exe)
|
||||
self.unrar = exe
|
||||
return
|
||||
|
||||
Log.Warn("UnRAR not found")
|
||||
|
||||
def init_cache(self):
|
||||
if self.new_style_cache:
|
||||
subliminal.region.configure('subzero.cache.file', expiration_time=datetime.timedelta(days=30),
|
||||
arguments={'appname': "sz_cache",
|
||||
'app_cache_dir': self.data_path})
|
||||
Log.Info("Using new style file based cache!")
|
||||
return
|
||||
|
||||
names = ['dbhash', 'gdbm', 'dbm']
|
||||
dbfn = None
|
||||
self.dbm_supported = False
|
||||
|
||||
# try importing dbm modules
|
||||
if Core.runtime.os != "Windows":
|
||||
impawrt = None
|
||||
try:
|
||||
impawrt = getattr(sys.modules['__main__'], "__builtins__").get("__import__")
|
||||
except:
|
||||
pass
|
||||
|
||||
if use_fallback_cache:
|
||||
Log.Warn("Not using file based cache!")
|
||||
subliminal.region.configure('dogpile.cache.memory')
|
||||
if impawrt:
|
||||
for name in names:
|
||||
try:
|
||||
impawrt(name)
|
||||
except:
|
||||
continue
|
||||
if not self.dbm_supported:
|
||||
self.dbm_supported = name
|
||||
break
|
||||
|
||||
if self.dbm_supported:
|
||||
# anydbm checks; try guessing the format and importing the correct module
|
||||
dbfn = os.path.join(config.data_items_path, 'subzero.dbm')
|
||||
db_which = whichdb(dbfn)
|
||||
if db_which is not None and db_which != "":
|
||||
try:
|
||||
impawrt(db_which)
|
||||
except ImportError:
|
||||
self.dbm_supported = False
|
||||
|
||||
if self.dbm_supported:
|
||||
try:
|
||||
subliminal.region.configure('dogpile.cache.dbm', expiration_time=datetime.timedelta(days=30),
|
||||
arguments={'filename': dbfn,
|
||||
'lock_factory': MutexLock})
|
||||
Log.Info("Using file based cache!")
|
||||
return
|
||||
except:
|
||||
self.dbm_supported = False
|
||||
|
||||
Log.Warn("Not using file based cache!")
|
||||
subliminal.region.configure('dogpile.cache.memory')
|
||||
|
||||
def sync_cache(self):
|
||||
if not self.new_style_cache:
|
||||
return
|
||||
Log.Debug("Syncing cache")
|
||||
subliminal.region.backend.sync()
|
||||
|
||||
def get_pack_cache_dir(self):
|
||||
pack_cache_dir = os.path.join(config.data_path, "pack_cache")
|
||||
if not os.path.isdir(pack_cache_dir):
|
||||
os.makedirs(pack_cache_dir)
|
||||
|
||||
return pack_cache_dir
|
||||
|
||||
def get_advanced_config(self):
|
||||
paths = []
|
||||
if Prefs['path_to_advanced_settings']:
|
||||
paths = [
|
||||
Prefs['path_to_advanced_settings'],
|
||||
os.path.join(Prefs['path_to_advanced_settings'], "advanced_settings.json")
|
||||
]
|
||||
|
||||
paths.append(os.path.join(config.data_path, "advanced_settings.json"))
|
||||
|
||||
for path in paths:
|
||||
if os.path.isfile(path):
|
||||
data = FileIO.read(path, "r")
|
||||
|
||||
d = Dicked(**jstyleson.loads(data))
|
||||
self.adv_cfg_path = path
|
||||
Log.Info(u"Using advanced settings from: %s", path)
|
||||
return d
|
||||
|
||||
return Dicked()
|
||||
|
||||
def set_log_paths(self):
|
||||
# find log handler
|
||||
for handler in Core.log.handlers:
|
||||
if getattr(getattr(handler, "__class__"), "__name__") in (
|
||||
'FileHandler', 'RotatingFileHandler', 'TimedRotatingFileHandler'):
|
||||
cls_name = getattr(getattr(handler, "__class__"), "__name__")
|
||||
if cls_name in ('FileHandler', 'RotatingFileHandler', 'TimedRotatingFileHandler'):
|
||||
plugin_log_file = handler.baseFilename
|
||||
if cls_name in ("RotatingFileHandler", "TimedRotatingFileHandler"):
|
||||
handler.backupCount = int_or_default(Prefs['log_rotate_keep'], 5)
|
||||
|
||||
if os.path.isfile(os.path.realpath(plugin_log_file)):
|
||||
self.plugin_log_path = plugin_log_file
|
||||
|
||||
@@ -174,18 +364,28 @@ class Config(object):
|
||||
except:
|
||||
Log.Warn("Couldn't determine Plex Token")
|
||||
else:
|
||||
Log("Did NOT find Preferences file - most likely Windows OS. Otherwise please check logfile and hierarchy.")
|
||||
Log.Warn("Did NOT find Preferences file - most likely Windows OS. Otherwise please check logfile and hierarchy.")
|
||||
|
||||
# fixme: windows
|
||||
|
||||
def set_plugin_mode(self):
|
||||
if Prefs["plugin_mode"] == "only agent":
|
||||
self.enable_agent = True
|
||||
self.enable_channel = True
|
||||
|
||||
# any provider enabled?
|
||||
if not self.providers:
|
||||
self.enable_agent = False
|
||||
self.enable_channel = False
|
||||
elif Prefs["plugin_mode"] == "only channel":
|
||||
Log.Warn("No providers enabled, disabling agent and interface!")
|
||||
return
|
||||
|
||||
if Prefs["plugin_mode2"] == "only agent":
|
||||
self.enable_channel = False
|
||||
elif Prefs["plugin_mode2"] == "only interface":
|
||||
self.enable_agent = False
|
||||
|
||||
def set_plugin_lock(self):
|
||||
if Prefs["plugin_pin_mode"] in ("channel menu", "advanced menu"):
|
||||
if Prefs["plugin_pin_mode2"] in ("interface", "advanced menu"):
|
||||
# check pin
|
||||
pin = Prefs["plugin_pin"]
|
||||
if not pin or not len(pin):
|
||||
@@ -198,8 +398,8 @@ class Config(object):
|
||||
except ValueError:
|
||||
Log.Warn("PIN has to be an integer (0-9)")
|
||||
self.pin = pin
|
||||
self.lock_advanced_menu = Prefs["plugin_pin_mode"] == "advanced menu"
|
||||
self.lock_menu = Prefs["plugin_pin_mode"] == "channel menu"
|
||||
self.lock_advanced_menu = Prefs["plugin_pin_mode2"] == "advanced menu"
|
||||
self.lock_menu = Prefs["plugin_pin_mode2"] == "interface"
|
||||
|
||||
try:
|
||||
self.pin_valid_minutes = int(Prefs["plugin_pin_valid_for"].strip())
|
||||
@@ -217,7 +417,7 @@ class Config(object):
|
||||
self.permissions_ok = self.check_permissions()
|
||||
|
||||
def check_permissions(self):
|
||||
if not Prefs["subtitles.save.filesystem"] or not Prefs["check_permissions"]:
|
||||
if not cast_bool(Prefs["subtitles.save.filesystem"]) or not cast_bool(Prefs["check_permissions"]):
|
||||
return True
|
||||
|
||||
self.missing_permissions = []
|
||||
@@ -233,6 +433,9 @@ class Config(object):
|
||||
if isinstance(path_str, unicode):
|
||||
path_str = path_str.encode(self.fs_encoding)
|
||||
|
||||
if not os.path.exists(path_str):
|
||||
continue
|
||||
|
||||
if use_ignore_fs:
|
||||
# check whether we've got an ignore file inside the section path
|
||||
if self.is_physically_ignored(path_str):
|
||||
@@ -302,19 +505,37 @@ class Config(object):
|
||||
if not fn:
|
||||
return
|
||||
|
||||
splitted_fn = fn.split()
|
||||
exe_fn = splitted_fn[0]
|
||||
arguments = [arg.strip() for arg in splitted_fn[1:]]
|
||||
got_args = "%(" in fn
|
||||
if got_args:
|
||||
first_arg_pos = fn.index("%(")
|
||||
exe_fn = fn[:first_arg_pos].strip()
|
||||
arguments = [arg.strip() for arg in fn[first_arg_pos:].split()]
|
||||
else:
|
||||
exe_fn = fn
|
||||
arguments = []
|
||||
|
||||
if os.path.isfile(exe_fn) and os.access(exe_fn, os.X_OK):
|
||||
return exe_fn, arguments
|
||||
|
||||
# try finding the executable itself, the path might contain spaces and there might have been other arguments
|
||||
fn_split = exe_fn.split(u" ")
|
||||
tmp_exe_fn = fn_split[0]
|
||||
|
||||
for offset in range(1, len(fn_split)+1):
|
||||
if os.path.isfile(tmp_exe_fn) and os.access(tmp_exe_fn, os.X_OK):
|
||||
exe_fn = tmp_exe_fn.strip()
|
||||
arguments = [arg.strip() for arg in fn_split[offset:]] + arguments
|
||||
return exe_fn, arguments
|
||||
|
||||
tmp_exe_fn = u" ".join(fn_split[:offset+1])
|
||||
|
||||
Log.Error("Notify executable not existing or not executable: %s" % exe_fn)
|
||||
|
||||
def refresh_enabled_sections(self):
|
||||
self.enabled_sections = self.check_enabled_sections()
|
||||
|
||||
def check_enabled_sections(self):
|
||||
enabled_for_primary_agents = []
|
||||
enabled_for_primary_agents = {"movie": [], "show": []}
|
||||
enabled_sections = {}
|
||||
|
||||
# find which agents we're enabled for
|
||||
@@ -327,29 +548,55 @@ class Config(object):
|
||||
related_agents = Plex.primary_agent(agent.identifier, t.media_type)
|
||||
for a in related_agents:
|
||||
if a.identifier == PLUGIN_IDENTIFIER and a.enabled:
|
||||
enabled_for_primary_agents.append(agent.identifier)
|
||||
enabled_for_primary_agents[MEDIA_TYPE_TO_STRING[t.media_type]].append(agent.identifier)
|
||||
|
||||
# find the libraries that use them
|
||||
for library in self.sections:
|
||||
if library.agent in enabled_for_primary_agents:
|
||||
if library.agent in enabled_for_primary_agents.get(library.type, []):
|
||||
enabled_sections[library.key] = library
|
||||
|
||||
Log.Debug(u"I'm enabled for: %s" % [lib.title for key, lib in enabled_sections.iteritems()])
|
||||
return enabled_sections
|
||||
|
||||
# Prepare a list of languages we want subs for
|
||||
def get_lang_list(self):
|
||||
l = {Language.fromietf(Prefs["langPref1"])}
|
||||
def get_lang_list(self, provider=None):
|
||||
# advanced settings
|
||||
if provider and self.advanced.providers and provider in self.advanced.providers:
|
||||
adv_languages = self.advanced.providers[provider].get("languages", None)
|
||||
if adv_languages:
|
||||
adv_out = set()
|
||||
for adv_lang in adv_languages:
|
||||
adv_lang = adv_lang.strip()
|
||||
try:
|
||||
real_lang = Language.fromietf(adv_lang)
|
||||
except:
|
||||
try:
|
||||
real_lang = Language.fromname(adv_lang)
|
||||
except:
|
||||
continue
|
||||
adv_out.update({real_lang})
|
||||
|
||||
# fallback to default languages if no valid language was found in advanced settings
|
||||
if adv_out:
|
||||
return adv_out
|
||||
|
||||
l = {Language.fromietf(Prefs["langPref1a"])}
|
||||
lang_custom = Prefs["langPrefCustom"].strip()
|
||||
|
||||
if Prefs['subtitles.only_one']:
|
||||
return l
|
||||
|
||||
if Prefs["langPref2"] != "None":
|
||||
l.update({Language.fromietf(Prefs["langPref2"])})
|
||||
if Prefs["langPref2a"] != "None":
|
||||
try:
|
||||
l.update({Language.fromietf(Prefs["langPref2a"])})
|
||||
except:
|
||||
pass
|
||||
|
||||
if Prefs["langPref3"] != "None":
|
||||
l.update({Language.fromietf(Prefs["langPref3"])})
|
||||
if Prefs["langPref3a"] != "None":
|
||||
try:
|
||||
l.update({Language.fromietf(Prefs["langPref3a"])})
|
||||
except:
|
||||
pass
|
||||
|
||||
if len(lang_custom) and lang_custom != "None":
|
||||
for lang in lang_custom.split(u","):
|
||||
@@ -365,6 +612,8 @@ class Config(object):
|
||||
|
||||
return l
|
||||
|
||||
lang_list = property(get_lang_list)
|
||||
|
||||
def get_subtitle_destination_folder(self):
|
||||
if not Prefs["subtitles.save.filesystem"]:
|
||||
return
|
||||
@@ -374,18 +623,39 @@ class Config(object):
|
||||
return fld_custom or (
|
||||
Prefs["subtitles.save.subFolder"] if Prefs["subtitles.save.subFolder"] != "current folder" else None)
|
||||
|
||||
def get_providers(self):
|
||||
def get_subtitle_formats(self):
|
||||
formats = Prefs["subtitles.save.formats"]
|
||||
out = []
|
||||
if "SRT" in formats:
|
||||
out.append("srt")
|
||||
if "VTT" in formats:
|
||||
out.append("vtt")
|
||||
return out
|
||||
|
||||
def get_providers(self, media_type="series"):
|
||||
providers = {'opensubtitles': cast_bool(Prefs['provider.opensubtitles.enabled']),
|
||||
# 'thesubdb': Prefs['provider.thesubdb.enabled'],
|
||||
'podnapisi': cast_bool(Prefs['provider.podnapisi.enabled']),
|
||||
'titlovi': cast_bool(Prefs['provider.titlovi.enabled']),
|
||||
'addic7ed': cast_bool(Prefs['provider.addic7ed.enabled']),
|
||||
'tvsubtitles': cast_bool(Prefs['provider.tvsubtitles.enabled']),
|
||||
'legendastv': cast_bool(Prefs['provider.legendastv.enabled']),
|
||||
'napiprojekt': cast_bool(Prefs['provider.napiprojekt.enabled']),
|
||||
'shooter': cast_bool(Prefs['provider.shooter.enabled']),
|
||||
'subscenter': cast_bool(Prefs['provider.subscenter.enabled']),
|
||||
'hosszupuska': cast_bool(Prefs['provider.hosszupuska.enabled']),
|
||||
'supersubtitles': cast_bool(Prefs['provider.supersubtitles.enabled']),
|
||||
'shooter': False,
|
||||
'subscene': cast_bool(Prefs['provider.subscene.enabled']),
|
||||
'argenteam': cast_bool(Prefs['provider.argenteam.enabled']),
|
||||
'subscenter': False,
|
||||
'assrt': cast_bool(Prefs['provider.assrt.enabled']),
|
||||
}
|
||||
|
||||
providers_by_prefs = copy.deepcopy(providers)
|
||||
|
||||
# disable subscene for movies by default
|
||||
if media_type == "movies":
|
||||
providers["subscene"] = False
|
||||
|
||||
# ditch non-forced-subtitles-reporting providers
|
||||
if self.forced_only:
|
||||
providers["addic7ed"] = False
|
||||
@@ -393,33 +663,109 @@ class Config(object):
|
||||
providers["legendastv"] = False
|
||||
providers["napiprojekt"] = False
|
||||
providers["shooter"] = False
|
||||
providers["subscenter"] = False
|
||||
providers["hosszupuska"] = False
|
||||
providers["supersubtitles"] = False
|
||||
providers["titlovi"] = False
|
||||
providers["argenteam"] = False
|
||||
providers["assrt"] = False
|
||||
|
||||
if not self.unrar and providers["legendastv"]:
|
||||
providers["legendastv"] = False
|
||||
Log.Info("Disabling LegendasTV, because UnRAR wasn't found")
|
||||
|
||||
# advanced settings
|
||||
if media_type and self.advanced.providers:
|
||||
for provider, data in self.advanced.providers.iteritems():
|
||||
if provider not in providers or not providers_by_prefs[provider]:
|
||||
continue
|
||||
|
||||
if data["enabled_for"] is not None:
|
||||
providers[provider] = media_type in data["enabled_for"]
|
||||
|
||||
if "provider_throttle" not in Dict:
|
||||
Dict["provider_throttle"] = {}
|
||||
|
||||
changed = False
|
||||
for provider, enabled in dict(providers).iteritems():
|
||||
reason, until, throttle_desc = Dict["provider_throttle"].get(provider, (None, None, None))
|
||||
if reason:
|
||||
now = datetime.datetime.now()
|
||||
if now < until:
|
||||
Log.Info("Not using %s until %s, because of: %s", provider,
|
||||
until.strftime("%y/%m/%d %H:%M"), reason)
|
||||
providers[provider] = False
|
||||
else:
|
||||
Log.Info("Using %s again after %s, (disabled because: %s)", provider, throttle_desc, reason)
|
||||
del Dict["provider_throttle"][provider]
|
||||
changed = True
|
||||
|
||||
if changed:
|
||||
Dict.Save()
|
||||
|
||||
return filter(lambda prov: providers[prov], providers)
|
||||
|
||||
providers = property(get_providers)
|
||||
|
||||
def get_provider_settings(self):
|
||||
os_use_https = self.advanced.providers.opensubtitles.use_https \
|
||||
if self.advanced.providers.opensubtitles.use_https != None else True
|
||||
provider_settings = {'addic7ed': {'username': Prefs['provider.addic7ed.username'],
|
||||
'password': Prefs['provider.addic7ed.password'],
|
||||
'use_random_agents': cast_bool(Prefs['provider.addic7ed.use_random_agents']),
|
||||
'use_random_agents': cast_bool(Prefs['provider.addic7ed.use_random_agents1']),
|
||||
},
|
||||
'opensubtitles': {'username': Prefs['provider.opensubtitles.username'],
|
||||
'password': Prefs['provider.opensubtitles.password'],
|
||||
'use_tag_search': cast_bool(Prefs['provider.opensubtitles.use_tags']),
|
||||
'only_foreign': cast_bool(Prefs['subtitles.only_foreign'])
|
||||
'use_tag_search': self.exact_filenames,
|
||||
'only_foreign': self.forced_only,
|
||||
'is_vip': cast_bool(Prefs['provider.opensubtitles.is_vip']),
|
||||
'use_ssl': os_use_https,
|
||||
'timeout': self.advanced.providers.opensubtitles.timeout or 15
|
||||
},
|
||||
'podnapisi': {
|
||||
'only_foreign': cast_bool(Prefs['subtitles.only_foreign'])
|
||||
'only_foreign': self.forced_only,
|
||||
},
|
||||
'legendastv': {'username': Prefs['provider.legendastv.username'],
|
||||
'password': Prefs['provider.legendastv.password'],
|
||||
},
|
||||
'subscenter': {'username': Prefs['provider.subscenter.username'],
|
||||
'password': Prefs['provider.subscenter.password'],
|
||||
},
|
||||
'assrt': {'token': Prefs['provider.assrt.token'], }
|
||||
}
|
||||
|
||||
return provider_settings
|
||||
|
||||
provider_settings = property(get_provider_settings)
|
||||
|
||||
def provider_throttle(self, name, exception):
|
||||
"""
|
||||
throttle a provider :name: for X hours based on the :exception: type
|
||||
:param name:
|
||||
:param exception:
|
||||
:return:
|
||||
"""
|
||||
cls = getattr(exception, "__class__")
|
||||
cls_name = getattr(cls, "__name__")
|
||||
if cls not in VALID_THROTTLE_EXCEPTIONS:
|
||||
for valid_cls in VALID_THROTTLE_EXCEPTIONS:
|
||||
if isinstance(cls, valid_cls):
|
||||
cls = valid_cls
|
||||
|
||||
throttle_data = PROVIDER_THROTTLE_MAP.get(name, PROVIDER_THROTTLE_MAP["default"]).get(cls, None) or \
|
||||
PROVIDER_THROTTLE_MAP["default"].get(cls, None)
|
||||
|
||||
if not throttle_data:
|
||||
return
|
||||
|
||||
throttle_delta, throttle_description = throttle_data
|
||||
|
||||
if "provider_throttle" not in Dict:
|
||||
Dict["provider_throttle"] = {}
|
||||
|
||||
throttle_until = datetime.datetime.now() + throttle_delta
|
||||
Dict["provider_throttle"][name] = (cls_name, throttle_until, throttle_description)
|
||||
|
||||
Log.Info("Throttling %s for %s, until %s, because of: %s", name, throttle_description,
|
||||
throttle_until.strftime("%y/%m/%d %H:%M"), cls_name)
|
||||
Dict.Save()
|
||||
|
||||
@property
|
||||
def provider_pool(self):
|
||||
if cast_bool(Prefs['providers.multithreading']):
|
||||
@@ -443,6 +789,22 @@ class Config(object):
|
||||
if wrong_chmod:
|
||||
Log.Warn("Chmod setting ignored, please use only 4-digit integers with leading 0 (e.g.: 775)")
|
||||
|
||||
def get_subtitle_sub_dir(self):
    """
    Determine the configured subtitle folder.

    :return: tuple (folder, is_absolute); (None, None) when subtitles aren't saved to the filesystem
    """
    if not cast_bool(Prefs['subtitles.save.filesystem']):
        return None, None

    # a custom folder wins over the predefined sub folder choice
    custom_folder = Prefs["subtitles.save.subFolder.Custom"]
    if custom_folder:
        return custom_folder, os.path.isabs(custom_folder)

    sub_folder = Prefs["subtitles.save.subFolder"]
    if sub_folder == "current folder":
        sub_folder = "."

    return sub_folder, False
|
||||
|
||||
def determine_ext_sub_strictness(self):
|
||||
val = Prefs["subtitles.scan.filename_strictness"]
|
||||
if val == "any":
|
||||
@@ -455,15 +817,25 @@ class Config(object):
|
||||
mods = []
|
||||
if self.remove_hi:
|
||||
mods.append("remove_HI")
|
||||
if self.remove_tags:
|
||||
mods.append("remove_tags")
|
||||
if self.fix_ocr:
|
||||
mods.append("OCR_fixes")
|
||||
if self.fix_common:
|
||||
mods.append("common")
|
||||
if self.colors:
|
||||
mods.append("color(name=%s)" % self.colors)
|
||||
if self.reverse_rtl:
|
||||
mods.append("reverse_rtl")
|
||||
|
||||
return mods
|
||||
|
||||
def setup_proxies(self):
    """
    Export the configured proxy (if any) via the SZ_HTTP_PROXY environment variable.
    """
    proxy = Prefs["proxy"]
    if not proxy:
        return

    os.environ["SZ_HTTP_PROXY"] = proxy.strip()
    Log.Debug("Using HTTP Proxy: %s", proxy)
|
||||
|
||||
def set_activity_modes(self):
|
||||
val = Prefs["activity.on_playback"]
|
||||
if val == "never":
|
||||
@@ -475,9 +847,85 @@ class Config(object):
|
||||
self.activity_mode = "refresh"
|
||||
elif val == "hybrid: current item or next episode":
|
||||
self.activity_mode = "hybrid"
|
||||
elif val == "hybrid-plus: current item and next episode":
|
||||
self.activity_mode = "hybrid-plus"
|
||||
else:
|
||||
self.activity_mode = "next_episode"
|
||||
|
||||
def get_plex_transcoder(self):
    """
    Try to locate the Plex Transcoder executable.

    The base folder is taken from the PLEX_MEDIA_SERVER_HOME environment variable, falling back to two levels
    above PLEXBUNDLEDPLUGINSPATH.

    :return: path to the transcoder executable, or None if it could not be found
    """
    base_path = os.environ.get("PLEX_MEDIA_SERVER_HOME", None)
    if not base_path:
        # fall back to bundled plugins path
        bundle_path = os.environ.get("PLEXBUNDLEDPLUGINSPATH", None)
        if bundle_path:
            base_path = os.path.normpath(os.path.join(bundle_path, "..", ".."))

    if not base_path:
        # neither environment variable is available; joining paths onto None would raise
        return None

    if sys.platform == "darwin":
        fn = os.path.join(base_path, "MacOS", "Plex Transcoder")
    elif mswindows:
        fn = os.path.join(base_path, "plextranscoder.exe")
    else:
        fn = os.path.join(base_path, "Plex Transcoder")

    if os.path.isfile(fn):
        return fn

    # look inside Resources folder as fallback, as well
    fn = os.path.join(base_path, "Resources", "Plex Transcoder")
    if os.path.isfile(fn):
        return fn

    return None
|
||||
|
||||
def parse_rename_mode(self):
    """
    Translate the rename related preferences into self.refiner_settings and self.exact_filenames.
    """
    # fixme: exact_filenames should be determined via callback combined with info about the current video
    # (original_name)

    mode = str(Prefs["media_rename1"])
    refiner_settings = self.refiner_settings = {}

    if cast_bool(Prefs['use_file_info_file']):
        refiner_settings["file_info_file"] = True
        self.exact_filenames = True

    # modes which don't involve any external renaming tool
    if mode == "none of the above":
        return

    if mode == "Symlink to original file":
        refiner_settings["symlinks"] = True
        self.exact_filenames = True
        return

    if mode == "I keep the original filenames":
        self.exact_filenames = True
        return

    if mode in ("Filebot", "Sonarr/Radarr/Filebot"):
        refiner_settings["filebot"] = True

    if mode not in ("Sonarr/Radarr (fill api info below)", "Sonarr/Radarr/Filebot"):
        return

    # a drone refiner is only set up when both its url and its api key are configured
    drones = (
        ("sonarr", "drone_api.sonarr.url", "drone_api.sonarr.api_key"),
        ("radarr", "drone_api.radarr.url", "drone_api.radarr.api_key"),
    )
    for drone, url_pref, api_key_pref in drones:
        if not (Prefs[url_pref] and Prefs[api_key_pref]):
            continue

        refiner_settings[drone] = {
            "base_url": Prefs[url_pref],
            "api_key": Prefs[api_key_pref],
        }

        # advanced settings may extend/override the drone settings
        overrides = getattr(self.advanced.refiners, drone)
        if overrides:
            refiner_settings[drone].update(overrides)

        self.exact_filenames = True
|
||||
|
||||
@property
def text_based_formats(self):
    """
    Text subtitle formats from the advanced settings, falling back to TEXT_SUBTITLE_EXTS when unset/empty.
    """
    configured_formats = self.advanced.text_subtitle_formats
    if configured_formats:
        return configured_formats
    return TEXT_SUBTITLE_EXTS
|
||||
|
||||
def init_subliminal_patches(self):
|
||||
# configure custom subtitle destination folders for scanning pre-existing subs
|
||||
Log.Debug("Patching subliminal ...")
|
||||
@@ -486,7 +934,7 @@ class Config(object):
|
||||
subliminal_patch.core.INCLUDE_EXOTIC_SUBS = self.exotic_ext
|
||||
|
||||
subliminal_patch.core.DOWNLOAD_TRIES = int(Prefs['subtitles.try_downloads'])
|
||||
subliminal.score.episode_scores["addic7ed_boost"] = int(Prefs['provider.addic7ed.boost_by1'])
|
||||
subliminal.score.episode_scores["addic7ed_boost"] = int(Prefs['provider.addic7ed.boost_by2'])
|
||||
|
||||
|
||||
config = Config()
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
|
||||
|
||||
def dispatch_migrate():
|
||||
@@ -6,6 +7,8 @@ def dispatch_migrate():
|
||||
migrate()
|
||||
except:
|
||||
Log.Error("Migration failed: %s" % traceback.format_exc())
|
||||
del Dict["subs"]
|
||||
Dict.Save()
|
||||
|
||||
|
||||
def migrate():
|
||||
@@ -25,6 +28,7 @@ def migrate():
|
||||
time=item.time)
|
||||
|
||||
del Dict["history"]
|
||||
history.destroy()
|
||||
Dict.Save()
|
||||
|
||||
# migrate subtitle storage from Dict to Data
|
||||
@@ -80,5 +84,6 @@ def migrate():
|
||||
if stored_any:
|
||||
subtitle_storage.save(stored_subs)
|
||||
|
||||
subtitle_storage.destroy()
|
||||
del Dict["subs"]
|
||||
Dict.Save()
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
# coding=utf-8
|
||||
import os
|
||||
|
||||
from subzero.language import Language
|
||||
|
||||
import subliminal_patch as subliminal
|
||||
|
||||
from support.config import config
|
||||
from support.helpers import cast_bool
|
||||
from subtitlehelpers import get_subtitles_from_metadata
|
||||
from subliminal_patch import compute_score
|
||||
from support.plex_media import get_blacklist_from_part_map
|
||||
from subzero.video import refine_video
|
||||
from support.storage import get_pack_data, store_pack_data
|
||||
|
||||
|
||||
def get_missing_languages(video, part):
    """
    Determine which of the configured languages have no subtitle yet for the given video.

    :param video: scanned video; its subtitle_languages set is extended with languages found in the metadata
    :param part: the media part belonging to the video, used to look up metadata subtitles
    :return: False if nothing is missing (or one found subtitle is enough), otherwise a set of the missing
             languages as strings
    """
    languages = set([Language.fromietf(str(l)) for l in config.lang_list])

    # should we treat IETF as alpha3? (ditch the country part)
    alpha3_map = {}
    if config.ietf_as_alpha3:
        for language in languages:
            if language.country:
                alpha3_map[language.alpha3] = language.country
                language.country = None

    # boolean prefs go through cast_bool like in the rest of the code base; a raw value such as the string
    # "false" would otherwise count as truthy
    if not cast_bool(Prefs['subtitles.save.filesystem']):
        # scan for existing metadata subtitles
        meta_subs = get_subtitles_from_metadata(part)
        for language, subList in meta_subs.iteritems():
            if subList:
                video.subtitle_languages.add(language)
                Log.Debug("Found metadata subtitle %s for %s", language, video)

    have_languages = video.subtitle_languages.copy()
    if config.ietf_as_alpha3:
        for language in have_languages:
            if language.country:
                alpha3_map[language.alpha3] = language.country
                language.country = None

    missing_languages = (set(str(l) for l in languages) - set(str(l) for l in have_languages))

    # all languages are found if we either really have subs for all languages or we only want to have exactly
    # one language and we've only found one (the case for a selected language, Prefs['subtitles.only_one']
    # (one found sub matches any language))
    found_one_which_is_enough = len(video.subtitle_languages) >= 1 and cast_bool(Prefs['subtitles.only_one'])
    if not missing_languages or found_one_which_is_enough:
        if found_one_which_is_enough:
            Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
        else:
            Log.Debug('All languages %r exist for %s', languages, video)
        return False

    # re-add country codes to the missing languages, in case we've removed them above
    if config.ietf_as_alpha3:
        for language in languages:
            language.country = alpha3_map.get(language.alpha3, None)

    return missing_languages
|
||||
|
||||
|
||||
def pre_download_hook(subtitle):
    """
    Before downloading a pack subtitle, attach a previously cached pack archive to it, if there is one.

    :param subtitle: the subtitle about to be downloaded
    """
    if not subtitle.is_pack:
        return

    # try retrieving the subtitle from a cached pack archive
    cached_pack = get_pack_data(subtitle)
    if cached_pack:
        subtitle.pack_data = cached_pack
|
||||
|
||||
|
||||
def post_download_hook(subtitle):
    """
    After a download, cache freshly downloaded pack data and clear it from the subtitle.

    If a new pack was downloaded, it is stored in the cache; the providers' download method is responsible for
    setting subtitle.pack_data to None in case the cached pack data we provided was successfully used.

    :param subtitle: the subtitle that was just downloaded
    """
    if subtitle.is_pack:
        fresh_pack = subtitle.pack_data
        if fresh_pack:
            # store pack data in cache
            store_pack_data(subtitle, fresh_pack)

    # may be redundant
    subtitle.pack_data = None
|
||||
|
||||
|
||||
def language_hook(provider):
    """
    Return the language list config.get_lang_list() yields for the given provider.

    :param provider: provider passed through as the ``provider`` keyword argument
    """
    provider_languages = config.get_lang_list(provider=provider)
    return provider_languages
|
||||
|
||||
|
||||
def download_best_subtitles(video_part_map, min_score=0, throttle_time=None, providers=None):
    """
    Download the best subtitles for every video in video_part_map that is missing configured languages.

    :param video_part_map: dict mapping scanned videos to their media parts
    :param min_score: minimum score passed on to subliminal
    :param throttle_time: passed on to subliminal
    :param providers: providers to use; defaults to config.providers
    :return: whatever subliminal.download_best_subtitles returns, or None if there was nothing to do
    """
    hearing_impaired = Prefs['subtitles.search.hearingImpaired']
    languages = set(Language.fromietf(str(lang)) for lang in config.lang_list)
    if not languages:
        return

    use_videos = []
    for video, part in video_part_map.iteritems():
        # videos flagged with ignore_all are searched for every configured language
        missing_languages = languages if video.ignore_all else get_missing_languages(video, part)
        if not missing_languages:
            continue

        Log.Info(u"%s has missing languages: %s", os.path.basename(video.name), missing_languages)
        refine_video(video, refiner_settings=config.refiner_settings)
        use_videos.append(video)

    # prepare blacklist
    blacklist = get_blacklist_from_part_map(video_part_map, languages)

    if not use_videos:
        Log.Debug("All languages for all requested videos exist. Doing nothing.")
        return

    Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s, languages: %s" %
              (min_score, hearing_impaired, languages))

    download_kwargs = dict(
        providers=providers or config.providers,
        provider_configs=config.provider_settings,
        pool_class=config.provider_pool,
        compute_score=compute_score,
        throttle_time=throttle_time,
        blacklist=blacklist,
        throttle_callback=config.provider_throttle,
        pre_download_hook=pre_download_hook,
        post_download_hook=post_download_hook,
        language_hook=language_hook,
    )
    return subliminal.download_best_subtitles(set(use_videos), languages, min_score, hearing_impaired,
                                              **download_kwargs)
|
||||
@@ -10,10 +10,12 @@ import re
|
||||
import platform
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import OrderedDict
|
||||
|
||||
import chardet
|
||||
|
||||
from bs4 import UnicodeDammit
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
from subzero.analytics import track_event
|
||||
|
||||
mswindows = (sys.platform == "win32")
|
||||
@@ -42,6 +44,13 @@ def cast_bool(value):
|
||||
return str(value).strip() in ("true", "True")
|
||||
|
||||
|
||||
def cast_int(value, default=None):
    """
    Convert :value: to an int, returning :default: if that isn't possible.

    :param value: anything int() accepts; unparsable strings and unsupported types (e.g. None) yield default
    :param default: value returned when the conversion fails
    :return: int or default
    """
    try:
        return int(value)
    except (TypeError, ValueError):
        # int() raises TypeError for None and other unsupported types, ValueError for unparsable strings
        return default
|
||||
|
||||
|
||||
# A platform independent way to split paths which might come in with different separators.
|
||||
def split_path(str):
|
||||
if str.find('\\') != -1:
|
||||
@@ -149,10 +158,11 @@ def get_video_display_title(kind, title, section_title=None, parent_title=None,
|
||||
if add_section_title:
|
||||
section_add = ("%s: " % section_title) if section_title else ""
|
||||
|
||||
if kind == "show" and parent_title:
|
||||
if kind in ("season", "show") and parent_title:
|
||||
if season and episode:
|
||||
return '%s%s S%02dE%02d%s' % (section_add, parent_title, season or 0, episode or 0,
|
||||
(", %s" % title if title else ""))
|
||||
|
||||
return '%s%s%s' % (section_add, parent_title, (", %s" % title if title else ""))
|
||||
return "%s%s" % (section_add, title)
|
||||
|
||||
@@ -200,7 +210,7 @@ def decode_message(s):
|
||||
|
||||
|
||||
def timestamp():
|
||||
return int(time.time())
|
||||
return int(time.time()*1000)
|
||||
|
||||
|
||||
def df(d):
|
||||
@@ -245,13 +255,13 @@ def get_item_hints(data):
|
||||
:param data: video item dict of media_to_videos
|
||||
:return:
|
||||
"""
|
||||
hints = {"title": data["title"], "type": "movie"}
|
||||
hints = {"title": data["original_title"] or data["title"], "type": "movie"}
|
||||
if data["type"] == "episode":
|
||||
hints.update(
|
||||
{
|
||||
"type": "episode",
|
||||
"episode_title": data["title"],
|
||||
"title": data["series"],
|
||||
"title": data["original_title"] or data["series"],
|
||||
}
|
||||
)
|
||||
return hints
|
||||
@@ -265,7 +275,7 @@ def notify_executable(exe_info, videos, subtitles, storage):
|
||||
exe, arguments = exe_info
|
||||
for video, video_subtitles in subtitles.items():
|
||||
for subtitle in video_subtitles:
|
||||
lang = Locale.Language.Match(subtitle.language.alpha2)
|
||||
lang = str(subtitle.language)
|
||||
data = video.plexapi_metadata.copy()
|
||||
data.update({
|
||||
"subtitle_language": lang,
|
||||
@@ -282,7 +292,6 @@ def notify_executable(exe_info, videos, subtitles, storage):
|
||||
prepared_arguments = [arg % prepared_data for arg in arguments]
|
||||
|
||||
Log.Debug(u"Calling %s with arguments: %s" % (exe, prepared_arguments))
|
||||
env = os.environ
|
||||
if not mswindows:
|
||||
env_path = {"PATH": os.pathsep.join(
|
||||
[
|
||||
@@ -293,23 +302,62 @@ def notify_executable(exe_info, videos, subtitles, storage):
|
||||
)
|
||||
}
|
||||
env = dict(os.environ, **env_path)
|
||||
env.pop("LD_LIBRARY_PATH", None)
|
||||
else:
|
||||
env = dict(os.environ)
|
||||
|
||||
# clean out any Plex-PYTHONPATH that may bleed through the spawned process
|
||||
if "PYTHONPATH" in env and "plex" in env["PYTHONPATH"].lower():
|
||||
del env["PYTHONPATH"]
|
||||
|
||||
try:
|
||||
output = subprocess.check_output(quote_args([exe] + prepared_arguments),
|
||||
stderr=subprocess.STDOUT, shell=True, env=env)
|
||||
except subprocess.CalledProcessError:
|
||||
Log.Error(u"Calling %s failed: %s" % (exe, traceback.format_exc()))
|
||||
proc = subprocess.Popen(quote_args([exe] + prepared_arguments), stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE, shell=True, env=env, cwd=os.path.dirname(exe))
|
||||
output, errors = proc.communicate()
|
||||
|
||||
if proc.returncode == 1:
|
||||
Log.Error(u"Calling %s with args %s failed: output:\n%s, error:\n%s", exe, prepared_arguments,
|
||||
output, errors)
|
||||
return
|
||||
|
||||
output = output.decode()
|
||||
|
||||
except:
|
||||
Log.Error(u"Calling %s failed: %s", exe, traceback.format_exc())
|
||||
else:
|
||||
Log.Debug(u"Process output: %s" % output)
|
||||
Log.Debug(u"Process output: %s", output)
|
||||
|
||||
|
||||
def track_usage(category=None, action=None, label=None, value=None):
    """
    Report an anonymous usage event in a background thread, at most once per 30 minutes per event.

    Does nothing unless the "track_usage" preference is enabled. The time of the last report per event is
    kept in Dict["last_tracked"]; entries older than a day are dropped on every call.

    :param category: event category
    :param action: event action
    :param label: event label
    :param value: event value
    :return: None
    """
    if not cast_bool(Prefs["track_usage"]):
        return

    if "last_tracked" not in Dict:
        Dict["last_tracked"] = OrderedDict()
        Dict.Save()

    last_tracked = Dict["last_tracked"]
    event_key = (category, action, label, value)
    now = datetime.datetime.now()

    # skip events that have already been reported within the last 30 minutes
    if event_key in last_tracked and (last_tracked[event_key] + datetime.timedelta(minutes=30)) > now:
        return

    last_tracked[event_key] = now

    # maintenance: kill day old values; the loop variables must not shadow the function arguments, as
    # :value: is still needed for the report below
    expired_before = now - datetime.timedelta(days=1)
    for tracked_key, tracked_at in last_tracked.copy().iteritems():
        if tracked_at < expired_before:
            try:
                del last_tracked[tracked_key]
            except KeyError:
                pass

    try:
        Thread.Create(dispatch_track_usage, category, action, label, value,
                      identifier=Dict["anon_id"], first_use=Dict["first_use"],
                      add=Network.PublicAddress)
    except:
        Log.Debug("Something went wrong when reporting anonymous user statistics: %s", traceback.format_exc())
|
||||
|
||||
|
||||
def dispatch_track_usage(*args, **kwargs):
|
||||
@@ -322,9 +370,30 @@ def dispatch_track_usage(*args, **kwargs):
|
||||
Log.Debug("Something went wrong when reporting anonymous user statistics: %s", traceback.format_exc())
|
||||
|
||||
|
||||
def get_language_from_stream(lang_code):
    """
    Resolve a stream's language code to a Language.

    :param lang_code: language code of a stream; may be empty/None
    :return: Language, or None if the code is empty or matches nothing known ("xx")
    """
    if not lang_code:
        return

    matched = Locale.Language.Match(lang_code)
    if not matched or matched == "xx":
        return

    # Log.Debug("Found language: %r", lang)
    return Language.fromietf(matched)
|
||||
|
||||
|
||||
def get_language(lang_short):
    """
    Build a Language from an IETF language code.

    :param lang_short: IETF language code
    :return: Language
    """
    language = Language.fromietf(lang_short)
    return language
|
||||
|
||||
|
||||
def display_language(l):
    """
    Return the localized string for the lowercased string form of the given language.

    :param l: language object (anything str() can be applied to)
    """
    translation_key = str(l).lower()
    return _(translation_key)
|
||||
|
||||
|
||||
def is_stream_forced(stream):
    """
    Tell whether a stream is a forced one, either by its forced attribute or by "forced" being in its title.

    :param stream: stream object; the title and forced attributes are optional
    :return: the stream's forced attribute if it is truthy or the title doesn't hint at forced, else True
    """
    title = getattr(stream, "title", "") or ""
    is_forced = getattr(stream, "forced", False)
    if is_forced:
        return is_forced

    # fall back to the stream title
    if "forced" in title.strip().lower():
        return True

    return is_forced
|
||||
|
||||
|
||||
class PartUnknownException(Exception):
    """
    Signals an unknown media part (presumably raised when a requested part can't be found; verify at raise sites).
    """
|
||||
@@ -1,4 +1,4 @@
|
||||
# coding=utf-8
|
||||
from subzero.history_storage import SubtitleHistory
|
||||
|
||||
get_history = lambda: SubtitleHistory(Data, int(Prefs["history_size"]))
|
||||
get_history = lambda: SubtitleHistory(Data, Thread, int(Prefs["history_size"]))
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
# coding=utf-8
|
||||
|
||||
import inspect
|
||||
|
||||
from support.config import config
|
||||
|
||||
|
||||
core = getattr(Data, "_core")
|
||||
|
||||
|
||||
# get original localization module in order to access its base classes later on
|
||||
def get_localization_module():
    """
    Return the module that defines the class of core.localization.
    """
    # dunder attribute is read via getattr, as done elsewhere in this code base
    return inspect.getmodule(getattr(core.localization, "__class__"))
|
||||
|
||||
|
||||
plex_i18n_module = get_localization_module()
|
||||
|
||||
|
||||
def old_style_placeholders_count(s):
    """
    Count the old-style (%-formatting) placeholders %s, %d, %r, %f and %i in s.

    :param s: format string
    :return: total number of occurrences
    """
    # fixme: incomplete, use regex
    total = 0
    for placeholder in ("%s", "%d", "%r", "%f", "%i"):
        total += s.count(placeholder)
    return total
|
||||
|
||||
|
||||
def check_old_style_placeholders(k, args):
    """
    Sanity check the old-style placeholders of a format string against the supplied positional arguments.

    :param k: format string (or an object whose __str__ yields it)
    :param args: positional format arguments
    :return: a short problem description if something is off (which is also logged), otherwise None
    """
    # replace escaped %'s?
    fmt = k.__str__().replace("%%", "")

    if "%(" in fmt:
        log_template, problem = u"%r defines named placeholders for formatting", "NEEDS NAMED ARGUMENTS"
    else:
        placeholders_found = old_style_placeholders_count(fmt)
        if placeholders_found:
            if not args:
                log_template, problem = u"%r requires a arguments for formatting", "NEEDS FORMAT ARGUMENTS"
            elif placeholders_found != len(args):
                log_template, problem = (u"%r wrong amount of arguments supplied for formatting",
                                         "WRONG FORMAT ARGUMENT COUNT")
            else:
                return None
        elif args:
            log_template, problem = u"%r doesn't define placeholders for formatting", "HAS NO FORMAT ARGUMENTS"
        else:
            return None

    Log.Error(log_template % fmt)
    return problem
|
||||
|
||||
|
||||
class SmartLocalStringFormatter(plex_i18n_module.LocalStringFormatter):
    """
    LocalStringFormatter which also accepts a dictionary as format arguments; with config.debug_i18n enabled,
    the format string and its arguments are sanity checked and replaced by a problem description on mismatch
    """
    def __init__(self, string1, string2, locale=None):
        if isinstance(string2, tuple):
            dict_passed = len(string2) == 1 and hasattr(string2[0], "iteritems")

            if dict_passed:
                # dictionary passed, unwrap it
                string2 = string2[0]
                if config.debug_i18n and "%(" not in string1.__str__().replace("%%", ""):
                    Log.Error(u"%r: dictionary for non-named format string supplied" % string1.__str__())
                    string1, string2 = "%s", "NO NAMED ARGUMENTS"

            elif len(string2) >= 1 and config.debug_i18n:
                # positional arguments
                problem = check_old_style_placeholders(string1, string2)
                if problem:
                    string1, string2 = "%s", problem

        # underscore attributes are written via setattr, as done elsewhere in this code base
        for attr_name, attr_value in (("_string1", string1), ("_string2", string2), ("_locale", locale)):
            setattr(self, attr_name, attr_value)
|
||||
|
||||
|
||||
def local_string_with_optional_format(key, *args, **kwargs):
    """
    Return the localized string for key, optionally formatted with positional or keyword arguments.

    Keyword arguments take precedence over positional ones. If rendering in the current locale raises a
    TypeError, the "en" string is used instead.

    :param key: localization key
    :return: unicode string (or a problem description when config.debug_i18n finds a mismatch)
    """
    format_args = (kwargs,) if kwargs else tuple(args)

    def render(locale):
        # fixme: may not be the best idea as this evaluates the string early
        local_string = plex_i18n_module.LocalString(core, key, locale)
        if format_args:
            local_string = SmartLocalStringFormatter(local_string, format_args)
        return unicode(local_string)

    # check string instances for arguments
    if not format_args and config.debug_i18n:
        problem = check_old_style_placeholders(key, format_args)
        if problem:
            return problem

    try:
        return render(Locale.CurrentLocale)
    except TypeError:
        Log.Exception("Broken translation!")
        return render("en")
|
||||
|
||||
|
||||
_ = local_string_with_optional_format
|
||||
|
||||
|
||||
def is_localized_string(s):
    """
    Tell whether s looks like a localized string, i.e. whether it has a ``localize`` attribute.

    :param s: any object
    :return: bool
    """
    has_localize = hasattr(s, "localize")
    return has_localize
|
||||
@@ -11,7 +11,8 @@ class IgnoreDict(DictProxy):
|
||||
"section": "sections",
|
||||
"show": "series",
|
||||
"movie": "videos",
|
||||
"episode": "videos"
|
||||
"episode": "videos",
|
||||
"season": "seasons",
|
||||
}
|
||||
|
||||
# getItems types mapped to their verbose names
|
||||
@@ -19,9 +20,10 @@ class IgnoreDict(DictProxy):
|
||||
"sections": "Section",
|
||||
"series": "Series",
|
||||
"videos": "Item",
|
||||
"seasons": "Season",
|
||||
}
|
||||
|
||||
key_order = ("sections", "series", "videos")
|
||||
key_order = ("sections", "series", "videos", "seasons")
|
||||
|
||||
def __len__(self):
|
||||
try:
|
||||
@@ -35,7 +37,7 @@ class IgnoreDict(DictProxy):
|
||||
return self.translate_keys.get(name)
|
||||
|
||||
def verbose(self, name):
|
||||
return self.keys_verbose.get(name)
|
||||
return self.keys_verbose.get(self.translate_key(name) or name)
|
||||
|
||||
def get_title_key(self, kind, key):
|
||||
return "%s_%s" % (kind, key)
|
||||
@@ -57,6 +59,7 @@ class IgnoreDict(DictProxy):
|
||||
Dict.Save()
|
||||
|
||||
def setup_defaults(self):
|
||||
return {"sections": [], "series": [], "videos": [], "titles": {}}
|
||||
return {"sections": [], "series": [], "videos": [], "titles": {}, "seasons": []}
|
||||
|
||||
|
||||
ignore_list = IgnoreDict(Dict)
|
||||
|
||||
+126
-34
@@ -5,12 +5,18 @@ import re
|
||||
import traceback
|
||||
import types
|
||||
import os
|
||||
|
||||
import time
|
||||
|
||||
import datetime
|
||||
|
||||
from ignore import ignore_list
|
||||
from helpers import is_recent, get_plex_item_display_title, query_plex, PartUnknownException
|
||||
from lib import Plex, get_intent
|
||||
from config import config, IGNORE_FN
|
||||
from subliminal_patch.subtitle import ModifiedSubtitle
|
||||
from subzero.modification import registry as mod_registry, SubtitleModifications
|
||||
from socket import timeout
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -20,12 +26,20 @@ container_size_re = re.compile(ur'totalSize="(\d+)"')
|
||||
|
||||
|
||||
def get_item(key):
|
||||
item_id = int(key)
|
||||
item_container = Plex["library"].metadata(item_id)
|
||||
try:
|
||||
item_id = int(key)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
try:
|
||||
item_container = Plex["library"].metadata(item_id)
|
||||
except timeout:
|
||||
Log.Debug("PMS API timed out when querying information about item %d", item_id)
|
||||
return
|
||||
|
||||
try:
|
||||
return list(item_container)[0]
|
||||
except IndexError:
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
@@ -50,6 +64,21 @@ def get_item_kind_from_item(item):
|
||||
return PLEX_API_TYPE_MAP.get(get_item_kind(item))
|
||||
|
||||
|
||||
def get_item_title(item):
    """
    Build the display title for an episode, movie, season or series item.

    :param item: plex item
    :return: display title, or None for any other kind of item
    """
    kind = get_item_kind_from_item(item)

    if kind == "episode":
        return get_plex_item_display_title(item, "show", parent=item.season, section_title=None,
                                           parent_title=item.show.title)

    if kind == "season":
        return get_plex_item_display_title(item, "season", parent=item.show, section_title="Season",
                                           parent_title=item.show.title)

    if kind in ("movie", "series"):
        return get_plex_item_display_title(item, kind, section_title=None)

    return None
|
||||
|
||||
|
||||
def get_item_thumb(item):
|
||||
kind = get_item_kind(item)
|
||||
if kind == "Episode":
|
||||
@@ -236,7 +265,7 @@ def is_ignored(rating_key, item=None):
|
||||
:return:
|
||||
"""
|
||||
# item in soft ignore list
|
||||
if rating_key in ignore_list["videos"]:
|
||||
if ignore_list["videos"] and rating_key in ignore_list["videos"]:
|
||||
Log.Debug("Item %s is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
@@ -244,12 +273,17 @@ def is_ignored(rating_key, item=None):
|
||||
kind = get_item_kind(item)
|
||||
|
||||
# show in soft ignore list
|
||||
if kind == "Episode" and item.show.rating_key in ignore_list["series"]:
|
||||
if kind == "Episode" and ignore_list["series"] and item.show.rating_key in ignore_list["series"]:
|
||||
Log.Debug("Item %s's show is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
# season in soft ignore list
|
||||
if kind == "Episode" and ignore_list["seasons"] and item.season.rating_key in ignore_list["seasons"]:
|
||||
Log.Debug("Item %s's season is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
# section in soft ignore list
|
||||
if item.section.key in ignore_list["sections"]:
|
||||
if ignore_list["sections"] and item.section.key in ignore_list["sections"]:
|
||||
Log.Debug("Item %s's section is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
@@ -299,26 +333,101 @@ def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, paren
|
||||
# season refresh, needs explicit per-episode refresh
|
||||
refresh = [item.rating_key for item in list(Plex["library/metadata"].children(int(rating_key)))]
|
||||
|
||||
multiple = len(refresh) > 1
|
||||
for key in refresh:
|
||||
Log.Info("%s item %s", "Refreshing" if not force else "Forced-refreshing", key)
|
||||
Plex["library/metadata"].refresh(key)
|
||||
if multiple:
|
||||
Thread.Sleep(10.0)
|
||||
|
||||
|
||||
def get_current_sub(rating_key, part_id, language):
|
||||
def get_current_sub(rating_key, part_id, language, plex_item=None):
|
||||
from support.storage import get_subtitle_storage
|
||||
|
||||
item = get_item(rating_key)
|
||||
item = plex_item or get_item(rating_key)
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load_or_new(item)
|
||||
current_sub = stored_subs.get_any(part_id, language)
|
||||
return current_sub, stored_subs, subtitle_storage
|
||||
|
||||
|
||||
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
|
||||
from support.plex_media import get_plex_metadata, scan_videos
|
||||
from support.storage import save_subtitles
|
||||
def save_stored_sub(stored_subtitle, rating_key, part_id, language, item_type, plex_item=None, storage=None,
|
||||
stored_subs=None):
|
||||
"""
|
||||
in order for this to work, if the calling supplies stored_subs and storage, it has to trigger its saving and
|
||||
destruction explicitly
|
||||
:param stored_subtitle:
|
||||
:param rating_key:
|
||||
:param part_id:
|
||||
:param language:
|
||||
:param item_type:
|
||||
:param plex_item:
|
||||
:param storage:
|
||||
:param stored_subs:
|
||||
:return:
|
||||
"""
|
||||
from support.plex_media import get_plex_metadata
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles, get_subtitle_storage
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
plex_item = plex_item or get_item(rating_key)
|
||||
|
||||
stored_subs_was_provided = True
|
||||
if not stored_subs or not storage:
|
||||
storage = get_subtitle_storage()
|
||||
stored_subs = storage.load(plex_item.rating_key)
|
||||
stored_subs_was_provided = False
|
||||
|
||||
if not all([plex_item, stored_subs]):
|
||||
return
|
||||
|
||||
try:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
|
||||
except PartUnknownException:
|
||||
return
|
||||
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
subtitle = ModifiedSubtitle(language, mods=stored_subtitle.mods)
|
||||
subtitle.content = stored_subtitle.content
|
||||
if stored_subtitle.encoding:
|
||||
# thanks plex
|
||||
setattr(subtitle, "_guessed_encoding", stored_subtitle.encoding)
|
||||
|
||||
if stored_subtitle.encoding != "utf-8":
|
||||
subtitle.normalize()
|
||||
stored_subtitle.content = subtitle.content
|
||||
stored_subtitle.encoding = "utf-8"
|
||||
storage.save(stored_subs)
|
||||
|
||||
subtitle.plex_media_fps = plex_part.fps
|
||||
subtitle.page_link = stored_subtitle.id
|
||||
subtitle.language = language
|
||||
subtitle.id = stored_subtitle.id
|
||||
|
||||
try:
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
|
||||
Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
|
||||
", ".join(stored_subtitle.mods) if stored_subtitle.mods else "none")
|
||||
except:
|
||||
Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())
|
||||
|
||||
if subtitle.storage_path:
|
||||
stored_subtitle.last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(subtitle.storage_path))
|
||||
|
||||
if not stored_subs_was_provided:
|
||||
storage.save(stored_subs)
|
||||
storage.destroy()
|
||||
|
||||
|
||||
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
|
||||
plex_item = get_item(rating_key)
|
||||
|
||||
if not plex_item:
|
||||
return
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language, plex_item=plex_item)
|
||||
if mode == "add":
|
||||
for mod in mods:
|
||||
identifier, args = SubtitleModifications.parse_identifier(mod)
|
||||
@@ -346,26 +455,9 @@ def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"
|
||||
current_sub.mods.pop()
|
||||
else:
|
||||
raise NotImplementedError("Wrong mode given")
|
||||
|
||||
save_stored_sub(current_sub, rating_key, part_id, language, item_type, plex_item=plex_item, storage=storage,
|
||||
stored_subs=stored_subs)
|
||||
|
||||
storage.save(stored_subs)
|
||||
|
||||
try:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
except PartUnknownException:
|
||||
return
|
||||
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
subtitle = ModifiedSubtitle(language, mods=current_sub.mods)
|
||||
subtitle.content = current_sub.content
|
||||
subtitle.plex_media_fps = plex_part.fps
|
||||
subtitle.page_link = "modify subtitles with: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
subtitle.language = language
|
||||
subtitle.id = current_sub.id
|
||||
|
||||
try:
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
|
||||
Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
|
||||
", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
except:
|
||||
Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())
|
||||
storage.destroy()
|
||||
|
||||
@@ -9,29 +9,33 @@ import subtitlehelpers
|
||||
from config import config as sz_config
|
||||
|
||||
|
||||
SECONDARY_TAGS = ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom', 'hi', 'cc', 'sdh']
|
||||
|
||||
|
||||
def find_subtitles(part):
|
||||
lang_sub_map = {}
|
||||
part_filename = helpers.unicodize(part.file)
|
||||
part_basename = os.path.splitext(os.path.basename(part_filename))[0]
|
||||
use_filesystem = helpers.cast_bool(Prefs["subtitles.save.filesystem"])
|
||||
paths = [os.path.dirname(part_filename)] if use_filesystem else []
|
||||
sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
|
||||
if Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
|
||||
global_subtitle_folder = None
|
||||
use_sub_subfolder = Prefs["subtitles.save.subFolder"] != "current folder" and not sub_dir_custom
|
||||
sub_subfolder = None
|
||||
paths = [os.path.dirname(part_filename)] if use_filesystem else []
|
||||
|
||||
global_folders = []
|
||||
|
||||
if use_filesystem:
|
||||
# Check for local subtitles subdirectory
|
||||
sub_dir_base = paths[0]
|
||||
|
||||
sub_dir_list = []
|
||||
|
||||
if Prefs["subtitles.save.subFolder"] != "current folder":
|
||||
if use_sub_subfolder:
|
||||
# got selected subfolder
|
||||
sub_dir_list.append(os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"]))
|
||||
|
||||
sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
|
||||
if Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
sub_subfolder = os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"])
|
||||
sub_dir_list.append(sub_subfolder)
|
||||
sub_subfolder = os.path.normpath(helpers.unicodize(sub_subfolder))
|
||||
|
||||
if sub_dir_custom:
|
||||
# got custom subfolder
|
||||
@@ -84,8 +88,12 @@ def find_subtitles(part):
|
||||
media_files.append(root)
|
||||
|
||||
# cleanup any leftover subtitle if no associated media file was found
|
||||
if helpers.cast_bool(Prefs["subtitles.autoclean"]):
|
||||
if use_filesystem and helpers.cast_bool(Prefs["subtitles.autoclean"]):
|
||||
for path in paths:
|
||||
# only housekeep in sub_subfolder if sub_subfolder is used
|
||||
if use_sub_subfolder and path != sub_subfolder and not sz_config.advanced.thorough_cleaning:
|
||||
continue
|
||||
|
||||
# we can't housekeep the global subtitle folders as we don't know about *all* media files
|
||||
# in a library; skip them
|
||||
skip_path = False
|
||||
@@ -105,11 +113,10 @@ def find_subtitles(part):
|
||||
if os.path.isfile(enc_fn):
|
||||
(root, ext) = os.path.splitext(file_path_listing)
|
||||
# it's a subtitle file
|
||||
if ext.lower()[1:] in config.SUBTITLE_EXTS:
|
||||
if ext.lower()[1:] in config.SUBTITLE_EXTS_BASE:
|
||||
# get fn without forced/default/normal tag
|
||||
split_tag = root.rsplit(".", 1)
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded',
|
||||
'custom']:
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
|
||||
root = split_tag[0]
|
||||
|
||||
# get associated media file name without language
|
||||
@@ -135,7 +142,7 @@ def find_subtitles(part):
|
||||
# get fn without forced/default/normal tag
|
||||
split_tag = local_basename.rsplit(".", 1)
|
||||
has_additional_tag = False
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'custom']:
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
|
||||
local_basename = split_tag[0]
|
||||
has_additional_tag = True
|
||||
|
||||
@@ -159,7 +166,7 @@ def find_subtitles(part):
|
||||
continue
|
||||
|
||||
# determine whether to pick up the subtitle based on our match strictness
|
||||
elif not filename_matches_part:
|
||||
if not filename_matches_part:
|
||||
if sz_config.ext_match_strictness == "strict" or (
|
||||
sz_config.ext_match_strictness == "loose" and not filename_contains_part):
|
||||
# Log.Debug("%s doesn't match %s, skipping" % (helpers.unicodize(local_filename),
|
||||
|
||||
@@ -2,10 +2,17 @@
|
||||
import traceback
|
||||
import time
|
||||
|
||||
from support.config import config
|
||||
from support.helpers import get_plex_item_display_title, cast_bool
|
||||
import os
|
||||
|
||||
from babelfish import LanguageReverseError
|
||||
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.helpers import get_plex_item_display_title, cast_bool, get_language_from_stream
|
||||
from support.items import get_item
|
||||
from support.lib import Plex
|
||||
from support.storage import get_subtitle_storage
|
||||
from subzero.video import has_external_subtitle
|
||||
from subzero.language import Language
|
||||
|
||||
|
||||
def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_title=None, internal=False, external=True, languages=()):
|
||||
@@ -17,10 +24,59 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
else:
|
||||
item_title = get_plex_item_display_title(item, kind, section_title=section_title)
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load(rating_key)
|
||||
subtitle_storage.destroy()
|
||||
|
||||
subtitle_target_dir, tdir_is_absolute = config.subtitle_sub_dir
|
||||
|
||||
missing = set()
|
||||
languages_set = set([Language.fromietf(str(l)) for l in languages])
|
||||
for media in item.media:
|
||||
existing_subs = {"internal": [], "external": [], "count": 0}
|
||||
existing_subs = {"internal": [], "external": [], "own_external": [], "count": 0}
|
||||
for part in media.parts:
|
||||
|
||||
# did we already download an external subtitle before?
|
||||
if subtitle_target_dir and stored_subs:
|
||||
for language in languages_set:
|
||||
if has_external_subtitle(part.id, stored_subs, language):
|
||||
# check the existence of the actual subtitle file
|
||||
|
||||
# get media filename without extension
|
||||
part_basename = os.path.splitext(os.path.basename(part.file))[0]
|
||||
|
||||
# compute target directory for subtitle
|
||||
# fixme: move to central location
|
||||
if tdir_is_absolute:
|
||||
possible_subtitle_path_base = subtitle_target_dir
|
||||
else:
|
||||
possible_subtitle_path_base = os.path.join(os.path.dirname(part.file), subtitle_target_dir)
|
||||
|
||||
possible_subtitle_path_base = os.path.realpath(possible_subtitle_path_base)
|
||||
|
||||
# folder actually exists?
|
||||
if not os.path.isdir(possible_subtitle_path_base):
|
||||
continue
|
||||
|
||||
found_any = False
|
||||
for ext in config.subtitle_formats:
|
||||
if cast_bool(Prefs['subtitles.only_one']):
|
||||
possible_subtitle_path = os.path.join(possible_subtitle_path_base,
|
||||
u"%s.%s" % (part_basename, ext))
|
||||
else:
|
||||
possible_subtitle_path = os.path.join(possible_subtitle_path_base,
|
||||
u"%s.%s.%s" % (part_basename, language, ext))
|
||||
|
||||
# check for subtitle existence
|
||||
if os.path.isfile(possible_subtitle_path):
|
||||
found_any = True
|
||||
Log.Debug(u"Found: %s", possible_subtitle_path)
|
||||
break
|
||||
|
||||
if found_any:
|
||||
existing_subs["own_external"].append(language)
|
||||
existing_subs["count"] = existing_subs["count"] + 1
|
||||
|
||||
for stream in part.streams:
|
||||
if stream.stream_type == 3:
|
||||
if stream.index:
|
||||
@@ -28,25 +84,81 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
else:
|
||||
key = "external"
|
||||
|
||||
existing_subs[key].append(Locale.Language.Match(stream.language_code or ""))
|
||||
existing_subs["count"] = existing_subs["count"] + 1
|
||||
if not config.exotic_ext and stream.codec.lower() not in TEXT_SUBTITLE_EXTS:
|
||||
continue
|
||||
|
||||
# treat unknown language as lang1?
|
||||
if not stream.language_code and config.treat_und_as_first:
|
||||
lang = Language.fromietf(str(list(config.lang_list)[0]))
|
||||
|
||||
# we can't parse empty language codes
|
||||
elif not stream.language_code or not stream.codec:
|
||||
continue
|
||||
|
||||
else:
|
||||
# parse with internal language parser first
|
||||
try:
|
||||
lang = get_language_from_stream(stream.language_code)
|
||||
if not lang:
|
||||
if config.treat_und_as_first:
|
||||
lang = Language.fromietf(str(list(config.lang_list)[0]))
|
||||
else:
|
||||
continue
|
||||
|
||||
except (ValueError, LanguageReverseError):
|
||||
continue
|
||||
|
||||
if lang:
|
||||
# Log.Debug("Found babelfish language: %r", lang)
|
||||
existing_subs[key].append(lang)
|
||||
existing_subs["count"] = existing_subs["count"] + 1
|
||||
|
||||
missing_from_part = set([Language.fromietf(str(l)) for l in languages])
|
||||
if existing_subs["count"]:
|
||||
existing_flat = (existing_subs["internal"] if internal else []) + (existing_subs["external"] if external else [])
|
||||
languages_set = set(languages)
|
||||
if languages_set.issubset(existing_flat) or (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
|
||||
|
||||
# fixme: this is actually somewhat broken with IETF, as Plex doesn't store the country portion
|
||||
# (pt instead of pt-BR) inside the database. So it might actually download pt-BR if there's a local pt-BR
|
||||
# subtitle but not our own.
|
||||
existing_flat = set((existing_subs["internal"] if internal else [])
|
||||
+ (existing_subs["external"] if external else [])
|
||||
+ existing_subs["own_external"])
|
||||
|
||||
check_languages = set([Language.fromietf(str(l)) for l in languages])
|
||||
alpha3_map = {}
|
||||
if config.ietf_as_alpha3:
|
||||
for language in existing_flat:
|
||||
if language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
|
||||
for language in check_languages:
|
||||
if language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
|
||||
# compare sets of strings, not sets of different Language instances
|
||||
check_languages_str = set(str(l) for l in check_languages)
|
||||
existing_flat_str = set(str(l) for l in existing_flat)
|
||||
|
||||
if check_languages_str.issubset(existing_flat_str) or \
|
||||
(len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
|
||||
# all subs found
|
||||
#Log.Info(u"All subtitles exist for '%s'", item_title)
|
||||
continue
|
||||
|
||||
missing_from_part = languages_set - set(existing_flat)
|
||||
missing_from_part = set(Language.fromietf(l) for l in check_languages_str - existing_flat_str)
|
||||
if config.ietf_as_alpha3:
|
||||
for language in missing_from_part:
|
||||
language.country = alpha3_map.get(language.alpha3, None)
|
||||
|
||||
if missing_from_part:
|
||||
Log.Info(u"Subs still missing for '%s' (%s: %s): %s", item_title, rating_key, media.id,
|
||||
missing_from_part)
|
||||
missing.update(missing_from_part)
|
||||
if missing_from_part:
|
||||
Log.Info(u"Subs still missing for '%s' (%s: %s): %s", item_title, rating_key, media.id,
|
||||
missing_from_part)
|
||||
missing.update(missing_from_part)
|
||||
|
||||
if missing:
|
||||
# deduplicate
|
||||
missing = set(Language.fromietf(la) for la in set(str(l) for l in missing))
|
||||
return added_at, item_id, item_title, item, missing
|
||||
|
||||
|
||||
@@ -59,7 +171,7 @@ def items_get_all_missing_subs(items, sleep_after_request=False):
|
||||
kind=kind,
|
||||
added_at=added_at,
|
||||
section_title=section_title,
|
||||
languages=config.lang_list,
|
||||
languages=config.lang_list.copy(),
|
||||
internal=cast_bool(Prefs["subtitles.scan.embedded"]),
|
||||
external=cast_bool(Prefs["subtitles.scan.external"])
|
||||
)
|
||||
@@ -74,9 +186,7 @@ def items_get_all_missing_subs(items, sleep_after_request=False):
|
||||
|
||||
|
||||
def refresh_item(item):
    """
    asks the PMS to refresh the metadata of the given item, unless refreshing is disabled via config.no_refresh

    :param item: passed through unchanged to Plex["library/metadata"].refresh
    """
    if config.no_refresh:
        return

    Plex["library/metadata"].refresh(item)
|
||||
|
||||
|
||||
def refresh_items(items):
    """
    refreshes every item of the given iterable via refresh_item

    :param items: iterable of (item, title) pairs; only the item is used
    """
    for item, _title in items:
        refresh_item(item)
|
||||
|
||||
+200
-125
@@ -1,13 +1,12 @@
|
||||
# coding=utf-8
|
||||
|
||||
import os
|
||||
from urllib2 import URLError
|
||||
|
||||
import helpers
|
||||
from config import config
|
||||
from items import get_item
|
||||
from lib import get_intent, Plex
|
||||
from subzero.video import parse_video
|
||||
from lib import Plex
|
||||
from support.config import TEXT_SUBTITLE_EXTS, config
|
||||
|
||||
|
||||
def get_metadata_dict(item, part, add):
|
||||
data = {
|
||||
@@ -21,6 +20,55 @@ def get_metadata_dict(item, part, add):
|
||||
return data
|
||||
|
||||
|
||||
imdb_guid_identifier = "com.plexapp.agents.imdb://"
|
||||
tvdb_guid_identifier = "com.plexapp.agents.thetvdb://"
|
||||
|
||||
|
||||
def get_plexapi_stream_info(plex_item, part_id=None):
    """
    collects basic codec/channel/resolution info for one part of a plex item

    :param plex_item: object exposing .media; every media exposes .parts and the codec/channel/resolution attributes
    :param part_id: id of the wanted part, int or string; if falsy, the first part found is used
    :return: dict of the form {"stream": {...}}; the inner dict stays empty if no part matched
    """
    d = {"stream": {}}
    data = d["stream"]

    # compare ids as strings so that both int and str part ids match
    wanted_id = str(part_id) if part_id else None

    # find current part (first match wins)
    current_media, current_part = next(
        ((media, part) for media in plex_item.media for part in media.parts
         if wanted_id is None or str(part.id) == wanted_id),
        (None, None)
    )

    if not current_part:
        return d

    data["video_codec"] = current_media.video_codec

    # audio_codec is only set if the media reports one
    if current_media.audio_codec:
        audio_codec = current_media.audio_codec.upper()
        # DCA is reported as DTS
        data["audio_codec"] = "DTS" if audio_codec == "DCA" else audio_codec

    if current_media.audio_channels == 8:
        data["audio_channels"] = "7.1"
    elif current_media.audio_channels == 6:
        data["audio_channels"] = "5.1"
    else:
        data["audio_channels"] = "%s.0" % str(current_media.audio_channels)

    # iter streams; only the first video stream (stream_type 1) is considered
    for stream in current_part.streams:
        if stream.stream_type == 1:
            data["resolution"] = "%s%s" % (current_media.video_resolution,
                                           "i" if stream.scan_type != "progressive" else "p")
            break

    return d
|
||||
|
||||
|
||||
def media_to_videos(media, kind="series"):
|
||||
"""
|
||||
iterates through media and returns the associated parts (videos)
|
||||
@@ -30,36 +78,61 @@ def media_to_videos(media, kind="series"):
|
||||
"""
|
||||
videos = []
|
||||
|
||||
# this is a Show or a Movie object
|
||||
plex_item = get_item(media.id)
|
||||
year = plex_item.year
|
||||
original_title = plex_item.title_original
|
||||
|
||||
if kind == "series":
|
||||
for season in media.seasons:
|
||||
season_object = media.seasons[season]
|
||||
for episode in media.seasons[season].episodes:
|
||||
ep = media.seasons[season].episodes[episode]
|
||||
|
||||
tvdb_id = None
|
||||
series_tvdb_id = None
|
||||
if tvdb_guid_identifier in ep.guid:
|
||||
tvdb_id = ep.guid[len(tvdb_guid_identifier):].split("?")[0]
|
||||
series_tvdb_id = tvdb_id.split("/")[0]
|
||||
|
||||
# get plex item via API for additional metadata
|
||||
plex_episode = get_item(ep.id)
|
||||
stream_info = get_plexapi_stream_info(plex_episode)
|
||||
|
||||
for item in media.seasons[season].episodes[episode].items:
|
||||
for part in item.parts:
|
||||
videos.append(
|
||||
get_metadata_dict(plex_episode, part,
|
||||
{"plex_part": part, "type": "episode", "title": ep.title,
|
||||
"series": media.title, "id": ep.id,
|
||||
"series_id": media.id, "season_id": season_object.id,
|
||||
"episode": plex_episode.index, "season": plex_episode.season.index,
|
||||
"section": plex_episode.section.title
|
||||
})
|
||||
dict(stream_info, **{"plex_part": part, "type": "episode",
|
||||
"title": ep.title,
|
||||
"series": media.title, "id": ep.id, "year": year,
|
||||
"series_id": media.id,
|
||||
"season_id": season_object.id,
|
||||
"imdb_id": None, "series_tvdb_id": series_tvdb_id,
|
||||
"tvdb_id": tvdb_id,
|
||||
"original_title": original_title,
|
||||
"episode": plex_episode.index,
|
||||
"season": plex_episode.season.index,
|
||||
"section": plex_episode.section.title
|
||||
})
|
||||
)
|
||||
)
|
||||
else:
|
||||
plex_item = get_item(media.id)
|
||||
stream_info = get_plexapi_stream_info(plex_item)
|
||||
imdb_id = None
|
||||
if imdb_guid_identifier in media.guid:
|
||||
imdb_id = media.guid[len(imdb_guid_identifier):].split("?")[0]
|
||||
for item in media.items:
|
||||
for part in item.parts:
|
||||
videos.append(
|
||||
get_metadata_dict(plex_item, part, {"plex_part": part, "type": "movie",
|
||||
"title": media.title, "id": media.id,
|
||||
"series_id": None,
|
||||
"season_id": None,
|
||||
"section": plex_item.section.title})
|
||||
get_metadata_dict(plex_item, part, dict(stream_info, **{"plex_part": part, "type": "movie",
|
||||
"title": media.title, "id": media.id,
|
||||
"series_id": None, "year": year,
|
||||
"season_id": None, "imdb_id": imdb_id,
|
||||
"original_title": original_title,
|
||||
"series_tvdb_id": None, "tvdb_id": None,
|
||||
"section": plex_item.section.title})
|
||||
)
|
||||
)
|
||||
return videos
|
||||
|
||||
@@ -80,10 +153,9 @@ def get_stream_fps(streams):
|
||||
|
||||
|
||||
def get_media_item_ids(media, kind="series"):
|
||||
ids = []
|
||||
if kind == "movies":
|
||||
ids.append(media.id)
|
||||
else:
|
||||
# fixme: does this work correctly for full series force-refreshes and its intents?
|
||||
ids = [media.id]
|
||||
if kind == "series":
|
||||
for season in media.seasons:
|
||||
for episode in media.seasons[season].episodes:
|
||||
ids.append(media.seasons[season].episodes[episode].id)
|
||||
@@ -91,142 +163,145 @@ def get_media_item_ids(media, kind="series"):
|
||||
return ids
|
||||
|
||||
|
||||
def scan_video(plex_part, ignore_all=False, hints=None, rating_key=None):
|
||||
"""
|
||||
returnes a subliminal/guessit-refined parsed video
|
||||
:param plex_part:
|
||||
:param ignore_all:
|
||||
:param hints:
|
||||
:param rating_key:
|
||||
:return:
|
||||
"""
|
||||
embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
|
||||
external_subtitles = not ignore_all and Prefs['subtitles.scan.external']
|
||||
|
||||
if ignore_all:
|
||||
Log.Debug("Force refresh intended.")
|
||||
|
||||
Log.Debug("Scanning video: %s, subtitles=%s, embedded_subtitles=%s" % (
|
||||
plex_part.file, external_subtitles, embedded_subtitles))
|
||||
|
||||
known_embedded = []
|
||||
def get_all_parts(plex_item):
|
||||
parts = []
|
||||
for media in list(Plex["library"].metadata(rating_key))[0].media:
|
||||
for media in plex_item.media:
|
||||
parts += media.parts
|
||||
|
||||
plexpy_part = None
|
||||
for part in parts:
|
||||
if int(part.id) == int(plex_part.id):
|
||||
plexpy_part = part
|
||||
|
||||
# embedded subtitles
|
||||
if plexpy_part:
|
||||
for stream in plexpy_part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3:
|
||||
if (config.forced_only and getattr(stream, "forced")) or \
|
||||
(not config.forced_only and not getattr(stream, "forced")):
|
||||
|
||||
# embedded subtitle
|
||||
if not stream.stream_key:
|
||||
if config.exotic_ext or stream.codec in ("srt", "ass", "ssa"):
|
||||
lang_code = stream.language_code
|
||||
|
||||
# treat unknown language as lang1?
|
||||
if not lang_code and config.treat_und_as_first:
|
||||
lang_code = list(config.lang_list)[0].alpha3
|
||||
known_embedded.append(lang_code)
|
||||
else:
|
||||
Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)
|
||||
|
||||
try:
|
||||
# get basic video info scan (filename)
|
||||
video = parse_video(plex_part.file, hints, external_subtitles=external_subtitles,
|
||||
embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
|
||||
forced_only=config.forced_only, video_fps=plex_part.fps)
|
||||
|
||||
return video
|
||||
|
||||
except ValueError:
|
||||
Log.Warn("File could not be guessed by subliminal: %s" % plex_part.file)
|
||||
return parts
|
||||
|
||||
|
||||
def scan_videos(videos, kind="series", ignore_all=False):
|
||||
"""
|
||||
receives a list of videos containing dictionaries returned by media_to_videos
|
||||
:param videos:
|
||||
:param kind: series or movies
|
||||
:return: dictionary of subliminal.video.scan_video, key=subliminal scanned video, value=plex file part
|
||||
"""
|
||||
ret = {}
|
||||
for video in videos:
|
||||
intent = get_intent()
|
||||
force_refresh = intent.get("force", video["id"], video["series_id"], video["season_id"])
|
||||
Log.Debug("Determining force-refresh (video: %s, series: %s, season: %s), result: %s"
|
||||
% (video["id"], video["series_id"], video["season_id"], force_refresh))
|
||||
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, get_forced=None):
|
||||
streams = []
|
||||
has_unknown = False
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
|
||||
language = helpers.get_language_from_stream(stream.language_code)
|
||||
is_unknown = False
|
||||
found_requested_language = requested_language and requested_language == language
|
||||
is_forced = helpers.is_stream_forced(stream)
|
||||
|
||||
hints = helpers.get_item_hints(video)
|
||||
video["plex_part"].fps = get_stream_fps(video["plex_part"].streams)
|
||||
scanned_video = scan_video(video["plex_part"], ignore_all=force_refresh or ignore_all, hints=hints,
|
||||
rating_key=video["id"])
|
||||
if get_forced is not None:
|
||||
if (get_forced and not is_forced) or (not get_forced and is_forced):
|
||||
continue
|
||||
|
||||
if not scanned_video:
|
||||
continue
|
||||
if not language and config.treat_und_as_first:
|
||||
# only consider first unknown subtitle stream
|
||||
if has_unknown and skip_duplicate_unknown:
|
||||
continue
|
||||
|
||||
scanned_video.id = video["id"]
|
||||
part_metadata = video.copy()
|
||||
del part_metadata["plex_part"]
|
||||
scanned_video.plexapi_metadata = part_metadata
|
||||
ret[scanned_video] = video["plex_part"]
|
||||
return ret
|
||||
language = list(config.lang_list)[0]
|
||||
is_unknown = True
|
||||
has_unknown = True
|
||||
|
||||
if not requested_language or found_requested_language or has_unknown:
|
||||
streams.append({"stream": stream, "is_unknown": is_unknown, "language": language,
|
||||
"is_forced": is_forced})
|
||||
|
||||
if found_requested_language:
|
||||
break
|
||||
|
||||
return streams
|
||||
|
||||
|
||||
def get_plex_metadata(rating_key, part_id, item_type):
|
||||
def get_part(plex_item, part_id):
    """
    returns the part of plex_item with the given id

    :param plex_item: object exposing .media, each media exposing .parts
    :param part_id: id of the wanted part; compared as string, so int and str ids both work
    :return: the first matching part, or None if there is none
    """
    wanted_id = str(part_id)
    for media in plex_item.media:
        for part in media.parts:
            if str(part.id) == wanted_id:
                return part

    return None
|
||||
|
||||
|
||||
def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
|
||||
"""
|
||||
uses the Plex 3rd party API accessor to get metadata information
|
||||
|
||||
:param rating_key:
|
||||
:param rating_key: movie or episode
|
||||
:param part_id:
|
||||
:param item_type:
|
||||
:return:
|
||||
"""
|
||||
|
||||
try:
|
||||
plex_item = list(Plex["library"].metadata(rating_key))[0]
|
||||
except URLError:
|
||||
return None
|
||||
if not plex_item:
|
||||
plex_item = get_item(rating_key)
|
||||
|
||||
if not plex_item:
|
||||
return
|
||||
|
||||
# find current part
|
||||
current_part = None
|
||||
for media in plex_item.media:
|
||||
for part in media.parts:
|
||||
if str(part.id) == part_id:
|
||||
current_part = part
|
||||
current_part = get_part(plex_item, part_id)
|
||||
|
||||
if not current_part:
|
||||
raise helpers.PartUnknownException("Part unknown")
|
||||
|
||||
stream_info = get_plexapi_stream_info(plex_item, part_id)
|
||||
|
||||
# get normalized metadata
|
||||
# fixme: duplicated logic of media_to_videos
|
||||
if item_type == "episode":
|
||||
show = list(Plex["library"].metadata(plex_item.show.rating_key))[0]
|
||||
year = show.year
|
||||
tvdb_id = None
|
||||
series_tvdb_id = None
|
||||
original_title = show.title_original
|
||||
if tvdb_guid_identifier in plex_item.guid:
|
||||
tvdb_id = plex_item.guid[len(tvdb_guid_identifier):].split("?")[0]
|
||||
series_tvdb_id = tvdb_id.split("/")[0]
|
||||
metadata = get_metadata_dict(plex_item, current_part,
|
||||
{"plex_part": current_part, "type": "episode", "title": plex_item.title,
|
||||
"series": plex_item.show.title, "id": plex_item.rating_key,
|
||||
"series_id": plex_item.show.rating_key,
|
||||
"season_id": plex_item.season.rating_key,
|
||||
"season": plex_item.season.index,
|
||||
"episode": plex_item.index
|
||||
})
|
||||
dict(stream_info,
|
||||
**{"plex_part": current_part, "type": "episode", "title": plex_item.title,
|
||||
"series": plex_item.show.title, "id": plex_item.rating_key,
|
||||
"series_id": plex_item.show.rating_key,
|
||||
"season_id": plex_item.season.rating_key,
|
||||
"imdb_id": None,
|
||||
"year": year,
|
||||
"tvdb_id": tvdb_id,
|
||||
"series_tvdb_id": series_tvdb_id,
|
||||
"original_title": original_title,
|
||||
"season": plex_item.season.index,
|
||||
"episode": plex_item.index
|
||||
})
|
||||
)
|
||||
else:
|
||||
metadata = get_metadata_dict(plex_item, current_part, {"plex_part": current_part, "type": "movie",
|
||||
"title": plex_item.title, "id": plex_item.rating_key,
|
||||
"series_id": None,
|
||||
"season_id": None,
|
||||
"season": None,
|
||||
"episode": None,
|
||||
"section": plex_item.section.title})
|
||||
imdb_id = None
|
||||
original_title = plex_item.title_original
|
||||
if imdb_guid_identifier in plex_item.guid:
|
||||
imdb_id = plex_item.guid[len(imdb_guid_identifier):].split("?")[0]
|
||||
metadata = get_metadata_dict(plex_item, current_part,
|
||||
dict(stream_info, **{"plex_part": current_part, "type": "movie",
|
||||
"title": plex_item.title, "id": plex_item.rating_key,
|
||||
"series_id": None,
|
||||
"season_id": None,
|
||||
"imdb_id": imdb_id,
|
||||
"year": plex_item.year,
|
||||
"tvdb_id": None,
|
||||
"series_tvdb_id": None,
|
||||
"original_title": original_title,
|
||||
"season": None,
|
||||
"episode": None,
|
||||
"section": plex_item.section.title})
|
||||
)
|
||||
return metadata
|
||||
|
||||
|
||||
def get_blacklist_from_part_map(video_part_map, languages):
    """
    collects the blacklist entries stored for all given parts and languages

    :param video_part_map: dict, key=scanned video (carrying plexapi_metadata["item"]), value=plex part
    :param languages: iterable of languages to look up per part
    :return: list of (str, str) tuples built from the keys of every non-empty blacklist found
    """
    from support.storage import get_subtitle_storage
    subtitle_storage = get_subtitle_storage()
    blacklist = []
    try:
        for video, part in video_part_map.iteritems():
            stored_subs = subtitle_storage.load_or_new(video.plexapi_metadata["item"])
            for language in languages:
                # second member of the returned pair is not needed here
                current_bl, _ = stored_subs.get_blacklist(part.id, language)
                if not current_bl:
                    continue

                blacklist.extend((str(a), str(b)) for a, b in current_bl.keys())
    finally:
        # release the storage even if loading or reading a blacklist failed
        subtitle_storage.destroy()

    return blacklist
|
||||
|
||||
|
||||
class PMSMediaProxy(object):
|
||||
"""
|
||||
Proxy object for getting data from a mediatree items "internally" via the PMS
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
import helpers
|
||||
from babelfish.exceptions import LanguageError
|
||||
|
||||
from support.lib import Plex, get_intent
|
||||
from support.plex_media import get_stream_fps
|
||||
from support.storage import get_subtitle_storage
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
|
||||
from subzero.video import parse_video, set_existing_languages
|
||||
from subzero.language import language_from_stream
|
||||
|
||||
|
||||
def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, providers=None, skip_hashing=False):
    """
    returns a parsed video for the part referenced by pms_video_info

    :param pms_video_info: dict holding the part to scan under the key "plex_part"
    :param ignore_all: if set, embedded and external subtitles are not considered and no existing languages are
                       set on the returned video
    :param hints: passed on to parse_video
    :param rating_key: used to look up the item's parts and its stored subtitles
    :param providers: passed on to parse_video
    :param skip_hashing: passed on to parse_video; hashing is also skipped if config.low_impact_mode is set
    :return: the parsed video, or None if a ValueError was raised while parsing
    """
    embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
    external_subtitles = not ignore_all and Prefs['subtitles.scan.external']

    plex_part = pms_video_info["plex_part"]

    if ignore_all:
        Log.Debug("Force refresh intended.")

    Log.Debug("Scanning video: %s, external_subtitles=%s, embedded_subtitles=%s" % (
        plex_part.file, external_subtitles, embedded_subtitles))

    known_embedded = []
    parts = []
    for media in list(Plex["library"].metadata(rating_key))[0].media:
        parts += media.parts

    plexpy_part = None
    for part in parts:
        if int(part.id) == int(plex_part.id):
            plexpy_part = part
            break

    # embedded subtitles
    # fixme: skip the whole scanning process if known_embedded == wanted languages?
    if plexpy_part:
        if embedded_subtitles:
            for stream in plexpy_part.streams:
                # subtitle stream
                if stream.stream_type != 3:
                    continue

                is_forced = helpers.is_stream_forced(stream)

                # only consider streams whose forced state matches the forced_only setting
                if not ((config.forced_only and is_forced) or
                        (not config.forced_only and not is_forced)):
                    continue

                # embedded subtitle
                # fixme: tap into external subtitles here instead of scanning for ourselves later?
                if not (stream.codec and getattr(stream, "index", None)):
                    continue

                if not (config.exotic_ext or stream.codec.lower() in config.text_based_formats):
                    continue

                lang = None
                try:
                    lang = language_from_stream(stream.language_code)
                except LanguageError:
                    Log.Debug("Couldn't detect embedded subtitle stream language: %s", stream.language_code)

                # treat unknown language as lang1?
                if not lang and config.treat_und_as_first:
                    lang = list(config.lang_list)[0]

                if lang:
                    known_embedded.append(lang.alpha3)
    else:
        Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)

    Log.Debug("Known embedded: %r", known_embedded)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load(rating_key)
    subtitle_storage.destroy()

    try:
        # get basic video info scan (filename)
        video = parse_video(plex_part.file, hints, skip_hashing=config.low_impact_mode or skip_hashing,
                            providers=providers)

        if not ignore_all:
            set_existing_languages(video, pms_video_info, external_subtitles=external_subtitles,
                                   embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
                                   forced_only=config.forced_only, stored_subs=stored_subs,
                                   languages=config.lang_list, only_one=config.only_one)

        # add video fps info
        video.fps = plex_part.fps
        return video

    except ValueError:
        Log.Warn("File could not be guessed: %s: %s", plex_part.file, traceback.format_exc())

    return None
|
||||
|
||||
|
||||
def scan_videos(videos, ignore_all=False, providers=None, skip_hashing=False):
    """
    receives a list of videos containing dictionaries returned by media_to_videos

    :param videos: list of video dicts; each needs at least the keys "id", "series_id", "season_id", "type"
                   and "plex_part"
    :param ignore_all: passed on to scan_video; a "force" intent found for a video has the same effect
    :param providers: providers to use; if falsy, they are determined per video via config.get_providers
    :param skip_hashing: passed on to scan_video
    :return: dictionary, key=scanned video, value=plex file part; videos scan_video returned nothing for are
             left out
    """
    ret = {}
    for video in videos:
        intent = get_intent()
        force_refresh = intent.get("force", video["id"], video["series_id"], video["season_id"])
        Log.Debug("Determining force-refresh (video: %s, series: %s, season: %s), result: %s"
                  % (video["id"], video["series_id"], video["season_id"], force_refresh))

        hints = helpers.get_item_hints(video)
        plex_part = video["plex_part"]
        plex_part.fps = get_stream_fps(plex_part.streams)
        p = providers or config.get_providers(media_type="series" if video["type"] == "episode" else "movies")
        scanned_video = scan_video(video, ignore_all=force_refresh or ignore_all, hints=hints,
                                   rating_key=video["id"], providers=p,
                                   skip_hashing=skip_hashing)

        if not scanned_video:
            continue

        scanned_video.id = video["id"]

        # attach the video dict without its part object
        part_metadata = video.copy()
        del part_metadata["plex_part"]
        scanned_video.plexapi_metadata = part_metadata
        scanned_video.ignore_all = force_refresh
        ret[scanned_video] = plex_part

    return ret
|
||||
@@ -4,21 +4,24 @@ import datetime
|
||||
import logging
|
||||
import traceback
|
||||
|
||||
from config import config
|
||||
|
||||
def parse_frequency(s):
|
||||
if s == "never" or s == None:
|
||||
if s == "never" or s is None:
|
||||
return None, None
|
||||
kind, num, unit = s.split()
|
||||
return int(num), unit
|
||||
|
||||
|
||||
class DefaultScheduler(object):
|
||||
thread = None
|
||||
queue_thread = None
|
||||
scheduler_thread = None
|
||||
running = False
|
||||
registry = None
|
||||
|
||||
def __init__(self):
|
||||
self.thread = None
|
||||
self.queue_thread = None
|
||||
self.scheduler_thread = None
|
||||
self.running = False
|
||||
self.registry = []
|
||||
|
||||
@@ -47,6 +50,7 @@ class DefaultScheduler(object):
|
||||
if Dict["tasks"]:
|
||||
for task_name in Dict["tasks"].keys():
|
||||
if task_name == "queue":
|
||||
Dict["tasks"][task_name] = []
|
||||
continue
|
||||
|
||||
Dict["tasks"][task_name]["data"] = {}
|
||||
@@ -58,6 +62,7 @@ class DefaultScheduler(object):
|
||||
raise NotImplementedError("Task missing! %s" % name)
|
||||
|
||||
Dict["tasks"][name]["data"] = {}
|
||||
Dict["tasks"][name]["running"] = False
|
||||
Dict.Save()
|
||||
Log.Debug("Task data cleared: %s", name)
|
||||
|
||||
@@ -68,7 +73,7 @@ class DefaultScheduler(object):
|
||||
# discover tasks;
|
||||
self.tasks = {}
|
||||
for cls in self.registry:
|
||||
task = cls(self)
|
||||
task = cls()
|
||||
try:
|
||||
task_frequency = Prefs["scheduler.tasks.%s.frequency" % task.name]
|
||||
except KeyError:
|
||||
@@ -78,7 +83,8 @@ class DefaultScheduler(object):
|
||||
|
||||
def run(self):
|
||||
self.running = True
|
||||
self.thread = Thread.Create(self.worker)
|
||||
self.scheduler_thread = Thread.Create(self.scheduler_worker)
|
||||
self.queue_thread = Thread.Create(self.queue_worker)
|
||||
|
||||
def stop(self):
|
||||
self.running = False
|
||||
@@ -113,6 +119,7 @@ class DefaultScheduler(object):
|
||||
|
||||
def run_task(self, name, *args, **kwargs):
|
||||
task = self.tasks[name]["task"]
|
||||
|
||||
if task.running:
|
||||
Log.Debug("Scheduler: Not running %s, as it's currently running.", name)
|
||||
return False
|
||||
@@ -124,8 +131,12 @@ class DefaultScheduler(object):
|
||||
except Exception, e:
|
||||
Log.Error("Scheduler: Something went wrong when running %s: %s", name, traceback.format_exc())
|
||||
finally:
|
||||
task.post_run(Dict["tasks"][name]["data"])
|
||||
try:
|
||||
task.post_run(Dict["tasks"][name]["data"])
|
||||
except:
|
||||
Log.Error("Scheduler: task.post_run failed for %s: %s", name, traceback.format_exc())
|
||||
Dict.Save()
|
||||
config.sync_cache()
|
||||
|
||||
def dispatch_task(self, *args, **kwargs):
|
||||
if "queue" not in Dict["tasks"]:
|
||||
@@ -134,8 +145,12 @@ class DefaultScheduler(object):
|
||||
Dict["tasks"]["queue"].append((args, kwargs))
|
||||
|
||||
def signal(self, name, *args, **kwargs):
|
||||
for task_name, info in self.tasks.iteritems():
|
||||
task = info["task"]
|
||||
for task_name in self.tasks.keys():
|
||||
task = self.task(task_name)
|
||||
if not task:
|
||||
Log.Error("Scheduler: Task %s not found (?!)" % task_name)
|
||||
continue
|
||||
|
||||
if not task.periodic:
|
||||
continue
|
||||
|
||||
@@ -153,7 +168,7 @@ class DefaultScheduler(object):
|
||||
continue
|
||||
Log.Debug("Scheduler: Not sending signal %s to task %s, because: not running", name, task_name)
|
||||
|
||||
def worker(self):
|
||||
def queue_worker(self):
|
||||
Thread.Sleep(10.0)
|
||||
while 1:
|
||||
if not self.running:
|
||||
@@ -166,13 +181,25 @@ class DefaultScheduler(object):
|
||||
Dict["tasks"]["queue"] = []
|
||||
Dict.Save()
|
||||
for args, kwargs in queue:
|
||||
Log.Debug("Dispatching single task: %s, %s", args, kwargs)
|
||||
Log.Debug("Queue: Dispatching single task: %s, %s", args, kwargs)
|
||||
Thread.Create(self.run_task, True, *args, **kwargs)
|
||||
Thread.Sleep(5.0)
|
||||
|
||||
Thread.Sleep(1)
|
||||
|
||||
def scheduler_worker(self):
|
||||
Thread.Sleep(10.0)
|
||||
while 1:
|
||||
if not self.running:
|
||||
break
|
||||
|
||||
# scheduled tasks
|
||||
for name, info in self.tasks.iteritems():
|
||||
for name in self.tasks.keys():
|
||||
now = datetime.datetime.now()
|
||||
info = self.tasks.get(name)
|
||||
if not info:
|
||||
Log.Error("Scheduler: Task %s not found (?!)" % name)
|
||||
continue
|
||||
task = info["task"]
|
||||
|
||||
if name not in Dict["tasks"] or not task.periodic:
|
||||
@@ -185,11 +212,19 @@ class DefaultScheduler(object):
|
||||
if not frequency_num:
|
||||
continue
|
||||
|
||||
# run legacy SARAM once
|
||||
if name == "SearchAllRecentlyAddedMissing" and ("hasRunLSARAM" not in Dict or not Dict["hasRunLSARAM"]):
|
||||
task = self.tasks["LegacySearchAllRecentlyAddedMissing"]["task"]
|
||||
task.last_run = None
|
||||
name = "LegacySearchAllRecentlyAddedMissing"
|
||||
Dict["hasRunLSARAM"] = True
|
||||
Dict.Save()
|
||||
|
||||
if not task.last_run or (task.last_run + datetime.timedelta(**{frequency_key: frequency_num}) <= now):
|
||||
# fixme: scheduled tasks run synchronously. is this the best idea?
|
||||
#Thread.Create(self.run_task, True, name)
|
||||
Thread.Create(self.run_task, True, name)
|
||||
#Thread.Sleep(5.0)
|
||||
self.run_task(name)
|
||||
#self.run_task(name)
|
||||
Thread.Sleep(5.0)
|
||||
|
||||
Thread.Sleep(1)
|
||||
|
||||
@@ -4,9 +4,12 @@ import datetime
|
||||
import os
|
||||
import pprint
|
||||
import copy
|
||||
import traceback
|
||||
import types
|
||||
|
||||
import subliminal
|
||||
from subliminal_patch.core import save_subtitles as subliminal_save_subtitles
|
||||
from subzero.subtitle_storage import StoredSubtitlesManager
|
||||
from subzero.lib.io import FileIO
|
||||
|
||||
from subtitlehelpers import force_utf8
|
||||
from config import config
|
||||
@@ -16,83 +19,57 @@ from support.items import get_item
|
||||
|
||||
|
||||
def get_subtitle_storage():
|
||||
return StoredSubtitlesManager(Data, get_item)
|
||||
return StoredSubtitlesManager(Data, Thread, get_item)
|
||||
|
||||
|
||||
def whack_missing_parts(scanned_video_part_map, existing_parts=None):
|
||||
"""
|
||||
cleans out our internal storage's video parts (parts may get updated/deleted/whatever)
|
||||
:param existing_parts: optional list of part ids known
|
||||
:param scanned_video_part_map: videos to check for
|
||||
:return:
|
||||
"""
|
||||
# shortcut
|
||||
|
||||
if "subs" not in Dict:
|
||||
return
|
||||
|
||||
if not existing_parts:
|
||||
existing_parts = []
|
||||
for part in scanned_video_part_map.viewvalues():
|
||||
existing_parts.append(str(part.id))
|
||||
|
||||
whacked_parts = False
|
||||
for video in scanned_video_part_map.keys():
|
||||
video_id = str(video.id)
|
||||
if video_id not in Dict["subs"]:
|
||||
continue
|
||||
|
||||
parts = Dict["subs"][video_id].keys()
|
||||
|
||||
for part_id in parts:
|
||||
part_id = str(part_id)
|
||||
if part_id not in existing_parts:
|
||||
Log.Info("Whacking part %s in internal storage of video %s (%s, %s)", part_id, video_id,
|
||||
repr(existing_parts), repr(parts))
|
||||
del Dict["subs"][video_id][part_id]
|
||||
whacked_parts = True
|
||||
|
||||
if whacked_parts:
|
||||
Dict.Save()
|
||||
|
||||
|
||||
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a"):
|
||||
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a", set_current=True):
|
||||
"""
|
||||
stores information about downloaded subtitles in plex's Dict()
|
||||
"""
|
||||
existing_parts = []
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
part = scanned_video_part_map[video]
|
||||
part_id = str(part.id)
|
||||
video_id = str(video.id)
|
||||
plex_item = get_item(video_id)
|
||||
if not plex_item:
|
||||
Log.Warning("Plex item not found: %s", video_id)
|
||||
continue
|
||||
|
||||
metadata = video.plexapi_metadata
|
||||
title = get_title_for_video_metadata(metadata)
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load_or_new(plex_item)
|
||||
stored_subs = subtitle_storage.load(video_id)
|
||||
is_new = False
|
||||
if not stored_subs:
|
||||
is_new = True
|
||||
Log.Debug(u"Creating new subtitle storage: %s, %s", video_id, part_id)
|
||||
stored_subs = subtitle_storage.new(plex_item)
|
||||
|
||||
existing_parts.append(part_id)
|
||||
|
||||
stored_any = False
|
||||
for subtitle in video_subtitles:
|
||||
lang = Locale.Language.Match(subtitle.language.alpha2)
|
||||
Log.Debug(u"Adding subtitle to storage: %s, %s, %s" % (video_id, part_id, title))
|
||||
ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode)
|
||||
lang = str(subtitle.language)
|
||||
subtitle.normalize()
|
||||
Log.Debug(u"Adding subtitle to storage: %s, %s, %s, %s, %s" % (video_id, part_id, lang, title,
|
||||
subtitle.guess_encoding()))
|
||||
|
||||
last_mod = None
|
||||
if subtitle.storage_path:
|
||||
last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(subtitle.storage_path))
|
||||
|
||||
ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode, last_mod=last_mod,
|
||||
set_current=set_current)
|
||||
|
||||
if ret_val:
|
||||
Log.Debug("Subtitle stored")
|
||||
stored_any = True
|
||||
|
||||
else:
|
||||
Log.Debug("Subtitle already existing in storage")
|
||||
|
||||
if stored_any:
|
||||
if is_new or video_subtitles:
|
||||
Log.Debug("Saving subtitle storage for %s" % video_id)
|
||||
subtitle_storage.save(stored_subs)
|
||||
|
||||
#if existing_parts:
|
||||
# whack_missing_parts(scanned_video_part_map, existing_parts=existing_parts)
|
||||
subtitle_storage.destroy()
|
||||
|
||||
|
||||
def reset_storage(key):
|
||||
@@ -108,46 +85,57 @@ def reset_storage(key):
|
||||
|
||||
|
||||
def log_storage(key):
|
||||
if not key:
|
||||
Log.Debug(pprint.pformat(getattr(Dict, "_dict")))
|
||||
if key in Dict:
|
||||
Log.Debug(pprint.pformat(Dict[key]))
|
||||
|
||||
|
||||
def save_subtitles_to_file(subtitles):
|
||||
def get_target_folder(file_path):
|
||||
fld = None
|
||||
fld_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
|
||||
if Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
|
||||
if fld_custom or Prefs["subtitles.save.subFolder"] != "current folder":
|
||||
# specific subFolder requested, create it if it doesn't exist
|
||||
fld_base = os.path.split(file_path)[0]
|
||||
if fld_custom:
|
||||
if fld_custom.startswith("/"):
|
||||
# absolute folder
|
||||
fld = fld_custom
|
||||
else:
|
||||
fld = os.path.join(fld_base, fld_custom)
|
||||
else:
|
||||
fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
|
||||
fld = force_unicode(fld)
|
||||
if not os.path.exists(fld):
|
||||
os.makedirs(fld)
|
||||
return fld
|
||||
|
||||
|
||||
def save_subtitles_to_file(subtitles, tags=None, forced_tag=None):
|
||||
forced_tag = forced_tag or config.forced_only
|
||||
for video, video_subtitles in subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
|
||||
fld = None
|
||||
if fld_custom or Prefs["subtitles.save.subFolder"] != "current folder":
|
||||
# specific subFolder requested, create it if it doesn't exist
|
||||
fld_base = os.path.split(video.name)[0]
|
||||
if fld_custom:
|
||||
if fld_custom.startswith("/"):
|
||||
# absolute folder
|
||||
fld = fld_custom
|
||||
else:
|
||||
fld = os.path.join(fld_base, fld_custom)
|
||||
else:
|
||||
fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
|
||||
fld = force_unicode(fld)
|
||||
if not os.path.exists(fld):
|
||||
os.makedirs(fld)
|
||||
subliminal.save_subtitles(video, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
|
||||
encode_with=force_utf8 if config.enforce_encoding else None,
|
||||
chmod=config.chmod, forced_tag=config.forced_only, path_decoder=force_unicode,
|
||||
debug_mods=config.debug_mods)
|
||||
if not isinstance(video, types.StringTypes):
|
||||
file_path = video.name
|
||||
else:
|
||||
file_path = video
|
||||
|
||||
fld = get_target_folder(file_path)
|
||||
subliminal_save_subtitles(file_path, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
|
||||
chmod=config.chmod, forced_tag=forced_tag, path_decoder=force_unicode,
|
||||
debug_mods=config.debug_mods, formats=config.subtitle_formats, tags=tags)
|
||||
return True
|
||||
|
||||
|
||||
def save_subtitles_to_metadata(videos, subtitles):
|
||||
def save_subtitles_to_metadata(videos, subtitles, is_forced=False):
|
||||
for video, video_subtitles in subtitles.items():
|
||||
mediaPart = videos[video]
|
||||
for subtitle in video_subtitles:
|
||||
content = force_utf8(subtitle.get_modified_text(debug=config.debug_mods)) if config.enforce_encoding else \
|
||||
subtitle.get_modified_content(debug=config.debug_mods)
|
||||
content = subtitle.get_modified_content(debug=config.debug_mods)
|
||||
|
||||
if not isinstance(mediaPart, Framework.api.agentkit.MediaPart):
|
||||
# we're being handed a Plex.py model instance here, not an internal PMS MediaPart object.
|
||||
@@ -155,14 +143,19 @@ def save_subtitles_to_metadata(videos, subtitles):
|
||||
mp = PMSMediaProxy(video.id).get_part(mediaPart.id)
|
||||
else:
|
||||
mp = mediaPart
|
||||
mp.subtitles[Locale.Language.Match(subtitle.language.alpha2)][subtitle.id] = Proxy.Media(content, ext="srt")
|
||||
pm = Proxy.Media(content, ext="srt", forced="1" if is_forced else None)
|
||||
lang = Locale.Language.Match(subtitle.language.alpha2)
|
||||
mp.subtitles[lang].validate_keys({})
|
||||
mp.subtitles[lang]["subzero"] = pm
|
||||
return True
|
||||
|
||||
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None):
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None,
|
||||
set_current=True, is_forced=False):
|
||||
"""
|
||||
|
||||
:param scanned_video_part_map:
|
||||
:param set_current: save the subtitle as the current one
|
||||
:param scanned_video_part_map:
|
||||
:param downloaded_subtitles:
|
||||
:param mode:
|
||||
:param bare_save: don't trigger anything; don't store information
|
||||
@@ -172,6 +165,8 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_
|
||||
meta_fallback = False
|
||||
save_successful = False
|
||||
|
||||
# big fixme: scanned_video_part_map isn't needed to the current extent. rewrite.
|
||||
|
||||
if mods:
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
if not video_subtitles:
|
||||
@@ -183,31 +178,67 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_
|
||||
subtitle.plex_media_fps = video.fps
|
||||
|
||||
storage = "metadata"
|
||||
if Prefs['subtitles.save.filesystem']:
|
||||
save_to_fs = cast_bool(Prefs['subtitles.save.filesystem'])
|
||||
if save_to_fs:
|
||||
storage = "filesystem"
|
||||
try:
|
||||
Log.Debug("Using filesystem as subtitle storage")
|
||||
save_subtitles_to_file(downloaded_subtitles)
|
||||
except OSError:
|
||||
if Prefs["subtitles.save.metadata_fallback"]:
|
||||
meta_fallback = True
|
||||
|
||||
if set_current:
|
||||
if save_to_fs:
|
||||
try:
|
||||
Log.Debug("Using filesystem as subtitle storage")
|
||||
save_subtitles_to_file(downloaded_subtitles, forced_tag=is_forced)
|
||||
except OSError:
|
||||
if cast_bool(Prefs["subtitles.save.metadata_fallback"]):
|
||||
meta_fallback = True
|
||||
storage = "metadata"
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
save_successful = True
|
||||
save_successful = True
|
||||
|
||||
if not Prefs['subtitles.save.filesystem'] or meta_fallback:
|
||||
if meta_fallback:
|
||||
Log.Debug("Using metadata as subtitle storage, because filesystem storage failed")
|
||||
else:
|
||||
Log.Debug("Using metadata as subtitle storage")
|
||||
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles)
|
||||
if not save_to_fs or meta_fallback:
|
||||
if meta_fallback:
|
||||
Log.Debug("Using metadata as subtitle storage, because filesystem storage failed")
|
||||
else:
|
||||
Log.Debug("Using metadata as subtitle storage")
|
||||
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles,
|
||||
is_forced=is_forced)
|
||||
|
||||
if not bare_save and save_successful and config.notify_executable:
|
||||
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
|
||||
if not bare_save and save_successful and config.notify_executable:
|
||||
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
|
||||
|
||||
if not bare_save and save_successful:
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode)
|
||||
if not bare_save and save_successful or not set_current:
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode, set_current=set_current)
|
||||
|
||||
return save_successful
|
||||
|
||||
|
||||
def get_pack_id(subtitle):
|
||||
return "%s_%s" % (subtitle.provider_name, subtitle.numeric_id)
|
||||
|
||||
|
||||
def get_pack_data(subtitle):
|
||||
subtitle_id = get_pack_id(subtitle)
|
||||
|
||||
archive = os.path.join(config.pack_cache_dir, subtitle_id + ".archive")
|
||||
if os.path.isfile(archive):
|
||||
Log.Info("Loading archive from pack cache: %s", subtitle_id)
|
||||
try:
|
||||
data = FileIO.read(archive, 'rb')
|
||||
|
||||
return data
|
||||
except:
|
||||
Log.Error("Couldn't load archive from pack cache: %s: %s", subtitle_id, traceback.format_exc())
|
||||
|
||||
|
||||
def store_pack_data(subtitle, data):
|
||||
subtitle_id = get_pack_id(subtitle)
|
||||
|
||||
archive = os.path.join(config.pack_cache_dir, subtitle_id + ".archive")
|
||||
|
||||
Log.Info("Storing archive in pack cache: %s", subtitle_id)
|
||||
try:
|
||||
FileIO.write(archive, data, 'wb')
|
||||
|
||||
except:
|
||||
Log.Error("Couldn't store archive in pack cache: %s: %s", subtitle_id, traceback.format_exc())
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# coding=utf-8
|
||||
|
||||
import re, os
|
||||
import config
|
||||
import helpers
|
||||
|
||||
from config import config, SUBTITLE_EXTS, TEXT_SUBTITLE_EXTS
|
||||
from bs4 import UnicodeDammit
|
||||
|
||||
|
||||
@@ -86,11 +86,11 @@ class VobSubSubtitleHelper(SubtitleHelper):
|
||||
|
||||
|
||||
IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$"
|
||||
ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2})?$")
|
||||
ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
|
||||
|
||||
|
||||
def match_ietf_language(s):
|
||||
language_match = re.match(".+\.([^\.]+)$" if not helpers.cast_bool(Prefs["subtitles.language.ietf"])
|
||||
language_match = re.match(".+\.([^\.]+)$" if not helpers.cast_bool(Prefs["subtitles.language.ietf_display"])
|
||||
else IETF_MATCH, s)
|
||||
if language_match and len(language_match.groups()) == 1:
|
||||
language = language_match.groups()[0]
|
||||
@@ -102,7 +102,7 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
@classmethod
|
||||
def is_helper_for(cls, filename):
|
||||
(file, file_extension) = os.path.splitext(filename)
|
||||
return file_extension.lower()[1:] in config.SUBTITLE_EXTS
|
||||
return file_extension.lower()[1:] in SUBTITLE_EXTS
|
||||
|
||||
def process_subtitles(self, part):
|
||||
|
||||
@@ -120,21 +120,29 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
forced = ''
|
||||
default = ''
|
||||
split_tag = file.rsplit('.', 1)
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'custom']:
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'embedded-forced',
|
||||
'custom']:
|
||||
file = split_tag[0]
|
||||
sub_tag = split_tag[1].lower()
|
||||
# don't do anything with 'normal', we don't need it
|
||||
if 'forced' == split_tag[1].lower():
|
||||
if 'forced' in sub_tag:
|
||||
forced = '1'
|
||||
if 'default' == split_tag[1].lower():
|
||||
elif 'default' == sub_tag:
|
||||
default = '1'
|
||||
|
||||
# Attempt to extract the language from the filename (e.g. Avatar (2009).eng)
|
||||
# IETF support thanks to
|
||||
# https://github.com/hpsbranco/LocalMedia.bundle/commit/4fad9aefedece78a1fa96401304351347f644369
|
||||
language = Locale.Language.Match(match_ietf_language(file))
|
||||
lang_part = match_ietf_language(file)
|
||||
if lang_part != file:
|
||||
language = Locale.Language.Match(lang_part)
|
||||
elif config.only_one:
|
||||
language = Locale.Language.Match(list(config.lang_list)[0].alpha2)
|
||||
else:
|
||||
language = Locale.Language.Match("xx")
|
||||
|
||||
# skip non-SRT if wanted
|
||||
if not helpers.cast_bool(Prefs["subtitles.scan.exotic_ext"]) and ext not in ["srt", "ass", "ssa"]:
|
||||
if not config.exotic_ext and ext not in TEXT_SUBTITLE_EXTS:
|
||||
return lang_sub_map
|
||||
|
||||
codec = None
|
||||
@@ -157,6 +165,7 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
Log("An error occurred while attempting to parse the subtitle file, skipping... : " + self.filename)
|
||||
return lang_sub_map
|
||||
|
||||
# fixme: re-add vtt once Plex Inc. fixes this line in LocalMedia.bundle
|
||||
if codec is None and ext in ['ass', 'ssa', 'smi', 'srt', 'psb']:
|
||||
codec = ext.replace('ass', 'ssa')
|
||||
|
||||
@@ -174,19 +183,20 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
|
||||
def get_subtitles_from_metadata(part):
|
||||
subs = {}
|
||||
for language in part.subtitles:
|
||||
subs[language] = []
|
||||
for key, proxy in getattr(part.subtitles[language], "_proxies").iteritems():
|
||||
if not proxy or not len(proxy) >= 5:
|
||||
Log.Debug("Can't parse metadata: %s" % repr(proxy))
|
||||
continue
|
||||
if hasattr(part, "subtitles") and part.subtitles:
|
||||
for language in part.subtitles:
|
||||
subs[language] = []
|
||||
for key, proxy in getattr(part.subtitles[language], "_proxies").iteritems():
|
||||
if not proxy or not len(proxy) >= 5:
|
||||
Log.Debug("Can't parse metadata: %s" % repr(proxy))
|
||||
continue
|
||||
|
||||
p_type = proxy[0]
|
||||
p_type = proxy[0]
|
||||
|
||||
if p_type == "Media":
|
||||
# metadata subtitle
|
||||
Log.Debug(u"Found metadata subtitle: %s, %s" % (language, repr(proxy)))
|
||||
subs[language].append(key)
|
||||
if p_type == "Media":
|
||||
# metadata subtitle
|
||||
Log.Debug(u"Found metadata subtitle: %s, %s" % (language, repr(proxy)))
|
||||
subs[language] = [key]
|
||||
return subs
|
||||
|
||||
|
||||
|
||||
+611
-229
File diff suppressed because it is too large
Load Diff
+219
-56
@@ -1,6 +1,6 @@
|
||||
[
|
||||
{
|
||||
"id": "langPref1",
|
||||
"id": "langPref1a",
|
||||
"label": "Subtitle Language (1)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
@@ -40,6 +40,8 @@
|
||||
"ro",
|
||||
"ru",
|
||||
"sr",
|
||||
"sr-cyrl",
|
||||
"sr-latn",
|
||||
"sk",
|
||||
"sl",
|
||||
"es",
|
||||
@@ -48,12 +50,14 @@
|
||||
"tr",
|
||||
"uk",
|
||||
"vi",
|
||||
"hr"
|
||||
"hr",
|
||||
"zh-hans",
|
||||
"zh-hant"
|
||||
],
|
||||
"default": "en"
|
||||
},
|
||||
{
|
||||
"id": "langPref2",
|
||||
"id": "langPref2a",
|
||||
"label": "Subtitle Language (2)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
@@ -94,6 +98,8 @@
|
||||
"ro",
|
||||
"ru",
|
||||
"sr",
|
||||
"sr-cyrl",
|
||||
"sr-latn",
|
||||
"sk",
|
||||
"sl",
|
||||
"es",
|
||||
@@ -102,12 +108,14 @@
|
||||
"tr",
|
||||
"uk",
|
||||
"vi",
|
||||
"hr"
|
||||
"hr",
|
||||
"zh-hans",
|
||||
"zh-hant"
|
||||
],
|
||||
"default": "None"
|
||||
},
|
||||
{
|
||||
"id": "langPref3",
|
||||
"id": "langPref3a",
|
||||
"label": "Subtitle Language (3)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
@@ -148,6 +156,8 @@
|
||||
"ro",
|
||||
"ru",
|
||||
"sr",
|
||||
"sr-cyrl",
|
||||
"sr-latn",
|
||||
"sk",
|
||||
"sl",
|
||||
"es",
|
||||
@@ -156,7 +166,9 @@
|
||||
"tr",
|
||||
"uk",
|
||||
"vi",
|
||||
"hr"
|
||||
"hr",
|
||||
"zh-hans",
|
||||
"zh-hant"
|
||||
],
|
||||
"default": "None"
|
||||
},
|
||||
@@ -173,11 +185,17 @@
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.language.ietf",
|
||||
"label": "Treat IETF language tags as ISO 639-1 (e.g. pt-BR = pt)",
|
||||
"id": "subtitles.language.ietf_display",
|
||||
"label": "Display languages with country attribute as ISO 639-1 (e.g. pt-BR = pt)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.language.ietf_normalize",
|
||||
"label": "Treat languages with country attribute as ISO 639-1 (e.g. don't download pt-BR if pt subtitle exists)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.only_one",
|
||||
"label": "Restrict to one language (skips adding \".lang.\" to the subtitle filename; only uses \"Subtitle Language (1)\")",
|
||||
@@ -190,6 +208,50 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "media_rename1",
|
||||
"label": "I rename my files using",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"Sonarr/Radarr (fill api info below)",
|
||||
"Filebot",
|
||||
"Sonarr/Radarr/Filebot",
|
||||
"Symlink to original file",
|
||||
"I keep the original filenames",
|
||||
"none of the above"
|
||||
],
|
||||
"default": "I keep the original filenames"
|
||||
},
|
||||
{
|
||||
"id": "use_file_info_file",
|
||||
"label": "Retrieve original filename from .file_info/file_info index files (see wiki)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "drone_api.sonarr.url",
|
||||
"label": "Sonarr URL (add URL base if configured)",
|
||||
"type": "text",
|
||||
"default": "http://127.0.0.1:8989"
|
||||
},
|
||||
{
|
||||
"id": "drone_api.sonarr.api_key",
|
||||
"label": "Sonarr API key",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "drone_api.radarr.url",
|
||||
"label": "Radarr URL (add URL base if configured, min. version: 0.2.0.897)",
|
||||
"type": "text",
|
||||
"default": "http://127.0.0.1:7878"
|
||||
},
|
||||
{
|
||||
"id": "drone_api.radarr.api_key",
|
||||
"label": "Radarr API key",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.enabled",
|
||||
"label": "Provider: Enable OpenSubtitles",
|
||||
@@ -198,7 +260,7 @@
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.username",
|
||||
"label": "Opensubtitles Username (VIP)",
|
||||
"label": "Opensubtitles Username",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
@@ -210,12 +272,24 @@
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.is_vip",
|
||||
"label": "OpenSubtitles VIP? (ad-free subs, 1000 subs/day, no-cache VIP server: http://v.ht/osvip)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.podnapisi.enabled",
|
||||
"label": "Provider: Enable Podnapisi.NET",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.titlovi.enabled",
|
||||
"label": "Provider: Enable Titlovi.com",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.enabled",
|
||||
"label": "Provider: Enable Addic7ed",
|
||||
@@ -237,7 +311,7 @@
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.boost_by1",
|
||||
"id": "provider.addic7ed.boost_by2",
|
||||
"label": "Addic7ed: boost score (if requirements met)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
@@ -260,22 +334,23 @@
|
||||
"25",
|
||||
"21",
|
||||
"20",
|
||||
"19",
|
||||
"15",
|
||||
"10",
|
||||
"5",
|
||||
"0"
|
||||
],
|
||||
"default": "21"
|
||||
"default": "19"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.use_random_agents",
|
||||
"id": "provider.addic7ed.use_random_agents1",
|
||||
"label": "Addic7ed: Use random user agents",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.legendastv.enabled",
|
||||
"label": "Provider: Enable Legendas TV (mostly pt-BR)",
|
||||
"label": "Provider: Enable Legendas TV (mostly pt-BR; UNRAR NEEDED)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
@@ -306,31 +381,41 @@
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.shooter.enabled",
|
||||
"label": "Provider: Enable Shooter.cn (Chinese)",
|
||||
"id": "provider.subscene.enabled",
|
||||
"label": "Provider: Enable SubScene (TV shows)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.supersubtitles.enabled",
|
||||
"label": "Provider: Enable feliratok.info (Hungarian)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.subscenter.enabled",
|
||||
"label": "Provider: Enable SubsCenter (Hebrew)",
|
||||
"id": "provider.hosszupuska.enabled",
|
||||
"label": "Provider: Enable hosszupuskasub.com (Hungarian)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.subscenter.username",
|
||||
"label": "SubsCenter Username",
|
||||
"id": "provider.argenteam.enabled",
|
||||
"label": "Provider: Enable aRGENTeaM (Spanish)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.assrt.enabled",
|
||||
"label": "Provider: Enable assrt.net (Chinese)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.assrt.token",
|
||||
"label": "Assrt API Token",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.subscenter.password",
|
||||
"label": "SubsCenter Password",
|
||||
"type": "text",
|
||||
"option": "hidden",
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "providers.multithreading",
|
||||
"label": "Search enabled providers simultaneously (multithreading)",
|
||||
@@ -338,32 +423,32 @@
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.use_tags",
|
||||
"label": "I keep the exact (release-) filename of my media files",
|
||||
"id": "subtitles.embedded.autoextract",
|
||||
"label": "Automatically extract and use embedded subtitles upon media addition (with configured default mods)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.search_after_autoextract",
|
||||
"label": "After automatic extraction of embedded subtitles, also immediately search for available subtitles?",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.embedded",
|
||||
"label": "Scan: include embedded subtitles (in the media file (MKV/MP4), don't download if existing)",
|
||||
"label": "Don't search for subtitles of a language if there are embedded subtitles inside the media file (MKV/MP4)?",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.external",
|
||||
"label": "Scan: include external subtitles (metadata/filesystem, don't download if existing)",
|
||||
"label": "Don't search for subtitles of a language if they already exist on the filesystem (metadata/filesystem)?",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.exotic_ext",
|
||||
"label": "Scan: include \"exotic\" external subtitle formats (anything else than .srt/.ssa/.ass)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.filename_strictness",
|
||||
"label": "Scan: which external subtitles should be picked up?",
|
||||
"label": "How strict should these subtitles existing on the filesystem be detected?",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"exact: media filename match",
|
||||
@@ -372,6 +457,12 @@
|
||||
],
|
||||
"default": "loose: filename contains media filename"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.exotic_ext",
|
||||
"label": "Include non-text subtitle formats (anything else than .srt/.ssa/.ass/.vtt; embedded or external) in the above?",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.search.minimumTVScore2",
|
||||
"label": "Minimum score for TV (min: 240, def/sane: 337, min-ideal: 352; see http://v.ht/szscores)",
|
||||
@@ -402,9 +493,15 @@
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.remove_tags",
|
||||
"label": "Remove style tags from downloaded subtitles (bold, italic, underline, colors, ...)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.fix_common",
|
||||
"label": "Fix common whitespace/punctuation issues in subtitles",
|
||||
"label": "Fix common issues in subtitles",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
@@ -415,10 +512,10 @@
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.enforce_encoding",
|
||||
"label": "Normalize subtitle encoding to UTF-8",
|
||||
"id": "subtitles.reverse_rtl",
|
||||
"label": "Reverse punctuation in RTL languages (heb)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.colors",
|
||||
@@ -451,6 +548,17 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.save.formats",
|
||||
"label": "Subtitle formats to save (non-SRT only works if the previous option is enabled)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"SRT",
|
||||
"VTT",
|
||||
"SRT+VTT"
|
||||
],
|
||||
"default": "SRT"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.save.subFolder",
|
||||
"label": "Subtitle Folder (\"current folder\" is the folder the current media file lives in)",
|
||||
@@ -496,7 +604,8 @@
|
||||
"never",
|
||||
"current media item",
|
||||
"next episode (series)",
|
||||
"hybrid: current item or next episode"
|
||||
"hybrid: current item or next episode",
|
||||
"hybrid-plus: current item and next episode"
|
||||
],
|
||||
"default": "never"
|
||||
},
|
||||
@@ -506,8 +615,6 @@
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"never",
|
||||
"every 1 hours",
|
||||
"every 3 hours",
|
||||
"every 6 hours",
|
||||
"every 12 hours",
|
||||
"every 24 hours"
|
||||
@@ -528,7 +635,8 @@
|
||||
"3 weeks",
|
||||
"4 weeks",
|
||||
"5 weeks",
|
||||
"6 weeks"
|
||||
"6 weeks",
|
||||
"12 weeks"
|
||||
],
|
||||
"default": "2 weeks"
|
||||
},
|
||||
@@ -536,7 +644,7 @@
|
||||
"id": "scheduler.max_recent_items_per_library",
|
||||
"label": "Scheduler: Recent items to consider per library",
|
||||
"type": "text",
|
||||
"default": "500"
|
||||
"default": "1000"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.frequency",
|
||||
@@ -556,6 +664,25 @@
|
||||
"type": "text",
|
||||
"default": "7"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.air_date_cutoff",
|
||||
"label": "Scheduler: Don't search for better subtitles if the item's air date is older than",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"don't limit",
|
||||
"1 year",
|
||||
"2 years",
|
||||
"3 years",
|
||||
"4 years",
|
||||
"5 years",
|
||||
"6 years",
|
||||
"7 years",
|
||||
"8 years",
|
||||
"9 years",
|
||||
"10 years"
|
||||
],
|
||||
"default": "1 year"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected",
|
||||
"label": "Scheduler: Overwrite manually selected subtitles when better found",
|
||||
@@ -606,15 +733,15 @@
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "plugin_mode",
|
||||
"id": "plugin_mode2",
|
||||
"label": "Sub-Zero mode",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"agent + channel",
|
||||
"agent + interface",
|
||||
"only agent",
|
||||
"only channel"
|
||||
"only interface"
|
||||
],
|
||||
"default": "agent + channel"
|
||||
"default": "agent + interface"
|
||||
},
|
||||
{
|
||||
"id": "plugin_pin",
|
||||
@@ -631,19 +758,19 @@
|
||||
"default": "10"
|
||||
},
|
||||
{
|
||||
"id": "plugin_pin_mode",
|
||||
"id": "plugin_pin_mode2",
|
||||
"label": "Use PIN to restrict access to (needs plugin or PMS restart)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"disabled",
|
||||
"channel menu",
|
||||
"interface",
|
||||
"advanced menu"
|
||||
],
|
||||
"default": "disabled"
|
||||
},
|
||||
{
|
||||
"id": "notify_executable",
|
||||
"label": "Call this executable upon successful subtitle download",
|
||||
"label": "Call this executable upon successful subtitle download (see Wiki for details)",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
@@ -653,6 +780,36 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "new_style_cache",
|
||||
"label": "Use new style caching (for subliminal)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "low_impact_mode",
|
||||
"label": "Low impact mode (for remote filesystems)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "pms_request_timeout",
|
||||
"label": "Timeout for API requests sent to the PMS",
|
||||
"type": "text",
|
||||
"default": "15"
|
||||
},
|
||||
{
|
||||
"id": "proxy",
|
||||
"label": "HTTP proxy to use for providers (supports credentials)",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "path_to_advanced_settings",
|
||||
"label": "Custom path to advanced_settings.json",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "log_level",
|
||||
"label": "How verbose should the logging be?",
|
||||
@@ -666,6 +823,12 @@
|
||||
],
|
||||
"default": "WARNING"
|
||||
},
|
||||
{
|
||||
"id": "log_rotate_keep",
|
||||
"label": "How many log backups to keep?",
|
||||
"type": "text",
|
||||
"default": "5"
|
||||
},
|
||||
{
|
||||
"id": "log_debug_mods",
|
||||
"label": "Log subtitle modification (debug)",
|
||||
|
||||
+7
-5
@@ -9,11 +9,11 @@
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>2.0.19</string>
|
||||
<string>2.5.4</string>
|
||||
<key>CFBundleSignature</key>
|
||||
<string>????</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>2.0.19.1267</string>
|
||||
<string>2.5.7.2663</string>
|
||||
<key>PlexFrameworkVersion</key>
|
||||
<string>2</string>
|
||||
<key>PlexPluginClass</key>
|
||||
@@ -23,7 +23,7 @@
|
||||
<key>PlexPluginConsoleLogging</key>
|
||||
<string>0</string>
|
||||
<key>PlexPluginDevMode</key>
|
||||
<string>1</string>
|
||||
<string>0</string>
|
||||
<key>PlexPluginCodePolicy</key>
|
||||
<!-- this allows channels to access some python methods which are otherwise blocked, as well as import external code libraries, and interact with the PMS HTTP API -->
|
||||
<string>Elevated</string>
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
<h1>Sub-Zero for Plex</h1><i>Subtitles done right</i>
|
||||
|
||||
Version 2.0.19.1267 RC6
|
||||
Version 2.5.7.2663
|
||||
|
||||
Originally based on @bramwalet's awesome <a href="https://github.com/bramwalet/Subliminal.bundle">Subliminal.bundle</a>
|
||||
|
||||
@@ -44,7 +44,9 @@ Score info: <a href="http://v.ht/szscores">http://v.ht/szscores&
|
||||
Plex thread: <a href="https://forums.plex.tv/discussion/186575">https://forums.plex.tv/discussion/186575</a>
|
||||
Github: <a href="https://github.com/pannal/Sub-Zero.bundle">https://github.com/pannal/Sub-Zero</a>
|
||||
|
||||
panni, 2017
|
||||
3rd party licenses: <a href="https://github.com/pannal/Sub-Zero.bundle/tree/master/Licenses">https://github.com/pannal/Sub-Zero.bundle/tree/master/Licenses</a>
|
||||
|
||||
panni, 2018
|
||||
</div>
|
||||
</string>
|
||||
</dict>
|
||||
|
||||
Executable
BIN
Binary file not shown.
BIN
Binary file not shown.
Executable
BIN
Binary file not shown.
Executable
BIN
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,85 @@
|
||||
"""Generic interface to all dbm clones.
|
||||
|
||||
Instead of
|
||||
|
||||
import dbm
|
||||
d = dbm.open(file, 'w', 0666)
|
||||
|
||||
use
|
||||
|
||||
import anydbm
|
||||
d = anydbm.open(file, 'w')
|
||||
|
||||
The returned object is a dbhash, gdbm, dbm or dumbdbm object,
|
||||
dependent on the type of database being opened (determined by whichdb
|
||||
module) in the case of an existing dbm. If the dbm does not exist and
|
||||
the create or new flag ('c' or 'n') was specified, the dbm type will
|
||||
be determined by the availability of the modules (tested in the above
|
||||
order).
|
||||
|
||||
It has the following interface (key and data are strings):
|
||||
|
||||
d[key] = data # store data at key (may override data at
|
||||
# existing key)
|
||||
data = d[key] # retrieve data at key (raise KeyError if no
|
||||
# such key)
|
||||
del d[key] # delete data stored at key (raises KeyError
|
||||
# if no such key)
|
||||
flag = key in d # true if the key exists
|
||||
list = d.keys() # return a list of all existing keys (slow!)
|
||||
|
||||
Future versions may change the order in which implementations are
|
||||
tested for existence, and add interfaces to other dbm-like
|
||||
implementations.
|
||||
"""
|
||||
|
||||
class error(Exception):
|
||||
pass
|
||||
|
||||
_names = ['dbhash', 'gdbm', 'dbm', 'dumbdbm']
|
||||
_errors = [error]
|
||||
_defaultmod = None
|
||||
|
||||
for _name in _names:
|
||||
try:
|
||||
_mod = __import__(_name)
|
||||
except ImportError:
|
||||
continue
|
||||
if not _defaultmod:
|
||||
_defaultmod = _mod
|
||||
_errors.append(_mod.error)
|
||||
|
||||
if not _defaultmod:
|
||||
raise ImportError, "no dbm clone found; tried %s" % _names
|
||||
|
||||
error = tuple(_errors)
|
||||
|
||||
def open(file, flag='r', mode=0666):
|
||||
"""Open or create database at path given by *file*.
|
||||
|
||||
Optional argument *flag* can be 'r' (default) for read-only access, 'w'
|
||||
for read-write access of an existing database, 'c' for read-write access
|
||||
to a new or existing database, and 'n' for read-write access to a new
|
||||
database.
|
||||
|
||||
Note: 'r' and 'w' fail if the database doesn't exist; 'c' creates it
|
||||
only if it doesn't exist; and 'n' always creates a new database.
|
||||
"""
|
||||
|
||||
# guess the type of an existing database
|
||||
from whichdb import whichdb
|
||||
result=whichdb(file)
|
||||
if result is None:
|
||||
# db doesn't exist
|
||||
if 'c' in flag or 'n' in flag:
|
||||
# file doesn't exist and the new
|
||||
# flag was used so use default type
|
||||
mod = _defaultmod
|
||||
else:
|
||||
raise error, "need 'c' or 'n' flag to open new db"
|
||||
elif result == "":
|
||||
# db type cannot be determined
|
||||
raise error, "db type could not be determined"
|
||||
else:
|
||||
mod = __import__(result)
|
||||
return mod.open(file, flag, mode)
|
||||
@@ -4,7 +4,6 @@
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from . import basestr
|
||||
|
||||
@@ -5,26 +5,31 @@ http://www.crummy.com/software/BeautifulSoup/
|
||||
|
||||
Beautiful Soup uses a pluggable XML or HTML parser to parse a
|
||||
(possibly invalid) document into a tree representation. Beautiful Soup
|
||||
provides provides methods and Pythonic idioms that make it easy to
|
||||
navigate, search, and modify the parse tree.
|
||||
provides methods and Pythonic idioms that make it easy to navigate,
|
||||
search, and modify the parse tree.
|
||||
|
||||
Beautiful Soup works with Python 2.6 and up. It works better if lxml
|
||||
Beautiful Soup works with Python 2.7 and up. It works better if lxml
|
||||
and/or html5lib is installed.
|
||||
|
||||
For more than you ever wanted to know about Beautiful Soup, see the
|
||||
documentation:
|
||||
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
|
||||
"""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__author__ = "Leonard Richardson (leonardr@segfault.org)"
|
||||
__version__ = "4.4.1"
|
||||
__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson"
|
||||
__version__ = "4.6.0"
|
||||
__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
|
||||
__license__ = "MIT"
|
||||
|
||||
__all__ = ['BeautifulSoup']
|
||||
|
||||
import os
|
||||
import re
|
||||
import traceback
|
||||
import warnings
|
||||
|
||||
from .builder import builder_registry, ParserRejectedMarkup
|
||||
@@ -77,7 +82,7 @@ class BeautifulSoup(Tag):
|
||||
|
||||
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
|
||||
|
||||
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
|
||||
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
|
||||
|
||||
def __init__(self, markup="", features=None, builder=None,
|
||||
parse_only=None, from_encoding=None, exclude_encodings=None,
|
||||
@@ -137,6 +142,10 @@ class BeautifulSoup(Tag):
|
||||
from_encoding = from_encoding or deprecated_argument(
|
||||
"fromEncoding", "from_encoding")
|
||||
|
||||
if from_encoding and isinstance(markup, unicode):
|
||||
warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
|
||||
from_encoding = None
|
||||
|
||||
if len(kwargs) > 0:
|
||||
arg = kwargs.keys().pop()
|
||||
raise TypeError(
|
||||
@@ -161,19 +170,29 @@ class BeautifulSoup(Tag):
|
||||
markup_type = "XML"
|
||||
else:
|
||||
markup_type = "HTML"
|
||||
|
||||
caller = traceback.extract_stack()[0]
|
||||
filename = caller[0]
|
||||
line_number = caller[1]
|
||||
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
|
||||
filename=filename,
|
||||
line_number=line_number,
|
||||
parser=builder.NAME,
|
||||
markup_type=markup_type))
|
||||
|
||||
self.builder = builder
|
||||
self.is_xml = builder.is_xml
|
||||
self.known_xml = self.is_xml
|
||||
self.builder.soup = self
|
||||
|
||||
self.parse_only = parse_only
|
||||
|
||||
if hasattr(markup, 'read'): # It's a file-type object.
|
||||
markup = markup.read()
|
||||
elif len(markup) <= 256:
|
||||
elif len(markup) <= 256 and (
|
||||
(isinstance(markup, bytes) and not b'<' in markup)
|
||||
or (isinstance(markup, unicode) and not u'<' in markup)
|
||||
):
|
||||
# Print out warnings for a couple beginner problems
|
||||
# involving passing non-markup to Beautiful Soup.
|
||||
# Beautiful Soup will still parse the input as markup,
|
||||
@@ -195,16 +214,10 @@ class BeautifulSoup(Tag):
|
||||
if isinstance(markup, unicode):
|
||||
markup = markup.encode("utf8")
|
||||
warnings.warn(
|
||||
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
|
||||
if markup[:5] == "http:" or markup[:6] == "https:":
|
||||
# TODO: This is ugly but I couldn't get it to work in
|
||||
# Python 3 otherwise.
|
||||
if ((isinstance(markup, bytes) and not b' ' in markup)
|
||||
or (isinstance(markup, unicode) and not u' ' in markup)):
|
||||
if isinstance(markup, unicode):
|
||||
markup = markup.encode("utf8")
|
||||
warnings.warn(
|
||||
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
|
||||
'"%s" looks like a filename, not markup. You should'
|
||||
' probably open this file and pass the filehandle into'
|
||||
' Beautiful Soup.' % markup)
|
||||
self._check_markup_is_url(markup)
|
||||
|
||||
for (self.markup, self.original_encoding, self.declared_html_encoding,
|
||||
self.contains_replacement_characters) in (
|
||||
@@ -223,15 +236,52 @@ class BeautifulSoup(Tag):
|
||||
self.builder.soup = None
|
||||
|
||||
def __copy__(self):
|
||||
return type(self)(self.encode(), builder=self.builder)
|
||||
copy = type(self)(
|
||||
self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
|
||||
)
|
||||
|
||||
# Although we encoded the tree to UTF-8, that may not have
|
||||
# been the encoding of the original markup. Set the copy's
|
||||
# .original_encoding to reflect the original object's
|
||||
# .original_encoding.
|
||||
copy.original_encoding = self.original_encoding
|
||||
return copy
|
||||
|
||||
def __getstate__(self):
|
||||
# Frequently a tree builder can't be pickled.
|
||||
d = dict(self.__dict__)
|
||||
if 'builder' in d and not self.builder.picklable:
|
||||
del d['builder']
|
||||
d['builder'] = None
|
||||
return d
|
||||
|
||||
@staticmethod
|
||||
def _check_markup_is_url(markup):
|
||||
"""
|
||||
Check if markup looks like it's actually a url and raise a warning
|
||||
if so. Markup can be unicode or str (py2) / bytes (py3).
|
||||
"""
|
||||
if isinstance(markup, bytes):
|
||||
space = b' '
|
||||
cant_start_with = (b"http:", b"https:")
|
||||
elif isinstance(markup, unicode):
|
||||
space = u' '
|
||||
cant_start_with = (u"http:", u"https:")
|
||||
else:
|
||||
return
|
||||
|
||||
if any(markup.startswith(prefix) for prefix in cant_start_with):
|
||||
if not space in markup:
|
||||
if isinstance(markup, bytes):
|
||||
decoded_markup = markup.decode('utf-8', 'replace')
|
||||
else:
|
||||
decoded_markup = markup
|
||||
warnings.warn(
|
||||
'"%s" looks like a URL. Beautiful Soup is not an'
|
||||
' HTTP client. You should probably use an HTTP client like'
|
||||
' requests to get the document behind the URL, and feed'
|
||||
' that document to Beautiful Soup.' % decoded_markup
|
||||
)
|
||||
|
||||
def _feed(self):
|
||||
# Convert the document to Unicode.
|
||||
self.builder.reset()
|
||||
@@ -335,7 +385,18 @@ class BeautifulSoup(Tag):
|
||||
if parent.next_sibling:
|
||||
# This node is being inserted into an element that has
|
||||
# already been parsed. Deal with any dangling references.
|
||||
index = parent.contents.index(o)
|
||||
index = len(parent.contents)-1
|
||||
while index >= 0:
|
||||
if parent.contents[index] is o:
|
||||
break
|
||||
index -= 1
|
||||
else:
|
||||
raise ValueError(
|
||||
"Error building tree: supposedly %r was inserted "
|
||||
"into %r after the fact, but I don't see it!" % (
|
||||
o, parent
|
||||
)
|
||||
)
|
||||
if index == 0:
|
||||
previous_element = parent
|
||||
previous_sibling = None
|
||||
@@ -387,7 +448,7 @@ class BeautifulSoup(Tag):
|
||||
"""Push a start tag on to the stack.
|
||||
|
||||
If this method returns None, the tag was rejected by the
|
||||
SoupStrainer. You should proceed as if the tag had not occured
|
||||
SoupStrainer. You should proceed as if the tag had not occurred
|
||||
in the document. For instance, if this was a self-closing tag,
|
||||
don't call handle_endtag.
|
||||
"""
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
from collections import defaultdict
|
||||
import itertools
|
||||
import sys
|
||||
from bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
ContentMetaAttributeValue,
|
||||
HTMLAwareEntitySubstitution,
|
||||
whitespace_re
|
||||
)
|
||||
|
||||
@@ -227,9 +231,14 @@ class HTMLTreeBuilder(TreeBuilder):
|
||||
Such as which tags are empty-element tags.
|
||||
"""
|
||||
|
||||
preserve_whitespace_tags = set(['pre', 'textarea'])
|
||||
empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
|
||||
'spacer', 'link', 'frame', 'base'])
|
||||
preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
|
||||
empty_element_tags = set([
|
||||
# These are from HTML5.
|
||||
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
|
||||
|
||||
# These are from HTML4, removed in HTML5.
|
||||
'spacer', 'frame'
|
||||
])
|
||||
|
||||
# The HTML standard defines these attributes as containing a
|
||||
# space-separated list of values, not a single value. That is,
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__all__ = [
|
||||
'HTML5TreeBuilder',
|
||||
]
|
||||
|
||||
from pdb import set_trace
|
||||
import warnings
|
||||
import re
|
||||
from bs4.builder import (
|
||||
PERMISSIVE,
|
||||
HTML,
|
||||
@@ -15,7 +18,10 @@ from bs4.element import (
|
||||
whitespace_re,
|
||||
)
|
||||
import html5lib
|
||||
from html5lib.constants import namespaces
|
||||
from html5lib.constants import (
|
||||
namespaces,
|
||||
prefixes,
|
||||
)
|
||||
from bs4.element import (
|
||||
Comment,
|
||||
Doctype,
|
||||
@@ -23,6 +29,15 @@ from bs4.element import (
|
||||
Tag,
|
||||
)
|
||||
|
||||
try:
|
||||
# Pre-0.99999999
|
||||
from html5lib.treebuilders import _base as treebuilder_base
|
||||
new_html5lib = False
|
||||
except ImportError, e:
|
||||
# 0.99999999 and up
|
||||
from html5lib.treebuilders import base as treebuilder_base
|
||||
new_html5lib = True
|
||||
|
||||
class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
"""Use html5lib to build a tree."""
|
||||
|
||||
@@ -47,7 +62,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
if self.soup.parse_only is not None:
|
||||
warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
|
||||
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
|
||||
doc = parser.parse(markup, encoding=self.user_specified_encoding)
|
||||
|
||||
extra_kwargs = dict()
|
||||
if not isinstance(markup, unicode):
|
||||
if new_html5lib:
|
||||
extra_kwargs['override_encoding'] = self.user_specified_encoding
|
||||
else:
|
||||
extra_kwargs['encoding'] = self.user_specified_encoding
|
||||
doc = parser.parse(markup, **extra_kwargs)
|
||||
|
||||
# Set the character encoding detected by the tokenizer.
|
||||
if isinstance(markup, unicode):
|
||||
@@ -55,11 +77,17 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
# charEncoding to UTF-8 if it gets Unicode input.
|
||||
doc.original_encoding = None
|
||||
else:
|
||||
doc.original_encoding = parser.tokenizer.stream.charEncoding[0]
|
||||
original_encoding = parser.tokenizer.stream.charEncoding[0]
|
||||
if not isinstance(original_encoding, basestring):
|
||||
# In 0.99999999 and up, the encoding is an html5lib
|
||||
# Encoding object. We want to use a string for compatibility
|
||||
# with other tree builders.
|
||||
original_encoding = original_encoding.name
|
||||
doc.original_encoding = original_encoding
|
||||
|
||||
def create_treebuilder(self, namespaceHTMLElements):
|
||||
self.underlying_builder = TreeBuilderForHtml5lib(
|
||||
self.soup, namespaceHTMLElements)
|
||||
namespaceHTMLElements, self.soup)
|
||||
return self.underlying_builder
|
||||
|
||||
def test_fragment_to_document(self, fragment):
|
||||
@@ -67,10 +95,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
return u'<html><head></head><body>%s</body></html>' % fragment
|
||||
|
||||
|
||||
class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
|
||||
|
||||
def __init__(self, soup, namespaceHTMLElements):
|
||||
self.soup = soup
|
||||
def __init__(self, namespaceHTMLElements, soup=None):
|
||||
if soup:
|
||||
self.soup = soup
|
||||
else:
|
||||
from bs4 import BeautifulSoup
|
||||
self.soup = BeautifulSoup("", "html.parser")
|
||||
super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
|
||||
|
||||
def documentClass(self):
|
||||
@@ -93,7 +125,8 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
return TextNode(Comment(data), self.soup)
|
||||
|
||||
def fragmentClass(self):
|
||||
self.soup = BeautifulSoup("")
|
||||
from bs4 import BeautifulSoup
|
||||
self.soup = BeautifulSoup("", "html.parser")
|
||||
self.soup.name = "[document_fragment]"
|
||||
return Element(self.soup, self.soup, None)
|
||||
|
||||
@@ -105,7 +138,57 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
return self.soup
|
||||
|
||||
def getFragment(self):
|
||||
return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
|
||||
return treebuilder_base.TreeBuilder.getFragment(self).element
|
||||
|
||||
def testSerializer(self, element):
|
||||
from bs4 import BeautifulSoup
|
||||
rv = []
|
||||
doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')
|
||||
|
||||
def serializeElement(element, indent=0):
|
||||
if isinstance(element, BeautifulSoup):
|
||||
pass
|
||||
if isinstance(element, Doctype):
|
||||
m = doctype_re.match(element)
|
||||
if m:
|
||||
name = m.group(1)
|
||||
if m.lastindex > 1:
|
||||
publicId = m.group(2) or ""
|
||||
systemId = m.group(3) or m.group(4) or ""
|
||||
rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
|
||||
(' ' * indent, name, publicId, systemId))
|
||||
else:
|
||||
rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, name))
|
||||
else:
|
||||
rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
|
||||
elif isinstance(element, Comment):
|
||||
rv.append("|%s<!-- %s -->" % (' ' * indent, element))
|
||||
elif isinstance(element, NavigableString):
|
||||
rv.append("|%s\"%s\"" % (' ' * indent, element))
|
||||
else:
|
||||
if element.namespace:
|
||||
name = "%s %s" % (prefixes[element.namespace],
|
||||
element.name)
|
||||
else:
|
||||
name = element.name
|
||||
rv.append("|%s<%s>" % (' ' * indent, name))
|
||||
if element.attrs:
|
||||
attributes = []
|
||||
for name, value in element.attrs.items():
|
||||
if isinstance(name, NamespacedAttribute):
|
||||
name = "%s %s" % (prefixes[name.namespace], name.name)
|
||||
if isinstance(value, list):
|
||||
value = " ".join(value)
|
||||
attributes.append((name, value))
|
||||
|
||||
for name, value in sorted(attributes):
|
||||
rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
|
||||
indent += 2
|
||||
for child in element.children:
|
||||
serializeElement(child, indent)
|
||||
serializeElement(element, 0)
|
||||
|
||||
return "\n".join(rv)
|
||||
|
||||
class AttrList(object):
|
||||
def __init__(self, element):
|
||||
@@ -137,9 +220,9 @@ class AttrList(object):
|
||||
return name in list(self.attrs.keys())
|
||||
|
||||
|
||||
class Element(html5lib.treebuilders._base.Node):
|
||||
class Element(treebuilder_base.Node):
|
||||
def __init__(self, element, soup, namespace):
|
||||
html5lib.treebuilders._base.Node.__init__(self, element.name)
|
||||
treebuilder_base.Node.__init__(self, element.name)
|
||||
self.element = element
|
||||
self.soup = soup
|
||||
self.namespace = namespace
|
||||
@@ -158,8 +241,10 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
child = node
|
||||
elif node.element.__class__ == NavigableString:
|
||||
string_child = child = node.element
|
||||
node.parent = self
|
||||
else:
|
||||
child = node.element
|
||||
node.parent = self
|
||||
|
||||
if not isinstance(child, basestring) and child.parent is not None:
|
||||
node.element.extract()
|
||||
@@ -197,6 +282,8 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
most_recent_element=most_recent_element)
|
||||
|
||||
def getAttributes(self):
|
||||
if isinstance(self.element, Comment):
|
||||
return {}
|
||||
return AttrList(self.element)
|
||||
|
||||
def setAttributes(self, attributes):
|
||||
@@ -224,11 +311,11 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
attributes = property(getAttributes, setAttributes)
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
text = TextNode(self.soup.new_string(data), self.soup)
|
||||
if insertBefore:
|
||||
text = TextNode(self.soup.new_string(data), self.soup)
|
||||
self.insertBefore(data, insertBefore)
|
||||
self.insertBefore(text, insertBefore)
|
||||
else:
|
||||
self.appendChild(data)
|
||||
self.appendChild(text)
|
||||
|
||||
def insertBefore(self, node, refNode):
|
||||
index = self.element.index(refNode.element)
|
||||
@@ -250,6 +337,7 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
# print "MOVE", self.element.contents
|
||||
# print "FROM", self.element
|
||||
# print "TO", new_parent.element
|
||||
|
||||
element = self.element
|
||||
new_parent_element = new_parent.element
|
||||
# Determine what this tag's next_element will be once all the children
|
||||
@@ -268,7 +356,6 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
new_parents_last_descendant_next_element = new_parent_element.next_element
|
||||
|
||||
to_append = element.contents
|
||||
append_after = new_parent_element.contents
|
||||
if len(to_append) > 0:
|
||||
# Set the first child's previous_element and previous_sibling
|
||||
# to elements within the new parent
|
||||
@@ -285,12 +372,19 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
if new_parents_last_child:
|
||||
new_parents_last_child.next_sibling = first_child
|
||||
|
||||
# Fix the last child's next_element and next_sibling
|
||||
last_child = to_append[-1]
|
||||
last_child.next_element = new_parents_last_descendant_next_element
|
||||
# Find the very last element being moved. It is now the
|
||||
# parent's last descendant. It has no .next_sibling and
|
||||
# its .next_element is whatever the previous last
|
||||
# descendant had.
|
||||
last_childs_last_descendant = to_append[-1]._last_descendant(False, True)
|
||||
|
||||
last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
|
||||
if new_parents_last_descendant_next_element:
|
||||
new_parents_last_descendant_next_element.previous_element = last_child
|
||||
last_child.next_sibling = None
|
||||
# TODO: This code has no test coverage and I'm not sure
|
||||
# how to get html5lib to go through this path, but it's
|
||||
# just the other side of the previous line.
|
||||
new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant
|
||||
last_childs_last_descendant.next_sibling = None
|
||||
|
||||
for child in to_append:
|
||||
child.parent = new_parent_element
|
||||
@@ -324,7 +418,7 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
|
||||
class TextNode(Element):
|
||||
def __init__(self, element, soup):
|
||||
html5lib.treebuilders._base.Node.__init__(self, None)
|
||||
treebuilder_base.Node.__init__(self, None)
|
||||
self.element = element
|
||||
self.soup = soup
|
||||
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__all__ = [
|
||||
'HTMLParserTreeBuilder',
|
||||
]
|
||||
@@ -49,7 +52,31 @@ from bs4.builder import (
|
||||
HTMLPARSER = 'html.parser'
|
||||
|
||||
class BeautifulSoupHTMLParser(HTMLParser):
|
||||
def handle_starttag(self, name, attrs):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
HTMLParser.__init__(self, *args, **kwargs)
|
||||
|
||||
# Keep a list of empty-element tags that were encountered
|
||||
# without an explicit closing tag. If we encounter a closing tag
|
||||
# of this type, we'll associate it with one of those entries.
|
||||
#
|
||||
# This isn't a stack because we don't care about the
|
||||
# order. It's a list of closing tags we've already handled and
|
||||
# will ignore, assuming they ever show up.
|
||||
self.already_closed_empty_element = []
|
||||
|
||||
def handle_startendtag(self, name, attrs):
|
||||
# This is only called when the markup looks like
|
||||
# <tag/>.
|
||||
|
||||
# handle_empty_element=False tells handle_starttag not to close the tag
|
||||
# just because its name matches a known empty-element tag. We
|
||||
# know that this is an empty-element tag and we want to call
|
||||
# handle_endtag ourselves.
|
||||
tag = self.handle_starttag(name, attrs, handle_empty_element=False)
|
||||
self.handle_endtag(name)
|
||||
|
||||
def handle_starttag(self, name, attrs, handle_empty_element=True):
|
||||
# XXX namespace
|
||||
attr_dict = {}
|
||||
for key, value in attrs:
|
||||
@@ -59,10 +86,34 @@ class BeautifulSoupHTMLParser(HTMLParser):
|
||||
value = ''
|
||||
attr_dict[key] = value
|
||||
attrvalue = '""'
|
||||
self.soup.handle_starttag(name, None, None, attr_dict)
|
||||
#print "START", name
|
||||
tag = self.soup.handle_starttag(name, None, None, attr_dict)
|
||||
if tag and tag.is_empty_element and handle_empty_element:
|
||||
# Unlike other parsers, html.parser doesn't send separate end tag
|
||||
# events for empty-element tags. (It's handled in
|
||||
# handle_startendtag, but only if the original markup looked like
|
||||
# <tag/>.)
|
||||
#
|
||||
# So we need to call handle_endtag() ourselves. Since we
|
||||
# know the start event is identical to the end event, we
|
||||
# don't want handle_endtag() to cross off any previous end
|
||||
# events for tags of this name.
|
||||
self.handle_endtag(name, check_already_closed=False)
|
||||
|
||||
def handle_endtag(self, name):
|
||||
self.soup.handle_endtag(name)
|
||||
# But we might encounter an explicit closing tag for this tag
|
||||
# later on. If so, we want to ignore it.
|
||||
self.already_closed_empty_element.append(name)
|
||||
|
||||
def handle_endtag(self, name, check_already_closed=True):
|
||||
#print "END", name
|
||||
if check_already_closed and name in self.already_closed_empty_element:
|
||||
# This is a redundant end tag for an empty-element tag.
|
||||
# We've already called handle_endtag() for it, so just
|
||||
# check it off the list.
|
||||
# print "ALREADY CLOSED", name
|
||||
self.already_closed_empty_element.remove(name)
|
||||
else:
|
||||
self.soup.handle_endtag(name)
|
||||
|
||||
def handle_data(self, data):
|
||||
self.soup.handle_data(data)
|
||||
@@ -166,6 +217,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
|
||||
warnings.warn(RuntimeWarning(
|
||||
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
|
||||
raise e
|
||||
parser.already_closed_empty_element = []
|
||||
|
||||
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
|
||||
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__all__ = [
|
||||
'LXMLTreeBuilderForXML',
|
||||
'LXMLTreeBuilder',
|
||||
@@ -12,6 +14,7 @@ from bs4.element import (
|
||||
Doctype,
|
||||
NamespacedAttribute,
|
||||
ProcessingInstruction,
|
||||
XMLProcessingInstruction,
|
||||
)
|
||||
from bs4.builder import (
|
||||
FAST,
|
||||
@@ -29,6 +32,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
DEFAULT_PARSER_CLASS = etree.XMLParser
|
||||
|
||||
is_xml = True
|
||||
processing_instruction_class = XMLProcessingInstruction
|
||||
|
||||
NAME = "lxml-xml"
|
||||
ALTERNATE_NAMES = ["xml"]
|
||||
@@ -87,6 +91,16 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
|
||||
Each 4-tuple represents a strategy for parsing the document.
|
||||
"""
|
||||
# Instead of using UnicodeDammit to convert the bytestring to
|
||||
# Unicode using different encodings, use EncodingDetector to
|
||||
# iterate over the encodings, and tell lxml to try to parse
|
||||
# the document as each one in turn.
|
||||
is_html = not self.is_xml
|
||||
if is_html:
|
||||
self.processing_instruction_class = ProcessingInstruction
|
||||
else:
|
||||
self.processing_instruction_class = XMLProcessingInstruction
|
||||
|
||||
if isinstance(markup, unicode):
|
||||
# We were given Unicode. Maybe lxml can parse Unicode on
|
||||
# this system?
|
||||
@@ -98,11 +112,6 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
yield (markup.encode("utf8"), "utf8",
|
||||
document_declared_encoding, False)
|
||||
|
||||
# Instead of using UnicodeDammit to convert the bytestring to
|
||||
# Unicode using different encodings, use EncodingDetector to
|
||||
# iterate over the encodings, and tell lxml to try to parse
|
||||
# the document as each one in turn.
|
||||
is_html = not self.is_xml
|
||||
try_encodings = [user_specified_encoding, document_declared_encoding]
|
||||
detector = EncodingDetector(
|
||||
markup, try_encodings, is_html, exclude_encodings)
|
||||
@@ -201,7 +210,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
def pi(self, target, data):
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(target + ' ' + data)
|
||||
self.soup.endData(ProcessingInstruction)
|
||||
self.soup.endData(self.processing_instruction_class)
|
||||
|
||||
def data(self, content):
|
||||
self.soup.handle_data(content)
|
||||
@@ -229,6 +238,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
|
||||
|
||||
features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
|
||||
is_xml = False
|
||||
processing_instruction_class = ProcessingInstruction
|
||||
|
||||
def default_parser(self, encoding):
|
||||
return etree.HTMLParser
|
||||
|
||||
@@ -6,9 +6,10 @@ necessary. It is heavily based on code from Mark Pilgrim's Universal
|
||||
Feed Parser. It works best on XML and HTML, but it does not rewrite the
|
||||
XML or HTML to reflect a new encoding; that's the tree builder's job.
|
||||
"""
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
from pdb import set_trace
|
||||
import codecs
|
||||
from htmlentitydefs import codepoint2name
|
||||
import re
|
||||
@@ -309,7 +310,7 @@ class EncodingDetector:
|
||||
else:
|
||||
xml_endpos = 1024
|
||||
html_endpos = max(2048, int(len(markup) * 0.05))
|
||||
|
||||
|
||||
declared_encoding = None
|
||||
declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
|
||||
if not declared_encoding_match and is_html:
|
||||
@@ -346,7 +347,7 @@ class UnicodeDammit:
|
||||
self.tried_encodings = []
|
||||
self.contains_replacement_characters = False
|
||||
self.is_html = is_html
|
||||
|
||||
self.log = logging.getLogger(__name__)
|
||||
self.detector = EncodingDetector(
|
||||
markup, override_encodings, is_html, exclude_encodings)
|
||||
|
||||
@@ -376,9 +377,10 @@ class UnicodeDammit:
|
||||
if encoding != "ascii":
|
||||
u = self._convert_from(encoding, "replace")
|
||||
if u is not None:
|
||||
logging.warning(
|
||||
self.log.warning(
|
||||
"Some characters could not be decoded, and were "
|
||||
"replaced with REPLACEMENT CHARACTER.")
|
||||
"replaced with REPLACEMENT CHARACTER."
|
||||
)
|
||||
self.contains_replacement_characters = True
|
||||
break
|
||||
|
||||
@@ -734,7 +736,7 @@ class UnicodeDammit:
|
||||
0xde : b'\xc3\x9e', # Þ
|
||||
0xdf : b'\xc3\x9f', # ß
|
||||
0xe0 : b'\xc3\xa0', # à
|
||||
0xe1 : b'\xa1', # á
|
||||
0xe1 : b'\xa1', # á
|
||||
0xe2 : b'\xc3\xa2', # â
|
||||
0xe3 : b'\xc3\xa3', # ã
|
||||
0xe4 : b'\xc3\xa4', # ä
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""Diagnostic functions, mainly for use when doing tech support."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
import cProfile
|
||||
@@ -56,7 +58,8 @@ def diagnose(data):
|
||||
data = data.read()
|
||||
elif os.path.exists(data):
|
||||
print '"%s" looks like a filename. Reading data from the file.' % data
|
||||
data = open(data).read()
|
||||
with open(data) as fp:
|
||||
data = fp.read()
|
||||
elif data.startswith("http:") or data.startswith("https:"):
|
||||
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
|
||||
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
from pdb import set_trace
|
||||
import collections
|
||||
import re
|
||||
import shlex
|
||||
import sys
|
||||
import warnings
|
||||
from bs4.dammit import EntitySubstitution
|
||||
@@ -99,6 +101,8 @@ class HTMLAwareEntitySubstitution(EntitySubstitution):
|
||||
|
||||
preformatted_tags = set(["pre"])
|
||||
|
||||
preserve_whitespace_tags = set(['pre', 'textarea'])
|
||||
|
||||
@classmethod
|
||||
def _substitute_if_appropriate(cls, ns, f):
|
||||
if (isinstance(ns, NavigableString)
|
||||
@@ -127,8 +131,8 @@ class PageElement(object):
|
||||
# to methods like encode() and prettify():
|
||||
#
|
||||
# "html" - All Unicode characters with corresponding HTML entities
|
||||
# are converted to those entities on output.
|
||||
# "minimal" - Bare ampersands and angle brackets are converted to
|
||||
# are converted to those entities on output.
|
||||
# "minimal" - Bare ampersands and angle brackets are converted to
|
||||
# XML entities: & < >
|
||||
# None - The null formatter. Unicode characters are never
|
||||
# converted to entities. This is not recommended, but it's
|
||||
@@ -169,11 +173,19 @@ class PageElement(object):
|
||||
|
||||
This is used when mapping a formatter name ("minimal") to an
|
||||
appropriate function (one that performs entity-substitution on
|
||||
the contents of <script> and <style> tags, or not). It's
|
||||
the contents of <script> and <style> tags, or not). It can be
|
||||
inefficient, but it should be called very rarely.
|
||||
"""
|
||||
if self.known_xml is not None:
|
||||
# Most of the time we will have determined this when the
|
||||
# document is parsed.
|
||||
return self.known_xml
|
||||
|
||||
# Otherwise, it's likely that this element was created by
|
||||
# direct invocation of the constructor from within the user's
|
||||
# Python code.
|
||||
if self.parent is None:
|
||||
# This is the top-level object. It should have .is_xml set
|
||||
# This is the top-level object. It should have .known_xml set
|
||||
# from tree creation. If not, take a guess--BS is usually
|
||||
# used on HTML markup.
|
||||
return getattr(self, 'is_xml', False)
|
||||
@@ -523,9 +535,16 @@ class PageElement(object):
|
||||
return ResultSet(strainer, result)
|
||||
elif isinstance(name, basestring):
|
||||
# Optimization to find all tags with a given name.
|
||||
if name.count(':') == 1:
|
||||
# This is a name with a prefix.
|
||||
prefix, name = name.split(':', 1)
|
||||
else:
|
||||
prefix = None
|
||||
result = (element for element in generator
|
||||
if isinstance(element, Tag)
|
||||
and element.name == name)
|
||||
and element.name == name
|
||||
and (prefix is None or element.prefix == prefix)
|
||||
)
|
||||
return ResultSet(strainer, result)
|
||||
results = ResultSet(strainer)
|
||||
while True:
|
||||
@@ -637,7 +656,7 @@ class PageElement(object):
|
||||
return lambda el: el._attr_value_as_string(
|
||||
attribute, '').startswith(value)
|
||||
elif operator == '$':
|
||||
# string represenation of `attribute` ends with `value`
|
||||
# string representation of `attribute` ends with `value`
|
||||
return lambda el: el._attr_value_as_string(
|
||||
attribute, '').endswith(value)
|
||||
elif operator == '*':
|
||||
@@ -677,6 +696,11 @@ class NavigableString(unicode, PageElement):
|
||||
PREFIX = ''
|
||||
SUFFIX = ''
|
||||
|
||||
# We can't tell just by looking at a string whether it's contained
|
||||
# in an XML document or an HTML document.
|
||||
|
||||
known_xml = None
|
||||
|
||||
def __new__(cls, value):
|
||||
"""Create a new NavigableString.
|
||||
|
||||
@@ -743,10 +767,16 @@ class CData(PreformattedString):
|
||||
SUFFIX = u']]>'
|
||||
|
||||
class ProcessingInstruction(PreformattedString):
|
||||
"""An SGML processing instruction."""
|
||||
|
||||
PREFIX = u'<?'
|
||||
SUFFIX = u'>'
|
||||
|
||||
class XMLProcessingInstruction(ProcessingInstruction):
|
||||
"""An XML processing instruction."""
|
||||
PREFIX = u'<?'
|
||||
SUFFIX = u'?>'
|
||||
|
||||
class Comment(PreformattedString):
|
||||
|
||||
PREFIX = u'<!--'
|
||||
@@ -781,7 +811,8 @@ class Tag(PageElement):
|
||||
"""Represents a found HTML tag with its attributes and contents."""
|
||||
|
||||
def __init__(self, parser=None, builder=None, name=None, namespace=None,
|
||||
prefix=None, attrs=None, parent=None, previous=None):
|
||||
prefix=None, attrs=None, parent=None, previous=None,
|
||||
is_xml=None):
|
||||
"Basic constructor."
|
||||
|
||||
if parser is None:
|
||||
@@ -795,6 +826,14 @@ class Tag(PageElement):
|
||||
self.name = name
|
||||
self.namespace = namespace
|
||||
self.prefix = prefix
|
||||
if builder is not None:
|
||||
preserve_whitespace_tags = builder.preserve_whitespace_tags
|
||||
else:
|
||||
if is_xml:
|
||||
preserve_whitespace_tags = []
|
||||
else:
|
||||
preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
|
||||
self.preserve_whitespace_tags = preserve_whitespace_tags
|
||||
if attrs is None:
|
||||
attrs = {}
|
||||
elif attrs:
|
||||
@@ -805,6 +844,13 @@ class Tag(PageElement):
|
||||
attrs = dict(attrs)
|
||||
else:
|
||||
attrs = dict(attrs)
|
||||
|
||||
# If possible, determine ahead of time whether this tag is an
|
||||
# XML tag.
|
||||
if builder:
|
||||
self.known_xml = builder.is_xml
|
||||
else:
|
||||
self.known_xml = is_xml
|
||||
self.attrs = attrs
|
||||
self.contents = []
|
||||
self.setup(parent, previous)
|
||||
@@ -824,7 +870,7 @@ class Tag(PageElement):
|
||||
Its contents are a copy of the old Tag's contents.
|
||||
"""
|
||||
clone = type(self)(None, self.builder, self.name, self.namespace,
|
||||
self.nsprefix, self.attrs)
|
||||
self.prefix, self.attrs, is_xml=self._is_xml)
|
||||
for attr in ('can_be_empty_element', 'hidden'):
|
||||
setattr(clone, attr, getattr(self, attr))
|
||||
for child in self.contents:
|
||||
@@ -946,6 +992,13 @@ class Tag(PageElement):
|
||||
attribute."""
|
||||
return self.attrs.get(key, default)
|
||||
|
||||
def get_attribute_list(self, key, default=None):
|
||||
"""The same as get(), but always returns a list."""
|
||||
value = self.get(key, default)
|
||||
if not isinstance(value, list):
|
||||
value = [value]
|
||||
return value
|
||||
|
||||
def has_attr(self, key):
|
||||
return key in self.attrs
|
||||
|
||||
@@ -997,7 +1050,7 @@ class Tag(PageElement):
|
||||
tag_name, tag_name))
|
||||
return self.find(tag_name)
|
||||
# We special case contents to avoid recursion.
|
||||
elif not tag.startswith("__") and not tag=="contents":
|
||||
elif not tag.startswith("__") and not tag == "contents":
|
||||
return self.find(tag)
|
||||
raise AttributeError(
|
||||
"'%s' object has no attribute '%s'" % (self.__class__, tag))
|
||||
@@ -1057,10 +1110,11 @@ class Tag(PageElement):
|
||||
|
||||
def _should_pretty_print(self, indent_level):
|
||||
"""Should this tag be pretty-printed?"""
|
||||
|
||||
return (
|
||||
indent_level is not None and
|
||||
(self.name not in HTMLAwareEntitySubstitution.preformatted_tags
|
||||
or self._is_xml))
|
||||
indent_level is not None
|
||||
and self.name not in self.preserve_whitespace_tags
|
||||
)
|
||||
|
||||
def decode(self, indent_level=None,
|
||||
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
|
||||
@@ -1280,6 +1334,7 @@ class Tag(PageElement):
|
||||
|
||||
_selector_combinators = ['>', '+', '~']
|
||||
_select_debug = False
|
||||
quoted_colon = re.compile('"[^"]*:[^"]*"')
|
||||
def select_one(self, selector):
|
||||
"""Perform a CSS selection operation on the current element."""
|
||||
value = self.select(selector, limit=1)
|
||||
@@ -1305,8 +1360,7 @@ class Tag(PageElement):
|
||||
if limit and len(context) >= limit:
|
||||
break
|
||||
return context
|
||||
|
||||
tokens = selector.split()
|
||||
tokens = shlex.split(selector)
|
||||
current_context = [self]
|
||||
|
||||
if tokens[-1] in self._selector_combinators:
|
||||
@@ -1358,7 +1412,7 @@ class Tag(PageElement):
|
||||
return classes.issubset(candidate.get('class', []))
|
||||
checker = classes_match
|
||||
|
||||
elif ':' in token:
|
||||
elif ':' in token and not self.quoted_colon.search(token):
|
||||
# Pseudo-class
|
||||
tag_name, pseudo = token.split(':', 1)
|
||||
if tag_name == '':
|
||||
@@ -1389,11 +1443,8 @@ class Tag(PageElement):
|
||||
self.count += 1
|
||||
if self.count == self.destination:
|
||||
return True
|
||||
if self.count > self.destination:
|
||||
# Stop the generator that's sending us
|
||||
# these things.
|
||||
raise StopIteration()
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
checker = Counter(pseudo_value).nth_child_of_type
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
@@ -1498,13 +1549,12 @@ class Tag(PageElement):
|
||||
# don't include it in the context more than once.
|
||||
new_context.append(candidate)
|
||||
new_context_ids.add(id(candidate))
|
||||
if limit and len(new_context) >= limit:
|
||||
break
|
||||
elif self._select_debug:
|
||||
print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
|
||||
|
||||
|
||||
current_context = new_context
|
||||
if limit and len(current_context) >= limit:
|
||||
current_context = current_context[:limit]
|
||||
|
||||
if self._select_debug:
|
||||
print "Final verdict:"
|
||||
@@ -1662,28 +1712,22 @@ class SoupStrainer(object):
|
||||
"I don't know how to match against a %s" % markup.__class__)
|
||||
return found
|
||||
|
||||
def _matches(self, markup, match_against):
|
||||
def _matches(self, markup, match_against, already_tried=None):
|
||||
# print u"Matching %s against %s" % (markup, match_against)
|
||||
result = False
|
||||
if isinstance(markup, list) or isinstance(markup, tuple):
|
||||
# This should only happen when searching a multi-valued attribute
|
||||
# like 'class'.
|
||||
if (isinstance(match_against, unicode)
|
||||
and ' ' in match_against):
|
||||
# A bit of a special case. If they try to match "foo
|
||||
# bar" on a multivalue attribute's value, only accept
|
||||
# the literal value "foo bar"
|
||||
#
|
||||
# XXX This is going to be pretty slow because we keep
|
||||
# splitting match_against. But it shouldn't come up
|
||||
# too often.
|
||||
return (whitespace_re.split(match_against) == markup)
|
||||
else:
|
||||
for item in markup:
|
||||
if self._matches(item, match_against):
|
||||
return True
|
||||
return False
|
||||
|
||||
for item in markup:
|
||||
if self._matches(item, match_against):
|
||||
return True
|
||||
# We didn't match any particular value of the multivalue
|
||||
# attribute, but maybe we match the attribute value when
|
||||
# considered as a string.
|
||||
if self._matches(' '.join(markup), match_against):
|
||||
return True
|
||||
return False
|
||||
|
||||
if match_against is True:
|
||||
# True matches any non-None value.
|
||||
return markup is not None
|
||||
@@ -1693,6 +1737,7 @@ class SoupStrainer(object):
|
||||
|
||||
# Custom callables take the tag as an argument, but all
|
||||
# other ways of matching match the tag name as a string.
|
||||
original_markup = markup
|
||||
if isinstance(markup, Tag):
|
||||
markup = markup.name
|
||||
|
||||
@@ -1703,18 +1748,51 @@ class SoupStrainer(object):
|
||||
# None matches None, False, an empty string, an empty list, and so on.
|
||||
return not match_against
|
||||
|
||||
if isinstance(match_against, unicode):
|
||||
if (hasattr(match_against, '__iter__')
|
||||
and not isinstance(match_against, basestring)):
|
||||
# We're asked to match against an iterable of items.
|
||||
# The markup must match at least one item in the
|
||||
# iterable. We'll try each one in turn.
|
||||
#
|
||||
# To avoid infinite recursion we need to keep track of
|
||||
# items we've already seen.
|
||||
if not already_tried:
|
||||
already_tried = set()
|
||||
for item in match_against:
|
||||
if item.__hash__:
|
||||
key = item
|
||||
else:
|
||||
key = id(item)
|
||||
if key in already_tried:
|
||||
continue
|
||||
else:
|
||||
already_tried.add(key)
|
||||
if self._matches(original_markup, item, already_tried):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
# Beyond this point we might need to run the test twice: once against
|
||||
# the tag's name and once against its prefixed name.
|
||||
match = False
|
||||
|
||||
if not match and isinstance(match_against, unicode):
|
||||
# Exact string match
|
||||
return markup == match_against
|
||||
match = markup == match_against
|
||||
|
||||
if hasattr(match_against, 'match'):
|
||||
if not match and hasattr(match_against, 'search'):
|
||||
# Regexp match
|
||||
return match_against.search(markup)
|
||||
|
||||
if hasattr(match_against, '__iter__'):
|
||||
# The markup must be an exact match against something
|
||||
# in the iterable.
|
||||
return markup in match_against
|
||||
if (not match
|
||||
and isinstance(original_markup, Tag)
|
||||
and original_markup.prefix):
|
||||
# Try the whole thing again with the prefixed tag name.
|
||||
return self._matches(
|
||||
original_markup.prefix + ':' + original_markup.name, match_against
|
||||
)
|
||||
|
||||
return match
|
||||
|
||||
|
||||
class ResultSet(list):
|
||||
@@ -1723,3 +1801,8 @@ class ResultSet(list):
|
||||
def __init__(self, source, result=()):
|
||||
super(ResultSet, self).__init__(result)
|
||||
self.source = source
|
||||
|
||||
def __getattr__(self, key):
|
||||
raise AttributeError(
|
||||
"ResultSet object has no attribute '%s'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?" % key
|
||||
)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""Helper classes for tests."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
import pickle
|
||||
@@ -67,6 +69,18 @@ class HTMLTreeBuilderSmokeTest(object):
|
||||
markup in these tests, there's not much room for interpretation.
|
||||
"""
|
||||
|
||||
def test_empty_element_tags(self):
|
||||
"""Verify that all HTML4 and HTML5 empty element (aka void element) tags
|
||||
are handled correctly.
|
||||
"""
|
||||
for name in [
|
||||
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
|
||||
'spacer', 'frame'
|
||||
]:
|
||||
soup = self.soup("")
|
||||
new_tag = soup.new_tag(name)
|
||||
self.assertEqual(True, new_tag.is_empty_element)
|
||||
|
||||
def test_pickle_and_unpickle_identity(self):
|
||||
# Pickling a tree, then unpickling it, yields a tree identical
|
||||
# to the original.
|
||||
@@ -137,6 +151,14 @@ class HTMLTreeBuilderSmokeTest(object):
|
||||
markup.replace(b"\n", b""))
|
||||
|
||||
def test_processing_instruction(self):
|
||||
# We test both Unicode and bytestring to verify that
|
||||
# process_markup correctly sets processing_instruction_class
|
||||
# even when the markup is already Unicode and there is no
|
||||
# need to process anything.
|
||||
markup = u"""<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.decode())
|
||||
|
||||
markup = b"""<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
@@ -215,9 +237,22 @@ Hello, world!
|
||||
self.assertEqual(comment, baz.previous_element)
|
||||
|
||||
def test_preserved_whitespace_in_pre_and_textarea(self):
|
||||
"""Whitespace must be preserved in <pre> and <textarea> tags."""
|
||||
self.assertSoupEquals("<pre> </pre>")
|
||||
self.assertSoupEquals("<textarea> woo </textarea>")
|
||||
"""Whitespace must be preserved in <pre> and <textarea> tags,
|
||||
even if that would mean not prettifying the markup.
|
||||
"""
|
||||
pre_markup = "<pre> </pre>"
|
||||
textarea_markup = "<textarea> woo\nwoo </textarea>"
|
||||
self.assertSoupEquals(pre_markup)
|
||||
self.assertSoupEquals(textarea_markup)
|
||||
|
||||
soup = self.soup(pre_markup)
|
||||
self.assertEqual(soup.pre.prettify(), pre_markup)
|
||||
|
||||
soup = self.soup(textarea_markup)
|
||||
self.assertEqual(soup.textarea.prettify(), textarea_markup)
|
||||
|
||||
soup = self.soup("<textarea></textarea>")
|
||||
self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>")
|
||||
|
||||
def test_nested_inline_elements(self):
|
||||
"""Inline elements can be nested indefinitely."""
|
||||
@@ -307,6 +342,13 @@ Hello, world!
|
||||
self.assertEqual("p", soup.p.name)
|
||||
self.assertConnectedness(soup)
|
||||
|
||||
def test_empty_element_tags(self):
|
||||
"""Verify consistent handling of empty-element tags,
|
||||
no matter how they come in through the markup.
|
||||
"""
|
||||
self.assertSoupEquals('<br/><br/><br/>', "<br/><br/><br/>")
|
||||
self.assertSoupEquals('<br /><br /><br />', "<br/><br/><br/>")
|
||||
|
||||
def test_head_tag_between_head_and_body(self):
|
||||
"Prevent recurrence of a bug in the html5lib treebuilder."
|
||||
content = """<html><head></head>
|
||||
@@ -480,7 +522,9 @@ Hello, world!
|
||||
hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
|
||||
soup = self.soup(
|
||||
hebrew_document, from_encoding="iso8859-8")
|
||||
self.assertEqual(soup.original_encoding, 'iso8859-8')
|
||||
# Some tree builders call it iso8859-8, others call it iso-8859-8.
|
||||
# That's not a difference we really care about.
|
||||
assert soup.original_encoding in ('iso8859-8', 'iso-8859-8')
|
||||
self.assertEqual(
|
||||
soup.encode('utf-8'),
|
||||
hebrew_document.decode("iso8859-8").encode("utf-8"))
|
||||
@@ -563,6 +607,11 @@ class XMLTreeBuilderSmokeTest(object):
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
|
||||
def test_processing_instruction(self):
|
||||
markup = b"""<?xml version="1.0" encoding="utf8"?>\n<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
|
||||
def test_real_xhtml_document(self):
|
||||
"""A real XHTML document should come out *exactly* the same as it went in."""
|
||||
markup = b"""<?xml version="1.0" encoding="utf-8"?>
|
||||
@@ -639,6 +688,40 @@ class XMLTreeBuilderSmokeTest(object):
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(unicode(soup.foo), markup)
|
||||
|
||||
def test_find_by_prefixed_name(self):
|
||||
doc = """<?xml version="1.0" encoding="utf-8"?>
|
||||
<Document xmlns="http://example.com/ns0"
|
||||
xmlns:ns1="http://example.com/ns1"
|
||||
xmlns:ns2="http://example.com/ns2"
|
||||
<ns1:tag>foo</ns1:tag>
|
||||
<ns1:tag>bar</ns1:tag>
|
||||
<ns2:tag key="value">baz</ns2:tag>
|
||||
</Document>
|
||||
"""
|
||||
soup = self.soup(doc)
|
||||
|
||||
# There are three <tag> tags.
|
||||
self.assertEqual(3, len(soup.find_all('tag')))
|
||||
|
||||
# But two of them are ns1:tag and one of them is ns2:tag.
|
||||
self.assertEqual(2, len(soup.find_all('ns1:tag')))
|
||||
self.assertEqual(1, len(soup.find_all('ns2:tag')))
|
||||
|
||||
self.assertEqual(1, len(soup.find_all('ns2:tag', key='value')))
|
||||
self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag'])))
|
||||
|
||||
def test_copy_tag_preserves_namespace(self):
|
||||
xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<w:document xmlns:w="http://example.com/ns0"/>"""
|
||||
|
||||
soup = self.soup(xml)
|
||||
tag = soup.document
|
||||
duplicate = copy.copy(tag)
|
||||
|
||||
# The two tags have the same namespace prefix.
|
||||
self.assertEqual(tag.prefix, duplicate.prefix)
|
||||
|
||||
|
||||
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
|
||||
"""Smoke test for a tree builder that supports HTML5."""
|
||||
|
||||
|
||||
@@ -84,6 +84,33 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
|
||||
self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
|
||||
self.assertEqual(2, len(soup.find_all('p')))
|
||||
|
||||
def test_reparented_markup_containing_identical_whitespace_nodes(self):
|
||||
"""Verify that we keep the two whitespace nodes in this
|
||||
document distinct when reparenting the adjacent <tbody> tags.
|
||||
"""
|
||||
markup = '<table> <tbody><tbody><ims></tbody> </table>'
|
||||
soup = self.soup(markup)
|
||||
space1, space2 = soup.find_all(string=' ')
|
||||
tbody1, tbody2 = soup.find_all('tbody')
|
||||
assert space1.next_element is tbody1
|
||||
assert tbody2.next_element is space2
|
||||
|
||||
def test_reparented_markup_containing_children(self):
|
||||
markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>'
|
||||
soup = self.soup(markup)
|
||||
noscript = soup.noscript
|
||||
self.assertEqual("target", noscript.next_element)
|
||||
target = soup.find(string='target')
|
||||
|
||||
# The 'aftermath' string was duplicated; we want the second one.
|
||||
final_aftermath = soup.find_all(string='aftermath')[-1]
|
||||
|
||||
# The <noscript> tag was moved beneath a copy of the <a> tag,
|
||||
# but the 'target' string within is still connected to the
|
||||
# (second) 'aftermath' string.
|
||||
self.assertEqual(final_aftermath, target.next_element)
|
||||
self.assertEqual(target, final_aftermath.previous_element)
|
||||
|
||||
def test_processing_instruction(self):
|
||||
"""Processing instructions become comments."""
|
||||
markup = b"""<?PITarget PIContent?>"""
|
||||
@@ -96,3 +123,8 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
|
||||
a1, a2 = soup.find_all('a')
|
||||
self.assertEqual(a1, a2)
|
||||
assert a1 is not a2
|
||||
|
||||
def test_foster_parenting(self):
|
||||
markup = b"""<table><td></tbody>A"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
|
||||
|
||||
@@ -29,4 +29,6 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||
loaded = pickle.loads(dumped)
|
||||
self.assertTrue(isinstance(loaded.builder, type(tree.builder)))
|
||||
|
||||
|
||||
def test_redundant_empty_element_closing_tags(self):
|
||||
self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
|
||||
self.assertSoupEquals('</br></br></br>', "")
|
||||
|
||||
@@ -35,7 +35,6 @@ try:
|
||||
except ImportError, e:
|
||||
LXML_PRESENT = False
|
||||
|
||||
PYTHON_2_PRE_2_7 = (sys.version_info < (2,7))
|
||||
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
|
||||
|
||||
class TestConstructor(SoupTest):
|
||||
@@ -77,7 +76,7 @@ class TestWarnings(SoupTest):
|
||||
def test_no_warning_if_explicit_parser_specified(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("<a><b></b></a>", "html.parser")
|
||||
self.assertEquals([], w)
|
||||
self.assertEqual([], w)
|
||||
|
||||
def test_parseOnlyThese_renamed_to_parse_only(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
@@ -118,15 +117,34 @@ class TestWarnings(SoupTest):
|
||||
soup = self.soup(filename)
|
||||
self.assertEqual(0, len(w))
|
||||
|
||||
def test_url_warning(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("http://www.crummy.com/")
|
||||
msg = str(w[0].message)
|
||||
self.assertTrue("looks like a URL" in msg)
|
||||
def test_url_warning_with_bytes_url(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(b"http://www.crummybytes.com/")
|
||||
# Be aware this isn't the only warning that can be raised during
|
||||
# execution..
|
||||
self.assertTrue(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_unicode_url(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
# note - this url must differ from the bytes one otherwise
|
||||
# python's warnings system swallows the second warning
|
||||
soup = self.soup(u"http://www.crummyunicode.com/")
|
||||
self.assertTrue(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_bytes_and_space(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(b"http://www.crummybytes.com/ is great")
|
||||
self.assertFalse(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_unicode_and_space(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(u"http://www.crummyuncode.com/ is great")
|
||||
self.assertFalse(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("http://www.crummy.com/ is great")
|
||||
self.assertEqual(0, len(w))
|
||||
|
||||
class TestSelectiveParsing(SoupTest):
|
||||
|
||||
@@ -260,7 +278,7 @@ class TestEncodingConversion(SoupTest):
|
||||
self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
|
||||
|
||||
@skipIf(
|
||||
PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2,
|
||||
PYTHON_3_PRE_3_2,
|
||||
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
|
||||
def test_attribute_name_containing_unicode_characters(self):
|
||||
markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Tests for Beautiful Soup's tree traversal methods.
|
||||
|
||||
@@ -222,7 +223,19 @@ class TestFindAllByName(TreeTest):
|
||||
self.assertSelects(
|
||||
tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
|
||||
|
||||
def test_find_with_multi_valued_attribute(self):
|
||||
soup = self.soup(
|
||||
"<div class='a b'>1</div><div class='a c'>2</div><div class='a d'>3</div>"
|
||||
)
|
||||
r1 = soup.find('div', 'a d');
|
||||
r2 = soup.find('div', re.compile(r'a d'));
|
||||
r3, r4 = soup.find_all('div', ['a b', 'a d']);
|
||||
self.assertEqual('3', r1.string)
|
||||
self.assertEqual('3', r2.string)
|
||||
self.assertEqual('1', r3.string)
|
||||
self.assertEqual('3', r4.string)
|
||||
|
||||
|
||||
class TestFindAllByAttribute(TreeTest):
|
||||
|
||||
def test_find_all_by_attribute_name(self):
|
||||
@@ -294,10 +307,10 @@ class TestFindAllByAttribute(TreeTest):
|
||||
f = tree.find_all("gar", class_=re.compile("a"))
|
||||
self.assertSelects(f, ["Found it"])
|
||||
|
||||
# Since the class is not the string "foo bar", but the two
|
||||
# strings "foo" and "bar", this will not find anything.
|
||||
# If the search fails to match the individual strings "foo" and "bar",
|
||||
# it will be tried against the combined string "foo bar".
|
||||
f = tree.find_all("gar", class_=re.compile("o b"))
|
||||
self.assertSelects(f, [])
|
||||
self.assertSelects(f, ["Found it"])
|
||||
|
||||
def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
|
||||
soup = self.soup("<a class='bar'>Found it</a>")
|
||||
@@ -335,7 +348,7 @@ class TestFindAllByAttribute(TreeTest):
|
||||
strainer = SoupStrainer(attrs={'id' : 'first'})
|
||||
self.assertSelects(tree.find_all(strainer), ['Match.'])
|
||||
|
||||
def test_find_all_with_missing_atribute(self):
|
||||
def test_find_all_with_missing_attribute(self):
|
||||
# You can pass in None as the value of an attribute to find_all.
|
||||
# This will match tags that do not have that attribute set.
|
||||
tree = self.soup("""<a id="1">ID present.</a>
|
||||
@@ -1273,6 +1286,10 @@ class TestCDAtaListAttributes(SoupTest):
|
||||
soup = self.soup("<a class='foo\tbar'>")
|
||||
self.assertEqual(b'<a class="foo bar"></a>', soup.a.encode())
|
||||
|
||||
def test_get_attribute_list(self):
|
||||
soup = self.soup("<a id='abc def'>")
|
||||
self.assertEqual(['abc def'], soup.a.get_attribute_list('id'))
|
||||
|
||||
def test_accept_charset(self):
|
||||
soup = self.soup('<form accept-charset="ISO-8859-1 UTF-8">')
|
||||
self.assertEqual(['ISO-8859-1', 'UTF-8'], soup.form['accept-charset'])
|
||||
@@ -1328,6 +1345,13 @@ class TestPersistence(SoupTest):
|
||||
copied = copy.deepcopy(self.tree)
|
||||
self.assertEqual(copied.decode(), self.tree.decode())
|
||||
|
||||
def test_copy_preserves_encoding(self):
|
||||
soup = BeautifulSoup(b'<p> </p>', 'html.parser')
|
||||
encoding = soup.original_encoding
|
||||
copy = soup.__copy__()
|
||||
self.assertEqual(u"<p> </p>", unicode(copy))
|
||||
self.assertEqual(encoding, copy.original_encoding)
|
||||
|
||||
def test_unicode_pickle(self):
|
||||
# A tree containing Unicode characters can be pickled.
|
||||
html = u"<b>\N{SNOWMAN}</b>"
|
||||
@@ -1676,8 +1700,8 @@ class TestSoupSelector(TreeTest):
|
||||
def setUp(self):
|
||||
self.soup = BeautifulSoup(self.HTML, 'html.parser')
|
||||
|
||||
def assertSelects(self, selector, expected_ids):
|
||||
el_ids = [el['id'] for el in self.soup.select(selector)]
|
||||
def assertSelects(self, selector, expected_ids, **kwargs):
|
||||
el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)]
|
||||
el_ids.sort()
|
||||
expected_ids.sort()
|
||||
self.assertEqual(expected_ids, el_ids,
|
||||
@@ -1720,6 +1744,13 @@ class TestSoupSelector(TreeTest):
|
||||
for selector in ('html div', 'html body div', 'body div'):
|
||||
self.assertSelects(selector, ['data1', 'main', 'inner', 'footer'])
|
||||
|
||||
|
||||
def test_limit(self):
|
||||
self.assertSelects('html div', ['main'], limit=1)
|
||||
self.assertSelects('html body div', ['inner', 'main'], limit=2)
|
||||
self.assertSelects('body div', ['data1', 'main', 'inner', 'footer'],
|
||||
limit=10)
|
||||
|
||||
def test_tag_no_match(self):
|
||||
self.assertEqual(len(self.soup.select('del')), 0)
|
||||
|
||||
@@ -1902,6 +1933,14 @@ class TestSoupSelector(TreeTest):
|
||||
('div[data-tag]', ['data1'])
|
||||
)
|
||||
|
||||
def test_quoted_space_in_selector_name(self):
|
||||
html = """<div style="display: wrong">nope</div>
|
||||
<div style="display: right">yes</div>
|
||||
"""
|
||||
soup = BeautifulSoup(html, 'html.parser')
|
||||
[chosen] = soup.select('div[style="display: right"]')
|
||||
self.assertEqual("yes", chosen.string)
|
||||
|
||||
def test_unsupported_pseudoclass(self):
|
||||
self.assertRaises(
|
||||
NotImplementedError, self.soup.select, "a:no-such-pseudoclass")
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from .core import where, old_where
|
||||
|
||||
__version__ = "2017.04.17"
|
||||
__version__ = "2018.01.18"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -19,18 +19,19 @@ class DeprecatedBundleWarning(DeprecationWarning):
|
||||
|
||||
|
||||
def where():
|
||||
f = os.path.split(__file__)[0]
|
||||
f = os.path.dirname(__file__)
|
||||
|
||||
return os.path.join(f, 'cacert.pem')
|
||||
|
||||
|
||||
def old_where():
|
||||
warnings.warn(
|
||||
"The weak security bundle is being deprecated.",
|
||||
"The weak security bundle has been removed. certifi.old_where() is now an alias "
|
||||
"of certifi.where(). Please update your code to use certifi.where() instead. "
|
||||
"certifi.old_where() will be removed in 2018.",
|
||||
DeprecatedBundleWarning
|
||||
)
|
||||
f = os.path.split(__file__)[0]
|
||||
return os.path.join(f, 'weak.pem')
|
||||
return where()
|
||||
|
||||
if __name__ == '__main__':
|
||||
print(where())
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,436 @@
|
||||
"""contextlib2 - backports and enhancements to the contextlib module"""
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
from collections import deque
|
||||
from functools import wraps
|
||||
|
||||
__all__ = ["contextmanager", "closing", "ContextDecorator", "ExitStack",
|
||||
"redirect_stdout", "redirect_stderr", "suppress"]
|
||||
|
||||
# Backwards compatibility
|
||||
__all__ += ["ContextStack"]
|
||||
|
||||
class ContextDecorator(object):
    """A base class or mixin that enables context managers to work as decorators.

    Subclasses supply ``__enter__``/``__exit__``; calling an instance with a
    function returns a wrapper that runs the function inside the context
    manager obtained from ``_recreate_cm()``.
    """

    def refresh_cm(self):
        """Returns the context manager used to actually wrap the call to the
        decorated function.

        The default implementation just returns *self*.

        Overriding this method allows otherwise one-shot context managers
        like _GeneratorContextManager to support use as decorators via
        implicit recreation.

        DEPRECATED: refresh_cm was never added to the standard library's
                    ContextDecorator API
        """
        # stacklevel=2 attributes the warning to the code calling
        # refresh_cm() rather than to this line.
        warnings.warn("refresh_cm was never added to the standard library",
                      DeprecationWarning, stacklevel=2)
        return self._recreate_cm()

    def _recreate_cm(self):
        """Return a recreated instance of self.

        Allows an otherwise one-shot context manager like
        _GeneratorContextManager to support use as
        a decorator via implicit recreation.

        This is a private interface just for _GeneratorContextManager.
        See issue #11647 for details.
        """
        return self

    def __call__(self, func):
        """Decorate *func* so that every call runs inside this context manager.

        The returned wrapper keeps *func*'s metadata (via functools.wraps)
        and returns whatever *func* returns.
        """
        @wraps(func)
        def inner(*args, **kwds):
            # Ask for a (possibly fresh) manager on every call so one-shot
            # managers can still be used as decorators.
            with self._recreate_cm():
                return func(*args, **kwds)
        return inner
|
||||
|
||||
|
||||
class _GeneratorContextManager(ContextDecorator):
    """Helper for @contextmanager decorator."""

    def __init__(self, func, args, kwds):
        """Create the generator by calling ``func(*args, **kwds)``.

        The function and its arguments are kept so that ``_recreate_cm()``
        can build a fresh instance later.
        """
        self.gen = func(*args, **kwds)
        self.func, self.args, self.kwds = func, args, kwds
        # Issue 19330: ensure context manager instances have good docstrings
        doc = getattr(func, "__doc__", None)
        if doc is None:
            doc = type(self).__doc__
        self.__doc__ = doc
        # Unfortunately, this still doesn't provide good help output when
        # inspecting the created context manager instances, since pydoc
        # currently bypasses the instance docstring and shows the docstring
        # for the class instead.
        # See http://bugs.python.org/issue19404 for more details.

    def _recreate_cm(self):
        """Return a new instance built from the stored function and arguments."""
        # _GCM instances are one-shot context managers, so the
        # CM must be recreated each time a decorated function is
        # called
        return self.__class__(self.func, self.args, self.kwds)

    def __enter__(self):
        """Advance the generator to its first yield and return the yielded value.

        Raises RuntimeError if the generator finishes without yielding.
        """
        try:
            return next(self.gen)
        except StopIteration:
            raise RuntimeError("generator didn't yield")

    def __exit__(self, type, value, traceback):
        """Resume the generator, throwing the active exception into it if any.

        Returns a true value when the exception should be suppressed.
        Raises RuntimeError if the generator yields a second time.
        """
        if type is None:
            # Normal exit: the generator is expected to finish now.
            try:
                next(self.gen)
            except StopIteration:
                return
            else:
                raise RuntimeError("generator didn't stop")
        else:
            if value is None:
                # Need to force instantiation so we can reliably
                # tell if we get the same exception back
                value = type()
            try:
                self.gen.throw(type, value, traceback)
                raise RuntimeError("generator didn't stop after throw()")
            except StopIteration as exc:
                # Suppress StopIteration *unless* it's the same exception that
                # was passed to throw().  This prevents a StopIteration
                # raised inside the "with" statement from being suppressed.
                return exc is not value
            except RuntimeError as exc:
                # Don't re-raise the passed in exception
                if exc is value:
                    return False
                # Likewise, avoid suppressing if a StopIteration exception
                # was passed to throw() and later wrapped into a RuntimeError
                # (see PEP 479).
                if _HAVE_EXCEPTION_CHAINING and exc.__cause__ is value:
                    return False
                raise
            except:
                # only re-raise if it's *not* the exception that was
                # passed to throw(), because __exit__() must not raise
                # an exception unless __exit__() itself failed.  But throw()
                # has to raise the exception to signal propagation, so this
                # fixes the impedance mismatch between the throw() protocol
                # and the __exit__() protocol.
                #
                if sys.exc_info()[1] is not value:
                    raise
|
||||
|
||||
|
||||
def contextmanager(func):
    """@contextmanager decorator.

    Typical usage:

        @contextmanager
        def some_generator(<arguments>):
            <setup>
            try:
                yield <value>
            finally:
                <cleanup>

    This makes this:

        with some_generator(<arguments>) as <variable>:
            <body>

    equivalent to this:

        <setup>
        try:
            <variable> = <value>
            <body>
        finally:
            <cleanup>

    """
    @wraps(func)
    def helper(*args, **kwds):
        # Each call builds a new one-shot manager around a fresh generator.
        return _GeneratorContextManager(func, args, kwds)
    return helper
|
||||
|
||||
|
||||
class closing(object):
    """Context to automatically close something at the end of a block.

    Code like this:

        with closing(<module>.open(<arguments>)) as f:
            <block>

    is equivalent to this:

        f = <module>.open(<arguments>)
        try:
            <block>
        finally:
            f.close()

    """
    def __init__(self, thing):
        """Remember *thing*, the object whose ``close()`` is called on exit."""
        self.thing = thing

    def __enter__(self):
        """Return the wrapped object unchanged."""
        return self.thing

    def __exit__(self, *exc_info):
        """Close the wrapped object; returns None, so exceptions propagate."""
        self.thing.close()
|
||||
|
||||
|
||||
class _RedirectStream(object):
    """Base context manager that temporarily rebinds an attribute of ``sys``.

    Subclasses set ``_stream`` to the name of the ``sys`` attribute to swap.
    """

    # Name of the sys attribute to replace; must be set by subclasses.
    _stream = None

    def __init__(self, new_target):
        """Store *new_target*, the object installed on entry."""
        self._new_target = new_target
        # We use a list of old targets to make this CM re-entrant
        self._old_targets = []

    def __enter__(self):
        """Save the current ``sys`` attribute, install the new target, return it."""
        self._old_targets.append(getattr(sys, self._stream))
        setattr(sys, self._stream, self._new_target)
        return self._new_target

    def __exit__(self, exctype, excinst, exctb):
        """Restore the most recently saved value of the ``sys`` attribute."""
        setattr(sys, self._stream, self._old_targets.pop())
|
||||
|
||||
|
||||
class redirect_stdout(_RedirectStream):
    """Context manager for temporarily redirecting stdout to another file.

        # How to send help() to stderr
        with redirect_stdout(sys.stderr):
            help(dir)

        # How to write help() to a file
        with open('help.txt', 'w') as f:
            with redirect_stdout(f):
                help(pow)
    """

    # The base class swaps the sys attribute with this name.
    _stream = "stdout"
|
||||
|
||||
|
||||
class redirect_stderr(_RedirectStream):
    """Context manager for temporarily redirecting stderr to another file."""

    # The base class swaps the sys attribute with this name.
    _stream = "stderr"
|
||||
|
||||
|
||||
class suppress(object):
    """Context manager to suppress specified exceptions

    After the exception is suppressed, execution proceeds with the next
    statement following the with statement.

         with suppress(FileNotFoundError):
             os.remove(somefile)
         # Execution still resumes here if the file was already removed
    """

    def __init__(self, *exceptions):
        """Store the exception classes to suppress (a possibly empty tuple)."""
        self._exceptions = exceptions

    def __enter__(self):
        """Do nothing on entry; the ``as`` target is None."""
        pass

    def __exit__(self, exctype, excinst, exctb):
        """Return True when *exctype* is a subclass of a configured exception."""
        # Unlike isinstance and issubclass, CPython exception handling
        # currently only looks at the concrete type hierarchy (ignoring
        # the instance and subclass checking hooks). While Guido considers
        # that a bug rather than a feature, it's a fairly hard one to fix
        # due to various internal implementation details. suppress provides
        # the simpler issubclass based semantics, rather than trying to
        # exactly reproduce the limitations of the CPython interpreter.
        #
        # See http://bugs.python.org/issue12029 for more details
        return exctype is not None and issubclass(exctype, self._exceptions)
|
||||
|
||||
|
||||
# Context manipulation is Python 3 only
_HAVE_EXCEPTION_CHAINING = sys.version_info[0] >= 3
if _HAVE_EXCEPTION_CHAINING:
    def _make_context_fixer(frame_exc):
        """Return a function that splices *old_exc* into a ``__context__`` chain.

        *frame_exc* is the exception that was active when the caller started;
        the chain walk stops when it reaches that exception or the chain's end.
        """
        def _fix_exception_context(new_exc, old_exc):
            # Context may not be correct, so find the end of the chain
            while True:
                exc_context = new_exc.__context__
                if exc_context is old_exc:
                    # Context is already set correctly (see issue 20317)
                    return
                if exc_context is None or exc_context is frame_exc:
                    break
                new_exc = exc_context
            # Change the end of the chain to point to the exception
            # we expect it to reference
            new_exc.__context__ = old_exc
        return _fix_exception_context

    def _reraise_with_existing_context(exc_details):
        """Re-raise ``exc_details[1]`` while keeping its current ``__context__``."""
        try:
            # bare "raise exc_details[1]" replaces our carefully
            # set-up context
            fixed_ctx = exc_details[1].__context__
            raise exc_details[1]
        except BaseException:
            exc_details[1].__context__ = fixed_ctx
            raise
else:
    # No exception context in Python 2
    def _make_context_fixer(frame_exc):
        """Return a no-op fixer; there is no ``__context__`` to repair."""
        return lambda new_exc, old_exc: None

    # Use 3 argument raise in Python 2,
    # but use exec to avoid SyntaxError in Python 3
    def _reraise_with_existing_context(exc_details):
        """Re-raise the ``(type, value, traceback)`` triple in *exc_details*."""
        exc_type, exc_value, exc_tb = exc_details
        exec ("raise exc_type, exc_value, exc_tb")
|
||||
|
||||
# Handle old-style classes if they exist
try:
    from types import InstanceType
except ImportError:
    # Python 3 doesn't have old-style classes
    _get_type = type
else:
    # Need to handle old-style context managers on Python 2
    def _get_type(obj):
        """Return the class of *obj*, including for old-style instances."""
        obj_type = type(obj)
        if obj_type is InstanceType:
            return obj.__class__  # Old-style class
        return obj_type  # New-style class
|
||||
|
||||
# Inspired by discussions on http://bugs.python.org/issue13585
class ExitStack(object):
    """Context manager for dynamic management of a stack of exit callbacks

    For example:

        with ExitStack() as stack:
            files = [stack.enter_context(open(fname)) for fname in filenames]
            # All opened files will automatically be closed at the end of
            # the with statement, even if attempts to open files later
            # in the list raise an exception

    """
    def __init__(self):
        """Start with an empty callback stack."""
        self._exit_callbacks = deque()

    def pop_all(self):
        """Preserve the context stack by transferring it to a new instance"""
        new_stack = type(self)()
        new_stack._exit_callbacks = self._exit_callbacks
        self._exit_callbacks = deque()
        return new_stack

    def _push_cm_exit(self, cm, cm_exit):
        """Helper to correctly register callbacks to __exit__ methods"""
        def _exit_wrapper(*exc_details):
            return cm_exit(cm, *exc_details)
        # Expose the managed object on the wrapper for introspection.
        _exit_wrapper.__self__ = cm
        self.push(_exit_wrapper)

    def push(self, exit):
        """Registers a callback with the standard __exit__ method signature

        Can suppress exceptions the same way __exit__ methods can.

        Also accepts any object with an __exit__ method (registering a call
        to the method instead of the object itself)
        """
        # We use an unbound method rather than a bound method to follow
        # the standard lookup behaviour for special methods
        _cb_type = _get_type(exit)
        try:
            exit_method = _cb_type.__exit__
        except AttributeError:
            # Not a context manager, so assume it's a callable
            self._exit_callbacks.append(exit)
        else:
            self._push_cm_exit(exit, exit_method)
        return exit  # Allow use as a decorator

    def callback(self, callback, *args, **kwds):
        """Registers an arbitrary callback and arguments.

        Cannot suppress exceptions.
        """
        def _exit_wrapper(exc_type, exc, tb):
            # The return value is discarded, so the wrapper always returns
            # None and never suppresses an exception.
            callback(*args, **kwds)
        # We changed the signature, so using @wraps is not appropriate, but
        # setting __wrapped__ may still help with introspection
        _exit_wrapper.__wrapped__ = callback
        self.push(_exit_wrapper)
        return callback  # Allow use as a decorator

    def enter_context(self, cm):
        """Enters the supplied context manager

        If successful, also pushes its __exit__ method as a callback and
        returns the result of the __enter__ method.
        """
        # We look up the special methods on the type to match the with statement
        _cm_type = _get_type(cm)
        # Fetch __exit__ before calling __enter__ so an object lacking
        # __exit__ fails before it is entered.
        _exit = _cm_type.__exit__
        result = _cm_type.__enter__(cm)
        self._push_cm_exit(cm, _exit)
        return result

    def close(self):
        """Immediately unwind the context stack"""
        self.__exit__(None, None, None)

    def __enter__(self):
        """Return the stack itself as the ``as`` target."""
        return self

    def __exit__(self, *exc_details):
        """Run every registered callback in LIFO order.

        Returns True only if an exception was passed in and a callback
        suppressed it. If a callback raises and no later callback
        suppresses it, that exception is re-raised after all callbacks ran.
        """
        received_exc = exc_details[0] is not None

        # We manipulate the exception state so it behaves as though
        # we were actually nesting multiple with statements
        frame_exc = sys.exc_info()[1]
        _fix_exception_context = _make_context_fixer(frame_exc)

        # Callbacks are invoked in LIFO order to match the behaviour of
        # nested context managers
        suppressed_exc = False
        pending_raise = False
        while self._exit_callbacks:
            cb = self._exit_callbacks.pop()
            try:
                if cb(*exc_details):
                    # The callback swallowed the exception: later callbacks
                    # see a clean exit.
                    suppressed_exc = True
                    pending_raise = False
                    exc_details = (None, None, None)
            except:
                # Deliberately bare: any exception from a callback replaces
                # the one in flight, exactly as in nested with statements.
                new_exc_details = sys.exc_info()
                # simulate the stack of exceptions by setting the context
                _fix_exception_context(new_exc_details[1], exc_details[1])
                pending_raise = True
                exc_details = new_exc_details
        if pending_raise:
            _reraise_with_existing_context(exc_details)
        return received_exc and suppressed_exc
|
||||
|
||||
# Preserve backwards compatibility
class ContextStack(ExitStack):
    """Backwards compatibility alias for ExitStack"""

    def __init__(self):
        """Emit a DeprecationWarning, then initialise as an ExitStack."""
        # stacklevel=2 attributes the warning to the code creating the
        # ContextStack rather than to this line.
        warnings.warn("ContextStack has been renamed to ExitStack",
                      DeprecationWarning, stacklevel=2)
        super(ContextStack, self).__init__()

    def register_exit(self, callback):
        """Old name for ``push()``."""
        return self.push(callback)

    def register(self, callback, *args, **kwds):
        """Old name for ``callback()``."""
        return self.callback(callback, *args, **kwds)

    def preserve(self):
        """Old name for ``pop_all()``."""
        return self.pop_all()
|
||||
@@ -0,0 +1,18 @@
|
||||
"""Provide a (g)dbm-compatible interface to bsddb.hashopen."""
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
warnings.warnpy3k("in 3.x, the dbhash module has been removed", stacklevel=2)
|
||||
try:
|
||||
import bsddb
|
||||
except ImportError:
|
||||
# prevent a second import of this module from spuriously succeeding
|
||||
del sys.modules[__name__]
|
||||
raise
|
||||
|
||||
__all__ = ["error","open"]
|
||||
|
||||
error = bsddb.error # Exported for anydbm
|
||||
|
||||
def open(file, flag='r', mode=0o666):
    """Open *file* through ``bsddb.hashopen`` and return its result.

    *file*, *flag* and *mode* are passed to ``bsddb.hashopen`` unchanged.
    The default mode is written ``0o666``, which has the same value as the
    legacy ``0666`` spelling but is also accepted by Python 2.6+ and
    Python 3 parsers.
    """
    return bsddb.hashopen(file, flag, mode)
|
||||
@@ -1,4 +1,4 @@
|
||||
__version__ = '0.6.2'
|
||||
__version__ = '0.6.5'
|
||||
|
||||
from .lock import Lock # noqa
|
||||
from .lock import NeedRegenerationException # noqa
|
||||
from .lock import NeedRegenerationException # noqa
|
||||
|
||||
@@ -13,6 +13,13 @@ class NoValue(object):
|
||||
def payload(self):
|
||||
return self
|
||||
|
||||
def __repr__(self):
|
||||
"""Ensure __repr__ is a consistent value in case NoValue is used to
|
||||
fill another cache key.
|
||||
|
||||
"""
|
||||
return '<dogpile.cache.api.NoValue object>'
|
||||
|
||||
if py3k:
|
||||
def __bool__(self): # pragma NO COVERAGE
|
||||
return False
|
||||
@@ -20,6 +27,7 @@ class NoValue(object):
|
||||
def __nonzero__(self): # pragma NO COVERAGE
|
||||
return False
|
||||
|
||||
|
||||
NO_VALUE = NoValue()
|
||||
"""Value returned from ``get()`` that describes
|
||||
a key not present."""
|
||||
|
||||
@@ -15,3 +15,11 @@ class RegionNotConfigured(DogpileCacheException):
|
||||
|
||||
class ValidationError(DogpileCacheException):
|
||||
"""Error validating a value or option."""
|
||||
|
||||
|
||||
class PluginNotFound(DogpileCacheException):
|
||||
"""The specified plugin could not be found.
|
||||
|
||||
.. versionadded:: 0.6.4
|
||||
|
||||
"""
|
||||
|
||||
+35
-5
@@ -410,7 +410,13 @@ class CacheRegion(object):
|
||||
"configured with backend: %s. "
|
||||
"Specify replace_existing_backend=True to replace."
|
||||
% self.backend)
|
||||
backend_cls = _backend_loader.load(backend)
|
||||
|
||||
try:
|
||||
backend_cls = _backend_loader.load(backend)
|
||||
except PluginLoader.NotFound:
|
||||
raise exception.PluginNotFound(
|
||||
"Couldn't find cache plugin to load: %s" % backend)
|
||||
|
||||
if _config_argument_dict:
|
||||
self.backend = backend_cls.from_config_dict(
|
||||
_config_argument_dict,
|
||||
@@ -487,8 +493,19 @@ class CacheRegion(object):
|
||||
a value. Any retrieved value whose creation
|
||||
time is prior to this timestamp
|
||||
is considered to be stale. It does not
|
||||
affect the data in the cache in any way, and is also
|
||||
local to this instance of :class:`.CacheRegion`.
|
||||
affect the data in the cache in any way, and is
|
||||
**local to this instance of :class:`.CacheRegion`.**
|
||||
|
||||
.. warning::
|
||||
|
||||
The :meth:`.CacheRegion.invalidate` method's default mode of
|
||||
operation is to set a timestamp **local to this CacheRegion
|
||||
in this Python process only**. It does not impact other Python
|
||||
processes or regions as the timestamp is **only stored locally in
|
||||
memory**. To implement invalidation where the
|
||||
timestamp is stored in the cache or similar so that all Python
|
||||
processes can be affected by an invalidation timestamp, implement a
|
||||
custom :class:`.RegionInvalidationStrategy`.
|
||||
|
||||
Once set, the invalidation time is honored by
|
||||
the :meth:`.CacheRegion.get_or_create`,
|
||||
@@ -550,6 +567,8 @@ class CacheRegion(object):
|
||||
_config_prefix="%sarguments." % prefix,
|
||||
wrap=config_dict.get(
|
||||
"%swrap" % prefix, None),
|
||||
replace_existing_backend=config_dict.get(
|
||||
"%sreplace_existing_backend" % prefix, False),
|
||||
)
|
||||
|
||||
@memoized_property
|
||||
@@ -944,11 +963,14 @@ class CacheRegion(object):
|
||||
if not should_cache_fn:
|
||||
self.backend.set_multi(values_w_created)
|
||||
else:
|
||||
self.backend.set_multi(dict(
|
||||
values_to_cache = dict(
|
||||
(k, v)
|
||||
for k, v in values_w_created.items()
|
||||
if should_cache_fn(v[0])
|
||||
))
|
||||
)
|
||||
|
||||
if values_to_cache:
|
||||
self.backend.set_multi(values_to_cache)
|
||||
|
||||
values.update(values_w_created)
|
||||
return [values[orig_to_mangled[k]].payload for k in keys]
|
||||
@@ -1075,6 +1097,14 @@ class CacheRegion(object):
|
||||
.. versionadded:: 0.5.0 Added ``refresh()`` method to decorated
|
||||
function.
|
||||
|
||||
``original()`` on other hand will invoke the decorated function
|
||||
without any caching::
|
||||
|
||||
newvalue = generate_something.original(5, 6)
|
||||
|
||||
.. versionadded:: 0.6.0 Added ``original()`` method to decorated
|
||||
function.
|
||||
|
||||
Lastly, the ``get()`` method returns either the value cached
|
||||
for the given key, or the token ``NO_VALUE`` if no such key
|
||||
exists::
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from .nameregistry import NameRegistry # noqa
|
||||
from .readwrite_lock import ReadWriteMutex # noqa
|
||||
from .langhelpers import PluginLoader, memoized_property, \
|
||||
coerce_string_conf, to_list, KeyReentrantMutex # noqa
|
||||
coerce_string_conf, to_list, KeyReentrantMutex # noqa
|
||||
|
||||
@@ -39,9 +39,9 @@ class PluginLoader(object):
|
||||
self.impls[name] = impl.load
|
||||
return impl.load()
|
||||
else:
|
||||
raise Exception(
|
||||
"Can't load plugin %s %s" %
|
||||
(self.group, name))
|
||||
raise self.NotFound(
|
||||
"Can't load plugin %s %s" % (self.group, name)
|
||||
)
|
||||
|
||||
def register(self, name, modulepath, objname):
|
||||
def load():
|
||||
@@ -49,6 +49,9 @@ class PluginLoader(object):
|
||||
return getattr(mod, objname)
|
||||
self.impls[name] = load
|
||||
|
||||
class NotFound(Exception):
|
||||
"""The specified plugin could not be found."""
|
||||
|
||||
|
||||
class memoized_property(object):
|
||||
"""A read-only @property that is only evaluated once."""
|
||||
|
||||
@@ -0,0 +1,249 @@
|
||||
"""A dumb and slow but simple dbm clone.
|
||||
|
||||
For database spam, spam.dir contains the index (a text file),
|
||||
spam.bak *may* contain a backup of the index (also a text file),
|
||||
while spam.dat contains the data (a binary file).
|
||||
|
||||
XXX TO DO:
|
||||
|
||||
- seems to contain a bug when updating...
|
||||
|
||||
- reclaim free space (currently, space once occupied by deleted or expanded
|
||||
items is never reused)
|
||||
|
||||
- support concurrent access (currently, if two processes take turns making
|
||||
updates, they can mess up the index)
|
||||
|
||||
- support efficient access to large databases (currently, the whole index
|
||||
is read when the database is opened, and some updates rewrite the whole index)
|
||||
|
||||
- support opening for read-only (flag = 'm')
|
||||
|
||||
"""
|
||||
|
||||
import ast as _ast
|
||||
import os as _os
|
||||
import __builtin__
|
||||
import UserDict
|
||||
|
||||
_open = __builtin__.open
|
||||
|
||||
_BLOCKSIZE = 512
|
||||
|
||||
error = IOError # For anydbm
|
||||
|
||||
class _Database(UserDict.DictMixin):
    """Mapping of string keys to string values kept in three files.

    The directory file ('dir') holds the index as text, the data file
    ('dat') holds the raw values, and the backup file ('bak') receives the
    previous directory file every time the index is rewritten by _commit().
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _open = _open   # for _commit()

    def __init__(self, filebasename, mode):
        """Create the data file if it is missing and load the index.

        filebasename: path prefix; the 'dir', 'dat' and 'bak' extensions
            are appended to it.
        mode: permission bits passed to os.chmod by _chmod() for files
            this object creates or rewrites.
        """
        self._mode = mode

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + _os.extsep + 'dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + _os.extsep + 'dat'
        self._bakfile = filebasename + _os.extsep + 'bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None  # maps keys to (pos, siz) pairs

        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            with _open(self._datfile, 'w') as f:
                self._chmod(self._datfile)
        else:
            f.close()
        self._update()

    # Read directory file into the in-memory index dict.
    def _update(self):
        """Rebuild self._index from the directory file, if one exists."""
        self._index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            # No directory file yet: start with an empty index.
            pass
        else:
            with f:
                for line in f:
                    line = line.rstrip()
                    # literal_eval only accepts Python literals, so a
                    # directory line cannot execute code.
                    key, pos_and_siz_pair = _ast.literal_eval(line)
                    self._index[key] = pos_and_siz_pair

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        """Rewrite the directory file from the in-memory index."""
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except self._os.error:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except self._os.error:
            pass

        with self._open(self._dirfile, 'w') as f:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.iteritems():
                f.write("%r, %r\n" % (key, pos_and_siz_pair))

    sync = _commit

    def __getitem__(self, key):
        """Return the value stored for key; raises KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        with _open(self._datfile, 'rb') as f:
            f.seek(pos)
            dat = f.read(siz)
        return dat

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        """Append val at the next aligned offset; return (pos, len(val))."""
        with _open(self._datfile, 'rb+') as f:
            f.seek(0, 2)
            pos = int(f.tell())
            # Round the end-of-file offset up to a multiple of _BLOCKSIZE.
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write('\0'*(npos-pos))
            pos = npos
            f.write(val)
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        """Overwrite the data file at pos with val; return (pos, len(val))."""
        with _open(self._datfile, 'rb+') as f:
            f.seek(pos)
            f.write(val)
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        """Record a new key in the index and append it to the directory file."""
        self._index[key] = pos_and_siz_pair
        with _open(self._dirfile, 'a') as f:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key, pos_and_siz_pair))

    def __setitem__(self, key, val):
        """Store val under key.

        Raises TypeError unless both key and val are exactly of type str.
        """
        if not type(key) == type('') == type(val):
            # Call form of raise: valid on Python 2 and Python 3 alike.
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove key from the index and rewrite the directory file."""
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  _setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return the keys currently held in the in-memory index."""
        return self._index.keys()

    def has_key(self, key):
        """Return True if key is in the in-memory index."""
        return key in self._index

    def __contains__(self, key):
        return key in self._index

    def iterkeys(self):
        """Return an iterator over the keys of the in-memory index."""
        return self._index.iterkeys()
    __iter__ = iterkeys

    def __len__(self):
        return len(self._index)

    def close(self):
        """Commit the index, then drop the index and all file names."""
        try:
            self._commit()
        finally:
            # With _index set to None a later _commit() (for example the
            # one triggered through __del__) returns immediately.
            self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        """Apply self._mode to file when the platform provides os.chmod."""
        if hasattr(self._os, 'chmod'):
            self._os.chmod(file, self._mode)
|
||||
|
||||
|
||||
def open(file, flag=None, mode=0o666):
    """Open the database named by file and return the corresponding object.

    file is the base name of the database; the individual files are named
    by appending the 'dir', 'dat' and 'bak' extensions to it.

    The flag argument, used to control how the database is opened in the
    other DBM implementations, is ignored in the dumbdbm module; the
    database is always opened for update, and will be created if it does
    not exist.

    The optional mode argument is the UNIX mode of the file, used only when
    the database has to be created.  It defaults to octal code 0666 (and
    will be modified by the prevailing umask).

    """
    # flag argument is currently ignored

    # Modify mode depending on the umask
    try:
        # umask() can only be read by setting it, so set it to 0 and
        # immediately restore the value that was returned.
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        # No os.umask on this platform: use mode unchanged.
        pass
    else:
        # Turn off any bits that are set in the umask
        mode = mode & (~um)

    return _Database(file, mode)
|
||||
@@ -0,0 +1,32 @@
|
||||
Copyright (c) 2013, Ethan Furman.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
Redistributions of source code must retain the above
|
||||
copyright notice, this list of conditions and the
|
||||
following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
|
||||
Neither the name Ethan Furman nor the names of any
|
||||
contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -0,0 +1,3 @@
|
||||
enum34 is the new Python stdlib enum module available in Python 3.4
|
||||
backported for previous versions of Python from 2.4 to 3.3.
|
||||
Tested on 2.6, 2.7, and 3.3+.
|
||||
@@ -0,0 +1,837 @@
|
||||
"""Python Enumerations"""
|
||||
|
||||
import sys as _sys
|
||||
|
||||
__all__ = ['Enum', 'IntEnum', 'unique']
|
||||
|
||||
version = 1, 1, 6
|
||||
|
||||
# Interpreter version as a float, e.g. 2.7 or 3.4.
# NOTE(review): a two-digit minor version collapses in this representation
# ("3.10" parses as 3.1); the comparisons visible in this part of the file
# only test against 2.6 and 3.0 -- confirm no finer-grained check exists.
pyver = float('%s.%s' % _sys.version_info[:2])

# Provide any() on interpreters where the builtin does not exist.
try:
    any
except NameError:
    def any(iterable):
        for element in iterable:
            if element:
                return True
        return False

# OrderedDict is optional; users of this name check for None first.
try:
    from collections import OrderedDict
except ImportError:
    OrderedDict = None

try:
    basestring
except NameError:
    # In Python 2 basestring is the ancestor of both str and unicode
    # in Python 3 it's just str, but was missing in 3.1
    basestring = str

try:
    unicode
except NameError:
    # In Python 3 unicode no longer exists (it's just str)
    unicode = str
|
||||
|
||||
class _RouteClassAttributeToGetattr(object):
|
||||
"""Route attribute access on a class to __getattr__.
|
||||
|
||||
This is a descriptor, used to define attributes that act differently when
|
||||
accessed through an instance and through a class. Instance access remains
|
||||
normal, but access to an attribute through a class will be routed to the
|
||||
class's __getattr__ method; this is done by raising AttributeError.
|
||||
|
||||
"""
|
||||
def __init__(self, fget=None):
|
||||
self.fget = fget
|
||||
|
||||
def __get__(self, instance, ownerclass=None):
|
||||
if instance is None:
|
||||
raise AttributeError()
|
||||
return self.fget(instance)
|
||||
|
||||
def __set__(self, instance, value):
|
||||
raise AttributeError("can't set attribute")
|
||||
|
||||
def __delete__(self, instance):
|
||||
raise AttributeError("can't delete attribute")
|
||||
|
||||
|
||||
def _is_descriptor(obj):
|
||||
"""Returns True if obj is a descriptor, False otherwise."""
|
||||
return (
|
||||
hasattr(obj, '__get__') or
|
||||
hasattr(obj, '__set__') or
|
||||
hasattr(obj, '__delete__'))
|
||||
|
||||
|
||||
def _is_dunder(name):
|
||||
"""Returns True if a __dunder__ name, False otherwise."""
|
||||
return (name[:2] == name[-2:] == '__' and
|
||||
name[2:3] != '_' and
|
||||
name[-3:-2] != '_' and
|
||||
len(name) > 4)
|
||||
|
||||
|
||||
def _is_sunder(name):
|
||||
"""Returns True if a _sunder_ name, False otherwise."""
|
||||
return (name[0] == name[-1] == '_' and
|
||||
name[1:2] != '_' and
|
||||
name[-2:-1] != '_' and
|
||||
len(name) > 2)
|
||||
|
||||
|
||||
def _make_class_unpicklable(cls):
|
||||
"""Make the given class un-picklable."""
|
||||
def _break_on_call_reduce(self, protocol=None):
|
||||
raise TypeError('%r cannot be pickled' % self)
|
||||
cls.__reduce_ex__ = _break_on_call_reduce
|
||||
cls.__module__ = '<unknown>'
|
||||
|
||||
|
||||
class _EnumDict(dict):
    """Class namespace that records enum member names in definition order.

    EnumMeta reads self._member_names to learn which entries of the
    namespace are enumeration members.

    """
    def __init__(self):
        super(_EnumDict, self).__init__()
        self._member_names = []

    def __setitem__(self, key, value):
        """Store key/value, registering key as a member name when it is one.

        A key counts as a member when it is neither a sunder nor a dunder
        name and its value is not a descriptor.

        Reusing an existing member name raises TypeError, as does defining
        a member under a name that is already present in the namespace;
        duplicate values are not checked for.

        Single underscore (sunder) names other than _order_ are reserved
        and raise ValueError.

        Note: in 3.x _order_ / __order__ are simply discarded; in 2.x
        __order__ is stored under the name _order_.

        """
        if key in ('_order_', '__order__'):
            if pyver >= 3.0:
                return
            key = '_order_'

        if _is_sunder(key):
            if key != '_order_':
                raise ValueError('_names_ are reserved for future Enum use')
        elif not _is_dunder(key):
            if key in self._member_names:
                # descriptor overwriting an enum?
                raise TypeError('Attempted to reuse key: %r' % key)
            if not _is_descriptor(value):
                if key in self:
                    # enum overwriting a descriptor?
                    raise TypeError('Key already defined as: %r' % self[key])
                self._member_names.append(key)
        super(_EnumDict, self).__setitem__(key, value)
|
||||
|
||||
|
||||
# Dummy value for Enum as EnumMeta explicitly checks for it, but of course until
|
||||
# EnumMeta finishes running the first time the Enum class doesn't exist. This
|
||||
# is also why there are checks in EnumMeta like `if Enum is not None`
|
||||
Enum = None
|
||||
|
||||
|
||||
class EnumMeta(type):
    """Metaclass for Enum"""
    @classmethod
    def __prepare__(metacls, cls, bases):
        """Return the namespace object used while the class body executes."""
        return _EnumDict()

    def __new__(metacls, cls, bases, classdict):
        """Build a new enumeration class and instantiate its members."""
        # an Enum class is final once enumeration items have been defined; it
        # cannot be mixed with other types (int, float, etc.) if it has an
        # inherited __new__ unless a new __new__ is defined (or the resulting
        # class will fail).
        if type(classdict) is dict:
            # a plain dict was supplied: replay it through _EnumDict so the
            # member names get recorded
            original_dict = classdict
            classdict = _EnumDict()
            for k, v in original_dict.items():
                classdict[k] = v

        member_type, first_enum = metacls._get_mixins_(bases)
        __new__, save_new, use_args = metacls._find_new_(classdict, member_type,
                                                        first_enum)
        # save enum items into separate mapping so they don't get baked into
        # the new class
        members = dict((k, classdict[k]) for k in classdict._member_names)
        for name in classdict._member_names:
            del classdict[name]

        # py2 support for definition order
        _order_ = classdict.get('_order_')
        if _order_ is None:
            if pyver < 3.0:
                try:
                    _order_ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])]
                except TypeError:
                    _order_ = [name for name in sorted(members.keys())]
            else:
                _order_ = classdict._member_names
        else:
            del classdict['_order_']
            if pyver < 3.0:
                _order_ = _order_.replace(',', ' ').split()
                aliases = [name for name in members if name not in _order_]
                _order_ += aliases

        # check for illegal enum names (any others?)
        invalid_names = set(members) & set(['mro'])
        if invalid_names:
            raise ValueError('Invalid enum member name(s): %s' % (
                ', '.join(invalid_names), ))

        # save attributes from super classes so we know if we can take
        # the shortcut of storing members in the class dict
        base_attributes = set([a for b in bases for a in b.__dict__])
        # create our new Enum type
        enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict)
        enum_class._member_names_ = []               # names in random order
        if OrderedDict is not None:
            enum_class._member_map_ = OrderedDict()
        else:
            enum_class._member_map_ = {}             # name->value map
        enum_class._member_type_ = member_type

        # Reverse value->name map for hashable values.
        enum_class._value2member_map_ = {}

        # instantiate them, checking for duplicates as we go
        # we instantiate first instead of checking for duplicates first in case
        # a custom __new__ is doing something funky with the values -- such as
        # auto-numbering ;)
        if __new__ is None:
            __new__ = enum_class.__new__
        for member_name in _order_:
            value = members[member_name]
            if not isinstance(value, tuple):
                args = (value, )
            else:
                args = value
            if member_type is tuple:   # special case for tuple enums
                args = (args, )     # wrap it one more time
            if not use_args or not args:
                enum_member = __new__(enum_class)
                if not hasattr(enum_member, '_value_'):
                    enum_member._value_ = value
            else:
                enum_member = __new__(enum_class, *args)
                if not hasattr(enum_member, '_value_'):
                    enum_member._value_ = member_type(*args)
            value = enum_member._value_
            enum_member._name_ = member_name
            enum_member.__objclass__ = enum_class
            enum_member.__init__(*args)
            # If another member with the same value was already defined, the
            # new member becomes an alias to the existing one.
            for name, canonical_member in enum_class._member_map_.items():
                if canonical_member.value == enum_member._value_:
                    enum_member = canonical_member
                    break
            else:
                # Aliases don't appear in member names (only in __members__).
                enum_class._member_names_.append(member_name)
            # performance boost for any member that would not shadow
            # a DynamicClassAttribute (aka _RouteClassAttributeToGetattr)
            if member_name not in base_attributes:
                setattr(enum_class, member_name, enum_member)
            # now add to _member_map_
            enum_class._member_map_[member_name] = enum_member
            try:
                # This may fail if value is not hashable. We can't add the value
                # to the map, and by-value lookups for this value will be
                # linear.
                enum_class._value2member_map_[value] = enum_member
            except TypeError:
                pass

        # If a custom type is mixed into the Enum, and it does not know how
        # to pickle itself, pickle.dumps will succeed but pickle.loads will
        # fail.  Rather than have the error show up later and possibly far
        # from the source, sabotage the pickle protocol for this class so
        # that pickle.dumps also fails.
        #
        # However, if the new class implements its own __reduce_ex__, do not
        # sabotage -- it's on them to make sure it works correctly.  We use
        # __reduce_ex__ instead of any of the others as it is preferred by
        # pickle over __reduce__, and it handles all pickle protocols.
        unpicklable = False
        if '__reduce_ex__' not in classdict:
            if member_type is not object:
                methods = ('__getnewargs_ex__', '__getnewargs__',
                        '__reduce_ex__', '__reduce__')
                if not any(m in member_type.__dict__ for m in methods):
                    _make_class_unpicklable(enum_class)
                    unpicklable = True

        # double check that repr and friends are not the mixin's or various
        # things break (such as pickle)
        for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'):
            class_method = getattr(enum_class, name)
            obj_method = getattr(member_type, name, None)
            enum_method = getattr(first_enum, name, None)
            if name not in classdict and class_method is not enum_method:
                if name == '__reduce_ex__' and unpicklable:
                    continue
                setattr(enum_class, name, enum_method)

        # method resolution and int's are not playing nice
        # Python's less than 2.6 use __cmp__

        if pyver < 2.6:

            if issubclass(enum_class, int):
                setattr(enum_class, '__cmp__', getattr(int, '__cmp__'))

        elif pyver < 3.0:

            if issubclass(enum_class, int):
                for method in (
                        '__le__',
                        '__lt__',
                        '__gt__',
                        '__ge__',
                        '__eq__',
                        '__ne__',
                        '__hash__',
                        ):
                    setattr(enum_class, method, getattr(int, method))

        # replace any other __new__ with our own (as long as Enum is not None,
        # anyway) -- again, this is to support pickle
        if Enum is not None:
            # if the user defined their own __new__, save it before it gets
            # clobbered in case they subclass later
            if save_new:
                setattr(enum_class, '__member_new__', enum_class.__dict__['__new__'])
            setattr(enum_class, '__new__', Enum.__dict__['__new__'])
        return enum_class

    def __bool__(cls):
        """
        classes/types should always be True.
        """
        return True

    def __call__(cls, value, names=None, module=None, type=None, start=1):
        """Either returns an existing member, or creates a new enum class.

        This method is used both when an enum class is given a value to match
        to an enumeration member (i.e. Color(3)) and for the functional API
        (i.e. Color = Enum('Color', names='red green blue')).

        When used for the functional API: `module`, if set, will be stored in
        the new class' __module__ attribute; `type`, if set, will be mixed in
        as the first base class.

        Note: if `module` is not set this routine will attempt to discover the
        calling module by walking the frame stack; if this is unsuccessful
        the resulting class will not be pickleable.

        """
        if names is None:  # simple value lookup
            return cls.__new__(cls, value)
        # otherwise, functional API: we're creating a new Enum type
        return cls._create_(value, names, module=module, type=type, start=start)

    def __contains__(cls, member):
        """Return True if member is an instance of cls whose name is known."""
        return isinstance(member, cls) and member.name in cls._member_map_

    def __delattr__(cls, attr):
        # nicer error message when someone tries to delete an attribute
        # (see issue19025).
        if attr in cls._member_map_:
            raise AttributeError(
                    "%s: cannot delete Enum member." % cls.__name__)
        super(EnumMeta, cls).__delattr__(attr)

    def __dir__(self):
        """List a fixed set of dunder names plus the non-alias member names."""
        return (['__class__', '__doc__', '__members__', '__module__'] +
                self._member_names_)

    @property
    def __members__(cls):
        """Returns a mapping of member name->value.

        This mapping lists all enum members, including aliases. Note that this
        is a copy of the internal mapping.

        """
        return cls._member_map_.copy()

    def __getattr__(cls, name):
        """Return the enum member matching `name`

        We use __getattr__ instead of descriptors or inserting into the enum
        class' __dict__ in order to support `name` and `value` being both
        properties for enum members (which live in the class' __dict__) and
        enum members themselves.

        """
        if _is_dunder(name):
            raise AttributeError(name)
        try:
            return cls._member_map_[name]
        except KeyError:
            raise AttributeError(name)

    def __getitem__(cls, name):
        """Return the member called name; raises KeyError if there is none."""
        return cls._member_map_[name]

    def __iter__(cls):
        """Iterate over the members listed in _member_names_ (no aliases)."""
        return (cls._member_map_[name] for name in cls._member_names_)

    def __reversed__(cls):
        """Iterate over the non-alias members in reverse order."""
        return (cls._member_map_[name] for name in reversed(cls._member_names_))

    def __len__(cls):
        """Return the number of non-alias members."""
        return len(cls._member_names_)

    __nonzero__ = __bool__

    def __repr__(cls):
        return "<enum %r>" % cls.__name__

    def __setattr__(cls, name, value):
        """Block attempts to reassign Enum members.

        A simple assignment to the class namespace only changes one of the
        several possible ways to get an Enum member from the Enum class,
        resulting in an inconsistent Enumeration.

        """
        member_map = cls.__dict__.get('_member_map_', {})
        if name in member_map:
            raise AttributeError('Cannot reassign members.')
        super(EnumMeta, cls).__setattr__(name, value)

    def _create_(cls, class_name, names=None, module=None, type=None, start=1):
        """Convenience method to create a new Enum class.

        `names` can be:

        * A string containing member names, separated either with spaces or
          commas.  Values are auto-numbered from `start`.
        * An iterable of member names.  Values are auto-numbered from `start`.
        * An iterable of (member name, value) pairs.
        * A mapping of member name -> value.

        An empty string, list or tuple creates an enumeration without members.

        """
        if pyver < 3.0:
            # if class_name is unicode, attempt a conversion to ASCII
            if isinstance(class_name, unicode):
                try:
                    class_name = class_name.encode('ascii')
                except UnicodeEncodeError:
                    raise TypeError('%r is not representable in ASCII' % class_name)
        metacls = cls.__class__
        if type is None:
            bases = (cls, )
        else:
            bases = (type, cls)
        classdict = metacls.__prepare__(class_name, bases)
        _order_ = []

        # special processing needed for names?
        if isinstance(names, basestring):
            names = names.replace(',', ' ').split()
        # `names and` guards the names[0] probe: an empty sequence has no
        # first element to inspect and needs no auto-numbering.
        if isinstance(names, (tuple, list)) and names and isinstance(names[0], basestring):
            names = [(e, i+start) for (i, e) in enumerate(names)]

        # Here, names is either an iterable of (name, value) or a mapping.
        item = None  # in case names is empty
        for item in names:
            if isinstance(item, basestring):
                member_name, member_value = item, names[item]
            else:
                member_name, member_value = item
            classdict[member_name] = member_value
            _order_.append(member_name)
        # only set _order_ in classdict if name/value was not from a mapping
        if not isinstance(item, basestring):
            classdict['_order_'] = ' '.join(_order_)
        enum_class = metacls.__new__(metacls, class_name, bases, classdict)

        # TODO: replace the frame hack if a blessed way to know the calling
        # module is ever developed
        if module is None:
            try:
                module = _sys._getframe(2).f_globals['__name__']
            except (AttributeError, ValueError, KeyError):
                # no _getframe, stack too shallow, or the caller's globals
                # carry no '__name__' entry
                pass
        if module is None:
            _make_class_unpicklable(enum_class)
        else:
            enum_class.__module__ = module

        return enum_class

    @staticmethod
    def _get_mixins_(bases):
        """Returns the type for creating enum members, and the first inherited
        enum class.

        bases: the tuple of bases that was given to __new__

        """
        if not bases or Enum is None:
            return object, Enum

        # double check that we are not subclassing a class with existing
        # enumeration members; while we're at it, see if any other data
        # type has been mixed in so we can use the correct __new__
        member_type = first_enum = None
        for base in bases:
            if (base is not Enum and
                    issubclass(base, Enum) and
                    base._member_names_):
                raise TypeError("Cannot extend enumerations")
        # base is now the last base in bases
        if not issubclass(base, Enum):
            raise TypeError("new enumerations must be created as "
                    "`ClassName([mixin_type,] enum_type)`")

        # get correct mix-in type (either mix-in type of Enum subclass, or
        # first base if last base is Enum)
        if not issubclass(bases[0], Enum):
            member_type = bases[0]     # first data type
            first_enum = bases[-1]  # enum type
        else:
            for base in bases[0].__mro__:
                # most common: (IntEnum, int, Enum, object)
                # possible:    (<Enum 'AutoIntEnum'>, <Enum 'IntEnum'>,
                #               <class 'int'>, <Enum 'Enum'>,
                #               <class 'object'>)
                if issubclass(base, Enum):
                    if first_enum is None:
                        first_enum = base
                else:
                    if member_type is None:
                        member_type = base

        return member_type, first_enum

    if pyver < 3.0:
        @staticmethod
        def _find_new_(classdict, member_type, first_enum):
            """Returns the __new__ to be used for creating the enum members.

            classdict: the class dictionary given to __new__
            member_type: the data type whose __new__ will be used by default
            first_enum: enumeration to check for an overriding __new__

            Returns a (__new__, save_new, use_args) triple.

            """
            # now find the correct __new__, checking to see of one was defined
            # by the user; also check earlier enum classes in case a __new__ was
            # saved as __member_new__
            __new__ = classdict.get('__new__', None)
            if __new__:
                return None, True, True      # __new__, save_new, use_args

            N__new__ = getattr(None, '__new__')
            O__new__ = getattr(object, '__new__')
            if Enum is None:
                E__new__ = N__new__
            else:
                E__new__ = Enum.__dict__['__new__']
            # check all possibles for __member_new__ before falling back to
            # __new__
            for method in ('__member_new__', '__new__'):
                for possible in (member_type, first_enum):
                    try:
                        target = possible.__dict__[method]
                    except (AttributeError, KeyError):
                        target = getattr(possible, method, None)
                    if target not in [
                            None,
                            N__new__,
                            O__new__,
                            E__new__,
                            ]:
                        if method == '__member_new__':
                            classdict['__new__'] = target
                            return None, False, True
                        if isinstance(target, staticmethod):
                            target = target.__get__(member_type)
                        __new__ = target
                        break
                if __new__ is not None:
                    break
            else:
                __new__ = object.__new__

            # if a non-object.__new__ is used then whatever value/tuple was
            # assigned to the enum member name will be passed to __new__ and to the
            # new enum member's __init__
            if __new__ is object.__new__:
                use_args = False
            else:
                use_args = True

            return __new__, False, use_args
    else:
        @staticmethod
        def _find_new_(classdict, member_type, first_enum):
            """Returns the __new__ to be used for creating the enum members.

            classdict: the class dictionary given to __new__
            member_type: the data type whose __new__ will be used by default
            first_enum: enumeration to check for an overriding __new__

            Returns a (__new__, save_new, use_args) triple.

            """
            # now find the correct __new__, checking to see of one was defined
            # by the user; also check earlier enum classes in case a __new__ was
            # saved as __member_new__
            __new__ = classdict.get('__new__', None)

            # should __new__ be saved as __member_new__ later?
            save_new = __new__ is not None

            if __new__ is None:
                # check all possibles for __member_new__ before falling back to
                # __new__
                for method in ('__member_new__', '__new__'):
                    for possible in (member_type, first_enum):
                        target = getattr(possible, method, None)
                        if target not in (
                                None,
                                None.__new__,
                                object.__new__,
                                Enum.__new__,
                                ):
                            __new__ = target
                            break
                    if __new__ is not None:
                        break
                else:
                    __new__ = object.__new__

            # if a non-object.__new__ is used then whatever value/tuple was
            # assigned to the enum member name will be passed to __new__ and to the
            # new enum member's __init__
            if __new__ is object.__new__:
                use_args = False
            else:
                use_args = True

            return __new__, save_new, use_args
|
||||
|
||||
|
||||
########################################################
|
||||
# In order to support Python 2 and 3 with a single
|
||||
# codebase we have to create the Enum methods separately
|
||||
# and then use the `type(name, bases, dict)` method to
|
||||
# create the class.
|
||||
########################################################
|
||||
temp_enum_dict = {}
|
||||
temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n"
|
||||
|
||||
def __new__(cls, value):
    """Look up and return the existing member of *cls* that has *value*.

    All enum instances are actually created during class construction
    without calling this method; this method is called by the metaclass'
    __call__ (i.e. Color(3) ), and by pickle.

    Raises ValueError when no member of *cls* has the given value.
    """
    if type(value) is cls:
        # For lookups like Color(Color.red)
        value = value.value
    # by-value search for a matching enum member
    try:
        # a single lookup in the reverse mapping (for hashable values)
        return cls._value2member_map_[value]
    except KeyError:
        # hashable, but no member has this value
        pass
    except TypeError:
        # unhashable value: do the long search -- O(n) behavior
        for member in cls._member_map_.values():
            if member.value == value:
                return member
    raise ValueError("%s is not a valid %s" % (value, cls.__name__))
|
||||
temp_enum_dict['__new__'] = __new__
|
||||
del __new__
|
||||
|
||||
def __repr__(self):
    """Return ``<ClassName.member_name: value-repr>`` for this member."""
    enum_name = self.__class__.__name__
    return "<%s.%s: %r>" % (enum_name, self._name_, self._value_)
|
||||
temp_enum_dict['__repr__'] = __repr__
|
||||
del __repr__
|
||||
|
||||
def __str__(self):
    """Return ``ClassName.member_name`` for this member."""
    enum_name = self.__class__.__name__
    return "%s.%s" % (enum_name, self._name_)
|
||||
temp_enum_dict['__str__'] = __str__
|
||||
del __str__
|
||||
|
||||
if pyver >= 3.0:
    def __dir__(self):
        """List a fixed set of dunder names plus the public names found
        on the member's class hierarchy that are not enum members."""
        added_behavior = []
        for cls in self.__class__.mro():
            for m in cls.__dict__:
                # keep public attributes only, and leave the members out
                if m[0] != '_' and m not in self._member_map_:
                    added_behavior.append(m)
        return ['__class__', '__doc__', '__module__'] + added_behavior
    temp_enum_dict['__dir__'] = __dir__
    del __dir__
|
||||
|
||||
def __format__(self, format_spec):
    """Format the member according to *format_spec*.

    Mixed-in Enums use the mixed-in type's __format__ applied to the
    member's value; otherwise we can get strange results with the Enum
    name showing up instead of the value.  Pure Enums format their
    ``str()`` with ``str.__format__``.
    """
    if self._member_type_ is object:
        # pure Enum branch
        formatter, target = str, str(self)
    else:
        # mix-in branch
        formatter, target = self._member_type_, self.value
    return formatter.__format__(target, format_spec)
|
||||
temp_enum_dict['__format__'] = __format__
|
||||
del __format__
|
||||
|
||||
|
||||
####################################
|
||||
# Pythons older than 2.6 use __cmp__
|
||||
|
||||
if pyver < 2.6:

    def __cmp__(self, other):
        # A member compares equal (0) only to itself; any other member of
        # the same enumeration yields -1.  Anything that is not a member of
        # this exact class is left to the other operand via NotImplemented.
        if type(other) is self.__class__:
            if self is other:
                return 0
            return -1
        return NotImplemented
    temp_enum_dict['__cmp__'] = __cmp__
    del __cmp__

else:

    # Ordered comparisons are refused outright: each rich-comparison
    # method raises TypeError naming the two operand classes.

    def __le__(self, other):
        raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__le__'] = __le__
    del __le__

    def __lt__(self, other):
        raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__lt__'] = __lt__
    del __lt__

    def __ge__(self, other):
        raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__ge__'] = __ge__
    del __ge__

    def __gt__(self, other):
        raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__gt__'] = __gt__
    del __gt__
|
||||
|
||||
|
||||
def __eq__(self, other):
    """Members of the same enumeration are equal only when identical;
    every other operand is deferred with NotImplemented."""
    if type(other) is not self.__class__:
        return NotImplemented
    return self is other
|
||||
temp_enum_dict['__eq__'] = __eq__
|
||||
del __eq__
|
||||
|
||||
def __ne__(self, other):
    """Members of the same enumeration differ unless they are the same
    object; every other operand is deferred with NotImplemented."""
    if type(other) is not self.__class__:
        return NotImplemented
    return self is not other
|
||||
temp_enum_dict['__ne__'] = __ne__
|
||||
del __ne__
|
||||
|
||||
def __hash__(self):
    """Hash a member by its name."""
    member_name = self._name_
    return hash(member_name)
|
||||
temp_enum_dict['__hash__'] = __hash__
|
||||
del __hash__
|
||||
|
||||
def __reduce_ex__(self, proto):
    """Pickle support: rebuild the member by calling its class with its
    value.  *proto* is accepted but not used."""
    rebuild_args = (self._value_, )
    return self.__class__, rebuild_args
|
||||
temp_enum_dict['__reduce_ex__'] = __reduce_ex__
|
||||
del __reduce_ex__
|
||||
|
||||
# _RouteClassAttributeToGetattr is used to provide access to the `name`
|
||||
# and `value` properties of enum members while keeping some measure of
|
||||
# protection from modification, while still allowing for an enumeration
|
||||
# to have members named `name` and `value`. This works because enumeration
|
||||
# members are not set directly on the enum class -- __getattr__ is
|
||||
# used to look them up.
|
||||
|
||||
def name(self):
    # read-only view of the member's name
    return self._name_
temp_enum_dict['name'] = _RouteClassAttributeToGetattr(name)
del name
|
||||
|
||||
def value(self):
    # read-only view of the member's value
    return self._value_
temp_enum_dict['value'] = _RouteClassAttributeToGetattr(value)
del value
|
||||
|
||||
@classmethod
def _convert(cls, name, module, filter, source=None):
    """
    Create a new Enum subclass that replaces a collection of global constants
    """
    # Every constant in *source* (or, when no source is given, in *module*
    # itself) whose name passes filter() becomes a member of a new Enum
    # called *name*; the enum and its members are then exported back into
    # *module*.  __reduce_ex__ is replaced as well so that unpickling works
    # in previous Python versions.
    module_globals = vars(_sys.modules[module])
    if source:
        namespace = vars(source)
    else:
        namespace = module_globals
    members = {}
    for const_name, const_value in namespace.items():
        if filter(const_name):
            members[const_name] = const_value
    new_enum = cls(name, members, module=module)
    new_enum.__reduce_ex__ = _reduce_ex_by_name
    module_globals.update(new_enum.__members__)
    module_globals[name] = new_enum
    return new_enum
temp_enum_dict['_convert'] = _convert
del _convert
|
||||
|
||||
# build the Enum base class from the collected attributes, then drop the
# temporary namespace so it does not linger at module level
Enum = EnumMeta('Enum', (object,), temp_enum_dict)
del temp_enum_dict
|
||||
|
||||
# Enum has now been created
|
||||
###########################
|
||||
|
||||
# the mixed-in data type (int) is listed before Enum in the bases
class IntEnum(int, Enum):
    """Enum where members are also (and must be) ints"""
|
||||
|
||||
def _reduce_ex_by_name(self, proto):
    """Pickle support: reduce a member to its name alone.

    *proto* is accepted but not used.
    """
    member_name = self.name
    return member_name
|
||||
|
||||
def unique(enumeration):
    """Class decorator that ensures only unique members exist in an enumeration.

    An entry of ``__members__`` whose key differs from the member's own
    name is an alias; if any are found, ValueError is raised listing each
    one as ``alias -> name``.  Otherwise the enumeration is returned
    unchanged.
    """
    aliases = [
        (alias, member.name)
        for alias, member in enumeration.__members__.items()
        if alias != member.name
    ]
    if not aliases:
        return enumeration
    duplicate_names = ', '.join(["%s -> %s" % pair for pair in aliases])
    raise ValueError(
        'duplicate names found in %r: %s' % (enumeration, duplicate_names)
    )
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,735 @@
|
||||
``enum`` --- support for enumerations
|
||||
========================================
|
||||
|
||||
.. :synopsis: enumerations are sets of symbolic names bound to unique, constant
|
||||
values.
|
||||
.. :moduleauthor:: Ethan Furman <ethan@stoneleaf.us>
|
||||
.. :sectionauthor:: Barry Warsaw <barry@python.org>,
|
||||
.. :sectionauthor:: Eli Bendersky <eliben@gmail.com>,
|
||||
.. :sectionauthor:: Ethan Furman <ethan@stoneleaf.us>
|
||||
|
||||
----------------
|
||||
|
||||
An enumeration is a set of symbolic names (members) bound to unique, constant
|
||||
values. Within an enumeration, the members can be compared by identity, and
|
||||
the enumeration itself can be iterated over.
|
||||
|
||||
|
||||
Module Contents
|
||||
---------------
|
||||
|
||||
This module defines two enumeration classes that can be used to define unique
|
||||
sets of names and values: ``Enum`` and ``IntEnum``. It also defines
|
||||
one decorator, ``unique``.
|
||||
|
||||
``Enum``
|
||||
|
||||
Base class for creating enumerated constants. See section `Functional API`_
|
||||
for an alternate construction syntax.
|
||||
|
||||
``IntEnum``
|
||||
|
||||
Base class for creating enumerated constants that are also subclasses of ``int``.
|
||||
|
||||
``unique``
|
||||
|
||||
Enum class decorator that ensures only one name is bound to any one value.
|
||||
|
||||
|
||||
Creating an Enum
|
||||
----------------
|
||||
|
||||
Enumerations are created using the ``class`` syntax, which makes them
|
||||
easy to read and write. An alternative creation method is described in
|
||||
`Functional API`_. To define an enumeration, subclass ``Enum`` as
|
||||
follows::
|
||||
|
||||
>>> from enum import Enum
|
||||
>>> class Color(Enum):
|
||||
... red = 1
|
||||
... green = 2
|
||||
... blue = 3
|
||||
|
||||
Note: Nomenclature
|
||||
|
||||
- The class ``Color`` is an *enumeration* (or *enum*)
|
||||
- The attributes ``Color.red``, ``Color.green``, etc., are
|
||||
*enumeration members* (or *enum members*).
|
||||
- The enum members have *names* and *values* (the name of
|
||||
``Color.red`` is ``red``, the value of ``Color.blue`` is
|
||||
``3``, etc.)
|
||||
|
||||
Note:
|
||||
|
||||
Even though we use the ``class`` syntax to create Enums, Enums
|
||||
are not normal Python classes. See `How are Enums different?`_ for
|
||||
more details.
|
||||
|
||||
Enumeration members have human readable string representations::
|
||||
|
||||
>>> print(Color.red)
|
||||
Color.red
|
||||
|
||||
...while their ``repr`` has more information::
|
||||
|
||||
>>> print(repr(Color.red))
|
||||
<Color.red: 1>
|
||||
|
||||
The *type* of an enumeration member is the enumeration it belongs to::
|
||||
|
||||
>>> type(Color.red)
|
||||
<enum 'Color'>
|
||||
>>> isinstance(Color.green, Color)
|
||||
True
|
||||
>>>
|
||||
|
||||
Enum members also have a property that contains just their item name::
|
||||
|
||||
>>> print(Color.red.name)
|
||||
red
|
||||
|
||||
Enumerations support iteration. In Python 3.x definition order is used; in
|
||||
Python 2.x the definition order is not available, but class attribute
|
||||
``__order__`` is supported; otherwise, value order is used::
|
||||
|
||||
>>> class Shake(Enum):
|
||||
... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x
|
||||
... vanilla = 7
|
||||
... chocolate = 4
|
||||
... cookies = 9
|
||||
... mint = 3
|
||||
...
|
||||
>>> for shake in Shake:
|
||||
... print(shake)
|
||||
...
|
||||
Shake.vanilla
|
||||
Shake.chocolate
|
||||
Shake.cookies
|
||||
Shake.mint
|
||||
|
||||
The ``__order__`` attribute is always removed, and in 3.x it is also ignored
|
||||
(order is definition order); however, in the stdlib version it will be ignored
|
||||
but not removed.
|
||||
|
||||
Enumeration members are hashable, so they can be used in dictionaries and sets::
|
||||
|
||||
>>> apples = {}
|
||||
>>> apples[Color.red] = 'red delicious'
|
||||
>>> apples[Color.green] = 'granny smith'
|
||||
>>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'}
|
||||
True
|
||||
|
||||
|
||||
Programmatic access to enumeration members and their attributes
|
||||
---------------------------------------------------------------
|
||||
|
||||
Sometimes it's useful to access members in enumerations programmatically (i.e.
|
||||
situations where ``Color.red`` won't do because the exact color is not known
|
||||
at program-writing time). ``Enum`` allows such access::
|
||||
|
||||
>>> Color(1)
|
||||
<Color.red: 1>
|
||||
>>> Color(3)
|
||||
<Color.blue: 3>
|
||||
|
||||
If you want to access enum members by *name*, use item access::
|
||||
|
||||
>>> Color['red']
|
||||
<Color.red: 1>
|
||||
>>> Color['green']
|
||||
<Color.green: 2>
|
||||
|
||||
If you have an enum member and need its ``name`` or ``value``::
|
||||
|
||||
>>> member = Color.red
|
||||
>>> member.name
|
||||
'red'
|
||||
>>> member.value
|
||||
1
|
||||
|
||||
|
||||
Duplicating enum members and values
|
||||
-----------------------------------
|
||||
|
||||
Having two enum members (or any other attribute) with the same name is invalid;
|
||||
in Python 3.x this would raise an error, but in Python 2.x the second member
|
||||
simply overwrites the first::
|
||||
|
||||
>>> # python 2.x
|
||||
>>> class Shape(Enum):
|
||||
... square = 2
|
||||
... square = 3
|
||||
...
|
||||
>>> Shape.square
|
||||
<Shape.square: 3>
|
||||
|
||||
>>> # python 3.x
|
||||
>>> class Shape(Enum):
|
||||
... square = 2
|
||||
... square = 3
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: Attempted to reuse key: 'square'
|
||||
|
||||
However, two enum members are allowed to have the same value. Given two members
|
||||
A and B with the same value (and A defined first), B is an alias to A. By-value
|
||||
lookup of the value of A and B will return A. By-name lookup of B will also
|
||||
return A::
|
||||
|
||||
>>> class Shape(Enum):
|
||||
... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x
|
||||
... square = 2
|
||||
... diamond = 1
|
||||
... circle = 3
|
||||
... alias_for_square = 2
|
||||
...
|
||||
>>> Shape.square
|
||||
<Shape.square: 2>
|
||||
>>> Shape.alias_for_square
|
||||
<Shape.square: 2>
|
||||
>>> Shape(2)
|
||||
<Shape.square: 2>
|
||||
|
||||
|
||||
Allowing aliases is not always desirable. ``unique`` can be used to ensure
|
||||
that none exist in a particular enumeration::
|
||||
|
||||
>>> from enum import unique
|
||||
>>> @unique
|
||||
... class Mistake(Enum):
|
||||
... __order__ = 'one two three four' # only needed in 2.x
|
||||
... one = 1
|
||||
... two = 2
|
||||
... three = 3
|
||||
... four = 3
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: duplicate names found in <enum 'Mistake'>: four -> three
|
||||
|
||||
Iterating over the members of an enum does not provide the aliases::
|
||||
|
||||
>>> list(Shape)
|
||||
[<Shape.square: 2>, <Shape.diamond: 1>, <Shape.circle: 3>]
|
||||
|
||||
The special attribute ``__members__`` is a dictionary mapping names to members.
|
||||
It includes all names defined in the enumeration, including the aliases::
|
||||
|
||||
>>> for name, member in sorted(Shape.__members__.items()):
|
||||
... name, member
|
||||
...
|
||||
('alias_for_square', <Shape.square: 2>)
|
||||
('circle', <Shape.circle: 3>)
|
||||
('diamond', <Shape.diamond: 1>)
|
||||
('square', <Shape.square: 2>)
|
||||
|
||||
The ``__members__`` attribute can be used for detailed programmatic access to
|
||||
the enumeration members. For example, finding all the aliases::
|
||||
|
||||
>>> [name for name, member in Shape.__members__.items() if member.name != name]
|
||||
['alias_for_square']
|
||||
|
||||
Comparisons
|
||||
-----------
|
||||
|
||||
Enumeration members are compared by identity::
|
||||
|
||||
>>> Color.red is Color.red
|
||||
True
|
||||
>>> Color.red is Color.blue
|
||||
False
|
||||
>>> Color.red is not Color.blue
|
||||
True
|
||||
|
||||
Ordered comparisons between enumeration values are *not* supported. Enum
|
||||
members are not integers (but see `IntEnum`_ below)::
|
||||
|
||||
>>> Color.red < Color.blue
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
TypeError: unorderable types: Color() < Color()
|
||||
|
||||
.. warning::
|
||||
|
||||
In Python 2 *everything* is ordered, even though the ordering may not
|
||||
make sense. If you want your enumerations to have a sensible ordering
|
||||
check out the `OrderedEnum`_ recipe below.
|
||||
|
||||
|
||||
Equality comparisons are defined though::
|
||||
|
||||
>>> Color.blue == Color.red
|
||||
False
|
||||
>>> Color.blue != Color.red
|
||||
True
|
||||
>>> Color.blue == Color.blue
|
||||
True
|
||||
|
||||
Comparisons against non-enumeration values will always compare not equal
|
||||
(again, ``IntEnum`` was explicitly designed to behave differently, see
|
||||
below)::
|
||||
|
||||
>>> Color.blue == 2
|
||||
False
|
||||
|
||||
|
||||
Allowed members and attributes of enumerations
|
||||
----------------------------------------------
|
||||
|
||||
The examples above use integers for enumeration values. Using integers is
|
||||
short and handy (and provided by default by the `Functional API`_), but not
|
||||
strictly enforced. In the vast majority of use-cases, one doesn't care what
|
||||
the actual value of an enumeration is. But if the value *is* important,
|
||||
enumerations can have arbitrary values.
|
||||
|
||||
Enumerations are Python classes, and can have methods and special methods as
|
||||
usual. If we have this enumeration::
|
||||
|
||||
>>> class Mood(Enum):
|
||||
... funky = 1
|
||||
... happy = 3
|
||||
...
|
||||
... def describe(self):
|
||||
... # self is the member here
|
||||
... return self.name, self.value
|
||||
...
|
||||
... def __str__(self):
|
||||
... return 'my custom str! {0}'.format(self.value)
|
||||
...
|
||||
... @classmethod
|
||||
... def favorite_mood(cls):
|
||||
... # cls here is the enumeration
|
||||
... return cls.happy
|
||||
|
||||
Then::
|
||||
|
||||
>>> Mood.favorite_mood()
|
||||
<Mood.happy: 3>
|
||||
>>> Mood.happy.describe()
|
||||
('happy', 3)
|
||||
>>> str(Mood.funky)
|
||||
'my custom str! 1'
|
||||
|
||||
The rules for what is allowed are as follows: _sunder_ names (starting and
|
||||
ending with a single underscore) are reserved by enum and cannot be used;
|
||||
all other attributes defined within an enumeration will become members of this
|
||||
enumeration, with the exception of *__dunder__* names and descriptors (methods
|
||||
are also descriptors).
|
||||
|
||||
Note:
|
||||
|
||||
If your enumeration defines ``__new__`` and/or ``__init__`` then
|
||||
whatever value(s) were given to the enum member will be passed into
|
||||
those methods. See `Planet`_ for an example.
|
||||
|
||||
|
||||
Restricted subclassing of enumerations
|
||||
--------------------------------------
|
||||
|
||||
Subclassing an enumeration is allowed only if the enumeration does not define
|
||||
any members. So this is forbidden::
|
||||
|
||||
>>> class MoreColor(Color):
|
||||
... pink = 17
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: Cannot extend enumerations
|
||||
|
||||
But this is allowed::
|
||||
|
||||
>>> class Foo(Enum):
|
||||
... def some_behavior(self):
|
||||
... pass
|
||||
...
|
||||
>>> class Bar(Foo):
|
||||
... happy = 1
|
||||
... sad = 2
|
||||
...
|
||||
|
||||
Allowing subclassing of enums that define members would lead to a violation of
|
||||
some important invariants of types and instances. On the other hand, it makes
|
||||
sense to allow sharing some common behavior between a group of enumerations.
|
||||
(See `OrderedEnum`_ for an example.)
|
||||
|
||||
|
||||
Pickling
|
||||
--------
|
||||
|
||||
Enumerations can be pickled and unpickled::
|
||||
|
||||
>>> from enum.test_enum import Fruit
|
||||
>>> from pickle import dumps, loads
|
||||
>>> Fruit.tomato is loads(dumps(Fruit.tomato, 2))
|
||||
True
|
||||
|
||||
The usual restrictions for pickling apply: picklable enums must be defined in
|
||||
the top level of a module, since unpickling requires them to be importable
|
||||
from that module.
|
||||
|
||||
Note:
|
||||
|
||||
With pickle protocol version 4 (introduced in Python 3.4) it is possible
|
||||
to easily pickle enums nested in other classes.
|
||||
|
||||
|
||||
|
||||
Functional API
|
||||
--------------
|
||||
|
||||
The ``Enum`` class is callable, providing the following functional API::
|
||||
|
||||
>>> Animal = Enum('Animal', 'ant bee cat dog')
|
||||
>>> Animal
|
||||
<enum 'Animal'>
|
||||
>>> Animal.ant
|
||||
<Animal.ant: 1>
|
||||
>>> Animal.ant.value
|
||||
1
|
||||
>>> list(Animal)
|
||||
[<Animal.ant: 1>, <Animal.bee: 2>, <Animal.cat: 3>, <Animal.dog: 4>]
|
||||
|
||||
The semantics of this API resemble ``namedtuple``. The first argument
|
||||
of the call to ``Enum`` is the name of the enumeration.
|
||||
|
||||
The second argument is the *source* of enumeration member names. It can be a
|
||||
whitespace-separated string of names, a sequence of names, a sequence of
|
||||
2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to
|
||||
values. The last two options enable assigning arbitrary values to
|
||||
enumerations; the others auto-assign increasing integers starting with 1. A
|
||||
new class derived from ``Enum`` is returned. In other words, the above
|
||||
assignment to ``Animal`` is equivalent to::
|
||||
|
||||
>>> class Animal(Enum):
|
||||
... ant = 1
|
||||
... bee = 2
|
||||
... cat = 3
|
||||
... dog = 4
|
||||
|
||||
Pickling enums created with the functional API can be tricky as frame stack
|
||||
implementation details are used to try and figure out which module the
|
||||
enumeration is being created in (e.g. it will fail if you use a utility
|
||||
function in a separate module, and also may not work on IronPython or Jython).
|
||||
The solution is to specify the module name explicitly as follows::
|
||||
|
||||
>>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__)
|
||||
|
||||
Derived Enumerations
|
||||
--------------------
|
||||
|
||||
IntEnum
|
||||
^^^^^^^
|
||||
|
||||
A variation of ``Enum`` is provided which is also a subclass of
|
||||
``int``. Members of an ``IntEnum`` can be compared to integers;
|
||||
by extension, integer enumerations of different types can also be compared
|
||||
to each other::
|
||||
|
||||
>>> from enum import IntEnum
|
||||
>>> class Shape(IntEnum):
|
||||
... circle = 1
|
||||
... square = 2
|
||||
...
|
||||
>>> class Request(IntEnum):
|
||||
... post = 1
|
||||
... get = 2
|
||||
...
|
||||
>>> Shape == 1
|
||||
False
|
||||
>>> Shape.circle == 1
|
||||
True
|
||||
>>> Shape.circle == Request.post
|
||||
True
|
||||
|
||||
However, they still can't be compared to standard ``Enum`` enumerations::
|
||||
|
||||
>>> class Shape(IntEnum):
|
||||
... circle = 1
|
||||
... square = 2
|
||||
...
|
||||
>>> class Color(Enum):
|
||||
... red = 1
|
||||
... green = 2
|
||||
...
|
||||
>>> Shape.circle == Color.red
|
||||
False
|
||||
|
||||
``IntEnum`` values behave like integers in other ways you'd expect::
|
||||
|
||||
>>> int(Shape.circle)
|
||||
1
|
||||
>>> ['a', 'b', 'c'][Shape.circle]
|
||||
'b'
|
||||
>>> [i for i in range(Shape.square)]
|
||||
[0, 1]
|
||||
|
||||
For the vast majority of code, ``Enum`` is strongly recommended,
|
||||
since ``IntEnum`` breaks some semantic promises of an enumeration (by
|
||||
being comparable to integers, and thus by transitivity to other
|
||||
unrelated enumerations). It should be used only in special cases where
|
||||
there's no other choice; for example, when integer constants are
|
||||
replaced with enumerations and backwards compatibility is required with code
|
||||
that still expects integers.
|
||||
|
||||
|
||||
Others
|
||||
^^^^^^
|
||||
|
||||
While ``IntEnum`` is part of the ``enum`` module, it would be very
|
||||
simple to implement independently::
|
||||
|
||||
class IntEnum(int, Enum):
|
||||
pass
|
||||
|
||||
This demonstrates how similar derived enumerations can be defined; for example
|
||||
a ``StrEnum`` that mixes in ``str`` instead of ``int``.
|
||||
|
||||
Some rules:
|
||||
|
||||
1. When subclassing ``Enum``, mix-in types must appear before
|
||||
``Enum`` itself in the sequence of bases, as in the ``IntEnum``
|
||||
example above.
|
||||
2. While ``Enum`` can have members of any type, once you mix in an
|
||||
additional type, all the members must have values of that type, e.g.
|
||||
``int`` above. This restriction does not apply to mix-ins which only
|
||||
add methods and don't specify another data type such as ``int`` or
|
||||
``str``.
|
||||
3. When another data type is mixed in, the ``value`` attribute is *not the
|
||||
same* as the enum member itself, although it is equivalent and will compare
|
||||
equal.
|
||||
4. %-style formatting: ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and
|
||||
``__repr__`` respectively; other codes (such as ``%i`` or ``%x`` for
|
||||
IntEnum) treat the enum member as its mixed-in type.
|
||||
|
||||
Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int``
|
||||
subclasses are printed as strings and not numbers when the ``%d``, ``%i``,
|
||||
or ``%u`` codes are used.
|
||||
5. ``str.__format__`` (or ``format``) will use the mixed-in
|
||||
type's ``__format__``. If the ``Enum``'s ``str`` or
|
||||
``repr`` is desired use the ``!s`` or ``!r`` ``str`` format codes.
|
||||
|
||||
|
||||
Decorators
|
||||
----------
|
||||
|
||||
unique
|
||||
^^^^^^
|
||||
|
||||
A ``class`` decorator specifically for enumerations. It searches an
|
||||
enumeration's ``__members__`` gathering any aliases it finds; if any are
|
||||
found ``ValueError`` is raised with the details::
|
||||
|
||||
>>> @unique
|
||||
... class NoDupes(Enum):
|
||||
... first = 'one'
|
||||
... second = 'two'
|
||||
... third = 'two'
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: duplicate names found in <enum 'NoDupes'>: third -> second
|
||||
|
||||
|
||||
Interesting examples
|
||||
--------------------
|
||||
|
||||
While ``Enum`` and ``IntEnum`` are expected to cover the majority of
|
||||
use-cases, they cannot cover them all. Here are recipes for some different
|
||||
types of enumerations that can be used directly, or as examples for creating
|
||||
one's own.
|
||||
|
||||
|
||||
AutoNumber
|
||||
^^^^^^^^^^
|
||||
|
||||
Avoids having to specify the value for each enumeration member::
|
||||
|
||||
>>> class AutoNumber(Enum):
|
||||
... def __new__(cls):
|
||||
... value = len(cls.__members__) + 1
|
||||
... obj = object.__new__(cls)
|
||||
... obj._value_ = value
|
||||
... return obj
|
||||
...
|
||||
>>> class Color(AutoNumber):
|
||||
... __order__ = "red green blue" # only needed in 2.x
|
||||
... red = ()
|
||||
... green = ()
|
||||
... blue = ()
|
||||
...
|
||||
>>> Color.green.value == 2
|
||||
True
|
||||
|
||||
Note:
|
||||
|
||||
The `__new__` method, if defined, is used during creation of the Enum
|
||||
members; it is then replaced by Enum's `__new__` which is used after
|
||||
class creation for lookup of existing members. Due to the way Enums are
|
||||
supposed to behave, there is no way to customize Enum's `__new__`.
|
||||
|
||||
|
||||
UniqueEnum
|
||||
^^^^^^^^^^
|
||||
|
||||
Raises an error if a duplicate member value is found instead of creating an
|
||||
alias::
|
||||
|
||||
>>> class UniqueEnum(Enum):
|
||||
... def __init__(self, *args):
|
||||
... cls = self.__class__
|
||||
... if any(self.value == e.value for e in cls):
|
||||
... a = self.name
|
||||
... e = cls(self.value).name
|
||||
... raise ValueError(
|
||||
... "aliases not allowed in UniqueEnum: %r --> %r"
|
||||
... % (a, e))
|
||||
...
|
||||
>>> class Color(UniqueEnum):
|
||||
... red = 1
|
||||
... green = 2
|
||||
... blue = 3
|
||||
... grene = 2
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: aliases not allowed in UniqueEnum: 'grene' --> 'green'
|
||||
|
||||
|
||||
OrderedEnum
|
||||
^^^^^^^^^^^
|
||||
|
||||
An ordered enumeration that is not based on ``IntEnum`` and so maintains
|
||||
the normal ``Enum`` invariants (such as not being comparable to other
|
||||
enumerations)::
|
||||
|
||||
>>> class OrderedEnum(Enum):
|
||||
... def __ge__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ >= other._value_
|
||||
... return NotImplemented
|
||||
... def __gt__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ > other._value_
|
||||
... return NotImplemented
|
||||
... def __le__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ <= other._value_
|
||||
... return NotImplemented
|
||||
... def __lt__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ < other._value_
|
||||
... return NotImplemented
|
||||
...
|
||||
>>> class Grade(OrderedEnum):
|
||||
... __order__ = 'A B C D F' # only needed in 2.x
|
||||
... A = 5
|
||||
... B = 4
|
||||
... C = 3
|
||||
... D = 2
|
||||
... F = 1
|
||||
...
|
||||
>>> Grade.C < Grade.A
|
||||
True
|
||||
|
||||
|
||||
Planet
|
||||
^^^^^^
|
||||
|
||||
If ``__new__`` or ``__init__`` is defined the value of the enum member
|
||||
will be passed to those methods::
|
||||
|
||||
>>> class Planet(Enum):
|
||||
... MERCURY = (3.303e+23, 2.4397e6)
|
||||
... VENUS = (4.869e+24, 6.0518e6)
|
||||
... EARTH = (5.976e+24, 6.37814e6)
|
||||
... MARS = (6.421e+23, 3.3972e6)
|
||||
... JUPITER = (1.9e+27, 7.1492e7)
|
||||
... SATURN = (5.688e+26, 6.0268e7)
|
||||
... URANUS = (8.686e+25, 2.5559e7)
|
||||
... NEPTUNE = (1.024e+26, 2.4746e7)
|
||||
... def __init__(self, mass, radius):
|
||||
... self.mass = mass # in kilograms
|
||||
... self.radius = radius # in meters
|
||||
... @property
|
||||
... def surface_gravity(self):
|
||||
... # universal gravitational constant (m3 kg-1 s-2)
|
||||
... G = 6.67300E-11
|
||||
... return G * self.mass / (self.radius * self.radius)
|
||||
...
|
||||
>>> Planet.EARTH.value
|
||||
(5.976e+24, 6378140.0)
|
||||
>>> Planet.EARTH.surface_gravity
|
||||
9.802652743337129
|
||||
|
||||
|
||||
How are Enums different?
|
||||
------------------------
|
||||
|
||||
Enums have a custom metaclass that affects many aspects of both derived Enum
|
||||
classes and their instances (members).
|
||||
|
||||
|
||||
Enum Classes
|
||||
^^^^^^^^^^^^
|
||||
|
||||
The ``EnumMeta`` metaclass is responsible for providing the
|
||||
``__contains__``, ``__dir__``, ``__iter__`` and other methods that
|
||||
allow one to do things with an ``Enum`` class that fail on a typical
|
||||
class, such as ``list(Color)`` or ``some_var in Color``. ``EnumMeta`` is
|
||||
responsible for ensuring that various other methods on the final ``Enum``
|
||||
class are correct (such as ``__new__``, ``__getnewargs__``,
|
||||
``__str__`` and ``__repr__``).
|
||||
|
||||
.. note::
|
||||
|
||||
``__dir__`` is not changed in the Python 2 line as it messes up some
|
||||
of the decorators included in the stdlib.
|
||||
|
||||
|
||||
Enum Members (aka instances)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The most interesting thing about Enum members is that they are singletons.
|
||||
``EnumMeta`` creates them all while it is creating the ``Enum``
|
||||
class itself, and then puts a custom ``__new__`` in place to ensure
|
||||
that no new ones are ever instantiated by returning only the existing
|
||||
member instances.
|
||||
|
||||
|
||||
Finer Points
|
||||
^^^^^^^^^^^^
|
||||
|
||||
``Enum`` members are instances of an ``Enum`` class, and even though they
|
||||
are accessible as `EnumClass.member1.member2`, they should not be
|
||||
accessed directly from the member as that lookup may fail or, worse,
|
||||
return something besides the ``Enum`` member you were looking for
|
||||
(changed in version 1.1.1)::
|
||||
|
||||
>>> class FieldTypes(Enum):
|
||||
... name = 1
|
||||
... value = 2
|
||||
... size = 3
|
||||
...
|
||||
>>> FieldTypes.value.size
|
||||
<FieldTypes.size: 3>
|
||||
>>> FieldTypes.size.value
|
||||
3
|
||||
|
||||
The ``__members__`` attribute is only available on the class.
|
||||
|
||||
In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being
|
||||
the definition order. In Python 2.7 ``__members__`` is an ``OrderedDict`` if
|
||||
``__order__`` was specified, and a plain ``dict`` otherwise. In all other Python
|
||||
2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was specified
|
||||
as the ``OrderedDict`` type didn't exist yet.
|
||||
|
||||
If you give your ``Enum`` subclass extra methods, like the `Planet`_
|
||||
class above, those methods will show up in a `dir` of the member,
|
||||
but not of the class::
|
||||
|
||||
>>> dir(Planet)
|
||||
['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS',
|
||||
'VENUS', '__class__', '__doc__', '__members__', '__module__']
|
||||
>>> dir(Planet.EARTH)
|
||||
['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value']
|
||||
|
||||
A ``__new__`` method will only be used for the creation of the
|
||||
``Enum`` members -- after that it is replaced. This means if you wish to
|
||||
change how ``Enum`` members are looked up you either have to write a
|
||||
helper function or a ``classmethod``.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,312 @@
|
||||
import codecs
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import shutil
|
||||
import tempfile
|
||||
import traceback
|
||||
|
||||
import appdirs
|
||||
|
||||
from scandir import scandir
|
||||
|
||||
try:
|
||||
from collections.abc import MutableMapping
|
||||
unicode = str
|
||||
except ImportError:
|
||||
# Python 2 imports
|
||||
from collections import MutableMapping
|
||||
FileNotFoundError = IOError
|
||||
|
||||
from .posixemulation import rename
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FileCache(MutableMapping):
    """A persistent file cache that is dictionary-like and has a write buffer.

    *appname* is passed to `appdirs <https://pypi.python.org/pypi/appdirs/>`_
    to determine a system-appropriate location for the cache files. The cache
    directory used is available via :data:`cache_dir`.

    By default, a write buffer is used, so writing to cache files is not done
    until :meth:`sync` is explicitly called. This behavior can be changed using
    the optional *flag* argument.

    .. NOTE::
        Keys and values are always stored as :class:`bytes` objects. If data
        serialization is enabled, keys are returned as :class:`str` or
        :class:`unicode` objects.
        If data serialization is disabled, keys are returned as a
        :class:`bytes` object.

    :param str appname: The app/script the cache should be associated with.
    :param str flag: How the cache should be opened. See below for details.
    :param mode: The Unix mode for the cache files.
    :param str keyencoding: The encoding the keys use, defaults to 'utf-8'.
        This is used if *serialize* is ``False``; the keys are treated as
        :class:`bytes` objects.
    :param bool serialize: Whether or not to (de)serialize the values. If a
        cache is used with a :class:`~shelve.Shelf`, set this to ``False``.
    :param str app_cache_dir: absolute path to root cache directory to be
        used in place of system-appropriate location determined by appdirs

    The optional *flag* argument can be:

    +---------+-------------------------------------------+
    | Value   | Meaning                                   |
    +=========+===========================================+
    | ``'r'`` | Open existing cache for reading only      |
    +---------+-------------------------------------------+
    | ``'w'`` | Open existing cache for reading and       |
    |         | writing                                   |
    +---------+-------------------------------------------+
    | ``'c'`` | Open cache for reading and writing,       |
    |         | creating it if it doesn't exist (default) |
    +---------+-------------------------------------------+
    | ``'n'`` | Always create a new, empty cache, open    |
    |         | for reading and writing                   |
    +---------+-------------------------------------------+

    If a ``'s'`` is appended to the *flag* argument, the cache will be opened
    in sync mode. Writing to the cache will happen immediately and will not be
    buffered.

    If an application needs to use more than one cache, then it should use
    subcaches. To create a subcache, append a series of one or more names
    separated by periods to the application name when creating a
    :class:`FileCache` object (e.g. ``'appname.subcache'`` or
    ``'appname.subcache.subcache'``).
    Subcaches are a way for an application to use more than one cache without
    polluting a user's cache directory. All caches -- main caches or subcaches
    -- are totally independent. The only aspect in which they are linked is
    that all of an application's caches exist in the same system directory.
    Because each cache is independent of every other cache, calling
    :meth:`delete` on an application's main cache will not delete data in
    its subcaches.

    """

    def __init__(self, appname, flag='c', mode=0o666, keyencoding='utf-8',
                 serialize=True, app_cache_dir=None):
        """Initialize a :class:`FileCache` object.

        :raises TypeError: if *flag* is not a :class:`str`.
        :raises ValueError: if *flag* is empty or malformed, or if a
            subcache component is named ``'cache'``.
        :raises FileNotFoundError: if the cache directory does not exist and
            *flag* does not allow creating it.
        """
        if not isinstance(flag, str):
            raise TypeError("flag must be str not '{}'".format(type(flag)))
        # An empty flag used to escape as IndexError from ``flag[0]``; report
        # it as the same ValueError every other malformed flag produces.
        if not flag or flag[0] not in 'rwcn':
            raise ValueError("invalid flag: '{}', first flag must be one of "
                             "'r', 'w', 'c' or 'n'".format(flag))
        if len(flag) > 1 and flag[1] != 's':
            raise ValueError("invalid flag: '{}', second flag must be "
                             "'s'".format(flag))

        appname, subcache = self._parse_appname(appname)
        # 'cache' is the name of the directory that holds the cache files, so
        # it cannot also be used as a subcache directory name.
        if 'cache' in subcache:
            raise ValueError("invalid subcache name: 'cache'.")
        self._is_subcache = bool(subcache)

        if not app_cache_dir:
            app_cache_dir = appdirs.user_cache_dir(appname, appname)
        subcache_dir = os.path.join(app_cache_dir, *subcache)
        self.cache_dir = os.path.join(subcache_dir, 'cache')
        exists = os.path.exists(self.cache_dir)

        # The write buffer only exists when the cache is NOT in sync mode.
        self._sync = len(flag) > 1 and flag[1] == 's'
        if not self._sync:
            self._buffer = {}

        if exists and 'n' in flag:
            self.clear()
            self.create()
        elif not exists and ('c' in flag or 'n' in flag):
            self.create()
        elif not exists:
            raise FileNotFoundError("no such directory: '{}'".format(
                self.cache_dir))

        # Mode handed to os.fdopen() in _write_to_file(); a cache opened with
        # 'r' gets a read-only handle there, so writes fail.
        self._flag = 'rb' if 'r' in flag else 'wb'
        self._mode = mode
        self._keyencoding = keyencoding
        self._serialize = serialize

    def _parse_appname(self, appname):
        """Splits an appname into the appname and subcache components."""
        components = appname.split('.')
        return components[0], components[1:]

    def create(self):
        """Create the write buffer and cache directory."""
        if not self._sync and not hasattr(self, '_buffer'):
            self._buffer = {}
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)

    def clear(self):
        """Remove all items from the write buffer and cache.

        The write buffer object and cache directory are not deleted.

        """
        self.delete()
        self.create()

    def delete(self):
        """Delete the write buffer and cache directory."""
        if not self._sync:
            del self._buffer
        shutil.rmtree(self.cache_dir)

    def close(self):
        """Sync the write buffer, then close the cache.

        If a closed :class:`FileCache` object's methods are called, a
        :exc:`ValueError` will be raised.

        """
        self.sync()
        self.sync = self.create = self.delete = self._closed
        # This previously disabled a misspelled ``_read_to_file`` attribute,
        # leaving the real ``_read_from_file`` usable on a closed cache.
        self._write_to_file = self._read_from_file = self._closed
        self._key_to_filename = self._filename_to_key = self._closed
        # NOTE(review): special methods are looked up on the type, so these
        # instance attributes only affect explicit ``obj.__getitem__(...)``
        # style calls; ``obj[key]`` still reaches the class implementation
        # (which then fails inside the disabled helpers above).
        self.__getitem__ = self.__setitem__ = self.__delitem__ = self._closed
        self.__iter__ = self.__len__ = self.__contains__ = self._closed

    def sync(self):
        """Sync the write buffer with the cache files and clear the buffer.

        If the :class:`FileCache` object was opened with the optional ``'s'``
        *flag* argument, then calling :meth:`sync` will do nothing.

        A failure to write one entry is logged and does not stop the other
        entries from being written.
        """
        if self._sync:
            return  # opened in sync mode, so skip the manual sync
        # Behave as a sync-mode cache while the buffer is being flushed.
        self._sync = True
        try:
            for ekey, value in self._buffer.items():
                filename = self._key_to_filename(ekey)
                try:
                    self._write_to_file(filename, value)
                except Exception:
                    logger.error("Couldn't write content from %r to cache file: %r: %s", ekey, filename,
                                 traceback.format_exc())
            self._buffer.clear()
        finally:
            # Always restore buffered mode, even if the flush is interrupted.
            self._sync = False

    def _closed(self, *args, **kwargs):
        """Filler method for closed cache methods."""
        raise ValueError("invalid operation on closed cache")

    def _encode_key(self, key):
        """Encode key using *hex_codec* for constructing a cache filename.

        Keys are implicitly converted to :class:`bytes` if passed as
        :class:`str`.

        :raises TypeError: if *key* is neither text nor :class:`bytes`.
        """
        if isinstance(key, (str, unicode)):
            key = key.encode(self._keyencoding)
        elif not isinstance(key, bytes):
            raise TypeError("key must be bytes or str")
        return codecs.encode(key, 'hex_codec').decode(self._keyencoding)

    def _decode_key(self, key):
        """Decode key using hex_codec to retrieve the original key.

        Keys are returned as :class:`str` if serialization is enabled.
        Keys are returned as :class:`bytes` if serialization is disabled.

        """
        bkey = codecs.decode(key.encode(self._keyencoding), 'hex_codec')
        return bkey.decode(self._keyencoding) if self._serialize else bkey

    def _dumps(self, value):
        """Pickle *value* when serialization is enabled, else pass it through."""
        return value if not self._serialize else pickle.dumps(value)

    def _loads(self, value):
        """Unpickle *value* when serialization is enabled, else pass it through."""
        # SECURITY: pickle.loads() executes arbitrary code from its input;
        # the cache directory must not be writable by untrusted parties.
        return value if not self._serialize else pickle.loads(value)

    def _key_to_filename(self, key):
        """Convert an encoded key to an absolute cache filename."""
        return os.path.join(self.cache_dir, key)

    def _filename_to_key(self, absfilename):
        """Convert an absolute cache filename to a key name."""
        return os.path.split(absfilename)[1]

    def _all_filenames(self):
        """Yield the absolute filename of every regular file in the cache.

        Nothing is yielded if the cache directory cannot be listed.
        """
        try:
            for entry in scandir(self.cache_dir):
                if entry.is_file(follow_symlinks=False):
                    yield os.path.join(self.cache_dir, entry.name)
        except (FileNotFoundError, OSError):
            # Raising StopIteration inside a generator becomes a RuntimeError
            # under PEP 479; a plain return ends the iteration cleanly.
            return

    def _all_keys(self):
        """Return a set of all encoded key names."""
        file_keys = [self._filename_to_key(fn) for fn in self._all_filenames()]
        if self._sync:
            return set(file_keys)
        else:
            return set(file_keys + list(self._buffer))

    def _write_to_file(self, filename, bytesvalue):
        """Write bytesvalue to filename.

        The data is written to a temporary file first and then moved over
        *filename*, so readers never see a partially written cache file.
        """
        fh, tmp = tempfile.mkstemp()
        try:
            with os.fdopen(fh, self._flag) as f:
                f.write(self._dumps(bytesvalue))
            rename(tmp, filename)
        except Exception:
            # Do not leave the temporary file behind when the write or the
            # move fails (e.g. cache opened read-only).
            try:
                os.remove(tmp)
            except OSError:
                pass
            raise
        os.chmod(filename, self._mode)

    def _read_from_file(self, filename):
        """Read data from filename.

        Returns ``None`` (after logging a warning) if the file can't be read.
        """
        try:
            with open(filename, 'rb') as f:
                return self._loads(f.read())
        except (IOError, OSError):
            logger.warning('Error opening file: {}'.format(filename))
            return None

    def __setitem__(self, key, value):
        ekey = self._encode_key(key)
        if not self._sync:
            self._buffer[ekey] = value
        else:
            filename = self._key_to_filename(ekey)
            self._write_to_file(filename, value)

    def __getitem__(self, key):
        ekey = self._encode_key(key)
        if not self._sync:
            # Unsynced values shadow whatever is stored on disk.
            try:
                return self._buffer[ekey]
            except KeyError:
                pass
        filename = self._key_to_filename(ekey)
        if filename not in self._all_filenames():
            raise KeyError(key)
        return self._read_from_file(filename)

    def __delitem__(self, key):
        ekey = self._encode_key(key)
        filename = self._key_to_filename(ekey)
        if not self._sync:
            try:
                del self._buffer[ekey]
            except KeyError:
                if filename not in self._all_filenames():
                    raise KeyError(key)
        # The key may live only in the buffer, so a missing file is fine.
        try:
            os.remove(filename)
        except (IOError, OSError):
            pass

    def __iter__(self):
        for key in self._all_keys():
            yield self._decode_key(key)

    def __len__(self):
        return len(self._all_keys())

    def __contains__(self, key):
        ekey = self._encode_key(key)
        return ekey in self._all_keys()
|
||||
@@ -0,0 +1,113 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
r"""
|
||||
werkzeug.posixemulation
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Provides a POSIX emulation for some features that are relevant to
|
||||
web applications. The main purpose is to simplify support for
|
||||
systems such as Windows NT that are not 100% POSIX compatible.
|
||||
|
||||
Currently this only implements a :func:`rename` function that
|
||||
follows POSIX semantics. Eg: if the target file already exists it
|
||||
will be replaced without asking.
|
||||
|
||||
This module was introduced in 0.6.1 and is not a public interface.
|
||||
It might become one in later versions of Werkzeug.
|
||||
|
||||
:copyright: (c) 2013 by the Werkzeug Team, see AUTHORS for more details.
|
||||
:license: BSD, see LICENSE for more details.
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import errno
|
||||
import time
|
||||
import random
|
||||
import shutil
|
||||
|
||||
|
||||
# True when the platform's rename can replace a file that is currently open.
can_rename_open_file = False
if os.name == 'nt':  # pragma: no cover
    # Placeholders used when the ctypes-based implementations below cannot
    # be set up; returning False makes rename() use its os.rename fallback.
    def _rename(src, dst):
        return False

    def _rename_atomic(src, dst):
        return False

    if sys.version_info >= (3, 0):
        unicode = str

    try:
        import ctypes

        _MOVEFILE_REPLACE_EXISTING = 0x1
        _MOVEFILE_WRITE_THROUGH = 0x8
        _MoveFileEx = ctypes.windll.kernel32.MoveFileExW

        def _rename(src, dst):
            """Move *src* over *dst* via MoveFileExW; return a truthy value
            on success."""
            if not isinstance(src, unicode):
                src = unicode(src, sys.getfilesystemencoding())
            if not isinstance(dst, unicode):
                dst = unicode(dst, sys.getfilesystemencoding())
            if _rename_atomic(src, dst):
                return True
            # Retry for a short while (up to 100 attempts, 1 ms apart).
            retry = 0
            rv = False
            while not rv and retry < 100:
                rv = _MoveFileEx(src, dst, _MOVEFILE_REPLACE_EXISTING |
                                 _MOVEFILE_WRITE_THROUGH)
                if not rv:
                    time.sleep(0.001)
                    retry += 1
            return rv

        # new in Vista and Windows Server 2008
        _CreateTransaction = ctypes.windll.ktmw32.CreateTransaction
        _CommitTransaction = ctypes.windll.ktmw32.CommitTransaction
        _MoveFileTransacted = ctypes.windll.kernel32.MoveFileTransactedW
        _CloseHandle = ctypes.windll.kernel32.CloseHandle
        can_rename_open_file = True

        def _rename_atomic(src, dst):
            """Move *src* over *dst* inside a kernel transaction; return a
            truthy value only if the transaction was committed."""
            ta = _CreateTransaction(None, 0, 0, 0, 0, 1000, 'Werkzeug rename')
            if ta == -1:
                return False
            try:
                retry = 0
                rv = False
                while not rv and retry < 100:
                    rv = _MoveFileTransacted(src, dst, None, None,
                                             _MOVEFILE_REPLACE_EXISTING |
                                             _MOVEFILE_WRITE_THROUGH, ta)
                    if rv:
                        rv = _CommitTransaction(ta)
                        break
                    else:
                        time.sleep(0.001)
                        retry += 1
                return rv
            finally:
                _CloseHandle(ta)
    except Exception:
        # Best effort: without ctypes or these APIs the placeholders above
        # stay in effect.
        pass

    def rename(src, dst):
        """Rename *src* to *dst*, replacing *dst* if it already exists."""
        # Try atomic or pseudo-atomic rename
        if _rename(src, dst):
            return
        # Fall back to "move away and replace"
        try:
            os.rename(src, dst)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            # sys.maxint does not exist on Python 3; sys.maxsize is available
            # on both Python 2.6+ and Python 3.
            old = "%s-%08x" % (dst, random.randint(0, sys.maxsize))
            os.rename(dst, old)
            os.rename(src, dst)
            try:
                os.unlink(old)
            except Exception:
                pass
else:
    # If dst is on the current filesystem then an atomic rename is used.
    # Otherwise, fall back to a non-atomic copy and remove.
    rename = shutil.move
    can_rename_open_file = True
|
||||
@@ -0,0 +1,411 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
ftfy: fixes text for you
|
||||
|
||||
This is a module for making text less broken. See the `fix_text` function
|
||||
for more information.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import unicodedata
|
||||
import ftfy.bad_codecs
|
||||
from ftfy import fixes
|
||||
from ftfy.formatting import display_ljust
|
||||
from ftfy.compatibility import is_printable
|
||||
|
||||
__version__ = '4.4.3'
|
||||
|
||||
|
||||
# See the docstring for ftfy.bad_codecs to see what we're doing here.
|
||||
ftfy.bad_codecs.ok()
|
||||
|
||||
|
||||
def fix_text(text,
             fix_entities='auto',
             remove_terminal_escapes=True,
             fix_encoding=True,
             fix_latin_ligatures=True,
             fix_character_width=True,
             uncurl_quotes=True,
             fix_line_breaks=True,
             fix_surrogates=True,
             remove_control_chars=True,
             remove_bom=True,
             normalization='NFC',
             max_decode_length=10**6):
    r"""
    Given Unicode text as input, fix inconsistencies and glitches in it,
    such as mojibake.

    Let's start with some examples:

        >>> print(fix_text('Ã¼nicode'))
        ünicode

        >>> print(fix_text('Broken text&hellip; it&#x2019;s flubberific!',
        ...                normalization='NFKC'))
        Broken text... it's flubberific!

        >>> print(fix_text('HTML entities &lt;3'))
        HTML entities <3

        >>> print(fix_text('<em>HTML entities &lt;3</em>'))
        <em>HTML entities &lt;3</em>

        >>> print(fix_text("&macr;\\_(ã\x83\x84)_/&macr;"))
        ¯\_(ツ)_/¯

        >>> # This example string starts with a byte-order mark, even if
        >>> # you can't see it on the Web.
        >>> print(fix_text('\ufeffParty like\nit&rsquo;s 1999!'))
        Party like
        it's 1999!

        >>> print(fix_text('ＬＯＵＤ　ＮＯＩＳＥＳ'))
        LOUD NOISES

        >>> len(fix_text('ﬁ' * 100000))
        200000

        >>> len(fix_text(''))
        0

    Based on the options you provide, ftfy applies these steps in order:

    - If `remove_terminal_escapes` is True, remove sequences of bytes that are
      instructions for Unix terminals, such as the codes that make text appear
      in different colors.

    - If `fix_encoding` is True, look for common mistakes that come from
      encoding or decoding Unicode text incorrectly, and fix them if they are
      reasonably fixable. See `fixes.fix_encoding` for details.

    - If `fix_entities` is True, replace HTML entities with their equivalent
      characters. If it's "auto" (the default), then consider replacing HTML
      entities, but don't do so in text where you have seen a pair of actual
      angle brackets (that's probably actually HTML and you shouldn't mess
      with the entities).

    - If `uncurl_quotes` is True, replace various curly quotation marks with
      plain-ASCII straight quotes.

    - If `fix_latin_ligatures` is True, then ligatures made of Latin letters,
      such as `ﬁ`, will be separated into individual letters. These ligatures
      are usually not meaningful outside of font rendering, and often represent
      copy-and-paste errors.

    - If `fix_character_width` is True, half-width and full-width characters
      will be replaced by their standard-width form.

    - If `fix_line_breaks` is true, convert all line breaks to Unix style
      (CRLF and CR line breaks become LF line breaks).

    - If `fix_surrogates` is true, ensure that there are no UTF-16 surrogates
      in the resulting string, by converting them to the correct characters
      when they're appropriately paired, or replacing them with \ufffd
      otherwise.

    - If `remove_control_chars` is true, remove control characters that
      are not suitable for use in text. This includes most of the ASCII control
      characters, plus some Unicode controls such as the byte order mark
      (U+FEFF). Useful control characters, such as Tab, Line Feed, and
      bidirectional marks, are left as they are.

    - If `remove_bom` is True, remove the Byte-Order Mark at the start of the
      string if it exists. (This is largely redundant, because it's a special
      case of `remove_control_chars`. This option will become deprecated
      in a later version.)

    - If `normalization` is not None, apply the specified form of Unicode
      normalization, which can be one of 'NFC', 'NFKC', 'NFD', and 'NFKD'.

      - The default normalization, NFC, combines characters and diacritics that
        are written using separate code points, such as converting "e" plus an
        acute accent modifier into "é", or converting "ka" (か) plus a dakuten
        into the single character "ga" (が). Unicode can be converted to NFC
        form without any change in its meaning.

      - If you ask for NFKC normalization, it will apply additional
        normalizations that can change the meanings of characters. For example,
        ellipsis characters will be replaced with three periods, all ligatures
        will be replaced with the individual characters that make them up,
        and characters that differ in font style will be converted to the same
        character.

    - If anything was changed, repeat all the steps, so that the function is
      idempotent. "&amp;amp;" will become "&", for example, not "&amp;".

    `fix_text` will work one line at a time, with the possibility that some
    lines are in different encodings, allowing it to fix text that has been
    concatenated together from different sources.

    When it encounters lines longer than `max_decode_length` (1 million
    codepoints by default), it will not run the `fix_encoding` step, to avoid
    unbounded slowdowns.

    If you're certain that any decoding errors in the text would have affected
    the entire text in the same way, and you don't mind operations that scale
    with the length of the text, you can use `fix_text_segment` directly to
    fix the whole string in one batch.
    """
    if isinstance(text, bytes):
        raise UnicodeError(fixes.BYTES_ERROR_TEXT)

    fixed_lines = []
    start = 0
    while start < len(text):
        # Each segment runs up to and including the next newline, or to the
        # end of the text when no newline is left.
        newline_at = text.find('\n', start)
        end = len(text) if newline_at == -1 else newline_at + 1

        # Overlong lines skip the encoding fix to keep the run time bounded.
        decode_this_line = (
            False if (end - start) > max_decode_length else fix_encoding
        )

        line = text[start:end]

        if fix_entities == 'auto' and '<' in line and '>' in line:
            # we see angle brackets together; this could be HTML, so entities
            # are left alone for this line and every line after it
            fix_entities = False

        fixed_lines.append(
            fix_text_segment(
                line,
                fix_entities=fix_entities,
                remove_terminal_escapes=remove_terminal_escapes,
                fix_encoding=decode_this_line,
                uncurl_quotes=uncurl_quotes,
                fix_latin_ligatures=fix_latin_ligatures,
                fix_character_width=fix_character_width,
                fix_line_breaks=fix_line_breaks,
                fix_surrogates=fix_surrogates,
                remove_control_chars=remove_control_chars,
                remove_bom=remove_bom,
                normalization=normalization
            )
        )
        start = end

    return ''.join(fixed_lines)
|
||||
|
||||
# Some alternate names for the main functions
|
||||
ftfy = fix_text
|
||||
fix_encoding = fixes.fix_encoding
|
||||
fix_text_encoding = fixes.fix_text_encoding # deprecated
|
||||
|
||||
|
||||
def fix_file(input_file,
             encoding=None,
             fix_entities='auto',
             remove_terminal_escapes=True,
             fix_encoding=True,
             fix_latin_ligatures=True,
             fix_character_width=True,
             uncurl_quotes=True,
             fix_line_breaks=True,
             fix_surrogates=True,
             remove_control_chars=True,
             remove_bom=True,
             normalization='NFC'):
    """
    Fix text that is found in a file, yielding one fixed line at a time.

    Lines that arrive as Unicode text are used as they are. Lines that arrive
    as bytes are decoded with `encoding` when one was supplied; otherwise the
    encoding is guessed with `guess_bytes`, and that guess is reused for all
    following lines. Guessing tries a few common encodings but makes no
    promises; see `guess_bytes` for how it is done.

    With `fix_entities='auto'`, HTML entities stop being unescaped from the
    first line that contains both '<' and '>' onward. The remaining options
    are passed to `fix_text_segment` unchanged.

    The output is a stream of fixed lines of text.
    """
    unescape = fix_entities
    for raw_line in input_file:
        line = raw_line
        if isinstance(line, bytes):
            if encoding is not None:
                line = line.decode(encoding)
            else:
                # remember the guess so later lines use the same encoding
                line, encoding = guess_bytes(line)
        if fix_entities == 'auto' and '<' in line and '>' in line:
            unescape = False
        yield fix_text_segment(
            line,
            fix_entities=unescape,
            remove_terminal_escapes=remove_terminal_escapes,
            fix_encoding=fix_encoding,
            fix_latin_ligatures=fix_latin_ligatures,
            fix_character_width=fix_character_width,
            uncurl_quotes=uncurl_quotes,
            fix_line_breaks=fix_line_breaks,
            fix_surrogates=fix_surrogates,
            remove_control_chars=remove_control_chars,
            remove_bom=remove_bom,
            normalization=normalization
        )
|
||||
|
||||
|
||||
def fix_text_segment(text,
                     fix_entities='auto',
                     remove_terminal_escapes=True,
                     fix_encoding=True,
                     fix_latin_ligatures=True,
                     fix_character_width=True,
                     uncurl_quotes=True,
                     fix_line_breaks=True,
                     fix_surrogates=True,
                     remove_control_chars=True,
                     remove_bom=True,
                     normalization='NFC'):
    """
    Apply fixes to text in a single chunk. This could be a line of text
    within a larger run of `fix_text`, or it could be a larger amount
    of text that you are certain is in a consistent encoding.

    The enabled fixes are applied in a fixed order, followed by Unicode
    normalization, and the whole pass is repeated until the text stops
    changing. Raises UnicodeError if `text` is a bytes object.

    See `fix_text` for a description of the parameters.
    """
    if isinstance(text, bytes):
        raise UnicodeError(fixes.BYTES_ERROR_TEXT)

    if fix_entities == 'auto' and '<' in text and '>' in text:
        fix_entities = False

    # (option value, name of the matching function in `fixes`), listed in the
    # order in which the fixes are applied.
    pipeline = (
        (remove_terminal_escapes, 'remove_terminal_escapes'),
        (fix_encoding, 'fix_encoding'),
        (fix_entities, 'unescape_html'),
        (fix_latin_ligatures, 'fix_latin_ligatures'),
        (fix_character_width, 'fix_character_width'),
        (uncurl_quotes, 'uncurl_quotes'),
        (fix_line_breaks, 'fix_line_breaks'),
        (fix_surrogates, 'fix_surrogates'),
        (remove_control_chars, 'remove_control_chars'),
        # Removing the BOM is redundant once control characters are removed.
        (remove_bom and not remove_control_chars, 'remove_bom'),
    )
    enabled_steps = [step for wanted, step in pipeline if wanted]

    while True:
        before = text
        for step in enabled_steps:
            text = getattr(fixes, step)(text)
        if normalization is not None:
            text = unicodedata.normalize(normalization, text)
        # A pass that changes nothing means the text has reached a fixed point.
        if text == before:
            return text
|
||||
|
||||
|
||||
def guess_bytes(bstring):
    """
    NOTE: Using `guess_bytes` is not the recommended way of using ftfy. ftfy
    is not designed to be an encoding detector.

    In the unfortunate situation that you have some bytes in an unknown
    encoding, ftfy can guess a reasonable strategy for decoding them, by trying
    a few common encodings that can be distinguished from each other.

    Unlike the rest of ftfy, this may not be accurate, and it may *create*
    Unicode problems instead of solving them!

    It doesn't try East Asian encodings at all, and if you have East Asian text
    that you don't know how to decode, you are somewhat out of luck.  East
    Asian encodings require some serious statistics to distinguish from each
    other, so we can't support them without decreasing the accuracy of ftfy.

    If you don't know which encoding you have at all, I recommend
    trying the 'chardet' module, and being appropriately skeptical about its
    results.

    The encodings we try here are:

    - UTF-16 with a byte order mark, because a UTF-16 byte order mark looks
      like nothing else
    - UTF-8, because it's the global standard, which has been used by a
      majority of the Web since 2008
    - "utf-8-variants", because it's what people actually implement when they
      think they're doing UTF-8
    - MacRoman, because Microsoft Office thinks it's still a thing, and it
      can be distinguished by its line breaks. (If there are no line breaks in
      the string, though, you're out of luck.)
    - "sloppy-windows-1252", the Latin-1-like encoding that is the most common
      single-byte encoding

    Returns a `(decoded_text, encoding_name)` tuple. Raises UnicodeError if
    `bstring` is already Unicode text rather than bytes.
    """
    # isinstance() also rejects subclasses of the text type, which would
    # otherwise fail further down with an unrelated TypeError. type('') is
    # the text type on both Python 2 (with unicode_literals) and Python 3.
    if isinstance(bstring, type('')):
        raise UnicodeError(
            "This string was already decoded as Unicode. You should pass "
            "bytes to guess_bytes, not Unicode."
        )

    if bstring.startswith((b'\xfe\xff', b'\xff\xfe')):
        return bstring.decode('utf-16'), 'utf-16'

    byteset = set(bytes(bstring))
    # Unpacking a bytes literal gives ints on Python 3 and one-character
    # strings on Python 2, matching what iterating over `bstring` produces.
    byte_ed, byte_c0, byte_CR, byte_LF = b'\xed\xc0\r\n'

    try:
        if byte_ed in byteset or byte_c0 in byteset:
            # Byte 0xed can be used to encode a range of codepoints that
            # are UTF-16 surrogates. UTF-8 does not use UTF-16 surrogates,
            # so when we see 0xed, it's very likely we're being asked to
            # decode CESU-8, the variant that encodes UTF-16 surrogates
            # instead of the original characters themselves.
            #
            # This will occasionally trigger on standard UTF-8, as there
            # are some Korean characters that also use byte 0xed, but that's
            # not harmful.
            #
            # Byte 0xc0 is impossible because, numerically, it would only
            # encode characters lower than U+0040. Those already have
            # single-byte representations, and UTF-8 requires using the
            # shortest possible representation. However, Java hides the null
            # codepoint, U+0000, in a non-standard longer representation -- it
            # encodes it as 0xc0 0x80 instead of 0x00, guaranteeing that 0x00
            # will never appear in the encoded bytes.
            #
            # The 'utf-8-variants' decoder can handle both of these cases, as
            # well as standard UTF-8, at the cost of a bit of speed.
            return bstring.decode('utf-8-variants'), 'utf-8-variants'
        else:
            return bstring.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        # Not valid UTF-8; fall through to the single-byte encodings.
        pass

    if byte_CR in bstring and byte_LF not in bstring:
        return bstring.decode('macroman'), 'macroman'
    else:
        return bstring.decode('sloppy-windows-1252'), 'sloppy-windows-1252'
|
||||
|
||||
|
||||
def explain_unicode(text):
    """
    A utility method that's useful for debugging mysterious Unicode.

    It breaks down a string and prints one line per codepoint, showing its
    number in hexadecimal, its glyph, its category in the Unicode standard,
    and its name in the Unicode standard ('<unknown>' when it has no name).
    Characters that `is_printable` rejects are shown as their
    'unicode-escape' form instead of the raw glyph.

    For '(╯°□°)╯︵ ┻━┻' the printed lines carry these fields:

        U+0028  (  [Ps]  LEFT PARENTHESIS
        U+256F  ╯  [So]  BOX DRAWINGS LIGHT ARC UP AND LEFT
        U+00B0  °  [So]  DEGREE SIGN
        U+25A1  □  [So]  WHITE SQUARE
        U+00B0  °  [So]  DEGREE SIGN
        U+0029  )  [Pe]  RIGHT PARENTHESIS
        U+256F  ╯  [So]  BOX DRAWINGS LIGHT ARC UP AND LEFT
        U+FE35  ︵  [Ps]  PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
        U+0020     [Zs]  SPACE
        U+253B  ┻  [So]  BOX DRAWINGS HEAVY UP AND HORIZONTAL
        U+2501  ━  [So]  BOX DRAWINGS HEAVY HORIZONTAL
        U+253B  ┻  [So]  BOX DRAWINGS HEAVY UP AND HORIZONTAL

    (The exact column spacing depends on `display_ljust`, which is given the
    glyph and a width of 7.)
    """
    line_format = 'U+{code:04X} {display} [{category}] {name}'
    for char in text:
        glyph = (
            char if is_printable(char)
            else char.encode('unicode-escape').decode('ascii')
        )
        print(line_format.format(
            display=display_ljust(glyph, 7),
            code=ord(char),
            category=unicodedata.category(char),
            name=unicodedata.name(char, '<unknown>')
        ))
|
||||
@@ -0,0 +1,94 @@
|
||||
# coding: utf-8
|
||||
r"""
|
||||
Give Python the ability to decode some common, flawed encodings.
|
||||
|
||||
Python does not want you to be sloppy with your text. Its encoders and decoders
|
||||
("codecs") follow the relevant standards whenever possible, which means that
|
||||
when you get text that *doesn't* follow those standards, you'll probably fail
|
||||
to decode it. Or you might succeed at decoding it for implementation-specific
|
||||
reasons, which is perhaps worse.
|
||||
|
||||
There are some encodings out there that Python wishes didn't exist, which are
|
||||
widely used outside of Python:
|
||||
|
||||
- "utf-8-variants", a family of not-quite-UTF-8 encodings, including the
|
||||
ever-popular CESU-8 and "Java modified UTF-8".
|
||||
- "Sloppy" versions of character map encodings, where bytes that don't map to
|
||||
anything will instead map to the Unicode character with the same number.
|
||||
|
||||
Simply importing this module, or in fact any part of the `ftfy` package, will
|
||||
make these new "bad codecs" available to Python through the standard Codecs
|
||||
API. You never have to actually call any functions inside `ftfy.bad_codecs`.
|
||||
|
||||
However, if you want to call something because your code checker insists on it,
|
||||
you can call ``ftfy.bad_codecs.ok()``.
|
||||
|
||||
A quick example of decoding text that's encoded in CESU-8:
|
||||
|
||||
>>> import ftfy.bad_codecs
|
||||
>>> print(b'\xed\xa0\xbd\xed\xb8\x8d'.decode('utf-8-variants'))
|
||||
😍
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
from encodings import normalize_encoding
|
||||
import codecs
|
||||
|
||||
# Codecs that have already been found, keyed by the encoding name exactly as
# it was passed to `search_function`.
_CACHE = {}

# Aliases for 'utf-8-variants'. They are spelled with underscores because
# `normalize_encoding` turns every hyphen into an underscore before we compare.
UTF8_VAR_NAMES = (
    'utf_8_variants',
    'utf8_variants',
    'utf_8_variant',
    'utf8_variant',
    'utf_8_var',
    'utf8_var',
    'cesu_8',
    'cesu8',
    'java_utf_8',
    'java_utf8'
)


def search_function(encoding):
    """
    Look up one of ftfy's "bad codecs" by name, for Python's codecs API.

    This is a codec search function: it takes an encoding name and returns
    the codec for that encoding if it knows one, or None if it doesn't.

    The names it recognizes (after `normalize_encoding`) are:

    - Any of the 'utf-8-variants' aliases listed in `UTF8_VAR_NAMES`.
    - Names beginning with 'sloppy_', such as 'sloppy-windows-NNNN' or
      'sloppy-iso-8859-N', which are looked up in the table of sloppy
      codecs. A 'sloppy' name that is not in that table yields None.

    Successful lookups are remembered in `_CACHE`; misses are not cached.
    """
    try:
        return _CACHE[encoding]
    except KeyError:
        pass

    normalized = normalize_encoding(encoding)
    if normalized in UTF8_VAR_NAMES:
        # Imported lazily, so the codec module is only loaded when requested.
        from ftfy.bad_codecs.utf8_variants import CODEC_INFO
        found = CODEC_INFO
    elif normalized.startswith('sloppy_'):
        from ftfy.bad_codecs.sloppy import CODECS
        found = CODECS.get(normalized)
    else:
        found = None

    if found is not None:
        _CACHE[encoding] = found
    return found
|
||||
|
||||
|
||||
def ok():
    """
    Do nothing, successfully.

    This exists so that there is something to call after importing this
    package. Pyflakes complains when a module is imported and apparently
    never used; it cannot tell that the import is what makes certain
    encodings work with the ``unicode.encode`` and ``bytes.decode`` methods.
    Calling ``ok()`` gives the import a visible use.
    """
    return None
|
||||
|
||||
|
||||
# Install the search function as a side effect of importing this module, so
# that the "bad codecs" become available through the standard codecs API
# without the caller having to invoke anything.
codecs.register(search_function)
|
||||
@@ -0,0 +1,164 @@
|
||||
# coding: utf-8
|
||||
r"""
|
||||
Decodes single-byte encodings, filling their "holes" in the same messy way that
|
||||
everyone else does.
|
||||
|
||||
A single-byte encoding maps each byte to a Unicode character, except that some
|
||||
bytes are left unmapped. In the commonly-used Windows-1252 encoding, for
|
||||
example, bytes 0x81 and 0x8D, among others, have no meaning.
|
||||
|
||||
Python, wanting to preserve some sense of decorum, will handle these bytes
|
||||
as errors. But Windows knows that 0x81 and 0x8D are possible bytes and they're
|
||||
different from each other. It just hasn't defined what they are in terms of
|
||||
Unicode.
|
||||
|
||||
Software that has to interoperate with Windows-1252 and Unicode -- such as all
|
||||
the common Web browsers -- will pick some Unicode characters for them to map
|
||||
to, and the characters they pick are the Unicode characters with the same
|
||||
numbers: U+0081 and U+008D. This is the same as what Latin-1 does, and the
|
||||
resulting characters tend to fall into a range of Unicode that's set aside for
|
||||
obsolete Latin-1 control characters anyway.
|
||||
|
||||
These sloppy codecs let Python do the same thing, thus interoperating with
|
||||
other software that works this way. It defines a sloppy version of many
|
||||
single-byte encodings with holes. (There is no need for a sloppy version of
|
||||
an encoding without holes: for example, there is no such thing as
|
||||
sloppy-iso-8859-2 or sloppy-macroman.)
|
||||
|
||||
The following encodings will become defined:
|
||||
|
||||
- sloppy-windows-1250 (Central European, sort of based on ISO-8859-2)
|
||||
- sloppy-windows-1251 (Cyrillic)
|
||||
- sloppy-windows-1252 (Western European, based on Latin-1)
|
||||
- sloppy-windows-1253 (Greek, sort of based on ISO-8859-7)
|
||||
- sloppy-windows-1254 (Turkish, based on ISO-8859-9)
|
||||
- sloppy-windows-1255 (Hebrew, based on ISO-8859-8)
|
||||
- sloppy-windows-1256 (Arabic)
|
||||
- sloppy-windows-1257 (Baltic, based on ISO-8859-13)
|
||||
- sloppy-windows-1258 (Vietnamese)
|
||||
- sloppy-cp874 (Thai, based on ISO-8859-11)
|
||||
- sloppy-iso-8859-3 (Maltese and Esperanto, I guess)
|
||||
- sloppy-iso-8859-6 (different Arabic)
|
||||
- sloppy-iso-8859-7 (Greek)
|
||||
- sloppy-iso-8859-8 (Hebrew)
|
||||
- sloppy-iso-8859-11 (Thai)
|
||||
|
||||
Aliases such as "sloppy-cp1252" for "sloppy-windows-1252" will also be
|
||||
defined.
|
||||
|
||||
Only sloppy-windows-1251 and sloppy-windows-1252 are used by the rest of ftfy;
|
||||
the rest are rather uncommon.
|
||||
|
||||
Here are some examples, using `ftfy.explain_unicode` to illustrate how
|
||||
sloppy-windows-1252 merges Windows-1252 with Latin-1:
|
||||
|
||||
>>> from ftfy import explain_unicode
|
||||
>>> some_bytes = b'\x80\x81\x82'
|
||||
>>> explain_unicode(some_bytes.decode('latin-1'))
|
||||
U+0080 \x80 [Cc] <unknown>
|
||||
U+0081 \x81 [Cc] <unknown>
|
||||
U+0082 \x82 [Cc] <unknown>
|
||||
|
||||
>>> explain_unicode(some_bytes.decode('windows-1252', 'replace'))
|
||||
U+20AC € [Sc] EURO SIGN
|
||||
U+FFFD � [So] REPLACEMENT CHARACTER
|
||||
U+201A ‚ [Ps] SINGLE LOW-9 QUOTATION MARK
|
||||
|
||||
>>> explain_unicode(some_bytes.decode('sloppy-windows-1252'))
|
||||
U+20AC € [Sc] EURO SIGN
|
||||
U+0081 \x81 [Cc] <unknown>
|
||||
U+201A ‚ [Ps] SINGLE LOW-9 QUOTATION MARK
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import codecs
|
||||
from encodings import normalize_encoding
|
||||
import sys
|
||||
|
||||
REPLACEMENT_CHAR = '\ufffd'
PY26 = sys.version_info[:2] == (2, 6)


def make_sloppy_codec(encoding):
    """
    Build the 'sloppy' counterpart of the single-byte codec named `encoding`.

    The sloppy codec agrees with the original wherever the original assigns
    a character to a byte. Bytes that the original leaves unassigned are
    mapped to the character with the same number, as Latin-1 would do, so
    that every byte can be decoded and encoded again.

    The standard library defines single-byte encodings with a handful of
    boilerplate classes wrapped around `codecs.charmap_decode` and
    `codecs.charmap_encode`. This function creates the same set of classes
    for the sloppy table and returns them bundled in a `codecs.CodecInfo`
    whose name is 'sloppy-' followed by `encoding`.
    """
    # All 256 byte values, in order.
    every_byte = bytearray(range(256))

    # Start from the Latin-1 reading of each byte; this is the fallback for
    # bytes the real encoding does not define.
    table = list(every_byte.decode('latin-1'))

    # Decode the same bytes with the real encoding, marking unassigned bytes
    # with the replacement character.
    if PY26:
        native_chars = every_byte.decode(encoding, 'replace')
    else:
        native_chars = every_byte.decode(encoding, errors='replace')

    # Wherever the real encoding produced an actual character, it wins over
    # the Latin-1 fallback.
    for index, char in enumerate(native_chars):
        if char == REPLACEMENT_CHAR:
            continue
        table[index] = char

    # For ftfy's own purposes, byte 1A (the "Substitute" control code) stands
    # for the Unicode replacement character U+FFFD.
    table[0x1a] = REPLACEMENT_CHAR

    # The two structures that the charmap functions need.
    decoding_table = ''.join(table)
    encoding_table = codecs.charmap_build(decoding_table)

    # The class boilerplate below mirrors what `encodings.cp1252` in the
    # standard library contains.
    class Codec(codecs.Codec):
        def encode(self, input, errors='strict'):
            return codecs.charmap_encode(input, errors, encoding_table)

        def decode(self, input, errors='strict'):
            return codecs.charmap_decode(input, errors, decoding_table)

    class IncrementalEncoder(codecs.IncrementalEncoder):
        def encode(self, input, final=False):
            encoded, _length = codecs.charmap_encode(
                input, self.errors, encoding_table
            )
            return encoded

    class IncrementalDecoder(codecs.IncrementalDecoder):
        def decode(self, input, final=False):
            decoded, _length = codecs.charmap_decode(
                input, self.errors, decoding_table
            )
            return decoded

    class StreamWriter(Codec, codecs.StreamWriter):
        pass

    class StreamReader(Codec, codecs.StreamReader):
        pass

    stateless = Codec()
    return codecs.CodecInfo(
        name='sloppy-' + encoding,
        encode=stateless.encode,
        decode=stateless.decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
|
||||
|
||||
# Build one sloppy codec per encoding that has unassigned bytes. The main
# module of ftfy.bad_codecs looks codecs up in the resulting CODECS dictionary,
# whose keys are the normalized 'sloppy_...' names.
CODECS = {}
INCOMPLETE_ENCODINGS = []
INCOMPLETE_ENCODINGS += ['windows-%d' % num for num in range(1250, 1259)]
INCOMPLETE_ENCODINGS += ['iso-8859-%d' % num for num in (3, 6, 7, 8, 11)]
INCOMPLETE_ENCODINGS += ['cp%d' % num for num in range(1250, 1259)]
INCOMPLETE_ENCODINGS += ['cp874']

for _encoding in INCOMPLETE_ENCODINGS:
    # Normalizing turns the hyphens into underscores, which is the form the
    # codec search function will ask for.
    _new_name = normalize_encoding('sloppy-' + _encoding)
    CODECS[_new_name] = make_sloppy_codec(_encoding)
|
||||
@@ -0,0 +1,282 @@
|
||||
r"""
|
||||
This file defines a codec called "utf-8-variants" (or "utf-8-var"), which can
|
||||
decode text that's been encoded with a popular non-standard version of UTF-8.
|
||||
This includes CESU-8, the accidental encoding made by layering UTF-8 on top of
|
||||
UTF-16, as well as Java's twist on CESU-8 that contains a two-byte encoding for
|
||||
codepoint 0.
|
||||
|
||||
This is particularly relevant in Python 3, which provides no other way of
|
||||
decoding CESU-8 [1]_.
|
||||
|
||||
The easiest way to use the codec is to simply import `ftfy.bad_codecs`:
|
||||
|
||||
>>> import ftfy.bad_codecs
|
||||
>>> result = b'here comes a null! \xc0\x80'.decode('utf-8-var')
|
||||
>>> print(repr(result).lstrip('u'))
|
||||
'here comes a null! \x00'
|
||||
|
||||
The codec does not at all enforce "correct" CESU-8. For example, the Unicode
|
||||
Consortium's not-quite-standard describing CESU-8 requires that there is only
|
||||
one possible encoding of any character, so it does not allow mixing of valid
|
||||
UTF-8 and CESU-8. This codec *does* allow that, just like Python 2's UTF-8
|
||||
decoder does.
|
||||
|
||||
Characters in the Basic Multilingual Plane still have only one encoding. This
|
||||
codec still enforces the rule, within the BMP, that characters must appear in
|
||||
their shortest form. There is one exception: the sequence of bytes `0xc0 0x80`,
|
||||
instead of just `0x00`, may be used to encode the null character `U+0000`, like
|
||||
in Java.
|
||||
|
||||
If you encode with this codec, you get legitimate UTF-8. Decoding with this
|
||||
codec and then re-encoding is not idempotent, although encoding and then
|
||||
decoding is. So this module won't produce CESU-8 for you. Look for that
|
||||
functionality in the sister module, "Breaks Text For You", coming approximately
|
||||
never.
|
||||
|
||||
.. [1] In a pinch, you can decode CESU-8 in Python 2 using the UTF-8 codec:
|
||||
first decode the bytes (incorrectly), then encode them, then decode them
|
||||
again, using UTF-8 as the codec every time.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import codecs
|
||||
from encodings.utf_8 import (IncrementalDecoder as UTF8IncrementalDecoder,
|
||||
IncrementalEncoder as UTF8IncrementalEncoder)
|
||||
from ftfy.compatibility import bytes_to_ints, unichr, PYTHON2
|
||||
|
||||
NAME = 'utf-8-variants'

# This regular expression matches all possible six-byte CESU-8 sequences,
# plus truncations of them at the end of the input. (If any of the subgroups
# matches the end of the input, then all the subgroups after it also have to
# match the end, as there are no more bytes to match.)
#
# The end of the input is written as \Z, not $. In Python's `re`, $ also
# matches just before a newline that ends the input, so with $ a sequence
# such as b'\xc0\n' would be mistaken for a truncated special sequence and
# the newline would be swallowed as if it were a continuation byte. \Z
# matches only at the true end of the input. (The backslash is doubled
# because these are not raw literals: the \x escapes must still be
# interpreted by Python.)
CESU8_EXPR = (
    b'('
    b'\xed'
    b'([\xa0-\xaf]|\\Z)'
    b'([\x80-\xbf]|\\Z)'
    b'(\xed|\\Z)'
    b'([\xb0-\xbf]|\\Z)'
    b'([\x80-\xbf]|\\Z)'
    b')'
)

CESU8_RE = re.compile(CESU8_EXPR)

# This expression matches isolated surrogate characters that aren't
# CESU-8, which have to be handled carefully on Python 2.
SURROGATE_EXPR = (b'(\xed([\xa0-\xbf]|\\Z)([\x80-\xbf]|\\Z))')

# This expression matches the Java encoding of U+0, including if it's
# truncated and we need more bytes.
NULL_EXPR = b'(\xc0(\x80|\\Z))'

# This regex matches cases that we need to decode differently from
# standard UTF-8.
SPECIAL_BYTES_RE = re.compile(b'|'.join([NULL_EXPR, CESU8_EXPR, SURROGATE_EXPR]))
|
||||
|
||||
|
||||
class IncrementalDecoder(UTF8IncrementalDecoder):
    """
    An incremental decoder for 'utf-8-variants', built on top of Python's
    own UTF-8 incremental decoder.

    This decoder takes in bytes, possibly arriving in a stream, and outputs
    the decoded text. Its strategy is to hand as much as possible to the
    real UTF-8 decoder, which is heavily optimized, and to use the
    specialized methods defined here only for the byte sequences that the
    real decoder isn't expecting.
    """
    def _buffer_decode(self, input, errors, final):
        """
        Decode bytes that may be arriving in a stream, following the Codecs
        API.

        `input` is the incoming sequence of bytes. `errors` names the error
        handling mode; every error case is delegated to the real UTF-8
        decoder. `final` says whether this is the end of the sequence, in
        which case incomplete input is an error rather than a reason to
        wait.

        Returns a tuple of the text decoded so far and the number of bytes
        consumed.
        """
        # `pieces` collects the decoded text; `offset` is how far into
        # `input` we have consumed.
        pieces = []
        offset = 0
        decode_step = self._buffer_decode_step

        text, used = decode_step(input[offset:], errors, final)
        # A step that consumes nothing means either that nothing is left or
        # that we have to wait for more input; both end the loop.
        while used != 0:
            pieces.append(text)
            offset += used
            text, used = decode_step(input[offset:], errors, final)

        if final:
            # _buffer_decode_step must consume all the bytes when `final` is
            # true.
            assert offset == len(input)

        return ''.join(pieces), offset

    def _buffer_decode_step(self, input, errors, final):
        """
        Perform a single decoding step, which is one of:

        - Decode as much real UTF-8 as possible.
        - Decode a six-byte CESU-8 sequence at the current position.
        - Decode a Java-style null at the current position.

        Returns a tuple of the decoded text and the number of bytes consumed
        by this step.
        """
        # The superclass method does most of the real work.
        sup = UTF8IncrementalDecoder._buffer_decode

        # Look for the next position that needs non-standard treatment.
        special = SPECIAL_BYTES_RE.search(input)
        if special is None:
            return sup(input, errors, final)

        plain_length = special.start()
        if plain_length > 0:
            # Everything before the special sequence is decoded as ordinary
            # UTF-8; that slice is complete, so it is decoded as final.
            return sup(input[:plain_length], errors, True)

        # From here on, a specially handled sequence begins at the very start
        # of the input.
        if not input.startswith(b'\xc0'):
            # Decode a possible six-byte sequence starting with 0xed.
            return self._buffer_decode_surrogates(sup, input, errors, final)

        if len(input) > 1:
            # Decode the two-byte sequence 0xc0 0x80.
            return '\u0000', 2
        if final:
            # We hit the end of the stream. Let the superclass method
            # handle it.
            return sup(input, errors, True)
        # Wait to see another byte.
        return '', 0

    @staticmethod
    def _buffer_decode_surrogates(sup, input, errors, final):
        """
        Decode input that begins with an improperly encoded surrogate.

        A CESU-8 pair of surrogates was meant to carry a 20-bit number, to
        which 0x10000 is added to get a codepoint. The bits of that number
        are still visible in the bytes, laid out like this:

            11101101 1010abcd 10efghij 11101101 1011klmn 10opqrst

        CESU8_RE matches byte sequences of this form; when it does, the bits
        are extracted and assembled into the codepoint. Anything else is
        passed to `sup`, the real UTF-8 decoding function.

        Returns a tuple of the decoded text and the number of bytes consumed.
        """
        if len(input) < 6:
            if not final:
                # We found a surrogate, the stream isn't over yet, and we
                # don't know enough of the following bytes to decode
                # anything, so consume zero bytes and wait.
                return '', 0

            # We found 0xed near the end of the stream, and there aren't six
            # bytes to decode. Delegate to the superclass method to handle it
            # as an error.
            if PYTHON2 and len(input) >= 3:
                # We can't trust Python 2 to raise an error when it's asked
                # to decode a surrogate, so let's force the issue.
                input = mangle_surrogates(input)
            return sup(input, errors, final)

        if CESU8_RE.match(input):
            # This is a CESU-8 sequence: pull out the intended 20-bit value
            # and consume six bytes. The four bit fields do not overlap, so
            # they can be combined with bitwise OR before the offset is
            # added.
            bytenums = bytes_to_ints(input[:6])
            twenty_bits = (
                ((bytenums[1] & 0x0f) << 16) |
                ((bytenums[2] & 0x3f) << 10) |
                ((bytenums[4] & 0x0f) << 6) |
                (bytenums[5] & 0x3f)
            )
            return unichr(twenty_bits + 0x10000), 6

        # This looked like a CESU-8 sequence, but it wasn't one. 0xed
        # indicates the start of a three-byte sequence, so give three bytes
        # to the superclass to decode as usual -- except for working around
        # the Python 2 discrepancy as before.
        if PYTHON2:
            input = mangle_surrogates(input)
        return sup(input[:3], errors, False)
|
||||
|
||||
|
||||
def mangle_surrogates(bytestring):
    """
    On Python 2, replace UTF-8-encoded surrogates at the start of
    `bytestring` with bytes that are certain not to decode.

    When Python 3 sees the UTF-8 encoding of a surrogate codepoint, it treats
    it as an error (which it is). In 'replace' mode, it will decode as three
    replacement characters. But Python 2 will just output the surrogate
    codepoint.

    To keep the two consistent, and to protect downstream applications from
    malformed strings, each surrogate sequence in the leading run of such
    sequences becomes the bytes `ff ff ff`, which turn into three
    replacement characters in 'replace' mode. The rest of the byte string is
    returned unchanged after them.

    This function does nothing in Python 3, and it will be deprecated in ftfy
    5.0.
    """
    if not PYTHON2:
        # On Python 3, nothing needs to be done.
        return bytestring

    # Count how many three-byte surrogate encodings open the byte string.
    surrogate_count = 0
    remainder = bytestring
    while remainder.startswith(b'\xed') and len(remainder) >= 3:
        decoded = remainder[:3].decode('utf-8', 'replace')
        if not ('\ud800' <= decoded <= '\udfff'):
            break
        surrogate_count += 1
        remainder = remainder[3:]

    if surrogate_count == 0:
        return bytestring
    return b'\xff\xff\xff' * surrogate_count + remainder
|
||||
|
||||
# The encoder is identical to UTF-8.
IncrementalEncoder = UTF8IncrementalEncoder


# Everything below here is boilerplate that matches the modules in the
# built-in `encodings` package.
def encode(input, errors='strict'):
    """
    Encode all of `input` in one call, using the UTF-8 incremental encoder.

    Returns a tuple of the encoded bytes and the length of `input`.
    """
    return IncrementalEncoder(errors).encode(input, final=True), len(input)


def decode(input, errors='strict'):
    """
    Decode all of `input` in one call, using the 'utf-8-variants'
    incremental decoder.

    Returns a tuple of the decoded text and the length of `input`.
    """
    return IncrementalDecoder(errors).decode(input, final=True), len(input)


class StreamWriter(codecs.StreamWriter):
    """Stream writer that encodes with the module-level `encode`."""
    # `codecs.StreamWriter.write` calls `self.encode(data, self.errors)`.
    # A plain function stored on the class would be bound as a method and
    # receive `self` as its first argument, so it must be a staticmethod.
    # (The stdlib codec modules get away without this only because they
    # assign built-in functions, which are never bound.)
    encode = staticmethod(encode)


class StreamReader(codecs.StreamReader):
    """Stream reader that decodes with the module-level `decode`."""
    # Same reasoning as in StreamWriter: `self.decode(data, self.errors)`
    # must reach `decode` without `self` being passed as the input.
    decode = staticmethod(decode)
|
||||
|
||||
|
||||
# The bundle of entry points that the codec search function hands back to
# Python's codecs API for 'utf-8-variants' and its aliases.
CODEC_INFO = codecs.CodecInfo(
    encode=encode,
    decode=decode,
    streamreader=StreamReader,
    streamwriter=StreamWriter,
    incrementalencoder=IncrementalEncoder,
    incrementaldecoder=IncrementalDecoder,
    name=NAME,
)
|
||||
@@ -0,0 +1,162 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Heuristics to determine whether re-encoding text is actually making it
|
||||
more reasonable.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import unicodedata
|
||||
from ftfy.chardata import chars_to_classes
|
||||
|
||||
# The following regex uses the mapping of character classes to ASCII
|
||||
# characters defined in chardata.py and build_data.py:
|
||||
#
|
||||
# L = Latin capital letter
|
||||
# l = Latin lowercase letter
|
||||
# A = Non-latin capital or title-case letter
|
||||
# a = Non-latin lowercase letter
|
||||
# C = Non-cased letter (Lo)
|
||||
# X = Control character (Cc)
|
||||
# m = Letter modifier (Lm)
|
||||
# M = Mark (Mc, Me, Mn)
|
||||
# N = Miscellaneous numbers (No)
|
||||
# 1 = Math symbol (Sm) or currency symbol (Sc)
|
||||
# 2 = Symbol modifier (Sk)
|
||||
# 3 = Other symbol (So)
|
||||
# S = UTF-16 surrogate
|
||||
# _ = Unassigned character
|
||||
# = Whitespace
|
||||
# o = Other
# i = IPA symbol or modifier (U+0250 to U+02FF, excluding the schwa)
# P = Private use (Co)
|
||||
|
||||
|
||||
def _make_weirdness_regex():
    """
    Compile the regex that matches 'weird' sequences of character classes.

    The regex is an alternation of parenthesized groups, one per kind of
    weirdness, and is meant to be run on a string of character-class codes
    rather than on the text itself. The more matches there are, the weirder
    the text is.
    """
    patterns = [
        # Lowercase letters that are followed by non-ASCII uppercase letters.
        'lA',

        # Diacritical marks, except when they modify a non-cased letter or
        # another mark.
        #
        # You wouldn't put a diacritical mark on a digit or a space, for
        # example. You might put it on a Latin letter, but in that case there
        # will almost always be a pre-composed version, and we normalize to
        # pre-composed versions first. The cases that can't be pre-composed
        # tend to be in large scripts without case, which are in class C.
        '[^CM]M',

        # Non-Latin characters adjacent to Latin characters.
        #
        # This is a simplification from ftfy version 2, which compared all
        # adjacent scripts. However, the ambiguities we need to resolve come
        # from encodings designed to represent Latin characters.
        '[Ll][AaC]',
        '[AaC][Ll]',

        # IPA letters next to capital letters.
        #
        # IPA uses lowercase letters only. Some accented capital letters next
        # to punctuation can accidentally decode as IPA letters, and an IPA
        # letter appearing next to a capital letter is a good sign that this
        # happened.
        '[LA]i',
        'i[LA]',

        # Non-combining diacritics. The common ones, like ^ (the CIRCUMFLEX
        # ACCENT, repurposed as a caret, exponent sign, or happy eye), have
        # already been set aside in category 'o'. The remaining ones, like
        # the diaeresis, are pretty weird to see on their own instead of
        # combined with a letter.
        '2',

        # C1 control characters, which are almost always the result of
        # decoding Latin-1 that was meant to be Windows-1252.
        'X',

        # Private use and unassigned characters.
        'P',
        '_',
    ]

    # Adjacent characters from any different pair of these categories:
    # - Modifier marks (M)
    # - Letter modifiers (m)
    # - Miscellaneous numbers (N)
    # - Symbols (1 or 3, because 2 is already weird on its own)
    exclusive_categories = 'MmN13'
    for category in exclusive_categories:
        # Every category code is a distinct character, so deleting this one
        # leaves exactly the other four, in their original order.
        other_categories = exclusive_categories.replace(category, '')
        patterns.append(category + '[' + other_categories + ']')

    return re.compile('|'.join('(' + pattern + ')' for pattern in patterns))


WEIRDNESS_RE = _make_weirdness_regex()
|
||||
|
||||
# These characters appear in mojibake but also appear commonly on their own.
# We have a slight preference to leave them alone.
COMMON_SYMBOL_RE = re.compile('[' + ''.join((
    '\N{HORIZONTAL ELLIPSIS}',
    '\N{EM DASH}',
    '\N{EN DASH}',
    '\N{LEFT SINGLE QUOTATION MARK}',
    '\N{LEFT DOUBLE QUOTATION MARK}',
    '\N{RIGHT SINGLE QUOTATION MARK}',
    '\N{RIGHT DOUBLE QUOTATION MARK}',
    '\N{INVERTED EXCLAMATION MARK}',
    '\N{INVERTED QUESTION MARK}',
    '\N{DEGREE SIGN}',
    '\N{TRADE MARK SIGN}',
    '\N{REGISTERED SIGN}',
    '\N{SINGLE LEFT-POINTING ANGLE QUOTATION MARK}',
    '\N{SINGLE RIGHT-POINTING ANGLE QUOTATION MARK}',
    '\N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}',
    '\N{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}',
    '\N{NO-BREAK SPACE}',
    '\N{ACUTE ACCENT}',
    '\N{MULTIPLICATION SIGN}',
    '\N{LATIN SMALL LETTER SHARP S}',
    '\ufeff',  # The byte-order mark, whose mis-decoded form looks common
)) + ']')
|
||||
|
||||
def sequence_weirdness(text):
    """
    Measure how often `text` contains unexpected characters or sequences of
    characters. This metric is used to disambiguate when text should be
    re-decoded or left as is.

    The text is first normalized to NFC form, so that penalties for
    diacritical marks don't apply to characters that know what to do with
    them.

    The following things are deemed weird:

    - Lowercase letters followed by non-ASCII uppercase letters
    - Non-Latin characters next to Latin characters
    - Un-combined diacritical marks, unless they're stacking on non-alphabetic
      characters (in languages that do that kind of thing a lot) or other
      marks
    - C1 control characters
    - Adjacent symbols from any different pair of these categories:

        - Modifier marks
        - Letter modifiers
        - Non-digit numbers
        - Symbols (including math and currency)

    The return value is twice the number of instances of weirdness, minus
    one for each character that COMMON_SYMBOL_RE recognizes as a common
    symbol. It can therefore be negative.
    """
    normalized = unicodedata.normalize('NFC', text)
    weird_matches = WEIRDNESS_RE.findall(chars_to_classes(normalized))
    common_symbols = COMMON_SYMBOL_RE.findall(normalized)
    return 2 * len(weird_matches) - len(common_symbols)
|
||||
|
||||
|
||||
def text_cost(text):
    """
    Compute an overall cost for `text`, where lower is better. Weirder is
    worse, but all else being equal, shorter strings are better.

    The cost is the "weirdness" of the text (see
    :func:`sequence_weirdness`) plus its length.
    """
    weirdness = sequence_weirdness(text)
    return weirdness + len(text)
|
||||
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
A script to make the char_classes.dat file.
|
||||
|
||||
This never needs to run in normal usage. It needs to be run if the character
|
||||
classes we care about change, or if a new version of Python supports a new
|
||||
Unicode standard and we want it to affect our string decoding.
|
||||
|
||||
The file that we generate is based on Unicode 9.0, as supported by Python 3.6.
|
||||
You can certainly use it in earlier versions. This simply makes sure that we
|
||||
get consistent results from running ftfy on different versions of Python.
|
||||
|
||||
The file will be written to the current directory.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import unicodedata
|
||||
import sys
|
||||
import zlib
|
||||
if sys.hexversion >= 0x03000000:
|
||||
unichr = chr
|
||||
|
||||
# L = Latin capital letter
|
||||
# l = Latin lowercase letter
|
||||
# A = Non-latin capital or title-case letter
|
||||
# a = Non-latin lowercase letter
|
||||
# C = Non-cased letter (Lo)
|
||||
# X = Control character (Cc)
|
||||
# m = Letter modifier (Lm)
|
||||
# M = Mark (Mc, Me, Mn)
|
||||
# N = Miscellaneous numbers (No)
|
||||
# P = Private use (Co)
|
||||
# 1 = Math symbol (Sm) or currency symbol (Sc)
|
||||
# 2 = Symbol modifier (Sk)
|
||||
# 3 = Other symbol (So)
|
||||
# S = UTF-16 surrogate
|
||||
# _ = Unassigned character
|
||||
# = Whitespace
|
||||
# o = Other
# i = IPA symbol or modifier (U+0250 to U+02FF, excluding the schwa)
|
||||
|
||||
|
||||
# General categories that map straight to one class code, with no further
# conditions on the codepoint.
_SIMPLE_CATEGORY_CLASSES = {
    'No': 'N',
    'Sm': '1',
    'Sc': '1',
    'Sk': '2',
    'So': '3',
    'Cc': 'X',
    'Cs': 'S',
    'Co': 'P',
}

# Class codes for letters whose Unicode name does not start with 'LATIN'.
_NON_LATIN_LETTER_CLASSES = {
    'Lu': 'A',
    'Lt': 'A',
    'Ll': 'a',
    'Lo': 'C',
    'Lm': 'm',
}


def _classify_codepoint(codepoint):
    """
    Return the one-character class code for a single codepoint.

    The checks are ordered: an earlier rule wins over a later one, so the
    codepoint-range exceptions are placed deliberately relative to the
    general-category rules.

    Raises ValueError for a letter whose category is none of Lu, Lt, Ll, Lo
    or Lm.
    """
    char = unichr(codepoint)
    category = unicodedata.category(char)

    # IPA symbols and modifiers. This excludes the schwa (U+0259), which is
    # used as a normal Latin letter in some languages.
    if 0x250 <= codepoint < 0x300 and codepoint != 0x259:
        return 'i'

    if category.startswith('L'):  # letters
        if unicodedata.name(char, '').startswith('LATIN'):
            return 'L' if category == 'Lu' else 'l'
        letter_class = _NON_LATIN_LETTER_CLASSES.get(category)
        if letter_class is None:
            raise ValueError('got some weird kind of letter')
        return letter_class

    # Variation selectors and skin-tone modifiers have the category of
    # non-spacing marks, but they act like symbols.
    if 0xfe00 <= codepoint <= 0xfe0f or 0x1f3fb <= codepoint <= 0x1f3ff:
        return '3'

    if category.startswith('M'):  # marks
        return 'M'
    if category in _SIMPLE_CATEGORY_CLASSES:
        return _SIMPLE_CATEGORY_CLASSES[category]
    if category.startswith('Z'):
        return ' '

    # This range is rapidly having emoji added to it. Assume that an
    # unassigned codepoint in this range is just a symbol we don't know yet.
    if 0x1f000 <= codepoint <= 0x1ffff:
        return '3'

    if category == 'Cn':
        return '_'
    return 'o'


def make_char_data_file(do_it_anyway=False):
    """
    Build the compressed data file 'char_classes.dat' and write it to the
    current directory.

    The file holds one ASCII class code per codepoint, for U+0000 through
    U+10FFFF, zlib-compressed.

    If you run this, run it in Python 3.6 or later. It will run in earlier
    versions, but you won't get the Unicode 9 standard, leading to
    inconsistent behavior.

    To protect against this, running this in the wrong version of Python will
    raise a RuntimeError unless you pass `do_it_anyway=True`.
    """
    if sys.hexversion < 0x03060000 and not do_it_anyway:
        raise RuntimeError(
            "This function should be run in Python 3.6 or later."
        )

    cclasses = [_classify_codepoint(cp) for cp in range(0x0, 0x110000)]

    # Mark whitespace control characters (tab, LF, FF, CR) as whitespace.
    for codepoint in (9, 10, 12, 13):
        cclasses[codepoint] = ' '

    # Some other exceptions for characters that are more commonly used as
    # punctuation or decoration than for their ostensible purpose.
    # For example, tilde is not usually a "math symbol", and the accents
    # U+0060 and U+00B4 are much more like quotation marks than modifiers.
    #
    # The characters are spelled as escapes so that text normalization cannot
    # silently alter them. The last two are FULLWIDTH CIRCUMFLEX ACCENT and
    # FULLWIDTH GRAVE ACCENT; this copy of the source showed them as repeated
    # ASCII '^' and '`', which looks like width-normalization damage.
    # NOTE(review): confirm against the upstream build script.
    for char in '^~`\xb4\u02dd\uff3e\uff40':
        cclasses[ord(char)] = 'o'

    # Compress before opening the file, so a failure here cannot leave a
    # truncated data file behind, and let `with` close the handle on error.
    compressed = zlib.compress(''.join(cclasses).encode('ascii'))
    with open('char_classes.dat', 'wb') as out:
        out.write(compressed)
|
||||
|
||||
# Regenerate char_classes.dat when this module is run as a script.
if __name__ == "__main__":
    make_char_data_file()
|
||||
Binary file not shown.
@@ -0,0 +1,214 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
This gives other modules access to the gritty details about characters and the
|
||||
encodings that use them.
|
||||
"""
|
||||
|
||||
import re
|
||||
import zlib
|
||||
import unicodedata
|
||||
import itertools
|
||||
from pkg_resources import resource_string
|
||||
from ftfy.compatibility import unichr
|
||||
|
||||
# These are the encodings we will try to fix in ftfy, in the
|
||||
# order that they should be tried.
|
||||
CHARMAP_ENCODINGS = [
    # The two encodings that are most often confused with each other.
    u'latin-1',
    u'sloppy-windows-1252',
    # Central European.
    u'sloppy-windows-1250',
    u'iso-8859-2',
    # Cyrillic.
    u'sloppy-windows-1251',
    # Legacy Mac and DOS code pages.
    u'macroman',
    u'cp437',
]
|
||||
|
||||
|
||||
def _build_regexes():
    """
    Build the table of regexes that detect, reasonably quickly, whether a
    string could be represented in a given encoding.

    Returns a dict mapping an encoding name to a compiled regex that matches
    only strings made entirely of characters available in that encoding. The
    simplest entry is 'ascii', which just checks that every character is
    between U+0000 and U+007F. There is one more entry for each name in
    CHARMAP_ENCODINGS.
    """
    # Define a regex that matches ASCII text.
    regexes = {u'ascii': re.compile('^[\x00-\x7f]*$')}

    # Bytes \x80 to \xFF, plus byte \x1A, which is used to represent the
    # replacement character U+FFFD in the sloppy-* encodings. The bytes are
    # the same for every encoding, so they are built once.
    high_chars = u''.join(unichr(i) for i in range(128, 256)) + '\x1a'
    high_bytes = high_chars.encode(u'latin-1')

    for name in CHARMAP_ENCODINGS:
        # The characters that those bytes decode to in this encoding.
        decoded_chars = high_bytes.decode(name)

        # The rest of the ASCII bytes -- bytes \x00 to \x19 and \x1B
        # to \x7F -- will decode as those ASCII characters in any encoding we
        # support, so we can just include them as ranges. This also lets us
        # not worry about escaping regex special characters, because all of
        # them are in the \x1B to \x7F range.
        pattern = u'^[\x00-\x19\x1b-\x7f{0}]*$'.format(decoded_chars)
        regexes[name] = re.compile(pattern)

    return regexes

ENCODING_REGEXES = _build_regexes()
|
||||
|
||||
|
||||
def _build_utf8_punct_regex():
    """
    Recognize UTF-8 mojibake that's so blatant that we can fix it even when the
    rest of the string doesn't decode as UTF-8 -- namely, UTF-8 sequences for
    the 'General Punctuation' characters U+2000 to U+2040, re-encoded in
    Windows-1252.

    These are recognizable by the distinctive 'â€' (bytes E2 80) sequence
    they all begin with when decoded as Windows-1252.

    Returns the compiled regex.
    """
    # We need every byte from 0x80 to 0xbf inside a character class. A plain
    # [\x80-\xbf] range would not work, because once those bytes are decoded
    # as Windows-1252 the resulting characters are not remotely contiguous.
    # So each byte is decoded individually and listed explicitly.
    latin1_chars = [unichr(i) for i in range(0x80, 0xc0)]
    continuation_bytes = ''.join(latin1_chars).encode(u'latin-1')
    continuation_chars = continuation_bytes.decode(u'sloppy-windows-1252')
    return re.compile(u'â€[' + continuation_chars + u']')

PARTIAL_UTF8_PUNCT_RE = _build_utf8_punct_regex()
|
||||
|
||||
|
||||
# Recognize UTF-8 sequences that would be valid if it weren't for a b'\xa0'
|
||||
# that some Windows-1252 program converted to a plain space.
|
||||
#
|
||||
# The smaller values are included on a case-by-case basis, because we don't want
|
||||
# to decode likely input sequences to unlikely characters. These are the ones
|
||||
# that *do* form likely characters before 0xa0:
|
||||
#
|
||||
# 0xc2 -> U+A0 NO-BREAK SPACE
|
||||
# 0xc3 -> U+E0 LATIN SMALL LETTER A WITH GRAVE
|
||||
# 0xc5 -> U+160 LATIN CAPITAL LETTER S WITH CARON
|
||||
# 0xce -> U+3A0 GREEK CAPITAL LETTER PI
|
||||
# 0xd0 -> U+420 CYRILLIC CAPITAL LETTER ER
|
||||
#
|
||||
# These still need to come with a cost, so that they only get converted when
|
||||
# there's evidence that it fixes other things. Any of these could represent
|
||||
# characters that legitimately appear surrounded by spaces, particularly U+C5
|
||||
# (Å), which is a word in multiple languages!
|
||||
#
|
||||
# We should consider checking for b'\x85' being converted to ... in the future.
|
||||
# I've seen it once, but the text still wasn't recoverable.
|
||||
|
||||
ALTERED_UTF8_RE = re.compile(b'|'.join([
    # Two-byte sequences: selected lead bytes followed by the space.
    b'[\xc2\xc3\xc5\xce\xd0][ ]',
    # Three-byte sequences with the space in either continuation position.
    b'[\xe0-\xef][ ][\x80-\xbf]',
    b'[\xe0-\xef][\x80-\xbf][ ]',
    # Four-byte sequences with the space in any continuation position.
    b'[\xf0-\xf4][ ][\x80-\xbf][\x80-\xbf]',
    b'[\xf0-\xf4][\x80-\xbf][ ][\x80-\xbf]',
    b'[\xf0-\xf4][\x80-\xbf][\x80-\xbf][ ]',
]))
|
||||
|
||||
# This expression matches UTF-8 and CESU-8 sequences where some of the
|
||||
# continuation bytes have been lost. The byte 0x1a (sometimes written as ^Z) is
|
||||
# used within ftfy to represent a byte that produced the replacement character
|
||||
# \ufffd. We don't know which byte it was, but we can at least decode the UTF-8
|
||||
# sequence as \ufffd instead of failing to re-decode it at all.
|
||||
LOSSY_UTF8_RE = re.compile(b'|'.join([
    # Two-byte sequence whose continuation byte was lost.
    b'[\xc2-\xdf][\x1a]',
    # CESU-8 surrogate pairs with a lost byte in either half.
    b'\xed[\xa0-\xaf][\x1a]\xed[\xb0-\xbf][\x1a\x80-\xbf]',
    b'\xed[\xa0-\xaf][\x1a\x80-\xbf]\xed[\xb0-\xbf][\x1a]',
    # Three-byte sequences with at least one lost continuation byte.
    b'[\xe0-\xef][\x1a][\x1a\x80-\xbf]',
    b'[\xe0-\xef][\x1a\x80-\xbf][\x1a]',
    # Four-byte sequences with at least one lost continuation byte.
    b'[\xf0-\xf4][\x1a][\x1a\x80-\xbf][\x1a\x80-\xbf]',
    b'[\xf0-\xf4][\x1a\x80-\xbf][\x1a][\x1a\x80-\xbf]',
    b'[\xf0-\xf4][\x1a\x80-\xbf][\x1a\x80-\xbf][\x1a]',
    # A lone lost byte.
    b'\x1a',
]))
|
||||
|
||||
# These regexes match various Unicode variations on single and double quotes.
|
||||
# U+2018 to U+201B: the left, right, low-9 and high-reversed-9 single quotes.
SINGLE_QUOTE_RE = re.compile(u'[\u2018\u2019\u201a\u201b]')
# U+201C to U+201F: the same four shapes of double quote.
DOUBLE_QUOTE_RE = re.compile(u'[\u201c\u201d\u201e\u201f]')
|
||||
|
||||
|
||||
def possible_encoding(text, encoding):
    """
    Check whether `text` could have been decoded from the single-byte
    encoding named `encoding`.

    In other words, check whether the text can be encoded in that encoding,
    possibly sloppily. `encoding` must be a key of ENCODING_REGEXES; any
    other name raises KeyError. Returns True or False.
    """
    detector = ENCODING_REGEXES[encoding]
    return detector.match(text) is not None
|
||||
|
||||
|
||||
# Load the packaged data file 'char_classes.dat', decompress it and decode it
# as ASCII. This runs once, at import time.
CHAR_CLASS_STRING = zlib.decompress(
    resource_string(__name__, 'char_classes.dat')
).decode(u'ascii')


def chars_to_classes(string):
    """
    Convert each Unicode character to a letter indicating which of many
    classes it's in.

    The conversion is a single `translate` call that uses CHAR_CLASS_STRING
    as the lookup table.

    See build_data.py for where this data comes from and what it means.
    """
    class_table = CHAR_CLASS_STRING
    return string.translate(class_table)
|
||||
|
||||
|
||||
def _build_control_char_mapping():
|
||||
"""
|
||||
Build a translate mapping that strips likely-unintended control characters.
|
||||
See :func:`ftfy.fixes.remove_control_chars` for a description of these
|
||||
codepoint ranges and why they should be removed.
|
||||
"""
|
||||
control_chars = {}
|
||||
|
||||
for i in itertools.chain(
|
||||
range(0x00, 0x09), [0x0b],
|
||||
range(0x0e, 0x20), [0x7f],
|
||||
range(0x206a, 0x2070),
|
||||
[0xfeff],
|
||||
range(0xfff9, 0xfffd),
|
||||
range(0x1d173, 0x1d17b),
|
||||
range(0xe0000, 0xe0080)
|
||||
):
|
||||
control_chars[i] = None
|
||||
|
||||
return control_chars
|
||||
CONTROL_CHARS = _build_control_char_mapping()
|
||||
|
||||
|
||||
# A translate mapping that breaks ligatures made of Latin letters. While
|
||||
# ligatures may be important to the representation of other languages, in
|
||||
# Latin letters they tend to represent a copy/paste error.
|
||||
#
|
||||
# Ligatures may also be separated by NFKC normalization, but that is sometimes
|
||||
# more normalization than you want.
|
||||
# Each key is the codepoint of ONE ligature character. The characters are
# written as escapes so that text normalization cannot expand them into
# multi-character strings, which would make `ord()` raise TypeError.
LIGATURES = {
    ord(u'\u0132'): u'IJ',       # LATIN CAPITAL LIGATURE IJ
    ord(u'\u0133'): u'ij',       # LATIN SMALL LIGATURE IJ
    ord(u'\ufb00'): u'ff',       # LATIN SMALL LIGATURE FF
    ord(u'\ufb01'): u'fi',       # LATIN SMALL LIGATURE FI
    ord(u'\ufb02'): u'fl',       # LATIN SMALL LIGATURE FL
    ord(u'\ufb03'): u'ffi',      # LATIN SMALL LIGATURE FFI
    ord(u'\ufb04'): u'ffl',      # LATIN SMALL LIGATURE FFL
    ord(u'\ufb05'): u'\u017ft',  # LATIN SMALL LIGATURE LONG S T -> long s + t
    ord(u'\ufb06'): u'st'        # LATIN SMALL LIGATURE ST
}
|
||||
|
||||
|
||||
def _build_width_map():
    """
    Build a translate mapping that replaces halfwidth and fullwidth forms
    with their standard-width forms.

    Returns a dict from codepoint to replacement string. A codepoint in
    U+FF01 to U+FFEF is included only when NFKC normalization changes it.
    """
    # Though it's not listed as a fullwidth character, we'll want to convert
    # U+3000 IDEOGRAPHIC SPACE to U+20 SPACE on the same principle, so start
    # with that in the dictionary.
    width_map = {0x3000: u' '}
    for codepoint in range(0xff01, 0xfff0):
        original = unichr(codepoint)
        normalized = unicodedata.normalize(u'NFKC', original)
        if normalized == original:
            continue
        width_map[codepoint] = normalized
    return width_map

WIDTH_MAP = _build_width_map()
|
||||
@@ -0,0 +1,122 @@
|
||||
"""
|
||||
A command-line utility for fixing text found in a file.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import io
|
||||
import codecs
|
||||
from ftfy import fix_file, __version__
|
||||
from ftfy.compatibility import PYTHON2
|
||||
|
||||
|
||||
ENCODE_ERROR_TEXT_UNIX = """ftfy error:
|
||||
Unfortunately, this output stream does not support Unicode.
|
||||
|
||||
Your system locale may be very old or misconfigured. You should use a locale
|
||||
that supports UTF-8. One way to do this is to `export LANG=C.UTF-8`.
|
||||
"""
|
||||
|
||||
ENCODE_ERROR_TEXT_WINDOWS = """ftfy error:
|
||||
Unfortunately, this output stream does not support Unicode.
|
||||
|
||||
You might be trying to output to the Windows Command Prompt (cmd.exe), which
|
||||
does not fully support Unicode for historical reasons. In general, we recommend
|
||||
finding a way to run Python without using cmd.exe.
|
||||
|
||||
You can work around this problem by using the '-o filename' option in ftfy to
|
||||
output to a file instead.
|
||||
"""
|
||||
|
||||
DECODE_ERROR_TEXT = """ftfy error:
|
||||
This input couldn't be decoded as %r. We got the following error:
|
||||
|
||||
%s
|
||||
|
||||
ftfy works best when its input is in a known encoding. You can use `ftfy -g`
|
||||
to guess, if you're desperate. Otherwise, give the encoding name with the
|
||||
`-e` option, such as `ftfy -e latin-1`.
|
||||
"""
|
||||
|
||||
|
||||
def main():
    """
    Run ftfy as a command-line utility.

    Parses the command-line arguments, reads the input file (or standard
    input) as bytes, passes it through `fix_file`, and writes each fixed line
    to the output file (or standard output).

    Exits with status 1, after writing an explanation to standard error, if
    the input cannot be decoded or the output stream cannot encode a fixed
    line. Files opened here are closed before returning or exiting; the
    standard streams are left open.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="ftfy (fixes text for you), version %s" % __version__
    )
    parser.add_argument('filename', default='-', nargs='?',
                        help='The file whose Unicode is to be fixed. Defaults '
                             'to -, meaning standard input.')
    parser.add_argument('-o', '--output', type=str, default='-',
                        help='The file to output to. Defaults to -, meaning '
                             'standard output.')
    parser.add_argument('-g', '--guess', action='store_true',
                        help="Ask ftfy to guess the encoding of your input. "
                             "This is risky. Overrides -e.")
    parser.add_argument('-e', '--encoding', type=str, default='utf-8',
                        help='The encoding of the input. Defaults to UTF-8.')
    parser.add_argument('-n', '--normalization', type=str, default='NFC',
                        help='The normalization of Unicode to apply. '
                             'Defaults to NFC. Can be "none".')
    parser.add_argument('--preserve-entities', action='store_true',
                        help="Leave HTML entities as they are. The default "
                             "is to decode them, as long as no HTML tags "
                             "have appeared in the file.")

    args = parser.parse_args()

    # -g overrides -e: no encoding is passed on when guessing is requested.
    encoding = None if args.guess else args.encoding

    normalization = args.normalization
    if normalization.lower() == 'none':
        normalization = None

    fix_entities = False if args.preserve_entities else 'auto'

    # Only the files opened here are tracked for closing; sys.stdin and
    # sys.stdout belong to the interpreter.
    opened_files = []
    try:
        if args.filename == '-':
            # Get a standard input stream made of bytes, so we can decode it
            # as whatever encoding is necessary.
            infile = sys.stdin if PYTHON2 else sys.stdin.buffer
        else:
            infile = open(args.filename, 'rb')
            opened_files.append(infile)

        if args.output == '-':
            # On Python 2, sys.stdout takes bytes, so lines are encoded first.
            encode_output = PYTHON2
            outfile = sys.stdout
        else:
            encode_output = False
            outfile = io.open(args.output, 'w', encoding='utf-8')
            opened_files.append(outfile)

        try:
            for line in fix_file(infile, encoding=encoding,
                                 fix_entities=fix_entities,
                                 normalization=normalization):
                if encode_output:
                    outfile.write(line.encode('utf-8'))
                else:
                    try:
                        outfile.write(line)
                    except UnicodeEncodeError:
                        if sys.platform == 'win32':
                            sys.stderr.write(ENCODE_ERROR_TEXT_WINDOWS)
                        else:
                            sys.stderr.write(ENCODE_ERROR_TEXT_UNIX)
                        sys.exit(1)
        except UnicodeDecodeError as err:
            sys.stderr.write(DECODE_ERROR_TEXT % (encoding, err))
            sys.exit(1)
    finally:
        # Runs on normal completion and on sys.exit() alike, so output written
        # so far is flushed and no file handle is leaked.
        for handle in opened_files:
            handle.close()
|
||||
|
||||
|
||||
# Run the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,55 @@
|
||||
"""
|
||||
Makes some function names and behavior consistent between Python 2 and
|
||||
Python 3, and also between narrow and wide builds.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import sys
|
||||
import unicodedata
|
||||
|
||||
# True when running under Python 2.
PYTHON2 = sys.version_info[0] < 3

# Give `unichr` and `xrange` the same meaning on both major versions.
if PYTHON2:
    unichr = unichr
    xrange = xrange
else:
    unichr = chr
    xrange = range

# True on Python 3.4 and every later version.
PYTHON34_OR_LATER = sys.version_info >= (3, 4)
|
||||
|
||||
|
||||
def _narrow_unichr_workaround(codepoint):
|
||||
"""
|
||||
A replacement for unichr() on narrow builds of Python. This will get
|
||||
us the narrow representation of an astral character, which will be
|
||||
a string of length two, containing two UTF-16 surrogates.
|
||||
"""
|
||||
escaped = b'\\U%08x' % codepoint
|
||||
return escaped.decode('unicode-escape')
|
||||
|
||||
|
||||
if sys.maxunicode < 0x10000:
|
||||
unichr = _narrow_unichr_workaround
|
||||
|
||||
|
||||
def bytes_to_ints(bytestring):
    """
    Make a sequence of integers from a bytestring, whatever version of
    Python this is.

    On Python 2 this returns a new list of the `ord` of each item. On
    Python 3 the argument is returned unchanged, because a 'bytes' object
    _is_ a sequence of integers.
    """
    if not PYTHON2:
        return bytestring
    return [ord(b) for b in bytestring]
|
||||
|
||||
|
||||
def is_printable(char):
    """
    Report whether `char` is printable.

    str.isprintable() is new in Python 3. It's useful in `explain_unicode`,
    so on Python 2 we make a crude approximation: a character is printable
    unless its Unicode category starts with 'C'.
    """
    if not PYTHON2:
        return char.isprintable()
    return not unicodedata.category(char).startswith('C')
|
||||
@@ -0,0 +1,664 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
This module contains the individual fixes that the main fix_text function
|
||||
can perform.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import sys
|
||||
import codecs
|
||||
import warnings
|
||||
from ftfy.chardata import (possible_encoding, CHARMAP_ENCODINGS,
|
||||
CONTROL_CHARS, LIGATURES, WIDTH_MAP,
|
||||
PARTIAL_UTF8_PUNCT_RE, ALTERED_UTF8_RE,
|
||||
LOSSY_UTF8_RE, SINGLE_QUOTE_RE, DOUBLE_QUOTE_RE)
|
||||
from ftfy.badness import text_cost
|
||||
from ftfy.compatibility import unichr
|
||||
from html5lib.constants import entities
|
||||
|
||||
|
||||
BYTES_ERROR_TEXT = """Hey wait, this isn't Unicode.
|
||||
|
||||
ftfy is designed to fix problems that were introduced by handling Unicode
|
||||
incorrectly. It might be able to fix the bytes you just handed it, but the
|
||||
fact that you just gave a pile of bytes to a function that fixes text means
|
||||
that your code is *also* handling Unicode incorrectly.
|
||||
|
||||
ftfy takes Unicode text as input. You should take these bytes and decode
|
||||
them from the encoding you think they are in. If you're not sure what encoding
|
||||
they're in:
|
||||
|
||||
- First, try to find out. 'utf-8' is a good assumption.
|
||||
- If the encoding is simply unknowable, try running your bytes through
|
||||
ftfy.guess_bytes. As the name implies, this may not always be accurate.
|
||||
|
||||
If you're confused by this, please read the Python Unicode HOWTO:
|
||||
|
||||
http://docs.python.org/%d/howto/unicode.html
|
||||
""" % sys.version_info[0]
|
||||
|
||||
|
||||
def fix_encoding(text):
    r"""
    Fix text with incorrectly-decoded garbage ("mojibake") whenever possible.

    This function looks for the evidence of mojibake, formulates a plan to fix
    it, and applies the plan. It determines whether it should replace nonsense
    sequences of single-byte characters that were really meant to be UTF-8
    characters, and if so, turns them into the correctly-encoded Unicode
    character that they were meant to represent.

    The input to the function must be Unicode. If you don't have Unicode text,
    you're not using the right tool to solve your problem.

    `fix_encoding` decodes text that looks like it was decoded incorrectly. It
    leaves alone text that doesn't. (The mojibake inputs below are written
    with escapes so that they survive any cleanup applied to this file.)

        >>> print(fix_encoding('\xc3\xbanico'))
        único

        >>> print(fix_encoding('This text is fine already :þ'))
        This text is fine already :þ

    Because these characters often come from Microsoft products, we allow
    for the possibility that we get not just Unicode characters 128-255, but
    also Windows's conflicting idea of what characters 128-160 are.

        >>> print(fix_encoding('This \xe2\u20ac\u201d should be an em dash'))
        This — should be an em dash

    We might have to deal with both Windows characters and raw control
    characters at the same time, especially when dealing with characters like
    0x81 that have no mapping in Windows. This is a string that Python's
    standard `.encode` and `.decode` methods cannot correct.

        >>> print(fix_encoding('This text is sad .â\x81”.'))
        This text is sad .⁔.

    However, it has safeguards against fixing sequences of letters and
    punctuation that can occur in valid text. In the following example,
    the last three characters are not replaced with a Korean character,
    even though they could be.

        >>> print(fix_encoding('not such a fan of Charlotte Brontë…”'))
        not such a fan of Charlotte Brontë…”

    This function can now recover some complex manglings of text, such as when
    UTF-8 mojibake has been normalized in a way that replaces U+A0 with a
    space:

        >>> print(fix_encoding('The more you know \xf0\u0178\u0152 '))
        The more you know 🌠

    Cases of genuine ambiguity can sometimes be addressed by finding other
    characters that are not double-encoded, and expecting the encoding to
    be consistent:

        >>> print(fix_encoding('AHÅ™, the new sofa from IKEA®'))
        AHÅ™, the new sofa from IKEA®

    Finally, we handle the case where the text is in a single-byte encoding
    that was intended as Windows-1252 all along but read as Latin-1:

        >>> print(fix_encoding('This text was never UTF-8 at all\x85'))
        This text was never UTF-8 at all…

    The best version of the text is found using
    :func:`ftfy.badness.text_cost`.
    """
    # Only the fixed text is wanted here; the plan that produced it is dropped.
    fixed, _plan = fix_encoding_and_explain(text)
    return fixed
|
||||
|
||||
|
||||
def fix_text_encoding(text):
    """
    A deprecated name for :func:`ftfy.fixes.fix_encoding`.

    Emits a DeprecationWarning, then returns the result of calling
    `fix_encoding` on `text`.
    """
    # stacklevel=2 attributes the warning to the code that called this
    # deprecated name rather than to this line, so the report points at the
    # call site that needs updating.
    warnings.warn('fix_text_encoding is now known as fix_encoding',
                  DeprecationWarning, stacklevel=2)
    return fix_encoding(text)
|
||||
|
||||
|
||||
# When we support discovering mojibake in more encodings, we run the risk
|
||||
# of more false positives. We can mitigate false positives by assigning an
|
||||
# additional cost to using encodings that are rarer than Windows-1252, so
|
||||
# that these encodings will only be used if they fix multiple problems.
|
||||
ENCODING_COSTS = {
    # Extra cost 2.
    'iso-8859-2': 2,
    'macroman': 2,
    'sloppy-windows-1250': 2,
    # Extra cost 3.
    'cp437': 3,
    'sloppy-windows-1251': 3,
}
|
||||
|
||||
|
||||
def fix_encoding_and_explain(text):
    """
    Re-decode text that has been decoded incorrectly, and also return a
    "plan" indicating all the steps required to fix it.

    Single fixing steps are applied repeatedly until a step leaves the text
    unchanged. After each step the candidate is scored as its `text_cost`
    plus the cost of every plan step taken so far; the cheapest candidate
    seen, starting from the unmodified input, is the one returned.

    Returns a `(fixed_text, plan)` tuple. The resulting plan could be used
    with :func:`ftfy.fixes.apply_plan` to fix additional strings that are
    broken in the same way.
    """
    best_version, best_cost, best_plan = text, text_cost(text), []
    steps_taken = []
    current = text
    while True:
        previous = current
        current, new_steps = fix_one_step_and_explain(current)
        steps_taken.extend(new_steps)

        total_cost = text_cost(current) + sum(
            step_cost for _, _, step_cost in steps_taken
        )
        if total_cost < best_cost:
            best_version = current
            best_cost = total_cost
            # Copy the list: steps_taken keeps growing on later iterations.
            best_plan = list(steps_taken)

        if current == previous:
            return best_version, best_plan
|
||||
|
||||
|
||||
def fix_one_step_and_explain(text):
    """
    Performs a single step of re-decoding text that's been decoded incorrectly.

    Returns the decoded text, plus a "plan" for how to reproduce what it did.
    The plan is a list of `(operation, name, cost)` tuples; it is empty when
    the text is returned unchanged.

    Raises UnicodeError if `text` is a bytes object.
    """
    if isinstance(text, bytes):
        raise UnicodeError(BYTES_ERROR_TEXT)
    if len(text) == 0:
        return text, []

    # The first plan is to return ASCII text unchanged.
    if possible_encoding(text, 'ascii'):
        return text, []

    # As we go through the next step, remember the possible encodings
    # that we encounter but don't successfully fix yet. We may need them
    # later.
    possible_1byte_encodings = []

    # Suppose the text was supposed to be UTF-8, but it was decoded using
    # a single-byte encoding instead. When these cases can be fixed, they
    # are usually the correct thing to do, so try them next.
    for encoding in CHARMAP_ENCODINGS:
        if possible_encoding(text, encoding):
            encoded_bytes = text.encode(encoding)
            # Encodings without an entry in ENCODING_COSTS cost nothing extra.
            encode_step = ('encode', encoding, ENCODING_COSTS.get(encoding, 0))
            transcode_steps = []

            # Now, find out if it's UTF-8 (or close enough). Otherwise,
            # remember the encoding for later.
            try:
                decoding = 'utf-8'
                # Check encoded_bytes for sequences that would be UTF-8,
                # except they have b' ' where b'\xa0' would belong.
                if ALTERED_UTF8_RE.search(encoded_bytes):
                    encoded_bytes = restore_byte_a0(encoded_bytes)
                    # Counted after the restoration: every \xa0 byte now in
                    # the data adds 2 to the cost of this step.
                    cost = encoded_bytes.count(b'\xa0') * 2
                    transcode_steps.append(('transcode', 'restore_byte_a0', cost))

                # Check for the byte 0x1a, which indicates where one of our
                # 'sloppy' codecs found a replacement character.
                if encoding.startswith('sloppy') and b'\x1a' in encoded_bytes:
                    encoded_bytes = replace_lossy_sequences(encoded_bytes)
                    transcode_steps.append(('transcode', 'replace_lossy_sequences', 0))

                # Bytes \xed and \xc0 switch the decoder to 'utf-8-variants'.
                if b'\xed' in encoded_bytes or b'\xc0' in encoded_bytes:
                    decoding = 'utf-8-variants'

                decode_step = ('decode', decoding, 0)
                steps = [encode_step] + transcode_steps + [decode_step]
                # If this raises UnicodeDecodeError, the steps built above are
                # discarded and the next encoding is tried.
                fixed = encoded_bytes.decode(decoding)
                return fixed, steps

            except UnicodeDecodeError:
                possible_1byte_encodings.append(encoding)

    # Look for a-hat-euro sequences that remain, and fix them in isolation.
    if PARTIAL_UTF8_PUNCT_RE.search(text):
        steps = [('transcode', 'fix_partial_utf8_punct_in_1252', 1)]
        fixed = fix_partial_utf8_punct_in_1252(text)
        return fixed, steps

    # The next most likely case is that this is Latin-1 that was intended to
    # be read as Windows-1252, because those two encodings in particular are
    # easily confused.
    if 'latin-1' in possible_1byte_encodings:
        # NOTE(review): possible_1byte_encodings only ever receives names from
        # CHARMAP_ENCODINGS. If that list contains 'sloppy-windows-1252' but
        # no plain 'windows-1252', this branch can never be taken -- confirm
        # against ftfy.chardata before relying on it.
        if 'windows-1252' in possible_1byte_encodings:
            # This text is in the intersection of Latin-1 and
            # Windows-1252, so it's probably legit.
            return text, []
        else:
            # Otherwise, it means we have characters that are in Latin-1 but
            # not in Windows-1252. Those are C1 control characters. Nobody
            # wants those. Assume they were meant to be Windows-1252. Don't
            # use the sloppy codec, because bad Windows-1252 characters are
            # a bad sign.
            encoded = text.encode('latin-1')
            try:
                fixed = encoded.decode('windows-1252')
                # The plan stays empty when the round trip changes nothing.
                steps = []
                if fixed != text:
                    steps = [('encode', 'latin-1', 0),
                             ('decode', 'windows-1252', 1)]
                return fixed, steps
            except UnicodeDecodeError:
                # This text contained characters that don't even make sense
                # if you assume they were supposed to be Windows-1252. In
                # that case, let's not assume anything.
                pass

    # The cases that remain are mixups between two different single-byte
    # encodings, and not the common case of Latin-1 vs. Windows-1252.
    #
    # These cases may be unsolvable without adding false positives, though
    # I have vague ideas about how to optionally address them in the future.

    # Return the text unchanged; the plan is empty.
    return text, []
|
||||
|
||||
|
||||
def apply_plan(text, plan):
    """
    Apply a plan for fixing the encoding of text.

    The plan is a list of tuples of the form (operation, encoding, cost):

    - `operation` is 'encode' if it turns a string into bytes, 'decode' if it
      turns bytes into a string, and 'transcode' if it keeps the type the same.
    - `encoding` is the name of the encoding to use, such as 'utf-8' or
      'latin-1', or the function name in the case of 'transcode'.
    - The `cost` does not affect how the plan itself works. It's used by other
      users of plans, namely `fix_encoding_and_explain`, which has to decide
      *which* plan to use.

    Returns the object produced by the last step, or `text` itself when the
    plan is empty. Raises ValueError for an unknown operation or an unknown
    transcode function name.
    """
    result = text
    for operation, encoding, _ in plan:
        if operation == 'transcode':
            if encoding not in TRANSCODERS:
                raise ValueError("Unknown transcode operation: %s" % encoding)
            result = TRANSCODERS[encoding](result)
        elif operation == 'encode':
            result = result.encode(encoding)
        elif operation == 'decode':
            result = result.decode(encoding)
        else:
            raise ValueError("Unknown plan step: %s" % operation)
    return result
|
||||
|
||||
|
||||
# An entity candidate: '&', an optional '#', at most 8 word characters, ';'.
HTML_ENTITY_RE = re.compile(r"&#?\w{0,8};")


def unescape_html(text):
    """
    Decode all three types of HTML entities/character references: named
    entities, decimal references, and hexadecimal references (written with
    either `&#x` or `&#X`).

    Anything that looks like an entity but cannot be decoded is left as is.

    Code by Fredrik Lundh of effbot.org. Rob Speer made a slight change
    to it for efficiency: it won't match entities longer than 8 characters,
    because there are no valid entities like that.

        >>> print(unescape_html('&lt;tag&gt;'))
        <tag>
    """
    def fixup(match):
        """
        Replace one matched HTML entity with the character it represents,
        if possible.
        """
        entity = match.group(0)
        if entity.startswith("&#"):
            # Numeric character reference. HTML accepts both 'x' and 'X' as
            # the hexadecimal marker.
            try:
                if entity[2:3] in ("x", "X"):
                    codept = int(entity[3:-1], 16)
                else:
                    codept = int(entity[2:-1])
                if 0x80 <= codept < 0xa0:
                    # Decode this range of characters as Windows-1252, as Web
                    # browsers do in practice.
                    return unichr(codept).encode('latin-1').decode('sloppy-windows-1252')
                return unichr(codept)
            except ValueError:
                # Not a number, or not a valid codepoint: leave as is.
                return entity
        # Named entity; the lookup key keeps the trailing ';'. An unknown
        # name is left as is.
        return entities.get(entity[1:], entity)

    return HTML_ENTITY_RE.sub(fixup, text)
|
||||
|
||||
|
||||
# ESC, '[', any run of digits and semicolons, then one ASCII letter.
ANSI_RE = re.compile('\033\\[((?:\\d|;)*)([a-zA-Z])')


def remove_terminal_escapes(text):
    r"""
    Strip out "ANSI" terminal escape sequences, such as those that produce
    colored text on Unix.

    Returns the text with every match of ANSI_RE deleted.

        >>> print(remove_terminal_escapes(
        ...     "\033[36;44mI'm blue, da ba dee da ba doo...\033[0m"
        ... ))
        I'm blue, da ba dee da ba doo...
    """
    return re.sub(ANSI_RE, '', text)
|
||||
|
||||
|
||||
def uncurl_quotes(text):
    r"""
    Replace curly quotation marks with straight equivalents.

    Double quotes are straightened first, then single quotes.

        >>> print(uncurl_quotes('\u201chere\u2019s a test\u201d'))
        "here's a test"
    """
    straight_doubles = DOUBLE_QUOTE_RE.sub('"', text)
    return SINGLE_QUOTE_RE.sub("'", straight_doubles)
|
||||
|
||||
|
||||
def fix_latin_ligatures(text):
    """
    Replace single-character ligatures of Latin letters, such as '\ufb01',
    with the characters that they contain, as in 'fi'. Latin ligatures are
    usually not intended in text strings (though they're lovely in *rendered*
    text). If you have such a ligature in your string, it is probably a
    result of a copy-and-paste glitch.

    We leave ligatures in other scripts alone to be safe. They may be intended,
    and removing them may lose information. If you want to take apart nearly
    all ligatures, use NFKC normalization.

        >>> print(fix_latin_ligatures("\ufb02u\ufb03est"))
        fluffiest
    """
    ligature_table = LIGATURES
    return text.translate(ligature_table)
|
||||
|
||||
|
||||
def fix_character_width(text):
    """
    Replace "halfwidth" and "fullwidth" character variants with their
    standard forms.

    ASCII characters, katakana, and Hangul characters all have such alternate
    forms, which exist to help text line up in a grid. If you don't need
    those width properties, the standard form is probably what you want.

    Note that this replaces the ideographic space, U+3000, with the ASCII
    space, U+20.

        >>> print(fix_character_width("ＬＯＵＤ　ＮＯＩＳＥＳ"))
        LOUD NOISES
        >>> print(fix_character_width("Ｕﾀｰﾝ"))  # this means "U-turn"
        Uターン
    """
    standard_width = text.translate(WIDTH_MAP)
    return standard_width
|
||||
|
||||
|
||||
def fix_line_breaks(text):
    r"""
    Normalize every kind of line break to the Unix line break, \\n.

    The sequences that get converted are:

    - CRLF (\\r\\n), used on Windows and in some communication protocols
    - CR (\\r), once used on Mac OS Classic, and still produced by some
      software such as Microsoft Office for Mac
    - LINE SEPARATOR (\\u2028) and PARAGRAPH SEPARATOR (\\u2029), defined by
      Unicode
    - NEXT LINE (\\x85), a C1 control character

    NEXT LINE is an odd case: it usually won't show up if `fix_encoding` is
    also being run, because \\x85 is very common mojibake for \\u2026,
    HORIZONTAL ELLIPSIS.

        >>> print(fix_line_breaks(
        ...     "This string is made of two things:\u2029"
        ...     "1. Unicode\u2028"
        ...     "2. Spite"
        ... ))
        This string is made of two things:
        1. Unicode
        2. Spite

    For further testing and examples, here is a helper that shows control
    characters in their escaped form:

        >>> def eprint(text):
        ...     print(text.encode('unicode-escape').decode('ascii'))

        >>> eprint(fix_line_breaks("Content-type: text/plain\r\n\r\nHi."))
        Content-type: text/plain\n\nHi.

        >>> eprint(fix_line_breaks("This is how Microsoft \r trolls Mac users"))
        This is how Microsoft \n trolls Mac users

        >>> eprint(fix_line_breaks("What is this \x85 I don't even"))
        What is this \n I don't even
    """
    # CRLF has to be collapsed first; otherwise its CR would be converted on
    # its own and each CRLF would turn into two line breaks.
    result = text.replace('\r\n', '\n')
    for line_break in ('\r', '\u2028', '\u2029', '\u0085'):
        result = result.replace(line_break, '\n')
    return result
|
||||
|
||||
|
||||
# Any single codepoint in the surrogate range, U+D800 to U+DFFF.
SURROGATE_RE = re.compile('[\ud800-\udfff]')
# A codepoint in U+D800..U+DBFF immediately followed by one in U+DC00..U+DFFF.
SURROGATE_PAIR_RE = re.compile('[\ud800-\udbff][\udc00-\udfff]')
|
||||
|
||||
|
||||
def convert_surrogate_pair(match):
    """
    Given a regex match covering a surrogate pair, return the single
    codepoint that the pair represents.

    This implements the formula described at:
    http://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates
    """
    matched = match.group(0)
    high_part = (ord(matched[0]) - 0xd800) * 0x400
    low_part = ord(matched[1]) - 0xdc00
    return unichr(0x10000 + high_part + low_part)
|
||||
|
||||
|
||||
def fix_surrogates(text):
    """
    Replace 16-bit surrogate codepoints with the characters they represent
    when they are properly paired, and with U+FFFD when they are not.

        >>> high_surrogate = unichr(0xd83d)
        >>> low_surrogate = unichr(0xdca9)
        >>> print(fix_surrogates(high_surrogate + low_surrogate))
        💩
        >>> print(fix_surrogates(low_surrogate + high_surrogate))
        ��

    The doctest above builds the surrogates with `unichr` on purpose: even
    writing their Unicode escapes in this docstring caused various tools to
    fail, which goes to show why this fixer is necessary.
    """
    if not SURROGATE_RE.search(text):
        # Nothing to do; hand the input back untouched.
        return text

    # Combine well-formed pairs first, so that only stray surrogates are
    # left for the second pass to replace.
    combined = SURROGATE_PAIR_RE.sub(convert_surrogate_pair, text)
    return SURROGATE_RE.sub('\ufffd', combined)
|
||||
|
||||
|
||||
def remove_control_chars(text):
    """
    Strip out control characters that you probably didn't mean to have in
    your text. Many of them are listed in the table of "Characters not
    suitable for use with markup" at
    http://www.unicode.org/reports/tr20/tr20-9.html.

    The characters removed include:

    - ASCII control characters, except for the important whitespace
      characters (U+00 to U+08, U+0B, U+0E to U+1F, U+7F)
    - Deprecated Arabic control characters (U+206A to U+206F)
    - Interlinear annotation characters (U+FFF9 to U+FFFB)
    - The Object Replacement Character (U+FFFC)
    - The byte order mark (U+FEFF)
    - Musical notation control characters (U+1D173 to U+1D17A)
    - Tag characters (U+E0000 to U+E007F)

    These similar characters, however, are kept:

    - Control characters that produce whitespace (U+09, U+0A, U+0C, U+0D,
      U+2028, and U+2029)
    - C1 control characters (U+80 to U+9F) -- they are basically never used
      intentionally, but they are important clues about what mojibake has
      happened
    - Control characters that affect glyph rendering, such as joiners and
      right-to-left marks (U+200C to U+200F, U+202A to U+202E)
    """
    cleaned = text.translate(CONTROL_CHARS)
    return cleaned
|
||||
|
||||
|
||||
def remove_bom(text):
    r"""
    Strip a leading byte-order mark (U+FEFF) that was accidentally decoded
    as though it were part of the text.

        >>> print(remove_bom("\ufeffWhere do you want to go today?"))
        Where do you want to go today?
    """
    byte_order_mark = unichr(0xfeff)
    return text.lstrip(byte_order_mark)
|
||||
|
||||
|
||||
# A regex matching the escape sequences that Python string literals accept.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U........      # 8-digit hex escapes
    | \\u....          # 4-digit hex escapes
    | \\x..            # 2-digit hex escapes
    | \\[0-7]{1,3}     # Octal escapes
    | \\N\{[^}]+\}     # Unicode characters by name
    | \\[\\'"abfnrtv]  # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(text):
    r"""
    Decode backslashed escape sequences, including \\x, \\u, and \\U
    character references, even when the string also contains literal
    Unicode.

    Python's "string-escape" and "unicode-escape" codecs were meant to do
    this, but unlike them, this actually works. The result matches the way
    the Python interpreter decodes its own string literals.

        >>> factoid = '\\u20a1 is the currency symbol for the colón.'
        >>> print(factoid[1:])
        u20a1 is the currency symbol for the colón.
        >>> print(decode_escapes(factoid))
        ₡ is the currency symbol for the colón.

    Python itself can read string literals that mix escapes with literal
    Unicode -- you're looking at one right now -- but the "unicode-escape"
    codec doesn't work on literal Unicode. (See
    http://stackoverflow.com/a/24519338/773754 for more details.)

    So this function finds only the parts of the string that are escape
    sequences and decodes those, leaving everything else alone. Every valid
    escape sequence consists of ASCII characters, which is what lets
    "unicode-escape" handle them correctly.

    `ftfy.fix_text` cannot apply this fix automatically: escaped text is not
    necessarily a mistake, and there is no way to distinguish text that's
    supposed to be escaped from text that isn't.
    """
    return ESCAPE_SEQUENCE_RE.sub(
        # Decode each matched escape sequence on its own.
        lambda found: codecs.decode(found.group(0), 'unicode-escape'),
        text
    )
|
||||
|
||||
|
||||
def restore_byte_a0(byts):
    """
    Put back bytes A0 that some process replaced with ASCII spaces.

    Some mojibake has been additionally altered by a process that said "hmm,
    byte A0, that's basically a space!" and swapped in byte 20. When that A0
    was part of a sequence we intend to decode as UTF-8, the change makes
    the sequence fail to decode.

    This finds sequences that would convincingly decode as UTF-8 if byte 20
    were changed to A0, and changes it back. For the purpose of deciding
    whether this is a good idea, this step gets a cost of twice the number
    of bytes that are changed.

    This is used as a step within `fix_encoding`.
    """
    return ALTERED_UTF8_RE.sub(
        # Within each matched sequence, every space becomes byte A0 again.
        lambda found: found.group(0).replace(b'\x20', b'\xa0'),
        byts
    )
|
||||
|
||||
|
||||
def replace_lossy_sequences(byts):
    """
    Find sequences where a "sloppy" codec lost information, marked by byte
    1A, and which would otherwise look like a UTF-8 sequence, and replace
    each of them with the UTF-8 encoding of U+FFFD.

    A further explanation:

    ftfy can now fix text in a few cases that it would previously fix
    incompletely, because it couldn't apply the fix to the entire string. A
    very common case is text that was erroneously decoded as windows-1252,
    but not by the "sloppy" windows-1252 that passes unassigned bytes
    through: the unassigned bytes were turned into U+FFFD (�) instead, so we
    can't tell what they were.

    This most commonly happens with curly quotation marks that appear
    ``â€œ like this â€�``.

    We can do better by building on ftfy's "sloppy codecs" to let them
    handle less-sloppy but more-lossy text. When they encounter the
    character ``�``, instead of refusing to encode it, they encode it as
    byte 1A -- an ASCII control code called SUBSTITUTE that once was meant
    for about the same purpose. A fixer can then look for UTF-8 sequences
    where some continuation bytes have been replaced by byte 1A, and decode
    the whole sequence as �; if that doesn't work, the byte just turns back
    into � itself.

    As a result, the above text ``â€œ like this â€�`` will decode as
    ``“ like this �``.

    If U+1A was actually in the original string, then the sloppy codecs will
    not be used, and this function will not be run, so your weird control
    character will be left alone but wacky fixes like this won't be
    possible.

    This is used as a step within `fix_encoding`.
    """
    replacement_char_utf8 = '\ufffd'.encode('utf-8')
    return LOSSY_UTF8_RE.sub(replacement_char_utf8, byts)
|
||||
|
||||
|
||||
def fix_partial_utf8_punct_in_1252(text):
    """
    Repair particular characters that turn up in the wild encoded as UTF-8
    but decoded as Latin-1 or Windows-1252, even when the same fix can't be
    applied consistently to the whole string.

    This function assumes the text was decoded as Windows-1252. If it was
    decoded as Latin-1, this is called right after the text has gone through
    the Latin-1-to-Windows-1252 fixer.

    This is used as a step within `fix_encoding`.
    """
    return PARTIAL_UTF8_PUNCT_RE.sub(
        # Undo the wrong decoding for just the matched characters.
        lambda found: found.group(0).encode('sloppy-windows-1252').decode('utf-8'),
        text
    )
|
||||
|
||||
|
||||
# Lookup table from a step's name to the function that implements it. Each
# of these functions is documented as a step within `fix_encoding`.
TRANSCODERS = {
    'restore_byte_a0': restore_byte_a0,
    'replace_lossy_sequences': replace_lossy_sequences,
    'fix_partial_utf8_punct_in_1252': fix_partial_utf8_punct_in_1252
}
|
||||
@@ -0,0 +1,154 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This module provides functions for justifying Unicode text in a monospaced
|
||||
display such as a terminal.
|
||||
|
||||
We used to have our own implementation here, but now we mostly rely on
|
||||
the 'wcwidth' library.
|
||||
"""
|
||||
from __future__ import unicode_literals, division
|
||||
from unicodedata import normalize
|
||||
from wcwidth import wcwidth, wcswidth
|
||||
|
||||
|
||||
def character_width(char):
    r"""
    Return the width at which a single character is likely to be displayed
    in a monospaced terminal. For a printable character this is always 0, 1,
    or 2.

    Nonprintable or control characters give -1, a convention that comes from
    wcwidth.

        >>> character_width('車')
        2
        >>> character_width('A')
        1
        >>> character_width('\N{ZERO WIDTH JOINER}')
        0
        >>> character_width('\n')
        -1
    """
    cells = wcwidth(char)
    return cells
|
||||
|
||||
|
||||
def monospaced_width(text):
    """
    Return the "display width" of a string: the number of character cells it
    is likely to occupy when displayed in a monospaced, modern,
    Unicode-aware terminal emulator.

    This is useful for formatting text that may contain non-spacing
    characters, or CJK characters that take up two character cells.

    Returns -1 if the string contains a non-printable or control character.

        >>> monospaced_width('ちゃぶ台返し')
        12
        >>> len('ちゃぶ台返し')
        6
        >>> monospaced_width('owl\N{SOFT HYPHEN}flavored')
        12
        >>> monospaced_width('example\x80')
        -1

        # The Korean word 'ibnida' can be written with 3 characters or 7 jamo.
        # Either way, it *looks* the same and takes up 6 character cells.
        >>> monospaced_width('입니다')
        6
        >>> monospaced_width('\u110b\u1175\u11b8\u1102\u1175\u1103\u1161')
        6
    """
    # Measuring the NFC-normalized form means Hangul jamo need no special
    # handling here.
    composed = normalize('NFC', text)
    return wcswidth(composed)
|
||||
|
||||
|
||||
def display_ljust(text, width, fillchar=' '):
    """
    Left-justify `text` within a Unicode string whose display width, in a
    monospaced terminal, should be at least `width` character cells. The
    remainder is filled with `fillchar`, which must be a width-1 character;
    otherwise ValueError is raised.

    "Left" here means toward the beginning of the string, which may actually
    appear on the right in an RTL context. This is similar to the use of the
    word "left" in "left parenthesis".

        >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
        >>> for line in lines:
        ...     print(display_ljust(line, 20, '▒'))
        Table flip▒▒▒▒▒▒▒▒▒▒
        (╯°□°)╯︵ ┻━┻▒▒▒▒▒▒▒
        ちゃぶ台返し▒▒▒▒▒▒▒▒

    This example, and the similar ones in the other justification functions,
    should come out justified correctly when viewed in a monospaced terminal.
    It will probably not look correct if you're viewing this code or
    documentation in a Web browser.
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    cells_used = monospaced_width(text)
    if cells_used == -1:
        # A control character makes the width unknowable; add no padding.
        return text

    fill_count = max(0, width - cells_used)
    filler = fillchar * fill_count
    return text + filler
|
||||
|
||||
|
||||
def display_rjust(text, width, fillchar=' '):
    """
    Right-justify `text` within a Unicode string whose display width, in a
    monospaced terminal, should be at least `width` character cells. The
    remainder is filled with `fillchar`, which must be a width-1 character;
    otherwise ValueError is raised.

    "Right" here means toward the end of the string, which may actually be
    on the left in an RTL context. This is similar to the use of the word
    "right" in "right parenthesis".

        >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
        >>> for line in lines:
        ...     print(display_rjust(line, 20, '▒'))
        ▒▒▒▒▒▒▒▒▒▒Table flip
        ▒▒▒▒▒▒▒(╯°□°)╯︵ ┻━┻
        ▒▒▒▒▒▒▒▒ちゃぶ台返し
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    cells_used = monospaced_width(text)
    if cells_used == -1:
        # A control character makes the width unknowable; add no padding.
        return text

    fill_count = max(0, width - cells_used)
    filler = fillchar * fill_count
    return filler + text
|
||||
|
||||
|
||||
def display_center(text, width, fillchar=' '):
    """
    Center `text` within a Unicode string whose display width, in a
    monospaced terminal, should be at least `width` character cells. The
    remainder is filled with `fillchar`, which must be a width-1 character;
    otherwise ValueError is raised. When the padding can't be split evenly,
    the extra cell goes after the text.

        >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
        >>> for line in lines:
        ...     print(display_center(line, 20, '▒'))
        ▒▒▒▒▒Table flip▒▒▒▒▒
        ▒▒▒(╯°□°)╯︵ ┻━┻▒▒▒▒
        ▒▒▒▒ちゃぶ台返し▒▒▒▒
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    cells_used = monospaced_width(text)
    if cells_used == -1:
        # A control character makes the width unknowable; add no padding.
        return text

    fill_count = max(0, width - cells_used)
    before = fill_count // 2
    after = fill_count - before
    return ''.join([fillchar * before, text, fillchar * after])
|
||||
@@ -0,0 +1,47 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This file defines a general method for evaluating ftfy using data that arrives
|
||||
in a stream. A concrete implementation of it is found in `twitter_tester.py`.
|
||||
"""
|
||||
from __future__ import print_function, unicode_literals
|
||||
from ftfy import fix_text
|
||||
from ftfy.fixes import fix_encoding, unescape_html
|
||||
from ftfy.chardata import possible_encoding
|
||||
|
||||
|
||||
class StreamTester:
    """
    Consume a sequence of texts and display the ones that ftfy would change.
    Progress updates, such as the proportion of texts that changed, are
    printed periodically.
    """
    def __init__(self):
        # How many texts have been checked, and how many of them changed.
        self.count = 0
        self.num_fixed = 0

    def check_ftfy(self, text, encoding_only=True):
        """
        Check whether ftfy would change a single text input, and if so,
        display the change.

        With `encoding_only` set (the default), only `fix_encoding` is
        applied; otherwise `fix_text` is applied, with quote uncurling and
        character-width fixing turned off. HTML entities are unescaped
        before the comparison either way.
        """
        self.count += 1
        text = unescape_html(text)

        if not possible_encoding(text, 'ascii'):
            if encoding_only:
                fixed = fix_encoding(text)
            else:
                fixed = fix_text(
                    text, uncurl_quotes=False, fix_character_width=False
                )

            changed = (text != fixed)
            if changed:
                # possibly filter common bots before printing
                report = '\nText:\t{text!r}\nFixed:\t{fixed!r}\n'.format(
                    text=text, fixed=fixed
                )
                print(report)
                self.num_fixed += 1
            elif 'â€' in text or '\x80' in text:
                # Still contains a telltale sign of mojibake, yet nothing
                # was changed.
                print('\nNot fixed:\t{text!r}'.format(text=text))

        # Print status updates once in a while
        if self.count % 100 == 0:
            print('.', end='', flush=True)
        if self.count % 10000 == 0:
            print('\n%d/%d fixed' % (self.num_fixed, self.count))
|
||||
@@ -0,0 +1,72 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
Do what is necessary to authenticate this tester as a Twitter "app", using
|
||||
somebody's Twitter account.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import os
|
||||
|
||||
|
||||
AUTH_TOKEN_PATH = os.path.expanduser('~/.cache/oauth/twitter_ftfy.auth')
|
||||
|
||||
def get_auth():
    """
    Build the OAuth credentials this tester needs in order to act as a
    Twitter "app" on behalf of somebody's Twitter account.

    If a token file already exists at AUTH_TOKEN_PATH, the user's token is
    read from it. Otherwise the directory for it is created if necessary and
    the interactive `oauth_dance` is run, which stores the token at that
    path.

    About the "consumer secret": Twitter has some bizarre requirements about
    how to authorize an "app" to use its API. The user of the app logs in to
    get a secret token, which is fine. But the app itself also has its own
    "consumer secret" token, which the app has to know and the user of the
    app has to not know.

    That is, of course, impossible; it's equivalent to DRM. Your computer
    can't *really* make use of secret information while hiding the same
    information from you.

    The threat appears to be that someone holding this token could
    impersonate the app while doing something different. Well, of course
    they can, because they *have the source code* and can change it. They
    still have to log in as a particular user who has a token that's
    actually secret.

    Even developers of closed-source applications that use the Twitter API
    are unsure what to do, for good reason. These "secrets" are not secret
    in any cryptographic sense, and a bit of searching shows that the secret
    tokens for every popular Twitter app are already posted on the Web.

    Twitter wants us to pretend this string can be kept secret, and hide it
    behind a fig leaf like everybody else does. So that's what we've done.
    """
    from twitter.oauth import OAuth
    from twitter import oauth_dance, read_token_file

    def unhide(secret):
        """
        Do something mysterious and exactly as secure as every other Twitter
        app.
        """
        return ''.join([chr(ord(c) - 0x2800) for c in secret])

    fig_leaf = '⠴⡹⠹⡩⠶⠴⡶⡅⡂⡩⡅⠳⡏⡉⡈⠰⠰⡹⡥⡶⡈⡐⡍⡂⡫⡍⡗⡬⡒⡧⡶⡣⡰⡄⡧⡸⡑⡣⠵⡓⠶⠴⡁'
    consumer_key = 'OFhyNd2Zt4Ba6gJGJXfbsw'
    consumer_secret = unhide(fig_leaf)

    if not os.path.exists(AUTH_TOKEN_PATH):
        # First run: make sure there is somewhere to store the token, then
        # send the user through the authorization flow.
        authdir = os.path.dirname(AUTH_TOKEN_PATH)
        if not os.path.exists(authdir):
            os.makedirs(authdir)
        token, token_secret = oauth_dance(
            app_name='ftfy-tester',
            consumer_key=consumer_key,
            consumer_secret=consumer_secret,
            token_filename=AUTH_TOKEN_PATH
        )
    else:
        token, token_secret = read_token_file(AUTH_TOKEN_PATH)

    return OAuth(
        token=token,
        token_secret=token_secret,
        consumer_key=consumer_key,
        consumer_secret=consumer_secret
    )
|
||||
@@ -0,0 +1,88 @@
|
||||
"""
|
||||
Implements a StreamTester that runs over Twitter data. See the class
|
||||
docstring.
|
||||
|
||||
This module is written for Python 3 only. The __future__ imports you see here
|
||||
are just to let Python 2 scan the file without crashing with a SyntaxError.
|
||||
"""
|
||||
from __future__ import print_function, unicode_literals
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from ftfy.streamtester import StreamTester
|
||||
|
||||
|
||||
class TwitterTester(StreamTester):
    """
    This class uses the StreamTester code (defined in `__init__.py`) to
    evaluate ftfy's real-world performance, by feeding it live data from
    Twitter.

    This is a semi-manual evaluation. It requires a human to look at the
    results and determine if they are good. The three possible cases we
    can see here are:

    - Success: the process takes in mojibake and outputs correct text.
    - False positive: the process takes in correct text, and outputs
      mojibake. Every false positive should be considered a bug, and
      reported on GitHub if it isn't already.
    - Confusion: the process takes in mojibake and outputs different
      mojibake. Not a great outcome, but not as dire as a false
      positive.

    This tester cannot reveal false negatives. So far, that can only be
    done by the unit tests.
    """
    # Directory that the per-language tweet logs are appended to.
    OUTPUT_DIR = './twitterlogs'

    def __init__(self):
        # Tweet texts collected since the last save, grouped by the
        # language code found on the tweet's user.
        self.lines_by_lang = defaultdict(list)
        super().__init__()

    def save_files(self):
        """
        When processing data from live Twitter, save it to log files so that
        it can be replayed later.

        Each language's texts are appended, one per line, to
        `tweets.<lang>.txt` in OUTPUT_DIR; the in-memory buffer is then
        emptied.
        """
        # exist_ok avoids failing if the directory appears between a check
        # and the creation.
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)
        for lang, lines in self.lines_by_lang.items():
            filename = 'tweets.{}.txt'.format(lang)
            fullname = os.path.join(self.OUTPUT_DIR, filename)
            # The context manager closes the file even if a write fails.
            with open(fullname, 'a', encoding='utf-8') as langfile:
                for line in lines:
                    # Keep each tweet on one line of the log.
                    print(line.replace('\n', ' '), file=langfile)
        self.lines_by_lang = defaultdict(list)

    def run_sample(self):
        """
        Listen to live data from Twitter, and pass on the fully-formed tweets
        to `check_ftfy`. This requires the `twitter` Python package as a
        dependency.
        """
        from twitter import TwitterStream
        from ftfy.streamtester.oauth import get_auth
        twitter_stream = TwitterStream(auth=get_auth())
        iterator = twitter_stream.statuses.sample()
        for tweet in iterator:
            # Stream items without a 'text' key are skipped entirely, so
            # tweet['text'] is only ever read when it is present.
            if 'text' in tweet:
                self.check_ftfy(tweet['text'])
                if 'user' in tweet:
                    lang = tweet['user'].get('lang', 'NONE')
                    self.lines_by_lang[lang].append(tweet['text'])
                # Flush the collected tweets to disk once per 10000 checked.
                if self.count % 10000 == 100:
                    self.save_files()
|
||||
|
||||
|
||||
def main():
    """
    Entry point for command-line use: connect to the Twitter stream and run
    a TwitterTester on it forever, or at least until the stream drops.
    """
    TwitterTester().run_sample()


if __name__ == '__main__':
    main()
|
||||
@@ -33,8 +33,6 @@ def guess_filename(filename, options):
|
||||
if not options.get('yaml') and not options.get('json') and not options.get('show_property'):
|
||||
print('For:', filename)
|
||||
|
||||
options['implicit'] = True # Force implicit option in CLI
|
||||
|
||||
guess = api.guessit(filename, options)
|
||||
|
||||
if options.get('show_property'):
|
||||
|
||||
@@ -4,4 +4,4 @@
|
||||
Version module
|
||||
"""
|
||||
# pragma: no cover
|
||||
__version__ = '2.1.3.dev0'
|
||||
__version__ = '2.1.4'
|
||||
|
||||
@@ -126,7 +126,8 @@ class GuessItApi(object):
|
||||
for match in matches:
|
||||
if isinstance(match.value, six.text_type):
|
||||
match.value = match.value.encode("ascii")
|
||||
return matches.to_dict(options.get('advanced', False), options.get('implicit', False))
|
||||
return matches.to_dict(options.get('advanced', False), options.get('single_value', False),
|
||||
options.get('enforce_list', False))
|
||||
except:
|
||||
raise GuessitException(string, options)
|
||||
|
||||
|
||||
@@ -54,6 +54,10 @@ def build_argument_parser():
|
||||
help='Display the value of a single property (title, series, video_codec, year, ...)')
|
||||
output_opts.add_argument('-a', '--advanced', dest='advanced', action='store_true', default=None,
|
||||
help='Display advanced information for filename guesses, as json output')
|
||||
output_opts.add_argument('-s', '--single-value', dest='single_value', action='store_true', default=None,
|
||||
help='Keep only first value found for each property')
|
||||
output_opts.add_argument('-l', '--enforce-list', dest='enforce_list', action='store_true', default=None,
|
||||
help='Wrap each found value in a list even when property has a single value')
|
||||
output_opts.add_argument('-j', '--json', dest='json', action='store_true', default=None,
|
||||
help='Display information for filename guesses as json output')
|
||||
output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=None,
|
||||
|
||||
@@ -39,12 +39,12 @@ def audio_codec():
|
||||
rebulk.defaults(name="audio_codec", conflict_solver=audio_codec_priority)
|
||||
|
||||
rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
|
||||
rebulk.regex("Dolby", "DolbyDigital", "Dolby-Digital", "DDP?", value="DolbyDigital")
|
||||
rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='AC3')
|
||||
rebulk.regex("DolbyAtmos", "Dolby-Atmos", "Atmos", value="DolbyAtmos")
|
||||
rebulk.regex("AAC", value="AAC")
|
||||
rebulk.regex("AC3D?", value="AC3")
|
||||
rebulk.regex("Flac", value="FLAC")
|
||||
rebulk.regex("DTS", value="DTS")
|
||||
rebulk.string("AAC", value="AAC")
|
||||
rebulk.string('EAC3', 'DDP', 'DD+', value="EAC3")
|
||||
rebulk.string("Flac", value="FLAC")
|
||||
rebulk.string("DTS", value="DTS")
|
||||
rebulk.regex("True-?HD", value="TrueHD")
|
||||
|
||||
rebulk.defaults(name="audio_profile")
|
||||
|
||||
@@ -34,15 +34,17 @@ def container():
|
||||
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
|
||||
'iso', 'vob']
|
||||
torrent = ['torrent']
|
||||
nzb = ['nzb']
|
||||
|
||||
rebulk.regex(r'\.'+build_or_pattern(subtitles)+'$', exts=subtitles, tags=['extension', 'subtitle'])
|
||||
rebulk.regex(r'\.'+build_or_pattern(info)+'$', exts=info, tags=['extension', 'info'])
|
||||
rebulk.regex(r'\.'+build_or_pattern(videos)+'$', exts=videos, tags=['extension', 'video'])
|
||||
rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
|
||||
rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])
|
||||
|
||||
rebulk.defaults(name='container',
|
||||
validator=seps_surround,
|
||||
formatter=lambda s: s.upper(),
|
||||
formatter=lambda s: s.lower(),
|
||||
conflict_solver=lambda match, other: match
|
||||
if other.name in ['format',
|
||||
'video_codec'] or other.name == 'container' and 'extension' in other.tags
|
||||
@@ -51,5 +53,6 @@ def container():
|
||||
rebulk.string(*[sub for sub in subtitles if sub not in ['sub']], tags=['subtitle'])
|
||||
rebulk.string(*videos, tags=['video'])
|
||||
rebulk.string(*torrent, tags=['torrent'])
|
||||
rebulk.string(*nzb, tags=['nzb'])
|
||||
|
||||
return rebulk
|
||||
|
||||
@@ -24,12 +24,18 @@ def edition():
|
||||
conflict_solver=lambda match, other: other
|
||||
if other.name == 'episode_details' and other.value == 'Special'
|
||||
else '__default__')
|
||||
rebulk.string('SE', value='Special Edition', tags='has-neighbor')
|
||||
rebulk.string('se', value='Special Edition', tags='has-neighbor')
|
||||
rebulk.regex('criterion-edition', 'edition-criterion', value='Criterion Edition')
|
||||
rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe Edition')
|
||||
rebulk.regex('limited', 'limited-edition', value='Limited Edition')
|
||||
rebulk.regex('limited', 'limited-edition', value='Limited Edition', tags=['has-neighbor', 'release-group-prefix'])
|
||||
rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical Edition')
|
||||
rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
|
||||
value="Director's cut")
|
||||
value="Director's Cut")
|
||||
rebulk.regex('extended', 'extended-?cut', 'extended-?version',
|
||||
value='Extended', tags=['has-neighbor', 'release-group-prefix'])
|
||||
rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=['has-neighbor', 'release-group-prefix'])
|
||||
for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
|
||||
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
|
||||
rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
|
||||
|
||||
return rebulk
|
||||
|
||||
@@ -5,7 +5,7 @@ Episode title
|
||||
"""
|
||||
from collections import defaultdict
|
||||
|
||||
from rebulk import Rebulk, Rule, AppendMatch, RenameMatch, POST_PROCESS
|
||||
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PROCESS
|
||||
|
||||
from ..common import seps, title_seps
|
||||
from ..common.formatters import cleanup
|
||||
@@ -19,8 +19,12 @@ def episode_title():
|
||||
:return: Created Rebulk object
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
rebulk = Rebulk().rules(EpisodeTitleFromPosition,
|
||||
AlternativeTitleReplace,
|
||||
previous_names = ('episode', 'episode_details', 'episode_count',
|
||||
'season', 'season_count', 'date', 'title', 'year')
|
||||
|
||||
rebulk = Rebulk().rules(RemoveConflictsWithEpisodeTitle(previous_names),
|
||||
EpisodeTitleFromPosition(previous_names),
|
||||
AlternativeTitleReplace(previous_names),
|
||||
TitleToEpisodeTitle,
|
||||
Filepart3EpisodeTitle,
|
||||
Filepart2EpisodeTitle,
|
||||
@@ -28,6 +32,62 @@ def episode_title():
|
||||
return rebulk
|
||||
|
||||
|
||||
class RemoveConflictsWithEpisodeTitle(Rule):
|
||||
"""
|
||||
Remove conflicting matches that might lead to wrong episode_title parsing.
|
||||
"""
|
||||
|
||||
priority = 64
|
||||
consequence = RemoveMatch
|
||||
|
||||
def __init__(self, previous_names):
|
||||
super(RemoveConflictsWithEpisodeTitle, self).__init__()
|
||||
self.previous_names = previous_names
|
||||
self.next_names = ('streaming_service', 'screen_size', 'format',
|
||||
'video_codec', 'audio_codec', 'other', 'container')
|
||||
self.affected_if_holes_after = ('part', )
|
||||
self.affected_names = ('part', 'year')
|
||||
|
||||
def when(self, matches, context):
|
||||
to_remove = []
|
||||
for filepart in matches.markers.named('path'):
|
||||
for match in matches.range(filepart.start, filepart.end,
|
||||
predicate=lambda m: m.name in self.affected_names):
|
||||
before = matches.previous(match, index=0,
|
||||
predicate=lambda m, fp=filepart: not m.private and m.start >= fp.start)
|
||||
if not before or before.name not in self.previous_names:
|
||||
continue
|
||||
|
||||
after = matches.next(match, index=0,
|
||||
predicate=lambda m, fp=filepart: not m.private and m.end <= fp.end)
|
||||
if not after or after.name not in self.next_names:
|
||||
continue
|
||||
|
||||
group = matches.markers.at_match(match, predicate=lambda m: m.name == 'group', index=0)
|
||||
|
||||
def has_value_in_same_group(current_match, current_group=group):
|
||||
"""Return true if current match has value and belongs to the current group."""
|
||||
return current_match.value.strip(seps) and (
|
||||
current_group == matches.markers.at_match(current_match,
|
||||
predicate=lambda mm: mm.name == 'group', index=0)
|
||||
)
|
||||
|
||||
holes_before = matches.holes(before.end, match.start, predicate=has_value_in_same_group)
|
||||
holes_after = matches.holes(match.end, after.start, predicate=has_value_in_same_group)
|
||||
|
||||
if not holes_before and not holes_after:
|
||||
continue
|
||||
|
||||
if match.name in self.affected_if_holes_after and not holes_after:
|
||||
continue
|
||||
|
||||
to_remove.append(match)
|
||||
if match.parent:
|
||||
to_remove.append(match.parent)
|
||||
|
||||
return to_remove
|
||||
|
||||
|
||||
class TitleToEpisodeTitle(Rule):
|
||||
"""
|
||||
If multiple different title are found, convert the one following episode number to episode_title.
|
||||
@@ -65,12 +125,14 @@ class EpisodeTitleFromPosition(TitleBaseRule):
|
||||
"""
|
||||
dependency = TitleToEpisodeTitle
|
||||
|
||||
def __init__(self, previous_names):
|
||||
super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
|
||||
self.previous_names = previous_names
|
||||
|
||||
def hole_filter(self, hole, matches):
|
||||
episode = matches.previous(hole,
|
||||
lambda previous: any(name in previous.names
|
||||
for name in ['episode', 'episode_details',
|
||||
'episode_count', 'season', 'season_count',
|
||||
'date', 'title', 'year']),
|
||||
for name in self.previous_names),
|
||||
0)
|
||||
|
||||
crc32 = matches.named('crc32')
|
||||
@@ -88,9 +150,6 @@ class EpisodeTitleFromPosition(TitleBaseRule):
|
||||
return False
|
||||
return super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context)
|
||||
|
||||
def __init__(self):
|
||||
super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
|
||||
|
||||
def when(self, matches, context):
|
||||
if matches.named('episode_title'):
|
||||
return
|
||||
@@ -104,6 +163,10 @@ class AlternativeTitleReplace(Rule):
|
||||
dependency = EpisodeTitleFromPosition
|
||||
consequence = RenameMatch
|
||||
|
||||
def __init__(self, previous_names):
|
||||
super(AlternativeTitleReplace, self).__init__()
|
||||
self.previous_names = previous_names
|
||||
|
||||
def when(self, matches, context):
|
||||
if matches.named('episode_title'):
|
||||
return
|
||||
@@ -115,10 +178,7 @@ class AlternativeTitleReplace(Rule):
|
||||
if main_title:
|
||||
episode = matches.previous(main_title,
|
||||
lambda previous: any(name in previous.names
|
||||
for name in ['episode', 'episode_details',
|
||||
'episode_count', 'season',
|
||||
'season_count',
|
||||
'date', 'title', 'year']),
|
||||
for name in self.previous_names),
|
||||
0)
|
||||
|
||||
crc32 = matches.named('crc32')
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user