Compare commits
808 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6d444ebe99 | |||
| 237eafed35 | |||
| fbc5069fb8 | |||
| d23c44589e | |||
| 42cc500b05 | |||
| 81760192dc | |||
| 2cb077423d | |||
| de8aaaa5e5 | |||
| b9ebd4e1d6 | |||
| 8fdf1e841c | |||
| 9df92d0262 | |||
| a07d5aa440 | |||
| 54bd222605 | |||
| 6487258136 | |||
| d1935a4439 | |||
| 026c30642e | |||
| 036d036a61 | |||
| 2092d44627 | |||
| c6e7e64ba3 | |||
| a8f5ad6435 | |||
| afa0c3a1b0 | |||
| b3132d57b2 | |||
| 0a2a6b558f | |||
| adb9926928 | |||
| 3ce25007b5 | |||
| 5690ada2a7 | |||
| 76481186e9 | |||
| 8d2d2341c8 | |||
| 4e20d282f7 | |||
| edc3ce1ba4 | |||
| b9249ff09a | |||
| c3b2ffa97d | |||
| 4e3b8ee3c2 | |||
| a749ed4837 | |||
| 67ba6be6e2 | |||
| 7a47e6617d | |||
| 4a4c6e7df2 | |||
| 5661528862 | |||
| 696e9d6b64 | |||
| c0aa465827 | |||
| a6120ae27a | |||
| ba8a165aa5 | |||
| 833d7072ed | |||
| 9829137001 | |||
| c686214f56 | |||
| 2252d7ea6a | |||
| e7fbfca2d7 | |||
| 9ca959a20a | |||
| bd8e26ecab | |||
| 451b34dceb | |||
| 02761db660 | |||
| 42b7e9fa62 | |||
| edf6c25e17 | |||
| e91aac65cc | |||
| 01d5a18af8 | |||
| 70c1142f8d | |||
| 8b6b162073 | |||
| 5199fbe0cb | |||
| 924de62dff | |||
| 4cba7d8684 | |||
| f3f9ab1360 | |||
| 682d1d85ce | |||
| a1cc9a2049 | |||
| a7f7b3e572 | |||
| 7c32a7c2c8 | |||
| e842579f25 | |||
| bdd9134a0e | |||
| a01552e88c | |||
| 824957ae85 | |||
| af335d5565 | |||
| 2f9eb51868 | |||
| aebbc17643 | |||
| 84e78e1e20 | |||
| 89bb747ee3 | |||
| 62e37dbd09 | |||
| edef9cb936 | |||
| 3ae02c3050 | |||
| a4016616a1 | |||
| b4855611c4 | |||
| 1b44f6d220 | |||
| b0f0af087b | |||
| 1344f7255d | |||
| 39fe3b0fd6 | |||
| 0ba676b5e7 | |||
| 4d6897c138 | |||
| c7c6ba09e9 | |||
| c06baa67f1 | |||
| cdb7946c00 | |||
| bdb5da8df0 | |||
| e961c8d3aa | |||
| 3eb1a9eef8 | |||
| 67aead8fcc | |||
| fd764d0576 | |||
| dad55d7922 | |||
| fb32772512 | |||
| 918ce65acd | |||
| 9f03b9ee71 | |||
| 2235de1a2d | |||
| 8804c89f04 | |||
| 2e8805015c | |||
| f435ca2961 | |||
| 71c3761b20 | |||
| e4c441043a | |||
| 8a655a5d6e | |||
| 777c21ce87 | |||
| e22ff09691 | |||
| d0f685e87c | |||
| 8f71c417a9 | |||
| b62977c494 | |||
| 8d11136c1c | |||
| 4a7ea43095 | |||
| 8fe4bd2751 | |||
| 38bb819a24 | |||
| dbe75ad18d | |||
| 760441b45a | |||
| 56645b601b | |||
| 885e4bc99f | |||
| b04e5510fd | |||
| 806000725b | |||
| 71270641d3 | |||
| bf4f2bec91 | |||
| dafad3a7a3 | |||
| 182a1cc3fb | |||
| 4b7664aaa6 | |||
| 2050aef1e5 | |||
| 390af30bf6 | |||
| 698f48b1fd | |||
| 2e5cc61ac6 | |||
| 8d97fb7633 | |||
| 8a41c393bb | |||
| 6ae38359d7 | |||
| 7ddd1e3497 | |||
| 20a0993aa8 | |||
| 57d58056de | |||
| 06c6fa4d01 | |||
| 41f884e129 | |||
| 77a74c8839 | |||
| c198788017 | |||
| 4cbfa21b52 | |||
| f3754de394 | |||
| d47ad013cd | |||
| 8c4372d0d3 | |||
| 1c7b9145c8 | |||
| c477f53ee6 | |||
| f99f03dc33 | |||
| 2ddd786819 | |||
| 6e604f98e3 | |||
| 729404d05f | |||
| de50dfdb7c | |||
| 7bda522f0a | |||
| 6c39fb0649 | |||
| a7342ac77e | |||
| 5d45b8bbdd | |||
| aa0ff38ed7 | |||
| d55aa3b569 | |||
| d86a99fb32 | |||
| c687152724 | |||
| 65ec539875 | |||
| 6dba0792d2 | |||
| df78cecb31 | |||
| 3d8687f69d | |||
| 92196897a9 | |||
| 4206edfb13 | |||
| c08e63ab80 | |||
| 03646b4f87 | |||
| d9fa860b0c | |||
| 93d8494ddc | |||
| bd982958fa | |||
| e280b62f5c | |||
| 2bb050de40 | |||
| f3ed3bf0bf | |||
| 79457536f2 | |||
| 048f930da1 | |||
| 6aa8108fce | |||
| c234f75d7e | |||
| 064b634f77 | |||
| 8d83184cd1 | |||
| 7a5112bee5 | |||
| 0c549c6bda | |||
| c48e704502 | |||
| bec66895d9 | |||
| c9f1e8a8bb | |||
| ac209e7ee2 | |||
| 525256e15c | |||
| 3b8c965f4b | |||
| 8f8da8e6ea | |||
| ac9b81abea | |||
| 1c39c55423 | |||
| ca11273b37 | |||
| b532a60c3d | |||
| 941662e9f2 | |||
| 4d1e4c3ebe | |||
| f66fd9bcae | |||
| f5c5ecd1b9 | |||
| f9b7855d19 | |||
| 418a8af99a | |||
| ce3b4661de | |||
| 4b811f38b0 | |||
| bba2823065 | |||
| 5547e9658d | |||
| e14cbb19f5 | |||
| 0613a001c5 | |||
| 2970ba69f8 | |||
| 2c6b811d4d | |||
| d5a3caf961 | |||
| 7e64778546 | |||
| 1afd0d7c28 | |||
| 3027a3c3e8 | |||
| 3d7df100ff | |||
| 4de5030196 | |||
| e3bfe368db | |||
| e45fe0aaa0 | |||
| 807d758bfa | |||
| 7c5164b9a5 | |||
| 1e15fb8e43 | |||
| ae996b4b9a | |||
| 3259a7eec9 | |||
| 39a5aa1d63 | |||
| dbe378ad82 | |||
| a316c11974 | |||
| 2fd05c2464 | |||
| 8adabb946e | |||
| 3f251b9c0e | |||
| aadd60c3ad | |||
| 99cc994865 | |||
| da0355ca88 | |||
| aaa7c0934a | |||
| 03c70f4dfa | |||
| 0704609fa5 | |||
| d26569b26f | |||
| 007e93e526 | |||
| 8feec0284d | |||
| eaa79fb3bd | |||
| 3af5102e93 | |||
| d936460d83 | |||
| f51649c59f | |||
| be1e33b555 | |||
| 059645dec7 | |||
| 6439becd7d | |||
| 917fbc1ea2 | |||
| c97fee90b7 | |||
| 35d04946b4 | |||
| d0d71d626e | |||
| 5a1b39c67e | |||
| a8cbd37697 | |||
| b2bac94009 | |||
| d88b7e2a17 | |||
| 68bf35d83d | |||
| a78e6587ac | |||
| 21f715a321 | |||
| 18a5dfd81f | |||
| 2a7b5e2efb | |||
| 0d63b0361f | |||
| 4e301ddd24 | |||
| bc182276ac | |||
| 4980523d10 | |||
| 85baf58b55 | |||
| d7a4d02564 | |||
| 0e6f4c45db | |||
| 932cadce3c | |||
| 3926ea9c69 | |||
| dd1495c881 | |||
| 8c27e6aade | |||
| ba2774eeb5 | |||
| 8e854a8d64 | |||
| 86f5ed198f | |||
| cc57520c71 | |||
| 8d9f8960b2 | |||
| f66573620b | |||
| 3544a0e7f8 | |||
| 9c9db90886 | |||
| c4bc4d22e9 | |||
| b107c70a0c | |||
| 084069441f | |||
| 8b01433e61 | |||
| b72902b8f4 | |||
| 354e455ae7 | |||
| 8aaed47e39 | |||
| c7598aaf12 | |||
| cbe2d16d9b | |||
| 953eb97513 | |||
| b340b3b699 | |||
| f9f2579904 | |||
| 3a90653edd | |||
| a8ae18f43c | |||
| c235dd934a | |||
| 3e7c2cb0c2 | |||
| 1c9398b5b9 | |||
| 6a9c818e67 | |||
| 753baf85b6 | |||
| 7685c2a6b7 | |||
| cf1203566e | |||
| 052e6a475b | |||
| 8890acef3a | |||
| 72570ee21b | |||
| 100c94ad83 | |||
| 2ea3bf20a7 | |||
| b1cb7c7259 | |||
| 7510dfc5c5 | |||
| b18bbba23f | |||
| 4e28cea2a3 | |||
| a9bafc5efd | |||
| a04ff3343b | |||
| aa09fb28d2 | |||
| e6900c18b9 | |||
| 221a17a5af | |||
| fc638c608b | |||
| 71d9d96d81 | |||
| 5a8b999509 | |||
| 720d7e9d8d | |||
| c69be5934d | |||
| dae186fb03 | |||
| 076ad78355 | |||
| 421aa3a95c | |||
| 153d186a1c | |||
| 2238835868 | |||
| e0be4542ab | |||
| fab841bc7a | |||
| 789a28a966 | |||
| 7cde652ed1 | |||
| 5359116e72 | |||
| 17edfd215d | |||
| e292b46cca | |||
| d091b20ebe | |||
| 50a53562a1 | |||
| 55a479590b | |||
| 8874bb64fb | |||
| 38afba3075 | |||
| ba48e30128 | |||
| 77397b6877 | |||
| f50fa0554a | |||
| d0dd9f629d | |||
| c82637e760 | |||
| 152cfb3f07 | |||
| 7f579181fd | |||
| 3e0f39b6f1 | |||
| 244d3b1a5b | |||
| 7c24302f7c | |||
| 6cafc3a1e8 | |||
| 1ab0d31baa | |||
| b2fadc5a90 | |||
| 38f3d85909 | |||
| 3694100265 | |||
| af44f271ab | |||
| 9984f6aef9 | |||
| 51a1debc39 | |||
| b8a68f62a0 | |||
| 5ded188f51 | |||
| 12c5dda1fa | |||
| 25146049bf | |||
| 5598ee0c78 | |||
| 6e4b0cbcbf | |||
| 572cf29974 | |||
| 5601d19002 | |||
| e81dd5df76 | |||
| e7919d5a47 | |||
| 6f634fbc21 | |||
| 7478ece1ff | |||
| cd72b6f477 | |||
| fab96de4c7 | |||
| 0ffa17cf67 | |||
| 777549a15f | |||
| c07ded004d | |||
| da3e96a9d8 | |||
| d6e8a03ddf | |||
| b13cbd1e54 | |||
| 6b2e5c154b | |||
| 137a4d1e0d | |||
| 1725550acc | |||
| bd91e173b0 | |||
| 47a11b3e64 | |||
| b5e57519ff | |||
| 20845bbcd4 | |||
| 739c10ade6 | |||
| 14ea2d72a7 | |||
| 4a9ea97ea1 | |||
| b017a94353 | |||
| 15b65dd844 | |||
| 079ea8c39d | |||
| 4b949dcd72 | |||
| 2626cf4253 | |||
| b260c8aaec | |||
| 1ece46473b | |||
| 890c3cc8b0 | |||
| 7b45c9f1c5 | |||
| 58fb2f5ea6 | |||
| a79f3e47ba | |||
| b3b9db9ff6 | |||
| 9aed245241 | |||
| aa03fdb445 | |||
| 7cb8356598 | |||
| ac347755fd | |||
| b16cb15e88 | |||
| 4989c37964 | |||
| 06849c5814 | |||
| 78b67a6f5e | |||
| acf79df4d0 | |||
| bc5a9caf63 | |||
| 7b34b07cdc | |||
| 8df1a1bf17 | |||
| 1143b0f2d2 | |||
| 86883336fd | |||
| 62d77c5811 | |||
| 8397dddbbe | |||
| 47ef94d8c3 | |||
| 8aa4a485ed | |||
| cb4ef9c9ea | |||
| 2f80852a7c | |||
| 190a580642 | |||
| 6ba85f5069 | |||
| 707b5921fb | |||
| 2e25e68444 | |||
| 034260e426 | |||
| b4eda8bbff | |||
| 93a1b7fb52 | |||
| 8ef44c3520 | |||
| 449de57fc7 | |||
| cbe29e233d | |||
| bef56ff124 | |||
| 5a05c0f858 | |||
| c1e13e520b | |||
| cebe92bd8f | |||
| 6f8cfc7914 | |||
| e7e98b83d2 | |||
| 4b72bb9d28 | |||
| 221068874b | |||
| 6028d8b2f1 | |||
| ddaafe9310 | |||
| 139e38731a | |||
| d25056cb35 | |||
| 5c80a7091b | |||
| 5faf190202 | |||
| 169b114ff6 | |||
| bc67326573 | |||
| a32543533d | |||
| 6b6e40ef96 | |||
| 8127b7ecf0 | |||
| 09425ccbe0 | |||
| 61fbc4e3b5 | |||
| 158e4f85da | |||
| 8b1107d2e1 | |||
| 59ffa9084f | |||
| 19df673c50 | |||
| 5f20894413 | |||
| 7349874804 | |||
| fda5dc7e89 | |||
| d60b45a667 | |||
| ab2e69a76e | |||
| 6a836338a5 | |||
| 5a02365605 | |||
| 26b38c4f64 | |||
| 9b7edf2960 | |||
| 7050f64fae | |||
| 4623a989d8 | |||
| 87b942bd6d | |||
| 87ee5cc627 | |||
| bff8fe8b70 | |||
| 1495882dc7 | |||
| 2e50d84f2a | |||
| d32716f4c5 | |||
| 876aa4eda0 | |||
| 3673aee8e9 | |||
| a758191ee0 | |||
| 99410249c7 | |||
| a705f2ad30 | |||
| 33223dedc1 | |||
| bd8e8ef346 | |||
| c75e7bf656 | |||
| cb4117376a | |||
| 0d37920aad | |||
| 0da6e76200 | |||
| 5f5934a6ee | |||
| 85b7a2f4f5 | |||
| 3dcfd30a04 | |||
| b5a0f65783 | |||
| 3862e6f3a4 | |||
| 1d4e2ec50b | |||
| 8b85485510 | |||
| 722ce3ac8b | |||
| 1e132f2808 | |||
| d007e0a172 | |||
| 3ddd722cc1 | |||
| 82d8189966 | |||
| 2d533eb004 | |||
| f9c899701f | |||
| e9f62fbb09 | |||
| 5b2f09318a | |||
| 8c260c43a8 | |||
| eee793302c | |||
| 0d1fdf6e60 | |||
| 64398d8f30 | |||
| cab736b573 | |||
| 93071dd81e | |||
| e8fcb8f91a | |||
| 33cacfe884 | |||
| f624f7f05a | |||
| 624195d870 | |||
| ab2ef66263 | |||
| 4ea0372212 | |||
| ff31912e8a | |||
| dcefed2e4c | |||
| 55bbc4f585 | |||
| 0f2bb99b39 | |||
| 85342eeed3 | |||
| 374a6a668a | |||
| e3be3195ee | |||
| 503279f3c2 | |||
| f8bb54024c | |||
| 6e53fc606a | |||
| ab810c48af | |||
| 13bb9183af | |||
| 2c5b6ea690 | |||
| a8efa2e266 | |||
| e73eb2fd86 | |||
| d38fa26e13 | |||
| 716f4493e8 | |||
| 3220974a4a | |||
| 6732272047 | |||
| 547f038139 | |||
| 3b0ee60eaa | |||
| a869281de7 | |||
| a4ed77c7bb | |||
| 81718e64d3 | |||
| dee0daf8aa | |||
| 8e599fb22a | |||
| acb5589af1 | |||
| 6db2771cd6 | |||
| 06d4e0a19a | |||
| 3b18c6c14f | |||
| 300359acf2 | |||
| 5456d0200a | |||
| 9890f66443 | |||
| aba863bc84 | |||
| ade416f5c8 | |||
| 7097267f7c | |||
| b0d8d1a86d | |||
| 2c8296ba85 | |||
| 4dd17de146 | |||
| 3a281b0b57 | |||
| 04ed625f1a | |||
| 1cddfb1b2d | |||
| 796b64d83e | |||
| 240a3687d7 | |||
| 9ed4764ab2 | |||
| f253a13297 | |||
| 744cd57dd5 | |||
| e2a5647363 | |||
| a1f324c105 | |||
| 767e0f8ac7 | |||
| 0c0ad02234 | |||
| c09973ec56 | |||
| 03a72e1917 | |||
| f9e0eaaf83 | |||
| 985f75f7da | |||
| 171cbd6c53 | |||
| 9875bc5c5b | |||
| 882509f891 | |||
| 3396502334 | |||
| b7fb99c3d4 | |||
| c82307a710 | |||
| 309a99d183 | |||
| 09a6ef0194 | |||
| 43afcb4239 | |||
| 7a78f33ac3 | |||
| d5fb538630 | |||
| a22cdf5d5b | |||
| fe0636bbbf | |||
| 13859cfbd7 | |||
| 0adadc59ac | |||
| d65ba19c6c | |||
| 5cedbd2fa0 | |||
| 735fb09762 | |||
| 79d61419b0 | |||
| 248b93e5c6 | |||
| d8eff1adb5 | |||
| c911620254 | |||
| c68a32b889 | |||
| 788819a900 | |||
| 27c94af980 | |||
| 81122665a0 | |||
| 1856e687eb | |||
| 6055793d46 | |||
| 99b670ff10 | |||
| 7a09218cc0 | |||
| a34d0523b5 | |||
| f06e900bab | |||
| 7da15a2d44 | |||
| e999cc53d0 | |||
| b7d4bd00a5 | |||
| 8c2aa849d7 | |||
| 01a759fff8 | |||
| cb0008b59e | |||
| 9cd825aff1 | |||
| 8ad52d2979 | |||
| efd6143498 | |||
| 157fae5f83 | |||
| 6d63301b63 | |||
| 9801c8c6b3 | |||
| e04f4c0bd0 | |||
| b501578584 | |||
| 308f429c91 | |||
| 1d45172475 | |||
| 085a4f30db | |||
| 7a600dc2b6 | |||
| c0c2891d8d | |||
| 06b269a2ba | |||
| f3a4db0d87 | |||
| bcd99d18c4 | |||
| c05c400c6f | |||
| 0f081d8d7b | |||
| 833dc5e3ae | |||
| 0be3df435b | |||
| f4446af57e | |||
| 253aa664a8 | |||
| 0df037a295 | |||
| ed49d743f9 | |||
| 203cc392c0 | |||
| 52ba5a7f24 | |||
| 8aa0576bbc | |||
| 5ce9cc79c8 | |||
| 1a596dfdea | |||
| aeecb3ff59 | |||
| 85c8d2d558 | |||
| 2cf4e7ac59 | |||
| e7412a91f9 | |||
| 9888d03982 | |||
| 765cc39553 | |||
| 6e58c2f984 | |||
| 295542ff18 | |||
| 9d72d9c647 | |||
| 853897ec3e | |||
| 9cf8ad7399 | |||
| fdf974c5e3 | |||
| 2920dbfe8d | |||
| 77d05f7697 | |||
| 3ffeaeffb6 | |||
| db2755675c | |||
| 7ca090f73c | |||
| bb251ad29e | |||
| 75d770e019 | |||
| 49bf116c18 | |||
| b7d227fe0f | |||
| 83f59935f2 | |||
| 37b794fa14 | |||
| 1f5c45df91 | |||
| 62e3020234 | |||
| 895d457500 | |||
| 586269efd3 | |||
| 576718fc03 | |||
| 648dd4147a | |||
| c4df743c3e | |||
| b98fead37e | |||
| 6522094164 | |||
| fcd3dfe75c | |||
| ec9a798590 | |||
| 5825443d4d | |||
| 9768b3fadd | |||
| 77a72d6663 | |||
| 08d647c024 | |||
| a77ef040be | |||
| 13e581b953 | |||
| 1cc18617c5 | |||
| 2642f65614 | |||
| 4abb2aacf9 | |||
| 904daaf2b3 | |||
| 3044f2b1fb | |||
| 826accb2d1 | |||
| d5cb35ed95 | |||
| 24c7e4be8c | |||
| abbd7283b2 | |||
| 2980aa08d7 | |||
| e2344abbc4 | |||
| 80097c3500 | |||
| 714f36caee | |||
| fb1860d78b | |||
| ce7acd278e | |||
| ae8473183d | |||
| 69fb328b50 | |||
| b8d9899796 | |||
| e58fa1964d | |||
| 1627dee77e | |||
| bbac0c033f | |||
| 6437e1dbad | |||
| 48a9e998ff | |||
| 6b6ca461f0 | |||
| 7960952a30 | |||
| 5ec64efb75 | |||
| 2440b2eae4 | |||
| 54db2857c9 | |||
| 5b8f0b7361 | |||
| 053ebe3963 | |||
| 661b0367f5 | |||
| 01da0697a0 | |||
| a3d3b670ae | |||
| 5c64a332f8 | |||
| 6fcd9b645a | |||
| 78da16654a | |||
| da20d4882b | |||
| 1f31c38d24 | |||
| 5f2fd9733b | |||
| 8a225b4e09 | |||
| af05b41937 | |||
| d618da457e | |||
| d16bdad782 | |||
| f6d33e73a0 | |||
| 7b48e445f5 | |||
| 2390f904bd | |||
| 3bee3631a3 | |||
| 9da0b2d3c1 | |||
| 7a092e4585 | |||
| 196fb6b4f6 | |||
| 9507002961 | |||
| 943ed38c2f | |||
| 496619b492 | |||
| 4772b42d64 | |||
| 5bc10953cc | |||
| 18deca202d | |||
| 84bc4b018d | |||
| 1a0598a47a | |||
| 973d117887 | |||
| c284c8f336 | |||
| df69cbc84c | |||
| 646453887f | |||
| 189d617005 | |||
| 554cd8bfe7 | |||
| 79505dea20 | |||
| 5358a46b7e | |||
| aff1599ce7 | |||
| bc7df1c8a1 | |||
| f1df1d25a8 | |||
| 47d9b472ed | |||
| 89ab8c34d8 | |||
| 600498f9c1 | |||
| 845fbcd2ac | |||
| 3cc9f19b8f | |||
| e68c642005 | |||
| 81ae950577 | |||
| 62b4496cd6 | |||
| 29b7292d15 | |||
| 791058a2d2 | |||
| b6c108faef | |||
| 72d592866a | |||
| 4052993246 | |||
| a24f6e7789 | |||
| 0d0fd49924 | |||
| 139dcb409e | |||
| 707e6e7d13 | |||
| 36abb29ddd | |||
| a700fe761e | |||
| 7577164471 | |||
| 1bce743ea3 | |||
| f85ab0364a | |||
| eb3a0d52fd | |||
| b8cd295a12 | |||
| d3ff49ee0c | |||
| d4833f1e6e | |||
| 548483ed2f | |||
| f6f39b97c8 | |||
| 21ea5e0df9 | |||
| 3cbab6a5c7 | |||
| f19f39ba16 | |||
| b9c0fd9a1c | |||
| ce520e6944 | |||
| 0ad62a95e2 | |||
| 8f62a69e06 | |||
| 34bbb98f7f | |||
| 26cd6bb955 | |||
| 97534c633d | |||
| 0a9a2963c2 | |||
| 05afc39a35 | |||
| 84fdc1f55f | |||
| 3b03c3c2bb | |||
| 980f62686d | |||
| 202f2532a6 | |||
| 78d193a2fd | |||
| 0c109b0f27 | |||
| e33c0ab86c | |||
| 3a0189069d | |||
| 2688bd9edd | |||
| 889f7bd2d7 | |||
| 1eb0e4419d | |||
| 7b5ca875dc | |||
| 2d22a6c383 | |||
| 01c656ffb2 | |||
| 580a8c0f3e | |||
| f0258349bf | |||
| d9080eeb80 | |||
| b504744876 | |||
| 638e8b5b47 | |||
| 9b9c40f310 | |||
| cc3a1db879 | |||
| a16312803e | |||
| f20e97574a | |||
| e698b9d608 | |||
| 07d02ad75e | |||
| 91f51a27af | |||
| a60318260a | |||
| c3e7e336b5 | |||
| 0b1037b497 | |||
| 7da48b7dc5 | |||
| 73bcfc6151 | |||
| dfe1a16aa0 | |||
| 4f0e685feb | |||
| fca052b308 | |||
| 47d61bb83a | |||
| d5850afcc2 | |||
| 0c48b0799e | |||
| 96a8c33767 |
+2
-1
@@ -55,4 +55,5 @@ docs/_build/
|
||||
# pycharm
|
||||
.idea
|
||||
|
||||
icon.psd
|
||||
icon.psd
|
||||
main-icon.psd
|
||||
+250
@@ -1,3 +1,253 @@
|
||||
|
||||
2.5.4.2541
|
||||
|
||||
- core: try retrieving advanced_settings.json from the path given, which may be a file path or a directory
|
||||
- menu: ignore options: fix plugin not responding, fix unicode strings; resolve #509
|
||||
- providers: addic7ed: fix usage/adapt to new show search method
|
||||
- providers: opensubtitles: properly handle responses again, re-enable automatic throttling based on those (broken since XMLRPC handler rewrite)
|
||||
|
||||
|
||||
2.5.4.2527
|
||||
|
||||
- core: bugfixes
|
||||
- core: get_item: don't fail on socket timeout; fixes #498
|
||||
- core: fix scandir encoding errors; #453 #461 #441
|
||||
- core: clamp menu history to 25 items
|
||||
- add UnRAR for aarch64 (untested), arm (armv5tel, untested), linux/i386, MacOSX/i386; fixes #311
|
||||
- add 3rd party licenses
|
||||
- menu: new debounce/history mechanism; fixes the back button usage
|
||||
- config: add custom path option for advanced_settings.json
|
||||
- providers: opensubtitles: re-add support for throttling based on HTTP response codes, which got ditched due to new connection interface
|
||||
- providers: legendastv: disable if unrar wasn't found
|
||||
- providers: addic7ed: reduce show cache to 1 week
|
||||
- advanced settings: sonarr/radarr: make ssl verification optional
|
||||
- advanced settings: opensubtitles: add configurable connection timeout
|
||||
- refiners: drone: use certifi for HTTPS connections
|
||||
- tasks: SearchAllRecentlyAddedMissing: fix ZeroDivisionError in edge cases; fixes #496
|
||||
|
||||
|
||||
2.5.3.2452
|
||||
|
||||
- core: update certifi to 2018.01.18
|
||||
- core: metadata storage: only allow one subtitle per language
|
||||
- core: metadata storage: only parse latest metadata subtitle in localmedia
|
||||
- core: metadata storage: kill existing metadata subtitles explicitly upon storing a new one
|
||||
- core: metadata storage: fix selecting current subtitle from menu
|
||||
- providers: opensubtitles: use new requests based transport by default, finally fixes ResponseNotReady properly
|
||||
- providers: opensubtitles: mask token in logs
|
||||
- providers: don't check for hash validity if it isn't verifiable (fixes napiprojekt, #478)
|
||||
- submod: common: extend non_word_only matching
|
||||
- submod: common: reduce multi spaces to one
|
||||
- submod: OCR: fix III'll=I'll
|
||||
- advanced settings: add option to use HTTP instead of HTTPS for OpenSubtitles
|
||||
|
||||
|
||||
2.5.3.2422
|
||||
|
||||
- core: don't fail on embedded subtitle streams without language code set, fixes #473
|
||||
- providers: catch ResponseNotReady in list_subtitles_provider as well (partly fixes OpenSubtitles)
|
||||
- providers: don't use retry logic in case of ResponseNotReady
|
||||
- providers: addic7ed: use new search endpoint
|
||||
|
||||
|
||||
2.5.3.2414
|
||||
|
||||
- core: expand user agent list
|
||||
- core: update subliminal to 4ad5d31
|
||||
- core: treat 23.976, 23.98, 24.0 fps as equal
|
||||
- core: correctly skip blacklist entries when iterating through currently known subs
|
||||
- core: fix unpacking of packs without asked-for-release-group
|
||||
- core: fix embedded subtitle language detection; add debug log
|
||||
- core: treat embedded subtitle containing "forced" in its title as forced
|
||||
- core: improve embedded subtitles detection
|
||||
- core: store extracted embedded forced subtitles with the "forced" suffix (e.g.: video.en.forced.srt)
|
||||
- core: don't bother trying to extract embedded subtitle if transcoder wasn't found
|
||||
- core: fix automatic extraction of unknown embedded subtitle streams
|
||||
- core: skip immediately searching for new subtitle after successfully extracting embedded
|
||||
- core: extract embedded ASS: don't transcode to SRT using ffmpeg (Plex Transcoder), do the transcoding later using pysubs2; fixes offset issues
|
||||
- core: extract embedded: let ffmpeg auto convert mov_text/tx3g to srt
|
||||
- core: fix transcoder detection; add fallback #460
|
||||
- core: remove LD_LIBRARY_PATH from environment before calling notification executable
|
||||
- core: auto extract embedded subtitles in a separate thread
|
||||
- core: reduce encoding change log spam
|
||||
- core: only allow one automatic extraction at a time; add optional advanced settings "auto_extract_multithread"
|
||||
- core: add minimum score a subtitle has to have when considered by the find better subtitles task, when the current subtitle is an extracted embedded one; add advanced_settings entries
|
||||
- core/config: automatic extraction: add config setting to indicate whether there should be an immediate search for available subtitles after extraction or not (default: off)
|
||||
- core/menu/submod: add reverse_rtl modification for Hebrew; fixes #409
|
||||
- core: scoring: assume title match on tvdb_id match
|
||||
- tasks: search all recently added missing: fix attribute access on missing stored subtitle info
|
||||
- providers: add hosszupuska (Hungarian, thanks morpheus133 for the basic implementation)
|
||||
- providers: add argenteam (Spanish, thanks mmiraglia for the basic implementation)
|
||||
- providers: addic7ed: use random user agent by default (enforce for existing configs)
|
||||
- providers: enable subscene by default
|
||||
- providers: opensubtitles: add fallback for dict based query response in contrast to list/array based
|
||||
- advanced settings: make text-based-subtitle-formats configurable
|
||||
- menu: submod: inverse-reverse subtitle timing time-choices for better accessibility
|
||||
- submod: reduce log spam in case of debug logs enabled
|
||||
- submod: style tags could result in no output at all
|
||||
- submod: fix empty content if only non-line-mods were used, no line-mods; fixes #449
|
||||
- submod: HI: correctly handle style tags when checking for brackets
|
||||
- submod: HI: don't remove anything that's surrounded by quotes
|
||||
- submod: HI: double or triple dash is em dash
|
||||
- submod: HI: HI_before_colon_noncaps, don't assume single quotes are sentence enders
|
||||
- submod: common: don't uppercase after abbreviations
|
||||
- submod: common: don't break phone numbers (more than one spaced number pair found)
|
||||
- submod: common: also count lines only consisting of dots as removable
|
||||
- submod: common: replace more than 3 consecutive dots with 3 dots
|
||||
- submod: OCR: "H i." = "Hi."
|
||||
|
||||
|
||||
2.5.0.2287
|
||||
|
||||
- core: reduce main icon size
|
||||
- core: fix usage on NVIDIA SHIELD (hopefully, please report back), #441
|
||||
- core: add scandir fallback to listdir in case of badly configured locale in environment, #441, #440
|
||||
- core: get subtitles from archive: don't assume an episode match
|
||||
- core: get subtitles from archive: don't assume any attributes in guess
|
||||
- core: improve release group detection for drone/filebot/file_info refiners
|
||||
- core: fix language detection for embedded subtitle streams
|
||||
- core: support extraction of embedded mov_text subtitles in mp4 video files
|
||||
- refiners: drone: add http:// to url if not given
|
||||
- providers: opensubtitles: retry/reinitialize request when encountering ResponseNotReady
|
||||
- config: clarify subscene being only enabled for TV series by default
|
||||
- menu: when encountering permission errors when scanning media files, warn in the menu about them
|
||||
- submod: common: don't break -- addic7ed --
|
||||
- submod: common: remove lines that consist only of dash, underscore
|
||||
- submod: OCR: fix Ls = Is
|
||||
- submod: OCR: fix bad HI colons (ANNOUNCER; instead of ANNOUNCER:)
|
||||
- submod: common: fix lines consisting only of bad music symbols (*#¶ = ♪)
|
||||
- submod: HI: remove music-symbol-only-lines
|
||||
- submod: HI: be less aggressive about lines ending with a colon; please re-apply all your mods via advanced menu
|
||||
- submod: OCR: fix it'sjust, isn'tjust, Iam, Ican
|
||||
|
||||
|
||||
2.5.0.2247
|
||||
- fix ignoring by-hash-matched episodes
|
||||
|
||||
|
||||
2.5.0.2241
|
||||
|
||||
- fix issue when removing crap from filenames to not accidentally remove release group #436
|
||||
- fix initialization of soft ignore list after upgrade from 2.0
|
||||
|
||||
|
||||
2.5.0.2221
|
||||
|
||||
- refiners: add support for retrieving original filename from
|
||||
- drone derivatives: sonarr, radarr
|
||||
- filebot
|
||||
- symlinks
|
||||
- file_info meta file lists (see wiki)
|
||||
|
||||
- providers: add subscene (disabled by default to not flood subscene on release)
|
||||
- normal search
|
||||
- season pack search if season has concluded
|
||||
|
||||
- core: add provider subtitle-archive/pack cache for retrieving single subtitles from previously downloaded (season-) packs (subscene)
|
||||
- core/agent: massive performance improvements over 2.0
|
||||
- core/agent/background-tasks: reduce memory usage to a fraction of 2.0
|
||||
- core/providers: add dynamic provider throttling when certain events occur (ServiceUnavailable, too many downloads, ...), to lighten the provider-load
|
||||
- core/agent/config: automatically extract embedded subtitles (and use them if no current subtitle)
|
||||
- core: fix internal subtitle info storage issues
|
||||
- core: always store internal subtitle information even if no subtitle was downloaded (fixes SearchAllRecentlyAddedMissing)
|
||||
- core: fix internal subtitle info storage on windows (gzip handling is broken there)
|
||||
- core: don't fail on missing logfile paths
|
||||
- core: fix default encoding order for non-script-serbian
|
||||
- core: improve logging
|
||||
- core: add AsRequested to cleanup garbage names
|
||||
- core: treat SDTV and HDTV the same when searching for subtitles
|
||||
- core: parse_video: trust PMS season and episode numbers
|
||||
- core: parse_video: add series year information from PMS if none found
|
||||
- core: upgrade dependencies
|
||||
- core: update subliminal to 62cdb3c
|
||||
- core: add new file based cache mechanism, rendering DBM/memory backends obsolete
|
||||
- core: treat 23.980 fps as 23.976 and vice-versa
|
||||
- core: add HTTP proxy support for querying the providers (supports credentials)
|
||||
- core: only compute file hashes for enabled providers
|
||||
- core: massive speedup; refine only when needed, exit early otherwise
|
||||
- core: store last modified timestamp in subtitle info storage
|
||||
- core: only write to subtitle info storage if we haven't had one or any subtitle was downloaded
|
||||
- core: only clean up the sub-folder if a subtitle-sub-folder has been selected, and not the parent one also
|
||||
- core: support for CP437 encoded filenames in ZIP-Archives
|
||||
- core: use scandir library instead of os.listdir if possible, reducing performance-impact
|
||||
- core: archives: support multi-episode subtitles (partly)
|
||||
- core: subtitle cleanup: add support for hi, cc, sdh secondary filename tags; don't autoclean .txt
|
||||
- core: increase request timeout by three times in case a proxy is being used
|
||||
- core: fix language=Unknown in Plex when "Restrict to one language"-setting is set
|
||||
- core: refining: re-add old detected title as alternative title after re-refining with plex metadata's title; fixes #428
|
||||
- core: implement advanced_settings.json (see advanced_settings.json.template for reference, copy to "Plug-in Support/Data/com.plexapp.agents.subzero" to use it)
|
||||
- core/tasks: fix search all recently added missing (the total number of items will change in the menu while running), reduces memory usage
|
||||
- core/menu: add support for extracting embedded subtitles using the builtin plex transcoder
|
||||
- core/menu: skip wrong season or episode in returned subtitle results
|
||||
- core/config: fix language handling if treat undefined as first language is set
|
||||
- providers: remove shooter.cn
|
||||
- providers: add support for zip/rar archives containing more than one subtitle file
|
||||
- submod: common: remove redundant interpunction ("Hello !!!" -> "Hello!")
|
||||
- submod: skip provider hashing when applying mods
|
||||
- submod: correctly drop empty line (fixing broken display)
|
||||
- submod: OCR: fix F'xxxxx -> Fxxxxx
|
||||
- submod: HI: improve bracket matching
|
||||
- submod: OCR: fix l/L instead of I more aggressively
|
||||
- submod: common: fix uppercase I's in lowercase words more aggressively
|
||||
- submod: HI: improve HI_before_colon
|
||||
- submod: common: be more aggressive when fixing numbers; correctly space out spaced ellipses; don't break spaced ellipses; handle multiple spaces in numbers
|
||||
- menu: add support for extracting embedded subtitles for a whole season
|
||||
- menu: add reapply mods to current subtitle
|
||||
- menu: pad titles for more submenus, resulting in detail view in PlexWeb
|
||||
- menu: add subtitle selection submenu (if multiple subtitles are inside the subtitle info storage; e.g. previously downloaded ones or extracted embedded)
|
||||
- menu: advanced: add skip findbettersubtitles menu item, which sets the last_run to now (for debugging purposes)
|
||||
- menu: ignore: add more natural title for seasons and episodes (kills your old ignore lists!)
|
||||
- config: skip provider hashing on low impact mode
|
||||
- config: add limit by air date setting to consider for FindBetterSubtitles task (default: 1 year)
|
||||
- advanced settings: define enabled-for media types per provider
|
||||
- advanced settings: define enabled-for languages per provider
|
||||
- advanced settings: add deep-clean option (clean up the subtitle-sub-folder and the parent one)
|
||||
|
||||
|
||||
|
||||
2.0.33.1871
|
||||
- core: normalize line endings in subtitles to LF (\n)
|
||||
- core: add subtitle storage lock to avoid race condition
|
||||
- core: be more verbose about subtitle storage addition
|
||||
- core: fix MPL2 newline parsing, which resulted in broken subtitles
|
||||
- core: encoding change: reduce log spam
|
||||
- submod: common: fix CM_starting_spacedots
|
||||
- opensubtitles: fix request/response handling
|
||||
|
||||
|
||||
|
||||
2.0.33.1849
|
||||
- opensubtitles: add VIP server handling + preference; VIP benefits: 10€/year, ad-free subs, 1000 subs/day, no-cache VIP server, help SZ and subscribe via http://v.ht/osvip
|
||||
- opensubtitles: try to reuse previous token instead of logging in every time
|
||||
- core: add throttling between searches (10 seconds)
|
||||
- core: fix IETF handling for good
|
||||
- core: fix no subtitles being searched in certain situations (when an external subtitle without special tag exists)
|
||||
- core: add subtitle blacklist
|
||||
- core: fixes
|
||||
- core: fix detection of certain PMS media stream language tags ("FR" for example)
|
||||
- core: missing subtitles: correctly skip unwanted subtitle extensions
|
||||
- core: missing subtitles: honor "treat undefined as first language" option correctly
|
||||
- api: add blacklisting endpoints for quickly searching for new subtitles via bookmarklet
|
||||
- submod: colors: apply color mods at the end of processing modifications; fix color mods
|
||||
- submod: new remove_tags modification to remove all styling tags from subtitles
|
||||
- submod: HI: be more aggressive at handling brackets
|
||||
- submod: OCR: update en and hrv
|
||||
- submod: common: remove "torrent downloaded from ..." lines
|
||||
- submod: OCR: fix WholeWord handling, improving modification
|
||||
- submod: apply OCR fixes before HI
|
||||
- submod: OCR: fix broken HI tag colons (ANNOUNCER'. instead of ANNOUNCER:)
|
||||
- menu: advanced: speed up batch modifications
|
||||
- menu: add subtitle blacklist
|
||||
- menu: recently played: show only TV episodes and movies (music tracks were listed here as well)
|
||||
|
||||
|
||||
2.0.29.1767
|
||||
- core: fix internal subtitle storage issues
|
||||
- core: handle "embedded-forced" tag (futureproofing)
|
||||
- core: remove more garbage tags from release groups (nzbgeek, chamele0n, buymore, xpost, postbot)
|
||||
- submod: OCR fix: fix music icon = paragraph
|
||||
|
||||
|
||||
2.0.29.1756
|
||||
- core: don't fail on uppercase file extensions
|
||||
- core: don't re-download a subtitle if we already downloaded one, it still physically exists and external subtitles are configured to be ignored
|
||||
|
||||
+113
-34
@@ -1,13 +1,11 @@
|
||||
# coding=utf-8
|
||||
import sys
|
||||
import datetime
|
||||
import time
|
||||
import os
|
||||
|
||||
from subzero.sandbox import restore_builtins
|
||||
from subzero.sandbox import fix_environment_stuff
|
||||
|
||||
module = sys.modules['__main__']
|
||||
restore_builtins(module, {})
|
||||
fix_environment_stuff(module, {})
|
||||
|
||||
globals = getattr(module, "__builtins__")["globals"]
|
||||
for key, value in getattr(module, "__builtins__").iteritems():
|
||||
@@ -27,7 +25,7 @@ from subzero.constants import OS_PLEX_USERAGENT, PERSONAL_MEDIA_IDENTIFIER
|
||||
from interface.menu import *
|
||||
from support.plex_media import media_to_videos, get_media_item_ids
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles, store_subtitle_info
|
||||
from support.storage import save_subtitles, store_subtitle_info, get_subtitle_storage
|
||||
from support.items import is_ignored
|
||||
from support.config import config
|
||||
from support.lib import get_intent
|
||||
@@ -48,6 +46,8 @@ def Start():
|
||||
intent = get_intent()
|
||||
intent.cleanup()
|
||||
|
||||
#Locale.DefaultLocale = "de"
|
||||
|
||||
# clear expired menu history items
|
||||
now = datetime.datetime.now()
|
||||
if "menu_history" in Dict:
|
||||
@@ -116,6 +116,45 @@ def update_local_media(metadata, media, media_type="movies"):
|
||||
pass
|
||||
|
||||
|
||||
def agent_extract_embedded(video_part_map):
    """
    Queue the extraction of embedded subtitle streams for the given scanned videos.

    For every part of every item and every language in config.lang_list that has no stored subtitle from the
    "embedded" provider yet, the first stream returned by get_embedded_subtitle_streams is collected. All
    collected streams are handed to multi_extract_embedded on a separate thread.

    :param video_part_map: dict of scanned video -> part info (the agent passes the result of scan_videos)
    :return: None; any error is logged and swallowed so the calling agent update can continue
    """
    try:
        subtitle_storage = get_subtitle_storage()

        to_extract = []
        # incremented once per visited part; only used for the log line below
        item_count = 0

        for scanned_video, part_info in video_part_map.iteritems():
            plexapi_item = scanned_video.plexapi_metadata["item"]
            stored_subs = subtitle_storage.load_or_new(plexapi_item)

            for plexapi_part in get_all_parts(plexapi_item):
                item_count = item_count + 1
                for requested_language in config.lang_list:
                    embedded_subs = stored_subs.get_by_provider(plexapi_part.id, requested_language, "embedded")
                    current = stored_subs.get_any(plexapi_part.id, requested_language)
                    if embedded_subs:
                        Log.Debug("Skipping embedded subtitle extraction for %s, already got %r from %s",
                                  plexapi_item.rating_key, requested_language, embedded_subs[0].id)
                        continue

                    stream_data = get_embedded_subtitle_streams(plexapi_part, requested_language=requested_language,
                                                                get_forced=config.forced_only)
                    if not stream_data:
                        continue

                    stream = stream_data[0]["stream"]

                    # last tuple element is True when nothing is stored yet for this part/language
                    to_extract.append(({scanned_video: part_info}, plexapi_part, str(stream.index),
                                       str(requested_language), not current))

                    # unless a search is wanted after auto-extraction, mark the language as present on the video
                    if not cast_bool(Prefs["subtitles.search_after_autoextract"]):
                        scanned_video.subtitle_languages.update({requested_language})

        if to_extract:
            Log.Info("Triggering extraction of %d embedded subtitles of %d items", len(to_extract), item_count)
            Thread.Create(multi_extract_embedded, stream_list=to_extract, refresh=True, with_mods=True,
                          single_thread=not config.advanced.auto_extract_multithread)
    except Exception:
        # best effort: auto-extraction must never abort the agent update, but interpreter-level
        # exits (SystemExit, KeyboardInterrupt) are no longer swallowed
        Log.Error("Something went wrong when auto-extracting subtitles, continuing: %s", traceback.format_exc())
|
||||
|
||||
|
||||
class SubZeroAgent(object):
|
||||
agent_type = None
|
||||
agent_type_verbose = None
|
||||
@@ -133,7 +172,14 @@ class SubZeroAgent(object):
|
||||
Log.Debug("Sub-Zero %s, %s search" % (config.version, self.agent_type))
|
||||
results.Append(MetadataSearchResult(id='null', score=100))
|
||||
|
||||
def store_blank_subtitle_metadata(self, video_part_map):
    """
    Store subtitle info with an empty subtitle list for every video in the map.

    :param video_part_map: dict whose keys are the scanned videos
    """
    empty_results = {}
    for video in video_part_map.keys():
        empty_results[video] = []

    store_subtitle_info(video_part_map, empty_results, None, mode="a")
|
||||
|
||||
def update(self, metadata, media, lang):
|
||||
if not config.enable_agent:
|
||||
Log.Debug("Skipping Sub-Zero agent(s)")
|
||||
return
|
||||
|
||||
Log.Debug("Sub-Zero %s, %s update called" % (config.version, self.agent_type))
|
||||
intent = get_intent()
|
||||
|
||||
@@ -141,17 +187,6 @@ class SubZeroAgent(object):
|
||||
Log.Error("Called with empty media, something is really wrong with your setup!")
|
||||
return
|
||||
|
||||
# debounce for self.debounce seconds
|
||||
now = datetime.datetime.now()
|
||||
if "last_call" in Dict:
|
||||
last_call = Dict["last_call"]
|
||||
if last_call + datetime.timedelta(seconds=self.debounce) > now:
|
||||
wait = self.debounce - (now - last_call).seconds
|
||||
if wait >= 1:
|
||||
Log.Debug("Waiting %s seconds until continuing", wait)
|
||||
time.sleep(wait)
|
||||
Dict["last_call"] = now
|
||||
|
||||
item_ids = []
|
||||
try:
|
||||
config.init_subliminal_patches()
|
||||
@@ -181,41 +216,81 @@ class SubZeroAgent(object):
|
||||
set_refresh_menu_state(media, media_type=self.agent_type)
|
||||
|
||||
# scanned_video_part_map = {subliminal.Video: plex_part, ...}
|
||||
scanned_video_part_map = scan_videos(videos, kind=self.agent_type)
|
||||
providers = config.get_providers(media_type=self.agent_type)
|
||||
try:
|
||||
scanned_video_part_map = scan_videos(videos, providers=providers)
|
||||
except IOError, e:
|
||||
Log.Exception("Permission error, please check your folder/file permissions. Exiting.")
|
||||
if cast_bool(Prefs["check_permissions"]):
|
||||
config.permissions_ok = False
|
||||
config.missing_permissions = e.message
|
||||
return
|
||||
|
||||
# auto extract embedded
|
||||
if config.embedded_auto_extract:
|
||||
if config.plex_transcoder:
|
||||
agent_extract_embedded(scanned_video_part_map)
|
||||
else:
|
||||
Log.Warning("Plex Transcoder not found, can't auto extract")
|
||||
|
||||
# clear missing subtitles menu data
|
||||
if not scheduler.is_task_running("MissingSubtitles"):
|
||||
scheduler.clear_task_data("MissingSubtitles")
|
||||
|
||||
downloaded_subtitles = None
|
||||
if not config.enable_agent:
|
||||
Log.Debug("Skipping Sub-Zero agent(s)")
|
||||
|
||||
else:
|
||||
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
|
||||
# debounce for self.debounce seconds
|
||||
now = datetime.datetime.now()
|
||||
if "last_call" in Dict:
|
||||
last_call = Dict["last_call"]
|
||||
if last_call + datetime.timedelta(seconds=self.debounce) > now:
|
||||
wait = self.debounce - (now - last_call).seconds
|
||||
if wait >= 1:
|
||||
Log.Debug("Waiting %s seconds until continuing", wait)
|
||||
Thread.Sleep(wait)
|
||||
|
||||
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
|
||||
try:
|
||||
downloaded_subtitles = download_best_subtitles(scanned_video_part_map, min_score=use_score,
|
||||
throttle_time=self.debounce)
|
||||
item_ids = get_media_item_ids(media, kind=self.agent_type)
|
||||
throttle_time=self.debounce, providers=providers)
|
||||
except:
|
||||
Log.Exception("Something went wrong when downloading subtitles")
|
||||
|
||||
if downloaded_subtitles is not None:
|
||||
Dict["last_call"] = datetime.datetime.now()
|
||||
|
||||
item_ids = get_media_item_ids(media, kind=self.agent_type)
|
||||
|
||||
downloaded_any = False
|
||||
if downloaded_subtitles:
|
||||
downloaded_any = any(downloaded_subtitles.values())
|
||||
|
||||
if downloaded_any:
|
||||
save_subtitles(scanned_video_part_map, downloaded_subtitles, mods=config.default_mods)
|
||||
save_successful = False
|
||||
try:
|
||||
save_successful = save_subtitles(scanned_video_part_map, downloaded_subtitles,
|
||||
mods=config.default_mods)
|
||||
except:
|
||||
Log.Exception("Something went wrong when saving subtitles")
|
||||
|
||||
track_usage("Subtitle", "refreshed", "download", 1)
|
||||
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
# store item(s) in history
|
||||
for subtitle in video_subtitles:
|
||||
item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
|
||||
history = get_history()
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle)
|
||||
# store SZ meta info even if download wasn't successful
|
||||
if not save_successful:
|
||||
self.store_blank_subtitle_metadata(scanned_video_part_map)
|
||||
|
||||
else:
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
# store item(s) in history
|
||||
for subtitle in video_subtitles:
|
||||
item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
|
||||
history = get_history()
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle)
|
||||
history.destroy()
|
||||
else:
|
||||
# store subtitle info even if we've downloaded none
|
||||
store_subtitle_info(scanned_video_part_map, dict((k, []) for k in scanned_video_part_map.keys()),
|
||||
None, mode="a")
|
||||
# store SZ meta info even if we've downloaded none
|
||||
self.store_blank_subtitle_metadata(scanned_video_part_map)
|
||||
|
||||
update_local_media(metadata, media, media_type=self.agent_type)
|
||||
|
||||
@@ -232,6 +307,10 @@ class SubZeroAgent(object):
|
||||
|
||||
Dict.Save()
|
||||
|
||||
# fsync cache
|
||||
if config.new_style_cache:
|
||||
config.sync_cache()
|
||||
|
||||
|
||||
class SubZeroSubtitlesAgentMovies(SubZeroAgent, Agent.Movies):
|
||||
contributes_to = ['com.plexapp.agents.imdb', 'com.plexapp.agents.xbmcnfo', 'com.plexapp.agents.themoviedb', 'com.plexapp.agents.hama']
|
||||
|
||||
@@ -8,7 +8,7 @@ import urlparse
|
||||
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
|
||||
from subzero.lib.io import FileIO
|
||||
from subzero.constants import PREFIX, PLUGIN_IDENTIFIER
|
||||
@@ -20,74 +20,98 @@ from support.lib import Plex
|
||||
from support.storage import reset_storage, log_storage, get_subtitle_storage
|
||||
from support.scheduler import scheduler
|
||||
from support.items import set_mods_for_part, get_item_kind_from_rating_key
|
||||
from support.i18n import _
|
||||
|
||||
|
||||
@route(PREFIX + '/advanced')
|
||||
def AdvancedMenu(randomize=None, header=None, message=None):
|
||||
oc = SubFolderObjectContainer(header=header or "Internal stuff, pay attention!", message=message, no_cache=True,
|
||||
no_history=True,
|
||||
replace_parent=False, title2="Advanced")
|
||||
oc = SubFolderObjectContainer(
|
||||
header=header or _("Internal stuff, pay attention!"),
|
||||
message=message,
|
||||
no_cache=True,
|
||||
no_history=True,
|
||||
replace_parent=False,
|
||||
title2=_("Advanced"))
|
||||
|
||||
if config.lock_advanced_menu and not config.pin_correct:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(PinMenu, randomize=timestamp(), success_go_to="advanced"),
|
||||
title=pad_title("Enter PIN"),
|
||||
summary="The owner has restricted the access to this menu. Please enter the correct pin",
|
||||
key=Callback(
|
||||
PinMenu,
|
||||
randomize=timestamp(),
|
||||
success_go_to=_("advanced")),
|
||||
title=pad_title(_("Enter PIN")),
|
||||
summary=_("The owner has restricted the access to this menu. Please enter the correct pin"),
|
||||
))
|
||||
return oc
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerRestart, randomize=timestamp()),
|
||||
title=pad_title("Restart the plugin"),
|
||||
title=pad_title(_("Restart the plugin")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(GetLogsLink),
|
||||
title="Get my logs (copy the appearing link and open it in your browser, please)",
|
||||
summary="Copy the appearing link and open it in your browser, please",
|
||||
title=_("Get my logs (copy the appearing link and open it in your browser, please)"),
|
||||
summary=_("Copy the appearing link and open it in your browser, please"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerBetterSubtitles, randomize=timestamp()),
|
||||
title=pad_title("Trigger find better subtitles"),
|
||||
title=pad_title(_("Trigger find better subtitles")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SkipFindBetterSubtitles, randomize=timestamp()),
|
||||
title=pad_title(_("Skip next find better subtitles (sets last run to now)")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerStorageMaintenance, randomize=timestamp()),
|
||||
title=pad_title("Trigger subtitle storage maintenance"),
|
||||
title=pad_title(_("Trigger subtitle storage maintenance")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerStorageMigration, randomize=timestamp()),
|
||||
title=pad_title("Trigger subtitle storage migration (expensive)"),
|
||||
title=pad_title(_("Trigger subtitle storage migration (expensive)")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerCacheMaintenance, randomize=timestamp()),
|
||||
title=pad_title(_("Trigger cache maintenance (refiners, providers and packs/archives)")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ApplyDefaultMods, randomize=timestamp()),
|
||||
title=pad_title("Apply configured default subtitle mods to all (active) stored subtitles"),
|
||||
title=pad_title(_("Apply configured default subtitle mods to all (active) stored subtitles")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ReApplyMods, randomize=timestamp()),
|
||||
title=pad_title("Re-Apply mods of all stored subtitles"),
|
||||
title=pad_title(_("Re-Apply mods of all stored subtitles")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(LogStorage, key="tasks", randomize=timestamp()),
|
||||
title=pad_title("Log the plugin's scheduled tasks state storage"),
|
||||
title=pad_title(_("Log the plugin's scheduled tasks state storage")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(LogStorage, key="ignore", randomize=timestamp()),
|
||||
title=pad_title("Log the plugin's internal ignorelist storage"),
|
||||
title=pad_title(_("Log the plugin's internal ignorelist storage")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(LogStorage, key=None, randomize=timestamp()),
|
||||
title=pad_title("Log the plugin's complete state storage"),
|
||||
title=pad_title(_("Log the plugin's complete state storage")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetStorage, key="tasks", randomize=timestamp()),
|
||||
title=pad_title("Reset the plugin's scheduled tasks state storage"),
|
||||
title=pad_title(_("Reset the plugin's scheduled tasks state storage")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetStorage, key="ignore", randomize=timestamp()),
|
||||
title=pad_title("Reset the plugin's internal ignorelist storage"),
|
||||
title=pad_title(_("Reset the plugin's internal ignorelist storage")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetStorage, key="menu_history", randomize=timestamp()),
|
||||
title=pad_title("Reset the plugin's menu history storage"),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(InvalidateCache, randomize=timestamp()),
|
||||
title=pad_title("Invalidate Sub-Zero metadata caches (subliminal)"),
|
||||
title=pad_title(_("Invalidate Sub-Zero metadata caches (subliminal)")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetProviderThrottle, randomize=timestamp()),
|
||||
title=pad_title(_("Reset provider throttle states")),
|
||||
))
|
||||
return oc
|
||||
|
||||
@@ -99,15 +123,20 @@ def DispatchRestart():
|
||||
@route(PREFIX + '/advanced/restart/trigger')
|
||||
@debounce
|
||||
def TriggerRestart(randomize=None):
|
||||
set_refresh_menu_state("Restarting the plugin")
|
||||
set_refresh_menu_state(_("Restarting the plugin"))
|
||||
DispatchRestart()
|
||||
return fatality(header="Restart triggered, please wait about 5 seconds", force_title=" ", only_refresh=True,
|
||||
replace_parent=True,
|
||||
no_history=True, randomize=timestamp())
|
||||
return fatality(
|
||||
header=_("Restart triggered, please wait about 5 seconds"),
|
||||
force_title=" ",
|
||||
only_refresh=True,
|
||||
replace_parent=True,
|
||||
no_history=True,
|
||||
randomize=timestamp())
|
||||
|
||||
|
||||
@route(PREFIX + '/advanced/restart/execute')
|
||||
def Restart():
|
||||
@debounce
|
||||
def Restart(randomize=None):
|
||||
Plex[":/plugins"].restart(PLUGIN_IDENTIFIER)
|
||||
|
||||
|
||||
@@ -115,10 +144,17 @@ def Restart():
|
||||
@debounce
|
||||
def ResetStorage(key, randomize=None, sure=False):
|
||||
if not sure:
|
||||
oc = SubFolderObjectContainer(no_history=True, title1="Reset subtitle storage", title2="Are you sure?")
|
||||
oc = SubFolderObjectContainer(
|
||||
no_history=True,
|
||||
title1=_("Reset subtitle storage"),
|
||||
title2=_("Are you sure?"))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetStorage, key=key, sure=True, randomize=timestamp()),
|
||||
title=pad_title("Are you really sure?"),
|
||||
key=Callback(
|
||||
ResetStorage,
|
||||
key=key,
|
||||
sure=True,
|
||||
randomize=timestamp()),
|
||||
title=pad_title(_("Are you really sure?")),
|
||||
|
||||
))
|
||||
return oc
|
||||
@@ -132,8 +168,8 @@ def ResetStorage(key, randomize=None, sure=False):
|
||||
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='Information Storage (%s) reset' % key
|
||||
header=_("Success"),
|
||||
message=_("Information Storage (%s) reset", key)
|
||||
)
|
||||
|
||||
|
||||
@@ -142,8 +178,8 @@ def LogStorage(key, randomize=None):
|
||||
log_storage(key)
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='Information Storage (%s) logged' % key
|
||||
header=_("Success"),
|
||||
message=_("Information Storage (%s) logged", key)
|
||||
)
|
||||
|
||||
|
||||
@@ -153,8 +189,21 @@ def TriggerBetterSubtitles(randomize=None):
|
||||
scheduler.dispatch_task("FindBetterSubtitles")
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='FindBetterSubtitles triggered'
|
||||
header=_("Success"),
|
||||
message=_("FindBetterSubtitles triggered")
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/skipbetter')
@debounce
def SkipFindBetterSubtitles(randomize=None):
    """
    Set the last run of the FindBetterSubtitles task to now and return to the advanced menu.

    :param randomize: not read by this function
    :return: the advanced menu carrying a success message
    """
    better_subs_task = scheduler.task("FindBetterSubtitles")
    better_subs_task.last_run = datetime.datetime.now()

    stamp = timestamp()
    header = _("Success")
    message = _("FindBetterSubtitles skipped")
    return AdvancedMenu(randomize=stamp, header=header, message=message)
|
||||
|
||||
|
||||
@@ -164,8 +213,8 @@ def TriggerStorageMaintenance(randomize=None):
|
||||
scheduler.dispatch_task("SubtitleStorageMaintenance")
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='SubtitleStorageMaintenance triggered'
|
||||
header=_("Success"),
|
||||
message=_("SubtitleStorageMaintenance triggered")
|
||||
)
|
||||
|
||||
|
||||
@@ -175,8 +224,19 @@ def TriggerStorageMigration(randomize=None):
|
||||
scheduler.dispatch_task("MigrateSubtitleStorage")
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='MigrateSubtitleStorage triggered'
|
||||
header=_("Success"),
|
||||
message=_("MigrateSubtitleStorage triggered")
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/triggercachemaintenance')
@debounce
def TriggerCacheMaintenance(randomize=None):
    """
    Dispatch the CacheMaintenance scheduler task and return to the advanced menu.

    :param randomize: not read by this function
    :return: the advanced menu carrying a success message
    """
    scheduler.dispatch_task("CacheMaintenance")

    stamp = timestamp()
    header = _("Success")
    message = _("TriggerCacheMaintenance triggered")
    return AdvancedMenu(randomize=stamp, header=header, message=message)
|
||||
|
||||
|
||||
@@ -228,8 +288,8 @@ def ApplyDefaultMods(randomize=None):
|
||||
Thread.CreateTimer(1.0, apply_default_mods)
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='This may take some time ...'
|
||||
header=_("Success"),
|
||||
message=_("This may take some time ...")
|
||||
)
|
||||
|
||||
|
||||
@@ -239,17 +299,20 @@ def ReApplyMods(randomize=None):
|
||||
Thread.CreateTimer(1.0, apply_default_mods, reapply_current=True)
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='This may take some time ...'
|
||||
header=_("Success"),
|
||||
message=_("This may take some time ...")
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/get_logs_link')
|
||||
def GetLogsLink():
|
||||
if not config.plex_token:
|
||||
oc = ObjectContainer(title2="Download Logs", no_cache=True, no_history=True,
|
||||
header="Sorry, feature unavailable",
|
||||
message="Universal Plex token not available")
|
||||
oc = ObjectContainer(
|
||||
title2=_("Download Logs"),
|
||||
no_cache=True,
|
||||
no_history=True,
|
||||
header=_("Sorry, feature unavailable"),
|
||||
message=_("Universal Plex token not available"))
|
||||
return oc
|
||||
|
||||
# try getting the link base via the request in context, first, otherwise use the public ip
|
||||
@@ -274,9 +337,12 @@ def GetLogsLink():
|
||||
Log.Debug("Using ip-based fallback link_base")
|
||||
|
||||
logs_link = "%s%s?X-Plex-Token=%s" % (link_base, PREFIX + '/logs', config.plex_token)
|
||||
oc = ObjectContainer(title2=logs_link, no_cache=True, no_history=True,
|
||||
header="Copy this link and open this in your browser, please",
|
||||
message=logs_link)
|
||||
oc = ObjectContainer(
|
||||
title2=logs_link,
|
||||
no_cache=True,
|
||||
no_history=True,
|
||||
header=_("Copy this link and open this in your browser, please"),
|
||||
message=logs_link)
|
||||
return oc
|
||||
|
||||
|
||||
@@ -300,35 +366,51 @@ def DownloadLogs():
|
||||
@debounce
|
||||
def InvalidateCache(randomize=None):
|
||||
from subliminal.cache import region
|
||||
region.invalidate()
|
||||
if config.new_style_cache:
|
||||
region.backend.clear()
|
||||
else:
|
||||
region.invalidate()
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header='Success',
|
||||
message='Cache invalidated'
|
||||
header=_("Success"),
|
||||
message=_("Cache invalidated")
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/pin')
|
||||
def PinMenu(pin="", randomize=None, success_go_to="channel"):
|
||||
oc = ObjectContainer(title2="Enter PIN number %s" % (len(pin) + 1), no_cache=True, no_history=True,
|
||||
skip_pin_lock=True)
|
||||
oc = ObjectContainer(
|
||||
title2=_("Enter PIN number ") + str(len(pin) + 1),
|
||||
no_cache=True,
|
||||
no_history=True,
|
||||
skip_pin_lock=True)
|
||||
|
||||
if pin == config.pin:
|
||||
Dict["pin_correct_time"] = datetime.datetime.now()
|
||||
config.locked = False
|
||||
if success_go_to == "channel":
|
||||
return fatality(force_title="PIN correct", header="PIN correct", no_history=True)
|
||||
return fatality(
|
||||
force_title=_("PIN correct"),
|
||||
header=_("PIN correct"),
|
||||
no_history=True)
|
||||
elif success_go_to == "advanced":
|
||||
return AdvancedMenu(randomize=timestamp())
|
||||
|
||||
for i in range(10):
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(PinMenu, randomize=timestamp(), pin=pin + str(i), success_go_to=success_go_to),
|
||||
key=Callback(
|
||||
PinMenu,
|
||||
randomize=timestamp(),
|
||||
pin=pin + str(i),
|
||||
success_go_to=success_go_to),
|
||||
title=pad_title(str(i)),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(PinMenu, randomize=timestamp(), success_go_to=success_go_to),
|
||||
title=pad_title("Reset"),
|
||||
key=Callback(
|
||||
PinMenu,
|
||||
randomize=timestamp(),
|
||||
success_go_to=success_go_to),
|
||||
title=pad_title(_("Reset")),
|
||||
))
|
||||
return oc
|
||||
|
||||
@@ -337,4 +419,15 @@ def PinMenu(pin="", randomize=None, success_go_to="channel"):
|
||||
def ClearPin(randomize=None):
|
||||
Dict["pin_correct_time"] = None
|
||||
config.locked = True
|
||||
return fatality(force_title="Menu locked", header=" ", no_history=True)
|
||||
return fatality(force_title=_("Menu locked"), header=" ", no_history=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/reset_throttle')
def ResetProviderThrottle(randomize=None):
    """
    Replace the stored provider throttle states with an empty dict, persist it and return to the advanced menu.

    :param randomize: not read by this function
    :return: the advanced menu carrying a success message
    """
    Dict["provider_throttle"] = {}
    Dict.Save()

    stamp = timestamp()
    header = _("Success")
    message = _("Provider throttles reset")
    return AdvancedMenu(randomize=stamp, header=header, message=message)
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
# coding=utf-8
|
||||
|
||||
import datetime
|
||||
import operator
|
||||
|
||||
from support.config import config
|
||||
from support.helpers import timestamp
|
||||
|
||||
|
||||
def enable_channel_wrapper(func):
|
||||
@@ -25,3 +29,157 @@ def enable_channel_wrapper(func):
|
||||
return (func if (config.enable_channel or enforce_route) else noop)(*args, **kwargs)
|
||||
|
||||
return wrap
|
||||
|
||||
|
||||
ROUTE_REGISTRY = {}
|
||||
|
||||
|
||||
def get_func_name(args):
    """
    Return the ``__name__`` of the first element of ``args``.

    :param args: iterable whose first element carries a ``__name__`` attribute
    :raises IndexError: if ``args`` is empty
    """
    first = list(args)[0]
    return first.__name__
|
||||
|
||||
|
||||
def get_lookup_key(f, args, kwargs):
    """
    Build the tuple identifying one specific call of a route function.

    The keyword arguments are sorted by name so that the same call always yields the same key, independent of
    the iteration order of ``kwargs``.

    :param f: the route function; only its ``__name__`` is used
    :param args: positional arguments of the call
    :param kwargs: dict of keyword arguments of the call
    :return: ``(function name, tuple of args, tuple of sorted (name, value) pairs)``
    """
    return f.__name__, tuple(args), tuple(sorted(kwargs.items()))
|
||||
|
||||
|
||||
def should_debounce(f, key, kw):
    """
    Tell whether a call has to be suppressed because it was already executed.

    That is the case when ``f`` carries a truthy ``debounce`` attribute, the call was made with a ``randomize``
    keyword argument and ``key`` is already present in the stored menu history.

    :param f: the route function
    :param key: lookup key of the call
    :param kw: keyword arguments of the call
    """
    flagged = getattr(f, "debounce", False)
    if not flagged:
        return flagged

    if "randomize" not in kw:
        return False

    return key in Dict["menu_history"]
|
||||
|
||||
|
||||
def register_route_function(f):
    """
    Remember ``f`` in ROUTE_REGISTRY under its ``__name__`` and hand it back.

    Functions named "ValidatePrefs" are never registered, and a name that is already registered keeps its
    first function.

    :param f: the route function
    :return: ``f``, unchanged
    """
    name = f.__name__
    if name == "ValidatePrefs":
        return f

    ROUTE_REGISTRY.setdefault(name, f)
    return f
|
||||
|
||||
|
||||
def main_menu_fallback():
    """
    Record a call of the registered "fatality" route in the menu history and return its result.

    :return: whatever the "fatality" route returns when called with a fresh ``randomize`` value
    """
    fatality_route = ROUTE_REGISTRY["fatality"]

    fallback_key = get_lookup_key(fatality_route, [], {})
    Dict["last_menu_item"] = fallback_key
    add_to_menu_history(fallback_key)

    return fatality_route(randomize=timestamp())
|
||||
|
||||
|
||||
def add_to_menu_history(key):
    """
    Add a call to the stored menu history and persist it.

    The entry's value is set to six hours from now. Only the 25 entries with the latest values are kept.

    :param key: lookup key of the call (see get_lookup_key)
    """
    mh = Dict["menu_history"]

    # drop any previous entry for this key before re-adding it with a fresh timestamp
    if key in mh:
        del mh[key]

    mh[key] = datetime.datetime.now() + datetime.timedelta(hours=6)

    # limit to 25 items; the result goes straight into a dict, so no further ordering is needed
    newest_first = sorted(mh.items(), key=operator.itemgetter(1), reverse=True)
    Dict["menu_history"] = dict(newest_first[:25])

    try:
        Dict.Save()
    except TypeError:
        # the key could not be persisted; remove it again so later saves are not affected
        Log.Error("Can't save menu history for: %r", key)
        del Dict["menu_history"][key]
|
||||
|
||||
|
||||
def route_wrapper(*args, **kwargs):
    """
    Replacement for ``route`` that adds menu history tracking and debouncing to the routed function.

    :param args: positional arguments passed on to ``route``
    :param kwargs: keyword arguments passed on to ``route``
    :return: a decorator for the route function
    """
    def wrap(f):
        previously_wrapped = getattr(f, "orig_f", False)

        register_route_function(f)

        def inner(*a, **kw):
            # make sure the storage slots used below exist
            if "menu_history" not in Dict:
                Dict["menu_history"] = {}

            if "last_menu_item" not in Dict:
                Dict["last_menu_item"] = None

            key = get_lookup_key(f, list(a), kw)

            if not should_debounce(f, key, kw):
                # regular call: record it and run the route function
                add_to_menu_history(key)
                Dict["last_menu_item"] = key
                return f(*a, **kw)

            # NOTE: a search for a suitable fallback route in the menu history was sketched here earlier;
            # for now a debounced call always ends up in the main menu
            if f.__name__ in ("TriggerRestart", "Restart"):
                # special case for TriggerRestart
                Log.Debug("Don't trigger a re-restart, falling back to main menu")
            else:
                Log.Debug("not triggering %s twice with %s, %s, returning to main menu" %
                          (f.__name__, a, kw))

            return main_menu_fallback()

        # @route may be used multiple times; only the first application gets the tracking wrapper
        target = f
        if not previously_wrapped:
            inner.orig_f = f
            target = inner

        return enable_channel_wrapper(route(*args, **kwargs))(target)

    return wrap
|
||||
|
||||
@@ -1,28 +1,29 @@
|
||||
# coding=utf-8
|
||||
import os
|
||||
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
|
||||
from sub_mod import SubtitleModificationsMenu
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, add_ignore_options, get_item_task_data, \
|
||||
set_refresh_menu_state, route
|
||||
set_refresh_menu_state, route, extract_embedded_sub
|
||||
|
||||
from refresh_item import RefreshItem
|
||||
from subzero.constants import PREFIX
|
||||
from support.config import config
|
||||
from support.helpers import timestamp, df, get_language, display_language
|
||||
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub, get_item_title
|
||||
from support.plex_media import get_plex_metadata
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.helpers import timestamp, df, get_language, display_language, get_language_from_stream
|
||||
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub, get_item_title, save_stored_sub
|
||||
from support.plex_media import get_plex_metadata, get_part, get_embedded_subtitle_streams
|
||||
from support.scanning import scan_videos
|
||||
from support.scheduler import scheduler
|
||||
from support.storage import get_subtitle_storage
|
||||
from support.i18n import _
|
||||
|
||||
|
||||
# fixme: needs kwargs cleanup
|
||||
|
||||
@route(PREFIX + '/item/{rating_key}/actions')
|
||||
@debounce
|
||||
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None):
|
||||
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None, header=None,
|
||||
message=None):
|
||||
"""
|
||||
displays the item details menu of an item that doesn't contain any deeper tree, such as a movie or an episode
|
||||
:param rating_key:
|
||||
@@ -40,14 +41,23 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
|
||||
timeout = 30
|
||||
|
||||
oc = SubFolderObjectContainer(title2=title, replace_parent=True)
|
||||
oc = SubFolderObjectContainer(
|
||||
title2=title,
|
||||
replace_parent=True,
|
||||
header=header,
|
||||
message=message)
|
||||
|
||||
if not item:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, rating_key=rating_key, title=title, base_title=base_title,
|
||||
item_title=item_title, randomize=timestamp()),
|
||||
title=u"Item not found: %s!" % item_title,
|
||||
summary="Plex didn't return any information about the item, please refresh it and come back later",
|
||||
key=Callback(
|
||||
ItemDetailsMenu,
|
||||
rating_key=rating_key,
|
||||
title=title,
|
||||
base_title=base_title,
|
||||
item_title=item_title,
|
||||
randomize=timestamp()),
|
||||
title=_(u"Item not found: %s!", item_title),
|
||||
summary=_("Plex didn't return any information about the item, please refresh it and come back later"),
|
||||
thumb=default_thumb
|
||||
))
|
||||
return oc
|
||||
@@ -59,26 +69,37 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
season = get_item(item.season.rating_key)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(MetadataMenu, rating_key=season.rating_key, title=season.title, base_title=show.title,
|
||||
previous_item_type="show", previous_rating_key=show.rating_key,
|
||||
display_items=True, randomize=timestamp()),
|
||||
title=u"< Back to %s" % season.title,
|
||||
summary="Back to %s > %s" % (show.title, season.title),
|
||||
key=Callback(
|
||||
MetadataMenu,
|
||||
rating_key=season.rating_key,
|
||||
title=season.title,
|
||||
base_title=show.title,
|
||||
previous_item_type="show",
|
||||
previous_rating_key=show.rating_key,
|
||||
display_items=True,
|
||||
randomize=timestamp()),
|
||||
title=_(u"< Back to %s", season.title),
|
||||
summary=_("Back to %s > %s", show.title, season.title),
|
||||
thumb=season.thumb or default_thumb
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, randomize=timestamp(),
|
||||
timeout=timeout * 1000),
|
||||
title=u"Refresh: %s" % item_title,
|
||||
summary="Refreshes the %s, possibly searching for missing and picking up new subtitles on disk" % current_kind,
|
||||
key=Callback(
|
||||
RefreshItem,
|
||||
rating_key=rating_key,
|
||||
item_title=item_title,
|
||||
randomize=timestamp(),
|
||||
timeout=timeout * 1000),
|
||||
title=_(u"Refresh: %s", item_title),
|
||||
summary=_("Refreshes %(the_movie_series_season_episode)s, possibly searching for missing and picking up "
|
||||
"new subtitles on disk", the_movie_series_season_episode=_(u"the %s" % current_kind)),
|
||||
thumb=item.thumb or default_thumb
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(),
|
||||
timeout=timeout * 1000),
|
||||
title=u"Force-find subtitles: %s" % item_title,
|
||||
summary="Issues a forced refresh, ignoring known subtitles and searching for new ones",
|
||||
title=_(u"Force-find subtitles: %(item_title)s", item_title=item_title),
|
||||
summary=_("Issues a forced refresh, ignoring known subtitles and searching for new ones"),
|
||||
thumb=item.thumb or default_thumb
|
||||
))
|
||||
|
||||
@@ -98,6 +119,12 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
part_id = str(part.id)
|
||||
part_index += 1
|
||||
|
||||
part_index_addon = u""
|
||||
part_summary_addon = u""
|
||||
if has_multiple_parts:
|
||||
part_index_addon = _(u"File %(file_part_index)s: ", file_part_index=part_index)
|
||||
part_summary_addon = u"%s " % filename
|
||||
|
||||
# iterate through all configured languages
|
||||
for lang in config.lang_list:
|
||||
# get corresponding stored subtitle data for that media part (physical media item), for language
|
||||
@@ -105,23 +132,22 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
current_sub_id = None
|
||||
current_sub_provider_name = None
|
||||
|
||||
part_index_addon = ""
|
||||
part_summary_addon = ""
|
||||
if has_multiple_parts:
|
||||
part_index_addon = u"File %s: " % part_index
|
||||
part_summary_addon = "%s " % filename
|
||||
|
||||
summary = u"%sNo current subtitle in storage" % part_summary_addon
|
||||
summary = _(u"%(part_summary)sNo current subtitle in storage", part_summary=part_summary_addon)
|
||||
current_score = None
|
||||
if current_sub:
|
||||
current_sub_id = current_sub.id
|
||||
current_sub_provider_name = current_sub.provider_name
|
||||
current_score = current_sub.score
|
||||
|
||||
summary = u"%sCurrent subtitle: %s (added: %s, %s), Language: %s, Score: %i, Storage: %s" % \
|
||||
(part_summary_addon, current_sub.provider_name, df(current_sub.date_added),
|
||||
current_sub.mode_verbose, display_language(lang), current_sub.score,
|
||||
current_sub.storage_type)
|
||||
summary = _(u"%(part_summary)sCurrent subtitle: %(provider_name)s (added: %(date_added)s, "
|
||||
u"%(mode)s), Language: %(language)s, Score: %(score)i, Storage: %(storage_type)s",
|
||||
part_summary=part_summary_addon,
|
||||
provider_name=current_sub.provider_name,
|
||||
date_added=df(current_sub.date_added),
|
||||
mode=current_sub.mode_verbose,
|
||||
language=display_language(lang),
|
||||
score=current_sub.score,
|
||||
storage_type=current_sub.storage_type)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
@@ -130,7 +156,8 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
item_type=plex_item.type, filename=filename, current_data=summary,
|
||||
randomize=timestamp(), current_provider=current_sub_provider_name,
|
||||
current_score=current_score),
|
||||
title=u"%sActions for %s subtitle" % (part_index_addon, display_language(lang)),
|
||||
title=_(u"%(part_summary)sManage %(language)s subtitle", part_summary=part_index_addon,
|
||||
language=display_language(lang)),
|
||||
summary=summary
|
||||
))
|
||||
else:
|
||||
@@ -141,65 +168,177 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
item_type=plex_item.type, filename=filename, current_data=summary,
|
||||
randomize=timestamp(), current_provider=current_sub_provider_name,
|
||||
current_score=current_score),
|
||||
title=u"%sList %s subtitles" % (part_index_addon, display_language(lang)),
|
||||
title=_(u"%(part_summary)sList %(language)s subtitles", part_summary=part_index_addon,
|
||||
language=display_language(lang)),
|
||||
summary=summary
|
||||
))
|
||||
|
||||
add_ignore_options(oc, "videos", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
if config.plex_transcoder:
|
||||
# embedded subtitles
|
||||
embedded_count = 0
|
||||
embedded_langs = []
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
|
||||
lang = get_language_from_stream(stream.language_code)
|
||||
|
||||
if not lang and config.treat_und_as_first:
|
||||
lang = list(config.lang_list)[0]
|
||||
|
||||
if lang:
|
||||
embedded_langs.append(lang)
|
||||
embedded_count += 1
|
||||
|
||||
if embedded_count:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListEmbeddedSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_type=plex_item.type, item_title=item_title, base_title=base_title,
|
||||
randomize=timestamp()),
|
||||
title=_(u"%(part_summary)sEmbedded subtitles (%(languages)s)",
|
||||
part_summary=part_index_addon,
|
||||
languages=", ".join(display_language(l) for l in set(embedded_langs))),
|
||||
summary=_(u"Extract and activate embedded subtitle streams")
|
||||
))
|
||||
|
||||
ignore_title = item_title
|
||||
if current_kind == "episode":
|
||||
ignore_title = get_item_title(item)
|
||||
add_ignore_options(oc, "videos", title=ignore_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
subtitle_storage.destroy()
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/current_sub/{rating_key}/{part_id}')
|
||||
@debounce
|
||||
def SubtitleOptionsMenu(**kwargs):
|
||||
oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
|
||||
oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True, header=kwargs.get("header"),
|
||||
message=kwargs.get("message"))
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
language = kwargs["language"]
|
||||
current_data = kwargs["current_data"]
|
||||
current_data = unicode(kwargs["current_data"])
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
subs_count = stored_subs.count(part_id, language)
|
||||
kwargs.pop("randomize")
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
|
||||
title=kwargs["title"], randomize=timestamp()),
|
||||
title=u"< Back to %s" % kwargs["title"],
|
||||
summary=kwargs["current_data"],
|
||||
title=_(u"< Back to %s", kwargs["title"]),
|
||||
summary=current_data,
|
||||
thumb=default_thumb
|
||||
))
|
||||
if subs_count:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListStoredSubsForItemMenu, randomize=timestamp(), **kwargs),
|
||||
title=_(u"Select active %(language)s subtitle", language=kwargs["language_name"]),
|
||||
summary=_(u"%(count)d subtitles in storage", count=subs_count)
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"List %s subtitles" % kwargs["language_name"],
|
||||
summary=kwargs["current_data"]
|
||||
title=_(u"List available %(language)s subtitles", language=kwargs["language_name"]),
|
||||
summary=current_data
|
||||
))
|
||||
if current_sub:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"Modify %s subtitle" % kwargs["language_name"],
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
title=_(u"Modify current %(language)s subtitle", language=kwargs["language_name"]),
|
||||
summary=_(u"Currently applied mods: %(mod_list)s",
|
||||
mod_list=(", ".join(current_sub.mods) if current_sub.mods else "none"))
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(BlacklistSubtitleMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"Blacklist %s subtitle and search for a new one" % kwargs["language_name"],
|
||||
summary=current_data
|
||||
))
|
||||
if current_sub.provider_name != "embedded":
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(BlacklistSubtitleMenu, randomize=timestamp(), **kwargs),
|
||||
title=_(u"Blacklist current %(language)s subtitle and search for a new one",
|
||||
language=kwargs["language_name"]),
|
||||
summary=current_data
|
||||
))
|
||||
|
||||
current_bl, subs = stored_subs.get_blacklist(part_id, language)
|
||||
if current_bl:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ManageBlacklistMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"Manage blacklist (%s contained)" % len(current_bl),
|
||||
summary=u"Inspect currently blacklisted subtitles"
|
||||
title=_(u"Manage blacklist (%(amount)s contained)", amount=len(current_bl)),
|
||||
summary=_(u"Inspect currently blacklisted subtitles")
|
||||
))
|
||||
|
||||
storage.destroy()
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/list_stored_subs/{rating_key}/{part_id}')
|
||||
def ListStoredSubsForItemMenu(**kwargs):
|
||||
oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
language = Language.fromietf(kwargs["language"])
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
all_subs = stored_subs.get_all(part_id, language)
|
||||
kwargs.pop("randomize")
|
||||
|
||||
for key, subtitle in sorted(filter(lambda x: x[0] not in ("current", "blacklist"), all_subs.items()),
|
||||
key=lambda x: x[1].date_added, reverse=True):
|
||||
is_current = key == all_subs["current"]
|
||||
|
||||
summary = _(u"added: %(date_added)s, %(mode)s, Language: %(language)s, Score: %(score)i, Storage: "
|
||||
u"%(storage_type)s",
|
||||
date_added=df(subtitle.date_added),
|
||||
mode=subtitle.mode_verbose,
|
||||
language=display_language(language),
|
||||
score=subtitle.score,
|
||||
storage_type=subtitle.storage_type)
|
||||
|
||||
sub_name = subtitle.provider_name
|
||||
if sub_name == "embedded":
|
||||
sub_name += " (%s)" % subtitle.id
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SelectStoredSubForItemMenu, randomize=timestamp(), sub_key="__".join(key), **kwargs),
|
||||
title=_(u"%(current_state)s%(subtitle_name)s, Score: %(score)s",
|
||||
current_state=_("Current: ") if is_current else _("Stored: "),
|
||||
subtitle_name=sub_name,
|
||||
score=subtitle.score),
|
||||
summary=summary
|
||||
))
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/set_current_sub/{rating_key}/{part_id}')
|
||||
@debounce
|
||||
def SelectStoredSubForItemMenu(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
language = Language.fromietf(kwargs["language"])
|
||||
item_type = kwargs["item_type"]
|
||||
sub_key = tuple(kwargs.pop("sub_key").split("__"))
|
||||
|
||||
plex_item = get_item(rating_key)
|
||||
storage = get_subtitle_storage()
|
||||
stored_subs = storage.load(plex_item.rating_key)
|
||||
|
||||
subtitles = stored_subs.get_all(part_id, language)
|
||||
subtitle = subtitles[sub_key]
|
||||
|
||||
subtitles["current"] = sub_key
|
||||
|
||||
save_stored_sub(subtitle, rating_key, part_id, language, item_type, plex_item=plex_item, storage=storage,
|
||||
stored_subs=stored_subs)
|
||||
|
||||
storage.destroy()
|
||||
|
||||
kwargs.pop("randomize")
|
||||
|
||||
kwargs["header"] = _("Success")
|
||||
kwargs["message"] = _("Subtitle saved to disk")
|
||||
|
||||
return SubtitleOptionsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/blacklist_recent/{language}')
|
||||
@route(PREFIX + '/item/blacklist_recent')
|
||||
def BlacklistRecentSubtitleMenu(**kwargs):
|
||||
@@ -284,6 +423,7 @@ def ManageBlacklistMenu(**kwargs):
|
||||
part_id = kwargs["part_id"]
|
||||
language = kwargs["language"]
|
||||
remove_sub_key = kwargs.pop("remove_sub_key", None)
|
||||
current_data = unicode(kwargs["current_data"])
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
current_bl, subs = stored_subs.get_blacklist(part_id, language)
|
||||
@@ -299,8 +439,8 @@ def ManageBlacklistMenu(**kwargs):
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
|
||||
title=kwargs["title"], randomize=timestamp()),
|
||||
title=u"< Back to %s" % kwargs["title"],
|
||||
summary=kwargs["current_data"],
|
||||
title=_(u"< Back to %s", kwargs["title"]),
|
||||
summary=current_data,
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
@@ -310,15 +450,19 @@ def ManageBlacklistMenu(**kwargs):
|
||||
|
||||
for sub_key, data in sorted(current_bl.iteritems(), key=sorter, reverse=True):
|
||||
provider_name, subtitle_id = sub_key
|
||||
title = u"%s, %s (added: %s, %s), Language: " \
|
||||
u"%s, Score: %i, Storage: %s" % (provider_name, subtitle_id, df(data["date_added"]),
|
||||
current_sub.get_mode_verbose(data["mode"]),
|
||||
display_language(Language.fromietf(language)), data["score"],
|
||||
data["storage_type"])
|
||||
title = _(u"%(provider_name)s, %(subtitle_id)s (added: %(date_added)s, %(mode)s), Language: %(language)s, "
|
||||
u"Score: %(score)i, Storage: %(storage_type)s",
|
||||
provider_name=provider_name,
|
||||
subtitle_id=subtitle_id,
|
||||
date_added=df(data["date_added"]),
|
||||
mode=current_sub.get_mode_verbose(data["mode"]),
|
||||
language=display_language(Language.fromietf(language)),
|
||||
score=data["score"],
|
||||
storage_type=data["storage_type"])
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ManageBlacklistMenu, remove_sub_key="__".join(sub_key), randomize=timestamp(), **kwargs),
|
||||
title=title,
|
||||
summary=u"Remove subtitle from blacklist"
|
||||
summary=_(u"Remove subtitle from blacklist")
|
||||
))
|
||||
|
||||
storage.destroy()
|
||||
@@ -327,7 +471,6 @@ def ManageBlacklistMenu(**kwargs):
|
||||
|
||||
|
||||
@route(PREFIX + '/item/search/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item_title=None, filename=None,
|
||||
item_type="episode", language=None, language_name=None, force=False, current_id=None,
|
||||
current_data=None,
|
||||
@@ -337,6 +480,8 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
running = scheduler.is_task_running("AvailableSubsForItem")
|
||||
search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)
|
||||
|
||||
current_data = unicode(current_data) if current_data else None
|
||||
|
||||
if (search_results is None or force) and not running:
|
||||
scheduler.dispatch_task("AvailableSubsForItem", rating_key=rating_key, item_type=item_type, part_id=part_id,
|
||||
language=language)
|
||||
@@ -345,7 +490,7 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
oc = SubFolderObjectContainer(title2=unicode(title), replace_parent=True)
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, rating_key=rating_key, item_title=item_title, title=title, randomize=timestamp()),
|
||||
title=u"< Back to %s" % title,
|
||||
title=_(u"< Back to %s", title),
|
||||
summary=current_data,
|
||||
thumb=default_thumb
|
||||
))
|
||||
@@ -353,7 +498,7 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
plex_part = None
|
||||
if not config.low_impact_mode:
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True)
|
||||
|
||||
if not scanned_parts:
|
||||
Log.Error("Couldn't list available subtitles for %s", rating_key)
|
||||
@@ -363,20 +508,24 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
|
||||
video_display_data = [video.format] if video.format else []
|
||||
if video.release_group:
|
||||
video_display_data.append(u"by %s" % video.release_group)
|
||||
video_display_data.append(unicode(_(u"by %(release_group)s", release_group=video.release_group)))
|
||||
video_display_data = " ".join(video_display_data)
|
||||
else:
|
||||
video_display_data = metadata["filename"]
|
||||
|
||||
current_display = (u"Current: %s (%s) " % (current_provider, current_score) if current_provider else "")
|
||||
current_display = (_(u"Current: %(provider_name)s (%(score)s) ",
|
||||
provider_name=current_provider,
|
||||
score=current_score if current_provider else ""))
|
||||
if not running:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, item_title=item_title, language=language,
|
||||
filename=filename, part_id=part_id, title=title, current_id=current_id, force=True,
|
||||
current_provider=current_provider, current_score=current_score,
|
||||
current_data=current_data, item_type=item_type, randomize=timestamp()),
|
||||
title=u"Search for %s subs (%s)" % (get_language(language).name, video_display_data),
|
||||
summary=u"%sFilename: %s" % (current_display, filename),
|
||||
title=_(u"Search for %(language)s subs (%(video_data)s)",
|
||||
language=get_language(language).name,
|
||||
video_data=video_display_data),
|
||||
summary=_(u"%(current_info)sFilename: %(filename)s", current_info=current_display, filename=filename),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
@@ -387,8 +536,8 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
part_id=part_id, title=title, current_id=current_id, item_type=item_type,
|
||||
current_provider=current_provider, current_score=current_score,
|
||||
randomize=timestamp()),
|
||||
title=u"No subtitles found",
|
||||
summary=u"%sFilename: %s" % (current_display, filename),
|
||||
title=_(u"No subtitles found"),
|
||||
summary=_(u"%(current_info)sFilename: %(filename)s", current_info=current_display, filename=filename),
|
||||
thumb=default_thumb
|
||||
))
|
||||
else:
|
||||
@@ -398,9 +547,10 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
part_id=part_id, title=title, current_id=current_id, item_type=item_type,
|
||||
current_provider=current_provider, current_score=current_score,
|
||||
randomize=timestamp()),
|
||||
title=u"Searching for %s subs (%s), refresh here ..." % (display_language(get_language(language)),
|
||||
video_display_data),
|
||||
summary=u"%sFilename: %s" % (current_display, filename),
|
||||
title=_(u"Searching for %(language)s subs (%(video_data)s), refresh here ...",
|
||||
language=display_language(get_language(language)),
|
||||
video_data=video_display_data),
|
||||
summary=_(u"%(current_info)sFilename: %(filename)s", current_info=current_display, filename=filename),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
@@ -422,16 +572,25 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
wrong_fps_addon = ""
|
||||
if subtitle.wrong_fps:
|
||||
if plex_part:
|
||||
wrong_fps_addon = " (wrong FPS, sub: %s, media: %s)" % (subtitle.fps, plex_part.fps)
|
||||
wrong_fps_addon = _(" (wrong FPS, sub: %(subtitle_fps)s, media: %(media_fps)s)",
|
||||
subtitle_fps=subtitle.fps,
|
||||
media_fps=plex_part.fps)
|
||||
else:
|
||||
wrong_fps_addon = " (wrong FPS, sub: %s, media: unknown, low impact mode)" % subtitle.fps
|
||||
wrong_fps_addon = _(" (wrong FPS, sub: %(subtitle_fps)s, media: unknown, low impact mode)",
|
||||
subtitle_fps=subtitle.fps)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerDownloadSubtitle, rating_key=rating_key, randomize=timestamp(), item_title=item_title,
|
||||
subtitle_id=str(subtitle.id), language=language),
|
||||
title=u"%s%s: %s, score: %s%s" % (bl_addon, "Available" if current_id != subtitle.id else "Current",
|
||||
subtitle.provider_name, subtitle.score, wrong_fps_addon),
|
||||
summary=u"Release: %s, Matches: %s" % (subtitle.release_info, ", ".join(subtitle.matches)),
|
||||
title=_(u"%(blacklisted_state)s%(current_state)s: %(provider_name)s, score: %(score)s%(wrong_fps_state)s",
|
||||
blacklisted_state=bl_addon,
|
||||
current_state=_("Available") if current_id != subtitle.id else _("Current"),
|
||||
provider_name=subtitle.provider_name,
|
||||
score=subtitle.score,
|
||||
wrong_fps_state=wrong_fps_addon),
|
||||
summary=_(u"Release: %(release_info)s, Matches: %(matches)s",
|
||||
release_info=subtitle.release_info,
|
||||
matches=", ".join(subtitle.matches)),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
@@ -445,7 +604,7 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None, language=None, randomize=None):
|
||||
from interface.main import fatality
|
||||
|
||||
set_refresh_menu_state("Downloading subtitle for %s" % item_title or rating_key)
|
||||
set_refresh_menu_state(_("Downloading subtitle for %(title_or_id)s", title_or_id=item_title or rating_key))
|
||||
search_results = get_item_task_data("AvailableSubsForItem", rating_key, language)
|
||||
|
||||
download_subtitle = None
|
||||
@@ -462,3 +621,81 @@ def TriggerDownloadSubtitle(rating_key=None, subtitle_id=None, item_title=None,
|
||||
scheduler.clear_task_data("AvailableSubsForItem")
|
||||
|
||||
return fatality(randomize=timestamp(), header=" ", replace_parent=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/embedded/{rating_key}/{part_id}')
|
||||
def ListEmbeddedSubsForItemMenu(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
title = kwargs["title"]
|
||||
kwargs.pop("randomize")
|
||||
|
||||
oc = SubFolderObjectContainer(title2=title, replace_parent=True)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
|
||||
base_title=kwargs["base_title"], title=kwargs["item_title"], randomize=timestamp()),
|
||||
title=_("< Back to %s", kwargs["title"]),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
plex_item = get_item(rating_key)
|
||||
part = get_part(plex_item, part_id)
|
||||
|
||||
if part:
|
||||
for stream_data in get_embedded_subtitle_streams(part, skip_duplicate_unknown=False):
|
||||
language = stream_data["language"]
|
||||
is_unknown = stream_data["is_unknown"]
|
||||
stream = stream_data["stream"]
|
||||
is_forced = stream_data["is_forced"]
|
||||
|
||||
if language:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
|
||||
stream_index=str(stream.index), language=language, with_mods=True, **kwargs),
|
||||
title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
|
||||
u"%(stream_title)s with default mods",
|
||||
stream_index=stream.index,
|
||||
language=display_language(language),
|
||||
unknown_state=_(" (unknown)") if is_unknown else "",
|
||||
forced_state=_(" (forced)") if is_forced else "",
|
||||
stream_title=" (\"%s\")" % stream.title if stream.title else ""),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
|
||||
stream_index=str(stream.index), language=language, **kwargs),
|
||||
title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
|
||||
u"%(stream_title)s",
|
||||
stream_index=stream.index,
|
||||
language=display_language(language),
|
||||
unknown_state=_(" (unknown)") if is_unknown else "",
|
||||
forced_state=_(" (forced)") if is_forced else "",
|
||||
stream_title=" (\"%s\")" % stream.title if stream.title else ""),
|
||||
))
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/extract_embedded/{rating_key}/{part_id}/{stream_index}')
|
||||
@debounce
|
||||
def TriggerExtractEmbeddedSubForItemMenu(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs.get("part_id")
|
||||
stream_index = kwargs.get("stream_index")
|
||||
|
||||
Thread.Create(extract_embedded_sub, **kwargs)
|
||||
header = _(u"Extracting of embedded subtitle %s of part %s:%s triggered",
|
||||
stream_index, rating_key, part_id)
|
||||
|
||||
kwargs.pop("randomize")
|
||||
kwargs.pop("item_type")
|
||||
kwargs.pop("stream_index")
|
||||
kwargs.pop("part_id")
|
||||
kwargs.pop("with_mods", False)
|
||||
kwargs.pop("language")
|
||||
kwargs["title"] = kwargs["item_title"]
|
||||
kwargs["header"] = header
|
||||
kwargs["message"] = header
|
||||
|
||||
return ItemDetailsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ from support.ignore import ignore_list
|
||||
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, get_item_title
|
||||
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_ignore_options, \
|
||||
ObjectContainer, route, handler
|
||||
from support.i18n import _
|
||||
from item_details import ItemDetailsMenu
|
||||
|
||||
|
||||
@@ -35,25 +36,34 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
if config.lock_menu and not config.pin_correct:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(PinMenu, randomize=timestamp()),
|
||||
title=pad_title("Enter PIN"),
|
||||
summary="The owner has restricted the access to this menu. Please enter the correct pin",
|
||||
title=pad_title(_("Enter PIN")),
|
||||
summary=_("The owner has restricted the access to this menu. Please enter the correct pin"),
|
||||
))
|
||||
return oc
|
||||
|
||||
if not config.permissions_ok and config.missing_permissions:
|
||||
for title, path in config.missing_permissions:
|
||||
if not isinstance(config.missing_permissions, list):
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, randomize=timestamp()),
|
||||
title=pad_title("Insufficient permissions"),
|
||||
summary="Insufficient permissions on library %s, folder: %s" % (title, path),
|
||||
title=pad_title(_("Insufficient permissions")),
|
||||
summary=config.missing_permissions,
|
||||
))
|
||||
else:
|
||||
for title, path in config.missing_permissions:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, randomize=timestamp()),
|
||||
title=pad_title(_("Insufficient permissions")),
|
||||
summary=_("Insufficient permissions on library %(title)s, folder: %(path)s",
|
||||
title=title,
|
||||
path=path),
|
||||
))
|
||||
return oc
|
||||
|
||||
if not config.enabled_sections:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, randomize=timestamp()),
|
||||
title=pad_title("I'm not enabled!"),
|
||||
summary="Please enable me for some of your libraries in your server settings; currently I do nothing",
|
||||
title=pad_title(_("I'm not enabled!")),
|
||||
summary=_("Please enable me for some of your libraries in your server settings; currently I do nothing"),
|
||||
))
|
||||
return oc
|
||||
|
||||
@@ -61,46 +71,42 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
if Dict["current_refresh_state"]:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, force_title=" ", randomize=timestamp()),
|
||||
title=pad_title("Working ... refresh here"),
|
||||
summary="Current state: %s; Last state: %s" % (
|
||||
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
|
||||
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
|
||||
title=pad_title(_("Working ... refresh here")),
|
||||
summary=_("Current state: %s; Last state: %s",
|
||||
(Dict["current_refresh_state"] or _("Idle")) if "current_refresh_state" in Dict else _("Idle"),
|
||||
(Dict["last_refresh_state"] or _("None")) if "last_refresh_state" in Dict else _("None")
|
||||
)
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(OnDeckMenu),
|
||||
title="On-deck items",
|
||||
summary="Shows the current on deck items and allows you to individually (force-) refresh their metadata/"
|
||||
"subtitles.",
|
||||
title=_("On-deck items"),
|
||||
summary=_("Shows the current on deck items and allows you to individually (force-) refresh their metadata/subtitles."),
|
||||
thumb=R("icon-ondeck.jpg")
|
||||
))
|
||||
if "last_played_items" in Dict and Dict["last_played_items"]:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentlyPlayedMenu),
|
||||
title=pad_title("Recently played items"),
|
||||
summary="Shows the %i recently played items and allows you to individually (force-) refresh their "
|
||||
"metadata/subtitles." % config.store_recently_played_amount,
|
||||
title=pad_title(_("Recently played items")),
|
||||
summary=_("Shows the %s recently played items and allows you to individually (force-) refresh their metadata/subtitles.", config.store_recently_played_amount),
|
||||
thumb=R("icon-played.jpg")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentlyAddedMenu),
|
||||
title="Recently-added items",
|
||||
summary="Shows the recently added items per section.",
|
||||
title=_("Recently-added items"),
|
||||
summary=_("Shows the recently added items per section."),
|
||||
thumb=R("icon-added.jpg")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentMissingSubtitlesMenu, randomize=timestamp()),
|
||||
title="Show recently added items with missing subtitles",
|
||||
summary="Lists items with missing subtitles. Click on \"Find recent items with missing subs\" "
|
||||
"to update list",
|
||||
title=_("Show recently added items with missing subtitles"),
|
||||
summary=_("Lists items with missing subtitles. Click on Find recent items with missing subs to update list"),
|
||||
thumb=R("icon-missing.jpg")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SectionsMenu),
|
||||
title="Browse all items",
|
||||
summary="Go through your whole library and manage your ignore list. You can also "
|
||||
"(force-) refresh the metadata/subtitles of individual items.",
|
||||
title=_("Browse all items"),
|
||||
summary=_("Go through your whole library and manage your ignore list. You can also (force-) refresh the metadata/subtitles of individual items."),
|
||||
thumb=R("icon-browse.jpg")
|
||||
))
|
||||
|
||||
@@ -108,41 +114,46 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
task = scheduler.task(task_name)
|
||||
|
||||
if task.ready_for_display:
|
||||
task_state = "Running: %s/%s (%s%%)" % (task.items_done, task.items_searching, task.percentage)
|
||||
task_state = _("Running: %(items_done)s/%(items_searching)s (%(percentage)s%%)",
|
||||
items_done=task.items_done,
|
||||
items_searching=task.items_searching,
|
||||
percentage=task.percentage)
|
||||
else:
|
||||
task_state = "Last scheduler run: %s; Next scheduled run: %s; Last runtime: %s" % (
|
||||
df(scheduler.last_run(task_name)) or "never",
|
||||
df(scheduler.next_run(task_name)) or "never",
|
||||
lr = scheduler.last_run(task_name)
|
||||
nr = scheduler.next_run(task_name)
|
||||
task_state = _("Last run: %s; Next scheduled run: %s; Last runtime: %s",
|
||||
df(scheduler.last_run(task_name)) if lr else "never",
|
||||
df(scheduler.next_run(task_name)) if nr else "never",
|
||||
str(task.last_run_time).split(".")[0])
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RefreshMissing, randomize=timestamp()),
|
||||
title="Search for missing subtitles (in recently-added items, max-age: %s)" % Prefs[
|
||||
"scheduler.item_is_recent_age"],
|
||||
summary="Automatically run periodically by the scheduler, if configured. %s" % task_state,
|
||||
title=_("Search for missing subtitles (in recently-added items, max-age: %s)", Prefs[
|
||||
"scheduler.item_is_recent_age"]),
|
||||
summary=_("Automatically run periodically by the scheduler, if configured. %s", task_state),
|
||||
thumb=R("icon-search.jpg")
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(IgnoreListMenu),
|
||||
title="Display ignore list (%d)" % len(ignore_list),
|
||||
summary="Show the current ignore list (mainly used for the automatic tasks)",
|
||||
title=_("Display ignore list (%(ignored_count)d)", ignored_count=len(ignore_list)),
|
||||
summary=_("Show the current ignore list (mainly used for the automatic tasks)"),
|
||||
thumb=R("icon-ignore.jpg")
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(HistoryMenu),
|
||||
title="History",
|
||||
summary="Show the last %i downloaded subtitles" % int(Prefs["history_size"]),
|
||||
title=_("History"),
|
||||
summary=_("Show the last %i downloaded subtitles", int(Prefs["history_size"])),
|
||||
thumb=R("icon-history.jpg")
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, force_title=" ", randomize=timestamp()),
|
||||
title=pad_title("Refresh"),
|
||||
summary="Current state: %s; Last state: %s" % (
|
||||
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
|
||||
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
|
||||
title=pad_title(_("Refresh")),
|
||||
summary=_("Current state: %s; Last state: %s",
|
||||
(Dict["current_refresh_state"] or _("Idle")) if "current_refresh_state" in Dict else _("Idle"),
|
||||
(Dict["last_refresh_state"] or _("None")) if "last_refresh_state" in Dict else _("None")
|
||||
),
|
||||
thumb=R("icon-refresh.jpg")
|
||||
))
|
||||
@@ -151,15 +162,31 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
if config.pin:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ClearPin, randomize=timestamp()),
|
||||
title=pad_title("Re-lock menu(s)"),
|
||||
summary="Enabled the PIN again for menu(s)"
|
||||
title=pad_title(_("Re-lock menu(s)")),
|
||||
summary=_("Enabled the PIN again for menu(s)")
|
||||
))
|
||||
|
||||
if not only_refresh:
|
||||
if "provider_throttle" in Dict and Dict["provider_throttle"].keys():
|
||||
summary_data = []
|
||||
for provider, data in Dict["provider_throttle"].iteritems():
|
||||
reason, until, desc = data
|
||||
summary_data.append(unicode(_("%(throttled_provider)s until %(until_date)s (%(reason)s)",
|
||||
throttled_provider=provider,
|
||||
until_date=until.strftime("%y/%m/%d %H:%M"),
|
||||
reason=reason)))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(fatality, force_title=" ", randomize=timestamp()),
|
||||
title=pad_title(_("Throttled providers: %s", ", ".join(Dict["provider_throttle"].keys()))),
|
||||
summary=", ".join(summary_data),
|
||||
thumb=R("icon-throttled.jpg")
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(AdvancedMenu),
|
||||
title=pad_title("Advanced functions"),
|
||||
summary="Use at your own risk",
|
||||
title=pad_title(_("Advanced functions")),
|
||||
summary=_("Use at your own risk"),
|
||||
thumb=R("icon-advanced.jpg")
|
||||
))
|
||||
|
||||
@@ -173,12 +200,12 @@ def OnDeckMenu(message=None):
|
||||
:param message:
|
||||
:return:
|
||||
"""
|
||||
return mergedItemsMenu(title="Items On Deck", base_title="Items On Deck", itemGetter=get_on_deck_items)
|
||||
return mergedItemsMenu(title=_("Items On Deck"), base_title=_("Items On Deck"), itemGetter=get_on_deck_items)
|
||||
|
||||
|
||||
@route(PREFIX + '/recently_played')
|
||||
def RecentlyPlayedMenu():
|
||||
base_title = "Recently Played"
|
||||
base_title = _("Recently Played")
|
||||
oc = SubFolderObjectContainer(title2=base_title, replace_parent=True)
|
||||
|
||||
for item in [get_item(rating_key) for rating_key in Dict["last_played_items"]]:
|
||||
@@ -206,13 +233,13 @@ def RecentlyAddedMenu(message=None):
|
||||
:param message:
|
||||
:return:
|
||||
"""
|
||||
return SectionsMenu(base_title="Recently added", section_items_key="recently_added", ignore_options=False)
|
||||
return SectionsMenu(base_title=_("Recently added"), section_items_key="recently_added", ignore_options=False)
|
||||
|
||||
|
||||
@route(PREFIX + '/recent', force=bool)
|
||||
@debounce
|
||||
def RecentMissingSubtitlesMenu(force=False, randomize=None):
|
||||
title = "Items with missing subtitles"
|
||||
title = _("Items with missing subtitles")
|
||||
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
|
||||
|
||||
running = scheduler.is_task_running("MissingSubtitles")
|
||||
@@ -226,13 +253,13 @@ def RecentMissingSubtitlesMenu(force=False, randomize=None):
|
||||
if not running:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentMissingSubtitlesMenu, force=True, randomize=timestamp()),
|
||||
title=u"Find recent items with missing subtitles",
|
||||
title=_(u"Find recent items with missing subtitles"),
|
||||
thumb=default_thumb
|
||||
))
|
||||
else:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentMissingSubtitlesMenu, force=False, randomize=timestamp()),
|
||||
title=u"Updating, refresh here ...",
|
||||
title=_(u"Updating, refresh here ..."),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
@@ -242,7 +269,7 @@ def RecentMissingSubtitlesMenu(force=False, randomize=None):
|
||||
key=Callback(ItemDetailsMenu, title=title + " > " + item_title, item_title=item_title,
|
||||
rating_key=item_id),
|
||||
title=item_title,
|
||||
summary="Missing: %s" % ", ".join(display_language(l) for l in missing_languages),
|
||||
summary=_("Missing: %s", ", ".join(display_language(l) for l in missing_languages)),
|
||||
thumb=get_item_thumb(item) or default_thumb
|
||||
))
|
||||
|
||||
@@ -300,18 +327,25 @@ def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
|
||||
"""
|
||||
is_ignored = rating_key in ignore_list[kind]
|
||||
if not sure:
|
||||
oc = SubFolderObjectContainer(no_history=True, replace_parent=True, title1="%s %s %s %s the ignore list" % (
|
||||
"Add" if not is_ignored else "Remove", ignore_list.verbose(kind), title,
|
||||
"to" if not is_ignored else "from"), title2="Are you sure?")
|
||||
t = u"Add %(kind)s %(title)s to the ignore list"
|
||||
if is_ignored:
|
||||
t = u"Remove %(kind)s %(title)s from the ignore list"
|
||||
oc = SubFolderObjectContainer(no_history=True, replace_parent=True,
|
||||
title1=_(t,
|
||||
kind=ignore_list.verbose(kind),
|
||||
title=title
|
||||
),
|
||||
title2=_("Are you sure?"))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(IgnoreMenu, kind=kind, rating_key=rating_key, title=title, sure=True,
|
||||
todo="add" if not is_ignored else "remove"),
|
||||
title=pad_title("Are you sure?"),
|
||||
title=pad_title(_("Are you sure?")),
|
||||
))
|
||||
return oc
|
||||
|
||||
rel = ignore_list[kind]
|
||||
dont_change = False
|
||||
state = None
|
||||
if todo == "remove":
|
||||
if not is_ignored:
|
||||
dont_change = True
|
||||
@@ -320,7 +354,6 @@ def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
|
||||
Log.Info("Removed %s (%s) from the ignore list", title, rating_key)
|
||||
ignore_list.remove_title(kind, rating_key)
|
||||
ignore_list.save()
|
||||
state = "removed from"
|
||||
elif todo == "add":
|
||||
if is_ignored:
|
||||
dont_change = True
|
||||
@@ -329,25 +362,29 @@ def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
|
||||
Log.Info("Added %s (%s) to the ignore list", title, rating_key)
|
||||
ignore_list.add_title(kind, rating_key, title)
|
||||
ignore_list.save()
|
||||
state = "added to"
|
||||
else:
|
||||
dont_change = True
|
||||
|
||||
if dont_change:
|
||||
return fatality(force_title=" ", header="Didn't change the ignore list", no_history=True)
|
||||
return fatality(force_title=" ", header=_("Didn't change the ignore list"), no_history=True)
|
||||
|
||||
return fatality(force_title=" ", header="%s %s the ignore list" % (title, state), no_history=True)
|
||||
t = "%(title)s added to the ignore list"
|
||||
if todo == "remove":
|
||||
t = "%(title)s removed from the ignore list"
|
||||
return fatality(force_title=" ", header=_(t,
|
||||
title=title,),
|
||||
no_history=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/sections')
|
||||
def SectionsMenu(base_title="Sections", section_items_key="all", ignore_options=True):
|
||||
def SectionsMenu(base_title=_("Sections"), section_items_key="all", ignore_options=True):
|
||||
"""
|
||||
displays the menu for all sections
|
||||
:return:
|
||||
"""
|
||||
items = get_all_items("sections")
|
||||
|
||||
return dig_tree(SubFolderObjectContainer(title2="Sections", no_cache=True, no_history=True), items, None,
|
||||
return dig_tree(SubFolderObjectContainer(title2=_("Sections"), no_cache=True, no_history=True), items, None,
|
||||
menu_determination_callback=determine_section_display, pass_kwargs={"base_title": base_title,
|
||||
"section_items_key": section_items_key,
|
||||
"ignore_options": ignore_options},
|
||||
@@ -408,7 +445,7 @@ def SectionFirstLetterMenu(rating_key, title=None, base_title=None, section_titl
|
||||
add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SectionMenu, title="All", base_title=title, rating_key=rating_key, ignore_options=False),
|
||||
key=Callback(SectionMenu, title=_("All"), base_title=title, rating_key=rating_key, ignore_options=False),
|
||||
title="All"
|
||||
)
|
||||
)
|
||||
|
||||
+183
-33
@@ -2,22 +2,29 @@
|
||||
import locale
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import traceback
|
||||
|
||||
import logger
|
||||
import copy
|
||||
|
||||
from requests import HTTPError
|
||||
from item_details import ItemDetailsMenu
|
||||
from refresh_item import RefreshItem
|
||||
from menu_helpers import add_ignore_options, dig_tree, set_refresh_menu_state, \
|
||||
should_display_ignore, default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route
|
||||
default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route, \
|
||||
extract_embedded_sub
|
||||
from main import fatality, IgnoreMenu
|
||||
from advanced import DispatchRestart
|
||||
from subzero.constants import ART, PREFIX, DEPENDENCY_MODULE_NAMES
|
||||
from support.plex_media import get_all_parts, get_embedded_subtitle_streams
|
||||
from support.scheduler import scheduler
|
||||
from support.config import config
|
||||
from support.helpers import timestamp, df, display_language
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_all_items, get_items_info, \
|
||||
get_item_kind_from_rating_key, get_item
|
||||
from support.items import get_all_items, get_items_info, get_item_kind_from_rating_key, get_item, MI_KEY, get_item_title
|
||||
from support.storage import get_subtitle_storage
|
||||
from support.i18n import _
|
||||
|
||||
# init GUI
|
||||
ObjectContainer.art = R(ART)
|
||||
@@ -25,6 +32,7 @@ ObjectContainer.no_cache = True
|
||||
|
||||
# default thumb for DirectoryObjects
|
||||
DirectoryObject.thumb = default_thumb
|
||||
Plugin.AddViewGroup("full_details", viewMode="InfoList", mediaType="items", type="list", summary=2)
|
||||
|
||||
|
||||
@route(PREFIX + '/section/firstLetter/key', deeper=bool)
|
||||
@@ -51,7 +59,7 @@ def FirstLetterMetadataMenu(rating_key, key, title=None, base_title=None, displa
|
||||
|
||||
@route(PREFIX + '/section/contents', display_items=bool)
|
||||
def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, previous_item_type=None,
|
||||
previous_rating_key=None, randomize=None):
|
||||
previous_rating_key=None, message=None, header=None, randomize=None):
|
||||
"""
|
||||
displays the contents of a section based on whether it has a deeper tree or not (movies->movie (item) list; series->series list)
|
||||
:param rating_key:
|
||||
@@ -65,49 +73,80 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
title = unicode(title)
|
||||
item_title = title
|
||||
title = base_title + " > " + title
|
||||
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
|
||||
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True, header=header, message=message,
|
||||
view_group="full_details")
|
||||
|
||||
current_kind = get_item_kind_from_rating_key(rating_key)
|
||||
|
||||
if display_items:
|
||||
timeout = 30
|
||||
show = None
|
||||
|
||||
# add back to series for season
|
||||
if current_kind == "season":
|
||||
timeout = 360
|
||||
timeout = 720
|
||||
|
||||
show = get_item(previous_rating_key)
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(MetadataMenu, rating_key=show.rating_key, title=show.title, base_title=show.section.title,
|
||||
previous_item_type="section", display_items=True, randomize=timestamp()),
|
||||
title=u"< Back to %s" % show.title,
|
||||
title=_(u"< Back to %s", show.title),
|
||||
thumb=show.thumb or default_thumb
|
||||
))
|
||||
elif current_kind == "series":
|
||||
timeout = 1800
|
||||
# it shouldn't take more than 6 minutes to scan all of a series' files and determine the force refresh
|
||||
timeout = 3600
|
||||
|
||||
items = get_all_items(key="children", value=rating_key, base="library/metadata")
|
||||
kind, deeper = get_items_info(items)
|
||||
dig_tree(oc, items, MetadataMenu,
|
||||
pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": kind,
|
||||
"previous_rating_key": rating_key})
|
||||
|
||||
# we don't know exactly where we are here, only add ignore option to series
|
||||
if should_display_ignore(items, previous=previous_item_type):
|
||||
add_ignore_options(oc, "series", title=item_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
if current_kind in ("series", "season"):
|
||||
item = get_item(rating_key)
|
||||
sub_title = get_item_title(item)
|
||||
add_ignore_options(oc, current_kind, title=sub_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
|
||||
# mass-extract embedded
|
||||
if current_kind == "season" and config.plex_transcoder:
|
||||
for lang in config.lang_list:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=show.section.title, display_items=display_items, item_title=item_title,
|
||||
title=title,
|
||||
previous_item_type=previous_item_type, with_mods=True,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=_(u"Extract missing %(language)s embedded subtitles", language=display_language(lang)),
|
||||
summary=_("Extracts the not yet extracted embedded subtitles of all episodes for the current "
|
||||
"season with all configured default modifications")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=show.section.title, display_items=display_items, item_title=item_title,
|
||||
title=title, force=True,
|
||||
previous_item_type=previous_item_type, with_mods=True,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=_(u"Extract and activate %(language)s embedded subtitles", language=display_language(lang)),
|
||||
summary=_("Extracts embedded subtitles of all episodes for the current season "
|
||||
"with all configured default modifications")
|
||||
))
|
||||
|
||||
# add refresh
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RefreshItem, rating_key=rating_key, item_title=title, refresh_kind=current_kind,
|
||||
previous_rating_key=previous_rating_key, timeout=timeout * 1000, randomize=timestamp()),
|
||||
title=u"Refresh: %s" % item_title,
|
||||
summary="Refreshes the %s, possibly searching for missing and picking up new subtitles on disk" % current_kind
|
||||
title=_(u"Refresh: %s", item_title),
|
||||
summary=_("Refreshes %(the_movie_series_season_episode)s, possibly searching for missing and picking up "
|
||||
"new subtitles on disk", the_movie_series_season_episode=_(u"the %s" % current_kind))
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RefreshItem, rating_key=rating_key, item_title=title, force=True,
|
||||
refresh_kind=current_kind, previous_rating_key=previous_rating_key, timeout=timeout * 1000,
|
||||
randomize=timestamp()),
|
||||
title=u"Auto-Find subtitles: %s" % item_title,
|
||||
summary="Issues a forced refresh, ignoring known subtitles and searching for new ones"
|
||||
title=_(u"Auto-Find subtitles: %s", item_title),
|
||||
summary=_("Issues a forced refresh, ignoring known subtitles and searching for new ones")
|
||||
))
|
||||
else:
|
||||
return ItemDetailsMenu(rating_key=rating_key, title=title, item_title=item_title)
|
||||
@@ -115,6 +154,70 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/season/extract_embedded/{rating_key}/{language}')
|
||||
def SeasonExtractEmbedded(**kwargs):
|
||||
rating_key = kwargs.get("rating_key")
|
||||
requested_language = kwargs.pop("language")
|
||||
with_mods = kwargs.pop("with_mods")
|
||||
item_title = kwargs.pop("item_title")
|
||||
title = kwargs.pop("title")
|
||||
force = kwargs.pop("force", False)
|
||||
|
||||
Thread.Create(season_extract_embedded, **{"rating_key": rating_key, "requested_language": requested_language,
|
||||
"with_mods": with_mods, "force": force})
|
||||
|
||||
kwargs["header"] = _("Success")
|
||||
kwargs["message"] = _(u"Extracting of embedded subtitles for %s triggered", title)
|
||||
|
||||
kwargs.pop("randomize")
|
||||
return MetadataMenu(randomize=timestamp(), title=item_title, **kwargs)
|
||||
|
||||
|
||||
def multi_extract_embedded(stream_list, refresh=False, with_mods=False, single_thread=True):
|
||||
def execute():
|
||||
for video_part_map, plexapi_part, stream_index, language, set_current in stream_list:
|
||||
plexapi_item = video_part_map.keys()[0].plexapi_metadata["item"]
|
||||
|
||||
extract_embedded_sub(rating_key=plexapi_item.rating_key, part_id=plexapi_part.id,
|
||||
plex_item=plexapi_item, part=plexapi_part, scanned_videos=video_part_map,
|
||||
stream_index=stream_index, set_current=set_current,
|
||||
language=language, with_mods=with_mods, refresh=refresh)
|
||||
|
||||
if single_thread:
|
||||
with Thread.Lock(key="extract_embedded"):
|
||||
execute()
|
||||
else:
|
||||
execute()
|
||||
|
||||
|
||||
def season_extract_embedded(rating_key, requested_language, with_mods=False, force=False):
|
||||
# get stored subtitle info for item id
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
|
||||
try:
|
||||
for data in get_all_items(key="children", value=rating_key, base="library/metadata"):
|
||||
item = get_item(data[MI_KEY])
|
||||
if item:
|
||||
stored_subs = subtitle_storage.load_or_new(item)
|
||||
for part in get_all_parts(item):
|
||||
embedded_subs = stored_subs.get_by_provider(part.id, requested_language, "embedded")
|
||||
current = stored_subs.get_any(part.id, requested_language)
|
||||
if not embedded_subs or force:
|
||||
stream_data = get_embedded_subtitle_streams(part, requested_language=requested_language,
|
||||
get_forced=config.forced_only)
|
||||
if stream_data:
|
||||
stream = stream_data[0]["stream"]
|
||||
|
||||
set_current = not current or force
|
||||
refresh = not current
|
||||
|
||||
extract_embedded_sub(rating_key=item.rating_key, part_id=part.id,
|
||||
stream_index=str(stream.index), set_current=set_current,
|
||||
refresh=refresh, language=requested_language, with_mods=with_mods)
|
||||
finally:
|
||||
subtitle_storage.destroy()
|
||||
|
||||
|
||||
@route(PREFIX + '/ignore_list')
|
||||
def IgnoreListMenu():
|
||||
oc = SubFolderObjectContainer(title2="Ignore list", replace_parent=True)
|
||||
@@ -130,9 +233,9 @@ def IgnoreListMenu():
|
||||
def HistoryMenu():
|
||||
from support.history import get_history
|
||||
history = get_history()
|
||||
oc = SubFolderObjectContainer(title2="History", replace_parent=True)
|
||||
oc = SubFolderObjectContainer(title2=_("History"), replace_parent=True)
|
||||
|
||||
for item in history.history_items:
|
||||
for item in history.items:
|
||||
possible_language = item.language
|
||||
language_display = item.lang_name if not possible_language else display_language(possible_language)
|
||||
|
||||
@@ -140,10 +243,12 @@ def HistoryMenu():
|
||||
key=Callback(ItemDetailsMenu, title=item.title, item_title=item.item_title,
|
||||
rating_key=item.rating_key),
|
||||
title=u"%s (%s)" % (item.item_title, item.mode_verbose),
|
||||
summary=u"%s in %s (%s, score: %s), %s" % (language_display, item.section_title,
|
||||
summary=_(u"%s in %s (%s, score: %s), %s", language_display, item.section_title,
|
||||
item.provider_name, item.score, df(item.time))
|
||||
))
|
||||
|
||||
history.destroy()
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@@ -155,6 +260,15 @@ def RefreshMissing(randomize=None):
|
||||
return fatality(header=header, replace_parent=True)
|
||||
|
||||
|
||||
def replace_item(obj, key, replace_value):
|
||||
for k, v in obj.items():
|
||||
if isinstance(v, dict):
|
||||
obj[k] = replace_item(v, key, replace_value)
|
||||
if key in obj:
|
||||
obj[key] = replace_value
|
||||
return obj
|
||||
|
||||
|
||||
@route(PREFIX + '/ValidatePrefs', enforce_route=True)
|
||||
def ValidatePrefs():
|
||||
Core.log.setLevel(logging.DEBUG)
|
||||
@@ -178,20 +292,20 @@ def ValidatePrefs():
|
||||
update_dict = True
|
||||
|
||||
elif Dict["channel_enabled"] != config.enable_channel:
|
||||
Log.Debug("Channel features %s, restarting plugin", "enabled" if config.enable_channel else "disabled")
|
||||
Log.Debug("Interface features %s, restarting plugin", "enabled" if config.enable_channel else "disabled")
|
||||
update_dict = True
|
||||
restart = True
|
||||
|
||||
if "plugin_pin_mode" not in Dict:
|
||||
if "plugin_pin_mode2" not in Dict:
|
||||
update_dict = True
|
||||
|
||||
elif Dict["plugin_pin_mode"] != Prefs["plugin_pin_mode"]:
|
||||
elif Dict["plugin_pin_mode2"] != Prefs["plugin_pin_mode2"]:
|
||||
update_dict = True
|
||||
restart = True
|
||||
|
||||
if update_dict:
|
||||
Dict["channel_enabled"] = config.enable_channel
|
||||
Dict["plugin_pin_mode"] = Prefs["plugin_pin_mode"]
|
||||
Dict["plugin_pin_mode2"] = Prefs["plugin_pin_mode2"]
|
||||
Dict.Save()
|
||||
|
||||
if restart:
|
||||
@@ -208,21 +322,31 @@ def ValidatePrefs():
|
||||
# SZ config debug
|
||||
Log.Debug("--- SZ Config-Debug ---")
|
||||
for attr in [
|
||||
"app_support_path", "data_path", "data_items_path", "enable_agent",
|
||||
"version", "app_support_path", "data_path", "data_items_path", "enable_agent",
|
||||
"enable_channel", "permissions_ok", "missing_permissions", "fs_encoding",
|
||||
"subtitle_destination_folder", "dbm_supported", "lang_list", "providers"]:
|
||||
Log.Debug("config.%s: %s", attr, getattr(config, attr))
|
||||
"subtitle_destination_folder", "new_style_cache", "dbm_supported", "lang_list", "providers",
|
||||
"plex_transcoder", "refiner_settings", "unrar", "adv_cfg_path"]:
|
||||
|
||||
value = getattr(config, attr)
|
||||
if isinstance(value, dict):
|
||||
d = replace_item(copy.deepcopy(value), "api_key", "xxxxxxxxxxxxxxxxxxxxxxxxx")
|
||||
Log.Debug("config.%s: %s", attr, d)
|
||||
continue
|
||||
|
||||
Log.Debug("config.%s: %s", attr, value)
|
||||
|
||||
for attr in ["plugin_log_path", "server_log_path"]:
|
||||
value = getattr(config, attr)
|
||||
access = os.access(value, os.R_OK)
|
||||
if Core.runtime.os == "Windows":
|
||||
try:
|
||||
f = open(value, "r")
|
||||
f.read(1)
|
||||
f.close()
|
||||
except:
|
||||
access = False
|
||||
|
||||
if value:
|
||||
access = os.access(value, os.R_OK)
|
||||
if Core.runtime.os == "Windows":
|
||||
try:
|
||||
f = open(value, "r")
|
||||
f.read(1)
|
||||
f.close()
|
||||
except:
|
||||
access = False
|
||||
|
||||
Log.Debug("config.%s: %s (accessible: %s)", attr, value, access)
|
||||
|
||||
@@ -230,10 +354,36 @@ def ValidatePrefs():
|
||||
"subtitles.save.filesystem", ]:
|
||||
Log.Debug("Pref.%s: %s", attr, Prefs[attr])
|
||||
|
||||
# debug drone
|
||||
if "sonarr" in config.refiner_settings or "radarr" in config.refiner_settings:
|
||||
Log.Debug("----- Connections -----")
|
||||
try:
|
||||
from subliminal_patch.refiners.drone import SonarrClient, RadarrClient
|
||||
for key, cls in [("sonarr", SonarrClient), ("radarr", RadarrClient)]:
|
||||
if key in config.refiner_settings:
|
||||
cname = key.capitalize()
|
||||
try:
|
||||
status = cls(**config.refiner_settings[key]).status()
|
||||
except HTTPError, e:
|
||||
if e.response.status_code == 401:
|
||||
Log.Debug("%s: NOT WORKING - BAD API KEY", cname)
|
||||
else:
|
||||
Log.Debug("%s: NOT WORKING - %s", cname, traceback.format_exc())
|
||||
except:
|
||||
Log.Debug("%s: NOT WORKING - %s", cname, traceback.format_exc())
|
||||
else:
|
||||
if status and status["version"]:
|
||||
Log.Debug("%s: OK - %s", cname, status["version"])
|
||||
else:
|
||||
Log.Debug("%s: NOT WORKING - %s", cname)
|
||||
except:
|
||||
Log.Debug("Something went really wrong when evaluating Sonarr/Radarr: %s", traceback.format_exc())
|
||||
|
||||
# fixme: check existance of and os access of logs
|
||||
Log.Debug("----- Environment -----")
|
||||
Log.Debug("Platform: %s", Core.runtime.platform)
|
||||
Log.Debug("OS: %s", Core.runtime.os)
|
||||
Log.Debug("----- Environment -----")
|
||||
Log.Debug("Python: %s", platform.python_version())
|
||||
for key, value in os.environ.iteritems():
|
||||
if key.startswith("PLEX") or key.startswith("SZ_"):
|
||||
if "TOKEN" in key:
|
||||
|
||||
@@ -1,33 +1,36 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
import types
|
||||
import datetime
|
||||
import subprocess
|
||||
import os
|
||||
import operator
|
||||
|
||||
from func import enable_channel_wrapper
|
||||
from support.items import get_kind, get_item_thumb
|
||||
from support.helpers import get_video_display_title
|
||||
from func import enable_channel_wrapper, route_wrapper, register_route_function
|
||||
from subzero.language import Language
|
||||
from support.i18n import is_localized_string, _
|
||||
from support.items import get_kind, get_item_thumb, get_item, get_item_kind_from_item, refresh_item
|
||||
from support.helpers import get_video_display_title, pad_title, display_language, quote_args, is_stream_forced
|
||||
from support.ignore import ignore_list
|
||||
from support.lib import get_intent
|
||||
from support.config import config
|
||||
from subzero.constants import ICON_SUB, ICON
|
||||
from support.plex_media import get_part, get_plex_metadata
|
||||
from support.scheduler import scheduler
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles
|
||||
|
||||
from subliminal_patch.subtitle import ModifiedSubtitle
|
||||
|
||||
default_thumb = R(ICON_SUB)
|
||||
main_icon = ICON if not config.is_development else "icon-dev.jpg"
|
||||
|
||||
# noinspection PyUnboundLocalVariable
|
||||
route = enable_channel_wrapper(route)
|
||||
route = route_wrapper
|
||||
# noinspection PyUnboundLocalVariable
|
||||
handler = enable_channel_wrapper(handler)
|
||||
|
||||
|
||||
def should_display_ignore(items, previous=None):
|
||||
kind = get_kind(items)
|
||||
return items and (
|
||||
(kind in ("show", "season")) or
|
||||
(kind == "episode" and previous != "season")
|
||||
)
|
||||
|
||||
|
||||
def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None, add_kind=True):
|
||||
"""
|
||||
|
||||
@@ -47,10 +50,15 @@ def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None
|
||||
|
||||
in_list = rating_key in ignore_list[use_kind]
|
||||
|
||||
t = u"Ignore %(kind)s \"%(title)s\""
|
||||
if in_list:
|
||||
t = u"Un-ignore %(kind)s \"%(title)s\""
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(callback_menu, kind=use_kind, rating_key=rating_key, title=title),
|
||||
title=u"%s %s \"%s\"" % (
|
||||
"Un-Ignore" if in_list else "Ignore", ignore_list.verbose(kind) if add_kind else "", unicode(title))
|
||||
key=Callback(callback_menu, kind=use_kind, sure=False, todo="not_set", rating_key=rating_key, title=title),
|
||||
title=_(t,
|
||||
kind=ignore_list.verbose(kind) if add_kind else "",
|
||||
title=unicode(title))
|
||||
)
|
||||
)
|
||||
|
||||
@@ -72,7 +80,7 @@ def dig_tree(oc, items, menu_callback, menu_determination_callback=None, force_r
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(menu_callback or menu_determination_callback(kind, item, pass_kwargs=pass_kwargs), title=title,
|
||||
rating_key=force_rating_key or key, **add_kwargs),
|
||||
title=title, thumb=thumb, summary=summary
|
||||
title=pad_title(title) if kind in ("show", "season") else title, thumb=thumb, summary=summary
|
||||
))
|
||||
return oc
|
||||
|
||||
@@ -90,8 +98,8 @@ def set_refresh_menu_state(state_or_media, media_type="movies"):
|
||||
Dict["current_refresh_state"] = None
|
||||
return
|
||||
|
||||
if isinstance(state_or_media, types.StringTypes):
|
||||
Dict["current_refresh_state"] = state_or_media
|
||||
if isinstance(state_or_media, types.StringTypes) or is_localized_string(state_or_media):
|
||||
Dict["current_refresh_state"] = unicode(state_or_media)
|
||||
return
|
||||
|
||||
media = state_or_media
|
||||
@@ -102,14 +110,19 @@ def set_refresh_menu_state(state_or_media, media_type="movies"):
|
||||
for episode in media.seasons[season].episodes:
|
||||
ep = media.seasons[season].episodes[episode]
|
||||
media_id = ep.id
|
||||
title = get_video_display_title("show", ep.title, parent_title=media.title, season=int(season), episode=int(episode))
|
||||
title = get_video_display_title(_("show"), ep.title, parent_title=media.title, season=int(season), episode=int(episode))
|
||||
else:
|
||||
title = get_video_display_title("movie", media.title)
|
||||
title = get_video_display_title(_("movie"), media.title)
|
||||
|
||||
intent = get_intent()
|
||||
force_refresh = intent.get("force", media_id)
|
||||
|
||||
Dict["current_refresh_state"] = u"%sRefreshing %s" % ("Force-" if force_refresh else "", unicode(title))
|
||||
t = u"Refreshing %(title)s"
|
||||
if force_refresh:
|
||||
t = u"Force-refreshing %(title)s"
|
||||
|
||||
Dict["current_refresh_state"] = unicode(_(t,
|
||||
title=unicode(title)))
|
||||
|
||||
|
||||
def get_item_task_data(task_name, rating_key, language):
|
||||
@@ -124,30 +137,74 @@ def debounce(func):
|
||||
:param func:
|
||||
:return:
|
||||
"""
|
||||
def get_lookup_key(args, kwargs):
|
||||
func_name = list(args).pop(0).__name__
|
||||
return tuple([func_name] + [(key, value) for key, value in kwargs.iteritems()])
|
||||
|
||||
def wrap(*args, **kwargs):
|
||||
if "randomize" in kwargs:
|
||||
if "menu_history" not in Dict:
|
||||
Dict["menu_history"] = {}
|
||||
func.debounce = True
|
||||
|
||||
key = get_lookup_key([func] + list(args), kwargs)
|
||||
if key in Dict["menu_history"]:
|
||||
Log.Debug("not triggering %s twice with %s, %s" % (func, args, kwargs))
|
||||
return ObjectContainer()
|
||||
else:
|
||||
Dict["menu_history"][key] = datetime.datetime.now() + datetime.timedelta(hours=6)
|
||||
return func
|
||||
|
||||
|
||||
def extract_embedded_sub(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs.pop("part_id")
|
||||
stream_index = kwargs.pop("stream_index")
|
||||
with_mods = kwargs.pop("with_mods", False)
|
||||
language = Language.fromietf(kwargs.pop("language"))
|
||||
refresh = kwargs.pop("refresh", True)
|
||||
set_current = kwargs.pop("set_current", True)
|
||||
|
||||
plex_item = kwargs.pop("plex_item", get_item(rating_key))
|
||||
item_type = get_item_kind_from_item(plex_item)
|
||||
part = kwargs.pop("part", get_part(plex_item, part_id))
|
||||
scanned_videos = kwargs.pop("scanned_videos", None)
|
||||
|
||||
any_successful = False
|
||||
|
||||
if part:
|
||||
if not scanned_videos:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
|
||||
scanned_videos = scan_videos([metadata], ignore_all=True, skip_hashing=True)
|
||||
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if str(stream.index) == stream_index:
|
||||
is_forced = is_stream_forced(stream)
|
||||
bn = os.path.basename(part.file)
|
||||
|
||||
set_refresh_menu_state(_(u"Extracting subtitle %(stream_index)s of %(filename)s",
|
||||
stream_index=stream_index,
|
||||
filename=bn))
|
||||
Log.Info(u"Extracting stream %s (%s) of %s", stream_index, display_language(language), bn)
|
||||
|
||||
out_codec = stream.codec if stream.codec != "mov_text" else "srt"
|
||||
|
||||
args = [
|
||||
config.plex_transcoder, "-i", part.file, "-map", "0:%s" % stream_index, "-f", out_codec, "-"
|
||||
]
|
||||
output = None
|
||||
try:
|
||||
Dict.Save()
|
||||
except TypeError:
|
||||
Log.Error("Can't save menu history for: %r", key)
|
||||
del Dict["menu_history"][key]
|
||||
output = subprocess.check_output(quote_args(args), stderr=subprocess.PIPE, shell=True)
|
||||
except:
|
||||
Log.Error("Extraction failed: %s", traceback.format_exc())
|
||||
|
||||
return func(*args, **kwargs)
|
||||
if output:
|
||||
subtitle = ModifiedSubtitle(language, mods=config.default_mods if with_mods else None)
|
||||
subtitle.content = output
|
||||
subtitle.provider_name = "embedded"
|
||||
subtitle.id = "stream_%s" % stream_index
|
||||
subtitle.score = 0
|
||||
subtitle.set_encoding("utf-8")
|
||||
|
||||
return wrap
|
||||
# fixme: speedup video; only video.name is needed
|
||||
save_successful = save_subtitles(scanned_videos, {scanned_videos.keys()[0]: [subtitle]}, mode="m",
|
||||
set_current=set_current, is_forced=is_forced)
|
||||
set_refresh_menu_state(None)
|
||||
|
||||
if save_successful and refresh:
|
||||
refresh_item(rating_key)
|
||||
|
||||
any_successful = True
|
||||
|
||||
return any_successful
|
||||
|
||||
|
||||
class SZObjectContainer(ObjectContainer):
|
||||
@@ -181,10 +238,10 @@ class SubFolderObjectContainer(ObjectContainer):
|
||||
from support.helpers import pad_title, timestamp
|
||||
self.add(DirectoryObject(
|
||||
key=Callback(fatality, force_title=" ", randomize=timestamp()),
|
||||
title=pad_title("<< Back to home"),
|
||||
summary="Current state: %s; Last state: %s" % (
|
||||
(Dict["current_refresh_state"] or "Idle") if "current_refresh_state" in Dict else "Idle",
|
||||
(Dict["last_refresh_state"] or "None") if "last_refresh_state" in Dict else "None"
|
||||
title=pad_title(_("<< Back to home")),
|
||||
summary=_("Current state: %s; Last state: %s",
|
||||
(Dict["current_refresh_state"] or _("Idle")) if "current_refresh_state" in Dict else _("Idle"),
|
||||
(Dict["last_refresh_state"] or _("None")) if "last_refresh_state" in Dict else _("None")
|
||||
)
|
||||
))
|
||||
|
||||
@@ -202,4 +259,4 @@ class ZipObject(ObjectClass):
|
||||
self.SetHeader("Content-Disposition",
|
||||
'attachment; filename="' + datetime.datetime.now().strftime("Logs_%y%m%d_%H-%M-%S.zip")
|
||||
+ '"')
|
||||
return self.zipdata
|
||||
return self.zipdata
|
||||
|
||||
@@ -4,6 +4,7 @@ from subzero.constants import PREFIX
|
||||
from menu_helpers import debounce, set_refresh_menu_state, route
|
||||
from support.items import refresh_item
|
||||
from support.helpers import timestamp
|
||||
from support.i18n import _
|
||||
|
||||
|
||||
@route(PREFIX + '/item/refresh/{rating_key}/force', force=True)
|
||||
@@ -15,9 +16,17 @@ def RefreshItem(rating_key=None, came_from="/recent", item_title=None, force=Fal
|
||||
from interface.main import fatality
|
||||
header = " "
|
||||
if trigger:
|
||||
set_refresh_menu_state(u"Triggering %sRefresh for %s" % ("Force-" if force else "", item_title))
|
||||
t = u"Triggering refresh for %(title)s"
|
||||
if force:
|
||||
u"Triggering forced refresh for %(title)s"
|
||||
set_refresh_menu_state(_(t,
|
||||
title=item_title))
|
||||
Thread.Create(refresh_item, rating_key=rating_key, force=force, refresh_kind=refresh_kind,
|
||||
parent_rating_key=previous_rating_key, timeout=int(timeout))
|
||||
|
||||
header = u"%s of item %s triggered" % ("Refresh" if not force else "Forced-refresh", rating_key)
|
||||
t = u"Refresh of item %(item_id)s triggered"
|
||||
if force:
|
||||
t = u"Forced refresh of item %(item_id)s triggered"
|
||||
header = _(t,
|
||||
item_id=rating_key)
|
||||
return fatality(randomize=timestamp(), header=header, replace_parent=True)
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
import traceback
|
||||
import types
|
||||
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, route
|
||||
from subzero.modification import registry as mod_registry, SubtitleModifications
|
||||
@@ -12,14 +12,15 @@ from support.plex_media import get_plex_metadata
|
||||
from support.scanning import scan_videos
|
||||
from support.helpers import timestamp, pad_title
|
||||
from support.items import get_current_sub, set_mods_for_part
|
||||
from support.i18n import _
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_mods/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleModificationsMenu(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
language = kwargs["language"]
|
||||
lang_instance = Language.fromietf(language)
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
kwargs.pop("randomize")
|
||||
|
||||
@@ -30,8 +31,8 @@ def SubtitleModificationsMenu(**kwargs):
|
||||
from interface.item_details import SubtitleOptionsMenu
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleOptionsMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"< Back to subtitle options for: %s" % kwargs["title"],
|
||||
summary=kwargs["current_data"],
|
||||
title=_(u"< Back to subtitle options for: %s", kwargs["title"]),
|
||||
summary=unicode(kwargs["current_data"]),
|
||||
thumb=default_thumb
|
||||
))
|
||||
|
||||
@@ -42,45 +43,53 @@ def SubtitleModificationsMenu(**kwargs):
|
||||
if mod.exclusive and identifier in current_mods:
|
||||
continue
|
||||
|
||||
if mod.languages and lang_instance not in mod.languages:
|
||||
continue
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=identifier, mode="add", randomize=timestamp(), **kwargs),
|
||||
title=pad_title(mod.description), summary=mod.long_description or ""
|
||||
title=pad_title(_(mod.description)), summary=_(mod.long_description) or ""
|
||||
))
|
||||
|
||||
fps_mod = SubtitleModifications.get_mod_class("change_FPS")
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleFPSModMenu, randomize=timestamp(), **kwargs),
|
||||
title=pad_title(fps_mod.description), summary=fps_mod.long_description or ""
|
||||
title=pad_title(_(fps_mod.description)), summary=_(fps_mod.long_description) or ""
|
||||
))
|
||||
|
||||
shift_mod = SubtitleModifications.get_mod_class("shift_offset")
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
|
||||
title=pad_title(shift_mod.description), summary=shift_mod.long_description or ""
|
||||
title=pad_title(_(shift_mod.description)), summary=_(shift_mod.long_description) or ""
|
||||
))
|
||||
|
||||
color_mod = SubtitleModifications.get_mod_class("color")
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleColorModMenu, randomize=timestamp(), **kwargs),
|
||||
title=pad_title(color_mod.description), summary=color_mod.long_description or ""
|
||||
title=pad_title(_(color_mod.description)), summary=_(color_mod.long_description) or ""
|
||||
))
|
||||
|
||||
if current_mods:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=None, mode="remove_last", randomize=timestamp(), **kwargs),
|
||||
title=pad_title("Remove last applied mod (%s)" % current_mods[-1]),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_mods) if current_mods else "none")
|
||||
title=pad_title(_("Remove last applied mod (%s)", current_mods[-1])),
|
||||
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleListMods, randomize=timestamp(), **kwargs),
|
||||
title=pad_title("Manage applied mods"),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_mods))
|
||||
title=pad_title(_("Manage applied mods")),
|
||||
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods))
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleReapplyMods, randomize=timestamp(), **kwargs),
|
||||
title=pad_title(_("Reapply applied mods")),
|
||||
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=None, mode="clear", randomize=timestamp(), **kwargs),
|
||||
title=pad_title("Restore original version"),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_mods) if current_mods else "none")
|
||||
title=pad_title(_("Restore original version")),
|
||||
summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
|
||||
))
|
||||
|
||||
storage.destroy()
|
||||
@@ -100,28 +109,31 @@ def SubtitleFPSModMenu(**kwargs):
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
|
||||
title="< Back to subtitle modification menu"
|
||||
title=_("< Back to subtitle modification menu")
|
||||
))
|
||||
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
target_fps = plex_part.fps
|
||||
|
||||
for fps in ["23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]:
|
||||
for fps in ["23.980", "23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]:
|
||||
if float(fps) == float(target_fps):
|
||||
continue
|
||||
|
||||
if float(fps) > float(target_fps):
|
||||
indicator = "subs constantly getting faster"
|
||||
indicator = _("subs constantly getting faster")
|
||||
else:
|
||||
indicator = "subs constantly getting slower"
|
||||
indicator = _("subs constantly getting slower")
|
||||
|
||||
mod_ident = SubtitleModifications.get_mod_signature("change_FPS", **{"from": fps, "to": target_fps})
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs),
|
||||
title="%s fps -> %s fps (%s)" % (fps, target_fps, indicator)
|
||||
title=_("%(from_fps)s fps -> %(to_fps)s fps (%(slower_or_faster_indicator)s)",
|
||||
from_fps=fps,
|
||||
to_fps=target_fps,
|
||||
slower_or_faster_indicator=indicator)
|
||||
))
|
||||
|
||||
return oc
|
||||
@@ -139,13 +151,13 @@ def SubtitleShiftModUnitMenu(**kwargs):
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
|
||||
title="< Back to subtitle modifications"
|
||||
title=_("< Back to subtitle modifications")
|
||||
))
|
||||
|
||||
for unit, title in POSSIBLE_UNITS:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleShiftModMenu, unit=unit, randomize=timestamp(), **kwargs),
|
||||
title="Adjust by %s" % title
|
||||
title=_("Adjust by %(time_and_unit)s", time_and_unit=title)
|
||||
))
|
||||
|
||||
return oc
|
||||
@@ -162,16 +174,16 @@ def SubtitleShiftModMenu(unit=None, **kwargs):
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
|
||||
title="< Back to unit selection"
|
||||
title=_("< Back to unit selection")
|
||||
))
|
||||
|
||||
rng = []
|
||||
if unit == "h":
|
||||
rng = range(-10, 11)
|
||||
rng = list(reversed(range(-10, 0))) + list(reversed(range(1, 11)))
|
||||
elif unit in ("m", "s"):
|
||||
rng = range(-15, 15)
|
||||
rng = list(reversed(range(-15, 0))) + list(reversed(range(1, 16)))
|
||||
elif unit == "ms":
|
||||
rng = range(-900, 1000, 100)
|
||||
rng = list(reversed(range(-900, 0, 100))) + list(reversed(range(100, 1000, 100)))
|
||||
|
||||
for i in rng:
|
||||
if i == 0:
|
||||
@@ -196,7 +208,7 @@ def SubtitleColorModMenu(**kwargs):
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
|
||||
title="< Back to subtitle modification menu"
|
||||
title=_("< Back to subtitle modification menu")
|
||||
))
|
||||
|
||||
for color, code in color_mod.colors.iteritems():
|
||||
@@ -228,6 +240,22 @@ def SubtitleSetMods(mods=None, mode=None, **kwargs):
|
||||
return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_reapply_mods/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleReapplyMods(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
lang_a2 = kwargs["language"]
|
||||
item_type = kwargs["item_type"]
|
||||
|
||||
language = Language.fromietf(lang_a2)
|
||||
|
||||
set_mods_for_part(rating_key, part_id, language, item_type, [], mode="add")
|
||||
|
||||
kwargs.pop("randomize")
|
||||
return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_list_mods/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleListMods(**kwargs):
|
||||
@@ -242,13 +270,13 @@ def SubtitleListMods(**kwargs):
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
|
||||
title="< Back to subtitle modifications"
|
||||
title=_("< Back to subtitle modifications")
|
||||
))
|
||||
|
||||
for identifier in current_sub.mods:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=identifier, mode="remove", randomize=timestamp(), **kwargs),
|
||||
title="Remove: %s" % identifier
|
||||
title=_("Remove: %(mod_name)s", mod_name=identifier)
|
||||
))
|
||||
|
||||
storage.destroy()
|
||||
|
||||
@@ -13,6 +13,12 @@ import lib
|
||||
|
||||
sys.modules["support.lib"] = lib
|
||||
|
||||
import i18n
|
||||
|
||||
sys.modules["support.i18n"] = i18n
|
||||
|
||||
helpers._ = i18n._
|
||||
|
||||
import plex_media
|
||||
sys.modules["support.plex_media"] = plex_media
|
||||
|
||||
|
||||
+372
-41
@@ -1,31 +1,41 @@
|
||||
# coding=utf-8
|
||||
|
||||
import copy
|
||||
import os
|
||||
import re
|
||||
import inspect
|
||||
import sys
|
||||
import rarfile
|
||||
|
||||
import jstyleson
|
||||
import datetime
|
||||
|
||||
import subliminal
|
||||
import subliminal_patch
|
||||
import subzero.constants
|
||||
import lib
|
||||
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded, AuthenticationError
|
||||
|
||||
from subliminal_patch.core import is_windows_special_path
|
||||
from whichdb import whichdb
|
||||
from babelfish import Language
|
||||
|
||||
from subliminal_patch.exceptions import TooManyRequests
|
||||
from subzero.language import Language
|
||||
from subliminal.cli import MutexLock
|
||||
from subzero.lib.io import FileIO, get_viable_encoding
|
||||
from subzero.lib.dict import Dicked
|
||||
from subzero.util import get_root_path
|
||||
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW, MEDIA_TYPE_TO_STRING
|
||||
from dogpile.cache.region import register_backend as register_cache_backend
|
||||
from lib import Plex
|
||||
from helpers import check_write_permissions, cast_bool, cast_int
|
||||
from helpers import check_write_permissions, cast_bool, cast_int, mswindows
|
||||
|
||||
SUBTITLE_EXTS = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'txt', 'psb',
|
||||
'vtt']
|
||||
TEXT_SUBTITLE_EXTS = ("srt", "ass", "ssa", "vtt")
|
||||
register_cache_backend(
|
||||
"subzero.cache.file", "subzero.cache_backends.file", "SZFileBackend")
|
||||
|
||||
SUBTITLE_EXTS_BASE = ['utf', 'utf8', 'utf-8', 'srt', 'smi', 'rt', 'ssa', 'aqt', 'jss', 'ass', 'idx', 'sub', 'psb',
|
||||
'vtt']
|
||||
SUBTITLE_EXTS = SUBTITLE_EXTS_BASE + ["txt"]
|
||||
|
||||
TEXT_SUBTITLE_EXTS = ("srt", "ass", "ssa", "vtt", "mov_text")
|
||||
VIDEO_EXTS = ['3g2', '3gp', 'asf', 'asx', 'avc', 'avi', 'avs', 'bivx', 'bup', 'divx', 'dv', 'dvr-ms', 'evo', 'fli',
|
||||
'flv',
|
||||
'm2t', 'm2ts', 'm2v', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'mts', 'nsv', 'nuv', 'ogm', 'ogv', 'tp',
|
||||
@@ -46,6 +56,25 @@ def int_or_default(s, default):
|
||||
return default
|
||||
|
||||
|
||||
VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable)
|
||||
|
||||
PROVIDER_THROTTLE_MAP = {
|
||||
"default": {
|
||||
TooManyRequests: (datetime.timedelta(hours=1), "1 hour"),
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
|
||||
ServiceUnavailable: (datetime.timedelta(minutes=20), "20 minutes"),
|
||||
},
|
||||
"opensubtitles": {
|
||||
TooManyRequests: (datetime.timedelta(hours=3), "3 hours"),
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"),
|
||||
},
|
||||
"addic7ed": {
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
|
||||
TooManyRequests: (datetime.timedelta(minutes=5), "5 minutes"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class Config(object):
|
||||
libraries_root = None
|
||||
plugin_info = ""
|
||||
@@ -62,6 +91,10 @@ class Config(object):
|
||||
dbm_supported = False
|
||||
pms_request_timeout = 15
|
||||
low_impact_mode = False
|
||||
new_style_cache = False
|
||||
pack_cache_dir = None
|
||||
advanced = None
|
||||
debug_i18n = False
|
||||
|
||||
enable_channel = True
|
||||
enable_agent = True
|
||||
@@ -70,11 +103,8 @@ class Config(object):
|
||||
lock_advanced_menu = False
|
||||
locked = False
|
||||
pin_valid_minutes = 10
|
||||
lang_list = None
|
||||
subtitle_destination_folder = None
|
||||
subtitle_formats = None
|
||||
providers = None
|
||||
provider_settings = None
|
||||
max_recent_items_per_library = 200
|
||||
permissions_ok = False
|
||||
missing_permissions = None
|
||||
@@ -88,6 +118,7 @@ class Config(object):
|
||||
remove_tags = False
|
||||
fix_ocr = False
|
||||
fix_common = False
|
||||
reverse_rtl = False
|
||||
colors = ""
|
||||
chmod = None
|
||||
forced_only = False
|
||||
@@ -100,6 +131,14 @@ class Config(object):
|
||||
react_to_activities = False
|
||||
activity_mode = None
|
||||
no_refresh = False
|
||||
plex_transcoder = None
|
||||
refiner_settings = None
|
||||
exact_filenames = False
|
||||
only_one = False
|
||||
embedded_auto_extract = False
|
||||
ietf_as_alpha3 = False
|
||||
unrar = None
|
||||
adv_cfg_path = None
|
||||
|
||||
store_recently_played_amount = 40
|
||||
|
||||
@@ -127,20 +166,22 @@ class Config(object):
|
||||
subzero.constants.DEFAULT_TIMEOUT = lib.DEFAULT_TIMEOUT = self.pms_request_timeout = \
|
||||
min(cast_int(Prefs['pms_request_timeout'], 15), 45)
|
||||
self.low_impact_mode = cast_bool(Prefs['low_impact_mode'])
|
||||
self.new_style_cache = cast_bool(Prefs['new_style_cache'])
|
||||
self.pack_cache_dir = self.get_pack_cache_dir()
|
||||
self.advanced = self.get_advanced_config()
|
||||
self.debug_i18n = self.advanced.debug_i18n
|
||||
|
||||
os.environ["SZ_USER_AGENT"] = self.get_user_agent()
|
||||
|
||||
self.providers = self.get_providers()
|
||||
|
||||
self.setup_proxies()
|
||||
self.set_plugin_mode()
|
||||
self.set_plugin_lock()
|
||||
self.set_activity_modes()
|
||||
self.parse_rename_mode()
|
||||
|
||||
self.lang_list = self.get_lang_list()
|
||||
self.subtitle_destination_folder = self.get_subtitle_destination_folder()
|
||||
self.subtitle_formats = self.get_subtitle_formats()
|
||||
self.forced_only = cast_bool(Prefs["subtitles.only_foreign"])
|
||||
self.provider_settings = self.get_provider_settings()
|
||||
self.max_recent_items_per_library = int_or_default(Prefs["scheduler.max_recent_items_per_library"], 2000)
|
||||
self.sections = list(Plex["library"].sections())
|
||||
self.missing_permissions = []
|
||||
@@ -153,6 +194,7 @@ class Config(object):
|
||||
self.remove_tags = cast_bool(Prefs['subtitles.remove_tags'])
|
||||
self.fix_ocr = cast_bool(Prefs['subtitles.fix_ocr'])
|
||||
self.fix_common = cast_bool(Prefs['subtitles.fix_common'])
|
||||
self.reverse_rtl = cast_bool(Prefs['subtitles.reverse_rtl'])
|
||||
self.colors = Prefs['subtitles.colors'] if Prefs['subtitles.colors'] != "don't change" else None
|
||||
self.chmod = self.check_chmod()
|
||||
self.exotic_ext = cast_bool(Prefs["subtitles.scan.exotic_ext"])
|
||||
@@ -162,21 +204,60 @@ class Config(object):
|
||||
self.default_mods = self.get_default_mods()
|
||||
self.debug_mods = cast_bool(Prefs['log_debug_mods'])
|
||||
self.no_refresh = os.environ.get("SZ_NO_REFRESH", False)
|
||||
self.plex_transcoder = self.get_plex_transcoder()
|
||||
self.only_one = cast_bool(Prefs['subtitles.only_one'])
|
||||
self.embedded_auto_extract = cast_bool(Prefs["subtitles.embedded.autoextract"])
|
||||
self.ietf_as_alpha3 = cast_bool(Prefs["subtitles.language.ietf_normalize"])
|
||||
self.initialized = True
|
||||
|
||||
def init_libraries(self):
|
||||
try_executables = []
|
||||
custom_unrar = os.environ.get("SZ_UNRAR_TOOL")
|
||||
if custom_unrar:
|
||||
if os.path.isfile(custom_unrar):
|
||||
try_executables.append(custom_unrar)
|
||||
|
||||
unrar_exe = None
|
||||
if Core.runtime.os == "Windows":
|
||||
unrar_exe = os.path.abspath(os.path.join(self.libraries_root, "Windows", "i386", "UnRAR", "UnRAR.exe"))
|
||||
if os.path.isfile(unrar_exe):
|
||||
rarfile.UNRAR_TOOL = unrar_exe
|
||||
Log.Info("Using UnRAR from: %s", unrar_exe)
|
||||
|
||||
custom_unrar = os.environ.get("SZ_UNRAR_TOOL")
|
||||
if custom_unrar and os.path.isfile(custom_unrar):
|
||||
rarfile.UNRAR_TOOL = custom_unrar
|
||||
Log.Info("Using UnRAR from: %s", custom_unrar)
|
||||
elif Core.runtime.os == "MacOSX":
|
||||
unrar_exe = os.path.abspath(os.path.join(self.libraries_root, "MacOSX", "i386", "UnRAR", "unrar"))
|
||||
|
||||
elif Core.runtime.os == "Linux":
|
||||
unrar_exe = os.path.abspath(os.path.join(self.libraries_root, "Linux", Core.runtime.cpu, "UnRAR", "unrar"))
|
||||
|
||||
if unrar_exe and os.path.isfile(unrar_exe):
|
||||
try_executables.append(unrar_exe)
|
||||
|
||||
try_executables.append("unrar")
|
||||
|
||||
for exe in try_executables:
|
||||
rarfile.UNRAR_TOOL = exe
|
||||
rarfile.ORIG_UNRAR_TOOL = exe
|
||||
try:
|
||||
rarfile.custom_check([rarfile.UNRAR_TOOL], True)
|
||||
except:
|
||||
Log.Debug("custom check failed for: %s", exe)
|
||||
continue
|
||||
|
||||
rarfile.OPEN_ARGS = rarfile.ORIG_OPEN_ARGS
|
||||
rarfile.EXTRACT_ARGS = rarfile.ORIG_EXTRACT_ARGS
|
||||
rarfile.TEST_ARGS = rarfile.ORIG_TEST_ARGS
|
||||
Log.Info("Using UnRAR from: %s", exe)
|
||||
self.unrar = exe
|
||||
return
|
||||
|
||||
Log.Warn("UnRAR not found")
|
||||
|
||||
def init_cache(self):
|
||||
if self.new_style_cache:
|
||||
subliminal.region.configure('subzero.cache.file', expiration_time=datetime.timedelta(days=30),
|
||||
arguments={'appname': "sz_cache",
|
||||
'app_cache_dir': self.data_path})
|
||||
Log.Info("Using new style file based cache!")
|
||||
return
|
||||
|
||||
names = ['dbhash', 'gdbm', 'dbm']
|
||||
dbfn = None
|
||||
self.dbm_supported = False
|
||||
@@ -222,12 +303,49 @@ class Config(object):
|
||||
Log.Warn("Not using file based cache!")
|
||||
subliminal.region.configure('dogpile.cache.memory')
|
||||
|
||||
def sync_cache(self):
|
||||
if not self.new_style_cache:
|
||||
return
|
||||
Log.Debug("Syncing cache")
|
||||
subliminal.region.backend.sync()
|
||||
|
||||
def get_pack_cache_dir(self):
|
||||
pack_cache_dir = os.path.join(config.data_path, "pack_cache")
|
||||
if not os.path.isdir(pack_cache_dir):
|
||||
os.makedirs(pack_cache_dir)
|
||||
|
||||
return pack_cache_dir
|
||||
|
||||
def get_advanced_config(self):
|
||||
paths = []
|
||||
if Prefs['path_to_advanced_settings']:
|
||||
paths = [
|
||||
Prefs['path_to_advanced_settings'],
|
||||
os.path.join(Prefs['path_to_advanced_settings'], "advanced_settings.json")
|
||||
]
|
||||
|
||||
paths.append(os.path.join(config.data_path, "advanced_settings.json"))
|
||||
|
||||
for path in paths:
|
||||
if os.path.isfile(path):
|
||||
data = FileIO.read(path, "r")
|
||||
|
||||
d = Dicked(**jstyleson.loads(data))
|
||||
self.adv_cfg_path = path
|
||||
Log.Info(u"Using advanced settings from: %s", path)
|
||||
return d
|
||||
|
||||
return Dicked()
|
||||
|
||||
def set_log_paths(self):
|
||||
# find log handler
|
||||
for handler in Core.log.handlers:
|
||||
if getattr(getattr(handler, "__class__"), "__name__") in (
|
||||
'FileHandler', 'RotatingFileHandler', 'TimedRotatingFileHandler'):
|
||||
cls_name = getattr(getattr(handler, "__class__"), "__name__")
|
||||
if cls_name in ('FileHandler', 'RotatingFileHandler', 'TimedRotatingFileHandler'):
|
||||
plugin_log_file = handler.baseFilename
|
||||
if cls_name in ("RotatingFileHandler", "TimedRotatingFileHandler"):
|
||||
handler.backupCount = int_or_default(Prefs['log_rotate_keep'], 5)
|
||||
|
||||
if os.path.isfile(os.path.realpath(plugin_log_file)):
|
||||
self.plugin_log_path = plugin_log_file
|
||||
|
||||
@@ -258,16 +376,16 @@ class Config(object):
|
||||
if not self.providers:
|
||||
self.enable_agent = False
|
||||
self.enable_channel = False
|
||||
Log.Warn("No providers enabled, disabling agent and channel!")
|
||||
Log.Warn("No providers enabled, disabling agent and interface!")
|
||||
return
|
||||
|
||||
if Prefs["plugin_mode"] == "only agent":
|
||||
if Prefs["plugin_mode2"] == "only agent":
|
||||
self.enable_channel = False
|
||||
elif Prefs["plugin_mode"] == "only channel":
|
||||
elif Prefs["plugin_mode2"] == "only interface":
|
||||
self.enable_agent = False
|
||||
|
||||
def set_plugin_lock(self):
|
||||
if Prefs["plugin_pin_mode"] in ("channel menu", "advanced menu"):
|
||||
if Prefs["plugin_pin_mode2"] in ("interface", "advanced menu"):
|
||||
# check pin
|
||||
pin = Prefs["plugin_pin"]
|
||||
if not pin or not len(pin):
|
||||
@@ -280,8 +398,8 @@ class Config(object):
|
||||
except ValueError:
|
||||
Log.Warn("PIN has to be an integer (0-9)")
|
||||
self.pin = pin
|
||||
self.lock_advanced_menu = Prefs["plugin_pin_mode"] == "advanced menu"
|
||||
self.lock_menu = Prefs["plugin_pin_mode"] == "channel menu"
|
||||
self.lock_advanced_menu = Prefs["plugin_pin_mode2"] == "advanced menu"
|
||||
self.lock_menu = Prefs["plugin_pin_mode2"] == "interface"
|
||||
|
||||
try:
|
||||
self.pin_valid_minutes = int(Prefs["plugin_pin_valid_for"].strip())
|
||||
@@ -315,6 +433,9 @@ class Config(object):
|
||||
if isinstance(path_str, unicode):
|
||||
path_str = path_str.encode(self.fs_encoding)
|
||||
|
||||
if not os.path.exists(path_str):
|
||||
continue
|
||||
|
||||
if use_ignore_fs:
|
||||
# check whether we've got an ignore file inside the section path
|
||||
if self.is_physically_ignored(path_str):
|
||||
@@ -384,12 +505,30 @@ class Config(object):
|
||||
if not fn:
|
||||
return
|
||||
|
||||
splitted_fn = fn.split()
|
||||
exe_fn = splitted_fn[0]
|
||||
arguments = [arg.strip() for arg in splitted_fn[1:]]
|
||||
got_args = "%(" in fn
|
||||
if got_args:
|
||||
first_arg_pos = fn.index("%(")
|
||||
exe_fn = fn[:first_arg_pos].strip()
|
||||
arguments = [arg.strip() for arg in fn[first_arg_pos:].split()]
|
||||
else:
|
||||
exe_fn = fn
|
||||
arguments = []
|
||||
|
||||
if os.path.isfile(exe_fn) and os.access(exe_fn, os.X_OK):
|
||||
return exe_fn, arguments
|
||||
|
||||
# try finding the executable itself, the path might contain spaces and there might have been other arguments
|
||||
fn_split = exe_fn.split(u" ")
|
||||
tmp_exe_fn = fn_split[0]
|
||||
|
||||
for offset in range(1, len(fn_split)+1):
|
||||
if os.path.isfile(tmp_exe_fn) and os.access(tmp_exe_fn, os.X_OK):
|
||||
exe_fn = tmp_exe_fn.strip()
|
||||
arguments = [arg.strip() for arg in fn_split[offset:]] + arguments
|
||||
return exe_fn, arguments
|
||||
|
||||
tmp_exe_fn = u" ".join(fn_split[:offset+1])
|
||||
|
||||
Log.Error("Notify executable not existing or not executable: %s" % exe_fn)
|
||||
|
||||
def refresh_enabled_sections(self):
|
||||
@@ -420,7 +559,27 @@ class Config(object):
|
||||
return enabled_sections
|
||||
|
||||
# Prepare a list of languages we want subs for
|
||||
def get_lang_list(self):
|
||||
def get_lang_list(self, provider=None):
|
||||
# advanced settings
|
||||
if provider and self.advanced.providers and provider in self.advanced.providers:
|
||||
adv_languages = self.advanced.providers[provider].get("languages", None)
|
||||
if adv_languages:
|
||||
adv_out = set()
|
||||
for adv_lang in adv_languages:
|
||||
adv_lang = adv_lang.strip()
|
||||
try:
|
||||
real_lang = Language.fromietf(adv_lang)
|
||||
except:
|
||||
try:
|
||||
real_lang = Language.fromname(adv_lang)
|
||||
except:
|
||||
continue
|
||||
adv_out.update({real_lang})
|
||||
|
||||
# fallback to default languages if no valid language was found in advanced settings
|
||||
if adv_out:
|
||||
return adv_out
|
||||
|
||||
l = {Language.fromietf(Prefs["langPref1a"])}
|
||||
lang_custom = Prefs["langPrefCustom"].strip()
|
||||
|
||||
@@ -453,6 +612,8 @@ class Config(object):
|
||||
|
||||
return l
|
||||
|
||||
lang_list = property(get_lang_list)
|
||||
|
||||
def get_subtitle_destination_folder(self):
|
||||
if not Prefs["subtitles.save.filesystem"]:
|
||||
return
|
||||
@@ -471,7 +632,7 @@ class Config(object):
|
||||
out.append("vtt")
|
||||
return out
|
||||
|
||||
def get_providers(self):
|
||||
def get_providers(self, media_type="series"):
|
||||
providers = {'opensubtitles': cast_bool(Prefs['provider.opensubtitles.enabled']),
|
||||
# 'thesubdb': Prefs['provider.thesubdb.enabled'],
|
||||
'podnapisi': cast_bool(Prefs['provider.podnapisi.enabled']),
|
||||
@@ -480,10 +641,21 @@ class Config(object):
|
||||
'tvsubtitles': cast_bool(Prefs['provider.tvsubtitles.enabled']),
|
||||
'legendastv': cast_bool(Prefs['provider.legendastv.enabled']),
|
||||
'napiprojekt': cast_bool(Prefs['provider.napiprojekt.enabled']),
|
||||
'shooter': cast_bool(Prefs['provider.shooter.enabled']),
|
||||
'hosszupuska': cast_bool(Prefs['provider.hosszupuska.enabled']),
|
||||
'supersubtitles': cast_bool(Prefs['provider.supersubtitles.enabled']),
|
||||
'shooter': False,
|
||||
'subscene': cast_bool(Prefs['provider.subscene.enabled']),
|
||||
'argenteam': cast_bool(Prefs['provider.argenteam.enabled']),
|
||||
'subscenter': False,
|
||||
'assrt': cast_bool(Prefs['provider.assrt.enabled']),
|
||||
}
|
||||
|
||||
providers_by_prefs = copy.deepcopy(providers)
|
||||
|
||||
# disable subscene for movies by default
|
||||
if media_type == "movies":
|
||||
providers["subscene"] = False
|
||||
|
||||
# ditch non-forced-subtitles-reporting providers
|
||||
if self.forced_only:
|
||||
providers["addic7ed"] = False
|
||||
@@ -491,32 +663,109 @@ class Config(object):
|
||||
providers["legendastv"] = False
|
||||
providers["napiprojekt"] = False
|
||||
providers["shooter"] = False
|
||||
providers["hosszupuska"] = False
|
||||
providers["supersubtitles"] = False
|
||||
providers["titlovi"] = False
|
||||
providers["subscenter"] = False
|
||||
providers["argenteam"] = False
|
||||
providers["assrt"] = False
|
||||
|
||||
if not self.unrar and providers["legendastv"]:
|
||||
providers["legendastv"] = False
|
||||
Log.Info("Disabling LegendasTV, because UnRAR wasn't found")
|
||||
|
||||
# advanced settings
|
||||
if media_type and self.advanced.providers:
|
||||
for provider, data in self.advanced.providers.iteritems():
|
||||
if provider not in providers or not providers_by_prefs[provider]:
|
||||
continue
|
||||
|
||||
if data["enabled_for"] is not None:
|
||||
providers[provider] = media_type in data["enabled_for"]
|
||||
|
||||
if "provider_throttle" not in Dict:
|
||||
Dict["provider_throttle"] = {}
|
||||
|
||||
changed = False
|
||||
for provider, enabled in dict(providers).iteritems():
|
||||
reason, until, throttle_desc = Dict["provider_throttle"].get(provider, (None, None, None))
|
||||
if reason:
|
||||
now = datetime.datetime.now()
|
||||
if now < until:
|
||||
Log.Info("Not using %s until %s, because of: %s", provider,
|
||||
until.strftime("%y/%m/%d %H:%M"), reason)
|
||||
providers[provider] = False
|
||||
else:
|
||||
Log.Info("Using %s again after %s, (disabled because: %s)", provider, throttle_desc, reason)
|
||||
del Dict["provider_throttle"][provider]
|
||||
changed = True
|
||||
|
||||
if changed:
|
||||
Dict.Save()
|
||||
|
||||
return filter(lambda prov: providers[prov], providers)
|
||||
|
||||
providers = property(get_providers)
|
||||
|
||||
def get_provider_settings(self):
|
||||
os_use_https = self.advanced.providers.opensubtitles.use_https \
|
||||
if self.advanced.providers.opensubtitles.use_https != None else True
|
||||
provider_settings = {'addic7ed': {'username': Prefs['provider.addic7ed.username'],
|
||||
'password': Prefs['provider.addic7ed.password'],
|
||||
'use_random_agents': cast_bool(Prefs['provider.addic7ed.use_random_agents']),
|
||||
'use_random_agents': cast_bool(Prefs['provider.addic7ed.use_random_agents1']),
|
||||
},
|
||||
'opensubtitles': {'username': Prefs['provider.opensubtitles.username'],
|
||||
'password': Prefs['provider.opensubtitles.password'],
|
||||
'use_tag_search': cast_bool(Prefs['provider.opensubtitles.use_tags']),
|
||||
'only_foreign': cast_bool(Prefs['subtitles.only_foreign']),
|
||||
'is_vip': cast_bool(Prefs['provider.opensubtitles.is_vip'])
|
||||
'use_tag_search': self.exact_filenames,
|
||||
'only_foreign': self.forced_only,
|
||||
'is_vip': cast_bool(Prefs['provider.opensubtitles.is_vip']),
|
||||
'use_ssl': os_use_https,
|
||||
'timeout': self.advanced.providers.opensubtitles.timeout or 15
|
||||
},
|
||||
'podnapisi': {
|
||||
'only_foreign': cast_bool(Prefs['subtitles.only_foreign'])
|
||||
'only_foreign': self.forced_only,
|
||||
},
|
||||
'legendastv': {'username': Prefs['provider.legendastv.username'],
|
||||
'password': Prefs['provider.legendastv.password'],
|
||||
},
|
||||
'assrt': {'token': Prefs['provider.assrt.token'], }
|
||||
}
|
||||
|
||||
return provider_settings
|
||||
|
||||
provider_settings = property(get_provider_settings)
|
||||
|
||||
def provider_throttle(self, name, exception):
    """
    throttle a provider :name: for a period of time based on the :exception: type

    The throttle data (a time delta and its description) is looked up by exception class in
    PROVIDER_THROTTLE_MAP, first in the provider's own entry, then in the "default" entry. If nothing is found
    the provider is not throttled. Otherwise the throttle is stored in Dict["provider_throttle"] and saved.

    :param name: provider name, used as key in PROVIDER_THROTTLE_MAP and Dict["provider_throttle"]
    :param exception: the exception instance the provider raised
    :return: None
    """
    cls = getattr(exception, "__class__")

    # the logged/stored name always is the one of the exception that was actually raised
    cls_name = getattr(cls, "__name__")
    if cls not in VALID_THROTTLE_EXCEPTIONS:
        # cls is a class object, not an instance, so derived exceptions have to be mapped onto their valid
        # base class using issubclass (isinstance never matched here); the first matching class wins
        for valid_cls in VALID_THROTTLE_EXCEPTIONS:
            if issubclass(cls, valid_cls):
                cls = valid_cls
                break

    throttle_data = PROVIDER_THROTTLE_MAP.get(name, PROVIDER_THROTTLE_MAP["default"]).get(cls, None) or \
        PROVIDER_THROTTLE_MAP["default"].get(cls, None)

    if not throttle_data:
        return

    throttle_delta, throttle_description = throttle_data

    if "provider_throttle" not in Dict:
        Dict["provider_throttle"] = {}

    throttle_until = datetime.datetime.now() + throttle_delta
    Dict["provider_throttle"][name] = (cls_name, throttle_until, throttle_description)

    Log.Info("Throttling %s for %s, until %s, because of: %s", name, throttle_description,
             throttle_until.strftime("%y/%m/%d %H:%M"), cls_name)
    Dict.Save()
|
||||
|
||||
@property
|
||||
def provider_pool(self):
|
||||
if cast_bool(Prefs['providers.multithreading']):
|
||||
@@ -576,9 +825,17 @@ class Config(object):
|
||||
mods.append("common")
|
||||
if self.colors:
|
||||
mods.append("color(name=%s)" % self.colors)
|
||||
if self.reverse_rtl:
|
||||
mods.append("reverse_rtl")
|
||||
|
||||
return mods
|
||||
|
||||
def setup_proxies(self):
    """
    export the proxy configured in the "proxy" preference via the SZ_HTTP_PROXY environment variable

    Does nothing if the preference is empty.
    """
    configured_proxy = Prefs["proxy"]
    if not configured_proxy:
        return

    # the environment gets the stripped value, the log line shows the preference as entered
    os.environ["SZ_HTTP_PROXY"] = configured_proxy.strip()
    Log.Debug("Using HTTP Proxy: %s", configured_proxy)
|
||||
|
||||
def set_activity_modes(self):
|
||||
val = Prefs["activity.on_playback"]
|
||||
if val == "never":
|
||||
@@ -595,6 +852,80 @@ class Config(object):
|
||||
else:
|
||||
self.activity_mode = "next_episode"
|
||||
|
||||
def get_plex_transcoder(self):
    """
    locate the Plex Transcoder executable

    The base path is read from the PLEX_MEDIA_SERVER_HOME environment variable; if that is not set, the
    directory two levels above PLEXBUNDLEDPLUGINSPATH is used instead.

    :return: path to the transcoder executable if the file exists, otherwise None
    """
    base_path = os.environ.get("PLEX_MEDIA_SERVER_HOME", None)
    if not base_path:
        # fall back to bundled plugins path
        bundle_path = os.environ.get("PLEXBUNDLEDPLUGINSPATH", None)
        if bundle_path:
            base_path = os.path.normpath(os.path.join(bundle_path, "..", ".."))

    if not base_path:
        # neither environment variable is available; joining onto None would raise instead of reporting
        # "not found"
        return

    if sys.platform == "darwin":
        fn = os.path.join(base_path, "MacOS", "Plex Transcoder")
    elif mswindows:
        fn = os.path.join(base_path, "plextranscoder.exe")
    else:
        fn = os.path.join(base_path, "Plex Transcoder")

    if os.path.isfile(fn):
        return fn

    # look inside Resources folder as fallback, as well
    fn = os.path.join(base_path, "Resources", "Plex Transcoder")
    if os.path.isfile(fn):
        return fn
|
||||
|
||||
def parse_rename_mode(self):
    """
    derive self.refiner_settings and self.exact_filenames from the "media_rename1" preference

    self.refiner_settings is always reset to a new dict; self.exact_filenames is only ever set to True here,
    never reset.
    """
    # fixme: exact_filenames should be determined via callback combined with info about the current video
    # (original_name)

    self.refiner_settings = {}
    mode = str(Prefs["media_rename1"])

    if cast_bool(Prefs['use_file_info_file']):
        self.exact_filenames = True
        self.refiner_settings["file_info_file"] = True

    # modes that are fully handled on their own
    if mode == "none of the above":
        return

    if mode == "Symlink to original file":
        self.exact_filenames = True
        self.refiner_settings["symlinks"] = True
        return

    if mode == "I keep the original filenames":
        self.exact_filenames = True
        return

    if mode in ("Filebot", "Sonarr/Radarr/Filebot"):
        self.refiner_settings["filebot"] = True

    if mode not in ("Sonarr/Radarr (fill api info below)", "Sonarr/Radarr/Filebot"):
        return

    # sonarr and radarr are configured the same way: url and api key from the preferences, optionally
    # extended by the matching advanced refiner settings
    drone_prefs = (
        ("sonarr", "drone_api.sonarr.url", "drone_api.sonarr.api_key"),
        ("radarr", "drone_api.radarr.url", "drone_api.radarr.api_key"),
    )
    for refiner, url_pref, api_key_pref in drone_prefs:
        if not (Prefs[url_pref] and Prefs[api_key_pref]):
            continue

        self.refiner_settings[refiner] = {
            "base_url": Prefs[url_pref],
            "api_key": Prefs[api_key_pref],
        }
        advanced_settings = getattr(self.advanced.refiners, refiner)
        if advanced_settings:
            self.refiner_settings[refiner].update(advanced_settings)

        self.exact_filenames = True
|
||||
|
||||
@property
def text_based_formats(self):
    """
    subtitle formats considered text based

    :return: the advanced setting text_subtitle_formats if it is set (truthy), otherwise TEXT_SUBTITLE_EXTS
    """
    configured_formats = self.advanced.text_subtitle_formats
    if configured_formats:
        return configured_formats

    return TEXT_SUBTITLE_EXTS
|
||||
|
||||
def init_subliminal_patches(self):
|
||||
# configure custom subtitle destination folders for scanning pre-existing subs
|
||||
Log.Debug("Patching subliminal ...")
|
||||
|
||||
@@ -28,6 +28,7 @@ def migrate():
|
||||
time=item.time)
|
||||
|
||||
del Dict["history"]
|
||||
history.destroy()
|
||||
Dict.Save()
|
||||
|
||||
# migrate subtitle storage from Dict to Data
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
# coding=utf-8
|
||||
from babelfish import Language
|
||||
import os
|
||||
|
||||
from subzero.language import Language
|
||||
|
||||
import subliminal_patch as subliminal
|
||||
|
||||
@@ -8,67 +10,111 @@ from support.helpers import cast_bool
|
||||
from subtitlehelpers import get_subtitles_from_metadata
|
||||
from subliminal_patch import compute_score
|
||||
from support.plex_media import get_blacklist_from_part_map
|
||||
from subzero.video import refine_video
|
||||
from support.storage import get_pack_data, store_pack_data
|
||||
|
||||
|
||||
def download_best_subtitles(video_part_map, min_score=0, throttle_time=None):
|
||||
hearing_impaired = Prefs['subtitles.search.hearingImpaired']
|
||||
ietf_as_alpha3 = cast_bool(Prefs["subtitles.language.ietf_normalize"])
|
||||
def get_missing_languages(video, part):
|
||||
languages = set([Language.fromietf(str(l)) for l in config.lang_list])
|
||||
if not languages:
|
||||
return
|
||||
|
||||
# should we treat IETF as alpha3? (ditch the country part)
|
||||
alpha3_map = {}
|
||||
if ietf_as_alpha3:
|
||||
if config.ietf_as_alpha3:
|
||||
for language in languages:
|
||||
if language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
|
||||
missing_languages = False
|
||||
if not Prefs['subtitles.save.filesystem']:
|
||||
# scan for existing metadata subtitles
|
||||
meta_subs = get_subtitles_from_metadata(part)
|
||||
for language, subList in meta_subs.iteritems():
|
||||
if subList:
|
||||
video.subtitle_languages.add(language)
|
||||
Log.Debug("Found metadata subtitle %s for %s", language, video)
|
||||
|
||||
have_languages = video.subtitle_languages.copy()
|
||||
if config.ietf_as_alpha3:
|
||||
for language in have_languages:
|
||||
if language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
|
||||
missing_languages = (set(str(l) for l in languages) - set(str(l) for l in have_languages))
|
||||
|
||||
# all languages are found if we either really have subs for all languages or we only want to have exactly one language
|
||||
# and we've only found one (the case for a selected language, Prefs['subtitles.only_one'] (one found sub matches any language))
|
||||
found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
|
||||
if not missing_languages or found_one_which_is_enough:
|
||||
if found_one_which_is_enough:
|
||||
Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
|
||||
else:
|
||||
Log.Debug('All languages %r exist for %s', languages, video)
|
||||
return False
|
||||
|
||||
# re-add country codes to the missing languages, in case we've removed them above
|
||||
if config.ietf_as_alpha3:
|
||||
for language in languages:
|
||||
language.country = alpha3_map.get(language.alpha3, None)
|
||||
|
||||
return missing_languages
|
||||
|
||||
|
||||
def pre_download_hook(subtitle):
    """
    attach cached pack data to :subtitle: before it gets downloaded

    Only subtitles flagged as pack are considered; subtitle.pack_data is only set if get_pack_data returned
    something.

    :param subtitle: subtitle object exposing is_pack and pack_data
    """
    if not subtitle.is_pack:
        return

    # try retrieving the subtitle from a cached pack archive
    cached_pack = get_pack_data(subtitle)
    if cached_pack:
        subtitle.pack_data = cached_pack
|
||||
|
||||
|
||||
def post_download_hook(subtitle):
    """
    store freshly downloaded pack data in the cache

    if a new pack was downloaded, store it in the cache; providers' download method is responsible for
    setting subtitle.pack_data to None in case the cached pack data we provided was successfully used

    :param subtitle: subtitle object exposing is_pack and pack_data
    """
    # NOTE(review): indentation is not recoverable from this view; the reset of pack_data is assumed to
    # belong to the pack branch - confirm against the original file
    if not (subtitle.is_pack and subtitle.pack_data):
        return

    # store pack data in cache
    store_pack_data(subtitle, subtitle.pack_data)

    # may be redundant
    subtitle.pack_data = None
|
||||
|
||||
|
||||
def language_hook(provider):
    """
    return the languages to use for :provider:, as determined by config.get_lang_list

    :param provider: provider name
    :return: result of config.get_lang_list(provider=provider)
    """
    provider_languages = config.get_lang_list(provider=provider)
    return provider_languages
|
||||
|
||||
|
||||
def download_best_subtitles(video_part_map, min_score=0, throttle_time=None, providers=None):
|
||||
hearing_impaired = Prefs['subtitles.search.hearingImpaired']
|
||||
languages = set([Language.fromietf(str(l)) for l in config.lang_list])
|
||||
if not languages:
|
||||
return
|
||||
|
||||
use_videos = []
|
||||
for video, part in video_part_map.iteritems():
|
||||
if not Prefs['subtitles.save.filesystem']:
|
||||
# scan for existing metadata subtitles
|
||||
meta_subs = get_subtitles_from_metadata(part)
|
||||
for language, subList in meta_subs.iteritems():
|
||||
if subList:
|
||||
video.subtitle_languages.add(language)
|
||||
Log.Debug("Found metadata subtitle %s for %s", language, video)
|
||||
if not video.ignore_all:
|
||||
missing_languages = get_missing_languages(video, part)
|
||||
else:
|
||||
missing_languages = languages
|
||||
|
||||
have_languages = video.subtitle_languages.copy()
|
||||
if ietf_as_alpha3:
|
||||
for language in have_languages:
|
||||
if language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
if missing_languages:
|
||||
Log.Info(u"%s has missing languages: %s", os.path.basename(video.name), missing_languages)
|
||||
refine_video(video, refiner_settings=config.refiner_settings)
|
||||
use_videos.append(video)
|
||||
|
||||
missing_subs = (set(str(l) for l in languages) - set(str(l) for l in have_languages))
|
||||
|
||||
# all languages are found if we either really have subs for all languages or we only want to have exactly one language
|
||||
# and we've only found one (the case for a selected language, Prefs['subtitles.only_one'] (one found sub matches any language))
|
||||
found_one_which_is_enough = len(video.subtitle_languages) >= 1 and Prefs['subtitles.only_one']
|
||||
if not missing_subs or found_one_which_is_enough:
|
||||
if found_one_which_is_enough:
|
||||
Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
|
||||
else:
|
||||
Log.Debug('All languages %r exist for %s', languages, video)
|
||||
continue
|
||||
missing_languages = True
|
||||
break
|
||||
|
||||
if missing_languages:
|
||||
# re-add country codes to the missing languages, in case we've removed them above
|
||||
if ietf_as_alpha3:
|
||||
for language in languages:
|
||||
language.country = alpha3_map.get(language.alpha3, None)
|
||||
# prepare blacklist
|
||||
blacklist = get_blacklist_from_part_map(video_part_map, languages)
|
||||
|
||||
if use_videos:
|
||||
Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s, languages: %s" %
|
||||
(min_score, hearing_impaired, languages))
|
||||
|
||||
# prepare blacklist
|
||||
blacklist = get_blacklist_from_part_map(video_part_map, languages)
|
||||
|
||||
return subliminal.download_best_subtitles(video_part_map.keys(), languages, min_score, hearing_impaired, providers=config.providers,
|
||||
provider_configs=config.provider_settings, pool_class=config.provider_pool,
|
||||
compute_score=compute_score, throttle_time=throttle_time, blacklist=blacklist)
|
||||
return subliminal.download_best_subtitles(set(use_videos), languages, min_score, hearing_impaired,
|
||||
providers=providers or config.providers,
|
||||
provider_configs=config.provider_settings,
|
||||
pool_class=config.provider_pool,
|
||||
compute_score=compute_score, throttle_time=throttle_time,
|
||||
blacklist=blacklist, throttle_callback=config.provider_throttle,
|
||||
pre_download_hook=pre_download_hook,
|
||||
post_download_hook=post_download_hook,
|
||||
language_hook=language_hook)
|
||||
Log.Debug("All languages for all requested videos exist. Doing nothing.")
|
||||
@@ -15,7 +15,7 @@ from collections import OrderedDict
|
||||
import chardet
|
||||
|
||||
from bs4 import UnicodeDammit
|
||||
from babelfish import Language
|
||||
from subzero.language import Language
|
||||
from subzero.analytics import track_event
|
||||
|
||||
mswindows = (sys.platform == "win32")
|
||||
@@ -158,10 +158,11 @@ def get_video_display_title(kind, title, section_title=None, parent_title=None,
|
||||
if add_section_title:
|
||||
section_add = ("%s: " % section_title) if section_title else ""
|
||||
|
||||
if kind == "show" and parent_title:
|
||||
if kind in ("season", "show") and parent_title:
|
||||
if season and episode:
|
||||
return '%s%s S%02dE%02d%s' % (section_add, parent_title, season or 0, episode or 0,
|
||||
(", %s" % title if title else ""))
|
||||
|
||||
return '%s%s%s' % (section_add, parent_title, (", %s" % title if title else ""))
|
||||
return "%s%s" % (section_add, title)
|
||||
|
||||
@@ -209,7 +210,7 @@ def decode_message(s):
|
||||
|
||||
|
||||
def timestamp():
|
||||
return int(time.time())
|
||||
return int(time.time()*1000)
|
||||
|
||||
|
||||
def df(d):
|
||||
@@ -291,7 +292,6 @@ def notify_executable(exe_info, videos, subtitles, storage):
|
||||
prepared_arguments = [arg % prepared_data for arg in arguments]
|
||||
|
||||
Log.Debug(u"Calling %s with arguments: %s" % (exe, prepared_arguments))
|
||||
env = os.environ
|
||||
if not mswindows:
|
||||
env_path = {"PATH": os.pathsep.join(
|
||||
[
|
||||
@@ -302,14 +302,30 @@ def notify_executable(exe_info, videos, subtitles, storage):
|
||||
)
|
||||
}
|
||||
env = dict(os.environ, **env_path)
|
||||
env.pop("LD_LIBRARY_PATH", None)
|
||||
else:
|
||||
env = dict(os.environ)
|
||||
|
||||
# clean out any Plex-PYTHONPATH that may bleed through the spawned process
|
||||
if "PYTHONPATH" in env and "plex" in env["PYTHONPATH"].lower():
|
||||
del env["PYTHONPATH"]
|
||||
|
||||
try:
|
||||
output = subprocess.check_output(quote_args([exe] + prepared_arguments),
|
||||
stderr=subprocess.STDOUT, shell=True, env=env)
|
||||
except subprocess.CalledProcessError:
|
||||
Log.Error(u"Calling %s failed: %s" % (exe, traceback.format_exc()))
|
||||
proc = subprocess.Popen(quote_args([exe] + prepared_arguments), stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE, shell=True, env=env, cwd=os.path.dirname(exe))
|
||||
output, errors = proc.communicate()
|
||||
|
||||
if proc.returncode == 1:
|
||||
Log.Error(u"Calling %s with args %s failed: output:\n%s, error:\n%s", exe, prepared_arguments,
|
||||
output, errors)
|
||||
return
|
||||
|
||||
output = output.decode()
|
||||
|
||||
except:
|
||||
Log.Error(u"Calling %s failed: %s", exe, traceback.format_exc())
|
||||
else:
|
||||
Log.Debug(u"Process output: %s" % output)
|
||||
Log.Debug(u"Process output: %s", output)
|
||||
|
||||
|
||||
def track_usage(category=None, action=None, label=None, value=None):
|
||||
@@ -354,18 +370,29 @@ def dispatch_track_usage(*args, **kwargs):
|
||||
Log.Debug("Something went wrong when reporting anonymous user statistics: %s", traceback.format_exc())
|
||||
|
||||
|
||||
def get_language_from_stream(lang_code):
    """
    turn the language code of a stream into a Language

    :param lang_code: language code; may be empty/None
    :return: Language built from the Locale.Language.Match result, or None if :lang_code: is empty or the
             match is empty or "xx"
    """
    matched = Locale.Language.Match(lang_code) if lang_code else None
    if matched and matched != "xx":
        return Language.fromietf(matched)
|
||||
|
||||
|
||||
def get_language(lang_short):
    """
    build a Language from the IETF code :lang_short:

    :param lang_short: IETF language code
    :return: Language.fromietf(lang_short)
    """
    language = Language.fromietf(lang_short)
    return language
|
||||
|
||||
|
||||
def display_language(l):
|
||||
addons = []
|
||||
if l.country:
|
||||
addons.append(l.country.alpha2)
|
||||
if l.script:
|
||||
addons.append(l.script.code)
|
||||
return _(str(l).lower())
|
||||
|
||||
return l.name if not addons else "%s (%s)" % (l.name, ", ".join(addons))
|
||||
|
||||
def is_stream_forced(stream):
    """
    determine whether :stream: is a forced stream

    A stream counts as forced if its "forced" attribute is truthy, or if its title contains "forced"
    (case-insensitive).

    :param stream: object optionally exposing "title" and "forced" attributes
    :return: the truthy "forced" attribute, True if only the title matched, otherwise the (falsy) "forced"
             attribute (False if missing)
    """
    title = getattr(stream, "title", "") or ""
    flagged = getattr(stream, "forced", False)
    if flagged:
        return flagged

    if title and "forced" in title.strip().lower():
        return True

    return flagged
|
||||
|
||||
|
||||
class PartUnknownException(Exception):
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# coding=utf-8
|
||||
from subzero.history_storage import SubtitleHistory
|
||||
|
||||
get_history = lambda: SubtitleHistory(Data, int(Prefs["history_size"]))
|
||||
get_history = lambda: SubtitleHistory(Data, Thread, int(Prefs["history_size"]))
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
# coding=utf-8
|
||||
|
||||
import inspect
|
||||
|
||||
from support.config import config
|
||||
|
||||
|
||||
core = getattr(Data, "_core")
|
||||
|
||||
|
||||
# get original localization module in order to access its base classes later on
|
||||
def get_localization_module():
    """
    return the module which defines the class of core.localization

    :return: result of inspect.getmodule for that class
    """
    # NOTE(review): __class__ is read via getattr just like the original did - presumably a sandbox
    # restriction on underscore attributes; confirm before simplifying
    return inspect.getmodule(getattr(core.localization, "__class__"))
|
||||
|
||||
|
||||
plex_i18n_module = get_localization_module()
|
||||
|
||||
|
||||
def old_style_placeholders_count(s):
    """
    count the positional printf-style placeholders in :s:

    Placeholders with flags, width and precision (e.g. %02d, %.1f) are counted as well. Escaped percent
    signs (%%) and named placeholders (%(name)s) are not counted. Star width/precision (%*d) is not supported.

    :param s: format string
    :return: number of positional placeholders found
    """
    import re

    # "%%" is matched as its own alternative so an escaped percent sign can never start a placeholder;
    # a placeholder is "%", optional flags, width, precision and length modifier, then a conversion type
    pattern = r"%(?:%|[-#0 +]*\d*(?:\.\d+)?[hlL]?[diouxXeEfFgGcrs])"
    return sum(1 for found in re.findall(pattern, s) if found != "%%")
|
||||
|
||||
|
||||
def check_old_style_placeholders(k, args):
    """
    sanity check the format string :k: against the positional format arguments :args:

    Every detected problem is logged via Log.Error.

    :param k: format string (or an object whose __str__ returns it)
    :param args: positional format arguments
    :return: a short message naming the problem, or None if nothing was detected
    """
    # escaped %'s are dropped before looking for placeholders
    text = k.__str__().replace("%%", "")

    if "%(" in text:
        log_template = u"%r defines named placeholders for formatting"
        verdict = "NEEDS NAMED ARGUMENTS"
    else:
        found = old_style_placeholders_count(text)
        if found and not args:
            log_template = u"%r requires a arguments for formatting"
            verdict = "NEEDS FORMAT ARGUMENTS"
        elif not found and args:
            log_template = u"%r doesn't define placeholders for formatting"
            verdict = "HAS NO FORMAT ARGUMENTS"
        elif found and found != len(args):
            log_template = u"%r wrong amount of arguments supplied for formatting"
            verdict = "WRONG FORMAT ARGUMENT COUNT"
        else:
            # placeholders and arguments agree
            return

    Log.Error(log_template % text)
    return verdict
|
||||
|
||||
|
||||
class SmartLocalStringFormatter(plex_i18n_module.LocalStringFormatter):
    """
    this allows the use of dictionaries for string formatting, also does some sanity checking on the keys and values
    (the sanity checks only run if config.debug_i18n is set)
    """
    def __init__(self, string1, string2, locale=None):
        """
        :param string1: the format string
        :param string2: the format arguments; a one-element tuple holding a dictionary is unwrapped to that
                        dictionary
        :param locale: stored as-is
        """
        got_tuple = isinstance(string2, tuple)

        if got_tuple and len(string2) == 1 and hasattr(string2[0], "iteritems"):
            # dictionary passed
            string2 = string2[0]
            if config.debug_i18n and "%(" not in string1.__str__().replace("%%", ""):
                Log.Error(u"%r: dictionary for non-named format string supplied" % string1.__str__())
                string1 = "%s"
                string2 = "NO NAMED ARGUMENTS"

        elif got_tuple and len(string2) >= 1 and config.debug_i18n:
            # arguments
            problem = check_old_style_placeholders(string1, string2)
            if problem:
                # render the problem description instead of the broken format
                string1 = "%s"
                string2 = problem

        # NOTE(review): underscore attributes are written via setattr as in the original - presumably a
        # sandbox restriction; confirm before simplifying
        setattr(self, "_string1", string1)
        setattr(self, "_string2", string2)
        setattr(self, "_locale", locale)
|
||||
|
||||
|
||||
def local_string_with_optional_format(key, *args, **kwargs):
    """
    return the localized string for :key:, optionally formatted

    Keyword arguments take precedence over positional ones and are used for named formatting. If evaluating
    the string for the current locale raises a TypeError, the "en" string is used instead.

    :param key: localization key
    :param args: positional format arguments
    :param kwargs: named format arguments
    :return: unicode string (or the sanity check message if config.debug_i18n detected a problem)
    """
    format_args = (kwargs,) if kwargs else tuple(args)

    if not format_args:
        # check string instances for arguments
        if config.debug_i18n:
            problem = check_old_style_placeholders(key, format_args)
            if problem:
                return problem

        try:
            return unicode(plex_i18n_module.LocalString(core, key, Locale.CurrentLocale))
        except TypeError:
            Log.Exception("Broken translation!")
            return unicode(plex_i18n_module.LocalString(core, key, "en"))

    # fixme: may not be the best idea as this evaluates the string early
    try:
        return unicode(SmartLocalStringFormatter(plex_i18n_module.LocalString(core, key, Locale.CurrentLocale),
                                                 format_args))
    except TypeError:
        Log.Exception("Broken translation!")
        return unicode(SmartLocalStringFormatter(plex_i18n_module.LocalString(core, key, "en"), format_args))
|
||||
|
||||
|
||||
_ = local_string_with_optional_format
|
||||
|
||||
|
||||
def is_localized_string(s):
    """
    tell whether :s: is a localized string, i.e. exposes a "localize" attribute

    :param s: any object
    :return: bool
    """
    has_localize = hasattr(s, "localize")
    return has_localize
|
||||
@@ -11,7 +11,8 @@ class IgnoreDict(DictProxy):
|
||||
"section": "sections",
|
||||
"show": "series",
|
||||
"movie": "videos",
|
||||
"episode": "videos"
|
||||
"episode": "videos",
|
||||
"season": "seasons",
|
||||
}
|
||||
|
||||
# getItems types mapped to their verbose names
|
||||
@@ -19,9 +20,10 @@ class IgnoreDict(DictProxy):
|
||||
"sections": "Section",
|
||||
"series": "Series",
|
||||
"videos": "Item",
|
||||
"seasons": "Season",
|
||||
}
|
||||
|
||||
key_order = ("sections", "series", "videos")
|
||||
key_order = ("sections", "series", "videos", "seasons")
|
||||
|
||||
def __len__(self):
|
||||
try:
|
||||
@@ -35,7 +37,7 @@ class IgnoreDict(DictProxy):
|
||||
return self.translate_keys.get(name)
|
||||
|
||||
def verbose(self, name):
|
||||
return self.keys_verbose.get(name)
|
||||
return self.keys_verbose.get(self.translate_key(name) or name)
|
||||
|
||||
def get_title_key(self, kind, key):
|
||||
return "%s_%s" % (kind, key)
|
||||
@@ -57,6 +59,7 @@ class IgnoreDict(DictProxy):
|
||||
Dict.Save()
|
||||
|
||||
def setup_defaults(self):
|
||||
return {"sections": [], "series": [], "videos": [], "titles": {}}
|
||||
return {"sections": [], "series": [], "videos": [], "titles": {}, "seasons": []}
|
||||
|
||||
|
||||
ignore_list = IgnoreDict(Dict)
|
||||
|
||||
@@ -8,12 +8,15 @@ import os
|
||||
|
||||
import time
|
||||
|
||||
import datetime
|
||||
|
||||
from ignore import ignore_list
|
||||
from helpers import is_recent, get_plex_item_display_title, query_plex, PartUnknownException
|
||||
from lib import Plex, get_intent
|
||||
from config import config, IGNORE_FN
|
||||
from subliminal_patch.subtitle import ModifiedSubtitle
|
||||
from subzero.modification import registry as mod_registry, SubtitleModifications
|
||||
from socket import timeout
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -28,7 +31,11 @@ def get_item(key):
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
item_container = Plex["library"].metadata(item_id)
|
||||
try:
|
||||
item_container = Plex["library"].metadata(item_id)
|
||||
except timeout:
|
||||
Log.Debug("PMS API timed out when querying information about item %d", item_id)
|
||||
return
|
||||
|
||||
try:
|
||||
return list(item_container)[0]
|
||||
@@ -59,12 +66,15 @@ def get_item_kind_from_item(item):
|
||||
|
||||
def get_item_title(item):
|
||||
kind = get_item_kind_from_item(item)
|
||||
if kind not in ("episode", "movie"):
|
||||
if kind not in ("episode", "movie", "season", "series"):
|
||||
return
|
||||
|
||||
if kind == "episode":
|
||||
return get_plex_item_display_title(item, "show", parent=item.season, section_title=None,
|
||||
parent_title=item.show.title)
|
||||
elif kind == "season":
|
||||
return get_plex_item_display_title(item, "season", parent=item.show, section_title="Season",
|
||||
parent_title=item.show.title)
|
||||
else:
|
||||
return get_plex_item_display_title(item, kind, section_title=None)
|
||||
|
||||
@@ -255,7 +265,7 @@ def is_ignored(rating_key, item=None):
|
||||
:return:
|
||||
"""
|
||||
# item in soft ignore list
|
||||
if rating_key in ignore_list["videos"]:
|
||||
if ignore_list["videos"] and rating_key in ignore_list["videos"]:
|
||||
Log.Debug("Item %s is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
@@ -263,12 +273,17 @@ def is_ignored(rating_key, item=None):
|
||||
kind = get_item_kind(item)
|
||||
|
||||
# show in soft ignore list
|
||||
if kind == "Episode" and item.show.rating_key in ignore_list["series"]:
|
||||
if kind == "Episode" and ignore_list["series"] and item.show.rating_key in ignore_list["series"]:
|
||||
Log.Debug("Item %s's show is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
# season in soft ignore list
|
||||
if kind == "Episode" and ignore_list["seasons"] and item.season.rating_key in ignore_list["seasons"]:
|
||||
Log.Debug("Item %s's season is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
# section in soft ignore list
|
||||
if item.section.key in ignore_list["sections"]:
|
||||
if ignore_list["sections"] and item.section.key in ignore_list["sections"]:
|
||||
Log.Debug("Item %s's section is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
@@ -323,7 +338,7 @@ def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, paren
|
||||
Log.Info("%s item %s", "Refreshing" if not force else "Forced-refreshing", key)
|
||||
Plex["library/metadata"].refresh(key)
|
||||
if multiple:
|
||||
time.sleep(10)
|
||||
Thread.Sleep(10.0)
|
||||
|
||||
|
||||
def get_current_sub(rating_key, part_id, language, plex_item=None):
|
||||
@@ -336,11 +351,77 @@ def get_current_sub(rating_key, part_id, language, plex_item=None):
|
||||
return current_sub, stored_subs, subtitle_storage
|
||||
|
||||
|
||||
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
|
||||
def save_stored_sub(stored_subtitle, rating_key, part_id, language, item_type, plex_item=None, storage=None,
                    stored_subs=None):
    """
    build a ModifiedSubtitle from :stored_subtitle: (content and mods) and save it via save_subtitles

    in order for this to work, if the caller supplies stored_subs and storage, it has to trigger their saving
    and destruction explicitly; if they are not (both) supplied, the storage is obtained, saved and destroyed
    here

    :param stored_subtitle: the stored subtitle to save; its content, encoding and last_mod may be updated
    :param rating_key: rating key of the item
    :param part_id: id of the part the subtitle belongs to
    :param language: language of the subtitle
    :param item_type: passed on to get_plex_metadata
    :param plex_item: the item, if already known; fetched via get_item otherwise
    :param storage: subtitle storage, if already open
    :param stored_subs: stored subtitles of the item, if already loaded
    :return: None
    """
    from support.plex_media import get_plex_metadata
    from support.scanning import scan_videos
    from support.storage import save_subtitles, get_subtitle_storage

    plex_item = plex_item or get_item(rating_key)
    if not plex_item:
        # the item has to exist before its rating_key can be used to load the stored subtitles
        return

    stored_subs_was_provided = bool(stored_subs and storage)
    if not stored_subs_was_provided:
        storage = get_subtitle_storage()

    try:
        if not stored_subs_was_provided:
            stored_subs = storage.load(plex_item.rating_key)

        if not stored_subs:
            return

        try:
            metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
        except PartUnknownException:
            return

        scanned_parts = scan_videos([metadata], ignore_all=True, skip_hashing=True)
        if not scanned_parts:
            # nothing was scanned, so there is no video/part to save the subtitle for
            return

        video, plex_part = list(scanned_parts.items())[0]

        subtitle = ModifiedSubtitle(language, mods=stored_subtitle.mods)
        subtitle.content = stored_subtitle.content
        if stored_subtitle.encoding:
            # thanks plex
            setattr(subtitle, "_guessed_encoding", stored_subtitle.encoding)

            if stored_subtitle.encoding != "utf-8":
                # store the normalized content back, so the stored subtitle is utf-8 from now on
                subtitle.normalize()
                stored_subtitle.content = subtitle.content
                stored_subtitle.encoding = "utf-8"
                storage.save(stored_subs)

        subtitle.plex_media_fps = plex_part.fps
        subtitle.page_link = stored_subtitle.id
        subtitle.language = language
        subtitle.id = stored_subtitle.id

        try:
            save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
            Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
                      ", ".join(stored_subtitle.mods) if stored_subtitle.mods else "none")
        except Exception:
            # best effort: a failed save is logged, not raised
            Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())

        if subtitle.storage_path:
            stored_subtitle.last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(subtitle.storage_path))

        if not stored_subs_was_provided:
            storage.save(stored_subs)
    finally:
        # a storage obtained here is always released, on early returns as well
        if not stored_subs_was_provided:
            storage.destroy()
|
||||
|
||||
|
||||
def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"):
|
||||
plex_item = get_item(rating_key)
|
||||
|
||||
if not plex_item:
|
||||
@@ -374,39 +455,9 @@ def set_mods_for_part(rating_key, part_id, language, item_type, mods, mode="add"
|
||||
current_sub.mods.pop()
|
||||
else:
|
||||
raise NotImplementedError("Wrong mode given")
|
||||
|
||||
save_stored_sub(current_sub, rating_key, part_id, language, item_type, plex_item=plex_item, storage=storage,
|
||||
stored_subs=stored_subs)
|
||||
|
||||
storage.save(stored_subs)
|
||||
|
||||
try:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
|
||||
except PartUnknownException:
|
||||
return
|
||||
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True,
|
||||
no_refining=True)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
subtitle = ModifiedSubtitle(language, mods=current_sub.mods)
|
||||
subtitle.content = current_sub.content
|
||||
if current_sub.encoding:
|
||||
# thanks plex
|
||||
setattr(subtitle, "_guessed_encoding", current_sub.encoding)
|
||||
|
||||
if current_sub.encoding != "utf-8":
|
||||
subtitle.set_encoding("utf-8")
|
||||
current_sub.content = subtitle.content
|
||||
current_sub.encoding = "utf-8"
|
||||
storage.save(stored_subs)
|
||||
|
||||
storage.destroy()
|
||||
|
||||
subtitle.plex_media_fps = plex_part.fps
|
||||
subtitle.page_link = "modify subtitles with: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
subtitle.language = language
|
||||
subtitle.id = current_sub.id
|
||||
|
||||
try:
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
|
||||
Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
|
||||
", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
except:
|
||||
Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())
|
||||
|
||||
@@ -9,29 +9,33 @@ import subtitlehelpers
|
||||
from config import config as sz_config
|
||||
|
||||
|
||||
SECONDARY_TAGS = ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom', 'hi', 'cc', 'sdh']
|
||||
|
||||
|
||||
def find_subtitles(part):
|
||||
lang_sub_map = {}
|
||||
part_filename = helpers.unicodize(part.file)
|
||||
part_basename = os.path.splitext(os.path.basename(part_filename))[0]
|
||||
use_filesystem = helpers.cast_bool(Prefs["subtitles.save.filesystem"])
|
||||
paths = [os.path.dirname(part_filename)] if use_filesystem else []
|
||||
sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
|
||||
if Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
|
||||
global_subtitle_folder = None
|
||||
use_sub_subfolder = Prefs["subtitles.save.subFolder"] != "current folder" and not sub_dir_custom
|
||||
sub_subfolder = None
|
||||
paths = [os.path.dirname(part_filename)] if use_filesystem else []
|
||||
|
||||
global_folders = []
|
||||
|
||||
if use_filesystem:
|
||||
# Check for local subtitles subdirectory
|
||||
sub_dir_base = paths[0]
|
||||
|
||||
sub_dir_list = []
|
||||
|
||||
if Prefs["subtitles.save.subFolder"] != "current folder":
|
||||
if use_sub_subfolder:
|
||||
# got selected subfolder
|
||||
sub_dir_list.append(os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"]))
|
||||
|
||||
sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
|
||||
if Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
sub_subfolder = os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"])
|
||||
sub_dir_list.append(sub_subfolder)
|
||||
sub_subfolder = os.path.normpath(helpers.unicodize(sub_subfolder))
|
||||
|
||||
if sub_dir_custom:
|
||||
# got custom subfolder
|
||||
@@ -84,8 +88,12 @@ def find_subtitles(part):
|
||||
media_files.append(root)
|
||||
|
||||
# cleanup any leftover subtitle if no associated media file was found
|
||||
if helpers.cast_bool(Prefs["subtitles.autoclean"]):
|
||||
if use_filesystem and helpers.cast_bool(Prefs["subtitles.autoclean"]):
|
||||
for path in paths:
|
||||
# only housekeep in sub_subfolder if sub_subfolder is used
|
||||
if use_sub_subfolder and path != sub_subfolder and not sz_config.advanced.thorough_cleaning:
|
||||
continue
|
||||
|
||||
# we can't housekeep the global subtitle folders as we don't know about *all* media files
|
||||
# in a library; skip them
|
||||
skip_path = False
|
||||
@@ -105,11 +113,10 @@ def find_subtitles(part):
|
||||
if os.path.isfile(enc_fn):
|
||||
(root, ext) = os.path.splitext(file_path_listing)
|
||||
# it's a subtitle file
|
||||
if ext.lower()[1:] in config.SUBTITLE_EXTS:
|
||||
if ext.lower()[1:] in config.SUBTITLE_EXTS_BASE:
|
||||
# get fn without forced/default/normal tag
|
||||
split_tag = root.rsplit(".", 1)
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded',
|
||||
'embedded-forced', 'custom']:
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
|
||||
root = split_tag[0]
|
||||
|
||||
# get associated media file name without language
|
||||
@@ -135,8 +142,7 @@ def find_subtitles(part):
|
||||
# get fn without forced/default/normal tag
|
||||
split_tag = local_basename.rsplit(".", 1)
|
||||
has_additional_tag = False
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in ['forced', 'normal', 'default', 'embedded', 'embedded-forced',
|
||||
'custom']:
|
||||
if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
|
||||
local_basename = split_tag[0]
|
||||
has_additional_tag = True
|
||||
|
||||
|
||||
@@ -4,14 +4,15 @@ import time
|
||||
|
||||
import os
|
||||
|
||||
from babelfish import Language, LanguageReverseError
|
||||
from babelfish import LanguageReverseError
|
||||
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.helpers import get_plex_item_display_title, cast_bool
|
||||
from support.helpers import get_plex_item_display_title, cast_bool, get_language_from_stream
|
||||
from support.items import get_item
|
||||
from support.lib import Plex
|
||||
from support.storage import get_subtitle_storage
|
||||
from subzero.video import has_external_subtitle
|
||||
from subzero.language import Language
|
||||
|
||||
|
||||
def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_title=None, internal=False, external=True, languages=()):
|
||||
@@ -29,8 +30,6 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
|
||||
subtitle_target_dir, tdir_is_absolute = config.subtitle_sub_dir
|
||||
|
||||
ietf_as_alpha3 = cast_bool(Prefs["subtitles.language.ietf_normalize"])
|
||||
|
||||
missing = set()
|
||||
languages_set = set([Language.fromietf(str(l)) for l in languages])
|
||||
for media in item.media:
|
||||
@@ -99,14 +98,12 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
else:
|
||||
# parse with internal language parser first
|
||||
try:
|
||||
lang = Locale.Language.Match(stream.language_code)
|
||||
if lang and lang != "xx":
|
||||
#Log.Debug("Found language: %r", lang)
|
||||
lang = Language.fromietf(lang)
|
||||
elif lang == "xx" and config.treat_und_as_first:
|
||||
lang = Language.fromietf(str(list(config.lang_list)[0]))
|
||||
else:
|
||||
continue
|
||||
lang = get_language_from_stream(stream.language_code)
|
||||
if not lang:
|
||||
if config.treat_und_as_first:
|
||||
lang = Language.fromietf(str(list(config.lang_list)[0]))
|
||||
else:
|
||||
continue
|
||||
|
||||
except (ValueError, LanguageReverseError):
|
||||
continue
|
||||
@@ -128,7 +125,7 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
|
||||
check_languages = set([Language.fromietf(str(l)) for l in languages])
|
||||
alpha3_map = {}
|
||||
if ietf_as_alpha3:
|
||||
if config.ietf_as_alpha3:
|
||||
for language in existing_flat:
|
||||
if language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
@@ -150,7 +147,7 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
continue
|
||||
|
||||
missing_from_part = set(Language.fromietf(l) for l in check_languages_str - existing_flat_str)
|
||||
if ietf_as_alpha3:
|
||||
if config.ietf_as_alpha3:
|
||||
for language in missing_from_part:
|
||||
language.country = alpha3_map.get(language.alpha3, None)
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import os
|
||||
import helpers
|
||||
from items import get_item
|
||||
from lib import Plex
|
||||
from support.config import TEXT_SUBTITLE_EXTS, config
|
||||
|
||||
|
||||
def get_metadata_dict(item, part, add):
|
||||
@@ -152,10 +153,9 @@ def get_stream_fps(streams):
|
||||
|
||||
|
||||
def get_media_item_ids(media, kind="series"):
|
||||
ids = []
|
||||
if kind == "movies":
|
||||
ids.append(media.id)
|
||||
else:
|
||||
# fixme: does this work correctly for full series force-refreshes and its intents?
|
||||
ids = [media.id]
|
||||
if kind == "series":
|
||||
for season in media.seasons:
|
||||
for episode in media.seasons[season].episodes:
|
||||
ids.append(media.seasons[season].episodes[episode].id)
|
||||
@@ -163,6 +163,48 @@ def get_media_item_ids(media, kind="series"):
|
||||
return ids
|
||||
|
||||
|
||||
def get_all_parts(plex_item):
|
||||
parts = []
|
||||
for media in plex_item.media:
|
||||
parts += media.parts
|
||||
|
||||
return parts
|
||||
|
||||
|
||||
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, get_forced=None):
|
||||
streams = []
|
||||
has_unknown = False
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
|
||||
language = helpers.get_language_from_stream(stream.language_code)
|
||||
is_unknown = False
|
||||
found_requested_language = requested_language and requested_language == language
|
||||
is_forced = helpers.is_stream_forced(stream)
|
||||
|
||||
if get_forced is not None:
|
||||
if (get_forced and not is_forced) or (not get_forced and is_forced):
|
||||
continue
|
||||
|
||||
if not language and config.treat_und_as_first:
|
||||
# only consider first unknown subtitle stream
|
||||
if has_unknown and skip_duplicate_unknown:
|
||||
continue
|
||||
|
||||
language = list(config.lang_list)[0]
|
||||
is_unknown = True
|
||||
has_unknown = True
|
||||
|
||||
if not requested_language or found_requested_language or has_unknown:
|
||||
streams.append({"stream": stream, "is_unknown": is_unknown, "language": language,
|
||||
"is_forced": is_forced})
|
||||
|
||||
if found_requested_language:
|
||||
break
|
||||
|
||||
return streams
|
||||
|
||||
|
||||
def get_part(plex_item, part_id):
|
||||
for media in plex_item.media:
|
||||
for part in media.parts:
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
# coding=utf-8
|
||||
|
||||
import traceback
|
||||
import helpers
|
||||
from babelfish.exceptions import LanguageError
|
||||
|
||||
from support.lib import Plex, get_intent
|
||||
from support.plex_media import get_stream_fps
|
||||
from support.storage import get_subtitle_storage
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
|
||||
from subzero.video import parse_video
|
||||
from subzero.video import parse_video, set_existing_languages
|
||||
from subzero.language import language_from_stream
|
||||
|
||||
|
||||
def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, no_refining=False):
|
||||
def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, providers=None, skip_hashing=False):
|
||||
"""
|
||||
returnes a subliminal/guessit-refined parsed video
|
||||
:param pms_video_info:
|
||||
@@ -40,46 +43,62 @@ def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, no
|
||||
plexpy_part = part
|
||||
|
||||
# embedded subtitles
|
||||
# fixme: skip the whole scanning process if known_embedded == wanted languages?
|
||||
if plexpy_part:
|
||||
for stream in plexpy_part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3:
|
||||
if (config.forced_only and getattr(stream, "forced")) or \
|
||||
(not config.forced_only and not getattr(stream, "forced")):
|
||||
if embedded_subtitles:
|
||||
for stream in plexpy_part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3:
|
||||
is_forced = helpers.is_stream_forced(stream)
|
||||
|
||||
# embedded subtitle
|
||||
# fixme: tap into external subtitles here instead of scanning for ourselves later?
|
||||
if not stream.stream_key and stream.codec:
|
||||
if config.exotic_ext or stream.codec.lower() in TEXT_SUBTITLE_EXTS:
|
||||
lang_code = stream.language_code
|
||||
if (config.forced_only and is_forced) or \
|
||||
(not config.forced_only and not is_forced):
|
||||
|
||||
# treat unknown language as lang1?
|
||||
if not lang_code and config.treat_und_as_first:
|
||||
lang_code = list(config.lang_list)[0].alpha3
|
||||
known_embedded.append(lang_code)
|
||||
# embedded subtitle
|
||||
# fixme: tap into external subtitles here instead of scanning for ourselves later?
|
||||
if stream.codec and getattr(stream, "index", None):
|
||||
if config.exotic_ext or stream.codec.lower() in config.text_based_formats:
|
||||
lang = None
|
||||
try:
|
||||
lang = language_from_stream(stream.language_code)
|
||||
except LanguageError:
|
||||
Log.Debug("Couldn't detect embedded subtitle stream language: %s", stream.language_code)
|
||||
|
||||
# treat unknown language as lang1?
|
||||
if not lang and config.treat_und_as_first:
|
||||
lang = list(config.lang_list)[0]
|
||||
|
||||
if lang:
|
||||
known_embedded.append(lang.alpha3)
|
||||
else:
|
||||
Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)
|
||||
|
||||
Log.Debug("Known embedded: %r", known_embedded)
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load(rating_key)
|
||||
subtitle_storage.destroy()
|
||||
|
||||
try:
|
||||
# get basic video info scan (filename)
|
||||
video = parse_video(plex_part.file, pms_video_info, hints, external_subtitles=external_subtitles,
|
||||
embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
|
||||
forced_only=config.forced_only, no_refining=no_refining, ignore_all=ignore_all,
|
||||
stored_subs=stored_subs)
|
||||
video = parse_video(plex_part.file, hints, skip_hashing=config.low_impact_mode or skip_hashing,
|
||||
providers=providers)
|
||||
|
||||
if not ignore_all:
|
||||
set_existing_languages(video, pms_video_info, external_subtitles=external_subtitles,
|
||||
embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
|
||||
forced_only=config.forced_only, stored_subs=stored_subs, languages=config.lang_list,
|
||||
only_one=config.only_one)
|
||||
|
||||
# add video fps info
|
||||
video.fps = plex_part.fps
|
||||
return video
|
||||
|
||||
except ValueError:
|
||||
Log.Warn("File could not be guessed by subliminal: %s" % plex_part.file)
|
||||
Log.Warn("File could not be guessed: %s: %s", plex_part.file, traceback.format_exc())
|
||||
|
||||
|
||||
def scan_videos(videos, kind="series", ignore_all=False, no_refining=False):
|
||||
def scan_videos(videos, ignore_all=False, providers=None, skip_hashing=False):
|
||||
"""
|
||||
receives a list of videos containing dictionaries returned by media_to_videos
|
||||
:param videos:
|
||||
@@ -95,8 +114,10 @@ def scan_videos(videos, kind="series", ignore_all=False, no_refining=False):
|
||||
|
||||
hints = helpers.get_item_hints(video)
|
||||
video["plex_part"].fps = get_stream_fps(video["plex_part"].streams)
|
||||
p = providers or config.get_providers(media_type="series" if video["type"] == "episode" else "movies")
|
||||
scanned_video = scan_video(video, ignore_all=force_refresh or ignore_all, hints=hints,
|
||||
rating_key=video["id"], no_refining=no_refining)
|
||||
rating_key=video["id"], providers=p,
|
||||
skip_hashing=skip_hashing)
|
||||
|
||||
if not scanned_video:
|
||||
continue
|
||||
@@ -105,5 +126,6 @@ def scan_videos(videos, kind="series", ignore_all=False, no_refining=False):
|
||||
part_metadata = video.copy()
|
||||
del part_metadata["plex_part"]
|
||||
scanned_video.plexapi_metadata = part_metadata
|
||||
scanned_video.ignore_all = force_refresh
|
||||
ret[scanned_video] = video["plex_part"]
|
||||
return ret
|
||||
return ret
|
||||
|
||||
@@ -4,6 +4,7 @@ import datetime
|
||||
import logging
|
||||
import traceback
|
||||
|
||||
from config import config
|
||||
|
||||
def parse_frequency(s):
|
||||
if s == "never" or s is None:
|
||||
@@ -135,6 +136,7 @@ class DefaultScheduler(object):
|
||||
except:
|
||||
Log.Error("Scheduler: task.post_run failed for %s: %s", name, traceback.format_exc())
|
||||
Dict.Save()
|
||||
config.sync_cache()
|
||||
|
||||
def dispatch_task(self, *args, **kwargs):
|
||||
if "queue" not in Dict["tasks"]:
|
||||
|
||||
@@ -4,9 +4,12 @@ import datetime
|
||||
import os
|
||||
import pprint
|
||||
import copy
|
||||
import traceback
|
||||
import types
|
||||
|
||||
from subliminal_patch.core import save_subtitles as subliminal_save_subtitles
|
||||
from subzero.subtitle_storage import StoredSubtitlesManager
|
||||
from subzero.lib.io import FileIO
|
||||
|
||||
from subtitlehelpers import force_utf8
|
||||
from config import config
|
||||
@@ -16,30 +19,45 @@ from support.items import get_item
|
||||
|
||||
|
||||
def get_subtitle_storage():
|
||||
return StoredSubtitlesManager(Data, get_item)
|
||||
return StoredSubtitlesManager(Data, Thread, get_item)
|
||||
|
||||
|
||||
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a"):
|
||||
def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_type, mode="a", set_current=True):
|
||||
"""
|
||||
stores information about downloaded subtitles in plex's Dict()
|
||||
"""
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
part = scanned_video_part_map[video]
|
||||
part_id = str(part.id)
|
||||
video_id = str(video.id)
|
||||
plex_item = get_item(video_id)
|
||||
if not plex_item:
|
||||
Log.Warning("Plex item not found: %s", video_id)
|
||||
continue
|
||||
|
||||
metadata = video.plexapi_metadata
|
||||
title = get_title_for_video_metadata(metadata)
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load_or_new(plex_item)
|
||||
stored_subs = subtitle_storage.load(video_id)
|
||||
is_new = False
|
||||
if not stored_subs:
|
||||
is_new = True
|
||||
Log.Debug(u"Creating new subtitle storage: %s, %s", video_id, part_id)
|
||||
stored_subs = subtitle_storage.new(plex_item)
|
||||
|
||||
for subtitle in video_subtitles:
|
||||
lang = str(subtitle.language)
|
||||
subtitle.set_encoding("utf-8")
|
||||
Log.Debug(u"Adding subtitle to storage: %s, %s, %s, %s" % (video_id, part_id, title,
|
||||
subtitle.guess_encoding()))
|
||||
ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode)
|
||||
subtitle.normalize()
|
||||
Log.Debug(u"Adding subtitle to storage: %s, %s, %s, %s, %s" % (video_id, part_id, lang, title,
|
||||
subtitle.guess_encoding()))
|
||||
|
||||
last_mod = None
|
||||
if subtitle.storage_path:
|
||||
last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(subtitle.storage_path))
|
||||
|
||||
ret_val = stored_subs.add(part_id, lang, subtitle, storage_type, mode=mode, last_mod=last_mod,
|
||||
set_current=set_current)
|
||||
|
||||
if ret_val:
|
||||
Log.Debug("Subtitle stored")
|
||||
@@ -47,9 +65,11 @@ def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_ty
|
||||
else:
|
||||
Log.Debug("Subtitle already existing in storage")
|
||||
|
||||
Log.Debug("Saving subtitle storage for %s" % video_id)
|
||||
subtitle_storage.save(stored_subs)
|
||||
subtitle_storage.destroy()
|
||||
if is_new or video_subtitles:
|
||||
Log.Debug("Saving subtitle storage for %s" % video_id)
|
||||
subtitle_storage.save(stored_subs)
|
||||
|
||||
subtitle_storage.destroy()
|
||||
|
||||
|
||||
def reset_storage(key):
|
||||
@@ -71,36 +91,47 @@ def log_storage(key):
|
||||
Log.Debug(pprint.pformat(Dict[key]))
|
||||
|
||||
|
||||
def save_subtitles_to_file(subtitles):
|
||||
def get_target_folder(file_path):
|
||||
fld = None
|
||||
fld_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
|
||||
if Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
|
||||
if fld_custom or Prefs["subtitles.save.subFolder"] != "current folder":
|
||||
# specific subFolder requested, create it if it doesn't exist
|
||||
fld_base = os.path.split(file_path)[0]
|
||||
if fld_custom:
|
||||
if fld_custom.startswith("/"):
|
||||
# absolute folder
|
||||
fld = fld_custom
|
||||
else:
|
||||
fld = os.path.join(fld_base, fld_custom)
|
||||
else:
|
||||
fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
|
||||
fld = force_unicode(fld)
|
||||
if not os.path.exists(fld):
|
||||
os.makedirs(fld)
|
||||
return fld
|
||||
|
||||
|
||||
def save_subtitles_to_file(subtitles, tags=None, forced_tag=None):
|
||||
forced_tag = forced_tag or config.forced_only
|
||||
for video, video_subtitles in subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
|
||||
fld = None
|
||||
if fld_custom or Prefs["subtitles.save.subFolder"] != "current folder":
|
||||
# specific subFolder requested, create it if it doesn't exist
|
||||
fld_base = os.path.split(video.name)[0]
|
||||
if fld_custom:
|
||||
if fld_custom.startswith("/"):
|
||||
# absolute folder
|
||||
fld = fld_custom
|
||||
else:
|
||||
fld = os.path.join(fld_base, fld_custom)
|
||||
else:
|
||||
fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
|
||||
fld = force_unicode(fld)
|
||||
if not os.path.exists(fld):
|
||||
os.makedirs(fld)
|
||||
subliminal_save_subtitles(video, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
|
||||
chmod=config.chmod, forced_tag=config.forced_only, path_decoder=force_unicode,
|
||||
debug_mods=config.debug_mods, formats=config.subtitle_formats)
|
||||
if not isinstance(video, types.StringTypes):
|
||||
file_path = video.name
|
||||
else:
|
||||
file_path = video
|
||||
|
||||
fld = get_target_folder(file_path)
|
||||
subliminal_save_subtitles(file_path, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
|
||||
chmod=config.chmod, forced_tag=forced_tag, path_decoder=force_unicode,
|
||||
debug_mods=config.debug_mods, formats=config.subtitle_formats, tags=tags)
|
||||
return True
|
||||
|
||||
|
||||
def save_subtitles_to_metadata(videos, subtitles):
|
||||
def save_subtitles_to_metadata(videos, subtitles, is_forced=False):
|
||||
for video, video_subtitles in subtitles.items():
|
||||
mediaPart = videos[video]
|
||||
for subtitle in video_subtitles:
|
||||
@@ -112,14 +143,19 @@ def save_subtitles_to_metadata(videos, subtitles):
|
||||
mp = PMSMediaProxy(video.id).get_part(mediaPart.id)
|
||||
else:
|
||||
mp = mediaPart
|
||||
mp.subtitles[Locale.Language.Match(subtitle.language.alpha2)][subtitle.id] = Proxy.Media(content, ext="srt")
|
||||
pm = Proxy.Media(content, ext="srt", forced="1" if is_forced else None)
|
||||
lang = Locale.Language.Match(subtitle.language.alpha2)
|
||||
mp.subtitles[lang].validate_keys({})
|
||||
mp.subtitles[lang]["subzero"] = pm
|
||||
return True
|
||||
|
||||
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None):
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None,
|
||||
set_current=True, is_forced=False):
|
||||
"""
|
||||
|
||||
:param scanned_video_part_map:
|
||||
:param set_current: save the subtitle as the current one
|
||||
:param scanned_video_part_map:
|
||||
:param downloaded_subtitles:
|
||||
:param mode:
|
||||
:param bare_save: don't trigger anything; don't store information
|
||||
@@ -145,30 +181,64 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_
|
||||
save_to_fs = cast_bool(Prefs['subtitles.save.filesystem'])
|
||||
if save_to_fs:
|
||||
storage = "filesystem"
|
||||
try:
|
||||
Log.Debug("Using filesystem as subtitle storage")
|
||||
save_subtitles_to_file(downloaded_subtitles)
|
||||
except OSError:
|
||||
if cast_bool(Prefs["subtitles.save.metadata_fallback"]):
|
||||
meta_fallback = True
|
||||
storage = "metadata"
|
||||
|
||||
if set_current:
|
||||
if save_to_fs:
|
||||
try:
|
||||
Log.Debug("Using filesystem as subtitle storage")
|
||||
save_subtitles_to_file(downloaded_subtitles, forced_tag=is_forced)
|
||||
except OSError:
|
||||
if cast_bool(Prefs["subtitles.save.metadata_fallback"]):
|
||||
meta_fallback = True
|
||||
storage = "metadata"
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
save_successful = True
|
||||
save_successful = True
|
||||
|
||||
if not save_to_fs or meta_fallback:
|
||||
if meta_fallback:
|
||||
Log.Debug("Using metadata as subtitle storage, because filesystem storage failed")
|
||||
else:
|
||||
Log.Debug("Using metadata as subtitle storage")
|
||||
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles)
|
||||
if not save_to_fs or meta_fallback:
|
||||
if meta_fallback:
|
||||
Log.Debug("Using metadata as subtitle storage, because filesystem storage failed")
|
||||
else:
|
||||
Log.Debug("Using metadata as subtitle storage")
|
||||
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles,
|
||||
is_forced=is_forced)
|
||||
|
||||
if not bare_save and save_successful and config.notify_executable:
|
||||
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
|
||||
if not bare_save and save_successful and config.notify_executable:
|
||||
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
|
||||
|
||||
if not bare_save and save_successful:
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode)
|
||||
if not bare_save and save_successful or not set_current:
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode, set_current=set_current)
|
||||
|
||||
return save_successful
|
||||
|
||||
|
||||
def get_pack_id(subtitle):
|
||||
return "%s_%s" % (subtitle.provider_name, subtitle.numeric_id)
|
||||
|
||||
|
||||
def get_pack_data(subtitle):
|
||||
subtitle_id = get_pack_id(subtitle)
|
||||
|
||||
archive = os.path.join(config.pack_cache_dir, subtitle_id + ".archive")
|
||||
if os.path.isfile(archive):
|
||||
Log.Info("Loading archive from pack cache: %s", subtitle_id)
|
||||
try:
|
||||
data = FileIO.read(archive, 'rb')
|
||||
|
||||
return data
|
||||
except:
|
||||
Log.Error("Couldn't load archive from pack cache: %s: %s", subtitle_id, traceback.format_exc())
|
||||
|
||||
|
||||
def store_pack_data(subtitle, data):
|
||||
subtitle_id = get_pack_id(subtitle)
|
||||
|
||||
archive = os.path.join(config.pack_cache_dir, subtitle_id + ".archive")
|
||||
|
||||
Log.Info("Storing archive in pack cache: %s", subtitle_id)
|
||||
try:
|
||||
FileIO.write(archive, data, 'wb')
|
||||
|
||||
except:
|
||||
Log.Error("Couldn't store archive in pack cache: %s: %s", subtitle_id, traceback.format_exc())
|
||||
|
||||
@@ -133,7 +133,13 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
# Attempt to extract the language from the filename (e.g. Avatar (2009).eng)
|
||||
# IETF support thanks to
|
||||
# https://github.com/hpsbranco/LocalMedia.bundle/commit/4fad9aefedece78a1fa96401304351347f644369
|
||||
language = Locale.Language.Match(match_ietf_language(file))
|
||||
lang_part = match_ietf_language(file)
|
||||
if lang_part != file:
|
||||
language = Locale.Language.Match(lang_part)
|
||||
elif config.only_one:
|
||||
language = Locale.Language.Match(list(config.lang_list)[0].alpha2)
|
||||
else:
|
||||
language = Locale.Language.Match("xx")
|
||||
|
||||
# skip non-SRT if wanted
|
||||
if not config.exotic_ext and ext not in TEXT_SUBTITLE_EXTS:
|
||||
@@ -190,7 +196,7 @@ def get_subtitles_from_metadata(part):
|
||||
if p_type == "Media":
|
||||
# metadata subtitle
|
||||
Log.Debug(u"Found metadata subtitle: %s, %s" % (language, repr(proxy)))
|
||||
subs[language].append(key)
|
||||
subs[language] = [key]
|
||||
return subs
|
||||
|
||||
|
||||
|
||||
+337
-223
@@ -1,29 +1,27 @@
|
||||
# coding=utf-8
|
||||
|
||||
import glob
|
||||
import os
|
||||
import datetime
|
||||
import time
|
||||
import operator
|
||||
import traceback
|
||||
from urllib2 import URLError
|
||||
|
||||
from subliminal_patch.score import compute_score
|
||||
from subliminal_patch.core import download_subtitles
|
||||
from subliminal import list_subtitles as list_all_subtitles
|
||||
from babelfish import Language
|
||||
from subliminal import list_subtitles as list_all_subtitles, region as subliminal_cache_region
|
||||
from subzero.language import Language
|
||||
from subzero.video import refine_video
|
||||
|
||||
from missing_subtitles import items_get_all_missing_subs, refresh_item
|
||||
from scheduler import scheduler
|
||||
from storage import save_subtitles, get_subtitle_storage
|
||||
from support.config import config
|
||||
from support.items import get_recent_items, get_item, is_ignored
|
||||
from support.items import get_recent_items, get_item, is_ignored, get_item_title
|
||||
from support.helpers import track_usage, get_title_for_video_metadata, cast_bool, PartUnknownException
|
||||
from support.plex_media import get_plex_metadata
|
||||
from support.scanning import scan_videos
|
||||
from download import download_best_subtitles
|
||||
|
||||
|
||||
PROVIDER_SLACK = 30
|
||||
DL_PROVIDER_SLACK = 30
|
||||
from support.i18n import _
|
||||
from download import download_best_subtitles, pre_download_hook, post_download_hook, language_hook
|
||||
|
||||
|
||||
class Task(object):
|
||||
@@ -34,6 +32,9 @@ class Task(object):
|
||||
time_start = None
|
||||
data = None
|
||||
|
||||
PROVIDER_SLACK = 30
|
||||
DL_PROVIDER_SLACK = 30
|
||||
|
||||
stored_attributes = ("last_run", "last_run_time", "running")
|
||||
default_data = {"last_run": None, "last_run_time": None, "running": False, "data": {}}
|
||||
|
||||
@@ -99,7 +100,7 @@ class Task(object):
|
||||
|
||||
class SubtitleListingMixin(object):
|
||||
def list_subtitles(self, rating_key, item_type, part_id, language, skip_wrong_fps=True, metadata=None,
|
||||
scanned_parts=None):
|
||||
scanned_parts=None, air_date_cutoff=None):
|
||||
|
||||
if not metadata:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
@@ -107,18 +108,26 @@ class SubtitleListingMixin(object):
|
||||
if not metadata:
|
||||
return
|
||||
|
||||
providers = config.get_providers(media_type="series" if item_type == "episode" else "movies")
|
||||
if not scanned_parts:
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True, providers=providers)
|
||||
if not scanned_parts:
|
||||
Log.Error(u"%s: Couldn't list available subtitles for %s", self.name, rating_key)
|
||||
return
|
||||
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
refine_video(video, refiner_settings=config.refiner_settings)
|
||||
|
||||
if air_date_cutoff is not None and metadata["item"].year and \
|
||||
metadata["item"].year + air_date_cutoff < datetime.date.today().year:
|
||||
Log.Debug("Skipping searching for subtitles: %s, it aired over %s year(s) ago.", rating_key,
|
||||
air_date_cutoff)
|
||||
return
|
||||
|
||||
config.init_subliminal_patches()
|
||||
|
||||
provider_settings = config.provider_settings.copy()
|
||||
provider_settings = config.provider_settings
|
||||
if not skip_wrong_fps:
|
||||
provider_settings = config.provider_settings.copy()
|
||||
provider_settings["opensubtitles"]["skip_wrong_fps"] = False
|
||||
|
||||
if item_type == "episode":
|
||||
@@ -130,10 +139,12 @@ class SubtitleListingMixin(object):
|
||||
|
||||
languages = {Language.fromietf(language)}
|
||||
|
||||
available_subs = list_all_subtitles(scanned_parts, languages,
|
||||
providers=config.providers,
|
||||
available_subs = list_all_subtitles([video], languages,
|
||||
providers=providers,
|
||||
provider_configs=provider_settings,
|
||||
pool_class=config.provider_pool)
|
||||
pool_class=config.provider_pool,
|
||||
throttle_callback=config.provider_throttle,
|
||||
language_hook=language_hook)
|
||||
|
||||
use_hearing_impaired = Prefs['subtitles.search.hearingImpaired'] in ("prefer", "force HI")
|
||||
|
||||
@@ -147,6 +158,16 @@ class SubtitleListingMixin(object):
|
||||
Log.Error(u"%s: Match computation failed for %s: %s", self.name, s, traceback.format_exc())
|
||||
continue
|
||||
|
||||
# skip wrong season/episodes
|
||||
if item_type == "episode":
|
||||
can_verify_series = True
|
||||
if not s.hash_verifiable and "hash" in matches:
|
||||
can_verify_series = False
|
||||
|
||||
if can_verify_series and not {"series", "season", "episode"}.issubset(matches):
|
||||
Log.Debug(u"%s: Skipping %s, because it doesn't match our series/episode", self.name, s)
|
||||
continue
|
||||
|
||||
unsorted_subtitles.append(
|
||||
(s, compute_score(matches, s, video, hearing_impaired=use_hearing_impaired), matches))
|
||||
scored_subtitles = sorted(unsorted_subtitles, key=operator.itemgetter(1), reverse=True)
|
||||
@@ -172,21 +193,36 @@ class DownloadSubtitleMixin(object):
|
||||
item_type = subtitle.item_type
|
||||
part_id = subtitle.part_id
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
providers = config.get_providers(media_type="series" if item_type == "episode" else "movies")
|
||||
scanned_parts = scan_videos([metadata], ignore_all=True, providers=providers)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
pre_download_hook(subtitle)
|
||||
|
||||
# downloaded_subtitles = {subliminal.Video: [subtitle, subtitle, ...]}
|
||||
download_subtitles([subtitle], providers=config.providers, provider_configs=config.provider_settings,
|
||||
pool_class=config.provider_pool)
|
||||
download_subtitles([subtitle], providers=providers,
|
||||
provider_configs=config.provider_settings,
|
||||
pool_class=config.provider_pool, throttle_callback=config.provider_throttle)
|
||||
|
||||
post_download_hook(subtitle)
|
||||
|
||||
# may be redundant
|
||||
subtitle.pack_data = None
|
||||
|
||||
download_successful = False
|
||||
|
||||
if subtitle.content:
|
||||
try:
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode=mode, mods=config.default_mods)
|
||||
Log.Debug(u"%s: Manually downloaded subtitle for: %s", self.name, rating_key)
|
||||
if mode == "m":
|
||||
Log.Debug(u"%s: Manually downloaded subtitle for: %s", self.name, rating_key)
|
||||
track_usage("Subtitle", "manual", "download", 1)
|
||||
elif mode == "b":
|
||||
Log.Debug(u"%s: Downloaded better subtitle for: %s", self.name, rating_key)
|
||||
track_usage("Subtitle", "better", "download", 1)
|
||||
download_successful = True
|
||||
refresh_item(rating_key)
|
||||
track_usage("Subtitle", "manual", "download", 1)
|
||||
|
||||
except:
|
||||
Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s",
|
||||
self.name, traceback.format_exc())
|
||||
@@ -201,12 +237,15 @@ class DownloadSubtitleMixin(object):
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
subtitle=subtitle,
|
||||
mode=mode)
|
||||
history.destroy()
|
||||
|
||||
# clear missing subtitles menu data
|
||||
if not scheduler.is_task_running("MissingSubtitles"):
|
||||
scheduler.clear_task_data("MissingSubtitles")
|
||||
else:
|
||||
set_refresh_menu_state(u"%s: Subtitle download failed (%s)" % (self.name, rating_key))
|
||||
set_refresh_menu_state(_(u"%(class_name)s: Subtitle download failed (%(item_id)s)",
|
||||
class_name=self.name,
|
||||
item_id=rating_key))
|
||||
return download_successful
|
||||
|
||||
|
||||
@@ -322,6 +361,8 @@ class SearchAllRecentlyAddedMissing(Task):
|
||||
now = datetime.datetime.now()
|
||||
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
|
||||
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
|
||||
series_providers = config.get_providers(media_type="series")
|
||||
movie_providers = config.get_providers(media_type="movies")
|
||||
|
||||
is_recent_str = Prefs["scheduler.item_is_recent_age"]
|
||||
num, ident = is_recent_str.split()
|
||||
@@ -333,24 +374,9 @@ class SearchAllRecentlyAddedMissing(Task):
|
||||
max_search_days = int(num) * 7
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
recent_sub_fns = subtitle_storage.get_recent_files(age_days=max_search_days)
|
||||
viable_items = {}
|
||||
recent_files = subtitle_storage.get_recent_files(age_days=max_search_days)
|
||||
|
||||
# determine viable items
|
||||
for fn in recent_sub_fns:
|
||||
# added_date <= max_search_days?
|
||||
stored_subs = subtitle_storage.load(filename=fn)
|
||||
if not stored_subs:
|
||||
continue
|
||||
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
continue
|
||||
|
||||
viable_items[fn] = stored_subs
|
||||
|
||||
subtitle_storage.destroy()
|
||||
|
||||
self.items_searching = len(viable_items)
|
||||
self.items_searching = len(recent_files)
|
||||
|
||||
download_count = 0
|
||||
videos_with_downloads = 0
|
||||
@@ -359,101 +385,130 @@ class SearchAllRecentlyAddedMissing(Task):
|
||||
|
||||
Log.Info(u"%s: Searching for subtitles for %s items", self.name, self.items_searching)
|
||||
|
||||
def skip_item():
|
||||
self.items_searching = self.items_searching - 1
|
||||
self.percentage = int(self.items_done * 100 / self.items_searching) if self.items_searching > 0 else 100
|
||||
|
||||
# search for subtitles in viable items
|
||||
for fn, stored_subs in viable_items.iteritems():
|
||||
video_id = stored_subs.video_id
|
||||
|
||||
if stored_subs.item_type == "episode":
|
||||
min_score = min_score_series
|
||||
else:
|
||||
min_score = min_score_movies
|
||||
|
||||
parts = []
|
||||
plex_item = get_item(video_id)
|
||||
|
||||
if not plex_item:
|
||||
Log.Info(u"%s: Item %s unknown, skipping", self.name, video_id)
|
||||
continue
|
||||
|
||||
if is_ignored(video_id, item=plex_item):
|
||||
continue
|
||||
|
||||
for media in plex_item.media:
|
||||
parts += media.parts
|
||||
|
||||
downloads_per_video = 0
|
||||
hit_providers = False
|
||||
for part in parts:
|
||||
part_id = part.id
|
||||
|
||||
try:
|
||||
metadata = get_plex_metadata(video_id, part_id, stored_subs.item_type)
|
||||
except PartUnknownException:
|
||||
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
|
||||
try:
|
||||
for fn in recent_files:
|
||||
stored_subs = subtitle_storage.load(filename=fn)
|
||||
if not stored_subs:
|
||||
Log.Debug("Skipping item %s because storage is empty", fn)
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
if not metadata:
|
||||
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
|
||||
video_id = stored_subs.video_id
|
||||
|
||||
# added_date <= max_search_days?
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
Log.Debug("Skipping item %s because it's too old", video_id)
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
Log.Debug(u"%s: Looking for missing subtitles: %s:%s", self.name, video_id, part_id)
|
||||
scanned_parts = scan_videos([metadata], kind="series"
|
||||
if stored_subs.item_type == "episode" else "movie")
|
||||
if stored_subs.item_type == "episode":
|
||||
min_score = min_score_series
|
||||
providers = series_providers
|
||||
else:
|
||||
min_score = min_score_movies
|
||||
providers = movie_providers
|
||||
|
||||
downloaded_subtitles = download_best_subtitles(scanned_parts, min_score=min_score)
|
||||
hit_providers = downloaded_subtitles is not None
|
||||
download_successful = False
|
||||
parts = []
|
||||
plex_item = get_item(video_id)
|
||||
|
||||
if downloaded_subtitles:
|
||||
downloaded_any = any(downloaded_subtitles.values())
|
||||
if not downloaded_any:
|
||||
continue
|
||||
if not plex_item:
|
||||
Log.Info(u"%s: Item %s unknown, skipping", self.name, video_id)
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
if is_ignored(video_id, item=plex_item):
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
for media in plex_item.media:
|
||||
parts += media.parts
|
||||
|
||||
downloads_per_video = 0
|
||||
hit_providers = False
|
||||
for part in parts:
|
||||
part_id = part.id
|
||||
|
||||
try:
|
||||
save_subtitles(scanned_parts, downloaded_subtitles, mode="a", mods=config.default_mods)
|
||||
Log.Debug(u"%s: Downloaded subtitle for item with missing subs: %s", self.name, video_id)
|
||||
download_successful = True
|
||||
refresh_item(video_id)
|
||||
track_usage("Subtitle", "manual", "download", 1)
|
||||
except:
|
||||
Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s", self.name,
|
||||
traceback.format_exc())
|
||||
finally:
|
||||
metadata = get_plex_metadata(video_id, part_id, stored_subs.item_type)
|
||||
except PartUnknownException:
|
||||
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
|
||||
continue
|
||||
|
||||
if not metadata:
|
||||
Log.Info(u"%s: Part %s:%s unknown, skipping", self.name, video_id, part_id)
|
||||
continue
|
||||
|
||||
Log.Debug(u"%s: Looking for missing subtitles: %s", self.name, get_item_title(plex_item))
|
||||
scanned_parts = scan_videos([metadata], providers=providers)
|
||||
|
||||
downloaded_subtitles = download_best_subtitles(scanned_parts, min_score=min_score,
|
||||
providers=providers)
|
||||
hit_providers = downloaded_subtitles is not None
|
||||
download_successful = False
|
||||
|
||||
if downloaded_subtitles:
|
||||
downloaded_any = any(downloaded_subtitles.values())
|
||||
if not downloaded_any:
|
||||
continue
|
||||
|
||||
try:
|
||||
item_title = get_title_for_video_metadata(metadata, add_section_title=False)
|
||||
if download_successful:
|
||||
# store item in history
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
|
||||
for subtitle in video_subtitles:
|
||||
downloads_per_video += 1
|
||||
history.add(item_title, video.id, section_title=metadata["section"],
|
||||
subtitle=subtitle,
|
||||
mode="a")
|
||||
save_subtitles(scanned_parts, downloaded_subtitles, mode="a", mods=config.default_mods)
|
||||
Log.Debug(u"%s: Downloaded subtitle for item with missing subs: %s", self.name, video_id)
|
||||
download_successful = True
|
||||
refresh_item(video_id)
|
||||
track_usage("Subtitle", "manual", "download", 1)
|
||||
except:
|
||||
Log.Error(u"%s: DEBUG HIT: %s", self.name, traceback.format_exc())
|
||||
Log.Error(u"%s: Something went wrong when downloading specific subtitle: %s", self.name,
|
||||
traceback.format_exc())
|
||||
finally:
|
||||
scanned_parts = None
|
||||
try:
|
||||
item_title = get_title_for_video_metadata(metadata, add_section_title=False)
|
||||
if download_successful:
|
||||
# store item in history
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
time.sleep(PROVIDER_SLACK)
|
||||
for subtitle in video_subtitles:
|
||||
downloads_per_video += 1
|
||||
history.add(item_title, video.id, section_title=metadata["section"],
|
||||
subtitle=subtitle,
|
||||
mode="a")
|
||||
|
||||
download_count += downloads_per_video
|
||||
downloaded_subtitles = None
|
||||
except:
|
||||
Log.Error(u"%s: DEBUG HIT: %s", self.name, traceback.format_exc())
|
||||
|
||||
if downloads_per_video:
|
||||
videos_with_downloads += 1
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.PROVIDER_SLACK)
|
||||
Thread.Sleep(self.PROVIDER_SLACK)
|
||||
|
||||
self.items_done = self.items_done + 1
|
||||
self.percentage = int(self.items_done * 100 / self.items_searching)
|
||||
download_count += downloads_per_video
|
||||
|
||||
if downloads_per_video:
|
||||
Log.Debug(u"%s: Subtitles have been downloaded, "
|
||||
u"waiting %s seconds before continuing", self.name, DL_PROVIDER_SLACK)
|
||||
time.sleep(DL_PROVIDER_SLACK)
|
||||
else:
|
||||
if hit_providers:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
time.sleep(PROVIDER_SLACK)
|
||||
if downloads_per_video:
|
||||
videos_with_downloads += 1
|
||||
|
||||
self.items_done = self.items_done + 1
|
||||
self.percentage = int(self.items_done * 100 / self.items_searching) if self.items_searching > 0 else 100
|
||||
|
||||
stored_subs = None
|
||||
|
||||
if downloads_per_video:
|
||||
Log.Debug(u"%s: Subtitles have been downloaded, "
|
||||
u"waiting %s seconds before continuing", self.name, self.DL_PROVIDER_SLACK)
|
||||
Thread.Sleep(self.DL_PROVIDER_SLACK)
|
||||
else:
|
||||
if hit_providers:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.PROVIDER_SLACK)
|
||||
Thread.Sleep(self.PROVIDER_SLACK)
|
||||
finally:
|
||||
subtitle_storage.destroy()
|
||||
history.destroy()
|
||||
|
||||
if download_count:
|
||||
Log.Debug(u"%s: done. Missing subtitles found for %s/%s items (%s subs downloaded)", self.name,
|
||||
@@ -528,7 +583,7 @@ class LegacySearchAllRecentlyAddedMissing(Task):
|
||||
while 1:
|
||||
if item_id in self.items_done:
|
||||
items_done_count += 1
|
||||
self.percentage = int(items_done_count * 100 / missing_count)
|
||||
self.percentage = int(items_done_count * 100 / missing_count) if missing_count > 0 else 100
|
||||
Log.Debug(u"Task: %s, item %s done (%s%%, %s/%s)", self.name, item_id, self.percentage,
|
||||
items_done_count, missing_count)
|
||||
break
|
||||
@@ -548,10 +603,10 @@ class LegacySearchAllRecentlyAddedMissing(Task):
|
||||
except URLError:
|
||||
pass
|
||||
search_started = datetime.datetime.now()
|
||||
time.sleep(1)
|
||||
time.sleep(0.1)
|
||||
Thread.Sleep(1)
|
||||
Thread.Sleep(0.1)
|
||||
# we can't hammer the PMS, otherwise requests will be stalled
|
||||
time.sleep(5)
|
||||
Thread.Sleep(5)
|
||||
|
||||
Log.Debug("Task: %s, done (%s%%, %s/%s). Failed items: %s", self.name, self.percentage,
|
||||
items_done_count, missing_count, self.items_failed)
|
||||
@@ -595,135 +650,162 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
now = datetime.datetime.now()
|
||||
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
|
||||
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
|
||||
min_score_extracted_series = config.advanced.find_better_as_extracted_tv_score or 352
|
||||
min_score_extracted_movies = config.advanced.find_better_as_extracted_movie_score or 82
|
||||
overwrite_manually_modified = cast_bool(
|
||||
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_modified"])
|
||||
overwrite_manually_selected = cast_bool(
|
||||
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected"])
|
||||
|
||||
air_date_cutoff_pref = Prefs["scheduler.tasks.FindBetterSubtitles.air_date_cutoff"]
|
||||
if air_date_cutoff_pref == "don't limit":
|
||||
air_date_cutoff = None
|
||||
else:
|
||||
air_date_cutoff = int(air_date_cutoff_pref.split()[0])
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
recent_subs = subtitle_storage.load_recent_files(age_days=max_search_days)
|
||||
viable_item_count = 0
|
||||
|
||||
for fn, stored_subs in recent_subs.iteritems():
|
||||
video_id = stored_subs.video_id
|
||||
try:
|
||||
for fn in subtitle_storage.get_recent_files(age_days=max_search_days):
|
||||
stored_subs = subtitle_storage.load(filename=fn)
|
||||
if not stored_subs:
|
||||
continue
|
||||
|
||||
if stored_subs.item_type == "episode":
|
||||
cutoff = self.series_cutoff
|
||||
min_score = min_score_series
|
||||
else:
|
||||
cutoff = self.movies_cutoff
|
||||
min_score = min_score_movies
|
||||
video_id = stored_subs.video_id
|
||||
|
||||
# don't search for better subtitles until at least 30 minutes have passed
|
||||
if stored_subs.added_at + datetime.timedelta(minutes=30) > now:
|
||||
Log.Debug(u"%s: Item %s too new, skipping", self.name, video_id)
|
||||
continue
|
||||
if stored_subs.item_type == "episode":
|
||||
cutoff = self.series_cutoff
|
||||
min_score = min_score_series
|
||||
min_score_extracted = min_score_extracted_series
|
||||
else:
|
||||
cutoff = self.movies_cutoff
|
||||
min_score = min_score_movies
|
||||
min_score_extracted = min_score_extracted_movies
|
||||
|
||||
# added_date <= max_search_days?
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
continue
|
||||
# don't search for better subtitles until at least 30 minutes have passed
|
||||
if stored_subs.added_at + datetime.timedelta(minutes=30) > now:
|
||||
Log.Debug(u"%s: Item %s too new, skipping", self.name, video_id)
|
||||
continue
|
||||
|
||||
viable_item_count += 1
|
||||
ditch_parts = []
|
||||
# added_date <= max_search_days?
|
||||
if stored_subs.added_at + datetime.timedelta(days=max_search_days) <= now:
|
||||
continue
|
||||
|
||||
# look through all stored subtitle data
|
||||
for part_id, languages in stored_subs.parts.iteritems():
|
||||
part_id = str(part_id)
|
||||
viable_item_count += 1
|
||||
ditch_parts = []
|
||||
|
||||
# all languages
|
||||
for language, current_subs in languages.iteritems():
|
||||
current_key = current_subs.get("current")
|
||||
current = current_subs.get(current_key)
|
||||
# look through all stored subtitle data
|
||||
for part_id, languages in stored_subs.parts.iteritems():
|
||||
part_id = str(part_id)
|
||||
|
||||
# currently got subtitle?
|
||||
# fixme: check for existence
|
||||
if not current:
|
||||
continue
|
||||
current_score = current.score
|
||||
current_mode = current.mode
|
||||
# all languages
|
||||
for language, current_subs in languages.iteritems():
|
||||
current_key = current_subs.get("current")
|
||||
current = current_subs.get(current_key)
|
||||
|
||||
# late cutoff met? skip
|
||||
if current_score >= cutoff:
|
||||
Log.Debug(u"%s: Skipping finding better subs, "
|
||||
u"cutoff met (current: %s, cutoff: %s): %s (%s)",
|
||||
self.name, current_score, cutoff, stored_subs.title, video_id)
|
||||
continue
|
||||
# currently got subtitle?
|
||||
# fixme: check for existence
|
||||
if not current:
|
||||
continue
|
||||
current_score = current.score
|
||||
current_mode = current.mode
|
||||
|
||||
# got manual subtitle but don't want to touch those?
|
||||
if current_mode == "m" and not overwrite_manually_selected:
|
||||
Log.Debug(u"%s: Skipping finding better subs, "
|
||||
u"had manual: %s (%s)", self.name, stored_subs.title, video_id)
|
||||
continue
|
||||
# late cutoff met? skip
|
||||
if current_score >= cutoff:
|
||||
Log.Debug(u"%s: Skipping finding better subs, "
|
||||
u"cutoff met (current: %s, cutoff: %s): %s (%s)",
|
||||
self.name, current_score, cutoff, stored_subs.title, video_id)
|
||||
continue
|
||||
|
||||
# subtitle modifications different from default
|
||||
if not overwrite_manually_modified and current.mods \
|
||||
and set(current.mods).difference(set(config.default_mods)):
|
||||
Log.Debug(u"%s: Skipping finding better subs, it has manual modifications: %s (%s)",
|
||||
self.name, stored_subs.title, video_id)
|
||||
continue
|
||||
# got manual subtitle but don't want to touch those?
|
||||
if current_mode == "m" and not overwrite_manually_selected:
|
||||
Log.Debug(u"%s: Skipping finding better subs, "
|
||||
u"had manual: %s (%s)", self.name, stored_subs.title, video_id)
|
||||
continue
|
||||
|
||||
try:
|
||||
subs = self.list_subtitles(video_id, stored_subs.item_type, part_id, language)
|
||||
except PartUnknownException:
|
||||
Log.Info(u"%s: Part %s unknown/gone; ditching subtitle info", self.name, part_id)
|
||||
ditch_parts.append(part_id)
|
||||
continue
|
||||
# subtitle modifications different from default
|
||||
if not overwrite_manually_modified and current.mods \
|
||||
and set(current.mods).difference(set(config.default_mods)):
|
||||
Log.Debug(u"%s: Skipping finding better subs, it has manual modifications: %s (%s)",
|
||||
self.name, stored_subs.title, video_id)
|
||||
continue
|
||||
|
||||
hit_providers = subs is not None
|
||||
try:
|
||||
subs = self.list_subtitles(video_id, stored_subs.item_type, part_id, language,
|
||||
air_date_cutoff=air_date_cutoff)
|
||||
except PartUnknownException:
|
||||
Log.Info(u"%s: Part %s unknown/gone; ditching subtitle info", self.name, part_id)
|
||||
ditch_parts.append(part_id)
|
||||
continue
|
||||
|
||||
if subs:
|
||||
# subs are already sorted by score
|
||||
better_downloaded = False
|
||||
better_tried_download = 0
|
||||
better_visited = 0
|
||||
for sub in subs:
|
||||
if sub.score > current_score and sub.score > min_score:
|
||||
Log.Debug(u"%s: Better subtitle found for %s, downloading", self.name, video_id)
|
||||
better_tried_download += 1
|
||||
ret = self.download_subtitle(sub, video_id, mode="b")
|
||||
if ret:
|
||||
better_found += 1
|
||||
better_downloaded = True
|
||||
break
|
||||
else:
|
||||
Log.Debug(u"%s: Couldn't download/save subtitle. "
|
||||
u"Continuing to the next one", self.name)
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing",
|
||||
self.name, DL_PROVIDER_SLACK)
|
||||
time.sleep(DL_PROVIDER_SLACK)
|
||||
better_visited += 1
|
||||
hit_providers = subs is not None
|
||||
|
||||
if better_tried_download and not better_downloaded:
|
||||
Log.Debug(u"%s: Tried downloading better subtitle for %s, "
|
||||
u"but every try failed.", self.name, video_id)
|
||||
if subs:
|
||||
# subs are already sorted by score
|
||||
better_downloaded = False
|
||||
better_tried_download = 0
|
||||
better_visited = 0
|
||||
for sub in subs:
|
||||
if sub.score > current_score and sub.score > min_score:
|
||||
if current.provider_name == "embedded" and sub.score < min_score_extracted:
|
||||
Log.Debug(u"%s: Not downloading subtitle for %s, we've got an active extracted "
|
||||
u"embedded sub and the min score %s isn't met (%s).",
|
||||
self.name, video_id, min_score_extracted, sub.score)
|
||||
better_visited += 1
|
||||
break
|
||||
|
||||
elif better_downloaded:
|
||||
Log.Debug(u"%s: Better subtitle downloaded for %s", self.name, video_id)
|
||||
Log.Debug(u"%s: Better subtitle found for %s, downloading", self.name, video_id)
|
||||
better_tried_download += 1
|
||||
ret = self.download_subtitle(sub, video_id, mode="b")
|
||||
if ret:
|
||||
better_found += 1
|
||||
better_downloaded = True
|
||||
break
|
||||
else:
|
||||
Log.Debug(u"%s: Couldn't download/save subtitle. "
|
||||
u"Continuing to the next one", self.name)
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing",
|
||||
self.name, self.DL_PROVIDER_SLACK)
|
||||
Thread.Sleep(self.DL_PROVIDER_SLACK)
|
||||
better_visited += 1
|
||||
|
||||
if better_tried_download or better_downloaded:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, DL_PROVIDER_SLACK)
|
||||
time.sleep(DL_PROVIDER_SLACK)
|
||||
if better_tried_download and not better_downloaded:
|
||||
Log.Debug(u"%s: Tried downloading better subtitle for %s, "
|
||||
u"but every try failed.", self.name, video_id)
|
||||
|
||||
elif better_visited:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
time.sleep(PROVIDER_SLACK)
|
||||
elif better_downloaded:
|
||||
Log.Debug(u"%s: Better subtitle downloaded for %s", self.name, video_id)
|
||||
|
||||
elif hit_providers:
|
||||
# hit the providers but didn't try downloading? wait.
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, PROVIDER_SLACK)
|
||||
time.sleep(PROVIDER_SLACK)
|
||||
if better_tried_download or better_downloaded:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.DL_PROVIDER_SLACK)
|
||||
Thread.Sleep(self.DL_PROVIDER_SLACK)
|
||||
|
||||
if ditch_parts:
|
||||
for part_id in ditch_parts:
|
||||
try:
|
||||
del stored_subs.parts[part_id]
|
||||
except KeyError:
|
||||
pass
|
||||
subtitle_storage.save(stored_subs)
|
||||
elif better_visited:
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.PROVIDER_SLACK)
|
||||
Thread.Sleep(self.PROVIDER_SLACK)
|
||||
|
||||
time.sleep(1)
|
||||
subs = None
|
||||
|
||||
subtitle_storage.destroy()
|
||||
elif hit_providers:
|
||||
# hit the providers but didn't try downloading? wait.
|
||||
Log.Debug(u"%s: Waiting %s seconds before continuing", self.name, self.PROVIDER_SLACK)
|
||||
Thread.Sleep(self.PROVIDER_SLACK)
|
||||
|
||||
if ditch_parts:
|
||||
for part_id in ditch_parts:
|
||||
try:
|
||||
del stored_subs.parts[part_id]
|
||||
except KeyError:
|
||||
pass
|
||||
subtitle_storage.save(stored_subs)
|
||||
ditch_parts = None
|
||||
|
||||
stored_subs = None
|
||||
|
||||
Thread.Sleep(1)
|
||||
finally:
|
||||
subtitle_storage.destroy()
|
||||
|
||||
if better_found:
|
||||
Log.Debug(u"%s: done. Better subtitles found for %s/%s items", self.name, better_found,
|
||||
@@ -788,6 +870,37 @@ class MigrateSubtitleStorage(Task):
|
||||
storage.destroy()
|
||||
|
||||
|
||||
class CacheMaintenance(Task):
|
||||
periodic = True
|
||||
frequency = "every 1 days"
|
||||
|
||||
main_cache_validity = 14 # days
|
||||
pack_cache_validity = 4 # days
|
||||
|
||||
def run(self):
|
||||
super(CacheMaintenance, self).run()
|
||||
self.running = True
|
||||
Log.Info(u"%s: Running cache maintenance", self.name)
|
||||
now = datetime.datetime.now()
|
||||
|
||||
def remove_expired(path, expiry):
|
||||
mtime = datetime.datetime.fromtimestamp(os.path.getmtime(path))
|
||||
if mtime + datetime.timedelta(days=expiry) < now:
|
||||
try:
|
||||
os.remove(path)
|
||||
except (IOError, OSError):
|
||||
Log.Debug("Couldn't remove cache file: %s", os.path.basename(path))
|
||||
|
||||
# main cache
|
||||
if config.new_style_cache:
|
||||
for fn in subliminal_cache_region.backend.all_filenames:
|
||||
remove_expired(fn, self.main_cache_validity)
|
||||
|
||||
# archive cache
|
||||
for fn in glob.iglob(os.path.join(config.pack_cache_dir, "*.archive")):
|
||||
remove_expired(fn, self.pack_cache_validity)
|
||||
|
||||
|
||||
scheduler.register(LegacySearchAllRecentlyAddedMissing)
|
||||
scheduler.register(SearchAllRecentlyAddedMissing)
|
||||
scheduler.register(AvailableSubsForItem)
|
||||
@@ -797,3 +910,4 @@ scheduler.register(FindBetterSubtitles)
|
||||
scheduler.register(SubtitleStorageMaintenance)
|
||||
scheduler.register(MigrateSubtitleStorage)
|
||||
scheduler.register(MenuHistoryMaintenance)
|
||||
scheduler.register(CacheMaintenance)
|
||||
|
||||
+156
-21
@@ -50,7 +50,9 @@
|
||||
"tr",
|
||||
"uk",
|
||||
"vi",
|
||||
"hr"
|
||||
"hr",
|
||||
"zh-hans",
|
||||
"zh-hant"
|
||||
],
|
||||
"default": "en"
|
||||
},
|
||||
@@ -106,7 +108,9 @@
|
||||
"tr",
|
||||
"uk",
|
||||
"vi",
|
||||
"hr"
|
||||
"hr",
|
||||
"zh-hans",
|
||||
"zh-hant"
|
||||
],
|
||||
"default": "None"
|
||||
},
|
||||
@@ -162,7 +166,9 @@
|
||||
"tr",
|
||||
"uk",
|
||||
"vi",
|
||||
"hr"
|
||||
"hr",
|
||||
"zh-hans",
|
||||
"zh-hant"
|
||||
],
|
||||
"default": "None"
|
||||
},
|
||||
@@ -202,6 +208,50 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "media_rename1",
|
||||
"label": "I rename my files using",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"Sonarr/Radarr (fill api info below)",
|
||||
"Filebot",
|
||||
"Sonarr/Radarr/Filebot",
|
||||
"Symlink to original file",
|
||||
"I keep the original filenames",
|
||||
"none of the above"
|
||||
],
|
||||
"default": "I keep the original filenames"
|
||||
},
|
||||
{
|
||||
"id": "use_file_info_file",
|
||||
"label": "Retrieve original filename from .file_info/file_info index files (see wiki)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "drone_api.sonarr.url",
|
||||
"label": "Sonarr URL (add URL base if configured)",
|
||||
"type": "text",
|
||||
"default": "http://127.0.0.1:8989"
|
||||
},
|
||||
{
|
||||
"id": "drone_api.sonarr.api_key",
|
||||
"label": "Sonarr API key",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "drone_api.radarr.url",
|
||||
"label": "Radarr URL (add URL base if configured, min. version: 0.2.0.897)",
|
||||
"type": "text",
|
||||
"default": "http://127.0.0.1:7878"
|
||||
},
|
||||
{
|
||||
"id": "drone_api.radarr.api_key",
|
||||
"label": "Radarr API key",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.enabled",
|
||||
"label": "Provider: Enable OpenSubtitles",
|
||||
@@ -293,10 +343,10 @@
|
||||
"default": "19"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.use_random_agents",
|
||||
"id": "provider.addic7ed.use_random_agents1",
|
||||
"label": "Addic7ed: Use random user agents",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.legendastv.enabled",
|
||||
@@ -331,11 +381,41 @@
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.shooter.enabled",
|
||||
"label": "Provider: Enable Shooter.cn (Chinese)",
|
||||
"id": "provider.subscene.enabled",
|
||||
"label": "Provider: Enable SubScene (TV shows)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.supersubtitles.enabled",
|
||||
"label": "Provider: Enable feliratok.info (Hungarian)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.hosszupuska.enabled",
|
||||
"label": "Provider: Enable hosszupuskasub.com (Hungarian)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.argenteam.enabled",
|
||||
"label": "Provider: Enable aRGENTeaM (Spanish)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.assrt.enabled",
|
||||
"label": "Provider: Enable assrt.net (Chinese)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.assrt.token",
|
||||
"label": "Assrt API Token",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "providers.multithreading",
|
||||
"label": "Search enabled providers simultaneously (multithreading)",
|
||||
@@ -343,20 +423,26 @@
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.use_tags",
|
||||
"label": "I keep the exact (release-) filename of my media files",
|
||||
"id": "subtitles.embedded.autoextract",
|
||||
"label": "Automatically extract and use embedded subtitles upon media addition (with configured default mods)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.search_after_autoextract",
|
||||
"label": "After automatic extraction of embedded subtitles, also immediately search for available subtitles?",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.embedded",
|
||||
"label": "Don't search for subtitles if there are embedded subtitles inside the media file (MKV/MP4)?",
|
||||
"label": "Don't search for subtitles of a language if there are embedded subtitles inside the media file (MKV/MP4)?",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.external",
|
||||
"label": "Don't search for subtitles if they already exist on the filesystem (metadata/filesystem)?",
|
||||
"label": "Don't search for subtitles of a language if they already exist on the filesystem (metadata/filesystem)?",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
@@ -373,7 +459,7 @@
|
||||
},
|
||||
{
|
||||
"id": "subtitles.scan.exotic_ext",
|
||||
"label": "Include \"exotic\" subtitle formats (anything else than .srt/.ssa/.ass/.vtt; embedded or external) in the above?",
|
||||
"label": "Include non-text subtitle formats (anything else than .srt/.ssa/.ass/.vtt; embedded or external) in the above?",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
@@ -415,7 +501,7 @@
|
||||
},
|
||||
{
|
||||
"id": "subtitles.fix_common",
|
||||
"label": "Fix common whitespace/punctuation issues in subtitles",
|
||||
"label": "Fix common issues in subtitles",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
@@ -425,6 +511,12 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.reverse_rtl",
|
||||
"label": "Reverse punctuation in RTL languages (heb)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.colors",
|
||||
"label": "Change colors of subtitles to",
|
||||
@@ -572,11 +664,30 @@
|
||||
"type": "text",
|
||||
"default": "7"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.air_date_cutoff",
|
||||
"label": "Scheduler: Don't search for better subtitles if the item's air date is older than",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"don't limit",
|
||||
"1 year",
|
||||
"2 years",
|
||||
"3 years",
|
||||
"4 years",
|
||||
"5 years",
|
||||
"6 years",
|
||||
"7 years",
|
||||
"8 years",
|
||||
"9 years",
|
||||
"10 years"
|
||||
],
|
||||
"default": "1 year"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_selected",
|
||||
"label": "Scheduler: Overwrite manually selected subtitles when better found",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.FindBetterSubtitles.overwrite_manually_modified",
|
||||
@@ -622,15 +733,15 @@
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "plugin_mode",
|
||||
"id": "plugin_mode2",
|
||||
"label": "Sub-Zero mode",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"agent + channel",
|
||||
"agent + interface",
|
||||
"only agent",
|
||||
"only channel"
|
||||
"only interface"
|
||||
],
|
||||
"default": "agent + channel"
|
||||
"default": "agent + interface"
|
||||
},
|
||||
{
|
||||
"id": "plugin_pin",
|
||||
@@ -647,12 +758,12 @@
|
||||
"default": "10"
|
||||
},
|
||||
{
|
||||
"id": "plugin_pin_mode",
|
||||
"id": "plugin_pin_mode2",
|
||||
"label": "Use PIN to restrict access to (needs plugin or PMS restart)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"disabled",
|
||||
"channel menu",
|
||||
"interface",
|
||||
"advanced menu"
|
||||
],
|
||||
"default": "disabled"
|
||||
@@ -669,6 +780,12 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "new_style_cache",
|
||||
"label": "Use new style caching (for subliminal)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "low_impact_mode",
|
||||
"label": "Low impact mode (for remote filesystems)",
|
||||
@@ -681,6 +798,18 @@
|
||||
"type": "text",
|
||||
"default": "15"
|
||||
},
|
||||
{
|
||||
"id": "proxy",
|
||||
"label": "HTTP proxy to use for providers (supports credentials)",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "path_to_advanced_settings",
|
||||
"label": "Custom path to advanced_settings.json",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "log_level",
|
||||
"label": "How verbose should the logging be?",
|
||||
@@ -694,6 +823,12 @@
|
||||
],
|
||||
"default": "WARNING"
|
||||
},
|
||||
{
|
||||
"id": "log_rotate_keep",
|
||||
"label": "How many log backups to keep?",
|
||||
"type": "text",
|
||||
"default": "5"
|
||||
},
|
||||
{
|
||||
"id": "log_debug_mods",
|
||||
"label": "Log subtitle modification (debug)",
|
||||
|
||||
+6
-4
@@ -9,11 +9,11 @@
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>2.0.33</string>
|
||||
<string>2.5.4</string>
|
||||
<key>CFBundleSignature</key>
|
||||
<string>????</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>2.0.33.1849</string>
|
||||
<string>2.5.7.2663</string>
|
||||
<key>PlexFrameworkVersion</key>
|
||||
<string>2</string>
|
||||
<key>PlexPluginClass</key>
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
<h1>Sub-Zero for Plex</h1><i>Subtitles done right</i>
|
||||
|
||||
Version 2.0.33.1849
|
||||
Version 2.5.7.2663
|
||||
|
||||
Originally based on @bramwalet's awesome <a href="https://github.com/bramwalet/Subliminal.bundle">Subliminal.bundle</a>
|
||||
|
||||
@@ -44,7 +44,9 @@ Score info: <a href="http://v.ht/szscores">http://v.ht/szscores&
|
||||
Plex thread: <a href="https://forums.plex.tv/discussion/186575">https://forums.plex.tv/discussion/186575</a>
|
||||
Github: <a href="https://github.com/pannal/Sub-Zero.bundle">https://github.com/pannal/Sub-Zero</a>
|
||||
|
||||
panni, 2017
|
||||
3rd party licenses: <a href="https://github.com/pannal/Sub-Zero.bundle/tree/master/Licenses">https://github.com/pannal/Sub-Zero.bundle/tree/master/Licenses</a>
|
||||
|
||||
panni, 2018
|
||||
</div>
|
||||
</string>
|
||||
</dict>
|
||||
|
||||
Executable
BIN
Binary file not shown.
BIN
Binary file not shown.
Executable
BIN
Binary file not shown.
Executable
BIN
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -5,26 +5,31 @@ http://www.crummy.com/software/BeautifulSoup/
|
||||
|
||||
Beautiful Soup uses a pluggable XML or HTML parser to parse a
|
||||
(possibly invalid) document into a tree representation. Beautiful Soup
|
||||
provides provides methods and Pythonic idioms that make it easy to
|
||||
navigate, search, and modify the parse tree.
|
||||
provides methods and Pythonic idioms that make it easy to navigate,
|
||||
search, and modify the parse tree.
|
||||
|
||||
Beautiful Soup works with Python 2.6 and up. It works better if lxml
|
||||
Beautiful Soup works with Python 2.7 and up. It works better if lxml
|
||||
and/or html5lib is installed.
|
||||
|
||||
For more than you ever wanted to know about Beautiful Soup, see the
|
||||
documentation:
|
||||
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
|
||||
"""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__author__ = "Leonard Richardson (leonardr@segfault.org)"
|
||||
__version__ = "4.4.1"
|
||||
__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson"
|
||||
__version__ = "4.6.0"
|
||||
__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
|
||||
__license__ = "MIT"
|
||||
|
||||
__all__ = ['BeautifulSoup']
|
||||
|
||||
import os
|
||||
import re
|
||||
import traceback
|
||||
import warnings
|
||||
|
||||
from .builder import builder_registry, ParserRejectedMarkup
|
||||
@@ -77,7 +82,7 @@ class BeautifulSoup(Tag):
|
||||
|
||||
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
|
||||
|
||||
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
|
||||
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
|
||||
|
||||
def __init__(self, markup="", features=None, builder=None,
|
||||
parse_only=None, from_encoding=None, exclude_encodings=None,
|
||||
@@ -137,6 +142,10 @@ class BeautifulSoup(Tag):
|
||||
from_encoding = from_encoding or deprecated_argument(
|
||||
"fromEncoding", "from_encoding")
|
||||
|
||||
if from_encoding and isinstance(markup, unicode):
|
||||
warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
|
||||
from_encoding = None
|
||||
|
||||
if len(kwargs) > 0:
|
||||
arg = kwargs.keys().pop()
|
||||
raise TypeError(
|
||||
@@ -161,19 +170,29 @@ class BeautifulSoup(Tag):
|
||||
markup_type = "XML"
|
||||
else:
|
||||
markup_type = "HTML"
|
||||
|
||||
caller = traceback.extract_stack()[0]
|
||||
filename = caller[0]
|
||||
line_number = caller[1]
|
||||
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
|
||||
filename=filename,
|
||||
line_number=line_number,
|
||||
parser=builder.NAME,
|
||||
markup_type=markup_type))
|
||||
|
||||
self.builder = builder
|
||||
self.is_xml = builder.is_xml
|
||||
self.known_xml = self.is_xml
|
||||
self.builder.soup = self
|
||||
|
||||
self.parse_only = parse_only
|
||||
|
||||
if hasattr(markup, 'read'): # It's a file-type object.
|
||||
markup = markup.read()
|
||||
elif len(markup) <= 256:
|
||||
elif len(markup) <= 256 and (
|
||||
(isinstance(markup, bytes) and not b'<' in markup)
|
||||
or (isinstance(markup, unicode) and not u'<' in markup)
|
||||
):
|
||||
# Print out warnings for a couple beginner problems
|
||||
# involving passing non-markup to Beautiful Soup.
|
||||
# Beautiful Soup will still parse the input as markup,
|
||||
@@ -195,16 +214,10 @@ class BeautifulSoup(Tag):
|
||||
if isinstance(markup, unicode):
|
||||
markup = markup.encode("utf8")
|
||||
warnings.warn(
|
||||
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
|
||||
if markup[:5] == "http:" or markup[:6] == "https:":
|
||||
# TODO: This is ugly but I couldn't get it to work in
|
||||
# Python 3 otherwise.
|
||||
if ((isinstance(markup, bytes) and not b' ' in markup)
|
||||
or (isinstance(markup, unicode) and not u' ' in markup)):
|
||||
if isinstance(markup, unicode):
|
||||
markup = markup.encode("utf8")
|
||||
warnings.warn(
|
||||
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
|
||||
'"%s" looks like a filename, not markup. You should'
|
||||
' probably open this file and pass the filehandle into'
|
||||
' Beautiful Soup.' % markup)
|
||||
self._check_markup_is_url(markup)
|
||||
|
||||
for (self.markup, self.original_encoding, self.declared_html_encoding,
|
||||
self.contains_replacement_characters) in (
|
||||
@@ -223,15 +236,52 @@ class BeautifulSoup(Tag):
|
||||
self.builder.soup = None
|
||||
|
||||
def __copy__(self):
|
||||
return type(self)(self.encode(), builder=self.builder)
|
||||
copy = type(self)(
|
||||
self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
|
||||
)
|
||||
|
||||
# Although we encoded the tree to UTF-8, that may not have
|
||||
# been the encoding of the original markup. Set the copy's
|
||||
# .original_encoding to reflect the original object's
|
||||
# .original_encoding.
|
||||
copy.original_encoding = self.original_encoding
|
||||
return copy
|
||||
|
||||
def __getstate__(self):
|
||||
# Frequently a tree builder can't be pickled.
|
||||
d = dict(self.__dict__)
|
||||
if 'builder' in d and not self.builder.picklable:
|
||||
del d['builder']
|
||||
d['builder'] = None
|
||||
return d
|
||||
|
||||
@staticmethod
|
||||
def _check_markup_is_url(markup):
|
||||
"""
|
||||
Check if markup looks like it's actually a url and raise a warning
|
||||
if so. Markup can be unicode or str (py2) / bytes (py3).
|
||||
"""
|
||||
if isinstance(markup, bytes):
|
||||
space = b' '
|
||||
cant_start_with = (b"http:", b"https:")
|
||||
elif isinstance(markup, unicode):
|
||||
space = u' '
|
||||
cant_start_with = (u"http:", u"https:")
|
||||
else:
|
||||
return
|
||||
|
||||
if any(markup.startswith(prefix) for prefix in cant_start_with):
|
||||
if not space in markup:
|
||||
if isinstance(markup, bytes):
|
||||
decoded_markup = markup.decode('utf-8', 'replace')
|
||||
else:
|
||||
decoded_markup = markup
|
||||
warnings.warn(
|
||||
'"%s" looks like a URL. Beautiful Soup is not an'
|
||||
' HTTP client. You should probably use an HTTP client like'
|
||||
' requests to get the document behind the URL, and feed'
|
||||
' that document to Beautiful Soup.' % decoded_markup
|
||||
)
|
||||
|
||||
def _feed(self):
|
||||
# Convert the document to Unicode.
|
||||
self.builder.reset()
|
||||
@@ -335,7 +385,18 @@ class BeautifulSoup(Tag):
|
||||
if parent.next_sibling:
|
||||
# This node is being inserted into an element that has
|
||||
# already been parsed. Deal with any dangling references.
|
||||
index = parent.contents.index(o)
|
||||
index = len(parent.contents)-1
|
||||
while index >= 0:
|
||||
if parent.contents[index] is o:
|
||||
break
|
||||
index -= 1
|
||||
else:
|
||||
raise ValueError(
|
||||
"Error building tree: supposedly %r was inserted "
|
||||
"into %r after the fact, but I don't see it!" % (
|
||||
o, parent
|
||||
)
|
||||
)
|
||||
if index == 0:
|
||||
previous_element = parent
|
||||
previous_sibling = None
|
||||
@@ -387,7 +448,7 @@ class BeautifulSoup(Tag):
|
||||
"""Push a start tag on to the stack.
|
||||
|
||||
If this method returns None, the tag was rejected by the
|
||||
SoupStrainer. You should proceed as if the tag had not occured
|
||||
SoupStrainer. You should proceed as if the tag had not occurred
|
||||
in the document. For instance, if this was a self-closing tag,
|
||||
don't call handle_endtag.
|
||||
"""
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
from collections import defaultdict
|
||||
import itertools
|
||||
import sys
|
||||
from bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
ContentMetaAttributeValue,
|
||||
HTMLAwareEntitySubstitution,
|
||||
whitespace_re
|
||||
)
|
||||
|
||||
@@ -227,9 +231,14 @@ class HTMLTreeBuilder(TreeBuilder):
|
||||
Such as which tags are empty-element tags.
|
||||
"""
|
||||
|
||||
preserve_whitespace_tags = set(['pre', 'textarea'])
|
||||
empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
|
||||
'spacer', 'link', 'frame', 'base'])
|
||||
preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
|
||||
empty_element_tags = set([
|
||||
# These are from HTML5.
|
||||
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
|
||||
|
||||
# These are from HTML4, removed in HTML5.
|
||||
'spacer', 'frame'
|
||||
])
|
||||
|
||||
# The HTML standard defines these attributes as containing a
|
||||
# space-separated list of values, not a single value. That is,
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__all__ = [
|
||||
'HTML5TreeBuilder',
|
||||
]
|
||||
|
||||
from pdb import set_trace
|
||||
import warnings
|
||||
import re
|
||||
from bs4.builder import (
|
||||
PERMISSIVE,
|
||||
HTML,
|
||||
@@ -15,7 +18,10 @@ from bs4.element import (
|
||||
whitespace_re,
|
||||
)
|
||||
import html5lib
|
||||
from html5lib.constants import namespaces
|
||||
from html5lib.constants import (
|
||||
namespaces,
|
||||
prefixes,
|
||||
)
|
||||
from bs4.element import (
|
||||
Comment,
|
||||
Doctype,
|
||||
@@ -23,6 +29,15 @@ from bs4.element import (
|
||||
Tag,
|
||||
)
|
||||
|
||||
try:
|
||||
# Pre-0.99999999
|
||||
from html5lib.treebuilders import _base as treebuilder_base
|
||||
new_html5lib = False
|
||||
except ImportError, e:
|
||||
# 0.99999999 and up
|
||||
from html5lib.treebuilders import base as treebuilder_base
|
||||
new_html5lib = True
|
||||
|
||||
class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
"""Use html5lib to build a tree."""
|
||||
|
||||
@@ -47,7 +62,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
if self.soup.parse_only is not None:
|
||||
warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
|
||||
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
|
||||
doc = parser.parse(markup, encoding=self.user_specified_encoding)
|
||||
|
||||
extra_kwargs = dict()
|
||||
if not isinstance(markup, unicode):
|
||||
if new_html5lib:
|
||||
extra_kwargs['override_encoding'] = self.user_specified_encoding
|
||||
else:
|
||||
extra_kwargs['encoding'] = self.user_specified_encoding
|
||||
doc = parser.parse(markup, **extra_kwargs)
|
||||
|
||||
# Set the character encoding detected by the tokenizer.
|
||||
if isinstance(markup, unicode):
|
||||
@@ -55,11 +77,17 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
# charEncoding to UTF-8 if it gets Unicode input.
|
||||
doc.original_encoding = None
|
||||
else:
|
||||
doc.original_encoding = parser.tokenizer.stream.charEncoding[0]
|
||||
original_encoding = parser.tokenizer.stream.charEncoding[0]
|
||||
if not isinstance(original_encoding, basestring):
|
||||
# In 0.99999999 and up, the encoding is an html5lib
|
||||
# Encoding object. We want to use a string for compatibility
|
||||
# with other tree builders.
|
||||
original_encoding = original_encoding.name
|
||||
doc.original_encoding = original_encoding
|
||||
|
||||
def create_treebuilder(self, namespaceHTMLElements):
|
||||
self.underlying_builder = TreeBuilderForHtml5lib(
|
||||
self.soup, namespaceHTMLElements)
|
||||
namespaceHTMLElements, self.soup)
|
||||
return self.underlying_builder
|
||||
|
||||
def test_fragment_to_document(self, fragment):
|
||||
@@ -67,10 +95,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
return u'<html><head></head><body>%s</body></html>' % fragment
|
||||
|
||||
|
||||
class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
|
||||
|
||||
def __init__(self, soup, namespaceHTMLElements):
|
||||
self.soup = soup
|
||||
def __init__(self, namespaceHTMLElements, soup=None):
|
||||
if soup:
|
||||
self.soup = soup
|
||||
else:
|
||||
from bs4 import BeautifulSoup
|
||||
self.soup = BeautifulSoup("", "html.parser")
|
||||
super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
|
||||
|
||||
def documentClass(self):
|
||||
@@ -93,7 +125,8 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
return TextNode(Comment(data), self.soup)
|
||||
|
||||
def fragmentClass(self):
|
||||
self.soup = BeautifulSoup("")
|
||||
from bs4 import BeautifulSoup
|
||||
self.soup = BeautifulSoup("", "html.parser")
|
||||
self.soup.name = "[document_fragment]"
|
||||
return Element(self.soup, self.soup, None)
|
||||
|
||||
@@ -105,7 +138,57 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
return self.soup
|
||||
|
||||
def getFragment(self):
|
||||
return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
|
||||
return treebuilder_base.TreeBuilder.getFragment(self).element
|
||||
|
||||
def testSerializer(self, element):
|
||||
from bs4 import BeautifulSoup
|
||||
rv = []
|
||||
doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')
|
||||
|
||||
def serializeElement(element, indent=0):
|
||||
if isinstance(element, BeautifulSoup):
|
||||
pass
|
||||
if isinstance(element, Doctype):
|
||||
m = doctype_re.match(element)
|
||||
if m:
|
||||
name = m.group(1)
|
||||
if m.lastindex > 1:
|
||||
publicId = m.group(2) or ""
|
||||
systemId = m.group(3) or m.group(4) or ""
|
||||
rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
|
||||
(' ' * indent, name, publicId, systemId))
|
||||
else:
|
||||
rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, name))
|
||||
else:
|
||||
rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
|
||||
elif isinstance(element, Comment):
|
||||
rv.append("|%s<!-- %s -->" % (' ' * indent, element))
|
||||
elif isinstance(element, NavigableString):
|
||||
rv.append("|%s\"%s\"" % (' ' * indent, element))
|
||||
else:
|
||||
if element.namespace:
|
||||
name = "%s %s" % (prefixes[element.namespace],
|
||||
element.name)
|
||||
else:
|
||||
name = element.name
|
||||
rv.append("|%s<%s>" % (' ' * indent, name))
|
||||
if element.attrs:
|
||||
attributes = []
|
||||
for name, value in element.attrs.items():
|
||||
if isinstance(name, NamespacedAttribute):
|
||||
name = "%s %s" % (prefixes[name.namespace], name.name)
|
||||
if isinstance(value, list):
|
||||
value = " ".join(value)
|
||||
attributes.append((name, value))
|
||||
|
||||
for name, value in sorted(attributes):
|
||||
rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
|
||||
indent += 2
|
||||
for child in element.children:
|
||||
serializeElement(child, indent)
|
||||
serializeElement(element, 0)
|
||||
|
||||
return "\n".join(rv)
|
||||
|
||||
class AttrList(object):
|
||||
def __init__(self, element):
|
||||
@@ -137,9 +220,9 @@ class AttrList(object):
|
||||
return name in list(self.attrs.keys())
|
||||
|
||||
|
||||
class Element(html5lib.treebuilders._base.Node):
|
||||
class Element(treebuilder_base.Node):
|
||||
def __init__(self, element, soup, namespace):
|
||||
html5lib.treebuilders._base.Node.__init__(self, element.name)
|
||||
treebuilder_base.Node.__init__(self, element.name)
|
||||
self.element = element
|
||||
self.soup = soup
|
||||
self.namespace = namespace
|
||||
@@ -158,8 +241,10 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
child = node
|
||||
elif node.element.__class__ == NavigableString:
|
||||
string_child = child = node.element
|
||||
node.parent = self
|
||||
else:
|
||||
child = node.element
|
||||
node.parent = self
|
||||
|
||||
if not isinstance(child, basestring) and child.parent is not None:
|
||||
node.element.extract()
|
||||
@@ -197,6 +282,8 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
most_recent_element=most_recent_element)
|
||||
|
||||
def getAttributes(self):
|
||||
if isinstance(self.element, Comment):
|
||||
return {}
|
||||
return AttrList(self.element)
|
||||
|
||||
def setAttributes(self, attributes):
|
||||
@@ -224,11 +311,11 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
attributes = property(getAttributes, setAttributes)
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
text = TextNode(self.soup.new_string(data), self.soup)
|
||||
if insertBefore:
|
||||
text = TextNode(self.soup.new_string(data), self.soup)
|
||||
self.insertBefore(data, insertBefore)
|
||||
self.insertBefore(text, insertBefore)
|
||||
else:
|
||||
self.appendChild(data)
|
||||
self.appendChild(text)
|
||||
|
||||
def insertBefore(self, node, refNode):
|
||||
index = self.element.index(refNode.element)
|
||||
@@ -250,6 +337,7 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
# print "MOVE", self.element.contents
|
||||
# print "FROM", self.element
|
||||
# print "TO", new_parent.element
|
||||
|
||||
element = self.element
|
||||
new_parent_element = new_parent.element
|
||||
# Determine what this tag's next_element will be once all the children
|
||||
@@ -268,7 +356,6 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
new_parents_last_descendant_next_element = new_parent_element.next_element
|
||||
|
||||
to_append = element.contents
|
||||
append_after = new_parent_element.contents
|
||||
if len(to_append) > 0:
|
||||
# Set the first child's previous_element and previous_sibling
|
||||
# to elements within the new parent
|
||||
@@ -285,12 +372,19 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
if new_parents_last_child:
|
||||
new_parents_last_child.next_sibling = first_child
|
||||
|
||||
# Fix the last child's next_element and next_sibling
|
||||
last_child = to_append[-1]
|
||||
last_child.next_element = new_parents_last_descendant_next_element
|
||||
# Find the very last element being moved. It is now the
|
||||
# parent's last descendant. It has no .next_sibling and
|
||||
# its .next_element is whatever the previous last
|
||||
# descendant had.
|
||||
last_childs_last_descendant = to_append[-1]._last_descendant(False, True)
|
||||
|
||||
last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
|
||||
if new_parents_last_descendant_next_element:
|
||||
new_parents_last_descendant_next_element.previous_element = last_child
|
||||
last_child.next_sibling = None
|
||||
# TODO: This code has no test coverage and I'm not sure
|
||||
# how to get html5lib to go through this path, but it's
|
||||
# just the other side of the previous line.
|
||||
new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant
|
||||
last_childs_last_descendant.next_sibling = None
|
||||
|
||||
for child in to_append:
|
||||
child.parent = new_parent_element
|
||||
@@ -324,7 +418,7 @@ class Element(html5lib.treebuilders._base.Node):
|
||||
|
||||
class TextNode(Element):
|
||||
def __init__(self, element, soup):
|
||||
html5lib.treebuilders._base.Node.__init__(self, None)
|
||||
treebuilder_base.Node.__init__(self, None)
|
||||
self.element = element
|
||||
self.soup = soup
|
||||
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__all__ = [
|
||||
'HTMLParserTreeBuilder',
|
||||
]
|
||||
@@ -49,7 +52,31 @@ from bs4.builder import (
|
||||
HTMLPARSER = 'html.parser'
|
||||
|
||||
class BeautifulSoupHTMLParser(HTMLParser):
|
||||
def handle_starttag(self, name, attrs):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
HTMLParser.__init__(self, *args, **kwargs)
|
||||
|
||||
# Keep a list of empty-element tags that were encountered
|
||||
# without an explicit closing tag. If we encounter a closing tag
|
||||
# of this type, we'll associate it with one of those entries.
|
||||
#
|
||||
# This isn't a stack because we don't care about the
|
||||
# order. It's a list of closing tags we've already handled and
|
||||
# will ignore, assuming they ever show up.
|
||||
self.already_closed_empty_element = []
|
||||
|
||||
def handle_startendtag(self, name, attrs):
|
||||
# This is only called when the markup looks like
|
||||
# <tag/>.
|
||||
|
||||
# is_startend() tells handle_starttag not to close the tag
|
||||
# just because its name matches a known empty-element tag. We
|
||||
# know that this is an empty-element tag and we want to call
|
||||
# handle_endtag ourselves.
|
||||
tag = self.handle_starttag(name, attrs, handle_empty_element=False)
|
||||
self.handle_endtag(name)
|
||||
|
||||
def handle_starttag(self, name, attrs, handle_empty_element=True):
|
||||
# XXX namespace
|
||||
attr_dict = {}
|
||||
for key, value in attrs:
|
||||
@@ -59,10 +86,34 @@ class BeautifulSoupHTMLParser(HTMLParser):
|
||||
value = ''
|
||||
attr_dict[key] = value
|
||||
attrvalue = '""'
|
||||
self.soup.handle_starttag(name, None, None, attr_dict)
|
||||
#print "START", name
|
||||
tag = self.soup.handle_starttag(name, None, None, attr_dict)
|
||||
if tag and tag.is_empty_element and handle_empty_element:
|
||||
# Unlike other parsers, html.parser doesn't send separate end tag
|
||||
# events for empty-element tags. (It's handled in
|
||||
# handle_startendtag, but only if the original markup looked like
|
||||
# <tag/>.)
|
||||
#
|
||||
# So we need to call handle_endtag() ourselves. Since we
|
||||
# know the start event is identical to the end event, we
|
||||
# don't want handle_endtag() to cross off any previous end
|
||||
# events for tags of this name.
|
||||
self.handle_endtag(name, check_already_closed=False)
|
||||
|
||||
def handle_endtag(self, name):
|
||||
self.soup.handle_endtag(name)
|
||||
# But we might encounter an explicit closing tag for this tag
|
||||
# later on. If so, we want to ignore it.
|
||||
self.already_closed_empty_element.append(name)
|
||||
|
||||
def handle_endtag(self, name, check_already_closed=True):
|
||||
#print "END", name
|
||||
if check_already_closed and name in self.already_closed_empty_element:
|
||||
# This is a redundant end tag for an empty-element tag.
|
||||
# We've already called handle_endtag() for it, so just
|
||||
# check it off the list.
|
||||
# print "ALREADY CLOSED", name
|
||||
self.already_closed_empty_element.remove(name)
|
||||
else:
|
||||
self.soup.handle_endtag(name)
|
||||
|
||||
def handle_data(self, data):
|
||||
self.soup.handle_data(data)
|
||||
@@ -166,6 +217,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
|
||||
warnings.warn(RuntimeWarning(
|
||||
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
|
||||
raise e
|
||||
parser.already_closed_empty_element = []
|
||||
|
||||
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
|
||||
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__all__ = [
|
||||
'LXMLTreeBuilderForXML',
|
||||
'LXMLTreeBuilder',
|
||||
@@ -12,6 +14,7 @@ from bs4.element import (
|
||||
Doctype,
|
||||
NamespacedAttribute,
|
||||
ProcessingInstruction,
|
||||
XMLProcessingInstruction,
|
||||
)
|
||||
from bs4.builder import (
|
||||
FAST,
|
||||
@@ -29,6 +32,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
DEFAULT_PARSER_CLASS = etree.XMLParser
|
||||
|
||||
is_xml = True
|
||||
processing_instruction_class = XMLProcessingInstruction
|
||||
|
||||
NAME = "lxml-xml"
|
||||
ALTERNATE_NAMES = ["xml"]
|
||||
@@ -87,6 +91,16 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
|
||||
Each 4-tuple represents a strategy for parsing the document.
|
||||
"""
|
||||
# Instead of using UnicodeDammit to convert the bytestring to
|
||||
# Unicode using different encodings, use EncodingDetector to
|
||||
# iterate over the encodings, and tell lxml to try to parse
|
||||
# the document as each one in turn.
|
||||
is_html = not self.is_xml
|
||||
if is_html:
|
||||
self.processing_instruction_class = ProcessingInstruction
|
||||
else:
|
||||
self.processing_instruction_class = XMLProcessingInstruction
|
||||
|
||||
if isinstance(markup, unicode):
|
||||
# We were given Unicode. Maybe lxml can parse Unicode on
|
||||
# this system?
|
||||
@@ -98,11 +112,6 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
yield (markup.encode("utf8"), "utf8",
|
||||
document_declared_encoding, False)
|
||||
|
||||
# Instead of using UnicodeDammit to convert the bytestring to
|
||||
# Unicode using different encodings, use EncodingDetector to
|
||||
# iterate over the encodings, and tell lxml to try to parse
|
||||
# the document as each one in turn.
|
||||
is_html = not self.is_xml
|
||||
try_encodings = [user_specified_encoding, document_declared_encoding]
|
||||
detector = EncodingDetector(
|
||||
markup, try_encodings, is_html, exclude_encodings)
|
||||
@@ -201,7 +210,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
def pi(self, target, data):
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(target + ' ' + data)
|
||||
self.soup.endData(ProcessingInstruction)
|
||||
self.soup.endData(self.processing_instruction_class)
|
||||
|
||||
def data(self, content):
|
||||
self.soup.handle_data(content)
|
||||
@@ -229,6 +238,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
|
||||
|
||||
features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
|
||||
is_xml = False
|
||||
processing_instruction_class = ProcessingInstruction
|
||||
|
||||
def default_parser(self, encoding):
|
||||
return etree.HTMLParser
|
||||
|
||||
@@ -6,9 +6,10 @@ necessary. It is heavily based on code from Mark Pilgrim's Universal
|
||||
Feed Parser. It works best on XML and HTML, but it does not rewrite the
|
||||
XML or HTML to reflect a new encoding; that's the tree builder's job.
|
||||
"""
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
from pdb import set_trace
|
||||
import codecs
|
||||
from htmlentitydefs import codepoint2name
|
||||
import re
|
||||
@@ -309,7 +310,7 @@ class EncodingDetector:
|
||||
else:
|
||||
xml_endpos = 1024
|
||||
html_endpos = max(2048, int(len(markup) * 0.05))
|
||||
|
||||
|
||||
declared_encoding = None
|
||||
declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
|
||||
if not declared_encoding_match and is_html:
|
||||
@@ -346,7 +347,7 @@ class UnicodeDammit:
|
||||
self.tried_encodings = []
|
||||
self.contains_replacement_characters = False
|
||||
self.is_html = is_html
|
||||
|
||||
self.log = logging.getLogger(__name__)
|
||||
self.detector = EncodingDetector(
|
||||
markup, override_encodings, is_html, exclude_encodings)
|
||||
|
||||
@@ -376,9 +377,10 @@ class UnicodeDammit:
|
||||
if encoding != "ascii":
|
||||
u = self._convert_from(encoding, "replace")
|
||||
if u is not None:
|
||||
logging.warning(
|
||||
self.log.warning(
|
||||
"Some characters could not be decoded, and were "
|
||||
"replaced with REPLACEMENT CHARACTER.")
|
||||
"replaced with REPLACEMENT CHARACTER."
|
||||
)
|
||||
self.contains_replacement_characters = True
|
||||
break
|
||||
|
||||
@@ -734,7 +736,7 @@ class UnicodeDammit:
|
||||
0xde : b'\xc3\x9e', # Þ
|
||||
0xdf : b'\xc3\x9f', # ß
|
||||
0xe0 : b'\xc3\xa0', # à
|
||||
0xe1 : b'\xa1', # á
|
||||
0xe1 : b'\xa1', # á
|
||||
0xe2 : b'\xc3\xa2', # â
|
||||
0xe3 : b'\xc3\xa3', # ã
|
||||
0xe4 : b'\xc3\xa4', # ä
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""Diagnostic functions, mainly for use when doing tech support."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
import cProfile
|
||||
@@ -56,7 +58,8 @@ def diagnose(data):
|
||||
data = data.read()
|
||||
elif os.path.exists(data):
|
||||
print '"%s" looks like a filename. Reading data from the file.' % data
|
||||
data = open(data).read()
|
||||
with open(data) as fp:
|
||||
data = fp.read()
|
||||
elif data.startswith("http:") or data.startswith("https:"):
|
||||
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
|
||||
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
from pdb import set_trace
|
||||
import collections
|
||||
import re
|
||||
import shlex
|
||||
import sys
|
||||
import warnings
|
||||
from bs4.dammit import EntitySubstitution
|
||||
@@ -99,6 +101,8 @@ class HTMLAwareEntitySubstitution(EntitySubstitution):
|
||||
|
||||
preformatted_tags = set(["pre"])
|
||||
|
||||
preserve_whitespace_tags = set(['pre', 'textarea'])
|
||||
|
||||
@classmethod
|
||||
def _substitute_if_appropriate(cls, ns, f):
|
||||
if (isinstance(ns, NavigableString)
|
||||
@@ -127,8 +131,8 @@ class PageElement(object):
|
||||
# to methods like encode() and prettify():
|
||||
#
|
||||
# "html" - All Unicode characters with corresponding HTML entities
|
||||
# are converted to those entities on output.
|
||||
# "minimal" - Bare ampersands and angle brackets are converted to
|
||||
# are converted to those entities on output.
|
||||
# "minimal" - Bare ampersands and angle brackets are converted to
|
||||
# XML entities: & < >
|
||||
# None - The null formatter. Unicode characters are never
|
||||
# converted to entities. This is not recommended, but it's
|
||||
@@ -169,11 +173,19 @@ class PageElement(object):
|
||||
|
||||
This is used when mapping a formatter name ("minimal") to an
|
||||
appropriate function (one that performs entity-substitution on
|
||||
the contents of <script> and <style> tags, or not). It's
|
||||
the contents of <script> and <style> tags, or not). It can be
|
||||
inefficient, but it should be called very rarely.
|
||||
"""
|
||||
if self.known_xml is not None:
|
||||
# Most of the time we will have determined this when the
|
||||
# document is parsed.
|
||||
return self.known_xml
|
||||
|
||||
# Otherwise, it's likely that this element was created by
|
||||
# direct invocation of the constructor from within the user's
|
||||
# Python code.
|
||||
if self.parent is None:
|
||||
# This is the top-level object. It should have .is_xml set
|
||||
# This is the top-level object. It should have .known_xml set
|
||||
# from tree creation. If not, take a guess--BS is usually
|
||||
# used on HTML markup.
|
||||
return getattr(self, 'is_xml', False)
|
||||
@@ -523,9 +535,16 @@ class PageElement(object):
|
||||
return ResultSet(strainer, result)
|
||||
elif isinstance(name, basestring):
|
||||
# Optimization to find all tags with a given name.
|
||||
if name.count(':') == 1:
|
||||
# This is a name with a prefix.
|
||||
prefix, name = name.split(':', 1)
|
||||
else:
|
||||
prefix = None
|
||||
result = (element for element in generator
|
||||
if isinstance(element, Tag)
|
||||
and element.name == name)
|
||||
and element.name == name
|
||||
and (prefix is None or element.prefix == prefix)
|
||||
)
|
||||
return ResultSet(strainer, result)
|
||||
results = ResultSet(strainer)
|
||||
while True:
|
||||
@@ -637,7 +656,7 @@ class PageElement(object):
|
||||
return lambda el: el._attr_value_as_string(
|
||||
attribute, '').startswith(value)
|
||||
elif operator == '$':
|
||||
# string represenation of `attribute` ends with `value`
|
||||
# string representation of `attribute` ends with `value`
|
||||
return lambda el: el._attr_value_as_string(
|
||||
attribute, '').endswith(value)
|
||||
elif operator == '*':
|
||||
@@ -677,6 +696,11 @@ class NavigableString(unicode, PageElement):
|
||||
PREFIX = ''
|
||||
SUFFIX = ''
|
||||
|
||||
# We can't tell just by looking at a string whether it's contained
|
||||
# in an XML document or an HTML document.
|
||||
|
||||
known_xml = None
|
||||
|
||||
def __new__(cls, value):
|
||||
"""Create a new NavigableString.
|
||||
|
||||
@@ -743,10 +767,16 @@ class CData(PreformattedString):
|
||||
SUFFIX = u']]>'
|
||||
|
||||
class ProcessingInstruction(PreformattedString):
|
||||
"""A SGML processing instruction."""
|
||||
|
||||
PREFIX = u'<?'
|
||||
SUFFIX = u'>'
|
||||
|
||||
class XMLProcessingInstruction(ProcessingInstruction):
|
||||
"""An XML processing instruction."""
|
||||
PREFIX = u'<?'
|
||||
SUFFIX = u'?>'
|
||||
|
||||
class Comment(PreformattedString):
|
||||
|
||||
PREFIX = u'<!--'
|
||||
@@ -781,7 +811,8 @@ class Tag(PageElement):
|
||||
"""Represents a found HTML tag with its attributes and contents."""
|
||||
|
||||
def __init__(self, parser=None, builder=None, name=None, namespace=None,
|
||||
prefix=None, attrs=None, parent=None, previous=None):
|
||||
prefix=None, attrs=None, parent=None, previous=None,
|
||||
is_xml=None):
|
||||
"Basic constructor."
|
||||
|
||||
if parser is None:
|
||||
@@ -795,6 +826,14 @@ class Tag(PageElement):
|
||||
self.name = name
|
||||
self.namespace = namespace
|
||||
self.prefix = prefix
|
||||
if builder is not None:
|
||||
preserve_whitespace_tags = builder.preserve_whitespace_tags
|
||||
else:
|
||||
if is_xml:
|
||||
preserve_whitespace_tags = []
|
||||
else:
|
||||
preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
|
||||
self.preserve_whitespace_tags = preserve_whitespace_tags
|
||||
if attrs is None:
|
||||
attrs = {}
|
||||
elif attrs:
|
||||
@@ -805,6 +844,13 @@ class Tag(PageElement):
|
||||
attrs = dict(attrs)
|
||||
else:
|
||||
attrs = dict(attrs)
|
||||
|
||||
# If possible, determine ahead of time whether this tag is an
|
||||
# XML tag.
|
||||
if builder:
|
||||
self.known_xml = builder.is_xml
|
||||
else:
|
||||
self.known_xml = is_xml
|
||||
self.attrs = attrs
|
||||
self.contents = []
|
||||
self.setup(parent, previous)
|
||||
@@ -824,7 +870,7 @@ class Tag(PageElement):
|
||||
Its contents are a copy of the old Tag's contents.
|
||||
"""
|
||||
clone = type(self)(None, self.builder, self.name, self.namespace,
|
||||
self.nsprefix, self.attrs)
|
||||
self.prefix, self.attrs, is_xml=self._is_xml)
|
||||
for attr in ('can_be_empty_element', 'hidden'):
|
||||
setattr(clone, attr, getattr(self, attr))
|
||||
for child in self.contents:
|
||||
@@ -946,6 +992,13 @@ class Tag(PageElement):
|
||||
attribute."""
|
||||
return self.attrs.get(key, default)
|
||||
|
||||
def get_attribute_list(self, key, default=None):
|
||||
"""The same as get(), but always returns a list."""
|
||||
value = self.get(key, default)
|
||||
if not isinstance(value, list):
|
||||
value = [value]
|
||||
return value
|
||||
|
||||
def has_attr(self, key):
|
||||
return key in self.attrs
|
||||
|
||||
@@ -997,7 +1050,7 @@ class Tag(PageElement):
|
||||
tag_name, tag_name))
|
||||
return self.find(tag_name)
|
||||
# We special case contents to avoid recursion.
|
||||
elif not tag.startswith("__") and not tag=="contents":
|
||||
elif not tag.startswith("__") and not tag == "contents":
|
||||
return self.find(tag)
|
||||
raise AttributeError(
|
||||
"'%s' object has no attribute '%s'" % (self.__class__, tag))
|
||||
@@ -1057,10 +1110,11 @@ class Tag(PageElement):
|
||||
|
||||
def _should_pretty_print(self, indent_level):
|
||||
"""Should this tag be pretty-printed?"""
|
||||
|
||||
return (
|
||||
indent_level is not None and
|
||||
(self.name not in HTMLAwareEntitySubstitution.preformatted_tags
|
||||
or self._is_xml))
|
||||
indent_level is not None
|
||||
and self.name not in self.preserve_whitespace_tags
|
||||
)
|
||||
|
||||
def decode(self, indent_level=None,
|
||||
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
|
||||
@@ -1280,6 +1334,7 @@ class Tag(PageElement):
|
||||
|
||||
_selector_combinators = ['>', '+', '~']
|
||||
_select_debug = False
|
||||
quoted_colon = re.compile('"[^"]*:[^"]*"')
|
||||
def select_one(self, selector):
|
||||
"""Perform a CSS selection operation on the current element."""
|
||||
value = self.select(selector, limit=1)
|
||||
@@ -1305,8 +1360,7 @@ class Tag(PageElement):
|
||||
if limit and len(context) >= limit:
|
||||
break
|
||||
return context
|
||||
|
||||
tokens = selector.split()
|
||||
tokens = shlex.split(selector)
|
||||
current_context = [self]
|
||||
|
||||
if tokens[-1] in self._selector_combinators:
|
||||
@@ -1358,7 +1412,7 @@ class Tag(PageElement):
|
||||
return classes.issubset(candidate.get('class', []))
|
||||
checker = classes_match
|
||||
|
||||
elif ':' in token:
|
||||
elif ':' in token and not self.quoted_colon.search(token):
|
||||
# Pseudo-class
|
||||
tag_name, pseudo = token.split(':', 1)
|
||||
if tag_name == '':
|
||||
@@ -1389,11 +1443,8 @@ class Tag(PageElement):
|
||||
self.count += 1
|
||||
if self.count == self.destination:
|
||||
return True
|
||||
if self.count > self.destination:
|
||||
# Stop the generator that's sending us
|
||||
# these things.
|
||||
raise StopIteration()
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
checker = Counter(pseudo_value).nth_child_of_type
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
@@ -1498,13 +1549,12 @@ class Tag(PageElement):
|
||||
# don't include it in the context more than once.
|
||||
new_context.append(candidate)
|
||||
new_context_ids.add(id(candidate))
|
||||
if limit and len(new_context) >= limit:
|
||||
break
|
||||
elif self._select_debug:
|
||||
print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
|
||||
|
||||
|
||||
current_context = new_context
|
||||
if limit and len(current_context) >= limit:
|
||||
current_context = current_context[:limit]
|
||||
|
||||
if self._select_debug:
|
||||
print "Final verdict:"
|
||||
@@ -1662,28 +1712,22 @@ class SoupStrainer(object):
|
||||
"I don't know how to match against a %s" % markup.__class__)
|
||||
return found
|
||||
|
||||
def _matches(self, markup, match_against):
|
||||
def _matches(self, markup, match_against, already_tried=None):
|
||||
# print u"Matching %s against %s" % (markup, match_against)
|
||||
result = False
|
||||
if isinstance(markup, list) or isinstance(markup, tuple):
|
||||
# This should only happen when searching a multi-valued attribute
|
||||
# like 'class'.
|
||||
if (isinstance(match_against, unicode)
|
||||
and ' ' in match_against):
|
||||
# A bit of a special case. If they try to match "foo
|
||||
# bar" on a multivalue attribute's value, only accept
|
||||
# the literal value "foo bar"
|
||||
#
|
||||
# XXX This is going to be pretty slow because we keep
|
||||
# splitting match_against. But it shouldn't come up
|
||||
# too often.
|
||||
return (whitespace_re.split(match_against) == markup)
|
||||
else:
|
||||
for item in markup:
|
||||
if self._matches(item, match_against):
|
||||
return True
|
||||
return False
|
||||
|
||||
for item in markup:
|
||||
if self._matches(item, match_against):
|
||||
return True
|
||||
# We didn't match any particular value of the multivalue
|
||||
# attribute, but maybe we match the attribute value when
|
||||
# considered as a string.
|
||||
if self._matches(' '.join(markup), match_against):
|
||||
return True
|
||||
return False
|
||||
|
||||
if match_against is True:
|
||||
# True matches any non-None value.
|
||||
return markup is not None
|
||||
@@ -1693,6 +1737,7 @@ class SoupStrainer(object):
|
||||
|
||||
# Custom callables take the tag as an argument, but all
|
||||
# other ways of matching match the tag name as a string.
|
||||
original_markup = markup
|
||||
if isinstance(markup, Tag):
|
||||
markup = markup.name
|
||||
|
||||
@@ -1703,18 +1748,51 @@ class SoupStrainer(object):
|
||||
# None matches None, False, an empty string, an empty list, and so on.
|
||||
return not match_against
|
||||
|
||||
if isinstance(match_against, unicode):
|
||||
if (hasattr(match_against, '__iter__')
|
||||
and not isinstance(match_against, basestring)):
|
||||
# We're asked to match against an iterable of items.
|
||||
# The markup must be match at least one item in the
|
||||
# iterable. We'll try each one in turn.
|
||||
#
|
||||
# To avoid infinite recursion we need to keep track of
|
||||
# items we've already seen.
|
||||
if not already_tried:
|
||||
already_tried = set()
|
||||
for item in match_against:
|
||||
if item.__hash__:
|
||||
key = item
|
||||
else:
|
||||
key = id(item)
|
||||
if key in already_tried:
|
||||
continue
|
||||
else:
|
||||
already_tried.add(key)
|
||||
if self._matches(original_markup, item, already_tried):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
# Beyond this point we might need to run the test twice: once against
|
||||
# the tag's name and once against its prefixed name.
|
||||
match = False
|
||||
|
||||
if not match and isinstance(match_against, unicode):
|
||||
# Exact string match
|
||||
return markup == match_against
|
||||
match = markup == match_against
|
||||
|
||||
if hasattr(match_against, 'match'):
|
||||
if not match and hasattr(match_against, 'search'):
|
||||
# Regexp match
|
||||
return match_against.search(markup)
|
||||
|
||||
if hasattr(match_against, '__iter__'):
|
||||
# The markup must be an exact match against something
|
||||
# in the iterable.
|
||||
return markup in match_against
|
||||
if (not match
|
||||
and isinstance(original_markup, Tag)
|
||||
and original_markup.prefix):
|
||||
# Try the whole thing again with the prefixed tag name.
|
||||
return self._matches(
|
||||
original_markup.prefix + ':' + original_markup.name, match_against
|
||||
)
|
||||
|
||||
return match
|
||||
|
||||
|
||||
class ResultSet(list):
|
||||
@@ -1723,3 +1801,8 @@ class ResultSet(list):
|
||||
def __init__(self, source, result=()):
|
||||
super(ResultSet, self).__init__(result)
|
||||
self.source = source
|
||||
|
||||
def __getattr__(self, key):
|
||||
raise AttributeError(
|
||||
"ResultSet object has no attribute '%s'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?" % key
|
||||
)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""Helper classes for tests."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
import pickle
|
||||
@@ -67,6 +69,18 @@ class HTMLTreeBuilderSmokeTest(object):
|
||||
markup in these tests, there's not much room for interpretation.
|
||||
"""
|
||||
|
||||
def test_empty_element_tags(self):
|
||||
"""Verify that all HTML4 and HTML5 empty element (aka void element) tags
|
||||
are handled correctly.
|
||||
"""
|
||||
for name in [
|
||||
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
|
||||
'spacer', 'frame'
|
||||
]:
|
||||
soup = self.soup("")
|
||||
new_tag = soup.new_tag(name)
|
||||
self.assertEqual(True, new_tag.is_empty_element)
|
||||
|
||||
def test_pickle_and_unpickle_identity(self):
|
||||
# Pickling a tree, then unpickling it, yields a tree identical
|
||||
# to the original.
|
||||
@@ -137,6 +151,14 @@ class HTMLTreeBuilderSmokeTest(object):
|
||||
markup.replace(b"\n", b""))
|
||||
|
||||
def test_processing_instruction(self):
|
||||
# We test both Unicode and bytestring to verify that
|
||||
# process_markup correctly sets processing_instruction_class
|
||||
# even when the markup is already Unicode and there is no
|
||||
# need to process anything.
|
||||
markup = u"""<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.decode())
|
||||
|
||||
markup = b"""<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
@@ -215,9 +237,22 @@ Hello, world!
|
||||
self.assertEqual(comment, baz.previous_element)
|
||||
|
||||
def test_preserved_whitespace_in_pre_and_textarea(self):
|
||||
"""Whitespace must be preserved in <pre> and <textarea> tags."""
|
||||
self.assertSoupEquals("<pre> </pre>")
|
||||
self.assertSoupEquals("<textarea> woo </textarea>")
|
||||
"""Whitespace must be preserved in <pre> and <textarea> tags,
|
||||
even if that would mean not prettifying the markup.
|
||||
"""
|
||||
pre_markup = "<pre> </pre>"
|
||||
textarea_markup = "<textarea> woo\nwoo </textarea>"
|
||||
self.assertSoupEquals(pre_markup)
|
||||
self.assertSoupEquals(textarea_markup)
|
||||
|
||||
soup = self.soup(pre_markup)
|
||||
self.assertEqual(soup.pre.prettify(), pre_markup)
|
||||
|
||||
soup = self.soup(textarea_markup)
|
||||
self.assertEqual(soup.textarea.prettify(), textarea_markup)
|
||||
|
||||
soup = self.soup("<textarea></textarea>")
|
||||
self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>")
|
||||
|
||||
def test_nested_inline_elements(self):
|
||||
"""Inline elements can be nested indefinitely."""
|
||||
@@ -307,6 +342,13 @@ Hello, world!
|
||||
self.assertEqual("p", soup.p.name)
|
||||
self.assertConnectedness(soup)
|
||||
|
||||
def test_empty_element_tags(self):
|
||||
"""Verify consistent handling of empty-element tags,
|
||||
no matter how they come in through the markup.
|
||||
"""
|
||||
self.assertSoupEquals('<br/><br/><br/>', "<br/><br/><br/>")
|
||||
self.assertSoupEquals('<br /><br /><br />', "<br/><br/><br/>")
|
||||
|
||||
def test_head_tag_between_head_and_body(self):
|
||||
"Prevent recurrence of a bug in the html5lib treebuilder."
|
||||
content = """<html><head></head>
|
||||
@@ -480,7 +522,9 @@ Hello, world!
|
||||
hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
|
||||
soup = self.soup(
|
||||
hebrew_document, from_encoding="iso8859-8")
|
||||
self.assertEqual(soup.original_encoding, 'iso8859-8')
|
||||
# Some tree builders call it iso8859-8, others call it iso-8859-9.
|
||||
# That's not a difference we really care about.
|
||||
assert soup.original_encoding in ('iso8859-8', 'iso-8859-8')
|
||||
self.assertEqual(
|
||||
soup.encode('utf-8'),
|
||||
hebrew_document.decode("iso8859-8").encode("utf-8"))
|
||||
@@ -563,6 +607,11 @@ class XMLTreeBuilderSmokeTest(object):
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
|
||||
def test_processing_instruction(self):
|
||||
markup = b"""<?xml version="1.0" encoding="utf8"?>\n<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
|
||||
def test_real_xhtml_document(self):
|
||||
"""A real XHTML document should come out *exactly* the same as it went in."""
|
||||
markup = b"""<?xml version="1.0" encoding="utf-8"?>
|
||||
@@ -639,6 +688,40 @@ class XMLTreeBuilderSmokeTest(object):
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(unicode(soup.foo), markup)
|
||||
|
||||
def test_find_by_prefixed_name(self):
|
||||
doc = """<?xml version="1.0" encoding="utf-8"?>
|
||||
<Document xmlns="http://example.com/ns0"
|
||||
xmlns:ns1="http://example.com/ns1"
|
||||
xmlns:ns2="http://example.com/ns2"
|
||||
<ns1:tag>foo</ns1:tag>
|
||||
<ns1:tag>bar</ns1:tag>
|
||||
<ns2:tag key="value">baz</ns2:tag>
|
||||
</Document>
|
||||
"""
|
||||
soup = self.soup(doc)
|
||||
|
||||
# There are three <tag> tags.
|
||||
self.assertEqual(3, len(soup.find_all('tag')))
|
||||
|
||||
# But two of them are ns1:tag and one of them is ns2:tag.
|
||||
self.assertEqual(2, len(soup.find_all('ns1:tag')))
|
||||
self.assertEqual(1, len(soup.find_all('ns2:tag')))
|
||||
|
||||
self.assertEqual(1, len(soup.find_all('ns2:tag', key='value')))
|
||||
self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag'])))
|
||||
|
||||
def test_copy_tag_preserves_namespace(self):
|
||||
xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<w:document xmlns:w="http://example.com/ns0"/>"""
|
||||
|
||||
soup = self.soup(xml)
|
||||
tag = soup.document
|
||||
duplicate = copy.copy(tag)
|
||||
|
||||
# The two tags have the same namespace prefix.
|
||||
self.assertEqual(tag.prefix, duplicate.prefix)
|
||||
|
||||
|
||||
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
|
||||
"""Smoke test for a tree builder that supports HTML5."""
|
||||
|
||||
|
||||
@@ -84,6 +84,33 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
|
||||
self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
|
||||
self.assertEqual(2, len(soup.find_all('p')))
|
||||
|
||||
def test_reparented_markup_containing_identical_whitespace_nodes(self):
|
||||
"""Verify that we keep the two whitespace nodes in this
|
||||
document distinct when reparenting the adjacent <tbody> tags.
|
||||
"""
|
||||
markup = '<table> <tbody><tbody><ims></tbody> </table>'
|
||||
soup = self.soup(markup)
|
||||
space1, space2 = soup.find_all(string=' ')
|
||||
tbody1, tbody2 = soup.find_all('tbody')
|
||||
assert space1.next_element is tbody1
|
||||
assert tbody2.next_element is space2
|
||||
|
||||
def test_reparented_markup_containing_children(self):
|
||||
markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>'
|
||||
soup = self.soup(markup)
|
||||
noscript = soup.noscript
|
||||
self.assertEqual("target", noscript.next_element)
|
||||
target = soup.find(string='target')
|
||||
|
||||
# The 'aftermath' string was duplicated; we want the second one.
|
||||
final_aftermath = soup.find_all(string='aftermath')[-1]
|
||||
|
||||
# The <noscript> tag was moved beneath a copy of the <a> tag,
|
||||
# but the 'target' string within is still connected to the
|
||||
# (second) 'aftermath' string.
|
||||
self.assertEqual(final_aftermath, target.next_element)
|
||||
self.assertEqual(target, final_aftermath.previous_element)
|
||||
|
||||
def test_processing_instruction(self):
|
||||
"""Processing instructions become comments."""
|
||||
markup = b"""<?PITarget PIContent?>"""
|
||||
@@ -96,3 +123,8 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
|
||||
a1, a2 = soup.find_all('a')
|
||||
self.assertEqual(a1, a2)
|
||||
assert a1 is not a2
|
||||
|
||||
def test_foster_parenting(self):
|
||||
markup = b"""<table><td></tbody>A"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
|
||||
|
||||
@@ -29,4 +29,6 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||
loaded = pickle.loads(dumped)
|
||||
self.assertTrue(isinstance(loaded.builder, type(tree.builder)))
|
||||
|
||||
|
||||
def test_redundant_empty_element_closing_tags(self):
|
||||
self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
|
||||
self.assertSoupEquals('</br></br></br>', "")
|
||||
|
||||
@@ -35,7 +35,6 @@ try:
|
||||
except ImportError, e:
|
||||
LXML_PRESENT = False
|
||||
|
||||
PYTHON_2_PRE_2_7 = (sys.version_info < (2,7))
|
||||
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
|
||||
|
||||
class TestConstructor(SoupTest):
|
||||
@@ -77,7 +76,7 @@ class TestWarnings(SoupTest):
|
||||
def test_no_warning_if_explicit_parser_specified(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("<a><b></b></a>", "html.parser")
|
||||
self.assertEquals([], w)
|
||||
self.assertEqual([], w)
|
||||
|
||||
def test_parseOnlyThese_renamed_to_parse_only(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
@@ -118,15 +117,34 @@ class TestWarnings(SoupTest):
|
||||
soup = self.soup(filename)
|
||||
self.assertEqual(0, len(w))
|
||||
|
||||
def test_url_warning(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("http://www.crummy.com/")
|
||||
msg = str(w[0].message)
|
||||
self.assertTrue("looks like a URL" in msg)
|
||||
def test_url_warning_with_bytes_url(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(b"http://www.crummybytes.com/")
|
||||
# Be aware this isn't the only warning that can be raised during
|
||||
# execution..
|
||||
self.assertTrue(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_unicode_url(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
# note - this url must differ from the bytes one otherwise
|
||||
# python's warnings system swallows the second warning
|
||||
soup = self.soup(u"http://www.crummyunicode.com/")
|
||||
self.assertTrue(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_bytes_and_space(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(b"http://www.crummybytes.com/ is great")
|
||||
self.assertFalse(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_unicode_and_space(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(u"http://www.crummyuncode.com/ is great")
|
||||
self.assertFalse(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("http://www.crummy.com/ is great")
|
||||
self.assertEqual(0, len(w))
|
||||
|
||||
class TestSelectiveParsing(SoupTest):
|
||||
|
||||
@@ -260,7 +278,7 @@ class TestEncodingConversion(SoupTest):
|
||||
self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
|
||||
|
||||
@skipIf(
|
||||
PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2,
|
||||
PYTHON_3_PRE_3_2,
|
||||
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
|
||||
def test_attribute_name_containing_unicode_characters(self):
|
||||
markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Tests for Beautiful Soup's tree traversal methods.
|
||||
|
||||
@@ -222,7 +223,19 @@ class TestFindAllByName(TreeTest):
|
||||
self.assertSelects(
|
||||
tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
|
||||
|
||||
def test_find_with_multi_valued_attribute(self):
|
||||
soup = self.soup(
|
||||
"<div class='a b'>1</div><div class='a c'>2</div><div class='a d'>3</div>"
|
||||
)
|
||||
r1 = soup.find('div', 'a d');
|
||||
r2 = soup.find('div', re.compile(r'a d'));
|
||||
r3, r4 = soup.find_all('div', ['a b', 'a d']);
|
||||
self.assertEqual('3', r1.string)
|
||||
self.assertEqual('3', r2.string)
|
||||
self.assertEqual('1', r3.string)
|
||||
self.assertEqual('3', r4.string)
|
||||
|
||||
|
||||
class TestFindAllByAttribute(TreeTest):
|
||||
|
||||
def test_find_all_by_attribute_name(self):
|
||||
@@ -294,10 +307,10 @@ class TestFindAllByAttribute(TreeTest):
|
||||
f = tree.find_all("gar", class_=re.compile("a"))
|
||||
self.assertSelects(f, ["Found it"])
|
||||
|
||||
# Since the class is not the string "foo bar", but the two
|
||||
# strings "foo" and "bar", this will not find anything.
|
||||
# If the search fails to match the individual strings "foo" and "bar",
|
||||
# it will be tried against the combined string "foo bar".
|
||||
f = tree.find_all("gar", class_=re.compile("o b"))
|
||||
self.assertSelects(f, [])
|
||||
self.assertSelects(f, ["Found it"])
|
||||
|
||||
def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
|
||||
soup = self.soup("<a class='bar'>Found it</a>")
|
||||
@@ -335,7 +348,7 @@ class TestFindAllByAttribute(TreeTest):
|
||||
strainer = SoupStrainer(attrs={'id' : 'first'})
|
||||
self.assertSelects(tree.find_all(strainer), ['Match.'])
|
||||
|
||||
def test_find_all_with_missing_atribute(self):
|
||||
def test_find_all_with_missing_attribute(self):
|
||||
# You can pass in None as the value of an attribute to find_all.
|
||||
# This will match tags that do not have that attribute set.
|
||||
tree = self.soup("""<a id="1">ID present.</a>
|
||||
@@ -1273,6 +1286,10 @@ class TestCDAtaListAttributes(SoupTest):
|
||||
soup = self.soup("<a class='foo\tbar'>")
|
||||
self.assertEqual(b'<a class="foo bar"></a>', soup.a.encode())
|
||||
|
||||
def test_get_attribute_list(self):
|
||||
soup = self.soup("<a id='abc def'>")
|
||||
self.assertEqual(['abc def'], soup.a.get_attribute_list('id'))
|
||||
|
||||
def test_accept_charset(self):
|
||||
soup = self.soup('<form accept-charset="ISO-8859-1 UTF-8">')
|
||||
self.assertEqual(['ISO-8859-1', 'UTF-8'], soup.form['accept-charset'])
|
||||
@@ -1328,6 +1345,13 @@ class TestPersistence(SoupTest):
|
||||
copied = copy.deepcopy(self.tree)
|
||||
self.assertEqual(copied.decode(), self.tree.decode())
|
||||
|
||||
def test_copy_preserves_encoding(self):
|
||||
soup = BeautifulSoup(b'<p> </p>', 'html.parser')
|
||||
encoding = soup.original_encoding
|
||||
copy = soup.__copy__()
|
||||
self.assertEqual(u"<p> </p>", unicode(copy))
|
||||
self.assertEqual(encoding, copy.original_encoding)
|
||||
|
||||
def test_unicode_pickle(self):
|
||||
# A tree containing Unicode characters can be pickled.
|
||||
html = u"<b>\N{SNOWMAN}</b>"
|
||||
@@ -1676,8 +1700,8 @@ class TestSoupSelector(TreeTest):
|
||||
def setUp(self):
|
||||
self.soup = BeautifulSoup(self.HTML, 'html.parser')
|
||||
|
||||
def assertSelects(self, selector, expected_ids):
|
||||
el_ids = [el['id'] for el in self.soup.select(selector)]
|
||||
def assertSelects(self, selector, expected_ids, **kwargs):
|
||||
el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)]
|
||||
el_ids.sort()
|
||||
expected_ids.sort()
|
||||
self.assertEqual(expected_ids, el_ids,
|
||||
@@ -1720,6 +1744,13 @@ class TestSoupSelector(TreeTest):
|
||||
for selector in ('html div', 'html body div', 'body div'):
|
||||
self.assertSelects(selector, ['data1', 'main', 'inner', 'footer'])
|
||||
|
||||
|
||||
def test_limit(self):
|
||||
self.assertSelects('html div', ['main'], limit=1)
|
||||
self.assertSelects('html body div', ['inner', 'main'], limit=2)
|
||||
self.assertSelects('body div', ['data1', 'main', 'inner', 'footer'],
|
||||
limit=10)
|
||||
|
||||
def test_tag_no_match(self):
|
||||
self.assertEqual(len(self.soup.select('del')), 0)
|
||||
|
||||
@@ -1902,6 +1933,14 @@ class TestSoupSelector(TreeTest):
|
||||
('div[data-tag]', ['data1'])
|
||||
)
|
||||
|
||||
def test_quoted_space_in_selector_name(self):
|
||||
html = """<div style="display: wrong">nope</div>
|
||||
<div style="display: right">yes</div>
|
||||
"""
|
||||
soup = BeautifulSoup(html, 'html.parser')
|
||||
[chosen] = soup.select('div[style="display: right"]')
|
||||
self.assertEqual("yes", chosen.string)
|
||||
|
||||
def test_unsupported_pseudoclass(self):
|
||||
self.assertRaises(
|
||||
NotImplementedError, self.soup.select, "a:no-such-pseudoclass")
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from .core import where, old_where
|
||||
|
||||
__version__ = "2017.04.17"
|
||||
__version__ = "2018.01.18"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -19,18 +19,19 @@ class DeprecatedBundleWarning(DeprecationWarning):
|
||||
|
||||
|
||||
def where():
|
||||
f = os.path.split(__file__)[0]
|
||||
f = os.path.dirname(__file__)
|
||||
|
||||
return os.path.join(f, 'cacert.pem')
|
||||
|
||||
|
||||
def old_where():
|
||||
warnings.warn(
|
||||
"The weak security bundle is being deprecated.",
|
||||
"The weak security bundle has been removed. certifi.old_where() is now an alias "
|
||||
"of certifi.where(). Please update your code to use certifi.where() instead. "
|
||||
"certifi.old_where() will be removed in 2018.",
|
||||
DeprecatedBundleWarning
|
||||
)
|
||||
f = os.path.split(__file__)[0]
|
||||
return os.path.join(f, 'weak.pem')
|
||||
return where()
|
||||
|
||||
if __name__ == '__main__':
|
||||
print(where())
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,436 @@
|
||||
"""contextlib2 - backports and enhancements to the contextlib module"""
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
from collections import deque
|
||||
from functools import wraps
|
||||
|
||||
__all__ = ["contextmanager", "closing", "ContextDecorator", "ExitStack",
|
||||
"redirect_stdout", "redirect_stderr", "suppress"]
|
||||
|
||||
# Backwards compatibility
|
||||
__all__ += ["ContextStack"]
|
||||
|
||||
class ContextDecorator(object):
    """A base class or mixin that enables context managers to work as decorators."""

    def refresh_cm(self):
        """Returns the context manager used to actually wrap the call to the
        decorated function.

        The default implementation just returns *self*.

        Overriding this method allows otherwise one-shot context managers
        like _GeneratorContextManager to support use as decorators via
        implicit recreation.

        DEPRECATED: refresh_cm was never added to the standard library's
                    ContextDecorator API
        """
        # stacklevel=2 attributes the warning to the code calling refresh_cm()
        # rather than to this line.
        warnings.warn("refresh_cm was never added to the standard library",
                      DeprecationWarning, stacklevel=2)
        return self._recreate_cm()

    def _recreate_cm(self):
        """Return a recreated instance of self.

        Allows an otherwise one-shot context manager like
        _GeneratorContextManager to support use as
        a decorator via implicit recreation.

        This is a private interface just for _GeneratorContextManager.
        See issue #11647 for details.
        """
        return self

    def __call__(self, func):
        """Decorate *func* so that every call runs inside this context manager.

        The manager is obtained through _recreate_cm() on each call, so
        subclasses that override it get a fresh manager per invocation.
        """
        @wraps(func)
        def inner(*args, **kwds):
            with self._recreate_cm():
                return func(*args, **kwds)
        return inner
|
||||
|
||||
|
||||
class _GeneratorContextManager(ContextDecorator):
    """Helper for @contextmanager decorator."""

    def __init__(self, func, args, kwds):
        """Call *func* with *args* and *kwds* and keep the resulting generator.

        The callable and its arguments are stored as well so that
        _recreate_cm() can build a fresh instance later.
        """
        self.gen = func(*args, **kwds)
        self.func, self.args, self.kwds = func, args, kwds
        # Issue 19330: ensure context manager instances have good docstrings
        doc = getattr(func, "__doc__", None)
        if doc is None:
            doc = type(self).__doc__
        self.__doc__ = doc
        # Unfortunately, this still doesn't provide good help output when
        # inspecting the created context manager instances, since pydoc
        # currently bypasses the instance docstring and shows the docstring
        # for the class instead.
        # See http://bugs.python.org/issue19404 for more details.

    def _recreate_cm(self):
        """Return a new instance built from the stored callable and arguments."""
        # _GCM instances are one-shot context managers, so the
        # CM must be recreated each time a decorated function is
        # called
        return self.__class__(self.func, self.args, self.kwds)

    def __enter__(self):
        """Advance the generator to its first yield and return the yielded value.

        Raises RuntimeError if the generator finishes without yielding.
        """
        try:
            return next(self.gen)
        except StopIteration:
            raise RuntimeError("generator didn't yield")

    def __exit__(self, type, value, traceback):
        """Resume the generator, either normally or by throwing the exception.

        Without an exception the generator must finish, otherwise
        RuntimeError is raised. With an exception it is thrown into the
        generator; a true return value means the exception was suppressed.
        """
        if type is None:
            try:
                next(self.gen)
            except StopIteration:
                return
            else:
                raise RuntimeError("generator didn't stop")
        else:
            if value is None:
                # Need to force instantiation so we can reliably
                # tell if we get the same exception back
                value = type()
            try:
                self.gen.throw(type, value, traceback)
                raise RuntimeError("generator didn't stop after throw()")
            except StopIteration as exc:
                # Suppress StopIteration *unless* it's the same exception that
                # was passed to throw().  This prevents a StopIteration
                # raised inside the "with" statement from being suppressed.
                return exc is not value
            except RuntimeError as exc:
                # Don't re-raise the passed in exception
                if exc is value:
                    return False
                # Likewise, avoid suppressing if a StopIteration exception
                # was passed to throw() and later wrapped into a RuntimeError
                # (see PEP 479).
                if _HAVE_EXCEPTION_CHAINING and exc.__cause__ is value:
                    return False
                raise
            except:
                # only re-raise if it's *not* the exception that was
                # passed to throw(), because __exit__() must not raise
                # an exception unless __exit__() itself failed.  But throw()
                # has to raise the exception to signal propagation, so this
                # fixes the impedance mismatch between the throw() protocol
                # and the __exit__() protocol.
                #
                if sys.exc_info()[1] is not value:
                    raise
|
||||
|
||||
|
||||
def contextmanager(func):
    """@contextmanager decorator.

    Typical usage:

        @contextmanager
        def some_generator(<arguments>):
            <setup>
            try:
                yield <value>
            finally:
                <cleanup>

    This makes this:

        with some_generator(<arguments>) as <variable>:
            <body>

    equivalent to this:

        <setup>
        try:
            <variable> = <value>
            <body>
        finally:
            <cleanup>

    """
    @wraps(func)
    def helper(*args, **kwds):
        # Each call builds a new manager around a fresh generator.
        return _GeneratorContextManager(func, args, kwds)
    return helper
|
||||
|
||||
|
||||
class closing(object):
    """Context to automatically close something at the end of a block.

    Code like this:

        with closing(<module>.open(<arguments>)) as f:
            <block>

    is equivalent to this:

        f = <module>.open(<arguments>)
        try:
            <block>
        finally:
            f.close()

    """
    def __init__(self, thing):
        """Remember *thing*, the object whose close() is called on exit."""
        self.thing = thing

    def __enter__(self):
        """Return the wrapped object unchanged."""
        return self.thing

    def __exit__(self, *exc_info):
        """Close the wrapped object; returns None, so exceptions propagate."""
        self.thing.close()
|
||||
|
||||
|
||||
class _RedirectStream(object):
    """Base for context managers that temporarily rebind an attribute of sys.

    Subclasses set ``_stream`` to the name of the attribute to replace.
    """

    # Name of the sys attribute to redirect; provided by subclasses.
    _stream = None

    def __init__(self, new_target):
        """Store *new_target*, the object installed while the context is active."""
        self._new_target = new_target
        # We use a list of old targets to make this CM re-entrant
        self._old_targets = []

    def __enter__(self):
        """Save the current sys attribute, install the new target and return it."""
        self._old_targets.append(getattr(sys, self._stream))
        setattr(sys, self._stream, self._new_target)
        return self._new_target

    def __exit__(self, exctype, excinst, exctb):
        """Restore the most recently saved target; exceptions are not suppressed."""
        setattr(sys, self._stream, self._old_targets.pop())
|
||||
|
||||
|
||||
class redirect_stdout(_RedirectStream):
    """Context manager for temporarily redirecting stdout to another file.

        # How to send help() to stderr
        with redirect_stdout(sys.stderr):
            help(dir)

        # How to write help() to a file
        with open('help.txt', 'w') as f:
            with redirect_stdout(f):
                help(pow)
    """

    # Attribute of sys that the base class swaps out.
    _stream = "stdout"
|
||||
|
||||
|
||||
class redirect_stderr(_RedirectStream):
    """Context manager for temporarily redirecting stderr to another file."""

    # Attribute of sys that the base class swaps out.
    _stream = "stderr"
|
||||
|
||||
|
||||
class suppress(object):
    """Context manager to suppress specified exceptions

    After the exception is suppressed, execution proceeds with the next
    statement following the with statement.

         with suppress(FileNotFoundError):
             os.remove(somefile)
         # Execution still resumes here if the file was already removed
    """

    def __init__(self, *exceptions):
        """Store the exception classes that should be swallowed."""
        self._exceptions = exceptions

    def __enter__(self):
        """Nothing to set up; the ``as`` target is None."""
        pass

    def __exit__(self, exctype, excinst, exctb):
        """Return True when *exctype* is a subclass of a configured exception."""
        # Unlike isinstance and issubclass, CPython exception handling
        # currently only looks at the concrete type hierarchy (ignoring
        # the instance and subclass checking hooks). While Guido considers
        # that a bug rather than a feature, it's a fairly hard one to fix
        # due to various internal implementation details. suppress provides
        # the simpler issubclass based semantics, rather than trying to
        # exactly reproduce the limitations of the CPython interpreter.
        #
        # See http://bugs.python.org/issue12029 for more details
        return exctype is not None and issubclass(exctype, self._exceptions)
|
||||
|
||||
|
||||
# Context manipulation is Python 3 only
|
||||
_HAVE_EXCEPTION_CHAINING = sys.version_info[0] >= 3
if _HAVE_EXCEPTION_CHAINING:
    def _make_context_fixer(frame_exc):
        """Return a function that re-links ``__context__`` chains.

        *frame_exc* is treated as the end of a chain in the same way as None.
        """
        def _fix_exception_context(new_exc, old_exc):
            # Context may not be correct, so find the end of the chain
            while True:
                exc_context = new_exc.__context__
                if exc_context is old_exc:
                    # Context is already set correctly (see issue 20317)
                    return
                if exc_context is None or exc_context is frame_exc:
                    break
                new_exc = exc_context
            # Change the end of the chain to point to the exception
            # we expect it to reference
            new_exc.__context__ = old_exc
        return _fix_exception_context

    def _reraise_with_existing_context(exc_details):
        """Raise ``exc_details[1]`` while keeping its current ``__context__``."""
        try:
            # bare "raise exc_details[1]" replaces our carefully
            # set-up context
            fixed_ctx = exc_details[1].__context__
            raise exc_details[1]
        except BaseException:
            exc_details[1].__context__ = fixed_ctx
            raise
else:
    # No exception context in Python 2
    def _make_context_fixer(frame_exc):
        """Return a no-op fixer; there is no exception context to repair."""
        return lambda new_exc, old_exc: None

    # Use 3 argument raise in Python 2,
    # but use exec to avoid SyntaxError in Python 3
    def _reraise_with_existing_context(exc_details):
        """Re-raise the (type, value, traceback) triple unchanged."""
        exc_type, exc_value, exc_tb = exc_details
        exec ("raise exc_type, exc_value, exc_tb")
|
||||
|
||||
# Handle old-style classes if they exist
|
||||
try:
    from types import InstanceType
except ImportError:
    # Python 3 doesn't have old-style classes
    _get_type = type
else:
    # Need to handle old-style context managers on Python 2
    def _get_type(obj):
        """Return the class of *obj*, looking through old-style instances."""
        obj_type = type(obj)
        if obj_type is InstanceType:
            return obj.__class__  # Old-style class
        return obj_type  # New-style class
|
||||
|
||||
# Inspired by discussions on http://bugs.python.org/issue13585
|
||||
class ExitStack(object):
    """Context manager for dynamic management of a stack of exit callbacks

    For example:

        with ExitStack() as stack:
            files = [stack.enter_context(open(fname)) for fname in filenames]
            # All opened files will automatically be closed at the end of
            # the with statement, even if attempts to open files later
            # in the list raise an exception

    """
    def __init__(self):
        """Start with an empty stack of exit callbacks."""
        self._exit_callbacks = deque()

    def pop_all(self):
        """Preserve the context stack by transferring it to a new instance"""
        new_stack = type(self)()
        new_stack._exit_callbacks = self._exit_callbacks
        self._exit_callbacks = deque()
        return new_stack

    def _push_cm_exit(self, cm, cm_exit):
        """Helper to correctly register callbacks to __exit__ methods"""
        def _exit_wrapper(*exc_details):
            return cm_exit(cm, *exc_details)
        _exit_wrapper.__self__ = cm
        self.push(_exit_wrapper)

    def push(self, exit):
        """Registers a callback with the standard __exit__ method signature

        Can suppress exceptions the same way __exit__ methods can.

        Also accepts any object with an __exit__ method (registering a call
        to the method instead of the object itself)
        """
        # We use an unbound method rather than a bound method to follow
        # the standard lookup behaviour for special methods
        _cb_type = _get_type(exit)
        try:
            exit_method = _cb_type.__exit__
        except AttributeError:
            # Not a context manager, so assume it's a callable
            self._exit_callbacks.append(exit)
        else:
            self._push_cm_exit(exit, exit_method)
        return exit  # Allow use as a decorator

    def callback(self, callback, *args, **kwds):
        """Registers an arbitrary callback and arguments.

        Cannot suppress exceptions.
        """
        def _exit_wrapper(exc_type, exc, tb):
            callback(*args, **kwds)
        # We changed the signature, so using @wraps is not appropriate, but
        # setting __wrapped__ may still help with introspection
        _exit_wrapper.__wrapped__ = callback
        self.push(_exit_wrapper)
        return callback  # Allow use as a decorator

    def enter_context(self, cm):
        """Enters the supplied context manager

        If successful, also pushes its __exit__ method as a callback and
        returns the result of the __enter__ method.
        """
        # We look up the special methods on the type to match the with statement
        _cm_type = _get_type(cm)
        _exit = _cm_type.__exit__
        result = _cm_type.__enter__(cm)
        self._push_cm_exit(cm, _exit)
        return result

    def close(self):
        """Immediately unwind the context stack"""
        self.__exit__(None, None, None)

    def __enter__(self):
        """Return the stack itself so callbacks can be registered on it."""
        return self

    def __exit__(self, *exc_details):
        """Run every registered callback in LIFO order.

        Returns True only when an exception was passed in and one of the
        callbacks suppressed it. If a callback raises, the last such
        exception is re-raised once all callbacks have run.
        """
        received_exc = exc_details[0] is not None

        # We manipulate the exception state so it behaves as though
        # we were actually nesting multiple with statements
        frame_exc = sys.exc_info()[1]
        _fix_exception_context = _make_context_fixer(frame_exc)

        # Callbacks are invoked in LIFO order to match the behaviour of
        # nested context managers
        suppressed_exc = False
        pending_raise = False
        while self._exit_callbacks:
            cb = self._exit_callbacks.pop()
            try:
                if cb(*exc_details):
                    suppressed_exc = True
                    pending_raise = False
                    exc_details = (None, None, None)
            except:
                new_exc_details = sys.exc_info()
                # simulate the stack of exceptions by setting the context
                _fix_exception_context(new_exc_details[1], exc_details[1])
                pending_raise = True
                exc_details = new_exc_details
        if pending_raise:
            _reraise_with_existing_context(exc_details)
        return received_exc and suppressed_exc
|
||||
|
||||
# Preserve backwards compatibility
|
||||
class ContextStack(ExitStack):
    """Backwards compatibility alias for ExitStack"""

    def __init__(self):
        """Emit a DeprecationWarning, then initialise as an ExitStack."""
        # stacklevel=2 attributes the warning to the code creating the stack.
        warnings.warn("ContextStack has been renamed to ExitStack",
                      DeprecationWarning, stacklevel=2)
        super(ContextStack, self).__init__()

    def register_exit(self, callback):
        """Old name for push()."""
        return self.push(callback)

    def register(self, callback, *args, **kwds):
        """Old name for callback()."""
        return self.callback(callback, *args, **kwds)

    def preserve(self):
        """Old name for pop_all()."""
        return self.pop_all()
|
||||
@@ -1,4 +1,4 @@
|
||||
__version__ = '0.6.2'
|
||||
__version__ = '0.6.5'
|
||||
|
||||
from .lock import Lock # noqa
|
||||
from .lock import NeedRegenerationException # noqa
|
||||
from .lock import NeedRegenerationException # noqa
|
||||
|
||||
@@ -13,6 +13,13 @@ class NoValue(object):
|
||||
def payload(self):
|
||||
return self
|
||||
|
||||
def __repr__(self):
|
||||
"""Ensure __repr__ is a consistent value in case NoValue is used to
|
||||
fill another cache key.
|
||||
|
||||
"""
|
||||
return '<dogpile.cache.api.NoValue object>'
|
||||
|
||||
if py3k:
|
||||
def __bool__(self): # pragma NO COVERAGE
|
||||
return False
|
||||
@@ -20,6 +27,7 @@ class NoValue(object):
|
||||
def __nonzero__(self): # pragma NO COVERAGE
|
||||
return False
|
||||
|
||||
|
||||
NO_VALUE = NoValue()
|
||||
"""Value returned from ``get()`` that describes
|
||||
a key not present."""
|
||||
|
||||
@@ -15,3 +15,11 @@ class RegionNotConfigured(DogpileCacheException):
|
||||
|
||||
class ValidationError(DogpileCacheException):
|
||||
"""Error validating a value or option."""
|
||||
|
||||
|
||||
class PluginNotFound(DogpileCacheException):
|
||||
"""The specified plugin could not be found.
|
||||
|
||||
.. versionadded:: 0.6.4
|
||||
|
||||
"""
|
||||
|
||||
+35
-5
@@ -410,7 +410,13 @@ class CacheRegion(object):
|
||||
"configured with backend: %s. "
|
||||
"Specify replace_existing_backend=True to replace."
|
||||
% self.backend)
|
||||
backend_cls = _backend_loader.load(backend)
|
||||
|
||||
try:
|
||||
backend_cls = _backend_loader.load(backend)
|
||||
except PluginLoader.NotFound:
|
||||
raise exception.PluginNotFound(
|
||||
"Couldn't find cache plugin to load: %s" % backend)
|
||||
|
||||
if _config_argument_dict:
|
||||
self.backend = backend_cls.from_config_dict(
|
||||
_config_argument_dict,
|
||||
@@ -487,8 +493,19 @@ class CacheRegion(object):
|
||||
a value. Any retrieved value whose creation
|
||||
time is prior to this timestamp
|
||||
is considered to be stale. It does not
|
||||
affect the data in the cache in any way, and is also
|
||||
local to this instance of :class:`.CacheRegion`.
|
||||
affect the data in the cache in any way, and is
|
||||
**local to this instance of :class:`.CacheRegion`.**
|
||||
|
||||
.. warning::
|
||||
|
||||
The :meth:`.CacheRegion.invalidate` method's default mode of
|
||||
operation is to set a timestamp **local to this CacheRegion
|
||||
in this Python process only**. It does not impact other Python
|
||||
processes or regions as the timestamp is **only stored locally in
|
||||
memory**. To implement invalidation where the
|
||||
timestamp is stored in the cache or similar so that all Python
|
||||
processes can be affected by an invalidation timestamp, implement a
|
||||
custom :class:`.RegionInvalidationStrategy`.
|
||||
|
||||
Once set, the invalidation time is honored by
|
||||
the :meth:`.CacheRegion.get_or_create`,
|
||||
@@ -550,6 +567,8 @@ class CacheRegion(object):
|
||||
_config_prefix="%sarguments." % prefix,
|
||||
wrap=config_dict.get(
|
||||
"%swrap" % prefix, None),
|
||||
replace_existing_backend=config_dict.get(
|
||||
"%sreplace_existing_backend" % prefix, False),
|
||||
)
|
||||
|
||||
@memoized_property
|
||||
@@ -944,11 +963,14 @@ class CacheRegion(object):
|
||||
if not should_cache_fn:
|
||||
self.backend.set_multi(values_w_created)
|
||||
else:
|
||||
self.backend.set_multi(dict(
|
||||
values_to_cache = dict(
|
||||
(k, v)
|
||||
for k, v in values_w_created.items()
|
||||
if should_cache_fn(v[0])
|
||||
))
|
||||
)
|
||||
|
||||
if values_to_cache:
|
||||
self.backend.set_multi(values_to_cache)
|
||||
|
||||
values.update(values_w_created)
|
||||
return [values[orig_to_mangled[k]].payload for k in keys]
|
||||
@@ -1075,6 +1097,14 @@ class CacheRegion(object):
|
||||
.. versionadded:: 0.5.0 Added ``refresh()`` method to decorated
|
||||
function.
|
||||
|
||||
``original()`` on other hand will invoke the decorated function
|
||||
without any caching::
|
||||
|
||||
newvalue = generate_something.original(5, 6)
|
||||
|
||||
.. versionadded:: 0.6.0 Added ``original()`` method to decorated
|
||||
function.
|
||||
|
||||
Lastly, the ``get()`` method returns either the value cached
|
||||
for the given key, or the token ``NO_VALUE`` if no such key
|
||||
exists::
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from .nameregistry import NameRegistry # noqa
|
||||
from .readwrite_lock import ReadWriteMutex # noqa
|
||||
from .langhelpers import PluginLoader, memoized_property, \
|
||||
coerce_string_conf, to_list, KeyReentrantMutex # noqa
|
||||
coerce_string_conf, to_list, KeyReentrantMutex # noqa
|
||||
|
||||
@@ -39,9 +39,9 @@ class PluginLoader(object):
|
||||
self.impls[name] = impl.load
|
||||
return impl.load()
|
||||
else:
|
||||
raise Exception(
|
||||
"Can't load plugin %s %s" %
|
||||
(self.group, name))
|
||||
raise self.NotFound(
|
||||
"Can't load plugin %s %s" % (self.group, name)
|
||||
)
|
||||
|
||||
def register(self, name, modulepath, objname):
|
||||
def load():
|
||||
@@ -49,6 +49,9 @@ class PluginLoader(object):
|
||||
return getattr(mod, objname)
|
||||
self.impls[name] = load
|
||||
|
||||
class NotFound(Exception):
|
||||
"""The specified plugin could not be found."""
|
||||
|
||||
|
||||
class memoized_property(object):
|
||||
"""A read-only @property that is only evaluated once."""
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
Copyright (c) 2013, Ethan Furman.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
Redistributions of source code must retain the above
|
||||
copyright notice, this list of conditions and the
|
||||
following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
|
||||
Neither the name Ethan Furman nor the names of any
|
||||
contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -0,0 +1,3 @@
|
||||
enum34 is the new Python stdlib enum module available in Python 3.4
|
||||
backported for previous versions of Python from 2.4 to 3.3.
|
||||
tested on 2.6, 2.7, and 3.3+
|
||||
@@ -0,0 +1,837 @@
|
||||
"""Python Enumerations"""
|
||||
|
||||
import sys as _sys
|
||||
|
||||
__all__ = ['Enum', 'IntEnum', 'unique']
|
||||
|
||||
version = 1, 1, 6
|
||||
|
||||
# Interpreter version as a float built from "major.minor".
# NOTE(review): a two-digit minor version collapses (3.10 -> 3.1), so this is
# only reliable for coarse checks such as ``pyver >= 3.0``. The representation
# is left unchanged because other code compares against it -- TODO confirm
# no finer-grained 3.x comparison relies on it.
pyver = float('%s.%s' % _sys.version_info[:2])

# Fallback for interpreters that have no built-in any().
try:
    any
except NameError:
    def any(iterable):
        for element in iterable:
            if element:
                return True
        return False

# OrderedDict is optional; None marks it as unavailable.
try:
    from collections import OrderedDict
except ImportError:
    OrderedDict = None

try:
    basestring
except NameError:
    # In Python 2 basestring is the ancestor of both str and unicode
    # in Python 3 it's just str, but was missing in 3.1
    basestring = str

try:
    unicode
except NameError:
    # In Python 3 unicode no longer exists (it's just str)
    unicode = str
|
||||
|
||||
class _RouteClassAttributeToGetattr(object):
    """Route attribute access on a class to __getattr__.

    This is a descriptor, used to define attributes that act differently when
    accessed through an instance and through a class.  Instance access remains
    normal, but access to an attribute through a class will be routed to the
    class's __getattr__ method; this is done by raising AttributeError.

    """
    def __init__(self, fget=None):
        """Store *fget*, the getter called with the instance on access."""
        self.fget = fget

    def __get__(self, instance, ownerclass=None):
        """Return ``fget(instance)``; raise AttributeError for class access."""
        if instance is None:
            raise AttributeError()
        return self.fget(instance)

    def __set__(self, instance, value):
        """Always refuse assignment."""
        raise AttributeError("can't set attribute")

    def __delete__(self, instance):
        """Always refuse deletion."""
        raise AttributeError("can't delete attribute")
|
||||
|
||||
|
||||
def _is_descriptor(obj):
    """Returns True if obj is a descriptor, False otherwise."""
    # Any one of the three descriptor-protocol methods is enough.
    return (
            hasattr(obj, '__get__') or
            hasattr(obj, '__set__') or
            hasattr(obj, '__delete__'))
|
||||
|
||||
|
||||
def _is_dunder(name):
    """Returns True if a __dunder__ name, False otherwise."""
    # The length guard comes first so short names are rejected immediately.
    # The slices keep the remaining checks safe for any string.
    return (len(name) > 4 and
            name[:2] == name[-2:] == '__' and
            name[2:3] != '_' and
            name[-3:-2] != '_')
|
||||
|
||||
|
||||
def _is_sunder(name):
    """Returns True if a _sunder_ name, False otherwise."""
    # Check the length before indexing: name[0] on an empty string would
    # raise IndexError instead of answering False.
    return (len(name) > 2 and
            name[0] == name[-1] == '_' and
            name[1:2] != '_' and
            name[-2:-1] != '_')
|
||||
|
||||
|
||||
def _make_class_unpicklable(cls):
    """Make the given class un-picklable."""
    def _break_on_call_reduce(self, protocol=None):
        raise TypeError('%r cannot be pickled' % self)
    # Replace the reduce hook so it raises TypeError, and overwrite the
    # recorded module name with a placeholder.
    cls.__reduce_ex__ = _break_on_call_reduce
    cls.__module__ = '<unknown>'
|
||||
|
||||
|
||||
class _EnumDict(dict):
    """Track enum member order and ensure member names are not reused.

    EnumMeta will use the names found in self._member_names as the
    enumeration member names.

    """
    def __init__(self):
        """Create an empty mapping with no recorded member names."""
        super(_EnumDict, self).__init__()
        self._member_names = []

    def __setitem__(self, key, value):
        """Changes anything not dundered or not a descriptor.

        If a descriptor is added with the same name as an enum member, the name
        is removed from _member_names (this may leave a hole in the numerical
        sequence of values).

        If an enum member name is used twice, an error is raised; duplicate
        values are not checked for.

        Single underscore (sunder) names are reserved.

        Note:   in 3.x __order__ is simply discarded as a not necessary piece
                leftover from 2.x

        """
        if pyver >= 3.0 and key in ('_order_', '__order__'):
            # Discarded on 3.x: the key is never stored.
            return
        elif key == '__order__':
            # Store the 2.x spelling under the _order_ key.
            key = '_order_'
        if _is_sunder(key):
            if key != '_order_':
                raise ValueError('_names_ are reserved for future Enum use')
        elif _is_dunder(key):
            pass
        elif key in self._member_names:
            # descriptor overwriting an enum?
            raise TypeError('Attempted to reuse key: %r' % key)
        elif not _is_descriptor(value):
            if key in self:
                # enum overwriting a descriptor?
                raise TypeError('Key already defined as: %r' % self[key])
            self._member_names.append(key)
        super(_EnumDict, self).__setitem__(key, value)
|
||||
|
||||
|
||||
# Dummy value for Enum as EnumMeta explicitly checks for it, but of course until
|
||||
# EnumMeta finishes running the first time the Enum class doesn't exist. This
|
||||
# is also why there are checks in EnumMeta like `if Enum is not None`
|
||||
Enum = None
|
||||
|
||||
|
||||
class EnumMeta(type):
|
||||
"""Metaclass for Enum"""
|
||||
@classmethod
|
||||
def __prepare__(metacls, cls, bases):
|
||||
return _EnumDict()
|
||||
|
||||
def __new__(metacls, cls, bases, classdict):
    """Create the enumeration class and instantiate all of its members.

    metacls: this metaclass
    cls: name of the class being created
    bases: tuple of base classes
    classdict: the class namespace; a plain dict is first copied into an
        _EnumDict so member names get tracked

    Returns the new enum class with `_member_names_`, `_member_map_`,
    `_member_type_` and `_value2member_map_` populated.  Raises ValueError
    for illegal member names.

    """
    # an Enum class is final once enumeration items have been defined; it
    # cannot be mixed with other types (int, float, etc.) if it has an
    # inherited __new__ unless a new __new__ is defined (or the resulting
    # class will fail).
    if type(classdict) is dict:
        original_dict = classdict
        classdict = _EnumDict()
        for k, v in original_dict.items():
            classdict[k] = v

    member_type, first_enum = metacls._get_mixins_(bases)
    __new__, save_new, use_args = metacls._find_new_(classdict, member_type,
                                                    first_enum)
    # save enum items into separate mapping so they don't get baked into
    # the new class
    members = dict((k, classdict[k]) for k in classdict._member_names)
    for name in classdict._member_names:
        del classdict[name]

    # py2 support for definition order
    _order_ = classdict.get('_order_')
    if _order_ is None:
        if pyver < 3.0:
            # no definition order available: fall back to value order, or
            # to name order when the values cannot be compared
            try:
                _order_ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])]
            except TypeError:
                _order_ = [name for name in sorted(members.keys())]
        else:
            _order_ = classdict._member_names
    else:
        del classdict['_order_']
        if pyver < 3.0:
            _order_ = _order_.replace(',', ' ').split()
            # members missing from _order_ are processed last
            aliases = [name for name in members if name not in _order_]
            _order_ += aliases

    # check for illegal enum names (any others?)
    invalid_names = set(members) & set(['mro'])
    if invalid_names:
        raise ValueError('Invalid enum member name(s): %s' % (
            ', '.join(invalid_names), ))

    # save attributes from super classes so we know if we can take
    # the shortcut of storing members in the class dict
    base_attributes = set([a for b in bases for a in b.__dict__])
    # create our new Enum type
    enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict)
    enum_class._member_names_ = []               # canonical names, in _order_ order
    if OrderedDict is not None:
        enum_class._member_map_ = OrderedDict()
    else:
        enum_class._member_map_ = {}             # name->value map
    enum_class._member_type_ = member_type

    # Reverse value->name map for hashable values.
    enum_class._value2member_map_ = {}

    # instantiate them, checking for duplicates as we go
    # we instantiate first instead of checking for duplicates first in case
    # a custom __new__ is doing something funky with the values -- such as
    # auto-numbering ;)
    if __new__ is None:
        __new__ = enum_class.__new__
    for member_name in _order_:
        value = members[member_name]
        if not isinstance(value, tuple):
            args = (value, )
        else:
            args = value
        if member_type is tuple:   # special case for tuple enums
            args = (args, )     # wrap it one more time
        if not use_args or not args:
            enum_member = __new__(enum_class)
            if not hasattr(enum_member, '_value_'):
                enum_member._value_ = value
        else:
            enum_member = __new__(enum_class, *args)
            if not hasattr(enum_member, '_value_'):
                enum_member._value_ = member_type(*args)
        value = enum_member._value_
        enum_member._name_ = member_name
        enum_member.__objclass__ = enum_class
        enum_member.__init__(*args)
        # If another member with the same value was already defined, the
        # new member becomes an alias to the existing one.
        for name, canonical_member in enum_class._member_map_.items():
            if canonical_member.value == enum_member._value_:
                enum_member = canonical_member
                break
        else:
            # Aliases don't appear in member names (only in __members__).
            enum_class._member_names_.append(member_name)
        # performance boost for any member that would not shadow
        # a DynamicClassAttribute (aka _RouteClassAttributeToGetattr)
        if member_name not in base_attributes:
            setattr(enum_class, member_name, enum_member)
        # now add to _member_map_
        enum_class._member_map_[member_name] = enum_member
        try:
            # This may fail if value is not hashable. We can't add the value
            # to the map, and by-value lookups for this value will be
            # linear.
            enum_class._value2member_map_[value] = enum_member
        except TypeError:
            pass

    # If a custom type is mixed into the Enum, and it does not know how
    # to pickle itself, pickle.dumps will succeed but pickle.loads will
    # fail.  Rather than have the error show up later and possibly far
    # from the source, sabotage the pickle protocol for this class so
    # that pickle.dumps also fails.
    #
    # However, if the new class implements its own __reduce_ex__, do not
    # sabotage -- it's on them to make sure it works correctly.  We use
    # __reduce_ex__ instead of any of the others as it is preferred by
    # pickle over __reduce__, and it handles all pickle protocols.
    unpicklable = False
    if '__reduce_ex__' not in classdict:
        if member_type is not object:
            methods = ('__getnewargs_ex__', '__getnewargs__',
                    '__reduce_ex__', '__reduce__')
            if not any(m in member_type.__dict__ for m in methods):
                _make_class_unpicklable(enum_class)
                unpicklable = True

    # double check that repr and friends are not the mixin's or various
    # things break (such as pickle)
    for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'):
        class_method = getattr(enum_class, name)
        enum_method = getattr(first_enum, name, None)
        if name not in classdict and class_method is not enum_method:
            if name == '__reduce_ex__' and unpicklable:
                continue
            setattr(enum_class, name, enum_method)

    # method resolution and int's are not playing nice
    # Pythons older than 2.6 use __cmp__
    if pyver < 2.6:

        if issubclass(enum_class, int):
            setattr(enum_class, '__cmp__', getattr(int, '__cmp__'))

    elif pyver < 3.0:

        if issubclass(enum_class, int):
            for method in (
                    '__le__',
                    '__lt__',
                    '__gt__',
                    '__ge__',
                    '__eq__',
                    '__ne__',
                    '__hash__',
                    ):
                setattr(enum_class, method, getattr(int, method))

    # replace any other __new__ with our own (as long as Enum is not None,
    # anyway) -- again, this is to support pickle
    if Enum is not None:
        # if the user defined their own __new__, save it before it gets
        # clobbered in case they subclass later
        if save_new:
            setattr(enum_class, '__member_new__', enum_class.__dict__['__new__'])
        setattr(enum_class, '__new__', Enum.__dict__['__new__'])
    return enum_class
|
||||
|
||||
def __bool__(cls):
|
||||
"""
|
||||
classes/types should always be True.
|
||||
"""
|
||||
return True
|
||||
|
||||
def __call__(cls, value, names=None, module=None, type=None, start=1):
|
||||
"""Either returns an existing member, or creates a new enum class.
|
||||
|
||||
This method is used both when an enum class is given a value to match
|
||||
to an enumeration member (i.e. Color(3)) and for the functional API
|
||||
(i.e. Color = Enum('Color', names='red green blue')).
|
||||
|
||||
When used for the functional API: `module`, if set, will be stored in
|
||||
the new class' __module__ attribute; `type`, if set, will be mixed in
|
||||
as the first base class.
|
||||
|
||||
Note: if `module` is not set this routine will attempt to discover the
|
||||
calling module by walking the frame stack; if this is unsuccessful
|
||||
the resulting class will not be pickleable.
|
||||
|
||||
"""
|
||||
if names is None: # simple value lookup
|
||||
return cls.__new__(cls, value)
|
||||
# otherwise, functional API: we're creating a new Enum type
|
||||
return cls._create_(value, names, module=module, type=type, start=start)
|
||||
|
||||
def __contains__(cls, member):
|
||||
return isinstance(member, cls) and member.name in cls._member_map_
|
||||
|
||||
def __delattr__(cls, attr):
|
||||
# nicer error message when someone tries to delete an attribute
|
||||
# (see issue19025).
|
||||
if attr in cls._member_map_:
|
||||
raise AttributeError(
|
||||
"%s: cannot delete Enum member." % cls.__name__)
|
||||
super(EnumMeta, cls).__delattr__(attr)
|
||||
|
||||
def __dir__(self):
|
||||
return (['__class__', '__doc__', '__members__', '__module__'] +
|
||||
self._member_names_)
|
||||
|
||||
@property
|
||||
def __members__(cls):
|
||||
"""Returns a mapping of member name->value.
|
||||
|
||||
This mapping lists all enum members, including aliases. Note that this
|
||||
is a copy of the internal mapping.
|
||||
|
||||
"""
|
||||
return cls._member_map_.copy()
|
||||
|
||||
def __getattr__(cls, name):
    """Return the enum member matching `name`.

    Member lookup goes through __getattr__ rather than descriptors or the
    class' __dict__ so that `name` and `value` can be both properties of
    enum members (which live in the class' __dict__) and enum members
    themselves.

    Raises AttributeError for dunder names and for unknown names.

    """
    if _is_dunder(name):
        raise AttributeError(name)
    member_map = cls._member_map_
    try:
        return member_map[name]
    except KeyError:
        raise AttributeError(name)
|
||||
|
||||
def __getitem__(cls, name):
|
||||
return cls._member_map_[name]
|
||||
|
||||
def __iter__(cls):
|
||||
return (cls._member_map_[name] for name in cls._member_names_)
|
||||
|
||||
def __reversed__(cls):
|
||||
return (cls._member_map_[name] for name in reversed(cls._member_names_))
|
||||
|
||||
def __len__(cls):
|
||||
return len(cls._member_names_)
|
||||
|
||||
__nonzero__ = __bool__
|
||||
|
||||
def __repr__(cls):
|
||||
return "<enum %r>" % cls.__name__
|
||||
|
||||
def __setattr__(cls, name, value):
|
||||
"""Block attempts to reassign Enum members.
|
||||
|
||||
A simple assignment to the class namespace only changes one of the
|
||||
several possible ways to get an Enum member from the Enum class,
|
||||
resulting in an inconsistent Enumeration.
|
||||
|
||||
"""
|
||||
member_map = cls.__dict__.get('_member_map_', {})
|
||||
if name in member_map:
|
||||
raise AttributeError('Cannot reassign members.')
|
||||
super(EnumMeta, cls).__setattr__(name, value)
|
||||
|
||||
def _create_(cls, class_name, names=None, module=None, type=None, start=1):
    """Convenience method to create a new Enum class.

    class_name: name of the new enumeration
    names: one of

    * A string containing member names, separated either with spaces or
      commas.  Values are auto-numbered from `start`.
    * An iterable of member names.  Values are auto-numbered from `start`.
    * An iterable of (member name, value) pairs.
    * A mapping of member name -> value.

    module: name stored in the new class' __module__; when None the
        caller's module is looked up through the frame stack, and if that
        fails the class is made unpicklable
    type: optional mix-in type, used as the first base class
    start: first value used when auto-numbering

    An empty list or tuple of names creates an enumeration without
    members.

    """
    if pyver < 3.0:
        # if class_name is unicode, attempt a conversion to ASCII
        if isinstance(class_name, unicode):
            try:
                class_name = class_name.encode('ascii')
            except UnicodeEncodeError:
                raise TypeError('%r is not representable in ASCII' % class_name)
    metacls = cls.__class__
    if type is None:
        bases = (cls, )
    else:
        bases = (type, cls)
    classdict = metacls.__prepare__(class_name, bases)
    _order_ = []

    # special processing needed for names?
    if isinstance(names, basestring):
        names = names.replace(',', ' ').split()
    # `names and` keeps an empty sequence from failing on names[0]
    if isinstance(names, (tuple, list)) and names and isinstance(names[0], basestring):
        names = [(e, i+start) for (i, e) in enumerate(names)]

    # Here, names is either an iterable of (name, value) or a mapping.
    item = None  # in case names is empty
    for item in names:
        if isinstance(item, basestring):
            # iterating a mapping yields its keys
            member_name, member_value = item, names[item]
        else:
            member_name, member_value = item
        classdict[member_name] = member_value
        _order_.append(member_name)
    # only set _order_ in classdict if name/value was not from a mapping
    if not isinstance(item, basestring):
        classdict['_order_'] = ' '.join(_order_)
    enum_class = metacls.__new__(metacls, class_name, bases, classdict)

    # TODO: replace the frame hack if a blessed way to know the calling
    # module is ever developed
    if module is None:
        try:
            module = _sys._getframe(2).f_globals['__name__']
        except (AttributeError, ValueError, KeyError):
            # no frame support, stack too shallow, or the caller's globals
            # have no __name__
            pass
    if module is None:
        _make_class_unpicklable(enum_class)
    else:
        enum_class.__module__ = module

    return enum_class
|
||||
|
||||
@staticmethod
def _get_mixins_(bases):
    """Work out the member data type and the first inherited enum class.

    bases: the tuple of bases that was given to __new__

    Returns ``(member_type, first_enum)``.  Raises TypeError when a base
    is an enumeration that already has members, or when the last base is
    not an Enum.

    """
    if not bases or Enum is None:
        return object, Enum

    # double check that we are not subclassing a class with existing
    # enumeration members
    for candidate in bases:
        if (candidate is not Enum and
                issubclass(candidate, Enum) and
                candidate._member_names_):
            raise TypeError("Cannot extend enumerations")
    # the last base listed has to be an Enum
    if not issubclass(bases[-1], Enum):
        raise TypeError("new enumerations must be created as "
                "`ClassName([mixin_type,] enum_type)`")

    # get correct mix-in type (either mix-in type of Enum subclass, or
    # first base if last base is Enum)
    if not issubclass(bases[0], Enum):
        # first data type, enum type
        return bases[0], bases[-1]

    member_type = first_enum = None
    for klass in bases[0].__mro__:
        # most common: (IntEnum, int, Enum, object)
        # possible:    (<Enum 'AutoIntEnum'>, <Enum 'IntEnum'>,
        #               <class 'int'>, <Enum 'Enum'>,
        #               <class 'object'>)
        if issubclass(klass, Enum):
            if first_enum is None:
                first_enum = klass
        elif member_type is None:
            member_type = klass

    return member_type, first_enum
|
||||
|
||||
if pyver < 3.0:
    @staticmethod
    def _find_new_(classdict, member_type, first_enum):
        """Returns the __new__ to be used for creating the enum members.

        classdict: the class dictionary given to __new__
        member_type: the data type whose __new__ will be used by default
        first_enum: enumeration to check for an overriding __new__

        The result is a ``(__new__, save_new, use_args)`` triple.

        """
        # a __new__ defined by the user in the class body wins outright
        __new__ = classdict.get('__new__', None)
        if __new__:
            return None, True, True      # __new__, save_new, use_args

        # implementations that do not count as a customised __new__
        none_new = getattr(None, '__new__')
        object_new = getattr(object, '__new__')
        if Enum is None:
            enum_new = none_new
        else:
            enum_new = Enum.__dict__['__new__']
        not_custom = [None, none_new, object_new, enum_new]

        # check all possibles for __member_new__ before falling back to
        # __new__
        for method in ('__member_new__', '__new__'):
            for possible in (member_type, first_enum):
                try:
                    target = possible.__dict__[method]
                except (AttributeError, KeyError):
                    target = getattr(possible, method, None)
                if target in not_custom:
                    continue
                if method == '__member_new__':
                    # a __new__ saved by an earlier enum class
                    classdict['__new__'] = target
                    return None, False, True
                if isinstance(target, staticmethod):
                    target = target.__get__(member_type)
                __new__ = target
                break
            if __new__ is not None:
                break
        else:
            __new__ = object.__new__

        # if a non-object.__new__ is used then whatever value/tuple was
        # assigned to the enum member name will be passed to __new__ and to the
        # new enum member's __init__
        use_args = __new__ is not object.__new__

        return __new__, False, use_args
else:
    @staticmethod
    def _find_new_(classdict, member_type, first_enum):
        """Returns the __new__ to be used for creating the enum members.

        classdict: the class dictionary given to __new__
        member_type: the data type whose __new__ will be used by default
        first_enum: enumeration to check for an overriding __new__

        The result is a ``(__new__, save_new, use_args)`` triple.

        """
        # a __new__ defined by the user in the class body is used as is
        __new__ = classdict.get('__new__', None)

        # should __new__ be saved as __member_new__ later?
        save_new = __new__ is not None

        if __new__ is None:
            # implementations that do not count as a customised __new__
            not_custom = (
                    None,
                    None.__new__,
                    object.__new__,
                    Enum.__new__,
                    )
            # check all possibles for __member_new__ before falling back to
            # __new__
            for method in ('__member_new__', '__new__'):
                for possible in (member_type, first_enum):
                    target = getattr(possible, method, None)
                    if target not in not_custom:
                        __new__ = target
                        break
                if __new__ is not None:
                    break
            else:
                __new__ = object.__new__

        # if a non-object.__new__ is used then whatever value/tuple was
        # assigned to the enum member name will be passed to __new__ and to the
        # new enum member's __init__
        use_args = __new__ is not object.__new__

        return __new__, save_new, use_args
|
||||
|
||||
|
||||
########################################################
|
||||
# In order to support Python 2 and 3 with a single
|
||||
# codebase we have to create the Enum methods separately
|
||||
# and then use the `type(name, bases, dict)` method to
|
||||
# create the class.
|
||||
########################################################
|
||||
# namespace that collects Enum's attributes; it is handed to EnumMeta later
temp_enum_dict = {
    '__doc__': "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n",
}
|
||||
|
||||
def __new__(cls, value):
|
||||
# all enum instances are actually created during class construction
|
||||
# without calling this method; this method is called by the metaclass'
|
||||
# __call__ (i.e. Color(3) ), and by pickle
|
||||
if type(value) is cls:
|
||||
# For lookups like Color(Color.red)
|
||||
value = value.value
|
||||
#return value
|
||||
# by-value search for a matching enum member
|
||||
# see if it's in the reverse mapping (for hashable values)
|
||||
try:
|
||||
if value in cls._value2member_map_:
|
||||
return cls._value2member_map_[value]
|
||||
except TypeError:
|
||||
# not there, now do long search -- O(n) behavior
|
||||
for member in cls._member_map_.values():
|
||||
if member.value == value:
|
||||
return member
|
||||
raise ValueError("%s is not a valid %s" % (value, cls.__name__))
|
||||
temp_enum_dict['__new__'] = __new__
|
||||
del __new__
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s.%s: %r>" % (
|
||||
self.__class__.__name__, self._name_, self._value_)
|
||||
temp_enum_dict['__repr__'] = __repr__
|
||||
del __repr__
|
||||
|
||||
def __str__(self):
|
||||
return "%s.%s" % (self.__class__.__name__, self._name_)
|
||||
temp_enum_dict['__str__'] = __str__
|
||||
del __str__
|
||||
|
||||
if pyver >= 3.0:
    def __dir__(self):
        """List a few standard attributes plus the public, non-member names found along the class' MRO."""
        added_behavior = []
        for cls in self.__class__.mro():
            for m in cls.__dict__:
                # skip private names and the enum members themselves
                if m[0] != '_' and m not in self._member_map_:
                    added_behavior.append(m)
        return (['__class__', '__doc__', '__module__', ] + added_behavior)
    temp_enum_dict['__dir__'] = __dir__
    del __dir__
|
||||
|
||||
def __format__(self, format_spec):
|
||||
# mixed-in Enums should use the mixed-in type's __format__, otherwise
|
||||
# we can get strange results with the Enum name showing up instead of
|
||||
# the value
|
||||
|
||||
# pure Enum branch
|
||||
if self._member_type_ is object:
|
||||
cls = str
|
||||
val = str(self)
|
||||
# mix-in branch
|
||||
else:
|
||||
cls = self._member_type_
|
||||
val = self.value
|
||||
return cls.__format__(val, format_spec)
|
||||
temp_enum_dict['__format__'] = __format__
|
||||
del __format__
|
||||
|
||||
|
||||
####################################
|
||||
# Pythons older than 2.6 use __cmp__
|
||||
|
||||
if pyver < 2.6:

    def __cmp__(self, other):
        """Compare two members of the same enumeration by identity.

        Returns 0 when `other` is this very member and -1 for any other
        member of the same class; NotImplemented for anything else.

        """
        if type(other) is self.__class__:
            if self is other:
                return 0
            return -1
        return NotImplemented
    temp_enum_dict['__cmp__'] = __cmp__
    del __cmp__

else:

    # plain Enum members have no ordering: every rich ordering comparison
    # raises TypeError

    def __le__(self, other):
        raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__le__'] = __le__
    del __le__

    def __lt__(self, other):
        raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__lt__'] = __lt__
    del __lt__

    def __ge__(self, other):
        raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__ge__'] = __ge__
    del __ge__

    def __gt__(self, other):
        raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__))
    temp_enum_dict['__gt__'] = __gt__
    del __gt__
|
||||
|
||||
|
||||
def __eq__(self, other):
|
||||
if type(other) is self.__class__:
|
||||
return self is other
|
||||
return NotImplemented
|
||||
temp_enum_dict['__eq__'] = __eq__
|
||||
del __eq__
|
||||
|
||||
def __ne__(self, other):
|
||||
if type(other) is self.__class__:
|
||||
return self is not other
|
||||
return NotImplemented
|
||||
temp_enum_dict['__ne__'] = __ne__
|
||||
del __ne__
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self._name_)
|
||||
temp_enum_dict['__hash__'] = __hash__
|
||||
del __hash__
|
||||
|
||||
def __reduce_ex__(self, proto):
|
||||
return self.__class__, (self._value_, )
|
||||
temp_enum_dict['__reduce_ex__'] = __reduce_ex__
|
||||
del __reduce_ex__
|
||||
|
||||
# _RouteClassAttributeToGetattr is used to provide access to the `name`
|
||||
# and `value` properties of enum members while keeping some measure of
|
||||
# protection from modification, while still allowing for an enumeration
|
||||
# to have members named `name` and `value`. This works because enumeration
|
||||
# members are not set directly on the enum class -- __getattr__ is
|
||||
# used to look them up.
|
||||
|
||||
# accessor for the member's name, as stored in _name_
@_RouteClassAttributeToGetattr
def name(self):
    return self._name_
|
||||
temp_enum_dict['name'] = name
|
||||
del name
|
||||
|
||||
# accessor for the member's value, as stored in _value_
@_RouteClassAttributeToGetattr
def value(self):
    return self._value_
|
||||
temp_enum_dict['value'] = value
|
||||
del value
|
||||
|
||||
@classmethod
def _convert(cls, name, module, filter, source=None):
    """
    Create a new Enum subclass that replaces a collection of global constants

    name: name of the new enumeration
    module: name of the module (looked up in sys.modules) that receives
        the enum and its members
    filter: callable given each constant's name; truthy means "include"
    source: optional object whose vars() supply the constants; the
        module's own globals are used when it is not given
    """
    # convert all constants from source (or module) that pass filter() to
    # a new Enum called name, and export the enum and its members back to
    # module;
    # also, replace the __reduce_ex__ method so unpickling works in
    # previous Python versions
    module_globals = vars(_sys.modules[module])
    if source:
        constants = vars(source)
    else:
        constants = module_globals
    members = dict((key, val) for key, val in constants.items() if filter(key))
    new_enum = cls(name, members, module=module)
    new_enum.__reduce_ex__ = _reduce_ex_by_name
    module_globals.update(new_enum.__members__)
    module_globals[name] = new_enum
    return new_enum
|
||||
temp_enum_dict['_convert'] = _convert
|
||||
del _convert
|
||||
|
||||
Enum = EnumMeta('Enum', (object, ), temp_enum_dict)
|
||||
del temp_enum_dict
|
||||
|
||||
# Enum has now been created
|
||||
###########################
|
||||
|
||||
# int is listed before Enum so that it is picked up as the mix-in data type
class IntEnum(int, Enum):
    """Enum where members are also (and must be) ints"""
|
||||
|
||||
def _reduce_ex_by_name(self, proto):
|
||||
return self.name
|
||||
|
||||
def unique(enumeration):
    """Class decorator that ensures only unique members exist in an enumeration.

    Returns `enumeration` unchanged when no name in its __members__ is an
    alias; otherwise raises ValueError listing every ``alias -> name``
    pair.

    """
    # an alias is a name in __members__ that differs from its member's name
    duplicates = [
            (alias, member.name)
            for alias, member in enumeration.__members__.items()
            if alias != member.name
            ]
    if not duplicates:
        return enumeration
    duplicate_names = ', '.join(["%s -> %s" % pair for pair in duplicates])
    raise ValueError('duplicate names found in %r: %s' %
            (enumeration, duplicate_names)
            )
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,735 @@
|
||||
``enum`` --- support for enumerations
|
||||
========================================
|
||||
|
||||
.. :synopsis: enumerations are sets of symbolic names bound to unique, constant
|
||||
values.
|
||||
.. :moduleauthor:: Ethan Furman <ethan@stoneleaf.us>
|
||||
.. :sectionauthor:: Barry Warsaw <barry@python.org>,
|
||||
.. :sectionauthor:: Eli Bendersky <eliben@gmail.com>,
|
||||
.. :sectionauthor:: Ethan Furman <ethan@stoneleaf.us>
|
||||
|
||||
----------------
|
||||
|
||||
An enumeration is a set of symbolic names (members) bound to unique, constant
|
||||
values. Within an enumeration, the members can be compared by identity, and
|
||||
the enumeration itself can be iterated over.
|
||||
|
||||
|
||||
Module Contents
|
||||
---------------
|
||||
|
||||
This module defines two enumeration classes that can be used to define unique
|
||||
sets of names and values: ``Enum`` and ``IntEnum``. It also defines
|
||||
one decorator, ``unique``.
|
||||
|
||||
``Enum``
|
||||
|
||||
Base class for creating enumerated constants. See section `Functional API`_
|
||||
for an alternate construction syntax.
|
||||
|
||||
``IntEnum``
|
||||
|
||||
Base class for creating enumerated constants that are also subclasses of ``int``.
|
||||
|
||||
``unique``
|
||||
|
||||
Enum class decorator that ensures only one name is bound to any one value.
|
||||
|
||||
|
||||
Creating an Enum
|
||||
----------------
|
||||
|
||||
Enumerations are created using the ``class`` syntax, which makes them
|
||||
easy to read and write. An alternative creation method is described in
|
||||
`Functional API`_. To define an enumeration, subclass ``Enum`` as
|
||||
follows::
|
||||
|
||||
>>> from enum import Enum
|
||||
>>> class Color(Enum):
|
||||
... red = 1
|
||||
... green = 2
|
||||
... blue = 3
|
||||
|
||||
Note: Nomenclature
|
||||
|
||||
- The class ``Color`` is an *enumeration* (or *enum*)
|
||||
- The attributes ``Color.red``, ``Color.green``, etc., are
|
||||
*enumeration members* (or *enum members*).
|
||||
- The enum members have *names* and *values* (the name of
|
||||
``Color.red`` is ``red``, the value of ``Color.blue`` is
|
||||
``3``, etc.)
|
||||
|
||||
Note:
|
||||
|
||||
Even though we use the ``class`` syntax to create Enums, Enums
|
||||
are not normal Python classes. See `How are Enums different?`_ for
|
||||
more details.
|
||||
|
||||
Enumeration members have human readable string representations::
|
||||
|
||||
>>> print(Color.red)
|
||||
Color.red
|
||||
|
||||
...while their ``repr`` has more information::
|
||||
|
||||
>>> print(repr(Color.red))
|
||||
<Color.red: 1>
|
||||
|
||||
The *type* of an enumeration member is the enumeration it belongs to::
|
||||
|
||||
>>> type(Color.red)
|
||||
<enum 'Color'>
|
||||
>>> isinstance(Color.green, Color)
|
||||
True
|
||||
>>>
|
||||
|
||||
Enum members also have a property that contains just their item name::
|
||||
|
||||
>>> print(Color.red.name)
|
||||
red
|
||||
|
||||
Enumerations support iteration. In Python 3.x definition order is used; in
|
||||
Python 2.x the definition order is not available, but class attribute
|
||||
``__order__`` is supported; otherwise, value order is used::
|
||||
|
||||
>>> class Shake(Enum):
|
||||
... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x
|
||||
... vanilla = 7
|
||||
... chocolate = 4
|
||||
... cookies = 9
|
||||
... mint = 3
|
||||
...
|
||||
>>> for shake in Shake:
|
||||
... print(shake)
|
||||
...
|
||||
Shake.vanilla
|
||||
Shake.chocolate
|
||||
Shake.cookies
|
||||
Shake.mint
|
||||
|
||||
The ``__order__`` attribute is always removed, and in 3.x it is also ignored
|
||||
(order is definition order); however, in the stdlib version it will be ignored
|
||||
but not removed.
|
||||
|
||||
Enumeration members are hashable, so they can be used in dictionaries and sets::
|
||||
|
||||
>>> apples = {}
|
||||
>>> apples[Color.red] = 'red delicious'
|
||||
>>> apples[Color.green] = 'granny smith'
|
||||
>>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'}
|
||||
True
|
||||
|
||||
|
||||
Programmatic access to enumeration members and their attributes
|
||||
---------------------------------------------------------------
|
||||
|
||||
Sometimes it's useful to access members in enumerations programmatically (i.e.
|
||||
situations where ``Color.red`` won't do because the exact color is not known
|
||||
at program-writing time). ``Enum`` allows such access::
|
||||
|
||||
>>> Color(1)
|
||||
<Color.red: 1>
|
||||
>>> Color(3)
|
||||
<Color.blue: 3>
|
||||
|
||||
If you want to access enum members by *name*, use item access::
|
||||
|
||||
>>> Color['red']
|
||||
<Color.red: 1>
|
||||
>>> Color['green']
|
||||
<Color.green: 2>
|
||||
|
||||
If you have an enum member and need its ``name`` or ``value``::
|
||||
|
||||
>>> member = Color.red
|
||||
>>> member.name
|
||||
'red'
|
||||
>>> member.value
|
||||
1
|
||||
|
||||
|
||||
Duplicating enum members and values
|
||||
-----------------------------------
|
||||
|
||||
Having two enum members (or any other attribute) with the same name is invalid;
|
||||
in Python 3.x this would raise an error, but in Python 2.x the second member
|
||||
simply overwrites the first::
|
||||
|
||||
>>> # python 2.x
|
||||
>>> class Shape(Enum):
|
||||
... square = 2
|
||||
... square = 3
|
||||
...
|
||||
>>> Shape.square
|
||||
<Shape.square: 3>
|
||||
|
||||
>>> # python 3.x
|
||||
>>> class Shape(Enum):
|
||||
... square = 2
|
||||
... square = 3
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: Attempted to reuse key: 'square'
|
||||
|
||||
However, two enum members are allowed to have the same value. Given two members
|
||||
A and B with the same value (and A defined first), B is an alias to A. By-value
|
||||
lookup of the value of A and B will return A. By-name lookup of B will also
|
||||
return A::
|
||||
|
||||
>>> class Shape(Enum):
|
||||
... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x
|
||||
... square = 2
|
||||
... diamond = 1
|
||||
... circle = 3
|
||||
... alias_for_square = 2
|
||||
...
|
||||
>>> Shape.square
|
||||
<Shape.square: 2>
|
||||
>>> Shape.alias_for_square
|
||||
<Shape.square: 2>
|
||||
>>> Shape(2)
|
||||
<Shape.square: 2>
|
||||
|
||||
|
||||
Allowing aliases is not always desirable. ``unique`` can be used to ensure
|
||||
that none exist in a particular enumeration::
|
||||
|
||||
>>> from enum import unique
|
||||
>>> @unique
|
||||
... class Mistake(Enum):
|
||||
... __order__ = 'one two three four' # only needed in 2.x
|
||||
... one = 1
|
||||
... two = 2
|
||||
... three = 3
|
||||
... four = 3
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: duplicate names found in <enum 'Mistake'>: four -> three
|
||||
|
||||
Iterating over the members of an enum does not provide the aliases::
|
||||
|
||||
>>> list(Shape)
|
||||
[<Shape.square: 2>, <Shape.diamond: 1>, <Shape.circle: 3>]
|
||||
|
||||
The special attribute ``__members__`` is a dictionary mapping names to members.
|
||||
It includes all names defined in the enumeration, including the aliases::
|
||||
|
||||
>>> for name, member in sorted(Shape.__members__.items()):
|
||||
... name, member
|
||||
...
|
||||
('alias_for_square', <Shape.square: 2>)
|
||||
('circle', <Shape.circle: 3>)
|
||||
('diamond', <Shape.diamond: 1>)
|
||||
('square', <Shape.square: 2>)
|
||||
|
||||
The ``__members__`` attribute can be used for detailed programmatic access to
|
||||
the enumeration members. For example, finding all the aliases::
|
||||
|
||||
>>> [name for name, member in Shape.__members__.items() if member.name != name]
|
||||
['alias_for_square']
|
||||
|
||||
Comparisons
|
||||
-----------
|
||||
|
||||
Enumeration members are compared by identity::
|
||||
|
||||
>>> Color.red is Color.red
|
||||
True
|
||||
>>> Color.red is Color.blue
|
||||
False
|
||||
>>> Color.red is not Color.blue
|
||||
True
|
||||
|
||||
Ordered comparisons between enumeration values are *not* supported. Enum
|
||||
members are not integers (but see `IntEnum`_ below)::
|
||||
|
||||
>>> Color.red < Color.blue
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
TypeError: unorderable types: Color() < Color()
|
||||
|
||||
.. warning::
|
||||
|
||||
In Python 2 *everything* is ordered, even though the ordering may not
|
||||
make sense. If you want your enumerations to have a sensible ordering
|
||||
check out the `OrderedEnum`_ recipe below.
|
||||
|
||||
|
||||
Equality comparisons are defined though::
|
||||
|
||||
>>> Color.blue == Color.red
|
||||
False
|
||||
>>> Color.blue != Color.red
|
||||
True
|
||||
>>> Color.blue == Color.blue
|
||||
True
|
||||
|
||||
Comparisons against non-enumeration values will always compare not equal
|
||||
(again, ``IntEnum`` was explicitly designed to behave differently, see
|
||||
below)::
|
||||
|
||||
>>> Color.blue == 2
|
||||
False
|
||||
|
||||
|
||||
Allowed members and attributes of enumerations
|
||||
----------------------------------------------
|
||||
|
||||
The examples above use integers for enumeration values. Using integers is
|
||||
short and handy (and provided by default by the `Functional API`_), but not
|
||||
strictly enforced. In the vast majority of use-cases, one doesn't care what
|
||||
the actual value of an enumeration is. But if the value *is* important,
|
||||
enumerations can have arbitrary values.
|
||||
|
||||
Enumerations are Python classes, and can have methods and special methods as
|
||||
usual. If we have this enumeration::
|
||||
|
||||
>>> class Mood(Enum):
|
||||
... funky = 1
|
||||
... happy = 3
|
||||
...
|
||||
... def describe(self):
|
||||
... # self is the member here
|
||||
... return self.name, self.value
|
||||
...
|
||||
... def __str__(self):
|
||||
... return 'my custom str! {0}'.format(self.value)
|
||||
...
|
||||
... @classmethod
|
||||
... def favorite_mood(cls):
|
||||
... # cls here is the enumeration
|
||||
... return cls.happy
|
||||
|
||||
Then::
|
||||
|
||||
>>> Mood.favorite_mood()
|
||||
<Mood.happy: 3>
|
||||
>>> Mood.happy.describe()
|
||||
('happy', 3)
|
||||
>>> str(Mood.funky)
|
||||
'my custom str! 1'
|
||||
|
||||
The rules for what is allowed are as follows: _sunder_ names (starting and
|
||||
ending with a single underscore) are reserved by enum and cannot be used;
|
||||
all other attributes defined within an enumeration will become members of this
|
||||
enumeration, with the exception of *__dunder__* names and descriptors (methods
|
||||
are also descriptors).
|
||||
|
||||
Note:
|
||||
|
||||
If your enumeration defines ``__new__`` and/or ``__init__`` then
|
||||
whatever value(s) were given to the enum member will be passed into
|
||||
those methods. See `Planet`_ for an example.
|
||||
|
||||
|
||||
Restricted subclassing of enumerations
|
||||
--------------------------------------
|
||||
|
||||
Subclassing an enumeration is allowed only if the enumeration does not define
|
||||
any members. So this is forbidden::
|
||||
|
||||
>>> class MoreColor(Color):
|
||||
... pink = 17
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: Cannot extend enumerations
|
||||
|
||||
But this is allowed::
|
||||
|
||||
>>> class Foo(Enum):
|
||||
... def some_behavior(self):
|
||||
... pass
|
||||
...
|
||||
>>> class Bar(Foo):
|
||||
... happy = 1
|
||||
... sad = 2
|
||||
...
|
||||
|
||||
Allowing subclassing of enums that define members would lead to a violation of
|
||||
some important invariants of types and instances. On the other hand, it makes
|
||||
sense to allow sharing some common behavior between a group of enumerations.
|
||||
(See `OrderedEnum`_ for an example.)
|
||||
|
||||
|
||||
Pickling
|
||||
--------
|
||||
|
||||
Enumerations can be pickled and unpickled::
|
||||
|
||||
>>> from enum.test_enum import Fruit
|
||||
>>> from pickle import dumps, loads
|
||||
>>> Fruit.tomato is loads(dumps(Fruit.tomato, 2))
|
||||
True
|
||||
|
||||
The usual restrictions for pickling apply: picklable enums must be defined in
|
||||
the top level of a module, since unpickling requires them to be importable
|
||||
from that module.
|
||||
|
||||
Note:
|
||||
|
||||
With pickle protocol version 4 (introduced in Python 3.4) it is possible
|
||||
to easily pickle enums nested in other classes.
|
||||
|
||||
|
||||
|
||||
Functional API
|
||||
--------------
|
||||
|
||||
The ``Enum`` class is callable, providing the following functional API::
|
||||
|
||||
>>> Animal = Enum('Animal', 'ant bee cat dog')
|
||||
>>> Animal
|
||||
<enum 'Animal'>
|
||||
>>> Animal.ant
|
||||
<Animal.ant: 1>
|
||||
>>> Animal.ant.value
|
||||
1
|
||||
>>> list(Animal)
|
||||
[<Animal.ant: 1>, <Animal.bee: 2>, <Animal.cat: 3>, <Animal.dog: 4>]
|
||||
|
||||
The semantics of this API resemble ``namedtuple``. The first argument
|
||||
of the call to ``Enum`` is the name of the enumeration.
|
||||
|
||||
The second argument is the *source* of enumeration member names. It can be a
|
||||
whitespace-separated string of names, a sequence of names, a sequence of
|
||||
2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to
|
||||
values. The last two options enable assigning arbitrary values to
|
||||
enumerations; the others auto-assign increasing integers starting with 1. A
|
||||
new class derived from ``Enum`` is returned. In other words, the above
|
||||
assignment to ``Animal`` is equivalent to::
|
||||
|
||||
>>> class Animals(Enum):
|
||||
... ant = 1
|
||||
... bee = 2
|
||||
... cat = 3
|
||||
... dog = 4
|
||||
|
||||
Pickling enums created with the functional API can be tricky as frame stack
|
||||
implementation details are used to try and figure out which module the
|
||||
enumeration is being created in (e.g. it will fail if you use a utility
|
||||
function in a separate module, and also may not work on IronPython or Jython).
|
||||
The solution is to specify the module name explicitly as follows::
|
||||
|
||||
>>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__)
|
||||
|
||||
Derived Enumerations
|
||||
--------------------
|
||||
|
||||
IntEnum
|
||||
^^^^^^^
|
||||
|
||||
A variation of ``Enum`` is provided which is also a subclass of
|
||||
``int``. Members of an ``IntEnum`` can be compared to integers;
|
||||
by extension, integer enumerations of different types can also be compared
|
||||
to each other::
|
||||
|
||||
>>> from enum import IntEnum
|
||||
>>> class Shape(IntEnum):
|
||||
... circle = 1
|
||||
... square = 2
|
||||
...
|
||||
>>> class Request(IntEnum):
|
||||
... post = 1
|
||||
... get = 2
|
||||
...
|
||||
>>> Shape == 1
|
||||
False
|
||||
>>> Shape.circle == 1
|
||||
True
|
||||
>>> Shape.circle == Request.post
|
||||
True
|
||||
|
||||
However, they still can't be compared to standard ``Enum`` enumerations::
|
||||
|
||||
>>> class Shape(IntEnum):
|
||||
... circle = 1
|
||||
... square = 2
|
||||
...
|
||||
>>> class Color(Enum):
|
||||
... red = 1
|
||||
... green = 2
|
||||
...
|
||||
>>> Shape.circle == Color.red
|
||||
False
|
||||
|
||||
``IntEnum`` values behave like integers in other ways you'd expect::
|
||||
|
||||
>>> int(Shape.circle)
|
||||
1
|
||||
>>> ['a', 'b', 'c'][Shape.circle]
|
||||
'b'
|
||||
>>> [i for i in range(Shape.square)]
|
||||
[0, 1]
|
||||
|
||||
For the vast majority of code, ``Enum`` is strongly recommended,
|
||||
since ``IntEnum`` breaks some semantic promises of an enumeration (by
|
||||
being comparable to integers, and thus by transitivity to other
|
||||
unrelated enumerations). It should be used only in special cases where
|
||||
there's no other choice; for example, when integer constants are
|
||||
replaced with enumerations and backwards compatibility is required with code
|
||||
that still expects integers.
|
||||
|
||||
|
||||
Others
|
||||
^^^^^^
|
||||
|
||||
While ``IntEnum`` is part of the ``enum`` module, it would be very
|
||||
simple to implement independently::
|
||||
|
||||
class IntEnum(int, Enum):
|
||||
pass
|
||||
|
||||
This demonstrates how similar derived enumerations can be defined; for example
|
||||
a ``StrEnum`` that mixes in ``str`` instead of ``int``.
|
||||
|
||||
Some rules:
|
||||
|
||||
1. When subclassing ``Enum``, mix-in types must appear before
|
||||
``Enum`` itself in the sequence of bases, as in the ``IntEnum``
|
||||
example above.
|
||||
2. While ``Enum`` can have members of any type, once you mix in an
|
||||
additional type, all the members must have values of that type, e.g.
|
||||
``int`` above. This restriction does not apply to mix-ins which only
|
||||
add methods and don't specify another data type such as ``int`` or
|
||||
``str``.
|
||||
3. When another data type is mixed in, the ``value`` attribute is *not the
|
||||
same* as the enum member itself, although it is equivalent and will compare
|
||||
equal.
|
||||
4. %-style formatting: ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and
|
||||
``__repr__`` respectively; other codes (such as ``%i`` or ``%h`` for
|
||||
IntEnum) treat the enum member as its mixed-in type.
|
||||
|
||||
Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int``
|
||||
subclasses are printed as strings and not numbers when the ``%d``, ``%i``,
|
||||
or ``%u`` codes are used.
|
||||
5. ``str.__format__`` (or ``format``) will use the mixed-in
|
||||
type's ``__format__``. If the ``Enum``'s ``str`` or
|
||||
``repr`` is desired use the ``!s`` or ``!r`` ``str`` format codes.
|
||||
|
||||
|
||||
Decorators
|
||||
----------
|
||||
|
||||
unique
|
||||
^^^^^^
|
||||
|
||||
A ``class`` decorator specifically for enumerations. It searches an
|
||||
enumeration's ``__members__`` gathering any aliases it finds; if any are
|
||||
found ``ValueError`` is raised with the details::
|
||||
|
||||
>>> @unique
|
||||
... class NoDupes(Enum):
|
||||
... first = 'one'
|
||||
... second = 'two'
|
||||
... third = 'two'
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: duplicate names found in <enum 'NoDupes'>: third -> second
|
||||
|
||||
|
||||
Interesting examples
|
||||
--------------------
|
||||
|
||||
While ``Enum`` and ``IntEnum`` are expected to cover the majority of
|
||||
use-cases, they cannot cover them all. Here are recipes for some different
|
||||
types of enumerations that can be used directly, or as examples for creating
|
||||
one's own.
|
||||
|
||||
|
||||
AutoNumber
|
||||
^^^^^^^^^^
|
||||
|
||||
Avoids having to specify the value for each enumeration member::
|
||||
|
||||
>>> class AutoNumber(Enum):
|
||||
... def __new__(cls):
|
||||
... value = len(cls.__members__) + 1
|
||||
... obj = object.__new__(cls)
|
||||
... obj._value_ = value
|
||||
... return obj
|
||||
...
|
||||
>>> class Color(AutoNumber):
|
||||
... __order__ = "red green blue" # only needed in 2.x
|
||||
... red = ()
|
||||
... green = ()
|
||||
... blue = ()
|
||||
...
|
||||
>>> Color.green.value == 2
|
||||
True
|
||||
|
||||
Note:
|
||||
|
||||
The `__new__` method, if defined, is used during creation of the Enum
|
||||
members; it is then replaced by Enum's `__new__` which is used after
|
||||
class creation for lookup of existing members. Due to the way Enums are
|
||||
supposed to behave, there is no way to customize Enum's `__new__`.
|
||||
|
||||
|
||||
UniqueEnum
|
||||
^^^^^^^^^^
|
||||
|
||||
Raises an error if a duplicate member value is found instead of creating an
|
||||
alias::
|
||||
|
||||
>>> class UniqueEnum(Enum):
|
||||
... def __init__(self, *args):
|
||||
... cls = self.__class__
|
||||
... if any(self.value == e.value for e in cls):
|
||||
... a = self.name
|
||||
... e = cls(self.value).name
|
||||
... raise ValueError(
|
||||
... "aliases not allowed in UniqueEnum: %r --> %r"
|
||||
... % (a, e))
|
||||
...
|
||||
>>> class Color(UniqueEnum):
|
||||
... red = 1
|
||||
... green = 2
|
||||
... blue = 3
|
||||
... grene = 2
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: aliases not allowed in UniqueEnum: 'grene' --> 'green'
|
||||
|
||||
|
||||
OrderedEnum
|
||||
^^^^^^^^^^^
|
||||
|
||||
An ordered enumeration that is not based on ``IntEnum`` and so maintains
|
||||
the normal ``Enum`` invariants (such as not being comparable to other
|
||||
enumerations)::
|
||||
|
||||
>>> class OrderedEnum(Enum):
|
||||
... def __ge__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ >= other._value_
|
||||
... return NotImplemented
|
||||
... def __gt__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ > other._value_
|
||||
... return NotImplemented
|
||||
... def __le__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ <= other._value_
|
||||
... return NotImplemented
|
||||
... def __lt__(self, other):
|
||||
... if self.__class__ is other.__class__:
|
||||
... return self._value_ < other._value_
|
||||
... return NotImplemented
|
||||
...
|
||||
>>> class Grade(OrderedEnum):
|
||||
... __order__ = 'A B C D F' # only needed in 2.x
|
||||
... A = 5
|
||||
... B = 4
|
||||
... C = 3
|
||||
... D = 2
|
||||
... F = 1
|
||||
...
|
||||
>>> Grade.C < Grade.A
|
||||
True
|
||||
|
||||
|
||||
Planet
|
||||
^^^^^^
|
||||
|
||||
If ``__new__`` or ``__init__`` is defined the value of the enum member
|
||||
will be passed to those methods::
|
||||
|
||||
>>> class Planet(Enum):
|
||||
... MERCURY = (3.303e+23, 2.4397e6)
|
||||
... VENUS = (4.869e+24, 6.0518e6)
|
||||
... EARTH = (5.976e+24, 6.37814e6)
|
||||
... MARS = (6.421e+23, 3.3972e6)
|
||||
... JUPITER = (1.9e+27, 7.1492e7)
|
||||
... SATURN = (5.688e+26, 6.0268e7)
|
||||
... URANUS = (8.686e+25, 2.5559e7)
|
||||
... NEPTUNE = (1.024e+26, 2.4746e7)
|
||||
... def __init__(self, mass, radius):
|
||||
... self.mass = mass # in kilograms
|
||||
... self.radius = radius # in meters
|
||||
... @property
|
||||
... def surface_gravity(self):
|
||||
... # universal gravitational constant (m3 kg-1 s-2)
|
||||
... G = 6.67300E-11
|
||||
... return G * self.mass / (self.radius * self.radius)
|
||||
...
|
||||
>>> Planet.EARTH.value
|
||||
(5.976e+24, 6378140.0)
|
||||
>>> Planet.EARTH.surface_gravity
|
||||
9.802652743337129
|
||||
|
||||
|
||||
How are Enums different?
|
||||
------------------------
|
||||
|
||||
Enums have a custom metaclass that affects many aspects of both derived Enum
|
||||
classes and their instances (members).
|
||||
|
||||
|
||||
Enum Classes
|
||||
^^^^^^^^^^^^
|
||||
|
||||
The ``EnumMeta`` metaclass is responsible for providing the
|
||||
``__contains__``, ``__dir__``, ``__iter__`` and other methods that
|
||||
allow one to do things with an ``Enum`` class that fail on a typical
|
||||
class, such as ``list(Color)`` or ``some_var in Color``. ``EnumMeta`` is
|
||||
responsible for ensuring that various other methods on the final ``Enum``
|
||||
class are correct (such as ``__new__``, ``__getnewargs__``,
|
||||
``__str__`` and ``__repr__``).
|
||||
|
||||
.. note::
|
||||
|
||||
``__dir__`` is not changed in the Python 2 line as it messes up some
|
||||
of the decorators included in the stdlib.
|
||||
|
||||
|
||||
Enum Members (aka instances)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The most interesting thing about Enum members is that they are singletons.
|
||||
``EnumMeta`` creates them all while it is creating the ``Enum``
|
||||
class itself, and then puts a custom ``__new__`` in place to ensure
|
||||
that no new ones are ever instantiated by returning only the existing
|
||||
member instances.
|
||||
|
||||
|
||||
Finer Points
|
||||
^^^^^^^^^^^^
|
||||
|
||||
``Enum`` members are instances of an ``Enum`` class, and even though they
|
||||
are accessible as `EnumClass.member1.member2`, they should not be
|
||||
accessed directly from the member as that lookup may fail or, worse,
|
||||
return something besides the ``Enum`` member you were looking for
|
||||
(changed in version 1.1.1)::
|
||||
|
||||
>>> class FieldTypes(Enum):
|
||||
... name = 1
|
||||
... value = 2
|
||||
... size = 3
|
||||
...
|
||||
>>> FieldTypes.value.size
|
||||
<FieldTypes.size: 3>
|
||||
>>> FieldTypes.size.value
|
||||
3
|
||||
|
||||
The ``__members__`` attribute is only available on the class.
|
||||
|
||||
In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being
|
||||
the definition order. In Python 2.7 ``__members__`` is an ``OrderedDict`` if
|
||||
``__order__`` was specified, and a plain ``dict`` otherwise. In all other Python
|
||||
2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was specified
|
||||
as the ``OrderedDict`` type didn't exist yet.
|
||||
|
||||
If you give your ``Enum`` subclass extra methods, like the `Planet`_
|
||||
class above, those methods will show up in a `dir` of the member,
|
||||
but not of the class::
|
||||
|
||||
>>> dir(Planet)
|
||||
['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS',
|
||||
'VENUS', '__class__', '__doc__', '__members__', '__module__']
|
||||
>>> dir(Planet.EARTH)
|
||||
['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value']
|
||||
|
||||
A ``__new__`` method will only be used for the creation of the
|
||||
``Enum`` members -- after that it is replaced. This means if you wish to
|
||||
change how ``Enum`` members are looked up you either have to write a
|
||||
helper function or a ``classmethod``.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,312 @@
|
||||
import codecs
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import shutil
|
||||
import tempfile
|
||||
import traceback
|
||||
|
||||
import appdirs
|
||||
|
||||
from scandir import scandir
|
||||
|
||||
try:
|
||||
from collections.abc import MutableMapping
|
||||
unicode = str
|
||||
except ImportError:
|
||||
# Python 2 imports
|
||||
from collections import MutableMapping
|
||||
FileNotFoundError = IOError
|
||||
|
||||
from .posixemulation import rename
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FileCache(MutableMapping):
    """A persistent file cache that is dictionary-like and has a write buffer.

    *appname* is passed to `appdirs <https://pypi.python.org/pypi/appdirs/>`_
    to determine a system-appropriate location for the cache files. The cache
    directory used is available via :data:`cache_dir`.

    By default, a write buffer is used, so writing to cache files is not done
    until :meth:`sync` is explicitly called. This behavior can be changed using
    the optional *flag* argument.

    .. NOTE::
        Keys and values are always stored as :class:`bytes` objects. If data
        serialization is enabled, keys are returned as :class:`str` or
        :class:`unicode` objects.
        If data serialization is disabled, keys are returned as a
        :class:`bytes` object.

    :param str appname: The app/script the cache should be associated with.
    :param str flag: How the cache should be opened. See below for details.
    :param mode: The Unix mode for the cache files.
    :param str keyencoding: The encoding the keys use, defaults to 'utf-8'.
        This is used if *serialize* is ``False``; the keys are treated as
        :class:`bytes` objects.
    :param bool serialize: Whether or not to (de)serialize the values. If a
        cache is used with a :class:`~shelve.Shelf`, set this to ``False``.
    :param str app_cache_dir: absolute path to root cache directory to be
        used in place of system-appropriate location determined by appdirs

    The optional *flag* argument can be:

    +---------+-------------------------------------------+
    | Value   | Meaning                                   |
    +=========+===========================================+
    | ``'r'`` | Open existing cache for reading only      |
    +---------+-------------------------------------------+
    | ``'w'`` | Open existing cache for reading and       |
    |         | writing                                   |
    +---------+-------------------------------------------+
    | ``'c'`` | Open cache for reading and writing,       |
    |         | creating it if it doesn't exist (default) |
    +---------+-------------------------------------------+
    | ``'n'`` | Always create a new, empty cache, open    |
    |         | for reading and writing                   |
    +---------+-------------------------------------------+

    If a ``'s'`` is appended to the *flag* argument, the cache will be opened
    in sync mode. Writing to the cache will happen immediately and will not be
    buffered.

    If an application needs to use more than one cache, then it should use
    subcaches. To create a subcache, append a series of one or more names
    separated by periods to the application name when creating a
    :class:`FileCache` object (e.g. ``'appname.subcache'`` or
    ``'appname.subcache.subcache'``).
    Subcaches are a way for an application to use more than one cache without
    polluting a user's cache directory. All caches -- main caches or subcaches
    -- are totally independent. The only aspect in which they are linked is
    that all of an application's caches exist in the same system directory.
    Because each cache is independent of every other cache, calling
    :meth:`delete` on an application's main cache will not delete data in
    its subcaches.

    """

    def __init__(self, appname, flag='c', mode=0o666, keyencoding='utf-8',
                 serialize=True, app_cache_dir=None):
        """Initialize a :class:`FileCache` object.

        :raises TypeError: if *flag* is not a :class:`str`.
        :raises ValueError: if *flag* is empty or malformed, or if one of the
            subcache components is named ``'cache'``.
        :raises FileNotFoundError: if the cache directory does not exist and
            *flag* (``'r'`` or ``'w'``) does not allow creating it.
        """
        if not isinstance(flag, str):
            raise TypeError("flag must be str not '{}'".format(type(flag)))
        elif not flag or flag[0] not in 'rwcn':
            # An empty flag used to surface as an IndexError from flag[0];
            # report it like any other invalid flag instead.
            raise ValueError("invalid flag: '{}', first flag must be one of "
                             "'r', 'w', 'c' or 'n'".format(flag))
        elif len(flag) > 1 and flag[1] != 's':
            raise ValueError("invalid flag: '{}', second flag must be "
                             "'s'".format(flag))

        appname, subcache = self._parse_appname(appname)
        # Every (sub)cache stores its files in a directory literally named
        # 'cache', so that name cannot be used as a subcache component.
        if 'cache' in subcache:
            raise ValueError("invalid subcache name: 'cache'.")
        self._is_subcache = bool(subcache)

        if not app_cache_dir:
            app_cache_dir = appdirs.user_cache_dir(appname, appname)
        subcache_dir = os.path.join(app_cache_dir, *subcache)
        self.cache_dir = os.path.join(subcache_dir, 'cache')
        exists = os.path.exists(self.cache_dir)

        # The write buffer only exists when the cache is NOT in sync mode.
        if len(flag) > 1 and flag[1] == 's':
            self._sync = True
        else:
            self._sync = False
            self._buffer = {}

        if exists and 'n' in flag:
            self.clear()
            self.create()
        elif not exists and ('c' in flag or 'n' in flag):
            self.create()
        elif not exists:
            raise FileNotFoundError("no such directory: '{}'".format(
                self.cache_dir))

        # Mode handed to os.fdopen() when writing; a read-only cache gets a
        # read mode, so any attempt to write fails at the file layer.
        self._flag = 'rb' if 'r' in flag else 'wb'
        self._mode = mode
        self._keyencoding = keyencoding
        self._serialize = serialize

    def _parse_appname(self, appname):
        """Splits an appname into the appname and subcache components."""
        components = appname.split('.')
        return components[0], components[1:]

    def create(self):
        """Create the write buffer and cache directory."""
        if not self._sync and not hasattr(self, '_buffer'):
            self._buffer = {}
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)

    def clear(self):
        """Remove all items from the write buffer and cache.

        The write buffer object and cache directory are not deleted.

        """
        self.delete()
        self.create()

    def delete(self):
        """Delete the write buffer and cache directory."""
        if not self._sync:
            del self._buffer
        shutil.rmtree(self.cache_dir)

    def close(self):
        """Sync the write buffer, then close the cache.

        If a closed :class:`FileCache` object's methods are called, a
        :exc:`ValueError` will be raised.

        """
        self.sync()
        self.sync = self.create = self.delete = self._closed
        # Fixed: this used to assign the misspelled ``_read_to_file``, which
        # left ``_read_from_file`` usable on a closed cache.
        self._write_to_file = self._read_from_file = self._closed
        self._key_to_filename = self._filename_to_key = self._closed
        # NOTE: implicit special-method lookup (``cache[key]``, ``len(cache)``)
        # goes through the type, not the instance, so these instance-level
        # assignments only affect explicit calls such as
        # ``cache.__getitem__(key)``. Item access on a closed cache is rejected
        # mainly because the helper methods replaced above now raise.
        self.__getitem__ = self.__setitem__ = self.__delitem__ = self._closed
        self.__iter__ = self.__len__ = self.__contains__ = self._closed

    def sync(self):
        """Sync the write buffer with the cache files and clear the buffer.

        If the :class:`FileCache` object was opened with the optional ``'s'``
        *flag* argument, then calling :meth:`sync` will do nothing.

        A failure to write a single entry is logged and does not abort the
        flush of the remaining entries.
        """
        if self._sync:
            return  # opened in sync mode, so skip the manual sync
        # The cache is flagged as synchronous for the duration of the flush;
        # the finally clause guarantees buffered mode is restored afterwards.
        self._sync = True
        try:
            for ekey in self._buffer:
                filename = self._key_to_filename(ekey)
                try:
                    self._write_to_file(filename, self._buffer[ekey])
                except Exception:
                    # Was a bare ``except:``, which also swallowed
                    # KeyboardInterrupt and SystemExit.
                    logger.error("Couldn't write content from %r to cache file: %r: %s", ekey, filename,
                                 traceback.format_exc())
            self._buffer.clear()
        finally:
            self._sync = False

    def _closed(self, *args, **kwargs):
        """Filler method for closed cache methods."""
        raise ValueError("invalid operation on closed cache")

    def _encode_key(self, key):
        """Encode key using *hex_codec* for constructing a cache filename.

        Keys are implicitly converted to :class:`bytes` if passed as
        :class:`str`.

        :raises TypeError: if *key* is neither text nor :class:`bytes`.
        """
        if isinstance(key, (str, unicode)):
            key = key.encode(self._keyencoding)
        elif not isinstance(key, bytes):
            raise TypeError("key must be bytes or str")
        return codecs.encode(key, 'hex_codec').decode(self._keyencoding)

    def _decode_key(self, key):
        """Decode key using hex_codec to retrieve the original key.

        Keys are returned as :class:`str` if serialization is enabled.
        Keys are returned as :class:`bytes` if serialization is disabled.

        """
        bkey = codecs.decode(key.encode(self._keyencoding), 'hex_codec')
        return bkey.decode(self._keyencoding) if self._serialize else bkey

    def _dumps(self, value):
        """Pickle *value* if serialization is enabled, else pass it through."""
        return value if not self._serialize else pickle.dumps(value)

    def _loads(self, value):
        """Unpickle *value* if serialization is enabled, else pass it through."""
        # SECURITY: pickle.loads executes arbitrary code from its input. The
        # cache directory must only be writable by trusted parties.
        return value if not self._serialize else pickle.loads(value)

    def _key_to_filename(self, key):
        """Convert an encoded key to an absolute cache filename."""
        return os.path.join(self.cache_dir, key)

    def _filename_to_key(self, absfilename):
        """Convert an absolute cache filename to a key name."""
        return os.path.split(absfilename)[1]

    def _all_filenames(self):
        """Yield the absolute filename of every regular file in the cache.

        Symlinks are not followed. If the cache directory cannot be scanned
        the generator simply ends.
        """
        try:
            for entry in scandir(self.cache_dir):
                if entry.is_file(follow_symlinks=False):
                    yield os.path.join(self.cache_dir, entry.name)
        except (FileNotFoundError, OSError):
            # ``raise StopIteration`` inside a generator is converted into a
            # RuntimeError by PEP 479 (Python 3.7+); a plain return ends the
            # iteration on every supported version.
            return

    def _all_keys(self):
        """Return the set of all encoded key names (files plus buffer)."""
        file_keys = [self._filename_to_key(fn) for fn in self._all_filenames()]
        if self._sync:
            return set(file_keys)
        else:
            return set(file_keys + list(self._buffer))

    def _write_to_file(self, filename, bytesvalue):
        """Write bytesvalue to filename.

        The data is written to a temporary file first and then renamed over
        *filename*. If writing or renaming fails, the temporary file is
        removed and the error is re-raised.
        """
        fh, tmp = tempfile.mkstemp()
        try:
            with os.fdopen(fh, self._flag) as f:
                f.write(self._dumps(bytesvalue))
            rename(tmp, filename)
        except Exception:
            # Do not leave orphaned temporary files behind on failure.
            try:
                os.remove(tmp)
            except OSError:
                pass
            raise
        os.chmod(filename, self._mode)

    def _read_from_file(self, filename):
        """Read data from filename.

        Returns ``None`` (after logging a warning) if the file cannot be
        opened or read.
        """
        try:
            with open(filename, 'rb') as f:
                return self._loads(f.read())
        except (IOError, OSError):
            logger.warning('Error opening file: %s', filename)
            return None

    def __setitem__(self, key, value):
        """Store *value* in the write buffer, or on disk in sync mode."""
        ekey = self._encode_key(key)
        if not self._sync:
            self._buffer[ekey] = value
        else:
            filename = self._key_to_filename(ekey)
            self._write_to_file(filename, value)

    def __getitem__(self, key):
        """Return the value for *key*, checking the write buffer first.

        :raises KeyError: if *key* is in neither the buffer nor the cache
            directory.
        """
        ekey = self._encode_key(key)
        if not self._sync:
            try:
                return self._buffer[ekey]
            except KeyError:
                pass
        filename = self._key_to_filename(ekey)
        if filename not in self._all_filenames():
            raise KeyError(key)
        return self._read_from_file(filename)

    def __delitem__(self, key):
        """Remove *key* from the write buffer and from the cache directory.

        In buffered mode a :exc:`KeyError` is raised if *key* is in neither
        place. Failure to remove the cache file itself is ignored.
        """
        ekey = self._encode_key(key)
        filename = self._key_to_filename(ekey)
        if not self._sync:
            try:
                del self._buffer[ekey]
            except KeyError:
                if filename not in self._all_filenames():
                    raise KeyError(key)
        try:
            os.remove(filename)
        except (IOError, OSError):
            # Best effort: the key may have lived only in the buffer.
            pass

    def __iter__(self):
        """Iterate over the decoded keys of the buffer and the cache files."""
        for key in self._all_keys():
            yield self._decode_key(key)

    def __len__(self):
        """Return the number of distinct keys in the buffer and on disk."""
        return len(self._all_keys())

    def __contains__(self, key):
        """Return whether *key* is present in the buffer or on disk."""
        ekey = self._encode_key(key)
        return ekey in self._all_keys()
|
||||
@@ -0,0 +1,113 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
r"""
|
||||
werkzeug.posixemulation
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Provides a POSIX emulation for some features that are relevant to
|
||||
web applications. The main purpose is to simplify support for
|
||||
systems such as Windows NT that are not 100% POSIX compatible.
|
||||
|
||||
Currently this only implements a :func:`rename` function that
|
||||
follows POSIX semantics. Eg: if the target file already exists it
|
||||
will be replaced without asking.
|
||||
|
||||
This module was introduced in 0.6.1 and is not a public interface.
|
||||
It might become one in later versions of Werkzeug.
|
||||
|
||||
:copyright: (c) 2013 by the Werkzeug Team, see AUTHORS for more details.
|
||||
:license: BSD, see LICENSE for more details.
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import errno
|
||||
import time
|
||||
import random
|
||||
import shutil
|
||||
|
||||
|
||||
can_rename_open_file = False
|
||||
if os.name == 'nt': # pragma: no cover
|
||||
_rename = lambda src, dst: False
|
||||
_rename_atomic = lambda src, dst: False
|
||||
if sys.version_info >= (3, 0):
|
||||
unicode = str
|
||||
|
||||
try:
|
||||
import ctypes
|
||||
|
||||
_MOVEFILE_REPLACE_EXISTING = 0x1
|
||||
_MOVEFILE_WRITE_THROUGH = 0x8
|
||||
_MoveFileEx = ctypes.windll.kernel32.MoveFileExW
|
||||
|
||||
def _rename(src, dst):
|
||||
if not isinstance(src, unicode):
|
||||
src = unicode(src, sys.getfilesystemencoding())
|
||||
if not isinstance(dst, unicode):
|
||||
dst = unicode(dst, sys.getfilesystemencoding())
|
||||
if _rename_atomic(src, dst):
|
||||
return True
|
||||
retry = 0
|
||||
rv = False
|
||||
while not rv and retry < 100:
|
||||
rv = _MoveFileEx(src, dst, _MOVEFILE_REPLACE_EXISTING |
|
||||
_MOVEFILE_WRITE_THROUGH)
|
||||
if not rv:
|
||||
time.sleep(0.001)
|
||||
retry += 1
|
||||
return rv
|
||||
|
||||
# new in Vista and Windows Server 2008
|
||||
_CreateTransaction = ctypes.windll.ktmw32.CreateTransaction
|
||||
_CommitTransaction = ctypes.windll.ktmw32.CommitTransaction
|
||||
_MoveFileTransacted = ctypes.windll.kernel32.MoveFileTransactedW
|
||||
_CloseHandle = ctypes.windll.kernel32.CloseHandle
|
||||
can_rename_open_file = True
|
||||
|
||||
def _rename_atomic(src, dst):
|
||||
ta = _CreateTransaction(None, 0, 0, 0, 0, 1000, 'Werkzeug rename')
|
||||
if ta == -1:
|
||||
return False
|
||||
try:
|
||||
retry = 0
|
||||
rv = False
|
||||
while not rv and retry < 100:
|
||||
rv = _MoveFileTransacted(src, dst, None, None,
|
||||
_MOVEFILE_REPLACE_EXISTING |
|
||||
_MOVEFILE_WRITE_THROUGH, ta)
|
||||
if rv:
|
||||
rv = _CommitTransaction(ta)
|
||||
break
|
||||
else:
|
||||
time.sleep(0.001)
|
||||
retry += 1
|
||||
return rv
|
||||
finally:
|
||||
_CloseHandle(ta)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def rename(src, dst):
|
||||
# Try atomic or pseudo-atomic rename
|
||||
if _rename(src, dst):
|
||||
return
|
||||
# Fall back to "move away and replace"
|
||||
try:
|
||||
os.rename(src, dst)
|
||||
except OSError as e:
|
||||
if e.errno != errno.EEXIST:
|
||||
raise
|
||||
old = "%s-%08x" % (dst, random.randint(0, sys.maxint))
|
||||
os.rename(dst, old)
|
||||
os.rename(src, dst)
|
||||
try:
|
||||
os.unlink(old)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
"""
|
||||
If dst on current filesystem then use
|
||||
atomic rename. Otherwise, fall back to a
|
||||
non-atomic copy and remove.
|
||||
"""
|
||||
rename = shutil.move
|
||||
can_rename_open_file = True
|
||||
@@ -1,163 +0,0 @@
|
||||
Change Log
|
||||
----------
|
||||
|
||||
0.999
|
||||
~~~~~
|
||||
|
||||
Released on December 23, 2013
|
||||
|
||||
* Fix #127: add work-around for CPython issue #20007: .read(0) on
|
||||
http.client.HTTPResponse drops the rest of the content.
|
||||
|
||||
* Fix #115: lxml treewalker can now deal with fragments containing, at
|
||||
their root level, text nodes with non-ASCII characters on Python 2.
|
||||
|
||||
|
||||
0.99
|
||||
~~~~
|
||||
|
||||
Released on September 10, 2013
|
||||
|
||||
* No library changes from 1.0b3; released as 0.99 as pip has changed
|
||||
behaviour from 1.4 to avoid installing pre-release versions per
|
||||
PEP 440.
|
||||
|
||||
|
||||
1.0b3
|
||||
~~~~~
|
||||
|
||||
Released on July 24, 2013
|
||||
|
||||
* Removed ``RecursiveTreeWalker`` from ``treewalkers._base``. Any
|
||||
implementation using it should be moved to
|
||||
``NonRecursiveTreeWalker``, as everything bundled with html5lib has
|
||||
for years.
|
||||
|
||||
* Fix #67 so that ``BufferedStream`` to correctly returns a bytes
|
||||
object, thereby fixing any case where html5lib is passed a
|
||||
non-seekable RawIOBase-like object.
|
||||
|
||||
|
||||
1.0b2
|
||||
~~~~~
|
||||
|
||||
Released on June 27, 2013
|
||||
|
||||
* Removed reordering of attributes within the serializer. There is now
|
||||
an ``alphabetical_attributes`` option which preserves the previous
|
||||
behaviour through a new filter. This allows attribute order to be
|
||||
preserved through html5lib if the tree builder preserves order.
|
||||
|
||||
* Removed ``dom2sax`` from DOM treebuilders. It has been replaced by
|
||||
``treeadapters.sax.to_sax`` which is generic and supports any
|
||||
treewalker; it also resolves all known bugs with ``dom2sax``.
|
||||
|
||||
* Fix treewalker assertions on hitting bytes strings on
|
||||
Python 2. Previous to 1.0b1, treewalkers coped with mixed
|
||||
bytes/unicode data on Python 2; this reintroduces this prior
|
||||
behaviour on Python 2. Behaviour is unchanged on Python 3.
|
||||
|
||||
|
||||
1.0b1
|
||||
~~~~~
|
||||
|
||||
Released on May 17, 2013
|
||||
|
||||
* Implementation updated to implement the `HTML specification
|
||||
<http://www.whatwg.org/specs/web-apps/current-work/>`_ as of 5th May
|
||||
2013 (`SVN <http://svn.whatwg.org/webapps/>`_ revision r7867).
|
||||
|
||||
* Python 3.2+ supported in a single codebase using the ``six`` library.
|
||||
|
||||
* Removed support for Python 2.5 and older.
|
||||
|
||||
* Removed the deprecated Beautiful Soup 3 treebuilder.
|
||||
``beautifulsoup4`` can use ``html5lib`` as a parser instead. Note that
|
||||
since it doesn't support namespaces, foreign content like SVG and
|
||||
MathML is parsed incorrectly.
|
||||
|
||||
* Removed ``simpletree`` from the package. The default tree builder is
|
||||
now ``etree`` (using the ``xml.etree.cElementTree`` implementation if
|
||||
available, and ``xml.etree.ElementTree`` otherwise).
|
||||
|
||||
* Removed the ``XHTMLSerializer`` as it never actually guaranteed its
|
||||
output was well-formed XML, and hence provided little of use.
|
||||
|
||||
* Removed default DOM treebuilder, so ``html5lib.treebuilders.dom`` is no
|
||||
longer supported. ``html5lib.treebuilders.getTreeBuilder("dom")`` will
|
||||
return the default DOM treebuilder, which uses ``xml.dom.minidom``.
|
||||
|
||||
* Optional heuristic character encoding detection now based on
|
||||
``charade`` for Python 2.6 - 3.3 compatibility.
|
||||
|
||||
* Optional ``Genshi`` treewalker support fixed.
|
||||
|
||||
* Many bugfixes, including:
|
||||
|
||||
* #33: null in attribute value breaks XML AttValue;
|
||||
|
||||
* #4: nested, indirect descendant, <button> causes infinite loop;
|
||||
|
||||
* `Google Code 215
|
||||
<http://code.google.com/p/html5lib/issues/detail?id=215>`_: Properly
|
||||
detect seekable streams;
|
||||
|
||||
* `Google Code 206
|
||||
<http://code.google.com/p/html5lib/issues/detail?id=206>`_: add
|
||||
support for <video preload=...>, <audio preload=...>;
|
||||
|
||||
* `Google Code 205
|
||||
<http://code.google.com/p/html5lib/issues/detail?id=205>`_: add
|
||||
support for <video poster=...>;
|
||||
|
||||
* `Google Code 202
|
||||
<http://code.google.com/p/html5lib/issues/detail?id=202>`_: Unicode
|
||||
file breaks InputStream.
|
||||
|
||||
* Source code is now mostly PEP 8 compliant.
|
||||
|
||||
* Test harness has been improved and now depends on ``nose``.
|
||||
|
||||
* Documentation updated and moved to http://html5lib.readthedocs.org/.
|
||||
|
||||
|
||||
0.95
|
||||
~~~~
|
||||
|
||||
Released on February 11, 2012
|
||||
|
||||
|
||||
0.90
|
||||
~~~~
|
||||
|
||||
Released on January 17, 2010
|
||||
|
||||
|
||||
0.11.1
|
||||
~~~~~~
|
||||
|
||||
Released on June 12, 2008
|
||||
|
||||
|
||||
0.11
|
||||
~~~~
|
||||
|
||||
Released on June 10, 2008
|
||||
|
||||
|
||||
0.10
|
||||
~~~~
|
||||
|
||||
Released on October 7, 2007
|
||||
|
||||
|
||||
0.9
|
||||
~~~
|
||||
|
||||
Released on March 11, 2007
|
||||
|
||||
|
||||
0.2
|
||||
~~~
|
||||
|
||||
Released on January 8, 2007
|
||||
@@ -1,157 +0,0 @@
|
||||
html5lib
|
||||
========
|
||||
|
||||
.. image:: https://travis-ci.org/html5lib/html5lib-python.png?branch=master
|
||||
:target: https://travis-ci.org/html5lib/html5lib-python
|
||||
|
||||
html5lib is a pure-python library for parsing HTML. It is designed to
|
||||
conform to the WHATWG HTML specification, as is implemented by all major
|
||||
web browsers.
|
||||
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
Simple usage follows this pattern:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
with open("mydocument.html", "rb") as f:
|
||||
document = html5lib.parse(f)
|
||||
|
||||
or:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
document = html5lib.parse("<p>Hello World!")
|
||||
|
||||
By default, the ``document`` will be an ``xml.etree`` element instance.
|
||||
Whenever possible, html5lib chooses the accelerated ``ElementTree``
|
||||
implementation (i.e. ``xml.etree.cElementTree`` on Python 2.x).
|
||||
|
||||
Two other tree types are supported: ``xml.dom.minidom`` and
|
||||
``lxml.etree``. To use an alternative format, specify the name of
|
||||
a treebuilder:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
with open("mydocument.html", "rb") as f:
|
||||
lxml_etree_document = html5lib.parse(f, treebuilder="lxml")
|
||||
|
||||
When using with ``urllib2`` (Python 2), the charset from HTTP should be
|
||||
pass into html5lib as follows:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from contextlib import closing
|
||||
from urllib2 import urlopen
|
||||
import html5lib
|
||||
|
||||
with closing(urlopen("http://example.com/")) as f:
|
||||
document = html5lib.parse(f, encoding=f.info().getparam("charset"))
|
||||
|
||||
When using with ``urllib.request`` (Python 3), the charset from HTTP
|
||||
should be pass into html5lib as follows:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from urllib.request import urlopen
|
||||
import html5lib
|
||||
|
||||
with urlopen("http://example.com/") as f:
|
||||
document = html5lib.parse(f, encoding=f.info().get_content_charset())
|
||||
|
||||
To have more control over the parser, create a parser object explicitly.
|
||||
For instance, to make the parser raise exceptions on parse errors, use:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
with open("mydocument.html", "rb") as f:
|
||||
parser = html5lib.HTMLParser(strict=True)
|
||||
document = parser.parse(f)
|
||||
|
||||
When you're instantiating parser objects explicitly, pass a treebuilder
|
||||
class as the ``tree`` keyword argument to use an alternative document
|
||||
format:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
|
||||
minidom_document = parser.parse("<p>Hello World!")
|
||||
|
||||
More documentation is available at http://html5lib.readthedocs.org/.
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
html5lib works on CPython 2.6+, CPython 3.2+ and PyPy. To install it,
|
||||
use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ pip install html5lib
|
||||
|
||||
|
||||
Optional Dependencies
|
||||
---------------------
|
||||
|
||||
The following third-party libraries may be used for additional
|
||||
functionality:
|
||||
|
||||
- ``datrie`` can be used to improve parsing performance (though in
|
||||
almost all cases the improvement is marginal);
|
||||
|
||||
- ``lxml`` is supported as a tree format (for both building and
|
||||
walking) under CPython (but *not* PyPy where it is known to cause
|
||||
segfaults);
|
||||
|
||||
- ``genshi`` has a treewalker (but not builder); and
|
||||
|
||||
- ``charade`` can be used as a fallback when character encoding cannot
|
||||
be determined; ``chardet``, from which it was forked, can also be used
|
||||
on Python 2.
|
||||
|
||||
- ``ordereddict`` can be used under Python 2.6
|
||||
(``collections.OrderedDict`` is used instead on later versions) to
|
||||
serialize attributes in alphabetical order.
|
||||
|
||||
|
||||
Bugs
|
||||
----
|
||||
|
||||
Please report any bugs on the `issue tracker
|
||||
<https://github.com/html5lib/html5lib-python/issues>`_.
|
||||
|
||||
|
||||
Tests
|
||||
-----
|
||||
|
||||
Unit tests require the ``nose`` library and can be run using the
|
||||
``nosetests`` command in the root directory; ``ordereddict`` is
|
||||
required under Python 2.6. All should pass.
|
||||
|
||||
Test data are contained in a separate `html5lib-tests
|
||||
<https://github.com/html5lib/html5lib-tests>`_ repository and included
|
||||
as a submodule, thus for git checkouts they must be initialized::
|
||||
|
||||
$ git submodule init
|
||||
$ git submodule update
|
||||
|
||||
If you have all compatible Python implementations available on your
|
||||
system, you can run tests on all of them using the ``tox`` utility,
|
||||
which can be found on PyPI.
|
||||
|
||||
|
||||
Questions?
|
||||
----------
|
||||
|
||||
There's a mailing list available for support on Google Groups,
|
||||
`html5lib-discuss <http://groups.google.com/group/html5lib-discuss>`_,
|
||||
though you may get a quicker response asking on IRC in `#whatwg on
|
||||
irc.freenode.net <http://wiki.whatwg.org/wiki/IRC>`_.
|
||||
@@ -1,14 +1,23 @@
|
||||
"""
|
||||
HTML parsing library based on the WHATWG "HTML5"
|
||||
specification. The parser is designed to be compatible with existing
|
||||
HTML found in the wild and implements well-defined error recovery that
|
||||
HTML parsing library based on the `WHATWG HTML specification
|
||||
<https://whatwg.org/html>`_. The parser is designed to be compatible with
|
||||
existing HTML found in the wild and implements well-defined error recovery that
|
||||
is largely compatible with modern desktop web browsers.
|
||||
|
||||
Example usage:
|
||||
Example usage::
|
||||
|
||||
import html5lib
|
||||
f = open("my_document.html")
|
||||
tree = html5lib.parse(f)
|
||||
import html5lib
|
||||
with open("my_document.html", "rb") as f:
|
||||
tree = html5lib.parse(f)
|
||||
|
||||
For convenience, this module re-exports the following names:
|
||||
|
||||
* :func:`~.html5parser.parse`
|
||||
* :func:`~.html5parser.parseFragment`
|
||||
* :class:`~.html5parser.HTMLParser`
|
||||
* :func:`~.treebuilders.getTreeBuilder`
|
||||
* :func:`~.treewalkers.getTreeWalker`
|
||||
* :func:`~.serializer.serialize`
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
@@ -20,4 +29,7 @@ from .serializer import serialize
|
||||
|
||||
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
|
||||
"getTreeWalker", "serialize"]
|
||||
__version__ = "0.999"
|
||||
|
||||
# this has to be at the top level, see how setup.py parses this
|
||||
#: Distribution version number.
|
||||
__version__ = "1.0.1"
|
||||
|
||||
+9
-6
@@ -175,18 +175,18 @@ def escapeRegexp(string):
|
||||
return string
|
||||
|
||||
# output from the above
|
||||
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0
f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
|
||||
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0
f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
|
||||
|
||||
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3
040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
|
||||
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3
040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
|
||||
|
||||
# Simpler things
|
||||
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
|
||||
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
|
||||
|
||||
|
||||
class InfosetFilter(object):
|
||||
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
|
||||
|
||||
def __init__(self, replaceChars=None,
|
||||
def __init__(self,
|
||||
dropXmlnsLocalName=False,
|
||||
dropXmlnsAttrNs=False,
|
||||
preventDoubleDashComments=False,
|
||||
@@ -217,7 +217,7 @@ class InfosetFilter(object):
|
||||
else:
|
||||
return self.toXmlName(name)
|
||||
|
||||
def coerceElement(self, name, namespace=None):
|
||||
def coerceElement(self, name):
|
||||
return self.toXmlName(name)
|
||||
|
||||
def coerceComment(self, data):
|
||||
@@ -225,11 +225,14 @@ class InfosetFilter(object):
|
||||
while "--" in data:
|
||||
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
|
||||
data = data.replace("--", "- -")
|
||||
if data.endswith("-"):
|
||||
warnings.warn("Comments cannot end in a dash", DataLossWarning)
|
||||
data += " "
|
||||
return data
|
||||
|
||||
def coerceCharacters(self, data):
|
||||
if self.replaceFormFeedCharacters:
|
||||
for i in range(data.count("\x0C")):
|
||||
for _ in range(data.count("\x0C")):
|
||||
warnings.warn("Text cannot contain U+000C", DataLossWarning)
|
||||
data = data.replace("\x0C", " ")
|
||||
# Other non-xml characters
|
||||
+134
-97
@@ -1,13 +1,16 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
from six import text_type
|
||||
from six.moves import http_client
|
||||
|
||||
from six import text_type, binary_type
|
||||
from six.moves import http_client, urllib
|
||||
|
||||
import codecs
|
||||
import re
|
||||
|
||||
import webencodings
|
||||
|
||||
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
|
||||
from .constants import encodings, ReparseException
|
||||
from . import utils
|
||||
from .constants import _ReparseException
|
||||
from . import _utils
|
||||
|
||||
from io import StringIO
|
||||
|
||||
@@ -16,19 +19,26 @@ try:
|
||||
except ImportError:
|
||||
BytesIO = StringIO
|
||||
|
||||
try:
|
||||
from io import BufferedIOBase
|
||||
except ImportError:
|
||||
class BufferedIOBase(object):
|
||||
pass
|
||||
|
||||
# Non-unicode versions of constants for use in the pre-parser
|
||||
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
|
||||
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
|
||||
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
|
||||
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
|
||||
|
||||
invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]")
|
||||
|
||||
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa
|
||||
|
||||
if _utils.supports_lone_surrogates:
|
||||
# Use one extra step of indirection and create surrogates with
|
||||
# eval. Not using this indirection would introduce an illegal
|
||||
# unicode literal on platforms not supporting such lone
|
||||
# surrogates.
|
||||
assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
|
||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
|
||||
eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used
|
||||
"]")
|
||||
else:
|
||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
|
||||
|
||||
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
||||
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
||||
@@ -38,7 +48,7 @@ non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
||||
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
|
||||
0x10FFFE, 0x10FFFF])
|
||||
|
||||
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
|
||||
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
|
||||
|
||||
# Cache for charsUntil()
|
||||
charsUntilRegEx = {}
|
||||
@@ -118,10 +128,13 @@ class BufferedStream(object):
|
||||
return b"".join(rv)
|
||||
|
||||
|
||||
def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
|
||||
if isinstance(source, http_client.HTTPResponse):
|
||||
# Work around Python bug #20007: read(0) closes the connection.
|
||||
# http://bugs.python.org/issue20007
|
||||
def HTMLInputStream(source, **kwargs):
|
||||
# Work around Python bug #20007: read(0) closes the connection.
|
||||
# http://bugs.python.org/issue20007
|
||||
if (isinstance(source, http_client.HTTPResponse) or
|
||||
# Also check for addinfourl wrapping HTTPResponse
|
||||
(isinstance(source, urllib.response.addbase) and
|
||||
isinstance(source.fp, http_client.HTTPResponse))):
|
||||
isUnicode = False
|
||||
elif hasattr(source, "read"):
|
||||
isUnicode = isinstance(source.read(0), text_type)
|
||||
@@ -129,12 +142,13 @@ def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
|
||||
isUnicode = isinstance(source, text_type)
|
||||
|
||||
if isUnicode:
|
||||
if encoding is not None:
|
||||
raise TypeError("Cannot explicitly set an encoding with a unicode string")
|
||||
encodings = [x for x in kwargs if x.endswith("_encoding")]
|
||||
if encodings:
|
||||
raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
|
||||
|
||||
return HTMLUnicodeInputStream(source)
|
||||
return HTMLUnicodeInputStream(source, **kwargs)
|
||||
else:
|
||||
return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)
|
||||
return HTMLBinaryInputStream(source, **kwargs)
|
||||
|
||||
|
||||
class HTMLUnicodeInputStream(object):
|
||||
@@ -160,22 +174,21 @@ class HTMLUnicodeInputStream(object):
|
||||
regardless of any BOM or later declaration (such as in a meta
|
||||
element)
|
||||
|
||||
parseMeta - Look for a <meta> element containing encoding information
|
||||
|
||||
"""
|
||||
|
||||
# Craziness
|
||||
if len("\U0010FFFF") == 1:
|
||||
if not _utils.supports_lone_surrogates:
|
||||
# Such platforms will have already checked for such
|
||||
# surrogate errors, so no need to do this checking.
|
||||
self.reportCharacterErrors = None
|
||||
elif len("\U0010FFFF") == 1:
|
||||
self.reportCharacterErrors = self.characterErrorsUCS4
|
||||
self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]")
|
||||
else:
|
||||
self.reportCharacterErrors = self.characterErrorsUCS2
|
||||
self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])")
|
||||
|
||||
# List of where new lines occur
|
||||
self.newLines = [0]
|
||||
|
||||
self.charEncoding = ("utf-8", "certain")
|
||||
self.charEncoding = (lookupEncoding("utf-8"), "certain")
|
||||
self.dataStream = self.openStream(source)
|
||||
|
||||
self.reset()
|
||||
@@ -265,12 +278,10 @@ class HTMLUnicodeInputStream(object):
|
||||
self._bufferedCharacter = data[-1]
|
||||
data = data[:-1]
|
||||
|
||||
self.reportCharacterErrors(data)
|
||||
if self.reportCharacterErrors:
|
||||
self.reportCharacterErrors(data)
|
||||
|
||||
# Replace invalid characters
|
||||
# Note U+0000 is dealt with in the tokenizer
|
||||
data = self.replaceCharactersRegexp.sub("\ufffd", data)
|
||||
|
||||
data = data.replace("\r\n", "\n")
|
||||
data = data.replace("\r", "\n")
|
||||
|
||||
@@ -280,7 +291,7 @@ class HTMLUnicodeInputStream(object):
|
||||
return True
|
||||
|
||||
def characterErrorsUCS4(self, data):
|
||||
for i in range(len(invalid_unicode_re.findall(data))):
|
||||
for _ in range(len(invalid_unicode_re.findall(data))):
|
||||
self.errors.append("invalid-codepoint")
|
||||
|
||||
def characterErrorsUCS2(self, data):
|
||||
@@ -293,9 +304,9 @@ class HTMLUnicodeInputStream(object):
|
||||
codepoint = ord(match.group())
|
||||
pos = match.start()
|
||||
# Pretty sure there should be endianness issues here
|
||||
if utils.isSurrogatePair(data[pos:pos + 2]):
|
||||
if _utils.isSurrogatePair(data[pos:pos + 2]):
|
||||
# We have a surrogate pair!
|
||||
char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
|
||||
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
|
||||
if char_val in non_bmp_invalid_codepoints:
|
||||
self.errors.append("invalid-codepoint")
|
||||
skip = True
|
||||
@@ -378,7 +389,9 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
|
||||
def __init__(self, source, override_encoding=None, transport_encoding=None,
|
||||
same_origin_parent_encoding=None, likely_encoding=None,
|
||||
default_encoding="windows-1252", useChardet=True):
|
||||
"""Initialises the HTMLInputStream.
|
||||
|
||||
HTMLInputStream(source, [encoding]) -> Normalized stream from source
|
||||
@@ -391,8 +404,6 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
regardless of any BOM or later declaration (such as in a meta
|
||||
element)
|
||||
|
||||
parseMeta - Look for a <meta> element containing encoding information
|
||||
|
||||
"""
|
||||
# Raw Stream - for unicode objects this will encode to utf-8 and set
|
||||
# self.charEncoding as appropriate
|
||||
@@ -400,27 +411,28 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
|
||||
HTMLUnicodeInputStream.__init__(self, self.rawStream)
|
||||
|
||||
self.charEncoding = (codecName(encoding), "certain")
|
||||
|
||||
# Encoding Information
|
||||
# Number of bytes to use when looking for a meta element with
|
||||
# encoding information
|
||||
self.numBytesMeta = 512
|
||||
self.numBytesMeta = 1024
|
||||
# Number of bytes to use when detecting encoding using chardet
|
||||
self.numBytesChardet = 100
|
||||
# Encoding to use if no other information can be found
|
||||
self.defaultEncoding = "windows-1252"
|
||||
# Things from args
|
||||
self.override_encoding = override_encoding
|
||||
self.transport_encoding = transport_encoding
|
||||
self.same_origin_parent_encoding = same_origin_parent_encoding
|
||||
self.likely_encoding = likely_encoding
|
||||
self.default_encoding = default_encoding
|
||||
|
||||
# Detect encoding iff no explicit "transport level" encoding is supplied
|
||||
if (self.charEncoding[0] is None):
|
||||
self.charEncoding = self.detectEncoding(parseMeta, chardet)
|
||||
# Determine encoding
|
||||
self.charEncoding = self.determineEncoding(useChardet)
|
||||
assert self.charEncoding[0] is not None
|
||||
|
||||
# Call superclass
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
|
||||
'replace')
|
||||
self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
|
||||
HTMLUnicodeInputStream.reset(self)
|
||||
|
||||
def openStream(self, source):
|
||||
@@ -437,29 +449,50 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
|
||||
try:
|
||||
stream.seek(stream.tell())
|
||||
except:
|
||||
except: # pylint:disable=bare-except
|
||||
stream = BufferedStream(stream)
|
||||
|
||||
return stream
|
||||
|
||||
def detectEncoding(self, parseMeta=True, chardet=True):
|
||||
# First look for a BOM
|
||||
def determineEncoding(self, chardet=True):
|
||||
# BOMs take precedence over everything
|
||||
# This will also read past the BOM if present
|
||||
encoding = self.detectBOM()
|
||||
confidence = "certain"
|
||||
# If there is no BOM need to look for meta elements with encoding
|
||||
# information
|
||||
if encoding is None and parseMeta:
|
||||
encoding = self.detectEncodingMeta()
|
||||
confidence = "tentative"
|
||||
# Guess with chardet, if avaliable
|
||||
if encoding is None and chardet:
|
||||
confidence = "tentative"
|
||||
charEncoding = self.detectBOM(), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# If we've been overridden, we've been overridden
|
||||
charEncoding = lookupEncoding(self.override_encoding), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Now check the transport layer
|
||||
charEncoding = lookupEncoding(self.transport_encoding), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Look for meta elements with encoding information
|
||||
charEncoding = self.detectEncodingMeta(), "tentative"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Parent document encoding
|
||||
charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
|
||||
if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
|
||||
return charEncoding
|
||||
|
||||
# "likely" encoding
|
||||
charEncoding = lookupEncoding(self.likely_encoding), "tentative"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Guess with chardet, if available
|
||||
if chardet:
|
||||
try:
|
||||
try:
|
||||
from charade.universaldetector import UniversalDetector
|
||||
except ImportError:
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
buffers = []
|
||||
detector = UniversalDetector()
|
||||
while not detector.done:
|
||||
@@ -470,37 +503,34 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
buffers.append(buffer)
|
||||
detector.feed(buffer)
|
||||
detector.close()
|
||||
encoding = detector.result['encoding']
|
||||
encoding = lookupEncoding(detector.result['encoding'])
|
||||
self.rawStream.seek(0)
|
||||
except ImportError:
|
||||
pass
|
||||
# If all else fails use the default encoding
|
||||
if encoding is None:
|
||||
confidence = "tentative"
|
||||
encoding = self.defaultEncoding
|
||||
if encoding is not None:
|
||||
return encoding, "tentative"
|
||||
|
||||
# Substitute for equivalent encodings:
|
||||
encodingSub = {"iso-8859-1": "windows-1252"}
|
||||
# Try the default encoding
|
||||
charEncoding = lookupEncoding(self.default_encoding), "tentative"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
if encoding.lower() in encodingSub:
|
||||
encoding = encodingSub[encoding.lower()]
|
||||
|
||||
return encoding, confidence
|
||||
# Fallback to html5lib's default if even that hasn't worked
|
||||
return lookupEncoding("windows-1252"), "tentative"
|
||||
|
||||
def changeEncoding(self, newEncoding):
|
||||
assert self.charEncoding[1] != "certain"
|
||||
newEncoding = codecName(newEncoding)
|
||||
if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
|
||||
newEncoding = "utf-8"
|
||||
newEncoding = lookupEncoding(newEncoding)
|
||||
if newEncoding is None:
|
||||
return
|
||||
if newEncoding.name in ("utf-16be", "utf-16le"):
|
||||
newEncoding = lookupEncoding("utf-8")
|
||||
assert newEncoding is not None
|
||||
elif newEncoding == self.charEncoding[0]:
|
||||
self.charEncoding = (self.charEncoding[0], "certain")
|
||||
else:
|
||||
self.rawStream.seek(0)
|
||||
self.reset()
|
||||
self.charEncoding = (newEncoding, "certain")
|
||||
raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
|
||||
self.reset()
|
||||
raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
|
||||
|
||||
def detectBOM(self):
|
||||
"""Attempts to detect at BOM at the start of the stream. If
|
||||
@@ -508,8 +538,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
encoding otherwise return None"""
|
||||
bomDict = {
|
||||
codecs.BOM_UTF8: 'utf-8',
|
||||
codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
|
||||
codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
|
||||
codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
|
||||
codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
|
||||
}
|
||||
|
||||
# Go to beginning of file and read in 4 bytes
|
||||
@@ -529,9 +559,12 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
|
||||
# Set the read position past the BOM if one was found, otherwise
|
||||
# set it to the start of the stream
|
||||
self.rawStream.seek(encoding and seek or 0)
|
||||
|
||||
return encoding
|
||||
if encoding:
|
||||
self.rawStream.seek(seek)
|
||||
return lookupEncoding(encoding)
|
||||
else:
|
||||
self.rawStream.seek(0)
|
||||
return None
|
||||
|
||||
def detectEncodingMeta(self):
|
||||
"""Report the encoding declared by the meta element
|
||||
@@ -542,8 +575,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
self.rawStream.seek(0)
|
||||
encoding = parser.getEncoding()
|
||||
|
||||
if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
|
||||
encoding = "utf-8"
|
||||
if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
|
||||
encoding = lookupEncoding("utf-8")
|
||||
|
||||
return encoding
|
||||
|
||||
@@ -557,6 +590,7 @@ class EncodingBytes(bytes):
|
||||
return bytes.__new__(self, value.lower())
|
||||
|
||||
def __init__(self, value):
|
||||
# pylint:disable=unused-argument
|
||||
self._position = -1
|
||||
|
||||
def __iter__(self):
|
||||
@@ -667,7 +701,7 @@ class EncodingParser(object):
|
||||
(b"<!", self.handleOther),
|
||||
(b"<?", self.handleOther),
|
||||
(b"<", self.handlePossibleStartTag))
|
||||
for byte in self.data:
|
||||
for _ in self.data:
|
||||
keepParsing = True
|
||||
for key, method in methodDispatch:
|
||||
if self.data.matchBytes(key):
|
||||
@@ -706,7 +740,7 @@ class EncodingParser(object):
|
||||
return False
|
||||
elif attr[0] == b"charset":
|
||||
tentativeEncoding = attr[1]
|
||||
codec = codecName(tentativeEncoding)
|
||||
codec = lookupEncoding(tentativeEncoding)
|
||||
if codec is not None:
|
||||
self.encoding = codec
|
||||
return False
|
||||
@@ -714,7 +748,7 @@ class EncodingParser(object):
|
||||
contentParser = ContentAttrParser(EncodingBytes(attr[1]))
|
||||
tentativeEncoding = contentParser.parse()
|
||||
if tentativeEncoding is not None:
|
||||
codec = codecName(tentativeEncoding)
|
||||
codec = lookupEncoding(tentativeEncoding)
|
||||
if codec is not None:
|
||||
if hasPragma:
|
||||
self.encoding = codec
|
||||
@@ -871,16 +905,19 @@ class ContentAttrParser(object):
|
||||
return None
|
||||
|
||||
|
||||
def codecName(encoding):
|
||||
def lookupEncoding(encoding):
|
||||
"""Return the python codec name corresponding to an encoding or None if the
|
||||
string doesn't correspond to a valid encoding."""
|
||||
if isinstance(encoding, bytes):
|
||||
if isinstance(encoding, binary_type):
|
||||
try:
|
||||
encoding = encoding.decode("ascii")
|
||||
except UnicodeDecodeError:
|
||||
return None
|
||||
if encoding:
|
||||
canonicalName = ascii_punctuation_re.sub("", encoding).lower()
|
||||
return encodings.get(canonicalName, None)
|
||||
|
||||
if encoding is not None:
|
||||
try:
|
||||
return webencodings.lookup(encoding)
|
||||
except AttributeError:
|
||||
return None
|
||||
else:
|
||||
return None
|
||||
+13
-23
@@ -1,9 +1,6 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
try:
|
||||
chr = unichr # flake8: noqa
|
||||
except NameError:
|
||||
pass
|
||||
from six import unichr as chr
|
||||
|
||||
from collections import deque
|
||||
|
||||
@@ -14,9 +11,9 @@ from .constants import digits, hexDigits, EOF
|
||||
from .constants import tokenTypes, tagTokenTypes
|
||||
from .constants import replacementCharacters
|
||||
|
||||
from .inputstream import HTMLInputStream
|
||||
from ._inputstream import HTMLInputStream
|
||||
|
||||
from .trie import Trie
|
||||
from ._trie import Trie
|
||||
|
||||
entitiesTrie = Trie(entities)
|
||||
|
||||
@@ -34,16 +31,11 @@ class HTMLTokenizer(object):
|
||||
Points to HTMLInputStream object.
|
||||
"""
|
||||
|
||||
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
|
||||
lowercaseElementName=True, lowercaseAttrName=True, parser=None):
|
||||
def __init__(self, stream, parser=None, **kwargs):
|
||||
|
||||
self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet)
|
||||
self.stream = HTMLInputStream(stream, **kwargs)
|
||||
self.parser = parser
|
||||
|
||||
# Perform case conversions?
|
||||
self.lowercaseElementName = lowercaseElementName
|
||||
self.lowercaseAttrName = lowercaseAttrName
|
||||
|
||||
# Setup the initial tokenizer state
|
||||
self.escapeFlag = False
|
||||
self.lastFourChars = []
|
||||
@@ -147,8 +139,8 @@ class HTMLTokenizer(object):
|
||||
output = "&"
|
||||
|
||||
charStack = [self.stream.char()]
|
||||
if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&")
|
||||
or (allowedChar is not None and allowedChar == charStack[0])):
|
||||
if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or
|
||||
(allowedChar is not None and allowedChar == charStack[0])):
|
||||
self.stream.unget(charStack[0])
|
||||
|
||||
elif charStack[0] == "#":
|
||||
@@ -235,8 +227,7 @@ class HTMLTokenizer(object):
|
||||
token = self.currentToken
|
||||
# Add token to the queue to be yielded
|
||||
if (token["type"] in tagTokenTypes):
|
||||
if self.lowercaseElementName:
|
||||
token["name"] = token["name"].translate(asciiUpper2Lower)
|
||||
token["name"] = token["name"].translate(asciiUpper2Lower)
|
||||
if token["type"] == tokenTypes["EndTag"]:
|
||||
if token["data"]:
|
||||
self.tokenQueue.append({"type": tokenTypes["ParseError"],
|
||||
@@ -921,10 +912,9 @@ class HTMLTokenizer(object):
|
||||
# Attributes are not dropped at this stage. That happens when the
|
||||
# start tag token is emitted so values can still be safely appended
|
||||
# to attributes, but we do want to report the parse error in time.
|
||||
if self.lowercaseAttrName:
|
||||
self.currentToken["data"][-1][0] = (
|
||||
self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
|
||||
for name, value in self.currentToken["data"][:-1]:
|
||||
self.currentToken["data"][-1][0] = (
|
||||
self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
|
||||
for name, _ in self.currentToken["data"][:-1]:
|
||||
if self.currentToken["data"][-1][0] == name:
|
||||
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
|
||||
"duplicate-attribute"})
|
||||
@@ -1716,11 +1706,11 @@ class HTMLTokenizer(object):
|
||||
else:
|
||||
data.append(char)
|
||||
|
||||
data = "".join(data)
|
||||
data = "".join(data) # pylint:disable=redefined-variable-type
|
||||
# Deal with null here rather than in the parser
|
||||
nullCount = data.count("\u0000")
|
||||
if nullCount > 0:
|
||||
for i in range(nullCount):
|
||||
for _ in range(nullCount):
|
||||
self.tokenQueue.append({"type": tokenTypes["ParseError"],
|
||||
"data": "invalid-codepoint"})
|
||||
data = data.replace("\u0000", "\uFFFD")
|
||||
+2
@@ -4,9 +4,11 @@ from .py import Trie as PyTrie
|
||||
|
||||
Trie = PyTrie
|
||||
|
||||
# pylint:disable=wrong-import-position
|
||||
try:
|
||||
from .datrie import Trie as DATrie
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
Trie = DATrie
|
||||
# pylint:enable=wrong-import-position
|
||||
+3
-3
@@ -7,13 +7,13 @@ class Trie(Mapping):
|
||||
"""Abstract base class for tries"""
|
||||
|
||||
def keys(self, prefix=None):
|
||||
keys = super().keys()
|
||||
# pylint:disable=arguments-differ
|
||||
keys = super(Trie, self).keys()
|
||||
|
||||
if prefix is None:
|
||||
return set(keys)
|
||||
|
||||
# Python 2.6: no set comprehensions
|
||||
return set([x for x in keys if x.startswith(prefix)])
|
||||
return {x for x in keys if x.startswith(prefix)}
|
||||
|
||||
def has_keys_with_prefix(self, prefix):
|
||||
for key in self.keys():
|
||||
+49
-7
@@ -2,6 +2,8 @@ from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from types import ModuleType
|
||||
|
||||
from six import text_type
|
||||
|
||||
try:
|
||||
import xml.etree.cElementTree as default_etree
|
||||
except ImportError:
|
||||
@@ -9,7 +11,26 @@ except ImportError:
|
||||
|
||||
|
||||
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
|
||||
"surrogatePairToCodepoint", "moduleFactoryFactory"]
|
||||
"surrogatePairToCodepoint", "moduleFactoryFactory",
|
||||
"supports_lone_surrogates"]
|
||||
|
||||
|
||||
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
|
||||
# caught by the below test. In general this would be any platform
|
||||
# using UTF-16 as its encoding of unicode strings, such as
|
||||
# Jython. This is because UTF-16 itself is based on the use of such
|
||||
# surrogates, and there is no mechanism to further escape such
|
||||
# escapes.
|
||||
try:
|
||||
_x = eval('"\\uD800"') # pylint:disable=eval-used
|
||||
if not isinstance(_x, text_type):
|
||||
# We need this with u"" because of http://bugs.jython.org/issue2039
|
||||
_x = eval('u"\\uD800"') # pylint:disable=eval-used
|
||||
assert isinstance(_x, text_type)
|
||||
except: # pylint:disable=bare-except
|
||||
supports_lone_surrogates = False
|
||||
else:
|
||||
supports_lone_surrogates = True
|
||||
|
||||
|
||||
class MethodDispatcher(dict):
|
||||
@@ -31,19 +52,20 @@ class MethodDispatcher(dict):
|
||||
# anything here.
|
||||
_dictEntries = []
|
||||
for name, value in items:
|
||||
if type(name) in (list, tuple, frozenset, set):
|
||||
if isinstance(name, (list, tuple, frozenset, set)):
|
||||
for item in name:
|
||||
_dictEntries.append((item, value))
|
||||
else:
|
||||
_dictEntries.append((name, value))
|
||||
dict.__init__(self, _dictEntries)
|
||||
assert len(self) == len(_dictEntries)
|
||||
self.default = None
|
||||
|
||||
def __getitem__(self, key):
|
||||
return dict.get(self, key, self.default)
|
||||
|
||||
|
||||
# Some utility functions to dal with weirdness around UCS2 vs UCS4
|
||||
# Some utility functions to deal with weirdness around UCS2 vs UCS4
|
||||
# python builds
|
||||
|
||||
def isSurrogatePair(data):
|
||||
@@ -70,13 +92,33 @@ def moduleFactoryFactory(factory):
|
||||
else:
|
||||
name = b"_%s_factory" % baseModule.__name__
|
||||
|
||||
if name in moduleCache:
|
||||
return moduleCache[name]
|
||||
else:
|
||||
kwargs_tuple = tuple(kwargs.items())
|
||||
|
||||
try:
|
||||
return moduleCache[name][args][kwargs_tuple]
|
||||
except KeyError:
|
||||
mod = ModuleType(name)
|
||||
objs = factory(baseModule, *args, **kwargs)
|
||||
mod.__dict__.update(objs)
|
||||
moduleCache[name] = mod
|
||||
if "name" not in moduleCache:
|
||||
moduleCache[name] = {}
|
||||
if "args" not in moduleCache[name]:
|
||||
moduleCache[name][args] = {}
|
||||
if "kwargs" not in moduleCache[name][args]:
|
||||
moduleCache[name][args][kwargs_tuple] = {}
|
||||
moduleCache[name][args][kwargs_tuple] = mod
|
||||
return mod
|
||||
|
||||
return moduleFactory
|
||||
|
||||
|
||||
def memoize(func):
    """Return a wrapper around ``func`` that caches its results.

    Each wrapper owns a private dict keyed by the positional arguments and
    the keyword arguments (ordered by keyword name), so every argument
    value must be hashable.  Entries are never evicted.

    The wrapper keeps the wrapped function's ``__name__`` and ``__doc__``.
    """
    # Function-scope import: keeps this block self-contained without
    # touching the module's top-level imports.
    import functools

    cache = {}

    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        # Sort by keyword name so f(a=1, b=2) and f(b=2, a=1) share one
        # cache entry.  Keyword names are unique, so the sort never has to
        # compare the (possibly unorderable) values.
        key = (args, tuple(sorted(kwargs.items())))
        if key not in cache:
            cache[key] = func(*args, **kwargs)
        return cache[key]

    return wrapped
|
||||
@@ -1,292 +1,296 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import string
|
||||
import gettext
|
||||
_ = gettext.gettext
|
||||
|
||||
EOF = None
|
||||
|
||||
E = {
|
||||
"null-character":
|
||||
_("Null character in input stream, replaced with U+FFFD."),
|
||||
"Null character in input stream, replaced with U+FFFD.",
|
||||
"invalid-codepoint":
|
||||
_("Invalid codepoint in stream."),
|
||||
"Invalid codepoint in stream.",
|
||||
"incorrectly-placed-solidus":
|
||||
_("Solidus (/) incorrectly placed in tag."),
|
||||
"Solidus (/) incorrectly placed in tag.",
|
||||
"incorrect-cr-newline-entity":
|
||||
_("Incorrect CR newline entity, replaced with LF."),
|
||||
"Incorrect CR newline entity, replaced with LF.",
|
||||
"illegal-windows-1252-entity":
|
||||
_("Entity used with illegal number (windows-1252 reference)."),
|
||||
"Entity used with illegal number (windows-1252 reference).",
|
||||
"cant-convert-numeric-entity":
|
||||
_("Numeric entity couldn't be converted to character "
|
||||
"(codepoint U+%(charAsInt)08x)."),
|
||||
"Numeric entity couldn't be converted to character "
|
||||
"(codepoint U+%(charAsInt)08x).",
|
||||
"illegal-codepoint-for-numeric-entity":
|
||||
_("Numeric entity represents an illegal codepoint: "
|
||||
"U+%(charAsInt)08x."),
|
||||
"Numeric entity represents an illegal codepoint: "
|
||||
"U+%(charAsInt)08x.",
|
||||
"numeric-entity-without-semicolon":
|
||||
_("Numeric entity didn't end with ';'."),
|
||||
"Numeric entity didn't end with ';'.",
|
||||
"expected-numeric-entity-but-got-eof":
|
||||
_("Numeric entity expected. Got end of file instead."),
|
||||
"Numeric entity expected. Got end of file instead.",
|
||||
"expected-numeric-entity":
|
||||
_("Numeric entity expected but none found."),
|
||||
"Numeric entity expected but none found.",
|
||||
"named-entity-without-semicolon":
|
||||
_("Named entity didn't end with ';'."),
|
||||
"Named entity didn't end with ';'.",
|
||||
"expected-named-entity":
|
||||
_("Named entity expected. Got none."),
|
||||
"Named entity expected. Got none.",
|
||||
"attributes-in-end-tag":
|
||||
_("End tag contains unexpected attributes."),
|
||||
"End tag contains unexpected attributes.",
|
||||
'self-closing-flag-on-end-tag':
|
||||
_("End tag contains unexpected self-closing flag."),
|
||||
"End tag contains unexpected self-closing flag.",
|
||||
"expected-tag-name-but-got-right-bracket":
|
||||
_("Expected tag name. Got '>' instead."),
|
||||
"Expected tag name. Got '>' instead.",
|
||||
"expected-tag-name-but-got-question-mark":
|
||||
_("Expected tag name. Got '?' instead. (HTML doesn't "
|
||||
"support processing instructions.)"),
|
||||
"Expected tag name. Got '?' instead. (HTML doesn't "
|
||||
"support processing instructions.)",
|
||||
"expected-tag-name":
|
||||
_("Expected tag name. Got something else instead"),
|
||||
"Expected tag name. Got something else instead",
|
||||
"expected-closing-tag-but-got-right-bracket":
|
||||
_("Expected closing tag. Got '>' instead. Ignoring '</>'."),
|
||||
"Expected closing tag. Got '>' instead. Ignoring '</>'.",
|
||||
"expected-closing-tag-but-got-eof":
|
||||
_("Expected closing tag. Unexpected end of file."),
|
||||
"Expected closing tag. Unexpected end of file.",
|
||||
"expected-closing-tag-but-got-char":
|
||||
_("Expected closing tag. Unexpected character '%(data)s' found."),
|
||||
"Expected closing tag. Unexpected character '%(data)s' found.",
|
||||
"eof-in-tag-name":
|
||||
_("Unexpected end of file in the tag name."),
|
||||
"Unexpected end of file in the tag name.",
|
||||
"expected-attribute-name-but-got-eof":
|
||||
_("Unexpected end of file. Expected attribute name instead."),
|
||||
"Unexpected end of file. Expected attribute name instead.",
|
||||
"eof-in-attribute-name":
|
||||
_("Unexpected end of file in attribute name."),
|
||||
"Unexpected end of file in attribute name.",
|
||||
"invalid-character-in-attribute-name":
|
||||
_("Invalid character in attribute name"),
|
||||
"Invalid character in attribute name",
|
||||
"duplicate-attribute":
|
||||
_("Dropped duplicate attribute on tag."),
|
||||
"Dropped duplicate attribute on tag.",
|
||||
"expected-end-of-tag-name-but-got-eof":
|
||||
_("Unexpected end of file. Expected = or end of tag."),
|
||||
"Unexpected end of file. Expected = or end of tag.",
|
||||
"expected-attribute-value-but-got-eof":
|
||||
_("Unexpected end of file. Expected attribute value."),
|
||||
"Unexpected end of file. Expected attribute value.",
|
||||
"expected-attribute-value-but-got-right-bracket":
|
||||
_("Expected attribute value. Got '>' instead."),
|
||||
"Expected attribute value. Got '>' instead.",
|
||||
'equals-in-unquoted-attribute-value':
|
||||
_("Unexpected = in unquoted attribute"),
|
||||
"Unexpected = in unquoted attribute",
|
||||
'unexpected-character-in-unquoted-attribute-value':
|
||||
_("Unexpected character in unquoted attribute"),
|
||||
"Unexpected character in unquoted attribute",
|
||||
"invalid-character-after-attribute-name":
|
||||
_("Unexpected character after attribute name."),
|
||||
"Unexpected character after attribute name.",
|
||||
"unexpected-character-after-attribute-value":
|
||||
_("Unexpected character after attribute value."),
|
||||
"Unexpected character after attribute value.",
|
||||
"eof-in-attribute-value-double-quote":
|
||||
_("Unexpected end of file in attribute value (\")."),
|
||||
"Unexpected end of file in attribute value (\").",
|
||||
"eof-in-attribute-value-single-quote":
|
||||
_("Unexpected end of file in attribute value (')."),
|
||||
"Unexpected end of file in attribute value (').",
|
||||
"eof-in-attribute-value-no-quotes":
|
||||
_("Unexpected end of file in attribute value."),
|
||||
"Unexpected end of file in attribute value.",
|
||||
"unexpected-EOF-after-solidus-in-tag":
|
||||
_("Unexpected end of file in tag. Expected >"),
|
||||
"Unexpected end of file in tag. Expected >",
|
||||
"unexpected-character-after-solidus-in-tag":
|
||||
_("Unexpected character after / in tag. Expected >"),
|
||||
"Unexpected character after / in tag. Expected >",
|
||||
"expected-dashes-or-doctype":
|
||||
_("Expected '--' or 'DOCTYPE'. Not found."),
|
||||
"Expected '--' or 'DOCTYPE'. Not found.",
|
||||
"unexpected-bang-after-double-dash-in-comment":
|
||||
_("Unexpected ! after -- in comment"),
|
||||
"Unexpected ! after -- in comment",
|
||||
"unexpected-space-after-double-dash-in-comment":
|
||||
_("Unexpected space after -- in comment"),
|
||||
"Unexpected space after -- in comment",
|
||||
"incorrect-comment":
|
||||
_("Incorrect comment."),
|
||||
"Incorrect comment.",
|
||||
"eof-in-comment":
|
||||
_("Unexpected end of file in comment."),
|
||||
"Unexpected end of file in comment.",
|
||||
"eof-in-comment-end-dash":
|
||||
_("Unexpected end of file in comment (-)"),
|
||||
"Unexpected end of file in comment (-)",
|
||||
"unexpected-dash-after-double-dash-in-comment":
|
||||
_("Unexpected '-' after '--' found in comment."),
|
||||
"Unexpected '-' after '--' found in comment.",
|
||||
"eof-in-comment-double-dash":
|
||||
_("Unexpected end of file in comment (--)."),
|
||||
"Unexpected end of file in comment (--).",
|
||||
"eof-in-comment-end-space-state":
|
||||
_("Unexpected end of file in comment."),
|
||||
"Unexpected end of file in comment.",
|
||||
"eof-in-comment-end-bang-state":
|
||||
_("Unexpected end of file in comment."),
|
||||
"Unexpected end of file in comment.",
|
||||
"unexpected-char-in-comment":
|
||||
_("Unexpected character in comment found."),
|
||||
"Unexpected character in comment found.",
|
||||
"need-space-after-doctype":
|
||||
_("No space after literal string 'DOCTYPE'."),
|
||||
"No space after literal string 'DOCTYPE'.",
|
||||
"expected-doctype-name-but-got-right-bracket":
|
||||
_("Unexpected > character. Expected DOCTYPE name."),
|
||||
"Unexpected > character. Expected DOCTYPE name.",
|
||||
"expected-doctype-name-but-got-eof":
|
||||
_("Unexpected end of file. Expected DOCTYPE name."),
|
||||
"Unexpected end of file. Expected DOCTYPE name.",
|
||||
"eof-in-doctype-name":
|
||||
_("Unexpected end of file in DOCTYPE name."),
|
||||
"Unexpected end of file in DOCTYPE name.",
|
||||
"eof-in-doctype":
|
||||
_("Unexpected end of file in DOCTYPE."),
|
||||
"Unexpected end of file in DOCTYPE.",
|
||||
"expected-space-or-right-bracket-in-doctype":
|
||||
_("Expected space or '>'. Got '%(data)s'"),
|
||||
"Expected space or '>'. Got '%(data)s'",
|
||||
"unexpected-end-of-doctype":
|
||||
_("Unexpected end of DOCTYPE."),
|
||||
"Unexpected end of DOCTYPE.",
|
||||
"unexpected-char-in-doctype":
|
||||
_("Unexpected character in DOCTYPE."),
|
||||
"Unexpected character in DOCTYPE.",
|
||||
"eof-in-innerhtml":
|
||||
_("XXX innerHTML EOF"),
|
||||
"XXX innerHTML EOF",
|
||||
"unexpected-doctype":
|
||||
_("Unexpected DOCTYPE. Ignored."),
|
||||
"Unexpected DOCTYPE. Ignored.",
|
||||
"non-html-root":
|
||||
_("html needs to be the first start tag."),
|
||||
"html needs to be the first start tag.",
|
||||
"expected-doctype-but-got-eof":
|
||||
_("Unexpected End of file. Expected DOCTYPE."),
|
||||
"Unexpected End of file. Expected DOCTYPE.",
|
||||
"unknown-doctype":
|
||||
_("Erroneous DOCTYPE."),
|
||||
"Erroneous DOCTYPE.",
|
||||
"expected-doctype-but-got-chars":
|
||||
_("Unexpected non-space characters. Expected DOCTYPE."),
|
||||
"Unexpected non-space characters. Expected DOCTYPE.",
|
||||
"expected-doctype-but-got-start-tag":
|
||||
_("Unexpected start tag (%(name)s). Expected DOCTYPE."),
|
||||
"Unexpected start tag (%(name)s). Expected DOCTYPE.",
|
||||
"expected-doctype-but-got-end-tag":
|
||||
_("Unexpected end tag (%(name)s). Expected DOCTYPE."),
|
||||
"Unexpected end tag (%(name)s). Expected DOCTYPE.",
|
||||
"end-tag-after-implied-root":
|
||||
_("Unexpected end tag (%(name)s) after the (implied) root element."),
|
||||
"Unexpected end tag (%(name)s) after the (implied) root element.",
|
||||
"expected-named-closing-tag-but-got-eof":
|
||||
_("Unexpected end of file. Expected end tag (%(name)s)."),
|
||||
"Unexpected end of file. Expected end tag (%(name)s).",
|
||||
"two-heads-are-not-better-than-one":
|
||||
_("Unexpected start tag head in existing head. Ignored."),
|
||||
"Unexpected start tag head in existing head. Ignored.",
|
||||
"unexpected-end-tag":
|
||||
_("Unexpected end tag (%(name)s). Ignored."),
|
||||
"Unexpected end tag (%(name)s). Ignored.",
|
||||
"unexpected-start-tag-out-of-my-head":
|
||||
_("Unexpected start tag (%(name)s) that can be in head. Moved."),
|
||||
"Unexpected start tag (%(name)s) that can be in head. Moved.",
|
||||
"unexpected-start-tag":
|
||||
_("Unexpected start tag (%(name)s)."),
|
||||
"Unexpected start tag (%(name)s).",
|
||||
"missing-end-tag":
|
||||
_("Missing end tag (%(name)s)."),
|
||||
"Missing end tag (%(name)s).",
|
||||
"missing-end-tags":
|
||||
_("Missing end tags (%(name)s)."),
|
||||
"Missing end tags (%(name)s).",
|
||||
"unexpected-start-tag-implies-end-tag":
|
||||
_("Unexpected start tag (%(startName)s) "
|
||||
"implies end tag (%(endName)s)."),
|
||||
"Unexpected start tag (%(startName)s) "
|
||||
"implies end tag (%(endName)s).",
|
||||
"unexpected-start-tag-treated-as":
|
||||
_("Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
|
||||
"Unexpected start tag (%(originalName)s). Treated as %(newName)s.",
|
||||
"deprecated-tag":
|
||||
_("Unexpected start tag %(name)s. Don't use it!"),
|
||||
"Unexpected start tag %(name)s. Don't use it!",
|
||||
"unexpected-start-tag-ignored":
|
||||
_("Unexpected start tag %(name)s. Ignored."),
|
||||
"Unexpected start tag %(name)s. Ignored.",
|
||||
"expected-one-end-tag-but-got-another":
|
||||
_("Unexpected end tag (%(gotName)s). "
|
||||
"Missing end tag (%(expectedName)s)."),
|
||||
"Unexpected end tag (%(gotName)s). "
|
||||
"Missing end tag (%(expectedName)s).",
|
||||
"end-tag-too-early":
|
||||
_("End tag (%(name)s) seen too early. Expected other end tag."),
|
||||
"End tag (%(name)s) seen too early. Expected other end tag.",
|
||||
"end-tag-too-early-named":
|
||||
_("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
|
||||
"Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).",
|
||||
"end-tag-too-early-ignored":
|
||||
_("End tag (%(name)s) seen too early. Ignored."),
|
||||
"End tag (%(name)s) seen too early. Ignored.",
|
||||
"adoption-agency-1.1":
|
||||
_("End tag (%(name)s) violates step 1, "
|
||||
"paragraph 1 of the adoption agency algorithm."),
|
||||
"End tag (%(name)s) violates step 1, "
|
||||
"paragraph 1 of the adoption agency algorithm.",
|
||||
"adoption-agency-1.2":
|
||||
_("End tag (%(name)s) violates step 1, "
|
||||
"paragraph 2 of the adoption agency algorithm."),
|
||||
"End tag (%(name)s) violates step 1, "
|
||||
"paragraph 2 of the adoption agency algorithm.",
|
||||
"adoption-agency-1.3":
|
||||
_("End tag (%(name)s) violates step 1, "
|
||||
"paragraph 3 of the adoption agency algorithm."),
|
||||
"End tag (%(name)s) violates step 1, "
|
||||
"paragraph 3 of the adoption agency algorithm.",
|
||||
"adoption-agency-4.4":
|
||||
_("End tag (%(name)s) violates step 4, "
|
||||
"paragraph 4 of the adoption agency algorithm."),
|
||||
"End tag (%(name)s) violates step 4, "
|
||||
"paragraph 4 of the adoption agency algorithm.",
|
||||
"unexpected-end-tag-treated-as":
|
||||
_("Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
|
||||
"Unexpected end tag (%(originalName)s). Treated as %(newName)s.",
|
||||
"no-end-tag":
|
||||
_("This element (%(name)s) has no end tag."),
|
||||
"This element (%(name)s) has no end tag.",
|
||||
"unexpected-implied-end-tag-in-table":
|
||||
_("Unexpected implied end tag (%(name)s) in the table phase."),
|
||||
"Unexpected implied end tag (%(name)s) in the table phase.",
|
||||
"unexpected-implied-end-tag-in-table-body":
|
||||
_("Unexpected implied end tag (%(name)s) in the table body phase."),
|
||||
"Unexpected implied end tag (%(name)s) in the table body phase.",
|
||||
"unexpected-char-implies-table-voodoo":
|
||||
_("Unexpected non-space characters in "
|
||||
"table context caused voodoo mode."),
|
||||
"Unexpected non-space characters in "
|
||||
"table context caused voodoo mode.",
|
||||
"unexpected-hidden-input-in-table":
|
||||
_("Unexpected input with type hidden in table context."),
|
||||
"Unexpected input with type hidden in table context.",
|
||||
"unexpected-form-in-table":
|
||||
_("Unexpected form in table context."),
|
||||
"Unexpected form in table context.",
|
||||
"unexpected-start-tag-implies-table-voodoo":
|
||||
_("Unexpected start tag (%(name)s) in "
|
||||
"table context caused voodoo mode."),
|
||||
"Unexpected start tag (%(name)s) in "
|
||||
"table context caused voodoo mode.",
|
||||
"unexpected-end-tag-implies-table-voodoo":
|
||||
_("Unexpected end tag (%(name)s) in "
|
||||
"table context caused voodoo mode."),
|
||||
"Unexpected end tag (%(name)s) in "
|
||||
"table context caused voodoo mode.",
|
||||
"unexpected-cell-in-table-body":
|
||||
_("Unexpected table cell start tag (%(name)s) "
|
||||
"in the table body phase."),
|
||||
"Unexpected table cell start tag (%(name)s) "
|
||||
"in the table body phase.",
|
||||
"unexpected-cell-end-tag":
|
||||
_("Got table cell end tag (%(name)s) "
|
||||
"while required end tags are missing."),
|
||||
"Got table cell end tag (%(name)s) "
|
||||
"while required end tags are missing.",
|
||||
"unexpected-end-tag-in-table-body":
|
||||
_("Unexpected end tag (%(name)s) in the table body phase. Ignored."),
|
||||
"Unexpected end tag (%(name)s) in the table body phase. Ignored.",
|
||||
"unexpected-implied-end-tag-in-table-row":
|
||||
_("Unexpected implied end tag (%(name)s) in the table row phase."),
|
||||
"Unexpected implied end tag (%(name)s) in the table row phase.",
|
||||
"unexpected-end-tag-in-table-row":
|
||||
_("Unexpected end tag (%(name)s) in the table row phase. Ignored."),
|
||||
"Unexpected end tag (%(name)s) in the table row phase. Ignored.",
|
||||
"unexpected-select-in-select":
|
||||
_("Unexpected select start tag in the select phase "
|
||||
"treated as select end tag."),
|
||||
"Unexpected select start tag in the select phase "
|
||||
"treated as select end tag.",
|
||||
"unexpected-input-in-select":
|
||||
_("Unexpected input start tag in the select phase."),
|
||||
"Unexpected input start tag in the select phase.",
|
||||
"unexpected-start-tag-in-select":
|
||||
_("Unexpected start tag token (%(name)s in the select phase. "
|
||||
"Ignored."),
|
||||
"Unexpected start tag token (%(name)s in the select phase. "
|
||||
"Ignored.",
|
||||
"unexpected-end-tag-in-select":
|
||||
_("Unexpected end tag (%(name)s) in the select phase. Ignored."),
|
||||
"Unexpected end tag (%(name)s) in the select phase. Ignored.",
|
||||
"unexpected-table-element-start-tag-in-select-in-table":
|
||||
_("Unexpected table element start tag (%(name)s) in the select in table phase."),
|
||||
"Unexpected table element start tag (%(name)s) in the select in table phase.",
|
||||
"unexpected-table-element-end-tag-in-select-in-table":
|
||||
_("Unexpected table element end tag (%(name)s) in the select in table phase."),
|
||||
"Unexpected table element end tag (%(name)s) in the select in table phase.",
|
||||
"unexpected-char-after-body":
|
||||
_("Unexpected non-space characters in the after body phase."),
|
||||
"Unexpected non-space characters in the after body phase.",
|
||||
"unexpected-start-tag-after-body":
|
||||
_("Unexpected start tag token (%(name)s)"
|
||||
" in the after body phase."),
|
||||
"Unexpected start tag token (%(name)s)"
|
||||
" in the after body phase.",
|
||||
"unexpected-end-tag-after-body":
|
||||
_("Unexpected end tag token (%(name)s)"
|
||||
" in the after body phase."),
|
||||
"Unexpected end tag token (%(name)s)"
|
||||
" in the after body phase.",
|
||||
"unexpected-char-in-frameset":
|
||||
_("Unexpected characters in the frameset phase. Characters ignored."),
|
||||
"Unexpected characters in the frameset phase. Characters ignored.",
|
||||
"unexpected-start-tag-in-frameset":
|
||||
_("Unexpected start tag token (%(name)s)"
|
||||
" in the frameset phase. Ignored."),
|
||||
"Unexpected start tag token (%(name)s)"
|
||||
" in the frameset phase. Ignored.",
|
||||
"unexpected-frameset-in-frameset-innerhtml":
|
||||
_("Unexpected end tag token (frameset) "
|
||||
"in the frameset phase (innerHTML)."),
|
||||
"Unexpected end tag token (frameset) "
|
||||
"in the frameset phase (innerHTML).",
|
||||
"unexpected-end-tag-in-frameset":
|
||||
_("Unexpected end tag token (%(name)s)"
|
||||
" in the frameset phase. Ignored."),
|
||||
"Unexpected end tag token (%(name)s)"
|
||||
" in the frameset phase. Ignored.",
|
||||
"unexpected-char-after-frameset":
|
||||
_("Unexpected non-space characters in the "
|
||||
"after frameset phase. Ignored."),
|
||||
"Unexpected non-space characters in the "
|
||||
"after frameset phase. Ignored.",
|
||||
"unexpected-start-tag-after-frameset":
|
||||
_("Unexpected start tag (%(name)s)"
|
||||
" in the after frameset phase. Ignored."),
|
||||
"Unexpected start tag (%(name)s)"
|
||||
" in the after frameset phase. Ignored.",
|
||||
"unexpected-end-tag-after-frameset":
|
||||
_("Unexpected end tag (%(name)s)"
|
||||
" in the after frameset phase. Ignored."),
|
||||
"Unexpected end tag (%(name)s)"
|
||||
" in the after frameset phase. Ignored.",
|
||||
"unexpected-end-tag-after-body-innerhtml":
|
||||
_("Unexpected end tag after body(innerHtml)"),
|
||||
"Unexpected end tag after body(innerHtml)",
|
||||
"expected-eof-but-got-char":
|
||||
_("Unexpected non-space characters. Expected end of file."),
|
||||
"Unexpected non-space characters. Expected end of file.",
|
||||
"expected-eof-but-got-start-tag":
|
||||
_("Unexpected start tag (%(name)s)"
|
||||
". Expected end of file."),
|
||||
"Unexpected start tag (%(name)s)"
|
||||
". Expected end of file.",
|
||||
"expected-eof-but-got-end-tag":
|
||||
_("Unexpected end tag (%(name)s)"
|
||||
". Expected end of file."),
|
||||
"Unexpected end tag (%(name)s)"
|
||||
". Expected end of file.",
|
||||
"eof-in-table":
|
||||
_("Unexpected end of file. Expected table content."),
|
||||
"Unexpected end of file. Expected table content.",
|
||||
"eof-in-select":
|
||||
_("Unexpected end of file. Expected select content."),
|
||||
"Unexpected end of file. Expected select content.",
|
||||
"eof-in-frameset":
|
||||
_("Unexpected end of file. Expected frameset content."),
|
||||
"Unexpected end of file. Expected frameset content.",
|
||||
"eof-in-script-in-script":
|
||||
_("Unexpected end of file. Expected script content."),
|
||||
"Unexpected end of file. Expected script content.",
|
||||
"eof-in-foreign-lands":
|
||||
_("Unexpected end of file. Expected foreign content"),
|
||||
"Unexpected end of file. Expected foreign content",
|
||||
"non-void-element-with-trailing-solidus":
|
||||
_("Trailing solidus not allowed on element %(name)s"),
|
||||
"Trailing solidus not allowed on element %(name)s",
|
||||
"unexpected-html-element-in-foreign-content":
|
||||
_("Element %(name)s not allowed in a non-html context"),
|
||||
"Element %(name)s not allowed in a non-html context",
|
||||
"unexpected-end-tag-before-html":
|
||||
_("Unexpected end tag (%(name)s) before html."),
|
||||
"Unexpected end tag (%(name)s) before html.",
|
||||
"unexpected-inhead-noscript-tag":
|
||||
"Element %(name)s not allowed in a inhead-noscript context",
|
||||
"eof-in-head-noscript":
|
||||
"Unexpected end of file. Expected inhead-noscript content",
|
||||
"char-in-head-noscript":
|
||||
"Unexpected non-space character. Expected inhead-noscript content",
|
||||
"XXX-undefined-error":
|
||||
_("Undefined error (this sucks and should be fixed)"),
|
||||
"Undefined error (this sucks and should be fixed)",
|
||||
}
|
||||
|
||||
namespaces = {
|
||||
@@ -298,7 +302,7 @@ namespaces = {
|
||||
"xmlns": "http://www.w3.org/2000/xmlns/"
|
||||
}
|
||||
|
||||
scopingElements = frozenset((
|
||||
scopingElements = frozenset([
|
||||
(namespaces["html"], "applet"),
|
||||
(namespaces["html"], "caption"),
|
||||
(namespaces["html"], "html"),
|
||||
@@ -316,9 +320,9 @@ scopingElements = frozenset((
|
||||
(namespaces["svg"], "foreignObject"),
|
||||
(namespaces["svg"], "desc"),
|
||||
(namespaces["svg"], "title"),
|
||||
))
|
||||
])
|
||||
|
||||
formattingElements = frozenset((
|
||||
formattingElements = frozenset([
|
||||
(namespaces["html"], "a"),
|
||||
(namespaces["html"], "b"),
|
||||
(namespaces["html"], "big"),
|
||||
@@ -333,9 +337,9 @@ formattingElements = frozenset((
|
||||
(namespaces["html"], "strong"),
|
||||
(namespaces["html"], "tt"),
|
||||
(namespaces["html"], "u")
|
||||
))
|
||||
])
|
||||
|
||||
specialElements = frozenset((
|
||||
specialElements = frozenset([
|
||||
(namespaces["html"], "address"),
|
||||
(namespaces["html"], "applet"),
|
||||
(namespaces["html"], "area"),
|
||||
@@ -416,22 +420,89 @@ specialElements = frozenset((
|
||||
(namespaces["html"], "wbr"),
|
||||
(namespaces["html"], "xmp"),
|
||||
(namespaces["svg"], "foreignObject")
|
||||
))
|
||||
])
|
||||
|
||||
htmlIntegrationPointElements = frozenset((
|
||||
(namespaces["mathml"], "annotaion-xml"),
|
||||
htmlIntegrationPointElements = frozenset([
|
||||
(namespaces["mathml"], "annotation-xml"),
|
||||
(namespaces["svg"], "foreignObject"),
|
||||
(namespaces["svg"], "desc"),
|
||||
(namespaces["svg"], "title")
|
||||
))
|
||||
])
|
||||
|
||||
mathmlTextIntegrationPointElements = frozenset((
|
||||
mathmlTextIntegrationPointElements = frozenset([
|
||||
(namespaces["mathml"], "mi"),
|
||||
(namespaces["mathml"], "mo"),
|
||||
(namespaces["mathml"], "mn"),
|
||||
(namespaces["mathml"], "ms"),
|
||||
(namespaces["mathml"], "mtext")
|
||||
))
|
||||
])
|
||||
|
||||
adjustSVGAttributes = {
|
||||
"attributename": "attributeName",
|
||||
"attributetype": "attributeType",
|
||||
"basefrequency": "baseFrequency",
|
||||
"baseprofile": "baseProfile",
|
||||
"calcmode": "calcMode",
|
||||
"clippathunits": "clipPathUnits",
|
||||
"contentscripttype": "contentScriptType",
|
||||
"contentstyletype": "contentStyleType",
|
||||
"diffuseconstant": "diffuseConstant",
|
||||
"edgemode": "edgeMode",
|
||||
"externalresourcesrequired": "externalResourcesRequired",
|
||||
"filterres": "filterRes",
|
||||
"filterunits": "filterUnits",
|
||||
"glyphref": "glyphRef",
|
||||
"gradienttransform": "gradientTransform",
|
||||
"gradientunits": "gradientUnits",
|
||||
"kernelmatrix": "kernelMatrix",
|
||||
"kernelunitlength": "kernelUnitLength",
|
||||
"keypoints": "keyPoints",
|
||||
"keysplines": "keySplines",
|
||||
"keytimes": "keyTimes",
|
||||
"lengthadjust": "lengthAdjust",
|
||||
"limitingconeangle": "limitingConeAngle",
|
||||
"markerheight": "markerHeight",
|
||||
"markerunits": "markerUnits",
|
||||
"markerwidth": "markerWidth",
|
||||
"maskcontentunits": "maskContentUnits",
|
||||
"maskunits": "maskUnits",
|
||||
"numoctaves": "numOctaves",
|
||||
"pathlength": "pathLength",
|
||||
"patterncontentunits": "patternContentUnits",
|
||||
"patterntransform": "patternTransform",
|
||||
"patternunits": "patternUnits",
|
||||
"pointsatx": "pointsAtX",
|
||||
"pointsaty": "pointsAtY",
|
||||
"pointsatz": "pointsAtZ",
|
||||
"preservealpha": "preserveAlpha",
|
||||
"preserveaspectratio": "preserveAspectRatio",
|
||||
"primitiveunits": "primitiveUnits",
|
||||
"refx": "refX",
|
||||
"refy": "refY",
|
||||
"repeatcount": "repeatCount",
|
||||
"repeatdur": "repeatDur",
|
||||
"requiredextensions": "requiredExtensions",
|
||||
"requiredfeatures": "requiredFeatures",
|
||||
"specularconstant": "specularConstant",
|
||||
"specularexponent": "specularExponent",
|
||||
"spreadmethod": "spreadMethod",
|
||||
"startoffset": "startOffset",
|
||||
"stddeviation": "stdDeviation",
|
||||
"stitchtiles": "stitchTiles",
|
||||
"surfacescale": "surfaceScale",
|
||||
"systemlanguage": "systemLanguage",
|
||||
"tablevalues": "tableValues",
|
||||
"targetx": "targetX",
|
||||
"targety": "targetY",
|
||||
"textlength": "textLength",
|
||||
"viewbox": "viewBox",
|
||||
"viewtarget": "viewTarget",
|
||||
"xchannelselector": "xChannelSelector",
|
||||
"ychannelselector": "yChannelSelector",
|
||||
"zoomandpan": "zoomAndPan"
|
||||
}
|
||||
|
||||
adjustMathMLAttributes = {"definitionurl": "definitionURL"}
|
||||
|
||||
adjustForeignAttributes = {
|
||||
"xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
|
||||
@@ -451,21 +522,21 @@ adjustForeignAttributes = {
|
||||
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
|
||||
adjustForeignAttributes.items()])
|
||||
|
||||
spaceCharacters = frozenset((
|
||||
spaceCharacters = frozenset([
|
||||
"\t",
|
||||
"\n",
|
||||
"\u000C",
|
||||
" ",
|
||||
"\r"
|
||||
))
|
||||
])
|
||||
|
||||
tableInsertModeElements = frozenset((
|
||||
tableInsertModeElements = frozenset([
|
||||
"table",
|
||||
"tbody",
|
||||
"tfoot",
|
||||
"thead",
|
||||
"tr"
|
||||
))
|
||||
])
|
||||
|
||||
asciiLowercase = frozenset(string.ascii_lowercase)
|
||||
asciiUppercase = frozenset(string.ascii_uppercase)
|
||||
@@ -486,7 +557,7 @@ headingElements = (
|
||||
"h6"
|
||||
)
|
||||
|
||||
voidElements = frozenset((
|
||||
voidElements = frozenset([
|
||||
"base",
|
||||
"command",
|
||||
"event-source",
|
||||
@@ -502,11 +573,11 @@ voidElements = frozenset((
|
||||
"input",
|
||||
"source",
|
||||
"track"
|
||||
))
|
||||
])
|
||||
|
||||
cdataElements = frozenset(('title', 'textarea'))
|
||||
cdataElements = frozenset(['title', 'textarea'])
|
||||
|
||||
rcdataElements = frozenset((
|
||||
rcdataElements = frozenset([
|
||||
'style',
|
||||
'script',
|
||||
'xmp',
|
||||
@@ -514,27 +585,28 @@ rcdataElements = frozenset((
|
||||
'noembed',
|
||||
'noframes',
|
||||
'noscript'
|
||||
))
|
||||
])
|
||||
|
||||
booleanAttributes = {
|
||||
"": frozenset(("irrelevant",)),
|
||||
"style": frozenset(("scoped",)),
|
||||
"img": frozenset(("ismap",)),
|
||||
"audio": frozenset(("autoplay", "controls")),
|
||||
"video": frozenset(("autoplay", "controls")),
|
||||
"script": frozenset(("defer", "async")),
|
||||
"details": frozenset(("open",)),
|
||||
"datagrid": frozenset(("multiple", "disabled")),
|
||||
"command": frozenset(("hidden", "disabled", "checked", "default")),
|
||||
"hr": frozenset(("noshade")),
|
||||
"menu": frozenset(("autosubmit",)),
|
||||
"fieldset": frozenset(("disabled", "readonly")),
|
||||
"option": frozenset(("disabled", "readonly", "selected")),
|
||||
"optgroup": frozenset(("disabled", "readonly")),
|
||||
"button": frozenset(("disabled", "autofocus")),
|
||||
"input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")),
|
||||
"select": frozenset(("disabled", "readonly", "autofocus", "multiple")),
|
||||
"output": frozenset(("disabled", "readonly")),
|
||||
"": frozenset(["irrelevant", "itemscope"]),
|
||||
"style": frozenset(["scoped"]),
|
||||
"img": frozenset(["ismap"]),
|
||||
"audio": frozenset(["autoplay", "controls"]),
|
||||
"video": frozenset(["autoplay", "controls"]),
|
||||
"script": frozenset(["defer", "async"]),
|
||||
"details": frozenset(["open"]),
|
||||
"datagrid": frozenset(["multiple", "disabled"]),
|
||||
"command": frozenset(["hidden", "disabled", "checked", "default"]),
|
||||
"hr": frozenset(["noshade"]),
|
||||
"menu": frozenset(["autosubmit"]),
|
||||
"fieldset": frozenset(["disabled", "readonly"]),
|
||||
"option": frozenset(["disabled", "readonly", "selected"]),
|
||||
"optgroup": frozenset(["disabled", "readonly"]),
|
||||
"button": frozenset(["disabled", "autofocus"]),
|
||||
"input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
|
||||
"select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
|
||||
"output": frozenset(["disabled", "readonly"]),
|
||||
"iframe": frozenset(["seamless"]),
|
||||
}
|
||||
|
||||
# entitiesWindows1252 has to be _ordered_ and needs to have an index. It
|
||||
@@ -574,7 +646,7 @@ entitiesWindows1252 = (
|
||||
376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
)
|
||||
|
||||
xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;'))
|
||||
xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;'])
|
||||
|
||||
entities = {
|
||||
"AElig": "\xc6",
|
||||
@@ -2815,7 +2887,6 @@ replacementCharacters = {
|
||||
0x0d: "\u000D",
|
||||
0x80: "\u20AC",
|
||||
0x81: "\u0081",
|
||||
0x81: "\u0081",
|
||||
0x82: "\u201A",
|
||||
0x83: "\u0192",
|
||||
0x84: "\u201E",
|
||||
@@ -2848,235 +2919,6 @@ replacementCharacters = {
|
||||
0x9F: "\u0178",
|
||||
}
|
||||
|
||||
encodings = {
|
||||
'437': 'cp437',
|
||||
'850': 'cp850',
|
||||
'852': 'cp852',
|
||||
'855': 'cp855',
|
||||
'857': 'cp857',
|
||||
'860': 'cp860',
|
||||
'861': 'cp861',
|
||||
'862': 'cp862',
|
||||
'863': 'cp863',
|
||||
'865': 'cp865',
|
||||
'866': 'cp866',
|
||||
'869': 'cp869',
|
||||
'ansix341968': 'ascii',
|
||||
'ansix341986': 'ascii',
|
||||
'arabic': 'iso8859-6',
|
||||
'ascii': 'ascii',
|
||||
'asmo708': 'iso8859-6',
|
||||
'big5': 'big5',
|
||||
'big5hkscs': 'big5hkscs',
|
||||
'chinese': 'gbk',
|
||||
'cp037': 'cp037',
|
||||
'cp1026': 'cp1026',
|
||||
'cp154': 'ptcp154',
|
||||
'cp367': 'ascii',
|
||||
'cp424': 'cp424',
|
||||
'cp437': 'cp437',
|
||||
'cp500': 'cp500',
|
||||
'cp775': 'cp775',
|
||||
'cp819': 'windows-1252',
|
||||
'cp850': 'cp850',
|
||||
'cp852': 'cp852',
|
||||
'cp855': 'cp855',
|
||||
'cp857': 'cp857',
|
||||
'cp860': 'cp860',
|
||||
'cp861': 'cp861',
|
||||
'cp862': 'cp862',
|
||||
'cp863': 'cp863',
|
||||
'cp864': 'cp864',
|
||||
'cp865': 'cp865',
|
||||
'cp866': 'cp866',
|
||||
'cp869': 'cp869',
|
||||
'cp936': 'gbk',
|
||||
'cpgr': 'cp869',
|
||||
'cpis': 'cp861',
|
||||
'csascii': 'ascii',
|
||||
'csbig5': 'big5',
|
||||
'cseuckr': 'cp949',
|
||||
'cseucpkdfmtjapanese': 'euc_jp',
|
||||
'csgb2312': 'gbk',
|
||||
'cshproman8': 'hp-roman8',
|
||||
'csibm037': 'cp037',
|
||||
'csibm1026': 'cp1026',
|
||||
'csibm424': 'cp424',
|
||||
'csibm500': 'cp500',
|
||||
'csibm855': 'cp855',
|
||||
'csibm857': 'cp857',
|
||||
'csibm860': 'cp860',
|
||||
'csibm861': 'cp861',
|
||||
'csibm863': 'cp863',
|
||||
'csibm864': 'cp864',
|
||||
'csibm865': 'cp865',
|
||||
'csibm866': 'cp866',
|
||||
'csibm869': 'cp869',
|
||||
'csiso2022jp': 'iso2022_jp',
|
||||
'csiso2022jp2': 'iso2022_jp_2',
|
||||
'csiso2022kr': 'iso2022_kr',
|
||||
'csiso58gb231280': 'gbk',
|
||||
'csisolatin1': 'windows-1252',
|
||||
'csisolatin2': 'iso8859-2',
|
||||
'csisolatin3': 'iso8859-3',
|
||||
'csisolatin4': 'iso8859-4',
|
||||
'csisolatin5': 'windows-1254',
|
||||
'csisolatin6': 'iso8859-10',
|
||||
'csisolatinarabic': 'iso8859-6',
|
||||
'csisolatincyrillic': 'iso8859-5',
|
||||
'csisolatingreek': 'iso8859-7',
|
||||
'csisolatinhebrew': 'iso8859-8',
|
||||
'cskoi8r': 'koi8-r',
|
||||
'csksc56011987': 'cp949',
|
||||
'cspc775baltic': 'cp775',
|
||||
'cspc850multilingual': 'cp850',
|
||||
'cspc862latinhebrew': 'cp862',
|
||||
'cspc8codepage437': 'cp437',
|
||||
'cspcp852': 'cp852',
|
||||
'csptcp154': 'ptcp154',
|
||||
'csshiftjis': 'shift_jis',
|
||||
'csunicode11utf7': 'utf-7',
|
||||
'cyrillic': 'iso8859-5',
|
||||
'cyrillicasian': 'ptcp154',
|
||||
'ebcdiccpbe': 'cp500',
|
||||
'ebcdiccpca': 'cp037',
|
||||
'ebcdiccpch': 'cp500',
|
||||
'ebcdiccphe': 'cp424',
|
||||
'ebcdiccpnl': 'cp037',
|
||||
'ebcdiccpus': 'cp037',
|
||||
'ebcdiccpwt': 'cp037',
|
||||
'ecma114': 'iso8859-6',
|
||||
'ecma118': 'iso8859-7',
|
||||
'elot928': 'iso8859-7',
|
||||
'eucjp': 'euc_jp',
|
||||
'euckr': 'cp949',
|
||||
'extendedunixcodepackedformatforjapanese': 'euc_jp',
|
||||
'gb18030': 'gb18030',
|
||||
'gb2312': 'gbk',
|
||||
'gb231280': 'gbk',
|
||||
'gbk': 'gbk',
|
||||
'greek': 'iso8859-7',
|
||||
'greek8': 'iso8859-7',
|
||||
'hebrew': 'iso8859-8',
|
||||
'hproman8': 'hp-roman8',
|
||||
'hzgb2312': 'hz',
|
||||
'ibm037': 'cp037',
|
||||
'ibm1026': 'cp1026',
|
||||
'ibm367': 'ascii',
|
||||
'ibm424': 'cp424',
|
||||
'ibm437': 'cp437',
|
||||
'ibm500': 'cp500',
|
||||
'ibm775': 'cp775',
|
||||
'ibm819': 'windows-1252',
|
||||
'ibm850': 'cp850',
|
||||
'ibm852': 'cp852',
|
||||
'ibm855': 'cp855',
|
||||
'ibm857': 'cp857',
|
||||
'ibm860': 'cp860',
|
||||
'ibm861': 'cp861',
|
||||
'ibm862': 'cp862',
|
||||
'ibm863': 'cp863',
|
||||
'ibm864': 'cp864',
|
||||
'ibm865': 'cp865',
|
||||
'ibm866': 'cp866',
|
||||
'ibm869': 'cp869',
|
||||
'iso2022jp': 'iso2022_jp',
|
||||
'iso2022jp2': 'iso2022_jp_2',
|
||||
'iso2022kr': 'iso2022_kr',
|
||||
'iso646irv1991': 'ascii',
|
||||
'iso646us': 'ascii',
|
||||
'iso88591': 'windows-1252',
|
||||
'iso885910': 'iso8859-10',
|
||||
'iso8859101992': 'iso8859-10',
|
||||
'iso885911987': 'windows-1252',
|
||||
'iso885913': 'iso8859-13',
|
||||
'iso885914': 'iso8859-14',
|
||||
'iso8859141998': 'iso8859-14',
|
||||
'iso885915': 'iso8859-15',
|
||||
'iso885916': 'iso8859-16',
|
||||
'iso8859162001': 'iso8859-16',
|
||||
'iso88592': 'iso8859-2',
|
||||
'iso885921987': 'iso8859-2',
|
||||
'iso88593': 'iso8859-3',
|
||||
'iso885931988': 'iso8859-3',
|
||||
'iso88594': 'iso8859-4',
|
||||
'iso885941988': 'iso8859-4',
|
||||
'iso88595': 'iso8859-5',
|
||||
'iso885951988': 'iso8859-5',
|
||||
'iso88596': 'iso8859-6',
|
||||
'iso885961987': 'iso8859-6',
|
||||
'iso88597': 'iso8859-7',
|
||||
'iso885971987': 'iso8859-7',
|
||||
'iso88598': 'iso8859-8',
|
||||
'iso885981988': 'iso8859-8',
|
||||
'iso88599': 'windows-1254',
|
||||
'iso885991989': 'windows-1254',
|
||||
'isoceltic': 'iso8859-14',
|
||||
'isoir100': 'windows-1252',
|
||||
'isoir101': 'iso8859-2',
|
||||
'isoir109': 'iso8859-3',
|
||||
'isoir110': 'iso8859-4',
|
||||
'isoir126': 'iso8859-7',
|
||||
'isoir127': 'iso8859-6',
|
||||
'isoir138': 'iso8859-8',
|
||||
'isoir144': 'iso8859-5',
|
||||
'isoir148': 'windows-1254',
|
||||
'isoir149': 'cp949',
|
||||
'isoir157': 'iso8859-10',
|
||||
'isoir199': 'iso8859-14',
|
||||
'isoir226': 'iso8859-16',
|
||||
'isoir58': 'gbk',
|
||||
'isoir6': 'ascii',
|
||||
'koi8r': 'koi8-r',
|
||||
'koi8u': 'koi8-u',
|
||||
'korean': 'cp949',
|
||||
'ksc5601': 'cp949',
|
||||
'ksc56011987': 'cp949',
|
||||
'ksc56011989': 'cp949',
|
||||
'l1': 'windows-1252',
|
||||
'l10': 'iso8859-16',
|
||||
'l2': 'iso8859-2',
|
||||
'l3': 'iso8859-3',
|
||||
'l4': 'iso8859-4',
|
||||
'l5': 'windows-1254',
|
||||
'l6': 'iso8859-10',
|
||||
'l8': 'iso8859-14',
|
||||
'latin1': 'windows-1252',
|
||||
'latin10': 'iso8859-16',
|
||||
'latin2': 'iso8859-2',
|
||||
'latin3': 'iso8859-3',
|
||||
'latin4': 'iso8859-4',
|
||||
'latin5': 'windows-1254',
|
||||
'latin6': 'iso8859-10',
|
||||
'latin8': 'iso8859-14',
|
||||
'latin9': 'iso8859-15',
|
||||
'ms936': 'gbk',
|
||||
'mskanji': 'shift_jis',
|
||||
'pt154': 'ptcp154',
|
||||
'ptcp154': 'ptcp154',
|
||||
'r8': 'hp-roman8',
|
||||
'roman8': 'hp-roman8',
|
||||
'shiftjis': 'shift_jis',
|
||||
'tis620': 'cp874',
|
||||
'unicode11utf7': 'utf-7',
|
||||
'us': 'ascii',
|
||||
'usascii': 'ascii',
|
||||
'utf16': 'utf-16',
|
||||
'utf16be': 'utf-16-be',
|
||||
'utf16le': 'utf-16-le',
|
||||
'utf8': 'utf-8',
|
||||
'windows1250': 'cp1250',
|
||||
'windows1251': 'cp1251',
|
||||
'windows1252': 'cp1252',
|
||||
'windows1253': 'cp1253',
|
||||
'windows1254': 'cp1254',
|
||||
'windows1255': 'cp1255',
|
||||
'windows1256': 'cp1256',
|
||||
'windows1257': 'cp1257',
|
||||
'windows1258': 'cp1258',
|
||||
'windows936': 'gbk',
|
||||
'x-x-big5': 'big5'}
|
||||
|
||||
tokenTypes = {
|
||||
"Doctype": 0,
|
||||
"Characters": 1,
|
||||
@@ -3088,8 +2930,8 @@ tokenTypes = {
|
||||
"ParseError": 7
|
||||
}
|
||||
|
||||
tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],
|
||||
tokenTypes["EmptyTag"]))
|
||||
tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
|
||||
tokenTypes["EmptyTag"]])
|
||||
|
||||
|
||||
prefixes = dict([(v, k) for k, v in namespaces.items()])
|
||||
@@ -3097,8 +2939,9 @@ prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
|
||||
|
||||
|
||||
class DataLossWarning(UserWarning):
|
||||
"""Raised when the current tree is unable to represent the input data"""
|
||||
pass
|
||||
|
||||
|
||||
class ReparseException(Exception):
|
||||
class _ReparseException(Exception):
|
||||
pass
|
||||
|
||||
@@ -1,20 +1,29 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
|
||||
try:
|
||||
from collections import OrderedDict
|
||||
except ImportError:
|
||||
from ordereddict import OrderedDict
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class Filter(_base.Filter):
|
||||
def _attr_key(attr):
|
||||
"""Return an appropriate key for an attribute for sorting
|
||||
|
||||
Attributes have a namespace that can be either ``None`` or a string. We
|
||||
can't compare the two because they're different types, so we convert
|
||||
``None`` to an empty string first.
|
||||
|
||||
"""
|
||||
return (attr[0][0] or ''), attr[0][1]
|
||||
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Alphabetizes attributes for elements"""
|
||||
def __iter__(self):
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
if token["type"] in ("StartTag", "EmptyTag"):
|
||||
attrs = OrderedDict()
|
||||
for name, value in sorted(token["data"].items(),
|
||||
key=lambda x: x[0]):
|
||||
key=_attr_key):
|
||||
attrs[name] = value
|
||||
token["data"] = attrs
|
||||
yield token
|
||||
|
||||
@@ -1,11 +1,19 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
|
||||
|
||||
class Filter(_base.Filter):
|
||||
class Filter(base.Filter):
|
||||
"""Injects ``<meta charset=ENCODING>`` tag into head of document"""
|
||||
def __init__(self, source, encoding):
|
||||
_base.Filter.__init__(self, source)
|
||||
"""Creates a Filter
|
||||
|
||||
:arg source: the source token stream
|
||||
|
||||
:arg encoding: the encoding to set
|
||||
|
||||
"""
|
||||
base.Filter.__init__(self, source)
|
||||
self.encoding = encoding
|
||||
|
||||
def __iter__(self):
|
||||
@@ -13,7 +21,7 @@ class Filter(_base.Filter):
|
||||
meta_found = (self.encoding is None)
|
||||
pending = []
|
||||
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
type = token["type"]
|
||||
if type == "StartTag":
|
||||
if token["name"].lower() == "head":
|
||||
|
||||
@@ -1,93 +1,93 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from gettext import gettext
|
||||
_ = gettext
|
||||
from six import text_type
|
||||
|
||||
from . import _base
|
||||
from ..constants import cdataElements, rcdataElements, voidElements
|
||||
from . import base
|
||||
from ..constants import namespaces, voidElements
|
||||
|
||||
from ..constants import spaceCharacters
|
||||
spaceCharacters = "".join(spaceCharacters)
|
||||
|
||||
|
||||
class LintError(Exception):
|
||||
pass
|
||||
class Filter(base.Filter):
|
||||
"""Lints the token stream for errors
|
||||
|
||||
If it finds any errors, it'll raise an ``AssertionError``.
|
||||
|
||||
"""
|
||||
def __init__(self, source, require_matching_tags=True):
|
||||
"""Creates a Filter
|
||||
|
||||
:arg source: the source token stream
|
||||
|
||||
:arg require_matching_tags: whether or not to require matching tags
|
||||
|
||||
"""
|
||||
super(Filter, self).__init__(source)
|
||||
self.require_matching_tags = require_matching_tags
|
||||
|
||||
class Filter(_base.Filter):
|
||||
def __iter__(self):
|
||||
open_elements = []
|
||||
contentModelFlag = "PCDATA"
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
type = token["type"]
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
namespace = token["namespace"]
|
||||
name = token["name"]
|
||||
if contentModelFlag != "PCDATA":
|
||||
raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
|
||||
if not isinstance(name, str):
|
||||
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
|
||||
if not name:
|
||||
raise LintError(_("Empty tag name"))
|
||||
if type == "StartTag" and name in voidElements:
|
||||
raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
|
||||
elif type == "EmptyTag" and name not in voidElements:
|
||||
raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
|
||||
if type == "StartTag":
|
||||
open_elements.append(name)
|
||||
for name, value in token["data"]:
|
||||
if not isinstance(name, str):
|
||||
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
|
||||
if not name:
|
||||
raise LintError(_("Empty attribute name"))
|
||||
if not isinstance(value, str):
|
||||
raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
|
||||
if name in cdataElements:
|
||||
contentModelFlag = "CDATA"
|
||||
elif name in rcdataElements:
|
||||
contentModelFlag = "RCDATA"
|
||||
elif name == "plaintext":
|
||||
contentModelFlag = "PLAINTEXT"
|
||||
assert namespace is None or isinstance(namespace, text_type)
|
||||
assert namespace != ""
|
||||
assert isinstance(name, text_type)
|
||||
assert name != ""
|
||||
assert isinstance(token["data"], dict)
|
||||
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
|
||||
assert type == "EmptyTag"
|
||||
else:
|
||||
assert type == "StartTag"
|
||||
if type == "StartTag" and self.require_matching_tags:
|
||||
open_elements.append((namespace, name))
|
||||
for (namespace, name), value in token["data"].items():
|
||||
assert namespace is None or isinstance(namespace, text_type)
|
||||
assert namespace != ""
|
||||
assert isinstance(name, text_type)
|
||||
assert name != ""
|
||||
assert isinstance(value, text_type)
|
||||
|
||||
elif type == "EndTag":
|
||||
namespace = token["namespace"]
|
||||
name = token["name"]
|
||||
if not isinstance(name, str):
|
||||
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
|
||||
if not name:
|
||||
raise LintError(_("Empty tag name"))
|
||||
if name in voidElements:
|
||||
raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
|
||||
start_name = open_elements.pop()
|
||||
if start_name != name:
|
||||
raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
|
||||
contentModelFlag = "PCDATA"
|
||||
assert namespace is None or isinstance(namespace, text_type)
|
||||
assert namespace != ""
|
||||
assert isinstance(name, text_type)
|
||||
assert name != ""
|
||||
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
|
||||
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
|
||||
elif self.require_matching_tags:
|
||||
start = open_elements.pop()
|
||||
assert start == (namespace, name)
|
||||
|
||||
elif type == "Comment":
|
||||
if contentModelFlag != "PCDATA":
|
||||
raise LintError(_("Comment not in PCDATA content model flag"))
|
||||
data = token["data"]
|
||||
assert isinstance(data, text_type)
|
||||
|
||||
elif type in ("Characters", "SpaceCharacters"):
|
||||
data = token["data"]
|
||||
if not isinstance(data, str):
|
||||
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data})
|
||||
if not data:
|
||||
raise LintError(_("%(type)s token with empty data") % {"type": type})
|
||||
assert isinstance(data, text_type)
|
||||
assert data != ""
|
||||
if type == "SpaceCharacters":
|
||||
data = data.strip(spaceCharacters)
|
||||
if data:
|
||||
raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})
|
||||
assert data.strip(spaceCharacters) == ""
|
||||
|
||||
elif type == "Doctype":
|
||||
name = token["name"]
|
||||
if contentModelFlag != "PCDATA":
|
||||
raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
|
||||
if not isinstance(name, str):
|
||||
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
|
||||
# XXX: what to do with token["data"] ?
|
||||
assert name is None or isinstance(name, text_type)
|
||||
assert token["publicId"] is None or isinstance(name, text_type)
|
||||
assert token["systemId"] is None or isinstance(name, text_type)
|
||||
|
||||
elif type in ("ParseError", "SerializeError"):
|
||||
pass
|
||||
elif type == "Entity":
|
||||
assert isinstance(token["name"], text_type)
|
||||
|
||||
elif type == "SerializerError":
|
||||
assert isinstance(token["data"], text_type)
|
||||
|
||||
else:
|
||||
raise LintError(_("Unknown token type: %(type)s") % {"type": type})
|
||||
assert False, "Unknown token type: %(type)s" % {"type": type}
|
||||
|
||||
yield token
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
|
||||
|
||||
class Filter(_base.Filter):
|
||||
class Filter(base.Filter):
|
||||
"""Removes optional tags from the token stream"""
|
||||
def slider(self):
|
||||
previous1 = previous2 = None
|
||||
for token in self.source:
|
||||
@@ -11,7 +12,8 @@ class Filter(_base.Filter):
|
||||
yield previous2, previous1, token
|
||||
previous2 = previous1
|
||||
previous1 = token
|
||||
yield previous2, previous1, None
|
||||
if previous1 is not None:
|
||||
yield previous2, previous1, None
|
||||
|
||||
def __iter__(self):
|
||||
for previous, token, next in self.slider():
|
||||
@@ -58,7 +60,7 @@ class Filter(_base.Filter):
|
||||
elif tagname == 'colgroup':
|
||||
# A colgroup element's start tag may be omitted if the first thing
|
||||
# inside the colgroup element is a col element, and if the element
|
||||
# is not immediately preceeded by another colgroup element whose
|
||||
# is not immediately preceded by another colgroup element whose
|
||||
# end tag has been omitted.
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
# XXX: we do not look at the preceding event, so instead we never
|
||||
@@ -70,7 +72,7 @@ class Filter(_base.Filter):
|
||||
elif tagname == 'tbody':
|
||||
# A tbody element's start tag may be omitted if the first thing
|
||||
# inside the tbody element is a tr element, and if the element is
|
||||
# not immediately preceeded by a tbody, thead, or tfoot element
|
||||
# not immediately preceded by a tbody, thead, or tfoot element
|
||||
# whose end tag has been omitted.
|
||||
if type == "StartTag":
|
||||
# omit the thead and tfoot elements' end tag when they are
|
||||
|
||||
@@ -1,12 +1,896 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import _base
|
||||
from ..sanitizer import HTMLSanitizerMixin
|
||||
import re
|
||||
from xml.sax.saxutils import escape, unescape
|
||||
|
||||
from six.moves import urllib_parse as urlparse
|
||||
|
||||
from . import base
|
||||
from ..constants import namespaces, prefixes
|
||||
|
||||
__all__ = ["Filter"]
|
||||
|
||||
|
||||
class Filter(_base.Filter, HTMLSanitizerMixin):
|
||||
allowed_elements = frozenset((
|
||||
(namespaces['html'], 'a'),
|
||||
(namespaces['html'], 'abbr'),
|
||||
(namespaces['html'], 'acronym'),
|
||||
(namespaces['html'], 'address'),
|
||||
(namespaces['html'], 'area'),
|
||||
(namespaces['html'], 'article'),
|
||||
(namespaces['html'], 'aside'),
|
||||
(namespaces['html'], 'audio'),
|
||||
(namespaces['html'], 'b'),
|
||||
(namespaces['html'], 'big'),
|
||||
(namespaces['html'], 'blockquote'),
|
||||
(namespaces['html'], 'br'),
|
||||
(namespaces['html'], 'button'),
|
||||
(namespaces['html'], 'canvas'),
|
||||
(namespaces['html'], 'caption'),
|
||||
(namespaces['html'], 'center'),
|
||||
(namespaces['html'], 'cite'),
|
||||
(namespaces['html'], 'code'),
|
||||
(namespaces['html'], 'col'),
|
||||
(namespaces['html'], 'colgroup'),
|
||||
(namespaces['html'], 'command'),
|
||||
(namespaces['html'], 'datagrid'),
|
||||
(namespaces['html'], 'datalist'),
|
||||
(namespaces['html'], 'dd'),
|
||||
(namespaces['html'], 'del'),
|
||||
(namespaces['html'], 'details'),
|
||||
(namespaces['html'], 'dfn'),
|
||||
(namespaces['html'], 'dialog'),
|
||||
(namespaces['html'], 'dir'),
|
||||
(namespaces['html'], 'div'),
|
||||
(namespaces['html'], 'dl'),
|
||||
(namespaces['html'], 'dt'),
|
||||
(namespaces['html'], 'em'),
|
||||
(namespaces['html'], 'event-source'),
|
||||
(namespaces['html'], 'fieldset'),
|
||||
(namespaces['html'], 'figcaption'),
|
||||
(namespaces['html'], 'figure'),
|
||||
(namespaces['html'], 'footer'),
|
||||
(namespaces['html'], 'font'),
|
||||
(namespaces['html'], 'form'),
|
||||
(namespaces['html'], 'header'),
|
||||
(namespaces['html'], 'h1'),
|
||||
(namespaces['html'], 'h2'),
|
||||
(namespaces['html'], 'h3'),
|
||||
(namespaces['html'], 'h4'),
|
||||
(namespaces['html'], 'h5'),
|
||||
(namespaces['html'], 'h6'),
|
||||
(namespaces['html'], 'hr'),
|
||||
(namespaces['html'], 'i'),
|
||||
(namespaces['html'], 'img'),
|
||||
(namespaces['html'], 'input'),
|
||||
(namespaces['html'], 'ins'),
|
||||
(namespaces['html'], 'keygen'),
|
||||
(namespaces['html'], 'kbd'),
|
||||
(namespaces['html'], 'label'),
|
||||
(namespaces['html'], 'legend'),
|
||||
(namespaces['html'], 'li'),
|
||||
(namespaces['html'], 'm'),
|
||||
(namespaces['html'], 'map'),
|
||||
(namespaces['html'], 'menu'),
|
||||
(namespaces['html'], 'meter'),
|
||||
(namespaces['html'], 'multicol'),
|
||||
(namespaces['html'], 'nav'),
|
||||
(namespaces['html'], 'nextid'),
|
||||
(namespaces['html'], 'ol'),
|
||||
(namespaces['html'], 'output'),
|
||||
(namespaces['html'], 'optgroup'),
|
||||
(namespaces['html'], 'option'),
|
||||
(namespaces['html'], 'p'),
|
||||
(namespaces['html'], 'pre'),
|
||||
(namespaces['html'], 'progress'),
|
||||
(namespaces['html'], 'q'),
|
||||
(namespaces['html'], 's'),
|
||||
(namespaces['html'], 'samp'),
|
||||
(namespaces['html'], 'section'),
|
||||
(namespaces['html'], 'select'),
|
||||
(namespaces['html'], 'small'),
|
||||
(namespaces['html'], 'sound'),
|
||||
(namespaces['html'], 'source'),
|
||||
(namespaces['html'], 'spacer'),
|
||||
(namespaces['html'], 'span'),
|
||||
(namespaces['html'], 'strike'),
|
||||
(namespaces['html'], 'strong'),
|
||||
(namespaces['html'], 'sub'),
|
||||
(namespaces['html'], 'sup'),
|
||||
(namespaces['html'], 'table'),
|
||||
(namespaces['html'], 'tbody'),
|
||||
(namespaces['html'], 'td'),
|
||||
(namespaces['html'], 'textarea'),
|
||||
(namespaces['html'], 'time'),
|
||||
(namespaces['html'], 'tfoot'),
|
||||
(namespaces['html'], 'th'),
|
||||
(namespaces['html'], 'thead'),
|
||||
(namespaces['html'], 'tr'),
|
||||
(namespaces['html'], 'tt'),
|
||||
(namespaces['html'], 'u'),
|
||||
(namespaces['html'], 'ul'),
|
||||
(namespaces['html'], 'var'),
|
||||
(namespaces['html'], 'video'),
|
||||
(namespaces['mathml'], 'maction'),
|
||||
(namespaces['mathml'], 'math'),
|
||||
(namespaces['mathml'], 'merror'),
|
||||
(namespaces['mathml'], 'mfrac'),
|
||||
(namespaces['mathml'], 'mi'),
|
||||
(namespaces['mathml'], 'mmultiscripts'),
|
||||
(namespaces['mathml'], 'mn'),
|
||||
(namespaces['mathml'], 'mo'),
|
||||
(namespaces['mathml'], 'mover'),
|
||||
(namespaces['mathml'], 'mpadded'),
|
||||
(namespaces['mathml'], 'mphantom'),
|
||||
(namespaces['mathml'], 'mprescripts'),
|
||||
(namespaces['mathml'], 'mroot'),
|
||||
(namespaces['mathml'], 'mrow'),
|
||||
(namespaces['mathml'], 'mspace'),
|
||||
(namespaces['mathml'], 'msqrt'),
|
||||
(namespaces['mathml'], 'mstyle'),
|
||||
(namespaces['mathml'], 'msub'),
|
||||
(namespaces['mathml'], 'msubsup'),
|
||||
(namespaces['mathml'], 'msup'),
|
||||
(namespaces['mathml'], 'mtable'),
|
||||
(namespaces['mathml'], 'mtd'),
|
||||
(namespaces['mathml'], 'mtext'),
|
||||
(namespaces['mathml'], 'mtr'),
|
||||
(namespaces['mathml'], 'munder'),
|
||||
(namespaces['mathml'], 'munderover'),
|
||||
(namespaces['mathml'], 'none'),
|
||||
(namespaces['svg'], 'a'),
|
||||
(namespaces['svg'], 'animate'),
|
||||
(namespaces['svg'], 'animateColor'),
|
||||
(namespaces['svg'], 'animateMotion'),
|
||||
(namespaces['svg'], 'animateTransform'),
|
||||
(namespaces['svg'], 'clipPath'),
|
||||
(namespaces['svg'], 'circle'),
|
||||
(namespaces['svg'], 'defs'),
|
||||
(namespaces['svg'], 'desc'),
|
||||
(namespaces['svg'], 'ellipse'),
|
||||
(namespaces['svg'], 'font-face'),
|
||||
(namespaces['svg'], 'font-face-name'),
|
||||
(namespaces['svg'], 'font-face-src'),
|
||||
(namespaces['svg'], 'g'),
|
||||
(namespaces['svg'], 'glyph'),
|
||||
(namespaces['svg'], 'hkern'),
|
||||
(namespaces['svg'], 'linearGradient'),
|
||||
(namespaces['svg'], 'line'),
|
||||
(namespaces['svg'], 'marker'),
|
||||
(namespaces['svg'], 'metadata'),
|
||||
(namespaces['svg'], 'missing-glyph'),
|
||||
(namespaces['svg'], 'mpath'),
|
||||
(namespaces['svg'], 'path'),
|
||||
(namespaces['svg'], 'polygon'),
|
||||
(namespaces['svg'], 'polyline'),
|
||||
(namespaces['svg'], 'radialGradient'),
|
||||
(namespaces['svg'], 'rect'),
|
||||
(namespaces['svg'], 'set'),
|
||||
(namespaces['svg'], 'stop'),
|
||||
(namespaces['svg'], 'svg'),
|
||||
(namespaces['svg'], 'switch'),
|
||||
(namespaces['svg'], 'text'),
|
||||
(namespaces['svg'], 'title'),
|
||||
(namespaces['svg'], 'tspan'),
|
||||
(namespaces['svg'], 'use'),
|
||||
))
|
||||
|
||||
allowed_attributes = frozenset((
|
||||
# HTML attributes
|
||||
(None, 'abbr'),
|
||||
(None, 'accept'),
|
||||
(None, 'accept-charset'),
|
||||
(None, 'accesskey'),
|
||||
(None, 'action'),
|
||||
(None, 'align'),
|
||||
(None, 'alt'),
|
||||
(None, 'autocomplete'),
|
||||
(None, 'autofocus'),
|
||||
(None, 'axis'),
|
||||
(None, 'background'),
|
||||
(None, 'balance'),
|
||||
(None, 'bgcolor'),
|
||||
(None, 'bgproperties'),
|
||||
(None, 'border'),
|
||||
(None, 'bordercolor'),
|
||||
(None, 'bordercolordark'),
|
||||
(None, 'bordercolorlight'),
|
||||
(None, 'bottompadding'),
|
||||
(None, 'cellpadding'),
|
||||
(None, 'cellspacing'),
|
||||
(None, 'ch'),
|
||||
(None, 'challenge'),
|
||||
(None, 'char'),
|
||||
(None, 'charoff'),
|
||||
(None, 'choff'),
|
||||
(None, 'charset'),
|
||||
(None, 'checked'),
|
||||
(None, 'cite'),
|
||||
(None, 'class'),
|
||||
(None, 'clear'),
|
||||
(None, 'color'),
|
||||
(None, 'cols'),
|
||||
(None, 'colspan'),
|
||||
(None, 'compact'),
|
||||
(None, 'contenteditable'),
|
||||
(None, 'controls'),
|
||||
(None, 'coords'),
|
||||
(None, 'data'),
|
||||
(None, 'datafld'),
|
||||
(None, 'datapagesize'),
|
||||
(None, 'datasrc'),
|
||||
(None, 'datetime'),
|
||||
(None, 'default'),
|
||||
(None, 'delay'),
|
||||
(None, 'dir'),
|
||||
(None, 'disabled'),
|
||||
(None, 'draggable'),
|
||||
(None, 'dynsrc'),
|
||||
(None, 'enctype'),
|
||||
(None, 'end'),
|
||||
(None, 'face'),
|
||||
(None, 'for'),
|
||||
(None, 'form'),
|
||||
(None, 'frame'),
|
||||
(None, 'galleryimg'),
|
||||
(None, 'gutter'),
|
||||
(None, 'headers'),
|
||||
(None, 'height'),
|
||||
(None, 'hidefocus'),
|
||||
(None, 'hidden'),
|
||||
(None, 'high'),
|
||||
(None, 'href'),
|
||||
(None, 'hreflang'),
|
||||
(None, 'hspace'),
|
||||
(None, 'icon'),
|
||||
(None, 'id'),
|
||||
(None, 'inputmode'),
|
||||
(None, 'ismap'),
|
||||
(None, 'keytype'),
|
||||
(None, 'label'),
|
||||
(None, 'leftspacing'),
|
||||
(None, 'lang'),
|
||||
(None, 'list'),
|
||||
(None, 'longdesc'),
|
||||
(None, 'loop'),
|
||||
(None, 'loopcount'),
|
||||
(None, 'loopend'),
|
||||
(None, 'loopstart'),
|
||||
(None, 'low'),
|
||||
(None, 'lowsrc'),
|
||||
(None, 'max'),
|
||||
(None, 'maxlength'),
|
||||
(None, 'media'),
|
||||
(None, 'method'),
|
||||
(None, 'min'),
|
||||
(None, 'multiple'),
|
||||
(None, 'name'),
|
||||
(None, 'nohref'),
|
||||
(None, 'noshade'),
|
||||
(None, 'nowrap'),
|
||||
(None, 'open'),
|
||||
(None, 'optimum'),
|
||||
(None, 'pattern'),
|
||||
(None, 'ping'),
|
||||
(None, 'point-size'),
|
||||
(None, 'poster'),
|
||||
(None, 'pqg'),
|
||||
(None, 'preload'),
|
||||
(None, 'prompt'),
|
||||
(None, 'radiogroup'),
|
||||
(None, 'readonly'),
|
||||
(None, 'rel'),
|
||||
(None, 'repeat-max'),
|
||||
(None, 'repeat-min'),
|
||||
(None, 'replace'),
|
||||
(None, 'required'),
|
||||
(None, 'rev'),
|
||||
(None, 'rightspacing'),
|
||||
(None, 'rows'),
|
||||
(None, 'rowspan'),
|
||||
(None, 'rules'),
|
||||
(None, 'scope'),
|
||||
(None, 'selected'),
|
||||
(None, 'shape'),
|
||||
(None, 'size'),
|
||||
(None, 'span'),
|
||||
(None, 'src'),
|
||||
(None, 'start'),
|
||||
(None, 'step'),
|
||||
(None, 'style'),
|
||||
(None, 'summary'),
|
||||
(None, 'suppress'),
|
||||
(None, 'tabindex'),
|
||||
(None, 'target'),
|
||||
(None, 'template'),
|
||||
(None, 'title'),
|
||||
(None, 'toppadding'),
|
||||
(None, 'type'),
|
||||
(None, 'unselectable'),
|
||||
(None, 'usemap'),
|
||||
(None, 'urn'),
|
||||
(None, 'valign'),
|
||||
(None, 'value'),
|
||||
(None, 'variable'),
|
||||
(None, 'volume'),
|
||||
(None, 'vspace'),
|
||||
(None, 'vrml'),
|
||||
(None, 'width'),
|
||||
(None, 'wrap'),
|
||||
(namespaces['xml'], 'lang'),
|
||||
# MathML attributes
|
||||
(None, 'actiontype'),
|
||||
(None, 'align'),
|
||||
(None, 'columnalign'),
|
||||
(None, 'columnalign'),
|
||||
(None, 'columnalign'),
|
||||
(None, 'columnlines'),
|
||||
(None, 'columnspacing'),
|
||||
(None, 'columnspan'),
|
||||
(None, 'depth'),
|
||||
(None, 'display'),
|
||||
(None, 'displaystyle'),
|
||||
(None, 'equalcolumns'),
|
||||
(None, 'equalrows'),
|
||||
(None, 'fence'),
|
||||
(None, 'fontstyle'),
|
||||
(None, 'fontweight'),
|
||||
(None, 'frame'),
|
||||
(None, 'height'),
|
||||
(None, 'linethickness'),
|
||||
(None, 'lspace'),
|
||||
(None, 'mathbackground'),
|
||||
(None, 'mathcolor'),
|
||||
(None, 'mathvariant'),
|
||||
(None, 'mathvariant'),
|
||||
(None, 'maxsize'),
|
||||
(None, 'minsize'),
|
||||
(None, 'other'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowlines'),
|
||||
(None, 'rowspacing'),
|
||||
(None, 'rowspan'),
|
||||
(None, 'rspace'),
|
||||
(None, 'scriptlevel'),
|
||||
(None, 'selection'),
|
||||
(None, 'separator'),
|
||||
(None, 'stretchy'),
|
||||
(None, 'width'),
|
||||
(None, 'width'),
|
||||
(namespaces['xlink'], 'href'),
|
||||
(namespaces['xlink'], 'show'),
|
||||
(namespaces['xlink'], 'type'),
|
||||
# SVG attributes
|
||||
(None, 'accent-height'),
|
||||
(None, 'accumulate'),
|
||||
(None, 'additive'),
|
||||
(None, 'alphabetic'),
|
||||
(None, 'arabic-form'),
|
||||
(None, 'ascent'),
|
||||
(None, 'attributeName'),
|
||||
(None, 'attributeType'),
|
||||
(None, 'baseProfile'),
|
||||
(None, 'bbox'),
|
||||
(None, 'begin'),
|
||||
(None, 'by'),
|
||||
(None, 'calcMode'),
|
||||
(None, 'cap-height'),
|
||||
(None, 'class'),
|
||||
(None, 'clip-path'),
|
||||
(None, 'color'),
|
||||
(None, 'color-rendering'),
|
||||
(None, 'content'),
|
||||
(None, 'cx'),
|
||||
(None, 'cy'),
|
||||
(None, 'd'),
|
||||
(None, 'dx'),
|
||||
(None, 'dy'),
|
||||
(None, 'descent'),
|
||||
(None, 'display'),
|
||||
(None, 'dur'),
|
||||
(None, 'end'),
|
||||
(None, 'fill'),
|
||||
(None, 'fill-opacity'),
|
||||
(None, 'fill-rule'),
|
||||
(None, 'font-family'),
|
||||
(None, 'font-size'),
|
||||
(None, 'font-stretch'),
|
||||
(None, 'font-style'),
|
||||
(None, 'font-variant'),
|
||||
(None, 'font-weight'),
|
||||
(None, 'from'),
|
||||
(None, 'fx'),
|
||||
(None, 'fy'),
|
||||
(None, 'g1'),
|
||||
(None, 'g2'),
|
||||
(None, 'glyph-name'),
|
||||
(None, 'gradientUnits'),
|
||||
(None, 'hanging'),
|
||||
(None, 'height'),
|
||||
(None, 'horiz-adv-x'),
|
||||
(None, 'horiz-origin-x'),
|
||||
(None, 'id'),
|
||||
(None, 'ideographic'),
|
||||
(None, 'k'),
|
||||
(None, 'keyPoints'),
|
||||
(None, 'keySplines'),
|
||||
(None, 'keyTimes'),
|
||||
(None, 'lang'),
|
||||
(None, 'marker-end'),
|
||||
(None, 'marker-mid'),
|
||||
(None, 'marker-start'),
|
||||
(None, 'markerHeight'),
|
||||
(None, 'markerUnits'),
|
||||
(None, 'markerWidth'),
|
||||
(None, 'mathematical'),
|
||||
(None, 'max'),
|
||||
(None, 'min'),
|
||||
(None, 'name'),
|
||||
(None, 'offset'),
|
||||
(None, 'opacity'),
|
||||
(None, 'orient'),
|
||||
(None, 'origin'),
|
||||
(None, 'overline-position'),
|
||||
(None, 'overline-thickness'),
|
||||
(None, 'panose-1'),
|
||||
(None, 'path'),
|
||||
(None, 'pathLength'),
|
||||
(None, 'points'),
|
||||
(None, 'preserveAspectRatio'),
|
||||
(None, 'r'),
|
||||
(None, 'refX'),
|
||||
(None, 'refY'),
|
||||
(None, 'repeatCount'),
|
||||
(None, 'repeatDur'),
|
||||
(None, 'requiredExtensions'),
|
||||
(None, 'requiredFeatures'),
|
||||
(None, 'restart'),
|
||||
(None, 'rotate'),
|
||||
(None, 'rx'),
|
||||
(None, 'ry'),
|
||||
(None, 'slope'),
|
||||
(None, 'stemh'),
|
||||
(None, 'stemv'),
|
||||
(None, 'stop-color'),
|
||||
(None, 'stop-opacity'),
|
||||
(None, 'strikethrough-position'),
|
||||
(None, 'strikethrough-thickness'),
|
||||
(None, 'stroke'),
|
||||
(None, 'stroke-dasharray'),
|
||||
(None, 'stroke-dashoffset'),
|
||||
(None, 'stroke-linecap'),
|
||||
(None, 'stroke-linejoin'),
|
||||
(None, 'stroke-miterlimit'),
|
||||
(None, 'stroke-opacity'),
|
||||
(None, 'stroke-width'),
|
||||
(None, 'systemLanguage'),
|
||||
(None, 'target'),
|
||||
(None, 'text-anchor'),
|
||||
(None, 'to'),
|
||||
(None, 'transform'),
|
||||
(None, 'type'),
|
||||
(None, 'u1'),
|
||||
(None, 'u2'),
|
||||
(None, 'underline-position'),
|
||||
(None, 'underline-thickness'),
|
||||
(None, 'unicode'),
|
||||
(None, 'unicode-range'),
|
||||
(None, 'units-per-em'),
|
||||
(None, 'values'),
|
||||
(None, 'version'),
|
||||
(None, 'viewBox'),
|
||||
(None, 'visibility'),
|
||||
(None, 'width'),
|
||||
(None, 'widths'),
|
||||
(None, 'x'),
|
||||
(None, 'x-height'),
|
||||
(None, 'x1'),
|
||||
(None, 'x2'),
|
||||
(namespaces['xlink'], 'actuate'),
|
||||
(namespaces['xlink'], 'arcrole'),
|
||||
(namespaces['xlink'], 'href'),
|
||||
(namespaces['xlink'], 'role'),
|
||||
(namespaces['xlink'], 'show'),
|
||||
(namespaces['xlink'], 'title'),
|
||||
(namespaces['xlink'], 'type'),
|
||||
(namespaces['xml'], 'base'),
|
||||
(namespaces['xml'], 'lang'),
|
||||
(namespaces['xml'], 'space'),
|
||||
(None, 'y'),
|
||||
(None, 'y1'),
|
||||
(None, 'y2'),
|
||||
(None, 'zoomAndPan'),
|
||||
))
|
||||
|
||||
attr_val_is_uri = frozenset((
|
||||
(None, 'href'),
|
||||
(None, 'src'),
|
||||
(None, 'cite'),
|
||||
(None, 'action'),
|
||||
(None, 'longdesc'),
|
||||
(None, 'poster'),
|
||||
(None, 'background'),
|
||||
(None, 'datasrc'),
|
||||
(None, 'dynsrc'),
|
||||
(None, 'lowsrc'),
|
||||
(None, 'ping'),
|
||||
(namespaces['xlink'], 'href'),
|
||||
(namespaces['xml'], 'base'),
|
||||
))
|
||||
|
||||
svg_attr_val_allows_ref = frozenset((
|
||||
(None, 'clip-path'),
|
||||
(None, 'color-profile'),
|
||||
(None, 'cursor'),
|
||||
(None, 'fill'),
|
||||
(None, 'filter'),
|
||||
(None, 'marker'),
|
||||
(None, 'marker-start'),
|
||||
(None, 'marker-mid'),
|
||||
(None, 'marker-end'),
|
||||
(None, 'mask'),
|
||||
(None, 'stroke'),
|
||||
))
|
||||
|
||||
svg_allow_local_href = frozenset((
|
||||
(None, 'altGlyph'),
|
||||
(None, 'animate'),
|
||||
(None, 'animateColor'),
|
||||
(None, 'animateMotion'),
|
||||
(None, 'animateTransform'),
|
||||
(None, 'cursor'),
|
||||
(None, 'feImage'),
|
||||
(None, 'filter'),
|
||||
(None, 'linearGradient'),
|
||||
(None, 'pattern'),
|
||||
(None, 'radialGradient'),
|
||||
(None, 'textpath'),
|
||||
(None, 'tref'),
|
||||
(None, 'set'),
|
||||
(None, 'use')
|
||||
))
|
||||
|
||||
allowed_css_properties = frozenset((
|
||||
'azimuth',
|
||||
'background-color',
|
||||
'border-bottom-color',
|
||||
'border-collapse',
|
||||
'border-color',
|
||||
'border-left-color',
|
||||
'border-right-color',
|
||||
'border-top-color',
|
||||
'clear',
|
||||
'color',
|
||||
'cursor',
|
||||
'direction',
|
||||
'display',
|
||||
'elevation',
|
||||
'float',
|
||||
'font',
|
||||
'font-family',
|
||||
'font-size',
|
||||
'font-style',
|
||||
'font-variant',
|
||||
'font-weight',
|
||||
'height',
|
||||
'letter-spacing',
|
||||
'line-height',
|
||||
'overflow',
|
||||
'pause',
|
||||
'pause-after',
|
||||
'pause-before',
|
||||
'pitch',
|
||||
'pitch-range',
|
||||
'richness',
|
||||
'speak',
|
||||
'speak-header',
|
||||
'speak-numeral',
|
||||
'speak-punctuation',
|
||||
'speech-rate',
|
||||
'stress',
|
||||
'text-align',
|
||||
'text-decoration',
|
||||
'text-indent',
|
||||
'unicode-bidi',
|
||||
'vertical-align',
|
||||
'voice-family',
|
||||
'volume',
|
||||
'white-space',
|
||||
'width',
|
||||
))
|
||||
|
||||
allowed_css_keywords = frozenset((
|
||||
'auto',
|
||||
'aqua',
|
||||
'black',
|
||||
'block',
|
||||
'blue',
|
||||
'bold',
|
||||
'both',
|
||||
'bottom',
|
||||
'brown',
|
||||
'center',
|
||||
'collapse',
|
||||
'dashed',
|
||||
'dotted',
|
||||
'fuchsia',
|
||||
'gray',
|
||||
'green',
|
||||
'!important',
|
||||
'italic',
|
||||
'left',
|
||||
'lime',
|
||||
'maroon',
|
||||
'medium',
|
||||
'none',
|
||||
'navy',
|
||||
'normal',
|
||||
'nowrap',
|
||||
'olive',
|
||||
'pointer',
|
||||
'purple',
|
||||
'red',
|
||||
'right',
|
||||
'solid',
|
||||
'silver',
|
||||
'teal',
|
||||
'top',
|
||||
'transparent',
|
||||
'underline',
|
||||
'white',
|
||||
'yellow',
|
||||
))
|
||||
|
||||
allowed_svg_properties = frozenset((
|
||||
'fill',
|
||||
'fill-opacity',
|
||||
'fill-rule',
|
||||
'stroke',
|
||||
'stroke-width',
|
||||
'stroke-linecap',
|
||||
'stroke-linejoin',
|
||||
'stroke-opacity',
|
||||
))
|
||||
|
||||
allowed_protocols = frozenset((
|
||||
'ed2k',
|
||||
'ftp',
|
||||
'http',
|
||||
'https',
|
||||
'irc',
|
||||
'mailto',
|
||||
'news',
|
||||
'gopher',
|
||||
'nntp',
|
||||
'telnet',
|
||||
'webcal',
|
||||
'xmpp',
|
||||
'callto',
|
||||
'feed',
|
||||
'urn',
|
||||
'aim',
|
||||
'rsync',
|
||||
'tag',
|
||||
'ssh',
|
||||
'sftp',
|
||||
'rtsp',
|
||||
'afs',
|
||||
'data',
|
||||
))
|
||||
|
||||
allowed_content_types = frozenset((
|
||||
'image/png',
|
||||
'image/jpeg',
|
||||
'image/gif',
|
||||
'image/webp',
|
||||
'image/bmp',
|
||||
'text/plain',
|
||||
))
|
||||
|
||||
|
||||
data_content_type = re.compile(r'''
|
||||
^
|
||||
# Match a content type <application>/<type>
|
||||
(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
|
||||
# Match any character set and encoding
|
||||
(?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
|
||||
|(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
|
||||
# Assume the rest is data
|
||||
,.*
|
||||
$
|
||||
''',
|
||||
re.VERBOSE)
|
||||
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
|
||||
def __init__(self,
|
||||
source,
|
||||
allowed_elements=allowed_elements,
|
||||
allowed_attributes=allowed_attributes,
|
||||
allowed_css_properties=allowed_css_properties,
|
||||
allowed_css_keywords=allowed_css_keywords,
|
||||
allowed_svg_properties=allowed_svg_properties,
|
||||
allowed_protocols=allowed_protocols,
|
||||
allowed_content_types=allowed_content_types,
|
||||
attr_val_is_uri=attr_val_is_uri,
|
||||
svg_attr_val_allows_ref=svg_attr_val_allows_ref,
|
||||
svg_allow_local_href=svg_allow_local_href):
|
||||
"""Creates a Filter
|
||||
|
||||
:arg allowed_elements: set of elements to allow--everything else will
|
||||
be escaped
|
||||
|
||||
:arg allowed_attributes: set of attributes to allow in
|
||||
elements--everything else will be stripped
|
||||
|
||||
:arg allowed_css_properties: set of CSS properties to allow--everything
|
||||
else will be stripped
|
||||
|
||||
:arg allowed_css_keywords: set of CSS keywords to allow--everything
|
||||
else will be stripped
|
||||
|
||||
:arg allowed_svg_properties: set of SVG properties to allow--everything
|
||||
else will be removed
|
||||
|
||||
:arg allowed_protocols: set of allowed protocols for URIs
|
||||
|
||||
:arg allowed_content_types: set of allowed content types for ``data`` URIs.
|
||||
|
||||
:arg attr_val_is_uri: set of attributes that have URI values--values
|
||||
that have a scheme not listed in ``allowed_protocols`` are removed
|
||||
|
||||
:arg svg_attr_val_allows_ref: set of SVG attributes that can have
|
||||
references
|
||||
|
||||
:arg svg_allow_local_href: set of SVG elements that can have local
|
||||
hrefs--these are removed
|
||||
|
||||
"""
|
||||
super(Filter, self).__init__(source)
|
||||
self.allowed_elements = allowed_elements
|
||||
self.allowed_attributes = allowed_attributes
|
||||
self.allowed_css_properties = allowed_css_properties
|
||||
self.allowed_css_keywords = allowed_css_keywords
|
||||
self.allowed_svg_properties = allowed_svg_properties
|
||||
self.allowed_protocols = allowed_protocols
|
||||
self.allowed_content_types = allowed_content_types
|
||||
self.attr_val_is_uri = attr_val_is_uri
|
||||
self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
|
||||
self.svg_allow_local_href = svg_allow_local_href
|
||||
|
||||
def __iter__(self):
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
token = self.sanitize_token(token)
|
||||
if token:
|
||||
yield token
|
||||
|
||||
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
|
||||
# stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
|
||||
# are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
|
||||
# ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
|
||||
# are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
|
||||
# allowed.
|
||||
#
|
||||
# sanitize_html('<script> do_nasty_stuff() </script>')
|
||||
# => <script> do_nasty_stuff() </script>
|
||||
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
|
||||
# => <a>Click here for $100</a>
|
||||
def sanitize_token(self, token):
|
||||
|
||||
# accommodate filters which use token_type differently
|
||||
token_type = token["type"]
|
||||
if token_type in ("StartTag", "EndTag", "EmptyTag"):
|
||||
name = token["name"]
|
||||
namespace = token["namespace"]
|
||||
if ((namespace, name) in self.allowed_elements or
|
||||
(namespace is None and
|
||||
(namespaces["html"], name) in self.allowed_elements)):
|
||||
return self.allowed_token(token)
|
||||
else:
|
||||
return self.disallowed_token(token)
|
||||
elif token_type == "Comment":
|
||||
pass
|
||||
else:
|
||||
return token
|
||||
|
||||
def allowed_token(self, token):
|
||||
if "data" in token:
|
||||
attrs = token["data"]
|
||||
attr_names = set(attrs.keys())
|
||||
|
||||
# Remove forbidden attributes
|
||||
for to_remove in (attr_names - self.allowed_attributes):
|
||||
del token["data"][to_remove]
|
||||
attr_names.remove(to_remove)
|
||||
|
||||
# Remove attributes with disallowed URL values
|
||||
for attr in (attr_names & self.attr_val_is_uri):
|
||||
assert attr in attrs
|
||||
# I don't have a clue where this regexp comes from or why it matches those
|
||||
# characters, nor why we call unescape. I just know it's always been here.
|
||||
# Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
|
||||
# this will do is remove *more* than it otherwise would.
|
||||
val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
|
||||
unescape(attrs[attr])).lower()
|
||||
# remove replacement characters from unescaped characters
|
||||
val_unescaped = val_unescaped.replace("\ufffd", "")
|
||||
try:
|
||||
uri = urlparse.urlparse(val_unescaped)
|
||||
except ValueError:
|
||||
uri = None
|
||||
del attrs[attr]
|
||||
if uri and uri.scheme:
|
||||
if uri.scheme not in self.allowed_protocols:
|
||||
del attrs[attr]
|
||||
if uri.scheme == 'data':
|
||||
m = data_content_type.match(uri.path)
|
||||
if not m:
|
||||
del attrs[attr]
|
||||
elif m.group('content_type') not in self.allowed_content_types:
|
||||
del attrs[attr]
|
||||
|
||||
for attr in self.svg_attr_val_allows_ref:
|
||||
if attr in attrs:
|
||||
attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
|
||||
' ',
|
||||
unescape(attrs[attr]))
|
||||
if (token["name"] in self.svg_allow_local_href and
|
||||
(namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*',
|
||||
attrs[(namespaces['xlink'], 'href')])):
|
||||
del attrs[(namespaces['xlink'], 'href')]
|
||||
if (None, 'style') in attrs:
|
||||
attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
|
||||
token["data"] = attrs
|
||||
return token
|
||||
|
||||
def disallowed_token(self, token):
|
||||
token_type = token["type"]
|
||||
if token_type == "EndTag":
|
||||
token["data"] = "</%s>" % token["name"]
|
||||
elif token["data"]:
|
||||
assert token_type in ("StartTag", "EmptyTag")
|
||||
attrs = []
|
||||
for (ns, name), v in token["data"].items():
|
||||
attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
|
||||
token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
|
||||
else:
|
||||
token["data"] = "<%s>" % token["name"]
|
||||
if token.get("selfClosing"):
|
||||
token["data"] = token["data"][:-1] + "/>"
|
||||
|
||||
token["type"] = "Characters"
|
||||
|
||||
del token["name"]
|
||||
return token
|
||||
|
||||
def sanitize_css(self, style):
|
||||
# disallow urls
|
||||
style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
|
||||
|
||||
# gauntlet
|
||||
if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
|
||||
return ''
|
||||
if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
|
||||
return ''
|
||||
|
||||
clean = []
|
||||
for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
|
||||
if not value:
|
||||
continue
|
||||
if prop.lower() in self.allowed_css_properties:
|
||||
clean.append(prop + ': ' + value + ';')
|
||||
elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
|
||||
'padding']:
|
||||
for keyword in value.split():
|
||||
if keyword not in self.allowed_css_keywords and \
|
||||
not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa
|
||||
break
|
||||
else:
|
||||
clean.append(prop + ': ' + value + ';')
|
||||
elif prop.lower() in self.allowed_svg_properties:
|
||||
clean.append(prop + ': ' + value + ';')
|
||||
|
||||
return ' '.join(clean)
|
||||
|
||||
@@ -2,20 +2,20 @@ from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from . import _base
|
||||
from . import base
|
||||
from ..constants import rcdataElements, spaceCharacters
|
||||
spaceCharacters = "".join(spaceCharacters)
|
||||
|
||||
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
|
||||
|
||||
|
||||
class Filter(_base.Filter):
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Collapses whitespace except in pre, textarea, and script elements"""
|
||||
spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
|
||||
|
||||
def __iter__(self):
|
||||
preserve = 0
|
||||
for token in _base.Filter.__iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
type = token["type"]
|
||||
if type == "StartTag" \
|
||||
and (preserve or token["name"] in self.spacePreserveElements):
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,271 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import re
|
||||
from xml.sax.saxutils import escape, unescape
|
||||
|
||||
from .tokenizer import HTMLTokenizer
|
||||
from .constants import tokenTypes
|
||||
|
||||
|
||||
class HTMLSanitizerMixin(object):
|
||||
""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
|
||||
|
||||
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
|
||||
'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
|
||||
'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
|
||||
'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
|
||||
'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
|
||||
'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
|
||||
'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
|
||||
'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
|
||||
'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
|
||||
'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
|
||||
'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
|
||||
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
|
||||
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
|
||||
|
||||
mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
|
||||
'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
|
||||
'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
|
||||
'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
|
||||
'munderover', 'none']
|
||||
|
||||
svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
|
||||
'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
|
||||
'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
|
||||
'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
|
||||
'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
|
||||
'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
|
||||
|
||||
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
|
||||
'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
|
||||
'background', 'balance', 'bgcolor', 'bgproperties', 'border',
|
||||
'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
|
||||
'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
|
||||
'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
|
||||
'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
|
||||
'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
|
||||
'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
|
||||
'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
|
||||
'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
|
||||
'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
|
||||
'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
|
||||
'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
|
||||
'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
|
||||
'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
|
||||
'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
|
||||
'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
|
||||
'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
|
||||
'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
|
||||
'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
|
||||
'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
|
||||
'width', 'wrap', 'xml:lang']
|
||||
|
||||
mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
|
||||
'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
|
||||
'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
|
||||
'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
|
||||
'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
|
||||
'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
|
||||
'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
|
||||
'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
|
||||
'xlink:type', 'xmlns', 'xmlns:xlink']
|
||||
|
||||
svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
|
||||
'arabic-form', 'ascent', 'attributeName', 'attributeType',
|
||||
'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
|
||||
'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
|
||||
'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
|
||||
'fill-opacity', 'fill-rule', 'font-family', 'font-size',
|
||||
'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
|
||||
'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
|
||||
'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
|
||||
'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
|
||||
'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
|
||||
'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
|
||||
'opacity', 'orient', 'origin', 'overline-position',
|
||||
'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
|
||||
'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
|
||||
'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
|
||||
'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
|
||||
'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
|
||||
'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
|
||||
'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
|
||||
'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
|
||||
'transform', 'type', 'u1', 'u2', 'underline-position',
|
||||
'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
|
||||
'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
|
||||
'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
|
||||
'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
|
||||
'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
|
||||
'y1', 'y2', 'zoomAndPan']
|
||||
|
||||
attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
|
||||
'xlink:href', 'xml:base']
|
||||
|
||||
svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
|
||||
'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
|
||||
'mask', 'stroke']
|
||||
|
||||
svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
|
||||
'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
|
||||
'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
|
||||
'set', 'use']
|
||||
|
||||
acceptable_css_properties = ['azimuth', 'background-color',
|
||||
'border-bottom-color', 'border-collapse', 'border-color',
|
||||
'border-left-color', 'border-right-color', 'border-top-color', 'clear',
|
||||
'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
|
||||
'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
|
||||
'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
|
||||
'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
|
||||
'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
|
||||
'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
|
||||
'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
|
||||
'white-space', 'width']
|
||||
|
||||
acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
|
||||
'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
|
||||
'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
|
||||
'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
|
||||
'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
|
||||
'transparent', 'underline', 'white', 'yellow']
|
||||
|
||||
acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
|
||||
'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
|
||||
'stroke-opacity']
|
||||
|
||||
acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
|
||||
'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
|
||||
'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
|
||||
'ssh', 'sftp', 'rtsp', 'afs']
|
||||
|
||||
# subclasses may define their own versions of these constants
|
||||
allowed_elements = acceptable_elements + mathml_elements + svg_elements
|
||||
allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
|
||||
allowed_css_properties = acceptable_css_properties
|
||||
allowed_css_keywords = acceptable_css_keywords
|
||||
allowed_svg_properties = acceptable_svg_properties
|
||||
allowed_protocols = acceptable_protocols
|
||||
|
||||
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
|
||||
# stripping out all attributes not in ALLOWED_ATTRIBUTES. Style
|
||||
# attributes are parsed, and a restricted set, specified by
|
||||
# ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
|
||||
# attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
|
||||
# in ALLOWED_PROTOCOLS are allowed.
|
||||
#
|
||||
# sanitize_html('<script> do_nasty_stuff() </script>')
|
||||
# => &lt;script> do_nasty_stuff() &lt;/script>
|
||||
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
|
||||
# => <a>Click here for $100</a>
|
||||
def sanitize_token(self, token):
    """Dispatch one token to the matching sanitizing step.

    Tag tokens go to ``allowed_token`` or ``disallowed_token`` depending on
    whether the element name is in ``allowed_elements``.  Comment tokens
    yield ``None``; every other token is returned untouched.
    """
    # accommodate filters which use token_type differently: a key of
    # tokenTypes is mapped to its value, anything else is used as-is
    kind = token["type"]
    if kind in tokenTypes:
        kind = tokenTypes[kind]

    tag_kinds = (tokenTypes["StartTag"], tokenTypes["EndTag"],
                 tokenTypes["EmptyTag"])
    if kind in tag_kinds:
        if token["name"] in self.allowed_elements:
            handler = self.allowed_token
        else:
            handler = self.disallowed_token
        return handler(token, kind)

    if kind == tokenTypes["Comment"]:
        return None
    return token
|
||||
|
||||
def allowed_token(self, token, token_type):
    """Filter the attributes of an allowed element token in place.

    Only attributes named in ``allowed_attributes`` are kept.  URI-valued
    attributes whose scheme is not in ``allowed_protocols`` are removed,
    non-local ``url(...)`` references are blanked in SVG attributes that
    accept references, non-local ``xlink:href`` values are removed on the
    elements listed in ``svg_allow_local_href``, and ``style`` is passed
    through ``sanitize_css``.

    :arg token: token dict; ``token["data"]``, when present, is a sequence
        of ``(name, value)`` pairs and is replaced by a list of
        ``[name, value]`` lists

    :arg token_type: normalised token type (not used by this method)

    :returns: the same token dict
    """
    if "data" in token:
        # The list is walked in reverse so that, for a repeated attribute
        # name, the first occurrence in the source is the one that is kept.
        attrs = dict((name, val) for name, val in
                     token["data"][::-1]
                     if name in self.allowed_attributes)
        for attr in self.attr_val_is_uri:
            if attr not in attrs:
                continue
            # Strip backticks, U+0000-U+0020, U+007F-U+00A0 and whitespace
            # before looking at the scheme.
            val_unescaped = re.sub(r"[`\x00-\x20\x7f-\xa0\s]+", '',
                                   unescape(attrs[attr])).lower()
            # remove replacement characters from unescaped characters
            val_unescaped = val_unescaped.replace("\ufffd", "")
            if (re.match(r"^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
                    (val_unescaped.split(':')[0] not in
                     self.allowed_protocols)):
                del attrs[attr]
        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                # url(#fragment) references survive; anything else is blanked.
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))
        if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search(r'^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
            del attrs['xlink:href']
        if 'style' in attrs:
            attrs['style'] = self.sanitize_css(attrs['style'])
        token["data"] = [[name, val] for name, val in attrs.items()]
    return token
|
||||
|
||||
def disallowed_token(self, token, token_type):
    """Rewrite a disallowed tag token as a Characters token.

    The tag is rendered back to markup text and stored in ``token["data"]``;
    ``token["name"]`` is removed.  The new ``token["type"]`` is the string
    ``"Characters"`` when the incoming type was a key of ``tokenTypes``, and
    ``tokenTypes["Characters"]`` otherwise.

    :returns: the same (mutated) token dict
    """
    if token_type == tokenTypes["EndTag"]:
        markup = "</%s>" % token["name"]
    elif token["data"]:
        rendered = [' %s="%s"' % (key, escape(value))
                    for key, value in token["data"]]
        markup = "<%s%s>" % (token["name"], ''.join(rendered))
    else:
        markup = "<%s>" % token["name"]

    if token.get("selfClosing"):
        # Swap the closing ">" for "/>".
        markup = markup[:-1] + "/>"
    token["data"] = markup

    # Answer in the same type vocabulary the token arrived in.
    uses_type_names = token["type"] in list(tokenTypes.keys())
    if uses_type_names:
        token["type"] = "Characters"
    else:
        token["type"] = tokenTypes["Characters"]

    del token["name"]
    return token
|
||||
|
||||
def sanitize_css(self, style):
    """Return ``style`` reduced to the declarations considered safe.

    ``url(...)`` values are blanked out, then the string must pass two
    shape checks (restricted character set, ``prop: value;`` layout) or an
    empty string is returned.  A declaration is kept when its property is in
    ``allowed_css_properties``, when it is a background/border/margin/padding
    shorthand whose every word is in ``allowed_css_keywords`` or is a simple
    colour/length, or when the property is in ``allowed_svg_properties``.

    :arg style: the raw value of a ``style`` attribute

    :returns: the kept declarations joined by single spaces
    """
    # disallow urls
    style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)

    # gauntlet
    if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
        return ''
    if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
        return ''

    clean = []
    for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
        if not value:
            continue
        if prop.lower() in self.allowed_css_properties:
            clean.append(prop + ': ' + value + ';')
        elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
                                            'padding']:
            for keyword in value.split():
                # Use the overridable allowed_css_keywords (like the other
                # allowed_* lookups) and accept upper-case hex digits.
                if keyword not in self.allowed_css_keywords and \
                        not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
                    break
            else:
                clean.append(prop + ': ' + value + ';')
        elif prop.lower() in self.allowed_svg_properties:
            clean.append(prop + ': ' + value + ';')

    return ' '.join(clean)
|
||||
|
||||
|
||||
class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
    """Tokenizer whose token stream is passed through ``sanitize_token``."""

    def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
                 lowercaseElementName=False, lowercaseAttrName=False, parser=None):
        # Change case matching defaults as we only output lowercase html anyway
        # This solution doesn't seem ideal...
        HTMLTokenizer.__init__(
            self, stream, encoding, parseMeta, useChardet,
            lowercaseElementName, lowercaseAttrName, parser=parser)

    def __iter__(self):
        """Yield sanitized tokens, dropping any that sanitize to a falsy value."""
        for raw in HTMLTokenizer.__iter__(self):
            cleaned = self.sanitize_token(raw)
            if not cleaned:
                continue
            yield cleaned
|
||||
@@ -0,0 +1,409 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
from six import text_type
|
||||
|
||||
import re
|
||||
|
||||
from codecs import register_error, xmlcharrefreplace_errors
|
||||
|
||||
from .constants import voidElements, booleanAttributes, spaceCharacters
|
||||
from .constants import rcdataElements, entities, xmlEntities
|
||||
from . import treewalkers, _utils
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`"
|
||||
_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]")
|
||||
_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars +
|
||||
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
|
||||
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
|
||||
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
|
||||
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
|
||||
"\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
|
||||
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
|
||||
"\u3000]")
|
||||
|
||||
|
||||
_encode_entity_map = {}
|
||||
_is_ucs4 = len("\U0010FFFF") == 1
|
||||
for k, v in list(entities.items()):
|
||||
# skip multi-character entities
|
||||
if ((_is_ucs4 and len(v) > 1) or
|
||||
(not _is_ucs4 and len(v) > 2)):
|
||||
continue
|
||||
if v != "&":
|
||||
if len(v) == 2:
|
||||
v = _utils.surrogatePairToCodepoint(v)
|
||||
else:
|
||||
v = ord(v)
|
||||
if v not in _encode_entity_map or k.islower():
|
||||
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
|
||||
_encode_entity_map[v] = k
|
||||
|
||||
|
||||
def htmlentityreplace_errors(exc):
    """Codec error handler that substitutes HTML character references.

    For ``UnicodeEncodeError`` / ``UnicodeTranslateError`` the offending
    slice is replaced by a named reference when ``_encode_entity_map`` has
    one for the code point, and by a hexadecimal ``&#x...;`` reference
    otherwise.  Any other exception is handed to
    ``xmlcharrefreplace_errors``.

    :returns: ``(replacement_text, position_to_resume_at)``
    """
    if not isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
        return xmlcharrefreplace_errors(exc)

    # Collect code points, folding surrogate pairs into a single value.
    codepoints = []
    position = exc.start
    while position < exc.end:
        window = exc.object[position:min([exc.end, position + 2])]
        if _utils.isSurrogatePair(window):
            codepoints.append(
                _utils.surrogatePairToCodepoint(exc.object[position:position + 2]))
            position += 2
        else:
            codepoints.append(ord(exc.object[position]))
            position += 1

    pieces = []
    for cp in codepoints:
        entity = _encode_entity_map.get(cp)
        if not entity:
            pieces.append("&#x%s;" % (hex(cp)[2:]))
            continue
        pieces.append("&")
        pieces.append(entity)
        if not entity.endswith(";"):
            pieces.append(";")
    return ("".join(pieces), exc.end)
|
||||
|
||||
|
||||
register_error("htmlentityreplace", htmlentityreplace_errors)
|
||||
|
||||
|
||||
def serialize(input, tree="etree", encoding=None, **serializer_opts):
    """Serialize a parsed tree using the named treewalker

    :arg input: the tree to walk and serialize

    :arg tree: name of the treewalker to look up via
        ``treewalkers.getTreeWalker``

    :arg encoding: the encoding to use; passed through to
        :py:meth:`HTMLSerializer.render`

    :arg serializer_opts: keyword options forwarded to the
        :py:class:`html5lib.serializer.HTMLSerializer` that gets created

    :returns: whatever :py:meth:`HTMLSerializer.render` returns for the walk

    Example:

    >>> from html5lib.html5parser import parse
    >>> from html5lib.serializer import serialize
    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
    >>> serialize(token_stream, omit_optional_tags=False)
    '<html><head></head><body><p>Hi!</p></body></html>'

    """
    # XXX: Should we cache this?
    make_walker = treewalkers.getTreeWalker(tree)
    serializer = HTMLSerializer(**serializer_opts)
    token_stream = make_walker(input)
    return serializer.render(token_stream, encoding)
|
||||
|
||||
|
||||
class HTMLSerializer(object):
|
||||
|
||||
# attribute quoting options
|
||||
quote_attr_values = "legacy" # be secure by default
|
||||
quote_char = '"'
|
||||
use_best_quote_char = True
|
||||
|
||||
# tag syntax options
|
||||
omit_optional_tags = True
|
||||
minimize_boolean_attributes = True
|
||||
use_trailing_solidus = False
|
||||
space_before_trailing_solidus = True
|
||||
|
||||
# escaping options
|
||||
escape_lt_in_attrs = False
|
||||
escape_rcdata = False
|
||||
resolve_entities = True
|
||||
|
||||
# miscellaneous options
|
||||
alphabetical_attributes = False
|
||||
inject_meta_charset = True
|
||||
strip_whitespace = False
|
||||
sanitize = False
|
||||
|
||||
options = ("quote_attr_values", "quote_char", "use_best_quote_char",
|
||||
"omit_optional_tags", "minimize_boolean_attributes",
|
||||
"use_trailing_solidus", "space_before_trailing_solidus",
|
||||
"escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
|
||||
"alphabetical_attributes", "inject_meta_charset",
|
||||
"strip_whitespace", "sanitize")
|
||||
|
||||
def __init__(self, **kwargs):
    """Initialize HTMLSerializer

    Every keyword must be one of the names in ``self.options``; anything
    else raises ``TypeError``.  Options that are not passed keep the
    class-level default.

    :arg inject_meta_charset: Whether or not to inject the meta charset.

        Defaults to ``True``.

    :arg quote_attr_values: Whether to quote attribute values that don't
        require quoting per legacy browser behavior (``"legacy"``), when
        required by the standard (``"spec"``), or always (``"always"``).

        Defaults to ``"legacy"``.

    :arg quote_char: Use given quote character for attribute quoting.
        Passing it switches ``use_best_quote_char`` off unless that option
        is also passed explicitly.

        Defaults to ``"`` which will use double quotes unless attribute
        value contains a double quote, in which case single quotes are
        used.

    :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
        values.

        Defaults to ``False``.

    :arg escape_rcdata: Whether to escape characters that need to be
        escaped within normal elements within rcdata elements such as
        style.

        Defaults to ``False``.

    :arg resolve_entities: Whether to resolve named character entities that
        appear in the source tree. The XML predefined entities &lt; &gt;
        &amp; &quot; &apos; are unaffected by this setting.

        Defaults to ``True``.

    :arg strip_whitespace: Whether to remove semantically meaningless
        whitespace. (This compresses all whitespace to a single space
        except within ``pre``.)

        Defaults to ``False``.

    :arg minimize_boolean_attributes: Shortens boolean attributes to give
        just the attribute value, for example::

          <input disabled="disabled">

        becomes::

          <input disabled>

        Defaults to ``True``.

    :arg use_trailing_solidus: Includes a close-tag slash at the end of the
        start tag of void elements (empty elements whose end tag is
        forbidden). E.g. ``<hr/>``.

        Defaults to ``False``.

    :arg space_before_trailing_solidus: Places a space immediately before
        the closing slash in a tag using a trailing solidus. E.g.
        ``<hr />``. Requires ``use_trailing_solidus=True``.

        Defaults to ``True``.

    :arg sanitize: Strip all unsafe or unknown constructs from output.
        See :py:class:`html5lib.filters.sanitizer.Filter`.

        Defaults to ``False``.

    :arg omit_optional_tags: Omit start/end tags that are optional.

        Defaults to ``True``.

    :arg alphabetical_attributes: Reorder attributes to be in alphabetical order.

        Defaults to ``False``.

    """
    unknown = frozenset(kwargs) - frozenset(self.options)
    if len(unknown) > 0:
        raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unknown)))

    # An explicit quote character wins over automatic quote selection.
    if 'quote_char' in kwargs:
        self.use_best_quote_char = False

    for option in self.options:
        current = getattr(self, option)
        setattr(self, option, kwargs.get(option, current))

    self.errors = []
    self.strict = False
|
||||
|
||||
def encode(self, string):
    """Encode ``string`` with ``self.encoding`` using the ``htmlentityreplace`` error handler.

    The text is returned unchanged when ``self.encoding`` is falsy.
    """
    assert(isinstance(string, text_type))
    if not self.encoding:
        return string
    return string.encode(self.encoding, "htmlentityreplace")
|
||||
|
||||
def encodeStrict(self, string):
    """Encode ``string`` with ``self.encoding`` using the ``strict`` error handler.

    The text is returned unchanged when ``self.encoding`` is falsy.
    """
    assert(isinstance(string, text_type))
    if not self.encoding:
        return string
    return string.encode(self.encoding, "strict")
|
||||
|
||||
def serialize(self, treewalker, encoding=None):
    """Generate the serialized output for a token stream, chunk by chunk.

    The stream is first wrapped in the filters selected by the serializer
    options (meta charset injection, alphabetical attributes, whitespace
    stripping, sanitizing, optional-tag omission), then each token is
    turned into markup.  Problems are recorded through
    ``self.serializeError``.

    :arg treewalker: iterable of token dicts

    :arg encoding: when truthy, chunks are encoded with this encoding;
        otherwise text chunks are yielded

    :returns: a generator of output chunks
    """
    # pylint:disable=too-many-nested-blocks
    self.encoding = encoding
    in_cdata = False
    self.errors = []

    if encoding and self.inject_meta_charset:
        from .filters.inject_meta_charset import Filter
        treewalker = Filter(treewalker, encoding)
    # Alphabetical attributes is here under the assumption that none of
    # the later filters add or change order of attributes; it needs to be
    # before the sanitizer so escaped elements come out correctly
    if self.alphabetical_attributes:
        from .filters.alphabeticalattributes import Filter
        treewalker = Filter(treewalker)
    # WhitespaceFilter should be used before OptionalTagFilter
    # for maximum efficiency of this latter filter
    if self.strip_whitespace:
        from .filters.whitespace import Filter
        treewalker = Filter(treewalker)
    if self.sanitize:
        from .filters.sanitizer import Filter
        treewalker = Filter(treewalker)
    if self.omit_optional_tags:
        from .filters.optionaltags import Filter
        treewalker = Filter(treewalker)

    for token in treewalker:
        type = token["type"]
        if type == "Doctype":
            doctype = "<!DOCTYPE %s" % token["name"]

            if token["publicId"]:
                doctype += ' PUBLIC "%s"' % token["publicId"]
            elif token["systemId"]:
                doctype += " SYSTEM"
            if token["systemId"]:
                # Pick the quote character the identifier does not contain.
                if token["systemId"].find('"') >= 0:
                    if token["systemId"].find("'") >= 0:
                        self.serializeError("System identifer contains both single and double quote characters")
                    quote_char = "'"
                else:
                    quote_char = '"'
                doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)

            doctype += ">"
            yield self.encodeStrict(doctype)

        elif type in ("Characters", "SpaceCharacters"):
            if type == "SpaceCharacters" or in_cdata:
                if in_cdata and token["data"].find("</") >= 0:
                    self.serializeError("Unexpected </ in CDATA")
                yield self.encode(token["data"])
            else:
                yield self.encode(escape(token["data"]))

        elif type in ("StartTag", "EmptyTag"):
            name = token["name"]
            yield self.encodeStrict("<%s" % name)
            if name in rcdataElements and not self.escape_rcdata:
                in_cdata = True
            elif in_cdata:
                self.serializeError("Unexpected child element of a CDATA element")
            for (_, attr_name), attr_value in token["data"].items():
                # TODO: Add namespace support here
                k = attr_name
                v = attr_value
                yield self.encodeStrict(' ')

                yield self.encodeStrict(k)
                if not self.minimize_boolean_attributes or \
                    (k not in booleanAttributes.get(name, tuple()) and
                     k not in booleanAttributes.get("", tuple())):
                    yield self.encodeStrict("=")
                    # Decide on quoting from the raw value, before escaping.
                    if self.quote_attr_values == "always" or len(v) == 0:
                        quote_attr = True
                    elif self.quote_attr_values == "spec":
                        quote_attr = _quoteAttributeSpec.search(v) is not None
                    elif self.quote_attr_values == "legacy":
                        quote_attr = _quoteAttributeLegacy.search(v) is not None
                    else:
                        raise ValueError("quote_attr_values must be one of: "
                                         "'always', 'spec', or 'legacy'")
                    # Ampersands are escaped first so that the references
                    # added below are not escaped a second time.
                    v = v.replace("&", "&amp;")
                    if self.escape_lt_in_attrs:
                        v = v.replace("<", "&lt;")
                    if quote_attr:
                        quote_char = self.quote_char
                        if self.use_best_quote_char:
                            if "'" in v and '"' not in v:
                                quote_char = '"'
                            elif '"' in v and "'" not in v:
                                quote_char = "'"
                        # Escape whichever quote delimits the value.
                        if quote_char == "'":
                            v = v.replace("'", "&#39;")
                        else:
                            v = v.replace('"', "&quot;")
                        yield self.encodeStrict(quote_char)
                        yield self.encode(v)
                        yield self.encodeStrict(quote_char)
                    else:
                        yield self.encode(v)
            if name in voidElements and self.use_trailing_solidus:
                if self.space_before_trailing_solidus:
                    yield self.encodeStrict(" /")
                else:
                    yield self.encodeStrict("/")
            yield self.encode(">")

        elif type == "EndTag":
            name = token["name"]
            if name in rcdataElements:
                in_cdata = False
            elif in_cdata:
                self.serializeError("Unexpected child element of a CDATA element")
            yield self.encodeStrict("</%s>" % name)

        elif type == "Comment":
            data = token["data"]
            if data.find("--") >= 0:
                self.serializeError("Comment contains --")
            yield self.encodeStrict("<!--%s-->" % token["data"])

        elif type == "Entity":
            name = token["name"]
            key = name + ";"
            if key not in entities:
                self.serializeError("Entity %s not recognized" % name)
            if self.resolve_entities and key not in xmlEntities:
                data = entities[key]
            else:
                data = "&%s;" % name
            yield self.encodeStrict(data)

        else:
            self.serializeError(token["data"])
|
||||
|
||||
def render(self, treewalker, encoding=None):
    """Serialize the stream from the treewalker and join it into one value

    :arg treewalker: the treewalker to serialize

    :arg encoding: the string encoding to use; when given, the chunks are
        joined as bytes, otherwise as text

    :returns: the serialized tree

    Example:

    >>> from html5lib import parse, getTreeWalker
    >>> from html5lib.serializer import HTMLSerializer
    >>> token_stream = parse('<html><body>Hi!</body></html>')
    >>> walker = getTreeWalker('etree')
    >>> serializer = HTMLSerializer(omit_optional_tags=False)
    >>> serializer.render(walker(token_stream))
    '<html><head></head><body>Hi!</body></html>'

    """
    if not encoding:
        text_chunks = list(self.serialize(treewalker))
        return "".join(text_chunks)
    byte_chunks = list(self.serialize(treewalker, encoding))
    return b"".join(byte_chunks)
|
||||
|
||||
def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
    """Record ``data`` in ``self.errors``; raise ``SerializeError`` when ``self.strict`` is set."""
    # XXX The idea is to make data mandatory.
    self.errors.append(data)
    if not self.strict:
        return
    raise SerializeError
|
||||
|
||||
|
||||
class SerializeError(Exception):
    """Error in serialized tree"""
|
||||
@@ -1,16 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from .. import treewalkers
|
||||
|
||||
from .htmlserializer import HTMLSerializer
|
||||
|
||||
|
||||
def serialize(input, tree="etree", format="html", encoding=None,
              **serializer_opts):
    """Serialize ``input`` through the treewalker named by ``tree``.

    Only ``format="html"`` is supported; any other value raises
    ``ValueError``.  Remaining keyword options are forwarded to
    ``HTMLSerializer``.
    """
    # XXX: Should we cache this?
    make_walker = treewalkers.getTreeWalker(tree)
    if format != "html":
        raise ValueError("type must be html")
    serializer = HTMLSerializer(**serializer_opts)
    return serializer.render(make_walker(input), encoding)
|
||||
@@ -1,320 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
from six import text_type
|
||||
|
||||
import gettext
|
||||
_ = gettext.gettext
|
||||
|
||||
try:
|
||||
from functools import reduce
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
from ..constants import voidElements, booleanAttributes, spaceCharacters
|
||||
from ..constants import rcdataElements, entities, xmlEntities
|
||||
from .. import utils
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
spaceCharacters = "".join(spaceCharacters)
|
||||
|
||||
try:
|
||||
from codecs import register_error, xmlcharrefreplace_errors
|
||||
except ImportError:
|
||||
unicode_encode_errors = "strict"
|
||||
else:
|
||||
unicode_encode_errors = "htmlentityreplace"
|
||||
|
||||
encode_entity_map = {}
|
||||
is_ucs4 = len("\U0010FFFF") == 1
|
||||
for k, v in list(entities.items()):
|
||||
# skip multi-character entities
|
||||
if ((is_ucs4 and len(v) > 1) or
|
||||
(not is_ucs4 and len(v) > 2)):
|
||||
continue
|
||||
if v != "&":
|
||||
if len(v) == 2:
|
||||
v = utils.surrogatePairToCodepoint(v)
|
||||
else:
|
||||
v = ord(v)
|
||||
if not v in encode_entity_map or k.islower():
|
||||
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
|
||||
encode_entity_map[v] = k
|
||||
|
||||
def htmlentityreplace_errors(exc):
    """Codec error handler that substitutes HTML character references.

    For ``UnicodeEncodeError`` / ``UnicodeTranslateError`` the offending
    slice is replaced by a named reference when ``encode_entity_map`` has
    one for the code point, and by a hexadecimal ``&#x...;`` reference
    otherwise.  Any other exception is handed to
    ``xmlcharrefreplace_errors``.

    :returns: ``(replacement_text, position_to_resume_at)``
    """
    if not isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
        return xmlcharrefreplace_errors(exc)

    # Collect code points, folding surrogate pairs into a single value.
    codepoints = []
    position = exc.start
    while position < exc.end:
        window = exc.object[position:min([exc.end, position + 2])]
        if utils.isSurrogatePair(window):
            codepoints.append(
                utils.surrogatePairToCodepoint(exc.object[position:position + 2]))
            position += 2
        else:
            codepoints.append(ord(exc.object[position]))
            position += 1

    pieces = []
    for cp in codepoints:
        entity = encode_entity_map.get(cp)
        if not entity:
            pieces.append("&#x%s;" % (hex(cp)[2:]))
            continue
        pieces.append("&")
        pieces.append(entity)
        if not entity.endswith(";"):
            pieces.append(";")
    return ("".join(pieces), exc.end)
|
||||
|
||||
register_error(unicode_encode_errors, htmlentityreplace_errors)
|
||||
|
||||
del register_error
|
||||
|
||||
|
||||
class HTMLSerializer(object):
    """Serialize a stream of treewalker tokens into HTML text or bytes.

    Behaviour is controlled by the class-level option defaults below; any of
    the names listed in ``options`` can be overridden per instance through
    keyword arguments to ``__init__``.
    """

    # attribute quoting options
    quote_attr_values = False
    quote_char = '"'
    use_best_quote_char = True

    # tag syntax options
    omit_optional_tags = True
    minimize_boolean_attributes = True
    use_trailing_solidus = False
    space_before_trailing_solidus = True

    # escaping options
    escape_lt_in_attrs = False
    escape_rcdata = False
    resolve_entities = True

    # miscellaneous options
    alphabetical_attributes = False
    inject_meta_charset = True
    strip_whitespace = False
    sanitize = False

    options = ("quote_attr_values", "quote_char", "use_best_quote_char",
               "omit_optional_tags", "minimize_boolean_attributes",
               "use_trailing_solidus", "space_before_trailing_solidus",
               "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
               "alphabetical_attributes", "inject_meta_charset",
               "strip_whitespace", "sanitize")

    # Output encoding. serialize() sets this on every call; the class-level
    # default keeps encode()/encodeStrict() usable before the first call.
    encoding = None

    def __init__(self, **kwargs):
        """Initialize HTMLSerializer.

        Keyword options (default given first unless specified) include:

        inject_meta_charset=True|False
          Whether to insert a meta element to define the character set of the
          document.
        quote_attr_values=True|False
          Whether to quote attribute values that don't require quoting
          per HTML5 parsing rules.
        quote_char=u'"'|u"'"
          Use given quote character for attribute quoting. Default is to
          use double quote unless attribute value contains a double quote,
          in which case single quotes are used instead.
        escape_lt_in_attrs=False|True
          Whether to escape < in attribute values.
        escape_rcdata=False|True
          Whether to escape characters that need to be escaped within normal
          elements within rcdata elements such as style.
        resolve_entities=True|False
          Whether to resolve named character entities that appear in the
          source tree. The XML predefined entities &lt; &gt; &amp; &quot; &apos;
          are unaffected by this setting.
        strip_whitespace=False|True
          Whether to remove semantically meaningless whitespace. (This
          compresses all whitespace to a single space except within pre.)
        minimize_boolean_attributes=True|False
          Shortens boolean attributes to give just the attribute value,
          for example <input disabled="disabled"> becomes <input disabled>.
        use_trailing_solidus=False|True
          Includes a close-tag slash at the end of the start tag of void
          elements (empty elements whose end tag is forbidden). E.g. <hr/>.
        space_before_trailing_solidus=True|False
          Places a space immediately before the closing slash in a tag
          using a trailing solidus. E.g. <hr />. Requires use_trailing_solidus.
        sanitize=False|True
          Strip all unsafe or unknown constructs from output.
          See `html5lib user documentation`_
        omit_optional_tags=True|False
          Omit start/end tags that are optional.
        alphabetical_attributes=False|True
          Reorder attributes to be in alphabetical order.

        .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
        """
        # An explicit quote_char disables automatic quote selection unless
        # the caller also passes use_best_quote_char.
        if 'quote_char' in kwargs:
            self.use_best_quote_char = False
        for attr in self.options:
            setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
        self.errors = []
        self.strict = False

    def encode(self, string):
        """Encode ``string`` with the current encoding, replacing characters
        that cannot be encoded via the ``unicode_encode_errors`` handler.

        Returns ``string`` unchanged when no encoding is set.
        """
        assert isinstance(string, text_type)
        if self.encoding:
            return string.encode(self.encoding, unicode_encode_errors)
        else:
            return string

    def encodeStrict(self, string):
        """Encode ``string`` with the current encoding using the ``strict``
        error handler; used for markup where a replacement is not acceptable.

        Returns ``string`` unchanged when no encoding is set.
        """
        assert isinstance(string, text_type)
        if self.encoding:
            return string.encode(self.encoding, "strict")
        else:
            return string

    def serialize(self, treewalker, encoding=None):
        """Yield serialized output chunks for every token in ``treewalker``.

        ``treewalker`` is an iterable of token dicts keyed by ``"type"``.
        Chunks are bytes when ``encoding`` is given and text otherwise.
        Problems are reported through ``serializeError``, which records them
        in ``self.errors``.
        """
        self.encoding = encoding
        in_cdata = False
        self.errors = []

        if encoding and self.inject_meta_charset:
            from ..filters.inject_meta_charset import Filter
            treewalker = Filter(treewalker, encoding)
        # WhitespaceFilter should be used before OptionalTagFilter
        # for maximum efficiency of this latter filter
        if self.strip_whitespace:
            from ..filters.whitespace import Filter
            treewalker = Filter(treewalker)
        if self.sanitize:
            from ..filters.sanitizer import Filter
            treewalker = Filter(treewalker)
        if self.omit_optional_tags:
            from ..filters.optionaltags import Filter
            treewalker = Filter(treewalker)
        # Alphabetical attributes must be last, as other filters
        # could add attributes and alter the order
        if self.alphabetical_attributes:
            from ..filters.alphabeticalattributes import Filter
            treewalker = Filter(treewalker)

        for token in treewalker:
            token_type = token["type"]
            if token_type == "Doctype":
                doctype = "<!DOCTYPE %s" % token["name"]

                if token["publicId"]:
                    doctype += ' PUBLIC "%s"' % token["publicId"]
                elif token["systemId"]:
                    doctype += " SYSTEM"
                if token["systemId"]:
                    # Quote with whichever character the identifier does
                    # not contain; both present is reported as an error.
                    if '"' in token["systemId"]:
                        if "'" in token["systemId"]:
                            self.serializeError(_("System identifer contains both single and double quote characters"))
                        quote_char = "'"
                    else:
                        quote_char = '"'
                    doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)

                doctype += ">"
                yield self.encodeStrict(doctype)

            elif token_type in ("Characters", "SpaceCharacters"):
                if token_type == "SpaceCharacters" or in_cdata:
                    # Emitted verbatim, without escaping.
                    if in_cdata and "</" in token["data"]:
                        self.serializeError(_("Unexpected </ in CDATA"))
                    yield self.encode(token["data"])
                else:
                    yield self.encode(escape(token["data"]))

            elif token_type in ("StartTag", "EmptyTag"):
                name = token["name"]
                yield self.encodeStrict("<%s" % name)
                if name in rcdataElements and not self.escape_rcdata:
                    in_cdata = True
                elif in_cdata:
                    self.serializeError(_("Unexpected child element of a CDATA element"))
                for (attr_namespace, attr_name), attr_value in token["data"].items():
                    # TODO: Add namespace support here
                    k = attr_name
                    v = attr_value
                    yield self.encodeStrict(' ')

                    yield self.encodeStrict(k)
                    if not self.minimize_boolean_attributes or \
                        (k not in booleanAttributes.get(name, tuple()) and
                         k not in booleanAttributes.get("", tuple())):
                        yield self.encodeStrict("=")
                        if self.quote_attr_values or not v:
                            quote_attr = True
                        else:
                            # Quote when the value contains whitespace or
                            # one of > " ' =
                            quote_attr = any(c in v for c in
                                             spaceCharacters + ">\"'=")
                        v = v.replace("&", "&amp;")
                        if self.escape_lt_in_attrs:
                            v = v.replace("<", "&lt;")
                        if quote_attr:
                            quote_char = self.quote_char
                            if self.use_best_quote_char:
                                if "'" in v and '"' not in v:
                                    quote_char = '"'
                                elif '"' in v and "'" not in v:
                                    quote_char = "'"
                            # Escape occurrences of the chosen quote char.
                            if quote_char == "'":
                                v = v.replace("'", "&#39;")
                            else:
                                v = v.replace('"', "&quot;")
                            yield self.encodeStrict(quote_char)
                            yield self.encode(v)
                            yield self.encodeStrict(quote_char)
                        else:
                            yield self.encode(v)
                if name in voidElements and self.use_trailing_solidus:
                    if self.space_before_trailing_solidus:
                        yield self.encodeStrict(" /")
                    else:
                        yield self.encodeStrict("/")
                yield self.encode(">")

            elif token_type == "EndTag":
                name = token["name"]
                if name in rcdataElements:
                    in_cdata = False
                elif in_cdata:
                    self.serializeError(_("Unexpected child element of a CDATA element"))
                yield self.encodeStrict("</%s>" % name)

            elif token_type == "Comment":
                data = token["data"]
                if "--" in data:
                    self.serializeError(_("Comment contains --"))
                yield self.encodeStrict("<!--%s-->" % data)

            elif token_type == "Entity":
                name = token["name"]
                key = name + ";"
                known = key in entities
                if not known:
                    # Translate the template first, then interpolate.
                    self.serializeError(_("Entity %s not recognized") % name)
                # An unknown entity cannot be resolved; emit it as a
                # reference instead of failing with KeyError.
                if self.resolve_entities and known and key not in xmlEntities:
                    data = entities[key]
                else:
                    data = "&%s;" % name
                yield self.encodeStrict(data)

            else:
                self.serializeError(token["data"])

    def render(self, treewalker, encoding=None):
        """Serialize ``treewalker`` and return the result as one object:
        bytes when ``encoding`` is given, text otherwise."""
        if encoding:
            return b"".join(self.serialize(treewalker, encoding))
        else:
            return "".join(self.serialize(treewalker))

    def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
        """Record ``data`` in ``self.errors``; raise ``SerializeError`` when
        ``self.strict`` is set."""
        # XXX The idea is to make data mandatory.
        self.errors.append(data)
        if self.strict:
            raise SerializeError
|
||||
|
||||
|
||||
class SerializeError(Exception):
    """Error in serialized tree"""
    # Must be a class, not a function: the serializer raises it directly,
    # and only BaseException subclasses can be raised or caught.
    pass
|
||||
@@ -0,0 +1,108 @@
|
||||
from __future__ import print_function
|
||||
import os.path
|
||||
import sys
|
||||
|
||||
import pkg_resources
|
||||
import pytest
|
||||
|
||||
from .tree_construction import TreeConstructionFile
|
||||
from .tokenizer import TokenizerFile
|
||||
from .sanitizer import SanitizerFile
|
||||
|
||||
_dir = os.path.abspath(os.path.dirname(__file__))
|
||||
_root = os.path.join(_dir, "..", "..")
|
||||
_testdata = os.path.join(_dir, "testdata")
|
||||
_tree_construction = os.path.join(_testdata, "tree-construction")
|
||||
_tokenizer = os.path.join(_testdata, "tokenizer")
|
||||
_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")
|
||||
|
||||
|
||||
def fail_if_missing_pytest_expect():
    """Throws an exception halting pytest if pytest-expect isn't working"""
    try:
        from pytest_expect import expect  # noqa
    except ImportError:
        header = '*' * 78
        # Print a banner to stderr, then re-raise the original ImportError
        # so the failure still carries its traceback.
        print(
            '\n' +
            header + '\n' +
            'ERROR: Either pytest-expect or its dependency u-msgpack-python is not\n' +
            'installed. Please install them both before running pytest.\n' +
            header + '\n',
            file=sys.stderr
        )
        raise
|
||||
|
||||
|
||||
fail_if_missing_pytest_expect()
|
||||
|
||||
|
||||
def pytest_configure(config):
    """pytest hook: check test prerequisites and exit with a combined
    message if any are missing.

    The testdata directory is always checked. When
    ``config.option.update_xfail`` is set, the optional requirements listed
    in ``requirements-optional.txt`` and a distinct cElementTree
    implementation are checked as well.
    """
    msgs = []

    if not os.path.exists(_testdata):
        msg = "testdata not available! "
        if os.path.exists(os.path.join(_root, ".git")):
            msg += ("Please run git submodule update --init --recursive " +
                    "and then run tests again.")
        else:
            msg += ("The testdata doesn't appear to be included with this package, " +
                    "so finding the right version will be hard. :(")
        msgs.append(msg)

    if config.option.update_xfail:
        # Check for optional requirements
        req_file = os.path.join(_root, "requirements-optional.txt")
        if os.path.exists(req_file):
            with open(req_file, "r") as fp:
                for line in fp:
                    # Strip first so indented comments and "-r" includes
                    # are skipped rather than parsed as requirements.
                    line = line.strip()
                    if not line or line.startswith(("-r", "#")):
                        continue
                    if ";" in line:
                        spec, marker = line.split(";", 1)
                    else:
                        spec, marker = line, None
                    req = pkg_resources.Requirement.parse(spec)
                    if marker and not pkg_resources.evaluate_marker(marker):
                        msgs.append("%s not available in this environment" % spec)
                    else:
                        try:
                            installed = pkg_resources.working_set.find(req)
                        except pkg_resources.VersionConflict:
                            msgs.append("Outdated version of %s installed, need %s" % (req.name, spec))
                        else:
                            if not installed:
                                msgs.append("Need %s" % spec)

        # Check cElementTree
        # NOTE(review): nesting under update_xfail is reconstructed (the
        # source lost its indentation) - confirm against the repository.
        import xml.etree.ElementTree as ElementTree

        try:
            import xml.etree.cElementTree as cElementTree
        except ImportError:
            msgs.append("cElementTree unable to be imported")
        else:
            if cElementTree.Element is ElementTree.Element:
                msgs.append("cElementTree is just an alias for ElementTree")

    if msgs:
        pytest.exit("\n".join(msgs))
|
||||
|
||||
|
||||
def pytest_collect_file(path, parent):
    """pytest hook: return a custom collector for html5lib test data files.

    Files under the tree-construction, tokenizer or sanitizer test-data
    directories are matched by extension (``.dat``, ``.test``, ``.dat``).
    Returns ``None`` implicitly for every other file.
    """
    # Collect the file's directory and all of its ancestors; dirname() of
    # the filesystem root is the root itself, which ends the loop.
    current = os.path.abspath(path.dirname)
    dir_and_parents = set()
    while current not in dir_and_parents:
        dir_and_parents.add(current)
        current = os.path.dirname(current)

    if _tree_construction in dir_and_parents:
        if path.ext == ".dat":
            return TreeConstructionFile(path, parent)
    elif _tokenizer in dir_and_parents:
        if path.ext == ".test":
            return TokenizerFile(path, parent)
    elif _sanitizer_testdata in dir_and_parents:
        if path.ext == ".dat":
            return SanitizerFile(path, parent)
|
||||
@@ -1,41 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Allow us to import from the src directory
|
||||
os.chdir(os.path.split(os.path.abspath(__file__))[0])
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
|
||||
|
||||
from html5lib.tokenizer import HTMLTokenizer
|
||||
|
||||
|
||||
class HTMLParser(object):
    """ Fake parser to test tokenizer output """

    def parse(self, stream, output=True):
        """Run ``HTMLTokenizer`` over ``stream`` to completion, printing
        each token when ``output`` is true."""
        tokenizer = HTMLTokenizer(stream)
        for token in tokenizer:
            if output:
                print(token)
|
||||
|
||||
if __name__ == "__main__":
    x = HTMLParser()
    if len(sys.argv) > 1:
        if len(sys.argv) > 2:
            # Any second argument enables profiling. cProfile/pstats replace
            # hotshot, which only exists on Python 2.
            import cProfile
            import pstats
            prof = cProfile.Profile()
            prof.runcall(x.parse, sys.argv[1], False)
            prof.dump_stats('stats.prof')
            stats = pstats.Stats('stats.prof')
            stats.strip_dirs()
            stats.sort_stats('time')
            stats.print_stats()
        else:
            x.parse(sys.argv[1])
    else:
        print("Usage: python mockParser.py filename [stats]\n"
              "If stats is specified the cProfile profiler will run and output the\n"
              "stats instead.\n")
|
||||
@@ -1,36 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
|
||||
def f1():
    """Benchmark case: join three strings with augmented assignment (+=)."""
    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    # The result is discarded on purpose; only the operation is timed.
    x += y + z
|
||||
|
||||
|
||||
def f2():
    """Benchmark case: join three strings with chained ``+``."""
    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    # The result is discarded on purpose; only the operation is timed.
    x = x + y + z
|
||||
|
||||
|
||||
def f3():
    """Benchmark case: join three strings with ``str.join``."""
    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    # The result is discarded on purpose; only the operation is timed.
    x = "".join((x, y, z))
|
||||
|
||||
|
||||
def f4():
    """Benchmark case: join three strings with ``%`` formatting."""
    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    # The result is discarded on purpose; only the operation is timed.
    x = "%s%s%s" % (x, y, z)
|
||||
|
||||
import timeit

# Time f1..f4: three repeats of one million calls each, printing all
# timings and the best one.
for x in range(4):
    name = "f%s" % (x + 1)
    # The timed statement must *call* the function; a bare name would only
    # measure a global lookup. The setup imports it by name.
    t = timeit.Timer(name + "()", "from __main__ import " + name)
    r = t.repeat(3, 1000000)
    print(r, min(r))
|
||||
@@ -0,0 +1,50 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import codecs
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from html5lib import parseFragment, serialize
|
||||
|
||||
|
||||
class SanitizerFile(pytest.File):
    """pytest collector for a JSON file holding a list of sanitizer tests."""

    def collect(self):
        """Yield one ``SanitizerTest`` per entry of the JSON list, named by
        its index in the file."""
        # Load everything first so the file is closed before yielding.
        with codecs.open(str(self.fspath), "r", encoding="utf-8") as fp:
            tests = json.load(fp)
        for i, test in enumerate(tests):
            yield SanitizerTest(str(i), self, test=test)
|
||||
|
||||
|
||||
class SanitizerTest(pytest.Item):
    """A single sanitizer test case: parse ``test["input"]`` as a fragment,
    serialize it with sanitizing enabled and compare with
    ``test["output"]``."""

    def __init__(self, name, parent, test):
        super(SanitizerTest, self).__init__(name, parent)
        self.obj = lambda: 1  # this is to hack around skipif needing a function!
        self.test = test

    def runtest(self):
        """Run the comparison; the assertion message shows input, expected
        and received markup."""
        source = self.test["input"]
        expected = self.test["output"]

        parsed = parseFragment(source)
        serialized = serialize(parsed,
                               sanitize=True,
                               omit_optional_tags=False,
                               use_trailing_solidus=True,
                               space_before_trailing_solidus=False,
                               quote_attr_values="always",
                               quote_char="'",
                               alphabetical_attributes=True)
        errorMsg = "\n".join(["\n\nInput:", source,
                              "\nExpected:", expected,
                              "\nReceived:", serialized])
        assert expected == serialized, errorMsg

    def repr_failure(self, excinfo):
        """Return a short failure representation with the traceback cut to
        start at this file."""
        traceback = excinfo.traceback
        ntraceback = traceback.cut(path=__file__)
        excinfo.traceback = ntraceback.filter()

        return excinfo.getrepr(funcargs=True,
                               showlocals=False,
                               style="short", tbfilter=False)
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
# pylint:disable=wrong-import-position
|
||||
|
||||
import os
|
||||
import sys
|
||||
import codecs
|
||||
@@ -13,44 +15,66 @@ sys.path.insert(0, os.path.abspath(os.path.join(base_path,
|
||||
os.path.pardir,
|
||||
os.path.pardir)))
|
||||
|
||||
from html5lib import treebuilders
|
||||
from html5lib import treebuilders, treewalkers, treeadapters # noqa
|
||||
del base_path
|
||||
|
||||
# Build a dict of avaliable trees
|
||||
treeTypes = {"DOM": treebuilders.getTreeBuilder("dom")}
|
||||
# Build a dict of available trees
|
||||
treeTypes = {}
|
||||
|
||||
# Try whatever etree implementations are avaliable from a list that are
|
||||
#"supposed" to work
|
||||
try:
|
||||
import xml.etree.ElementTree as ElementTree
|
||||
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
|
||||
except ImportError:
|
||||
try:
|
||||
import elementtree.ElementTree as ElementTree
|
||||
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
|
||||
except ImportError:
|
||||
pass
|
||||
# DOM impls
|
||||
treeTypes["DOM"] = {
|
||||
"builder": treebuilders.getTreeBuilder("dom"),
|
||||
"walker": treewalkers.getTreeWalker("dom")
|
||||
}
|
||||
|
||||
# ElementTree impls
|
||||
import xml.etree.ElementTree as ElementTree # noqa
|
||||
treeTypes['ElementTree'] = {
|
||||
"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
|
||||
"walker": treewalkers.getTreeWalker("etree", ElementTree)
|
||||
}
|
||||
|
||||
try:
|
||||
import xml.etree.cElementTree as cElementTree
|
||||
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
|
||||
import xml.etree.cElementTree as cElementTree # noqa
|
||||
except ImportError:
|
||||
try:
|
||||
import cElementTree
|
||||
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import lxml.etree as lxml # flake8: noqa
|
||||
except ImportError:
|
||||
pass
|
||||
treeTypes['cElementTree'] = None
|
||||
else:
|
||||
treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
|
||||
# On Python 3.3 and above cElementTree is an alias, don't run them twice.
|
||||
if cElementTree.Element is ElementTree.Element:
|
||||
treeTypes['cElementTree'] = None
|
||||
else:
|
||||
treeTypes['cElementTree'] = {
|
||||
"builder": treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True),
|
||||
"walker": treewalkers.getTreeWalker("etree", cElementTree)
|
||||
}
|
||||
|
||||
try:
|
||||
import lxml.etree as lxml # noqa
|
||||
except ImportError:
|
||||
treeTypes['lxml'] = None
|
||||
else:
|
||||
treeTypes['lxml'] = {
|
||||
"builder": treebuilders.getTreeBuilder("lxml"),
|
||||
"walker": treewalkers.getTreeWalker("lxml")
|
||||
}
|
||||
|
||||
# Genshi impls
|
||||
try:
|
||||
import genshi # noqa
|
||||
except ImportError:
|
||||
treeTypes["genshi"] = None
|
||||
else:
|
||||
treeTypes["genshi"] = {
|
||||
"builder": treebuilders.getTreeBuilder("dom"),
|
||||
"adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
|
||||
"walker": treewalkers.getTreeWalker("genshi")
|
||||
}
|
||||
|
||||
# pylint:enable=wrong-import-position
|
||||
|
||||
|
||||
def get_data_files(subdirectory, files='*.dat'):
|
||||
return glob.glob(os.path.join(test_dir, subdirectory, files))
|
||||
def get_data_files(subdirectory, files='*.dat', search_dir=test_dir):
|
||||
return sorted(glob.glob(os.path.join(search_dir, subdirectory, files)))
|
||||
|
||||
|
||||
class DefaultDict(dict):
|
||||
@@ -71,9 +95,6 @@ class TestData(object):
|
||||
self.encoding = encoding
|
||||
self.newTestHeading = newTestHeading
|
||||
|
||||
def __del__(self):
|
||||
self.f.close()
|
||||
|
||||
def __iter__(self):
|
||||
data = DefaultDict(None)
|
||||
key = None
|
||||
@@ -128,7 +149,7 @@ convertExpected = convert(2)
|
||||
def errorMessage(input, expected, actual):
|
||||
msg = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
|
||||
(repr(input), repr(expected), repr(actual)))
|
||||
if sys.version_info.major == 2:
|
||||
if sys.version_info[0] == 2:
|
||||
msg = msg.encode("ascii", "backslashreplace")
|
||||
return msg
|
||||
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
import pytest
|
||||
|
||||
import html5lib
|
||||
from html5lib.filters.alphabeticalattributes import Filter
|
||||
from html5lib.serializer import HTMLSerializer
|
||||
|
||||
|
||||
@pytest.mark.parametrize('msg, attrs, expected_attrs', [
    (
        'no attrs',
        {},
        {}
    ),
    (
        'one attr',
        {(None, 'alt'): 'image'},
        OrderedDict([((None, 'alt'), 'image')])
    ),
    (
        'multiple attrs',
        {
            (None, 'src'): 'foo',
            (None, 'alt'): 'image',
            (None, 'style'): 'border: 1px solid black;'
        },
        OrderedDict([
            ((None, 'alt'), 'image'),
            ((None, 'src'), 'foo'),
            ((None, 'style'), 'border: 1px solid black;')
        ])
    ),
])
def test_alphabetizing(msg, attrs, expected_attrs):
    """The filter returns a start tag's attributes in alphabetical order.

    ``msg`` only labels the parametrized case.
    """
    tokens = [{'type': 'StartTag', 'name': 'img', 'data': attrs}]
    output_tokens = list(Filter(tokens))

    attrs = output_tokens[0]['data']
    assert attrs == expected_attrs
    # Mapping equality ignores order unless both sides are OrderedDicts,
    # so check the key order explicitly as well.
    assert list(attrs) == list(expected_attrs)
|
||||
|
||||
|
||||
def test_with_different_namespaces():
    """Attributes with and without a namespace are ordered together."""
    tokens = [{
        'type': 'StartTag',
        'name': 'pattern',
        'data': {
            (None, 'id'): 'patt1',
            ('http://www.w3.org/1999/xlink', 'href'): '#patt2'
        }
    }]
    output_tokens = list(Filter(tokens))

    attrs = output_tokens[0]['data']
    expected = OrderedDict([
        ((None, 'id'), 'patt1'),
        (('http://www.w3.org/1999/xlink', 'href'), '#patt2')
    ])
    assert attrs == expected
    # Make the ordering check independent of the returned mapping type.
    assert list(attrs) == list(expected)
|
||||
|
||||
|
||||
def test_with_serializer():
    """Verify filter works in the context of everything else"""
    parser = html5lib.HTMLParser()
    dom = parser.parseFragment('<svg><pattern xlink:href="#patt2" id="patt1"></svg>')
    walker = html5lib.getTreeWalker('etree')
    ser = HTMLSerializer(
        alphabetical_attributes=True,
        quote_attr_values='always'
    )

    # FIXME(willkg): The "xlink" namespace gets dropped by the serializer. When
    # that gets fixed, we can fix this expected result.
    assert (
        ser.render(walker(dom)) ==
        '<svg><pattern id="patt1" href="#patt2"></pattern></svg>'
    )
|
||||
@@ -1,66 +1,116 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import os
|
||||
import unittest
|
||||
|
||||
try:
|
||||
unittest.TestCase.assertEqual
|
||||
except AttributeError:
|
||||
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
|
||||
import pytest
|
||||
|
||||
from .support import get_data_files, TestData, test_dir, errorMessage
|
||||
from html5lib import HTMLParser, inputstream
|
||||
from .support import get_data_files, test_dir, errorMessage, TestData as _TestData
|
||||
from html5lib import HTMLParser, _inputstream
|
||||
|
||||
|
||||
class Html5EncodingTestCase(unittest.TestCase):
|
||||
def test_codec_name_a(self):
|
||||
self.assertEqual(inputstream.codecName("utf-8"), "utf-8")
|
||||
def test_basic_prescan_length():
|
||||
data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
|
||||
pad = 1024 - len(data) + 1
|
||||
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
|
||||
assert len(data) == 1024 # Sanity
|
||||
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
|
||||
assert 'utf-8' == stream.charEncoding[0].name
|
||||
|
||||
def test_codec_name_b(self):
|
||||
self.assertEqual(inputstream.codecName("utf8"), "utf-8")
|
||||
|
||||
def test_codec_name_c(self):
|
||||
self.assertEqual(inputstream.codecName(" utf8 "), "utf-8")
|
||||
def test_parser_reparse():
|
||||
data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
|
||||
pad = 10240 - len(data) + 1
|
||||
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
|
||||
assert len(data) == 10240 # Sanity
|
||||
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
|
||||
assert 'windows-1252' == stream.charEncoding[0].name
|
||||
p = HTMLParser(namespaceHTMLElements=False)
|
||||
doc = p.parse(data, useChardet=False)
|
||||
assert 'utf-8' == p.documentEncoding
|
||||
assert doc.find(".//title").text == "Caf\u00E9"
|
||||
|
||||
def test_codec_name_d(self):
|
||||
self.assertEqual(inputstream.codecName("ISO_8859--1"), "windows-1252")
|
||||
|
||||
@pytest.mark.parametrize("expected,data,kwargs", [
|
||||
("utf-16le", b"\xFF\xFE", {"override_encoding": "iso-8859-2"}),
|
||||
("utf-16be", b"\xFE\xFF", {"override_encoding": "iso-8859-2"}),
|
||||
("utf-8", b"\xEF\xBB\xBF", {"override_encoding": "iso-8859-2"}),
|
||||
("iso-8859-2", b"", {"override_encoding": "iso-8859-2", "transport_encoding": "iso-8859-3"}),
|
||||
("iso-8859-2", b"<meta charset=iso-8859-3>", {"transport_encoding": "iso-8859-2"}),
|
||||
("iso-8859-2", b"<meta charset=iso-8859-2>", {"same_origin_parent_encoding": "iso-8859-3"}),
|
||||
("iso-8859-2", b"", {"same_origin_parent_encoding": "iso-8859-2", "likely_encoding": "iso-8859-3"}),
|
||||
("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16", "likely_encoding": "iso-8859-2"}),
|
||||
("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16be", "likely_encoding": "iso-8859-2"}),
|
||||
("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16le", "likely_encoding": "iso-8859-2"}),
|
||||
("iso-8859-2", b"", {"likely_encoding": "iso-8859-2", "default_encoding": "iso-8859-3"}),
|
||||
("iso-8859-2", b"", {"default_encoding": "iso-8859-2"}),
|
||||
("windows-1252", b"", {"default_encoding": "totally-bogus-string"}),
|
||||
("windows-1252", b"", {}),
|
||||
])
|
||||
def test_parser_args(expected, data, kwargs):
|
||||
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
|
||||
assert expected == stream.charEncoding[0].name
|
||||
p = HTMLParser()
|
||||
p.parse(data, useChardet=False, **kwargs)
|
||||
assert expected == p.documentEncoding
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kwargs", [
|
||||
{"override_encoding": "iso-8859-2"},
|
||||
{"override_encoding": None},
|
||||
{"transport_encoding": "iso-8859-2"},
|
||||
{"transport_encoding": None},
|
||||
{"same_origin_parent_encoding": "iso-8859-2"},
|
||||
{"same_origin_parent_encoding": None},
|
||||
{"likely_encoding": "iso-8859-2"},
|
||||
{"likely_encoding": None},
|
||||
{"default_encoding": "iso-8859-2"},
|
||||
{"default_encoding": None},
|
||||
{"foo_encoding": "iso-8859-2"},
|
||||
{"foo_encoding": None},
|
||||
])
|
||||
def test_parser_args_raises(kwargs):
|
||||
with pytest.raises(TypeError) as exc_info:
|
||||
p = HTMLParser()
|
||||
p.parse("", useChardet=False, **kwargs)
|
||||
assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
|
||||
|
||||
|
||||
def runParserEncodingTest(data, encoding):
|
||||
p = HTMLParser()
|
||||
assert p.documentEncoding is None
|
||||
p.parse(data, useChardet=False)
|
||||
encoding = encoding.lower().decode("ascii")
|
||||
|
||||
assert encoding == p.tokenizer.stream.charEncoding[0], errorMessage(data, encoding, p.tokenizer.stream.charEncoding[0])
|
||||
assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
|
||||
|
||||
|
||||
def runPreScanEncodingTest(data, encoding):
|
||||
stream = inputstream.HTMLBinaryInputStream(data, chardet=False)
|
||||
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
|
||||
encoding = encoding.lower().decode("ascii")
|
||||
|
||||
# Very crude way to ignore irrelevant tests
|
||||
if len(data) > stream.numBytesMeta:
|
||||
return
|
||||
|
||||
assert encoding == stream.charEncoding[0], errorMessage(data, encoding, stream.charEncoding[0])
|
||||
assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
|
||||
|
||||
|
||||
def test_encoding():
|
||||
for filename in get_data_files("encoding"):
|
||||
tests = TestData(filename, b"data", encoding=None)
|
||||
for idx, test in enumerate(tests):
|
||||
tests = _TestData(filename, b"data", encoding=None)
|
||||
for test in tests:
|
||||
yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
|
||||
yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
|
||||
|
||||
|
||||
# pylint:disable=wrong-import-position
|
||||
try:
|
||||
try:
|
||||
import charade # flake8: noqa
|
||||
except ImportError:
|
||||
import chardet # flake8: noqa
|
||||
import chardet # noqa
|
||||
except ImportError:
|
||||
print("charade/chardet not found, skipping chardet tests")
|
||||
print("chardet not found, skipping chardet tests")
|
||||
else:
|
||||
def test_chardet():
|
||||
with open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt"), "rb") as fp:
|
||||
encoding = inputstream.HTMLInputStream(fp.read()).charEncoding
|
||||
assert encoding[0].lower() == "big5"
|
||||
with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
|
||||
encoding = _inputstream.HTMLInputStream(fp.read()).charEncoding
|
||||
assert encoding[0].name == "big5"
|
||||
# pylint:enable=wrong-import-position
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user