Compare commits
618 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 50ceb6ef90 | |||
| a8bce7c4c4 | |||
| f888cadb8f | |||
| ccf264cffb | |||
| 19a66dcf1c | |||
| 67b20b357d | |||
| fd06dbc434 | |||
| d33bc1b148 | |||
| 8de63e92e5 | |||
| a18d01213b | |||
| 2788b0e0b2 | |||
| 67ac4b61f8 | |||
| dabef2240d | |||
| 432255d088 | |||
| 69096d89b2 | |||
| 71fc91fe35 | |||
| 0a4014c4bf | |||
| 1b83203a64 | |||
| fc0ea00214 | |||
| 60ff16c174 | |||
| a352be89f5 | |||
| 06e8c0e1ff | |||
| a0a29618bf | |||
| 2e5f0551a1 | |||
| 6bd0f26296 | |||
| 58aa0da32f | |||
| 3f47c1bcb8 | |||
| 2625ca752f | |||
| 56d16616ff | |||
| 734dbf663e | |||
| 7dacd80660 | |||
| c77489a5be | |||
| 25f204b330 | |||
| 89dded387d | |||
| a9b677f0ce | |||
| 6b918be799 | |||
| f259682391 | |||
| 8a059c988e | |||
| 8512940ccf | |||
| de2b11f69a | |||
| df1fba83a8 | |||
| d98ae74c8a | |||
| 6484646122 | |||
| 52bac14a2e | |||
| 0be589bc5f | |||
| e083e133eb | |||
| c787e671c3 | |||
| 31ff93c3f1 | |||
| 289f174e2b | |||
| ea03f3fc4d | |||
| 740fc93c13 | |||
| e94bd3fcb9 | |||
| dba469750b | |||
| c7fe6076cb | |||
| 356f578014 | |||
| b151ed4c55 | |||
| 9455e3b52b | |||
| 60e2656541 | |||
| fcb1a8a6a7 | |||
| 9e5829151d | |||
| 1f0a713f9b | |||
| ff49dd4512 | |||
| 3cf83b5bf7 | |||
| c51ce55d32 | |||
| ee9268957d | |||
| 5d1858d5da | |||
| d59abea1f5 | |||
| e6b79334d8 | |||
| 88d2a44f08 | |||
| 78e47d3cd5 | |||
| 6b6af347da | |||
| dccee96cf1 | |||
| 92b24de7cd | |||
| 1225a4887c | |||
| 2a82857570 | |||
| 831bec3630 | |||
| 55cbf2478a | |||
| 20a850b9e9 | |||
| 11c649a7af | |||
| c1e5a2077b | |||
| dc96f626dd | |||
| 46f48023f4 | |||
| e3d83f6dc2 | |||
| fc484e569f | |||
| a92f3e2480 | |||
| 064c447528 | |||
| 64c1bcd9e6 | |||
| 0cca4a2ebe | |||
| c4c26a76f1 | |||
| 1a638431d7 | |||
| 5d5fa21630 | |||
| e78ace4664 | |||
| 37596c412c | |||
| d200021243 | |||
| 1a999e202f | |||
| fd748b29e9 | |||
| 775d1e3cf1 | |||
| c6a1df9a79 | |||
| 77861a4c6d | |||
| bf1e1c3139 | |||
| a564a1d808 | |||
| 22ac935f9b | |||
| 02e2bcb417 | |||
| 3445259cde | |||
| c20c32c17d | |||
| 3fec766890 | |||
| f208a24213 | |||
| 9e9dfb3f4d | |||
| 83eecf09ed | |||
| 1aebe8d0dd | |||
| bb64e482df | |||
| 1841a72ca7 | |||
| 997d4aa1cf | |||
| d517e86333 | |||
| 8bcfc712fb | |||
| c0cf2fd78e | |||
| 0a7de0e9b6 | |||
| 1e2a127dac | |||
| 5b8cd215e4 | |||
| 7583edf3fe | |||
| 2f219a1a81 | |||
| 9127c38297 | |||
| 0c379f8b9f | |||
| d2b617bdf4 | |||
| 6d6f6d9356 | |||
| 8ffb20ebe3 | |||
| eed7b9da0c | |||
| 802381b2bc | |||
| f265c861d2 | |||
| 1dc7b4b5e4 | |||
| c48aa2b255 | |||
| 66859802f9 | |||
| 433c8e987b | |||
| aa477ca48c | |||
| 65b502afa4 | |||
| 06c0b44589 | |||
| d651f2cbb7 | |||
| 8b5be8ea4b | |||
| f4e82c560d | |||
| c23b3e93a6 | |||
| de447d2d0b | |||
| 95b1272018 | |||
| 11d111da7c | |||
| 638dec0f04 | |||
| a0ab6e406a | |||
| 23242c0f52 | |||
| 48bf70e825 | |||
| ada0b96872 | |||
| 0e4917bba9 | |||
| 8169d31e86 | |||
| 75b83aa163 | |||
| d2022de970 | |||
| 8db1cdacb4 | |||
| 527d171a6a | |||
| 20620cfa7e | |||
| 4d03ca078d | |||
| 775e2cca47 | |||
| 7cb2486d3e | |||
| 02a3ecc9fe | |||
| 54435398af | |||
| ffc42883de | |||
| 0cf0371a43 | |||
| f5156bcea7 | |||
| efdf3b2c9d | |||
| c3d3163392 | |||
| c91d5ca483 | |||
| 5f0982970d | |||
| ee05da70f4 | |||
| 04c283c48d | |||
| 836945c95c | |||
| bd4c180c07 | |||
| e1f5290365 | |||
| eefffcfb1b | |||
| 9e088a5e9d | |||
| 317c02bf06 | |||
| 22724c269c | |||
| 2a48782b6b | |||
| e7c3039fde | |||
| 2afba02b59 | |||
| 94928c2930 | |||
| 2c25191291 | |||
| ba2f3f2172 | |||
| aa5cba9347 | |||
| 5f40452f57 | |||
| 2dd9b1723b | |||
| ee54839f28 | |||
| c2f054a25e | |||
| f095d5c99c | |||
| ab93f9809a | |||
| bbb9a62357 | |||
| 82ffed699f | |||
| 4751ea8396 | |||
| c15d8fbe58 | |||
| b379468b47 | |||
| 0deb3eae21 | |||
| 0c1042ec5c | |||
| 05d0de5120 | |||
| 2fa217d5d9 | |||
| a65b5a5d82 | |||
| 7bb42e95d8 | |||
| db536502a1 | |||
| 47c8f1a2e6 | |||
| 30a0f11515 | |||
| 9bf5123a00 | |||
| f337b53ae3 | |||
| aea6050d71 | |||
| 13d5e0761e | |||
| ce28d0284c | |||
| 1a0bb9c3e4 | |||
| d0c71b4b67 | |||
| b3f062956d | |||
| 1a853a780c | |||
| 5c47ddeb2d | |||
| b51deb5d01 | |||
| cbf5ea69be | |||
| e139ffefe6 | |||
| dc0a8deb40 | |||
| 97e93cd10a | |||
| 03c934cf21 | |||
| 92d0d70258 | |||
| d44298993c | |||
| 12300d4115 | |||
| b4f08f61a6 | |||
| 861a25be41 | |||
| 3e175109a6 | |||
| fb2210f2fd | |||
| e928918201 | |||
| df607e5772 | |||
| a7cc470645 | |||
| 4e6421b928 | |||
| df48e8fccd | |||
| 58111bf204 | |||
| 8c02e75fed | |||
| 6f3f1cb4b5 | |||
| dd27997deb | |||
| a1f70d1d4d | |||
| 7da0bac643 | |||
| b3ab2a451c | |||
| 850f836ebd | |||
| d9fa9d03da | |||
| 76c20dc3d7 | |||
| 4568e222d1 | |||
| 344025226a | |||
| f546fcffce | |||
| 068c2d4d00 | |||
| ccf5a902e5 | |||
| 8c72cf9057 | |||
| 1ce14aa231 | |||
| 643485b879 | |||
| 5b3d9f26be | |||
| 70674fbce7 | |||
| f48c0799c0 | |||
| 3bc646187f | |||
| b692ebde6f | |||
| d2a665624a | |||
| 10c8b8ceff | |||
| 92edfc7312 | |||
| eeaeb80f0f | |||
| 6204572ddc | |||
| 14f2f45f20 | |||
| 8ac6c9d7a7 | |||
| 237a47b8ed | |||
| 96bdf606e2 | |||
| 5cc4dcf10b | |||
| a0a3c39606 | |||
| b4cc35b109 | |||
| 72eeb7eb35 | |||
| 44f97411a8 | |||
| ce2296c95d | |||
| 3fe0500746 | |||
| a8daaa787a | |||
| ae9aef9899 | |||
| ad9be91f45 | |||
| 216512788c | |||
| 2483a9c901 | |||
| 9147ed90b7 | |||
| a4ce8c8c52 | |||
| dc83d36193 | |||
| 7dbc466a58 | |||
| 5bb36bc87c | |||
| 7301cd259b | |||
| 5ba5a9dfc4 | |||
| 66e7c60767 | |||
| 22dd7ef093 | |||
| e03bb4f280 | |||
| 4d15283473 | |||
| b1dfbffc4f | |||
| a8de8dcc48 | |||
| e7cdbbcacb | |||
| 060ba2a5be | |||
| 72a21e1ef4 | |||
| cbfb498b0f | |||
| 0ec11c532e | |||
| 65201749f7 | |||
| 8c569183be | |||
| c3665f04d6 | |||
| 2406e0ad49 | |||
| a9490b0838 | |||
| 77e1c69f6b | |||
| f2d8139bef | |||
| 92594dba1d | |||
| 9a28ea7672 | |||
| b87c6c24d8 | |||
| 0c166ff36a | |||
| b7c9530ac0 | |||
| e20f102cb9 | |||
| 7856cc5bb3 | |||
| 663fdf6e2f | |||
| 1867aed769 | |||
| 72aa6150b5 | |||
| 247ae71777 | |||
| bbf5d6712c | |||
| d415f6a9f2 | |||
| 1912881d05 | |||
| a85d9e5442 | |||
| fe2f5b2d8f | |||
| cae64feaf9 | |||
| 6338757a9b | |||
| 60eb08c834 | |||
| cbee0bd6c8 | |||
| db9da54391 | |||
| 5d7f9ced17 | |||
| e3825fbf96 | |||
| 71db2ae1c9 | |||
| b7f12e4291 | |||
| 20f18fc391 | |||
| b6185cc2cc | |||
| 265804a834 | |||
| 9e09de5f1e | |||
| db5f85272f | |||
| cf2f3d0dca | |||
| 0d46b3fa19 | |||
| 768046e889 | |||
| ac940ab25d | |||
| 3cee69d23a | |||
| deaa164f25 | |||
| 68bb7acf95 | |||
| 0eefe3200c | |||
| df65bae58f | |||
| b0443dc812 | |||
| 63545ee732 | |||
| 253d099738 | |||
| 2ea27f2597 | |||
| 2c4b68d719 | |||
| a70f9c0673 | |||
| 540a35cb0e | |||
| 1d9a2ff6fc | |||
| 01a5d71b4a | |||
| 4f11fa53cd | |||
| 8f6540118b | |||
| 089618b8a6 | |||
| 6f87037c78 | |||
| d9b36c0616 | |||
| df2bc9767c | |||
| 508810d5c7 | |||
| dc6770ecaa | |||
| 25b8702a42 | |||
| 5a5aa510c5 | |||
| 5d7777095e | |||
| 95ad5b6fbe | |||
| 9e3227ba0b | |||
| d725c87cae | |||
| 6c3bf03bc3 | |||
| 20c04f32be | |||
| 29bafc6215 | |||
| d3279ef923 | |||
| 291e210e63 | |||
| 535b1aaba9 | |||
| 48cafadbdd | |||
| 39d442c2b3 | |||
| 2bf590b6c4 | |||
| 2e80832154 | |||
| f64e7c1a61 | |||
| 3edf593a18 | |||
| 7ffe41ae9b | |||
| 10a7c327f0 | |||
| a32a2cabd8 | |||
| 7d77870daf | |||
| 45d7233485 | |||
| d03afb5d47 | |||
| d5da52d0fb | |||
| 25714acd38 | |||
| afe05779cd | |||
| 8da007f4eb | |||
| 58b2630968 | |||
| 05e03b3ea4 | |||
| c2781e834f | |||
| 557348831d | |||
| cbf03250f9 | |||
| 7a3bc7086e | |||
| 8047c66869 | |||
| 9d17e2ce9a | |||
| 1f855a7fd7 | |||
| 6310b8f4aa | |||
| 839146b8c7 | |||
| 817a6300ea | |||
| 3a5effaa52 | |||
| 6e8ce9d23d | |||
| cae120cfd4 | |||
| 734e0f7128 | |||
| 4c76439f4e | |||
| 2488d4db53 | |||
| 565987faff | |||
| e14402c6a0 | |||
| ccfc40f6fc | |||
| d69a331b87 | |||
| 01fd66c35a | |||
| 73b33fe697 | |||
| 9e730a2b85 | |||
| 6395b0e945 | |||
| 79d16b98f1 | |||
| eb4fa8d85d | |||
| 7e2d5dfa5d | |||
| f785ba8932 | |||
| 63cf4a2d67 | |||
| 9e270bb53f | |||
| 3bafcb6b4e | |||
| b770a40150 | |||
| 3b50b58aac | |||
| 61ad27845b | |||
| 47bb8563ca | |||
| c2c0df0e88 | |||
| 22f0f8cd60 | |||
| 5af10f1c6b | |||
| 67b322025d | |||
| c58a438ad2 | |||
| c3f5a6f9e2 | |||
| 90422448aa | |||
| 66f7019bf3 | |||
| e0e5e29ba1 | |||
| 5f9010e4b9 | |||
| dd40f272cb | |||
| 9abb018fe8 | |||
| d38a22901c | |||
| 0b8eace5bb | |||
| 16b69ef3cc | |||
| acd556d6f1 | |||
| 2a5db95ef2 | |||
| 3c2c71a7da | |||
| 838ef0cdc7 | |||
| 68229fdd72 | |||
| 60d9d2c1b3 | |||
| 5b71c17e99 | |||
| 4497b522f7 | |||
| f0a209742f | |||
| 2ccd568ea2 | |||
| b8ceeab46e | |||
| e504a6b97a | |||
| 4bc8e5031a | |||
| 8a44a798aa | |||
| 6423a7f89d | |||
| 878cdb31fd | |||
| e7981c2e59 | |||
| 3d4a166b2c | |||
| 29a4bb42f4 | |||
| b5d9773704 | |||
| e650089e8c | |||
| ea9b0cb827 | |||
| aaa1eb95ed | |||
| 5a48886bcc | |||
| 43f51d44f2 | |||
| ea7eecccb1 | |||
| 72b7e6b06d | |||
| 0c54f0e36f | |||
| 80560c8eba | |||
| 03c2f7cdcd | |||
| 8b44187299 | |||
| 32eb094f09 | |||
| 811db632d2 | |||
| 1543c50d98 | |||
| 7f7b609b7a | |||
| 4bb8ad5b4c | |||
| 5bede9c89c | |||
| 8af4853964 | |||
| b67e68c83e | |||
| 8eca3e102c | |||
| 09a6e26055 | |||
| a070680c46 | |||
| e655f599bf | |||
| 1791737863 | |||
| ea4f2783a9 | |||
| fc532e30d3 | |||
| c4a79c0e37 | |||
| bba95401bb | |||
| 5c887e1d9d | |||
| b9024945be | |||
| 1479c6ebc5 | |||
| 36c6742532 | |||
| cd9a1402cb | |||
| 2e0745e1b2 | |||
| cd69d46d1b | |||
| f73acb88f9 | |||
| 06622dba80 | |||
| d488361d3f | |||
| 1eee95b31a | |||
| a806229848 | |||
| 0a49932835 | |||
| 6b8cff6dba | |||
| 4d5b0b9583 | |||
| 859f216462 | |||
| 5e4d1cbdab | |||
| 7ce4d61b31 | |||
| c17c59aea6 | |||
| 20952b5c26 | |||
| 7b53161041 | |||
| 5356845e74 | |||
| 298d75abf6 | |||
| 5e57a6d61a | |||
| 97318bfd07 | |||
| 0dc7d74663 | |||
| 89be51441f | |||
| 7dc5b9f7ee | |||
| 5fef8400b9 | |||
| 21ab4f0aa4 | |||
| a15586f1a2 | |||
| 0cdb13cc57 | |||
| d2750b87e9 | |||
| 5c6c7a4459 | |||
| 7e33e534a0 | |||
| 84fd20c05f | |||
| 3eea1840ff | |||
| 0c2c75417f | |||
| ab090747eb | |||
| e2765c34d7 | |||
| e512184d6d | |||
| 898fe7c443 | |||
| da863a44e0 | |||
| ad3b8e6da2 | |||
| 378c0eca3d | |||
| 7ca73b4fca | |||
| f8bfb5dc08 | |||
| 8be8a74239 | |||
| b7d2971b46 | |||
| 5fc9126bd5 | |||
| fb2273e47c | |||
| de5d569197 | |||
| 962131c610 | |||
| 379745f822 | |||
| 31a3d05e0c | |||
| bab5202d5c | |||
| 308d9beac4 | |||
| 1c649f82bf | |||
| 892e5116b4 | |||
| 50723e890d | |||
| a0f54be69b | |||
| 7289e89ceb | |||
| 68a26f7bb6 | |||
| 20304d5b5c | |||
| d026791864 | |||
| c196270242 | |||
| 8d872c8b5a | |||
| 67fe2eebd4 | |||
| 793fa74096 | |||
| 5c03988e3c | |||
| 00adb257e8 | |||
| 49086ea93c | |||
| 0db5652961 | |||
| b895c845a8 | |||
| a2e45c3ef7 | |||
| 8a726e9c89 | |||
| 760e346ab9 | |||
| 7edbdb2333 | |||
| 75478d5ff1 | |||
| c94a13ef54 | |||
| bb2ed5a116 | |||
| a97f5853ad | |||
| 3c40f0ccf0 | |||
| ecbd374dc7 | |||
| d318946791 | |||
| 262b7e250c | |||
| 28a67f4c74 | |||
| 0bfcf96773 | |||
| 6a20750bfd | |||
| c6937325fa | |||
| 3ac8a5cd86 | |||
| d17ced4c45 | |||
| 31ad78d28a | |||
| f31d756185 | |||
| 5a9337c2e2 | |||
| 49b124e7bf | |||
| 031bee20d9 | |||
| 124f6da70c | |||
| 5e3f56a1b0 | |||
| f2be750e2e | |||
| 82b1cdb957 | |||
| bc1f99f6af | |||
| c031eb5829 | |||
| 426fe24894 | |||
| 87e49b8c5f | |||
| d372aa469f | |||
| 7c80ea515f | |||
| 53e92ed3dc | |||
| bdcac383fd | |||
| eb64716012 | |||
| 8062abade9 | |||
| 4acf26b73d | |||
| 00947efe53 | |||
| f35b53a071 | |||
| 2211f99f36 | |||
| 1a6378f24a | |||
| 3702c308b3 | |||
| d5c08a5f51 | |||
| d83286a5ff | |||
| 8d9c9bbfa6 | |||
| a15d2e6587 | |||
| 9f81317ecd | |||
| 4e3f1a926f | |||
| e6e31449f6 | |||
| e573ddbb25 | |||
| d510fd3b71 | |||
| 5a316915a5 | |||
| 3d6cba7b43 | |||
| 9c7716c4a4 | |||
| b1dd85a5b2 | |||
| 02091c7969 | |||
| d4b43f58c5 | |||
| 66bd71e8c9 | |||
| 56b9f971e4 |
+348
@@ -1,3 +1,351 @@
|
||||
2.6.5.3268
|
||||
subscene, addic7ed
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- clarify README
|
||||
- core: fix custom folder handling; #761
|
||||
- core: providers: screwzira: move to ktuvit and wizdom
|
||||
- core: add option to not download subtitles for certain audio languages existing; and/or no audio stream; fix #756
|
||||
- core: delay item refreshes after refresh call (default: 5 seconds; exposed in advanced settings)
|
||||
- menu: allow extraction of embedded subtitles for whole tv shows
|
||||
|
||||
|
||||
2.6.5.3247
|
||||
|
||||
subscene, addic7ed
|
||||
|
||||
either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service (anti-captcha.com or deathbycaptcha.com), add funds, then supply your credentials/apikey in the configuration
|
||||
Changelog
|
||||
core: fix for tv.plex.agents.movie not populating its media types
|
||||
core: tasks: findBetterSubtitles: increase minimum score for better subtitles for movies with extracted embedded subs from 82 to 112
|
||||
|
||||
|
||||
2.6.5.3241
|
||||
|
||||
subscene, addic7ed
|
||||
|
||||
either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service (anti-captcha.com or deathbycaptcha.com), add funds, then supply your credentials/apikey in the configuration
|
||||
Changelog
|
||||
|
||||
core: add support for new Plex Movie agent
|
||||
core: remove py3 compat breaking unnecessary change
|
||||
core: skip drawing tags for SRT
|
||||
core: advanced_settings: refiners: drone: add custom pem_file support; fixes #735
|
||||
providers: core: set DownloadLimitPerDayExceeded timeout to 4 hours (was one day);
|
||||
providers: addic7ed: limit downloads per day; add vip setting
|
||||
providers: addic7ed: properly compare last_dl, add last_reset tracking info to log #723
|
||||
providers: addic7ed: properly implement limits
|
||||
submod: HI: remove more music tags
|
||||
submod: common CM_punctuation_space2: detect AND don't try changing domain/url/host when fixing punctuation
|
||||
|
||||
|
||||
2.6.5.3223
|
||||
|
||||
subscene, addic7ed
|
||||
|
||||
either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service (anti-captcha.com or deathbycaptcha.com), add funds, then supply your credentials/apikey in the configuration
|
||||
Changelog
|
||||
|
||||
core: scoring: reorder subtitles based on second non-hash-score if main hash score is the same; morpheus65535/bazarr#821
|
||||
providers: bsplayer: verify hash; clean up
|
||||
|
||||
|
||||
2.6.5.3217
|
||||
|
||||
subscene, addic7ed
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- core: also extract (missing) embedded subtitles when SearchAllRecentlyAddedMissing is running
|
||||
- core: core: clarify detecting streams (in logs)
|
||||
- core: UnRAR: set binary to executable, even if not checked out from git; might fix #693
|
||||
- core: bazarr-backport: morpheus65535/bazarr#703: use proper language code detection instead of a wild guess; should fix bad existing subtitle detection
|
||||
- core: bazarr-backport: morpheus65535/bazarr#660: fix BOM encoding stuff
|
||||
- core: bazarr-backport: morpheus65535/bazarr#656 further generalize formats; skip release group match if format match failed
|
||||
- core: fix stream detection when using mediainfo (#711)
|
||||
- config/core: make periodic SZ-internal subtitle maintenance interval configurable
|
||||
- providers: add BSPlayer Subtitles
|
||||
- providers: add ScrewZira (Hebrew)
|
||||
|
||||
2.6.5.3183
|
||||
|
||||
|
||||
subscene, addic7ed
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- core: don't fall back to default providers if none enabled
|
||||
- core: don't process any further if stream info is missing
|
||||
- core: support using mediainfo for retrieving MP4 MOV_TEXT subtitle stream titles (PMS bug)
|
||||
- core: fix embedded subtitle extraction in some cases (#681, #680)
|
||||
- core: scanning: add additional INFO logging for undetected languages
|
||||
- core: bazarr-backport: remove existing subtitle file, to support MergerFS
|
||||
- core: bazarr-backport: generic 10 minute throttling if uncaught exception occurs
|
||||
- providers: addic7ed: fix recaptcha solving; fix show ID retrieval (#681)
|
||||
- providers: addic7ed: add timeout on authentication error
|
||||
- providers: addic7ed: fix shows with dots in them (Mayans M.C.)
|
||||
- providers: addic7ed: fix detection of completed subtitle for non-english users (#686)
|
||||
- providers: addic7ed: add more timeouts in the login process
|
||||
- providers: argenteam: bazarr-backport: use new url; fixes
|
||||
|
||||
|
||||
2.6.5.3152
|
||||
|
||||
subscene, addic7ed
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- core: fix core issue possibly impacting results on OpenSubtitles in certain conditions
|
||||
- core: fix default values of opensubtitles-skip-wrong-fps, use_https; fix #676
|
||||
- core: fix for determining whether to search under certain circumstances; fixes #666
|
||||
- core: #664 fix missing language processing of multiple videos refreshed at once
|
||||
- core: #661 fix match strictness when determining preexisting external subtitles
|
||||
- providers: titlovi: New implementation of Titlovi using API (thanks @viking1304)
|
||||
|
||||
|
||||
2.6.5.3124
|
||||
|
||||
subscene, addic7ed and titlovi
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- core: http: fallback to default DNS when normal resolving fails; fixes #657
|
||||
- core: extract embedded/menu: fix detection of unknown streams; don't use unknown streams if a known language was previously found
|
||||
- core: language: use replacement map from bazarr
|
||||
- providers: titlovi: fix matching
|
||||
- providers: subscene: fix unknown language code error when "empty" result is returned
|
||||
- providers: subscene: add support for pt-BR (based on Diaoul/subliminal@b22cf08)
|
||||
- providers: subscene: explicitly set account filters for languages
|
||||
- providers: subscene: limit alternative searches to 3; set throttle to 8
|
||||
- providers: subscene: move login/cookies to initialization sequence
|
||||
- submod: generic: en: fix ";='s
|
||||
|
||||
|
||||
2.6.5.3109
|
||||
|
||||
subscene, addic7ed and titlovi
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- providers: add Napisy24 (polish)
|
||||
- providers: subscene: reduce provider load by possibly half
|
||||
- providers: subscene: support logging in (username/password are now required)
|
||||
- providers: subscene: fallback to non year results if none found with year
|
||||
|
||||
|
||||
2.6.5.3099
|
||||
|
||||
subscene, addic7ed and titlovi
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- core: allow system DNS again by putting "system" as the DNS
|
||||
- providers: subscene: fix again (subscene, contact us please, so we can end this)
|
||||
|
||||
|
||||
2.6.5.3092
|
||||
|
||||
subscene, addic7ed and titlovi
|
||||
- either of those providers might impose a reCAPTCHA verification. In order to use those providers, please create an account at an AntiCaptcha service ([anti-captcha.com](http://getcaptchasolution.com/kkvviom7nh) or [deathbycaptcha.com](http://deathbycaptcha.com)), add funds, then supply your credentials/apikey in the configuration
|
||||
|
||||
Changelog
|
||||
- providers: subscene: fix endpoint (hopefully for longer now)
|
||||
- providers: subscene: don't search for season packs (broken for now; relieves 50% of server load on provider)
|
||||
- providers: subscene: don't calculate video fn for now
|
||||
- providers: argenteam: backport fixes from bazarr
|
||||
- subtitle: try decoding with utf-16 by default as well (zho/farsi)
|
||||
- submod: HI: remove music tags by default
|
||||
- core: compat (bazarr): add env var SZ_KEEP_ENCODING to keep encoding of subtitles
|
||||
|
||||
|
||||
2.6.5.3074
|
||||
|
||||
Changelog
|
||||
- core: cf: bypass cf 95% of the time without captchas
|
||||
- core: fix breaking line endings of certain languages (chinese, UTF-16); fixes #646
|
||||
- core: update pysubs2 to 0.2.3
|
||||
|
||||
|
||||
2.6.5.3062
|
||||
|
||||
Changelog
|
||||
- core: cf: optimize
|
||||
- core: http: don't query DNS with IPs. thanks @fgump (fixes sonarr/radarr)
|
||||
|
||||
|
||||
2.6.5.3041
|
||||
|
||||
Changelog
|
||||
- core: only reference guessed title if there actually is one
|
||||
- core: cf: optimize
|
||||
- core/config: add setting for one existing language to be enough, fixes #491
|
||||
- core/compat: dns: support nameservers via ENV[dns_resolvers]; don't fall back to default DNS when configured custom DNS failed
|
||||
- providers: titlovi: prevent repeated captcha solving for CF
|
||||
|
||||
|
||||
2.6.5.3017
|
||||
|
||||
Changelog
|
||||
- core: SRT parsing: handle (bad) ASS color tag in SRT
|
||||
- core: auto extract embedded: only use one unknown sub for first language
|
||||
- core: better embedded streams language detection
|
||||
- core: optimizations
|
||||
- core: extract embedded: fix is_unknown check
|
||||
- core: don't raise exception when subtitle not found inside archive
|
||||
- core: search external subtitles: fix condition
|
||||
- core: better plex transcoder path detection
|
||||
- core: use Log.Warn instead of Log.Warning (#619, #629, #633)
|
||||
- core: also check for "plex transcoder.exe" in case of windows (fixes #619)
|
||||
- core: auto extract: use mbcs encoding for paths on windows
|
||||
- core: Fix issue scandir not returning the name of the file inside Docker images on ARM systems. (thanks @giejay)
|
||||
- core: also clean PYTHONHOME when calling external notification app
|
||||
- core: update certifi to 2019.3.9
|
||||
- core: scan_video: add series/title as alternative by scanning filename itself without parent folders
|
||||
- core: add generic solution for solving captchas using anti captcha services
|
||||
- core: increase cache time to 180d (was: 30d)
|
||||
- core: guess_matches: handle multiple title matches; fixes bazarr#403
|
||||
- windows: fix compatibility issues with plex transcoder
|
||||
- compat: use lowercase paths on subtitle detection
|
||||
- providers: addic7ed: re-enable (using paid anti captch service)
|
||||
- providers: assrt: assume undefined Chinese flavor as Simplified (chs/zho-Hans)
|
||||
- providers: subscene: make it work again by bypassing cf
|
||||
- providers: subscene: don't fail on missing cover
|
||||
- providers: titlovi: re-enable (might need paid anti captch service)
|
||||
- providers: opensubtitles: fix only_foreign handling
|
||||
- providers: opensubtitles: show subtitles with possibly mismatched series when manually listing subs
|
||||
- menu: list subtitles: show subtitles with bad season/episode values as well
|
||||
- refiners: omdb: fix imdb ids with spaces
|
||||
|
||||
|
||||
2.6.4.2911
|
||||
- core: improve file cache (windows especially); use fixed-length cache filenames; fixes #600
|
||||
- core: don't log "Checking connections ..." when sonarr/radarr not activated
|
||||
- core: make logging for scanning/parsing/preparing videos more clear
|
||||
- core: extract embedded: continue searching for embbedded subs after undefined language is found (thanks @jippo015)
|
||||
- compat: core: don't assume hints["title"] exists
|
||||
- compat: providers: podnapisi: loosen lxml requirement
|
||||
- providers: addic7ed: fix not using user credentials; fixes #605
|
||||
- providers: titlovi: fix provider (thanks @viking1304)
|
||||
- providers: subscene: fix provider
|
||||
- providers: opensubtitles: improve token logging
|
||||
- providers: podnapisi: fix searching for Marvel series
|
||||
- submod: HI: correctly remove uppercase at start of a sentence when lead by a crocodile (>>)
|
||||
- submod: HI: correctly remove lowercase inside brackets when HI matched as well
|
||||
- submod: HI: remove multiple HI_before_colon_caps before one colon
|
||||
- submod: common: also match music symbols after a crocodile; move crocodile removal up the chain
|
||||
|
||||
|
||||
2.6.4.2881
|
||||
- core: extract embedded: fix automatic extraction not actually writing the subtitles to disk under certain circumstances; fixes #598
|
||||
- core: sonarr/radarr: don't block the main thread while checking connectivity; fixes #597
|
||||
- providers: hosszupuska: fix inconsistent series naming (thanks @morpheus133)
|
||||
- providers: opensubtitles: add advanced setting to optionally not skip subtitles with wrong FPS; fixes #578
|
||||
|
||||
|
||||
2.6.4.2864
|
||||
- core: scanning: don't fail on metadata subtitles with bad language code; fixes #596
|
||||
- providers: legendastv, napiprojekt, subscenter, tvsubtitles: fix "No language to search for" issue; fixes #596
|
||||
- menu: fix "ignore list list"
|
||||
- menu: advanced: add skip next search all recently missing subtitles entry
|
||||
|
||||
|
||||
2.6.4.2859
|
||||
- core: fix thread.lock error (only affected the history menu, not the actual functionality)
|
||||
- core: fix audio-based conditional subtitle decision making; fixes #592
|
||||
- core: massively improve metadata subtitle storage
|
||||
- providers: opensubtitles: skip non-forced results when searching for forced
|
||||
- providers: podnapisi: skip non-forced results when searching for forced
|
||||
- submod: common: correctly pad music symbols on either side
|
||||
|
||||
|
||||
|
||||
2.6.4.2834
|
||||
- core: add option to use custom (Google, Cloudflare) DNS to resolve provider hosts in problematic countries; fixes #547
|
||||
- core: add support for downloading subtitles only when the audio streams don't match (any?) configured languages; fixes #519
|
||||
- core: add support for an include list instead of an ignore list; add the option to disable SZ by default, then enable it per item/series/section (inverse ignore list)
|
||||
- core/menu/config: support forced/foreign subtitles independently
|
||||
- core: fallback for OSError on scandir, should fix #532
|
||||
- core: add config versioning/migration system
|
||||
- core: correctly force non-foreign-only-capable providers off; remove subscene from foreign-only capable providers
|
||||
- core: scanning: collect information about audio streams
|
||||
- core: use correct storage path when storing subtitle info, when only VTT is used
|
||||
- core: fix disabled channel mode
|
||||
- core/menu: extract embedded: add extracted embedded subtitles to history
|
||||
- core: embedded subtitle streams: don't try parsing the language if inexistant
|
||||
- core: subtitle: fix log call, fixes #569
|
||||
- core: download best subtitles: only use actually languages searched for
|
||||
- core: refiners: tvdb: warn instead of error when no matching series was found
|
||||
- core: scanning: re-add expected title to guessit for narrowing down the video title
|
||||
- core: resolve #583
|
||||
- core: archives: explicitly skip forced subtitles if not searched for, when picking from an archive
|
||||
- core: activities/auto-refresh: fix hybrid-plus for movies
|
||||
- core: don't disable plugin if all providers throttled; fix #585 #574
|
||||
- core: skip cleanup for ignored paths
|
||||
- core: update requests to 2.20.0 (fixes security issue)
|
||||
- core: update certifi to 2018.10.15
|
||||
- core: auto extract: don't overwrite local sub even if unknown to SZ
|
||||
- config: set autoclean leftover/unused to off by default
|
||||
- providers: opensubtitles: respect rate limit (40 hits/10s); should fix long throttling behaviour
|
||||
- providers: opensubtitles: handle bad/inexistant responses
|
||||
- providers: opensubtitles: log bad response data
|
||||
- providers: opensubtitles: treat empty response as ServiceUnavailable for now
|
||||
- providers: opensubtitles: log reason for ServiceUnavailable
|
||||
- providers: legendastv: match second title and imdb id
|
||||
- providers: titlovi: fix language handling (thanks @viking1304)
|
||||
- providers: titlovi: proper handling of archives with both cyrlic and latin subtitles (thanks @viking1304)
|
||||
- providers: titlovi: allow direct subtitle downloads as fallback (when a subtitle, not an archive was returned)
|
||||
- providers: hosszupuska: implement site change (thanks @morpheus133)
|
||||
- providers: supersubtitles: add base properties to subtitle
|
||||
- providers: opensubtitles, podnapisi: fix foreign/forced handling
|
||||
- providers: subscene: use original/sceneName if possible
|
||||
- menu: fix plugin not responding when ignoring an item in certain menus; fixes #535
|
||||
- menu: select active subtitle: return to item details afterwards; correctly set current
|
||||
- menu: add item thumbnails to history and a couple of submenus
|
||||
- menu: history: use series thumbnail instead of episode screenshot
|
||||
- menu: add full soft include/exclude menu handling
|
||||
- menu: add support for separate forced and not-forced subtitles
|
||||
- menu: fix order of embedded subtitle streams in item detail
|
||||
- menu: support S00E00 and equivalent
|
||||
- submod: add option to fix only-uppercase subtitles and make them readable
|
||||
- submod: keep track of actually applied mods
|
||||
- submod: correctly merge mods of the same kind (offset)
|
||||
- submod: OCR: add dictionaries for bosnian and norwegian bokmal; update dicts for dan, eng, hrv, spa, srp, swe
|
||||
- submod: OCR/HI: skip certain processors for all-caps subs
|
||||
- submod: HI: only remove caps before colon if the colon is followed by whitespace or EOL; fixes #542
|
||||
- submod: HI: remove MAN:
|
||||
- submod: common: improve detection and normalization of quotes, apostrophes
|
||||
- submod: common: fix double quotes that are meant to be single quotes inside words
|
||||
- submod: common: normalize small hyphens to dash
|
||||
- submod: common: remove line only consisting of colon; remove empty colon at start of line
|
||||
- submod: common: add space after punctuation
|
||||
- submod: common: fix lowercase i for english language
|
||||
- submod: common: better fix for music symbols
|
||||
- submod: reverse_RTL: also reverse ":,'-" chars in CM_RTL_reverse (thanks @doopler)
|
||||
- submod: reverse_RTL: enable mod for arabic, farsi and persian besides hebrew
|
||||
- i18n: fix not used translation for recently added missing subtitles menu
|
||||
- i18n: fix spanish translation, fixes #543
|
||||
- i18n: Hungarian translation is incomplete
|
||||
|
||||
|
||||
|
||||
2.5.7.2663
|
||||
- implement translations for the channel and the settings
|
||||
- i18n: German (myself)
|
||||
- i18n: Danish (thanks Uthman, Claus Møller, dane22)
|
||||
- i18n: Dutch (thanks jippo015, Semi Doludizgin, Rafael)
|
||||
- i18n: Hungarian (thanks Morpheus1333, sugarman402)
|
||||
- i18n: Spanish LA&C (thanks Yamil.llanos, Notorius28)
|
||||
- core: notify executable: support spaces in path, fixes #520
|
||||
- core: notify executable: fix usage with python scripts (drops inherited PYTHONPATH), fixes #355
|
||||
- core: fix plugin_pin_mode
|
||||
- refiners: filebot: fix usage on OSX
|
||||
- providers: add assrt.net (Chinese) - thanks @dimotsai!
|
||||
- providers: add supersubtitles (feliratok.info, Hungarian) - thanks @morpheus133!
|
||||
- providers: addic7ed: cache login data instead of re-login per search
|
||||
- submod: HI: support "&" and "+" in hi_before_colon
|
||||
- submod: HI: be less aggressive with HI_before_colon_noncaps; fixes #510
|
||||
|
||||
|
||||
2.5.4.2541
|
||||
|
||||
|
||||
+28
-78
@@ -21,12 +21,13 @@ import support
|
||||
import interface
|
||||
sys.modules["interface"] = interface
|
||||
|
||||
from subzero.constants import OS_PLEX_USERAGENT, PERSONAL_MEDIA_IDENTIFIER
|
||||
from subzero.constants import OS_PLEX_USERAGENT
|
||||
from interface.menu import *
|
||||
from support.plex_media import media_to_videos, get_media_item_ids
|
||||
from support.extract import agent_extract_embedded
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles, store_subtitle_info, get_subtitle_storage
|
||||
from support.items import is_ignored
|
||||
from support.storage import save_subtitles, store_subtitle_info
|
||||
from support.items import is_wanted
|
||||
from support.config import config
|
||||
from support.lib import get_intent
|
||||
from support.helpers import track_usage, get_title_for_video_metadata, get_identifier, cast_bool
|
||||
@@ -34,6 +35,7 @@ from support.history import get_history
|
||||
from support.data import dispatch_migrate
|
||||
from support.activities import activity
|
||||
from support.download import download_best_subtitles
|
||||
from support.localmedia import find_subtitles
|
||||
|
||||
|
||||
def Start():
|
||||
@@ -93,66 +95,9 @@ def Start():
|
||||
track_usage("General", "plugin", "start", config.version)
|
||||
|
||||
|
||||
def update_local_media(metadata, media, media_type="movies"):
|
||||
# Look for subtitles
|
||||
if media_type == "movies":
|
||||
for item in media.items:
|
||||
for part in item.parts:
|
||||
support.localmedia.find_subtitles(part)
|
||||
return
|
||||
|
||||
# Look for subtitles for each episode.
|
||||
for s in media.seasons:
|
||||
# If we've got a date based season, ignore it for now, otherwise it'll collide with S/E folders/XML and PMS
|
||||
# prefers date-based (why?)
|
||||
if int(s) < 1900 or metadata.guid.startswith(PERSONAL_MEDIA_IDENTIFIER):
|
||||
for e in media.seasons[s].episodes:
|
||||
for i in media.seasons[s].episodes[e].items:
|
||||
|
||||
# Look for subtitles.
|
||||
for part in i.parts:
|
||||
support.localmedia.find_subtitles(part)
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
def agent_extract_embedded(video_part_map):
|
||||
try:
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
|
||||
to_extract = []
|
||||
item_count = 0
|
||||
|
||||
for scanned_video, part_info in video_part_map.iteritems():
|
||||
plexapi_item = scanned_video.plexapi_metadata["item"]
|
||||
stored_subs = subtitle_storage.load_or_new(plexapi_item)
|
||||
|
||||
for plexapi_part in get_all_parts(plexapi_item):
|
||||
item_count = item_count + 1
|
||||
for requested_language in config.lang_list:
|
||||
embedded_subs = stored_subs.get_by_provider(plexapi_part.id, requested_language, "embedded")
|
||||
current = stored_subs.get_any(plexapi_part.id, requested_language)
|
||||
if not embedded_subs:
|
||||
stream_data = get_embedded_subtitle_streams(plexapi_part, requested_language=requested_language,
|
||||
get_forced=config.forced_only)
|
||||
|
||||
if stream_data:
|
||||
stream = stream_data[0]["stream"]
|
||||
|
||||
to_extract.append(({scanned_video: part_info}, plexapi_part, str(stream.index),
|
||||
str(requested_language), not current))
|
||||
|
||||
if not cast_bool(Prefs["subtitles.search_after_autoextract"]):
|
||||
scanned_video.subtitle_languages.update({requested_language})
|
||||
else:
|
||||
Log.Debug("Skipping embedded subtitle extraction for %s, already got %r from %s",
|
||||
plexapi_item.rating_key, requested_language, embedded_subs[0].id)
|
||||
if to_extract:
|
||||
Log.Info("Triggering extraction of %d embedded subtitles of %d items", len(to_extract), item_count)
|
||||
Thread.Create(multi_extract_embedded, stream_list=to_extract, refresh=True, with_mods=True,
|
||||
single_thread=not config.advanced.auto_extract_multithread)
|
||||
except:
|
||||
Log.Error("Something went wrong when auto-extracting subtitles, continuing: %s", traceback.format_exc())
|
||||
def update_local_media(videos, ignore_parts_cleanup=None):
|
||||
for video in videos:
|
||||
find_subtitles(video["plex_part"], ignore_parts_cleanup=ignore_parts_cleanup)
|
||||
|
||||
|
||||
class SubZeroAgent(object):
|
||||
@@ -181,29 +126,32 @@ class SubZeroAgent(object):
|
||||
return
|
||||
|
||||
Log.Debug("Sub-Zero %s, %s update called" % (config.version, self.agent_type))
|
||||
intent = get_intent()
|
||||
|
||||
if not media:
|
||||
Log.Error("Called with empty media, something is really wrong with your setup!")
|
||||
return
|
||||
|
||||
intent = get_intent()
|
||||
|
||||
item_ids = []
|
||||
try:
|
||||
config.init_subliminal_patches()
|
||||
videos = media_to_videos(media, kind=self.agent_type)
|
||||
|
||||
# find local media
|
||||
update_local_media(metadata, media, media_type=self.agent_type)
|
||||
all_videos = media_to_videos(media, kind=self.agent_type)
|
||||
|
||||
# media ignored?
|
||||
use_any_parts = False
|
||||
for video in videos:
|
||||
if is_ignored(video["id"]):
|
||||
Log.Debug(u"Ignoring %s" % video)
|
||||
ignore_parts_cleanup = []
|
||||
videos = []
|
||||
for video in all_videos:
|
||||
if not is_wanted(video["id"], item=video["item"]):
|
||||
Log.Debug(u'Skipping "%s"' % video["filename"])
|
||||
ignore_parts_cleanup.append(video["path"])
|
||||
continue
|
||||
use_any_parts = True
|
||||
videos.append(video)
|
||||
|
||||
if not use_any_parts:
|
||||
# find local media
|
||||
update_local_media(all_videos, ignore_parts_cleanup=ignore_parts_cleanup)
|
||||
|
||||
if not videos:
|
||||
Log.Debug(u"Nothing to do.")
|
||||
return
|
||||
|
||||
@@ -231,7 +179,7 @@ class SubZeroAgent(object):
|
||||
if config.plex_transcoder:
|
||||
agent_extract_embedded(scanned_video_part_map)
|
||||
else:
|
||||
Log.Warning("Plex Transcoder not found, can't auto extract")
|
||||
Log.Warn("Plex Transcoder not found, can't auto extract")
|
||||
|
||||
# clear missing subtitles menu data
|
||||
if not scheduler.is_task_running("MissingSubtitles"):
|
||||
@@ -283,16 +231,17 @@ class SubZeroAgent(object):
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
# store item(s) in history
|
||||
for subtitle in video_subtitles:
|
||||
item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
|
||||
history = get_history()
|
||||
item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False)
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
thumb=video.plexapi_metadata["super_thumb"],
|
||||
subtitle=subtitle)
|
||||
history.destroy()
|
||||
else:
|
||||
# store SZ meta info even if we've downloaded none
|
||||
self.store_blank_subtitle_metadata(scanned_video_part_map)
|
||||
|
||||
update_local_media(metadata, media, media_type=self.agent_type)
|
||||
update_local_media(videos)
|
||||
|
||||
finally:
|
||||
# update the menu state
|
||||
@@ -313,7 +262,8 @@ class SubZeroAgent(object):
|
||||
|
||||
|
||||
class SubZeroSubtitlesAgentMovies(SubZeroAgent, Agent.Movies):
|
||||
contributes_to = ['com.plexapp.agents.imdb', 'com.plexapp.agents.xbmcnfo', 'com.plexapp.agents.themoviedb', 'com.plexapp.agents.hama']
|
||||
contributes_to = ['com.plexapp.agents.imdb', 'com.plexapp.agents.xbmcnfo', 'com.plexapp.agents.themoviedb',
|
||||
'com.plexapp.agents.hama', 'tv.plex.agents.movie']
|
||||
score_prefs_key = "subtitles.search.minimumMovieScore2"
|
||||
agent_type_verbose = "Movies"
|
||||
|
||||
|
||||
@@ -61,6 +61,10 @@ def AdvancedMenu(randomize=None, header=None, message=None):
|
||||
key=Callback(SkipFindBetterSubtitles, randomize=timestamp()),
|
||||
title=pad_title(_("Skip next find better subtitles (sets last run to now)")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SkipRecentlyAddedMissing, randomize=timestamp()),
|
||||
title=pad_title(_("Skip next find recently added with missing subtitles (sets last run to now)")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerStorageMaintenance, randomize=timestamp()),
|
||||
title=pad_title(_("Trigger subtitle storage maintenance")),
|
||||
@@ -103,7 +107,7 @@ def AdvancedMenu(randomize=None, header=None, message=None):
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ResetStorage, key="menu_history", randomize=timestamp()),
|
||||
title=pad_title("Reset the plugin's menu history storage"),
|
||||
title=pad_title(_("Reset the plugin's menu history storage")),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(InvalidateCache, randomize=timestamp()),
|
||||
@@ -207,6 +211,19 @@ def SkipFindBetterSubtitles(randomize=None):
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/skipram')
|
||||
@debounce
|
||||
def SkipRecentlyAddedMissing(randomize=None):
|
||||
task = scheduler.task("SearchAllRecentlyAddedMissing")
|
||||
task.last_run = datetime.datetime.now()
|
||||
|
||||
return AdvancedMenu(
|
||||
randomize=timestamp(),
|
||||
header=_("Success"),
|
||||
message=_("SearchAllRecentlyAddedMissing skipped")
|
||||
)
|
||||
|
||||
|
||||
@route(PREFIX + '/triggermaintenance')
|
||||
@debounce
|
||||
def TriggerStorageMaintenance(randomize=None):
|
||||
@@ -240,44 +257,48 @@ def TriggerCacheMaintenance(randomize=None):
|
||||
)
|
||||
|
||||
|
||||
def apply_default_mods(reapply_current=False):
|
||||
def apply_default_mods(reapply_current=False, scandir_generic=False):
|
||||
storage = get_subtitle_storage()
|
||||
subs_applied = 0
|
||||
for fn in storage.get_all_files():
|
||||
data = storage.load(None, filename=fn)
|
||||
if data:
|
||||
video_id = data.video_id
|
||||
item_type = get_item_kind_from_rating_key(video_id)
|
||||
if not item_type:
|
||||
continue
|
||||
|
||||
for part_id, part in data.parts.iteritems():
|
||||
for lang, subs in part.iteritems():
|
||||
current_sub = subs.get("current")
|
||||
if not current_sub:
|
||||
continue
|
||||
sub = subs[current_sub]
|
||||
try:
|
||||
for fn in storage.get_all_files(scandir_generic=scandir_generic):
|
||||
data = storage.load(None, filename=fn)
|
||||
if data:
|
||||
video_id = data.video_id
|
||||
item_type = get_item_kind_from_rating_key(video_id)
|
||||
if not item_type:
|
||||
continue
|
||||
|
||||
if not sub.content:
|
||||
continue
|
||||
|
||||
current_mods = sub.mods or []
|
||||
if not reapply_current:
|
||||
add_mods = list(set(config.default_mods).difference(set(current_mods)))
|
||||
if not add_mods:
|
||||
for part_id, part in data.parts.iteritems():
|
||||
for lang, subs in part.iteritems():
|
||||
current_sub = subs.get("current")
|
||||
if not current_sub:
|
||||
continue
|
||||
else:
|
||||
if not current_mods:
|
||||
sub = subs[current_sub]
|
||||
|
||||
if not sub.content:
|
||||
continue
|
||||
add_mods = []
|
||||
|
||||
try:
|
||||
set_mods_for_part(video_id, part_id, Language.fromietf(lang), item_type, add_mods, mode="add")
|
||||
except:
|
||||
Log.Error("Couldn't set mods for %s:%s: %s", video_id, part_id, traceback.format_exc())
|
||||
continue
|
||||
current_mods = sub.mods or []
|
||||
if not reapply_current:
|
||||
add_mods = list(set(config.default_mods).difference(set(current_mods)))
|
||||
if not add_mods:
|
||||
continue
|
||||
else:
|
||||
if not current_mods:
|
||||
continue
|
||||
add_mods = []
|
||||
|
||||
subs_applied += 1
|
||||
try:
|
||||
set_mods_for_part(video_id, part_id, Language.fromietf(lang), item_type, add_mods, mode="add")
|
||||
except:
|
||||
Log.Error("Couldn't set mods for %s:%s: %s", video_id, part_id, traceback.format_exc())
|
||||
continue
|
||||
|
||||
subs_applied += 1
|
||||
except OSError:
|
||||
return apply_default_mods(reapply_current=reapply_current, scandir_generic=True)
|
||||
storage.destroy()
|
||||
Log.Debug("Applied mods to %i items" % subs_applied)
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ from support.config import config
|
||||
from support.helpers import timestamp
|
||||
|
||||
|
||||
def enable_channel_wrapper(func):
|
||||
def enable_channel_wrapper(func, enforce_route=False):
|
||||
"""
|
||||
returns the original wrapper :func: (route or handler) if applicable, else the plain to-be-wrapped function
|
||||
:param func: original wrapper
|
||||
@@ -20,12 +20,11 @@ def enable_channel_wrapper(func):
|
||||
:param k: kwargs
|
||||
:return: originally to-be-wrapped function
|
||||
"""
|
||||
return a[0]
|
||||
return a[0] if len(a) else a
|
||||
|
||||
return inner
|
||||
|
||||
def wrap(*args, **kwargs):
|
||||
enforce_route = kwargs.pop("enforce_route", None)
|
||||
return (func if (config.enable_channel or enforce_route) else noop)(*args, **kwargs)
|
||||
|
||||
return wrap
|
||||
@@ -176,10 +175,11 @@ def route_wrapper(*args, **kwargs):
|
||||
return ret_f(*ret_a, **ret_kw)
|
||||
|
||||
# @route may be used multiple times
|
||||
enforce_route = kwargs.pop("enforce_route", None)
|
||||
if not already_wrapped:
|
||||
inner.orig_f = f
|
||||
|
||||
return enable_channel_wrapper(route(*args, **kwargs))(inner)
|
||||
return enable_channel_wrapper(route(*args, **kwargs))(f)
|
||||
return enable_channel_wrapper(route(*args, **kwargs), enforce_route=enforce_route)(inner)
|
||||
return enable_channel_wrapper(route(*args, **kwargs), enforce_route=enforce_route)(f)
|
||||
|
||||
return wrap
|
||||
|
||||
@@ -1,18 +1,22 @@
|
||||
# coding=utf-8
|
||||
import os
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from subzero.language import Language
|
||||
|
||||
from sub_mod import SubtitleModificationsMenu
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, add_ignore_options, get_item_task_data, \
|
||||
set_refresh_menu_state, route, extract_embedded_sub
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, add_incl_excl_options, get_item_task_data, \
|
||||
set_refresh_menu_state, route
|
||||
from support.extract import extract_embedded_sub
|
||||
|
||||
from refresh_item import RefreshItem
|
||||
from subzero.constants import PREFIX
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.helpers import timestamp, df, get_language, display_language, get_language_from_stream
|
||||
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub, get_item_title, save_stored_sub
|
||||
from support.plex_media import get_plex_metadata, get_part, get_embedded_subtitle_streams
|
||||
from support.plex_media import get_plex_metadata, get_part, get_embedded_subtitle_streams, is_stream_forced, \
|
||||
update_stream_info
|
||||
from support.scanning import scan_videos
|
||||
from support.scheduler import scheduler
|
||||
from support.storage import get_subtitle_storage
|
||||
@@ -33,7 +37,7 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
:param randomize:
|
||||
:return:
|
||||
"""
|
||||
from interface.main import IgnoreMenu
|
||||
from interface.main import InclExclMenu
|
||||
|
||||
title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)
|
||||
item = plex_item = get_item(rating_key)
|
||||
@@ -116,6 +120,8 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
if not os.path.exists(part.file):
|
||||
continue
|
||||
|
||||
update_stream_info(part)
|
||||
|
||||
part_id = str(part.id)
|
||||
part_index += 1
|
||||
|
||||
@@ -142,9 +148,9 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
summary = _(u"%(part_summary)sCurrent subtitle: %(provider_name)s (added: %(date_added)s, "
|
||||
u"%(mode)s), Language: %(language)s, Score: %(score)i, Storage: %(storage_type)s",
|
||||
part_summary=part_summary_addon,
|
||||
provider_name=current_sub.provider_name,
|
||||
provider_name=_(current_sub.provider_name),
|
||||
date_added=df(current_sub.date_added),
|
||||
mode=current_sub.mode_verbose,
|
||||
mode=_(current_sub.mode_verbose),
|
||||
language=display_language(lang),
|
||||
score=current_sub.score,
|
||||
storage_type=current_sub.storage_type)
|
||||
@@ -181,11 +187,13 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
|
||||
lang = get_language_from_stream(stream.language_code)
|
||||
is_forced = is_stream_forced(stream)
|
||||
|
||||
if not lang and config.treat_und_as_first:
|
||||
lang = list(config.lang_list)[0]
|
||||
|
||||
if lang:
|
||||
lang = Language.rebuild(lang, forced=is_forced)
|
||||
embedded_langs.append(lang)
|
||||
embedded_count += 1
|
||||
|
||||
@@ -196,14 +204,15 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
randomize=timestamp()),
|
||||
title=_(u"%(part_summary)sEmbedded subtitles (%(languages)s)",
|
||||
part_summary=part_index_addon,
|
||||
languages=", ".join(display_language(l) for l in set(embedded_langs))),
|
||||
summary=_(u"Extract and activate embedded subtitle streams")
|
||||
languages=", ".join(display_language(l)
|
||||
for l in list(OrderedDict.fromkeys(embedded_langs)))),
|
||||
summary=_(u"Extract embedded subtitle streams")
|
||||
))
|
||||
|
||||
ignore_title = item_title
|
||||
if current_kind == "episode":
|
||||
ignore_title = get_item_title(item)
|
||||
add_ignore_options(oc, "videos", title=ignore_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
add_incl_excl_options(oc, "videos", title=ignore_title, rating_key=rating_key, callback_menu=InclExclMenu)
|
||||
subtitle_storage.destroy()
|
||||
|
||||
return oc
|
||||
@@ -287,7 +296,7 @@ def ListStoredSubsForItemMenu(**kwargs):
|
||||
summary = _(u"added: %(date_added)s, %(mode)s, Language: %(language)s, Score: %(score)i, Storage: "
|
||||
u"%(storage_type)s",
|
||||
date_added=df(subtitle.date_added),
|
||||
mode=subtitle.mode_verbose,
|
||||
mode=_(subtitle.mode_verbose),
|
||||
language=display_language(language),
|
||||
score=subtitle.score,
|
||||
storage_type=subtitle.storage_type)
|
||||
@@ -324,19 +333,24 @@ def SelectStoredSubForItemMenu(**kwargs):
|
||||
subtitles = stored_subs.get_all(part_id, language)
|
||||
subtitle = subtitles[sub_key]
|
||||
|
||||
subtitles["current"] = sub_key
|
||||
|
||||
save_stored_sub(subtitle, rating_key, part_id, language, item_type, plex_item=plex_item, storage=storage,
|
||||
stored_subs=stored_subs)
|
||||
|
||||
stored_subs.set_current(part_id, language, sub_key)
|
||||
storage.save(stored_subs)
|
||||
storage.destroy()
|
||||
|
||||
kwargs.pop("randomize")
|
||||
kwa = {
|
||||
"header": _("Success"),
|
||||
"message": _("Subtitle saved to disk"),
|
||||
"title": kwargs["title"],
|
||||
"item_title": kwargs["item_title"],
|
||||
"base_title": kwargs.get("base_title")
|
||||
}
|
||||
|
||||
kwargs["header"] = _("Success")
|
||||
kwargs["message"] = _("Subtitle saved to disk")
|
||||
# fixme: return to SubtitleOptionsMenu properly? (needs recomputation of current_data
|
||||
|
||||
return SubtitleOptionsMenu(randomize=timestamp(), **kwargs)
|
||||
return ItemDetailsMenu(rating_key, randomize=timestamp(), **kwa)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/blacklist_recent/{language}')
|
||||
@@ -452,10 +466,10 @@ def ManageBlacklistMenu(**kwargs):
|
||||
provider_name, subtitle_id = sub_key
|
||||
title = _(u"%(provider_name)s, %(subtitle_id)s (added: %(date_added)s, %(mode)s), Language: %(language)s, "
|
||||
u"Score: %(score)i, Storage: %(storage_type)s",
|
||||
provider_name=provider_name,
|
||||
provider_name=_(provider_name),
|
||||
subtitle_id=subtitle_id,
|
||||
date_added=df(data["date_added"]),
|
||||
mode=current_sub.get_mode_verbose(data["mode"]),
|
||||
mode=_(current_sub.get_mode_verbose(data["mode"])),
|
||||
language=display_language(Language.fromietf(language)),
|
||||
score=data["score"],
|
||||
storage_type=data["storage_type"])
|
||||
@@ -514,7 +528,7 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
video_display_data = metadata["filename"]
|
||||
|
||||
current_display = (_(u"Current: %(provider_name)s (%(score)s) ",
|
||||
provider_name=current_provider,
|
||||
provider_name=_(current_provider),
|
||||
score=current_score if current_provider else ""))
|
||||
if not running:
|
||||
oc.add(DirectoryObject(
|
||||
@@ -570,6 +584,8 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
bl_addon = "Blacklisted "
|
||||
|
||||
wrong_fps_addon = ""
|
||||
wrong_series_addon = ""
|
||||
wrong_season_ep_addon = ""
|
||||
if subtitle.wrong_fps:
|
||||
if plex_part:
|
||||
wrong_fps_addon = _(" (wrong FPS, sub: %(subtitle_fps)s, media: %(media_fps)s)",
|
||||
@@ -579,15 +595,24 @@ def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item
|
||||
wrong_fps_addon = _(" (wrong FPS, sub: %(subtitle_fps)s, media: unknown, low impact mode)",
|
||||
subtitle_fps=subtitle.fps)
|
||||
|
||||
if subtitle.wrong_series:
|
||||
wrong_series_addon = _(" (possibly wrong series)")
|
||||
|
||||
if subtitle.wrong_season_ep:
|
||||
wrong_season_ep_addon = _(" (possibly wrong season/episode)")
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerDownloadSubtitle, rating_key=rating_key, randomize=timestamp(), item_title=item_title,
|
||||
subtitle_id=str(subtitle.id), language=language),
|
||||
title=_(u"%(blacklisted_state)s%(current_state)s: %(provider_name)s, score: %(score)s%(wrong_fps_state)s",
|
||||
title=_(u"%(blacklisted_state)s%(current_state)s: %(provider_name)s, score: %(score)s%(wrong_fps_state)s"
|
||||
u"%(wrong_series_state)s%(wrong_season_ep_state)s",
|
||||
blacklisted_state=bl_addon,
|
||||
current_state=_("Available") if current_id != subtitle.id else _("Current"),
|
||||
provider_name=subtitle.provider_name,
|
||||
provider_name=_(subtitle.provider_name),
|
||||
score=subtitle.score,
|
||||
wrong_fps_state=wrong_fps_addon),
|
||||
wrong_fps_state=wrong_fps_addon,
|
||||
wrong_series_state=wrong_series_addon,
|
||||
wrong_season_ep_state=wrong_season_ep_addon),
|
||||
summary=_(u"Release: %(release_info)s, Matches: %(matches)s",
|
||||
release_info=subtitle.release_info,
|
||||
matches=", ".join(subtitle.matches)),
|
||||
@@ -649,29 +674,28 @@ def ListEmbeddedSubsForItemMenu(**kwargs):
|
||||
stream = stream_data["stream"]
|
||||
is_forced = stream_data["is_forced"]
|
||||
|
||||
if language:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
|
||||
stream_index=str(stream.index), language=language, with_mods=True, **kwargs),
|
||||
title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
|
||||
u"%(stream_title)s with default mods",
|
||||
stream_index=stream.index,
|
||||
language=display_language(language),
|
||||
unknown_state=_(" (unknown)") if is_unknown else "",
|
||||
forced_state=_(" (forced)") if is_forced else "",
|
||||
stream_title=" (\"%s\")" % stream.title if stream.title else ""),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
|
||||
stream_index=str(stream.index), language=language, **kwargs),
|
||||
title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
|
||||
u"%(stream_title)s",
|
||||
stream_index=stream.index,
|
||||
language=display_language(language),
|
||||
unknown_state=_(" (unknown)") if is_unknown else "",
|
||||
forced_state=_(" (forced)") if is_forced else "",
|
||||
stream_title=" (\"%s\")" % stream.title if stream.title else ""),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
|
||||
stream_index=str(stream.index), language=language, with_mods=True, **kwargs),
|
||||
title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
|
||||
u"%(stream_title)s with default mods",
|
||||
stream_index=stream.index,
|
||||
language=display_language(language),
|
||||
unknown_state=_(" (unknown)") if is_unknown else "",
|
||||
forced_state=_(" (forced)") if is_forced else "",
|
||||
stream_title=" (\"%s\")" % stream.title if stream.title else ""),
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(TriggerExtractEmbeddedSubForItemMenu, randomize=timestamp(),
|
||||
stream_index=str(stream.index), language=language, **kwargs),
|
||||
title=_(u"Extract stream %(stream_index)s, %(language)s%(unknown_state)s%(forced_state)s"
|
||||
u"%(stream_title)s",
|
||||
stream_index=stream.index,
|
||||
language=display_language(language),
|
||||
unknown_state=_(" (unknown)") if is_unknown else "",
|
||||
forced_state=_(" (forced)") if is_forced else "",
|
||||
stream_title=" (\"%s\")" % stream.title if stream.title else ""),
|
||||
))
|
||||
return oc
|
||||
|
||||
|
||||
@@ -682,7 +706,7 @@ def TriggerExtractEmbeddedSubForItemMenu(**kwargs):
|
||||
part_id = kwargs.get("part_id")
|
||||
stream_index = kwargs.get("stream_index")
|
||||
|
||||
Thread.Create(extract_embedded_sub, **kwargs)
|
||||
Thread.Create(extract_embedded_sub, extract_mode="m", **kwargs)
|
||||
header = _(u"Extracting of embedded subtitle %s of part %s:%s triggered",
|
||||
stream_index, rating_key, part_id)
|
||||
|
||||
|
||||
@@ -4,9 +4,9 @@ from subzero.constants import PREFIX, TITLE, ART
|
||||
from support.config import config
|
||||
from support.helpers import pad_title, timestamp, df, display_language
|
||||
from support.scheduler import scheduler
|
||||
from support.ignore import ignore_list
|
||||
from support.ignore import get_decision_list
|
||||
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info, get_item, get_item_title
|
||||
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_ignore_options, \
|
||||
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_incl_excl_options, \
|
||||
ObjectContainer, route, handler
|
||||
from support.i18n import _
|
||||
from item_details import ItemDetailsMenu
|
||||
@@ -100,7 +100,7 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(RecentMissingSubtitlesMenu, randomize=timestamp()),
|
||||
title=_("Show recently added items with missing subtitles"),
|
||||
summary=_("Lists items with missing subtitles. Click on Find recent items with missing subs to update list"),
|
||||
summary=_("Lists items with missing subtitles. Click on \"Find recent items with missing subs\" to update list"),
|
||||
thumb=R("icon-missing.jpg")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
@@ -134,10 +134,15 @@ def fatality(randomize=None, force_title=None, header=None, message=None, only_r
|
||||
thumb=R("icon-search.jpg")
|
||||
))
|
||||
|
||||
ref_list = get_decision_list()
|
||||
incl_excl_ref = _("include list") if ref_list.store == "include" else _("ignore list")
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(IgnoreListMenu),
|
||||
title=_("Display ignore list (%(ignored_count)d)", ignored_count=len(ignore_list)),
|
||||
summary=_("Show the current ignore list (mainly used for the automatic tasks)"),
|
||||
title=_("Display %(incl_excl_list_name)s (%(count)d)",
|
||||
incl_excl_list_name=incl_excl_ref, count=len(ref_list)),
|
||||
summary=_("Show the current %(incl_excl_list_name)s (mainly used for the automatic tasks)",
|
||||
incl_excl_list_name=incl_excl_ref),
|
||||
thumb=R("icon-ignore.jpg")
|
||||
))
|
||||
|
||||
@@ -218,6 +223,7 @@ def RecentlyPlayedMenu():
|
||||
item_title = get_item_title(item)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
thumb=get_item_thumb(item) or default_thumb,
|
||||
title=item_title,
|
||||
key=Callback(ItemDetailsMenu, title=base_title + " > " + item.title, item_title=item.title,
|
||||
rating_key=item.rating_key)
|
||||
@@ -314,8 +320,8 @@ def determine_section_display(kind, item, pass_kwargs=None):
|
||||
return SectionMenu
|
||||
|
||||
|
||||
@route(PREFIX + '/ignore/set/{kind}/{rating_key}/{todo}/sure={sure}', kind=str, rating_key=str, todo=str, sure=bool)
|
||||
def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
|
||||
@route(PREFIX + '/incl_excl/set/{kind}/{rating_key}/{todo}/sure={sure}', kind=str, rating_key=str, todo=str, sure=bool)
|
||||
def InclExclMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
|
||||
"""
|
||||
displays the ignore options for a menu
|
||||
:param kind:
|
||||
@@ -325,55 +331,68 @@ def IgnoreMenu(kind, rating_key, title=None, sure=False, todo="not_set"):
|
||||
:param todo:
|
||||
:return:
|
||||
"""
|
||||
is_ignored = rating_key in ignore_list[kind]
|
||||
ref_list = get_decision_list()
|
||||
include = ref_list.store == "include"
|
||||
list_str_ref = "include" if include else "ignore"
|
||||
in_list = rating_key in ref_list[kind]
|
||||
|
||||
if include:
|
||||
# shortcut
|
||||
sure = True
|
||||
todo = "add" if not in_list else "remove"
|
||||
|
||||
if not sure:
|
||||
t = u"Add %(kind)s %(title)s to the ignore list"
|
||||
if is_ignored:
|
||||
if in_list:
|
||||
t = u"Remove %(kind)s %(title)s from the ignore list"
|
||||
oc = SubFolderObjectContainer(no_history=True, replace_parent=True,
|
||||
title1=_(t,
|
||||
kind=ignore_list.verbose(kind),
|
||||
kind=ref_list.verbose(kind),
|
||||
title=title
|
||||
),
|
||||
title2=_("Are you sure?"))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(IgnoreMenu, kind=kind, rating_key=rating_key, title=title, sure=True,
|
||||
todo="add" if not is_ignored else "remove"),
|
||||
key=Callback(InclExclMenu, kind=kind, rating_key=rating_key, title=title, sure=True,
|
||||
todo="add" if not in_list else "remove"),
|
||||
title=pad_title(_("Are you sure?")),
|
||||
))
|
||||
return oc
|
||||
|
||||
rel = ignore_list[kind]
|
||||
rel = ref_list[kind]
|
||||
dont_change = False
|
||||
state = None
|
||||
if todo == "remove":
|
||||
if not is_ignored:
|
||||
if not in_list:
|
||||
dont_change = True
|
||||
else:
|
||||
rel.remove(rating_key)
|
||||
Log.Info("Removed %s (%s) from the ignore list", title, rating_key)
|
||||
ignore_list.remove_title(kind, rating_key)
|
||||
ignore_list.save()
|
||||
Log.Info("Removed %s (%s) from the %s list", title, rating_key, list_str_ref)
|
||||
ref_list.remove_title(kind, rating_key)
|
||||
ref_list.save()
|
||||
elif todo == "add":
|
||||
if is_ignored:
|
||||
if in_list:
|
||||
dont_change = True
|
||||
else:
|
||||
rel.append(rating_key)
|
||||
Log.Info("Added %s (%s) to the ignore list", title, rating_key)
|
||||
ignore_list.add_title(kind, rating_key, title)
|
||||
ignore_list.save()
|
||||
Log.Info("Added %s (%s) to the %s list", title, rating_key, list_str_ref)
|
||||
ref_list.add_title(kind, rating_key, title)
|
||||
ref_list.save()
|
||||
else:
|
||||
dont_change = True
|
||||
|
||||
if dont_change:
|
||||
return fatality(force_title=" ", header=_("Didn't change the ignore list"), no_history=True)
|
||||
return fatality(force_title=" ", header=_("Didn't change the %(incl_excl_list_name)s",
|
||||
incl_excl_list_name=_(list_str_ref)), no_history=True)
|
||||
|
||||
t = "%(title)s added to the ignore list"
|
||||
if todo == "remove":
|
||||
t = "%(title)s removed from the ignore list"
|
||||
return fatality(force_title=" ", header=_(t,
|
||||
title=title,),
|
||||
no_history=True)
|
||||
if include:
|
||||
t = "%(title)s added to the include list"
|
||||
if todo == "remove":
|
||||
t = "%(title)s removed from the include list"
|
||||
else:
|
||||
t = "%(title)s added to the ignore list"
|
||||
if todo == "remove":
|
||||
t = "%(title)s removed from the ignore list"
|
||||
return fatality(force_title=" ", header=_(t, title=title,), no_history=True)
|
||||
|
||||
|
||||
@route(PREFIX + '/sections')
|
||||
@@ -414,7 +433,7 @@ def SectionMenu(rating_key, title=None, base_title=None, section_title=None, ign
|
||||
title = base_title + " > " + title
|
||||
oc = SubFolderObjectContainer(title2=title, no_cache=True, no_history=True)
|
||||
if ignore_options:
|
||||
add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
add_incl_excl_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=InclExclMenu)
|
||||
|
||||
return dig_tree(oc, items, MetadataMenu,
|
||||
pass_kwargs={"base_title": title, "display_items": deeper, "previous_item_type": "section",
|
||||
@@ -442,7 +461,7 @@ def SectionFirstLetterMenu(rating_key, title=None, base_title=None, section_titl
|
||||
title = unicode(title)
|
||||
oc = SubFolderObjectContainer(title2=section_title, no_cache=True, no_history=True)
|
||||
title = base_title + " > " + title
|
||||
add_ignore_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
add_incl_excl_options(oc, "sections", title=section_title, rating_key=rating_key, callback_menu=InclExclMenu)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SectionMenu, title=_("All"), base_title=title, rating_key=rating_key, ignore_options=False),
|
||||
|
||||
+108
-110
@@ -11,19 +11,17 @@ import copy
|
||||
from requests import HTTPError
|
||||
from item_details import ItemDetailsMenu
|
||||
from refresh_item import RefreshItem
|
||||
from menu_helpers import add_ignore_options, dig_tree, set_refresh_menu_state, \
|
||||
default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route, \
|
||||
extract_embedded_sub
|
||||
from main import fatality, IgnoreMenu
|
||||
from menu_helpers import add_incl_excl_options, dig_tree, set_refresh_menu_state, \
|
||||
default_thumb, debounce, ObjectContainer, SubFolderObjectContainer, route
|
||||
from main import fatality, InclExclMenu
|
||||
from advanced import DispatchRestart
|
||||
from subzero.constants import ART, PREFIX, DEPENDENCY_MODULE_NAMES
|
||||
from support.plex_media import get_all_parts, get_embedded_subtitle_streams
|
||||
from support.extract import season_extract_embedded
|
||||
from support.scheduler import scheduler
|
||||
from support.config import config
|
||||
from support.helpers import timestamp, df, display_language
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_all_items, get_items_info, get_item_kind_from_rating_key, get_item, MI_KEY, get_item_title
|
||||
from support.storage import get_subtitle_storage
|
||||
from support.ignore import get_decision_list
|
||||
from support.items import get_all_items, get_items_info, get_item_kind_from_rating_key, get_item, get_item_title
|
||||
from support.i18n import _
|
||||
|
||||
# init GUI
|
||||
@@ -107,31 +105,54 @@ def MetadataMenu(rating_key, title=None, base_title=None, display_items=False, p
|
||||
if current_kind in ("series", "season"):
|
||||
item = get_item(rating_key)
|
||||
sub_title = get_item_title(item)
|
||||
add_ignore_options(oc, current_kind, title=sub_title, rating_key=rating_key, callback_menu=IgnoreMenu)
|
||||
add_incl_excl_options(oc, current_kind, title=sub_title, rating_key=rating_key, callback_menu=InclExclMenu)
|
||||
|
||||
# mass-extract embedded
|
||||
if current_kind == "season" and config.plex_transcoder:
|
||||
for lang in config.lang_list:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=show.section.title, display_items=display_items, item_title=item_title,
|
||||
title=title,
|
||||
previous_item_type=previous_item_type, with_mods=True,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=_(u"Extract missing %(language)s embedded subtitles", language=display_language(lang)),
|
||||
summary=_("Extracts the not yet extracted embedded subtitles of all episodes for the current "
|
||||
"season with all configured default modifications")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=show.section.title, display_items=display_items, item_title=item_title,
|
||||
title=title, force=True,
|
||||
previous_item_type=previous_item_type, with_mods=True,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=_(u"Extract and activate %(language)s embedded subtitles", language=display_language(lang)),
|
||||
summary=_("Extracts embedded subtitles of all episodes for the current season "
|
||||
"with all configured default modifications")
|
||||
))
|
||||
if config.plex_transcoder:
|
||||
if current_kind == "season":
|
||||
for lang in config.lang_list:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=show.section.title, display_items=display_items, item_title=item_title,
|
||||
title=title,
|
||||
previous_item_type=previous_item_type, with_mods=True,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=_(u"Extract missing %(language)s embedded subtitles", language=display_language(lang)),
|
||||
summary=_("Extracts the not yet extracted embedded subtitles of all episodes for the current "
|
||||
"season with all configured default modifications")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=show.section.title, display_items=display_items, item_title=item_title,
|
||||
title=title, force=True,
|
||||
previous_item_type=previous_item_type, with_mods=True,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=_(u"Extract and activate %(language)s embedded subtitles", language=display_language(lang)),
|
||||
summary=_("Extracts embedded subtitles of all episodes for the current season "
|
||||
"with all configured default modifications")
|
||||
))
|
||||
elif current_kind == "series":
|
||||
for lang in config.lang_list:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=title, display_items=display_items, item_title=item_title,
|
||||
title=title, mode="series",
|
||||
previous_item_type=previous_item_type, with_mods=True,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=_(u"Extract missing %(language)s embedded subtitles", language=display_language(lang)),
|
||||
summary=_("Extracts the not yet extracted embedded subtitles of all episodes for the current "
|
||||
"series with all configured default modifications")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SeasonExtractEmbedded, rating_key=rating_key, language=lang,
|
||||
base_title=title, display_items=display_items, item_title=item_title,
|
||||
title=title, force=True, mode="series",
|
||||
previous_item_type=previous_item_type, with_mods=True,
|
||||
previous_rating_key=previous_rating_key, randomize=timestamp()),
|
||||
title=_(u"Extract and activate %(language)s embedded subtitles", language=display_language(lang)),
|
||||
summary=_("Extracts embedded subtitles of all episodes for the current series "
|
||||
"with all configured default modifications")
|
||||
))
|
||||
|
||||
# add refresh
|
||||
oc.add(DirectoryObject(
|
||||
@@ -162,9 +183,10 @@ def SeasonExtractEmbedded(**kwargs):
|
||||
item_title = kwargs.pop("item_title")
|
||||
title = kwargs.pop("title")
|
||||
force = kwargs.pop("force", False)
|
||||
mode = kwargs.pop("mode", "season")
|
||||
|
||||
Thread.Create(season_extract_embedded, **{"rating_key": rating_key, "requested_language": requested_language,
|
||||
"with_mods": with_mods, "force": force})
|
||||
"with_mods": with_mods, "force": force, "mode": mode})
|
||||
|
||||
kwargs["header"] = _("Success")
|
||||
kwargs["message"] = _(u"Extracting of embedded subtitles for %s triggered", title)
|
||||
@@ -173,59 +195,17 @@ def SeasonExtractEmbedded(**kwargs):
|
||||
return MetadataMenu(randomize=timestamp(), title=item_title, **kwargs)
|
||||
|
||||
|
||||
def multi_extract_embedded(stream_list, refresh=False, with_mods=False, single_thread=True):
|
||||
def execute():
|
||||
for video_part_map, plexapi_part, stream_index, language, set_current in stream_list:
|
||||
plexapi_item = video_part_map.keys()[0].plexapi_metadata["item"]
|
||||
|
||||
extract_embedded_sub(rating_key=plexapi_item.rating_key, part_id=plexapi_part.id,
|
||||
plex_item=plexapi_item, part=plexapi_part, scanned_videos=video_part_map,
|
||||
stream_index=stream_index, set_current=set_current,
|
||||
language=language, with_mods=with_mods, refresh=refresh)
|
||||
|
||||
if single_thread:
|
||||
with Thread.Lock(key="extract_embedded"):
|
||||
execute()
|
||||
else:
|
||||
execute()
|
||||
|
||||
|
||||
def season_extract_embedded(rating_key, requested_language, with_mods=False, force=False):
|
||||
# get stored subtitle info for item id
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
|
||||
try:
|
||||
for data in get_all_items(key="children", value=rating_key, base="library/metadata"):
|
||||
item = get_item(data[MI_KEY])
|
||||
if item:
|
||||
stored_subs = subtitle_storage.load_or_new(item)
|
||||
for part in get_all_parts(item):
|
||||
embedded_subs = stored_subs.get_by_provider(part.id, requested_language, "embedded")
|
||||
current = stored_subs.get_any(part.id, requested_language)
|
||||
if not embedded_subs or force:
|
||||
stream_data = get_embedded_subtitle_streams(part, requested_language=requested_language,
|
||||
get_forced=config.forced_only)
|
||||
if stream_data:
|
||||
stream = stream_data[0]["stream"]
|
||||
|
||||
set_current = not current or force
|
||||
refresh = not current
|
||||
|
||||
extract_embedded_sub(rating_key=item.rating_key, part_id=part.id,
|
||||
stream_index=str(stream.index), set_current=set_current,
|
||||
refresh=refresh, language=requested_language, with_mods=with_mods)
|
||||
finally:
|
||||
subtitle_storage.destroy()
|
||||
|
||||
|
||||
@route(PREFIX + '/ignore_list')
|
||||
def IgnoreListMenu():
|
||||
oc = SubFolderObjectContainer(title2="Ignore list", replace_parent=True)
|
||||
for key in ignore_list.key_order:
|
||||
values = ignore_list[key]
|
||||
ref_list = get_decision_list()
|
||||
include = ref_list.store == "include"
|
||||
list_title = _("Include list" if include else "Ignore list")
|
||||
oc = SubFolderObjectContainer(title2=list_title, replace_parent=True)
|
||||
for key in ref_list.key_order:
|
||||
values = ref_list[key]
|
||||
for value in values:
|
||||
add_ignore_options(oc, key, title=ignore_list.get_title(key, value), rating_key=value,
|
||||
callback_menu=IgnoreMenu)
|
||||
add_incl_excl_options(oc, key, title=ref_list.get_title(key, value), rating_key=value,
|
||||
callback_menu=InclExclMenu)
|
||||
return oc
|
||||
|
||||
|
||||
@@ -235,16 +215,17 @@ def HistoryMenu():
|
||||
history = get_history()
|
||||
oc = SubFolderObjectContainer(title2=_("History"), replace_parent=True)
|
||||
|
||||
for item in history.items:
|
||||
for item in history.items[:100]:
|
||||
possible_language = item.language
|
||||
language_display = item.lang_name if not possible_language else display_language(possible_language)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, title=item.title, item_title=item.item_title,
|
||||
rating_key=item.rating_key),
|
||||
title=u"%s (%s)" % (item.item_title, item.mode_verbose),
|
||||
title=u"%s (%s)" % (item.item_title, _(item.mode_verbose)),
|
||||
summary=_(u"%s in %s (%s, score: %s), %s", language_display, item.section_title,
|
||||
item.provider_name, item.score, df(item.time))
|
||||
_(item.provider_name), item.score, df(item.time)),
|
||||
thumb=item.thumb or default_thumb
|
||||
))
|
||||
|
||||
history.destroy()
|
||||
@@ -269,6 +250,40 @@ def replace_item(obj, key, replace_value):
|
||||
return obj
|
||||
|
||||
|
||||
def check_connections():
|
||||
# debug drone
|
||||
Log.Debug("Checking connections ...")
|
||||
log_buffer = []
|
||||
try:
|
||||
from subliminal_patch.refiners.drone import SonarrClient, RadarrClient
|
||||
log_buffer.append(["----- Connections -----"])
|
||||
for key, cls in [("sonarr", SonarrClient), ("radarr", RadarrClient)]:
|
||||
if key in config.refiner_settings:
|
||||
cname = key.capitalize()
|
||||
try:
|
||||
status = cls(**config.refiner_settings[key]).status(timeout=5)
|
||||
except HTTPError, e:
|
||||
if e.response.status_code == 401:
|
||||
log_buffer.append(("%s: NOT WORKING - BAD API KEY", cname))
|
||||
else:
|
||||
log_buffer.append(("%s: NOT WORKING - %s", cname, traceback.format_exc()))
|
||||
except:
|
||||
log_buffer.append(("%s: NOT WORKING - %s", cname, traceback.format_exc()))
|
||||
else:
|
||||
if status and status["version"]:
|
||||
log_buffer.append(("%s: OK - %s", cname, status["version"]))
|
||||
else:
|
||||
log_buffer.append(("%s: NOT WORKING - %s", cname))
|
||||
except:
|
||||
log_buffer.append(("Something went really wrong when evaluating Sonarr/Radarr: %s", traceback.format_exc()))
|
||||
finally:
|
||||
Core.log.setLevel(logging.DEBUG)
|
||||
for entry in log_buffer:
|
||||
Log.Debug(*entry)
|
||||
|
||||
Core.log.setLevel(logging.getLevelName(Prefs["log_level"]))
|
||||
|
||||
|
||||
@route(PREFIX + '/ValidatePrefs', enforce_route=True)
|
||||
def ValidatePrefs():
|
||||
Core.log.setLevel(logging.DEBUG)
|
||||
@@ -324,8 +339,10 @@ def ValidatePrefs():
|
||||
for attr in [
|
||||
"version", "app_support_path", "data_path", "data_items_path", "enable_agent",
|
||||
"enable_channel", "permissions_ok", "missing_permissions", "fs_encoding",
|
||||
"subtitle_destination_folder", "new_style_cache", "dbm_supported", "lang_list", "providers",
|
||||
"plex_transcoder", "refiner_settings", "unrar", "adv_cfg_path"]:
|
||||
"subtitle_destination_folder", "include", "include_exclude_paths", "include_exclude_sz_files",
|
||||
"new_style_cache", "dbm_supported", "lang_list", "providers", "normal_subs", "forced_only", "forced_also",
|
||||
"plex_transcoder", "refiner_settings", "unrar", "adv_cfg_path", "use_custom_dns",
|
||||
"has_anticaptcha", "anticaptcha_cls", "mediainfo_bin"]:
|
||||
|
||||
value = getattr(config, attr)
|
||||
if isinstance(value, dict):
|
||||
@@ -333,6 +350,9 @@ def ValidatePrefs():
|
||||
Log.Debug("config.%s: %s", attr, d)
|
||||
continue
|
||||
|
||||
if attr in ("api_key",):
|
||||
value = "xxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
|
||||
Log.Debug("config.%s: %s", attr, value)
|
||||
|
||||
for attr in ["plugin_log_path", "server_log_path"]:
|
||||
@@ -354,30 +374,8 @@ def ValidatePrefs():
|
||||
"subtitles.save.filesystem", ]:
|
||||
Log.Debug("Pref.%s: %s", attr, Prefs[attr])
|
||||
|
||||
# debug drone
|
||||
if "sonarr" in config.refiner_settings or "radarr" in config.refiner_settings:
|
||||
Log.Debug("----- Connections -----")
|
||||
try:
|
||||
from subliminal_patch.refiners.drone import SonarrClient, RadarrClient
|
||||
for key, cls in [("sonarr", SonarrClient), ("radarr", RadarrClient)]:
|
||||
if key in config.refiner_settings:
|
||||
cname = key.capitalize()
|
||||
try:
|
||||
status = cls(**config.refiner_settings[key]).status()
|
||||
except HTTPError, e:
|
||||
if e.response.status_code == 401:
|
||||
Log.Debug("%s: NOT WORKING - BAD API KEY", cname)
|
||||
else:
|
||||
Log.Debug("%s: NOT WORKING - %s", cname, traceback.format_exc())
|
||||
except:
|
||||
Log.Debug("%s: NOT WORKING - %s", cname, traceback.format_exc())
|
||||
else:
|
||||
if status and status["version"]:
|
||||
Log.Debug("%s: OK - %s", cname, status["version"])
|
||||
else:
|
||||
Log.Debug("%s: NOT WORKING - %s", cname)
|
||||
except:
|
||||
Log.Debug("Something went really wrong when evaluating Sonarr/Radarr: %s", traceback.format_exc())
|
||||
Thread.Create(check_connections)
|
||||
|
||||
# fixme: check existance of and os access of logs
|
||||
Log.Debug("----- Environment -----")
|
||||
|
||||
@@ -1,26 +1,16 @@
|
||||
# coding=utf-8
|
||||
import traceback
|
||||
import types
|
||||
import datetime
|
||||
import subprocess
|
||||
import os
|
||||
import operator
|
||||
|
||||
from func import enable_channel_wrapper, route_wrapper, register_route_function
|
||||
from subzero.language import Language
|
||||
from func import enable_channel_wrapper, route_wrapper
|
||||
from support.i18n import is_localized_string, _
|
||||
from support.items import get_kind, get_item_thumb, get_item, get_item_kind_from_item, refresh_item
|
||||
from support.helpers import get_video_display_title, pad_title, display_language, quote_args, is_stream_forced
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_item_thumb
|
||||
from support.helpers import get_video_display_title, pad_title
|
||||
from support.ignore import get_decision_list
|
||||
from support.lib import get_intent
|
||||
from support.config import config
|
||||
from subzero.constants import ICON_SUB, ICON
|
||||
from support.plex_media import get_part, get_plex_metadata
|
||||
from support.scheduler import scheduler
|
||||
from support.scanning import scan_videos
|
||||
from support.storage import save_subtitles
|
||||
|
||||
from subliminal_patch.subtitle import ModifiedSubtitle
|
||||
|
||||
default_thumb = R(ICON_SUB)
|
||||
main_icon = ICON if not config.is_development else "icon-dev.jpg"
|
||||
@@ -31,7 +21,7 @@ route = route_wrapper
|
||||
handler = enable_channel_wrapper(handler)
|
||||
|
||||
|
||||
def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None, add_kind=True):
|
||||
def add_incl_excl_options(oc, kind, callback_menu=None, title=None, rating_key=None, add_kind=True):
|
||||
"""
|
||||
|
||||
:param oc: oc to add our options to
|
||||
@@ -43,21 +33,28 @@ def add_ignore_options(oc, kind, callback_menu=None, title=None, rating_key=None
|
||||
"""
|
||||
# try to translate kind to the ignore key
|
||||
use_kind = kind
|
||||
if kind not in ignore_list:
|
||||
use_kind = ignore_list.translate_key(kind)
|
||||
if not use_kind or use_kind not in ignore_list:
|
||||
ref_list = get_decision_list()
|
||||
if kind not in ref_list:
|
||||
use_kind = ref_list.translate_key(kind)
|
||||
if not use_kind or use_kind not in ref_list:
|
||||
return
|
||||
|
||||
in_list = rating_key in ignore_list[use_kind]
|
||||
in_list = rating_key in ref_list[use_kind]
|
||||
include = ref_list.store == "include"
|
||||
|
||||
t = u"Ignore %(kind)s \"%(title)s\""
|
||||
if in_list:
|
||||
t = u"Un-ignore %(kind)s \"%(title)s\""
|
||||
if include:
|
||||
t = u"Enable Sub-Zero for %(kind)s \"%(title)s\""
|
||||
if in_list:
|
||||
t = u"Disable Sub-Zero for %(kind)s \"%(title)s\""
|
||||
else:
|
||||
t = u"Ignore %(kind)s \"%(title)s\""
|
||||
if in_list:
|
||||
t = u"Un-ignore %(kind)s \"%(title)s\""
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(callback_menu, kind=use_kind, sure=False, todo="not_set", rating_key=rating_key, title=title),
|
||||
key=Callback(callback_menu, kind=use_kind, sure=False, todo="not_set", rating_key=str(rating_key), title=title),
|
||||
title=_(t,
|
||||
kind=ignore_list.verbose(kind) if add_kind else "",
|
||||
kind=ref_list.verbose(kind) if add_kind else "",
|
||||
title=unicode(title))
|
||||
)
|
||||
)
|
||||
@@ -96,10 +93,12 @@ def set_refresh_menu_state(state_or_media, media_type="movies"):
|
||||
# store it in last state and remove the current
|
||||
Dict["last_refresh_state"] = Dict["current_refresh_state"]
|
||||
Dict["current_refresh_state"] = None
|
||||
Dict.Save()
|
||||
return
|
||||
|
||||
if isinstance(state_or_media, types.StringTypes) or is_localized_string(state_or_media):
|
||||
Dict["current_refresh_state"] = unicode(state_or_media)
|
||||
Dict.Save()
|
||||
return
|
||||
|
||||
media = state_or_media
|
||||
@@ -123,6 +122,7 @@ def set_refresh_menu_state(state_or_media, media_type="movies"):
|
||||
|
||||
Dict["current_refresh_state"] = unicode(_(t,
|
||||
title=unicode(title)))
|
||||
Dict.Save()
|
||||
|
||||
|
||||
def get_item_task_data(task_name, rating_key, language):
|
||||
@@ -143,70 +143,6 @@ def debounce(func):
|
||||
return func
|
||||
|
||||
|
||||
def extract_embedded_sub(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs.pop("part_id")
|
||||
stream_index = kwargs.pop("stream_index")
|
||||
with_mods = kwargs.pop("with_mods", False)
|
||||
language = Language.fromietf(kwargs.pop("language"))
|
||||
refresh = kwargs.pop("refresh", True)
|
||||
set_current = kwargs.pop("set_current", True)
|
||||
|
||||
plex_item = kwargs.pop("plex_item", get_item(rating_key))
|
||||
item_type = get_item_kind_from_item(plex_item)
|
||||
part = kwargs.pop("part", get_part(plex_item, part_id))
|
||||
scanned_videos = kwargs.pop("scanned_videos", None)
|
||||
|
||||
any_successful = False
|
||||
|
||||
if part:
|
||||
if not scanned_videos:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
|
||||
scanned_videos = scan_videos([metadata], ignore_all=True, skip_hashing=True)
|
||||
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if str(stream.index) == stream_index:
|
||||
is_forced = is_stream_forced(stream)
|
||||
bn = os.path.basename(part.file)
|
||||
|
||||
set_refresh_menu_state(_(u"Extracting subtitle %(stream_index)s of %(filename)s",
|
||||
stream_index=stream_index,
|
||||
filename=bn))
|
||||
Log.Info(u"Extracting stream %s (%s) of %s", stream_index, display_language(language), bn)
|
||||
|
||||
out_codec = stream.codec if stream.codec != "mov_text" else "srt"
|
||||
|
||||
args = [
|
||||
config.plex_transcoder, "-i", part.file, "-map", "0:%s" % stream_index, "-f", out_codec, "-"
|
||||
]
|
||||
output = None
|
||||
try:
|
||||
output = subprocess.check_output(quote_args(args), stderr=subprocess.PIPE, shell=True)
|
||||
except:
|
||||
Log.Error("Extraction failed: %s", traceback.format_exc())
|
||||
|
||||
if output:
|
||||
subtitle = ModifiedSubtitle(language, mods=config.default_mods if with_mods else None)
|
||||
subtitle.content = output
|
||||
subtitle.provider_name = "embedded"
|
||||
subtitle.id = "stream_%s" % stream_index
|
||||
subtitle.score = 0
|
||||
subtitle.set_encoding("utf-8")
|
||||
|
||||
# fixme: speedup video; only video.name is needed
|
||||
save_successful = save_subtitles(scanned_videos, {scanned_videos.keys()[0]: [subtitle]}, mode="m",
|
||||
set_current=set_current, is_forced=is_forced)
|
||||
set_refresh_menu_state(None)
|
||||
|
||||
if save_successful and refresh:
|
||||
refresh_item(rating_key)
|
||||
|
||||
any_successful = True
|
||||
|
||||
return any_successful
|
||||
|
||||
|
||||
class SZObjectContainer(ObjectContainer):
|
||||
def __init__(self, *args, **kwargs):
|
||||
skip_pin_lock = kwargs.pop("skip_pin_lock", False)
|
||||
|
||||
@@ -19,6 +19,10 @@ sys.modules["support.i18n"] = i18n
|
||||
|
||||
helpers._ = i18n._
|
||||
|
||||
import history
|
||||
|
||||
sys.modules["support.history"] = history
|
||||
|
||||
import plex_media
|
||||
sys.modules["support.plex_media"] = plex_media
|
||||
|
||||
@@ -49,6 +53,10 @@ import missing_subtitles
|
||||
|
||||
sys.modules["support.missing_subtitles"] = missing_subtitles
|
||||
|
||||
import extract
|
||||
|
||||
sys.modules["support.extract"] = extract
|
||||
|
||||
import tasks
|
||||
|
||||
sys.modules["support.tasks"] = tasks
|
||||
@@ -57,10 +65,6 @@ import ignore
|
||||
|
||||
sys.modules["support.ignore"] = ignore
|
||||
|
||||
import history
|
||||
|
||||
sys.modules["support.history"] = history
|
||||
|
||||
import data
|
||||
|
||||
sys.modules["support.data"] = data
|
||||
|
||||
@@ -85,7 +85,7 @@ class PlexActivityManager(object):
|
||||
(rating_key, next_ep.rating_key, int(next_ep.season.index), int(next_ep.index))
|
||||
|
||||
else:
|
||||
if config.activity_mode == "hybrid":
|
||||
if config.activity_mode in ("hybrid", "hybrid-plus"):
|
||||
keys_to_refresh.append(rating_key)
|
||||
elif config.activity_mode == "refresh":
|
||||
keys_to_refresh.append(rating_key)
|
||||
|
||||
+344
-90
@@ -1,5 +1,6 @@
|
||||
# coding=utf-8
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import inspect
|
||||
@@ -7,23 +8,29 @@ import sys
|
||||
import rarfile
|
||||
import jstyleson
|
||||
import datetime
|
||||
import stat
|
||||
import traceback
|
||||
import socket
|
||||
import requests
|
||||
|
||||
import subliminal
|
||||
import subliminal_patch
|
||||
import subzero.constants
|
||||
import lib
|
||||
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded, AuthenticationError
|
||||
|
||||
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded, AuthenticationError, \
|
||||
DownloadLimitPerDayExceeded
|
||||
from subliminal_patch.core import is_windows_special_path
|
||||
from whichdb import whichdb
|
||||
|
||||
from subliminal_patch.exceptions import TooManyRequests
|
||||
from subliminal_patch.exceptions import TooManyRequests, APIThrottled
|
||||
from subzero.language import Language
|
||||
from subliminal.cli import MutexLock
|
||||
from subzero.lib.io import FileIO, get_viable_encoding
|
||||
from subzero.lib.dict import Dicked
|
||||
from subzero.lib.which import find_executable
|
||||
from subzero.util import get_root_path
|
||||
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW, MEDIA_TYPE_TO_STRING
|
||||
from subzero.prefs import get_user_prefs, update_user_prefs
|
||||
from dogpile.cache.region import register_backend as register_cache_backend
|
||||
from lib import Plex
|
||||
from helpers import check_write_permissions, cast_bool, cast_int, mswindows
|
||||
@@ -43,7 +50,8 @@ VIDEO_EXTS = ['3g2', '3gp', 'asf', 'asx', 'avc', 'avi', 'avs', 'bivx', 'bup', 'd
|
||||
'wtv', 'xsp', 'xvid',
|
||||
'webm']
|
||||
|
||||
IGNORE_FN = ("subzero.ignore", ".subzero.ignore", ".nosz")
|
||||
EXCLUDE_FN = ("subzero.ignore", ".subzero.ignore", "subzero.exclude", ".subzero.exclude", ".nosz")
|
||||
INCLUDE_FN = ("subzero.include", ".subzero.include", ".sz")
|
||||
|
||||
VERSION_RE = re.compile(ur'CFBundleVersion.+?<string>([0-9\.]+)</string>', re.DOTALL)
|
||||
DEV_RE = re.compile(ur'PlexPluginDevMode.+?<string>([01]+)</string>', re.DOTALL)
|
||||
@@ -56,26 +64,38 @@ def int_or_default(s, default):
|
||||
return default
|
||||
|
||||
|
||||
VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable)
|
||||
VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, DownloadLimitPerDayExceeded,
|
||||
ServiceUnavailable, APIThrottled, requests.Timeout, requests.ReadTimeout, socket.timeout)
|
||||
|
||||
def_timeout = (datetime.timedelta(minutes=20), "20 minutes")
|
||||
|
||||
PROVIDER_THROTTLE_MAP = {
|
||||
"default": {
|
||||
TooManyRequests: (datetime.timedelta(hours=1), "1 hour"),
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
|
||||
DownloadLimitPerDayExceeded: (datetime.timedelta(hours=4), "4 hours"),
|
||||
ServiceUnavailable: (datetime.timedelta(minutes=20), "20 minutes"),
|
||||
APIThrottled: (datetime.timedelta(minutes=10), "10 minutes"),
|
||||
AuthenticationError: (datetime.timedelta(hours=2), "2 hours"),
|
||||
requests.Timeout: def_timeout,
|
||||
socket.timeout: def_timeout,
|
||||
requests.ReadTimeout: def_timeout,
|
||||
},
|
||||
"opensubtitles": {
|
||||
TooManyRequests: (datetime.timedelta(hours=3), "3 hours"),
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"),
|
||||
APIThrottled: (datetime.timedelta(seconds=15), "15 seconds"),
|
||||
},
|
||||
"addic7ed": {
|
||||
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
|
||||
TooManyRequests: (datetime.timedelta(minutes=5), "5 minutes"),
|
||||
AuthenticationError: (datetime.timedelta(hours=24), "24 hours"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class Config(object):
|
||||
config_version = 3
|
||||
libraries_root = None
|
||||
plugin_info = ""
|
||||
version = None
|
||||
@@ -96,6 +116,10 @@ class Config(object):
|
||||
advanced = None
|
||||
debug_i18n = False
|
||||
|
||||
normal_subs = False
|
||||
forced_also = False
|
||||
forced_only = False
|
||||
include = False
|
||||
enable_channel = True
|
||||
enable_agent = True
|
||||
pin = None
|
||||
@@ -108,8 +132,8 @@ class Config(object):
|
||||
max_recent_items_per_library = 200
|
||||
permissions_ok = False
|
||||
missing_permissions = None
|
||||
ignore_sz_files = False
|
||||
ignore_paths = None
|
||||
include_exclude_sz_files = False
|
||||
include_exclude_paths = None
|
||||
fs_encoding = None
|
||||
notify_executable = None
|
||||
sections = None
|
||||
@@ -118,10 +142,10 @@ class Config(object):
|
||||
remove_tags = False
|
||||
fix_ocr = False
|
||||
fix_common = False
|
||||
fix_upper = False
|
||||
reverse_rtl = False
|
||||
colors = ""
|
||||
chmod = None
|
||||
forced_only = False
|
||||
exotic_ext = False
|
||||
treat_und_as_first = False
|
||||
subtitle_sub_dir = None, None
|
||||
@@ -135,10 +159,18 @@ class Config(object):
|
||||
refiner_settings = None
|
||||
exact_filenames = False
|
||||
only_one = False
|
||||
any_language_is_enough = False
|
||||
ignore_for_audio = False
|
||||
ignore_subs_for_empty_audio = False
|
||||
embedded_auto_extract = False
|
||||
ietf_as_alpha3 = False
|
||||
unrar = None
|
||||
adv_cfg_path = None
|
||||
use_custom_dns = None
|
||||
anticaptcha_token = None
|
||||
anticaptcha_cls = None
|
||||
has_anticaptcha = False
|
||||
mediainfo_bin = None
|
||||
|
||||
store_recently_played_amount = 40
|
||||
|
||||
@@ -163,15 +195,25 @@ class Config(object):
|
||||
self.data_items_path = os.path.join(self.data_path, "DataItems")
|
||||
self.universal_plex_token = self.get_universal_plex_token()
|
||||
self.plex_token = os.environ.get("PLEXTOKEN", self.universal_plex_token)
|
||||
self.new_style_cache = cast_bool(Prefs['new_style_cache'])
|
||||
self.pack_cache_dir = self.get_pack_cache_dir()
|
||||
try:
|
||||
self.migrate_prefs()
|
||||
except:
|
||||
Log.Exception("Catastrophic failure when running prefs migration")
|
||||
|
||||
subzero.constants.DEFAULT_TIMEOUT = lib.DEFAULT_TIMEOUT = self.pms_request_timeout = \
|
||||
min(cast_int(Prefs['pms_request_timeout'], 15), 45)
|
||||
self.low_impact_mode = cast_bool(Prefs['low_impact_mode'])
|
||||
self.new_style_cache = cast_bool(Prefs['new_style_cache'])
|
||||
self.pack_cache_dir = self.get_pack_cache_dir()
|
||||
self.advanced = self.get_advanced_config()
|
||||
self.debug_i18n = self.advanced.debug_i18n
|
||||
|
||||
os.environ["SZ_USER_AGENT"] = self.get_user_agent()
|
||||
os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = self.anticaptcha_token = str(Prefs["anticaptcha.api_key"]) or ""
|
||||
acs = str(Prefs["anticaptcha.service"])
|
||||
if acs and acs != "none":
|
||||
os.environ["ANTICAPTCHA_CLASS"] = self.anticaptcha_cls = acs
|
||||
self.has_anticaptcha = self.anticaptcha_token and self.anticaptcha_cls
|
||||
|
||||
self.setup_proxies()
|
||||
self.set_plugin_mode()
|
||||
@@ -179,14 +221,18 @@ class Config(object):
|
||||
self.set_activity_modes()
|
||||
self.parse_rename_mode()
|
||||
|
||||
self.normal_subs = Prefs["subtitles.when"] != "Never"
|
||||
self.forced_also = self.normal_subs and Prefs["subtitles.when_forced"] != "Never"
|
||||
self.forced_only = not self.normal_subs and Prefs["subtitles.when_forced"] != "Never"
|
||||
self.include = \
|
||||
Prefs["subtitles.include_exclude_mode"] == "disable SZ for all items by default, use include lists"
|
||||
self.subtitle_destination_folder = self.get_subtitle_destination_folder()
|
||||
self.subtitle_formats = self.get_subtitle_formats()
|
||||
self.forced_only = cast_bool(Prefs["subtitles.only_foreign"])
|
||||
self.max_recent_items_per_library = int_or_default(Prefs["scheduler.max_recent_items_per_library"], 2000)
|
||||
self.sections = list(Plex["library"].sections())
|
||||
self.missing_permissions = []
|
||||
self.ignore_sz_files = cast_bool(Prefs["subtitles.ignore_fs"])
|
||||
self.ignore_paths = self.parse_ignore_paths()
|
||||
self.include_exclude_sz_files = cast_bool(Prefs["subtitles.include_exclude_fs"])
|
||||
self.include_exclude_paths = self.parse_include_exclude_paths()
|
||||
self.enabled_sections = self.check_enabled_sections()
|
||||
self.permissions_ok = self.check_permissions()
|
||||
self.notify_executable = self.check_notify_executable()
|
||||
@@ -194,6 +240,7 @@ class Config(object):
|
||||
self.remove_tags = cast_bool(Prefs['subtitles.remove_tags'])
|
||||
self.fix_ocr = cast_bool(Prefs['subtitles.fix_ocr'])
|
||||
self.fix_common = cast_bool(Prefs['subtitles.fix_common'])
|
||||
self.fix_upper = cast_bool(Prefs['subtitles.fix_only_uppercase'])
|
||||
self.reverse_rtl = cast_bool(Prefs['subtitles.reverse_rtl'])
|
||||
self.colors = Prefs['subtitles.colors'] if Prefs['subtitles.colors'] != "don't change" else None
|
||||
self.chmod = self.check_chmod()
|
||||
@@ -206,10 +253,72 @@ class Config(object):
|
||||
self.no_refresh = os.environ.get("SZ_NO_REFRESH", False)
|
||||
self.plex_transcoder = self.get_plex_transcoder()
|
||||
self.only_one = cast_bool(Prefs['subtitles.only_one'])
|
||||
self.any_language_is_enough = Prefs['subtitles.any_language_is_enough']
|
||||
self.ignore_for_audio = self.ignore_subs_for_audio()
|
||||
self.embedded_auto_extract = cast_bool(Prefs["subtitles.embedded.autoextract"])
|
||||
self.ietf_as_alpha3 = cast_bool(Prefs["subtitles.language.ietf_normalize"])
|
||||
self.use_custom_dns = self.parse_custom_dns()
|
||||
if not self.advanced.dont_use_mediainfo_mp4:
|
||||
self.mediainfo_bin = self.advanced.mediainfo_bin or find_executable("mediainfo")
|
||||
self.initialized = True
|
||||
|
||||
def migrate_prefs(self):
|
||||
config_version = 0 if "config_version" not in Dict else Dict["config_version"]
|
||||
if config_version < self.config_version:
|
||||
user_prefs = get_user_prefs(Prefs, Log)
|
||||
if user_prefs:
|
||||
update_prefs = {}
|
||||
for i in range(config_version, self.config_version):
|
||||
version = i+1
|
||||
func = "migrate_prefs_to_%i" % version
|
||||
if hasattr(self, func):
|
||||
Log.Info("Migrating user prefs to version %i" % version)
|
||||
try:
|
||||
mig_result = getattr(self, func)(user_prefs, from_version=config_version,
|
||||
to_version=version,
|
||||
current_version=self.config_version,
|
||||
migrated_prefs=update_prefs)
|
||||
update_prefs.update(mig_result)
|
||||
Dict["config_version"] = version
|
||||
Dict.Save()
|
||||
Log.Info("User prefs migrated to version %i" % version)
|
||||
except:
|
||||
Log.Exception("User prefs migration from %i to %i failed" % (self.config_version, version))
|
||||
break
|
||||
|
||||
if update_prefs:
|
||||
update_user_prefs(update_prefs, Prefs, Log)
|
||||
else:
|
||||
Dict["config_version"] = self.config_version
|
||||
Dict.Save()
|
||||
|
||||
def migrate_prefs_to_1(self, user_prefs, **kwargs):
|
||||
update_prefs = {}
|
||||
if "subtitles.only_foreign" in user_prefs and user_prefs["subtitles.only_foreign"] == "true":
|
||||
update_prefs["subtitles.when_forced"] = "1"
|
||||
update_prefs["subtitles.when"] = "0"
|
||||
|
||||
return update_prefs
|
||||
|
||||
def migrate_prefs_to_2(self, user_prefs, **kwargs):
|
||||
update_prefs = {}
|
||||
if "subtitles.ignore_fs" in user_prefs and user_prefs["subtitles.ignore_fs"] == "true":
|
||||
update_prefs["subtitles.include_exclude_fs"] = "true"
|
||||
|
||||
if "subtitles.ignore_paths" in user_prefs and user_prefs["subtitles.ignore_paths"]:
|
||||
update_prefs["subtitles.include_exclude_paths"] = user_prefs["subtitles.ignore_paths"]
|
||||
|
||||
return update_prefs
|
||||
|
||||
def migrate_prefs_to_3(self, user_prefs, **kwargs):
|
||||
if config.new_style_cache:
|
||||
self.init_cache()
|
||||
try:
|
||||
subliminal.region.backend.clear()
|
||||
except:
|
||||
pass
|
||||
return {}
|
||||
|
||||
def init_libraries(self):
|
||||
try_executables = []
|
||||
custom_unrar = os.environ.get("SZ_UNRAR_TOOL")
|
||||
@@ -235,6 +344,19 @@ class Config(object):
|
||||
for exe in try_executables:
|
||||
rarfile.UNRAR_TOOL = exe
|
||||
rarfile.ORIG_UNRAR_TOOL = exe
|
||||
if os.path.isfile(exe) and not os.access(exe, os.X_OK):
|
||||
st = os.stat(exe)
|
||||
try:
|
||||
Log.Debug("setting generic executable permissions for %s", exe)
|
||||
# fixme: too broad?
|
||||
os.chmod(exe, st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
||||
except:
|
||||
Log.Debug("failed setting generic executable permissions for %s: %s", exe, traceback.format_exc())
|
||||
try:
|
||||
Log.Debug("setting executable permissions for %s", exe)
|
||||
os.chmod(exe, st.st_mode | stat.S_IEXEC)
|
||||
except:
|
||||
Log.Debug("failed setting executable permissions for %s: %s", exe, traceback.format_exc())
|
||||
try:
|
||||
rarfile.custom_check([rarfile.UNRAR_TOOL], True)
|
||||
except:
|
||||
@@ -252,9 +374,10 @@ class Config(object):
|
||||
|
||||
def init_cache(self):
|
||||
if self.new_style_cache:
|
||||
subliminal.region.configure('subzero.cache.file', expiration_time=datetime.timedelta(days=30),
|
||||
subliminal.region.configure('subzero.cache.file', expiration_time=datetime.timedelta(days=180),
|
||||
arguments={'appname': "sz_cache",
|
||||
'app_cache_dir': self.data_path})
|
||||
'app_cache_dir': self.data_path},
|
||||
replace_existing_backend=True)
|
||||
Log.Info("Using new style file based cache!")
|
||||
return
|
||||
|
||||
@@ -294,14 +417,15 @@ class Config(object):
|
||||
try:
|
||||
subliminal.region.configure('dogpile.cache.dbm', expiration_time=datetime.timedelta(days=30),
|
||||
arguments={'filename': dbfn,
|
||||
'lock_factory': MutexLock})
|
||||
'lock_factory': MutexLock},
|
||||
replace_existing_backend=True)
|
||||
Log.Info("Using file based cache!")
|
||||
return
|
||||
except:
|
||||
self.dbm_supported = False
|
||||
|
||||
Log.Warn("Not using file based cache!")
|
||||
subliminal.region.configure('dogpile.cache.memory')
|
||||
subliminal.region.configure('dogpile.cache.memory', replace_existing_backend=True)
|
||||
|
||||
def sync_cache(self):
|
||||
if not self.new_style_cache:
|
||||
@@ -373,7 +497,7 @@ class Config(object):
|
||||
self.enable_channel = True
|
||||
|
||||
# any provider enabled?
|
||||
if not self.providers:
|
||||
if not self.providers_enabled:
|
||||
self.enable_agent = False
|
||||
self.enable_channel = False
|
||||
Log.Warn("No providers enabled, disabling agent and interface!")
|
||||
@@ -421,7 +545,7 @@ class Config(object):
|
||||
return True
|
||||
|
||||
self.missing_permissions = []
|
||||
use_ignore_fs = Prefs["subtitles.ignore_fs"]
|
||||
use_include_exclude_fs = self.include_exclude_sz_files
|
||||
all_permissions_ok = True
|
||||
for section in self.sections:
|
||||
if section.key not in self.enabled_sections:
|
||||
@@ -436,12 +560,12 @@ class Config(object):
|
||||
if not os.path.exists(path_str):
|
||||
continue
|
||||
|
||||
if use_ignore_fs:
|
||||
if use_include_exclude_fs:
|
||||
# check whether we've got an ignore file inside the section path
|
||||
if self.is_physically_ignored(path_str):
|
||||
if not self.is_physically_wanted(path_str):
|
||||
continue
|
||||
|
||||
if self.is_path_ignored(path_str):
|
||||
if not self.is_path_wanted(path_str):
|
||||
# is the path in our ignored paths setting?
|
||||
continue
|
||||
|
||||
@@ -476,29 +600,38 @@ class Config(object):
|
||||
info_file_path = os.path.abspath(os.path.join(curDir, "..", "..", "Info.plist"))
|
||||
return FileIO.read(info_file_path)
|
||||
|
||||
def parse_ignore_paths(self):
|
||||
paths = Prefs["subtitles.ignore_paths"]
|
||||
if paths:
|
||||
def parse_include_exclude_paths(self):
|
||||
paths = Prefs["subtitles.include_exclude_paths"]
|
||||
if paths and paths != "None":
|
||||
try:
|
||||
return [path.strip() for path in paths.split(",")]
|
||||
return [p for p in [path.strip() for path in paths.split(",")]
|
||||
if p != "None" and os.path.exists(p)]
|
||||
except:
|
||||
Log.Error("Couldn't parse your ignore paths settings: %s" % paths)
|
||||
Log.Error("Couldn't parse your include/exclude paths settings: %s" % paths)
|
||||
return []
|
||||
|
||||
def is_physically_ignored(self, folder):
|
||||
def is_physically_wanted(self, folder, ref_fn=None):
|
||||
# check whether we've got an ignore file inside the path
|
||||
for ifn in IGNORE_FN:
|
||||
ret_val = self.include
|
||||
ref_list = INCLUDE_FN if self.include else EXCLUDE_FN
|
||||
for ifn in ref_list:
|
||||
if os.path.isfile(os.path.join(folder, ifn)):
|
||||
Log.Info(u'Ignoring "%s" because "%s" exists', folder, ifn)
|
||||
return True
|
||||
if ref_fn:
|
||||
Log.Info(u'%s "%s" because "%s" exists in "%s"', "Including" if self.include else "Ignoring",
|
||||
ref_fn, ifn, folder)
|
||||
else:
|
||||
Log.Info(u'%s "%s" because "%s" exists', "Including" if self.include else "Ignoring", folder, ifn)
|
||||
|
||||
return False
|
||||
return ret_val
|
||||
|
||||
def is_path_ignored(self, fn):
|
||||
for path in self.ignore_paths:
|
||||
return not ret_val
|
||||
|
||||
def is_path_wanted(self, fn):
|
||||
ret_val = self.include
|
||||
for path in self.include_exclude_paths:
|
||||
if fn.startswith(path):
|
||||
return True
|
||||
return False
|
||||
return ret_val
|
||||
return not ret_val
|
||||
|
||||
def check_notify_executable(self):
|
||||
fn = Prefs["notify_executable"]
|
||||
@@ -538,17 +671,34 @@ class Config(object):
|
||||
enabled_for_primary_agents = {"movie": [], "show": []}
|
||||
enabled_sections = {}
|
||||
|
||||
legacy_agents = {
|
||||
"com.plexapp.agents.thetvdb": [SHOW],
|
||||
"com.plexapp.agents.thetvdbdvdorder": [SHOW],
|
||||
"com.plexapp.agents.hama": [SHOW, MOVIE],
|
||||
"com.plexapp.agents.themoviedb": [SHOW, MOVIE],
|
||||
"com.plexapp.agents.imdb": [SHOW, MOVIE],
|
||||
}
|
||||
|
||||
# find which agents we're enabled for
|
||||
for agent in Plex.agents():
|
||||
if not agent.primary:
|
||||
#if not agent.primary:
|
||||
# continue
|
||||
if agent.identifier not in legacy_agents:
|
||||
continue
|
||||
|
||||
for t in list(agent.media_types):
|
||||
if t.media_type in (MOVIE, SHOW):
|
||||
related_agents = Plex.primary_agent(agent.identifier, t.media_type)
|
||||
#media_types = [t.media_type for t in list(agent.media_types)]
|
||||
media_types = legacy_agents[agent.identifier] + []
|
||||
|
||||
# the new movie agent doesn't populate its media types, workaround
|
||||
if not media_types and agent.identifier == "tv.plex.agents.movie":
|
||||
media_types = [MOVIE]
|
||||
|
||||
for media_type in media_types:
|
||||
if media_type in (MOVIE, SHOW):
|
||||
related_agents = Plex.primary_agent(agent.identifier, media_type)
|
||||
for a in related_agents:
|
||||
if a.identifier == PLUGIN_IDENTIFIER and a.enabled:
|
||||
enabled_for_primary_agents[MEDIA_TYPE_TO_STRING[t.media_type]].append(agent.identifier)
|
||||
enabled_for_primary_agents[MEDIA_TYPE_TO_STRING[media_type]].append(agent.identifier)
|
||||
|
||||
# find the libraries that use them
|
||||
for library in self.sections:
|
||||
@@ -558,8 +708,24 @@ class Config(object):
|
||||
Log.Debug(u"I'm enabled for: %s" % [lib.title for key, lib in enabled_sections.iteritems()])
|
||||
return enabled_sections
|
||||
|
||||
def lang_str_to_list(self, s, l):
|
||||
if len(s) and s != "None":
|
||||
for lang in s.split(u","):
|
||||
lang = lang.strip()
|
||||
if lang == "NULL":
|
||||
l.append(lang)
|
||||
continue
|
||||
try:
|
||||
real_lang = Language.fromietf(lang)
|
||||
except:
|
||||
try:
|
||||
real_lang = Language.fromname(lang)
|
||||
except:
|
||||
continue
|
||||
l.append(real_lang)
|
||||
|
||||
# Prepare a list of languages we want subs for
|
||||
def get_lang_list(self, provider=None):
|
||||
def get_lang_list(self, provider=None, ordered=False):
|
||||
# advanced settings
|
||||
if provider and self.advanced.providers and provider in self.advanced.providers:
|
||||
adv_languages = self.advanced.providers[provider].get("languages", None)
|
||||
@@ -580,46 +746,73 @@ class Config(object):
|
||||
if adv_out:
|
||||
return adv_out
|
||||
|
||||
l = {Language.fromietf(Prefs["langPref1a"])}
|
||||
l = [Language.fromietf(Prefs["langPref1a"])]
|
||||
lang_custom = Prefs["langPrefCustom"].strip()
|
||||
|
||||
if Prefs['subtitles.only_one']:
|
||||
return l
|
||||
return set(l) if not ordered else l
|
||||
|
||||
if Prefs["langPref2a"] != "None":
|
||||
try:
|
||||
l.update({Language.fromietf(Prefs["langPref2a"])})
|
||||
l.append(Language.fromietf(Prefs["langPref2a"]))
|
||||
except:
|
||||
pass
|
||||
|
||||
if Prefs["langPref3a"] != "None":
|
||||
try:
|
||||
l.update({Language.fromietf(Prefs["langPref3a"])})
|
||||
l.append(Language.fromietf(Prefs["langPref3a"]))
|
||||
except:
|
||||
pass
|
||||
|
||||
if len(lang_custom) and lang_custom != "None":
|
||||
for lang in lang_custom.split(u","):
|
||||
lang = lang.strip()
|
||||
try:
|
||||
real_lang = Language.fromietf(lang)
|
||||
except:
|
||||
try:
|
||||
real_lang = Language.fromname(lang)
|
||||
except:
|
||||
continue
|
||||
l.update({real_lang})
|
||||
self.lang_str_to_list(lang_custom, l)
|
||||
if "NULL" in l:
|
||||
l.remove("NULL")
|
||||
|
||||
return l
|
||||
if self.forced_also:
|
||||
if Prefs["subtitles.when_forced"] == "Always":
|
||||
for lang in list(l):
|
||||
l.append(Language.rebuild(lang, forced=True))
|
||||
|
||||
else:
|
||||
for (setting, index) in (("Only for Subtitle Language (1)", 0),
|
||||
("Only for Subtitle Language (2)", 1),
|
||||
("Only for Subtitle Language (3)", 2)):
|
||||
if Prefs["subtitles.when_forced"] == setting:
|
||||
try:
|
||||
l.append(Language.rebuild(list(l)[index], forced=True))
|
||||
break
|
||||
except:
|
||||
pass
|
||||
|
||||
elif self.forced_only:
|
||||
for lang in l:
|
||||
lang.forced = True
|
||||
|
||||
if not self.normal_subs:
|
||||
for lang in l[:]:
|
||||
if not lang.forced:
|
||||
l.remove(lang)
|
||||
|
||||
return set(l) if not ordered else l
|
||||
|
||||
lang_list = property(get_lang_list)
|
||||
|
||||
def ignore_subs_for_audio(self):
|
||||
c = Prefs['subtitles.ignore_for_audio'].strip()
|
||||
l = []
|
||||
|
||||
self.lang_str_to_list(c, l)
|
||||
if "NULL" in l:
|
||||
l.remove("NULL")
|
||||
self.ignore_subs_for_empty_audio = True
|
||||
return l
|
||||
|
||||
def get_subtitle_destination_folder(self):
|
||||
if not Prefs["subtitles.save.filesystem"]:
|
||||
return
|
||||
|
||||
fld_custom = Prefs["subtitles.save.subFolder.Custom"].strip() if cast_bool(
|
||||
Prefs["subtitles.save.subFolder.Custom"]) else None
|
||||
fld_custom = Prefs["subtitles.save.subFolder.Custom"].strip() if \
|
||||
Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
return fld_custom or (
|
||||
Prefs["subtitles.save.subFolder"] if Prefs["subtitles.save.subFolder"] != "current folder" else None)
|
||||
|
||||
@@ -632,12 +825,14 @@ class Config(object):
|
||||
out.append("vtt")
|
||||
return out
|
||||
|
||||
def get_providers(self, media_type="series"):
|
||||
providers = {'opensubtitles': cast_bool(Prefs['provider.opensubtitles.enabled']),
|
||||
@property
|
||||
def providers_by_prefs(self):
|
||||
return {'opensubtitles': cast_bool(Prefs['provider.opensubtitles.enabled']),
|
||||
# 'thesubdb': Prefs['provider.thesubdb.enabled'],
|
||||
'podnapisi': cast_bool(Prefs['provider.podnapisi.enabled']),
|
||||
'napisy24': cast_bool(Prefs['provider.napisy24.enabled']),
|
||||
'titlovi': cast_bool(Prefs['provider.titlovi.enabled']),
|
||||
'addic7ed': cast_bool(Prefs['provider.addic7ed.enabled']),
|
||||
'addic7ed': cast_bool(Prefs['provider.addic7ed.enabled']) and self.has_anticaptcha,
|
||||
'tvsubtitles': cast_bool(Prefs['provider.tvsubtitles.enabled']),
|
||||
'legendastv': cast_bool(Prefs['provider.legendastv.enabled']),
|
||||
'napiprojekt': cast_bool(Prefs['provider.napiprojekt.enabled']),
|
||||
@@ -648,8 +843,17 @@ class Config(object):
|
||||
'argenteam': cast_bool(Prefs['provider.argenteam.enabled']),
|
||||
'subscenter': False,
|
||||
'assrt': cast_bool(Prefs['provider.assrt.enabled']),
|
||||
}
|
||||
'bsplayer': cast_bool(Prefs['provider.bsplayer.enabled']),
|
||||
'ktuvit': cast_bool(Prefs['provider.ktuvit.enabled']),
|
||||
'wizdom': cast_bool(Prefs['provider.wizdom.enabled']),
|
||||
}
|
||||
|
||||
@property
|
||||
def providers_enabled(self):
|
||||
return filter(lambda prov: self.providers_by_prefs[prov], self.providers_by_prefs)
|
||||
|
||||
def get_providers(self, media_type="series"):
|
||||
providers = self.providers_by_prefs
|
||||
providers_by_prefs = copy.deepcopy(providers)
|
||||
|
||||
# disable subscene for movies by default
|
||||
@@ -657,6 +861,7 @@ class Config(object):
|
||||
providers["subscene"] = False
|
||||
|
||||
# ditch non-forced-subtitles-reporting providers
|
||||
providers_forced_off = {}
|
||||
if self.forced_only:
|
||||
providers["addic7ed"] = False
|
||||
providers["tvsubtitles"] = False
|
||||
@@ -668,6 +873,12 @@ class Config(object):
|
||||
providers["titlovi"] = False
|
||||
providers["argenteam"] = False
|
||||
providers["assrt"] = False
|
||||
providers["subscene"] = False
|
||||
providers["napisy24"] = False
|
||||
providers["bsplayer"] = False
|
||||
providers["ktuvit"] = False
|
||||
providers["wizdom"] = False
|
||||
providers_forced_off = dict(providers)
|
||||
|
||||
if not self.unrar and providers["legendastv"]:
|
||||
providers["legendastv"] = False
|
||||
@@ -676,7 +887,7 @@ class Config(object):
|
||||
# advanced settings
|
||||
if media_type and self.advanced.providers:
|
||||
for provider, data in self.advanced.providers.iteritems():
|
||||
if provider not in providers or not providers_by_prefs[provider]:
|
||||
if provider not in providers or not providers_by_prefs[provider] or provider in providers_forced_off:
|
||||
continue
|
||||
|
||||
if data["enabled_for"] is not None:
|
||||
@@ -707,27 +918,48 @@ class Config(object):
|
||||
providers = property(get_providers)
|
||||
|
||||
def get_provider_settings(self):
|
||||
os_use_https = self.advanced.providers.opensubtitles.use_https \
|
||||
if self.advanced.providers.opensubtitles.use_https != None else True
|
||||
os_use_https = self.advanced.providers.opensubtitles.get("use_https", True)
|
||||
os_skip_wrong_fps = self.advanced.providers.opensubtitles.get("skip_wrong_fps", True)
|
||||
|
||||
provider_settings = {'addic7ed': {'username': Prefs['provider.addic7ed.username'],
|
||||
'password': Prefs['provider.addic7ed.password'],
|
||||
'use_random_agents': cast_bool(Prefs['provider.addic7ed.use_random_agents1']),
|
||||
'is_vip': cast_bool(Prefs['provider.addic7ed.is_vip']),
|
||||
},
|
||||
'opensubtitles': {'username': Prefs['provider.opensubtitles.username'],
|
||||
'password': Prefs['provider.opensubtitles.password'],
|
||||
'use_tag_search': self.exact_filenames,
|
||||
'only_foreign': self.forced_only,
|
||||
'also_foreign': self.forced_also,
|
||||
'is_vip': cast_bool(Prefs['provider.opensubtitles.is_vip']),
|
||||
'use_ssl': os_use_https,
|
||||
'timeout': self.advanced.providers.opensubtitles.timeout or 15
|
||||
'timeout': self.advanced.providers.opensubtitles.timeout or 15,
|
||||
'skip_wrong_fps': os_skip_wrong_fps,
|
||||
},
|
||||
'podnapisi': {
|
||||
'only_foreign': self.forced_only,
|
||||
'also_foreign': self.forced_also,
|
||||
},
|
||||
'titlovi': {
|
||||
'username': Prefs['provider.titlovi.username'],
|
||||
'password': Prefs['provider.titlovi.password'],
|
||||
},
|
||||
'napisy24': {
|
||||
'username': Prefs['provider.napisy24.username'],
|
||||
'password': Prefs['provider.napisy24.password'],
|
||||
},
|
||||
'subscene': {
|
||||
'only_foreign': self.forced_only,
|
||||
'username': Prefs['provider.subscene.username'],
|
||||
'password': Prefs['provider.subscene.password'],
|
||||
},
|
||||
'legendastv': {'username': Prefs['provider.legendastv.username'],
|
||||
'password': Prefs['provider.legendastv.password'],
|
||||
},
|
||||
'assrt': {'token': Prefs['provider.assrt.token'], }
|
||||
'assrt': {'token': Prefs['provider.assrt.token'], },
|
||||
'ktuvit': {
|
||||
'username': Prefs['provider.ktuvit.username'],
|
||||
'password': Prefs['provider.ktuvit.password'],
|
||||
},
|
||||
}
|
||||
|
||||
return provider_settings
|
||||
@@ -751,10 +983,10 @@ class Config(object):
|
||||
throttle_data = PROVIDER_THROTTLE_MAP.get(name, PROVIDER_THROTTLE_MAP["default"]).get(cls, None) or \
|
||||
PROVIDER_THROTTLE_MAP["default"].get(cls, None)
|
||||
|
||||
if not throttle_data:
|
||||
return
|
||||
|
||||
throttle_delta, throttle_description = throttle_data
|
||||
if throttle_data:
|
||||
throttle_delta, throttle_description = throttle_data
|
||||
else:
|
||||
throttle_delta, throttle_description = datetime.timedelta(minutes=10), "10 minutes"
|
||||
|
||||
if "provider_throttle" not in Dict:
|
||||
Dict["provider_throttle"] = {}
|
||||
@@ -762,8 +994,8 @@ class Config(object):
|
||||
throttle_until = datetime.datetime.now() + throttle_delta
|
||||
Dict["provider_throttle"][name] = (cls_name, throttle_until, throttle_description)
|
||||
|
||||
Log.Info("Throttling %s for %s, until %s, because of: %s", name, throttle_description,
|
||||
throttle_until.strftime("%y/%m/%d %H:%M"), cls_name)
|
||||
Log.Info("Throttling %s for %s, until %s, because of: %s. Exception info: %r", name, throttle_description,
|
||||
throttle_until.strftime("%y/%m/%d %H:%M"), cls_name, exception.message)
|
||||
Dict.Save()
|
||||
|
||||
@property
|
||||
@@ -823,6 +1055,8 @@ class Config(object):
|
||||
mods.append("OCR_fixes")
|
||||
if self.fix_common:
|
||||
mods.append("common")
|
||||
if self.fix_upper:
|
||||
mods.append("fix_uppercase")
|
||||
if self.colors:
|
||||
mods.append("color(name=%s)" % self.colors)
|
||||
if self.reverse_rtl:
|
||||
@@ -853,27 +1087,37 @@ class Config(object):
|
||||
self.activity_mode = "next_episode"
|
||||
|
||||
def get_plex_transcoder(self):
|
||||
paths = []
|
||||
base_path = os.environ.get("PLEX_MEDIA_SERVER_HOME", None)
|
||||
if not base_path:
|
||||
# fall back to bundled plugins path
|
||||
bundle_path = os.environ.get("PLEXBUNDLEDPLUGINSPATH", None)
|
||||
if bundle_path:
|
||||
base_path = os.path.normpath(os.path.join(bundle_path, "..", ".."))
|
||||
if base_path:
|
||||
paths.append(base_path)
|
||||
|
||||
bundle_path = os.environ.get("PLEXBUNDLEDPLUGINSPATH", None)
|
||||
if bundle_path:
|
||||
paths.append(os.path.normpath(os.path.join(bundle_path, "..", "..")))
|
||||
|
||||
paths.append(self.app_support_path)
|
||||
|
||||
bns = []
|
||||
if sys.platform == "darwin":
|
||||
fn = os.path.join(base_path, "MacOS", "Plex Transcoder")
|
||||
bns.append(("MacOS", "Plex Transcoder"))
|
||||
elif mswindows:
|
||||
fn = os.path.join(base_path, "plextranscoder.exe")
|
||||
bns = [("plextranscoder.exe",), ("plex transcoder.exe",)]
|
||||
else:
|
||||
fn = os.path.join(base_path, "Plex Transcoder")
|
||||
bns.append(("Plex Transcoder",))
|
||||
|
||||
if os.path.isfile(fn):
|
||||
return fn
|
||||
for path in paths:
|
||||
for bn in bns:
|
||||
fn = os.path.join(path, *bn)
|
||||
|
||||
# look inside Resources folder as fallback, as well
|
||||
fn = os.path.join(base_path, "Resources", "Plex Transcoder")
|
||||
if os.path.isfile(fn):
|
||||
return fn
|
||||
if os.path.isfile(fn):
|
||||
return fn
|
||||
|
||||
# look inside Resources folder as fallback, as well
|
||||
for vbn in ("Plex Transcoder", "plextranscoder.exe", "plex transcoder.exe"):
|
||||
fn = os.path.join(path, "Resources", vbn)
|
||||
if os.path.isfile(fn):
|
||||
return fn
|
||||
|
||||
def parse_rename_mode(self):
|
||||
# fixme: exact_filenames should be determined via callback combined with info about the current video
|
||||
@@ -926,6 +1170,16 @@ class Config(object):
|
||||
def text_based_formats(self):
|
||||
return self.advanced.text_subtitle_formats or TEXT_SUBTITLE_EXTS
|
||||
|
||||
def parse_custom_dns(self):
|
||||
custom_dns = Prefs['use_custom_dns2'].strip()
|
||||
os.environ["dns_resolvers"] = ""
|
||||
|
||||
if custom_dns and custom_dns != "system":
|
||||
ips = filter(lambda x: x, [d.strip() for d in custom_dns.split(",")])
|
||||
if ips:
|
||||
os.environ["dns_resolvers"] = json.dumps(ips)
|
||||
return os.environ["dns_resolvers"]
|
||||
|
||||
def init_subliminal_patches(self):
|
||||
# configure custom subtitle destination folders for scanning pre-existing subs
|
||||
Log.Debug("Patching subliminal ...")
|
||||
|
||||
@@ -6,8 +6,7 @@ from subzero.language import Language
|
||||
import subliminal_patch as subliminal
|
||||
|
||||
from support.config import config
|
||||
from support.helpers import cast_bool
|
||||
from subtitlehelpers import get_subtitles_from_metadata
|
||||
from support.helpers import audio_streams_match_languages
|
||||
from subliminal_patch import compute_score
|
||||
from support.plex_media import get_blacklist_from_part_map
|
||||
from subzero.video import refine_video
|
||||
@@ -15,32 +14,63 @@ from support.storage import get_pack_data, store_pack_data
|
||||
|
||||
|
||||
def get_missing_languages(video, part):
|
||||
languages = set([Language.fromietf(str(l)) for l in config.lang_list])
|
||||
languages_list = config.get_lang_list(ordered=True)
|
||||
languages = set(languages_list)
|
||||
ignore_langs = set(config.ignore_for_audio)
|
||||
valid_langs_in_media = set()
|
||||
|
||||
if Prefs["subtitles.when"] != "Always":
|
||||
valid_langs_in_media = audio_streams_match_languages(video, languages_list)
|
||||
languages = languages.difference(valid_langs_in_media)
|
||||
if languages:
|
||||
Log.Debug("Languages missing after taking the audio streams into account: %s" % languages)
|
||||
|
||||
if valid_langs_in_media and not languages:
|
||||
Log.Debug("Skipping subtitle search for %s, audio streams are in correct language(s)",
|
||||
video)
|
||||
return set()
|
||||
|
||||
if config.ignore_subs_for_empty_audio and not video.audio_languages:
|
||||
Log.Debug("Skipping subtitle search for %s, ignoring as no audio stream exists", video)
|
||||
return set()
|
||||
|
||||
inter = ignore_langs.intersection(video.audio_languages)
|
||||
if ignore_langs and inter:
|
||||
Log.Debug("Skipping subtitle search for %s, ignoring due to existing audio streams: %s", video, inter)
|
||||
return set()
|
||||
|
||||
# should we treat IETF as alpha3? (ditch the country part)
|
||||
alpha3_map = {}
|
||||
if config.ietf_as_alpha3:
|
||||
for language in languages:
|
||||
if language.country:
|
||||
if language and language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
|
||||
if not Prefs['subtitles.save.filesystem']:
|
||||
# scan for existing metadata subtitles
|
||||
meta_subs = get_subtitles_from_metadata(part)
|
||||
for language, subList in meta_subs.iteritems():
|
||||
if subList:
|
||||
video.subtitle_languages.add(language)
|
||||
Log.Debug("Found metadata subtitle %s for %s", language, video)
|
||||
|
||||
have_languages = video.subtitle_languages.copy()
|
||||
if config.ietf_as_alpha3:
|
||||
for language in have_languages:
|
||||
if language.country:
|
||||
if language and language.country:
|
||||
alpha3_map[language.alpha3] = language.country
|
||||
language.country = None
|
||||
|
||||
missing_languages = (set(str(l) for l in languages) - set(str(l) for l in have_languages))
|
||||
missing_languages = (languages - have_languages)
|
||||
|
||||
if config.any_language_is_enough != "Always search for all configured languages":
|
||||
not_in_forced = "foreign" in config.any_language_is_enough
|
||||
if "External or embedded subtitle" in config.any_language_is_enough:
|
||||
langs = video.subtitle_languages if not not_in_forced else \
|
||||
filter(lambda l: not l.forced, video.subtitle_languages)
|
||||
if langs:
|
||||
Log.Debug("We have at least one subtitle for any configured language.")
|
||||
return set()
|
||||
|
||||
elif "External subtitle" in config.any_language_is_enough:
|
||||
langs = video.external_subtitle_languages if not not_in_forced else \
|
||||
filter(lambda l: not l.forced, video.external_subtitle_languages)
|
||||
if langs:
|
||||
Log.Debug("We have at least one external subtitle for any configured language.")
|
||||
return set()
|
||||
|
||||
# all languages are found if we either really have subs for all languages or we only want to have exactly one language
|
||||
# and we've only found one (the case for a selected language, Prefs['subtitles.only_one'] (one found sub matches any language))
|
||||
@@ -50,7 +80,7 @@ def get_missing_languages(video, part):
|
||||
Log.Debug('Only one language was requested, and we\'ve got a subtitle for %s', video)
|
||||
else:
|
||||
Log.Debug('All languages %r exist for %s', languages, video)
|
||||
return False
|
||||
return set()
|
||||
|
||||
# re-add country codes to the missing languages, in case we've removed them above
|
||||
if config.ietf_as_alpha3:
|
||||
@@ -85,30 +115,32 @@ def language_hook(provider):
|
||||
|
||||
def download_best_subtitles(video_part_map, min_score=0, throttle_time=None, providers=None):
|
||||
hearing_impaired = Prefs['subtitles.search.hearingImpaired']
|
||||
languages = set([Language.fromietf(str(l)) for l in config.lang_list])
|
||||
languages = set([Language.rebuild(l) for l in config.lang_list])
|
||||
if not languages:
|
||||
return
|
||||
|
||||
use_videos = []
|
||||
missing_languages = set()
|
||||
for video, part in video_part_map.iteritems():
|
||||
if not video.ignore_all:
|
||||
missing_languages = get_missing_languages(video, part)
|
||||
p_missing_languages = get_missing_languages(video, part)
|
||||
else:
|
||||
missing_languages = languages
|
||||
p_missing_languages = languages
|
||||
|
||||
if missing_languages:
|
||||
Log.Info(u"%s has missing languages: %s", os.path.basename(video.name), missing_languages)
|
||||
if p_missing_languages:
|
||||
Log.Info(u"%s has missing languages: %s", os.path.basename(video.name), p_missing_languages)
|
||||
refine_video(video, refiner_settings=config.refiner_settings)
|
||||
use_videos.append(video)
|
||||
missing_languages.update(p_missing_languages)
|
||||
|
||||
# prepare blacklist
|
||||
blacklist = get_blacklist_from_part_map(video_part_map, languages)
|
||||
|
||||
if use_videos:
|
||||
if use_videos and missing_languages:
|
||||
Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s, languages: %s" %
|
||||
(min_score, hearing_impaired, languages))
|
||||
(min_score, hearing_impaired, missing_languages))
|
||||
|
||||
return subliminal.download_best_subtitles(set(use_videos), languages, min_score, hearing_impaired,
|
||||
return subliminal.download_best_subtitles(set(use_videos), missing_languages, min_score, hearing_impaired,
|
||||
providers=providers or config.providers,
|
||||
provider_configs=config.provider_settings,
|
||||
pool_class=config.provider_pool,
|
||||
|
||||
@@ -0,0 +1,214 @@
|
||||
# coding=utf-8
|
||||
import os
|
||||
import subprocess
|
||||
import traceback
|
||||
|
||||
from support.helpers import quote_args, mswindows, get_title_for_video_metadata, cast_bool, \
|
||||
audio_streams_match_languages
|
||||
from support.i18n import _
|
||||
from support.items import get_item_kind_from_item, refresh_item, get_all_items, get_item, MI_KEY
|
||||
from support.storage import get_subtitle_storage, save_subtitles
|
||||
from support.config import config
|
||||
from support.history import get_history
|
||||
from support.plex_media import get_all_parts, get_embedded_subtitle_streams, get_part, get_plex_metadata, \
|
||||
update_stream_info, is_stream_forced
|
||||
from support.scanning import scan_videos
|
||||
from subzero.language import Language
|
||||
from subliminal_patch.subtitle import ModifiedSubtitle
|
||||
|
||||
|
||||
def agent_extract_embedded(video_part_map, set_as_existing=False):
|
||||
try:
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
|
||||
to_extract = []
|
||||
item_count = 0
|
||||
|
||||
threads = []
|
||||
|
||||
for scanned_video, part_info in video_part_map.iteritems():
|
||||
plexapi_item = scanned_video.plexapi_metadata["item"]
|
||||
stored_subs = subtitle_storage.load_or_new(plexapi_item)
|
||||
valid_langs_in_media = \
|
||||
audio_streams_match_languages(scanned_video, config.get_lang_list(ordered=True))
|
||||
|
||||
if not config.lang_list.difference(valid_langs_in_media):
|
||||
Log.Debug("Skipping embedded subtitle extraction for %s, audio streams are in correct language(s)",
|
||||
plexapi_item.rating_key)
|
||||
continue
|
||||
|
||||
for plexapi_part in get_all_parts(plexapi_item):
|
||||
item_count = item_count + 1
|
||||
used_one_unknown_stream = False
|
||||
used_one_known_stream = False
|
||||
for requested_language in config.lang_list:
|
||||
skip_unknown = used_one_unknown_stream or used_one_known_stream
|
||||
embedded_subs = stored_subs.get_by_provider(plexapi_part.id, requested_language, "embedded")
|
||||
current = stored_subs.get_any(plexapi_part.id, requested_language) or \
|
||||
requested_language in scanned_video.external_subtitle_languages
|
||||
|
||||
if not embedded_subs:
|
||||
stream_data = get_embedded_subtitle_streams(plexapi_part, requested_language=requested_language,
|
||||
skip_unknown=skip_unknown)
|
||||
|
||||
if stream_data and stream_data[0]["language"]:
|
||||
stream = stream_data[0]["stream"]
|
||||
if stream_data[0]["is_unknown"]:
|
||||
used_one_unknown_stream = True
|
||||
else:
|
||||
used_one_known_stream = True
|
||||
|
||||
to_extract.append(({scanned_video: part_info}, plexapi_part, str(stream.index),
|
||||
str(requested_language), not current))
|
||||
|
||||
if not cast_bool(Prefs["subtitles.search_after_autoextract"]) or set_as_existing:
|
||||
scanned_video.subtitle_languages.update({requested_language})
|
||||
else:
|
||||
Log.Debug("Skipping embedded subtitle extraction for %s, already got %r from %s",
|
||||
plexapi_item.rating_key, requested_language, embedded_subs[0].id)
|
||||
if to_extract:
|
||||
Log.Info("Triggering extraction of %d embedded subtitles of %d items", len(to_extract), item_count)
|
||||
threads.append(Thread.Create(multi_extract_embedded, stream_list=to_extract, refresh=True, with_mods=True,
|
||||
single_thread=not config.advanced.auto_extract_multithread))
|
||||
return threads
|
||||
except:
|
||||
Log.Error("Something went wrong when auto-extracting subtitles, continuing: %s", traceback.format_exc())
|
||||
|
||||
|
||||
def multi_extract_embedded(stream_list, refresh=False, with_mods=False, single_thread=True, extract_mode="a",
|
||||
history_storage=None):
|
||||
def execute():
|
||||
for video_part_map, plexapi_part, stream_index, language, set_current in stream_list:
|
||||
plexapi_item = video_part_map.keys()[0].plexapi_metadata["item"]
|
||||
|
||||
extract_embedded_sub(rating_key=plexapi_item.rating_key, part_id=plexapi_part.id,
|
||||
plex_item=plexapi_item, part=plexapi_part, scanned_videos=video_part_map,
|
||||
stream_index=stream_index, set_current=set_current,
|
||||
language=language, with_mods=with_mods, refresh=refresh, extract_mode=extract_mode,
|
||||
history_storage=history_storage)
|
||||
|
||||
if single_thread:
|
||||
with Thread.Lock(key="extract_embedded"):
|
||||
execute()
|
||||
else:
|
||||
execute()
|
||||
|
||||
|
||||
def season_extract_embedded(rating_key, requested_language, with_mods=False, force=False, mode="season"):
|
||||
# get stored subtitle info for item id
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
|
||||
try:
|
||||
rating_keys = [rating_key]
|
||||
if mode == "series":
|
||||
seasons = get_all_items(key="children", value=rating_key, base="library/metadata")
|
||||
rating_keys = [season[MI_KEY] for season in seasons]
|
||||
|
||||
for rating_key in rating_keys:
|
||||
for data in get_all_items(key="children", value=rating_key, base="library/metadata"):
|
||||
item = get_item(data[MI_KEY])
|
||||
if item:
|
||||
stored_subs = subtitle_storage.load_or_new(item)
|
||||
for part in get_all_parts(item):
|
||||
embedded_subs = stored_subs.get_by_provider(part.id, requested_language, "embedded")
|
||||
current = stored_subs.get_any(part.id, requested_language)
|
||||
if not embedded_subs or force:
|
||||
stream_data = get_embedded_subtitle_streams(part, requested_language=requested_language)
|
||||
if stream_data:
|
||||
stream = stream_data[0]["stream"]
|
||||
|
||||
set_current = not current or force
|
||||
refresh = not current
|
||||
|
||||
extract_embedded_sub(rating_key=item.rating_key, part_id=part.id,
|
||||
stream_index=str(stream.index), set_current=set_current,
|
||||
refresh=refresh, language=requested_language, with_mods=with_mods,
|
||||
extract_mode="m")
|
||||
finally:
|
||||
subtitle_storage.destroy()
|
||||
|
||||
|
||||
def extract_embedded_sub(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs.pop("part_id")
|
||||
stream_index = kwargs.pop("stream_index")
|
||||
with_mods = kwargs.pop("with_mods", False)
|
||||
language = Language.fromietf(kwargs.pop("language"))
|
||||
refresh = kwargs.pop("refresh", True)
|
||||
set_current = kwargs.pop("set_current", True)
|
||||
|
||||
plex_item = kwargs.pop("plex_item", get_item(rating_key))
|
||||
item_type = get_item_kind_from_item(plex_item)
|
||||
part = kwargs.pop("part", get_part(plex_item, part_id))
|
||||
scanned_videos = kwargs.pop("scanned_videos", None)
|
||||
extract_mode = kwargs.pop("extract_mode", "a")
|
||||
|
||||
any_successful = False
|
||||
|
||||
from interface.menu_helpers import set_refresh_menu_state
|
||||
|
||||
if part:
|
||||
if not scanned_videos:
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
|
||||
scanned_videos = scan_videos([metadata], ignore_all=True, skip_hashing=True)
|
||||
|
||||
update_stream_info(part)
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if str(stream.index) == stream_index:
|
||||
is_forced = is_stream_forced(stream)
|
||||
bn = os.path.basename(part.file)
|
||||
|
||||
set_refresh_menu_state(_(u"Extracting subtitle %(stream_index)s of %(filename)s",
|
||||
stream_index=stream_index,
|
||||
filename=bn))
|
||||
Log.Info(u"Extracting stream %s (%s) of %s", stream_index, str(language), bn)
|
||||
|
||||
out_codec = stream.codec if stream.codec != "mov_text" else "srt"
|
||||
|
||||
args = [
|
||||
config.plex_transcoder, "-i", part.file, "-map", "0:%s" % stream_index, "-f", out_codec, "-"
|
||||
]
|
||||
|
||||
cmdline = quote_args(args)
|
||||
Log.Debug(u"Calling: %s", cmdline)
|
||||
if mswindows:
|
||||
Log.Debug("MSWindows: Fixing encoding")
|
||||
cmdline = cmdline.encode("mbcs")
|
||||
|
||||
output = None
|
||||
try:
|
||||
output = subprocess.check_output(cmdline, stderr=subprocess.PIPE, shell=True)
|
||||
except:
|
||||
Log.Error("Extraction failed: %s", traceback.format_exc())
|
||||
|
||||
if output:
|
||||
subtitle = ModifiedSubtitle(language, mods=config.default_mods if with_mods else None)
|
||||
subtitle.content = output
|
||||
subtitle.provider_name = "embedded"
|
||||
subtitle.id = "stream_%s" % stream_index
|
||||
subtitle.score = 0
|
||||
subtitle.set_encoding("utf-8")
|
||||
|
||||
# fixme: speedup video; only video.name is needed
|
||||
video = scanned_videos.keys()[0]
|
||||
save_successful = save_subtitles(scanned_videos, {video: [subtitle]}, mode="m",
|
||||
set_current=set_current)
|
||||
set_refresh_menu_state(None)
|
||||
|
||||
if save_successful and refresh:
|
||||
refresh_item(rating_key)
|
||||
|
||||
# add item to history
|
||||
item_title = get_title_for_video_metadata(video.plexapi_metadata,
|
||||
add_section_title=False, add_episode_title=True)
|
||||
|
||||
history = get_history()
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
thumb=video.plexapi_metadata["super_thumb"],
|
||||
subtitle=subtitle, mode=extract_mode)
|
||||
history.destroy()
|
||||
|
||||
any_successful = True
|
||||
|
||||
return any_successful
|
||||
@@ -12,10 +12,12 @@ import subprocess
|
||||
import sys
|
||||
from collections import OrderedDict
|
||||
|
||||
from babelfish.exceptions import LanguageError
|
||||
|
||||
import chardet
|
||||
|
||||
from bs4 import UnicodeDammit
|
||||
from subzero.language import Language
|
||||
from subzero.language import Language, language_from_stream
|
||||
from subzero.analytics import track_event
|
||||
|
||||
mswindows = (sys.platform == "win32")
|
||||
@@ -152,6 +154,13 @@ def get_plex_item_display_title(item, kind, parent=None, parent_title=None, sect
|
||||
add_section_title=add_section_title)
|
||||
|
||||
|
||||
def series_num(v):
|
||||
try:
|
||||
return int(v)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
|
||||
|
||||
def get_video_display_title(kind, title, section_title=None, parent_title=None, season=None, episode=None,
|
||||
add_section_title=False):
|
||||
section_add = ""
|
||||
@@ -159,9 +168,12 @@ def get_video_display_title(kind, title, section_title=None, parent_title=None,
|
||||
section_add = ("%s: " % section_title) if section_title else ""
|
||||
|
||||
if kind in ("season", "show") and parent_title:
|
||||
if season and episode:
|
||||
if series_num(season) is not None and series_num(episode) is not None:
|
||||
return '%s%s S%02dE%02d%s' % (section_add, parent_title, season or 0, episode or 0,
|
||||
(", %s" % title if title else ""))
|
||||
elif series_num(season) is not None:
|
||||
return '%s%s S%02d%s' % (section_add, parent_title, season or 0,
|
||||
(", %s" % title if title else ""))
|
||||
|
||||
return '%s%s%s' % (section_add, parent_title, (", %s" % title if title else ""))
|
||||
return "%s%s" % (section_add, title)
|
||||
@@ -264,6 +276,8 @@ def get_item_hints(data):
|
||||
"title": data["original_title"] or data["series"],
|
||||
}
|
||||
)
|
||||
if hints["title"]:
|
||||
hints["title"] = hints["title"].replace(":", "")
|
||||
return hints
|
||||
|
||||
|
||||
@@ -272,6 +286,7 @@ def notify_executable(exe_info, videos, subtitles, storage):
|
||||
"subtitle_language", "subtitle_path", "subtitle_filename", "provider", "score", "storage", "series_id",
|
||||
"series", "title", "section", "filename", "path", "folder", "season_id", "type", "id", "season"
|
||||
)
|
||||
to_clean = ("PYTHONPATH", "PYTHONHOME")
|
||||
exe, arguments = exe_info
|
||||
for video, video_subtitles in subtitles.items():
|
||||
for subtitle in video_subtitles:
|
||||
@@ -307,8 +322,9 @@ def notify_executable(exe_info, videos, subtitles, storage):
|
||||
env = dict(os.environ)
|
||||
|
||||
# clean out any Plex-PYTHONPATH that may bleed through the spawned process
|
||||
if "PYTHONPATH" in env and "plex" in env["PYTHONPATH"].lower():
|
||||
del env["PYTHONPATH"]
|
||||
for v in to_clean:
|
||||
if v in env and "plex" in env[v].lower():
|
||||
del env[v]
|
||||
|
||||
try:
|
||||
proc = subprocess.Popen(quote_args([exe] + prepared_arguments), stdout=subprocess.PIPE,
|
||||
@@ -376,6 +392,44 @@ def get_language_from_stream(lang_code):
|
||||
if lang and lang != "xx":
|
||||
# Log.Debug("Found language: %r", lang)
|
||||
return Language.fromietf(lang)
|
||||
elif lang:
|
||||
try:
|
||||
return language_from_stream(lang_code)
|
||||
except LanguageError:
|
||||
pass
|
||||
|
||||
|
||||
def audio_streams_match_languages(video, languages):
|
||||
without_forced = filter(lambda x: not x.forced, languages)
|
||||
if video.audio_languages and without_forced:
|
||||
if Prefs["subtitles.when"] == "Always":
|
||||
return set()
|
||||
|
||||
elif Prefs["subtitles.when"] == "When main audio stream is not Subtitle Language (1)":
|
||||
if video.audio_languages[0] == without_forced[0]:
|
||||
return set(without_forced)
|
||||
|
||||
elif Prefs["subtitles.when"] == "When any audio stream is not Subtitle Language (1)":
|
||||
if without_forced[0] in video.audio_languages:
|
||||
return set(without_forced)
|
||||
|
||||
elif Prefs["subtitles.when"] == "When main audio stream is not any configured language":
|
||||
if video.audio_languages[0] in without_forced:
|
||||
return set(without_forced)
|
||||
|
||||
elif Prefs["subtitles.when"] == "When any audio stream is not any configured language":
|
||||
matching = set(video.audio_languages).intersection(set(without_forced))
|
||||
if matching:
|
||||
return set(without_forced)
|
||||
|
||||
# if Prefs["subtitles.when_forced"] in [
|
||||
# "Always",
|
||||
# "Only for Subtitle Language (1)",
|
||||
# "Only for Subtitle Language (2)",
|
||||
# "Only for Subtitle Language (3)"
|
||||
# ]:
|
||||
|
||||
return set()
|
||||
|
||||
|
||||
def get_language(lang_short):
|
||||
@@ -383,17 +437,10 @@ def get_language(lang_short):
|
||||
|
||||
|
||||
def display_language(l):
|
||||
return _(str(l).lower())
|
||||
|
||||
|
||||
def is_stream_forced(stream):
|
||||
stream_title = getattr(stream, "title", "") or ""
|
||||
forced = getattr(stream, "forced", False)
|
||||
if not forced and stream_title and "forced" in stream_title.strip().lower():
|
||||
forced = True
|
||||
|
||||
return forced
|
||||
if not l:
|
||||
return "Unknown"
|
||||
return _(str(l.basename).lower()) + ((u" (%s)" % _("forced")) if l.forced else "")
|
||||
|
||||
|
||||
class PartUnknownException(Exception):
|
||||
pass
|
||||
pass
|
||||
|
||||
@@ -81,8 +81,11 @@ def local_string_with_optional_format(key, *args, **kwargs):
|
||||
# fixme: may not be the best idea as this evaluates the string early
|
||||
try:
|
||||
return unicode(SmartLocalStringFormatter(plex_i18n_module.LocalString(core, key, Locale.CurrentLocale), args))
|
||||
except TypeError:
|
||||
except (TypeError, ValueError):
|
||||
Log.Exception("Broken translation!")
|
||||
Log.Debug("EN string: %s", plex_i18n_module.LocalString(core, key, "en"))
|
||||
Log.Debug("%s string: %r", Locale.CurrentLocale,
|
||||
unicode(plex_i18n_module.LocalString(core, key, Locale.CurrentLocale)))
|
||||
return unicode(SmartLocalStringFormatter(plex_i18n_module.LocalString(core, key, "en"), args))
|
||||
|
||||
# check string instances for arguments
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
# coding=utf-8
|
||||
|
||||
from subzero.lib.dict import DictProxy
|
||||
from config import config
|
||||
|
||||
|
||||
class IgnoreDict(DictProxy):
|
||||
class ExcludeDict(DictProxy):
|
||||
store = "ignore"
|
||||
|
||||
# single item keys returned by helpers.items.getItems mapped to their parents
|
||||
@@ -62,4 +63,13 @@ class IgnoreDict(DictProxy):
|
||||
return {"sections": [], "series": [], "videos": [], "titles": {}, "seasons": []}
|
||||
|
||||
|
||||
ignore_list = IgnoreDict(Dict)
|
||||
class IncludeDict(ExcludeDict):
|
||||
store = "include"
|
||||
|
||||
|
||||
exclude_list = ExcludeDict(Dict)
|
||||
include_list = IncludeDict(Dict)
|
||||
|
||||
|
||||
def get_decision_list():
|
||||
return include_list if config.include else exclude_list
|
||||
|
||||
@@ -10,10 +10,10 @@ import time
|
||||
|
||||
import datetime
|
||||
|
||||
from ignore import ignore_list
|
||||
from ignore import get_decision_list
|
||||
from helpers import is_recent, get_plex_item_display_title, query_plex, PartUnknownException
|
||||
from lib import Plex, get_intent
|
||||
from config import config, IGNORE_FN
|
||||
from config import config
|
||||
from subliminal_patch.subtitle import ModifiedSubtitle
|
||||
from subzero.modification import registry as mod_registry, SubtitleModifications
|
||||
from socket import timeout
|
||||
@@ -209,10 +209,13 @@ def get_recent_items():
|
||||
available_keys = ("key", "title", "parent_key", "parent_title", "season", "episode", "added", "filename")
|
||||
recent = []
|
||||
|
||||
ref_list = get_decision_list()
|
||||
|
||||
for section in Plex["library"].sections():
|
||||
if section.type not in ("movie", "show") \
|
||||
or section.key not in config.enabled_sections \
|
||||
or section.key in ignore_list.sections:
|
||||
or ((section.key not in ref_list.sections and config.include)
|
||||
or (section.key in ref_list.sections and not config.include)):
|
||||
Log.Debug(u"Skipping section: %s" % section.title)
|
||||
continue
|
||||
|
||||
@@ -229,14 +232,16 @@ def get_recent_items():
|
||||
matches = [m.groupdict() for m in matcher.finditer(response.content)]
|
||||
for match in matches:
|
||||
data = dict((key, match[key] if key in match else None) for key in available_keys)
|
||||
if section.type == "show" and data["parent_key"] in ignore_list.series:
|
||||
if section.type == "show" and ((data["parent_key"] not in ref_list.series and config.include) or
|
||||
(data["parent_key"] in ref_list.series and not config.include)):
|
||||
Log.Debug(u"Skipping series: %s" % data["parent_title"])
|
||||
continue
|
||||
if data["key"] in ignore_list.videos:
|
||||
if (data["key"] not in ref_list.videos and config.include) or \
|
||||
(data["key"] in ref_list.videos and not config.include):
|
||||
Log.Debug(u"Skipping item: %s" % data["title"])
|
||||
continue
|
||||
if is_physically_ignored(data["filename"], plex_item_type):
|
||||
Log.Debug(u"Skipping item: %s" % data["title"])
|
||||
if not is_physically_wanted(data["filename"], plex_item_type):
|
||||
Log.Debug(u"Skipping item (physically not wanted): %s" % data["title"])
|
||||
continue
|
||||
|
||||
if is_recent(int(data["added"])):
|
||||
@@ -257,63 +262,73 @@ def get_all_items(key, base="library", value=None, flat=False):
|
||||
return get_items(key, base=base, value=value, flat=flat)
|
||||
|
||||
|
||||
def is_ignored(rating_key, item=None):
|
||||
def is_wanted(rating_key, item=None):
|
||||
"""
|
||||
check whether an item, its show/season/section is in the soft or the hard ignore list
|
||||
:param rating_key:
|
||||
:param item:
|
||||
:return:
|
||||
"""
|
||||
# item in soft ignore list
|
||||
if ignore_list["videos"] and rating_key in ignore_list["videos"]:
|
||||
Log.Debug("Item %s is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
|
||||
ref_list = get_decision_list()
|
||||
ret_val = ref_list.store == "include"
|
||||
inc_exc_verbose = "exclude" if not ret_val else "include"
|
||||
|
||||
# item in soft include/exclude list
|
||||
if ref_list["videos"] and rating_key in ref_list["videos"]:
|
||||
Log.Debug("Item %s is in the soft %s list" % (rating_key, inc_exc_verbose))
|
||||
return ret_val
|
||||
|
||||
item = item or get_item(rating_key)
|
||||
kind = get_item_kind(item)
|
||||
|
||||
# show in soft ignore list
|
||||
if kind == "Episode" and ignore_list["series"] and item.show.rating_key in ignore_list["series"]:
|
||||
Log.Debug("Item %s's show is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
# show in soft include/exclude list
|
||||
if kind == "Episode" and ref_list["series"] and item.show.rating_key in ref_list["series"]:
|
||||
Log.Debug("Item %s's show is in the soft %s list" % (rating_key, inc_exc_verbose))
|
||||
return ret_val
|
||||
|
||||
# season in soft ignore list
|
||||
if kind == "Episode" and ignore_list["seasons"] and item.season.rating_key in ignore_list["seasons"]:
|
||||
Log.Debug("Item %s's season is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
# season in soft include/exclude list
|
||||
if kind == "Episode" and ref_list["seasons"] and item.season.rating_key in ref_list["seasons"]:
|
||||
Log.Debug("Item %s's season is in the soft %s list" % (rating_key, inc_exc_verbose))
|
||||
return ret_val
|
||||
|
||||
# section in soft ignore list
|
||||
if ignore_list["sections"] and item.section.key in ignore_list["sections"]:
|
||||
Log.Debug("Item %s's section is in the soft ignore list" % rating_key)
|
||||
return True
|
||||
# section in soft include/exclude list
|
||||
if ref_list["sections"] and item.section.key in ref_list["sections"]:
|
||||
Log.Debug("Item %s's section is in the soft %s list" % (rating_key, inc_exc_verbose))
|
||||
return ret_val
|
||||
|
||||
# physical/path ignore
|
||||
if config.ignore_sz_files or config.ignore_paths:
|
||||
# physical/path include/exclude
|
||||
if config.include_exclude_sz_files or config.include_exclude_paths:
|
||||
for media in item.media:
|
||||
for part in media.parts:
|
||||
if is_physically_ignored(part.file, kind):
|
||||
return True
|
||||
return is_physically_wanted(part.file, kind)
|
||||
|
||||
return False
|
||||
return not ret_val
|
||||
|
||||
|
||||
def is_physically_ignored(fn, kind):
|
||||
if config.ignore_sz_files or config.ignore_paths:
|
||||
def is_physically_wanted(fn, kind):
|
||||
if config.include_exclude_sz_files or config.include_exclude_paths:
|
||||
# normally check current item folder and the library
|
||||
check_ignore_paths = [".", "../"]
|
||||
check_paths = [".", "../"]
|
||||
if kind == "Episode":
|
||||
# series/episode, we've got a season folder here, also
|
||||
check_ignore_paths.append("../../")
|
||||
check_paths.append("../../")
|
||||
|
||||
if config.ignore_paths and config.is_path_ignored(fn):
|
||||
Log.Debug("Item %s's path is manually ignored" % fn)
|
||||
wanted_results = []
|
||||
if config.include_exclude_sz_files:
|
||||
for sub_path in check_paths:
|
||||
wanted_results.append(config.is_physically_wanted(
|
||||
os.path.normpath(os.path.join(os.path.dirname(fn), sub_path)), fn))
|
||||
|
||||
if config.include_exclude_paths:
|
||||
wanted_results.append(config.is_path_wanted(fn))
|
||||
|
||||
if config.include and any(wanted_results):
|
||||
return True
|
||||
elif not config.include and not all(wanted_results):
|
||||
return False
|
||||
|
||||
if config.ignore_sz_files:
|
||||
for sub_path in check_ignore_paths:
|
||||
if config.is_physically_ignored(os.path.normpath(os.path.join(os.path.dirname(fn), sub_path))):
|
||||
Log.Debug("An ignore file exists in either the items or its parent folders")
|
||||
return True
|
||||
return not config.include
|
||||
|
||||
|
||||
def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, parent_rating_key=None):
|
||||
@@ -334,11 +349,13 @@ def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, paren
|
||||
refresh = [item.rating_key for item in list(Plex["library/metadata"].children(int(rating_key)))]
|
||||
|
||||
multiple = len(refresh) > 1
|
||||
wait = config.advanced.get("refresh_after_called", 5)
|
||||
Thread.Sleep(wait)
|
||||
for key in refresh:
|
||||
Log.Info("%s item %s", "Refreshing" if not force else "Forced-refreshing", key)
|
||||
Plex["library/metadata"].refresh(key)
|
||||
if multiple:
|
||||
Thread.Sleep(10.0)
|
||||
Thread.Sleep(wait)
|
||||
|
||||
|
||||
def get_current_sub(rating_key, part_id, language, plex_item=None):
|
||||
@@ -408,8 +425,9 @@ def save_stored_sub(stored_subtitle, rating_key, part_id, language, item_type, p
|
||||
|
||||
try:
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
|
||||
stored_subtitle.mods = subtitle.mods
|
||||
Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
|
||||
", ".join(stored_subtitle.mods) if stored_subtitle.mods else "none")
|
||||
", ".join(subtitle.mods) if subtitle.mods else "none")
|
||||
except:
|
||||
Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())
|
||||
|
||||
|
||||
@@ -7,13 +7,15 @@ import helpers
|
||||
import subtitlehelpers
|
||||
|
||||
from config import config as sz_config
|
||||
from subzero.language import ENDSWITH_LANGUAGECODE_RE
|
||||
|
||||
|
||||
SECONDARY_TAGS = ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom', 'hi', 'cc', 'sdh']
|
||||
|
||||
|
||||
def find_subtitles(part):
|
||||
def find_subtitles(part, ignore_parts_cleanup=None):
|
||||
lang_sub_map = {}
|
||||
ignore_parts_cleanup = ignore_parts_cleanup or []
|
||||
part_filename = helpers.unicodize(part.file)
|
||||
part_basename = os.path.splitext(os.path.basename(part_filename))[0]
|
||||
use_filesystem = helpers.cast_bool(Prefs["subtitles.save.filesystem"])
|
||||
@@ -21,6 +23,7 @@ def find_subtitles(part):
|
||||
if Prefs["subtitles.save.subFolder.Custom"] else None
|
||||
|
||||
use_sub_subfolder = Prefs["subtitles.save.subFolder"] != "current folder" and not sub_dir_custom
|
||||
autoclean = helpers.cast_bool(Prefs["subtitles.autoclean"])
|
||||
sub_subfolder = None
|
||||
paths = [os.path.dirname(part_filename)] if use_filesystem else []
|
||||
|
||||
@@ -88,7 +91,10 @@ def find_subtitles(part):
|
||||
media_files.append(root)
|
||||
|
||||
# cleanup any leftover subtitle if no associated media file was found
|
||||
if use_filesystem and helpers.cast_bool(Prefs["subtitles.autoclean"]):
|
||||
if autoclean and ignore_parts_cleanup:
|
||||
Log.Info("Skipping housekeeping of: %s", paths)
|
||||
|
||||
if use_filesystem and autoclean and not ignore_parts_cleanup:
|
||||
for path in paths:
|
||||
# only housekeep in sub_subfolder if sub_subfolder is used
|
||||
if use_sub_subfolder and path != sub_subfolder and not sz_config.advanced.thorough_cleaning:
|
||||
@@ -120,7 +126,7 @@ def find_subtitles(part):
|
||||
root = split_tag[0]
|
||||
|
||||
# get associated media file name without language
|
||||
sub_fn = subtitlehelpers.ENDSWITH_LANGUAGECODE_RE.sub("", root)
|
||||
sub_fn = ENDSWITH_LANGUAGECODE_RE.sub("", root)
|
||||
|
||||
# subtitle basename and basename without possible language tag not found in collected
|
||||
# media files? kill.
|
||||
|
||||
@@ -8,6 +8,7 @@ from babelfish import LanguageReverseError
|
||||
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
from support.helpers import get_plex_item_display_title, cast_bool, get_language_from_stream
|
||||
from support.plex_media import is_stream_forced, update_stream_info
|
||||
from support.items import get_item
|
||||
from support.lib import Plex
|
||||
from support.storage import get_subtitle_storage
|
||||
@@ -31,11 +32,11 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
subtitle_target_dir, tdir_is_absolute = config.subtitle_sub_dir
|
||||
|
||||
missing = set()
|
||||
languages_set = set([Language.fromietf(str(l)) for l in languages])
|
||||
languages_set = set([Language.rebuild(l) for l in languages])
|
||||
for media in item.media:
|
||||
existing_subs = {"internal": [], "external": [], "own_external": [], "count": 0}
|
||||
for part in media.parts:
|
||||
|
||||
update_stream_info(part)
|
||||
# did we already download an external subtitle before?
|
||||
if subtitle_target_dir and stored_subs:
|
||||
for language in languages_set:
|
||||
@@ -79,6 +80,7 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
|
||||
for stream in part.streams:
|
||||
if stream.stream_type == 3:
|
||||
is_forced = is_stream_forced(stream)
|
||||
if stream.index:
|
||||
key = "internal"
|
||||
else:
|
||||
@@ -89,7 +91,7 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
|
||||
# treat unknown language as lang1?
|
||||
if not stream.language_code and config.treat_und_as_first:
|
||||
lang = Language.fromietf(str(list(config.lang_list)[0]))
|
||||
lang = Language.rebuild(list(config.lang_list)[0])
|
||||
|
||||
# we can't parse empty language codes
|
||||
elif not stream.language_code or not stream.codec:
|
||||
@@ -101,7 +103,7 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
lang = get_language_from_stream(stream.language_code)
|
||||
if not lang:
|
||||
if config.treat_und_as_first:
|
||||
lang = Language.fromietf(str(list(config.lang_list)[0]))
|
||||
lang = Language.rebuild(list(config.lang_list)[0])
|
||||
else:
|
||||
continue
|
||||
|
||||
@@ -110,10 +112,11 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
|
||||
if lang:
|
||||
# Log.Debug("Found babelfish language: %r", lang)
|
||||
lang.forced = is_forced
|
||||
existing_subs[key].append(lang)
|
||||
existing_subs["count"] = existing_subs["count"] + 1
|
||||
|
||||
missing_from_part = set([Language.fromietf(str(l)) for l in languages])
|
||||
missing_from_part = set([Language.rebuild(l) for l in languages])
|
||||
if existing_subs["count"]:
|
||||
|
||||
# fixme: this is actually somewhat broken with IETF, as Plex doesn't store the country portion
|
||||
@@ -123,7 +126,7 @@ def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_t
|
||||
+ (existing_subs["external"] if external else [])
|
||||
+ existing_subs["own_external"])
|
||||
|
||||
check_languages = set([Language.fromietf(str(l)) for l in languages])
|
||||
check_languages = set([Language.rebuild(l) for l in languages])
|
||||
alpha3_map = {}
|
||||
if config.ietf_as_alpha3:
|
||||
for language in existing_flat:
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
# coding=utf-8
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
import helpers
|
||||
from items import get_item
|
||||
from subzero.language import Language
|
||||
from lib import Plex
|
||||
from support.config import TEXT_SUBTITLE_EXTS, config
|
||||
|
||||
@@ -25,6 +27,9 @@ tvdb_guid_identifier = "com.plexapp.agents.thetvdb://"
|
||||
|
||||
|
||||
def get_plexapi_stream_info(plex_item, part_id=None):
|
||||
if not plex_item:
|
||||
return
|
||||
|
||||
d = {"stream": {}}
|
||||
data = d["stream"]
|
||||
|
||||
@@ -99,6 +104,9 @@ def media_to_videos(media, kind="series"):
|
||||
plex_episode = get_item(ep.id)
|
||||
stream_info = get_plexapi_stream_info(plex_episode)
|
||||
|
||||
if not stream_info:
|
||||
continue
|
||||
|
||||
for item in media.seasons[season].episodes[episode].items:
|
||||
for part in item.parts:
|
||||
videos.append(
|
||||
@@ -107,6 +115,7 @@ def media_to_videos(media, kind="series"):
|
||||
"title": ep.title,
|
||||
"series": media.title, "id": ep.id, "year": year,
|
||||
"series_id": media.id,
|
||||
"super_thumb": plex_item.thumb,
|
||||
"season_id": season_object.id,
|
||||
"imdb_id": None, "series_tvdb_id": series_tvdb_id,
|
||||
"tvdb_id": tvdb_id,
|
||||
@@ -119,21 +128,24 @@ def media_to_videos(media, kind="series"):
|
||||
)
|
||||
else:
|
||||
stream_info = get_plexapi_stream_info(plex_item)
|
||||
imdb_id = None
|
||||
if imdb_guid_identifier in media.guid:
|
||||
imdb_id = media.guid[len(imdb_guid_identifier):].split("?")[0]
|
||||
for item in media.items:
|
||||
for part in item.parts:
|
||||
videos.append(
|
||||
get_metadata_dict(plex_item, part, dict(stream_info, **{"plex_part": part, "type": "movie",
|
||||
"title": media.title, "id": media.id,
|
||||
"series_id": None, "year": year,
|
||||
"season_id": None, "imdb_id": imdb_id,
|
||||
"original_title": original_title,
|
||||
"series_tvdb_id": None, "tvdb_id": None,
|
||||
"section": plex_item.section.title})
|
||||
)
|
||||
)
|
||||
|
||||
if stream_info:
|
||||
imdb_id = None
|
||||
if imdb_guid_identifier in media.guid:
|
||||
imdb_id = media.guid[len(imdb_guid_identifier):].split("?")[0]
|
||||
for item in media.items:
|
||||
for part in item.parts:
|
||||
videos.append(
|
||||
get_metadata_dict(plex_item, part, dict(stream_info, **{"plex_part": part, "type": "movie",
|
||||
"title": media.title, "id": media.id,
|
||||
"super_thumb": plex_item.thumb,
|
||||
"series_id": None, "year": year,
|
||||
"season_id": None, "imdb_id": imdb_id,
|
||||
"original_title": original_title,
|
||||
"series_tvdb_id": None, "tvdb_id": None,
|
||||
"section": plex_item.section.title})
|
||||
)
|
||||
)
|
||||
return videos
|
||||
|
||||
|
||||
@@ -171,37 +183,100 @@ def get_all_parts(plex_item):
|
||||
return parts
|
||||
|
||||
|
||||
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, get_forced=None):
|
||||
def update_stream_info(part):
|
||||
try:
|
||||
return update_stream_info_(part)
|
||||
except:
|
||||
Log.Exception("Getting Mediainfo failed for: %s", part.file)
|
||||
|
||||
|
||||
def update_stream_info_(part):
|
||||
if config.mediainfo_bin and part.container == "mp4":
|
||||
cmdline = '%s --Inform="Text;-%%ID%%_%%Title%%" %s' % (config.mediainfo_bin, helpers.quote(part.file))
|
||||
result = subprocess.check_output(cmdline, stderr=subprocess.PIPE, shell=True)
|
||||
if result:
|
||||
try:
|
||||
stream_titles = {}
|
||||
for pair in result[1:].split("-"):
|
||||
sid, title = pair.split("_")
|
||||
stream_titles[int(sid.strip())] = title.strip()
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
filled = []
|
||||
for stream in part.streams:
|
||||
if stream.index is None:
|
||||
Log.Debug("Found stream with no index: %r", stream)
|
||||
|
||||
index = stream.index+1 if stream.index is not None else 1
|
||||
if index in stream_titles:
|
||||
stream.title = stream_titles[index]
|
||||
filled.append(index-1)
|
||||
if filled:
|
||||
Log.Debug("Filled missing MP4 stream title info for streams: %s", filled)
|
||||
|
||||
|
||||
def is_stream_forced(stream):
|
||||
stream_title = getattr(stream, "title", "") or ""
|
||||
forced = getattr(stream, "forced", False)
|
||||
if not forced and stream_title and "forced" in stream_title.strip().lower():
|
||||
forced = True
|
||||
|
||||
return forced
|
||||
|
||||
|
||||
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, skip_unknown=False):
|
||||
streams = []
|
||||
streams_unknown = []
|
||||
all_streams = []
|
||||
has_unknown = False
|
||||
found_requested_language = False
|
||||
update_stream_info(part)
|
||||
for stream in part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
|
||||
is_forced = is_stream_forced(stream)
|
||||
language = helpers.get_language_from_stream(stream.language_code)
|
||||
if language:
|
||||
language = Language.rebuild(language, forced=is_forced)
|
||||
|
||||
is_unknown = False
|
||||
found_requested_language = requested_language and requested_language == language
|
||||
is_forced = helpers.is_stream_forced(stream)
|
||||
stream_data = None
|
||||
|
||||
if get_forced is not None:
|
||||
if (get_forced and not is_forced) or (not get_forced and is_forced):
|
||||
continue
|
||||
|
||||
if not language and config.treat_und_as_first:
|
||||
if not language:
|
||||
# only consider first unknown subtitle stream
|
||||
if has_unknown and skip_duplicate_unknown:
|
||||
continue
|
||||
if config.treat_und_as_first:
|
||||
if has_unknown and skip_duplicate_unknown:
|
||||
Log.Debug("skipping duplicate unknown")
|
||||
continue
|
||||
|
||||
language = list(config.lang_list)[0]
|
||||
language = Language.rebuild(list(config.lang_list)[0], forced=is_forced)
|
||||
else:
|
||||
language = None
|
||||
is_unknown = True
|
||||
has_unknown = True
|
||||
stream_data = {"stream": stream, "is_unknown": is_unknown, "language": language,
|
||||
"is_forced": is_forced}
|
||||
streams_unknown.append(stream_data)
|
||||
|
||||
if not requested_language or found_requested_language or has_unknown:
|
||||
streams.append({"stream": stream, "is_unknown": is_unknown, "language": language,
|
||||
"is_forced": is_forced})
|
||||
if not requested_language or found_requested_language:
|
||||
stream_data = {"stream": stream, "is_unknown": is_unknown, "language": language,
|
||||
"is_forced": is_forced}
|
||||
streams.append(stream_data)
|
||||
|
||||
if found_requested_language:
|
||||
break
|
||||
|
||||
if stream_data:
|
||||
all_streams.append(stream_data)
|
||||
|
||||
if requested_language:
|
||||
if streams_unknown and not found_requested_language and not skip_unknown:
|
||||
streams = streams_unknown
|
||||
else:
|
||||
streams = all_streams
|
||||
|
||||
return streams
|
||||
|
||||
|
||||
@@ -236,6 +311,9 @@ def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
|
||||
|
||||
stream_info = get_plexapi_stream_info(plex_item, part_id)
|
||||
|
||||
if not stream_info:
|
||||
return
|
||||
|
||||
# get normalized metadata
|
||||
# fixme: duplicated logic of media_to_videos
|
||||
if item_type == "episode":
|
||||
@@ -256,6 +334,7 @@ def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
|
||||
"imdb_id": None,
|
||||
"year": year,
|
||||
"tvdb_id": tvdb_id,
|
||||
"super_thumb": plex_item.show.thumb,
|
||||
"series_tvdb_id": series_tvdb_id,
|
||||
"original_title": original_title,
|
||||
"season": plex_item.season.index,
|
||||
@@ -275,6 +354,7 @@ def get_plex_metadata(rating_key, part_id, item_type, plex_item=None):
|
||||
"imdb_id": imdb_id,
|
||||
"year": plex_item.year,
|
||||
"tvdb_id": None,
|
||||
"super_thumb": plex_item.thumb,
|
||||
"series_tvdb_id": None,
|
||||
"original_title": original_title,
|
||||
"season": None,
|
||||
@@ -355,3 +435,4 @@ class PMSMediaProxy(object):
|
||||
|
||||
m = m.children[0]
|
||||
return parts
|
||||
|
||||
|
||||
@@ -4,15 +4,15 @@ import helpers
|
||||
from babelfish.exceptions import LanguageError
|
||||
|
||||
from support.lib import Plex, get_intent
|
||||
from support.plex_media import get_stream_fps
|
||||
from support.plex_media import get_stream_fps, is_stream_forced, update_stream_info
|
||||
from support.storage import get_subtitle_storage
|
||||
from support.config import config, TEXT_SUBTITLE_EXTS
|
||||
|
||||
from support.subtitlehelpers import get_subtitles_from_metadata
|
||||
from subzero.video import parse_video, set_existing_languages
|
||||
from subzero.language import language_from_stream
|
||||
from subzero.language import language_from_stream, Language
|
||||
|
||||
|
||||
def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, providers=None, skip_hashing=False):
|
||||
def prepare_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, providers=None, skip_hashing=False):
|
||||
"""
|
||||
returnes a subliminal/guessit-refined parsed video
|
||||
:param pms_video_info:
|
||||
@@ -29,7 +29,7 @@ def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, pr
|
||||
if ignore_all:
|
||||
Log.Debug("Force refresh intended.")
|
||||
|
||||
Log.Debug("Scanning video: %s, external_subtitles=%s, embedded_subtitles=%s" % (
|
||||
Log.Debug("Detecting streams: %s, account_for_external_subtitles=%s, account_for_embedded_subtitles=%s" % (
|
||||
plex_part.file, external_subtitles, embedded_subtitles))
|
||||
|
||||
known_embedded = []
|
||||
@@ -44,36 +44,74 @@ def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, pr
|
||||
|
||||
# embedded subtitles
|
||||
# fixme: skip the whole scanning process if known_embedded == wanted languages?
|
||||
audio_languages = []
|
||||
if plexpy_part:
|
||||
if embedded_subtitles:
|
||||
for stream in plexpy_part.streams:
|
||||
# subtitle stream
|
||||
if stream.stream_type == 3:
|
||||
is_forced = helpers.is_stream_forced(stream)
|
||||
update_stream_info(plexpy_part)
|
||||
for stream in plexpy_part.streams:
|
||||
if stream.stream_type == 2:
|
||||
lang = None
|
||||
try:
|
||||
lang = language_from_stream(stream.language_code)
|
||||
except LanguageError:
|
||||
Log.Info("Couldn't detect embedded audio stream language: %s", stream.language_code)
|
||||
|
||||
if (config.forced_only and is_forced) or \
|
||||
(not config.forced_only and not is_forced):
|
||||
# treat unknown language as lang1?
|
||||
if not lang and config.treat_und_as_first:
|
||||
lang = Language.rebuild(list(config.lang_list)[0])
|
||||
Log.Info("Assuming language %s for audio stream: %s", lang, getattr(stream, "index", None))
|
||||
|
||||
# embedded subtitle
|
||||
# fixme: tap into external subtitles here instead of scanning for ourselves later?
|
||||
if stream.codec and getattr(stream, "index", None):
|
||||
if config.exotic_ext or stream.codec.lower() in config.text_based_formats:
|
||||
lang = None
|
||||
try:
|
||||
lang = language_from_stream(stream.language_code)
|
||||
except LanguageError:
|
||||
Log.Debug("Couldn't detect embedded subtitle stream language: %s", stream.language_code)
|
||||
audio_languages.append(lang)
|
||||
|
||||
# treat unknown language as lang1?
|
||||
if not lang and config.treat_und_as_first:
|
||||
lang = list(config.lang_list)[0]
|
||||
# subtitle stream
|
||||
elif stream.stream_type == 3 and embedded_subtitles:
|
||||
is_forced = is_stream_forced(stream)
|
||||
|
||||
if lang:
|
||||
known_embedded.append(lang.alpha3)
|
||||
if ((config.forced_only or config.forced_also) and is_forced) or not is_forced:
|
||||
# embedded subtitle
|
||||
# fixme: tap into external subtitles here instead of scanning for ourselves later?
|
||||
if stream.codec and getattr(stream, "index", None):
|
||||
if config.exotic_ext or stream.codec.lower() in config.text_based_formats:
|
||||
lang = None
|
||||
try:
|
||||
lang = language_from_stream(stream.language_code)
|
||||
except LanguageError:
|
||||
Log.Info("Couldn't detect embedded subtitle stream language: %s", stream.language_code)
|
||||
|
||||
# treat unknown language as lang1?
|
||||
if not lang and config.treat_und_as_first:
|
||||
lang = Language.rebuild(list(config.lang_list)[0])
|
||||
Log.Info("Assuming language %s for subtitle stream: %s", lang,
|
||||
getattr(stream, "index", None))
|
||||
|
||||
if lang:
|
||||
if is_forced:
|
||||
lang.forced = True
|
||||
known_embedded.append(lang)
|
||||
else:
|
||||
Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)
|
||||
|
||||
Log.Debug("Known embedded: %r", known_embedded)
|
||||
# metadata subtitles
|
||||
known_metadata_subs = set()
|
||||
meta_subs = get_subtitles_from_metadata(plex_part)
|
||||
for language, subList in meta_subs.iteritems():
|
||||
try:
|
||||
lang = Language.fromietf(Locale.Language.Match(language))
|
||||
except LanguageError:
|
||||
if config.treat_und_as_first:
|
||||
lang = Language.rebuild(list(config.lang_list)[0])
|
||||
else:
|
||||
continue
|
||||
|
||||
if subList:
|
||||
for key in subList:
|
||||
if key.startswith("subzero_md_forced"):
|
||||
lang = Language.rebuild(lang, forced=True)
|
||||
|
||||
known_metadata_subs.add(lang)
|
||||
Log.Debug("Found metadata subtitle %r:%s for %s", lang, key, plex_part.file)
|
||||
|
||||
Log.Debug("Known metadata subtitles: %r", known_metadata_subs)
|
||||
Log.Debug("Known embedded subtitles: %r", known_embedded)
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load(rating_key)
|
||||
@@ -84,11 +122,17 @@ def scan_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, pr
|
||||
video = parse_video(plex_part.file, hints, skip_hashing=config.low_impact_mode or skip_hashing,
|
||||
providers=providers)
|
||||
|
||||
# set stream languages
|
||||
if audio_languages:
|
||||
video.audio_languages = audio_languages
|
||||
Log.Info("Found audio streams: %s" % ", ".join([str(l) for l in audio_languages]))
|
||||
|
||||
if not ignore_all:
|
||||
set_existing_languages(video, pms_video_info, external_subtitles=external_subtitles,
|
||||
embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
|
||||
forced_only=config.forced_only, stored_subs=stored_subs, languages=config.lang_list,
|
||||
only_one=config.only_one)
|
||||
stored_subs=stored_subs, languages=config.lang_list,
|
||||
only_one=config.only_one, known_metadata_subs=known_metadata_subs,
|
||||
match_strictness=config.ext_match_strictness)
|
||||
|
||||
# add video fps info
|
||||
video.fps = plex_part.fps
|
||||
@@ -115,9 +159,9 @@ def scan_videos(videos, ignore_all=False, providers=None, skip_hashing=False):
|
||||
hints = helpers.get_item_hints(video)
|
||||
video["plex_part"].fps = get_stream_fps(video["plex_part"].streams)
|
||||
p = providers or config.get_providers(media_type="series" if video["type"] == "episode" else "movies")
|
||||
scanned_video = scan_video(video, ignore_all=force_refresh or ignore_all, hints=hints,
|
||||
rating_key=video["id"], providers=p,
|
||||
skip_hashing=skip_hashing)
|
||||
scanned_video = prepare_video(video, ignore_all=force_refresh or ignore_all, hints=hints,
|
||||
rating_key=video["id"], providers=p,
|
||||
skip_hashing=skip_hashing)
|
||||
|
||||
if not scanned_video:
|
||||
continue
|
||||
|
||||
@@ -33,7 +33,7 @@ def store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage_ty
|
||||
video_id = str(video.id)
|
||||
plex_item = get_item(video_id)
|
||||
if not plex_item:
|
||||
Log.Warning("Plex item not found: %s", video_id)
|
||||
Log.Warn("Plex item not found: %s", video_id)
|
||||
continue
|
||||
|
||||
metadata = video.plexapi_metadata
|
||||
@@ -113,8 +113,7 @@ def get_target_folder(file_path):
|
||||
return fld
|
||||
|
||||
|
||||
def save_subtitles_to_file(subtitles, tags=None, forced_tag=None):
|
||||
forced_tag = forced_tag or config.forced_only
|
||||
def save_subtitles_to_file(subtitles, tags=None):
|
||||
for video, video_subtitles in subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
@@ -126,12 +125,12 @@ def save_subtitles_to_file(subtitles, tags=None, forced_tag=None):
|
||||
|
||||
fld = get_target_folder(file_path)
|
||||
subliminal_save_subtitles(file_path, video_subtitles, directory=fld, single=cast_bool(Prefs['subtitles.only_one']),
|
||||
chmod=config.chmod, forced_tag=forced_tag, path_decoder=force_unicode,
|
||||
chmod=config.chmod, path_decoder=force_unicode,
|
||||
debug_mods=config.debug_mods, formats=config.subtitle_formats, tags=tags)
|
||||
return True
|
||||
|
||||
|
||||
def save_subtitles_to_metadata(videos, subtitles, is_forced=False):
|
||||
def save_subtitles_to_metadata(videos, subtitles):
|
||||
for video, video_subtitles in subtitles.items():
|
||||
mediaPart = videos[video]
|
||||
for subtitle in video_subtitles:
|
||||
@@ -143,15 +142,28 @@ def save_subtitles_to_metadata(videos, subtitles, is_forced=False):
|
||||
mp = PMSMediaProxy(video.id).get_part(mediaPart.id)
|
||||
else:
|
||||
mp = mediaPart
|
||||
pm = Proxy.Media(content, ext="srt", forced="1" if is_forced else None)
|
||||
pm = Proxy.Media(content, ext="srt", forced="1" if subtitle.language.forced else None)
|
||||
new_key = "subzero_md" + ("_forced" if subtitle.language.forced else "")
|
||||
lang = Locale.Language.Match(subtitle.language.alpha2)
|
||||
mp.subtitles[lang].validate_keys({})
|
||||
mp.subtitles[lang]["subzero"] = pm
|
||||
|
||||
for key, proxy in getattr(mp.subtitles[lang], "_proxies").iteritems():
|
||||
if not proxy or not len(proxy) >= 5:
|
||||
Log.Debug("Can't parse metadata: %s" % repr(proxy))
|
||||
continue
|
||||
if proxy[0] == "Media":
|
||||
if not key.startswith("subzero_"):
|
||||
if key == "subzero":
|
||||
Log.Debug("Removing legacy metadata subtitle for %s", lang)
|
||||
del mp.subtitles[lang][key]
|
||||
Log.Debug("Existing metadata subtitle for %s: %s", lang, key)
|
||||
|
||||
Log.Debug("Adding metadata sub for %s: %s", lang, subtitle)
|
||||
mp.subtitles[lang][new_key] = pm
|
||||
return True
|
||||
|
||||
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None,
|
||||
set_current=True, is_forced=False):
|
||||
set_current=True):
|
||||
"""
|
||||
|
||||
:param set_current: save the subtitle as the current one
|
||||
@@ -186,7 +198,7 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_
|
||||
if save_to_fs:
|
||||
try:
|
||||
Log.Debug("Using filesystem as subtitle storage")
|
||||
save_subtitles_to_file(downloaded_subtitles, forced_tag=is_forced)
|
||||
save_subtitles_to_file(downloaded_subtitles)
|
||||
except OSError:
|
||||
if cast_bool(Prefs["subtitles.save.metadata_fallback"]):
|
||||
meta_fallback = True
|
||||
@@ -201,13 +213,12 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_
|
||||
Log.Debug("Using metadata as subtitle storage, because filesystem storage failed")
|
||||
else:
|
||||
Log.Debug("Using metadata as subtitle storage")
|
||||
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles,
|
||||
is_forced=is_forced)
|
||||
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles)
|
||||
|
||||
if not bare_save and save_successful and config.notify_executable:
|
||||
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
|
||||
|
||||
if not bare_save and save_successful or not set_current:
|
||||
if (not bare_save and save_successful) or not set_current:
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode, set_current=set_current)
|
||||
|
||||
return save_successful
|
||||
|
||||
@@ -5,6 +5,7 @@ import helpers
|
||||
|
||||
from config import config, SUBTITLE_EXTS, TEXT_SUBTITLE_EXTS
|
||||
from bs4 import UnicodeDammit
|
||||
from subzero.language import match_ietf_language
|
||||
|
||||
|
||||
class SubtitleHelper(object):
|
||||
@@ -85,19 +86,6 @@ class VobSubSubtitleHelper(SubtitleHelper):
|
||||
#####################################################################################################################
|
||||
|
||||
|
||||
IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$"
|
||||
ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
|
||||
|
||||
|
||||
def match_ietf_language(s):
|
||||
language_match = re.match(".+\.([^\.]+)$" if not helpers.cast_bool(Prefs["subtitles.language.ietf_display"])
|
||||
else IETF_MATCH, s)
|
||||
if language_match and len(language_match.groups()) == 1:
|
||||
language = language_match.groups()[0]
|
||||
return language
|
||||
return s
|
||||
|
||||
|
||||
class DefaultSubtitleHelper(SubtitleHelper):
|
||||
@classmethod
|
||||
def is_helper_for(cls, filename):
|
||||
@@ -133,7 +121,7 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
# Attempt to extract the language from the filename (e.g. Avatar (2009).eng)
|
||||
# IETF support thanks to
|
||||
# https://github.com/hpsbranco/LocalMedia.bundle/commit/4fad9aefedece78a1fa96401304351347f644369
|
||||
lang_part = match_ietf_language(file)
|
||||
lang_part = match_ietf_language(file, ietf=helpers.cast_bool(Prefs["subtitles.language.ietf_display"]))
|
||||
if lang_part != file:
|
||||
language = Locale.Language.Match(lang_part)
|
||||
elif config.only_one:
|
||||
@@ -174,10 +162,11 @@ class DefaultSubtitleHelper(SubtitleHelper):
|
||||
|
||||
Log('Found subtitle file: ' + self.filename + ' language: ' + language + ' codec: ' + str(
|
||||
codec) + ' format: ' + str(format) + ' default: ' + default + ' forced: ' + forced)
|
||||
part.subtitles[language][basename] = Proxy.LocalFile(self.filename, codec=codec, format=format, default=default,
|
||||
key = ("subzero_ex" + "_forced" if forced else "") + basename
|
||||
part.subtitles[language][key] = Proxy.LocalFile(self.filename, codec=codec, format=format, default=default,
|
||||
forced=forced)
|
||||
|
||||
lang_sub_map[language] = [basename]
|
||||
lang_sub_map[language] = [key]
|
||||
return lang_sub_map
|
||||
|
||||
|
||||
@@ -194,9 +183,12 @@ def get_subtitles_from_metadata(part):
|
||||
p_type = proxy[0]
|
||||
|
||||
if p_type == "Media":
|
||||
if not key.startswith("subzero"):
|
||||
continue
|
||||
|
||||
# metadata subtitle
|
||||
Log.Debug(u"Found metadata subtitle: %s, %s" % (language, repr(proxy)))
|
||||
subs[language] = [key]
|
||||
#Log.Debug(u"Found metadata subtitle: %s, %s, %s" % (language, key, repr(proxy)))
|
||||
subs[language].append(key)
|
||||
return subs
|
||||
|
||||
|
||||
|
||||
@@ -16,9 +16,10 @@ from missing_subtitles import items_get_all_missing_subs, refresh_item
|
||||
from scheduler import scheduler
|
||||
from storage import save_subtitles, get_subtitle_storage
|
||||
from support.config import config
|
||||
from support.items import get_recent_items, get_item, is_ignored, get_item_title
|
||||
from support.items import get_recent_items, get_item, is_wanted, get_item_title
|
||||
from support.helpers import track_usage, get_title_for_video_metadata, cast_bool, PartUnknownException
|
||||
from support.plex_media import get_plex_metadata
|
||||
from support.extract import agent_extract_embedded
|
||||
from support.scanning import scan_videos
|
||||
from support.i18n import _
|
||||
from download import download_best_subtitles, pre_download_hook, post_download_hook, language_hook
|
||||
@@ -165,17 +166,22 @@ class SubtitleListingMixin(object):
|
||||
can_verify_series = False
|
||||
|
||||
if can_verify_series and not {"series", "season", "episode"}.issubset(matches):
|
||||
Log.Debug(u"%s: Skipping %s, because it doesn't match our series/episode", self.name, s)
|
||||
continue
|
||||
if "series" not in matches:
|
||||
s.wrong_series = True
|
||||
else:
|
||||
s.wrong_season_ep = True
|
||||
|
||||
orig_matches = matches.copy()
|
||||
score, score_without_hash = compute_score(matches, s, video, hearing_impaired=use_hearing_impaired)
|
||||
|
||||
unsorted_subtitles.append(
|
||||
(s, compute_score(matches, s, video, hearing_impaired=use_hearing_impaired), matches))
|
||||
scored_subtitles = sorted(unsorted_subtitles, key=operator.itemgetter(1), reverse=True)
|
||||
(s, score, score_without_hash, matches, orig_matches))
|
||||
scored_subtitles = sorted(unsorted_subtitles, key=operator.itemgetter(1, 2), reverse=True)
|
||||
|
||||
subtitles = []
|
||||
for subtitle, score, matches in scored_subtitles:
|
||||
for subtitle, score, score_without_hash, matches, orig_matches in scored_subtitles:
|
||||
# check score
|
||||
if score < min_score:
|
||||
if score < min_score and not subtitle.wrong_series:
|
||||
Log.Info(u'%s: Score %d is below min_score (%d)', self.name, score, min_score)
|
||||
continue
|
||||
subtitle.score = score
|
||||
@@ -235,6 +241,7 @@ class DownloadSubtitleMixin(object):
|
||||
item_title = get_title_for_video_metadata(metadata, add_section_title=False)
|
||||
history = get_history()
|
||||
history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
|
||||
thumb=video.plexapi_metadata["super_thumb"],
|
||||
subtitle=subtitle,
|
||||
mode=mode)
|
||||
history.destroy()
|
||||
@@ -421,7 +428,7 @@ class SearchAllRecentlyAddedMissing(Task):
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
if is_ignored(video_id, item=plex_item):
|
||||
if not is_wanted(video_id, item=plex_item):
|
||||
skip_item()
|
||||
continue
|
||||
|
||||
@@ -446,6 +453,17 @@ class SearchAllRecentlyAddedMissing(Task):
|
||||
Log.Debug(u"%s: Looking for missing subtitles: %s", self.name, get_item_title(plex_item))
|
||||
scanned_parts = scan_videos([metadata], providers=providers)
|
||||
|
||||
# auto extract embedded
|
||||
if config.embedded_auto_extract:
|
||||
if config.plex_transcoder:
|
||||
ts = agent_extract_embedded(scanned_parts, set_as_existing=True)
|
||||
if ts:
|
||||
Log.Debug("Waiting for %i extraction threads to finish" % len(ts))
|
||||
for t in ts:
|
||||
t.join()
|
||||
else:
|
||||
Log.Warn("Plex Transcoder not found, can't auto extract")
|
||||
|
||||
downloaded_subtitles = download_best_subtitles(scanned_parts, min_score=min_score,
|
||||
providers=providers)
|
||||
hit_providers = downloaded_subtitles is not None
|
||||
@@ -478,6 +496,7 @@ class SearchAllRecentlyAddedMissing(Task):
|
||||
for subtitle in video_subtitles:
|
||||
downloads_per_video += 1
|
||||
history.add(item_title, video.id, section_title=metadata["section"],
|
||||
thumb=video.plexapi_metadata["super_thumb"],
|
||||
subtitle=subtitle,
|
||||
mode="a")
|
||||
|
||||
@@ -558,7 +577,7 @@ class LegacySearchAllRecentlyAddedMissing(Task):
|
||||
self.items_done = []
|
||||
recent_items = get_recent_items()
|
||||
missing = items_get_all_missing_subs(recent_items, sleep_after_request=0.2)
|
||||
ids = set([id for added_at, id, title, item, missing_languages in missing if not is_ignored(id, item=item)])
|
||||
ids = set([id for added_at, id, title, item, missing_languages in missing if is_wanted(id, item=item)])
|
||||
self.items_searching = missing
|
||||
self.items_searching_ids = ids
|
||||
self.items_failed = []
|
||||
@@ -651,7 +670,7 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
|
||||
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
|
||||
min_score_extracted_series = config.advanced.find_better_as_extracted_tv_score or 352
|
||||
min_score_extracted_movies = config.advanced.find_better_as_extracted_movie_score or 82
|
||||
min_score_extracted_movies = config.advanced.find_better_as_extracted_movie_score or 112
|
||||
overwrite_manually_modified = cast_bool(
|
||||
Prefs["scheduler.tasks.FindBetterSubtitles.overwrite_manually_modified"])
|
||||
overwrite_manually_selected = cast_bool(
|
||||
@@ -823,7 +842,12 @@ class SubtitleStorageMaintenance(Task):
|
||||
self.running = True
|
||||
Log.Info(u"%s: Running subtitle storage maintenance", self.name)
|
||||
storage = get_subtitle_storage()
|
||||
deleted_items = storage.delete_missing(wanted_languages=set(str(l) for l in config.lang_list))
|
||||
try:
|
||||
deleted_items = storage.delete_missing(wanted_languages=set(str(l) for l in config.lang_list))
|
||||
except OSError:
|
||||
deleted_items = storage.delete_missing(wanted_languages=set(str(l) for l in config.lang_list),
|
||||
scandir_generic=True)
|
||||
|
||||
if deleted_items:
|
||||
Log.Info(u"%s: Subtitle information for %d non-existant videos have been cleaned up",
|
||||
self.name, len(deleted_items))
|
||||
@@ -861,11 +885,18 @@ class MigrateSubtitleStorage(Task):
|
||||
self.running = True
|
||||
Log.Info(u"%s: Running subtitle storage migration", self.name)
|
||||
storage = get_subtitle_storage()
|
||||
for fn in storage.get_all_files():
|
||||
if fn.endswith(".json.gz"):
|
||||
continue
|
||||
Log.Debug(u"%s: Migrating %s", self.name, fn)
|
||||
storage.load(None, fn)
|
||||
|
||||
def migrate(scandir_generic=False):
|
||||
for fn in storage.get_all_files(scandir_generic=scandir_generic):
|
||||
if fn.endswith(".json.gz"):
|
||||
continue
|
||||
Log.Debug(u"%s: Migrating %s", self.name, fn)
|
||||
storage.load(None, fn)
|
||||
|
||||
try:
|
||||
migrate()
|
||||
except OSError:
|
||||
migrate(scandir_generic=True)
|
||||
|
||||
storage.destroy()
|
||||
|
||||
|
||||
+201
-19
@@ -179,10 +179,50 @@
|
||||
"default": "None"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.only_foreign",
|
||||
"label": "Only download foreign/forced subtitles",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
"id": "subtitles.when",
|
||||
"label": "Download subtitles",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"Never",
|
||||
"Always",
|
||||
"When main audio stream is not Subtitle Language (1)",
|
||||
"When main audio stream is not any configured language",
|
||||
"When any audio stream is not Subtitle Language (1)",
|
||||
"When any audio stream is not any configured language"
|
||||
],
|
||||
"default": "Always"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.ignore_for_audio",
|
||||
"label": "Don't download subtitles for Audio languages (use ISO-639-1 codes; comma-separated; NULL=no audio)",
|
||||
"type": "text",
|
||||
"default": "None"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.when_forced",
|
||||
"label": "Download foreign/forced subtitles",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"Never",
|
||||
"Always",
|
||||
"Only for Subtitle Language (1)",
|
||||
"Only for Subtitle Language (2)",
|
||||
"Only for Subtitle Language (3)"
|
||||
],
|
||||
"default": "Never"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.any_language_is_enough",
|
||||
"label": "Don't search for subtitles if a subtitle in any configured language exists as",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"External or embedded subtitle",
|
||||
"External or embedded subtitle (not foreign/forced)",
|
||||
"External subtitle",
|
||||
"External subtitle (not foreign/forced)",
|
||||
"Always search for all configured languages"
|
||||
],
|
||||
"default": "Always search for all configured languages"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.language.ietf_display",
|
||||
@@ -204,7 +244,7 @@
|
||||
},
|
||||
{
|
||||
"id": "subtitles.language.treat_und_as_first",
|
||||
"label": "Embedded subtitles: Treat \"Undefined\" (und) as language 1",
|
||||
"label": "Embedded streams: Treat \"Undefined\" (und) as language 1",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
@@ -252,6 +292,23 @@
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "anticaptcha.service",
|
||||
"label": "AntiCaptcha-Service (needs paid account; enables Addic7ed)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"none",
|
||||
"anti-captcha.com",
|
||||
"deathbycaptcha.com"
|
||||
],
|
||||
"default": "none"
|
||||
},
|
||||
{
|
||||
"id": "anticaptcha.api_key",
|
||||
"label": "AntiCaptcha-Service key (anti-captcha.com: account_key; deathbycaptcha.com: username:password)",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.opensubtitles.enabled",
|
||||
"label": "Provider: Enable OpenSubtitles",
|
||||
@@ -285,14 +342,28 @@
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.titlovi.enabled",
|
||||
"label": "Provider: Enable Titlovi.com",
|
||||
"id": "provider.napisy24.enabled",
|
||||
"label": "Provider: Enable Napisy24 (pl)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.napisy24.username",
|
||||
"label": "Napisy24 Username",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.napisy24.password",
|
||||
"label": "Napisy24 Password",
|
||||
"type": "text",
|
||||
"option": "hidden",
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.enabled",
|
||||
"label": "Provider: Enable Addic7ed",
|
||||
"label": "Provider: Enable Addic7ed (needs AntiCaptcha)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
@@ -310,6 +381,12 @@
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.is_vip",
|
||||
"label": "Addic7ed VIP? (80 vs 40 downloads per day)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.boost_by2",
|
||||
"label": "Addic7ed: boost score (if requirements met)",
|
||||
@@ -343,11 +420,25 @@
|
||||
"default": "19"
|
||||
},
|
||||
{
|
||||
"id": "provider.addic7ed.use_random_agents1",
|
||||
"label": "Addic7ed: Use random user agents",
|
||||
"id": "provider.titlovi.enabled",
|
||||
"label": "Provider: Enable Titlovi.com (User and Password required)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.titlovi.username",
|
||||
"label": "Titlovi Username",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.titlovi.password",
|
||||
"label": "Titlovi Password",
|
||||
"type": "text",
|
||||
"option": "hidden",
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.legendastv.enabled",
|
||||
"label": "Provider: Enable Legendas TV (mostly pt-BR; UNRAR NEEDED)",
|
||||
@@ -386,6 +477,20 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.subscene.username",
|
||||
"label": "SubScene Username",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.subscene.password",
|
||||
"label": "SubScene Password",
|
||||
"type": "text",
|
||||
"option": "hidden",
|
||||
"default": "",
|
||||
"secure": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.supersubtitles.enabled",
|
||||
"label": "Provider: Enable feliratok.info (Hungarian)",
|
||||
@@ -416,6 +521,38 @@
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.bsplayer.enabled",
|
||||
"label": "Provider: Enable BSPlayer Subtitles",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.wizdom.enabled",
|
||||
"label": "Provider: Enable WizdomSubs (Hebrew)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.ktuvit.enabled",
|
||||
"label": "Provider: Enable Ktuvit (Hebrew)",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "provider.ktuvit.username",
|
||||
"label": "Ktuvit Username",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "provider.ktuvit.password",
|
||||
"label": "Ktuvit Password",
|
||||
"type": "text",
|
||||
"option": "hidden",
|
||||
"secure": "true",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "providers.multithreading",
|
||||
"label": "Search enabled providers simultaneously (multithreading)",
|
||||
@@ -511,9 +648,15 @@
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.fix_only_uppercase",
|
||||
"label": "Fix only uppercase downloaded subtitles",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.reverse_rtl",
|
||||
"label": "Reverse punctuation in RTL languages (heb)",
|
||||
"label": "Reverse punctuation in RTL languages (heb, ara, fas)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
@@ -594,7 +737,7 @@
|
||||
"id": "subtitles.autoclean",
|
||||
"label": "Automatically delete leftover/unused (externally saved) subtitles",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "activity.on_playback",
|
||||
@@ -695,6 +838,29 @@
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "scheduler.tasks.SubtitleStorageMaintenance.frequency",
|
||||
"label": "Scheduler: Periodically run subtitle storage maintenance (SZ internal)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"never",
|
||||
"every 6 hours",
|
||||
"every 12 hours",
|
||||
"every 24 hours",
|
||||
"every 1 days",
|
||||
"every 2 days",
|
||||
"every 3 days",
|
||||
"every 4 days",
|
||||
"every 1 weeks",
|
||||
"every 2 weeks",
|
||||
"every 3 weeks",
|
||||
"every 4 weeks",
|
||||
"every 5 weeks",
|
||||
"every 6 weeks",
|
||||
"every 12 weeks"
|
||||
],
|
||||
"default": "every 1 weeks"
|
||||
},
|
||||
{
|
||||
"id": "history_size",
|
||||
"label": "History: amount of items to store historical data for",
|
||||
@@ -721,17 +887,27 @@
|
||||
"default": "2"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.ignore_fs",
|
||||
"label": "Ignore folders (with \"subzero.ignore/.subzero.ignore/.nosz\" files in them)",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
"id": "subtitles.include_exclude_mode",
|
||||
"label": "Should SZ be enabled or disabled by default? (impacts the settings below and the plugin menu)",
|
||||
"type": "enum",
|
||||
"values": [
|
||||
"enable SZ for all items by default, use ignore mode",
|
||||
"disable SZ for all items by default, use include mode"
|
||||
],
|
||||
"default": "enable SZ for all items by default, use ignore mode"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.ignore_paths",
|
||||
"label": "Ignore anything in the following paths (comma-separated)",
|
||||
"id": "subtitles.include_exclude_paths",
|
||||
"label": "Enable/disable Sub-Zero in the following paths (comma-separated; the setting above impacts this)",
|
||||
"type": "text",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"id": "subtitles.include_exclude_fs",
|
||||
"label": "Use \"subzero.ignore/.subzero.ignore/.nosz\" (ignore mode) or \"subzero.include/.subzero.include/.sz\" (include mode) files inside folders",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "plugin_mode2",
|
||||
"label": "Sub-Zero mode",
|
||||
@@ -798,6 +974,12 @@
|
||||
"type": "text",
|
||||
"default": "15"
|
||||
},
|
||||
{
|
||||
"id": "use_custom_dns2",
|
||||
"label": "Use custom DNS (IPs, comma-separated, set to 'system' for system DNS. Default: Google/CF)",
|
||||
"type": "text",
|
||||
"default": "1.1.1.1, 8.8.8.8"
|
||||
},
|
||||
{
|
||||
"id": "proxy",
|
||||
"label": "HTTP proxy to use for providers (supports credentials)",
|
||||
|
||||
+5
-5
@@ -9,11 +9,11 @@
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>2.5.4</string>
|
||||
<string>2.6.5</string>
|
||||
<key>CFBundleSignature</key>
|
||||
<string>????</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>2.5.7.2663</string>
|
||||
<string>2.6.5.3277</string>
|
||||
<key>PlexFrameworkVersion</key>
|
||||
<string>2</string>
|
||||
<key>PlexPluginClass</key>
|
||||
@@ -23,7 +23,7 @@
|
||||
<key>PlexPluginConsoleLogging</key>
|
||||
<string>0</string>
|
||||
<key>PlexPluginDevMode</key>
|
||||
<string>0</string>
|
||||
<string>1</string>
|
||||
<key>PlexPluginCodePolicy</key>
|
||||
<!-- this allows channels to access some python methods which are otherwise blocked, as well as import external code libraries, and interact with the PMS HTTP API -->
|
||||
<string>Elevated</string>
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
<h1>Sub-Zero for Plex</h1><i>Subtitles done right</i>
|
||||
|
||||
Version 2.5.7.2663
|
||||
Version 2.6.5.3277 DEV
|
||||
|
||||
Originally based on @bramwalet's awesome <a href="https://github.com/bramwalet/Subliminal.bundle">Subliminal.bundle</a>
|
||||
|
||||
@@ -46,7 +46,7 @@ Github: <a href="https://github.com/pannal/Sub-Zero.bundle">http
|
||||
|
||||
3rd party licenses: <a href="https://github.com/pannal/Sub-Zero.bundle/tree/master/Licenses">https://github.com/pannal/Sub-Zero.bundle/tree/master/Licenses</a>
|
||||
|
||||
panni, 2018
|
||||
panni, 2019
|
||||
</div>
|
||||
</string>
|
||||
</dict>
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
|
||||
@@ -0,0 +1,196 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import functools
|
||||
from collections import namedtuple
|
||||
from threading import RLock
|
||||
|
||||
_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
|
||||
|
||||
|
||||
@functools.wraps(functools.update_wrapper)
|
||||
def update_wrapper(
|
||||
wrapper,
|
||||
wrapped,
|
||||
assigned=functools.WRAPPER_ASSIGNMENTS,
|
||||
updated=functools.WRAPPER_UPDATES,
|
||||
):
|
||||
"""
|
||||
Patch two bugs in functools.update_wrapper.
|
||||
"""
|
||||
# workaround for http://bugs.python.org/issue3445
|
||||
assigned = tuple(attr for attr in assigned if hasattr(wrapped, attr))
|
||||
wrapper = functools.update_wrapper(wrapper, wrapped, assigned, updated)
|
||||
# workaround for https://bugs.python.org/issue17482
|
||||
wrapper.__wrapped__ = wrapped
|
||||
return wrapper
|
||||
|
||||
|
||||
class _HashedSeq(list):
|
||||
__slots__ = 'hashvalue'
|
||||
|
||||
def __init__(self, tup, hash=hash):
|
||||
self[:] = tup
|
||||
self.hashvalue = hash(tup)
|
||||
|
||||
def __hash__(self):
|
||||
return self.hashvalue
|
||||
|
||||
|
||||
def _make_key(
|
||||
args,
|
||||
kwds,
|
||||
typed,
|
||||
kwd_mark=(object(),),
|
||||
fasttypes=set([int, str, frozenset, type(None)]),
|
||||
sorted=sorted,
|
||||
tuple=tuple,
|
||||
type=type,
|
||||
len=len,
|
||||
):
|
||||
'Make a cache key from optionally typed positional and keyword arguments'
|
||||
key = args
|
||||
if kwds:
|
||||
sorted_items = sorted(kwds.items())
|
||||
key += kwd_mark
|
||||
for item in sorted_items:
|
||||
key += item
|
||||
if typed:
|
||||
key += tuple(type(v) for v in args)
|
||||
if kwds:
|
||||
key += tuple(type(v) for k, v in sorted_items)
|
||||
elif len(key) == 1 and type(key[0]) in fasttypes:
|
||||
return key[0]
|
||||
return _HashedSeq(key)
|
||||
|
||||
|
||||
def lru_cache(maxsize=100, typed=False):
|
||||
"""Least-recently-used cache decorator.
|
||||
|
||||
If *maxsize* is set to None, the LRU features are disabled and the cache
|
||||
can grow without bound.
|
||||
|
||||
If *typed* is True, arguments of different types will be cached separately.
|
||||
For example, f(3.0) and f(3) will be treated as distinct calls with
|
||||
distinct results.
|
||||
|
||||
Arguments to the cached function must be hashable.
|
||||
|
||||
View the cache statistics named tuple (hits, misses, maxsize, currsize) with
|
||||
f.cache_info(). Clear the cache and statistics with f.cache_clear().
|
||||
Access the underlying function with f.__wrapped__.
|
||||
|
||||
See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
|
||||
|
||||
"""
|
||||
|
||||
# Users should only access the lru_cache through its public API:
|
||||
# cache_info, cache_clear, and f.__wrapped__
|
||||
# The internals of the lru_cache are encapsulated for thread safety and
|
||||
# to allow the implementation to change (including a possible C version).
|
||||
|
||||
def decorating_function(user_function):
|
||||
|
||||
cache = dict()
|
||||
stats = [0, 0] # make statistics updateable non-locally
|
||||
HITS, MISSES = 0, 1 # names for the stats fields
|
||||
make_key = _make_key
|
||||
cache_get = cache.get # bound method to lookup key or return None
|
||||
_len = len # localize the global len() function
|
||||
lock = RLock() # because linkedlist updates aren't threadsafe
|
||||
root = [] # root of the circular doubly linked list
|
||||
root[:] = [root, root, None, None] # initialize by pointing to self
|
||||
nonlocal_root = [root] # make updateable non-locally
|
||||
PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields
|
||||
|
||||
if maxsize == 0:
|
||||
|
||||
def wrapper(*args, **kwds):
|
||||
# no caching, just do a statistics update after a successful call
|
||||
result = user_function(*args, **kwds)
|
||||
stats[MISSES] += 1
|
||||
return result
|
||||
|
||||
elif maxsize is None:
|
||||
|
||||
def wrapper(*args, **kwds):
|
||||
# simple caching without ordering or size limit
|
||||
key = make_key(args, kwds, typed)
|
||||
result = cache_get(
|
||||
key, root
|
||||
) # root used here as a unique not-found sentinel
|
||||
if result is not root:
|
||||
stats[HITS] += 1
|
||||
return result
|
||||
result = user_function(*args, **kwds)
|
||||
cache[key] = result
|
||||
stats[MISSES] += 1
|
||||
return result
|
||||
|
||||
else:
|
||||
|
||||
def wrapper(*args, **kwds):
|
||||
# size limited caching that tracks accesses by recency
|
||||
key = make_key(args, kwds, typed) if kwds or typed else args
|
||||
with lock:
|
||||
link = cache_get(key)
|
||||
if link is not None:
|
||||
# record recent use of the key by moving it
|
||||
# to the front of the list
|
||||
root, = nonlocal_root
|
||||
link_prev, link_next, key, result = link
|
||||
link_prev[NEXT] = link_next
|
||||
link_next[PREV] = link_prev
|
||||
last = root[PREV]
|
||||
last[NEXT] = root[PREV] = link
|
||||
link[PREV] = last
|
||||
link[NEXT] = root
|
||||
stats[HITS] += 1
|
||||
return result
|
||||
result = user_function(*args, **kwds)
|
||||
with lock:
|
||||
root, = nonlocal_root
|
||||
if key in cache:
|
||||
# getting here means that this same key was added to the
|
||||
# cache while the lock was released. since the link
|
||||
# update is already done, we need only return the
|
||||
# computed result and update the count of misses.
|
||||
pass
|
||||
elif _len(cache) >= maxsize:
|
||||
# use the old root to store the new key and result
|
||||
oldroot = root
|
||||
oldroot[KEY] = key
|
||||
oldroot[RESULT] = result
|
||||
# empty the oldest link and make it the new root
|
||||
root = nonlocal_root[0] = oldroot[NEXT]
|
||||
oldkey = root[KEY]
|
||||
root[KEY] = root[RESULT] = None
|
||||
# now update the cache dictionary for the new links
|
||||
del cache[oldkey]
|
||||
cache[key] = oldroot
|
||||
else:
|
||||
# put result in a new link at the front of the list
|
||||
last = root[PREV]
|
||||
link = [last, root, key, result]
|
||||
last[NEXT] = root[PREV] = cache[key] = link
|
||||
stats[MISSES] += 1
|
||||
return result
|
||||
|
||||
def cache_info():
|
||||
"""Report cache statistics"""
|
||||
with lock:
|
||||
return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))
|
||||
|
||||
def cache_clear():
|
||||
"""Clear the cache and cache statistics"""
|
||||
with lock:
|
||||
cache.clear()
|
||||
root = nonlocal_root[0]
|
||||
root[:] = [root, root, None, None]
|
||||
stats[:] = [0, 0]
|
||||
|
||||
wrapper.__wrapped__ = user_function
|
||||
wrapper.cache_info = cache_info
|
||||
wrapper.cache_clear = cache_clear
|
||||
return update_wrapper(wrapper, user_function)
|
||||
|
||||
return decorating_function
|
||||
@@ -1,3 +1,3 @@
|
||||
from .core import where, old_where
|
||||
from .core import contents, where
|
||||
|
||||
__version__ = "2018.01.18"
|
||||
__version__ = "2020.06.20"
|
||||
|
||||
@@ -1,2 +1,12 @@
|
||||
from certifi import where
|
||||
print(where())
|
||||
import argparse
|
||||
|
||||
from certifi import contents, where
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-c", "--contents", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.contents:
|
||||
print(contents())
|
||||
else:
|
||||
print(where())
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,37 +1,60 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
certifi.py
|
||||
~~~~~~~~~~
|
||||
|
||||
This module returns the installation location of cacert.pem.
|
||||
This module returns the installation location of cacert.pem or its contents.
|
||||
"""
|
||||
import os
|
||||
import warnings
|
||||
|
||||
try:
|
||||
from importlib.resources import path as get_path, read_text
|
||||
|
||||
_CACERT_CTX = None
|
||||
_CACERT_PATH = None
|
||||
|
||||
def where():
|
||||
# This is slightly terrible, but we want to delay extracting the file
|
||||
# in cases where we're inside of a zipimport situation until someone
|
||||
# actually calls where(), but we don't want to re-extract the file
|
||||
# on every call of where(), so we'll do it once then store it in a
|
||||
# global variable.
|
||||
global _CACERT_CTX
|
||||
global _CACERT_PATH
|
||||
if _CACERT_PATH is None:
|
||||
# This is slightly janky, the importlib.resources API wants you to
|
||||
# manage the cleanup of this file, so it doesn't actually return a
|
||||
# path, it returns a context manager that will give you the path
|
||||
# when you enter it and will do any cleanup when you leave it. In
|
||||
# the common case of not needing a temporary file, it will just
|
||||
# return the file system location and the __exit__() is a no-op.
|
||||
#
|
||||
# We also have to hold onto the actual context manager, because
|
||||
# it will do the cleanup whenever it gets garbage collected, so
|
||||
# we will also store that at the global level as well.
|
||||
_CACERT_CTX = get_path("certifi", "cacert.pem")
|
||||
_CACERT_PATH = str(_CACERT_CTX.__enter__())
|
||||
|
||||
return _CACERT_PATH
|
||||
|
||||
|
||||
class DeprecatedBundleWarning(DeprecationWarning):
|
||||
"""
|
||||
The weak security bundle is being deprecated. Please bother your service
|
||||
provider to get them to stop using cross-signed roots.
|
||||
"""
|
||||
except ImportError:
|
||||
# This fallback will work for Python versions prior to 3.7 that lack the
|
||||
# importlib.resources module but relies on the existing `where` function
|
||||
# so won't address issues with environments like PyOxidizer that don't set
|
||||
# __file__ on modules.
|
||||
def read_text(_module, _path, encoding="ascii"):
|
||||
with open(where(), "r", encoding=encoding) as data:
|
||||
return data.read()
|
||||
|
||||
# If we don't have importlib.resources, then we will just do the old logic
|
||||
# of assuming we're on the filesystem and munge the path directly.
|
||||
def where():
|
||||
f = os.path.dirname(__file__)
|
||||
|
||||
return os.path.join(f, "cacert.pem")
|
||||
|
||||
|
||||
def where():
|
||||
f = os.path.dirname(__file__)
|
||||
|
||||
return os.path.join(f, 'cacert.pem')
|
||||
|
||||
|
||||
def old_where():
|
||||
warnings.warn(
|
||||
"The weak security bundle has been removed. certifi.old_where() is now an alias "
|
||||
"of certifi.where(). Please update your code to use certifi.where() instead. "
|
||||
"certifi.old_where() will be removed in 2018.",
|
||||
DeprecatedBundleWarning
|
||||
)
|
||||
return where()
|
||||
|
||||
if __name__ == '__main__':
|
||||
print(where())
|
||||
def contents():
|
||||
return read_text("certifi", "cacert.pem", encoding="ascii")
|
||||
|
||||
@@ -1,414 +0,0 @@
|
||||
# Issuer: CN=Entrust.net Secure Server Certification Authority O=Entrust.net OU=www.entrust.net/CPS incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited
|
||||
# Subject: CN=Entrust.net Secure Server Certification Authority O=Entrust.net OU=www.entrust.net/CPS incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited
|
||||
# Label: "Entrust.net Secure Server CA"
|
||||
# Serial: 927650371
|
||||
# MD5 Fingerprint: df:f2:80:73:cc:f1:e6:61:73:fc:f5:42:e9:c5:7c:ee
|
||||
# SHA1 Fingerprint: 99:a6:9b:e6:1a:fe:88:6b:4d:2b:82:00:7c:b8:54:fc:31:7e:15:39
|
||||
# SHA256 Fingerprint: 62:f2:40:27:8c:56:4c:4d:d8:bf:7d:9d:4f:6f:36:6e:a8:94:d2:2f:5f:34:d9:89:a9:83:ac:ec:2f:ff:ed:50
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIE2DCCBEGgAwIBAgIEN0rSQzANBgkqhkiG9w0BAQUFADCBwzELMAkGA1UEBhMC
|
||||
VVMxFDASBgNVBAoTC0VudHJ1c3QubmV0MTswOQYDVQQLEzJ3d3cuZW50cnVzdC5u
|
||||
ZXQvQ1BTIGluY29ycC4gYnkgcmVmLiAobGltaXRzIGxpYWIuKTElMCMGA1UECxMc
|
||||
KGMpIDE5OTkgRW50cnVzdC5uZXQgTGltaXRlZDE6MDgGA1UEAxMxRW50cnVzdC5u
|
||||
ZXQgU2VjdXJlIFNlcnZlciBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05OTA1
|
||||
MjUxNjA5NDBaFw0xOTA1MjUxNjM5NDBaMIHDMQswCQYDVQQGEwJVUzEUMBIGA1UE
|
||||
ChMLRW50cnVzdC5uZXQxOzA5BgNVBAsTMnd3dy5lbnRydXN0Lm5ldC9DUFMgaW5j
|
||||
b3JwLiBieSByZWYuIChsaW1pdHMgbGlhYi4pMSUwIwYDVQQLExwoYykgMTk5OSBF
|
||||
bnRydXN0Lm5ldCBMaW1pdGVkMTowOAYDVQQDEzFFbnRydXN0Lm5ldCBTZWN1cmUg
|
||||
U2VydmVyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGdMA0GCSqGSIb3DQEBAQUA
|
||||
A4GLADCBhwKBgQDNKIM0VBuJ8w+vN5Ex/68xYMmo6LIQaO2f55M28Qpku0f1BBc/
|
||||
I0dNxScZgSYMVHINiC3ZH5oSn7yzcdOAGT9HZnuMNSjSuQrfJNqc1lB5gXpa0zf3
|
||||
wkrYKZImZNHkmGw6AIr1NJtl+O3jEP/9uElY3KDegjlrgbEWGWG5VLbmQwIBA6OC
|
||||
AdcwggHTMBEGCWCGSAGG+EIBAQQEAwIABzCCARkGA1UdHwSCARAwggEMMIHeoIHb
|
||||
oIHYpIHVMIHSMQswCQYDVQQGEwJVUzEUMBIGA1UEChMLRW50cnVzdC5uZXQxOzA5
|
||||
BgNVBAsTMnd3dy5lbnRydXN0Lm5ldC9DUFMgaW5jb3JwLiBieSByZWYuIChsaW1p
|
||||
dHMgbGlhYi4pMSUwIwYDVQQLExwoYykgMTk5OSBFbnRydXN0Lm5ldCBMaW1pdGVk
|
||||
MTowOAYDVQQDEzFFbnRydXN0Lm5ldCBTZWN1cmUgU2VydmVyIENlcnRpZmljYXRp
|
||||
b24gQXV0aG9yaXR5MQ0wCwYDVQQDEwRDUkwxMCmgJ6AlhiNodHRwOi8vd3d3LmVu
|
||||
dHJ1c3QubmV0L0NSTC9uZXQxLmNybDArBgNVHRAEJDAigA8xOTk5MDUyNTE2MDk0
|
||||
MFqBDzIwMTkwNTI1MTYwOTQwWjALBgNVHQ8EBAMCAQYwHwYDVR0jBBgwFoAU8Bdi
|
||||
E1U9s/8KAGv7UISX8+1i0BowHQYDVR0OBBYEFPAXYhNVPbP/CgBr+1CEl/PtYtAa
|
||||
MAwGA1UdEwQFMAMBAf8wGQYJKoZIhvZ9B0EABAwwChsEVjQuMAMCBJAwDQYJKoZI
|
||||
hvcNAQEFBQADgYEAkNwwAvpkdMKnCqV8IY00F6j7Rw7/JXyNEwr75Ji174z4xRAN
|
||||
95K+8cPV1ZVqBLssziY2ZcgxxufuP+NXdYR6Ee9GTxj005i7qIcyunL2POI9n9cd
|
||||
2cNgQ4xYDiKWL2KjLB+6rQXvqzJ4h6BUcxm1XAX5Uj5tLUUL9wqT6u0G+bI=
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 2 Policy Validation Authority
|
||||
# Subject: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 2 Policy Validation Authority
|
||||
# Label: "ValiCert Class 2 VA"
|
||||
# Serial: 1
|
||||
# MD5 Fingerprint: a9:23:75:9b:ba:49:36:6e:31:c2:db:f2:e7:66:ba:87
|
||||
# SHA1 Fingerprint: 31:7a:2a:d0:7f:2b:33:5e:f5:a1:c3:4e:4b:57:e8:b7:d8:f1:fc:a6
|
||||
# SHA256 Fingerprint: 58:d0:17:27:9c:d4:dc:63:ab:dd:b1:96:a6:c9:90:6c:30:c4:e0:87:83:ea:e8:c1:60:99:54:d6:93:55:59:6b
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
|
||||
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
|
||||
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDIgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
|
||||
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
|
||||
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNjAwMTk1NFoXDTE5MDYy
|
||||
NjAwMTk1NFowgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
|
||||
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
|
||||
YXNzIDIgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
|
||||
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
|
||||
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDOOnHK5avIWZJV16vY
|
||||
dA757tn2VUdZZUcOBVXc65g2PFxTXdMwzzjsvUGJ7SVCCSRrCl6zfN1SLUzm1NZ9
|
||||
WlmpZdRJEy0kTRxQb7XBhVQ7/nHk01xC+YDgkRoKWzk2Z/M/VXwbP7RfZHM047QS
|
||||
v4dk+NoS/zcnwbNDu+97bi5p9wIDAQABMA0GCSqGSIb3DQEBBQUAA4GBADt/UG9v
|
||||
UJSZSWI4OB9L+KXIPqeCgfYrx+jFzug6EILLGACOTb2oWH+heQC1u+mNr0HZDzTu
|
||||
IYEZoDJJKPTEjlbVUjP9UNV+mWwD5MlM/Mtsq2azSiGM5bUMMj4QssxsodyamEwC
|
||||
W/POuZ6lcg5Ktz885hZo+L7tdEy8W9ViH0Pd
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=NetLock Expressz (Class C) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
|
||||
# Subject: CN=NetLock Expressz (Class C) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
|
||||
# Label: "NetLock Express (Class C) Root"
|
||||
# Serial: 104
|
||||
# MD5 Fingerprint: 4f:eb:f1:f0:70:c2:80:63:5d:58:9f:da:12:3c:a9:c4
|
||||
# SHA1 Fingerprint: e3:92:51:2f:0a:cf:f5:05:df:f6:de:06:7f:75:37:e1:65:ea:57:4b
|
||||
# SHA256 Fingerprint: 0b:5e:ed:4e:84:64:03:cf:55:e0:65:84:84:40:ed:2a:82:75:8b:f5:b9:aa:1f:25:3d:46:13:cf:a0:80:ff:3f
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIFTzCCBLigAwIBAgIBaDANBgkqhkiG9w0BAQQFADCBmzELMAkGA1UEBhMCSFUx
|
||||
ETAPBgNVBAcTCEJ1ZGFwZXN0MScwJQYDVQQKEx5OZXRMb2NrIEhhbG96YXRiaXp0
|
||||
b25zYWdpIEtmdC4xGjAYBgNVBAsTEVRhbnVzaXR2YW55a2lhZG9rMTQwMgYDVQQD
|
||||
EytOZXRMb2NrIEV4cHJlc3N6IChDbGFzcyBDKSBUYW51c2l0dmFueWtpYWRvMB4X
|
||||
DTk5MDIyNTE0MDgxMVoXDTE5MDIyMDE0MDgxMVowgZsxCzAJBgNVBAYTAkhVMREw
|
||||
DwYDVQQHEwhCdWRhcGVzdDEnMCUGA1UEChMeTmV0TG9jayBIYWxvemF0Yml6dG9u
|
||||
c2FnaSBLZnQuMRowGAYDVQQLExFUYW51c2l0dmFueWtpYWRvazE0MDIGA1UEAxMr
|
||||
TmV0TG9jayBFeHByZXNzeiAoQ2xhc3MgQykgVGFudXNpdHZhbnlraWFkbzCBnzAN
|
||||
BgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA6+ywbGGKIyWvYCDj2Z/8kwvbXY2wobNA
|
||||
OoLO/XXgeDIDhlqGlZHtU/qdQPzm6N3ZW3oDvV3zOwzDUXmbrVWg6dADEK8KuhRC
|
||||
2VImESLH0iDMgqSaqf64gXadarfSNnU+sYYJ9m5tfk63euyucYT2BDMIJTLrdKwW
|
||||
RMbkQJMdf60CAwEAAaOCAp8wggKbMBIGA1UdEwEB/wQIMAYBAf8CAQQwDgYDVR0P
|
||||
AQH/BAQDAgAGMBEGCWCGSAGG+EIBAQQEAwIABzCCAmAGCWCGSAGG+EIBDQSCAlEW
|
||||
ggJNRklHWUVMRU0hIEV6ZW4gdGFudXNpdHZhbnkgYSBOZXRMb2NrIEtmdC4gQWx0
|
||||
YWxhbm9zIFN6b2xnYWx0YXRhc2kgRmVsdGV0ZWxlaWJlbiBsZWlydCBlbGphcmFz
|
||||
b2sgYWxhcGphbiBrZXN6dWx0LiBBIGhpdGVsZXNpdGVzIGZvbHlhbWF0YXQgYSBO
|
||||
ZXRMb2NrIEtmdC4gdGVybWVrZmVsZWxvc3NlZy1iaXp0b3NpdGFzYSB2ZWRpLiBB
|
||||
IGRpZ2l0YWxpcyBhbGFpcmFzIGVsZm9nYWRhc2FuYWsgZmVsdGV0ZWxlIGF6IGVs
|
||||
b2lydCBlbGxlbm9yemVzaSBlbGphcmFzIG1lZ3RldGVsZS4gQXogZWxqYXJhcyBs
|
||||
ZWlyYXNhIG1lZ3RhbGFsaGF0byBhIE5ldExvY2sgS2Z0LiBJbnRlcm5ldCBob25s
|
||||
YXBqYW4gYSBodHRwczovL3d3dy5uZXRsb2NrLm5ldC9kb2NzIGNpbWVuIHZhZ3kg
|
||||
a2VyaGV0byBheiBlbGxlbm9yemVzQG5ldGxvY2submV0IGUtbWFpbCBjaW1lbi4g
|
||||
SU1QT1JUQU5UISBUaGUgaXNzdWFuY2UgYW5kIHRoZSB1c2Ugb2YgdGhpcyBjZXJ0
|
||||
aWZpY2F0ZSBpcyBzdWJqZWN0IHRvIHRoZSBOZXRMb2NrIENQUyBhdmFpbGFibGUg
|
||||
YXQgaHR0cHM6Ly93d3cubmV0bG9jay5uZXQvZG9jcyBvciBieSBlLW1haWwgYXQg
|
||||
Y3BzQG5ldGxvY2submV0LjANBgkqhkiG9w0BAQQFAAOBgQAQrX/XDDKACtiG8XmY
|
||||
ta3UzbM2xJZIwVzNmtkFLp++UOv0JhQQLdRmF/iewSf98e3ke0ugbLWrmldwpu2g
|
||||
pO0u9f38vf5NNwgMvOOWgyL1SRt/Syu0VMGAfJlOHdCM7tCs5ZL6dVb+ZKATj7i4
|
||||
Fp1hBWeAyNDYpQcCNJgEjTME1A==
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=NetLock Uzleti (Class B) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
|
||||
# Subject: CN=NetLock Uzleti (Class B) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
|
||||
# Label: "NetLock Business (Class B) Root"
|
||||
# Serial: 105
|
||||
# MD5 Fingerprint: 39:16:aa:b9:6a:41:e1:14:69:df:9e:6c:3b:72:dc:b6
|
||||
# SHA1 Fingerprint: 87:9f:4b:ee:05:df:98:58:3b:e3:60:d6:33:e7:0d:3f:fe:98:71:af
|
||||
# SHA256 Fingerprint: 39:df:7b:68:2b:7b:93:8f:84:71:54:81:cc:de:8d:60:d8:f2:2e:c5:98:87:7d:0a:aa:c1:2b:59:18:2b:03:12
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIFSzCCBLSgAwIBAgIBaTANBgkqhkiG9w0BAQQFADCBmTELMAkGA1UEBhMCSFUx
|
||||
ETAPBgNVBAcTCEJ1ZGFwZXN0MScwJQYDVQQKEx5OZXRMb2NrIEhhbG96YXRiaXp0
|
||||
b25zYWdpIEtmdC4xGjAYBgNVBAsTEVRhbnVzaXR2YW55a2lhZG9rMTIwMAYDVQQD
|
||||
EylOZXRMb2NrIFV6bGV0aSAoQ2xhc3MgQikgVGFudXNpdHZhbnlraWFkbzAeFw05
|
||||
OTAyMjUxNDEwMjJaFw0xOTAyMjAxNDEwMjJaMIGZMQswCQYDVQQGEwJIVTERMA8G
|
||||
A1UEBxMIQnVkYXBlc3QxJzAlBgNVBAoTHk5ldExvY2sgSGFsb3phdGJpenRvbnNh
|
||||
Z2kgS2Z0LjEaMBgGA1UECxMRVGFudXNpdHZhbnlraWFkb2sxMjAwBgNVBAMTKU5l
|
||||
dExvY2sgVXpsZXRpIChDbGFzcyBCKSBUYW51c2l0dmFueWtpYWRvMIGfMA0GCSqG
|
||||
SIb3DQEBAQUAA4GNADCBiQKBgQCx6gTsIKAjwo84YM/HRrPVG/77uZmeBNwcf4xK
|
||||
gZjupNTKihe5In+DCnVMm8Bp2GQ5o+2So/1bXHQawEfKOml2mrriRBf8TKPV/riX
|
||||
iK+IA4kfpPIEPsgHC+b5sy96YhQJRhTKZPWLgLViqNhr1nGTLbO/CVRY7QbrqHvc
|
||||
Q7GhaQIDAQABo4ICnzCCApswEgYDVR0TAQH/BAgwBgEB/wIBBDAOBgNVHQ8BAf8E
|
||||
BAMCAAYwEQYJYIZIAYb4QgEBBAQDAgAHMIICYAYJYIZIAYb4QgENBIICURaCAk1G
|
||||
SUdZRUxFTSEgRXplbiB0YW51c2l0dmFueSBhIE5ldExvY2sgS2Z0LiBBbHRhbGFu
|
||||
b3MgU3pvbGdhbHRhdGFzaSBGZWx0ZXRlbGVpYmVuIGxlaXJ0IGVsamFyYXNvayBh
|
||||
bGFwamFuIGtlc3p1bHQuIEEgaGl0ZWxlc2l0ZXMgZm9seWFtYXRhdCBhIE5ldExv
|
||||
Y2sgS2Z0LiB0ZXJtZWtmZWxlbG9zc2VnLWJpenRvc2l0YXNhIHZlZGkuIEEgZGln
|
||||
aXRhbGlzIGFsYWlyYXMgZWxmb2dhZGFzYW5hayBmZWx0ZXRlbGUgYXogZWxvaXJ0
|
||||
IGVsbGVub3J6ZXNpIGVsamFyYXMgbWVndGV0ZWxlLiBBeiBlbGphcmFzIGxlaXJh
|
||||
c2EgbWVndGFsYWxoYXRvIGEgTmV0TG9jayBLZnQuIEludGVybmV0IGhvbmxhcGph
|
||||
biBhIGh0dHBzOi8vd3d3Lm5ldGxvY2submV0L2RvY3MgY2ltZW4gdmFneSBrZXJo
|
||||
ZXRvIGF6IGVsbGVub3J6ZXNAbmV0bG9jay5uZXQgZS1tYWlsIGNpbWVuLiBJTVBP
|
||||
UlRBTlQhIFRoZSBpc3N1YW5jZSBhbmQgdGhlIHVzZSBvZiB0aGlzIGNlcnRpZmlj
|
||||
YXRlIGlzIHN1YmplY3QgdG8gdGhlIE5ldExvY2sgQ1BTIGF2YWlsYWJsZSBhdCBo
|
||||
dHRwczovL3d3dy5uZXRsb2NrLm5ldC9kb2NzIG9yIGJ5IGUtbWFpbCBhdCBjcHNA
|
||||
bmV0bG9jay5uZXQuMA0GCSqGSIb3DQEBBAUAA4GBAATbrowXr/gOkDFOzT4JwG06
|
||||
sPgzTEdM43WIEJessDgVkcYplswhwG08pXTP2IKlOcNl40JwuyKQ433bNXbhoLXa
|
||||
n3BukxowOR0w2y7jfLKRstE3Kfq51hdcR0/jHTjrn9V7lagonhVK0dHQKwCXoOKS
|
||||
NitjrFgBazMpUIaD8QFI
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 3 Policy Validation Authority
|
||||
# Subject: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 3 Policy Validation Authority
|
||||
# Label: "RSA Root Certificate 1"
|
||||
# Serial: 1
|
||||
# MD5 Fingerprint: a2:6f:53:b7:ee:40:db:4a:68:e7:fa:18:d9:10:4b:72
|
||||
# SHA1 Fingerprint: 69:bd:8c:f4:9c:d3:00:fb:59:2e:17:93:ca:55:6a:f3:ec:aa:35:fb
|
||||
# SHA256 Fingerprint: bc:23:f9:8a:31:3c:b9:2d:e3:bb:fc:3a:5a:9f:44:61:ac:39:49:4c:4a:e1:5a:9e:9d:f1:31:e9:9b:73:01:9a
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
|
||||
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
|
||||
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDMgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
|
||||
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
|
||||
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNjAwMjIzM1oXDTE5MDYy
|
||||
NjAwMjIzM1owgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
|
||||
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
|
||||
YXNzIDMgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
|
||||
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
|
||||
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDjmFGWHOjVsQaBalfD
|
||||
cnWTq8+epvzzFlLWLU2fNUSoLgRNB0mKOCn1dzfnt6td3zZxFJmP3MKS8edgkpfs
|
||||
2Ejcv8ECIMYkpChMMFp2bbFc893enhBxoYjHW5tBbcqwuI4V7q0zK89HBFx1cQqY
|
||||
JJgpp0lZpd34t0NiYfPT4tBVPwIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAFa7AliE
|
||||
Zwgs3x/be0kz9dNnnfS0ChCzycUs4pJqcXgn8nCDQtM+z6lU9PHYkhaM0QTLS6vJ
|
||||
n0WuPIqpsHEzXcjFV9+vqDWzf4mH6eglkrh/hXqu1rweN1gqZ8mRzyqBPu3GOd/A
|
||||
PhmcGcwTTYJBtYze4D1gCCAPRX5ron+jjBXu
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 1 Policy Validation Authority
|
||||
# Subject: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 1 Policy Validation Authority
|
||||
# Label: "ValiCert Class 1 VA"
|
||||
# Serial: 1
|
||||
# MD5 Fingerprint: 65:58:ab:15:ad:57:6c:1e:a8:a7:b5:69:ac:bf:ff:eb
|
||||
# SHA1 Fingerprint: e5:df:74:3c:b6:01:c4:9b:98:43:dc:ab:8c:e8:6a:81:10:9f:e4:8e
|
||||
# SHA256 Fingerprint: f4:c1:49:55:1a:30:13:a3:5b:c7:bf:fe:17:a7:f3:44:9b:c1:ab:5b:5a:0a:e7:4b:06:c2:3b:90:00:4c:01:04
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
|
||||
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
|
||||
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDEgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
|
||||
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
|
||||
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNTIyMjM0OFoXDTE5MDYy
|
||||
NTIyMjM0OFowgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
|
||||
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
|
||||
YXNzIDEgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
|
||||
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
|
||||
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDYWYJ6ibiWuqYvaG9Y
|
||||
LqdUHAZu9OqNSLwxlBfw8068srg1knaw0KWlAdcAAxIiGQj4/xEjm84H9b9pGib+
|
||||
TunRf50sQB1ZaG6m+FiwnRqP0z/x3BkGgagO4DrdyFNFCQbmD3DD+kCmDuJWBQ8Y
|
||||
TfwggtFzVXSNdnKgHZ0dwN0/cQIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAFBoPUn0
|
||||
LBwGlN+VYH+Wexf+T3GtZMjdd9LvWVXoP+iOBSoh8gfStadS/pyxtuJbdxdA6nLW
|
||||
I8sogTLDAHkY7FkXicnGah5xyf23dKUlRWnFSKsZ4UWKJWsZ7uW7EvV/96aNUcPw
|
||||
nXS3qT6gpf+2SQMT2iLM7XGCK5nPOrf1LXLI
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=Equifax Secure eBusiness CA-1 O=Equifax Secure Inc.
|
||||
# Subject: CN=Equifax Secure eBusiness CA-1 O=Equifax Secure Inc.
|
||||
# Label: "Equifax Secure eBusiness CA 1"
|
||||
# Serial: 4
|
||||
# MD5 Fingerprint: 64:9c:ef:2e:44:fc:c6:8f:52:07:d0:51:73:8f:cb:3d
|
||||
# SHA1 Fingerprint: da:40:18:8b:91:89:a3:ed:ee:ae:da:97:fe:2f:9d:f5:b7:d1:8a:41
|
||||
# SHA256 Fingerprint: cf:56:ff:46:a4:a1:86:10:9d:d9:65:84:b5:ee:b5:8a:51:0c:42:75:b0:e5:f9:4f:40:bb:ae:86:5e:19:f6:73
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIICgjCCAeugAwIBAgIBBDANBgkqhkiG9w0BAQQFADBTMQswCQYDVQQGEwJVUzEc
|
||||
MBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5jLjEmMCQGA1UEAxMdRXF1aWZheCBT
|
||||
ZWN1cmUgZUJ1c2luZXNzIENBLTEwHhcNOTkwNjIxMDQwMDAwWhcNMjAwNjIxMDQw
|
||||
MDAwWjBTMQswCQYDVQQGEwJVUzEcMBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5j
|
||||
LjEmMCQGA1UEAxMdRXF1aWZheCBTZWN1cmUgZUJ1c2luZXNzIENBLTEwgZ8wDQYJ
|
||||
KoZIhvcNAQEBBQADgY0AMIGJAoGBAM4vGbwXt3fek6lfWg0XTzQaDJj0ItlZ1MRo
|
||||
RvC0NcWFAyDGr0WlIVFFQesWWDYyb+JQYmT5/VGcqiTZ9J2DKocKIdMSODRsjQBu
|
||||
WqDZQu4aIZX5UkxVWsUPOE9G+m34LjXWHXzr4vCwdYDIqROsvojvOm6rXyo4YgKw
|
||||
Env+j6YDAgMBAAGjZjBkMBEGCWCGSAGG+EIBAQQEAwIABzAPBgNVHRMBAf8EBTAD
|
||||
AQH/MB8GA1UdIwQYMBaAFEp4MlIR21kWNl7fwRQ2QGpHfEyhMB0GA1UdDgQWBBRK
|
||||
eDJSEdtZFjZe38EUNkBqR3xMoTANBgkqhkiG9w0BAQQFAAOBgQB1W6ibAxHm6VZM
|
||||
zfmpTMANmvPMZWnmJXbMWbfWVMMdzZmsGd20hdXgPfxiIKeES1hl8eL5lSE/9dR+
|
||||
WB5Hh1Q+WKG1tfgq73HnvMP2sUlG4tega+VWeponmHxGYhTnyfxuAxJ5gDgdSIKN
|
||||
/Bf+KpYrtWKmpj29f5JZzVoqgrI3eQ==
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=Equifax Secure Global eBusiness CA-1 O=Equifax Secure Inc.
|
||||
# Subject: CN=Equifax Secure Global eBusiness CA-1 O=Equifax Secure Inc.
|
||||
# Label: "Equifax Secure Global eBusiness CA"
|
||||
# Serial: 1
|
||||
# MD5 Fingerprint: 8f:5d:77:06:27:c4:98:3c:5b:93:78:e7:d7:7d:9b:cc
|
||||
# SHA1 Fingerprint: 7e:78:4a:10:1c:82:65:cc:2d:e1:f1:6d:47:b4:40:ca:d9:0a:19:45
|
||||
# SHA256 Fingerprint: 5f:0b:62:ea:b5:e3:53:ea:65:21:65:16:58:fb:b6:53:59:f4:43:28:0a:4a:fb:d1:04:d7:7d:10:f9:f0:4c:07
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIICkDCCAfmgAwIBAgIBATANBgkqhkiG9w0BAQQFADBaMQswCQYDVQQGEwJVUzEc
|
||||
MBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5jLjEtMCsGA1UEAxMkRXF1aWZheCBT
|
||||
ZWN1cmUgR2xvYmFsIGVCdXNpbmVzcyBDQS0xMB4XDTk5MDYyMTA0MDAwMFoXDTIw
|
||||
MDYyMTA0MDAwMFowWjELMAkGA1UEBhMCVVMxHDAaBgNVBAoTE0VxdWlmYXggU2Vj
|
||||
dXJlIEluYy4xLTArBgNVBAMTJEVxdWlmYXggU2VjdXJlIEdsb2JhbCBlQnVzaW5l
|
||||
c3MgQ0EtMTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAuucXkAJlsTRVPEnC
|
||||
UdXfp9E3j9HngXNBUmCbnaEXJnitx7HoJpQytd4zjTov2/KaelpzmKNc6fuKcxtc
|
||||
58O/gGzNqfTWK8D3+ZmqY6KxRwIP1ORROhI8bIpaVIRw28HFkM9yRcuoWcDNM50/
|
||||
o5brhTMhHD4ePmBudpxnhcXIw2ECAwEAAaNmMGQwEQYJYIZIAYb4QgEBBAQDAgAH
|
||||
MA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUvqigdHJQa0S3ySPY+6j/s1dr
|
||||
aGwwHQYDVR0OBBYEFL6ooHRyUGtEt8kj2Puo/7NXa2hsMA0GCSqGSIb3DQEBBAUA
|
||||
A4GBADDiAVGqx+pf2rnQZQ8w1j7aDRRJbpGTJxQx78T3LUX47Me/okENI7SS+RkA
|
||||
Z70Br83gcfxaz2TE4JaY0KNA4gGK7ycH8WUBikQtBmV1UsCGECAhX2xrD2yuCRyv
|
||||
8qIYNMR1pHMc8Y3c7635s3a0kr/clRAevsvIO1qEYBlWlKlV
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=Thawte Premium Server CA O=Thawte Consulting cc OU=Certification Services Division
|
||||
# Subject: CN=Thawte Premium Server CA O=Thawte Consulting cc OU=Certification Services Division
|
||||
# Label: "Thawte Premium Server CA"
|
||||
# Serial: 1
|
||||
# MD5 Fingerprint: 06:9f:69:79:16:66:90:02:1b:8c:8c:a2:c3:07:6f:3a
|
||||
# SHA1 Fingerprint: 62:7f:8d:78:27:65:63:99:d2:7d:7f:90:44:c9:fe:b3:f3:3e:fa:9a
|
||||
# SHA256 Fingerprint: ab:70:36:36:5c:71:54:aa:29:c2:c2:9f:5d:41:91:16:3b:16:2a:22:25:01:13:57:d5:6d:07:ff:a7:bc:1f:72
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIDJzCCApCgAwIBAgIBATANBgkqhkiG9w0BAQQFADCBzjELMAkGA1UEBhMCWkEx
|
||||
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYD
|
||||
VQQKExRUaGF3dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlv
|
||||
biBTZXJ2aWNlcyBEaXZpc2lvbjEhMB8GA1UEAxMYVGhhd3RlIFByZW1pdW0gU2Vy
|
||||
dmVyIENBMSgwJgYJKoZIhvcNAQkBFhlwcmVtaXVtLXNlcnZlckB0aGF3dGUuY29t
|
||||
MB4XDTk2MDgwMTAwMDAwMFoXDTIwMTIzMTIzNTk1OVowgc4xCzAJBgNVBAYTAlpB
|
||||
MRUwEwYDVQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsG
|
||||
A1UEChMUVGhhd3RlIENvbnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRp
|
||||
b24gU2VydmljZXMgRGl2aXNpb24xITAfBgNVBAMTGFRoYXd0ZSBQcmVtaXVtIFNl
|
||||
cnZlciBDQTEoMCYGCSqGSIb3DQEJARYZcHJlbWl1bS1zZXJ2ZXJAdGhhd3RlLmNv
|
||||
bTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA0jY2aovXwlue2oFBYo847kkE
|
||||
VdbQ7xwblRZH7xhINTpS9CtqBo87L+pW46+GjZ4X9560ZXUCTe/LCaIhUdib0GfQ
|
||||
ug2SBhRz1JPLlyoAnFxODLz6FVL88kRu2hFKbgifLy3j+ao6hnO2RlNYyIkFvYMR
|
||||
uHM/qgeN9EJN50CdHDcCAwEAAaMTMBEwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG
|
||||
9w0BAQQFAAOBgQAmSCwWwlj66BZ0DKqqX1Q/8tfJeGBeXm43YyJ3Nn6yF8Q0ufUI
|
||||
hfzJATj/Tb7yFkJD57taRvvBxhEf8UqwKEbJw8RCfbz6q1lu1bdRiBHjpIUZa4JM
|
||||
pAwSremkrj/xw0llmozFyD4lt5SZu5IycQfwhl7tUCemDaYj+bvLpgcUQg==
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=Thawte Server CA O=Thawte Consulting cc OU=Certification Services Division
|
||||
# Subject: CN=Thawte Server CA O=Thawte Consulting cc OU=Certification Services Division
|
||||
# Label: "Thawte Server CA"
|
||||
# Serial: 1
|
||||
# MD5 Fingerprint: c5:70:c4:a2:ed:53:78:0c:c8:10:53:81:64:cb:d0:1d
|
||||
# SHA1 Fingerprint: 23:e5:94:94:51:95:f2:41:48:03:b4:d5:64:d2:a3:a3:f5:d8:8b:8c
|
||||
# SHA256 Fingerprint: b4:41:0b:73:e2:e6:ea:ca:47:fb:c4:2f:8f:a4:01:8a:f4:38:1d:c5:4c:fa:a8:44:50:46:1e:ed:09:45:4d:e9
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIDEzCCAnygAwIBAgIBATANBgkqhkiG9w0BAQQFADCBxDELMAkGA1UEBhMCWkEx
|
||||
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYD
|
||||
VQQKExRUaGF3dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlv
|
||||
biBTZXJ2aWNlcyBEaXZpc2lvbjEZMBcGA1UEAxMQVGhhd3RlIFNlcnZlciBDQTEm
|
||||
MCQGCSqGSIb3DQEJARYXc2VydmVyLWNlcnRzQHRoYXd0ZS5jb20wHhcNOTYwODAx
|
||||
MDAwMDAwWhcNMjAxMjMxMjM1OTU5WjCBxDELMAkGA1UEBhMCWkExFTATBgNVBAgT
|
||||
DFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYDVQQKExRUaGF3
|
||||
dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBTZXJ2aWNl
|
||||
cyBEaXZpc2lvbjEZMBcGA1UEAxMQVGhhd3RlIFNlcnZlciBDQTEmMCQGCSqGSIb3
|
||||
DQEJARYXc2VydmVyLWNlcnRzQHRoYXd0ZS5jb20wgZ8wDQYJKoZIhvcNAQEBBQAD
|
||||
gY0AMIGJAoGBANOkUG7I/1Zr5s9dtuoMaHVHoqrC2oQl/Kj0R1HahbUgdJSGHg91
|
||||
yekIYfUGbTBuFRkC6VLAYttNmZ7iagxEOM3+vuNkCXDF/rFrKbYvScg71CcEJRCX
|
||||
L+eQbcAoQpnXTEPew/UhbVSfXcNY4cDk2VuwuNy0e982OsK1ZiIS1ocNAgMBAAGj
|
||||
EzARMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEEBQADgYEAB/pMaVz7lcxG
|
||||
7oWDTSEwjsrZqG9JGubaUeNgcGyEYRGhGshIPllDfU+VPaGLtwtimHp1it2ITk6e
|
||||
QNuozDJ0uW8NxuOzRAvZim+aKZuZGCg70eNAKJpaPNW15yAbi8qkq43pUdniTCxZ
|
||||
qdq5snUb9kLy78fyGPmJvKP/iiMucEc=
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
|
||||
# Subject: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
|
||||
# Label: "Verisign Class 3 Public Primary Certification Authority"
|
||||
# Serial: 149843929435818692848040365716851702463
|
||||
# MD5 Fingerprint: 10:fc:63:5d:f6:26:3e:0d:f3:25:be:5f:79:cd:67:67
|
||||
# SHA1 Fingerprint: 74:2c:31:92:e6:07:e4:24:eb:45:49:54:2b:e1:bb:c5:3e:61:74:e2
|
||||
# SHA256 Fingerprint: e7:68:56:34:ef:ac:f6:9a:ce:93:9a:6b:25:5b:7b:4f:ab:ef:42:93:5b:50:a2:65:ac:b5:cb:60:27:e4:4e:70
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIICPDCCAaUCEHC65B0Q2Sk0tjjKewPMur8wDQYJKoZIhvcNAQECBQAwXzELMAkG
|
||||
A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFz
|
||||
cyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk2
|
||||
MDEyOTAwMDAwMFoXDTI4MDgwMTIzNTk1OVowXzELMAkGA1UEBhMCVVMxFzAVBgNV
|
||||
BAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFzcyAzIFB1YmxpYyBQcmlt
|
||||
YXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGfMA0GCSqGSIb3DQEBAQUAA4GN
|
||||
ADCBiQKBgQDJXFme8huKARS0EN8EQNvjV69qRUCPhAwL0TPZ2RHP7gJYHyX3KqhE
|
||||
BarsAx94f56TuZoAqiN91qyFomNFx3InzPRMxnVx0jnvT0Lwdd8KkMaOIG+YD/is
|
||||
I19wKTakyYbnsZogy1Olhec9vn2a/iRFM9x2Fe0PonFkTGUugWhFpwIDAQABMA0G
|
||||
CSqGSIb3DQEBAgUAA4GBALtMEivPLCYATxQT3ab7/AoRhIzzKBxnki98tsX63/Do
|
||||
lbwdj2wsqFHMc9ikwFPwTtYmwHYBV4GSXiHx0bH/59AhWM1pF+NEHJwZRDmJXNyc
|
||||
AA9WjQKZ7aKQRUzkuxCkPfAyAw7xzvjoyVGM5mKf5p/AfbdynMk2OmufTqj/ZA1k
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
|
||||
# Subject: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
|
||||
# Label: "Verisign Class 3 Public Primary Certification Authority"
|
||||
# Serial: 80507572722862485515306429940691309246
|
||||
# MD5 Fingerprint: ef:5a:f1:33:ef:f1:cd:bb:51:02:ee:12:14:4b:96:c4
|
||||
# SHA1 Fingerprint: a1:db:63:93:91:6f:17:e4:18:55:09:40:04:15:c7:02:40:b0:ae:6b
|
||||
# SHA256 Fingerprint: a4:b6:b3:99:6f:c2:f3:06:b3:fd:86:81:bd:63:41:3d:8c:50:09:cc:4f:a3:29:c2:cc:f0:e2:fa:1b:14:03:05
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIICPDCCAaUCEDyRMcsf9tAbDpq40ES/Er4wDQYJKoZIhvcNAQEFBQAwXzELMAkG
|
||||
A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFz
|
||||
cyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk2
|
||||
MDEyOTAwMDAwMFoXDTI4MDgwMjIzNTk1OVowXzELMAkGA1UEBhMCVVMxFzAVBgNV
|
||||
BAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFzcyAzIFB1YmxpYyBQcmlt
|
||||
YXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGfMA0GCSqGSIb3DQEBAQUAA4GN
|
||||
ADCBiQKBgQDJXFme8huKARS0EN8EQNvjV69qRUCPhAwL0TPZ2RHP7gJYHyX3KqhE
|
||||
BarsAx94f56TuZoAqiN91qyFomNFx3InzPRMxnVx0jnvT0Lwdd8KkMaOIG+YD/is
|
||||
I19wKTakyYbnsZogy1Olhec9vn2a/iRFM9x2Fe0PonFkTGUugWhFpwIDAQABMA0G
|
||||
CSqGSIb3DQEBBQUAA4GBABByUqkFFBkyCEHwxWsKzH4PIRnN5GfcX6kb5sroc50i
|
||||
2JhucwNhkcV8sEVAbkSdjbCxlnRhLQ2pRdKkkirWmnWXbj9T/UWZYB2oK0z5XqcJ
|
||||
2HUw19JlYD1n1khVdWk/kfVIC0dpImmClr7JyDiGSnoscxlIaU5rfGW/D/xwzoiQ
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority - G2/(c) 1998 VeriSign, Inc. - For authorized use only/VeriSign Trust Network
|
||||
# Subject: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority - G2/(c) 1998 VeriSign, Inc. - For authorized use only/VeriSign Trust Network
|
||||
# Label: "Verisign Class 3 Public Primary Certification Authority - G2"
|
||||
# Serial: 167285380242319648451154478808036881606
|
||||
# MD5 Fingerprint: a2:33:9b:4c:74:78:73:d4:6c:e7:c1:f3:8d:cb:5c:e9
|
||||
# SHA1 Fingerprint: 85:37:1c:a6:e5:50:14:3d:ce:28:03:47:1b:de:3a:09:e8:f8:77:0f
|
||||
# SHA256 Fingerprint: 83:ce:3c:12:29:68:8a:59:3d:48:5f:81:97:3c:0f:91:95:43:1e:da:37:cc:5e:36:43:0e:79:c7:a8:88:63:8b
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIDAjCCAmsCEH3Z/gfPqB63EHln+6eJNMYwDQYJKoZIhvcNAQEFBQAwgcExCzAJ
|
||||
BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xh
|
||||
c3MgMyBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcy
|
||||
MTowOAYDVQQLEzEoYykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3Jp
|
||||
emVkIHVzZSBvbmx5MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMB4X
|
||||
DTk4MDUxODAwMDAwMFoXDTI4MDgwMTIzNTk1OVowgcExCzAJBgNVBAYTAlVTMRcw
|
||||
FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xhc3MgMyBQdWJsaWMg
|
||||
UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcyMTowOAYDVQQLEzEo
|
||||
YykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5
|
||||
MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMIGfMA0GCSqGSIb3DQEB
|
||||
AQUAA4GNADCBiQKBgQDMXtERXVxp0KvTuWpMmR9ZmDCOFoUgRm1HP9SFIIThbbP4
|
||||
pO0M8RcPO/mn+SXXwc+EY/J8Y8+iR/LGWzOOZEAEaMGAuWQcRXfH2G71lSk8UOg0
|
||||
13gfqLptQ5GVj0VXXn7F+8qkBOvqlzdUMG+7AUcyM83cV5tkaWH4mx0ciU9cZwID
|
||||
AQABMA0GCSqGSIb3DQEBBQUAA4GBAFFNzb5cy5gZnBWyATl4Lk0PZ3BwmcYQWpSk
|
||||
U01UbSuvDV1Ai2TT1+7eVmGSX6bEHRBhNtMsJzzoKQm5EWR0zLVznxxIqbxhAe7i
|
||||
F6YM40AIOw7n60RzKprxaZLvcRTDOaxxp5EJb+RxBrO6WVcmeQD2+A2iMzAo1KpY
|
||||
oJ2daZH9
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=GTE CyberTrust Global Root O=GTE Corporation OU=GTE CyberTrust Solutions, Inc.
|
||||
# Subject: CN=GTE CyberTrust Global Root O=GTE Corporation OU=GTE CyberTrust Solutions, Inc.
|
||||
# Label: "GTE CyberTrust Global Root"
|
||||
# Serial: 421
|
||||
# MD5 Fingerprint: ca:3d:d3:68:f1:03:5c:d0:32:fa:b8:2b:59:e8:5a:db
|
||||
# SHA1 Fingerprint: 97:81:79:50:d8:1c:96:70:cc:34:d8:09:cf:79:44:31:36:7e:f4:74
|
||||
# SHA256 Fingerprint: a5:31:25:18:8d:21:10:aa:96:4b:02:c7:b7:c6:da:32:03:17:08:94:e5:fb:71:ff:fb:66:67:d5:e6:81:0a:36
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIICWjCCAcMCAgGlMA0GCSqGSIb3DQEBBAUAMHUxCzAJBgNVBAYTAlVTMRgwFgYD
|
||||
VQQKEw9HVEUgQ29ycG9yYXRpb24xJzAlBgNVBAsTHkdURSBDeWJlclRydXN0IFNv
|
||||
bHV0aW9ucywgSW5jLjEjMCEGA1UEAxMaR1RFIEN5YmVyVHJ1c3QgR2xvYmFsIFJv
|
||||
b3QwHhcNOTgwODEzMDAyOTAwWhcNMTgwODEzMjM1OTAwWjB1MQswCQYDVQQGEwJV
|
||||
UzEYMBYGA1UEChMPR1RFIENvcnBvcmF0aW9uMScwJQYDVQQLEx5HVEUgQ3liZXJU
|
||||
cnVzdCBTb2x1dGlvbnMsIEluYy4xIzAhBgNVBAMTGkdURSBDeWJlclRydXN0IEds
|
||||
b2JhbCBSb290MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCVD6C28FCc6HrH
|
||||
iM3dFw4usJTQGz0O9pTAipTHBsiQl8i4ZBp6fmw8U+E3KHNgf7KXUwefU/ltWJTS
|
||||
r41tiGeA5u2ylc9yMcqlHHK6XALnZELn+aks1joNrI1CqiQBOeacPwGFVw1Yh0X4
|
||||
04Wqk2kmhXBIgD8SFcd5tB8FLztimQIDAQABMA0GCSqGSIb3DQEBBAUAA4GBAG3r
|
||||
GwnpXtlR22ciYaQqPEh346B8pt5zohQDhT37qw4wxYMWM4ETCJ57NE7fQMh017l9
|
||||
3PR2VX2bY1QY6fDq81yx2YtCHrnAlU66+tXifPVoYb+O7AWXX1uw16OFNMQkpw0P
|
||||
lZPvy5TYnh+dXIVtx6quTx8itc2VrbqnzPmrC3p/
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: C=US, O=Equifax, OU=Equifax Secure Certificate Authority
|
||||
# Subject: C=US, O=Equifax, OU=Equifax Secure Certificate Authority
|
||||
# Label: "Equifax Secure Certificate Authority"
|
||||
# Serial: 903804111
|
||||
# MD5 Fingerprint: 67:cb:9d:c0:13:24:8a:82:9b:b2:17:1e:d1:1b:ec:d4
|
||||
# SHA1 Fingerprint: d2:32:09:ad:23:d3:14:23:21:74:e4:0d:7f:9d:62:13:97:86:63:3a
|
||||
# SHA256 Fingerprint: 08:29:7a:40:47:db:a2:36:80:c7:31:db:6e:31:76:53:ca:78:48:e1:be:bd:3a:0b:01:79:a7:07:f9:2c:f1:78
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIDIDCCAomgAwIBAgIENd70zzANBgkqhkiG9w0BAQUFADBOMQswCQYDVQQGEwJV
|
||||
UzEQMA4GA1UEChMHRXF1aWZheDEtMCsGA1UECxMkRXF1aWZheCBTZWN1cmUgQ2Vy
|
||||
dGlmaWNhdGUgQXV0aG9yaXR5MB4XDTk4MDgyMjE2NDE1MVoXDTE4MDgyMjE2NDE1
|
||||
MVowTjELMAkGA1UEBhMCVVMxEDAOBgNVBAoTB0VxdWlmYXgxLTArBgNVBAsTJEVx
|
||||
dWlmYXggU2VjdXJlIENlcnRpZmljYXRlIEF1dGhvcml0eTCBnzANBgkqhkiG9w0B
|
||||
AQEFAAOBjQAwgYkCgYEAwV2xWGcIYu6gmi0fCG2RFGiYCh7+2gRvE4RiIcPRfM6f
|
||||
BeC4AfBONOziipUEZKzxa1NfBbPLZ4C/QgKO/t0BCezhABRP/PvwDN1Dulsr4R+A
|
||||
cJkVV5MW8Q+XarfCaCMczE1ZMKxRHjuvK9buY0V7xdlfUNLjUA86iOe/FP3gx7kC
|
||||
AwEAAaOCAQkwggEFMHAGA1UdHwRpMGcwZaBjoGGkXzBdMQswCQYDVQQGEwJVUzEQ
|
||||
MA4GA1UEChMHRXF1aWZheDEtMCsGA1UECxMkRXF1aWZheCBTZWN1cmUgQ2VydGlm
|
||||
aWNhdGUgQXV0aG9yaXR5MQ0wCwYDVQQDEwRDUkwxMBoGA1UdEAQTMBGBDzIwMTgw
|
||||
ODIyMTY0MTUxWjALBgNVHQ8EBAMCAQYwHwYDVR0jBBgwFoAUSOZo+SvSspXXR9gj
|
||||
IBBPM5iQn9QwHQYDVR0OBBYEFEjmaPkr0rKV10fYIyAQTzOYkJ/UMAwGA1UdEwQF
|
||||
MAMBAf8wGgYJKoZIhvZ9B0EABA0wCxsFVjMuMGMDAgbAMA0GCSqGSIb3DQEBBQUA
|
||||
A4GBAFjOKer89961zgK5F7WF0bnj4JXMJTENAKaSbn+2kmOeUJXRmm/kEd5jhW6Y
|
||||
7qj/WsjTVbJmcVfewCHrPSqnI0kBBIZCe/zuf6IWUrVnZ9NA2zsmWLIodz2uFHdh
|
||||
1voqZiegDfqnc1zqcPGUIWVEX/r87yloqaKHee9570+sB3c4
|
||||
-----END CERTIFICATE-----
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,504 +0,0 @@
|
||||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
Version 2.1, February 1999
|
||||
|
||||
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
|
||||
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
[This is the first released version of the Lesser GPL. It also counts
|
||||
as the successor of the GNU Library Public License, version 2, hence
|
||||
the version number 2.1.]
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
Licenses are intended to guarantee your freedom to share and change
|
||||
free software--to make sure the software is free for all its users.
|
||||
|
||||
This license, the Lesser General Public License, applies to some
|
||||
specially designated software packages--typically libraries--of the
|
||||
Free Software Foundation and other authors who decide to use it. You
|
||||
can use it too, but we suggest you first think carefully about whether
|
||||
this license or the ordinary General Public License is the better
|
||||
strategy to use in any particular case, based on the explanations below.
|
||||
|
||||
When we speak of free software, we are referring to freedom of use,
|
||||
not price. Our General Public Licenses are designed to make sure that
|
||||
you have the freedom to distribute copies of free software (and charge
|
||||
for this service if you wish); that you receive source code or can get
|
||||
it if you want it; that you can change the software and use pieces of
|
||||
it in new free programs; and that you are informed that you can do
|
||||
these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
distributors to deny you these rights or to ask you to surrender these
|
||||
rights. These restrictions translate to certain responsibilities for
|
||||
you if you distribute copies of the library or if you modify it.
|
||||
|
||||
For example, if you distribute copies of the library, whether gratis
|
||||
or for a fee, you must give the recipients all the rights that we gave
|
||||
you. You must make sure that they, too, receive or can get the source
|
||||
code. If you link other code with the library, you must provide
|
||||
complete object files to the recipients, so that they can relink them
|
||||
with the library after making changes to the library and recompiling
|
||||
it. And you must show them these terms so they know their rights.
|
||||
|
||||
We protect your rights with a two-step method: (1) we copyright the
|
||||
library, and (2) we offer you this license, which gives you legal
|
||||
permission to copy, distribute and/or modify the library.
|
||||
|
||||
To protect each distributor, we want to make it very clear that
|
||||
there is no warranty for the free library. Also, if the library is
|
||||
modified by someone else and passed on, the recipients should know
|
||||
that what they have is not the original version, so that the original
|
||||
author's reputation will not be affected by problems that might be
|
||||
introduced by others.
|
||||
|
||||
Finally, software patents pose a constant threat to the existence of
|
||||
any free program. We wish to make sure that a company cannot
|
||||
effectively restrict the users of a free program by obtaining a
|
||||
restrictive license from a patent holder. Therefore, we insist that
|
||||
any patent license obtained for a version of the library must be
|
||||
consistent with the full freedom of use specified in this license.
|
||||
|
||||
Most GNU software, including some libraries, is covered by the
|
||||
ordinary GNU General Public License. This license, the GNU Lesser
|
||||
General Public License, applies to certain designated libraries, and
|
||||
is quite different from the ordinary General Public License. We use
|
||||
this license for certain libraries in order to permit linking those
|
||||
libraries into non-free programs.
|
||||
|
||||
When a program is linked with a library, whether statically or using
|
||||
a shared library, the combination of the two is legally speaking a
|
||||
combined work, a derivative of the original library. The ordinary
|
||||
General Public License therefore permits such linking only if the
|
||||
entire combination fits its criteria of freedom. The Lesser General
|
||||
Public License permits more lax criteria for linking other code with
|
||||
the library.
|
||||
|
||||
We call this license the "Lesser" General Public License because it
|
||||
does Less to protect the user's freedom than the ordinary General
|
||||
Public License. It also provides other free software developers Less
|
||||
of an advantage over competing non-free programs. These disadvantages
|
||||
are the reason we use the ordinary General Public License for many
|
||||
libraries. However, the Lesser license provides advantages in certain
|
||||
special circumstances.
|
||||
|
||||
For example, on rare occasions, there may be a special need to
|
||||
encourage the widest possible use of a certain library, so that it becomes
|
||||
a de-facto standard. To achieve this, non-free programs must be
|
||||
allowed to use the library. A more frequent case is that a free
|
||||
library does the same job as widely used non-free libraries. In this
|
||||
case, there is little to gain by limiting the free library to free
|
||||
software only, so we use the Lesser General Public License.
|
||||
|
||||
In other cases, permission to use a particular library in non-free
|
||||
programs enables a greater number of people to use a large body of
|
||||
free software. For example, permission to use the GNU C Library in
|
||||
non-free programs enables many more people to use the whole GNU
|
||||
operating system, as well as its variant, the GNU/Linux operating
|
||||
system.
|
||||
|
||||
Although the Lesser General Public License is Less protective of the
|
||||
users' freedom, it does ensure that the user of a program that is
|
||||
linked with the Library has the freedom and the wherewithal to run
|
||||
that program using a modified version of the Library.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow. Pay close attention to the difference between a
|
||||
"work based on the library" and a "work that uses the library". The
|
||||
former contains code derived from the library, whereas the latter must
|
||||
be combined with the library in order to run.
|
||||
|
||||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License Agreement applies to any software library or other
|
||||
program which contains a notice placed by the copyright holder or
|
||||
other authorized party saying it may be distributed under the terms of
|
||||
this Lesser General Public License (also called "this License").
|
||||
Each licensee is addressed as "you".
|
||||
|
||||
A "library" means a collection of software functions and/or data
|
||||
prepared so as to be conveniently linked with application programs
|
||||
(which use some of those functions and data) to form executables.
|
||||
|
||||
The "Library", below, refers to any such software library or work
|
||||
which has been distributed under these terms. A "work based on the
|
||||
Library" means either the Library or any derivative work under
|
||||
copyright law: that is to say, a work containing the Library or a
|
||||
portion of it, either verbatim or with modifications and/or translated
|
||||
straightforwardly into another language. (Hereinafter, translation is
|
||||
included without limitation in the term "modification".)
|
||||
|
||||
"Source code" for a work means the preferred form of the work for
|
||||
making modifications to it. For a library, complete source code means
|
||||
all the source code for all modules it contains, plus any associated
|
||||
interface definition files, plus the scripts used to control compilation
|
||||
and installation of the library.
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running a program using the Library is not restricted, and output from
|
||||
such a program is covered only if its contents constitute a work based
|
||||
on the Library (independent of the use of the Library in a tool for
|
||||
writing it). Whether that is true depends on what the Library does
|
||||
and what the program that uses the Library does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Library's
|
||||
complete source code as you receive it, in any medium, provided that
|
||||
you conspicuously and appropriately publish on each copy an
|
||||
appropriate copyright notice and disclaimer of warranty; keep intact
|
||||
all the notices that refer to this License and to the absence of any
|
||||
warranty; and distribute a copy of this License along with the
|
||||
Library.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy,
|
||||
and you may at your option offer warranty protection in exchange for a
|
||||
fee.
|
||||
|
||||
2. You may modify your copy or copies of the Library or any portion
|
||||
of it, thus forming a work based on the Library, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) The modified work must itself be a software library.
|
||||
|
||||
b) You must cause the files modified to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
c) You must cause the whole of the work to be licensed at no
|
||||
charge to all third parties under the terms of this License.
|
||||
|
||||
d) If a facility in the modified Library refers to a function or a
|
||||
table of data to be supplied by an application program that uses
|
||||
the facility, other than as an argument passed when the facility
|
||||
is invoked, then you must make a good faith effort to ensure that,
|
||||
in the event an application does not supply such function or
|
||||
table, the facility still operates, and performs whatever part of
|
||||
its purpose remains meaningful.
|
||||
|
||||
(For example, a function in a library to compute square roots has
|
||||
a purpose that is entirely well-defined independent of the
|
||||
application. Therefore, Subsection 2d requires that any
|
||||
application-supplied function or table used by this function must
|
||||
be optional: if the application does not supply it, the square
|
||||
root function must still compute square roots.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Library,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Library, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote
|
||||
it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Library.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Library
|
||||
with the Library (or with a work based on the Library) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may opt to apply the terms of the ordinary GNU General Public
|
||||
License instead of this License to a given copy of the Library. To do
|
||||
this, you must alter all the notices that refer to this License, so
|
||||
that they refer to the ordinary GNU General Public License, version 2,
|
||||
instead of to this License. (If a newer version than version 2 of the
|
||||
ordinary GNU General Public License has appeared, then you can specify
|
||||
that version instead if you wish.) Do not make any other change in
|
||||
these notices.
|
||||
|
||||
Once this change is made in a given copy, it is irreversible for
|
||||
that copy, so the ordinary GNU General Public License applies to all
|
||||
subsequent copies and derivative works made from that copy.
|
||||
|
||||
This option is useful when you wish to copy part of the code of
|
||||
the Library into a program that is not a library.
|
||||
|
||||
4. You may copy and distribute the Library (or a portion or
|
||||
derivative of it, under Section 2) in object code or executable form
|
||||
under the terms of Sections 1 and 2 above provided that you accompany
|
||||
it with the complete corresponding machine-readable source code, which
|
||||
must be distributed under the terms of Sections 1 and 2 above on a
|
||||
medium customarily used for software interchange.
|
||||
|
||||
If distribution of object code is made by offering access to copy
|
||||
from a designated place, then offering equivalent access to copy the
|
||||
source code from the same place satisfies the requirement to
|
||||
distribute the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
5. A program that contains no derivative of any portion of the
|
||||
Library, but is designed to work with the Library by being compiled or
|
||||
linked with it, is called a "work that uses the Library". Such a
|
||||
work, in isolation, is not a derivative work of the Library, and
|
||||
therefore falls outside the scope of this License.
|
||||
|
||||
However, linking a "work that uses the Library" with the Library
|
||||
creates an executable that is a derivative of the Library (because it
|
||||
contains portions of the Library), rather than a "work that uses the
|
||||
library". The executable is therefore covered by this License.
|
||||
Section 6 states terms for distribution of such executables.
|
||||
|
||||
When a "work that uses the Library" uses material from a header file
|
||||
that is part of the Library, the object code for the work may be a
|
||||
derivative work of the Library even though the source code is not.
|
||||
Whether this is true is especially significant if the work can be
|
||||
linked without the Library, or if the work is itself a library. The
|
||||
threshold for this to be true is not precisely defined by law.
|
||||
|
||||
If such an object file uses only numerical parameters, data
|
||||
structure layouts and accessors, and small macros and small inline
|
||||
functions (ten lines or less in length), then the use of the object
|
||||
file is unrestricted, regardless of whether it is legally a derivative
|
||||
work. (Executables containing this object code plus portions of the
|
||||
Library will still fall under Section 6.)
|
||||
|
||||
Otherwise, if the work is a derivative of the Library, you may
|
||||
distribute the object code for the work under the terms of Section 6.
|
||||
Any executables containing that work also fall under Section 6,
|
||||
whether or not they are linked directly with the Library itself.
|
||||
|
||||
6. As an exception to the Sections above, you may also combine or
|
||||
link a "work that uses the Library" with the Library to produce a
|
||||
work containing portions of the Library, and distribute that work
|
||||
under terms of your choice, provided that the terms permit
|
||||
modification of the work for the customer's own use and reverse
|
||||
engineering for debugging such modifications.
|
||||
|
||||
You must give prominent notice with each copy of the work that the
|
||||
Library is used in it and that the Library and its use are covered by
|
||||
this License. You must supply a copy of this License. If the work
|
||||
during execution displays copyright notices, you must include the
|
||||
copyright notice for the Library among them, as well as a reference
|
||||
directing the user to the copy of this License. Also, you must do one
|
||||
of these things:
|
||||
|
||||
a) Accompany the work with the complete corresponding
|
||||
machine-readable source code for the Library including whatever
|
||||
changes were used in the work (which must be distributed under
|
||||
Sections 1 and 2 above); and, if the work is an executable linked
|
||||
with the Library, with the complete machine-readable "work that
|
||||
uses the Library", as object code and/or source code, so that the
|
||||
user can modify the Library and then relink to produce a modified
|
||||
executable containing the modified Library. (It is understood
|
||||
that the user who changes the contents of definitions files in the
|
||||
Library will not necessarily be able to recompile the application
|
||||
to use the modified definitions.)
|
||||
|
||||
b) Use a suitable shared library mechanism for linking with the
|
||||
Library. A suitable mechanism is one that (1) uses at run time a
|
||||
copy of the library already present on the user's computer system,
|
||||
rather than copying library functions into the executable, and (2)
|
||||
will operate properly with a modified version of the library, if
|
||||
the user installs one, as long as the modified version is
|
||||
interface-compatible with the version that the work was made with.
|
||||
|
||||
c) Accompany the work with a written offer, valid for at
|
||||
least three years, to give the same user the materials
|
||||
specified in Subsection 6a, above, for a charge no more
|
||||
than the cost of performing this distribution.
|
||||
|
||||
d) If distribution of the work is made by offering access to copy
|
||||
from a designated place, offer equivalent access to copy the above
|
||||
specified materials from the same place.
|
||||
|
||||
e) Verify that the user has already received a copy of these
|
||||
materials or that you have already sent this user a copy.
|
||||
|
||||
For an executable, the required form of the "work that uses the
|
||||
Library" must include any data and utility programs needed for
|
||||
reproducing the executable from it. However, as a special exception,
|
||||
the materials to be distributed need not include anything that is
|
||||
normally distributed (in either source or binary form) with the major
|
||||
components (compiler, kernel, and so on) of the operating system on
|
||||
which the executable runs, unless that component itself accompanies
|
||||
the executable.
|
||||
|
||||
It may happen that this requirement contradicts the license
|
||||
restrictions of other proprietary libraries that do not normally
|
||||
accompany the operating system. Such a contradiction means you cannot
|
||||
use both them and the Library together in an executable that you
|
||||
distribute.
|
||||
|
||||
7. You may place library facilities that are a work based on the
|
||||
Library side-by-side in a single library together with other library
|
||||
facilities not covered by this License, and distribute such a combined
|
||||
library, provided that the separate distribution of the work based on
|
||||
the Library and of the other library facilities is otherwise
|
||||
permitted, and provided that you do these two things:
|
||||
|
||||
a) Accompany the combined library with a copy of the same work
|
||||
based on the Library, uncombined with any other library
|
||||
facilities. This must be distributed under the terms of the
|
||||
Sections above.
|
||||
|
||||
b) Give prominent notice with the combined library of the fact
|
||||
that part of it is a work based on the Library, and explaining
|
||||
where to find the accompanying uncombined form of the same work.
|
||||
|
||||
8. You may not copy, modify, sublicense, link with, or distribute
|
||||
the Library except as expressly provided under this License. Any
|
||||
attempt otherwise to copy, modify, sublicense, link with, or
|
||||
distribute the Library is void, and will automatically terminate your
|
||||
rights under this License. However, parties who have received copies,
|
||||
or rights, from you under this License will not have their licenses
|
||||
terminated so long as such parties remain in full compliance.
|
||||
|
||||
9. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Library or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Library (or any work based on the
|
||||
Library), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Library or works based on it.
|
||||
|
||||
10. Each time you redistribute the Library (or any work based on the
|
||||
Library), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute, link with or modify the Library
|
||||
subject to these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties with
|
||||
this License.
|
||||
|
||||
11. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Library at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Library by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Library.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under any
|
||||
particular circumstance, the balance of the section is intended to apply,
|
||||
and the section as a whole is intended to apply in other circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
12. If the distribution and/or use of the Library is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Library under this License may add
|
||||
an explicit geographical distribution limitation excluding those countries,
|
||||
so that distribution is permitted only in or among countries not thus
|
||||
excluded. In such case, this License incorporates the limitation as if
|
||||
written in the body of this License.
|
||||
|
||||
13. The Free Software Foundation may publish revised and/or new
|
||||
versions of the Lesser General Public License from time to time.
|
||||
Such new versions will be similar in spirit to the present version,
|
||||
but may differ in detail to address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Library
|
||||
specifies a version number of this License which applies to it and
|
||||
"any later version", you have the option of following the terms and
|
||||
conditions either of that version or of any later version published by
|
||||
the Free Software Foundation. If the Library does not specify a
|
||||
license version number, you may choose any version ever published by
|
||||
the Free Software Foundation.
|
||||
|
||||
14. If you wish to incorporate parts of the Library into other free
|
||||
programs whose distribution conditions are incompatible with these,
|
||||
write to the author to ask for permission. For software which is
|
||||
copyrighted by the Free Software Foundation, write to the Free
|
||||
Software Foundation; we sometimes make exceptions for this. Our
|
||||
decision will be guided by the two goals of preserving the free status
|
||||
of all derivatives of our free software and of promoting the sharing
|
||||
and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
|
||||
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
|
||||
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
|
||||
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
|
||||
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
|
||||
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
|
||||
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
|
||||
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
|
||||
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
|
||||
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
|
||||
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
|
||||
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
|
||||
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
|
||||
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
|
||||
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Libraries
|
||||
|
||||
If you develop a new library, and you want it to be of the greatest
|
||||
possible use to the public, we recommend making it free software that
|
||||
everyone can redistribute and change. You can do so by permitting
|
||||
redistribution under these terms (or, alternatively, under the terms of the
|
||||
ordinary General Public License).
|
||||
|
||||
To apply these terms, attach the following notices to the library. It is
|
||||
safest to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least the
|
||||
"copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the library's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the library, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the
|
||||
library `Frob' (a library for tweaking knobs) written by James Random Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1990
|
||||
Ty Coon, President of Vice
|
||||
|
||||
That's all there is to it!
|
||||
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
Chardet: The Universal Character Encoding Detector
|
||||
--------------------------------------------------
|
||||
|
||||
Detects
|
||||
- ASCII, UTF-8, UTF-16 (2 variants), UTF-32 (4 variants)
|
||||
- Big5, GB2312, EUC-TW, HZ-GB-2312, ISO-2022-CN (Traditional and Simplified Chinese)
|
||||
- EUC-JP, SHIFT_JIS, CP932, ISO-2022-JP (Japanese)
|
||||
- EUC-KR, ISO-2022-KR (Korean)
|
||||
- KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, windows-1251 (Cyrillic)
|
||||
- ISO-8859-2, windows-1250 (Hungarian)
|
||||
- ISO-8859-5, windows-1251 (Bulgarian)
|
||||
- windows-1252 (English)
|
||||
- ISO-8859-7, windows-1253 (Greek)
|
||||
- ISO-8859-8, windows-1255 (Visual and Logical Hebrew)
|
||||
- TIS-620 (Thai)
|
||||
|
||||
Requires Python 2.6 or later
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
Install from `PyPI <https://pypi.python.org/pypi/chardet>`_::
|
||||
|
||||
pip install chardet
|
||||
|
||||
|
||||
Command-line Tool
|
||||
-----------------
|
||||
|
||||
chardet comes with a command-line script which reports on the encodings of one
|
||||
or more files::
|
||||
|
||||
% chardetect somefile someotherfile
|
||||
somefile: windows-1252 with confidence 0.5
|
||||
someotherfile: ascii with confidence 1.0
|
||||
|
||||
About
|
||||
-----
|
||||
|
||||
This is a continuation of Mark Pilgrim's excellent chardet. Previously, two
|
||||
versions needed to be maintained: one that supported python 2.x and one that
|
||||
supported python 3.x. We've recently merged with `Ian Cordasco <https://github.com/sigmavirus24>`_'s
|
||||
`charade <https://github.com/sigmavirus24/charade>`_ fork, so now we have one
|
||||
coherent version that works for Python 2.6+.
|
||||
|
||||
:maintainer: Dan Blanchard
|
||||
@@ -15,18 +15,25 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
__version__ = "2.3.0"
|
||||
from sys import version_info
|
||||
|
||||
from .compat import PY2, PY3
|
||||
from .universaldetector import UniversalDetector
|
||||
from .version import __version__, VERSION
|
||||
|
||||
|
||||
def detect(aBuf):
|
||||
if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
|
||||
(version_info >= (3, 0) and not isinstance(aBuf, bytes))):
|
||||
raise ValueError('Expected a bytes object, not a unicode object')
|
||||
def detect(byte_str):
|
||||
"""
|
||||
Detect the encoding of the given byte string.
|
||||
|
||||
from . import universaldetector
|
||||
u = universaldetector.UniversalDetector()
|
||||
u.reset()
|
||||
u.feed(aBuf)
|
||||
u.close()
|
||||
return u.result
|
||||
:param byte_str: The byte sequence to examine.
|
||||
:type byte_str: ``bytes`` or ``bytearray``
|
||||
"""
|
||||
if not isinstance(byte_str, bytearray):
|
||||
if not isinstance(byte_str, bytes):
|
||||
raise TypeError('Expected object of type bytes or bytearray, got: '
|
||||
'{0}'.format(type(byte_str)))
|
||||
else:
|
||||
byte_str = bytearray(byte_str)
|
||||
detector = UniversalDetector()
|
||||
detector.feed(byte_str)
|
||||
return detector.close()
|
||||
|
||||
@@ -45,7 +45,7 @@ BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||
#Char to FreqOrder table
|
||||
BIG5_TABLE_SIZE = 5376
|
||||
|
||||
Big5CharToFreqOrder = (
|
||||
BIG5_CHAR_TO_FREQ_ORDER = (
|
||||
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
|
||||
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
|
||||
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48
|
||||
@@ -381,545 +381,6 @@ Big5CharToFreqOrder = (
|
||||
938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328
|
||||
3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344
|
||||
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360
|
||||
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 #last 512
|
||||
#Everything below is of no interest for detection purpose
|
||||
2522,1613,4812,5799,3345,3945,2523,5800,4162,5801,1637,4163,2471,4813,3946,5802, # 5392
|
||||
2500,3034,3800,5803,5804,2195,4814,5805,2163,5806,5807,5808,5809,5810,5811,5812, # 5408
|
||||
5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828, # 5424
|
||||
5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844, # 5440
|
||||
5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856,5857,5858,5859,5860, # 5456
|
||||
5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872,5873,5874,5875,5876, # 5472
|
||||
5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888,5889,5890,5891,5892, # 5488
|
||||
5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905,5906,5907,5908, # 5504
|
||||
5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920,5921,5922,5923,5924, # 5520
|
||||
5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936,5937,5938,5939,5940, # 5536
|
||||
5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952,5953,5954,5955,5956, # 5552
|
||||
5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968,5969,5970,5971,5972, # 5568
|
||||
5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984,5985,5986,5987,5988, # 5584
|
||||
5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004, # 5600
|
||||
6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020, # 5616
|
||||
6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032,6033,6034,6035,6036, # 5632
|
||||
6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052, # 5648
|
||||
6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068, # 5664
|
||||
6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084, # 5680
|
||||
6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100, # 5696
|
||||
6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116, # 5712
|
||||
6117,6118,6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,6132, # 5728
|
||||
6133,6134,6135,6136,6137,6138,6139,6140,6141,6142,6143,6144,6145,6146,6147,6148, # 5744
|
||||
6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163,6164, # 5760
|
||||
6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179,6180, # 5776
|
||||
6181,6182,6183,6184,6185,6186,6187,6188,6189,6190,6191,6192,6193,6194,6195,6196, # 5792
|
||||
6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210,6211,6212, # 5808
|
||||
6213,6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,3670,6224,6225,6226,6227, # 5824
|
||||
6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241,6242,6243, # 5840
|
||||
6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,6254,6255,6256,6257,6258,6259, # 5856
|
||||
6260,6261,6262,6263,6264,6265,6266,6267,6268,6269,6270,6271,6272,6273,6274,6275, # 5872
|
||||
6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,4815,6286,6287,6288,6289,6290, # 5888
|
||||
6291,6292,4816,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,6303,6304,6305, # 5904
|
||||
6306,6307,6308,6309,6310,6311,4817,4818,6312,6313,6314,6315,6316,6317,6318,4819, # 5920
|
||||
6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333,6334, # 5936
|
||||
6335,6336,6337,4820,6338,6339,6340,6341,6342,6343,6344,6345,6346,6347,6348,6349, # 5952
|
||||
6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363,6364,6365, # 5968
|
||||
6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379,6380,6381, # 5984
|
||||
6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395,6396,6397, # 6000
|
||||
6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,3441,6411,6412, # 6016
|
||||
6413,6414,6415,6416,6417,6418,6419,6420,6421,6422,6423,6424,6425,4440,6426,6427, # 6032
|
||||
6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,6439,6440,6441,6442,6443, # 6048
|
||||
6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,4821,6455,6456,6457,6458, # 6064
|
||||
6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472,6473,6474, # 6080
|
||||
6475,6476,6477,3947,3948,6478,6479,6480,6481,3272,4441,6482,6483,6484,6485,4442, # 6096
|
||||
6486,6487,6488,6489,6490,6491,6492,6493,6494,6495,6496,4822,6497,6498,6499,6500, # 6112
|
||||
6501,6502,6503,6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516, # 6128
|
||||
6517,6518,6519,6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532, # 6144
|
||||
6533,6534,6535,6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548, # 6160
|
||||
6549,6550,6551,6552,6553,6554,6555,6556,2784,6557,4823,6558,6559,6560,6561,6562, # 6176
|
||||
6563,6564,6565,6566,6567,6568,6569,3949,6570,6571,6572,4824,6573,6574,6575,6576, # 6192
|
||||
6577,6578,6579,6580,6581,6582,6583,4825,6584,6585,6586,3950,2785,6587,6588,6589, # 6208
|
||||
6590,6591,6592,6593,6594,6595,6596,6597,6598,6599,6600,6601,6602,6603,6604,6605, # 6224
|
||||
6606,6607,6608,6609,6610,6611,6612,4826,6613,6614,6615,4827,6616,6617,6618,6619, # 6240
|
||||
6620,6621,6622,6623,6624,6625,4164,6626,6627,6628,6629,6630,6631,6632,6633,6634, # 6256
|
||||
3547,6635,4828,6636,6637,6638,6639,6640,6641,6642,3951,2984,6643,6644,6645,6646, # 6272
|
||||
6647,6648,6649,4165,6650,4829,6651,6652,4830,6653,6654,6655,6656,6657,6658,6659, # 6288
|
||||
6660,6661,6662,4831,6663,6664,6665,6666,6667,6668,6669,6670,6671,4166,6672,4832, # 6304
|
||||
3952,6673,6674,6675,6676,4833,6677,6678,6679,4167,6680,6681,6682,3198,6683,6684, # 6320
|
||||
6685,6686,6687,6688,6689,6690,6691,6692,6693,6694,6695,6696,6697,4834,6698,6699, # 6336
|
||||
6700,6701,6702,6703,6704,6705,6706,6707,6708,6709,6710,6711,6712,6713,6714,6715, # 6352
|
||||
6716,6717,6718,6719,6720,6721,6722,6723,6724,6725,6726,6727,6728,6729,6730,6731, # 6368
|
||||
6732,6733,6734,4443,6735,6736,6737,6738,6739,6740,6741,6742,6743,6744,6745,4444, # 6384
|
||||
6746,6747,6748,6749,6750,6751,6752,6753,6754,6755,6756,6757,6758,6759,6760,6761, # 6400
|
||||
6762,6763,6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777, # 6416
|
||||
6778,6779,6780,6781,4168,6782,6783,3442,6784,6785,6786,6787,6788,6789,6790,6791, # 6432
|
||||
4169,6792,6793,6794,6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806, # 6448
|
||||
6807,6808,6809,6810,6811,4835,6812,6813,6814,4445,6815,6816,4446,6817,6818,6819, # 6464
|
||||
6820,6821,6822,6823,6824,6825,6826,6827,6828,6829,6830,6831,6832,6833,6834,6835, # 6480
|
||||
3548,6836,6837,6838,6839,6840,6841,6842,6843,6844,6845,6846,4836,6847,6848,6849, # 6496
|
||||
6850,6851,6852,6853,6854,3953,6855,6856,6857,6858,6859,6860,6861,6862,6863,6864, # 6512
|
||||
6865,6866,6867,6868,6869,6870,6871,6872,6873,6874,6875,6876,6877,3199,6878,6879, # 6528
|
||||
6880,6881,6882,4447,6883,6884,6885,6886,6887,6888,6889,6890,6891,6892,6893,6894, # 6544
|
||||
6895,6896,6897,6898,6899,6900,6901,6902,6903,6904,4170,6905,6906,6907,6908,6909, # 6560
|
||||
6910,6911,6912,6913,6914,6915,6916,6917,6918,6919,6920,6921,6922,6923,6924,6925, # 6576
|
||||
6926,6927,4837,6928,6929,6930,6931,6932,6933,6934,6935,6936,3346,6937,6938,4838, # 6592
|
||||
6939,6940,6941,4448,6942,6943,6944,6945,6946,4449,6947,6948,6949,6950,6951,6952, # 6608
|
||||
6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966,6967,6968, # 6624
|
||||
6969,6970,6971,6972,6973,6974,6975,6976,6977,6978,6979,6980,6981,6982,6983,6984, # 6640
|
||||
6985,6986,6987,6988,6989,6990,6991,6992,6993,6994,3671,6995,6996,6997,6998,4839, # 6656
|
||||
6999,7000,7001,7002,3549,7003,7004,7005,7006,7007,7008,7009,7010,7011,7012,7013, # 6672
|
||||
7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027,7028,7029, # 6688
|
||||
7030,4840,7031,7032,7033,7034,7035,7036,7037,7038,4841,7039,7040,7041,7042,7043, # 6704
|
||||
7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058,7059, # 6720
|
||||
7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,2985,7071,7072,7073,7074, # 6736
|
||||
7075,7076,7077,7078,7079,7080,4842,7081,7082,7083,7084,7085,7086,7087,7088,7089, # 6752
|
||||
7090,7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105, # 6768
|
||||
7106,7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,4450,7119,7120, # 6784
|
||||
7121,7122,7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136, # 6800
|
||||
7137,7138,7139,7140,7141,7142,7143,4843,7144,7145,7146,7147,7148,7149,7150,7151, # 6816
|
||||
7152,7153,7154,7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167, # 6832
|
||||
7168,7169,7170,7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183, # 6848
|
||||
7184,7185,7186,7187,7188,4171,4172,7189,7190,7191,7192,7193,7194,7195,7196,7197, # 6864
|
||||
7198,7199,7200,7201,7202,7203,7204,7205,7206,7207,7208,7209,7210,7211,7212,7213, # 6880
|
||||
7214,7215,7216,7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229, # 6896
|
||||
7230,7231,7232,7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245, # 6912
|
||||
7246,7247,7248,7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261, # 6928
|
||||
7262,7263,7264,7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277, # 6944
|
||||
7278,7279,7280,7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293, # 6960
|
||||
7294,7295,7296,4844,7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308, # 6976
|
||||
7309,7310,7311,7312,7313,7314,7315,7316,4451,7317,7318,7319,7320,7321,7322,7323, # 6992
|
||||
7324,7325,7326,7327,7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339, # 7008
|
||||
7340,7341,7342,7343,7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,4173,7354, # 7024
|
||||
7355,4845,7356,7357,7358,7359,7360,7361,7362,7363,7364,7365,7366,7367,7368,7369, # 7040
|
||||
7370,7371,7372,7373,7374,7375,7376,7377,7378,7379,7380,7381,7382,7383,7384,7385, # 7056
|
||||
7386,7387,7388,4846,7389,7390,7391,7392,7393,7394,7395,7396,7397,7398,7399,7400, # 7072
|
||||
7401,7402,7403,7404,7405,3672,7406,7407,7408,7409,7410,7411,7412,7413,7414,7415, # 7088
|
||||
7416,7417,7418,7419,7420,7421,7422,7423,7424,7425,7426,7427,7428,7429,7430,7431, # 7104
|
||||
7432,7433,7434,7435,7436,7437,7438,7439,7440,7441,7442,7443,7444,7445,7446,7447, # 7120
|
||||
7448,7449,7450,7451,7452,7453,4452,7454,3200,7455,7456,7457,7458,7459,7460,7461, # 7136
|
||||
7462,7463,7464,7465,7466,7467,7468,7469,7470,7471,7472,7473,7474,4847,7475,7476, # 7152
|
||||
7477,3133,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487,7488,7489,7490,7491, # 7168
|
||||
7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,3347,7503,7504,7505,7506, # 7184
|
||||
7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519,7520,7521,4848, # 7200
|
||||
7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535,7536,7537, # 7216
|
||||
7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,3801,4849,7550,7551, # 7232
|
||||
7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, # 7248
|
||||
7568,7569,3035,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582, # 7264
|
||||
7583,7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598, # 7280
|
||||
7599,7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614, # 7296
|
||||
7615,7616,4850,7617,7618,3802,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628, # 7312
|
||||
7629,7630,7631,7632,4851,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643, # 7328
|
||||
7644,7645,7646,7647,7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659, # 7344
|
||||
7660,7661,7662,7663,7664,7665,7666,7667,7668,7669,7670,4453,7671,7672,7673,7674, # 7360
|
||||
7675,7676,7677,7678,7679,7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690, # 7376
|
||||
7691,7692,7693,7694,7695,7696,7697,3443,7698,7699,7700,7701,7702,4454,7703,7704, # 7392
|
||||
7705,7706,7707,7708,7709,7710,7711,7712,7713,2472,7714,7715,7716,7717,7718,7719, # 7408
|
||||
7720,7721,7722,7723,7724,7725,7726,7727,7728,7729,7730,7731,3954,7732,7733,7734, # 7424
|
||||
7735,7736,7737,7738,7739,7740,7741,7742,7743,7744,7745,7746,7747,7748,7749,7750, # 7440
|
||||
3134,7751,7752,4852,7753,7754,7755,4853,7756,7757,7758,7759,7760,4174,7761,7762, # 7456
|
||||
7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,7777,7778, # 7472
|
||||
7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791,7792,7793,7794, # 7488
|
||||
7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,4854,7806,7807,7808,7809, # 7504
|
||||
7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824,7825, # 7520
|
||||
4855,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7536
|
||||
7841,7842,7843,7844,7845,7846,7847,3955,7848,7849,7850,7851,7852,7853,7854,7855, # 7552
|
||||
7856,7857,7858,7859,7860,3444,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870, # 7568
|
||||
7871,7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886, # 7584
|
||||
7887,7888,7889,7890,7891,4175,7892,7893,7894,7895,7896,4856,4857,7897,7898,7899, # 7600
|
||||
7900,2598,7901,7902,7903,7904,7905,7906,7907,7908,4455,7909,7910,7911,7912,7913, # 7616
|
||||
7914,3201,7915,7916,7917,7918,7919,7920,7921,4858,7922,7923,7924,7925,7926,7927, # 7632
|
||||
7928,7929,7930,7931,7932,7933,7934,7935,7936,7937,7938,7939,7940,7941,7942,7943, # 7648
|
||||
7944,7945,7946,7947,7948,7949,7950,7951,7952,7953,7954,7955,7956,7957,7958,7959, # 7664
|
||||
7960,7961,7962,7963,7964,7965,7966,7967,7968,7969,7970,7971,7972,7973,7974,7975, # 7680
|
||||
7976,7977,7978,7979,7980,7981,4859,7982,7983,7984,7985,7986,7987,7988,7989,7990, # 7696
|
||||
7991,7992,7993,7994,7995,7996,4860,7997,7998,7999,8000,8001,8002,8003,8004,8005, # 7712
|
||||
8006,8007,8008,8009,8010,8011,8012,8013,8014,8015,8016,4176,8017,8018,8019,8020, # 7728
|
||||
8021,8022,8023,4861,8024,8025,8026,8027,8028,8029,8030,8031,8032,8033,8034,8035, # 7744
|
||||
8036,4862,4456,8037,8038,8039,8040,4863,8041,8042,8043,8044,8045,8046,8047,8048, # 7760
|
||||
8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063,8064, # 7776
|
||||
8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,8080, # 7792
|
||||
8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,8096, # 7808
|
||||
8097,8098,8099,4864,4177,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110, # 7824
|
||||
8111,8112,8113,8114,8115,8116,8117,8118,8119,8120,4178,8121,8122,8123,8124,8125, # 7840
|
||||
8126,8127,8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141, # 7856
|
||||
8142,8143,8144,8145,4865,4866,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155, # 7872
|
||||
8156,8157,8158,8159,8160,8161,8162,8163,8164,8165,4179,8166,8167,8168,8169,8170, # 7888
|
||||
8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181,4457,8182,8183,8184,8185, # 7904
|
||||
8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197,8198,8199,8200,8201, # 7920
|
||||
8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213,8214,8215,8216,8217, # 7936
|
||||
8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229,8230,8231,8232,8233, # 7952
|
||||
8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245,8246,8247,8248,8249, # 7968
|
||||
8250,8251,8252,8253,8254,8255,8256,3445,8257,8258,8259,8260,8261,8262,4458,8263, # 7984
|
||||
8264,8265,8266,8267,8268,8269,8270,8271,8272,4459,8273,8274,8275,8276,3550,8277, # 8000
|
||||
8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,4460,8290,8291,8292, # 8016
|
||||
8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,4867, # 8032
|
||||
8308,8309,8310,8311,8312,3551,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322, # 8048
|
||||
8323,8324,8325,8326,4868,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337, # 8064
|
||||
8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353, # 8080
|
||||
8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,4869,4461,8364,8365,8366,8367, # 8096
|
||||
8368,8369,8370,4870,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382, # 8112
|
||||
8383,8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398, # 8128
|
||||
8399,8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,4871,8411,8412,8413, # 8144
|
||||
8414,8415,8416,8417,8418,8419,8420,8421,8422,4462,8423,8424,8425,8426,8427,8428, # 8160
|
||||
8429,8430,8431,8432,8433,2986,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443, # 8176
|
||||
8444,8445,8446,8447,8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459, # 8192
|
||||
8460,8461,8462,8463,8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475, # 8208
|
||||
8476,8477,8478,4180,8479,8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490, # 8224
|
||||
8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506, # 8240
|
||||
8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522, # 8256
|
||||
8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538, # 8272
|
||||
8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554, # 8288
|
||||
8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,4872,8565,8566,8567,8568,8569, # 8304
|
||||
8570,8571,8572,8573,4873,8574,8575,8576,8577,8578,8579,8580,8581,8582,8583,8584, # 8320
|
||||
8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597,8598,8599,8600, # 8336
|
||||
8601,8602,8603,8604,8605,3803,8606,8607,8608,8609,8610,8611,8612,8613,4874,3804, # 8352
|
||||
8614,8615,8616,8617,8618,8619,8620,8621,3956,8622,8623,8624,8625,8626,8627,8628, # 8368
|
||||
8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,2865,8639,8640,8641,8642,8643, # 8384
|
||||
8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,4463,8657,8658, # 8400
|
||||
8659,4875,4876,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672, # 8416
|
||||
8673,8674,8675,8676,8677,8678,8679,8680,8681,4464,8682,8683,8684,8685,8686,8687, # 8432
|
||||
8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, # 8448
|
||||
8704,8705,8706,8707,8708,8709,2261,8710,8711,8712,8713,8714,8715,8716,8717,8718, # 8464
|
||||
8719,8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,4181, # 8480
|
||||
8734,8735,8736,8737,8738,8739,8740,8741,8742,8743,8744,8745,8746,8747,8748,8749, # 8496
|
||||
8750,8751,8752,8753,8754,8755,8756,8757,8758,8759,8760,8761,8762,8763,4877,8764, # 8512
|
||||
8765,8766,8767,8768,8769,8770,8771,8772,8773,8774,8775,8776,8777,8778,8779,8780, # 8528
|
||||
8781,8782,8783,8784,8785,8786,8787,8788,4878,8789,4879,8790,8791,8792,4880,8793, # 8544
|
||||
8794,8795,8796,8797,8798,8799,8800,8801,4881,8802,8803,8804,8805,8806,8807,8808, # 8560
|
||||
8809,8810,8811,8812,8813,8814,8815,3957,8816,8817,8818,8819,8820,8821,8822,8823, # 8576
|
||||
8824,8825,8826,8827,8828,8829,8830,8831,8832,8833,8834,8835,8836,8837,8838,8839, # 8592
|
||||
8840,8841,8842,8843,8844,8845,8846,8847,4882,8848,8849,8850,8851,8852,8853,8854, # 8608
|
||||
8855,8856,8857,8858,8859,8860,8861,8862,8863,8864,8865,8866,8867,8868,8869,8870, # 8624
|
||||
8871,8872,8873,8874,8875,8876,8877,8878,8879,8880,8881,8882,8883,8884,3202,8885, # 8640
|
||||
8886,8887,8888,8889,8890,8891,8892,8893,8894,8895,8896,8897,8898,8899,8900,8901, # 8656
|
||||
8902,8903,8904,8905,8906,8907,8908,8909,8910,8911,8912,8913,8914,8915,8916,8917, # 8672
|
||||
8918,8919,8920,8921,8922,8923,8924,4465,8925,8926,8927,8928,8929,8930,8931,8932, # 8688
|
||||
4883,8933,8934,8935,8936,8937,8938,8939,8940,8941,8942,8943,2214,8944,8945,8946, # 8704
|
||||
8947,8948,8949,8950,8951,8952,8953,8954,8955,8956,8957,8958,8959,8960,8961,8962, # 8720
|
||||
8963,8964,8965,4884,8966,8967,8968,8969,8970,8971,8972,8973,8974,8975,8976,8977, # 8736
|
||||
8978,8979,8980,8981,8982,8983,8984,8985,8986,8987,8988,8989,8990,8991,8992,4885, # 8752
|
||||
8993,8994,8995,8996,8997,8998,8999,9000,9001,9002,9003,9004,9005,9006,9007,9008, # 8768
|
||||
9009,9010,9011,9012,9013,9014,9015,9016,9017,9018,9019,9020,9021,4182,9022,9023, # 8784
|
||||
9024,9025,9026,9027,9028,9029,9030,9031,9032,9033,9034,9035,9036,9037,9038,9039, # 8800
|
||||
9040,9041,9042,9043,9044,9045,9046,9047,9048,9049,9050,9051,9052,9053,9054,9055, # 8816
|
||||
9056,9057,9058,9059,9060,9061,9062,9063,4886,9064,9065,9066,9067,9068,9069,4887, # 8832
|
||||
9070,9071,9072,9073,9074,9075,9076,9077,9078,9079,9080,9081,9082,9083,9084,9085, # 8848
|
||||
9086,9087,9088,9089,9090,9091,9092,9093,9094,9095,9096,9097,9098,9099,9100,9101, # 8864
|
||||
9102,9103,9104,9105,9106,9107,9108,9109,9110,9111,9112,9113,9114,9115,9116,9117, # 8880
|
||||
9118,9119,9120,9121,9122,9123,9124,9125,9126,9127,9128,9129,9130,9131,9132,9133, # 8896
|
||||
9134,9135,9136,9137,9138,9139,9140,9141,3958,9142,9143,9144,9145,9146,9147,9148, # 8912
|
||||
9149,9150,9151,4888,9152,9153,9154,9155,9156,9157,9158,9159,9160,9161,9162,9163, # 8928
|
||||
9164,9165,9166,9167,9168,9169,9170,9171,9172,9173,9174,9175,4889,9176,9177,9178, # 8944
|
||||
9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194, # 8960
|
||||
9195,9196,9197,9198,9199,9200,9201,9202,9203,4890,9204,9205,9206,9207,9208,9209, # 8976
|
||||
9210,9211,9212,9213,9214,9215,9216,9217,9218,9219,9220,9221,9222,4466,9223,9224, # 8992
|
||||
9225,9226,9227,9228,9229,9230,9231,9232,9233,9234,9235,9236,9237,9238,9239,9240, # 9008
|
||||
9241,9242,9243,9244,9245,4891,9246,9247,9248,9249,9250,9251,9252,9253,9254,9255, # 9024
|
||||
9256,9257,4892,9258,9259,9260,9261,4893,4894,9262,9263,9264,9265,9266,9267,9268, # 9040
|
||||
9269,9270,9271,9272,9273,4467,9274,9275,9276,9277,9278,9279,9280,9281,9282,9283, # 9056
|
||||
9284,9285,3673,9286,9287,9288,9289,9290,9291,9292,9293,9294,9295,9296,9297,9298, # 9072
|
||||
9299,9300,9301,9302,9303,9304,9305,9306,9307,9308,9309,9310,9311,9312,9313,9314, # 9088
|
||||
9315,9316,9317,9318,9319,9320,9321,9322,4895,9323,9324,9325,9326,9327,9328,9329, # 9104
|
||||
9330,9331,9332,9333,9334,9335,9336,9337,9338,9339,9340,9341,9342,9343,9344,9345, # 9120
|
||||
9346,9347,4468,9348,9349,9350,9351,9352,9353,9354,9355,9356,9357,9358,9359,9360, # 9136
|
||||
9361,9362,9363,9364,9365,9366,9367,9368,9369,9370,9371,9372,9373,4896,9374,4469, # 9152
|
||||
9375,9376,9377,9378,9379,4897,9380,9381,9382,9383,9384,9385,9386,9387,9388,9389, # 9168
|
||||
9390,9391,9392,9393,9394,9395,9396,9397,9398,9399,9400,9401,9402,9403,9404,9405, # 9184
|
||||
9406,4470,9407,2751,9408,9409,3674,3552,9410,9411,9412,9413,9414,9415,9416,9417, # 9200
|
||||
9418,9419,9420,9421,4898,9422,9423,9424,9425,9426,9427,9428,9429,3959,9430,9431, # 9216
|
||||
9432,9433,9434,9435,9436,4471,9437,9438,9439,9440,9441,9442,9443,9444,9445,9446, # 9232
|
||||
9447,9448,9449,9450,3348,9451,9452,9453,9454,9455,9456,9457,9458,9459,9460,9461, # 9248
|
||||
9462,9463,9464,9465,9466,9467,9468,9469,9470,9471,9472,4899,9473,9474,9475,9476, # 9264
|
||||
9477,4900,9478,9479,9480,9481,9482,9483,9484,9485,9486,9487,9488,3349,9489,9490, # 9280
|
||||
9491,9492,9493,9494,9495,9496,9497,9498,9499,9500,9501,9502,9503,9504,9505,9506, # 9296
|
||||
9507,9508,9509,9510,9511,9512,9513,9514,9515,9516,9517,9518,9519,9520,4901,9521, # 9312
|
||||
9522,9523,9524,9525,9526,4902,9527,9528,9529,9530,9531,9532,9533,9534,9535,9536, # 9328
|
||||
9537,9538,9539,9540,9541,9542,9543,9544,9545,9546,9547,9548,9549,9550,9551,9552, # 9344
|
||||
9553,9554,9555,9556,9557,9558,9559,9560,9561,9562,9563,9564,9565,9566,9567,9568, # 9360
|
||||
9569,9570,9571,9572,9573,9574,9575,9576,9577,9578,9579,9580,9581,9582,9583,9584, # 9376
|
||||
3805,9585,9586,9587,9588,9589,9590,9591,9592,9593,9594,9595,9596,9597,9598,9599, # 9392
|
||||
9600,9601,9602,4903,9603,9604,9605,9606,9607,4904,9608,9609,9610,9611,9612,9613, # 9408
|
||||
9614,4905,9615,9616,9617,9618,9619,9620,9621,9622,9623,9624,9625,9626,9627,9628, # 9424
|
||||
9629,9630,9631,9632,4906,9633,9634,9635,9636,9637,9638,9639,9640,9641,9642,9643, # 9440
|
||||
4907,9644,9645,9646,9647,9648,9649,9650,9651,9652,9653,9654,9655,9656,9657,9658, # 9456
|
||||
9659,9660,9661,9662,9663,9664,9665,9666,9667,9668,9669,9670,9671,9672,4183,9673, # 9472
|
||||
9674,9675,9676,9677,4908,9678,9679,9680,9681,4909,9682,9683,9684,9685,9686,9687, # 9488
|
||||
9688,9689,9690,4910,9691,9692,9693,3675,9694,9695,9696,2945,9697,9698,9699,9700, # 9504
|
||||
9701,9702,9703,9704,9705,4911,9706,9707,9708,9709,9710,9711,9712,9713,9714,9715, # 9520
|
||||
9716,9717,9718,9719,9720,9721,9722,9723,9724,9725,9726,9727,9728,9729,9730,9731, # 9536
|
||||
9732,9733,9734,9735,4912,9736,9737,9738,9739,9740,4913,9741,9742,9743,9744,9745, # 9552
|
||||
9746,9747,9748,9749,9750,9751,9752,9753,9754,9755,9756,9757,9758,4914,9759,9760, # 9568
|
||||
9761,9762,9763,9764,9765,9766,9767,9768,9769,9770,9771,9772,9773,9774,9775,9776, # 9584
|
||||
9777,9778,9779,9780,9781,9782,4915,9783,9784,9785,9786,9787,9788,9789,9790,9791, # 9600
|
||||
9792,9793,4916,9794,9795,9796,9797,9798,9799,9800,9801,9802,9803,9804,9805,9806, # 9616
|
||||
9807,9808,9809,9810,9811,9812,9813,9814,9815,9816,9817,9818,9819,9820,9821,9822, # 9632
|
||||
9823,9824,9825,9826,9827,9828,9829,9830,9831,9832,9833,9834,9835,9836,9837,9838, # 9648
|
||||
9839,9840,9841,9842,9843,9844,9845,9846,9847,9848,9849,9850,9851,9852,9853,9854, # 9664
|
||||
9855,9856,9857,9858,9859,9860,9861,9862,9863,9864,9865,9866,9867,9868,4917,9869, # 9680
|
||||
9870,9871,9872,9873,9874,9875,9876,9877,9878,9879,9880,9881,9882,9883,9884,9885, # 9696
|
||||
9886,9887,9888,9889,9890,9891,9892,4472,9893,9894,9895,9896,9897,3806,9898,9899, # 9712
|
||||
9900,9901,9902,9903,9904,9905,9906,9907,9908,9909,9910,9911,9912,9913,9914,4918, # 9728
|
||||
9915,9916,9917,4919,9918,9919,9920,9921,4184,9922,9923,9924,9925,9926,9927,9928, # 9744
|
||||
9929,9930,9931,9932,9933,9934,9935,9936,9937,9938,9939,9940,9941,9942,9943,9944, # 9760
|
||||
9945,9946,4920,9947,9948,9949,9950,9951,9952,9953,9954,9955,4185,9956,9957,9958, # 9776
|
||||
9959,9960,9961,9962,9963,9964,9965,4921,9966,9967,9968,4473,9969,9970,9971,9972, # 9792
|
||||
9973,9974,9975,9976,9977,4474,9978,9979,9980,9981,9982,9983,9984,9985,9986,9987, # 9808
|
||||
9988,9989,9990,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000,10001,10002,10003, # 9824
|
||||
10004,10005,10006,10007,10008,10009,10010,10011,10012,10013,10014,10015,10016,10017,10018,10019, # 9840
|
||||
10020,10021,4922,10022,4923,10023,10024,10025,10026,10027,10028,10029,10030,10031,10032,10033, # 9856
|
||||
10034,10035,10036,10037,10038,10039,10040,10041,10042,10043,10044,10045,10046,10047,10048,4924, # 9872
|
||||
10049,10050,10051,10052,10053,10054,10055,10056,10057,10058,10059,10060,10061,10062,10063,10064, # 9888
|
||||
10065,10066,10067,10068,10069,10070,10071,10072,10073,10074,10075,10076,10077,10078,10079,10080, # 9904
|
||||
10081,10082,10083,10084,10085,10086,10087,4475,10088,10089,10090,10091,10092,10093,10094,10095, # 9920
|
||||
10096,10097,4476,10098,10099,10100,10101,10102,10103,10104,10105,10106,10107,10108,10109,10110, # 9936
|
||||
10111,2174,10112,10113,10114,10115,10116,10117,10118,10119,10120,10121,10122,10123,10124,10125, # 9952
|
||||
10126,10127,10128,10129,10130,10131,10132,10133,10134,10135,10136,10137,10138,10139,10140,3807, # 9968
|
||||
4186,4925,10141,10142,10143,10144,10145,10146,10147,4477,4187,10148,10149,10150,10151,10152, # 9984
|
||||
10153,4188,10154,10155,10156,10157,10158,10159,10160,10161,4926,10162,10163,10164,10165,10166, #10000
|
||||
10167,10168,10169,10170,10171,10172,10173,10174,10175,10176,10177,10178,10179,10180,10181,10182, #10016
|
||||
10183,10184,10185,10186,10187,10188,10189,10190,10191,10192,3203,10193,10194,10195,10196,10197, #10032
|
||||
10198,10199,10200,4478,10201,10202,10203,10204,4479,10205,10206,10207,10208,10209,10210,10211, #10048
|
||||
10212,10213,10214,10215,10216,10217,10218,10219,10220,10221,10222,10223,10224,10225,10226,10227, #10064
|
||||
10228,10229,10230,10231,10232,10233,10234,4927,10235,10236,10237,10238,10239,10240,10241,10242, #10080
|
||||
10243,10244,10245,10246,10247,10248,10249,10250,10251,10252,10253,10254,10255,10256,10257,10258, #10096
|
||||
10259,10260,10261,10262,10263,10264,10265,10266,10267,10268,10269,10270,10271,10272,10273,4480, #10112
|
||||
4928,4929,10274,10275,10276,10277,10278,10279,10280,10281,10282,10283,10284,10285,10286,10287, #10128
|
||||
10288,10289,10290,10291,10292,10293,10294,10295,10296,10297,10298,10299,10300,10301,10302,10303, #10144
|
||||
10304,10305,10306,10307,10308,10309,10310,10311,10312,10313,10314,10315,10316,10317,10318,10319, #10160
|
||||
10320,10321,10322,10323,10324,10325,10326,10327,10328,10329,10330,10331,10332,10333,10334,4930, #10176
|
||||
10335,10336,10337,10338,10339,10340,10341,10342,4931,10343,10344,10345,10346,10347,10348,10349, #10192
|
||||
10350,10351,10352,10353,10354,10355,3088,10356,2786,10357,10358,10359,10360,4189,10361,10362, #10208
|
||||
10363,10364,10365,10366,10367,10368,10369,10370,10371,10372,10373,10374,10375,4932,10376,10377, #10224
|
||||
10378,10379,10380,10381,10382,10383,10384,10385,10386,10387,10388,10389,10390,10391,10392,4933, #10240
|
||||
10393,10394,10395,4934,10396,10397,10398,10399,10400,10401,10402,10403,10404,10405,10406,10407, #10256
|
||||
10408,10409,10410,10411,10412,3446,10413,10414,10415,10416,10417,10418,10419,10420,10421,10422, #10272
|
||||
10423,4935,10424,10425,10426,10427,10428,10429,10430,4936,10431,10432,10433,10434,10435,10436, #10288
|
||||
10437,10438,10439,10440,10441,10442,10443,4937,10444,10445,10446,10447,4481,10448,10449,10450, #10304
|
||||
10451,10452,10453,10454,10455,10456,10457,10458,10459,10460,10461,10462,10463,10464,10465,10466, #10320
|
||||
10467,10468,10469,10470,10471,10472,10473,10474,10475,10476,10477,10478,10479,10480,10481,10482, #10336
|
||||
10483,10484,10485,10486,10487,10488,10489,10490,10491,10492,10493,10494,10495,10496,10497,10498, #10352
|
||||
10499,10500,10501,10502,10503,10504,10505,4938,10506,10507,10508,10509,10510,2552,10511,10512, #10368
|
||||
10513,10514,10515,10516,3447,10517,10518,10519,10520,10521,10522,10523,10524,10525,10526,10527, #10384
|
||||
10528,10529,10530,10531,10532,10533,10534,10535,10536,10537,10538,10539,10540,10541,10542,10543, #10400
|
||||
4482,10544,4939,10545,10546,10547,10548,10549,10550,10551,10552,10553,10554,10555,10556,10557, #10416
|
||||
10558,10559,10560,10561,10562,10563,10564,10565,10566,10567,3676,4483,10568,10569,10570,10571, #10432
|
||||
10572,3448,10573,10574,10575,10576,10577,10578,10579,10580,10581,10582,10583,10584,10585,10586, #10448
|
||||
10587,10588,10589,10590,10591,10592,10593,10594,10595,10596,10597,10598,10599,10600,10601,10602, #10464
|
||||
10603,10604,10605,10606,10607,10608,10609,10610,10611,10612,10613,10614,10615,10616,10617,10618, #10480
|
||||
10619,10620,10621,10622,10623,10624,10625,10626,10627,4484,10628,10629,10630,10631,10632,4940, #10496
|
||||
10633,10634,10635,10636,10637,10638,10639,10640,10641,10642,10643,10644,10645,10646,10647,10648, #10512
|
||||
10649,10650,10651,10652,10653,10654,10655,10656,4941,10657,10658,10659,2599,10660,10661,10662, #10528
|
||||
10663,10664,10665,10666,3089,10667,10668,10669,10670,10671,10672,10673,10674,10675,10676,10677, #10544
|
||||
10678,10679,10680,4942,10681,10682,10683,10684,10685,10686,10687,10688,10689,10690,10691,10692, #10560
|
||||
10693,10694,10695,10696,10697,4485,10698,10699,10700,10701,10702,10703,10704,4943,10705,3677, #10576
|
||||
10706,10707,10708,10709,10710,10711,10712,4944,10713,10714,10715,10716,10717,10718,10719,10720, #10592
|
||||
10721,10722,10723,10724,10725,10726,10727,10728,4945,10729,10730,10731,10732,10733,10734,10735, #10608
|
||||
10736,10737,10738,10739,10740,10741,10742,10743,10744,10745,10746,10747,10748,10749,10750,10751, #10624
|
||||
10752,10753,10754,10755,10756,10757,10758,10759,10760,10761,4946,10762,10763,10764,10765,10766, #10640
|
||||
10767,4947,4948,10768,10769,10770,10771,10772,10773,10774,10775,10776,10777,10778,10779,10780, #10656
|
||||
10781,10782,10783,10784,10785,10786,10787,10788,10789,10790,10791,10792,10793,10794,10795,10796, #10672
|
||||
10797,10798,10799,10800,10801,10802,10803,10804,10805,10806,10807,10808,10809,10810,10811,10812, #10688
|
||||
10813,10814,10815,10816,10817,10818,10819,10820,10821,10822,10823,10824,10825,10826,10827,10828, #10704
|
||||
10829,10830,10831,10832,10833,10834,10835,10836,10837,10838,10839,10840,10841,10842,10843,10844, #10720
|
||||
10845,10846,10847,10848,10849,10850,10851,10852,10853,10854,10855,10856,10857,10858,10859,10860, #10736
|
||||
10861,10862,10863,10864,10865,10866,10867,10868,10869,10870,10871,10872,10873,10874,10875,10876, #10752
|
||||
10877,10878,4486,10879,10880,10881,10882,10883,10884,10885,4949,10886,10887,10888,10889,10890, #10768
|
||||
10891,10892,10893,10894,10895,10896,10897,10898,10899,10900,10901,10902,10903,10904,10905,10906, #10784
|
||||
10907,10908,10909,10910,10911,10912,10913,10914,10915,10916,10917,10918,10919,4487,10920,10921, #10800
|
||||
10922,10923,10924,10925,10926,10927,10928,10929,10930,10931,10932,4950,10933,10934,10935,10936, #10816
|
||||
10937,10938,10939,10940,10941,10942,10943,10944,10945,10946,10947,10948,10949,4488,10950,10951, #10832
|
||||
10952,10953,10954,10955,10956,10957,10958,10959,4190,10960,10961,10962,10963,10964,10965,10966, #10848
|
||||
10967,10968,10969,10970,10971,10972,10973,10974,10975,10976,10977,10978,10979,10980,10981,10982, #10864
|
||||
10983,10984,10985,10986,10987,10988,10989,10990,10991,10992,10993,10994,10995,10996,10997,10998, #10880
|
||||
10999,11000,11001,11002,11003,11004,11005,11006,3960,11007,11008,11009,11010,11011,11012,11013, #10896
|
||||
11014,11015,11016,11017,11018,11019,11020,11021,11022,11023,11024,11025,11026,11027,11028,11029, #10912
|
||||
11030,11031,11032,4951,11033,11034,11035,11036,11037,11038,11039,11040,11041,11042,11043,11044, #10928
|
||||
11045,11046,11047,4489,11048,11049,11050,11051,4952,11052,11053,11054,11055,11056,11057,11058, #10944
|
||||
4953,11059,11060,11061,11062,11063,11064,11065,11066,11067,11068,11069,11070,11071,4954,11072, #10960
|
||||
11073,11074,11075,11076,11077,11078,11079,11080,11081,11082,11083,11084,11085,11086,11087,11088, #10976
|
||||
11089,11090,11091,11092,11093,11094,11095,11096,11097,11098,11099,11100,11101,11102,11103,11104, #10992
|
||||
11105,11106,11107,11108,11109,11110,11111,11112,11113,11114,11115,3808,11116,11117,11118,11119, #11008
|
||||
11120,11121,11122,11123,11124,11125,11126,11127,11128,11129,11130,11131,11132,11133,11134,4955, #11024
|
||||
11135,11136,11137,11138,11139,11140,11141,11142,11143,11144,11145,11146,11147,11148,11149,11150, #11040
|
||||
11151,11152,11153,11154,11155,11156,11157,11158,11159,11160,11161,4956,11162,11163,11164,11165, #11056
|
||||
11166,11167,11168,11169,11170,11171,11172,11173,11174,11175,11176,11177,11178,11179,11180,4957, #11072
|
||||
11181,11182,11183,11184,11185,11186,4958,11187,11188,11189,11190,11191,11192,11193,11194,11195, #11088
|
||||
11196,11197,11198,11199,11200,3678,11201,11202,11203,11204,11205,11206,4191,11207,11208,11209, #11104
|
||||
11210,11211,11212,11213,11214,11215,11216,11217,11218,11219,11220,11221,11222,11223,11224,11225, #11120
|
||||
11226,11227,11228,11229,11230,11231,11232,11233,11234,11235,11236,11237,11238,11239,11240,11241, #11136
|
||||
11242,11243,11244,11245,11246,11247,11248,11249,11250,11251,4959,11252,11253,11254,11255,11256, #11152
|
||||
11257,11258,11259,11260,11261,11262,11263,11264,11265,11266,11267,11268,11269,11270,11271,11272, #11168
|
||||
11273,11274,11275,11276,11277,11278,11279,11280,11281,11282,11283,11284,11285,11286,11287,11288, #11184
|
||||
11289,11290,11291,11292,11293,11294,11295,11296,11297,11298,11299,11300,11301,11302,11303,11304, #11200
|
||||
11305,11306,11307,11308,11309,11310,11311,11312,11313,11314,3679,11315,11316,11317,11318,4490, #11216
|
||||
11319,11320,11321,11322,11323,11324,11325,11326,11327,11328,11329,11330,11331,11332,11333,11334, #11232
|
||||
11335,11336,11337,11338,11339,11340,11341,11342,11343,11344,11345,11346,11347,4960,11348,11349, #11248
|
||||
11350,11351,11352,11353,11354,11355,11356,11357,11358,11359,11360,11361,11362,11363,11364,11365, #11264
|
||||
11366,11367,11368,11369,11370,11371,11372,11373,11374,11375,11376,11377,3961,4961,11378,11379, #11280
|
||||
11380,11381,11382,11383,11384,11385,11386,11387,11388,11389,11390,11391,11392,11393,11394,11395, #11296
|
||||
11396,11397,4192,11398,11399,11400,11401,11402,11403,11404,11405,11406,11407,11408,11409,11410, #11312
|
||||
11411,4962,11412,11413,11414,11415,11416,11417,11418,11419,11420,11421,11422,11423,11424,11425, #11328
|
||||
11426,11427,11428,11429,11430,11431,11432,11433,11434,11435,11436,11437,11438,11439,11440,11441, #11344
|
||||
11442,11443,11444,11445,11446,11447,11448,11449,11450,11451,11452,11453,11454,11455,11456,11457, #11360
|
||||
11458,11459,11460,11461,11462,11463,11464,11465,11466,11467,11468,11469,4963,11470,11471,4491, #11376
|
||||
11472,11473,11474,11475,4964,11476,11477,11478,11479,11480,11481,11482,11483,11484,11485,11486, #11392
|
||||
11487,11488,11489,11490,11491,11492,4965,11493,11494,11495,11496,11497,11498,11499,11500,11501, #11408
|
||||
11502,11503,11504,11505,11506,11507,11508,11509,11510,11511,11512,11513,11514,11515,11516,11517, #11424
|
||||
11518,11519,11520,11521,11522,11523,11524,11525,11526,11527,11528,11529,3962,11530,11531,11532, #11440
|
||||
11533,11534,11535,11536,11537,11538,11539,11540,11541,11542,11543,11544,11545,11546,11547,11548, #11456
|
||||
11549,11550,11551,11552,11553,11554,11555,11556,11557,11558,11559,11560,11561,11562,11563,11564, #11472
|
||||
4193,4194,11565,11566,11567,11568,11569,11570,11571,11572,11573,11574,11575,11576,11577,11578, #11488
|
||||
11579,11580,11581,11582,11583,11584,11585,11586,11587,11588,11589,11590,11591,4966,4195,11592, #11504
|
||||
11593,11594,11595,11596,11597,11598,11599,11600,11601,11602,11603,11604,3090,11605,11606,11607, #11520
|
||||
11608,11609,11610,4967,11611,11612,11613,11614,11615,11616,11617,11618,11619,11620,11621,11622, #11536
|
||||
11623,11624,11625,11626,11627,11628,11629,11630,11631,11632,11633,11634,11635,11636,11637,11638, #11552
|
||||
11639,11640,11641,11642,11643,11644,11645,11646,11647,11648,11649,11650,11651,11652,11653,11654, #11568
|
||||
11655,11656,11657,11658,11659,11660,11661,11662,11663,11664,11665,11666,11667,11668,11669,11670, #11584
|
||||
11671,11672,11673,11674,4968,11675,11676,11677,11678,11679,11680,11681,11682,11683,11684,11685, #11600
|
||||
11686,11687,11688,11689,11690,11691,11692,11693,3809,11694,11695,11696,11697,11698,11699,11700, #11616
|
||||
11701,11702,11703,11704,11705,11706,11707,11708,11709,11710,11711,11712,11713,11714,11715,11716, #11632
|
||||
11717,11718,3553,11719,11720,11721,11722,11723,11724,11725,11726,11727,11728,11729,11730,4969, #11648
|
||||
11731,11732,11733,11734,11735,11736,11737,11738,11739,11740,4492,11741,11742,11743,11744,11745, #11664
|
||||
11746,11747,11748,11749,11750,11751,11752,4970,11753,11754,11755,11756,11757,11758,11759,11760, #11680
|
||||
11761,11762,11763,11764,11765,11766,11767,11768,11769,11770,11771,11772,11773,11774,11775,11776, #11696
|
||||
11777,11778,11779,11780,11781,11782,11783,11784,11785,11786,11787,11788,11789,11790,4971,11791, #11712
|
||||
11792,11793,11794,11795,11796,11797,4972,11798,11799,11800,11801,11802,11803,11804,11805,11806, #11728
|
||||
11807,11808,11809,11810,4973,11811,11812,11813,11814,11815,11816,11817,11818,11819,11820,11821, #11744
|
||||
11822,11823,11824,11825,11826,11827,11828,11829,11830,11831,11832,11833,11834,3680,3810,11835, #11760
|
||||
11836,4974,11837,11838,11839,11840,11841,11842,11843,11844,11845,11846,11847,11848,11849,11850, #11776
|
||||
11851,11852,11853,11854,11855,11856,11857,11858,11859,11860,11861,11862,11863,11864,11865,11866, #11792
|
||||
11867,11868,11869,11870,11871,11872,11873,11874,11875,11876,11877,11878,11879,11880,11881,11882, #11808
|
||||
11883,11884,4493,11885,11886,11887,11888,11889,11890,11891,11892,11893,11894,11895,11896,11897, #11824
|
||||
11898,11899,11900,11901,11902,11903,11904,11905,11906,11907,11908,11909,11910,11911,11912,11913, #11840
|
||||
11914,11915,4975,11916,11917,11918,11919,11920,11921,11922,11923,11924,11925,11926,11927,11928, #11856
|
||||
11929,11930,11931,11932,11933,11934,11935,11936,11937,11938,11939,11940,11941,11942,11943,11944, #11872
|
||||
11945,11946,11947,11948,11949,4976,11950,11951,11952,11953,11954,11955,11956,11957,11958,11959, #11888
|
||||
11960,11961,11962,11963,11964,11965,11966,11967,11968,11969,11970,11971,11972,11973,11974,11975, #11904
|
||||
11976,11977,11978,11979,11980,11981,11982,11983,11984,11985,11986,11987,4196,11988,11989,11990, #11920
|
||||
11991,11992,4977,11993,11994,11995,11996,11997,11998,11999,12000,12001,12002,12003,12004,12005, #11936
|
||||
12006,12007,12008,12009,12010,12011,12012,12013,12014,12015,12016,12017,12018,12019,12020,12021, #11952
|
||||
12022,12023,12024,12025,12026,12027,12028,12029,12030,12031,12032,12033,12034,12035,12036,12037, #11968
|
||||
12038,12039,12040,12041,12042,12043,12044,12045,12046,12047,12048,12049,12050,12051,12052,12053, #11984
|
||||
12054,12055,12056,12057,12058,12059,12060,12061,4978,12062,12063,12064,12065,12066,12067,12068, #12000
|
||||
12069,12070,12071,12072,12073,12074,12075,12076,12077,12078,12079,12080,12081,12082,12083,12084, #12016
|
||||
12085,12086,12087,12088,12089,12090,12091,12092,12093,12094,12095,12096,12097,12098,12099,12100, #12032
|
||||
12101,12102,12103,12104,12105,12106,12107,12108,12109,12110,12111,12112,12113,12114,12115,12116, #12048
|
||||
12117,12118,12119,12120,12121,12122,12123,4979,12124,12125,12126,12127,12128,4197,12129,12130, #12064
|
||||
12131,12132,12133,12134,12135,12136,12137,12138,12139,12140,12141,12142,12143,12144,12145,12146, #12080
|
||||
12147,12148,12149,12150,12151,12152,12153,12154,4980,12155,12156,12157,12158,12159,12160,4494, #12096
|
||||
12161,12162,12163,12164,3811,12165,12166,12167,12168,12169,4495,12170,12171,4496,12172,12173, #12112
|
||||
12174,12175,12176,3812,12177,12178,12179,12180,12181,12182,12183,12184,12185,12186,12187,12188, #12128
|
||||
12189,12190,12191,12192,12193,12194,12195,12196,12197,12198,12199,12200,12201,12202,12203,12204, #12144
|
||||
12205,12206,12207,12208,12209,12210,12211,12212,12213,12214,12215,12216,12217,12218,12219,12220, #12160
|
||||
12221,4981,12222,12223,12224,12225,12226,12227,12228,12229,12230,12231,12232,12233,12234,12235, #12176
|
||||
4982,12236,12237,12238,12239,12240,12241,12242,12243,12244,12245,4983,12246,12247,12248,12249, #12192
|
||||
4984,12250,12251,12252,12253,12254,12255,12256,12257,12258,12259,12260,12261,12262,12263,12264, #12208
|
||||
4985,12265,4497,12266,12267,12268,12269,12270,12271,12272,12273,12274,12275,12276,12277,12278, #12224
|
||||
12279,12280,12281,12282,12283,12284,12285,12286,12287,4986,12288,12289,12290,12291,12292,12293, #12240
|
||||
12294,12295,12296,2473,12297,12298,12299,12300,12301,12302,12303,12304,12305,12306,12307,12308, #12256
|
||||
12309,12310,12311,12312,12313,12314,12315,12316,12317,12318,12319,3963,12320,12321,12322,12323, #12272
|
||||
12324,12325,12326,12327,12328,12329,12330,12331,12332,4987,12333,12334,12335,12336,12337,12338, #12288
|
||||
12339,12340,12341,12342,12343,12344,12345,12346,12347,12348,12349,12350,12351,12352,12353,12354, #12304
|
||||
12355,12356,12357,12358,12359,3964,12360,12361,12362,12363,12364,12365,12366,12367,12368,12369, #12320
|
||||
12370,3965,12371,12372,12373,12374,12375,12376,12377,12378,12379,12380,12381,12382,12383,12384, #12336
|
||||
12385,12386,12387,12388,12389,12390,12391,12392,12393,12394,12395,12396,12397,12398,12399,12400, #12352
|
||||
12401,12402,12403,12404,12405,12406,12407,12408,4988,12409,12410,12411,12412,12413,12414,12415, #12368
|
||||
12416,12417,12418,12419,12420,12421,12422,12423,12424,12425,12426,12427,12428,12429,12430,12431, #12384
|
||||
12432,12433,12434,12435,12436,12437,12438,3554,12439,12440,12441,12442,12443,12444,12445,12446, #12400
|
||||
12447,12448,12449,12450,12451,12452,12453,12454,12455,12456,12457,12458,12459,12460,12461,12462, #12416
|
||||
12463,12464,4989,12465,12466,12467,12468,12469,12470,12471,12472,12473,12474,12475,12476,12477, #12432
|
||||
12478,12479,12480,4990,12481,12482,12483,12484,12485,12486,12487,12488,12489,4498,12490,12491, #12448
|
||||
12492,12493,12494,12495,12496,12497,12498,12499,12500,12501,12502,12503,12504,12505,12506,12507, #12464
|
||||
12508,12509,12510,12511,12512,12513,12514,12515,12516,12517,12518,12519,12520,12521,12522,12523, #12480
|
||||
12524,12525,12526,12527,12528,12529,12530,12531,12532,12533,12534,12535,12536,12537,12538,12539, #12496
|
||||
12540,12541,12542,12543,12544,12545,12546,12547,12548,12549,12550,12551,4991,12552,12553,12554, #12512
|
||||
12555,12556,12557,12558,12559,12560,12561,12562,12563,12564,12565,12566,12567,12568,12569,12570, #12528
|
||||
12571,12572,12573,12574,12575,12576,12577,12578,3036,12579,12580,12581,12582,12583,3966,12584, #12544
|
||||
12585,12586,12587,12588,12589,12590,12591,12592,12593,12594,12595,12596,12597,12598,12599,12600, #12560
|
||||
12601,12602,12603,12604,12605,12606,12607,12608,12609,12610,12611,12612,12613,12614,12615,12616, #12576
|
||||
12617,12618,12619,12620,12621,12622,12623,12624,12625,12626,12627,12628,12629,12630,12631,12632, #12592
|
||||
12633,12634,12635,12636,12637,12638,12639,12640,12641,12642,12643,12644,12645,12646,4499,12647, #12608
|
||||
12648,12649,12650,12651,12652,12653,12654,12655,12656,12657,12658,12659,12660,12661,12662,12663, #12624
|
||||
12664,12665,12666,12667,12668,12669,12670,12671,12672,12673,12674,12675,12676,12677,12678,12679, #12640
|
||||
12680,12681,12682,12683,12684,12685,12686,12687,12688,12689,12690,12691,12692,12693,12694,12695, #12656
|
||||
12696,12697,12698,4992,12699,12700,12701,12702,12703,12704,12705,12706,12707,12708,12709,12710, #12672
|
||||
12711,12712,12713,12714,12715,12716,12717,12718,12719,12720,12721,12722,12723,12724,12725,12726, #12688
|
||||
12727,12728,12729,12730,12731,12732,12733,12734,12735,12736,12737,12738,12739,12740,12741,12742, #12704
|
||||
12743,12744,12745,12746,12747,12748,12749,12750,12751,12752,12753,12754,12755,12756,12757,12758, #12720
|
||||
12759,12760,12761,12762,12763,12764,12765,12766,12767,12768,12769,12770,12771,12772,12773,12774, #12736
|
||||
12775,12776,12777,12778,4993,2175,12779,12780,12781,12782,12783,12784,12785,12786,4500,12787, #12752
|
||||
12788,12789,12790,12791,12792,12793,12794,12795,12796,12797,12798,12799,12800,12801,12802,12803, #12768
|
||||
12804,12805,12806,12807,12808,12809,12810,12811,12812,12813,12814,12815,12816,12817,12818,12819, #12784
|
||||
12820,12821,12822,12823,12824,12825,12826,4198,3967,12827,12828,12829,12830,12831,12832,12833, #12800
|
||||
12834,12835,12836,12837,12838,12839,12840,12841,12842,12843,12844,12845,12846,12847,12848,12849, #12816
|
||||
12850,12851,12852,12853,12854,12855,12856,12857,12858,12859,12860,12861,4199,12862,12863,12864, #12832
|
||||
12865,12866,12867,12868,12869,12870,12871,12872,12873,12874,12875,12876,12877,12878,12879,12880, #12848
|
||||
12881,12882,12883,12884,12885,12886,12887,4501,12888,12889,12890,12891,12892,12893,12894,12895, #12864
|
||||
12896,12897,12898,12899,12900,12901,12902,12903,12904,12905,12906,12907,12908,12909,12910,12911, #12880
|
||||
12912,4994,12913,12914,12915,12916,12917,12918,12919,12920,12921,12922,12923,12924,12925,12926, #12896
|
||||
12927,12928,12929,12930,12931,12932,12933,12934,12935,12936,12937,12938,12939,12940,12941,12942, #12912
|
||||
12943,12944,12945,12946,12947,12948,12949,12950,12951,12952,12953,12954,12955,12956,1772,12957, #12928
|
||||
12958,12959,12960,12961,12962,12963,12964,12965,12966,12967,12968,12969,12970,12971,12972,12973, #12944
|
||||
12974,12975,12976,12977,12978,12979,12980,12981,12982,12983,12984,12985,12986,12987,12988,12989, #12960
|
||||
12990,12991,12992,12993,12994,12995,12996,12997,4502,12998,4503,12999,13000,13001,13002,13003, #12976
|
||||
4504,13004,13005,13006,13007,13008,13009,13010,13011,13012,13013,13014,13015,13016,13017,13018, #12992
|
||||
13019,13020,13021,13022,13023,13024,13025,13026,13027,13028,13029,3449,13030,13031,13032,13033, #13008
|
||||
13034,13035,13036,13037,13038,13039,13040,13041,13042,13043,13044,13045,13046,13047,13048,13049, #13024
|
||||
13050,13051,13052,13053,13054,13055,13056,13057,13058,13059,13060,13061,13062,13063,13064,13065, #13040
|
||||
13066,13067,13068,13069,13070,13071,13072,13073,13074,13075,13076,13077,13078,13079,13080,13081, #13056
|
||||
13082,13083,13084,13085,13086,13087,13088,13089,13090,13091,13092,13093,13094,13095,13096,13097, #13072
|
||||
13098,13099,13100,13101,13102,13103,13104,13105,13106,13107,13108,13109,13110,13111,13112,13113, #13088
|
||||
13114,13115,13116,13117,13118,3968,13119,4995,13120,13121,13122,13123,13124,13125,13126,13127, #13104
|
||||
4505,13128,13129,13130,13131,13132,13133,13134,4996,4506,13135,13136,13137,13138,13139,4997, #13120
|
||||
13140,13141,13142,13143,13144,13145,13146,13147,13148,13149,13150,13151,13152,13153,13154,13155, #13136
|
||||
13156,13157,13158,13159,4998,13160,13161,13162,13163,13164,13165,13166,13167,13168,13169,13170, #13152
|
||||
13171,13172,13173,13174,13175,13176,4999,13177,13178,13179,13180,13181,13182,13183,13184,13185, #13168
|
||||
13186,13187,13188,13189,13190,13191,13192,13193,13194,13195,13196,13197,13198,13199,13200,13201, #13184
|
||||
13202,13203,13204,13205,13206,5000,13207,13208,13209,13210,13211,13212,13213,13214,13215,13216, #13200
|
||||
13217,13218,13219,13220,13221,13222,13223,13224,13225,13226,13227,4200,5001,13228,13229,13230, #13216
|
||||
13231,13232,13233,13234,13235,13236,13237,13238,13239,13240,3969,13241,13242,13243,13244,3970, #13232
|
||||
13245,13246,13247,13248,13249,13250,13251,13252,13253,13254,13255,13256,13257,13258,13259,13260, #13248
|
||||
13261,13262,13263,13264,13265,13266,13267,13268,3450,13269,13270,13271,13272,13273,13274,13275, #13264
|
||||
13276,5002,13277,13278,13279,13280,13281,13282,13283,13284,13285,13286,13287,13288,13289,13290, #13280
|
||||
13291,13292,13293,13294,13295,13296,13297,13298,13299,13300,13301,13302,3813,13303,13304,13305, #13296
|
||||
13306,13307,13308,13309,13310,13311,13312,13313,13314,13315,13316,13317,13318,13319,13320,13321, #13312
|
||||
13322,13323,13324,13325,13326,13327,13328,4507,13329,13330,13331,13332,13333,13334,13335,13336, #13328
|
||||
13337,13338,13339,13340,13341,5003,13342,13343,13344,13345,13346,13347,13348,13349,13350,13351, #13344
|
||||
13352,13353,13354,13355,13356,13357,13358,13359,13360,13361,13362,13363,13364,13365,13366,13367, #13360
|
||||
5004,13368,13369,13370,13371,13372,13373,13374,13375,13376,13377,13378,13379,13380,13381,13382, #13376
|
||||
13383,13384,13385,13386,13387,13388,13389,13390,13391,13392,13393,13394,13395,13396,13397,13398, #13392
|
||||
13399,13400,13401,13402,13403,13404,13405,13406,13407,13408,13409,13410,13411,13412,13413,13414, #13408
|
||||
13415,13416,13417,13418,13419,13420,13421,13422,13423,13424,13425,13426,13427,13428,13429,13430, #13424
|
||||
13431,13432,4508,13433,13434,13435,4201,13436,13437,13438,13439,13440,13441,13442,13443,13444, #13440
|
||||
13445,13446,13447,13448,13449,13450,13451,13452,13453,13454,13455,13456,13457,5005,13458,13459, #13456
|
||||
13460,13461,13462,13463,13464,13465,13466,13467,13468,13469,13470,4509,13471,13472,13473,13474, #13472
|
||||
13475,13476,13477,13478,13479,13480,13481,13482,13483,13484,13485,13486,13487,13488,13489,13490, #13488
|
||||
13491,13492,13493,13494,13495,13496,13497,13498,13499,13500,13501,13502,13503,13504,13505,13506, #13504
|
||||
13507,13508,13509,13510,13511,13512,13513,13514,13515,13516,13517,13518,13519,13520,13521,13522, #13520
|
||||
13523,13524,13525,13526,13527,13528,13529,13530,13531,13532,13533,13534,13535,13536,13537,13538, #13536
|
||||
13539,13540,13541,13542,13543,13544,13545,13546,13547,13548,13549,13550,13551,13552,13553,13554, #13552
|
||||
13555,13556,13557,13558,13559,13560,13561,13562,13563,13564,13565,13566,13567,13568,13569,13570, #13568
|
||||
13571,13572,13573,13574,13575,13576,13577,13578,13579,13580,13581,13582,13583,13584,13585,13586, #13584
|
||||
13587,13588,13589,13590,13591,13592,13593,13594,13595,13596,13597,13598,13599,13600,13601,13602, #13600
|
||||
13603,13604,13605,13606,13607,13608,13609,13610,13611,13612,13613,13614,13615,13616,13617,13618, #13616
|
||||
13619,13620,13621,13622,13623,13624,13625,13626,13627,13628,13629,13630,13631,13632,13633,13634, #13632
|
||||
13635,13636,13637,13638,13639,13640,13641,13642,5006,13643,13644,13645,13646,13647,13648,13649, #13648
|
||||
13650,13651,5007,13652,13653,13654,13655,13656,13657,13658,13659,13660,13661,13662,13663,13664, #13664
|
||||
13665,13666,13667,13668,13669,13670,13671,13672,13673,13674,13675,13676,13677,13678,13679,13680, #13680
|
||||
13681,13682,13683,13684,13685,13686,13687,13688,13689,13690,13691,13692,13693,13694,13695,13696, #13696
|
||||
13697,13698,13699,13700,13701,13702,13703,13704,13705,13706,13707,13708,13709,13710,13711,13712, #13712
|
||||
13713,13714,13715,13716,13717,13718,13719,13720,13721,13722,13723,13724,13725,13726,13727,13728, #13728
|
||||
13729,13730,13731,13732,13733,13734,13735,13736,13737,13738,13739,13740,13741,13742,13743,13744, #13744
|
||||
13745,13746,13747,13748,13749,13750,13751,13752,13753,13754,13755,13756,13757,13758,13759,13760, #13760
|
||||
13761,13762,13763,13764,13765,13766,13767,13768,13769,13770,13771,13772,13773,13774,3273,13775, #13776
|
||||
13776,13777,13778,13779,13780,13781,13782,13783,13784,13785,13786,13787,13788,13789,13790,13791, #13792
|
||||
13792,13793,13794,13795,13796,13797,13798,13799,13800,13801,13802,13803,13804,13805,13806,13807, #13808
|
||||
13808,13809,13810,13811,13812,13813,13814,13815,13816,13817,13818,13819,13820,13821,13822,13823, #13824
|
||||
13824,13825,13826,13827,13828,13829,13830,13831,13832,13833,13834,13835,13836,13837,13838,13839, #13840
|
||||
13840,13841,13842,13843,13844,13845,13846,13847,13848,13849,13850,13851,13852,13853,13854,13855, #13856
|
||||
13856,13857,13858,13859,13860,13861,13862,13863,13864,13865,13866,13867,13868,13869,13870,13871, #13872
|
||||
13872,13873,13874,13875,13876,13877,13878,13879,13880,13881,13882,13883,13884,13885,13886,13887, #13888
|
||||
13888,13889,13890,13891,13892,13893,13894,13895,13896,13897,13898,13899,13900,13901,13902,13903, #13904
|
||||
13904,13905,13906,13907,13908,13909,13910,13911,13912,13913,13914,13915,13916,13917,13918,13919, #13920
|
||||
13920,13921,13922,13923,13924,13925,13926,13927,13928,13929,13930,13931,13932,13933,13934,13935, #13936
|
||||
13936,13937,13938,13939,13940,13941,13942,13943,13944,13945,13946,13947,13948,13949,13950,13951, #13952
|
||||
13952,13953,13954,13955,13956,13957,13958,13959,13960,13961,13962,13963,13964,13965,13966,13967, #13968
|
||||
13968,13969,13970,13971,13972) #13973
|
||||
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376
|
||||
)
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -28,15 +28,20 @@
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import Big5DistributionAnalysis
|
||||
from .mbcssm import Big5SMModel
|
||||
from .mbcssm import BIG5_SM_MODEL
|
||||
|
||||
|
||||
class Big5Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
MultiByteCharSetProber.__init__(self)
|
||||
self._mCodingSM = CodingStateMachine(Big5SMModel)
|
||||
self._mDistributionAnalyzer = Big5DistributionAnalysis()
|
||||
super(Big5Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
|
||||
self.distribution_analyzer = Big5DistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
def get_charset_name(self):
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "Big5"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Chinese"
|
||||
|
||||
@@ -25,82 +25,84 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,
|
||||
from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE,
|
||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,
|
||||
from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE,
|
||||
EUCKR_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,
|
||||
from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE,
|
||||
GB2312_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,
|
||||
from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE,
|
||||
BIG5_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,
|
||||
from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE,
|
||||
JIS_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .compat import wrap_ord
|
||||
|
||||
ENOUGH_DATA_THRESHOLD = 1024
|
||||
SURE_YES = 0.99
|
||||
SURE_NO = 0.01
|
||||
MINIMUM_DATA_THRESHOLD = 3
|
||||
|
||||
|
||||
class CharDistributionAnalysis:
|
||||
class CharDistributionAnalysis(object):
|
||||
ENOUGH_DATA_THRESHOLD = 1024
|
||||
SURE_YES = 0.99
|
||||
SURE_NO = 0.01
|
||||
MINIMUM_DATA_THRESHOLD = 3
|
||||
|
||||
def __init__(self):
|
||||
# Mapping table to get frequency order from char order (get from
|
||||
# GetOrder())
|
||||
self._mCharToFreqOrder = None
|
||||
self._mTableSize = None # Size of above table
|
||||
self._char_to_freq_order = None
|
||||
self._table_size = None # Size of above table
|
||||
# This is a constant value which varies from language to language,
|
||||
# used in calculating confidence. See
|
||||
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
|
||||
# for further detail.
|
||||
self._mTypicalDistributionRatio = None
|
||||
self.typical_distribution_ratio = None
|
||||
self._done = None
|
||||
self._total_chars = None
|
||||
self._freq_chars = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
"""reset analyser, clear any state"""
|
||||
# If this flag is set to True, detection is done and conclusion has
|
||||
# been made
|
||||
self._mDone = False
|
||||
self._mTotalChars = 0 # Total characters encountered
|
||||
self._done = False
|
||||
self._total_chars = 0 # Total characters encountered
|
||||
# The number of characters whose frequency order is less than 512
|
||||
self._mFreqChars = 0
|
||||
self._freq_chars = 0
|
||||
|
||||
def feed(self, aBuf, aCharLen):
|
||||
def feed(self, char, char_len):
|
||||
"""feed a character with known length"""
|
||||
if aCharLen == 2:
|
||||
if char_len == 2:
|
||||
# we only care about 2-bytes character in our distribution analysis
|
||||
order = self.get_order(aBuf)
|
||||
order = self.get_order(char)
|
||||
else:
|
||||
order = -1
|
||||
if order >= 0:
|
||||
self._mTotalChars += 1
|
||||
self._total_chars += 1
|
||||
# order is valid
|
||||
if order < self._mTableSize:
|
||||
if 512 > self._mCharToFreqOrder[order]:
|
||||
self._mFreqChars += 1
|
||||
if order < self._table_size:
|
||||
if 512 > self._char_to_freq_order[order]:
|
||||
self._freq_chars += 1
|
||||
|
||||
def get_confidence(self):
|
||||
"""return confidence based on existing data"""
|
||||
# if we didn't receive any character in our consideration range,
|
||||
# return negative answer
|
||||
if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:
|
||||
return SURE_NO
|
||||
if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD:
|
||||
return self.SURE_NO
|
||||
|
||||
if self._mTotalChars != self._mFreqChars:
|
||||
r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)
|
||||
* self._mTypicalDistributionRatio))
|
||||
if r < SURE_YES:
|
||||
if self._total_chars != self._freq_chars:
|
||||
r = (self._freq_chars / ((self._total_chars - self._freq_chars)
|
||||
* self.typical_distribution_ratio))
|
||||
if r < self.SURE_YES:
|
||||
return r
|
||||
|
||||
# normalize confidence (we don't want to be 100% sure)
|
||||
return SURE_YES
|
||||
return self.SURE_YES
|
||||
|
||||
def got_enough_data(self):
|
||||
# It is not necessary to receive all data to draw conclusion.
|
||||
# For charset detection, certain amount of data is enough
|
||||
return self._mTotalChars > ENOUGH_DATA_THRESHOLD
|
||||
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
|
||||
|
||||
def get_order(self, aBuf):
|
||||
def get_order(self, byte_str):
|
||||
# We do not handle characters based on the original encoding string,
|
||||
# but convert this encoding string to a number, here called order.
|
||||
# This allows multiple encodings of a language to share one frequency
|
||||
@@ -110,55 +112,55 @@ class CharDistributionAnalysis:
|
||||
|
||||
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
CharDistributionAnalysis.__init__(self)
|
||||
self._mCharToFreqOrder = EUCTWCharToFreqOrder
|
||||
self._mTableSize = EUCTW_TABLE_SIZE
|
||||
self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
|
||||
super(EUCTWDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = EUCTW_TABLE_SIZE
|
||||
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, aBuf):
|
||||
def get_order(self, byte_str):
|
||||
# for euc-TW encoding, we are interested
|
||||
# first byte range: 0xc4 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char = wrap_ord(aBuf[0])
|
||||
first_char = byte_str[0]
|
||||
if first_char >= 0xC4:
|
||||
return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1
|
||||
return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
CharDistributionAnalysis.__init__(self)
|
||||
self._mCharToFreqOrder = EUCKRCharToFreqOrder
|
||||
self._mTableSize = EUCKR_TABLE_SIZE
|
||||
self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
|
||||
super(EUCKRDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = EUCKR_TABLE_SIZE
|
||||
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, aBuf):
|
||||
def get_order(self, byte_str):
|
||||
# for euc-KR encoding, we are interested
|
||||
# first byte range: 0xb0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char = wrap_ord(aBuf[0])
|
||||
first_char = byte_str[0]
|
||||
if first_char >= 0xB0:
|
||||
return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1
|
||||
return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class GB2312DistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
CharDistributionAnalysis.__init__(self)
|
||||
self._mCharToFreqOrder = GB2312CharToFreqOrder
|
||||
self._mTableSize = GB2312_TABLE_SIZE
|
||||
self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
|
||||
super(GB2312DistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = GB2312_TABLE_SIZE
|
||||
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, aBuf):
|
||||
def get_order(self, byte_str):
|
||||
# for GB2312 encoding, we are interested
|
||||
# first byte range: 0xb0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
|
||||
first_char, second_char = byte_str[0], byte_str[1]
|
||||
if (first_char >= 0xB0) and (second_char >= 0xA1):
|
||||
return 94 * (first_char - 0xB0) + second_char - 0xA1
|
||||
else:
|
||||
@@ -167,17 +169,17 @@ class GB2312DistributionAnalysis(CharDistributionAnalysis):
|
||||
|
||||
class Big5DistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
CharDistributionAnalysis.__init__(self)
|
||||
self._mCharToFreqOrder = Big5CharToFreqOrder
|
||||
self._mTableSize = BIG5_TABLE_SIZE
|
||||
self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
|
||||
super(Big5DistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = BIG5_TABLE_SIZE
|
||||
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, aBuf):
|
||||
def get_order(self, byte_str):
|
||||
# for big5 encoding, we are interested
|
||||
# first byte range: 0xa4 -- 0xfe
|
||||
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
|
||||
first_char, second_char = byte_str[0], byte_str[1]
|
||||
if first_char >= 0xA4:
|
||||
if second_char >= 0xA1:
|
||||
return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
|
||||
@@ -189,17 +191,17 @@ class Big5DistributionAnalysis(CharDistributionAnalysis):
|
||||
|
||||
class SJISDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
CharDistributionAnalysis.__init__(self)
|
||||
self._mCharToFreqOrder = JISCharToFreqOrder
|
||||
self._mTableSize = JIS_TABLE_SIZE
|
||||
self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
super(SJISDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = JIS_TABLE_SIZE
|
||||
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, aBuf):
|
||||
def get_order(self, byte_str):
|
||||
# for sjis encoding, we are interested
|
||||
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
|
||||
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
|
||||
first_char, second_char = byte_str[0], byte_str[1]
|
||||
if (first_char >= 0x81) and (first_char <= 0x9F):
|
||||
order = 188 * (first_char - 0x81)
|
||||
elif (first_char >= 0xE0) and (first_char <= 0xEF):
|
||||
@@ -214,18 +216,18 @@ class SJISDistributionAnalysis(CharDistributionAnalysis):
|
||||
|
||||
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
CharDistributionAnalysis.__init__(self)
|
||||
self._mCharToFreqOrder = JISCharToFreqOrder
|
||||
self._mTableSize = JIS_TABLE_SIZE
|
||||
self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
super(EUCJPDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = JIS_TABLE_SIZE
|
||||
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, aBuf):
|
||||
def get_order(self, byte_str):
|
||||
# for euc-JP encoding, we are interested
|
||||
# first byte range: 0xa0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
char = wrap_ord(aBuf[0])
|
||||
char = byte_str[0]
|
||||
if char >= 0xA0:
|
||||
return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1
|
||||
return 94 * (char - 0xA1) + byte_str[1] - 0xa1
|
||||
else:
|
||||
return -1
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
@@ -13,94 +13,94 @@
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from . import constants
|
||||
import sys
|
||||
from .enums import ProbingState
|
||||
from .charsetprober import CharSetProber
|
||||
|
||||
|
||||
class CharSetGroupProber(CharSetProber):
|
||||
def __init__(self):
|
||||
CharSetProber.__init__(self)
|
||||
self._mActiveNum = 0
|
||||
self._mProbers = []
|
||||
self._mBestGuessProber = None
|
||||
def __init__(self, lang_filter=None):
|
||||
super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
|
||||
self._active_num = 0
|
||||
self.probers = []
|
||||
self._best_guess_prober = None
|
||||
|
||||
def reset(self):
|
||||
CharSetProber.reset(self)
|
||||
self._mActiveNum = 0
|
||||
for prober in self._mProbers:
|
||||
super(CharSetGroupProber, self).reset()
|
||||
self._active_num = 0
|
||||
for prober in self.probers:
|
||||
if prober:
|
||||
prober.reset()
|
||||
prober.active = True
|
||||
self._mActiveNum += 1
|
||||
self._mBestGuessProber = None
|
||||
self._active_num += 1
|
||||
self._best_guess_prober = None
|
||||
|
||||
def get_charset_name(self):
|
||||
if not self._mBestGuessProber:
|
||||
@property
|
||||
def charset_name(self):
|
||||
if not self._best_guess_prober:
|
||||
self.get_confidence()
|
||||
if not self._mBestGuessProber:
|
||||
if not self._best_guess_prober:
|
||||
return None
|
||||
# self._mBestGuessProber = self._mProbers[0]
|
||||
return self._mBestGuessProber.get_charset_name()
|
||||
return self._best_guess_prober.charset_name
|
||||
|
||||
def feed(self, aBuf):
|
||||
for prober in self._mProbers:
|
||||
@property
|
||||
def language(self):
|
||||
if not self._best_guess_prober:
|
||||
self.get_confidence()
|
||||
if not self._best_guess_prober:
|
||||
return None
|
||||
return self._best_guess_prober.language
|
||||
|
||||
def feed(self, byte_str):
|
||||
for prober in self.probers:
|
||||
if not prober:
|
||||
continue
|
||||
if not prober.active:
|
||||
continue
|
||||
st = prober.feed(aBuf)
|
||||
if not st:
|
||||
state = prober.feed(byte_str)
|
||||
if not state:
|
||||
continue
|
||||
if st == constants.eFoundIt:
|
||||
self._mBestGuessProber = prober
|
||||
return self.get_state()
|
||||
elif st == constants.eNotMe:
|
||||
if state == ProbingState.FOUND_IT:
|
||||
self._best_guess_prober = prober
|
||||
return self.state
|
||||
elif state == ProbingState.NOT_ME:
|
||||
prober.active = False
|
||||
self._mActiveNum -= 1
|
||||
if self._mActiveNum <= 0:
|
||||
self._mState = constants.eNotMe
|
||||
return self.get_state()
|
||||
return self.get_state()
|
||||
self._active_num -= 1
|
||||
if self._active_num <= 0:
|
||||
self._state = ProbingState.NOT_ME
|
||||
return self.state
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
st = self.get_state()
|
||||
if st == constants.eFoundIt:
|
||||
state = self.state
|
||||
if state == ProbingState.FOUND_IT:
|
||||
return 0.99
|
||||
elif st == constants.eNotMe:
|
||||
elif state == ProbingState.NOT_ME:
|
||||
return 0.01
|
||||
bestConf = 0.0
|
||||
self._mBestGuessProber = None
|
||||
for prober in self._mProbers:
|
||||
best_conf = 0.0
|
||||
self._best_guess_prober = None
|
||||
for prober in self.probers:
|
||||
if not prober:
|
||||
continue
|
||||
if not prober.active:
|
||||
if constants._debug:
|
||||
sys.stderr.write(prober.get_charset_name()
|
||||
+ ' not active\n')
|
||||
self.logger.debug('%s not active', prober.charset_name)
|
||||
continue
|
||||
cf = prober.get_confidence()
|
||||
if constants._debug:
|
||||
sys.stderr.write('%s confidence = %s\n' %
|
||||
(prober.get_charset_name(), cf))
|
||||
if bestConf < cf:
|
||||
bestConf = cf
|
||||
self._mBestGuessProber = prober
|
||||
if not self._mBestGuessProber:
|
||||
conf = prober.get_confidence()
|
||||
self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
|
||||
if best_conf < conf:
|
||||
best_conf = conf
|
||||
self._best_guess_prober = prober
|
||||
if not self._best_guess_prober:
|
||||
return 0.0
|
||||
return bestConf
|
||||
# else:
|
||||
# self._mBestGuessProber = self._mProbers[0]
|
||||
# return self._mBestGuessProber.get_confidence()
|
||||
return best_conf
|
||||
|
||||
@@ -26,37 +26,120 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from . import constants
|
||||
import logging
|
||||
import re
|
||||
|
||||
from .enums import ProbingState
|
||||
|
||||
class CharSetProber:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
class CharSetProber(object):
|
||||
|
||||
SHORTCUT_THRESHOLD = 0.95
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
self._state = None
|
||||
self.lang_filter = lang_filter
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
def reset(self):
|
||||
self._mState = constants.eDetecting
|
||||
self._state = ProbingState.DETECTING
|
||||
|
||||
def get_charset_name(self):
|
||||
@property
|
||||
def charset_name(self):
|
||||
return None
|
||||
|
||||
def feed(self, aBuf):
|
||||
def feed(self, buf):
|
||||
pass
|
||||
|
||||
def get_state(self):
|
||||
return self._mState
|
||||
@property
|
||||
def state(self):
|
||||
return self._state
|
||||
|
||||
def get_confidence(self):
|
||||
return 0.0
|
||||
|
||||
def filter_high_bit_only(self, aBuf):
|
||||
aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf)
|
||||
return aBuf
|
||||
@staticmethod
|
||||
def filter_high_byte_only(buf):
|
||||
buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
|
||||
return buf
|
||||
|
||||
def filter_without_english_letters(self, aBuf):
|
||||
aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf)
|
||||
return aBuf
|
||||
@staticmethod
|
||||
def filter_international_words(buf):
|
||||
"""
|
||||
We define three types of bytes:
|
||||
alphabet: english alphabets [a-zA-Z]
|
||||
international: international characters [\x80-\xFF]
|
||||
marker: everything else [^a-zA-Z\x80-\xFF]
|
||||
|
||||
def filter_with_english_letters(self, aBuf):
|
||||
# TODO
|
||||
return aBuf
|
||||
The input buffer can be thought to contain a series of words delimited
|
||||
by markers. This function works to filter all words that contain at
|
||||
least one international character. All contiguous sequences of markers
|
||||
are replaced by a single space ascii character.
|
||||
|
||||
This filter applies to all scripts which do not use English characters.
|
||||
"""
|
||||
filtered = bytearray()
|
||||
|
||||
# This regex expression filters out only words that have at-least one
|
||||
# international character. The word may include one marker character at
|
||||
# the end.
|
||||
words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?',
|
||||
buf)
|
||||
|
||||
for word in words:
|
||||
filtered.extend(word[:-1])
|
||||
|
||||
# If the last character in the word is a marker, replace it with a
|
||||
# space as markers shouldn't affect our analysis (they are used
|
||||
# similarly across all languages and may thus have similar
|
||||
# frequencies).
|
||||
last_char = word[-1:]
|
||||
if not last_char.isalpha() and last_char < b'\x80':
|
||||
last_char = b' '
|
||||
filtered.extend(last_char)
|
||||
|
||||
return filtered
|
||||
|
||||
@staticmethod
|
||||
def filter_with_english_letters(buf):
|
||||
"""
|
||||
Returns a copy of ``buf`` that retains only the sequences of English
|
||||
alphabet and high byte characters that are not between <> characters.
|
||||
Also retains English alphabet and high byte characters immediately
|
||||
before occurrences of >.
|
||||
|
||||
This filter can be applied to all scripts which contain both English
|
||||
characters and extended ASCII characters, but is currently only used by
|
||||
``Latin1Prober``.
|
||||
"""
|
||||
filtered = bytearray()
|
||||
in_tag = False
|
||||
prev = 0
|
||||
|
||||
for curr in range(len(buf)):
|
||||
# Slice here to get bytes instead of an int with Python 3
|
||||
buf_char = buf[curr:curr + 1]
|
||||
# Check if we're coming out of or entering an HTML tag
|
||||
if buf_char == b'>':
|
||||
in_tag = False
|
||||
elif buf_char == b'<':
|
||||
in_tag = True
|
||||
|
||||
# If current character is not extended-ASCII and not alphabetic...
|
||||
if buf_char < b'\x80' and not buf_char.isalpha():
|
||||
# ...and we're not in a tag
|
||||
if curr > prev and not in_tag:
|
||||
# Keep everything after last non-extended-ASCII,
|
||||
# non-alphabetic character
|
||||
filtered.extend(buf[prev:curr])
|
||||
# Output a space to delimit stretch we kept
|
||||
filtered.extend(b' ')
|
||||
prev = curr + 1
|
||||
|
||||
# If we're not in a tag...
|
||||
if not in_tag:
|
||||
# Keep everything after last non-extended-ASCII, non-alphabetic
|
||||
# character
|
||||
filtered.extend(buf[prev:])
|
||||
|
||||
return filtered
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
+13
-8
@@ -17,9 +17,9 @@ from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from io import open
|
||||
|
||||
from chardet import __version__
|
||||
from chardet.compat import PY2
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
|
||||
|
||||
@@ -35,9 +35,15 @@ def description_of(lines, name='stdin'):
|
||||
"""
|
||||
u = UniversalDetector()
|
||||
for line in lines:
|
||||
line = bytearray(line)
|
||||
u.feed(line)
|
||||
# shortcut out of the loop to save reading further - particularly useful if we read a BOM.
|
||||
if u.done:
|
||||
break
|
||||
u.close()
|
||||
result = u.result
|
||||
if PY2:
|
||||
name = name.decode(sys.getfilesystemencoding(), 'ignore')
|
||||
if result['encoding']:
|
||||
return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
|
||||
result['confidence'])
|
||||
@@ -46,23 +52,22 @@ def description_of(lines, name='stdin'):
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
'''
|
||||
"""
|
||||
Handles command line arguments and gets things started.
|
||||
|
||||
:param argv: List of arguments, as if specified on the command-line.
|
||||
If None, ``sys.argv[1:]`` is used instead.
|
||||
:type argv: list of str
|
||||
'''
|
||||
"""
|
||||
# Get command line arguments
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Takes one or more file paths and reports their detected \
|
||||
encodings",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
conflict_handler='resolve')
|
||||
encodings")
|
||||
parser.add_argument('input',
|
||||
help='File whose encoding we would like to determine.',
|
||||
help='File whose encoding we would like to determine. \
|
||||
(default: stdin)',
|
||||
type=argparse.FileType('rb'), nargs='*',
|
||||
default=[sys.stdin])
|
||||
default=[sys.stdin if PY2 else sys.stdin.buffer])
|
||||
parser.add_argument('--version', action='version',
|
||||
version='%(prog)s {0}'.format(__version__))
|
||||
args = parser.parse_args(argv)
|
||||
@@ -25,37 +25,64 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .constants import eStart
|
||||
from .compat import wrap_ord
|
||||
import logging
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
|
||||
class CodingStateMachine:
|
||||
class CodingStateMachine(object):
|
||||
"""
|
||||
A state machine to verify a byte sequence for a particular encoding. For
|
||||
each byte the detector receives, it will feed that byte to every active
|
||||
state machine available, one byte at a time. The state machine changes its
|
||||
state based on its previous state and the byte it receives. There are 3
|
||||
states in a state machine that are of interest to an auto-detector:
|
||||
|
||||
START state: This is the state to start with, or a legal byte sequence
|
||||
(i.e. a valid code point) for character has been identified.
|
||||
|
||||
ME state: This indicates that the state machine identified a byte sequence
|
||||
that is specific to the charset it is designed for and that
|
||||
there is no other possible encoding which can contain this byte
|
||||
sequence. This will to lead to an immediate positive answer for
|
||||
the detector.
|
||||
|
||||
ERROR state: This indicates the state machine identified an illegal byte
|
||||
sequence for that encoding. This will lead to an immediate
|
||||
negative answer for this encoding. Detector will exclude this
|
||||
encoding from consideration from here on.
|
||||
"""
|
||||
def __init__(self, sm):
|
||||
self._mModel = sm
|
||||
self._mCurrentBytePos = 0
|
||||
self._mCurrentCharLen = 0
|
||||
self._model = sm
|
||||
self._curr_byte_pos = 0
|
||||
self._curr_char_len = 0
|
||||
self._curr_state = None
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._mCurrentState = eStart
|
||||
self._curr_state = MachineState.START
|
||||
|
||||
def next_state(self, c):
|
||||
# for each byte we get its class
|
||||
# if it is first byte, we also get byte length
|
||||
# PY3K: aBuf is a byte stream, so c is an int, not a byte
|
||||
byteCls = self._mModel['classTable'][wrap_ord(c)]
|
||||
if self._mCurrentState == eStart:
|
||||
self._mCurrentBytePos = 0
|
||||
self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
|
||||
# from byte's class and stateTable, we get its next state
|
||||
curr_state = (self._mCurrentState * self._mModel['classFactor']
|
||||
+ byteCls)
|
||||
self._mCurrentState = self._mModel['stateTable'][curr_state]
|
||||
self._mCurrentBytePos += 1
|
||||
return self._mCurrentState
|
||||
byte_class = self._model['class_table'][c]
|
||||
if self._curr_state == MachineState.START:
|
||||
self._curr_byte_pos = 0
|
||||
self._curr_char_len = self._model['char_len_table'][byte_class]
|
||||
# from byte's class and state_table, we get its next state
|
||||
curr_state = (self._curr_state * self._model['class_factor']
|
||||
+ byte_class)
|
||||
self._curr_state = self._model['state_table'][curr_state]
|
||||
self._curr_byte_pos += 1
|
||||
return self._curr_state
|
||||
|
||||
def get_current_charlen(self):
|
||||
return self._mCurrentCharLen
|
||||
return self._curr_char_len
|
||||
|
||||
def get_coding_state_machine(self):
|
||||
return self._mModel['name']
|
||||
return self._model['name']
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return self._model['language']
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# Contributor(s):
|
||||
# Ian Cordasco - port to Python
|
||||
# Dan Blanchard
|
||||
# Ian Cordasco
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
@@ -22,13 +23,12 @@ import sys
|
||||
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
PY2 = True
|
||||
PY3 = False
|
||||
base_str = (str, unicode)
|
||||
text_type = unicode
|
||||
else:
|
||||
PY2 = False
|
||||
PY3 = True
|
||||
base_str = (bytes, str)
|
||||
|
||||
|
||||
def wrap_ord(a):
|
||||
if sys.version_info < (3, 0) and isinstance(a, base_str):
|
||||
return ord(a)
|
||||
else:
|
||||
return a
|
||||
text_type = str
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
_debug = 0
|
||||
|
||||
eDetecting = 0
|
||||
eFoundIt = 1
|
||||
eNotMe = 2
|
||||
|
||||
eStart = 0
|
||||
eError = 1
|
||||
eItsMe = 2
|
||||
|
||||
SHORTCUT_THRESHOLD = 0.95
|
||||
@@ -25,20 +25,25 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCKRDistributionAnalysis
|
||||
from .mbcssm import CP949SMModel
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .mbcssm import CP949_SM_MODEL
|
||||
|
||||
|
||||
class CP949Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
MultiByteCharSetProber.__init__(self)
|
||||
self._mCodingSM = CodingStateMachine(CP949SMModel)
|
||||
super(CP949Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
|
||||
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
|
||||
# not different.
|
||||
self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
|
||||
self.distribution_analyzer = EUCKRDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
def get_charset_name(self):
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "CP949"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Korean"
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
All of the Enums that are used throughout the chardet package.
|
||||
|
||||
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||
"""
|
||||
|
||||
|
||||
class InputState(object):
|
||||
"""
|
||||
This enum represents the different states a universal detector can be in.
|
||||
"""
|
||||
PURE_ASCII = 0
|
||||
ESC_ASCII = 1
|
||||
HIGH_BYTE = 2
|
||||
|
||||
|
||||
class LanguageFilter(object):
|
||||
"""
|
||||
This enum represents the different language filters we can apply to a
|
||||
``UniversalDetector``.
|
||||
"""
|
||||
CHINESE_SIMPLIFIED = 0x01
|
||||
CHINESE_TRADITIONAL = 0x02
|
||||
JAPANESE = 0x04
|
||||
KOREAN = 0x08
|
||||
NON_CJK = 0x10
|
||||
ALL = 0x1F
|
||||
CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
|
||||
CJK = CHINESE | JAPANESE | KOREAN
|
||||
|
||||
|
||||
class ProbingState(object):
|
||||
"""
|
||||
This enum represents the different states a prober can be in.
|
||||
"""
|
||||
DETECTING = 0
|
||||
FOUND_IT = 1
|
||||
NOT_ME = 2
|
||||
|
||||
|
||||
class MachineState(object):
|
||||
"""
|
||||
This enum represents the different states a state machine can be in.
|
||||
"""
|
||||
START = 0
|
||||
ERROR = 1
|
||||
ITS_ME = 2
|
||||
|
||||
|
||||
class SequenceLikelihood(object):
|
||||
"""
|
||||
This enum represents the likelihood of a character following the previous one.
|
||||
"""
|
||||
NEGATIVE = 0
|
||||
UNLIKELY = 1
|
||||
LIKELY = 2
|
||||
POSITIVE = 3
|
||||
|
||||
@classmethod
|
||||
def get_num_categories(cls):
|
||||
""":returns: The number of likelihood categories in the enum."""
|
||||
return 4
|
||||
|
||||
|
||||
class CharacterCategory(object):
|
||||
"""
|
||||
This enum represents the different categories language models for
|
||||
``SingleByteCharsetProber`` put characters into.
|
||||
|
||||
Anything less than CONTROL is considered a letter.
|
||||
"""
|
||||
UNDEFINED = 255
|
||||
LINE_BREAK = 254
|
||||
SYMBOL = 253
|
||||
DIGIT = 252
|
||||
CONTROL = 251
|
||||
@@ -25,62 +25,77 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from . import constants
|
||||
from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
|
||||
ISO2022KRSMModel)
|
||||
from .charsetprober import CharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .compat import wrap_ord
|
||||
from .enums import LanguageFilter, ProbingState, MachineState
|
||||
from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,
|
||||
ISO2022KR_SM_MODEL)
|
||||
|
||||
|
||||
class EscCharSetProber(CharSetProber):
|
||||
def __init__(self):
|
||||
CharSetProber.__init__(self)
|
||||
self._mCodingSM = [
|
||||
CodingStateMachine(HZSMModel),
|
||||
CodingStateMachine(ISO2022CNSMModel),
|
||||
CodingStateMachine(ISO2022JPSMModel),
|
||||
CodingStateMachine(ISO2022KRSMModel)
|
||||
]
|
||||
"""
|
||||
This CharSetProber uses a "code scheme" approach for detecting encodings,
|
||||
whereby easily recognizable escape or shift sequences are relied on to
|
||||
identify these encodings.
|
||||
"""
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
|
||||
self.coding_sm = []
|
||||
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
|
||||
self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
|
||||
if self.lang_filter & LanguageFilter.JAPANESE:
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
|
||||
if self.lang_filter & LanguageFilter.KOREAN:
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
|
||||
self.active_sm_count = None
|
||||
self._detected_charset = None
|
||||
self._detected_language = None
|
||||
self._state = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
CharSetProber.reset(self)
|
||||
for codingSM in self._mCodingSM:
|
||||
if not codingSM:
|
||||
super(EscCharSetProber, self).reset()
|
||||
for coding_sm in self.coding_sm:
|
||||
if not coding_sm:
|
||||
continue
|
||||
codingSM.active = True
|
||||
codingSM.reset()
|
||||
self._mActiveSM = len(self._mCodingSM)
|
||||
self._mDetectedCharset = None
|
||||
coding_sm.active = True
|
||||
coding_sm.reset()
|
||||
self.active_sm_count = len(self.coding_sm)
|
||||
self._detected_charset = None
|
||||
self._detected_language = None
|
||||
|
||||
def get_charset_name(self):
|
||||
return self._mDetectedCharset
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self._detected_charset
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return self._detected_language
|
||||
|
||||
def get_confidence(self):
|
||||
if self._mDetectedCharset:
|
||||
if self._detected_charset:
|
||||
return 0.99
|
||||
else:
|
||||
return 0.00
|
||||
|
||||
def feed(self, aBuf):
|
||||
for c in aBuf:
|
||||
# PY3K: aBuf is a byte array, so c is an int, not a byte
|
||||
for codingSM in self._mCodingSM:
|
||||
if not codingSM:
|
||||
def feed(self, byte_str):
|
||||
for c in byte_str:
|
||||
for coding_sm in self.coding_sm:
|
||||
if not coding_sm or not coding_sm.active:
|
||||
continue
|
||||
if not codingSM.active:
|
||||
continue
|
||||
codingState = codingSM.next_state(wrap_ord(c))
|
||||
if codingState == constants.eError:
|
||||
codingSM.active = False
|
||||
self._mActiveSM -= 1
|
||||
if self._mActiveSM <= 0:
|
||||
self._mState = constants.eNotMe
|
||||
return self.get_state()
|
||||
elif codingState == constants.eItsMe:
|
||||
self._mState = constants.eFoundIt
|
||||
self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8
|
||||
return self.get_state()
|
||||
coding_state = coding_sm.next_state(c)
|
||||
if coding_state == MachineState.ERROR:
|
||||
coding_sm.active = False
|
||||
self.active_sm_count -= 1
|
||||
if self.active_sm_count <= 0:
|
||||
self._state = ProbingState.NOT_ME
|
||||
return self.state
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
self._detected_charset = coding_sm.get_coding_state_machine()
|
||||
self._detected_language = coding_sm.language
|
||||
return self.state
|
||||
|
||||
return self.get_state()
|
||||
return self.state
|
||||
|
||||
@@ -25,9 +25,9 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .constants import eStart, eError, eItsMe
|
||||
from .enums import MachineState
|
||||
|
||||
HZ_cls = (
|
||||
HZ_CLS = (
|
||||
1,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
@@ -62,24 +62,25 @@ HZ_cls = (
|
||||
1,1,1,1,1,1,1,1, # f8 - ff
|
||||
)
|
||||
|
||||
HZ_st = (
|
||||
eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
|
||||
eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17
|
||||
5,eError, 6,eError, 5, 5, 4,eError,# 18-1f
|
||||
4,eError, 4, 4, 4,eError, 4,eError,# 20-27
|
||||
4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f
|
||||
HZ_ST = (
|
||||
MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17
|
||||
5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f
|
||||
4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27
|
||||
4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f
|
||||
)
|
||||
|
||||
HZCharLenTable = (0, 0, 0, 0, 0, 0)
|
||||
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
HZSMModel = {'classTable': HZ_cls,
|
||||
'classFactor': 6,
|
||||
'stateTable': HZ_st,
|
||||
'charLenTable': HZCharLenTable,
|
||||
'name': "HZ-GB-2312"}
|
||||
HZ_SM_MODEL = {'class_table': HZ_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': HZ_ST,
|
||||
'char_len_table': HZ_CHAR_LEN_TABLE,
|
||||
'name': "HZ-GB-2312",
|
||||
'language': 'Chinese'}
|
||||
|
||||
ISO2022CN_cls = (
|
||||
ISO2022CN_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
@@ -114,26 +115,27 @@ ISO2022CN_cls = (
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022CN_st = (
|
||||
eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
|
||||
eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
|
||||
eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
|
||||
eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f
|
||||
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27
|
||||
5, 6,eError,eError,eError,eError,eError,eError,# 28-2f
|
||||
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37
|
||||
eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f
|
||||
ISO2022CN_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||
MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||
5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f
|
||||
)
|
||||
|
||||
ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
|
||||
'classFactor': 9,
|
||||
'stateTable': ISO2022CN_st,
|
||||
'charLenTable': ISO2022CNCharLenTable,
|
||||
'name': "ISO-2022-CN"}
|
||||
ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS,
|
||||
'class_factor': 9,
|
||||
'state_table': ISO2022CN_ST,
|
||||
'char_len_table': ISO2022CN_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-CN",
|
||||
'language': 'Chinese'}
|
||||
|
||||
ISO2022JP_cls = (
|
||||
ISO2022JP_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,2,2, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
@@ -168,27 +170,28 @@ ISO2022JP_cls = (
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022JP_st = (
|
||||
eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
|
||||
eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
|
||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f
|
||||
eError, 5,eError,eError,eError, 4,eError,eError,# 20-27
|
||||
eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f
|
||||
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37
|
||||
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f
|
||||
eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47
|
||||
ISO2022JP_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||
MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47
|
||||
)
|
||||
|
||||
ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
|
||||
'classFactor': 10,
|
||||
'stateTable': ISO2022JP_st,
|
||||
'charLenTable': ISO2022JPCharLenTable,
|
||||
'name': "ISO-2022-JP"}
|
||||
ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS,
|
||||
'class_factor': 10,
|
||||
'state_table': ISO2022JP_ST,
|
||||
'char_len_table': ISO2022JP_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-JP",
|
||||
'language': 'Japanese'}
|
||||
|
||||
ISO2022KR_cls = (
|
||||
ISO2022KR_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
@@ -223,20 +226,21 @@ ISO2022KR_cls = (
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022KR_st = (
|
||||
eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
|
||||
eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17
|
||||
eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f
|
||||
eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27
|
||||
ISO2022KR_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27
|
||||
)
|
||||
|
||||
ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)
|
||||
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': ISO2022KR_ST,
|
||||
'char_len_table': ISO2022KR_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-KR",
|
||||
'language': 'Korean'}
|
||||
|
||||
ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
|
||||
'classFactor': 6,
|
||||
'stateTable': ISO2022KR_st,
|
||||
'charLenTable': ISO2022KRCharLenTable,
|
||||
'name': "ISO-2022-KR"}
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -25,66 +25,68 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import sys
|
||||
from . import constants
|
||||
from .enums import ProbingState, MachineState
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCJPDistributionAnalysis
|
||||
from .jpcntx import EUCJPContextAnalysis
|
||||
from .mbcssm import EUCJPSMModel
|
||||
from .mbcssm import EUCJP_SM_MODEL
|
||||
|
||||
|
||||
class EUCJPProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
MultiByteCharSetProber.__init__(self)
|
||||
self._mCodingSM = CodingStateMachine(EUCJPSMModel)
|
||||
self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
|
||||
self._mContextAnalyzer = EUCJPContextAnalysis()
|
||||
super(EUCJPProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
|
||||
self.distribution_analyzer = EUCJPDistributionAnalysis()
|
||||
self.context_analyzer = EUCJPContextAnalysis()
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
MultiByteCharSetProber.reset(self)
|
||||
self._mContextAnalyzer.reset()
|
||||
super(EUCJPProber, self).reset()
|
||||
self.context_analyzer.reset()
|
||||
|
||||
def get_charset_name(self):
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "EUC-JP"
|
||||
|
||||
def feed(self, aBuf):
|
||||
aLen = len(aBuf)
|
||||
for i in range(0, aLen):
|
||||
# PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
|
||||
codingState = self._mCodingSM.next_state(aBuf[i])
|
||||
if codingState == constants.eError:
|
||||
if constants._debug:
|
||||
sys.stderr.write(self.get_charset_name()
|
||||
+ ' prober hit error at byte ' + str(i)
|
||||
+ '\n')
|
||||
self._mState = constants.eNotMe
|
||||
@property
|
||||
def language(self):
|
||||
return "Japanese"
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i in range(len(byte_str)):
|
||||
# PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
|
||||
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||
if coding_state == MachineState.ERROR:
|
||||
self.logger.debug('%s %s prober hit error at byte %s',
|
||||
self.charset_name, self.language, i)
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif codingState == constants.eItsMe:
|
||||
self._mState = constants.eFoundIt
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif codingState == constants.eStart:
|
||||
charLen = self._mCodingSM.get_current_charlen()
|
||||
elif coding_state == MachineState.START:
|
||||
char_len = self.coding_sm.get_current_charlen()
|
||||
if i == 0:
|
||||
self._mLastChar[1] = aBuf[0]
|
||||
self._mContextAnalyzer.feed(self._mLastChar, charLen)
|
||||
self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
|
||||
self._last_char[1] = byte_str[0]
|
||||
self.context_analyzer.feed(self._last_char, char_len)
|
||||
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||
else:
|
||||
self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
|
||||
self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
|
||||
charLen)
|
||||
self.context_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
|
||||
self._mLastChar[0] = aBuf[aLen - 1]
|
||||
self._last_char[0] = byte_str[-1]
|
||||
|
||||
if self.get_state() == constants.eDetecting:
|
||||
if (self._mContextAnalyzer.got_enough_data() and
|
||||
(self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
|
||||
self._mState = constants.eFoundIt
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if (self.context_analyzer.got_enough_data() and
|
||||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.get_state()
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
contxtCf = self._mContextAnalyzer.get_confidence()
|
||||
distribCf = self._mDistributionAnalyzer.get_confidence()
|
||||
return max(contxtCf, distribCf)
|
||||
context_conf = self.context_analyzer.get_confidence()
|
||||
distrib_conf = self.distribution_analyzer.get_confidence()
|
||||
return max(context_conf, distrib_conf)
|
||||
|
||||
@@ -13,12 +13,12 @@
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
@@ -35,15 +35,15 @@
|
||||
#
|
||||
# Idea Distribution Ratio = 0.98653 / (1-0.98653) = 73.24
|
||||
# Random Distribution Ration = 512 / (2350-512) = 0.279.
|
||||
#
|
||||
# Typical Distribution Ratio
|
||||
#
|
||||
# Typical Distribution Ratio
|
||||
|
||||
EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0
|
||||
|
||||
EUCKR_TABLE_SIZE = 2352
|
||||
|
||||
# Char to FreqOrder table ,
|
||||
EUCKRCharToFreqOrder = ( \
|
||||
# Char to FreqOrder table ,
|
||||
EUCKR_CHAR_TO_FREQ_ORDER = (
|
||||
13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87,
|
||||
1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
|
||||
1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734,
|
||||
@@ -191,406 +191,5 @@ EUCKRCharToFreqOrder = ( \
|
||||
1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628,
|
||||
2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042,
|
||||
670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256
|
||||
#Everything below is of no interest for detection purpose
|
||||
2643,2644,2645,2646,2647,2648,2649,2650,2651,2652,2653,2654,2655,2656,2657,2658,
|
||||
2659,2660,2661,2662,2663,2664,2665,2666,2667,2668,2669,2670,2671,2672,2673,2674,
|
||||
2675,2676,2677,2678,2679,2680,2681,2682,2683,2684,2685,2686,2687,2688,2689,2690,
|
||||
2691,2692,2693,2694,2695,2696,2697,2698,2699,1542, 880,2700,2701,2702,2703,2704,
|
||||
2705,2706,2707,2708,2709,2710,2711,2712,2713,2714,2715,2716,2717,2718,2719,2720,
|
||||
2721,2722,2723,2724,2725,1543,2726,2727,2728,2729,2730,2731,2732,1544,2733,2734,
|
||||
2735,2736,2737,2738,2739,2740,2741,2742,2743,2744,2745,2746,2747,2748,2749,2750,
|
||||
2751,2752,2753,2754,1545,2755,2756,2757,2758,2759,2760,2761,2762,2763,2764,2765,
|
||||
2766,1546,2767,1547,2768,2769,2770,2771,2772,2773,2774,2775,2776,2777,2778,2779,
|
||||
2780,2781,2782,2783,2784,2785,2786,1548,2787,2788,2789,1109,2790,2791,2792,2793,
|
||||
2794,2795,2796,2797,2798,2799,2800,2801,2802,2803,2804,2805,2806,2807,2808,2809,
|
||||
2810,2811,2812,1329,2813,2814,2815,2816,2817,2818,2819,2820,2821,2822,2823,2824,
|
||||
2825,2826,2827,2828,2829,2830,2831,2832,2833,2834,2835,2836,2837,2838,2839,2840,
|
||||
2841,2842,2843,2844,2845,2846,2847,2848,2849,2850,2851,2852,2853,2854,2855,2856,
|
||||
1549,2857,2858,2859,2860,1550,2861,2862,1551,2863,2864,2865,2866,2867,2868,2869,
|
||||
2870,2871,2872,2873,2874,1110,1330,2875,2876,2877,2878,2879,2880,2881,2882,2883,
|
||||
2884,2885,2886,2887,2888,2889,2890,2891,2892,2893,2894,2895,2896,2897,2898,2899,
|
||||
2900,2901,2902,2903,2904,2905,2906,2907,2908,2909,2910,2911,2912,2913,2914,2915,
|
||||
2916,2917,2918,2919,2920,2921,2922,2923,2924,2925,2926,2927,2928,2929,2930,1331,
|
||||
2931,2932,2933,2934,2935,2936,2937,2938,2939,2940,2941,2942,2943,1552,2944,2945,
|
||||
2946,2947,2948,2949,2950,2951,2952,2953,2954,2955,2956,2957,2958,2959,2960,2961,
|
||||
2962,2963,2964,1252,2965,2966,2967,2968,2969,2970,2971,2972,2973,2974,2975,2976,
|
||||
2977,2978,2979,2980,2981,2982,2983,2984,2985,2986,2987,2988,2989,2990,2991,2992,
|
||||
2993,2994,2995,2996,2997,2998,2999,3000,3001,3002,3003,3004,3005,3006,3007,3008,
|
||||
3009,3010,3011,3012,1553,3013,3014,3015,3016,3017,1554,3018,1332,3019,3020,3021,
|
||||
3022,3023,3024,3025,3026,3027,3028,3029,3030,3031,3032,3033,3034,3035,3036,3037,
|
||||
3038,3039,3040,3041,3042,3043,3044,3045,3046,3047,3048,3049,3050,1555,3051,3052,
|
||||
3053,1556,1557,3054,3055,3056,3057,3058,3059,3060,3061,3062,3063,3064,3065,3066,
|
||||
3067,1558,3068,3069,3070,3071,3072,3073,3074,3075,3076,1559,3077,3078,3079,3080,
|
||||
3081,3082,3083,1253,3084,3085,3086,3087,3088,3089,3090,3091,3092,3093,3094,3095,
|
||||
3096,3097,3098,3099,3100,3101,3102,3103,3104,3105,3106,3107,3108,1152,3109,3110,
|
||||
3111,3112,3113,1560,3114,3115,3116,3117,1111,3118,3119,3120,3121,3122,3123,3124,
|
||||
3125,3126,3127,3128,3129,3130,3131,3132,3133,3134,3135,3136,3137,3138,3139,3140,
|
||||
3141,3142,3143,3144,3145,3146,3147,3148,3149,3150,3151,3152,3153,3154,3155,3156,
|
||||
3157,3158,3159,3160,3161,3162,3163,3164,3165,3166,3167,3168,3169,3170,3171,3172,
|
||||
3173,3174,3175,3176,1333,3177,3178,3179,3180,3181,3182,3183,3184,3185,3186,3187,
|
||||
3188,3189,1561,3190,3191,1334,3192,3193,3194,3195,3196,3197,3198,3199,3200,3201,
|
||||
3202,3203,3204,3205,3206,3207,3208,3209,3210,3211,3212,3213,3214,3215,3216,3217,
|
||||
3218,3219,3220,3221,3222,3223,3224,3225,3226,3227,3228,3229,3230,3231,3232,3233,
|
||||
3234,1562,3235,3236,3237,3238,3239,3240,3241,3242,3243,3244,3245,3246,3247,3248,
|
||||
3249,3250,3251,3252,3253,3254,3255,3256,3257,3258,3259,3260,3261,3262,3263,3264,
|
||||
3265,3266,3267,3268,3269,3270,3271,3272,3273,3274,3275,3276,3277,1563,3278,3279,
|
||||
3280,3281,3282,3283,3284,3285,3286,3287,3288,3289,3290,3291,3292,3293,3294,3295,
|
||||
3296,3297,3298,3299,3300,3301,3302,3303,3304,3305,3306,3307,3308,3309,3310,3311,
|
||||
3312,3313,3314,3315,3316,3317,3318,3319,3320,3321,3322,3323,3324,3325,3326,3327,
|
||||
3328,3329,3330,3331,3332,3333,3334,3335,3336,3337,3338,3339,3340,3341,3342,3343,
|
||||
3344,3345,3346,3347,3348,3349,3350,3351,3352,3353,3354,3355,3356,3357,3358,3359,
|
||||
3360,3361,3362,3363,3364,1335,3365,3366,3367,3368,3369,3370,3371,3372,3373,3374,
|
||||
3375,3376,3377,3378,3379,3380,3381,3382,3383,3384,3385,3386,3387,1336,3388,3389,
|
||||
3390,3391,3392,3393,3394,3395,3396,3397,3398,3399,3400,3401,3402,3403,3404,3405,
|
||||
3406,3407,3408,3409,3410,3411,3412,3413,3414,1337,3415,3416,3417,3418,3419,1338,
|
||||
3420,3421,3422,1564,1565,3423,3424,3425,3426,3427,3428,3429,3430,3431,1254,3432,
|
||||
3433,3434,1339,3435,3436,3437,3438,3439,1566,3440,3441,3442,3443,3444,3445,3446,
|
||||
3447,3448,3449,3450,3451,3452,3453,3454,1255,3455,3456,3457,3458,3459,1567,1191,
|
||||
3460,1568,1569,3461,3462,3463,1570,3464,3465,3466,3467,3468,1571,3469,3470,3471,
|
||||
3472,3473,1572,3474,3475,3476,3477,3478,3479,3480,3481,3482,3483,3484,3485,3486,
|
||||
1340,3487,3488,3489,3490,3491,3492,1021,3493,3494,3495,3496,3497,3498,1573,3499,
|
||||
1341,3500,3501,3502,3503,3504,3505,3506,3507,3508,3509,3510,3511,1342,3512,3513,
|
||||
3514,3515,3516,1574,1343,3517,3518,3519,1575,3520,1576,3521,3522,3523,3524,3525,
|
||||
3526,3527,3528,3529,3530,3531,3532,3533,3534,3535,3536,3537,3538,3539,3540,3541,
|
||||
3542,3543,3544,3545,3546,3547,3548,3549,3550,3551,3552,3553,3554,3555,3556,3557,
|
||||
3558,3559,3560,3561,3562,3563,3564,3565,3566,3567,3568,3569,3570,3571,3572,3573,
|
||||
3574,3575,3576,3577,3578,3579,3580,1577,3581,3582,1578,3583,3584,3585,3586,3587,
|
||||
3588,3589,3590,3591,3592,3593,3594,3595,3596,3597,3598,3599,3600,3601,3602,3603,
|
||||
3604,1579,3605,3606,3607,3608,3609,3610,3611,3612,3613,3614,3615,3616,3617,3618,
|
||||
3619,3620,3621,3622,3623,3624,3625,3626,3627,3628,3629,1580,3630,3631,1581,3632,
|
||||
3633,3634,3635,3636,3637,3638,3639,3640,3641,3642,3643,3644,3645,3646,3647,3648,
|
||||
3649,3650,3651,3652,3653,3654,3655,3656,1582,3657,3658,3659,3660,3661,3662,3663,
|
||||
3664,3665,3666,3667,3668,3669,3670,3671,3672,3673,3674,3675,3676,3677,3678,3679,
|
||||
3680,3681,3682,3683,3684,3685,3686,3687,3688,3689,3690,3691,3692,3693,3694,3695,
|
||||
3696,3697,3698,3699,3700,1192,3701,3702,3703,3704,1256,3705,3706,3707,3708,1583,
|
||||
1257,3709,3710,3711,3712,3713,3714,3715,3716,1584,3717,3718,3719,3720,3721,3722,
|
||||
3723,3724,3725,3726,3727,3728,3729,3730,3731,3732,3733,3734,3735,3736,3737,3738,
|
||||
3739,3740,3741,3742,3743,3744,3745,1344,3746,3747,3748,3749,3750,3751,3752,3753,
|
||||
3754,3755,3756,1585,3757,3758,3759,3760,3761,3762,3763,3764,3765,3766,1586,3767,
|
||||
3768,3769,3770,3771,3772,3773,3774,3775,3776,3777,3778,1345,3779,3780,3781,3782,
|
||||
3783,3784,3785,3786,3787,3788,3789,3790,3791,3792,3793,3794,3795,1346,1587,3796,
|
||||
3797,1588,3798,3799,3800,3801,3802,3803,3804,3805,3806,1347,3807,3808,3809,3810,
|
||||
3811,1589,3812,3813,3814,3815,3816,3817,3818,3819,3820,3821,1590,3822,3823,1591,
|
||||
1348,3824,3825,3826,3827,3828,3829,3830,1592,3831,3832,1593,3833,3834,3835,3836,
|
||||
3837,3838,3839,3840,3841,3842,3843,3844,1349,3845,3846,3847,3848,3849,3850,3851,
|
||||
3852,3853,3854,3855,3856,3857,3858,1594,3859,3860,3861,3862,3863,3864,3865,3866,
|
||||
3867,3868,3869,1595,3870,3871,3872,3873,1596,3874,3875,3876,3877,3878,3879,3880,
|
||||
3881,3882,3883,3884,3885,3886,1597,3887,3888,3889,3890,3891,3892,3893,3894,3895,
|
||||
1598,3896,3897,3898,1599,1600,3899,1350,3900,1351,3901,3902,1352,3903,3904,3905,
|
||||
3906,3907,3908,3909,3910,3911,3912,3913,3914,3915,3916,3917,3918,3919,3920,3921,
|
||||
3922,3923,3924,1258,3925,3926,3927,3928,3929,3930,3931,1193,3932,1601,3933,3934,
|
||||
3935,3936,3937,3938,3939,3940,3941,3942,3943,1602,3944,3945,3946,3947,3948,1603,
|
||||
3949,3950,3951,3952,3953,3954,3955,3956,3957,3958,3959,3960,3961,3962,3963,3964,
|
||||
3965,1604,3966,3967,3968,3969,3970,3971,3972,3973,3974,3975,3976,3977,1353,3978,
|
||||
3979,3980,3981,3982,3983,3984,3985,3986,3987,3988,3989,3990,3991,1354,3992,3993,
|
||||
3994,3995,3996,3997,3998,3999,4000,4001,4002,4003,4004,4005,4006,4007,4008,4009,
|
||||
4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,1355,4024,
|
||||
4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040,
|
||||
1605,4041,4042,4043,4044,4045,4046,4047,4048,4049,4050,4051,4052,4053,4054,4055,
|
||||
4056,4057,4058,4059,4060,1606,4061,4062,4063,4064,1607,4065,4066,4067,4068,4069,
|
||||
4070,4071,4072,4073,4074,4075,4076,1194,4077,4078,1608,4079,4080,4081,4082,4083,
|
||||
4084,4085,4086,4087,1609,4088,4089,4090,4091,4092,4093,4094,4095,4096,4097,4098,
|
||||
4099,4100,4101,4102,4103,4104,4105,4106,4107,4108,1259,4109,4110,4111,4112,4113,
|
||||
4114,4115,4116,4117,4118,4119,4120,4121,4122,4123,4124,1195,4125,4126,4127,1610,
|
||||
4128,4129,4130,4131,4132,4133,4134,4135,4136,4137,1356,4138,4139,4140,4141,4142,
|
||||
4143,4144,1611,4145,4146,4147,4148,4149,4150,4151,4152,4153,4154,4155,4156,4157,
|
||||
4158,4159,4160,4161,4162,4163,4164,4165,4166,4167,4168,4169,4170,4171,4172,4173,
|
||||
4174,4175,4176,4177,4178,4179,4180,4181,4182,4183,4184,4185,4186,4187,4188,4189,
|
||||
4190,4191,4192,4193,4194,4195,4196,4197,4198,4199,4200,4201,4202,4203,4204,4205,
|
||||
4206,4207,4208,4209,4210,4211,4212,4213,4214,4215,4216,4217,4218,4219,1612,4220,
|
||||
4221,4222,4223,4224,4225,4226,4227,1357,4228,1613,4229,4230,4231,4232,4233,4234,
|
||||
4235,4236,4237,4238,4239,4240,4241,4242,4243,1614,4244,4245,4246,4247,4248,4249,
|
||||
4250,4251,4252,4253,4254,4255,4256,4257,4258,4259,4260,4261,4262,4263,4264,4265,
|
||||
4266,4267,4268,4269,4270,1196,1358,4271,4272,4273,4274,4275,4276,4277,4278,4279,
|
||||
4280,4281,4282,4283,4284,4285,4286,4287,1615,4288,4289,4290,4291,4292,4293,4294,
|
||||
4295,4296,4297,4298,4299,4300,4301,4302,4303,4304,4305,4306,4307,4308,4309,4310,
|
||||
4311,4312,4313,4314,4315,4316,4317,4318,4319,4320,4321,4322,4323,4324,4325,4326,
|
||||
4327,4328,4329,4330,4331,4332,4333,4334,1616,4335,4336,4337,4338,4339,4340,4341,
|
||||
4342,4343,4344,4345,4346,4347,4348,4349,4350,4351,4352,4353,4354,4355,4356,4357,
|
||||
4358,4359,4360,1617,4361,4362,4363,4364,4365,1618,4366,4367,4368,4369,4370,4371,
|
||||
4372,4373,4374,4375,4376,4377,4378,4379,4380,4381,4382,4383,4384,4385,4386,4387,
|
||||
4388,4389,4390,4391,4392,4393,4394,4395,4396,4397,4398,4399,4400,4401,4402,4403,
|
||||
4404,4405,4406,4407,4408,4409,4410,4411,4412,4413,4414,4415,4416,1619,4417,4418,
|
||||
4419,4420,4421,4422,4423,4424,4425,1112,4426,4427,4428,4429,4430,1620,4431,4432,
|
||||
4433,4434,4435,4436,4437,4438,4439,4440,4441,4442,1260,1261,4443,4444,4445,4446,
|
||||
4447,4448,4449,4450,4451,4452,4453,4454,4455,1359,4456,4457,4458,4459,4460,4461,
|
||||
4462,4463,4464,4465,1621,4466,4467,4468,4469,4470,4471,4472,4473,4474,4475,4476,
|
||||
4477,4478,4479,4480,4481,4482,4483,4484,4485,4486,4487,4488,4489,1055,4490,4491,
|
||||
4492,4493,4494,4495,4496,4497,4498,4499,4500,4501,4502,4503,4504,4505,4506,4507,
|
||||
4508,4509,4510,4511,4512,4513,4514,4515,4516,4517,4518,1622,4519,4520,4521,1623,
|
||||
4522,4523,4524,4525,4526,4527,4528,4529,4530,4531,4532,4533,4534,4535,1360,4536,
|
||||
4537,4538,4539,4540,4541,4542,4543, 975,4544,4545,4546,4547,4548,4549,4550,4551,
|
||||
4552,4553,4554,4555,4556,4557,4558,4559,4560,4561,4562,4563,4564,4565,4566,4567,
|
||||
4568,4569,4570,4571,1624,4572,4573,4574,4575,4576,1625,4577,4578,4579,4580,4581,
|
||||
4582,4583,4584,1626,4585,4586,4587,4588,4589,4590,4591,4592,4593,4594,4595,1627,
|
||||
4596,4597,4598,4599,4600,4601,4602,4603,4604,4605,4606,4607,4608,4609,4610,4611,
|
||||
4612,4613,4614,4615,1628,4616,4617,4618,4619,4620,4621,4622,4623,4624,4625,4626,
|
||||
4627,4628,4629,4630,4631,4632,4633,4634,4635,4636,4637,4638,4639,4640,4641,4642,
|
||||
4643,4644,4645,4646,4647,4648,4649,1361,4650,4651,4652,4653,4654,4655,4656,4657,
|
||||
4658,4659,4660,4661,1362,4662,4663,4664,4665,4666,4667,4668,4669,4670,4671,4672,
|
||||
4673,4674,4675,4676,4677,4678,4679,4680,4681,4682,1629,4683,4684,4685,4686,4687,
|
||||
1630,4688,4689,4690,4691,1153,4692,4693,4694,1113,4695,4696,4697,4698,4699,4700,
|
||||
4701,4702,4703,4704,4705,4706,4707,4708,4709,4710,4711,1197,4712,4713,4714,4715,
|
||||
4716,4717,4718,4719,4720,4721,4722,4723,4724,4725,4726,4727,4728,4729,4730,4731,
|
||||
4732,4733,4734,4735,1631,4736,1632,4737,4738,4739,4740,4741,4742,4743,4744,1633,
|
||||
4745,4746,4747,4748,4749,1262,4750,4751,4752,4753,4754,1363,4755,4756,4757,4758,
|
||||
4759,4760,4761,4762,4763,4764,4765,4766,4767,4768,1634,4769,4770,4771,4772,4773,
|
||||
4774,4775,4776,4777,4778,1635,4779,4780,4781,4782,4783,4784,4785,4786,4787,4788,
|
||||
4789,1636,4790,4791,4792,4793,4794,4795,4796,4797,4798,4799,4800,4801,4802,4803,
|
||||
4804,4805,4806,1637,4807,4808,4809,1638,4810,4811,4812,4813,4814,4815,4816,4817,
|
||||
4818,1639,4819,4820,4821,4822,4823,4824,4825,4826,4827,4828,4829,4830,4831,4832,
|
||||
4833,1077,4834,4835,4836,4837,4838,4839,4840,4841,4842,4843,4844,4845,4846,4847,
|
||||
4848,4849,4850,4851,4852,4853,4854,4855,4856,4857,4858,4859,4860,4861,4862,4863,
|
||||
4864,4865,4866,4867,4868,4869,4870,4871,4872,4873,4874,4875,4876,4877,4878,4879,
|
||||
4880,4881,4882,4883,1640,4884,4885,1641,4886,4887,4888,4889,4890,4891,4892,4893,
|
||||
4894,4895,4896,4897,4898,4899,4900,4901,4902,4903,4904,4905,4906,4907,4908,4909,
|
||||
4910,4911,1642,4912,4913,4914,1364,4915,4916,4917,4918,4919,4920,4921,4922,4923,
|
||||
4924,4925,4926,4927,4928,4929,4930,4931,1643,4932,4933,4934,4935,4936,4937,4938,
|
||||
4939,4940,4941,4942,4943,4944,4945,4946,4947,4948,4949,4950,4951,4952,4953,4954,
|
||||
4955,4956,4957,4958,4959,4960,4961,4962,4963,4964,4965,4966,4967,4968,4969,4970,
|
||||
4971,4972,4973,4974,4975,4976,4977,4978,4979,4980,1644,4981,4982,4983,4984,1645,
|
||||
4985,4986,1646,4987,4988,4989,4990,4991,4992,4993,4994,4995,4996,4997,4998,4999,
|
||||
5000,5001,5002,5003,5004,5005,1647,5006,1648,5007,5008,5009,5010,5011,5012,1078,
|
||||
5013,5014,5015,5016,5017,5018,5019,5020,5021,5022,5023,5024,5025,5026,5027,5028,
|
||||
1365,5029,5030,5031,5032,5033,5034,5035,5036,5037,5038,5039,1649,5040,5041,5042,
|
||||
5043,5044,5045,1366,5046,5047,5048,5049,5050,5051,5052,5053,5054,5055,1650,5056,
|
||||
5057,5058,5059,5060,5061,5062,5063,5064,5065,5066,5067,5068,5069,5070,5071,5072,
|
||||
5073,5074,5075,5076,5077,1651,5078,5079,5080,5081,5082,5083,5084,5085,5086,5087,
|
||||
5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102,5103,
|
||||
5104,5105,5106,5107,5108,5109,5110,1652,5111,5112,5113,5114,5115,5116,5117,5118,
|
||||
1367,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,1653,5130,5131,5132,
|
||||
5133,5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,
|
||||
5149,1368,5150,1654,5151,1369,5152,5153,5154,5155,5156,5157,5158,5159,5160,5161,
|
||||
5162,5163,5164,5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,5176,5177,
|
||||
5178,1370,5179,5180,5181,5182,5183,5184,5185,5186,5187,5188,5189,5190,5191,5192,
|
||||
5193,5194,5195,5196,5197,5198,1655,5199,5200,5201,5202,1656,5203,5204,5205,5206,
|
||||
1371,5207,1372,5208,5209,5210,5211,1373,5212,5213,1374,5214,5215,5216,5217,5218,
|
||||
5219,5220,5221,5222,5223,5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,
|
||||
5235,5236,5237,5238,5239,5240,5241,5242,5243,5244,5245,5246,5247,1657,5248,5249,
|
||||
5250,5251,1658,1263,5252,5253,5254,5255,5256,1375,5257,5258,5259,5260,5261,5262,
|
||||
5263,5264,5265,5266,5267,5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,
|
||||
5279,5280,5281,5282,5283,1659,5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,
|
||||
5294,5295,5296,5297,5298,5299,5300,1660,5301,5302,5303,5304,5305,5306,5307,5308,
|
||||
5309,5310,5311,5312,5313,5314,5315,5316,5317,5318,5319,5320,5321,1376,5322,5323,
|
||||
5324,5325,5326,5327,5328,5329,5330,5331,5332,5333,1198,5334,5335,5336,5337,5338,
|
||||
5339,5340,5341,5342,5343,1661,5344,5345,5346,5347,5348,5349,5350,5351,5352,5353,
|
||||
5354,5355,5356,5357,5358,5359,5360,5361,5362,5363,5364,5365,5366,5367,5368,5369,
|
||||
5370,5371,5372,5373,5374,5375,5376,5377,5378,5379,5380,5381,5382,5383,5384,5385,
|
||||
5386,5387,5388,5389,5390,5391,5392,5393,5394,5395,5396,5397,5398,1264,5399,5400,
|
||||
5401,5402,5403,5404,5405,5406,5407,5408,5409,5410,5411,5412,1662,5413,5414,5415,
|
||||
5416,1663,5417,5418,5419,5420,5421,5422,5423,5424,5425,5426,5427,5428,5429,5430,
|
||||
5431,5432,5433,5434,5435,5436,5437,5438,1664,5439,5440,5441,5442,5443,5444,5445,
|
||||
5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456,5457,5458,5459,5460,5461,
|
||||
5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472,5473,5474,5475,5476,5477,
|
||||
5478,1154,5479,5480,5481,5482,5483,5484,5485,1665,5486,5487,5488,5489,5490,5491,
|
||||
5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504,5505,5506,5507,
|
||||
5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520,5521,5522,5523,
|
||||
5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536,5537,5538,5539,
|
||||
5540,5541,5542,5543,5544,5545,5546,5547,5548,1377,5549,5550,5551,5552,5553,5554,
|
||||
5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568,5569,5570,
|
||||
1114,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584,5585,
|
||||
5586,5587,5588,5589,5590,5591,5592,1378,5593,5594,5595,5596,5597,5598,5599,5600,
|
||||
5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,1379,5615,
|
||||
5616,5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,
|
||||
5632,5633,5634,1380,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,
|
||||
5647,5648,5649,1381,1056,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,
|
||||
1666,5661,5662,5663,5664,5665,5666,5667,5668,1667,5669,1668,5670,5671,5672,5673,
|
||||
5674,5675,5676,5677,5678,1155,5679,5680,5681,5682,5683,5684,5685,5686,5687,5688,
|
||||
5689,5690,5691,5692,5693,5694,5695,5696,5697,5698,1669,5699,5700,5701,5702,5703,
|
||||
5704,5705,1670,5706,5707,5708,5709,5710,1671,5711,5712,5713,5714,1382,5715,5716,
|
||||
5717,5718,5719,5720,5721,5722,5723,5724,5725,1672,5726,5727,1673,1674,5728,5729,
|
||||
5730,5731,5732,5733,5734,5735,5736,1675,5737,5738,5739,5740,5741,5742,5743,5744,
|
||||
1676,5745,5746,5747,5748,5749,5750,5751,1383,5752,5753,5754,5755,5756,5757,5758,
|
||||
5759,5760,5761,5762,5763,5764,5765,5766,5767,5768,1677,5769,5770,5771,5772,5773,
|
||||
1678,5774,5775,5776, 998,5777,5778,5779,5780,5781,5782,5783,5784,5785,1384,5786,
|
||||
5787,5788,5789,5790,5791,5792,5793,5794,5795,5796,5797,5798,5799,5800,1679,5801,
|
||||
5802,5803,1115,1116,5804,5805,5806,5807,5808,5809,5810,5811,5812,5813,5814,5815,
|
||||
5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828,5829,5830,5831,
|
||||
5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844,5845,5846,5847,
|
||||
5848,5849,5850,5851,5852,5853,5854,5855,1680,5856,5857,5858,5859,5860,5861,5862,
|
||||
5863,5864,1681,5865,5866,5867,1682,5868,5869,5870,5871,5872,5873,5874,5875,5876,
|
||||
5877,5878,5879,1683,5880,1684,5881,5882,5883,5884,1685,5885,5886,5887,5888,5889,
|
||||
5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905,
|
||||
5906,5907,1686,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920,
|
||||
5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,1687,
|
||||
5936,5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,
|
||||
5952,1688,1689,5953,1199,5954,5955,5956,5957,5958,5959,5960,5961,1690,5962,5963,
|
||||
5964,5965,5966,5967,5968,5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,
|
||||
5980,5981,1385,5982,1386,5983,5984,5985,5986,5987,5988,5989,5990,5991,5992,5993,
|
||||
5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004,6005,6006,6007,6008,6009,
|
||||
6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020,6021,6022,6023,6024,6025,
|
||||
6026,6027,1265,6028,6029,1691,6030,6031,6032,6033,6034,6035,6036,6037,6038,6039,
|
||||
6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052,6053,6054,6055,
|
||||
6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068,6069,6070,6071,
|
||||
6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084,1692,6085,6086,
|
||||
6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100,6101,6102,
|
||||
6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116,6117,6118,
|
||||
6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,1693,6132,6133,
|
||||
6134,6135,6136,1694,6137,6138,6139,6140,6141,1695,6142,6143,6144,6145,6146,6147,
|
||||
6148,6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163,
|
||||
6164,6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179,
|
||||
6180,6181,6182,6183,6184,6185,1696,6186,6187,6188,6189,6190,6191,6192,6193,6194,
|
||||
6195,6196,6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210,
|
||||
6211,6212,6213,6214,6215,6216,6217,6218,6219,1697,6220,6221,6222,6223,6224,6225,
|
||||
6226,6227,6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241,
|
||||
6242,6243,6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,1698,6254,6255,6256,
|
||||
6257,6258,6259,6260,6261,6262,6263,1200,6264,6265,6266,6267,6268,6269,6270,6271, #1024
|
||||
6272,6273,6274,6275,6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,6286,6287,
|
||||
6288,6289,6290,6291,6292,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,1699,
|
||||
6303,6304,1700,6305,6306,6307,6308,6309,6310,6311,6312,6313,6314,6315,6316,6317,
|
||||
6318,6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333,
|
||||
6334,6335,6336,6337,6338,6339,1701,6340,6341,6342,6343,6344,1387,6345,6346,6347,
|
||||
6348,6349,6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363,
|
||||
6364,6365,6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379,
|
||||
6380,6381,6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395,
|
||||
6396,6397,6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,6411,
|
||||
6412,6413,1702,6414,6415,6416,6417,6418,6419,6420,6421,6422,1703,6423,6424,6425,
|
||||
6426,6427,6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,1704,6439,6440,
|
||||
6441,6442,6443,6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,6455,6456,
|
||||
6457,6458,6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472,
|
||||
6473,6474,6475,6476,6477,6478,6479,6480,6481,6482,6483,6484,6485,6486,6487,6488,
|
||||
6489,6490,6491,6492,6493,6494,6495,6496,6497,6498,6499,6500,6501,6502,6503,1266,
|
||||
6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,
|
||||
6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532,6533,6534,6535,
|
||||
6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548,6549,6550,6551,
|
||||
1705,1706,6552,6553,6554,6555,6556,6557,6558,6559,6560,6561,6562,6563,6564,6565,
|
||||
6566,6567,6568,6569,6570,6571,6572,6573,6574,6575,6576,6577,6578,6579,6580,6581,
|
||||
6582,6583,6584,6585,6586,6587,6588,6589,6590,6591,6592,6593,6594,6595,6596,6597,
|
||||
6598,6599,6600,6601,6602,6603,6604,6605,6606,6607,6608,6609,6610,6611,6612,6613,
|
||||
6614,6615,6616,6617,6618,6619,6620,6621,6622,6623,6624,6625,6626,6627,6628,6629,
|
||||
6630,6631,6632,6633,6634,6635,6636,6637,1388,6638,6639,6640,6641,6642,6643,6644,
|
||||
1707,6645,6646,6647,6648,6649,6650,6651,6652,6653,6654,6655,6656,6657,6658,6659,
|
||||
6660,6661,6662,6663,1708,6664,6665,6666,6667,6668,6669,6670,6671,6672,6673,6674,
|
||||
1201,6675,6676,6677,6678,6679,6680,6681,6682,6683,6684,6685,6686,6687,6688,6689,
|
||||
6690,6691,6692,6693,6694,6695,6696,6697,6698,6699,6700,6701,6702,6703,6704,6705,
|
||||
6706,6707,6708,6709,6710,6711,6712,6713,6714,6715,6716,6717,6718,6719,6720,6721,
|
||||
6722,6723,6724,6725,1389,6726,6727,6728,6729,6730,6731,6732,6733,6734,6735,6736,
|
||||
1390,1709,6737,6738,6739,6740,6741,6742,1710,6743,6744,6745,6746,1391,6747,6748,
|
||||
6749,6750,6751,6752,6753,6754,6755,6756,6757,1392,6758,6759,6760,6761,6762,6763,
|
||||
6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777,6778,6779,
|
||||
6780,1202,6781,6782,6783,6784,6785,6786,6787,6788,6789,6790,6791,6792,6793,6794,
|
||||
6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806,6807,6808,6809,1711,
|
||||
6810,6811,6812,6813,6814,6815,6816,6817,6818,6819,6820,6821,6822,6823,6824,6825,
|
||||
6826,6827,6828,6829,6830,6831,6832,6833,6834,6835,6836,1393,6837,6838,6839,6840,
|
||||
6841,6842,6843,6844,6845,6846,6847,6848,6849,6850,6851,6852,6853,6854,6855,6856,
|
||||
6857,6858,6859,6860,6861,6862,6863,6864,6865,6866,6867,6868,6869,6870,6871,6872,
|
||||
6873,6874,6875,6876,6877,6878,6879,6880,6881,6882,6883,6884,6885,6886,6887,6888,
|
||||
6889,6890,6891,6892,6893,6894,6895,6896,6897,6898,6899,6900,6901,6902,1712,6903,
|
||||
6904,6905,6906,6907,6908,6909,6910,1713,6911,6912,6913,6914,6915,6916,6917,6918,
|
||||
6919,6920,6921,6922,6923,6924,6925,6926,6927,6928,6929,6930,6931,6932,6933,6934,
|
||||
6935,6936,6937,6938,6939,6940,6941,6942,6943,6944,6945,6946,6947,6948,6949,6950,
|
||||
6951,6952,6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966,
|
||||
6967,6968,6969,6970,6971,6972,6973,6974,1714,6975,6976,6977,6978,6979,6980,6981,
|
||||
6982,6983,6984,6985,6986,6987,6988,1394,6989,6990,6991,6992,6993,6994,6995,6996,
|
||||
6997,6998,6999,7000,1715,7001,7002,7003,7004,7005,7006,7007,7008,7009,7010,7011,
|
||||
7012,7013,7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027,
|
||||
7028,1716,7029,7030,7031,7032,7033,7034,7035,7036,7037,7038,7039,7040,7041,7042,
|
||||
7043,7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058,
|
||||
7059,7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,7071,7072,7073,7074,
|
||||
7075,7076,7077,7078,7079,7080,7081,7082,7083,7084,7085,7086,7087,7088,7089,7090,
|
||||
7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105,7106,
|
||||
7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,7119,7120,7121,7122,
|
||||
7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136,7137,7138,
|
||||
7139,7140,7141,7142,7143,7144,7145,7146,7147,7148,7149,7150,7151,7152,7153,7154,
|
||||
7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167,7168,7169,7170,
|
||||
7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183,7184,7185,7186,
|
||||
7187,7188,7189,7190,7191,7192,7193,7194,7195,7196,7197,7198,7199,7200,7201,7202,
|
||||
7203,7204,7205,7206,7207,1395,7208,7209,7210,7211,7212,7213,1717,7214,7215,7216,
|
||||
7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229,7230,7231,7232,
|
||||
7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245,7246,7247,7248,
|
||||
7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261,7262,7263,7264,
|
||||
7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277,7278,7279,7280,
|
||||
7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293,7294,7295,7296,
|
||||
7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308,7309,7310,7311,7312,
|
||||
7313,1718,7314,7315,7316,7317,7318,7319,7320,7321,7322,7323,7324,7325,7326,7327,
|
||||
7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339,7340,7341,7342,7343,
|
||||
7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,7354,7355,7356,7357,7358,7359,
|
||||
7360,7361,7362,7363,7364,7365,7366,7367,7368,7369,7370,7371,7372,7373,7374,7375,
|
||||
7376,7377,7378,7379,7380,7381,7382,7383,7384,7385,7386,7387,7388,7389,7390,7391,
|
||||
7392,7393,7394,7395,7396,7397,7398,7399,7400,7401,7402,7403,7404,7405,7406,7407,
|
||||
7408,7409,7410,7411,7412,7413,7414,7415,7416,7417,7418,7419,7420,7421,7422,7423,
|
||||
7424,7425,7426,7427,7428,7429,7430,7431,7432,7433,7434,7435,7436,7437,7438,7439,
|
||||
7440,7441,7442,7443,7444,7445,7446,7447,7448,7449,7450,7451,7452,7453,7454,7455,
|
||||
7456,7457,7458,7459,7460,7461,7462,7463,7464,7465,7466,7467,7468,7469,7470,7471,
|
||||
7472,7473,7474,7475,7476,7477,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487,
|
||||
7488,7489,7490,7491,7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,7503,
|
||||
7504,7505,7506,7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519,
|
||||
7520,7521,7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535,
|
||||
7536,7537,7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,7550,7551,
|
||||
7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567,
|
||||
7568,7569,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582,7583,
|
||||
7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598,7599,
|
||||
7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614,7615,
|
||||
7616,7617,7618,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628,7629,7630,7631,
|
||||
7632,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643,7644,7645,7646,7647,
|
||||
7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659,7660,7661,7662,7663,
|
||||
7664,7665,7666,7667,7668,7669,7670,7671,7672,7673,7674,7675,7676,7677,7678,7679,
|
||||
7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690,7691,7692,7693,7694,7695,
|
||||
7696,7697,7698,7699,7700,7701,7702,7703,7704,7705,7706,7707,7708,7709,7710,7711,
|
||||
7712,7713,7714,7715,7716,7717,7718,7719,7720,7721,7722,7723,7724,7725,7726,7727,
|
||||
7728,7729,7730,7731,7732,7733,7734,7735,7736,7737,7738,7739,7740,7741,7742,7743,
|
||||
7744,7745,7746,7747,7748,7749,7750,7751,7752,7753,7754,7755,7756,7757,7758,7759,
|
||||
7760,7761,7762,7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775,
|
||||
7776,7777,7778,7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791,
|
||||
7792,7793,7794,7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,7806,7807,
|
||||
7808,7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,
|
||||
7824,7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,
|
||||
7840,7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,
|
||||
7856,7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,
|
||||
7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,
|
||||
7888,7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,
|
||||
7904,7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,
|
||||
7920,7921,7922,7923,7924,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935,
|
||||
7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951,
|
||||
7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967,
|
||||
7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983,
|
||||
7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999,
|
||||
8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015,
|
||||
8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031,
|
||||
8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047,
|
||||
8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063,
|
||||
8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,
|
||||
8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,
|
||||
8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111,
|
||||
8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127,
|
||||
8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143,
|
||||
8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159,
|
||||
8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175,
|
||||
8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191,
|
||||
8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,
|
||||
8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223,
|
||||
8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239,
|
||||
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255,
|
||||
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271,
|
||||
8272,8273,8274,8275,8276,8277,8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,
|
||||
8288,8289,8290,8291,8292,8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,
|
||||
8304,8305,8306,8307,8308,8309,8310,8311,8312,8313,8314,8315,8316,8317,8318,8319,
|
||||
8320,8321,8322,8323,8324,8325,8326,8327,8328,8329,8330,8331,8332,8333,8334,8335,
|
||||
8336,8337,8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,
|
||||
8352,8353,8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,8364,8365,8366,8367,
|
||||
8368,8369,8370,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382,8383,
|
||||
8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398,8399,
|
||||
8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,8411,8412,8413,8414,8415,
|
||||
8416,8417,8418,8419,8420,8421,8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,
|
||||
8432,8433,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443,8444,8445,8446,8447,
|
||||
8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459,8460,8461,8462,8463,
|
||||
8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475,8476,8477,8478,8479,
|
||||
8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490,8491,8492,8493,8494,8495,
|
||||
8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,
|
||||
8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,
|
||||
8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538,8539,8540,8541,8542,8543,
|
||||
8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,
|
||||
8560,8561,8562,8563,8564,8565,8566,8567,8568,8569,8570,8571,8572,8573,8574,8575,
|
||||
8576,8577,8578,8579,8580,8581,8582,8583,8584,8585,8586,8587,8588,8589,8590,8591,
|
||||
8592,8593,8594,8595,8596,8597,8598,8599,8600,8601,8602,8603,8604,8605,8606,8607,
|
||||
8608,8609,8610,8611,8612,8613,8614,8615,8616,8617,8618,8619,8620,8621,8622,8623,
|
||||
8624,8625,8626,8627,8628,8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,8639,
|
||||
8640,8641,8642,8643,8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,
|
||||
8656,8657,8658,8659,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,
|
||||
8672,8673,8674,8675,8676,8677,8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,
|
||||
8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,
|
||||
8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,
|
||||
8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,
|
||||
8736,8737,8738,8739,8740,8741)
|
||||
)
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -28,15 +28,20 @@
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCKRDistributionAnalysis
|
||||
from .mbcssm import EUCKRSMModel
|
||||
from .mbcssm import EUCKR_SM_MODEL
|
||||
|
||||
|
||||
class EUCKRProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
MultiByteCharSetProber.__init__(self)
|
||||
self._mCodingSM = CodingStateMachine(EUCKRSMModel)
|
||||
self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
|
||||
super(EUCKRProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
|
||||
self.distribution_analyzer = EUCKRDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
def get_charset_name(self):
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "EUC-KR"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Korean"
|
||||
|
||||
@@ -44,385 +44,344 @@
|
||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||
|
||||
# Char to FreqOrder table ,
|
||||
EUCTW_TABLE_SIZE = 8102
|
||||
EUCTW_TABLE_SIZE = 5376
|
||||
|
||||
EUCTWCharToFreqOrder = (
|
||||
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
|
||||
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
|
||||
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
|
||||
63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
|
||||
3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
|
||||
4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822
|
||||
7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838
|
||||
630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854
|
||||
179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
|
||||
995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
|
||||
2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902
|
||||
1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
|
||||
3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
|
||||
706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950
|
||||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
|
||||
3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982
|
||||
2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998
|
||||
437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
|
||||
3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
|
||||
1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
|
||||
7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062
|
||||
266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078
|
||||
7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094
|
||||
1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110
|
||||
32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126
|
||||
188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
|
||||
3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158
|
||||
3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174
|
||||
324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
|
||||
2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206
|
||||
2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
|
||||
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238
|
||||
287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254
|
||||
3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270
|
||||
1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286
|
||||
1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302
|
||||
1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318
|
||||
2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334
|
||||
265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350
|
||||
4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366
|
||||
1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382
|
||||
7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398
|
||||
2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414
|
||||
383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430
|
||||
98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446
|
||||
523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462
|
||||
710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478
|
||||
7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494
|
||||
379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510
|
||||
1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526
|
||||
585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542
|
||||
690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558
|
||||
7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574
|
||||
1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590
|
||||
544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606
|
||||
3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622
|
||||
4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638
|
||||
3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654
|
||||
279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670
|
||||
610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686
|
||||
1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702
|
||||
4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718
|
||||
3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734
|
||||
3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750
|
||||
2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766
|
||||
7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782
|
||||
3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798
|
||||
7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814
|
||||
1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830
|
||||
2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846
|
||||
1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862
|
||||
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878
|
||||
1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894
|
||||
4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910
|
||||
3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926
|
||||
534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942
|
||||
165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958
|
||||
626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974
|
||||
2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990
|
||||
7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006
|
||||
1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022
|
||||
2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038
|
||||
1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054
|
||||
1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070
|
||||
7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086
|
||||
7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102
|
||||
7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118
|
||||
3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134
|
||||
4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150
|
||||
1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166
|
||||
7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182
|
||||
2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198
|
||||
7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214
|
||||
3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230
|
||||
3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246
|
||||
7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262
|
||||
2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278
|
||||
7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294
|
||||
862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310
|
||||
4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326
|
||||
2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342
|
||||
7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358
|
||||
3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374
|
||||
2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390
|
||||
2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406
|
||||
294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422
|
||||
2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438
|
||||
1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454
|
||||
1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470
|
||||
2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486
|
||||
1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502
|
||||
7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518
|
||||
7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534
|
||||
2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550
|
||||
4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566
|
||||
1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582
|
||||
7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598
|
||||
829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614
|
||||
4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630
|
||||
375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646
|
||||
2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662
|
||||
444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678
|
||||
1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694
|
||||
1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710
|
||||
730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726
|
||||
3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742
|
||||
3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758
|
||||
1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774
|
||||
3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790
|
||||
7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806
|
||||
7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822
|
||||
1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838
|
||||
2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854
|
||||
1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870
|
||||
3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886
|
||||
2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902
|
||||
3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918
|
||||
2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934
|
||||
4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950
|
||||
4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966
|
||||
3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982
|
||||
97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998
|
||||
3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014
|
||||
424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030
|
||||
3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046
|
||||
3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062
|
||||
3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078
|
||||
1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094
|
||||
7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110
|
||||
199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126
|
||||
7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142
|
||||
1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158
|
||||
391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174
|
||||
4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190
|
||||
3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206
|
||||
397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222
|
||||
2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238
|
||||
2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254
|
||||
3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270
|
||||
1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286
|
||||
4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302
|
||||
2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318
|
||||
1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334
|
||||
1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350
|
||||
2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366
|
||||
3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382
|
||||
1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398
|
||||
7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414
|
||||
1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430
|
||||
4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446
|
||||
1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462
|
||||
135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478
|
||||
1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494
|
||||
3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510
|
||||
3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526
|
||||
2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542
|
||||
1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558
|
||||
4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574
|
||||
660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590
|
||||
7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606
|
||||
2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622
|
||||
3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638
|
||||
4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654
|
||||
790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670
|
||||
7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686
|
||||
7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702
|
||||
1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718
|
||||
4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734
|
||||
3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750
|
||||
2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766
|
||||
3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782
|
||||
3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798
|
||||
2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814
|
||||
1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830
|
||||
4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846
|
||||
3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862
|
||||
3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878
|
||||
2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894
|
||||
4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910
|
||||
7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926
|
||||
3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942
|
||||
2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958
|
||||
3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974
|
||||
1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990
|
||||
2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006
|
||||
3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022
|
||||
4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038
|
||||
2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054
|
||||
2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070
|
||||
7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086
|
||||
1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102
|
||||
2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118
|
||||
1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134
|
||||
3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150
|
||||
4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166
|
||||
2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182
|
||||
3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198
|
||||
3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214
|
||||
2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230
|
||||
4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246
|
||||
2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262
|
||||
3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278
|
||||
4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294
|
||||
7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310
|
||||
3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326
|
||||
194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342
|
||||
1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358
|
||||
4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374
|
||||
1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390
|
||||
4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406
|
||||
7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422
|
||||
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438
|
||||
7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454
|
||||
2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470
|
||||
1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486
|
||||
1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502
|
||||
3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518
|
||||
509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534
|
||||
552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550
|
||||
478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566
|
||||
3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582
|
||||
2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598
|
||||
751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614
|
||||
7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630
|
||||
1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646
|
||||
3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662
|
||||
7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678
|
||||
1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694
|
||||
7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710
|
||||
4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726
|
||||
1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742
|
||||
2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758
|
||||
2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774
|
||||
4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790
|
||||
802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806
|
||||
809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822
|
||||
3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838
|
||||
3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854
|
||||
1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870
|
||||
2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886
|
||||
7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902
|
||||
1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918
|
||||
1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934
|
||||
3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950
|
||||
919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966
|
||||
1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982
|
||||
4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998
|
||||
7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014
|
||||
2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030
|
||||
3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046
|
||||
516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062
|
||||
1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078
|
||||
2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094
|
||||
2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110
|
||||
7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126
|
||||
7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142
|
||||
7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158
|
||||
2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174
|
||||
2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190
|
||||
1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206
|
||||
4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222
|
||||
3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238
|
||||
3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254
|
||||
4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270
|
||||
4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286
|
||||
2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302
|
||||
2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318
|
||||
7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334
|
||||
4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350
|
||||
7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366
|
||||
2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382
|
||||
1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398
|
||||
3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414
|
||||
4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430
|
||||
2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446
|
||||
120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462
|
||||
2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478
|
||||
1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494
|
||||
2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510
|
||||
2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526
|
||||
4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542
|
||||
7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558
|
||||
1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574
|
||||
3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590
|
||||
7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606
|
||||
1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622
|
||||
8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638
|
||||
2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654
|
||||
8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670
|
||||
2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686
|
||||
2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702
|
||||
8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718
|
||||
8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734
|
||||
8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750
|
||||
408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766
|
||||
8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782
|
||||
4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798
|
||||
3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814
|
||||
8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830
|
||||
1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846
|
||||
8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862
|
||||
425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878
|
||||
1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894
|
||||
479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910
|
||||
4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926
|
||||
1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942
|
||||
4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958
|
||||
1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974
|
||||
433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990
|
||||
3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006
|
||||
4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022
|
||||
8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038
|
||||
938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054
|
||||
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
|
||||
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
|
||||
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
|
||||
#Everything below is of no interest for detection purpose
|
||||
2515,1613,4582,8119,3312,3866,2516,8120,4058,8121,1637,4059,2466,4583,3867,8122, # 8118
|
||||
2493,3016,3734,8123,8124,2192,8125,8126,2162,8127,8128,8129,8130,8131,8132,8133, # 8134
|
||||
8134,8135,8136,8137,8138,8139,8140,8141,8142,8143,8144,8145,8146,8147,8148,8149, # 8150
|
||||
8150,8151,8152,8153,8154,8155,8156,8157,8158,8159,8160,8161,8162,8163,8164,8165, # 8166
|
||||
8166,8167,8168,8169,8170,8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181, # 8182
|
||||
8182,8183,8184,8185,8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197, # 8198
|
||||
8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213, # 8214
|
||||
8214,8215,8216,8217,8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229, # 8230
|
||||
8230,8231,8232,8233,8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245, # 8246
|
||||
8246,8247,8248,8249,8250,8251,8252,8253,8254,8255,8256,8257,8258,8259,8260,8261, # 8262
|
||||
8262,8263,8264,8265,8266,8267,8268,8269,8270,8271,8272,8273,8274,8275,8276,8277, # 8278
|
||||
8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,8290,8291,8292,8293, # 8294
|
||||
8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,8308,8309, # 8310
|
||||
8310,8311,8312,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322,8323,8324,8325, # 8326
|
||||
8326,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337,8338,8339,8340,8341, # 8342
|
||||
8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353,8354,8355,8356,8357, # 8358
|
||||
8358,8359,8360,8361,8362,8363,8364,8365,8366,8367,8368,8369,8370,8371,8372,8373, # 8374
|
||||
8374,8375,8376,8377,8378,8379,8380,8381,8382,8383,8384,8385,8386,8387,8388,8389, # 8390
|
||||
8390,8391,8392,8393,8394,8395,8396,8397,8398,8399,8400,8401,8402,8403,8404,8405, # 8406
|
||||
8406,8407,8408,8409,8410,8411,8412,8413,8414,8415,8416,8417,8418,8419,8420,8421, # 8422
|
||||
8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,8432,8433,8434,8435,8436,8437, # 8438
|
||||
8438,8439,8440,8441,8442,8443,8444,8445,8446,8447,8448,8449,8450,8451,8452,8453, # 8454
|
||||
8454,8455,8456,8457,8458,8459,8460,8461,8462,8463,8464,8465,8466,8467,8468,8469, # 8470
|
||||
8470,8471,8472,8473,8474,8475,8476,8477,8478,8479,8480,8481,8482,8483,8484,8485, # 8486
|
||||
8486,8487,8488,8489,8490,8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501, # 8502
|
||||
8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517, # 8518
|
||||
8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533, # 8534
|
||||
8534,8535,8536,8537,8538,8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549, # 8550
|
||||
8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,8565, # 8566
|
||||
8566,8567,8568,8569,8570,8571,8572,8573,8574,8575,8576,8577,8578,8579,8580,8581, # 8582
|
||||
8582,8583,8584,8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597, # 8598
|
||||
8598,8599,8600,8601,8602,8603,8604,8605,8606,8607,8608,8609,8610,8611,8612,8613, # 8614
|
||||
8614,8615,8616,8617,8618,8619,8620,8621,8622,8623,8624,8625,8626,8627,8628,8629, # 8630
|
||||
8630,8631,8632,8633,8634,8635,8636,8637,8638,8639,8640,8641,8642,8643,8644,8645, # 8646
|
||||
8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,8657,8658,8659,8660,8661, # 8662
|
||||
8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672,8673,8674,8675,8676,8677, # 8678
|
||||
8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,8688,8689,8690,8691,8692,8693, # 8694
|
||||
8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710
|
||||
8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726
|
||||
8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741) # 8742
|
||||
EUCTW_CHAR_TO_FREQ_ORDER = (
|
||||
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
|
||||
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
|
||||
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
|
||||
63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
|
||||
3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
|
||||
4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822
|
||||
7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838
|
||||
630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854
|
||||
179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
|
||||
995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
|
||||
2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902
|
||||
1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
|
||||
3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
|
||||
706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950
|
||||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
|
||||
3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982
|
||||
2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998
|
||||
437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
|
||||
3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
|
||||
1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
|
||||
7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062
|
||||
266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078
|
||||
7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094
|
||||
1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110
|
||||
32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126
|
||||
188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
|
||||
3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158
|
||||
3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174
|
||||
324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
|
||||
2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206
|
||||
2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
|
||||
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238
|
||||
287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254
|
||||
3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270
|
||||
1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286
|
||||
1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302
|
||||
1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318
|
||||
2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334
|
||||
265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350
|
||||
4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366
|
||||
1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382
|
||||
7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398
|
||||
2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414
|
||||
383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430
|
||||
98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446
|
||||
523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462
|
||||
710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478
|
||||
7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494
|
||||
379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510
|
||||
1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526
|
||||
585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542
|
||||
690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558
|
||||
7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574
|
||||
1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590
|
||||
544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606
|
||||
3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622
|
||||
4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638
|
||||
3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654
|
||||
279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670
|
||||
610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686
|
||||
1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702
|
||||
4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718
|
||||
3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734
|
||||
3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750
|
||||
2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766
|
||||
7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782
|
||||
3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798
|
||||
7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814
|
||||
1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830
|
||||
2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846
|
||||
1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862
|
||||
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878
|
||||
1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894
|
||||
4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910
|
||||
3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926
|
||||
534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942
|
||||
165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958
|
||||
626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974
|
||||
2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990
|
||||
7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006
|
||||
1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022
|
||||
2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038
|
||||
1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054
|
||||
1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070
|
||||
7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086
|
||||
7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102
|
||||
7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118
|
||||
3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134
|
||||
4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150
|
||||
1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166
|
||||
7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182
|
||||
2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198
|
||||
7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214
|
||||
3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230
|
||||
3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246
|
||||
7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262
|
||||
2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278
|
||||
7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294
|
||||
862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310
|
||||
4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326
|
||||
2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342
|
||||
7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358
|
||||
3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374
|
||||
2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390
|
||||
2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406
|
||||
294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422
|
||||
2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438
|
||||
1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454
|
||||
1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470
|
||||
2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486
|
||||
1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502
|
||||
7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518
|
||||
7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534
|
||||
2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550
|
||||
4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566
|
||||
1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582
|
||||
7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598
|
||||
829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614
|
||||
4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630
|
||||
375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646
|
||||
2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662
|
||||
444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678
|
||||
1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694
|
||||
1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710
|
||||
730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726
|
||||
3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742
|
||||
3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758
|
||||
1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774
|
||||
3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790
|
||||
7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806
|
||||
7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822
|
||||
1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838
|
||||
2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854
|
||||
1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870
|
||||
3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886
|
||||
2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902
|
||||
3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918
|
||||
2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934
|
||||
4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950
|
||||
4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966
|
||||
3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982
|
||||
97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998
|
||||
3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014
|
||||
424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030
|
||||
3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046
|
||||
3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062
|
||||
3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078
|
||||
1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094
|
||||
7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110
|
||||
199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126
|
||||
7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142
|
||||
1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158
|
||||
391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174
|
||||
4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190
|
||||
3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206
|
||||
397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222
|
||||
2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238
|
||||
2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254
|
||||
3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270
|
||||
1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286
|
||||
4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302
|
||||
2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318
|
||||
1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334
|
||||
1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350
|
||||
2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366
|
||||
3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382
|
||||
1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398
|
||||
7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414
|
||||
1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430
|
||||
4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446
|
||||
1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462
|
||||
135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478
|
||||
1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494
|
||||
3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510
|
||||
3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526
|
||||
2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542
|
||||
1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558
|
||||
4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574
|
||||
660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590
|
||||
7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606
|
||||
2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622
|
||||
3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638
|
||||
4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654
|
||||
790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670
|
||||
7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686
|
||||
7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702
|
||||
1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718
|
||||
4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734
|
||||
3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750
|
||||
2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766
|
||||
3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782
|
||||
3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798
|
||||
2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814
|
||||
1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830
|
||||
4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846
|
||||
3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862
|
||||
3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878
|
||||
2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894
|
||||
4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910
|
||||
7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926
|
||||
3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942
|
||||
2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958
|
||||
3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974
|
||||
1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990
|
||||
2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006
|
||||
3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022
|
||||
4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038
|
||||
2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054
|
||||
2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070
|
||||
7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086
|
||||
1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102
|
||||
2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118
|
||||
1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134
|
||||
3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150
|
||||
4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166
|
||||
2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182
|
||||
3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198
|
||||
3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214
|
||||
2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230
|
||||
4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246
|
||||
2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262
|
||||
3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278
|
||||
4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294
|
||||
7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310
|
||||
3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326
|
||||
194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342
|
||||
1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358
|
||||
4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374
|
||||
1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390
|
||||
4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406
|
||||
7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422
|
||||
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438
|
||||
7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454
|
||||
2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470
|
||||
1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486
|
||||
1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502
|
||||
3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518
|
||||
509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534
|
||||
552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550
|
||||
478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566
|
||||
3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582
|
||||
2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598
|
||||
751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614
|
||||
7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630
|
||||
1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646
|
||||
3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662
|
||||
7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678
|
||||
1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694
|
||||
7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710
|
||||
4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726
|
||||
1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742
|
||||
2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758
|
||||
2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774
|
||||
4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790
|
||||
802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806
|
||||
809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822
|
||||
3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838
|
||||
3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854
|
||||
1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870
|
||||
2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886
|
||||
7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902
|
||||
1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918
|
||||
1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934
|
||||
3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950
|
||||
919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966
|
||||
1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982
|
||||
4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998
|
||||
7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014
|
||||
2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030
|
||||
3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046
|
||||
516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062
|
||||
1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078
|
||||
2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094
|
||||
2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110
|
||||
7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126
|
||||
7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142
|
||||
7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158
|
||||
2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174
|
||||
2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190
|
||||
1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206
|
||||
4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222
|
||||
3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238
|
||||
3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254
|
||||
4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270
|
||||
4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286
|
||||
2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302
|
||||
2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318
|
||||
7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334
|
||||
4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350
|
||||
7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366
|
||||
2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382
|
||||
1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398
|
||||
3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414
|
||||
4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430
|
||||
2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446
|
||||
120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462
|
||||
2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478
|
||||
1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494
|
||||
2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510
|
||||
2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526
|
||||
4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542
|
||||
7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558
|
||||
1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574
|
||||
3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590
|
||||
7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606
|
||||
1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622
|
||||
8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638
|
||||
2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654
|
||||
8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670
|
||||
2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686
|
||||
2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702
|
||||
8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718
|
||||
8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734
|
||||
8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750
|
||||
408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766
|
||||
8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782
|
||||
4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798
|
||||
3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814
|
||||
8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830
|
||||
1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846
|
||||
8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862
|
||||
425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878
|
||||
1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894
|
||||
479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910
|
||||
4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926
|
||||
1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942
|
||||
4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958
|
||||
1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974
|
||||
433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990
|
||||
3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006
|
||||
4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022
|
||||
8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038
|
||||
938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054
|
||||
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
|
||||
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
|
||||
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
|
||||
)
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -13,12 +13,12 @@
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
@@ -28,14 +28,19 @@
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCTWDistributionAnalysis
|
||||
from .mbcssm import EUCTWSMModel
|
||||
from .mbcssm import EUCTW_SM_MODEL
|
||||
|
||||
class EUCTWProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
MultiByteCharSetProber.__init__(self)
|
||||
self._mCodingSM = CodingStateMachine(EUCTWSMModel)
|
||||
self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
|
||||
super(EUCTWProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
|
||||
self.distribution_analyzer = EUCTWDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
def get_charset_name(self):
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "EUC-TW"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Taiwan"
|
||||
|
||||
@@ -43,7 +43,7 @@ GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
|
||||
|
||||
GB2312_TABLE_SIZE = 3760
|
||||
|
||||
GB2312CharToFreqOrder = (
|
||||
GB2312_CHAR_TO_FREQ_ORDER = (
|
||||
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
|
||||
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
|
||||
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
|
||||
@@ -278,195 +278,6 @@ GB2312CharToFreqOrder = (
|
||||
1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232,
|
||||
1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624,
|
||||
381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189,
|
||||
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, # last 512
|
||||
#Everything below is of no interest for detection purpose
|
||||
5508,6484,3900,3414,3974,4441,4024,3537,4037,5628,5099,3633,6485,3148,6486,3636,
|
||||
5509,3257,5510,5973,5445,5872,4941,4403,3174,4627,5873,6276,2286,4230,5446,5874,
|
||||
5122,6102,6103,4162,5447,5123,5323,4849,6277,3980,3851,5066,4246,5774,5067,6278,
|
||||
3001,2807,5695,3346,5775,5974,5158,5448,6487,5975,5976,5776,3598,6279,5696,4806,
|
||||
4211,4154,6280,6488,6489,6490,6281,4212,5037,3374,4171,6491,4562,4807,4722,4827,
|
||||
5977,6104,4532,4079,5159,5324,5160,4404,3858,5359,5875,3975,4288,4610,3486,4512,
|
||||
5325,3893,5360,6282,6283,5560,2522,4231,5978,5186,5449,2569,3878,6284,5401,3578,
|
||||
4415,6285,4656,5124,5979,2506,4247,4449,3219,3417,4334,4969,4329,6492,4576,4828,
|
||||
4172,4416,4829,5402,6286,3927,3852,5361,4369,4830,4477,4867,5876,4173,6493,6105,
|
||||
4657,6287,6106,5877,5450,6494,4155,4868,5451,3700,5629,4384,6288,6289,5878,3189,
|
||||
4881,6107,6290,6495,4513,6496,4692,4515,4723,5100,3356,6497,6291,3810,4080,5561,
|
||||
3570,4430,5980,6498,4355,5697,6499,4724,6108,6109,3764,4050,5038,5879,4093,3226,
|
||||
6292,5068,5217,4693,3342,5630,3504,4831,4377,4466,4309,5698,4431,5777,6293,5778,
|
||||
4272,3706,6110,5326,3752,4676,5327,4273,5403,4767,5631,6500,5699,5880,3475,5039,
|
||||
6294,5562,5125,4348,4301,4482,4068,5126,4593,5700,3380,3462,5981,5563,3824,5404,
|
||||
4970,5511,3825,4738,6295,6501,5452,4516,6111,5881,5564,6502,6296,5982,6503,4213,
|
||||
4163,3454,6504,6112,4009,4450,6113,4658,6297,6114,3035,6505,6115,3995,4904,4739,
|
||||
4563,4942,4110,5040,3661,3928,5362,3674,6506,5292,3612,4791,5565,4149,5983,5328,
|
||||
5259,5021,4725,4577,4564,4517,4364,6298,5405,4578,5260,4594,4156,4157,5453,3592,
|
||||
3491,6507,5127,5512,4709,4922,5984,5701,4726,4289,6508,4015,6116,5128,4628,3424,
|
||||
4241,5779,6299,4905,6509,6510,5454,5702,5780,6300,4365,4923,3971,6511,5161,3270,
|
||||
3158,5985,4100, 867,5129,5703,6117,5363,3695,3301,5513,4467,6118,6512,5455,4232,
|
||||
4242,4629,6513,3959,4478,6514,5514,5329,5986,4850,5162,5566,3846,4694,6119,5456,
|
||||
4869,5781,3779,6301,5704,5987,5515,4710,6302,5882,6120,4392,5364,5705,6515,6121,
|
||||
6516,6517,3736,5988,5457,5989,4695,2457,5883,4551,5782,6303,6304,6305,5130,4971,
|
||||
6122,5163,6123,4870,3263,5365,3150,4871,6518,6306,5783,5069,5706,3513,3498,4409,
|
||||
5330,5632,5366,5458,5459,3991,5990,4502,3324,5991,5784,3696,4518,5633,4119,6519,
|
||||
4630,5634,4417,5707,4832,5992,3418,6124,5993,5567,4768,5218,6520,4595,3458,5367,
|
||||
6125,5635,6126,4202,6521,4740,4924,6307,3981,4069,4385,6308,3883,2675,4051,3834,
|
||||
4302,4483,5568,5994,4972,4101,5368,6309,5164,5884,3922,6127,6522,6523,5261,5460,
|
||||
5187,4164,5219,3538,5516,4111,3524,5995,6310,6311,5369,3181,3386,2484,5188,3464,
|
||||
5569,3627,5708,6524,5406,5165,4677,4492,6312,4872,4851,5885,4468,5996,6313,5709,
|
||||
5710,6128,2470,5886,6314,5293,4882,5785,3325,5461,5101,6129,5711,5786,6525,4906,
|
||||
6526,6527,4418,5887,5712,4808,2907,3701,5713,5888,6528,3765,5636,5331,6529,6530,
|
||||
3593,5889,3637,4943,3692,5714,5787,4925,6315,6130,5462,4405,6131,6132,6316,5262,
|
||||
6531,6532,5715,3859,5716,5070,4696,5102,3929,5788,3987,4792,5997,6533,6534,3920,
|
||||
4809,5000,5998,6535,2974,5370,6317,5189,5263,5717,3826,6536,3953,5001,4883,3190,
|
||||
5463,5890,4973,5999,4741,6133,6134,3607,5570,6000,4711,3362,3630,4552,5041,6318,
|
||||
6001,2950,2953,5637,4646,5371,4944,6002,2044,4120,3429,6319,6537,5103,4833,6538,
|
||||
6539,4884,4647,3884,6003,6004,4758,3835,5220,5789,4565,5407,6540,6135,5294,4697,
|
||||
4852,6320,6321,3206,4907,6541,6322,4945,6542,6136,6543,6323,6005,4631,3519,6544,
|
||||
5891,6545,5464,3784,5221,6546,5571,4659,6547,6324,6137,5190,6548,3853,6549,4016,
|
||||
4834,3954,6138,5332,3827,4017,3210,3546,4469,5408,5718,3505,4648,5790,5131,5638,
|
||||
5791,5465,4727,4318,6325,6326,5792,4553,4010,4698,3439,4974,3638,4335,3085,6006,
|
||||
5104,5042,5166,5892,5572,6327,4356,4519,5222,5573,5333,5793,5043,6550,5639,5071,
|
||||
4503,6328,6139,6551,6140,3914,3901,5372,6007,5640,4728,4793,3976,3836,4885,6552,
|
||||
4127,6553,4451,4102,5002,6554,3686,5105,6555,5191,5072,5295,4611,5794,5296,6556,
|
||||
5893,5264,5894,4975,5466,5265,4699,4976,4370,4056,3492,5044,4886,6557,5795,4432,
|
||||
4769,4357,5467,3940,4660,4290,6141,4484,4770,4661,3992,6329,4025,4662,5022,4632,
|
||||
4835,4070,5297,4663,4596,5574,5132,5409,5895,6142,4504,5192,4664,5796,5896,3885,
|
||||
5575,5797,5023,4810,5798,3732,5223,4712,5298,4084,5334,5468,6143,4052,4053,4336,
|
||||
4977,4794,6558,5335,4908,5576,5224,4233,5024,4128,5469,5225,4873,6008,5045,4729,
|
||||
4742,4633,3675,4597,6559,5897,5133,5577,5003,5641,5719,6330,6560,3017,2382,3854,
|
||||
4406,4811,6331,4393,3964,4946,6561,2420,3722,6562,4926,4378,3247,1736,4442,6332,
|
||||
5134,6333,5226,3996,2918,5470,4319,4003,4598,4743,4744,4485,3785,3902,5167,5004,
|
||||
5373,4394,5898,6144,4874,1793,3997,6334,4085,4214,5106,5642,4909,5799,6009,4419,
|
||||
4189,3330,5899,4165,4420,5299,5720,5227,3347,6145,4081,6335,2876,3930,6146,3293,
|
||||
3786,3910,3998,5900,5300,5578,2840,6563,5901,5579,6147,3531,5374,6564,6565,5580,
|
||||
4759,5375,6566,6148,3559,5643,6336,6010,5517,6337,6338,5721,5902,3873,6011,6339,
|
||||
6567,5518,3868,3649,5722,6568,4771,4947,6569,6149,4812,6570,2853,5471,6340,6341,
|
||||
5644,4795,6342,6012,5723,6343,5724,6013,4349,6344,3160,6150,5193,4599,4514,4493,
|
||||
5168,4320,6345,4927,3666,4745,5169,5903,5005,4928,6346,5725,6014,4730,4203,5046,
|
||||
4948,3395,5170,6015,4150,6016,5726,5519,6347,5047,3550,6151,6348,4197,4310,5904,
|
||||
6571,5581,2965,6152,4978,3960,4291,5135,6572,5301,5727,4129,4026,5905,4853,5728,
|
||||
5472,6153,6349,4533,2700,4505,5336,4678,3583,5073,2994,4486,3043,4554,5520,6350,
|
||||
6017,5800,4487,6351,3931,4103,5376,6352,4011,4321,4311,4190,5136,6018,3988,3233,
|
||||
4350,5906,5645,4198,6573,5107,3432,4191,3435,5582,6574,4139,5410,6353,5411,3944,
|
||||
5583,5074,3198,6575,6354,4358,6576,5302,4600,5584,5194,5412,6577,6578,5585,5413,
|
||||
5303,4248,5414,3879,4433,6579,4479,5025,4854,5415,6355,4760,4772,3683,2978,4700,
|
||||
3797,4452,3965,3932,3721,4910,5801,6580,5195,3551,5907,3221,3471,3029,6019,3999,
|
||||
5908,5909,5266,5267,3444,3023,3828,3170,4796,5646,4979,4259,6356,5647,5337,3694,
|
||||
6357,5648,5338,4520,4322,5802,3031,3759,4071,6020,5586,4836,4386,5048,6581,3571,
|
||||
4679,4174,4949,6154,4813,3787,3402,3822,3958,3215,3552,5268,4387,3933,4950,4359,
|
||||
6021,5910,5075,3579,6358,4234,4566,5521,6359,3613,5049,6022,5911,3375,3702,3178,
|
||||
4911,5339,4521,6582,6583,4395,3087,3811,5377,6023,6360,6155,4027,5171,5649,4421,
|
||||
4249,2804,6584,2270,6585,4000,4235,3045,6156,5137,5729,4140,4312,3886,6361,4330,
|
||||
6157,4215,6158,3500,3676,4929,4331,3713,4930,5912,4265,3776,3368,5587,4470,4855,
|
||||
3038,4980,3631,6159,6160,4132,4680,6161,6362,3923,4379,5588,4255,6586,4121,6587,
|
||||
6363,4649,6364,3288,4773,4774,6162,6024,6365,3543,6588,4274,3107,3737,5050,5803,
|
||||
4797,4522,5589,5051,5730,3714,4887,5378,4001,4523,6163,5026,5522,4701,4175,2791,
|
||||
3760,6589,5473,4224,4133,3847,4814,4815,4775,3259,5416,6590,2738,6164,6025,5304,
|
||||
3733,5076,5650,4816,5590,6591,6165,6592,3934,5269,6593,3396,5340,6594,5804,3445,
|
||||
3602,4042,4488,5731,5732,3525,5591,4601,5196,6166,6026,5172,3642,4612,3202,4506,
|
||||
4798,6366,3818,5108,4303,5138,5139,4776,3332,4304,2915,3415,4434,5077,5109,4856,
|
||||
2879,5305,4817,6595,5913,3104,3144,3903,4634,5341,3133,5110,5651,5805,6167,4057,
|
||||
5592,2945,4371,5593,6596,3474,4182,6367,6597,6168,4507,4279,6598,2822,6599,4777,
|
||||
4713,5594,3829,6169,3887,5417,6170,3653,5474,6368,4216,2971,5228,3790,4579,6369,
|
||||
5733,6600,6601,4951,4746,4555,6602,5418,5475,6027,3400,4665,5806,6171,4799,6028,
|
||||
5052,6172,3343,4800,4747,5006,6370,4556,4217,5476,4396,5229,5379,5477,3839,5914,
|
||||
5652,5807,4714,3068,4635,5808,6173,5342,4192,5078,5419,5523,5734,6174,4557,6175,
|
||||
4602,6371,6176,6603,5809,6372,5735,4260,3869,5111,5230,6029,5112,6177,3126,4681,
|
||||
5524,5915,2706,3563,4748,3130,6178,4018,5525,6604,6605,5478,4012,4837,6606,4534,
|
||||
4193,5810,4857,3615,5479,6030,4082,3697,3539,4086,5270,3662,4508,4931,5916,4912,
|
||||
5811,5027,3888,6607,4397,3527,3302,3798,2775,2921,2637,3966,4122,4388,4028,4054,
|
||||
1633,4858,5079,3024,5007,3982,3412,5736,6608,3426,3236,5595,3030,6179,3427,3336,
|
||||
3279,3110,6373,3874,3039,5080,5917,5140,4489,3119,6374,5812,3405,4494,6031,4666,
|
||||
4141,6180,4166,6032,5813,4981,6609,5081,4422,4982,4112,3915,5653,3296,3983,6375,
|
||||
4266,4410,5654,6610,6181,3436,5082,6611,5380,6033,3819,5596,4535,5231,5306,5113,
|
||||
6612,4952,5918,4275,3113,6613,6376,6182,6183,5814,3073,4731,4838,5008,3831,6614,
|
||||
4888,3090,3848,4280,5526,5232,3014,5655,5009,5737,5420,5527,6615,5815,5343,5173,
|
||||
5381,4818,6616,3151,4953,6617,5738,2796,3204,4360,2989,4281,5739,5174,5421,5197,
|
||||
3132,5141,3849,5142,5528,5083,3799,3904,4839,5480,2880,4495,3448,6377,6184,5271,
|
||||
5919,3771,3193,6034,6035,5920,5010,6036,5597,6037,6378,6038,3106,5422,6618,5423,
|
||||
5424,4142,6619,4889,5084,4890,4313,5740,6620,3437,5175,5307,5816,4199,5198,5529,
|
||||
5817,5199,5656,4913,5028,5344,3850,6185,2955,5272,5011,5818,4567,4580,5029,5921,
|
||||
3616,5233,6621,6622,6186,4176,6039,6379,6380,3352,5200,5273,2908,5598,5234,3837,
|
||||
5308,6623,6624,5819,4496,4323,5309,5201,6625,6626,4983,3194,3838,4167,5530,5922,
|
||||
5274,6381,6382,3860,3861,5599,3333,4292,4509,6383,3553,5481,5820,5531,4778,6187,
|
||||
3955,3956,4324,4389,4218,3945,4325,3397,2681,5923,4779,5085,4019,5482,4891,5382,
|
||||
5383,6040,4682,3425,5275,4094,6627,5310,3015,5483,5657,4398,5924,3168,4819,6628,
|
||||
5925,6629,5532,4932,4613,6041,6630,4636,6384,4780,4204,5658,4423,5821,3989,4683,
|
||||
5822,6385,4954,6631,5345,6188,5425,5012,5384,3894,6386,4490,4104,6632,5741,5053,
|
||||
6633,5823,5926,5659,5660,5927,6634,5235,5742,5824,4840,4933,4820,6387,4859,5928,
|
||||
4955,6388,4143,3584,5825,5346,5013,6635,5661,6389,5014,5484,5743,4337,5176,5662,
|
||||
6390,2836,6391,3268,6392,6636,6042,5236,6637,4158,6638,5744,5663,4471,5347,3663,
|
||||
4123,5143,4293,3895,6639,6640,5311,5929,5826,3800,6189,6393,6190,5664,5348,3554,
|
||||
3594,4749,4603,6641,5385,4801,6043,5827,4183,6642,5312,5426,4761,6394,5665,6191,
|
||||
4715,2669,6643,6644,5533,3185,5427,5086,5930,5931,5386,6192,6044,6645,4781,4013,
|
||||
5745,4282,4435,5534,4390,4267,6045,5746,4984,6046,2743,6193,3501,4087,5485,5932,
|
||||
5428,4184,4095,5747,4061,5054,3058,3862,5933,5600,6646,5144,3618,6395,3131,5055,
|
||||
5313,6396,4650,4956,3855,6194,3896,5202,4985,4029,4225,6195,6647,5828,5486,5829,
|
||||
3589,3002,6648,6397,4782,5276,6649,6196,6650,4105,3803,4043,5237,5830,6398,4096,
|
||||
3643,6399,3528,6651,4453,3315,4637,6652,3984,6197,5535,3182,3339,6653,3096,2660,
|
||||
6400,6654,3449,5934,4250,4236,6047,6401,5831,6655,5487,3753,4062,5832,6198,6199,
|
||||
6656,3766,6657,3403,4667,6048,6658,4338,2897,5833,3880,2797,3780,4326,6659,5748,
|
||||
5015,6660,5387,4351,5601,4411,6661,3654,4424,5935,4339,4072,5277,4568,5536,6402,
|
||||
6662,5238,6663,5349,5203,6200,5204,6201,5145,4536,5016,5056,4762,5834,4399,4957,
|
||||
6202,6403,5666,5749,6664,4340,6665,5936,5177,5667,6666,6667,3459,4668,6404,6668,
|
||||
6669,4543,6203,6670,4276,6405,4480,5537,6671,4614,5205,5668,6672,3348,2193,4763,
|
||||
6406,6204,5937,5602,4177,5669,3419,6673,4020,6205,4443,4569,5388,3715,3639,6407,
|
||||
6049,4058,6206,6674,5938,4544,6050,4185,4294,4841,4651,4615,5488,6207,6408,6051,
|
||||
5178,3241,3509,5835,6208,4958,5836,4341,5489,5278,6209,2823,5538,5350,5206,5429,
|
||||
6675,4638,4875,4073,3516,4684,4914,4860,5939,5603,5389,6052,5057,3237,5490,3791,
|
||||
6676,6409,6677,4821,4915,4106,5351,5058,4243,5539,4244,5604,4842,4916,5239,3028,
|
||||
3716,5837,5114,5605,5390,5940,5430,6210,4332,6678,5540,4732,3667,3840,6053,4305,
|
||||
3408,5670,5541,6410,2744,5240,5750,6679,3234,5606,6680,5607,5671,3608,4283,4159,
|
||||
4400,5352,4783,6681,6411,6682,4491,4802,6211,6412,5941,6413,6414,5542,5751,6683,
|
||||
4669,3734,5942,6684,6415,5943,5059,3328,4670,4144,4268,6685,6686,6687,6688,4372,
|
||||
3603,6689,5944,5491,4373,3440,6416,5543,4784,4822,5608,3792,4616,5838,5672,3514,
|
||||
5391,6417,4892,6690,4639,6691,6054,5673,5839,6055,6692,6056,5392,6212,4038,5544,
|
||||
5674,4497,6057,6693,5840,4284,5675,4021,4545,5609,6418,4454,6419,6213,4113,4472,
|
||||
5314,3738,5087,5279,4074,5610,4959,4063,3179,4750,6058,6420,6214,3476,4498,4716,
|
||||
5431,4960,4685,6215,5241,6694,6421,6216,6695,5841,5945,6422,3748,5946,5179,3905,
|
||||
5752,5545,5947,4374,6217,4455,6423,4412,6218,4803,5353,6696,3832,5280,6219,4327,
|
||||
4702,6220,6221,6059,4652,5432,6424,3749,4751,6425,5753,4986,5393,4917,5948,5030,
|
||||
5754,4861,4733,6426,4703,6697,6222,4671,5949,4546,4961,5180,6223,5031,3316,5281,
|
||||
6698,4862,4295,4934,5207,3644,6427,5842,5950,6428,6429,4570,5843,5282,6430,6224,
|
||||
5088,3239,6060,6699,5844,5755,6061,6431,2701,5546,6432,5115,5676,4039,3993,3327,
|
||||
4752,4425,5315,6433,3941,6434,5677,4617,4604,3074,4581,6225,5433,6435,6226,6062,
|
||||
4823,5756,5116,6227,3717,5678,4717,5845,6436,5679,5846,6063,5847,6064,3977,3354,
|
||||
6437,3863,5117,6228,5547,5394,4499,4524,6229,4605,6230,4306,4500,6700,5951,6065,
|
||||
3693,5952,5089,4366,4918,6701,6231,5548,6232,6702,6438,4704,5434,6703,6704,5953,
|
||||
4168,6705,5680,3420,6706,5242,4407,6066,3812,5757,5090,5954,4672,4525,3481,5681,
|
||||
4618,5395,5354,5316,5955,6439,4962,6707,4526,6440,3465,4673,6067,6441,5682,6708,
|
||||
5435,5492,5758,5683,4619,4571,4674,4804,4893,4686,5493,4753,6233,6068,4269,6442,
|
||||
6234,5032,4705,5146,5243,5208,5848,6235,6443,4963,5033,4640,4226,6236,5849,3387,
|
||||
6444,6445,4436,4437,5850,4843,5494,4785,4894,6709,4361,6710,5091,5956,3331,6237,
|
||||
4987,5549,6069,6711,4342,3517,4473,5317,6070,6712,6071,4706,6446,5017,5355,6713,
|
||||
6714,4988,5436,6447,4734,5759,6715,4735,4547,4456,4754,6448,5851,6449,6450,3547,
|
||||
5852,5318,6451,6452,5092,4205,6716,6238,4620,4219,5611,6239,6072,4481,5760,5957,
|
||||
5958,4059,6240,6453,4227,4537,6241,5761,4030,4186,5244,5209,3761,4457,4876,3337,
|
||||
5495,5181,6242,5959,5319,5612,5684,5853,3493,5854,6073,4169,5613,5147,4895,6074,
|
||||
5210,6717,5182,6718,3830,6243,2798,3841,6075,6244,5855,5614,3604,4606,5496,5685,
|
||||
5118,5356,6719,6454,5960,5357,5961,6720,4145,3935,4621,5119,5962,4261,6721,6455,
|
||||
4786,5963,4375,4582,6245,6246,6247,6076,5437,4877,5856,3376,4380,6248,4160,6722,
|
||||
5148,6456,5211,6457,6723,4718,6458,6724,6249,5358,4044,3297,6459,6250,5857,5615,
|
||||
5497,5245,6460,5498,6725,6251,6252,5550,3793,5499,2959,5396,6461,6462,4572,5093,
|
||||
5500,5964,3806,4146,6463,4426,5762,5858,6077,6253,4755,3967,4220,5965,6254,4989,
|
||||
5501,6464,4352,6726,6078,4764,2290,5246,3906,5438,5283,3767,4964,2861,5763,5094,
|
||||
6255,6256,4622,5616,5859,5860,4707,6727,4285,4708,4824,5617,6257,5551,4787,5212,
|
||||
4965,4935,4687,6465,6728,6466,5686,6079,3494,4413,2995,5247,5966,5618,6729,5967,
|
||||
5764,5765,5687,5502,6730,6731,6080,5397,6467,4990,6258,6732,4538,5060,5619,6733,
|
||||
4719,5688,5439,5018,5149,5284,5503,6734,6081,4607,6259,5120,3645,5861,4583,6260,
|
||||
4584,4675,5620,4098,5440,6261,4863,2379,3306,4585,5552,5689,4586,5285,6735,4864,
|
||||
6736,5286,6082,6737,4623,3010,4788,4381,4558,5621,4587,4896,3698,3161,5248,4353,
|
||||
4045,6262,3754,5183,4588,6738,6263,6739,6740,5622,3936,6741,6468,6742,6264,5095,
|
||||
6469,4991,5968,6743,4992,6744,6083,4897,6745,4256,5766,4307,3108,3968,4444,5287,
|
||||
3889,4343,6084,4510,6085,4559,6086,4898,5969,6746,5623,5061,4919,5249,5250,5504,
|
||||
5441,6265,5320,4878,3242,5862,5251,3428,6087,6747,4237,5624,5442,6266,5553,4539,
|
||||
6748,2585,3533,5398,4262,6088,5150,4736,4438,6089,6267,5505,4966,6749,6268,6750,
|
||||
6269,5288,5554,3650,6090,6091,4624,6092,5690,6751,5863,4270,5691,4277,5555,5864,
|
||||
6752,5692,4720,4865,6470,5151,4688,4825,6753,3094,6754,6471,3235,4653,6755,5213,
|
||||
5399,6756,3201,4589,5865,4967,6472,5866,6473,5019,3016,6757,5321,4756,3957,4573,
|
||||
6093,4993,5767,4721,6474,6758,5625,6759,4458,6475,6270,6760,5556,4994,5214,5252,
|
||||
6271,3875,5768,6094,5034,5506,4376,5769,6761,2120,6476,5253,5770,6762,5771,5970,
|
||||
3990,5971,5557,5558,5772,6477,6095,2787,4641,5972,5121,6096,6097,6272,6763,3703,
|
||||
5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978,
|
||||
4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767)
|
||||
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512
|
||||
)
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -13,12 +13,12 @@
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
@@ -28,14 +28,19 @@
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import GB2312DistributionAnalysis
|
||||
from .mbcssm import GB2312SMModel
|
||||
from .mbcssm import GB2312_SM_MODEL
|
||||
|
||||
class GB2312Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
MultiByteCharSetProber.__init__(self)
|
||||
self._mCodingSM = CodingStateMachine(GB2312SMModel)
|
||||
self._mDistributionAnalyzer = GB2312DistributionAnalysis()
|
||||
super(GB2312Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
|
||||
self.distribution_analyzer = GB2312DistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
def get_charset_name(self):
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "GB2312"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Chinese"
|
||||
|
||||
@@ -26,8 +26,7 @@
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .constants import eNotMe, eDetecting
|
||||
from .compat import wrap_ord
|
||||
from .enums import ProbingState
|
||||
|
||||
# This prober doesn't actually recognize a language or a charset.
|
||||
# It is a helper prober for the use of the Hebrew model probers
|
||||
@@ -126,56 +125,59 @@ from .compat import wrap_ord
|
||||
# model probers scores. The answer is returned in the form of the name of the
|
||||
# charset identified, either "windows-1255" or "ISO-8859-8".
|
||||
|
||||
# windows-1255 / ISO-8859-8 code points of interest
|
||||
FINAL_KAF = 0xea
|
||||
NORMAL_KAF = 0xeb
|
||||
FINAL_MEM = 0xed
|
||||
NORMAL_MEM = 0xee
|
||||
FINAL_NUN = 0xef
|
||||
NORMAL_NUN = 0xf0
|
||||
FINAL_PE = 0xf3
|
||||
NORMAL_PE = 0xf4
|
||||
FINAL_TSADI = 0xf5
|
||||
NORMAL_TSADI = 0xf6
|
||||
|
||||
# Minimum Visual vs Logical final letter score difference.
|
||||
# If the difference is below this, don't rely solely on the final letter score
|
||||
# distance.
|
||||
MIN_FINAL_CHAR_DISTANCE = 5
|
||||
|
||||
# Minimum Visual vs Logical model score difference.
|
||||
# If the difference is below this, don't rely at all on the model score
|
||||
# distance.
|
||||
MIN_MODEL_DISTANCE = 0.01
|
||||
|
||||
VISUAL_HEBREW_NAME = "ISO-8859-8"
|
||||
LOGICAL_HEBREW_NAME = "windows-1255"
|
||||
|
||||
|
||||
class HebrewProber(CharSetProber):
|
||||
# windows-1255 / ISO-8859-8 code points of interest
|
||||
FINAL_KAF = 0xea
|
||||
NORMAL_KAF = 0xeb
|
||||
FINAL_MEM = 0xed
|
||||
NORMAL_MEM = 0xee
|
||||
FINAL_NUN = 0xef
|
||||
NORMAL_NUN = 0xf0
|
||||
FINAL_PE = 0xf3
|
||||
NORMAL_PE = 0xf4
|
||||
FINAL_TSADI = 0xf5
|
||||
NORMAL_TSADI = 0xf6
|
||||
|
||||
# Minimum Visual vs Logical final letter score difference.
|
||||
# If the difference is below this, don't rely solely on the final letter score
|
||||
# distance.
|
||||
MIN_FINAL_CHAR_DISTANCE = 5
|
||||
|
||||
# Minimum Visual vs Logical model score difference.
|
||||
# If the difference is below this, don't rely at all on the model score
|
||||
# distance.
|
||||
MIN_MODEL_DISTANCE = 0.01
|
||||
|
||||
VISUAL_HEBREW_NAME = "ISO-8859-8"
|
||||
LOGICAL_HEBREW_NAME = "windows-1255"
|
||||
|
||||
def __init__(self):
|
||||
CharSetProber.__init__(self)
|
||||
self._mLogicalProber = None
|
||||
self._mVisualProber = None
|
||||
super(HebrewProber, self).__init__()
|
||||
self._final_char_logical_score = None
|
||||
self._final_char_visual_score = None
|
||||
self._prev = None
|
||||
self._before_prev = None
|
||||
self._logical_prober = None
|
||||
self._visual_prober = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._mFinalCharLogicalScore = 0
|
||||
self._mFinalCharVisualScore = 0
|
||||
self._final_char_logical_score = 0
|
||||
self._final_char_visual_score = 0
|
||||
# The two last characters seen in the previous buffer,
|
||||
# mPrev and mBeforePrev are initialized to space in order to simulate
|
||||
# a word delimiter at the beginning of the data
|
||||
self._mPrev = ' '
|
||||
self._mBeforePrev = ' '
|
||||
self._prev = ' '
|
||||
self._before_prev = ' '
|
||||
# These probers are owned by the group prober.
|
||||
|
||||
def set_model_probers(self, logicalProber, visualProber):
|
||||
self._mLogicalProber = logicalProber
|
||||
self._mVisualProber = visualProber
|
||||
self._logical_prober = logicalProber
|
||||
self._visual_prober = visualProber
|
||||
|
||||
def is_final(self, c):
|
||||
return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,
|
||||
FINAL_TSADI]
|
||||
return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN,
|
||||
self.FINAL_PE, self.FINAL_TSADI]
|
||||
|
||||
def is_non_final(self, c):
|
||||
# The normal Tsadi is not a good Non-Final letter due to words like
|
||||
@@ -188,9 +190,10 @@ class HebrewProber(CharSetProber):
|
||||
# for example legally end with a Non-Final Pe or Kaf. However, the
|
||||
# benefit of these letters as Non-Final letters outweighs the damage
|
||||
# since these words are quite rare.
|
||||
return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
|
||||
return c in [self.NORMAL_KAF, self.NORMAL_MEM,
|
||||
self.NORMAL_NUN, self.NORMAL_PE]
|
||||
|
||||
def feed(self, aBuf):
|
||||
def feed(self, byte_str):
|
||||
# Final letter analysis for logical-visual decision.
|
||||
# Look for evidence that the received buffer is either logical Hebrew
|
||||
# or visual Hebrew.
|
||||
@@ -217,67 +220,73 @@ class HebrewProber(CharSetProber):
|
||||
# We automatically filter out all 7-bit characters (replace them with
|
||||
# spaces) so the word boundary detection works properly. [MAP]
|
||||
|
||||
if self.get_state() == eNotMe:
|
||||
if self.state == ProbingState.NOT_ME:
|
||||
# Both model probers say it's not them. No reason to continue.
|
||||
return eNotMe
|
||||
return ProbingState.NOT_ME
|
||||
|
||||
aBuf = self.filter_high_bit_only(aBuf)
|
||||
byte_str = self.filter_high_byte_only(byte_str)
|
||||
|
||||
for cur in aBuf:
|
||||
for cur in byte_str:
|
||||
if cur == ' ':
|
||||
# We stand on a space - a word just ended
|
||||
if self._mBeforePrev != ' ':
|
||||
# next-to-last char was not a space so self._mPrev is not a
|
||||
if self._before_prev != ' ':
|
||||
# next-to-last char was not a space so self._prev is not a
|
||||
# 1 letter word
|
||||
if self.is_final(self._mPrev):
|
||||
if self.is_final(self._prev):
|
||||
# case (1) [-2:not space][-1:final letter][cur:space]
|
||||
self._mFinalCharLogicalScore += 1
|
||||
elif self.is_non_final(self._mPrev):
|
||||
self._final_char_logical_score += 1
|
||||
elif self.is_non_final(self._prev):
|
||||
# case (2) [-2:not space][-1:Non-Final letter][
|
||||
# cur:space]
|
||||
self._mFinalCharVisualScore += 1
|
||||
self._final_char_visual_score += 1
|
||||
else:
|
||||
# Not standing on a space
|
||||
if ((self._mBeforePrev == ' ') and
|
||||
(self.is_final(self._mPrev)) and (cur != ' ')):
|
||||
if ((self._before_prev == ' ') and
|
||||
(self.is_final(self._prev)) and (cur != ' ')):
|
||||
# case (3) [-2:space][-1:final letter][cur:not space]
|
||||
self._mFinalCharVisualScore += 1
|
||||
self._mBeforePrev = self._mPrev
|
||||
self._mPrev = cur
|
||||
self._final_char_visual_score += 1
|
||||
self._before_prev = self._prev
|
||||
self._prev = cur
|
||||
|
||||
# Forever detecting, till the end or until both model probers return
|
||||
# eNotMe (handled above)
|
||||
return eDetecting
|
||||
# ProbingState.NOT_ME (handled above)
|
||||
return ProbingState.DETECTING
|
||||
|
||||
def get_charset_name(self):
|
||||
@property
|
||||
def charset_name(self):
|
||||
# Make the decision: is it Logical or Visual?
|
||||
# If the final letter score distance is dominant enough, rely on it.
|
||||
finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore
|
||||
if finalsub >= MIN_FINAL_CHAR_DISTANCE:
|
||||
return LOGICAL_HEBREW_NAME
|
||||
if finalsub <= -MIN_FINAL_CHAR_DISTANCE:
|
||||
return VISUAL_HEBREW_NAME
|
||||
finalsub = self._final_char_logical_score - self._final_char_visual_score
|
||||
if finalsub >= self.MIN_FINAL_CHAR_DISTANCE:
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# It's not dominant enough, try to rely on the model scores instead.
|
||||
modelsub = (self._mLogicalProber.get_confidence()
|
||||
- self._mVisualProber.get_confidence())
|
||||
if modelsub > MIN_MODEL_DISTANCE:
|
||||
return LOGICAL_HEBREW_NAME
|
||||
if modelsub < -MIN_MODEL_DISTANCE:
|
||||
return VISUAL_HEBREW_NAME
|
||||
modelsub = (self._logical_prober.get_confidence()
|
||||
- self._visual_prober.get_confidence())
|
||||
if modelsub > self.MIN_MODEL_DISTANCE:
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
if modelsub < -self.MIN_MODEL_DISTANCE:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# Still no good, back to final letter distance, maybe it'll save the
|
||||
# day.
|
||||
if finalsub < 0.0:
|
||||
return VISUAL_HEBREW_NAME
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# (finalsub > 0 - Logical) or (don't know what to do) default to
|
||||
# Logical.
|
||||
return LOGICAL_HEBREW_NAME
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
|
||||
def get_state(self):
|
||||
@property
|
||||
def language(self):
|
||||
return 'Hebrew'
|
||||
|
||||
@property
|
||||
def state(self):
|
||||
# Remain active as long as any of the model probers are active.
|
||||
if (self._mLogicalProber.get_state() == eNotMe) and \
|
||||
(self._mVisualProber.get_state() == eNotMe):
|
||||
return eNotMe
|
||||
return eDetecting
|
||||
if (self._logical_prober.state == ProbingState.NOT_ME) and \
|
||||
(self._visual_prober.state == ProbingState.NOT_ME):
|
||||
return ProbingState.NOT_ME
|
||||
return ProbingState.DETECTING
|
||||
|
||||
@@ -46,7 +46,7 @@ JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0
|
||||
# Char to FreqOrder table ,
|
||||
JIS_TABLE_SIZE = 4368
|
||||
|
||||
JISCharToFreqOrder = (
|
||||
JIS_CHAR_TO_FREQ_ORDER = (
|
||||
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16
|
||||
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32
|
||||
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48
|
||||
@@ -320,250 +320,6 @@ JISCharToFreqOrder = (
|
||||
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336
|
||||
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352
|
||||
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512
|
||||
#Everything below is of no interest for detection purpose
|
||||
2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, # 4384
|
||||
6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, # 4400
|
||||
6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, # 4416
|
||||
6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, # 4432
|
||||
6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, # 4448
|
||||
4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, # 4464
|
||||
4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, # 4480
|
||||
3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, # 4496
|
||||
3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, # 4512
|
||||
4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, # 4528
|
||||
3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, # 4544
|
||||
6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, # 4560
|
||||
4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, # 4576
|
||||
6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, # 4592
|
||||
6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, # 4608
|
||||
6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, # 4624
|
||||
6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, # 4640
|
||||
6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, # 4656
|
||||
6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, # 4672
|
||||
3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, # 4688
|
||||
3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, # 4704
|
||||
6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, # 4720
|
||||
2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, # 4736
|
||||
4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, # 4752
|
||||
4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, # 4768
|
||||
4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, # 4784
|
||||
6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, # 4800
|
||||
3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, # 4816
|
||||
4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, # 4832
|
||||
4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, # 4848
|
||||
6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, # 4864
|
||||
4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, # 4880
|
||||
6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, # 4896
|
||||
3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, # 4912
|
||||
2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, # 4928
|
||||
4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, # 4944
|
||||
2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, # 4960
|
||||
6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, # 4976
|
||||
4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, # 4992
|
||||
6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, # 5008
|
||||
6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, # 5024
|
||||
6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, # 5040
|
||||
4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, # 5056
|
||||
6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, # 5072
|
||||
2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, # 5088
|
||||
6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, # 5104
|
||||
4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, # 5120
|
||||
6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, # 5136
|
||||
4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, # 5152
|
||||
4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, # 5168
|
||||
6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, # 5184
|
||||
6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, # 5200
|
||||
6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, # 5216
|
||||
3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, # 5232
|
||||
1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, # 5248
|
||||
3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, # 5264
|
||||
3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, # 5280
|
||||
4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, # 5296
|
||||
6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, # 5312
|
||||
3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, # 5328
|
||||
6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, # 5344
|
||||
3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, # 5360
|
||||
3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, # 5376
|
||||
2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, # 5392
|
||||
6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, # 5408
|
||||
6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, # 5424
|
||||
3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, # 5440
|
||||
6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, # 5456
|
||||
3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, # 5472
|
||||
6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, # 5488
|
||||
6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, # 5504
|
||||
6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, # 5520
|
||||
4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, # 5536
|
||||
6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, # 5552
|
||||
4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, # 5568
|
||||
3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, # 5584
|
||||
3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, # 5600
|
||||
6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, # 5616
|
||||
6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, # 5632
|
||||
4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, # 5648
|
||||
6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, # 5664
|
||||
6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, # 5680
|
||||
6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, # 5696
|
||||
6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, # 5712
|
||||
6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, # 5728
|
||||
6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, # 5744
|
||||
4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, # 5760
|
||||
4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, # 5776
|
||||
3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, # 5792
|
||||
6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, # 5808
|
||||
4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, # 5824
|
||||
2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, # 5840
|
||||
6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, # 5856
|
||||
6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, # 5872
|
||||
4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, # 5888
|
||||
2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, # 5904
|
||||
4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, # 5920
|
||||
2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, # 5936
|
||||
4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, # 5952
|
||||
4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, # 5968
|
||||
4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, # 5984
|
||||
6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, # 6000
|
||||
3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, # 6016
|
||||
6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, # 6032
|
||||
3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, # 6048
|
||||
6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, # 6064
|
||||
2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, # 6080
|
||||
3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, # 6096
|
||||
7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, # 6112
|
||||
2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, # 6128
|
||||
3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, # 6144
|
||||
3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, # 6160
|
||||
3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, # 6176
|
||||
3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, # 6192
|
||||
7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, # 6208
|
||||
7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, # 6224
|
||||
7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, # 6240
|
||||
7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, # 6256
|
||||
7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, # 6272
|
||||
4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, # 6288
|
||||
3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, # 6304
|
||||
3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, # 6320
|
||||
4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, # 6336
|
||||
3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, # 6352
|
||||
3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, # 6368
|
||||
7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, # 6384
|
||||
4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, # 6400
|
||||
7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, # 6416
|
||||
7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, # 6432
|
||||
7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, # 6448
|
||||
7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, # 6464
|
||||
7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, # 6480
|
||||
4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, # 6496
|
||||
4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, # 6512
|
||||
7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, # 6528
|
||||
3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, # 6544
|
||||
4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, # 6560
|
||||
7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, # 6576
|
||||
7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, # 6592
|
||||
4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, # 6608
|
||||
3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, # 6624
|
||||
3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, # 6640
|
||||
7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, # 6656
|
||||
4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, # 6672
|
||||
4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, # 6688
|
||||
4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, # 6704
|
||||
4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, # 6720
|
||||
4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, # 6736
|
||||
4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, # 6752
|
||||
7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, # 6768
|
||||
7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, # 6784
|
||||
7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, # 6800
|
||||
7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, # 6816
|
||||
7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, # 6832
|
||||
2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, # 6848
|
||||
3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, # 6864
|
||||
7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, # 6880
|
||||
7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, # 6896
|
||||
3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, # 6912
|
||||
4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, # 6928
|
||||
3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, # 6944
|
||||
3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, # 6960
|
||||
2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, # 6976
|
||||
7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, # 6992
|
||||
7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, # 7008
|
||||
4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, # 7024
|
||||
3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, # 7040
|
||||
3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, # 7056
|
||||
7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, # 7072
|
||||
7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, # 7088
|
||||
7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, # 7104
|
||||
4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, # 7120
|
||||
7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, # 7136
|
||||
2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, # 7152
|
||||
3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, # 7168
|
||||
4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, # 7184
|
||||
7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, # 7200
|
||||
4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, # 7216
|
||||
4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, # 7232
|
||||
7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, # 7248
|
||||
7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, # 7264
|
||||
5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, # 7280
|
||||
7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, # 7296
|
||||
7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, # 7312
|
||||
7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, # 7328
|
||||
7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, # 7344
|
||||
7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, # 7360
|
||||
5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, # 7376
|
||||
5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, # 7392
|
||||
7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, # 7408
|
||||
3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, # 7424
|
||||
7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, # 7440
|
||||
7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, # 7456
|
||||
3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, # 7472
|
||||
7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, # 7488
|
||||
7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, # 7504
|
||||
1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, # 7520
|
||||
3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, # 7536
|
||||
4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, # 7552
|
||||
2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, # 7568
|
||||
3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, # 7584
|
||||
2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, # 7600
|
||||
5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, # 7616
|
||||
4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, # 7632
|
||||
4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, # 7648
|
||||
5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, # 7664
|
||||
7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, # 7680
|
||||
7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, # 7696
|
||||
7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, # 7712
|
||||
7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, # 7728
|
||||
3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, # 7744
|
||||
7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, # 7760
|
||||
3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, # 7776
|
||||
7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, # 7792
|
||||
4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, # 7808
|
||||
7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, # 7824
|
||||
7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7840
|
||||
7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, # 7856
|
||||
7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, # 7872
|
||||
7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, # 7888
|
||||
7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, # 7904
|
||||
7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, # 7920
|
||||
7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, # 7936
|
||||
7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, # 7952
|
||||
7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, # 7968
|
||||
7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, # 7984
|
||||
7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, # 8000
|
||||
8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, # 8016
|
||||
8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, # 8032
|
||||
8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, # 8048
|
||||
8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, # 8064
|
||||
8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, # 8080
|
||||
8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, # 8096
|
||||
8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, # 8112
|
||||
8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, # 8128
|
||||
8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, # 8144
|
||||
8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, # 8160
|
||||
8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, # 8176
|
||||
8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, # 8192
|
||||
8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, # 8208
|
||||
8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, # 8224
|
||||
8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, # 8240
|
||||
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, # 8256
|
||||
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271) # 8272
|
||||
)
|
||||
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -25,13 +25,6 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .compat import wrap_ord
|
||||
|
||||
NUM_OF_CATEGORY = 6
|
||||
DONT_KNOW = -1
|
||||
ENOUGH_REL_THRESHOLD = 100
|
||||
MAX_REL_THRESHOLD = 1000
|
||||
MINIMUM_DATA_THRESHOLD = 4
|
||||
|
||||
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
|
||||
jp2CharContext = (
|
||||
@@ -120,24 +113,35 @@ jp2CharContext = (
|
||||
(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
|
||||
)
|
||||
|
||||
class JapaneseContextAnalysis:
|
||||
class JapaneseContextAnalysis(object):
|
||||
NUM_OF_CATEGORY = 6
|
||||
DONT_KNOW = -1
|
||||
ENOUGH_REL_THRESHOLD = 100
|
||||
MAX_REL_THRESHOLD = 1000
|
||||
MINIMUM_DATA_THRESHOLD = 4
|
||||
|
||||
def __init__(self):
|
||||
self._total_rel = None
|
||||
self._rel_sample = None
|
||||
self._need_to_skip_char_num = None
|
||||
self._last_char_order = None
|
||||
self._done = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._mTotalRel = 0 # total sequence received
|
||||
# category counters, each interger counts sequence in its category
|
||||
self._mRelSample = [0] * NUM_OF_CATEGORY
|
||||
self._total_rel = 0 # total sequence received
|
||||
# category counters, each integer counts sequence in its category
|
||||
self._rel_sample = [0] * self.NUM_OF_CATEGORY
|
||||
# if last byte in current buffer is not the last byte of a character,
|
||||
# we need to know how many bytes to skip in next buffer
|
||||
self._mNeedToSkipCharNum = 0
|
||||
self._mLastCharOrder = -1 # The order of previous char
|
||||
self._need_to_skip_char_num = 0
|
||||
self._last_char_order = -1 # The order of previous char
|
||||
# If this flag is set to True, detection is done and conclusion has
|
||||
# been made
|
||||
self._mDone = False
|
||||
self._done = False
|
||||
|
||||
def feed(self, aBuf, aLen):
|
||||
if self._mDone:
|
||||
def feed(self, byte_str, num_bytes):
|
||||
if self._done:
|
||||
return
|
||||
|
||||
# The buffer we got is byte oriented, and a character may span in more than one
|
||||
@@ -147,81 +151,83 @@ class JapaneseContextAnalysis:
|
||||
# well and analyse the character once it is complete, but since a
|
||||
# character will not make much difference, by simply skipping
|
||||
# this character will simply our logic and improve performance.
|
||||
i = self._mNeedToSkipCharNum
|
||||
while i < aLen:
|
||||
order, charLen = self.get_order(aBuf[i:i + 2])
|
||||
i += charLen
|
||||
if i > aLen:
|
||||
self._mNeedToSkipCharNum = i - aLen
|
||||
self._mLastCharOrder = -1
|
||||
i = self._need_to_skip_char_num
|
||||
while i < num_bytes:
|
||||
order, char_len = self.get_order(byte_str[i:i + 2])
|
||||
i += char_len
|
||||
if i > num_bytes:
|
||||
self._need_to_skip_char_num = i - num_bytes
|
||||
self._last_char_order = -1
|
||||
else:
|
||||
if (order != -1) and (self._mLastCharOrder != -1):
|
||||
self._mTotalRel += 1
|
||||
if self._mTotalRel > MAX_REL_THRESHOLD:
|
||||
self._mDone = True
|
||||
if (order != -1) and (self._last_char_order != -1):
|
||||
self._total_rel += 1
|
||||
if self._total_rel > self.MAX_REL_THRESHOLD:
|
||||
self._done = True
|
||||
break
|
||||
self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1
|
||||
self._mLastCharOrder = order
|
||||
self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1
|
||||
self._last_char_order = order
|
||||
|
||||
def got_enough_data(self):
|
||||
return self._mTotalRel > ENOUGH_REL_THRESHOLD
|
||||
return self._total_rel > self.ENOUGH_REL_THRESHOLD
|
||||
|
||||
def get_confidence(self):
|
||||
# This is just one way to calculate confidence. It works well for me.
|
||||
if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
|
||||
return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
|
||||
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
|
||||
return (self._total_rel - self._rel_sample[0]) / self._total_rel
|
||||
else:
|
||||
return DONT_KNOW
|
||||
return self.DONT_KNOW
|
||||
|
||||
def get_order(self, aBuf):
|
||||
def get_order(self, byte_str):
|
||||
return -1, 1
|
||||
|
||||
class SJISContextAnalysis(JapaneseContextAnalysis):
|
||||
def __init__(self):
|
||||
self.charset_name = "SHIFT_JIS"
|
||||
super(SJISContextAnalysis, self).__init__()
|
||||
self._charset_name = "SHIFT_JIS"
|
||||
|
||||
def get_charset_name(self):
|
||||
return self.charset_name
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self._charset_name
|
||||
|
||||
def get_order(self, aBuf):
|
||||
if not aBuf:
|
||||
def get_order(self, byte_str):
|
||||
if not byte_str:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
first_char = wrap_ord(aBuf[0])
|
||||
if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
|
||||
charLen = 2
|
||||
first_char = byte_str[0]
|
||||
if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC):
|
||||
char_len = 2
|
||||
if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
|
||||
self.charset_name = "CP932"
|
||||
self._charset_name = "CP932"
|
||||
else:
|
||||
charLen = 1
|
||||
char_len = 1
|
||||
|
||||
# return its order if it is hiragana
|
||||
if len(aBuf) > 1:
|
||||
second_char = wrap_ord(aBuf[1])
|
||||
if len(byte_str) > 1:
|
||||
second_char = byte_str[1]
|
||||
if (first_char == 202) and (0x9F <= second_char <= 0xF1):
|
||||
return second_char - 0x9F, charLen
|
||||
return second_char - 0x9F, char_len
|
||||
|
||||
return -1, charLen
|
||||
return -1, char_len
|
||||
|
||||
class EUCJPContextAnalysis(JapaneseContextAnalysis):
|
||||
def get_order(self, aBuf):
|
||||
if not aBuf:
|
||||
def get_order(self, byte_str):
|
||||
if not byte_str:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
first_char = wrap_ord(aBuf[0])
|
||||
first_char = byte_str[0]
|
||||
if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
|
||||
charLen = 2
|
||||
char_len = 2
|
||||
elif first_char == 0x8F:
|
||||
charLen = 3
|
||||
char_len = 3
|
||||
else:
|
||||
charLen = 1
|
||||
char_len = 1
|
||||
|
||||
# return its order if it is hiragana
|
||||
if len(aBuf) > 1:
|
||||
second_char = wrap_ord(aBuf[1])
|
||||
if len(byte_str) > 1:
|
||||
second_char = byte_str[1]
|
||||
if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
|
||||
return second_char - 0xA1, charLen
|
||||
return second_char - 0xA1, char_len
|
||||
|
||||
return -1, char_len
|
||||
|
||||
return -1, charLen
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -210,20 +210,19 @@ BulgarianLangModel = (
|
||||
)
|
||||
|
||||
Latin5BulgarianModel = {
|
||||
'charToOrderMap': Latin5_BulgarianCharToOrderMap,
|
||||
'precedenceMatrix': BulgarianLangModel,
|
||||
'mTypicalPositiveRatio': 0.969392,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "ISO-8859-5"
|
||||
'char_to_order_map': Latin5_BulgarianCharToOrderMap,
|
||||
'precedence_matrix': BulgarianLangModel,
|
||||
'typical_positive_ratio': 0.969392,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "ISO-8859-5",
|
||||
'language': 'Bulgairan',
|
||||
}
|
||||
|
||||
Win1251BulgarianModel = {
|
||||
'charToOrderMap': win1251BulgarianCharToOrderMap,
|
||||
'precedenceMatrix': BulgarianLangModel,
|
||||
'mTypicalPositiveRatio': 0.969392,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "windows-1251"
|
||||
'char_to_order_map': win1251BulgarianCharToOrderMap,
|
||||
'precedence_matrix': BulgarianLangModel,
|
||||
'typical_positive_ratio': 0.969392,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1251",
|
||||
'language': 'Bulgarian',
|
||||
}
|
||||
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
|
||||
# KOI8-R language model
|
||||
# Character Mapping Table:
|
||||
KOI8R_CharToOrderMap = (
|
||||
KOI8R_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
@@ -46,7 +46,7 @@ KOI8R_CharToOrderMap = (
|
||||
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
|
||||
)
|
||||
|
||||
win1251_CharToOrderMap = (
|
||||
win1251_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
@@ -65,7 +65,7 @@ win1251_CharToOrderMap = (
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||
)
|
||||
|
||||
latin5_CharToOrderMap = (
|
||||
latin5_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
@@ -84,7 +84,7 @@ latin5_CharToOrderMap = (
|
||||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
||||
)
|
||||
|
||||
macCyrillic_CharToOrderMap = (
|
||||
macCyrillic_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
@@ -103,7 +103,7 @@ macCyrillic_CharToOrderMap = (
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
|
||||
)
|
||||
|
||||
IBM855_CharToOrderMap = (
|
||||
IBM855_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
@@ -122,7 +122,7 @@ IBM855_CharToOrderMap = (
|
||||
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
|
||||
)
|
||||
|
||||
IBM866_CharToOrderMap = (
|
||||
IBM866_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
@@ -279,51 +279,55 @@ RussianLangModel = (
|
||||
)
|
||||
|
||||
Koi8rModel = {
|
||||
'charToOrderMap': KOI8R_CharToOrderMap,
|
||||
'precedenceMatrix': RussianLangModel,
|
||||
'mTypicalPositiveRatio': 0.976601,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "KOI8-R"
|
||||
'char_to_order_map': KOI8R_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "KOI8-R",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
Win1251CyrillicModel = {
|
||||
'charToOrderMap': win1251_CharToOrderMap,
|
||||
'precedenceMatrix': RussianLangModel,
|
||||
'mTypicalPositiveRatio': 0.976601,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "windows-1251"
|
||||
'char_to_order_map': win1251_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1251",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
Latin5CyrillicModel = {
|
||||
'charToOrderMap': latin5_CharToOrderMap,
|
||||
'precedenceMatrix': RussianLangModel,
|
||||
'mTypicalPositiveRatio': 0.976601,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "ISO-8859-5"
|
||||
'char_to_order_map': latin5_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "ISO-8859-5",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
MacCyrillicModel = {
|
||||
'charToOrderMap': macCyrillic_CharToOrderMap,
|
||||
'precedenceMatrix': RussianLangModel,
|
||||
'mTypicalPositiveRatio': 0.976601,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "MacCyrillic"
|
||||
};
|
||||
'char_to_order_map': macCyrillic_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "MacCyrillic",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
Ibm866Model = {
|
||||
'charToOrderMap': IBM866_CharToOrderMap,
|
||||
'precedenceMatrix': RussianLangModel,
|
||||
'mTypicalPositiveRatio': 0.976601,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "IBM866"
|
||||
'char_to_order_map': IBM866_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "IBM866",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
Ibm855Model = {
|
||||
'charToOrderMap': IBM855_CharToOrderMap,
|
||||
'precedenceMatrix': RussianLangModel,
|
||||
'mTypicalPositiveRatio': 0.976601,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "IBM855"
|
||||
'char_to_order_map': IBM855_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "IBM855",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin7_CharToOrderMap = (
|
||||
Latin7_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
@@ -50,7 +50,7 @@ Latin7_CharToOrderMap = (
|
||||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
|
||||
)
|
||||
|
||||
win1253_CharToOrderMap = (
|
||||
win1253_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
@@ -207,19 +207,19 @@ GreekLangModel = (
|
||||
)
|
||||
|
||||
Latin7GreekModel = {
|
||||
'charToOrderMap': Latin7_CharToOrderMap,
|
||||
'precedenceMatrix': GreekLangModel,
|
||||
'mTypicalPositiveRatio': 0.982851,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "ISO-8859-7"
|
||||
'char_to_order_map': Latin7_char_to_order_map,
|
||||
'precedence_matrix': GreekLangModel,
|
||||
'typical_positive_ratio': 0.982851,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "ISO-8859-7",
|
||||
'language': 'Greek',
|
||||
}
|
||||
|
||||
Win1253GreekModel = {
|
||||
'charToOrderMap': win1253_CharToOrderMap,
|
||||
'precedenceMatrix': GreekLangModel,
|
||||
'mTypicalPositiveRatio': 0.982851,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "windows-1253"
|
||||
'char_to_order_map': win1253_char_to_order_map,
|
||||
'precedence_matrix': GreekLangModel,
|
||||
'typical_positive_ratio': 0.982851,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1253",
|
||||
'language': 'Greek',
|
||||
}
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
# Windows-1255 language model
|
||||
# Character Mapping Table:
|
||||
win1255_CharToOrderMap = (
|
||||
WIN1255_CHAR_TO_ORDER_MAP = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
@@ -59,7 +59,7 @@ win1255_CharToOrderMap = (
|
||||
# first 1024 sequences: 1.5981%
|
||||
# rest sequences: 0.087%
|
||||
# negative sequences: 0.0015%
|
||||
HebrewLangModel = (
|
||||
HEBREW_LANG_MODEL = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
|
||||
3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
|
||||
@@ -191,11 +191,10 @@ HebrewLangModel = (
|
||||
)
|
||||
|
||||
Win1255HebrewModel = {
|
||||
'charToOrderMap': win1255_CharToOrderMap,
|
||||
'precedenceMatrix': HebrewLangModel,
|
||||
'mTypicalPositiveRatio': 0.984004,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "windows-1255"
|
||||
'char_to_order_map': WIN1255_CHAR_TO_ORDER_MAP,
|
||||
'precedence_matrix': HEBREW_LANG_MODEL,
|
||||
'typical_positive_ratio': 0.984004,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1255",
|
||||
'language': 'Hebrew',
|
||||
}
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -207,19 +207,19 @@ HungarianLangModel = (
|
||||
)
|
||||
|
||||
Latin2HungarianModel = {
|
||||
'charToOrderMap': Latin2_HungarianCharToOrderMap,
|
||||
'precedenceMatrix': HungarianLangModel,
|
||||
'mTypicalPositiveRatio': 0.947368,
|
||||
'keepEnglishLetter': True,
|
||||
'charsetName': "ISO-8859-2"
|
||||
'char_to_order_map': Latin2_HungarianCharToOrderMap,
|
||||
'precedence_matrix': HungarianLangModel,
|
||||
'typical_positive_ratio': 0.947368,
|
||||
'keep_english_letter': True,
|
||||
'charset_name': "ISO-8859-2",
|
||||
'language': 'Hungarian',
|
||||
}
|
||||
|
||||
Win1250HungarianModel = {
|
||||
'charToOrderMap': win1250HungarianCharToOrderMap,
|
||||
'precedenceMatrix': HungarianLangModel,
|
||||
'mTypicalPositiveRatio': 0.947368,
|
||||
'keepEnglishLetter': True,
|
||||
'charsetName': "windows-1250"
|
||||
'char_to_order_map': win1250HungarianCharToOrderMap,
|
||||
'precedence_matrix': HungarianLangModel,
|
||||
'typical_positive_ratio': 0.947368,
|
||||
'keep_english_letter': True,
|
||||
'charset_name': "windows-1250",
|
||||
'language': 'Hungarian',
|
||||
}
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -190,11 +190,10 @@ ThaiLangModel = (
|
||||
)
|
||||
|
||||
TIS620ThaiModel = {
|
||||
'charToOrderMap': TIS620CharToOrderMap,
|
||||
'precedenceMatrix': ThaiLangModel,
|
||||
'mTypicalPositiveRatio': 0.926386,
|
||||
'keepEnglishLetter': False,
|
||||
'charsetName': "TIS-620"
|
||||
'char_to_order_map': TIS620CharToOrderMap,
|
||||
'precedence_matrix': ThaiLangModel,
|
||||
'typical_positive_ratio': 0.926386,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "TIS-620",
|
||||
'language': 'Thai',
|
||||
}
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Özgür Baskın - Turkish Language Model
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin5_TurkishCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255, 23, 37, 47, 39, 29, 52, 36, 45, 53, 60, 16, 49, 20, 46, 42,
|
||||
48, 69, 44, 35, 31, 51, 38, 62, 65, 43, 56,255,255,255,255,255,
|
||||
255, 1, 21, 28, 12, 2, 18, 27, 25, 3, 24, 10, 5, 13, 4, 15,
|
||||
26, 64, 7, 8, 9, 14, 32, 57, 58, 11, 22,255,255,255,255,255,
|
||||
180,179,178,177,176,175,174,173,172,171,170,169,168,167,166,165,
|
||||
164,163,162,161,160,159,101,158,157,156,155,154,153,152,151,106,
|
||||
150,149,148,147,146,145,144,100,143,142,141,140,139,138,137,136,
|
||||
94, 80, 93,135,105,134,133, 63,132,131,130,129,128,127,126,125,
|
||||
124,104, 73, 99, 79, 85,123, 54,122, 98, 92,121,120, 91,103,119,
|
||||
68,118,117, 97,116,115, 50, 90,114,113,112,111, 55, 41, 40, 86,
|
||||
89, 70, 59, 78, 71, 82, 88, 33, 77, 66, 84, 83,110, 75, 61, 96,
|
||||
30, 67,109, 74, 87,102, 34, 95, 81,108, 76, 72, 17, 6, 19,107,
|
||||
)
|
||||
|
||||
TurkishLangModel = (
|
||||
3,2,3,3,3,1,3,3,3,3,3,3,3,3,2,1,1,3,3,1,3,3,0,3,3,3,3,3,0,3,1,3,
|
||||
3,2,1,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
|
||||
3,2,2,3,3,0,3,3,3,3,3,3,3,2,3,1,0,3,3,1,3,3,0,3,3,3,3,3,0,3,0,3,
|
||||
3,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,0,1,0,1,
|
||||
3,3,2,3,3,0,3,3,3,3,3,3,3,2,3,1,1,3,3,0,3,3,1,2,3,3,3,3,0,3,0,3,
|
||||
3,1,1,0,0,0,1,0,0,0,0,1,1,0,1,2,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,2,0,3,2,1,2,2,1,3,3,0,0,0,2,
|
||||
2,2,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,
|
||||
3,3,3,2,3,3,1,2,3,3,3,3,3,3,3,1,3,2,1,0,3,2,0,1,2,3,3,2,1,0,0,2,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,
|
||||
1,0,1,3,3,1,3,3,3,3,3,3,3,1,2,0,0,2,3,0,2,3,0,0,2,2,2,3,0,3,0,1,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,0,2,3,2,3,3,1,0,0,2,
|
||||
3,2,0,0,1,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,2,0,0,1,
|
||||
3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,0,3,3,0,0,2,1,0,0,2,3,2,2,0,0,0,2,
|
||||
2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,2,0,0,1,
|
||||
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,0,1,3,2,1,1,3,2,3,2,1,0,0,2,
|
||||
2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,
|
||||
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,2,0,2,3,0,0,2,2,2,2,0,0,0,2,
|
||||
3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
|
||||
3,3,3,3,3,3,3,2,2,2,2,3,2,3,3,0,3,3,1,1,2,2,0,0,2,2,3,2,0,0,1,3,
|
||||
0,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,
|
||||
3,3,3,2,3,3,3,2,1,2,2,3,2,3,3,0,3,2,0,0,1,1,0,1,1,2,1,2,0,0,0,1,
|
||||
0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,
|
||||
3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,1,3,1,1,0,3,2,1,1,3,3,2,3,1,0,0,1,
|
||||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,1,
|
||||
3,2,2,3,3,0,3,3,3,3,3,3,3,2,2,1,0,3,3,1,3,3,0,1,3,3,2,3,0,3,0,3,
|
||||
2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
2,2,2,3,3,0,3,3,3,3,3,3,3,3,3,0,0,3,2,0,3,3,0,3,2,3,3,3,0,3,1,3,
|
||||
2,0,0,0,0,0,0,0,0,0,0,1,0,1,2,0,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
|
||||
3,3,3,1,2,3,3,1,0,0,1,0,0,3,3,2,3,0,0,2,0,0,2,0,2,0,0,0,2,0,2,0,
|
||||
0,3,1,0,1,0,0,0,2,2,1,0,1,1,2,1,2,2,2,0,2,1,1,0,0,0,2,0,0,0,0,0,
|
||||
1,2,1,3,3,0,3,3,3,3,3,2,3,0,0,0,0,2,3,0,2,3,1,0,2,3,1,3,0,3,0,2,
|
||||
3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,3,3,2,2,3,2,2,0,1,2,3,0,1,2,1,0,1,0,0,0,1,0,2,2,0,0,0,1,
|
||||
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,
|
||||
3,3,3,1,3,3,1,1,3,3,1,1,3,3,1,0,2,1,2,0,2,1,0,0,1,1,2,1,0,0,0,2,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,0,2,1,3,0,0,2,0,0,3,3,0,3,0,0,1,0,1,2,0,0,1,1,2,2,0,1,0,
|
||||
0,1,2,1,1,0,1,0,1,1,1,1,1,0,1,1,1,2,2,1,2,0,1,0,0,0,0,0,0,1,0,0,
|
||||
3,3,3,2,3,2,3,3,0,2,2,2,3,3,3,0,3,0,0,0,2,2,0,1,2,1,1,1,0,0,0,1,
|
||||
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
3,3,3,3,3,3,2,1,2,2,3,3,3,3,2,0,2,0,0,0,2,2,0,0,2,1,3,3,0,0,1,1,
|
||||
1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,
|
||||
1,1,2,3,3,0,3,3,3,3,3,3,2,2,0,2,0,2,3,2,3,2,2,2,2,2,2,2,1,3,2,3,
|
||||
2,0,2,1,2,2,2,2,1,1,2,2,1,2,2,1,2,0,0,2,1,1,0,2,1,0,0,1,0,0,0,1,
|
||||
2,3,3,1,1,1,0,1,1,1,2,3,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,
|
||||
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,2,3,2,3,2,2,1,3,3,3,0,2,1,2,0,2,1,0,0,1,1,1,1,1,0,0,1,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
|
||||
3,3,3,2,3,3,3,3,3,2,3,1,2,3,3,1,2,0,0,0,0,0,0,0,3,2,1,1,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
3,3,3,2,2,3,3,2,1,1,1,1,1,3,3,0,3,1,0,0,1,1,0,0,3,1,2,1,0,0,0,0,
|
||||
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||
3,3,3,2,2,3,2,2,2,3,2,1,1,3,3,0,3,0,0,0,0,1,0,0,3,1,1,2,0,0,0,1,
|
||||
1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,1,1,3,3,0,3,3,3,3,3,2,2,2,1,2,0,2,1,2,2,1,1,0,1,2,2,2,2,2,2,2,
|
||||
0,0,2,1,2,1,2,1,0,1,1,3,1,2,1,1,2,0,0,2,0,1,0,1,0,1,0,0,0,1,0,1,
|
||||
3,3,3,1,3,3,3,0,1,1,0,2,2,3,1,0,3,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,0,0,2,2,1,0,0,1,0,0,3,3,1,3,0,0,1,1,0,2,0,3,0,0,0,2,0,1,1,
|
||||
0,1,2,0,1,2,2,0,2,2,2,2,1,0,2,1,1,0,2,0,2,1,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,3,2,3,2,0,2,2,2,1,3,2,0,2,1,2,0,1,2,0,0,1,0,2,2,0,0,0,2,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,
|
||||
3,3,3,0,3,3,1,1,2,3,1,0,3,2,3,0,3,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,
|
||||
1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,3,0,3,3,2,3,3,2,2,0,0,0,0,1,2,0,1,3,0,0,0,3,1,1,0,3,0,2,
|
||||
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,2,2,1,0,3,1,1,1,1,3,3,2,3,0,0,1,0,1,2,0,2,2,0,2,2,0,2,1,
|
||||
0,2,2,1,1,1,1,0,2,1,1,0,1,1,1,1,2,1,2,1,2,0,1,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,0,1,1,3,0,0,1,1,0,0,2,2,0,3,0,0,1,1,0,1,0,0,0,0,0,2,0,0,0,
|
||||
0,3,1,0,1,0,1,0,2,0,0,1,0,1,0,1,1,1,2,1,1,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,2,0,2,0,1,1,1,0,0,3,3,0,2,0,0,1,0,0,2,1,1,0,1,0,1,0,1,0,
|
||||
0,2,0,1,2,0,2,0,2,1,1,0,1,0,2,1,1,0,2,1,1,0,1,0,0,0,1,1,0,0,0,0,
|
||||
3,2,3,0,1,0,0,0,0,0,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,0,2,0,0,0,
|
||||
0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,0,2,3,0,0,1,0,1,0,2,3,2,3,0,0,1,3,0,2,1,0,0,0,0,2,0,1,0,
|
||||
0,2,1,0,0,1,1,0,2,1,0,0,1,0,0,1,1,0,1,1,2,0,1,0,0,0,0,1,0,0,0,0,
|
||||
3,2,2,0,0,1,1,0,0,0,0,0,0,3,1,1,1,0,0,0,0,0,1,0,0,0,0,0,2,0,1,0,
|
||||
0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,3,0,2,3,2,2,1,2,2,1,1,2,0,1,3,2,2,2,0,0,2,2,0,0,0,1,2,1,
|
||||
3,0,2,1,1,0,1,1,1,0,1,2,2,2,1,1,2,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,
|
||||
0,1,1,2,3,0,3,3,3,2,2,2,2,1,0,1,0,1,0,1,2,2,0,0,2,2,1,3,1,1,2,1,
|
||||
0,0,1,1,2,0,1,1,0,0,1,2,0,2,1,1,2,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,
|
||||
3,3,2,0,0,3,1,0,0,0,0,0,0,3,2,1,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
|
||||
0,2,1,1,0,0,1,0,1,2,0,0,1,1,0,0,2,1,1,1,1,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,0,0,1,0,0,0,0,1,0,0,3,3,2,2,0,0,1,0,0,2,0,1,0,0,0,2,0,1,0,
|
||||
0,0,1,1,0,0,2,0,2,1,0,0,1,1,2,1,2,0,2,1,2,1,1,1,0,0,1,1,0,0,0,0,
|
||||
3,3,2,0,0,2,2,0,0,0,1,1,0,2,2,1,3,1,0,1,0,1,2,0,0,0,0,0,1,0,1,0,
|
||||
0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,0,0,0,1,0,0,1,0,0,2,3,1,2,0,0,1,0,0,2,0,0,0,1,0,2,0,2,0,
|
||||
0,1,1,2,2,1,2,0,2,1,1,0,0,1,1,0,1,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,2,1,2,1,0,0,1,1,0,3,3,1,2,0,0,1,0,0,2,0,2,0,1,1,2,0,0,0,
|
||||
0,0,1,1,1,1,2,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
|
||||
3,3,3,0,2,2,3,2,0,0,1,0,0,2,3,1,0,0,0,0,0,0,2,0,2,0,0,0,2,0,0,0,
|
||||
0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,0,0,0,0,0,0,0,1,0,0,2,2,2,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
|
||||
0,0,2,1,1,0,1,0,2,1,1,0,0,1,1,2,1,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0,
|
||||
0,0,0,2,2,0,2,1,1,1,1,2,2,0,0,1,0,1,0,0,1,3,0,0,0,0,1,0,0,2,1,0,
|
||||
0,0,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
2,0,0,2,3,0,2,3,1,2,2,0,2,0,0,2,0,2,1,1,1,2,1,0,0,1,2,1,1,2,1,0,
|
||||
1,0,2,0,1,0,1,1,0,0,2,2,1,2,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,2,1,2,0,0,0,1,0,0,3,2,0,1,0,0,1,0,0,2,0,0,0,1,2,1,0,1,0,
|
||||
0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,2,2,0,2,2,1,1,0,1,1,1,1,1,0,0,1,2,1,1,1,0,1,0,0,0,1,1,1,1,
|
||||
0,0,2,1,0,1,1,1,0,1,1,2,1,2,1,1,2,0,1,1,2,1,0,2,0,0,0,0,0,0,0,0,
|
||||
3,2,2,0,0,2,0,0,0,0,0,0,0,2,2,0,2,0,0,1,0,0,2,0,0,0,0,0,2,0,0,0,
|
||||
0,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,2,0,2,2,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,
|
||||
2,0,1,0,1,0,1,1,0,0,1,2,0,1,0,1,1,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,1,1,0,0,0,1,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,1,2,0,1,0,
|
||||
0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,1,0,1,1,1,0,0,0,0,1,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
1,1,2,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,
|
||||
0,0,1,2,2,0,2,1,2,1,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
2,2,2,0,0,0,1,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
Latin5TurkishModel = {
|
||||
'char_to_order_map': Latin5_TurkishCharToOrderMap,
|
||||
'precedence_matrix': TurkishLangModel,
|
||||
'typical_positive_ratio': 0.970290,
|
||||
'keep_english_letter': True,
|
||||
'charset_name': "ISO-8859-9",
|
||||
'language': 'Turkish',
|
||||
}
|
||||
@@ -27,8 +27,7 @@
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .constants import eNotMe
|
||||
from .compat import wrap_ord
|
||||
from .enums import ProbingState
|
||||
|
||||
FREQ_CAT_NUM = 4
|
||||
|
||||
@@ -82,7 +81,7 @@ Latin1_CharToClass = (
|
||||
# 2 : normal
|
||||
# 3 : very likely
|
||||
Latin1ClassModel = (
|
||||
# UDF OTH ASC ASS ACV ACO ASV ASO
|
||||
# UDF OTH ASC ASS ACV ACO ASV ASO
|
||||
0, 0, 0, 0, 0, 0, 0, 0, # UDF
|
||||
0, 3, 3, 3, 3, 3, 3, 3, # OTH
|
||||
0, 3, 3, 3, 3, 3, 3, 3, # ASC
|
||||
@@ -96,40 +95,47 @@ Latin1ClassModel = (
|
||||
|
||||
class Latin1Prober(CharSetProber):
|
||||
def __init__(self):
|
||||
CharSetProber.__init__(self)
|
||||
super(Latin1Prober, self).__init__()
|
||||
self._last_char_class = None
|
||||
self._freq_counter = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._mLastCharClass = OTH
|
||||
self._mFreqCounter = [0] * FREQ_CAT_NUM
|
||||
self._last_char_class = OTH
|
||||
self._freq_counter = [0] * FREQ_CAT_NUM
|
||||
CharSetProber.reset(self)
|
||||
|
||||
def get_charset_name(self):
|
||||
return "windows-1252"
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "ISO-8859-1"
|
||||
|
||||
def feed(self, aBuf):
|
||||
aBuf = self.filter_with_english_letters(aBuf)
|
||||
for c in aBuf:
|
||||
charClass = Latin1_CharToClass[wrap_ord(c)]
|
||||
freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
|
||||
+ charClass]
|
||||
@property
|
||||
def language(self):
|
||||
return ""
|
||||
|
||||
def feed(self, byte_str):
|
||||
byte_str = self.filter_with_english_letters(byte_str)
|
||||
for c in byte_str:
|
||||
char_class = Latin1_CharToClass[c]
|
||||
freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
|
||||
+ char_class]
|
||||
if freq == 0:
|
||||
self._mState = eNotMe
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
self._mFreqCounter[freq] += 1
|
||||
self._mLastCharClass = charClass
|
||||
self._freq_counter[freq] += 1
|
||||
self._last_char_class = char_class
|
||||
|
||||
return self.get_state()
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
if self.get_state() == eNotMe:
|
||||
if self.state == ProbingState.NOT_ME:
|
||||
return 0.01
|
||||
|
||||
total = sum(self._mFreqCounter)
|
||||
total = sum(self._freq_counter)
|
||||
if total < 0.01:
|
||||
confidence = 0.0
|
||||
else:
|
||||
confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0)
|
||||
confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0)
|
||||
/ total)
|
||||
if confidence < 0.0:
|
||||
confidence = 0.0
|
||||
|
||||
@@ -27,60 +27,65 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import sys
|
||||
from . import constants
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState, MachineState
|
||||
|
||||
|
||||
class MultiByteCharSetProber(CharSetProber):
|
||||
def __init__(self):
|
||||
CharSetProber.__init__(self)
|
||||
self._mDistributionAnalyzer = None
|
||||
self._mCodingSM = None
|
||||
self._mLastChar = [0, 0]
|
||||
"""
|
||||
MultiByteCharSetProber
|
||||
"""
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
|
||||
self.distribution_analyzer = None
|
||||
self.coding_sm = None
|
||||
self._last_char = [0, 0]
|
||||
|
||||
def reset(self):
|
||||
CharSetProber.reset(self)
|
||||
if self._mCodingSM:
|
||||
self._mCodingSM.reset()
|
||||
if self._mDistributionAnalyzer:
|
||||
self._mDistributionAnalyzer.reset()
|
||||
self._mLastChar = [0, 0]
|
||||
super(MultiByteCharSetProber, self).reset()
|
||||
if self.coding_sm:
|
||||
self.coding_sm.reset()
|
||||
if self.distribution_analyzer:
|
||||
self.distribution_analyzer.reset()
|
||||
self._last_char = [0, 0]
|
||||
|
||||
def get_charset_name(self):
|
||||
pass
|
||||
@property
|
||||
def charset_name(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def feed(self, aBuf):
|
||||
aLen = len(aBuf)
|
||||
for i in range(0, aLen):
|
||||
codingState = self._mCodingSM.next_state(aBuf[i])
|
||||
if codingState == constants.eError:
|
||||
if constants._debug:
|
||||
sys.stderr.write(self.get_charset_name()
|
||||
+ ' prober hit error at byte ' + str(i)
|
||||
+ '\n')
|
||||
self._mState = constants.eNotMe
|
||||
@property
|
||||
def language(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i in range(len(byte_str)):
|
||||
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||
if coding_state == MachineState.ERROR:
|
||||
self.logger.debug('%s %s prober hit error at byte %s',
|
||||
self.charset_name, self.language, i)
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif codingState == constants.eItsMe:
|
||||
self._mState = constants.eFoundIt
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif codingState == constants.eStart:
|
||||
charLen = self._mCodingSM.get_current_charlen()
|
||||
elif coding_state == MachineState.START:
|
||||
char_len = self.coding_sm.get_current_charlen()
|
||||
if i == 0:
|
||||
self._mLastChar[1] = aBuf[0]
|
||||
self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
|
||||
self._last_char[1] = byte_str[0]
|
||||
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||
else:
|
||||
self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
|
||||
charLen)
|
||||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
|
||||
self._mLastChar[0] = aBuf[aLen - 1]
|
||||
self._last_char[0] = byte_str[-1]
|
||||
|
||||
if self.get_state() == constants.eDetecting:
|
||||
if (self._mDistributionAnalyzer.got_enough_data() and
|
||||
(self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
|
||||
self._mState = constants.eFoundIt
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if (self.distribution_analyzer.got_enough_data() and
|
||||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.get_state()
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
return self._mDistributionAnalyzer.get_confidence()
|
||||
return self.distribution_analyzer.get_confidence()
|
||||
|
||||
@@ -39,9 +39,9 @@ from .euctwprober import EUCTWProber
|
||||
|
||||
|
||||
class MBCSGroupProber(CharSetGroupProber):
|
||||
def __init__(self):
|
||||
CharSetGroupProber.__init__(self)
|
||||
self._mProbers = [
|
||||
def __init__(self, lang_filter=None):
|
||||
super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
|
||||
self.probers = [
|
||||
UTF8Prober(),
|
||||
SJISProber(),
|
||||
EUCJPProber(),
|
||||
|
||||
@@ -25,11 +25,11 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .constants import eStart, eError, eItsMe
|
||||
from .enums import MachineState
|
||||
|
||||
# BIG5
|
||||
|
||||
BIG5_cls = (
|
||||
BIG5_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
@@ -64,23 +64,23 @@ BIG5_cls = (
|
||||
3,3,3,3,3,3,3,0 # f8 - ff
|
||||
)
|
||||
|
||||
BIG5_st = (
|
||||
eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
|
||||
eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f
|
||||
eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17
|
||||
BIG5_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17
|
||||
)
|
||||
|
||||
Big5CharLenTable = (0, 1, 1, 2, 0)
|
||||
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
|
||||
|
||||
Big5SMModel = {'classTable': BIG5_cls,
|
||||
'classFactor': 5,
|
||||
'stateTable': BIG5_st,
|
||||
'charLenTable': Big5CharLenTable,
|
||||
'name': 'Big5'}
|
||||
BIG5_SM_MODEL = {'class_table': BIG5_CLS,
|
||||
'class_factor': 5,
|
||||
'state_table': BIG5_ST,
|
||||
'char_len_table': BIG5_CHAR_LEN_TABLE,
|
||||
'name': 'Big5'}
|
||||
|
||||
# CP949
|
||||
|
||||
CP949_cls = (
|
||||
CP949_CLS = (
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
|
||||
@@ -99,28 +99,28 @@ CP949_cls = (
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
|
||||
)
|
||||
|
||||
CP949_st = (
|
||||
CP949_ST = (
|
||||
#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
|
||||
eError,eStart, 3,eError,eStart,eStart, 4, 5,eError, 6, # eStart
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError
|
||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe
|
||||
eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3
|
||||
eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4
|
||||
eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5
|
||||
eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6
|
||||
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6
|
||||
)
|
||||
|
||||
CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
|
||||
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
|
||||
|
||||
CP949SMModel = {'classTable': CP949_cls,
|
||||
'classFactor': 10,
|
||||
'stateTable': CP949_st,
|
||||
'charLenTable': CP949CharLenTable,
|
||||
'name': 'CP949'}
|
||||
CP949_SM_MODEL = {'class_table': CP949_CLS,
|
||||
'class_factor': 10,
|
||||
'state_table': CP949_ST,
|
||||
'char_len_table': CP949_CHAR_LEN_TABLE,
|
||||
'name': 'CP949'}
|
||||
|
||||
# EUC-JP
|
||||
|
||||
EUCJP_cls = (
|
||||
EUCJP_CLS = (
|
||||
4,4,4,4,4,4,4,4, # 00 - 07
|
||||
4,4,4,4,4,4,5,5, # 08 - 0f
|
||||
4,4,4,4,4,4,4,4, # 10 - 17
|
||||
@@ -155,25 +155,25 @@ EUCJP_cls = (
|
||||
0,0,0,0,0,0,0,5 # f8 - ff
|
||||
)
|
||||
|
||||
EUCJP_st = (
|
||||
3, 4, 3, 5,eStart,eError,eError,eError,#00-07
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
|
||||
eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17
|
||||
eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f
|
||||
3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27
|
||||
EUCJP_ST = (
|
||||
3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f
|
||||
3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
|
||||
)
|
||||
|
||||
EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)
|
||||
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
|
||||
|
||||
EUCJPSMModel = {'classTable': EUCJP_cls,
|
||||
'classFactor': 6,
|
||||
'stateTable': EUCJP_st,
|
||||
'charLenTable': EUCJPCharLenTable,
|
||||
'name': 'EUC-JP'}
|
||||
EUCJP_SM_MODEL = {'class_table': EUCJP_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': EUCJP_ST,
|
||||
'char_len_table': EUCJP_CHAR_LEN_TABLE,
|
||||
'name': 'EUC-JP'}
|
||||
|
||||
# EUC-KR
|
||||
|
||||
EUCKR_cls = (
|
||||
EUCKR_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
@@ -208,22 +208,22 @@ EUCKR_cls = (
|
||||
2,2,2,2,2,2,2,0 # f8 - ff
|
||||
)
|
||||
|
||||
EUCKR_st = (
|
||||
eError,eStart, 3,eError,eError,eError,eError,eError,#00-07
|
||||
eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f
|
||||
EUCKR_ST = (
|
||||
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
|
||||
)
|
||||
|
||||
EUCKRCharLenTable = (0, 1, 2, 0)
|
||||
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
|
||||
|
||||
EUCKRSMModel = {'classTable': EUCKR_cls,
|
||||
'classFactor': 4,
|
||||
'stateTable': EUCKR_st,
|
||||
'charLenTable': EUCKRCharLenTable,
|
||||
EUCKR_SM_MODEL = {'class_table': EUCKR_CLS,
|
||||
'class_factor': 4,
|
||||
'state_table': EUCKR_ST,
|
||||
'char_len_table': EUCKR_CHAR_LEN_TABLE,
|
||||
'name': 'EUC-KR'}
|
||||
|
||||
# EUC-TW
|
||||
|
||||
EUCTW_cls = (
|
||||
EUCTW_CLS = (
|
||||
2,2,2,2,2,2,2,2, # 00 - 07
|
||||
2,2,2,2,2,2,0,0, # 08 - 0f
|
||||
2,2,2,2,2,2,2,2, # 10 - 17
|
||||
@@ -258,26 +258,26 @@ EUCTW_cls = (
|
||||
3,3,3,3,3,3,3,0 # f8 - ff
|
||||
)
|
||||
|
||||
EUCTW_st = (
|
||||
eError,eError,eStart, 3, 3, 3, 4,eError,#00-07
|
||||
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
|
||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17
|
||||
eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f
|
||||
5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27
|
||||
eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
|
||||
EUCTW_ST = (
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17
|
||||
MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
|
||||
MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
|
||||
)
|
||||
|
||||
EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)
|
||||
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
|
||||
|
||||
EUCTWSMModel = {'classTable': EUCTW_cls,
|
||||
'classFactor': 7,
|
||||
'stateTable': EUCTW_st,
|
||||
'charLenTable': EUCTWCharLenTable,
|
||||
EUCTW_SM_MODEL = {'class_table': EUCTW_CLS,
|
||||
'class_factor': 7,
|
||||
'state_table': EUCTW_ST,
|
||||
'char_len_table': EUCTW_CHAR_LEN_TABLE,
|
||||
'name': 'x-euc-tw'}
|
||||
|
||||
# GB2312
|
||||
|
||||
GB2312_cls = (
|
||||
GB2312_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
@@ -312,31 +312,31 @@ GB2312_cls = (
|
||||
6,6,6,6,6,6,6,0 # f8 - ff
|
||||
)
|
||||
|
||||
GB2312_st = (
|
||||
eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07
|
||||
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
|
||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17
|
||||
4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f
|
||||
eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27
|
||||
eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
|
||||
GB2312_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17
|
||||
4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
|
||||
)
|
||||
|
||||
# To be accurate, the length of class 6 can be either 2 or 4.
|
||||
# But it is not necessary to discriminate between the two since
|
||||
# it is used for frequency analysis only, and we are validing
|
||||
# it is used for frequency analysis only, and we are validating
|
||||
# each code range there as well. So it is safe to set it to be
|
||||
# 2 here.
|
||||
GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)
|
||||
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
|
||||
|
||||
GB2312SMModel = {'classTable': GB2312_cls,
|
||||
'classFactor': 7,
|
||||
'stateTable': GB2312_st,
|
||||
'charLenTable': GB2312CharLenTable,
|
||||
'name': 'GB2312'}
|
||||
GB2312_SM_MODEL = {'class_table': GB2312_CLS,
|
||||
'class_factor': 7,
|
||||
'state_table': GB2312_ST,
|
||||
'char_len_table': GB2312_CHAR_LEN_TABLE,
|
||||
'name': 'GB2312'}
|
||||
|
||||
# Shift_JIS
|
||||
|
||||
SJIS_cls = (
|
||||
SJIS_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
@@ -373,23 +373,23 @@ SJIS_cls = (
|
||||
3,3,3,3,3,0,0,0) # f8 - ff
|
||||
|
||||
|
||||
SJIS_st = (
|
||||
eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
|
||||
eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17
|
||||
SJIS_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
|
||||
)
|
||||
|
||||
SJISCharLenTable = (0, 1, 1, 2, 0, 0)
|
||||
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
|
||||
|
||||
SJISSMModel = {'classTable': SJIS_cls,
|
||||
'classFactor': 6,
|
||||
'stateTable': SJIS_st,
|
||||
'charLenTable': SJISCharLenTable,
|
||||
SJIS_SM_MODEL = {'class_table': SJIS_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': SJIS_ST,
|
||||
'char_len_table': SJIS_CHAR_LEN_TABLE,
|
||||
'name': 'Shift_JIS'}
|
||||
|
||||
# UCS2-BE
|
||||
|
||||
UCS2BE_cls = (
|
||||
UCS2BE_CLS = (
|
||||
0,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
@@ -424,27 +424,27 @@ UCS2BE_cls = (
|
||||
0,0,0,0,0,0,4,5 # f8 - ff
|
||||
)
|
||||
|
||||
UCS2BE_st = (
|
||||
5, 7, 7,eError, 4, 3,eError,eError,#00-07
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
|
||||
eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17
|
||||
6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f
|
||||
6, 6, 6, 6, 5, 7, 7,eError,#20-27
|
||||
5, 8, 6, 6,eError, 6, 6, 6,#28-2f
|
||||
6, 6, 6, 6,eError,eError,eStart,eStart #30-37
|
||||
UCS2BE_ST = (
|
||||
5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f
|
||||
6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27
|
||||
5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
|
||||
6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
|
||||
)
|
||||
|
||||
UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)
|
||||
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
|
||||
|
||||
UCS2BESMModel = {'classTable': UCS2BE_cls,
|
||||
'classFactor': 6,
|
||||
'stateTable': UCS2BE_st,
|
||||
'charLenTable': UCS2BECharLenTable,
|
||||
'name': 'UTF-16BE'}
|
||||
UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': UCS2BE_ST,
|
||||
'char_len_table': UCS2BE_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-16BE'}
|
||||
|
||||
# UCS2-LE
|
||||
|
||||
UCS2LE_cls = (
|
||||
UCS2LE_CLS = (
|
||||
0,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
@@ -479,27 +479,27 @@ UCS2LE_cls = (
|
||||
0,0,0,0,0,0,4,5 # f8 - ff
|
||||
)
|
||||
|
||||
UCS2LE_st = (
|
||||
6, 6, 7, 6, 4, 3,eError,eError,#00-07
|
||||
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
|
||||
eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17
|
||||
5, 5, 5,eError, 5,eError, 6, 6,#18-1f
|
||||
7, 6, 8, 8, 5, 5, 5,eError,#20-27
|
||||
5, 5, 5,eError,eError,eError, 5, 5,#28-2f
|
||||
5, 5, 5,eError, 5,eError,eStart,eStart #30-37
|
||||
UCS2LE_ST = (
|
||||
6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17
|
||||
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f
|
||||
7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27
|
||||
5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f
|
||||
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
|
||||
)
|
||||
|
||||
UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)
|
||||
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
|
||||
|
||||
UCS2LESMModel = {'classTable': UCS2LE_cls,
|
||||
'classFactor': 6,
|
||||
'stateTable': UCS2LE_st,
|
||||
'charLenTable': UCS2LECharLenTable,
|
||||
UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': UCS2LE_ST,
|
||||
'char_len_table': UCS2LE_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-16LE'}
|
||||
|
||||
# UTF-8
|
||||
|
||||
UTF8_cls = (
|
||||
UTF8_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
@@ -534,39 +534,39 @@ UTF8_cls = (
|
||||
12,13,13,13,14,15,0,0 # f8 - ff
|
||||
)
|
||||
|
||||
UTF8_st = (
|
||||
eError,eStart,eError,eError,eError,eError, 12, 10,#00-07
|
||||
UTF8_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07
|
||||
9, 11, 8, 7, 6, 5, 4, 3,#08-0f
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#10-17
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#18-1f
|
||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27
|
||||
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f
|
||||
eError,eError, 5, 5, 5, 5,eError,eError,#30-37
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#38-3f
|
||||
eError,eError,eError, 5, 5, 5,eError,eError,#40-47
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#48-4f
|
||||
eError,eError, 7, 7, 7, 7,eError,eError,#50-57
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#58-5f
|
||||
eError,eError,eError,eError, 7, 7,eError,eError,#60-67
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#68-6f
|
||||
eError,eError, 9, 9, 9, 9,eError,eError,#70-77
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#78-7f
|
||||
eError,eError,eError,eError,eError, 9,eError,eError,#80-87
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#88-8f
|
||||
eError,eError, 12, 12, 12, 12,eError,eError,#90-97
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#98-9f
|
||||
eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#a8-af
|
||||
eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7
|
||||
eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf
|
||||
eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7
|
||||
eError,eError,eError,eError,eError,eError,eError,eError #c8-cf
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f
|
||||
MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f
|
||||
MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f
|
||||
MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f
|
||||
MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af
|
||||
MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
|
||||
)
|
||||
|
||||
UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
|
||||
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
|
||||
|
||||
UTF8SMModel = {'classTable': UTF8_cls,
|
||||
'classFactor': 16,
|
||||
'stateTable': UTF8_st,
|
||||
'charLenTable': UTF8CharLenTable,
|
||||
'name': 'UTF-8'}
|
||||
UTF8_SM_MODEL = {'class_table': UTF8_CLS,
|
||||
'class_factor': 16,
|
||||
'state_table': UTF8_ST,
|
||||
'char_len_table': UTF8_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-8'}
|
||||
|
||||
@@ -26,95 +26,107 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import sys
|
||||
from . import constants
|
||||
from .charsetprober import CharSetProber
|
||||
from .compat import wrap_ord
|
||||
|
||||
SAMPLE_SIZE = 64
|
||||
SB_ENOUGH_REL_THRESHOLD = 1024
|
||||
POSITIVE_SHORTCUT_THRESHOLD = 0.95
|
||||
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
|
||||
SYMBOL_CAT_ORDER = 250
|
||||
NUMBER_OF_SEQ_CAT = 4
|
||||
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
|
||||
#NEGATIVE_CAT = 0
|
||||
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
|
||||
|
||||
|
||||
class SingleByteCharSetProber(CharSetProber):
|
||||
def __init__(self, model, reversed=False, nameProber=None):
|
||||
CharSetProber.__init__(self)
|
||||
self._mModel = model
|
||||
SAMPLE_SIZE = 64
|
||||
SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2
|
||||
POSITIVE_SHORTCUT_THRESHOLD = 0.95
|
||||
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
|
||||
|
||||
def __init__(self, model, reversed=False, name_prober=None):
|
||||
super(SingleByteCharSetProber, self).__init__()
|
||||
self._model = model
|
||||
# TRUE if we need to reverse every pair in the model lookup
|
||||
self._mReversed = reversed
|
||||
self._reversed = reversed
|
||||
# Optional auxiliary prober for name decision
|
||||
self._mNameProber = nameProber
|
||||
self._name_prober = name_prober
|
||||
self._last_order = None
|
||||
self._seq_counters = None
|
||||
self._total_seqs = None
|
||||
self._total_char = None
|
||||
self._freq_char = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
CharSetProber.reset(self)
|
||||
super(SingleByteCharSetProber, self).reset()
|
||||
# char order of last character
|
||||
self._mLastOrder = 255
|
||||
self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
|
||||
self._mTotalSeqs = 0
|
||||
self._mTotalChar = 0
|
||||
self._last_order = 255
|
||||
self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
|
||||
self._total_seqs = 0
|
||||
self._total_char = 0
|
||||
# characters that fall in our sampling range
|
||||
self._mFreqChar = 0
|
||||
self._freq_char = 0
|
||||
|
||||
def get_charset_name(self):
|
||||
if self._mNameProber:
|
||||
return self._mNameProber.get_charset_name()
|
||||
@property
|
||||
def charset_name(self):
|
||||
if self._name_prober:
|
||||
return self._name_prober.charset_name
|
||||
else:
|
||||
return self._mModel['charsetName']
|
||||
return self._model['charset_name']
|
||||
|
||||
def feed(self, aBuf):
|
||||
if not self._mModel['keepEnglishLetter']:
|
||||
aBuf = self.filter_without_english_letters(aBuf)
|
||||
aLen = len(aBuf)
|
||||
if not aLen:
|
||||
return self.get_state()
|
||||
for c in aBuf:
|
||||
order = self._mModel['charToOrderMap'][wrap_ord(c)]
|
||||
if order < SYMBOL_CAT_ORDER:
|
||||
self._mTotalChar += 1
|
||||
if order < SAMPLE_SIZE:
|
||||
self._mFreqChar += 1
|
||||
if self._mLastOrder < SAMPLE_SIZE:
|
||||
self._mTotalSeqs += 1
|
||||
if not self._mReversed:
|
||||
i = (self._mLastOrder * SAMPLE_SIZE) + order
|
||||
model = self._mModel['precedenceMatrix'][i]
|
||||
@property
|
||||
def language(self):
|
||||
if self._name_prober:
|
||||
return self._name_prober.language
|
||||
else:
|
||||
return self._model.get('language')
|
||||
|
||||
def feed(self, byte_str):
|
||||
if not self._model['keep_english_letter']:
|
||||
byte_str = self.filter_international_words(byte_str)
|
||||
if not byte_str:
|
||||
return self.state
|
||||
char_to_order_map = self._model['char_to_order_map']
|
||||
for i, c in enumerate(byte_str):
|
||||
# XXX: Order is in range 1-64, so one would think we want 0-63 here,
|
||||
# but that leads to 27 more test failures than before.
|
||||
order = char_to_order_map[c]
|
||||
# XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
|
||||
# CharacterCategory.SYMBOL is actually 253, so we use CONTROL
|
||||
# to make it closer to the original intent. The only difference
|
||||
# is whether or not we count digits and control characters for
|
||||
# _total_char purposes.
|
||||
if order < CharacterCategory.CONTROL:
|
||||
self._total_char += 1
|
||||
if order < self.SAMPLE_SIZE:
|
||||
self._freq_char += 1
|
||||
if self._last_order < self.SAMPLE_SIZE:
|
||||
self._total_seqs += 1
|
||||
if not self._reversed:
|
||||
i = (self._last_order * self.SAMPLE_SIZE) + order
|
||||
model = self._model['precedence_matrix'][i]
|
||||
else: # reverse the order of the letters in the lookup
|
||||
i = (order * SAMPLE_SIZE) + self._mLastOrder
|
||||
model = self._mModel['precedenceMatrix'][i]
|
||||
self._mSeqCounters[model] += 1
|
||||
self._mLastOrder = order
|
||||
i = (order * self.SAMPLE_SIZE) + self._last_order
|
||||
model = self._model['precedence_matrix'][i]
|
||||
self._seq_counters[model] += 1
|
||||
self._last_order = order
|
||||
|
||||
if self.get_state() == constants.eDetecting:
|
||||
if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
|
||||
cf = self.get_confidence()
|
||||
if cf > POSITIVE_SHORTCUT_THRESHOLD:
|
||||
if constants._debug:
|
||||
sys.stderr.write('%s confidence = %s, we have a'
|
||||
'winner\n' %
|
||||
(self._mModel['charsetName'], cf))
|
||||
self._mState = constants.eFoundIt
|
||||
elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
|
||||
if constants._debug:
|
||||
sys.stderr.write('%s confidence = %s, below negative'
|
||||
'shortcut threshhold %s\n' %
|
||||
(self._mModel['charsetName'], cf,
|
||||
NEGATIVE_SHORTCUT_THRESHOLD))
|
||||
self._mState = constants.eNotMe
|
||||
charset_name = self._model['charset_name']
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
|
||||
confidence = self.get_confidence()
|
||||
if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
|
||||
self.logger.debug('%s confidence = %s, we have a winner',
|
||||
charset_name, confidence)
|
||||
self._state = ProbingState.FOUND_IT
|
||||
elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
|
||||
self.logger.debug('%s confidence = %s, below negative '
|
||||
'shortcut threshhold %s', charset_name,
|
||||
confidence,
|
||||
self.NEGATIVE_SHORTCUT_THRESHOLD)
|
||||
self._state = ProbingState.NOT_ME
|
||||
|
||||
return self.get_state()
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
r = 0.01
|
||||
if self._mTotalSeqs > 0:
|
||||
r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
|
||||
/ self._mModel['mTypicalPositiveRatio'])
|
||||
r = r * self._mFreqChar / self._mTotalChar
|
||||
if self._total_seqs > 0:
|
||||
r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
|
||||
self._total_seqs / self._model['typical_positive_ratio'])
|
||||
r = r * self._freq_char / self._total_char
|
||||
if r >= 1.0:
|
||||
r = 0.99
|
||||
return r
|
||||
|
||||
@@ -33,16 +33,17 @@ from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
|
||||
Ibm866Model, Ibm855Model)
|
||||
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
|
||||
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
|
||||
from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
|
||||
# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
|
||||
from .langthaimodel import TIS620ThaiModel
|
||||
from .langhebrewmodel import Win1255HebrewModel
|
||||
from .hebrewprober import HebrewProber
|
||||
from .langturkishmodel import Latin5TurkishModel
|
||||
|
||||
|
||||
class SBCSGroupProber(CharSetGroupProber):
|
||||
def __init__(self):
|
||||
CharSetGroupProber.__init__(self)
|
||||
self._mProbers = [
|
||||
super(SBCSGroupProber, self).__init__()
|
||||
self.probers = [
|
||||
SingleByteCharSetProber(Win1251CyrillicModel),
|
||||
SingleByteCharSetProber(Koi8rModel),
|
||||
SingleByteCharSetProber(Latin5CyrillicModel),
|
||||
@@ -53,17 +54,20 @@ class SBCSGroupProber(CharSetGroupProber):
|
||||
SingleByteCharSetProber(Win1253GreekModel),
|
||||
SingleByteCharSetProber(Latin5BulgarianModel),
|
||||
SingleByteCharSetProber(Win1251BulgarianModel),
|
||||
SingleByteCharSetProber(Latin2HungarianModel),
|
||||
SingleByteCharSetProber(Win1250HungarianModel),
|
||||
# TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
|
||||
# after we retrain model.
|
||||
# SingleByteCharSetProber(Latin2HungarianModel),
|
||||
# SingleByteCharSetProber(Win1250HungarianModel),
|
||||
SingleByteCharSetProber(TIS620ThaiModel),
|
||||
SingleByteCharSetProber(Latin5TurkishModel),
|
||||
]
|
||||
hebrewProber = HebrewProber()
|
||||
logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,
|
||||
False, hebrewProber)
|
||||
visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,
|
||||
hebrewProber)
|
||||
hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
|
||||
self._mProbers.extend([hebrewProber, logicalHebrewProber,
|
||||
visualHebrewProber])
|
||||
hebrew_prober = HebrewProber()
|
||||
logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel,
|
||||
False, hebrew_prober)
|
||||
visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
|
||||
hebrew_prober)
|
||||
hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober)
|
||||
self.probers.extend([hebrew_prober, logical_hebrew_prober,
|
||||
visual_hebrew_prober])
|
||||
|
||||
self.reset()
|
||||
|
||||
@@ -25,67 +25,68 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import sys
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import SJISDistributionAnalysis
|
||||
from .jpcntx import SJISContextAnalysis
|
||||
from .mbcssm import SJISSMModel
|
||||
from . import constants
|
||||
from .mbcssm import SJIS_SM_MODEL
|
||||
from .enums import ProbingState, MachineState
|
||||
|
||||
|
||||
class SJISProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
MultiByteCharSetProber.__init__(self)
|
||||
self._mCodingSM = CodingStateMachine(SJISSMModel)
|
||||
self._mDistributionAnalyzer = SJISDistributionAnalysis()
|
||||
self._mContextAnalyzer = SJISContextAnalysis()
|
||||
super(SJISProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
|
||||
self.distribution_analyzer = SJISDistributionAnalysis()
|
||||
self.context_analyzer = SJISContextAnalysis()
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
MultiByteCharSetProber.reset(self)
|
||||
self._mContextAnalyzer.reset()
|
||||
super(SJISProber, self).reset()
|
||||
self.context_analyzer.reset()
|
||||
|
||||
def get_charset_name(self):
|
||||
return self._mContextAnalyzer.get_charset_name()
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self.context_analyzer.charset_name
|
||||
|
||||
def feed(self, aBuf):
|
||||
aLen = len(aBuf)
|
||||
for i in range(0, aLen):
|
||||
codingState = self._mCodingSM.next_state(aBuf[i])
|
||||
if codingState == constants.eError:
|
||||
if constants._debug:
|
||||
sys.stderr.write(self.get_charset_name()
|
||||
+ ' prober hit error at byte ' + str(i)
|
||||
+ '\n')
|
||||
self._mState = constants.eNotMe
|
||||
@property
|
||||
def language(self):
|
||||
return "Japanese"
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i in range(len(byte_str)):
|
||||
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||
if coding_state == MachineState.ERROR:
|
||||
self.logger.debug('%s %s prober hit error at byte %s',
|
||||
self.charset_name, self.language, i)
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif codingState == constants.eItsMe:
|
||||
self._mState = constants.eFoundIt
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif codingState == constants.eStart:
|
||||
charLen = self._mCodingSM.get_current_charlen()
|
||||
elif coding_state == MachineState.START:
|
||||
char_len = self.coding_sm.get_current_charlen()
|
||||
if i == 0:
|
||||
self._mLastChar[1] = aBuf[0]
|
||||
self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
|
||||
charLen)
|
||||
self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
|
||||
self._last_char[1] = byte_str[0]
|
||||
self.context_analyzer.feed(self._last_char[2 - char_len:],
|
||||
char_len)
|
||||
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||
else:
|
||||
self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
|
||||
- charLen], charLen)
|
||||
self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
|
||||
charLen)
|
||||
self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
|
||||
- char_len], char_len)
|
||||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
|
||||
self._mLastChar[0] = aBuf[aLen - 1]
|
||||
self._last_char[0] = byte_str[-1]
|
||||
|
||||
if self.get_state() == constants.eDetecting:
|
||||
if (self._mContextAnalyzer.got_enough_data() and
|
||||
(self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
|
||||
self._mState = constants.eFoundIt
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if (self.context_analyzer.got_enough_data() and
|
||||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.get_state()
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
contxtCf = self._mContextAnalyzer.get_confidence()
|
||||
distribCf = self._mDistributionAnalyzer.get_confidence()
|
||||
return max(contxtCf, distribCf)
|
||||
context_conf = self.context_analyzer.get_confidence()
|
||||
distrib_conf = self.distribution_analyzer.get_confidence()
|
||||
return max(context_conf, distrib_conf)
|
||||
|
||||
@@ -25,146 +25,262 @@
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
"""
|
||||
Module containing the UniversalDetector detector class, which is the primary
|
||||
class a user of ``chardet`` should use.
|
||||
|
||||
:author: Mark Pilgrim (initial port to Python)
|
||||
:author: Shy Shalom (original C code)
|
||||
:author: Dan Blanchard (major refactoring for 3.0)
|
||||
:author: Ian Cordasco
|
||||
"""
|
||||
|
||||
|
||||
from . import constants
|
||||
import sys
|
||||
import codecs
|
||||
from .latin1prober import Latin1Prober # windows-1252
|
||||
from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets
|
||||
from .sbcsgroupprober import SBCSGroupProber # single-byte character sets
|
||||
from .escprober import EscCharSetProber # ISO-2122, etc.
|
||||
import logging
|
||||
import re
|
||||
|
||||
MINIMUM_THRESHOLD = 0.20
|
||||
ePureAscii = 0
|
||||
eEscAscii = 1
|
||||
eHighbyte = 2
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .enums import InputState, LanguageFilter, ProbingState
|
||||
from .escprober import EscCharSetProber
|
||||
from .latin1prober import Latin1Prober
|
||||
from .mbcsgroupprober import MBCSGroupProber
|
||||
from .sbcsgroupprober import SBCSGroupProber
|
||||
|
||||
|
||||
class UniversalDetector:
|
||||
def __init__(self):
|
||||
self._highBitDetector = re.compile(b'[\x80-\xFF]')
|
||||
self._escDetector = re.compile(b'(\033|~{)')
|
||||
self._mEscCharSetProber = None
|
||||
self._mCharSetProbers = []
|
||||
class UniversalDetector(object):
|
||||
"""
|
||||
The ``UniversalDetector`` class underlies the ``chardet.detect`` function
|
||||
and coordinates all of the different charset probers.
|
||||
|
||||
To get a ``dict`` containing an encoding and its confidence, you can simply
|
||||
run:
|
||||
|
||||
.. code::
|
||||
|
||||
u = UniversalDetector()
|
||||
u.feed(some_bytes)
|
||||
u.close()
|
||||
detected = u.result
|
||||
|
||||
"""
|
||||
|
||||
MINIMUM_THRESHOLD = 0.20
|
||||
HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]')
|
||||
ESC_DETECTOR = re.compile(b'(\033|~{)')
|
||||
WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]')
|
||||
ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252',
|
||||
'iso-8859-2': 'Windows-1250',
|
||||
'iso-8859-5': 'Windows-1251',
|
||||
'iso-8859-6': 'Windows-1256',
|
||||
'iso-8859-7': 'Windows-1253',
|
||||
'iso-8859-8': 'Windows-1255',
|
||||
'iso-8859-9': 'Windows-1254',
|
||||
'iso-8859-13': 'Windows-1257'}
|
||||
|
||||
def __init__(self, lang_filter=LanguageFilter.ALL):
|
||||
self._esc_charset_prober = None
|
||||
self._charset_probers = []
|
||||
self.result = None
|
||||
self.done = None
|
||||
self._got_data = None
|
||||
self._input_state = None
|
||||
self._last_char = None
|
||||
self.lang_filter = lang_filter
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self._has_win_bytes = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.result = {'encoding': None, 'confidence': 0.0}
|
||||
"""
|
||||
Reset the UniversalDetector and all of its probers back to their
|
||||
initial states. This is called by ``__init__``, so you only need to
|
||||
call this directly in between analyses of different documents.
|
||||
"""
|
||||
self.result = {'encoding': None, 'confidence': 0.0, 'language': None}
|
||||
self.done = False
|
||||
self._mStart = True
|
||||
self._mGotData = False
|
||||
self._mInputState = ePureAscii
|
||||
self._mLastChar = b''
|
||||
if self._mEscCharSetProber:
|
||||
self._mEscCharSetProber.reset()
|
||||
for prober in self._mCharSetProbers:
|
||||
self._got_data = False
|
||||
self._has_win_bytes = False
|
||||
self._input_state = InputState.PURE_ASCII
|
||||
self._last_char = b''
|
||||
if self._esc_charset_prober:
|
||||
self._esc_charset_prober.reset()
|
||||
for prober in self._charset_probers:
|
||||
prober.reset()
|
||||
|
||||
def feed(self, aBuf):
|
||||
def feed(self, byte_str):
|
||||
"""
|
||||
Takes a chunk of a document and feeds it through all of the relevant
|
||||
charset probers.
|
||||
|
||||
After calling ``feed``, you can check the value of the ``done``
|
||||
attribute to see if you need to continue feeding the
|
||||
``UniversalDetector`` more data, or if it has made a prediction
|
||||
(in the ``result`` attribute).
|
||||
|
||||
.. note::
|
||||
You should always call ``close`` when you're done feeding in your
|
||||
document if ``done`` is not already ``True``.
|
||||
"""
|
||||
if self.done:
|
||||
return
|
||||
|
||||
aLen = len(aBuf)
|
||||
if not aLen:
|
||||
if not len(byte_str):
|
||||
return
|
||||
|
||||
if not self._mGotData:
|
||||
if not isinstance(byte_str, bytearray):
|
||||
byte_str = bytearray(byte_str)
|
||||
|
||||
# First check for known BOMs, since these are guaranteed to be correct
|
||||
if not self._got_data:
|
||||
# If the data starts with BOM, we know it is UTF
|
||||
if aBuf[:3] == codecs.BOM_UTF8:
|
||||
if byte_str.startswith(codecs.BOM_UTF8):
|
||||
# EF BB BF UTF-8 with BOM
|
||||
self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
|
||||
elif aBuf[:4] == codecs.BOM_UTF32_LE:
|
||||
self.result = {'encoding': "UTF-8-SIG",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith((codecs.BOM_UTF32_LE,
|
||||
codecs.BOM_UTF32_BE)):
|
||||
# FF FE 00 00 UTF-32, little-endian BOM
|
||||
self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
|
||||
elif aBuf[:4] == codecs.BOM_UTF32_BE:
|
||||
# 00 00 FE FF UTF-32, big-endian BOM
|
||||
self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
|
||||
elif aBuf[:4] == b'\xFE\xFF\x00\x00':
|
||||
self.result = {'encoding': "UTF-32",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith(b'\xFE\xFF\x00\x00'):
|
||||
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
|
||||
self.result = {
|
||||
'encoding': "X-ISO-10646-UCS-4-3412",
|
||||
'confidence': 1.0
|
||||
}
|
||||
elif aBuf[:4] == b'\x00\x00\xFF\xFE':
|
||||
self.result = {'encoding': "X-ISO-10646-UCS-4-3412",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith(b'\x00\x00\xFF\xFE'):
|
||||
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
||||
self.result = {
|
||||
'encoding': "X-ISO-10646-UCS-4-2143",
|
||||
'confidence': 1.0
|
||||
}
|
||||
elif aBuf[:2] == codecs.BOM_LE:
|
||||
self.result = {'encoding': "X-ISO-10646-UCS-4-2143",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)):
|
||||
# FF FE UTF-16, little endian BOM
|
||||
self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
|
||||
elif aBuf[:2] == codecs.BOM_BE:
|
||||
# FE FF UTF-16, big endian BOM
|
||||
self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}
|
||||
self.result = {'encoding': "UTF-16",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
|
||||
self._mGotData = True
|
||||
if self.result['encoding'] and (self.result['confidence'] > 0.0):
|
||||
self.done = True
|
||||
return
|
||||
|
||||
if self._mInputState == ePureAscii:
|
||||
if self._highBitDetector.search(aBuf):
|
||||
self._mInputState = eHighbyte
|
||||
elif ((self._mInputState == ePureAscii) and
|
||||
self._escDetector.search(self._mLastChar + aBuf)):
|
||||
self._mInputState = eEscAscii
|
||||
|
||||
self._mLastChar = aBuf[-1:]
|
||||
|
||||
if self._mInputState == eEscAscii:
|
||||
if not self._mEscCharSetProber:
|
||||
self._mEscCharSetProber = EscCharSetProber()
|
||||
if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
|
||||
self.result = {'encoding': self._mEscCharSetProber.get_charset_name(),
|
||||
'confidence': self._mEscCharSetProber.get_confidence()}
|
||||
self._got_data = True
|
||||
if self.result['encoding'] is not None:
|
||||
self.done = True
|
||||
elif self._mInputState == eHighbyte:
|
||||
if not self._mCharSetProbers:
|
||||
self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
|
||||
Latin1Prober()]
|
||||
for prober in self._mCharSetProbers:
|
||||
if prober.feed(aBuf) == constants.eFoundIt:
|
||||
self.result = {'encoding': prober.get_charset_name(),
|
||||
'confidence': prober.get_confidence()}
|
||||
return
|
||||
|
||||
# If none of those matched and we've only see ASCII so far, check
|
||||
# for high bytes and escape sequences
|
||||
if self._input_state == InputState.PURE_ASCII:
|
||||
if self.HIGH_BYTE_DETECTOR.search(byte_str):
|
||||
self._input_state = InputState.HIGH_BYTE
|
||||
elif self._input_state == InputState.PURE_ASCII and \
|
||||
self.ESC_DETECTOR.search(self._last_char + byte_str):
|
||||
self._input_state = InputState.ESC_ASCII
|
||||
|
||||
self._last_char = byte_str[-1:]
|
||||
|
||||
# If we've seen escape sequences, use the EscCharSetProber, which
|
||||
# uses a simple state machine to check for known escape sequences in
|
||||
# HZ and ISO-2022 encodings, since those are the only encodings that
|
||||
# use such sequences.
|
||||
if self._input_state == InputState.ESC_ASCII:
|
||||
if not self._esc_charset_prober:
|
||||
self._esc_charset_prober = EscCharSetProber(self.lang_filter)
|
||||
if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT:
|
||||
self.result = {'encoding':
|
||||
self._esc_charset_prober.charset_name,
|
||||
'confidence':
|
||||
self._esc_charset_prober.get_confidence(),
|
||||
'language':
|
||||
self._esc_charset_prober.language}
|
||||
self.done = True
|
||||
# If we've seen high bytes (i.e., those with values greater than 127),
|
||||
# we need to do more complicated checks using all our multi-byte and
|
||||
# single-byte probers that are left. The single-byte probers
|
||||
# use character bigram distributions to determine the encoding, whereas
|
||||
# the multi-byte probers use a combination of character unigram and
|
||||
# bigram distributions.
|
||||
elif self._input_state == InputState.HIGH_BYTE:
|
||||
if not self._charset_probers:
|
||||
self._charset_probers = [MBCSGroupProber(self.lang_filter)]
|
||||
# If we're checking non-CJK encodings, use single-byte prober
|
||||
if self.lang_filter & LanguageFilter.NON_CJK:
|
||||
self._charset_probers.append(SBCSGroupProber())
|
||||
self._charset_probers.append(Latin1Prober())
|
||||
for prober in self._charset_probers:
|
||||
if prober.feed(byte_str) == ProbingState.FOUND_IT:
|
||||
self.result = {'encoding': prober.charset_name,
|
||||
'confidence': prober.get_confidence(),
|
||||
'language': prober.language}
|
||||
self.done = True
|
||||
break
|
||||
if self.WIN_BYTE_DETECTOR.search(byte_str):
|
||||
self._has_win_bytes = True
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Stop analyzing the current document and come up with a final
|
||||
prediction.
|
||||
|
||||
:returns: The ``result`` attribute, a ``dict`` with the keys
|
||||
`encoding`, `confidence`, and `language`.
|
||||
"""
|
||||
# Don't bother with checks if we're already done
|
||||
if self.done:
|
||||
return
|
||||
if not self._mGotData:
|
||||
if constants._debug:
|
||||
sys.stderr.write('no data received!\n')
|
||||
return
|
||||
return self.result
|
||||
self.done = True
|
||||
|
||||
if self._mInputState == ePureAscii:
|
||||
self.result = {'encoding': 'ascii', 'confidence': 1.0}
|
||||
return self.result
|
||||
if not self._got_data:
|
||||
self.logger.debug('no data received!')
|
||||
|
||||
if self._mInputState == eHighbyte:
|
||||
proberConfidence = None
|
||||
maxProberConfidence = 0.0
|
||||
maxProber = None
|
||||
for prober in self._mCharSetProbers:
|
||||
# Default to ASCII if it is all we've seen so far
|
||||
elif self._input_state == InputState.PURE_ASCII:
|
||||
self.result = {'encoding': 'ascii',
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
|
||||
# If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD
|
||||
elif self._input_state == InputState.HIGH_BYTE:
|
||||
prober_confidence = None
|
||||
max_prober_confidence = 0.0
|
||||
max_prober = None
|
||||
for prober in self._charset_probers:
|
||||
if not prober:
|
||||
continue
|
||||
proberConfidence = prober.get_confidence()
|
||||
if proberConfidence > maxProberConfidence:
|
||||
maxProberConfidence = proberConfidence
|
||||
maxProber = prober
|
||||
if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
|
||||
self.result = {'encoding': maxProber.get_charset_name(),
|
||||
'confidence': maxProber.get_confidence()}
|
||||
return self.result
|
||||
prober_confidence = prober.get_confidence()
|
||||
if prober_confidence > max_prober_confidence:
|
||||
max_prober_confidence = prober_confidence
|
||||
max_prober = prober
|
||||
if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
|
||||
charset_name = max_prober.charset_name
|
||||
lower_charset_name = max_prober.charset_name.lower()
|
||||
confidence = max_prober.get_confidence()
|
||||
# Use Windows encoding name instead of ISO-8859 if we saw any
|
||||
# extra Windows-specific bytes
|
||||
if lower_charset_name.startswith('iso-8859'):
|
||||
if self._has_win_bytes:
|
||||
charset_name = self.ISO_WIN_MAP.get(lower_charset_name,
|
||||
charset_name)
|
||||
self.result = {'encoding': charset_name,
|
||||
'confidence': confidence,
|
||||
'language': max_prober.language}
|
||||
|
||||
if constants._debug:
|
||||
sys.stderr.write('no probers hit minimum threshhold\n')
|
||||
for prober in self._mCharSetProbers[0].mProbers:
|
||||
if not prober:
|
||||
continue
|
||||
sys.stderr.write('%s confidence = %s\n' %
|
||||
(prober.get_charset_name(),
|
||||
prober.get_confidence()))
|
||||
# Log all prober confidences if none met MINIMUM_THRESHOLD
|
||||
if self.logger.getEffectiveLevel() == logging.DEBUG:
|
||||
if self.result['encoding'] is None:
|
||||
self.logger.debug('no probers hit minimum threshold')
|
||||
for group_prober in self._charset_probers:
|
||||
if not group_prober:
|
||||
continue
|
||||
if isinstance(group_prober, CharSetGroupProber):
|
||||
for prober in group_prober.probers:
|
||||
self.logger.debug('%s %s confidence = %s',
|
||||
prober.charset_name,
|
||||
prober.language,
|
||||
prober.get_confidence())
|
||||
else:
|
||||
self.logger.debug('%s %s confidence = %s',
|
||||
prober.charset_name,
|
||||
prober.language,
|
||||
prober.get_confidence())
|
||||
return self.result
|
||||
|
||||
@@ -25,52 +25,58 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from . import constants
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState, MachineState
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .mbcssm import UTF8SMModel
|
||||
from .mbcssm import UTF8_SM_MODEL
|
||||
|
||||
ONE_CHAR_PROB = 0.5
|
||||
|
||||
|
||||
class UTF8Prober(CharSetProber):
|
||||
ONE_CHAR_PROB = 0.5
|
||||
|
||||
def __init__(self):
|
||||
CharSetProber.__init__(self)
|
||||
self._mCodingSM = CodingStateMachine(UTF8SMModel)
|
||||
super(UTF8Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
|
||||
self._num_mb_chars = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
CharSetProber.reset(self)
|
||||
self._mCodingSM.reset()
|
||||
self._mNumOfMBChar = 0
|
||||
super(UTF8Prober, self).reset()
|
||||
self.coding_sm.reset()
|
||||
self._num_mb_chars = 0
|
||||
|
||||
def get_charset_name(self):
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "utf-8"
|
||||
|
||||
def feed(self, aBuf):
|
||||
for c in aBuf:
|
||||
codingState = self._mCodingSM.next_state(c)
|
||||
if codingState == constants.eError:
|
||||
self._mState = constants.eNotMe
|
||||
break
|
||||
elif codingState == constants.eItsMe:
|
||||
self._mState = constants.eFoundIt
|
||||
break
|
||||
elif codingState == constants.eStart:
|
||||
if self._mCodingSM.get_current_charlen() >= 2:
|
||||
self._mNumOfMBChar += 1
|
||||
@property
|
||||
def language(self):
|
||||
return ""
|
||||
|
||||
if self.get_state() == constants.eDetecting:
|
||||
if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
|
||||
self._mState = constants.eFoundIt
|
||||
def feed(self, byte_str):
|
||||
for c in byte_str:
|
||||
coding_state = self.coding_sm.next_state(c)
|
||||
if coding_state == MachineState.ERROR:
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
if self.coding_sm.get_current_charlen() >= 2:
|
||||
self._num_mb_chars += 1
|
||||
|
||||
return self.get_state()
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if self.get_confidence() > self.SHORTCUT_THRESHOLD:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
unlike = 0.99
|
||||
if self._mNumOfMBChar < 6:
|
||||
for i in range(0, self._mNumOfMBChar):
|
||||
unlike = unlike * ONE_CHAR_PROB
|
||||
if self._num_mb_chars < 6:
|
||||
unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
|
||||
return 1.0 - unlike
|
||||
else:
|
||||
return unlike
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
This module exists only to simplify retrieving the version number of chardet
|
||||
from within setup.py and from chardet subpackages.
|
||||
|
||||
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||
"""
|
||||
|
||||
__version__ = "3.0.4"
|
||||
VERSION = __version__.split('.')
|
||||
@@ -0,0 +1,311 @@
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import ssl
|
||||
|
||||
from copy import deepcopy
|
||||
from time import sleep
|
||||
from collections import OrderedDict
|
||||
|
||||
from requests.sessions import Session
|
||||
from requests.adapters import HTTPAdapter
|
||||
from requests.packages.urllib3.util.ssl_ import create_urllib3_context
|
||||
|
||||
from .interpreters import JavaScriptInterpreter
|
||||
from .user_agent import User_Agent
|
||||
|
||||
try:
|
||||
from requests_toolbelt.utils import dump
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import brotli
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
from urlparse import urlunparse
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlunparse
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
__version__ = '1.1.9'
|
||||
|
||||
BUG_REPORT = 'Cloudflare may have changed their technique, or there may be a bug in the script.'
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
|
||||
class CipherSuiteAdapter(HTTPAdapter):
|
||||
|
||||
def __init__(self, cipherSuite=None, **kwargs):
|
||||
self.cipherSuite = cipherSuite
|
||||
|
||||
if hasattr(ssl, 'PROTOCOL_TLS'):
|
||||
self.ssl_context = create_urllib3_context(
|
||||
ssl_version=getattr(ssl, 'PROTOCOL_TLSv1_3', ssl.PROTOCOL_TLSv1_2),
|
||||
ciphers=self.cipherSuite
|
||||
)
|
||||
else:
|
||||
self.ssl_context = create_urllib3_context(ssl_version=ssl.PROTOCOL_TLSv1)
|
||||
|
||||
super(CipherSuiteAdapter, self).__init__(**kwargs)
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
def init_poolmanager(self, *args, **kwargs):
|
||||
kwargs['ssl_context'] = self.ssl_context
|
||||
return super(CipherSuiteAdapter, self).init_poolmanager(*args, **kwargs)
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
def proxy_manager_for(self, *args, **kwargs):
|
||||
kwargs['ssl_context'] = self.ssl_context
|
||||
return super(CipherSuiteAdapter, self).proxy_manager_for(*args, **kwargs)
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
|
||||
class CloudScraper(Session):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.debug = kwargs.pop('debug', False)
|
||||
self.delay = kwargs.pop('delay', None)
|
||||
self.interpreter = kwargs.pop('interpreter', 'js2py')
|
||||
self.allow_brotli = kwargs.pop('allow_brotli', True if 'brotli' in sys.modules.keys() else False)
|
||||
self.cipherSuite = None
|
||||
|
||||
super(CloudScraper, self).__init__(*args, **kwargs)
|
||||
|
||||
if 'requests' in self.headers['User-Agent']:
|
||||
# Set a random User-Agent if no custom User-Agent has been set
|
||||
self.headers = User_Agent(allow_brotli=self.allow_brotli).headers
|
||||
|
||||
self.mount('https://', CipherSuiteAdapter(self.loadCipherSuite()))
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
@staticmethod
|
||||
def debugRequest(req):
|
||||
try:
|
||||
print(dump.dump_all(req).decode('utf-8'))
|
||||
except: # noqa
|
||||
pass
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
def loadCipherSuite(self):
|
||||
if self.cipherSuite:
|
||||
return self.cipherSuite
|
||||
|
||||
self.cipherSuite = ''
|
||||
|
||||
if hasattr(ssl, 'PROTOCOL_TLS'):
|
||||
ciphers = [
|
||||
'ECDHE-ECDSA-AES128-GCM-SHA256', 'ECDHE-RSA-AES128-GCM-SHA256', 'ECDHE-ECDSA-AES256-GCM-SHA384',
|
||||
'ECDHE-RSA-AES256-GCM-SHA384', 'ECDHE-ECDSA-CHACHA20-POLY1305-SHA256', 'ECDHE-RSA-CHACHA20-POLY1305-SHA256',
|
||||
'ECDHE-RSA-AES128-CBC-SHA', 'ECDHE-RSA-AES256-CBC-SHA', 'RSA-AES128-GCM-SHA256', 'RSA-AES256-GCM-SHA384',
|
||||
'ECDHE-RSA-AES128-GCM-SHA256', 'RSA-AES256-SHA', '3DES-EDE-CBC'
|
||||
]
|
||||
|
||||
if hasattr(ssl, 'PROTOCOL_TLSv1_3'):
|
||||
ciphers.insert(0, ['GREASE_3A', 'GREASE_6A', 'AES128-GCM-SHA256', 'AES256-GCM-SHA256', 'AES256-GCM-SHA384', 'CHACHA20-POLY1305-SHA256'])
|
||||
|
||||
ctx = ssl.SSLContext(getattr(ssl, 'PROTOCOL_TLSv1_3', ssl.PROTOCOL_TLSv1_2))
|
||||
|
||||
for cipher in ciphers:
|
||||
try:
|
||||
ctx.set_ciphers(cipher)
|
||||
self.cipherSuite = '{}:{}'.format(self.cipherSuite, cipher).rstrip(':')
|
||||
except ssl.SSLError:
|
||||
pass
|
||||
|
||||
return self.cipherSuite
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
def request(self, method, url, *args, **kwargs):
|
||||
ourSuper = super(CloudScraper, self)
|
||||
resp = ourSuper.request(method, url, *args, **kwargs)
|
||||
|
||||
if resp.headers.get('Content-Encoding') == 'br':
|
||||
if self.allow_brotli and resp._content:
|
||||
resp._content = brotli.decompress(resp.content)
|
||||
else:
|
||||
logging.warning('Brotli content detected, But option is disabled, we will not continue.')
|
||||
return resp
|
||||
|
||||
# Debug request
|
||||
if self.debug:
|
||||
self.debugRequest(resp)
|
||||
|
||||
# Check if Cloudflare anti-bot is on
|
||||
if self.isChallengeRequest(resp):
|
||||
if resp.request.method != 'GET':
|
||||
# Work around if the initial request is not a GET,
|
||||
# Supersede with a GET then re-request the original METHOD.
|
||||
self.request('GET', resp.url)
|
||||
resp = ourSuper.request(method, url, *args, **kwargs)
|
||||
else:
|
||||
# Solve Challenge
|
||||
resp = self.sendChallengeResponse(resp, **kwargs)
|
||||
|
||||
return resp
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
@staticmethod
|
||||
def isChallengeRequest(resp):
|
||||
if resp.headers.get('Server', '').startswith('cloudflare'):
|
||||
if b'why_captcha' in resp.content or b'/cdn-cgi/l/chk_captcha' in resp.content:
|
||||
raise ValueError('Captcha')
|
||||
|
||||
return (
|
||||
resp.status_code in [429, 503]
|
||||
and all(s in resp.content for s in [b'jschl_vc', b'jschl_answer'])
|
||||
)
|
||||
|
||||
return False
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
def sendChallengeResponse(self, resp, **original_kwargs):
|
||||
body = resp.text
|
||||
|
||||
# Cloudflare requires a delay before solving the challenge
|
||||
if not self.delay:
|
||||
try:
|
||||
delay = float(re.search(r'submit\(\);\r?\n\s*},\s*([0-9]+)', body).group(1)) / float(1000)
|
||||
if isinstance(delay, (int, float)):
|
||||
self.delay = delay
|
||||
except: # noqa
|
||||
pass
|
||||
|
||||
sleep(self.delay)
|
||||
|
||||
parsed_url = urlparse(resp.url)
|
||||
domain = parsed_url.netloc
|
||||
submit_url = '{}://{}/cdn-cgi/l/chk_jschl'.format(parsed_url.scheme, domain)
|
||||
|
||||
cloudflare_kwargs = deepcopy(original_kwargs)
|
||||
|
||||
try:
|
||||
params = OrderedDict()
|
||||
|
||||
s = re.search(r'name="s"\svalue="(?P<s_value>[^"]+)', body)
|
||||
if s:
|
||||
params['s'] = s.group('s_value')
|
||||
|
||||
params.update(
|
||||
[
|
||||
('jschl_vc', re.search(r'name="jschl_vc" value="(\w+)"', body).group(1)),
|
||||
('pass', re.search(r'name="pass" value="(.+?)"', body).group(1))
|
||||
]
|
||||
)
|
||||
|
||||
params = cloudflare_kwargs.setdefault('params', params)
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError('Unable to parse Cloudflare anti-bots page: {} {}'.format(e.message, BUG_REPORT))
|
||||
|
||||
# Solve the Javascript challenge
|
||||
params['jschl_answer'] = JavaScriptInterpreter.dynamicImport(self.interpreter).solveChallenge(body, domain)
|
||||
|
||||
# Requests transforms any request into a GET after a redirect,
|
||||
# so the redirect has to be handled manually here to allow for
|
||||
# performing other types of requests even as the first request.
|
||||
|
||||
cloudflare_kwargs['allow_redirects'] = False
|
||||
|
||||
redirect = self.request(resp.request.method, submit_url, **cloudflare_kwargs)
|
||||
redirect_location = urlparse(redirect.headers['Location'])
|
||||
if not redirect_location.netloc:
|
||||
redirect_url = urlunparse(
|
||||
(
|
||||
parsed_url.scheme,
|
||||
domain,
|
||||
redirect_location.path,
|
||||
redirect_location.params,
|
||||
redirect_location.query,
|
||||
redirect_location.fragment
|
||||
)
|
||||
)
|
||||
return self.request(resp.request.method, redirect_url, **original_kwargs)
|
||||
|
||||
return self.request(resp.request.method, redirect.headers['Location'], **original_kwargs)
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
@classmethod
|
||||
def create_scraper(cls, sess=None, **kwargs):
|
||||
"""
|
||||
Convenience function for creating a ready-to-go CloudScraper object.
|
||||
"""
|
||||
scraper = cls(**kwargs)
|
||||
|
||||
if sess:
|
||||
attrs = ['auth', 'cert', 'cookies', 'headers', 'hooks', 'params', 'proxies', 'data']
|
||||
for attr in attrs:
|
||||
val = getattr(sess, attr, None)
|
||||
if val:
|
||||
setattr(scraper, attr, val)
|
||||
|
||||
return scraper
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
# Functions for integrating cloudscraper with other applications and scripts
|
||||
@classmethod
|
||||
def get_tokens(cls, url, **kwargs):
|
||||
scraper = cls.create_scraper(
|
||||
debug=kwargs.pop('debug', False),
|
||||
delay=kwargs.pop('delay', None),
|
||||
interpreter=kwargs.pop('interpreter', 'js2py'),
|
||||
allow_brotli=kwargs.pop('allow_brotli', True),
|
||||
)
|
||||
|
||||
try:
|
||||
resp = scraper.get(url, **kwargs)
|
||||
resp.raise_for_status()
|
||||
except Exception:
|
||||
logging.error('"{}" returned an error. Could not collect tokens.'.format(url))
|
||||
raise
|
||||
|
||||
domain = urlparse(resp.url).netloc
|
||||
# noinspection PyUnusedLocal
|
||||
cookie_domain = None
|
||||
|
||||
for d in scraper.cookies.list_domains():
|
||||
if d.startswith('.') and d in ('.{}'.format(domain)):
|
||||
cookie_domain = d
|
||||
break
|
||||
else:
|
||||
raise ValueError('Unable to find Cloudflare cookies. Does the site actually have Cloudflare IUAM ("I\'m Under Attack Mode") enabled?')
|
||||
|
||||
return (
|
||||
{
|
||||
'__cfduid': scraper.cookies.get('__cfduid', '', domain=cookie_domain),
|
||||
'cf_clearance': scraper.cookies.get('cf_clearance', '', domain=cookie_domain)
|
||||
},
|
||||
scraper.headers['User-Agent']
|
||||
)
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
@classmethod
|
||||
def get_cookie_string(cls, url, **kwargs):
|
||||
"""
|
||||
Convenience function for building a Cookie HTTP header value.
|
||||
"""
|
||||
tokens, user_agent = cls.get_tokens(url, **kwargs)
|
||||
return '; '.join('='.join(pair) for pair in tokens.items()), user_agent
|
||||
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
create_scraper = CloudScraper.create_scraper
|
||||
get_tokens = CloudScraper.get_tokens
|
||||
get_cookie_string = CloudScraper.get_cookie_string
|
||||
@@ -0,0 +1,89 @@
|
||||
import re
|
||||
import sys
|
||||
import logging
|
||||
import abc
|
||||
|
||||
if sys.version_info >= (3, 4):
|
||||
ABC = abc.ABC # noqa
|
||||
else:
|
||||
ABC = abc.ABCMeta('ABC', (), {})
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
BUG_REPORT = 'Cloudflare may have changed their technique, or there may be a bug in the script.'
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
interpreters = {}
|
||||
|
||||
|
||||
class JavaScriptInterpreter(ABC):
|
||||
@abc.abstractmethod
|
||||
def __init__(self, name):
|
||||
interpreters[name] = self
|
||||
|
||||
@classmethod
|
||||
def dynamicImport(cls, name):
|
||||
if name not in interpreters:
|
||||
try:
|
||||
__import__('{}.{}'.format(cls.__module__, name))
|
||||
if not isinstance(interpreters.get(name), JavaScriptInterpreter):
|
||||
raise ImportError('The interpreter was not initialized.')
|
||||
except ImportError:
|
||||
logging.error('Unable to load {} interpreter'.format(name))
|
||||
raise
|
||||
|
||||
return interpreters[name]
|
||||
|
||||
@abc.abstractmethod
|
||||
def eval(self, jsEnv, js):
|
||||
pass
|
||||
|
||||
def solveChallenge(self, body, domain):
|
||||
try:
|
||||
js = re.search(
|
||||
r'setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n',
|
||||
body
|
||||
).group(1)
|
||||
except Exception:
|
||||
raise ValueError('Unable to identify Cloudflare IUAM Javascript on website. {}'.format(BUG_REPORT))
|
||||
|
||||
js = re.sub(r'\s{2,}', ' ', js, flags=re.MULTILINE | re.DOTALL).replace('\'; 121\'', '')
|
||||
js += '\na.value;'
|
||||
|
||||
jsEnv = '''
|
||||
String.prototype.italics=function(str) {{return "<i>" + this + "</i>";}};
|
||||
var document = {{
|
||||
createElement: function () {{
|
||||
return {{ firstChild: {{ href: "https://{domain}/" }} }}
|
||||
}},
|
||||
getElementById: function () {{
|
||||
return {{"innerHTML": "{innerHTML}"}};
|
||||
}}
|
||||
}};
|
||||
'''
|
||||
|
||||
try:
|
||||
innerHTML = re.search(
|
||||
r'<div(?: [^<>]*)? id="([^<>]*?)">([^<>]*?)</div>',
|
||||
body,
|
||||
re.MULTILINE | re.DOTALL
|
||||
)
|
||||
innerHTML = innerHTML.group(2) if innerHTML else ''
|
||||
|
||||
except: # noqa
|
||||
logging.error('Error extracting Cloudflare IUAM Javascript. {}'.format(BUG_REPORT))
|
||||
raise
|
||||
|
||||
try:
|
||||
result = self.eval(
|
||||
re.sub(r'\s{2,}', ' ', jsEnv.format(domain=domain, innerHTML=innerHTML), flags=re.MULTILINE | re.DOTALL),
|
||||
js
|
||||
)
|
||||
|
||||
float(result)
|
||||
except Exception:
|
||||
logging.error('Error executing Cloudflare IUAM Javascript. {}'.format(BUG_REPORT))
|
||||
raise
|
||||
|
||||
return result
|
||||
@@ -0,0 +1,32 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import js2py
|
||||
import logging
|
||||
import base64
|
||||
|
||||
from . import JavaScriptInterpreter
|
||||
|
||||
from .jsunfuck import jsunfuck
|
||||
|
||||
|
||||
class ChallengeInterpreter(JavaScriptInterpreter):
|
||||
|
||||
def __init__(self):
|
||||
super(ChallengeInterpreter, self).__init__('js2py')
|
||||
|
||||
def eval(self, jsEnv, js):
|
||||
if js2py.eval_js('(+(+!+[]+[+!+[]]+(!![]+[])[!+[]+!+[]+!+[]]+[!+[]+!+[]]+[+[]])+[])[+!+[]]') == '1':
|
||||
logging.warning('WARNING - Please upgrade your js2py https://github.com/PiotrDabkowski/Js2Py, applying work around for the meantime.')
|
||||
js = jsunfuck(js)
|
||||
|
||||
def atob(s):
|
||||
return base64.b64decode('{}'.format(s)).decode('utf-8')
|
||||
|
||||
js2py.disable_pyimport()
|
||||
context = js2py.EvalJs({'atob': atob})
|
||||
result = context.eval('{}{}'.format(jsEnv, js))
|
||||
|
||||
return result
|
||||
|
||||
|
||||
ChallengeInterpreter()
|
||||
@@ -0,0 +1,97 @@
|
||||
MAPPING = {
|
||||
'a': '(false+"")[1]',
|
||||
'b': '([]["entries"]()+"")[2]',
|
||||
'c': '([]["fill"]+"")[3]',
|
||||
'd': '(undefined+"")[2]',
|
||||
'e': '(true+"")[3]',
|
||||
'f': '(false+"")[0]',
|
||||
'g': '(false+[0]+String)[20]',
|
||||
'h': '(+(101))["to"+String["name"]](21)[1]',
|
||||
'i': '([false]+undefined)[10]',
|
||||
'j': '([]["entries"]()+"")[3]',
|
||||
'k': '(+(20))["to"+String["name"]](21)',
|
||||
'l': '(false+"")[2]',
|
||||
'm': '(Number+"")[11]',
|
||||
'n': '(undefined+"")[1]',
|
||||
'o': '(true+[]["fill"])[10]',
|
||||
'p': '(+(211))["to"+String["name"]](31)[1]',
|
||||
'q': '(+(212))["to"+String["name"]](31)[1]',
|
||||
'r': '(true+"")[1]',
|
||||
's': '(false+"")[3]',
|
||||
't': '(true+"")[0]',
|
||||
'u': '(undefined+"")[0]',
|
||||
'v': '(+(31))["to"+String["name"]](32)',
|
||||
'w': '(+(32))["to"+String["name"]](33)',
|
||||
'x': '(+(101))["to"+String["name"]](34)[1]',
|
||||
'y': '(NaN+[Infinity])[10]',
|
||||
'z': '(+(35))["to"+String["name"]](36)',
|
||||
'A': '(+[]+Array)[10]',
|
||||
'B': '(+[]+Boolean)[10]',
|
||||
'C': 'Function("return escape")()(("")["italics"]())[2]',
|
||||
'D': 'Function("return escape")()([]["fill"])["slice"]("-1")',
|
||||
'E': '(RegExp+"")[12]',
|
||||
'F': '(+[]+Function)[10]',
|
||||
'G': '(false+Function("return Date")()())[30]',
|
||||
'I': '(Infinity+"")[0]',
|
||||
'M': '(true+Function("return Date")()())[30]',
|
||||
'N': '(NaN+"")[0]',
|
||||
'O': '(NaN+Function("return{}")())[11]',
|
||||
'R': '(+[]+RegExp)[10]',
|
||||
'S': '(+[]+String)[10]',
|
||||
'T': '(NaN+Function("return Date")()())[30]',
|
||||
'U': '(NaN+Function("return{}")()["to"+String["name"]]["call"]())[11]',
|
||||
' ': '(NaN+[]["fill"])[11]',
|
||||
'"': '("")["fontcolor"]()[12]',
|
||||
'%': 'Function("return escape")()([]["fill"])[21]',
|
||||
'&': '("")["link"](0+")[10]',
|
||||
'(': '(undefined+[]["fill"])[22]',
|
||||
')': '([0]+false+[]["fill"])[20]',
|
||||
'+': '(+(+!+[]+(!+[]+[])[!+[]+!+[]+!+[]]+[+!+[]]+[+[]]+[+[]])+[])[2]',
|
||||
',': '([]["slice"]["call"](false+"")+"")[1]',
|
||||
'-': '(+(.+[0000000001])+"")[2]',
|
||||
'.': '(+(+!+[]+[+!+[]]+(!![]+[])[!+[]+!+[]+!+[]]+[!+[]+!+[]]+[+[]])+[])[+!+[]]',
|
||||
'/': '(false+[0])["italics"]()[10]',
|
||||
':': '(RegExp()+"")[3]',
|
||||
';': '("")["link"](")[14]',
|
||||
'<': '("")["italics"]()[0]',
|
||||
'=': '("")["fontcolor"]()[11]',
|
||||
'>': '("")["italics"]()[2]',
|
||||
'?': '(RegExp()+"")[2]',
|
||||
'[': '([]["entries"]()+"")[0]',
|
||||
']': '([]["entries"]()+"")[22]',
|
||||
'{': '(true+[]["fill"])[20]',
|
||||
'}': '([]["fill"]+"")["slice"]("-1")'
|
||||
}
|
||||
|
||||
SIMPLE = {
|
||||
'false': '![]',
|
||||
'true': '!![]',
|
||||
'undefined': '[][[]]',
|
||||
'NaN': '+[![]]',
|
||||
'Infinity': '+(+!+[]+(!+[]+[])[!+[]+!+[]+!+[]]+[+!+[]]+[+[]]+[+[]]+[+[]])' # +"1e1000"
|
||||
}
|
||||
|
||||
CONSTRUCTORS = {
|
||||
'Array': '[]',
|
||||
'Number': '(+[])',
|
||||
'String': '([]+[])',
|
||||
'Boolean': '(![])',
|
||||
'Function': '[]["fill"]',
|
||||
'RegExp': 'Function("return/"+false+"/")()'
|
||||
}
|
||||
|
||||
|
||||
def jsunfuck(jsfuckString):
|
||||
for key in sorted(MAPPING, key=lambda k: len(MAPPING[k]), reverse=True):
|
||||
if MAPPING.get(key) in jsfuckString:
|
||||
jsfuckString = jsfuckString.replace(MAPPING.get(key), '"{}"'.format(key))
|
||||
|
||||
for key in sorted(SIMPLE, key=lambda k: len(SIMPLE[k]), reverse=True):
|
||||
if SIMPLE.get(key) in jsfuckString:
|
||||
jsfuckString = jsfuckString.replace(SIMPLE.get(key), '{}'.format(key))
|
||||
|
||||
# for key in sorted(CONSTRUCTORS, key=lambda k: len(CONSTRUCTORS[k]), reverse=True):
|
||||
# if CONSTRUCTORS.get(key) in jsfuckString:
|
||||
# jsfuckString = jsfuckString.replace(CONSTRUCTORS.get(key), '{}'.format(key))
|
||||
|
||||
return jsfuckString
|
||||
@@ -0,0 +1,46 @@
|
||||
import base64
|
||||
import logging
|
||||
import subprocess
|
||||
|
||||
from . import JavaScriptInterpreter
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
BUG_REPORT = 'Cloudflare may have changed their technique, or there may be a bug in the script.'
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
|
||||
class ChallengeInterpreter(JavaScriptInterpreter):
|
||||
|
||||
def __init__(self):
|
||||
super(ChallengeInterpreter, self).__init__('nodejs')
|
||||
|
||||
def eval(self, jsEnv, js):
|
||||
try:
|
||||
js = 'var atob = function(str) {return Buffer.from(str, "base64").toString("binary");};' \
|
||||
'var challenge = atob("%s");' \
|
||||
'var context = {atob: atob};' \
|
||||
'var options = {filename: "iuam-challenge.js", timeout: 4000};' \
|
||||
'var answer = require("vm").runInNewContext(challenge, context, options);' \
|
||||
'process.stdout.write(String(answer));' \
|
||||
% base64.b64encode('{}{}'.format(jsEnv, js).encode('UTF-8')).decode('ascii')
|
||||
|
||||
return subprocess.check_output(['node', '-e', js])
|
||||
|
||||
except OSError as e:
|
||||
if e.errno == 2:
|
||||
raise EnvironmentError(
|
||||
'Missing Node.js runtime. Node is required and must be in the PATH (check with `node -v`). Your Node binary may be called `nodejs` rather than `node`, '
|
||||
'in which case you may need to run `apt-get install nodejs-legacy` on some Debian-based systems. (Please read the cloudscraper'
|
||||
' README\'s Dependencies section: https://github.com/VeNoMouS/cloudscraper#dependencies.'
|
||||
)
|
||||
raise
|
||||
except Exception:
|
||||
logging.error('Error executing Cloudflare IUAM Javascript. %s' % BUG_REPORT)
|
||||
raise
|
||||
|
||||
pass
|
||||
|
||||
|
||||
ChallengeInterpreter()
|
||||
@@ -0,0 +1,40 @@
|
||||
import os
|
||||
import json
|
||||
import random
|
||||
import logging
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
|
||||
class User_Agent():
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.headers = None
|
||||
self.loadUserAgent(*args, **kwargs)
|
||||
|
||||
##########################################################################################################################################################
|
||||
|
||||
def loadUserAgent(self, *args, **kwargs):
|
||||
browser = kwargs.pop('browser', 'chrome')
|
||||
|
||||
user_agents = json.load(
|
||||
open(os.path.join(os.path.dirname(__file__), 'browsers.json'), 'r'),
|
||||
object_pairs_hook=OrderedDict
|
||||
)
|
||||
|
||||
if not user_agents.get(browser):
|
||||
logging.error('Sorry "{}" browser User-Agent was not found.'.format(browser))
|
||||
raise
|
||||
|
||||
user_agent = random.choice(user_agents.get(browser))
|
||||
|
||||
self.headers = user_agent.get('headers')
|
||||
self.headers['User-Agent'] = random.choice(user_agent.get('User-Agent'))
|
||||
|
||||
if not kwargs.get('allow_brotli', False):
|
||||
if 'br' in self.headers['Accept-Encoding']:
|
||||
self.headers['Accept-Encoding'] = ','.join([encoding for encoding in self.headers['Accept-Encoding'].split(',') if encoding.strip() != 'br']).strip()
|
||||
@@ -0,0 +1,336 @@
|
||||
{
|
||||
"chrome": [
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.101 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.101 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.101 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.113 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": null,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.8",
|
||||
"Accept-Encoding": "gzip, deflate, , br"
|
||||
}
|
||||
},
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": null,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.8",
|
||||
"Accept-Encoding": "gzip, deflate, br"
|
||||
}
|
||||
},
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.170 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": null,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
"Accept-Encoding": "gzip, deflate, br"
|
||||
}
|
||||
},
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"User-Agent": null,
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
"Accept-Encoding": "gzip, deflate, br"
|
||||
}
|
||||
},
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.40 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.40 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.28 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.28 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.28 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.28 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.28 Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": null,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
"Accept-Encoding": "gzip, deflate, br"
|
||||
}
|
||||
},
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Linux; Android 8.1.0; SM-N960F Build/M1AJQ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 8.0.0; SM-G965F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 Build/OPD1.170816.010) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 8.0.0; Pixel Build/OPR6.170623.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.1.1; SM-A530F Build/NMF26X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.1; Pixel Build/NDE63H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.0; SM-G955F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.0; SM-G950F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.0; SM-T825 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 6.0.1; SM-G930F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 6.0; Nexus 6 Build/MRA58K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 6.0; XT1092 Build/MPE24.49-18) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 6.0.1; SM-N910C Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 5.0.2; SM-G920F Build/LRX22G) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 5.0; Nexus 6 Build/LRX21O) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 9; Pixel 3 XL Build/PD1A.180720.030) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 9; Pixel 3 Build/PD1A.180720.030) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 9; Pixel 2 Build/PPR1.180610.009) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 4.4; Nexus 5 Build/KRT16M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 4.4.2; SM-T530 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 4.4.4; SM-N910C Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 5.1.1; Nexus 9 Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.1.1; SM-N950F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.90 Mobile Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": null,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
"Accept-Language": "en-US,en;q=0.9"
|
||||
}
|
||||
},
|
||||
{
|
||||
"User-Agent": [
|
||||
"Mozilla/5.0 (Linux; Android 8.1.0; SM-T835 Build/M1AJQ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 5.0; XT1092 Build/LXE22.46-19) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.85 Mobile Safari/537.36"
|
||||
],
|
||||
"headers": {
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": null,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
"Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,516 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
"""Death by Captcha HTTP and socket API clients.
|
||||
|
||||
There are two types of Death by Captcha (DBC hereinafter) API: HTTP and
|
||||
socket ones. Both offer the same functionalily, with the socket API
|
||||
sporting faster responses and using way less connections.
|
||||
|
||||
To access the socket API, use SocketClient class; for the HTTP API, use
|
||||
HttpClient class. Both are thread-safe. SocketClient keeps a persistent
|
||||
connection opened and serializes all API requests sent through it, thus
|
||||
it is advised to keep a pool of them if you're script is heavily
|
||||
multithreaded.
|
||||
|
||||
Both SocketClient and HttpClient give you the following methods:
|
||||
|
||||
get_user()
|
||||
Returns your DBC account details as a dict with the following keys:
|
||||
|
||||
"user": your account numeric ID; if login fails, it will be the only
|
||||
item with the value of 0;
|
||||
"rate": your CAPTCHA rate, i.e. how much you will be charged for one
|
||||
solved CAPTCHA in US cents;
|
||||
"balance": your DBC account balance in US cents;
|
||||
"is_banned": flag indicating whether your account is suspended or not.
|
||||
|
||||
get_balance()
|
||||
Returns your DBC account balance in US cents.
|
||||
|
||||
get_captcha(cid)
|
||||
Returns an uploaded CAPTCHA details as a dict with the following keys:
|
||||
|
||||
"captcha": the CAPTCHA numeric ID; if no such CAPTCHAs found, it will
|
||||
be the only item with the value of 0;
|
||||
"text": the CAPTCHA text, if solved, otherwise None;
|
||||
"is_correct": flag indicating whether the CAPTCHA was solved correctly
|
||||
(DBC can detect that in rare cases).
|
||||
|
||||
The only argument `cid` is the CAPTCHA numeric ID.
|
||||
|
||||
get_text(cid)
|
||||
Returns an uploaded CAPTCHA text (None if not solved). The only argument
|
||||
`cid` is the CAPTCHA numeric ID.
|
||||
|
||||
report(cid)
|
||||
Reports an incorrectly solved CAPTCHA. The only argument `cid` is the
|
||||
CAPTCHA numeric ID. Returns True on success, False otherwise.
|
||||
|
||||
upload(captcha)
|
||||
Uploads a CAPTCHA. The only argument `captcha` can be either file-like
|
||||
object (any object with `read` method defined, actually, so StringIO
|
||||
will do), or CAPTCHA image file name. On successul upload you'll get
|
||||
the CAPTCHA details dict (see get_captcha() method).
|
||||
|
||||
NOTE: AT THIS POINT THE UPLOADED CAPTCHA IS NOT SOLVED YET! You have
|
||||
to poll for its status periodically using get_captcha() or get_text()
|
||||
method until the CAPTCHA is solved and you get the text.
|
||||
|
||||
decode(captcha, timeout=DEFAULT_TIMEOUT)
|
||||
A convenient method that uploads a CAPTCHA and polls for its status
|
||||
periodically, but no longer than `timeout` (defaults to 60 seconds).
|
||||
If solved, you'll get the CAPTCHA details dict (see get_captcha()
|
||||
method for details). See upload() method for details on `captcha`
|
||||
argument.
|
||||
|
||||
Visit http://www.deathbycaptcha.com/user/api for updates.
|
||||
|
||||
"""
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import errno
|
||||
import imghdr
|
||||
import random
|
||||
import os
|
||||
import select
|
||||
import socket
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import urllib
|
||||
import urllib2
|
||||
try:
|
||||
from json import read as json_decode, write as json_encode
|
||||
except ImportError:
|
||||
try:
|
||||
from json import loads as json_decode, dumps as json_encode
|
||||
except ImportError:
|
||||
from simplejson import loads as json_decode, dumps as json_encode
|
||||
|
||||
|
||||
# API version and unique software ID
|
||||
API_VERSION = 'DBC/Python v4.6'
|
||||
|
||||
# Default CAPTCHA timeout and decode() polling interval
|
||||
DEFAULT_TIMEOUT = 60
|
||||
DEFAULT_TOKEN_TIMEOUT = 120
|
||||
POLLS_INTERVAL = [1, 1, 2, 3, 2, 2, 3, 2, 2]
|
||||
DFLT_POLL_INTERVAL = 3
|
||||
|
||||
# Base HTTP API url
|
||||
HTTP_BASE_URL = 'http://api.dbcapi.me/api'
|
||||
|
||||
# Preferred HTTP API server's response content type, do not change
|
||||
HTTP_RESPONSE_TYPE = 'application/json'
|
||||
|
||||
# Socket API server's host & ports range
|
||||
SOCKET_HOST = 'api.dbcapi.me'
|
||||
SOCKET_PORTS = range(8123, 8131)
|
||||
|
||||
|
||||
def _load_image(captcha):
|
||||
if hasattr(captcha, 'read'):
|
||||
img = captcha.read()
|
||||
elif type(captcha) == bytearray:
|
||||
img = captcha
|
||||
else:
|
||||
img = ''
|
||||
try:
|
||||
captcha_file = open(captcha, 'rb')
|
||||
except Exception:
|
||||
raise
|
||||
else:
|
||||
img = captcha_file.read()
|
||||
captcha_file.close()
|
||||
if not len(img):
|
||||
raise ValueError('CAPTCHA image is empty')
|
||||
elif imghdr.what(None, img) is None:
|
||||
raise TypeError('Unknown CAPTCHA image type')
|
||||
else:
|
||||
return img
|
||||
|
||||
|
||||
class AccessDeniedException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class Client(object):
|
||||
|
||||
"""Death by Captcha API Client."""
|
||||
|
||||
def __init__(self, username, password):
|
||||
self.is_verbose = False
|
||||
self.userpwd = {'username': username, 'password': password}
|
||||
|
||||
def _log(self, cmd, msg=''):
|
||||
if self.is_verbose:
|
||||
print '%d %s %s' % (time.time(), cmd, msg.rstrip())
|
||||
return self
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
def connect(self):
|
||||
pass
|
||||
|
||||
def get_user(self):
|
||||
"""Fetch user details -- ID, balance, rate and banned status."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_balance(self):
|
||||
"""Fetch user balance (in US cents)."""
|
||||
return self.get_user().get('balance')
|
||||
|
||||
def get_captcha(self, cid):
|
||||
"""Fetch a CAPTCHA details -- ID, text and correctness flag."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_text(self, cid):
|
||||
"""Fetch a CAPTCHA text."""
|
||||
return self.get_captcha(cid).get('text') or None
|
||||
|
||||
def report(self, cid):
|
||||
"""Report a CAPTCHA as incorrectly solved."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def upload(self, captcha):
|
||||
"""Upload a CAPTCHA.
|
||||
|
||||
Accepts file names and file-like objects. Returns CAPTCHA details
|
||||
dict on success.
|
||||
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def decode(self, captcha=None, timeout=None, **kwargs):
|
||||
"""
|
||||
Try to solve a CAPTCHA.
|
||||
|
||||
See Client.upload() for arguments details.
|
||||
|
||||
Uploads a CAPTCHA, polls for its status periodically with arbitrary
|
||||
timeout (in seconds), returns CAPTCHA details if (correctly) solved.
|
||||
"""
|
||||
if not timeout:
|
||||
if not captcha:
|
||||
timeout = DEFAULT_TOKEN_TIMEOUT
|
||||
else:
|
||||
timeout = DEFAULT_TIMEOUT
|
||||
|
||||
deadline = time.time() + (max(0, timeout) or DEFAULT_TIMEOUT)
|
||||
uploaded_captcha = self.upload(captcha, **kwargs)
|
||||
if uploaded_captcha:
|
||||
intvl_idx = 0 # POLL_INTERVAL index
|
||||
while deadline > time.time() and not uploaded_captcha.get('text'):
|
||||
intvl, intvl_idx = self._get_poll_interval(intvl_idx)
|
||||
time.sleep(intvl)
|
||||
pulled = self.get_captcha(uploaded_captcha['captcha'])
|
||||
if pulled['captcha'] == uploaded_captcha['captcha']:
|
||||
uploaded_captcha = pulled
|
||||
if uploaded_captcha.get('text') and \
|
||||
uploaded_captcha.get('is_correct'):
|
||||
return uploaded_captcha
|
||||
|
||||
def _get_poll_interval(self, idx):
|
||||
"""Returns poll interval and next index depending on index provided"""
|
||||
|
||||
if len(POLLS_INTERVAL) > idx:
|
||||
intvl = POLLS_INTERVAL[idx]
|
||||
else:
|
||||
intvl = DFLT_POLL_INTERVAL
|
||||
idx += 1
|
||||
|
||||
return intvl, idx
|
||||
|
||||
|
||||
class HttpClient(Client):
|
||||
|
||||
"""Death by Captcha HTTP API client."""
|
||||
|
||||
def __init__(self, *args):
|
||||
Client.__init__(self, *args)
|
||||
self.opener = urllib2.build_opener(urllib2.HTTPRedirectHandler())
|
||||
|
||||
def _call(self, cmd, payload=None, headers=None):
|
||||
if headers is None:
|
||||
headers = {}
|
||||
headers['Accept'] = HTTP_RESPONSE_TYPE
|
||||
headers['User-Agent'] = API_VERSION
|
||||
if hasattr(payload, 'items'):
|
||||
payload = urllib.urlencode(payload)
|
||||
self._log('SEND', '%s %d %s' % (cmd, len(payload), payload))
|
||||
else:
|
||||
self._log('SEND', '%s' % cmd)
|
||||
if payload is not None:
|
||||
headers['Content-Length'] = len(payload)
|
||||
try:
|
||||
response = self.opener.open(urllib2.Request(
|
||||
HTTP_BASE_URL + '/' + cmd.strip('/'),
|
||||
data=payload,
|
||||
headers=headers
|
||||
)).read()
|
||||
except urllib2.HTTPError, err:
|
||||
if 403 == err.code:
|
||||
raise AccessDeniedException('Access denied, please check'
|
||||
' your credentials and/or balance')
|
||||
elif 400 == err.code or 413 == err.code:
|
||||
raise ValueError("CAPTCHA was rejected by the service, check"
|
||||
" if it's a valid image")
|
||||
elif 503 == err.code:
|
||||
raise OverflowError("CAPTCHA was rejected due to service"
|
||||
" overload, try again later")
|
||||
else:
|
||||
raise err
|
||||
else:
|
||||
self._log('RECV', '%d %s' % (len(response), response))
|
||||
try:
|
||||
return json_decode(response)
|
||||
except Exception:
|
||||
raise RuntimeError('Invalid API response')
|
||||
return {}
|
||||
|
||||
def get_user(self):
|
||||
return self._call('user', self.userpwd.copy()) or {'user': 0}
|
||||
|
||||
def get_captcha(self, cid):
|
||||
return self._call('captcha/%d' % cid) or {'captcha': 0}
|
||||
|
||||
def report(self, cid):
|
||||
return not self._call('captcha/%d/report' % cid,
|
||||
self.userpwd.copy()).get('is_correct')
|
||||
|
||||
def upload(self, captcha=None, **kwargs):
|
||||
boundary = binascii.hexlify(os.urandom(16))
|
||||
banner = kwargs.get('banner', '')
|
||||
if banner:
|
||||
kwargs['banner'] = 'base64:' + base64.b64encode(_load_image(banner))
|
||||
body = '\r\n'.join(('\r\n'.join((
|
||||
'--%s' % boundary,
|
||||
'Content-Disposition: form-data; name="%s"' % k,
|
||||
'Content-Type: text/plain',
|
||||
'Content-Length: %d' % len(str(v)),
|
||||
'',
|
||||
str(v)
|
||||
))) for k, v in self.userpwd.items())
|
||||
|
||||
body += '\r\n'.join(('\r\n'.join((
|
||||
'--%s' % boundary,
|
||||
'Content-Disposition: form-data; name="%s"' % k,
|
||||
'Content-Type: text/plain',
|
||||
'Content-Length: %d' % len(str(v)),
|
||||
'',
|
||||
str(v)
|
||||
))) for k, v in kwargs.items())
|
||||
|
||||
if captcha:
|
||||
img = _load_image(captcha)
|
||||
body += '\r\n'.join((
|
||||
'',
|
||||
'--%s' % boundary,
|
||||
'Content-Disposition: form-data; name="captchafile"; '
|
||||
'filename="captcha"',
|
||||
'Content-Type: application/octet-stream',
|
||||
'Content-Length: %d' % len(img),
|
||||
'',
|
||||
img,
|
||||
'--%s--' % boundary,
|
||||
''
|
||||
))
|
||||
|
||||
response = self._call('captcha', body, {
|
||||
'Content-Type': 'multipart/form-data; boundary="%s"' % boundary
|
||||
}) or {}
|
||||
if response.get('captcha'):
|
||||
return response
|
||||
|
||||
|
||||
class SocketClient(Client):
|
||||
|
||||
"""Death by Captcha socket API client."""
|
||||
|
||||
TERMINATOR = '\r\n'
|
||||
|
||||
def __init__(self, *args):
|
||||
Client.__init__(self, *args)
|
||||
self.socket_lock = threading.Lock()
|
||||
self.socket = None
|
||||
|
||||
def close(self):
|
||||
if self.socket:
|
||||
self._log('CLOSE')
|
||||
try:
|
||||
self.socket.shutdown(socket.SHUT_RDWR)
|
||||
except socket.error:
|
||||
pass
|
||||
finally:
|
||||
self.socket.close()
|
||||
self.socket = None
|
||||
|
||||
def connect(self):
|
||||
if not self.socket:
|
||||
self._log('CONN')
|
||||
host = (socket.gethostbyname(SOCKET_HOST),
|
||||
random.choice(SOCKET_PORTS))
|
||||
self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
self.socket.settimeout(0)
|
||||
try:
|
||||
self.socket.connect(host)
|
||||
except socket.error, err:
|
||||
if (err.args[0] not in
|
||||
(errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
|
||||
self.close()
|
||||
raise err
|
||||
return self.socket
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
|
||||
def _sendrecv(self, sock, buf):
|
||||
self._log('SEND', buf)
|
||||
fds = [sock]
|
||||
buf += self.TERMINATOR
|
||||
response = ''
|
||||
intvl_idx = 0
|
||||
while True:
|
||||
intvl, intvl_idx = self._get_poll_interval(intvl_idx)
|
||||
rds, wrs, exs = select.select((not buf and fds) or [],
|
||||
(buf and fds) or [],
|
||||
fds,
|
||||
intvl)
|
||||
if exs:
|
||||
raise IOError('select() failed')
|
||||
try:
|
||||
if wrs:
|
||||
while buf:
|
||||
buf = buf[wrs[0].send(buf):]
|
||||
elif rds:
|
||||
while True:
|
||||
s = rds[0].recv(256)
|
||||
if not s:
|
||||
raise IOError('recv(): connection lost')
|
||||
else:
|
||||
response += s
|
||||
except socket.error, err:
|
||||
if (err.args[0] not in
|
||||
(errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
|
||||
raise err
|
||||
if response.endswith(self.TERMINATOR):
|
||||
self._log('RECV', response)
|
||||
return response.rstrip(self.TERMINATOR)
|
||||
raise IOError('send/recv timed out')
|
||||
|
||||
def _call(self, cmd, data=None):
|
||||
if data is None:
|
||||
data = {}
|
||||
data['cmd'] = cmd
|
||||
data['version'] = API_VERSION
|
||||
request = json_encode(data)
|
||||
|
||||
response = None
|
||||
for _ in range(2):
|
||||
if not self.socket and cmd != 'login':
|
||||
self._call('login', self.userpwd.copy())
|
||||
self.socket_lock.acquire()
|
||||
try:
|
||||
sock = self.connect()
|
||||
response = self._sendrecv(sock, request)
|
||||
except IOError, err:
|
||||
sys.stderr.write(str(err) + "\n")
|
||||
self.close()
|
||||
except socket.error, err:
|
||||
sys.stderr.write(str(err) + "\n")
|
||||
self.close()
|
||||
raise IOError('Connection refused')
|
||||
else:
|
||||
break
|
||||
finally:
|
||||
self.socket_lock.release()
|
||||
|
||||
if response is None:
|
||||
raise IOError('Connection lost or timed out during API request')
|
||||
|
||||
try:
|
||||
response = json_decode(response)
|
||||
except Exception:
|
||||
raise RuntimeError('Invalid API response')
|
||||
|
||||
if not response.get('error'):
|
||||
return response
|
||||
|
||||
error = response['error']
|
||||
if error in ('not-logged-in', 'invalid-credentials'):
|
||||
raise AccessDeniedException('Access denied, check your credentials')
|
||||
elif 'banned' == error:
|
||||
raise AccessDeniedException('Access denied, account is suspended')
|
||||
elif 'insufficient-funds' == error:
|
||||
raise AccessDeniedException(
|
||||
'CAPTCHA was rejected due to low balance')
|
||||
elif 'invalid-captcha' == error:
|
||||
raise ValueError('CAPTCHA is not a valid image')
|
||||
elif 'service-overload' == error:
|
||||
raise OverflowError(
|
||||
'CAPTCHA was rejected due to service overload, try again later')
|
||||
else:
|
||||
self.socket_lock.acquire()
|
||||
self.close()
|
||||
self.socket_lock.release()
|
||||
raise RuntimeError('API server error occured: %s' % error)
|
||||
|
||||
def get_user(self):
|
||||
return self._call('user') or {'user': 0}
|
||||
|
||||
def get_captcha(self, cid):
|
||||
return self._call('captcha', {'captcha': cid}) or {'captcha': 0}
|
||||
|
||||
def upload(self, captcha=None, **kwargs):
|
||||
data = {}
|
||||
if captcha:
|
||||
data['captcha'] = base64.b64encode(_load_image(captcha))
|
||||
if kwargs:
|
||||
banner = kwargs.get('banner', '')
|
||||
if banner:
|
||||
kwargs['banner'] = base64.b64encode(_load_image(banner))
|
||||
data.update(kwargs)
|
||||
response = self._call('upload', data)
|
||||
if response.get('captcha'):
|
||||
uploaded_captcha = dict(
|
||||
(k, response.get(k))
|
||||
for k in ('captcha', 'text', 'is_correct')
|
||||
)
|
||||
if not uploaded_captcha['text']:
|
||||
uploaded_captcha['text'] = None
|
||||
return uploaded_captcha
|
||||
|
||||
def report(self, cid):
|
||||
return not self._call('report', {'captcha': cid}).get('is_correct')
|
||||
|
||||
|
||||
if '__main__' == __name__:
|
||||
# Put your DBC username & password here:
|
||||
# client = HttpClient(sys.argv[1], sys.argv[2])
|
||||
client = SocketClient(sys.argv[1], sys.argv[2])
|
||||
client.is_verbose = True
|
||||
|
||||
print 'Your balance is %s US cents' % client.get_balance()
|
||||
|
||||
for fn in sys.argv[3:]:
|
||||
try:
|
||||
# Put your CAPTCHA image file name or file-like object, and optional
|
||||
# solving timeout (in seconds) here:
|
||||
captcha = client.decode(fn, DEFAULT_TIMEOUT)
|
||||
except Exception, e:
|
||||
sys.stderr.write('Failed uploading CAPTCHA: %s\n' % (e, ))
|
||||
captcha = None
|
||||
|
||||
if captcha:
|
||||
print 'CAPTCHA %d solved: %s' % \
|
||||
(captcha['captcha'], captcha['text'])
|
||||
|
||||
# Report as incorrectly solved if needed. Make sure the CAPTCHA was
|
||||
# in fact incorrectly solved!
|
||||
# try:
|
||||
# client.report(captcha['captcha'])
|
||||
# except Exception, e:
|
||||
# sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, ))
|
||||
@@ -0,0 +1,54 @@
|
||||
# Copyright (C) 2003-2007, 2009, 2011 Nominum, Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose with or without fee is hereby granted,
|
||||
# provided that the above copyright notice and this permission notice
|
||||
# appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""dnspython DNS toolkit"""
|
||||
|
||||
__all__ = [
|
||||
'dnssec',
|
||||
'e164',
|
||||
'edns',
|
||||
'entropy',
|
||||
'exception',
|
||||
'flags',
|
||||
'hash',
|
||||
'inet',
|
||||
'ipv4',
|
||||
'ipv6',
|
||||
'message',
|
||||
'name',
|
||||
'namedict',
|
||||
'node',
|
||||
'opcode',
|
||||
'query',
|
||||
'rcode',
|
||||
'rdata',
|
||||
'rdataclass',
|
||||
'rdataset',
|
||||
'rdatatype',
|
||||
'renderer',
|
||||
'resolver',
|
||||
'reversename',
|
||||
'rrset',
|
||||
'set',
|
||||
'tokenizer',
|
||||
'tsig',
|
||||
'tsigkeyring',
|
||||
'ttl',
|
||||
'rdtypes',
|
||||
'update',
|
||||
'version',
|
||||
'wiredata',
|
||||
'zone',
|
||||
]
|
||||
@@ -0,0 +1,47 @@
|
||||
import sys
|
||||
import decimal
|
||||
from decimal import Context
|
||||
|
||||
if sys.version_info > (3,):
|
||||
long = int
|
||||
xrange = range
|
||||
else:
|
||||
long = long # pylint: disable=long-builtin
|
||||
xrange = xrange # pylint: disable=xrange-builtin
|
||||
|
||||
# unicode / binary types
|
||||
if sys.version_info > (3,):
|
||||
text_type = str
|
||||
binary_type = bytes
|
||||
string_types = (str,)
|
||||
unichr = chr
|
||||
def maybe_decode(x):
|
||||
return x.decode()
|
||||
def maybe_encode(x):
|
||||
return x.encode()
|
||||
else:
|
||||
text_type = unicode # pylint: disable=unicode-builtin, undefined-variable
|
||||
binary_type = str
|
||||
string_types = (
|
||||
basestring, # pylint: disable=basestring-builtin, undefined-variable
|
||||
)
|
||||
unichr = unichr # pylint: disable=unichr-builtin
|
||||
def maybe_decode(x):
|
||||
return x
|
||||
def maybe_encode(x):
|
||||
return x
|
||||
|
||||
|
||||
def round_py2_compat(what):
|
||||
"""
|
||||
Python 2 and Python 3 use different rounding strategies in round(). This
|
||||
function ensures that results are python2/3 compatible and backward
|
||||
compatible with previous py2 releases
|
||||
:param what: float
|
||||
:return: rounded long
|
||||
"""
|
||||
d = Context(
|
||||
prec=len(str(long(what))), # round to integer with max precision
|
||||
rounding=decimal.ROUND_HALF_UP
|
||||
).create_decimal(str(what)) # str(): python 2.6 compat
|
||||
return long(d)
|
||||
@@ -0,0 +1,457 @@
|
||||
# Copyright (C) 2003-2007, 2009, 2011 Nominum, Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose with or without fee is hereby granted,
|
||||
# provided that the above copyright notice and this permission notice
|
||||
# appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""Common DNSSEC-related functions and constants."""
|
||||
|
||||
from io import BytesIO
|
||||
import struct
|
||||
import time
|
||||
|
||||
import dns.exception
|
||||
import dns.hash
|
||||
import dns.name
|
||||
import dns.node
|
||||
import dns.rdataset
|
||||
import dns.rdata
|
||||
import dns.rdatatype
|
||||
import dns.rdataclass
|
||||
from ._compat import string_types
|
||||
|
||||
|
||||
class UnsupportedAlgorithm(dns.exception.DNSException):
|
||||
|
||||
"""The DNSSEC algorithm is not supported."""
|
||||
|
||||
|
||||
class ValidationFailure(dns.exception.DNSException):
|
||||
|
||||
"""The DNSSEC signature is invalid."""
|
||||
|
||||
RSAMD5 = 1
|
||||
DH = 2
|
||||
DSA = 3
|
||||
ECC = 4
|
||||
RSASHA1 = 5
|
||||
DSANSEC3SHA1 = 6
|
||||
RSASHA1NSEC3SHA1 = 7
|
||||
RSASHA256 = 8
|
||||
RSASHA512 = 10
|
||||
ECDSAP256SHA256 = 13
|
||||
ECDSAP384SHA384 = 14
|
||||
INDIRECT = 252
|
||||
PRIVATEDNS = 253
|
||||
PRIVATEOID = 254
|
||||
|
||||
_algorithm_by_text = {
|
||||
'RSAMD5': RSAMD5,
|
||||
'DH': DH,
|
||||
'DSA': DSA,
|
||||
'ECC': ECC,
|
||||
'RSASHA1': RSASHA1,
|
||||
'DSANSEC3SHA1': DSANSEC3SHA1,
|
||||
'RSASHA1NSEC3SHA1': RSASHA1NSEC3SHA1,
|
||||
'RSASHA256': RSASHA256,
|
||||
'RSASHA512': RSASHA512,
|
||||
'INDIRECT': INDIRECT,
|
||||
'ECDSAP256SHA256': ECDSAP256SHA256,
|
||||
'ECDSAP384SHA384': ECDSAP384SHA384,
|
||||
'PRIVATEDNS': PRIVATEDNS,
|
||||
'PRIVATEOID': PRIVATEOID,
|
||||
}
|
||||
|
||||
# We construct the inverse mapping programmatically to ensure that we
|
||||
# cannot make any mistakes (e.g. omissions, cut-and-paste errors) that
|
||||
# would cause the mapping not to be true inverse.
|
||||
|
||||
_algorithm_by_value = dict((y, x) for x, y in _algorithm_by_text.items())
|
||||
|
||||
|
||||
def algorithm_from_text(text):
|
||||
"""Convert text into a DNSSEC algorithm value
|
||||
@rtype: int"""
|
||||
|
||||
value = _algorithm_by_text.get(text.upper())
|
||||
if value is None:
|
||||
value = int(text)
|
||||
return value
|
||||
|
||||
|
||||
def algorithm_to_text(value):
|
||||
"""Convert a DNSSEC algorithm value to text
|
||||
@rtype: string"""
|
||||
|
||||
text = _algorithm_by_value.get(value)
|
||||
if text is None:
|
||||
text = str(value)
|
||||
return text
|
||||
|
||||
|
||||
def _to_rdata(record, origin):
|
||||
s = BytesIO()
|
||||
record.to_wire(s, origin=origin)
|
||||
return s.getvalue()
|
||||
|
||||
|
||||
def key_id(key, origin=None):
|
||||
rdata = _to_rdata(key, origin)
|
||||
rdata = bytearray(rdata)
|
||||
if key.algorithm == RSAMD5:
|
||||
return (rdata[-3] << 8) + rdata[-2]
|
||||
else:
|
||||
total = 0
|
||||
for i in range(len(rdata) // 2):
|
||||
total += (rdata[2 * i] << 8) + \
|
||||
rdata[2 * i + 1]
|
||||
if len(rdata) % 2 != 0:
|
||||
total += rdata[len(rdata) - 1] << 8
|
||||
total += ((total >> 16) & 0xffff)
|
||||
return total & 0xffff
|
||||
|
||||
|
||||
def make_ds(name, key, algorithm, origin=None):
|
||||
if algorithm.upper() == 'SHA1':
|
||||
dsalg = 1
|
||||
hash = dns.hash.hashes['SHA1']()
|
||||
elif algorithm.upper() == 'SHA256':
|
||||
dsalg = 2
|
||||
hash = dns.hash.hashes['SHA256']()
|
||||
else:
|
||||
raise UnsupportedAlgorithm('unsupported algorithm "%s"' % algorithm)
|
||||
|
||||
if isinstance(name, string_types):
|
||||
name = dns.name.from_text(name, origin)
|
||||
hash.update(name.canonicalize().to_wire())
|
||||
hash.update(_to_rdata(key, origin))
|
||||
digest = hash.digest()
|
||||
|
||||
dsrdata = struct.pack("!HBB", key_id(key), key.algorithm, dsalg) + digest
|
||||
return dns.rdata.from_wire(dns.rdataclass.IN, dns.rdatatype.DS, dsrdata, 0,
|
||||
len(dsrdata))
|
||||
|
||||
|
||||
def _find_candidate_keys(keys, rrsig):
|
||||
candidate_keys = []
|
||||
value = keys.get(rrsig.signer)
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, dns.node.Node):
|
||||
try:
|
||||
rdataset = value.find_rdataset(dns.rdataclass.IN,
|
||||
dns.rdatatype.DNSKEY)
|
||||
except KeyError:
|
||||
return None
|
||||
else:
|
||||
rdataset = value
|
||||
for rdata in rdataset:
|
||||
if rdata.algorithm == rrsig.algorithm and \
|
||||
key_id(rdata) == rrsig.key_tag:
|
||||
candidate_keys.append(rdata)
|
||||
return candidate_keys
|
||||
|
||||
|
||||
def _is_rsa(algorithm):
|
||||
return algorithm in (RSAMD5, RSASHA1,
|
||||
RSASHA1NSEC3SHA1, RSASHA256,
|
||||
RSASHA512)
|
||||
|
||||
|
||||
def _is_dsa(algorithm):
|
||||
return algorithm in (DSA, DSANSEC3SHA1)
|
||||
|
||||
|
||||
def _is_ecdsa(algorithm):
|
||||
return _have_ecdsa and (algorithm in (ECDSAP256SHA256, ECDSAP384SHA384))
|
||||
|
||||
|
||||
def _is_md5(algorithm):
|
||||
return algorithm == RSAMD5
|
||||
|
||||
|
||||
def _is_sha1(algorithm):
|
||||
return algorithm in (DSA, RSASHA1,
|
||||
DSANSEC3SHA1, RSASHA1NSEC3SHA1)
|
||||
|
||||
|
||||
def _is_sha256(algorithm):
|
||||
return algorithm in (RSASHA256, ECDSAP256SHA256)
|
||||
|
||||
|
||||
def _is_sha384(algorithm):
|
||||
return algorithm == ECDSAP384SHA384
|
||||
|
||||
|
||||
def _is_sha512(algorithm):
|
||||
return algorithm == RSASHA512
|
||||
|
||||
|
||||
def _make_hash(algorithm):
|
||||
if _is_md5(algorithm):
|
||||
return dns.hash.hashes['MD5']()
|
||||
if _is_sha1(algorithm):
|
||||
return dns.hash.hashes['SHA1']()
|
||||
if _is_sha256(algorithm):
|
||||
return dns.hash.hashes['SHA256']()
|
||||
if _is_sha384(algorithm):
|
||||
return dns.hash.hashes['SHA384']()
|
||||
if _is_sha512(algorithm):
|
||||
return dns.hash.hashes['SHA512']()
|
||||
raise ValidationFailure('unknown hash for algorithm %u' % algorithm)
|
||||
|
||||
|
||||
def _make_algorithm_id(algorithm):
|
||||
if _is_md5(algorithm):
|
||||
oid = [0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x02, 0x05]
|
||||
elif _is_sha1(algorithm):
|
||||
oid = [0x2b, 0x0e, 0x03, 0x02, 0x1a]
|
||||
elif _is_sha256(algorithm):
|
||||
oid = [0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01]
|
||||
elif _is_sha512(algorithm):
|
||||
oid = [0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03]
|
||||
else:
|
||||
raise ValidationFailure('unknown algorithm %u' % algorithm)
|
||||
olen = len(oid)
|
||||
dlen = _make_hash(algorithm).digest_size
|
||||
idbytes = [0x30] + [8 + olen + dlen] + \
|
||||
[0x30, olen + 4] + [0x06, olen] + oid + \
|
||||
[0x05, 0x00] + [0x04, dlen]
|
||||
return struct.pack('!%dB' % len(idbytes), *idbytes)
|
||||
|
||||
|
||||
def _validate_rrsig(rrset, rrsig, keys, origin=None, now=None):
|
||||
"""Validate an RRset against a single signature rdata
|
||||
|
||||
The owner name of the rrsig is assumed to be the same as the owner name
|
||||
of the rrset.
|
||||
|
||||
@param rrset: The RRset to validate
|
||||
@type rrset: dns.rrset.RRset or (dns.name.Name, dns.rdataset.Rdataset)
|
||||
tuple
|
||||
@param rrsig: The signature rdata
|
||||
@type rrsig: dns.rrset.Rdata
|
||||
@param keys: The key dictionary.
|
||||
@type keys: a dictionary keyed by dns.name.Name with node or rdataset
|
||||
values
|
||||
@param origin: The origin to use for relative names
|
||||
@type origin: dns.name.Name or None
|
||||
@param now: The time to use when validating the signatures. The default
|
||||
is the current time.
|
||||
@type now: int
|
||||
"""
|
||||
|
||||
if isinstance(origin, string_types):
|
||||
origin = dns.name.from_text(origin, dns.name.root)
|
||||
|
||||
for candidate_key in _find_candidate_keys(keys, rrsig):
|
||||
if not candidate_key:
|
||||
raise ValidationFailure('unknown key')
|
||||
|
||||
# For convenience, allow the rrset to be specified as a (name,
|
||||
# rdataset) tuple as well as a proper rrset
|
||||
if isinstance(rrset, tuple):
|
||||
rrname = rrset[0]
|
||||
rdataset = rrset[1]
|
||||
else:
|
||||
rrname = rrset.name
|
||||
rdataset = rrset
|
||||
|
||||
if now is None:
|
||||
now = time.time()
|
||||
if rrsig.expiration < now:
|
||||
raise ValidationFailure('expired')
|
||||
if rrsig.inception > now:
|
||||
raise ValidationFailure('not yet valid')
|
||||
|
||||
hash = _make_hash(rrsig.algorithm)
|
||||
|
||||
if _is_rsa(rrsig.algorithm):
|
||||
keyptr = candidate_key.key
|
||||
(bytes_,) = struct.unpack('!B', keyptr[0:1])
|
||||
keyptr = keyptr[1:]
|
||||
if bytes_ == 0:
|
||||
(bytes_,) = struct.unpack('!H', keyptr[0:2])
|
||||
keyptr = keyptr[2:]
|
||||
rsa_e = keyptr[0:bytes_]
|
||||
rsa_n = keyptr[bytes_:]
|
||||
keylen = len(rsa_n) * 8
|
||||
pubkey = Crypto.PublicKey.RSA.construct(
|
||||
(Crypto.Util.number.bytes_to_long(rsa_n),
|
||||
Crypto.Util.number.bytes_to_long(rsa_e)))
|
||||
sig = (Crypto.Util.number.bytes_to_long(rrsig.signature),)
|
||||
elif _is_dsa(rrsig.algorithm):
|
||||
keyptr = candidate_key.key
|
||||
(t,) = struct.unpack('!B', keyptr[0:1])
|
||||
keyptr = keyptr[1:]
|
||||
octets = 64 + t * 8
|
||||
dsa_q = keyptr[0:20]
|
||||
keyptr = keyptr[20:]
|
||||
dsa_p = keyptr[0:octets]
|
||||
keyptr = keyptr[octets:]
|
||||
dsa_g = keyptr[0:octets]
|
||||
keyptr = keyptr[octets:]
|
||||
dsa_y = keyptr[0:octets]
|
||||
pubkey = Crypto.PublicKey.DSA.construct(
|
||||
(Crypto.Util.number.bytes_to_long(dsa_y),
|
||||
Crypto.Util.number.bytes_to_long(dsa_g),
|
||||
Crypto.Util.number.bytes_to_long(dsa_p),
|
||||
Crypto.Util.number.bytes_to_long(dsa_q)))
|
||||
(dsa_r, dsa_s) = struct.unpack('!20s20s', rrsig.signature[1:])
|
||||
sig = (Crypto.Util.number.bytes_to_long(dsa_r),
|
||||
Crypto.Util.number.bytes_to_long(dsa_s))
|
||||
elif _is_ecdsa(rrsig.algorithm):
|
||||
if rrsig.algorithm == ECDSAP256SHA256:
|
||||
curve = ecdsa.curves.NIST256p
|
||||
key_len = 32
|
||||
elif rrsig.algorithm == ECDSAP384SHA384:
|
||||
curve = ecdsa.curves.NIST384p
|
||||
key_len = 48
|
||||
else:
|
||||
# shouldn't happen
|
||||
raise ValidationFailure('unknown ECDSA curve')
|
||||
keyptr = candidate_key.key
|
||||
x = Crypto.Util.number.bytes_to_long(keyptr[0:key_len])
|
||||
y = Crypto.Util.number.bytes_to_long(keyptr[key_len:key_len * 2])
|
||||
assert ecdsa.ecdsa.point_is_valid(curve.generator, x, y)
|
||||
point = ecdsa.ellipticcurve.Point(curve.curve, x, y, curve.order)
|
||||
verifying_key = ecdsa.keys.VerifyingKey.from_public_point(point,
|
||||
curve)
|
||||
pubkey = ECKeyWrapper(verifying_key, key_len)
|
||||
r = rrsig.signature[:key_len]
|
||||
s = rrsig.signature[key_len:]
|
||||
sig = ecdsa.ecdsa.Signature(Crypto.Util.number.bytes_to_long(r),
|
||||
Crypto.Util.number.bytes_to_long(s))
|
||||
else:
|
||||
raise ValidationFailure('unknown algorithm %u' % rrsig.algorithm)
|
||||
|
||||
hash.update(_to_rdata(rrsig, origin)[:18])
|
||||
hash.update(rrsig.signer.to_digestable(origin))
|
||||
|
||||
if rrsig.labels < len(rrname) - 1:
|
||||
suffix = rrname.split(rrsig.labels + 1)[1]
|
||||
rrname = dns.name.from_text('*', suffix)
|
||||
rrnamebuf = rrname.to_digestable(origin)
|
||||
rrfixed = struct.pack('!HHI', rdataset.rdtype, rdataset.rdclass,
|
||||
rrsig.original_ttl)
|
||||
rrlist = sorted(rdataset)
|
||||
for rr in rrlist:
|
||||
hash.update(rrnamebuf)
|
||||
hash.update(rrfixed)
|
||||
rrdata = rr.to_digestable(origin)
|
||||
rrlen = struct.pack('!H', len(rrdata))
|
||||
hash.update(rrlen)
|
||||
hash.update(rrdata)
|
||||
|
||||
digest = hash.digest()
|
||||
|
||||
if _is_rsa(rrsig.algorithm):
|
||||
# PKCS1 algorithm identifier goop
|
||||
digest = _make_algorithm_id(rrsig.algorithm) + digest
|
||||
padlen = keylen // 8 - len(digest) - 3
|
||||
digest = struct.pack('!%dB' % (2 + padlen + 1),
|
||||
*([0, 1] + [0xFF] * padlen + [0])) + digest
|
||||
elif _is_dsa(rrsig.algorithm) or _is_ecdsa(rrsig.algorithm):
|
||||
pass
|
||||
else:
|
||||
# Raise here for code clarity; this won't actually ever happen
|
||||
# since if the algorithm is really unknown we'd already have
|
||||
# raised an exception above
|
||||
raise ValidationFailure('unknown algorithm %u' % rrsig.algorithm)
|
||||
|
||||
if pubkey.verify(digest, sig):
|
||||
return
|
||||
raise ValidationFailure('verify failure')
|
||||
|
||||
|
||||
def _validate(rrset, rrsigset, keys, origin=None, now=None):
|
||||
"""Validate an RRset
|
||||
|
||||
@param rrset: The RRset to validate
|
||||
@type rrset: dns.rrset.RRset or (dns.name.Name, dns.rdataset.Rdataset)
|
||||
tuple
|
||||
@param rrsigset: The signature RRset
|
||||
@type rrsigset: dns.rrset.RRset or (dns.name.Name, dns.rdataset.Rdataset)
|
||||
tuple
|
||||
@param keys: The key dictionary.
|
||||
@type keys: a dictionary keyed by dns.name.Name with node or rdataset
|
||||
values
|
||||
@param origin: The origin to use for relative names
|
||||
@type origin: dns.name.Name or None
|
||||
@param now: The time to use when validating the signatures. The default
|
||||
is the current time.
|
||||
@type now: int
|
||||
"""
|
||||
|
||||
if isinstance(origin, string_types):
|
||||
origin = dns.name.from_text(origin, dns.name.root)
|
||||
|
||||
if isinstance(rrset, tuple):
|
||||
rrname = rrset[0]
|
||||
else:
|
||||
rrname = rrset.name
|
||||
|
||||
if isinstance(rrsigset, tuple):
|
||||
rrsigname = rrsigset[0]
|
||||
rrsigrdataset = rrsigset[1]
|
||||
else:
|
||||
rrsigname = rrsigset.name
|
||||
rrsigrdataset = rrsigset
|
||||
|
||||
rrname = rrname.choose_relativity(origin)
|
||||
rrsigname = rrname.choose_relativity(origin)
|
||||
if rrname != rrsigname:
|
||||
raise ValidationFailure("owner names do not match")
|
||||
|
||||
for rrsig in rrsigrdataset:
|
||||
try:
|
||||
_validate_rrsig(rrset, rrsig, keys, origin, now)
|
||||
return
|
||||
except ValidationFailure:
|
||||
pass
|
||||
raise ValidationFailure("no RRSIGs validated")
|
||||
|
||||
|
||||
def _need_pycrypto(*args, **kwargs):
|
||||
raise NotImplementedError("DNSSEC validation requires pycrypto")
|
||||
|
||||
try:
|
||||
import Crypto.PublicKey.RSA
|
||||
import Crypto.PublicKey.DSA
|
||||
import Crypto.Util.number
|
||||
validate = _validate
|
||||
validate_rrsig = _validate_rrsig
|
||||
_have_pycrypto = True
|
||||
except ImportError:
|
||||
validate = _need_pycrypto
|
||||
validate_rrsig = _need_pycrypto
|
||||
_have_pycrypto = False
|
||||
|
||||
try:
|
||||
import ecdsa
|
||||
import ecdsa.ecdsa
|
||||
import ecdsa.ellipticcurve
|
||||
import ecdsa.keys
|
||||
_have_ecdsa = True
|
||||
|
||||
class ECKeyWrapper(object):
|
||||
|
||||
def __init__(self, key, key_len):
|
||||
self.key = key
|
||||
self.key_len = key_len
|
||||
|
||||
def verify(self, digest, sig):
|
||||
diglong = Crypto.Util.number.bytes_to_long(digest)
|
||||
return self.key.pubkey.verifies(diglong, sig)
|
||||
|
||||
except ImportError:
|
||||
_have_ecdsa = False
|
||||
@@ -0,0 +1,85 @@
|
||||
# Copyright (C) 2006, 2007, 2009, 2011 Nominum, Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose with or without fee is hereby granted,
|
||||
# provided that the above copyright notice and this permission notice
|
||||
# appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""DNS E.164 helpers
|
||||
|
||||
@var public_enum_domain: The DNS public ENUM domain, e164.arpa.
|
||||
@type public_enum_domain: dns.name.Name object
|
||||
"""
|
||||
|
||||
|
||||
import dns.exception
|
||||
import dns.name
|
||||
import dns.resolver
|
||||
from ._compat import string_types
|
||||
|
||||
public_enum_domain = dns.name.from_text('e164.arpa.')
|
||||
|
||||
|
||||
def from_e164(text, origin=public_enum_domain):
|
||||
"""Convert an E.164 number in textual form into a Name object whose
|
||||
value is the ENUM domain name for that number.
|
||||
@param text: an E.164 number in textual form.
|
||||
@type text: str
|
||||
@param origin: The domain in which the number should be constructed.
|
||||
The default is e164.arpa.
|
||||
@type origin: dns.name.Name object or None
|
||||
@rtype: dns.name.Name object
|
||||
"""
|
||||
parts = [d for d in text if d.isdigit()]
|
||||
parts.reverse()
|
||||
return dns.name.from_text('.'.join(parts), origin=origin)
|
||||
|
||||
|
||||
def to_e164(name, origin=public_enum_domain, want_plus_prefix=True):
|
||||
"""Convert an ENUM domain name into an E.164 number.
|
||||
@param name: the ENUM domain name.
|
||||
@type name: dns.name.Name object.
|
||||
@param origin: A domain containing the ENUM domain name. The
|
||||
name is relativized to this domain before being converted to text.
|
||||
@type origin: dns.name.Name object or None
|
||||
@param want_plus_prefix: if True, add a '+' to the beginning of the
|
||||
returned number.
|
||||
@rtype: str
|
||||
"""
|
||||
if origin is not None:
|
||||
name = name.relativize(origin)
|
||||
dlabels = [d for d in name.labels if d.isdigit() and len(d) == 1]
|
||||
if len(dlabels) != len(name.labels):
|
||||
raise dns.exception.SyntaxError('non-digit labels in ENUM domain name')
|
||||
dlabels.reverse()
|
||||
text = b''.join(dlabels)
|
||||
if want_plus_prefix:
|
||||
text = b'+' + text
|
||||
return text
|
||||
|
||||
|
||||
def query(number, domains, resolver=None):
|
||||
"""Look for NAPTR RRs for the specified number in the specified domains.
|
||||
|
||||
e.g. lookup('16505551212', ['e164.dnspython.org.', 'e164.arpa.'])
|
||||
"""
|
||||
if resolver is None:
|
||||
resolver = dns.resolver.get_default_resolver()
|
||||
e_nx = dns.resolver.NXDOMAIN()
|
||||
for domain in domains:
|
||||
if isinstance(domain, string_types):
|
||||
domain = dns.name.from_text(domain)
|
||||
qname = dns.e164.from_e164(number, domain)
|
||||
try:
|
||||
return resolver.query(qname, 'NAPTR')
|
||||
except dns.resolver.NXDOMAIN as e:
|
||||
e_nx += e
|
||||
raise e_nx
|
||||
@@ -0,0 +1,150 @@
|
||||
# Copyright (C) 2009, 2011 Nominum, Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose with or without fee is hereby granted,
|
||||
# provided that the above copyright notice and this permission notice
|
||||
# appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""EDNS Options"""
|
||||
|
||||
NSID = 3
|
||||
|
||||
|
||||
class Option(object):
|
||||
|
||||
"""Base class for all EDNS option types.
|
||||
"""
|
||||
|
||||
def __init__(self, otype):
|
||||
"""Initialize an option.
|
||||
@param otype: The rdata type
|
||||
@type otype: int
|
||||
"""
|
||||
self.otype = otype
|
||||
|
||||
def to_wire(self, file):
|
||||
"""Convert an option to wire format.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@classmethod
|
||||
def from_wire(cls, otype, wire, current, olen):
|
||||
"""Build an EDNS option object from wire format
|
||||
|
||||
@param otype: The option type
|
||||
@type otype: int
|
||||
@param wire: The wire-format message
|
||||
@type wire: string
|
||||
@param current: The offset in wire of the beginning of the rdata.
|
||||
@type current: int
|
||||
@param olen: The length of the wire-format option data
|
||||
@type olen: int
|
||||
@rtype: dns.edns.Option instance"""
|
||||
raise NotImplementedError
|
||||
|
||||
def _cmp(self, other):
|
||||
"""Compare an EDNS option with another option of the same type.
|
||||
Return < 0 if self < other, 0 if self == other,
|
||||
and > 0 if self > other.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, Option):
|
||||
return False
|
||||
if self.otype != other.otype:
|
||||
return False
|
||||
return self._cmp(other) == 0
|
||||
|
||||
def __ne__(self, other):
|
||||
if not isinstance(other, Option):
|
||||
return False
|
||||
if self.otype != other.otype:
|
||||
return False
|
||||
return self._cmp(other) != 0
|
||||
|
||||
def __lt__(self, other):
|
||||
if not isinstance(other, Option) or \
|
||||
self.otype != other.otype:
|
||||
return NotImplemented
|
||||
return self._cmp(other) < 0
|
||||
|
||||
def __le__(self, other):
|
||||
if not isinstance(other, Option) or \
|
||||
self.otype != other.otype:
|
||||
return NotImplemented
|
||||
return self._cmp(other) <= 0
|
||||
|
||||
def __ge__(self, other):
|
||||
if not isinstance(other, Option) or \
|
||||
self.otype != other.otype:
|
||||
return NotImplemented
|
||||
return self._cmp(other) >= 0
|
||||
|
||||
def __gt__(self, other):
|
||||
if not isinstance(other, Option) or \
|
||||
self.otype != other.otype:
|
||||
return NotImplemented
|
||||
return self._cmp(other) > 0
|
||||
|
||||
|
||||
class GenericOption(Option):
|
||||
|
||||
"""Generate Rdata Class
|
||||
|
||||
This class is used for EDNS option types for which we have no better
|
||||
implementation.
|
||||
"""
|
||||
|
||||
def __init__(self, otype, data):
|
||||
super(GenericOption, self).__init__(otype)
|
||||
self.data = data
|
||||
|
||||
def to_wire(self, file):
|
||||
file.write(self.data)
|
||||
|
||||
@classmethod
|
||||
def from_wire(cls, otype, wire, current, olen):
|
||||
return cls(otype, wire[current: current + olen])
|
||||
|
||||
def _cmp(self, other):
|
||||
if self.data == other.data:
|
||||
return 0
|
||||
if self.data > other.data:
|
||||
return 1
|
||||
return -1
|
||||
|
||||
_type_to_class = {
|
||||
}
|
||||
|
||||
|
||||
def get_option_class(otype):
|
||||
cls = _type_to_class.get(otype)
|
||||
if cls is None:
|
||||
cls = GenericOption
|
||||
return cls
|
||||
|
||||
|
||||
def option_from_wire(otype, wire, current, olen):
|
||||
"""Build an EDNS option object from wire format
|
||||
|
||||
@param otype: The option type
|
||||
@type otype: int
|
||||
@param wire: The wire-format message
|
||||
@type wire: string
|
||||
@param current: The offset in wire of the beginning of the rdata.
|
||||
@type current: int
|
||||
@param olen: The length of the wire-format option data
|
||||
@type olen: int
|
||||
@rtype: dns.edns.Option instance"""
|
||||
|
||||
cls = get_option_class(otype)
|
||||
return cls.from_wire(otype, wire, current, olen)
|
||||
@@ -0,0 +1,141 @@
|
||||
# Copyright (C) 2009, 2011 Nominum, Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose with or without fee is hereby granted,
|
||||
# provided that the above copyright notice and this permission notice
|
||||
# appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
from ._compat import long, binary_type
|
||||
try:
|
||||
import threading as _threading
|
||||
except ImportError:
|
||||
import dummy_threading as _threading
|
||||
|
||||
|
||||
class EntropyPool(object):
|
||||
|
||||
def __init__(self, seed=None):
|
||||
self.pool_index = 0
|
||||
self.digest = None
|
||||
self.next_byte = 0
|
||||
self.lock = _threading.Lock()
|
||||
try:
|
||||
import hashlib
|
||||
self.hash = hashlib.sha1()
|
||||
self.hash_len = 20
|
||||
except ImportError:
|
||||
try:
|
||||
import sha
|
||||
self.hash = sha.new()
|
||||
self.hash_len = 20
|
||||
except ImportError:
|
||||
import md5 # pylint: disable=import-error
|
||||
self.hash = md5.new()
|
||||
self.hash_len = 16
|
||||
self.pool = bytearray(b'\0' * self.hash_len)
|
||||
if seed is not None:
|
||||
self.stir(bytearray(seed))
|
||||
self.seeded = True
|
||||
self.seed_pid = os.getpid()
|
||||
else:
|
||||
self.seeded = False
|
||||
self.seed_pid = 0
|
||||
|
||||
def stir(self, entropy, already_locked=False):
|
||||
if not already_locked:
|
||||
self.lock.acquire()
|
||||
try:
|
||||
for c in entropy:
|
||||
if self.pool_index == self.hash_len:
|
||||
self.pool_index = 0
|
||||
b = c & 0xff
|
||||
self.pool[self.pool_index] ^= b
|
||||
self.pool_index += 1
|
||||
finally:
|
||||
if not already_locked:
|
||||
self.lock.release()
|
||||
|
||||
def _maybe_seed(self):
|
||||
if not self.seeded or self.seed_pid != os.getpid():
|
||||
try:
|
||||
seed = os.urandom(16)
|
||||
except Exception:
|
||||
try:
|
||||
r = open('/dev/urandom', 'rb', 0)
|
||||
try:
|
||||
seed = r.read(16)
|
||||
finally:
|
||||
r.close()
|
||||
except Exception:
|
||||
seed = str(time.time())
|
||||
self.seeded = True
|
||||
self.seed_pid = os.getpid()
|
||||
self.digest = None
|
||||
seed = bytearray(seed)
|
||||
self.stir(seed, True)
|
||||
|
||||
def random_8(self):
|
||||
self.lock.acquire()
|
||||
try:
|
||||
self._maybe_seed()
|
||||
if self.digest is None or self.next_byte == self.hash_len:
|
||||
self.hash.update(binary_type(self.pool))
|
||||
self.digest = bytearray(self.hash.digest())
|
||||
self.stir(self.digest, True)
|
||||
self.next_byte = 0
|
||||
value = self.digest[self.next_byte]
|
||||
self.next_byte += 1
|
||||
finally:
|
||||
self.lock.release()
|
||||
return value
|
||||
|
||||
def random_16(self):
|
||||
return self.random_8() * 256 + self.random_8()
|
||||
|
||||
def random_32(self):
|
||||
return self.random_16() * 65536 + self.random_16()
|
||||
|
||||
def random_between(self, first, last):
|
||||
size = last - first + 1
|
||||
if size > long(4294967296):
|
||||
raise ValueError('too big')
|
||||
if size > 65536:
|
||||
rand = self.random_32
|
||||
max = long(4294967295)
|
||||
elif size > 256:
|
||||
rand = self.random_16
|
||||
max = 65535
|
||||
else:
|
||||
rand = self.random_8
|
||||
max = 255
|
||||
return first + size * rand() // (max + 1)
|
||||
|
||||
pool = EntropyPool()
|
||||
|
||||
try:
|
||||
system_random = random.SystemRandom()
|
||||
except Exception:
|
||||
system_random = None
|
||||
|
||||
def random_16():
|
||||
if system_random is not None:
|
||||
return system_random.randrange(0, 65536)
|
||||
else:
|
||||
return pool.random_16()
|
||||
|
||||
def between(first, last):
|
||||
if system_random is not None:
|
||||
return system_random.randrange(first, last + 1)
|
||||
else:
|
||||
return pool.random_between(first, last)
|
||||
@@ -0,0 +1,128 @@
|
||||
# Copyright (C) 2003-2007, 2009-2011 Nominum, Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose with or without fee is hereby granted,
|
||||
# provided that the above copyright notice and this permission notice
|
||||
# appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""Common DNS Exceptions."""
|
||||
|
||||
|
||||
class DNSException(Exception):
|
||||
|
||||
"""Abstract base class shared by all dnspython exceptions.
|
||||
|
||||
It supports two basic modes of operation:
|
||||
|
||||
a) Old/compatible mode is used if __init__ was called with
|
||||
empty **kwargs.
|
||||
In compatible mode all *args are passed to standard Python Exception class
|
||||
as before and all *args are printed by standard __str__ implementation.
|
||||
Class variable msg (or doc string if msg is None) is returned from str()
|
||||
if *args is empty.
|
||||
|
||||
b) New/parametrized mode is used if __init__ was called with
|
||||
non-empty **kwargs.
|
||||
In the new mode *args has to be empty and all kwargs has to exactly match
|
||||
set in class variable self.supp_kwargs. All kwargs are stored inside
|
||||
self.kwargs and used in new __str__ implementation to construct
|
||||
formatted message based on self.fmt string.
|
||||
|
||||
In the simplest case it is enough to override supp_kwargs and fmt
|
||||
class variables to get nice parametrized messages.
|
||||
"""
|
||||
msg = None # non-parametrized message
|
||||
supp_kwargs = set() # accepted parameters for _fmt_kwargs (sanity check)
|
||||
fmt = None # message parametrized with results from _fmt_kwargs
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._check_params(*args, **kwargs)
|
||||
if kwargs:
|
||||
self.kwargs = self._check_kwargs(**kwargs)
|
||||
self.msg = str(self)
|
||||
else:
|
||||
self.kwargs = dict() # defined but empty for old mode exceptions
|
||||
if self.msg is None:
|
||||
# doc string is better implicit message than empty string
|
||||
self.msg = self.__doc__
|
||||
if args:
|
||||
super(DNSException, self).__init__(*args)
|
||||
else:
|
||||
super(DNSException, self).__init__(self.msg)
|
||||
|
||||
def _check_params(self, *args, **kwargs):
|
||||
"""Old exceptions supported only args and not kwargs.
|
||||
|
||||
For sanity we do not allow to mix old and new behavior."""
|
||||
if args or kwargs:
|
||||
assert bool(args) != bool(kwargs), \
|
||||
'keyword arguments are mutually exclusive with positional args'
|
||||
|
||||
def _check_kwargs(self, **kwargs):
|
||||
if kwargs:
|
||||
assert set(kwargs.keys()) == self.supp_kwargs, \
|
||||
'following set of keyword args is required: %s' % (
|
||||
self.supp_kwargs)
|
||||
return kwargs
|
||||
|
||||
def _fmt_kwargs(self, **kwargs):
|
||||
"""Format kwargs before printing them.
|
||||
|
||||
Resulting dictionary has to have keys necessary for str.format call
|
||||
on fmt class variable.
|
||||
"""
|
||||
fmtargs = {}
|
||||
for kw, data in kwargs.items():
|
||||
if isinstance(data, (list, set)):
|
||||
# convert list of <someobj> to list of str(<someobj>)
|
||||
fmtargs[kw] = list(map(str, data))
|
||||
if len(fmtargs[kw]) == 1:
|
||||
# remove list brackets [] from single-item lists
|
||||
fmtargs[kw] = fmtargs[kw].pop()
|
||||
else:
|
||||
fmtargs[kw] = data
|
||||
return fmtargs
|
||||
|
||||
def __str__(self):
|
||||
if self.kwargs and self.fmt:
|
||||
# provide custom message constructed from keyword arguments
|
||||
fmtargs = self._fmt_kwargs(**self.kwargs)
|
||||
return self.fmt.format(**fmtargs)
|
||||
else:
|
||||
# print *args directly in the same way as old DNSException
|
||||
return super(DNSException, self).__str__()
|
||||
|
||||
|
||||
class FormError(DNSException):
|
||||
|
||||
"""DNS message is malformed."""
|
||||
|
||||
|
||||
class SyntaxError(DNSException):
|
||||
|
||||
"""Text input is malformed."""
|
||||
|
||||
|
||||
class UnexpectedEnd(SyntaxError):
|
||||
|
||||
"""Text input ended unexpectedly."""
|
||||
|
||||
|
||||
class TooBig(DNSException):
|
||||
|
||||
"""The DNS message is too big."""
|
||||
|
||||
|
||||
class Timeout(DNSException):
|
||||
|
||||
"""The DNS operation timed out."""
|
||||
supp_kwargs = set(['timeout'])
|
||||
fmt = "The DNS operation timed out after {timeout} seconds"
|
||||
@@ -0,0 +1,112 @@
|
||||
# Copyright (C) 2001-2007, 2009-2011 Nominum, Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose with or without fee is hereby granted,
|
||||
# provided that the above copyright notice and this permission notice
|
||||
# appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""DNS Message Flags."""
|
||||
|
||||
# Standard DNS flags
|
||||
|
||||
QR = 0x8000
|
||||
AA = 0x0400
|
||||
TC = 0x0200
|
||||
RD = 0x0100
|
||||
RA = 0x0080
|
||||
AD = 0x0020
|
||||
CD = 0x0010
|
||||
|
||||
# EDNS flags
|
||||
|
||||
DO = 0x8000
|
||||
|
||||
_by_text = {
|
||||
'QR': QR,
|
||||
'AA': AA,
|
||||
'TC': TC,
|
||||
'RD': RD,
|
||||
'RA': RA,
|
||||
'AD': AD,
|
||||
'CD': CD
|
||||
}
|
||||
|
||||
_edns_by_text = {
|
||||
'DO': DO
|
||||
}
|
||||
|
||||
|
||||
# We construct the inverse mappings programmatically to ensure that we
|
||||
# cannot make any mistakes (e.g. omissions, cut-and-paste errors) that
|
||||
# would cause the mappings not to be true inverses.
|
||||
|
||||
_by_value = dict((y, x) for x, y in _by_text.items())
|
||||
|
||||
_edns_by_value = dict((y, x) for x, y in _edns_by_text.items())
|
||||
|
||||
|
||||
def _order_flags(table):
|
||||
order = list(table.items())
|
||||
order.sort()
|
||||
order.reverse()
|
||||
return order
|
||||
|
||||
_flags_order = _order_flags(_by_value)
|
||||
|
||||
_edns_flags_order = _order_flags(_edns_by_value)
|
||||
|
||||
|
||||
def _from_text(text, table):
|
||||
flags = 0
|
||||
tokens = text.split()
|
||||
for t in tokens:
|
||||
flags = flags | table[t.upper()]
|
||||
return flags
|
||||
|
||||
|
||||
def _to_text(flags, table, order):
|
||||
text_flags = []
|
||||
for k, v in order:
|
||||
if flags & k != 0:
|
||||
text_flags.append(v)
|
||||
return ' '.join(text_flags)
|
||||
|
||||
|
||||
def from_text(text):
|
||||
"""Convert a space-separated list of flag text values into a flags
|
||||
value.
|
||||
@rtype: int"""
|
||||
|
||||
return _from_text(text, _by_text)
|
||||
|
||||
|
||||
def to_text(flags):
|
||||
"""Convert a flags value into a space-separated list of flag text
|
||||
values.
|
||||
@rtype: string"""
|
||||
|
||||
return _to_text(flags, _by_value, _flags_order)
|
||||
|
||||
|
||||
def edns_from_text(text):
|
||||
"""Convert a space-separated list of EDNS flag text values into a EDNS
|
||||
flags value.
|
||||
@rtype: int"""
|
||||
|
||||
return _from_text(text, _edns_by_text)
|
||||
|
||||
|
||||
def edns_to_text(flags):
|
||||
"""Convert an EDNS flags value into a space-separated list of EDNS flag
|
||||
text values.
|
||||
@rtype: string"""
|
||||
|
||||
return _to_text(flags, _edns_by_value, _edns_flags_order)
|
||||
@@ -0,0 +1,69 @@
|
||||
# Copyright (C) 2003-2007, 2009-2011 Nominum, Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose with or without fee is hereby granted,
|
||||
# provided that the above copyright notice and this permission notice
|
||||
# appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""DNS GENERATE range conversion."""
|
||||
|
||||
import dns
|
||||
|
||||
|
||||
def from_text(text):
|
||||
"""Convert the text form of a range in a GENERATE statement to an
|
||||
integer.
|
||||
|
||||
@param text: the textual range
|
||||
@type text: string
|
||||
@return: The start, stop and step values.
|
||||
@rtype: tuple
|
||||
"""
|
||||
# TODO, figure out the bounds on start, stop and step.
|
||||
|
||||
step = 1
|
||||
cur = ''
|
||||
state = 0
|
||||
# state 0 1 2 3 4
|
||||
# x - y / z
|
||||
|
||||
if text and text[0] == '-':
|
||||
raise dns.exception.SyntaxError("Start cannot be a negative number")
|
||||
|
||||
for c in text:
|
||||
if c == '-' and state == 0:
|
||||
start = int(cur)
|
||||
cur = ''
|
||||
state = 2
|
||||
elif c == '/':
|
||||
stop = int(cur)
|
||||
cur = ''
|
||||
state = 4
|
||||
elif c.isdigit():
|
||||
cur += c
|
||||
else:
|
||||
raise dns.exception.SyntaxError("Could not parse %s" % (c))
|
||||
|
||||
if state in (1, 3):
|
||||
raise dns.exception.SyntaxError()
|
||||
|
||||
if state == 2:
|
||||
stop = int(cur)
|
||||
|
||||
if state == 4:
|
||||
step = int(cur)
|
||||
|
||||
assert step >= 1
|
||||
assert start >= 0
|
||||
assert start <= stop
|
||||
# TODO, can start == stop?
|
||||
|
||||
return (start, stop, step)
|
||||
@@ -0,0 +1,31 @@
|
||||
# Copyright (C) 2011 Nominum, Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose with or without fee is hereby granted,
|
||||
# provided that the above copyright notice and this permission notice
|
||||
# appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""Hashing backwards compatibility wrapper"""
|
||||
|
||||
import hashlib
|
||||
|
||||
|
||||
hashes = {}
|
||||
hashes['MD5'] = hashlib.md5
|
||||
hashes['SHA1'] = hashlib.sha1
|
||||
hashes['SHA224'] = hashlib.sha224
|
||||
hashes['SHA256'] = hashlib.sha256
|
||||
hashes['SHA384'] = hashlib.sha384
|
||||
hashes['SHA512'] = hashlib.sha512
|
||||
|
||||
|
||||
def get(algorithm):
|
||||
return hashes[algorithm.upper()]
|
||||
@@ -0,0 +1,111 @@
|
||||
# Copyright (C) 2003-2007, 2009-2011 Nominum, Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose with or without fee is hereby granted,
|
||||
# provided that the above copyright notice and this permission notice
|
||||
# appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""Generic Internet address helper functions."""
|
||||
|
||||
import socket
|
||||
|
||||
import dns.ipv4
|
||||
import dns.ipv6
|
||||
|
||||
|
||||
# We assume that AF_INET is always defined.
|
||||
|
||||
AF_INET = socket.AF_INET
|
||||
|
||||
# AF_INET6 might not be defined in the socket module, but we need it.
|
||||
# We'll try to use the socket module's value, and if it doesn't work,
|
||||
# we'll use our own value.
|
||||
|
||||
try:
|
||||
AF_INET6 = socket.AF_INET6
|
||||
except AttributeError:
|
||||
AF_INET6 = 9999
|
||||
|
||||
|
||||
def inet_pton(family, text):
|
||||
"""Convert the textual form of a network address into its binary form.
|
||||
|
||||
@param family: the address family
|
||||
@type family: int
|
||||
@param text: the textual address
|
||||
@type text: string
|
||||
@raises NotImplementedError: the address family specified is not
|
||||
implemented.
|
||||
@rtype: string
|
||||
"""
|
||||
|
||||
if family == AF_INET:
|
||||
return dns.ipv4.inet_aton(text)
|
||||
elif family == AF_INET6:
|
||||
return dns.ipv6.inet_aton(text)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def inet_ntop(family, address):
|
||||
"""Convert the binary form of a network address into its textual form.
|
||||
|
||||
@param family: the address family
|
||||
@type family: int
|
||||
@param address: the binary address
|
||||
@type address: string
|
||||
@raises NotImplementedError: the address family specified is not
|
||||
implemented.
|
||||
@rtype: string
|
||||
"""
|
||||
if family == AF_INET:
|
||||
return dns.ipv4.inet_ntoa(address)
|
||||
elif family == AF_INET6:
|
||||
return dns.ipv6.inet_ntoa(address)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def af_for_address(text):
|
||||
"""Determine the address family of a textual-form network address.
|
||||
|
||||
@param text: the textual address
|
||||
@type text: string
|
||||
@raises ValueError: the address family cannot be determined from the input.
|
||||
@rtype: int
|
||||
"""
|
||||
try:
|
||||
dns.ipv4.inet_aton(text)
|
||||
return AF_INET
|
||||
except Exception:
|
||||
try:
|
||||
dns.ipv6.inet_aton(text)
|
||||
return AF_INET6
|
||||
except:
|
||||
raise ValueError
|
||||
|
||||
|
||||
def is_multicast(text):
|
||||
"""Is the textual-form network address a multicast address?
|
||||
|
||||
@param text: the textual address
|
||||
@raises ValueError: the address family cannot be determined from the input.
|
||||
@rtype: bool
|
||||
"""
|
||||
try:
|
||||
first = ord(dns.ipv4.inet_aton(text)[0])
|
||||
return first >= 224 and first <= 239
|
||||
except Exception:
|
||||
try:
|
||||
first = ord(dns.ipv6.inet_aton(text)[0])
|
||||
return first == 255
|
||||
except Exception:
|
||||
raise ValueError
|
||||
@@ -0,0 +1,59 @@
|
||||
# Copyright (C) 2003-2007, 2009-2011 Nominum, Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose with or without fee is hereby granted,
|
||||
# provided that the above copyright notice and this permission notice
|
||||
# appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""IPv4 helper functions."""
|
||||
|
||||
import struct
|
||||
|
||||
import dns.exception
|
||||
from ._compat import binary_type
|
||||
|
||||
def inet_ntoa(address):
|
||||
"""Convert an IPv4 address in network form to text form.
|
||||
|
||||
@param address: The IPv4 address
|
||||
@type address: string
|
||||
@returns: string
|
||||
"""
|
||||
if len(address) != 4:
|
||||
raise dns.exception.SyntaxError
|
||||
if not isinstance(address, bytearray):
|
||||
address = bytearray(address)
|
||||
return (u'%u.%u.%u.%u' % (address[0], address[1],
|
||||
address[2], address[3])).encode()
|
||||
|
||||
def inet_aton(text):
|
||||
"""Convert an IPv4 address in text form to network form.
|
||||
|
||||
@param text: The IPv4 address
|
||||
@type text: string
|
||||
@returns: string
|
||||
"""
|
||||
if not isinstance(text, binary_type):
|
||||
text = text.encode()
|
||||
parts = text.split(b'.')
|
||||
if len(parts) != 4:
|
||||
raise dns.exception.SyntaxError
|
||||
for part in parts:
|
||||
if not part.isdigit():
|
||||
raise dns.exception.SyntaxError
|
||||
if len(part) > 1 and part[0] == '0':
|
||||
# No leading zeros
|
||||
raise dns.exception.SyntaxError
|
||||
try:
|
||||
bytes = [int(part) for part in parts]
|
||||
return struct.pack('BBBB', *bytes)
|
||||
except:
|
||||
raise dns.exception.SyntaxError
|
||||
@@ -0,0 +1,172 @@
|
||||
# Copyright (C) 2003-2007, 2009-2011 Nominum, Inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose with or without fee is hereby granted,
|
||||
# provided that the above copyright notice and this permission notice
|
||||
# appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""IPv6 helper functions."""
|
||||
|
||||
import re
|
||||
import binascii
|
||||
|
||||
import dns.exception
|
||||
import dns.ipv4
|
||||
from ._compat import xrange, binary_type, maybe_decode
|
||||
|
||||
_leading_zero = re.compile(b'0+([0-9a-f]+)')
|
||||
|
||||
def inet_ntoa(address):
|
||||
"""Convert a network format IPv6 address into text.
|
||||
|
||||
@param address: the binary address
|
||||
@type address: string
|
||||
@rtype: string
|
||||
@raises ValueError: the address isn't 16 bytes long
|
||||
"""
|
||||
|
||||
if len(address) != 16:
|
||||
raise ValueError("IPv6 addresses are 16 bytes long")
|
||||
hex = binascii.hexlify(address)
|
||||
chunks = []
|
||||
i = 0
|
||||
l = len(hex)
|
||||
while i < l:
|
||||
chunk = hex[i : i + 4]
|
||||
# strip leading zeros. we do this with an re instead of
|
||||
# with lstrip() because lstrip() didn't support chars until
|
||||
# python 2.2.2
|
||||
m = _leading_zero.match(chunk)
|
||||
if not m is None:
|
||||
chunk = m.group(1)
|
||||
chunks.append(chunk)
|
||||
i += 4
|
||||
#
|
||||
# Compress the longest subsequence of 0-value chunks to ::
|
||||
#
|
||||
best_start = 0
|
||||
best_len = 0
|
||||
start = -1
|
||||
last_was_zero = False
|
||||
for i in xrange(8):
|
||||
if chunks[i] != b'0':
|
||||
if last_was_zero:
|
||||
end = i
|
||||
current_len = end - start
|
||||
if current_len > best_len:
|
||||
best_start = start
|
||||
best_len = current_len
|
||||
last_was_zero = False
|
||||
elif not last_was_zero:
|
||||
start = i
|
||||
last_was_zero = True
|
||||
if last_was_zero:
|
||||
end = 8
|
||||
current_len = end - start
|
||||
if current_len > best_len:
|
||||
best_start = start
|
||||
best_len = current_len
|
||||
if best_len > 1:
|
||||
if best_start == 0 and \
|
||||
(best_len == 6 or
|
||||
best_len == 5 and chunks[5] == b'ffff'):
|
||||
# We have an embedded IPv4 address
|
||||
if best_len == 6:
|
||||
prefix = b'::'
|
||||
else:
|
||||
prefix = b'::ffff:'
|
||||
hex = prefix + dns.ipv4.inet_ntoa(address[12:])
|
||||
else:
|
||||
hex = b':'.join(chunks[:best_start]) + b'::' + \
|
||||
b':'.join(chunks[best_start + best_len:])
|
||||
else:
|
||||
hex = b':'.join(chunks)
|
||||
return maybe_decode(hex)
|
||||
|
||||
_v4_ending = re.compile(b'(.*):(\d+\.\d+\.\d+\.\d+)$')
|
||||
_colon_colon_start = re.compile(b'::.*')
|
||||
_colon_colon_end = re.compile(b'.*::$')
|
||||
|
||||
def inet_aton(text):
|
||||
"""Convert a text format IPv6 address into network format.
|
||||
|
||||
@param text: the textual address
|
||||
@type text: string
|
||||
@rtype: string
|
||||
@raises dns.exception.SyntaxError: the text was not properly formatted
|
||||
"""
|
||||
|
||||
#
|
||||
# Our aim here is not something fast; we just want something that works.
|
||||
#
|
||||
if not isinstance(text, binary_type):
|
||||
text = text.encode()
|
||||
|
||||
if text == b'::':
|
||||
text = b'0::'
|
||||
#
|
||||
# Get rid of the icky dot-quad syntax if we have it.
|
||||
#
|
||||
m = _v4_ending.match(text)
|
||||
if not m is None:
|
||||
b = bytearray(dns.ipv4.inet_aton(m.group(2)))
|
||||
text = (u"%s:%02x%02x:%02x%02x" % (m.group(1).decode(), b[0], b[1],
|
||||
b[2], b[3])).encode()
|
||||
#
|
||||
# Try to turn '::<whatever>' into ':<whatever>'; if no match try to
|
||||
# turn '<whatever>::' into '<whatever>:'
|
||||
#
|
||||
m = _colon_colon_start.match(text)
|
||||
if not m is None:
|
||||
text = text[1:]
|
||||
else:
|
||||
m = _colon_colon_end.match(text)
|
||||
if not m is None:
|
||||
text = text[:-1]
|
||||
#
|
||||
# Now canonicalize into 8 chunks of 4 hex digits each
|
||||
#
|
||||
chunks = text.split(b':')
|
||||
l = len(chunks)
|
||||
if l > 8:
|
||||
raise dns.exception.SyntaxError
|
||||
seen_empty = False
|
||||
canonical = []
|
||||
for c in chunks:
|
||||
if c == b'':
|
||||
if seen_empty:
|
||||
raise dns.exception.SyntaxError
|
||||
seen_empty = True
|
||||
for i in xrange(0, 8 - l + 1):
|
||||
canonical.append(b'0000')
|
||||
else:
|
||||
lc = len(c)
|
||||
if lc > 4:
|
||||
raise dns.exception.SyntaxError
|
||||
if lc != 4:
|
||||
c = (b'0' * (4 - lc)) + c
|
||||
canonical.append(c)
|
||||
if l < 8 and not seen_empty:
|
||||
raise dns.exception.SyntaxError
|
||||
text = b''.join(canonical)
|
||||
|
||||
#
|
||||
# Finally we can go to binary.
|
||||
#
|
||||
try:
|
||||
return binascii.unhexlify(text)
|
||||
except (binascii.Error, TypeError):
|
||||
raise dns.exception.SyntaxError
|
||||
|
||||
_mapped_prefix = b'\x00' * 10 + b'\xff\xff'
|
||||
|
||||
def is_mapped(address):
|
||||
return address.startswith(_mapped_prefix)
|
||||
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user