Compare commits

..

102 Commits

Author SHA1 Message Date
pannal 4da63a8fd7 Update README.md 2015-09-23 14:40:42 +02:00
panni fa27789608 fixed typo 2015-09-23 14:31:55 +02:00
panni f9e9f35157 Merge branch 'deep_scan_subs'
Conflicts:
	Contents/Code/__init__.py
2015-09-23 14:29:21 +02:00
panni 4a6604f0ab custom folder now takes precedence; also scan subfolders for existing subtitles if configured; update custom folder settings description; remove direct subliminal.video patch and move it to subliminal_patch.patch_video 2015-09-23 14:26:21 +02:00
panni 971d1221da don't die on missing header; maybe fixes #13 2015-09-23 13:36:18 +02:00
panni ba69885477 fix saving subs to video folder without custom_path given; should fix #14 2015-09-23 12:46:07 +02:00
panni 8e23098037 add basic functionality to scan custom (sub-) folders for subtitles 2015-09-19 04:35:48 +02:00
pannal 8da7bf029c Update README.md 2015-09-18 03:48:34 +02:00
pannal e16e58cbfa Update README.md 2015-09-18 03:29:34 +02:00
pannal abb7cd3bfa Update README.md 2015-09-18 03:19:04 +02:00
pannal bfa06f3989 Update README.md 2015-09-18 03:16:37 +02:00
pannal c63529939d Merge pull request #11 from pannal/guessit-0.11.0
update guessit to 0.11.0
2015-09-18 03:16:20 +02:00
panni 2814f57e89 update guessit to 0.11.0 2015-09-18 03:14:21 +02:00
panni 70476883c6 Merge branch 'master' of github.com:pannal/Subliminal.bundle 2015-09-18 03:11:20 +02:00
panni b5ed209453 Revert "update guessit to 0.11.0"
This reverts commit be7687f15d.
2015-09-18 03:10:58 +02:00
panni be7687f15d update guessit to 0.11.0 2015-09-18 03:08:55 +02:00
pannal b7fb8e1e76 Update README.md 2015-09-18 02:56:40 +02:00
pannal 1a03720a7d Update README.md 2015-09-18 02:49:34 +02:00
pannal cb4099109a Update README.md 2015-09-18 02:49:19 +02:00
pannal 131504e7ee Merge pull request #10 from pannal/provider_fixes
Provider fixes/addons
2015-09-18 02:42:31 +02:00
pannal b0c7b480d6 Update README.md 2015-09-18 02:40:03 +02:00
panni e543c927cf add third optional language; update option description 2015-09-18 02:32:16 +02:00
panni 897b602d71 correct typo 2015-09-18 02:27:13 +02:00
panni d94421dcf3 add support for 'fa', Persian (Farsi) 2015-09-18 02:17:30 +02:00
panni e371b99dca add support for pt-br, Portuguese Brasil 2015-09-18 02:16:03 +02:00
panni 49d10e5ff7 remove leftover addic7ed score boost; add use_random_agents option to addic7ed 2015-09-18 02:08:01 +02:00
pannal d959f5b826 Update README.md 2015-09-18 01:07:47 +02:00
pannal 709f5cb605 Merge pull request #7 from pannal/provider_fixes
Provider fixes for newest subliminal
2015-09-18 01:06:48 +02:00
panni b11a051c23 patch language converted for addic7ed to support French (Canadian) 2015-09-18 00:57:54 +02:00
panni 1a77902079 move injection of language converters to subliminal_patch; don't discard provider simply because of LanguageReverseError 2015-09-18 00:43:33 +02:00
pannal 481dc2f3b4 Update README.md 2015-09-13 04:40:55 +02:00
panni 732aa91889 re-add language converters for addic7ed and tvsubtitles 2015-09-12 16:20:34 +02:00
panni 0df4c55548 update babelfish to 0.5.5-dev; remove leftover patch.py 2015-09-12 16:20:10 +02:00
panni 7c72ed41fb moved contents of patch.py into separate files; patch addic7ed provider 2015-09-12 16:04:39 +02:00
panni 83ace14faf patch addic7ed provider to use random user agents (again); honor selected providers again; more info on why a provider was discarded 2015-09-12 15:57:19 +02:00
panni 9b1c3538b3 Merge branch 'master' of github.com:pannal/Subliminal.bundle 2015-09-11 22:11:21 +02:00
panni 27a6e51cd3 bugfix; forgot six.py in the last release 2015-09-11 22:10:39 +02:00
pannal 86fad21cf0 Update README.md 2015-09-11 18:58:28 +02:00
pannal 5d081c3d65 Update README.md 2015-09-11 18:48:17 +02:00
pannal ca74c0af0a Merge pull request #1 from pannal/test
Merge test branch into master
2015-09-11 18:46:34 +02:00
panni 002ec90b09 patch subliminal to work inside Plex's sandbox; this now works with the newest subliminal version 2015-09-11 16:09:04 +02:00
panni 6f42199100 subliminal: don't ignore 'badly encoded' filenames 2015-09-10 23:58:51 +02:00
panni 87bb2493d1 adjust addicted score 2015-09-10 23:57:12 +02:00
panni 716a66e9fa update subliminal to current master 2015-09-10 19:40:41 +02:00
panni 88cc95239a update guessit 2015-06-26 15:03:06 +02:00
panni 924470d2c0 contribute to thetvdbdvdorder 2015-06-26 14:42:34 +02:00
panni 45d5200b89 adjust addicted parameters 2015-06-26 14:42:07 +02:00
panni 8f82554927 fix guessit's detection of release_group 2015-05-26 02:41:13 +02:00
panni 423688c352 boost addic7ed score 2015-05-25 21:17:27 +02:00
panni 8207223002 Merge remote-tracking branch 'source/master' 2015-05-25 20:05:32 +02:00
Bram Walet 3b3fdb34e3 Merge branch 'master' of https://github.com/bramwalet/Subliminal.bundle 2015-02-15 17:19:11 +01:00
Bram Walet ecce1fca9c Fix #12 2015-02-15 17:16:31 +01:00
Bram Walet 2011100251 updated guessit 0.8 -> 0.10.1 2015-02-15 17:04:59 +01:00
Bram Walet 14e42e57ea Update subliminal with latest 0.80-dev 2015-02-15 16:59:09 +01:00
bramwalet 834f18f3d5 Merge pull request #7 from pannal/fix-origin-1
fix wrong usage of subFolder setting
2014-08-01 09:13:23 +02:00
panni cf9a916e95 fix wrong usage of subFolder setting 2014-08-01 03:01:30 +02:00
panni a8a26ec642 fix wrong setting usage in saveSubtitlesToFile 2014-08-01 02:49:05 +02:00
panni e6c398589c Merge remote-tracking branch 'source/master' 2014-08-01 01:42:29 +02:00
bramwalet c5332644f1 Added installation and configuration instructions 2014-07-20 09:27:26 +02:00
bramwalet bd0c134ae0 Update README.md 2014-07-19 16:11:12 +02:00
bramwalet d3282648fd Update README.md 2014-07-19 16:10:51 +02:00
bramwalet 1156817c71 Update README.md 2014-07-19 16:08:05 +02:00
panni e1af48bbc2 Merge remote-tracking branch 'source/master' 2014-07-19 16:04:41 +02:00
bramwalet d6dd8379ab Create License 2014-07-19 15:54:32 +02:00
bramwalet bc73e559d1 Updated README 2014-07-19 15:53:45 +02:00
Bram Walet a6d8c9d5fc Added license files of libraries 2014-07-19 15:43:48 +02:00
Bram Walet a5d8a8b1d8 Added prefs for hearing impaired and minimum score, fixes #3 2014-07-19 15:25:24 +02:00
Bram Walet ae28116c59 Properties to influence what existing subtitles are scanned, bugfix for TV series 2014-07-19 15:06:21 +02:00
Bram Walet b03403cf72 Added preference to save subtitle to filesystem, default false. Fixes #5 2014-07-19 14:50:56 +02:00
Bram Walet f5736fcd3b Changed preferences names, prepared scan output for saving as metadata 2014-07-19 14:23:49 +02:00
Bram Walet 4193c245a5 Renamed and reordered Preferences 2014-07-19 14:00:33 +02:00
Bram Walet c649d5b5fd Refactored movie and tv scanning (remove duplicates) 2014-07-19 13:56:12 +02:00
Bram Walet 1fa70995a3 Merge branch 'pannal-master' 2014-07-19 13:15:52 +02:00
Bram Walet 3afee79415 Merge branch 'master' of https://github.com/pannal/Subliminal.bundle into pannal-master 2014-07-19 13:15:04 +02:00
panni 5e43c1936e remove obsolete folder 2014-07-19 13:11:50 +02:00
Bram Walet fcae524771 TV series subtitles are stored as metadata. 2014-07-19 12:11:27 +02:00
panni 098da50e23 need to bool() this...why? 2014-07-19 05:36:54 +02:00
panni 9b3544bff7 broken, fix None 2014-07-19 05:34:08 +02:00
panni 063cae161b move subfolder logic to function; add support for movies; add scan all default subfolders option 2014-07-19 05:11:13 +02:00
panni 825e073e08 wrong variable used 2014-07-19 04:45:15 +02:00
panni 77098e1dc3 basic custom subfolder support; needs more localmedia support 2014-07-19 04:27:15 +02:00
panni e2210b7624 add prefs for subfolder; use prefs 2014-07-19 04:15:07 +02:00
panni ffa9051d69 Merge remote-tracking branch 'source/master' 2014-07-19 03:43:06 +02:00
panni bad0dbfc71 reapply gitignore 2014-07-19 03:42:40 +02:00
panni 53b938b83d basic support for subtitles in subfolders; temp hardcoded 2014-07-19 03:36:14 +02:00
Bram Walet e6c0e5fe7a Removed deprecated constants 2014-07-18 17:05:26 +02:00
Bram Walet d8513e910d Merged origin/master to local branch 2014-07-18 17:00:41 +02:00
Bram Walet c2d984a908 Provider settings from preferences as dict 2014-07-18 16:57:52 +02:00
bramwalet 15b7f134be Fixes issue #2
Configure dogpile cache to be in memory. Tvsubtitles & Addic7ed providers will work.
2014-07-18 10:23:13 +02:00
bramwalet 3463718195 Merge pull request #1 from pannal/master
Fixed movie subtitles uninitialized variable error
2014-07-18 08:20:04 +02:00
panni 9cf1b759d7 Fixed movie subtitles uninitialized variable error 2014-07-18 00:34:51 +02:00
Bram Walet e8b9d6dd1f Removed .pyc files 2014-07-17 20:20:20 +02:00
Bram Walet 66280ded50 Fixed movie update 2014-07-17 20:18:59 +02:00
Bram Walet 55860e5f18 Implemented find best subtitles, enabling/disabling providers, provider settings
Enabled dogpile.cache to be in memory to make tvsubtitles and addic7ed work.
2014-07-17 20:08:03 +02:00
Bram Walet d7bc17a485 Implemented find best subtitles, enabling/disabling providers, provider settings 2014-07-16 22:26:18 +02:00
Bram Walet be3a291cbc Removed obsolete setuptools library, only pkg_resources was needed. 2014-07-14 21:46:25 +02:00
Bram Walet 4696bfe364 Scanning TV media file by subliminal and searching opensubtitles for subtitles 2014-07-14 21:38:37 +02:00
Bram Walet a81533d2cf Added stevedore to dependencies (guessit needs it) 2014-07-14 21:36:03 +02:00
unknown fc9a8dcf48 Added .settings to .gitignore 2014-07-13 12:10:56 +02:00
unknown 5e6d53fe63 Merge branch 'master' of https://github.com/bramwalet/Subliminal.bundle 2014-07-13 12:09:46 +02:00
unknown 0c68e8cf47 Initial commit, plugin boots (dependencies work) in Plex, but no subtitles are searched. 2014-07-13 12:08:02 +02:00
bramwalet d41a0cdda4 Initial commit 2014-07-13 12:02:05 +02:00
722 changed files with 218016 additions and 104331 deletions
-8
View File
@@ -1,8 +0,0 @@
[report]
exclude_lines =
pragma: no cover
raise NotImplementedError
def __repr__
if __name__ == .__main__.:
omit =
subliminal/cli.py
+11 -20
View File
@@ -1,7 +1,6 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
@@ -9,12 +8,11 @@ __pycache__/
# Distribution / packaging
.Python
env/
bin/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
@@ -24,12 +22,6 @@ var/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
@@ -38,27 +30,26 @@ pip-delete-this-directory.txt
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
# Translations
*.mo
*.pot
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
.settings
# Rope
.ropeproject
# Django stuff:
*.log
*.pot
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Pycharm
.idea
# Subliminal
tests/data/mkv/
-49
View File
@@ -1,49 +0,0 @@
sudo: false
language: python
python:
- "2.7"
- "3.3"
- "3.4"
- "3.5"
env:
- PARSER=native
- PARSER=lxml
addons:
apt:
packages:
- unrar
matrix:
include:
- python: "3.5"
env:
- PARSER=native
- VCR_RECORD_MODE=all
- PYTEST_ADDOPTS="-m integration"
allow_failures:
- python: "3.5"
env:
- PARSER=native
- VCR_RECORD_MODE=all
- PYTEST_ADDOPTS="-m integration"
cache:
directories:
- $HOME/.cache/pip
- tests/data/mkv
before_cache:
- rm -f $HOME/.cache/pip/log/debug.log
install:
- pip install -e .[test]
- if [ $PARSER = "lxml" ]; then pip install lxml; fi
- pip install coveralls
script: python setup.py test --addopts "--cov subliminal --verbose $PYTEST_ADDOPTS"
after_success: coveralls
-19
View File
@@ -1,19 +0,0 @@
Contributing
============
Issues
------
Issues are intended for bug report and feature requests. For any bug report please make sure to include the complete
stack trace and DEBUG level logs as well as reproduce steps.
If you use the CLI, you can create a debug log file with `subliminal --debug [...] 2> debug.log`.
Pull Requests
-------------
You can contribute code and documentation with pull requests. Any code contribution must be unit tested and the pull
request open against the *develop* branch.
Translations
------------
Contribution to translations can be made on [subliminal's transifex page](https://www.transifex.com/subliminal/subliminal/)
Subliminal is configured to work with [transifex-client](http://docs.transifex.com/client/)
+172
View File
@@ -0,0 +1,172 @@
# hdbits.org
import string, os, urllib, zipfile, re, copy
from babelfish import Language
from datetime import timedelta
import subliminal
import subliminal_patch
import logger
OS_PLEX_USERAGENT = 'plexapp.com v9.0'
DEPENDENCY_MODULE_NAMES = ['subliminal', 'subliminal_patch', 'enzyme', 'guessit', 'requests']
def Start():
HTTP.CacheTime = 0
HTTP.Headers['User-agent'] = OS_PLEX_USERAGENT
Log.Debug("START CALLED")
logger.registerLoggingHander(DEPENDENCY_MODULE_NAMES)
# configured cache to be in memory as per https://github.com/Diaoul/subliminal/issues/303
subliminal.region.configure('dogpile.cache.memory')
def ValidatePrefs():
Log.Debug("Validate Prefs called.")
return
# Prepare a list of languages we want subs for
def getLangList():
langList = {Language.fromietf(Prefs["langPref1"])}
if(Prefs["langPref2"] != "None"):
langList.update({Language.fromietf(Prefs["langPref2"])})
if(Prefs["langPref3"] != "None"):
langList.update({Language.fromietf(Prefs["langPref3"])})
return langList
def getSubtitleDestinationFolder():
if not Prefs["subtitles.save.filesystem"]:
return
fld_custom = Prefs["subtitles.save.subFolder.Custom"].strip() if bool(Prefs["subtitles.save.subFolder.Custom"]) else None
return fld_custom or (Prefs["subtitles.save.subFolder"] if Prefs["subtitles.save.subFolder"] != "current folder" else None)
def initSubliminalPatches():
# configure custom subtitle destination folders for scanning pre-existing subs
dest_folder = getSubtitleDestinationFolder()
subliminal_patch.patch_video.CUSTOM_PATHS = [dest_folder] if dest_folder else []
def getProviders():
providers = {'opensubtitles' : Prefs['provider.opensubtitles.enabled'],
'thesubdb' : Prefs['provider.thesubdb.enabled'],
'podnapisi' : Prefs['provider.podnapisi.enabled'],
'addic7ed' : Prefs['provider.addic7ed.enabled'],
'tvsubtitles' : Prefs['provider.tvsubtitles.enabled']
}
return filter(lambda prov: providers[prov], providers)
def getProviderSettings():
provider_settings = {'addic7ed': {'username': Prefs['provider.addic7ed.username'],
'password': Prefs['provider.addic7ed.password'],
'use_random_agents': Prefs['provider.addic7ed.use_random_agents'],
},
}
return provider_settings
def scanTvMedia(media):
videos = {}
for season in media.seasons:
for episode in media.seasons[season].episodes:
for item in media.seasons[season].episodes[episode].items:
for part in item.parts:
scannedVideo = scanVideo(part)
videos[scannedVideo] = part
return videos
def scanMovieMedia(media):
videos = {}
for item in media.items:
for part in item.parts:
scannedVideo = scanVideo(part)
videos[scannedVideo] = part
return videos
def scanVideo(part):
embedded_subtitles = Prefs['subtitles.scan.embedded']
external_subtitles = Prefs['subtitles.scan.external']
Log.Debug("Scanning video: %s, subtitles=%s, embedded_subtitles=%s" % (part.file, external_subtitles, embedded_subtitles))
try:
return subliminal.video.scan_video(part.file, subtitles=external_subtitles, embedded_subtitles=embedded_subtitles)
except ValueError:
Log.Warn("File could not be guessed by subliminal")
def downloadBestSubtitles(videos):
min_score = int(Prefs['subtitles.search.minimumScore'])
hearing_impaired = Prefs['subtitles.search.hearingImpaired']
Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s" %(min_score, hearing_impaired))
return subliminal.api.download_best_subtitles(videos, getLangList(), min_score, hearing_impaired, providers=getProviders(), provider_configs=getProviderSettings())
def saveSubtitles(videos, subtitles):
if Prefs['subtitles.save.filesystem']:
Log.Debug("Saving subtitles to filesystem")
saveSubtitlesToFile(subtitles)
else:
Log.Debug("Saving subtitles as metadata")
saveSubtitlesToMetadata(videos, subtitles)
def saveSubtitlesToFile(subtitles):
fld_custom = Prefs["subtitles.save.subFolder.Custom"].strip() if bool(Prefs["subtitles.save.subFolder.Custom"]) else None
for video, video_subtitles in subtitles.items():
fld = None
if fld_custom or Prefs["subtitles.save.subFolder"] != "current folder":
# specific subFolder requested, create it if it doesn't exist
fld_base = os.path.split(video.name)[0]
if fld_custom:
if fld_custom.startswith("/"):
# absolute folder
fld = fld_custom
else:
fld = os.path.join(fld_base, fld_custom)
else:
fld = os.path.join(fld_base, Prefs["subtitles.save.subFolder"])
if not os.path.exists(fld):
os.makedirs(fld)
subliminal.api.save_subtitles(video, video_subtitles, directory=fld)
def saveSubtitlesToMetadata(videos, subtitles):
for video, video_subtitles in subtitles.items():
mediaPart = videos[video]
for subtitle in video_subtitles:
mediaPart.subtitles[Locale.Language.Match(subtitle.language.alpha2)][subtitle.page_link] = Proxy.Media(subtitle.content, ext="srt")
class SubliminalSubtitlesAgentMovies(Agent.Movies):
name = 'Subliminal Movie Subtitles'
languages = [Locale.Language.English]
primary_provider = False
contributes_to = ['com.plexapp.agents.imdb']
def search(self, results, media, lang):
Log.Debug("MOVIE SEARCH CALLED")
results.Append(MetadataSearchResult(id='null', score=100))
def update(self, metadata, media, lang):
Log.Debug("MOVIE UPDATE CALLED")
initSubliminalPatches()
videos = scanMovieMedia(media)
subtitles = downloadBestSubtitles(videos.keys())
saveSubtitles(videos, subtitles)
class SubliminalSubtitlesAgentTvShows(Agent.TV_Shows):
name = 'Subliminal TV Subtitles'
languages = [Locale.Language.English]
primary_provider = False
contributes_to = ['com.plexapp.agents.thetvdb', 'com.plexapp.agents.thetvdbdvdorder']
def search(self, results, media, lang):
Log.Debug("TV SEARCH CALLED")
results.Append(MetadataSearchResult(id='null', score=100))
def update(self, metadata, media, lang):
Log.Debug("TvUpdate. Lang %s" % lang)
initSubliminalPatches()
videos = scanTvMedia(media)
subtitles = downloadBestSubtitles(videos.keys())
saveSubtitles(videos, subtitles)
+33
View File
@@ -0,0 +1,33 @@
import logging
def registerLoggingHander(dependencies):
plexHandler = PlexLoggerHandler()
for dependency in dependencies:
Log.Debug("Registering LoggerHandler for dependency: %s" % dependency)
log = logging.getLogger(dependency)
log.setLevel('DEBUG')
log.addHandler(plexHandler)
class PlexLoggerHandler(logging.StreamHandler):
def __init__(self, level=0):
super(PlexLoggerHandler, self).__init__(level)
def getFormattedString(self, record):
return record.name + ": " + record.getMessage()
def emit(self, record):
if record.levelno == logging.DEBUG:
Log.Debug(self.getFormattedString(record))
elif record.levelno == logging.INFO:
Log.Info(self.getFormattedString(record))
elif record.levelno == logging.WARNING:
Log.Warn(self.getFormattedString(record))
elif record.levelno == logging.ERROR:
Log.Error(self.getFormattedString(record))
elif record.levelno == logging.CRITICAL:
Log.Critical(self.getFormattedString(record))
elif record.levelno == logging.FATAL:
Log.Exception(self.getFormattedString(record))
else:
Log.Error("UNKNOWN LEVEL: %s", record.getMessage())
+117
View File
@@ -0,0 +1,117 @@
[
{
"id": "provider.addic7ed.username",
"label": "Addic7ed Username",
"type": "text",
"default": "Username"
},
{
"id": "provider.addic7ed.password",
"label": "Addic7ed Password",
"type": "text",
"option": "hidden",
"default": "",
"secure": "true"
},
{
"id": "provider.addic7ed.use_random_agents",
"label": "Addic7ed: Use random user agents (should not be necessary)",
"type": "bool",
"default": "false"
},
{
"id": "langPref1",
"label": "Subtitle Language (1)",
"type": "enum",
"values": ["sq","ar","be","bs","bg","ca","zh","cs","da","nl","en","et","fi","fr","de","el","he","hi","hu","is","id","it","ja","ko","lv","lt","mk","ms","no","fa","pl","pt","pt-br","ro","ru","sr","sk","sl","es","sv","th","tr","uk","vi","hr"],
"default": "en"
},
{
"id": "langPref2",
"label": "Subtitle Language (2)",
"type": "enum",
"values": ["None", "sq","ar","be","bs","bg","ca","zh","cs","da","nl","en","et","fi","fr","de","el","he","hi","hu","is","id","it","ja","ko","lv","lt","mk","ms","no","fa","pl","pt","pt-br","ro","ru","sr","sk","sl","es","sv","th","tr","uk","vi","hr"],
"default": "None"
},
{
"id": "langPref3",
"label": "Subtitle Language (3)",
"type": "enum",
"values": ["None", "sq","ar","be","bs","bg","ca","zh","cs","da","nl","en","et","fi","fr","de","el","he","hi","hu","is","id","it","ja","ko","lv","lt","mk","ms","no","fa","pl","pt","pt-br","ro","ru","sr","sk","sl","es","sv","th","tr","uk","vi","hr"],
"default": "None"
},
{
"id": "provider.opensubtitles.enabled",
"label": "Provider: Enable OpenSubtitles",
"type": "bool",
"default": "true"
},
{
"id": "provider.thesubdb.enabled",
"label": "Provider: Enable TheSubDB",
"type": "bool",
"default": "true"
},
{
"id": "provider.podnapisi.enabled",
"label": "Provider: Enable Podnapisi.NET",
"type": "bool",
"default": "true"
},
{
"id": "provider.addic7ed.enabled",
"label": "Provider: Enable Addic7ed",
"type": "bool",
"default": "true"
},
{
"id": "provider.tvsubtitles.enabled",
"label": "Provider: Enable TVsubtitles.net",
"type": "bool",
"default": "true"
},
{
"id": "subtitles.scan.embedded",
"label": "Scan: include embedded subtitles (skip if existing)",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.scan.external",
"label": "Scan: include external subtitles (skip if existing)",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.search.minimumScore",
"label": "Minimum score for subtitles to download",
"type": "enum",
"values": ["100","95","90","85","80","75","70","65","60","55","50","45","40","35","30","25","20","15","10","5","0"],
"default": "0"
},
{
"id": "subtitles.search.hearingImpaired",
"label": "Download hearing impaired subtitles.",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.save.filesystem",
"label": "Store subtitles next to media files (instead of metadata)",
"type": "bool",
"default": "false"
},
{
"id": "subtitles.save.subFolder",
"label": "Subtitle Folder (\"current folder\" is the folder the current media file lives in)",
"type": "enum",
"values": ["current folder", "sub", "subs", "subtitle", "subtitles"],
"default": "current folder"
},
{
"id": "subtitles.save.subFolder.Custom",
"label": "Custom Subtitle folder (overrides \"Subtitle Folder\"; computes to real paths; use for example \"bla\" as a subfolder of the current media file folder or an absolute path)",
"type": "text",
"default": ""
},
]
+33
View File
@@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundleExecutable</key>
<string>Test Plug-in</string>
<key>CFBundleIdentifier</key>
<string>com.plexapp.agents.subliminal</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>
<string>1.0</string>
<key>PlexFrameworkVersion</key>
<string>2</string>
<key>PlexPluginClass</key>
<string>Agent</string>
<key>PlexPluginMode</key>
<string>AlwaysOn</string>
<key>PlexPluginConsoleLogging</key>
<string>1</string>
<key>PlexPluginDevMode</key>
<string>1</string>
<key>PlexPluginCodePolicy</key>
<!-- this allows channels to access some python methods which are otherwise blocked, as well as import external code libraries, and interact with the PMS HTTP API -->
<string>Elevated</string>
</dict>
</plist>
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
__title__ = 'babelfish'
__version__ = '0.5.5-dev'
__author__ = 'Antoine Bertin'
__license__ = 'BSD'
__copyright__ = 'Copyright 2015 the BabelFish authors'
import sys
if sys.version_info[0] >= 3:
basestr = str
else:
basestr = basestring
from .converters import (LanguageConverter, LanguageReverseConverter, LanguageEquivalenceConverter, CountryConverter,
CountryReverseConverter)
from .country import country_converters, COUNTRIES, COUNTRY_MATRIX, Country
from .exceptions import Error, LanguageConvertError, LanguageReverseError, CountryConvertError, CountryReverseError
from .language import language_converters, LANGUAGES, LANGUAGE_MATRIX, Language
from .script import SCRIPTS, SCRIPT_MATRIX, Script
@@ -0,0 +1,287 @@
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
import collections
from pkg_resources import iter_entry_points, EntryPoint
from ..exceptions import LanguageConvertError, LanguageReverseError
# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
class CaseInsensitiveDict(collections.MutableMapping):
"""A case-insensitive ``dict``-like object.
Implements all methods and operations of
``collections.MutableMapping`` as well as dict's ``copy``. Also
provides ``lower_items``.
All keys are expected to be strings. The structure remembers the
case of the last key to be set, and ``iter(instance)``,
``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
will contain case-sensitive keys. However, querying and contains
testing is case insensitive:
cid = CaseInsensitiveDict()
cid['English'] = 'eng'
cid['ENGLISH'] == 'eng' # True
list(cid) == ['English'] # True
If the constructor, ``.update``, or equality comparison
operations are given keys that have equal ``.lower()``s, the
behavior is undefined.
"""
def __init__(self, data=None, **kwargs):
self._store = dict()
if data is None:
data = {}
self.update(data, **kwargs)
def __setitem__(self, key, value):
# Use the lowercased key for lookups, but store the actual
# key alongside the value.
self._store[key.lower()] = (key, value)
def __getitem__(self, key):
return self._store[key.lower()][1]
def __delitem__(self, key):
del self._store[key.lower()]
def __iter__(self):
return (casedkey for casedkey, mappedvalue in self._store.values())
def __len__(self):
return len(self._store)
def lower_items(self):
"""Like iteritems(), but with all lowercase keys."""
return (
(lowerkey, keyval[1])
for (lowerkey, keyval)
in self._store.items()
)
def __eq__(self, other):
if isinstance(other, collections.Mapping):
other = CaseInsensitiveDict(other)
else:
return NotImplemented
# Compare insensitively
return dict(self.lower_items()) == dict(other.lower_items())
# Copy is required
def copy(self):
return CaseInsensitiveDict(self._store.values())
def __repr__(self):
return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
class LanguageConverter(object):
"""A :class:`LanguageConverter` supports converting an alpha3 language code with an
alpha2 country code and a script code into a custom code
.. attribute:: codes
Set of possible custom codes
"""
def convert(self, alpha3, country=None, script=None):
"""Convert an alpha3 language code with an alpha2 country code and a script code
into a custom code
:param string alpha3: ISO-639-3 language code
:param country: ISO-3166 country code, if any
:type country: string or None
:param script: ISO-15924 script code, if any
:type script: string or None
:return: the corresponding custom code
:rtype: string
:raise: :class:`~babelfish.exceptions.LanguageConvertError`
"""
raise NotImplementedError
class LanguageReverseConverter(LanguageConverter):
"""A :class:`LanguageConverter` able to reverse a custom code into a alpha3
ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
"""
def reverse(self, code):
"""Reverse a custom code into alpha3, country and script code
:param string code: custom code to reverse
:return: the corresponding alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
:rtype: tuple
:raise: :class:`~babelfish.exceptions.LanguageReverseError`
"""
raise NotImplementedError
class LanguageEquivalenceConverter(LanguageReverseConverter):
"""A :class:`LanguageEquivalenceConverter` is a utility class that allows you to easily define a
:class:`LanguageReverseConverter` by only specifying the dict from alpha3 to their corresponding symbols.
You must specify the dict of equivalence as a class variable named SYMBOLS.
If you also set the class variable CASE_SENSITIVE to ``True`` then the reverse conversion function will be
case-sensitive (it is case-insensitive by default).
Example::
class MyCodeConverter(babelfish.LanguageEquivalenceConverter):
CASE_SENSITIVE = True
SYMBOLS = {'fra': 'mycode1', 'eng': 'mycode2'}
"""
CASE_SENSITIVE = False
def __init__(self):
self.codes = set()
self.to_symbol = {}
if self.CASE_SENSITIVE:
self.from_symbol = {}
else:
self.from_symbol = CaseInsensitiveDict()
for alpha3, symbol in self.SYMBOLS.items():
self.to_symbol[alpha3] = symbol
self.from_symbol[symbol] = (alpha3, None, None)
self.codes.add(symbol)
def convert(self, alpha3, country=None, script=None):
try:
return self.to_symbol[alpha3]
except KeyError:
raise LanguageConvertError(alpha3, country, script)
def reverse(self, code):
try:
return self.from_symbol[code]
except KeyError:
raise LanguageReverseError(code)
class CountryConverter(object):
"""A :class:`CountryConverter` supports converting an alpha2 country code
into a custom code
.. attribute:: codes
Set of possible custom codes
"""
def convert(self, alpha2):
"""Convert an alpha2 country code into a custom code
:param string alpha2: ISO-3166-1 language code
:return: the corresponding custom code
:rtype: string
:raise: :class:`~babelfish.exceptions.CountryConvertError`
"""
raise NotImplementedError
class CountryReverseConverter(CountryConverter):
"""A :class:`CountryConverter` able to reverse a custom code into a alpha2
ISO-3166-1 country code
"""
def reverse(self, code):
"""Reverse a custom code into alpha2 code
:param string code: custom code to reverse
:return: the corresponding alpha2 ISO-3166-1 country code
:rtype: string
:raise: :class:`~babelfish.exceptions.CountryReverseError`
"""
raise NotImplementedError
class ConverterManager(object):
"""Manager for babelfish converters behaving like a dict with lazy loading
Loading is done in this order:
* Entry point converters
* Registered converters
* Internal converters
.. attribute:: entry_point
The entry point where to look for converters
.. attribute:: internal_converters
Internal converters with entry point syntax
"""
entry_point = ''
internal_converters = []
def __init__(self):
#: Registered converters with entry point syntax
self.registered_converters = []
#: Loaded converters
self.converters = {}
def __getitem__(self, name):
"""Get a converter, lazy loading it if necessary"""
if name in self.converters:
return self.converters[name]
for ep in iter_entry_points(self.entry_point):
if ep.name == name:
self.converters[ep.name] = ep.load()()
return self.converters[ep.name]
for ep in (EntryPoint.parse(c) for c in self.registered_converters + self.internal_converters):
if ep.name == name:
# `require` argument of ep.load() is deprecated in newer versions of setuptools
if hasattr(ep, 'resolve'):
plugin = ep.resolve()
elif hasattr(ep, '_load'):
plugin = ep._load()
else:
plugin = ep.load(require=False)
self.converters[ep.name] = plugin()
return self.converters[ep.name]
raise KeyError(name)
def __setitem__(self, name, converter):
"""Load a converter"""
self.converters[name] = converter
def __delitem__(self, name):
"""Unload a converter"""
del self.converters[name]
def __iter__(self):
"""Iterator over loaded converters"""
return iter(self.converters)
def register(self, entry_point):
"""Register a converter
:param string entry_point: converter to register (entry point syntax)
:raise: ValueError if already registered
"""
if entry_point in self.registered_converters:
raise ValueError('Already registered')
self.registered_converters.insert(0, entry_point)
def unregister(self, entry_point):
"""Unregister a converter
:param string entry_point: converter to unregister (entry point syntax)
"""
self.registered_converters.remove(entry_point)
def __contains__(self, name):
return name in self.converters
@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class Alpha2Converter(LanguageEquivalenceConverter):
CASE_SENSITIVE = True
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
if iso_language.alpha2:
SYMBOLS[iso_language.alpha3] = iso_language.alpha2
@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class Alpha3BConverter(LanguageEquivalenceConverter):
CASE_SENSITIVE = True
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
if iso_language.alpha3b:
SYMBOLS[iso_language.alpha3] = iso_language.alpha3b
@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class Alpha3TConverter(LanguageEquivalenceConverter):
CASE_SENSITIVE = True
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
if iso_language.alpha3t:
SYMBOLS[iso_language.alpha3] = iso_language.alpha3t
@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import CountryReverseConverter, CaseInsensitiveDict
from ..country import COUNTRY_MATRIX
from ..exceptions import CountryConvertError, CountryReverseError
class CountryNameConverter(CountryReverseConverter):
def __init__(self):
self.codes = set()
self.to_name = {}
self.from_name = CaseInsensitiveDict()
for country in COUNTRY_MATRIX:
self.codes.add(country.name)
self.to_name[country.alpha2] = country.name
self.from_name[country.name] = country.alpha2
def convert(self, alpha2):
if alpha2 not in self.to_name:
raise CountryConvertError(alpha2)
return self.to_name[alpha2]
def reverse(self, name):
if name not in self.from_name:
raise CountryReverseError(name)
return self.from_name[name]
@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class NameConverter(LanguageEquivalenceConverter):
CASE_SENSITIVE = False
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
if iso_language.name:
SYMBOLS[iso_language.alpha3] = iso_language.name
@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageReverseConverter, CaseInsensitiveDict
from ..exceptions import LanguageReverseError
from ..language import language_converters
class OpenSubtitlesConverter(LanguageReverseConverter):
def __init__(self):
self.alpha3b_converter = language_converters['alpha3b']
self.alpha2_converter = language_converters['alpha2']
self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne'}
self.from_opensubtitles = CaseInsensitiveDict({'pob': ('por', 'BR'), 'pb': ('por', 'BR'), 'ell': ('ell', None),
'scc': ('srp', None), 'mne': ('srp', 'ME')})
self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | set(['pob', 'pb', 'scc', 'mne']))
def convert(self, alpha3, country=None, script=None):
alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
if (alpha3b, country) in self.to_opensubtitles:
return self.to_opensubtitles[(alpha3b, country)]
return alpha3b
def reverse(self, opensubtitles):
if opensubtitles in self.from_opensubtitles:
return self.from_opensubtitles[opensubtitles]
for conv in [self.alpha3b_converter, self.alpha2_converter]:
try:
return conv.reverse(opensubtitles)
except LanguageReverseError:
pass
raise LanguageReverseError(opensubtitles)
@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageConverter
from ..exceptions import LanguageConvertError
from ..language import LANGUAGE_MATRIX
class ScopeConverter(LanguageConverter):
FULLNAME = {'I': 'individual', 'M': 'macrolanguage', 'S': 'special'}
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
SYMBOLS[iso_language.alpha3] = iso_language.scope
codes = set(SYMBOLS.values())
def convert(self, alpha3, country=None, script=None):
if self.SYMBOLS[alpha3] in self.FULLNAME:
return self.FULLNAME[self.SYMBOLS[alpha3]]
raise LanguageConvertError(alpha3, country, script)
@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageConverter
from ..exceptions import LanguageConvertError
from ..language import LANGUAGE_MATRIX
class LanguageTypeConverter(LanguageConverter):
FULLNAME = {'A': 'ancient', 'C': 'constructed', 'E': 'extinct', 'H': 'historical', 'L': 'living', 'S': 'special'}
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
SYMBOLS[iso_language.alpha3] = iso_language.type
codes = set(SYMBOLS.values())
def convert(self, alpha3, country=None, script=None):
if self.SYMBOLS[alpha3] in self.FULLNAME:
return self.FULLNAME[self.SYMBOLS[alpha3]]
raise LanguageConvertError(alpha3, country, script)
@@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from functools import partial
from pkg_resources import resource_stream # @UnresolvedImport
from .converters import ConverterManager
from . import basestr
COUNTRIES = {}
COUNTRY_MATRIX = []
#: The namedtuple used in the :data:`COUNTRY_MATRIX`
IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])
f = resource_stream('babelfish', 'data/iso-3166-1.txt')
f.readline()
for l in f:
iso_country = IsoCountry(*l.decode('utf-8').strip().split(';'))
COUNTRIES[iso_country.alpha2] = iso_country.name
COUNTRY_MATRIX.append(iso_country)
f.close()
class CountryConverterManager(ConverterManager):
""":class:`~babelfish.converters.ConverterManager` for country converters"""
entry_point = 'babelfish.country_converters'
internal_converters = ['name = babelfish.converters.countryname:CountryNameConverter']
country_converters = CountryConverterManager()
class CountryMeta(type):
"""The :class:`Country` metaclass
Dynamically redirect :meth:`Country.frommycode` to :meth:`Country.fromcode` with the ``mycode`` `converter`
"""
def __getattr__(cls, name):
if name.startswith('from'):
return partial(cls.fromcode, converter=name[4:])
return type.__getattribute__(cls, name)
class Country(CountryMeta(str('CountryBase'), (object,), {})):
"""A country on Earth
A country is represented by a 2-letter code from the ISO-3166 standard
:param string country: 2-letter ISO-3166 country code
"""
def __init__(self, country):
if country not in COUNTRIES:
raise ValueError('%r is not a valid country' % country)
#: ISO-3166 2-letter country code
self.alpha2 = country
@classmethod
def fromcode(cls, code, converter):
"""Create a :class:`Country` by its `code` using `converter` to
:meth:`~babelfish.converters.CountryReverseConverter.reverse` it
:param string code: the code to reverse
:param string converter: name of the :class:`~babelfish.converters.CountryReverseConverter` to use
:return: the corresponding :class:`Country` instance
:rtype: :class:`Country`
"""
return cls(country_converters[converter].reverse(code))
def __getstate__(self):
return self.alpha2
def __setstate__(self, state):
self.alpha2 = state
def __getattr__(self, name):
return country_converters[name].convert(self.alpha2)
def __hash__(self):
return hash(self.alpha2)
def __eq__(self, other):
if isinstance(other, basestr):
return str(self) == other
if not isinstance(other, Country):
return False
return self.alpha2 == other.alpha2
def __ne__(self, other):
return not self == other
def __repr__(self):
return '<Country [%s]>' % self
def __str__(self):
return self.alpha2
@@ -0,0 +1,45 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
import os.path
import tempfile
import zipfile
import requests
DATA_DIR = os.path.dirname(__file__)
# iso-3166-1.txt
print('Downloading ISO-3166-1 standard (ISO country codes)...')
with open(os.path.join(DATA_DIR, 'iso-3166-1.txt'), 'w') as f:
r = requests.get('http://www.iso.org/iso/home/standards/country_codes/country_names_and_code_elements_txt.htm')
f.write(r.content.strip())
# iso-639-3.tab
print('Downloading ISO-639-3 standard (ISO language codes)...')
with tempfile.TemporaryFile() as f:
r = requests.get('http://www-01.sil.org/iso639-3/iso-639-3_Code_Tables_20130531.zip')
f.write(r.content)
with zipfile.ZipFile(f) as z:
z.extract('iso-639-3.tab', DATA_DIR)
# iso-15924
print('Downloading ISO-15924 standard (ISO script codes)...')
with tempfile.TemporaryFile() as f:
r = requests.get('http://www.unicode.org/iso15924/iso15924.txt.zip')
f.write(r.content)
with zipfile.ZipFile(f) as z:
z.extract('iso15924-utf8-20131012.txt', DATA_DIR)
# opensubtitles supported languages
print('Downloading OpenSubtitles supported languages...')
with open(os.path.join(DATA_DIR, 'opensubtitles_languages.txt'), 'w') as f:
r = requests.get('http://www.opensubtitles.org/addons/export_languages.php')
f.write(r.content)
print('Done!')
@@ -0,0 +1,250 @@
Country Name;ISO 3166-1-alpha-2 code
AFGHANISTAN;AF
ÅLAND ISLANDS;AX
ALBANIA;AL
ALGERIA;DZ
AMERICAN SAMOA;AS
ANDORRA;AD
ANGOLA;AO
ANGUILLA;AI
ANTARCTICA;AQ
ANTIGUA AND BARBUDA;AG
ARGENTINA;AR
ARMENIA;AM
ARUBA;AW
AUSTRALIA;AU
AUSTRIA;AT
AZERBAIJAN;AZ
BAHAMAS;BS
BAHRAIN;BH
BANGLADESH;BD
BARBADOS;BB
BELARUS;BY
BELGIUM;BE
BELIZE;BZ
BENIN;BJ
BERMUDA;BM
BHUTAN;BT
BOLIVIA, PLURINATIONAL STATE OF;BO
BONAIRE, SINT EUSTATIUS AND SABA;BQ
BOSNIA AND HERZEGOVINA;BA
BOTSWANA;BW
BOUVET ISLAND;BV
BRAZIL;BR
BRITISH INDIAN OCEAN TERRITORY;IO
BRUNEI DARUSSALAM;BN
BULGARIA;BG
BURKINA FASO;BF
BURUNDI;BI
CAMBODIA;KH
CAMEROON;CM
CANADA;CA
CAPE VERDE;CV
CAYMAN ISLANDS;KY
CENTRAL AFRICAN REPUBLIC;CF
CHAD;TD
CHILE;CL
CHINA;CN
CHRISTMAS ISLAND;CX
COCOS (KEELING) ISLANDS;CC
COLOMBIA;CO
COMOROS;KM
CONGO;CG
CONGO, THE DEMOCRATIC REPUBLIC OF THE;CD
COOK ISLANDS;CK
COSTA RICA;CR
CÔTE D'IVOIRE;CI
CROATIA;HR
CUBA;CU
CURAÇAO;CW
CYPRUS;CY
CZECH REPUBLIC;CZ
DENMARK;DK
DJIBOUTI;DJ
DOMINICA;DM
DOMINICAN REPUBLIC;DO
ECUADOR;EC
EGYPT;EG
EL SALVADOR;SV
EQUATORIAL GUINEA;GQ
ERITREA;ER
ESTONIA;EE
ETHIOPIA;ET
FALKLAND ISLANDS (MALVINAS);FK
FAROE ISLANDS;FO
FIJI;FJ
FINLAND;FI
FRANCE;FR
FRENCH GUIANA;GF
FRENCH POLYNESIA;PF
FRENCH SOUTHERN TERRITORIES;TF
GABON;GA
GAMBIA;GM
GEORGIA;GE
GERMANY;DE
GHANA;GH
GIBRALTAR;GI
GREECE;GR
GREENLAND;GL
GRENADA;GD
GUADELOUPE;GP
GUAM;GU
GUATEMALA;GT
GUERNSEY;GG
GUINEA;GN
GUINEA-BISSAU;GW
GUYANA;GY
HAITI;HT
HEARD ISLAND AND MCDONALD ISLANDS;HM
HOLY SEE (VATICAN CITY STATE);VA
HONDURAS;HN
HONG KONG;HK
HUNGARY;HU
ICELAND;IS
INDIA;IN
INDONESIA;ID
IRAN, ISLAMIC REPUBLIC OF;IR
IRAQ;IQ
IRELAND;IE
ISLE OF MAN;IM
ISRAEL;IL
ITALY;IT
JAMAICA;JM
JAPAN;JP
JERSEY;JE
JORDAN;JO
KAZAKHSTAN;KZ
KENYA;KE
KIRIBATI;KI
KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF;KP
KOREA, REPUBLIC OF;KR
KUWAIT;KW
KYRGYZSTAN;KG
LAO PEOPLE'S DEMOCRATIC REPUBLIC;LA
LATVIA;LV
LEBANON;LB
LESOTHO;LS
LIBERIA;LR
LIBYA;LY
LIECHTENSTEIN;LI
LITHUANIA;LT
LUXEMBOURG;LU
MACAO;MO
MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF;MK
MADAGASCAR;MG
MALAWI;MW
MALAYSIA;MY
MALDIVES;MV
MALI;ML
MALTA;MT
MARSHALL ISLANDS;MH
MARTINIQUE;MQ
MAURITANIA;MR
MAURITIUS;MU
MAYOTTE;YT
MEXICO;MX
MICRONESIA, FEDERATED STATES OF;FM
MOLDOVA, REPUBLIC OF;MD
MONACO;MC
MONGOLIA;MN
MONTENEGRO;ME
MONTSERRAT;MS
MOROCCO;MA
MOZAMBIQUE;MZ
MYANMAR;MM
NAMIBIA;NA
NAURU;NR
NEPAL;NP
NETHERLANDS;NL
NEW CALEDONIA;NC
NEW ZEALAND;NZ
NICARAGUA;NI
NIGER;NE
NIGERIA;NG
NIUE;NU
NORFOLK ISLAND;NF
NORTHERN MARIANA ISLANDS;MP
NORWAY;NO
OMAN;OM
PAKISTAN;PK
PALAU;PW
PALESTINE, STATE OF;PS
PANAMA;PA
PAPUA NEW GUINEA;PG
PARAGUAY;PY
PERU;PE
PHILIPPINES;PH
PITCAIRN;PN
POLAND;PL
PORTUGAL;PT
PUERTO RICO;PR
QATAR;QA
RÉUNION;RE
ROMANIA;RO
RUSSIAN FEDERATION;RU
RWANDA;RW
SAINT BARTHÉLEMY;BL
SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA;SH
SAINT KITTS AND NEVIS;KN
SAINT LUCIA;LC
SAINT MARTIN (FRENCH PART);MF
SAINT PIERRE AND MIQUELON;PM
SAINT VINCENT AND THE GRENADINES;VC
SAMOA;WS
SAN MARINO;SM
SAO TOME AND PRINCIPE;ST
SAUDI ARABIA;SA
SENEGAL;SN
SERBIA;RS
SEYCHELLES;SC
SIERRA LEONE;SL
SINGAPORE;SG
SINT MAARTEN (DUTCH PART);SX
SLOVAKIA;SK
SLOVENIA;SI
SOLOMON ISLANDS;SB
SOMALIA;SO
SOUTH AFRICA;ZA
SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS;GS
SOUTH SUDAN;SS
SPAIN;ES
SRI LANKA;LK
SUDAN;SD
SURINAME;SR
SVALBARD AND JAN MAYEN;SJ
SWAZILAND;SZ
SWEDEN;SE
SWITZERLAND;CH
SYRIAN ARAB REPUBLIC;SY
TAIWAN, PROVINCE OF CHINA;TW
TAJIKISTAN;TJ
TANZANIA, UNITED REPUBLIC OF;TZ
THAILAND;TH
TIMOR-LESTE;TL
TOGO;TG
TOKELAU;TK
TONGA;TO
TRINIDAD AND TOBAGO;TT
TUNISIA;TN
TURKEY;TR
TURKMENISTAN;TM
TURKS AND CAICOS ISLANDS;TC
TUVALU;TV
UGANDA;UG
UKRAINE;UA
UNITED ARAB EMIRATES;AE
UNITED KINGDOM;GB
UNITED STATES;US
UNITED STATES MINOR OUTLYING ISLANDS;UM
URUGUAY;UY
UZBEKISTAN;UZ
VANUATU;VU
VENEZUELA, BOLIVARIAN REPUBLIC OF;VE
VIET NAM;VN
VIRGIN ISLANDS, BRITISH;VG
VIRGIN ISLANDS, U.S.;VI
WALLIS AND FUTUNA;WF
WESTERN SAHARA;EH
YEMEN;YE
ZAMBIA;ZM
ZIMBABWE;ZW
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,176 @@
#
# ISO 15924 - Codes for the representation of names of scripts
# Codes pour la représentation des noms d’écritures
# Format:
# Code;N°;English Name;Nom français;PVA;Date
#
Afak;439;Afaka;afaka;;2010-12-21
Aghb;239;Caucasian Albanian;aghbanien;;2012-10-16
Ahom;338;Ahom, Tai Ahom;âhom;;2012-11-01
Arab;160;Arabic;arabe;Arabic;2004-05-01
Armi;124;Imperial Aramaic;araméen impérial;Imperial_Aramaic;2009-06-01
Armn;230;Armenian;arménien;Armenian;2004-05-01
Avst;134;Avestan;avestique;Avestan;2009-06-01
Bali;360;Balinese;balinais;Balinese;2006-10-10
Bamu;435;Bamum;bamoum;Bamum;2009-06-01
Bass;259;Bassa Vah;bassa;;2010-03-26
Batk;365;Batak;batik;Batak;2010-07-23
Beng;325;Bengali;bengalî;Bengali;2004-05-01
Blis;550;Blissymbols;symboles Bliss;;2004-05-01
Bopo;285;Bopomofo;bopomofo;Bopomofo;2004-05-01
Brah;300;Brahmi;brahma;Brahmi;2010-07-23
Brai;570;Braille;braille;Braille;2004-05-01
Bugi;367;Buginese;bouguis;Buginese;2006-06-21
Buhd;372;Buhid;bouhide;Buhid;2004-05-01
Cakm;349;Chakma;chakma;Chakma;2012-02-06
Cans;440;Unified Canadian Aboriginal Syllabics;syllabaire autochtone canadien unifié;Canadian_Aboriginal;2004-05-29
Cari;201;Carian;carien;Carian;2007-07-02
Cham;358;Cham;cham (čam, tcham);Cham;2009-11-11
Cher;445;Cherokee;tchérokî;Cherokee;2004-05-01
Cirt;291;Cirth;cirth;;2004-05-01
Copt;204;Coptic;copte;Coptic;2006-06-21
Cprt;403;Cypriot;syllabaire chypriote;Cypriot;2004-05-01
Cyrl;220;Cyrillic;cyrillique;Cyrillic;2004-05-01
Cyrs;221;Cyrillic (Old Church Slavonic variant);cyrillique (variante slavonne);;2004-05-01
Deva;315;Devanagari (Nagari);dévanâgarî;Devanagari;2004-05-01
Dsrt;250;Deseret (Mormon);déseret (mormon);Deseret;2004-05-01
Dupl;755;Duployan shorthand, Duployan stenography;sténographie Duployé;;2010-07-18
Egyd;070;Egyptian demotic;démotique égyptien;;2004-05-01
Egyh;060;Egyptian hieratic;hiératique égyptien;;2004-05-01
Egyp;050;Egyptian hieroglyphs;hiéroglyphes égyptiens;Egyptian_Hieroglyphs;2009-06-01
Elba;226;Elbasan;elbasan;;2010-07-18
Ethi;430;Ethiopic (Geʻez);éthiopien (geʻez, guèze);Ethiopic;2004-10-25
Geor;240;Georgian (Mkhedruli);géorgien (mkhédrouli);Georgian;2004-05-29
Geok;241;Khutsuri (Asomtavruli and Nuskhuri);khoutsouri (assomtavrouli et nouskhouri);Georgian;2012-10-16
Glag;225;Glagolitic;glagolitique;Glagolitic;2006-06-21
Goth;206;Gothic;gotique;Gothic;2004-05-01
Gran;343;Grantha;grantha;;2009-11-11
Grek;200;Greek;grec;Greek;2004-05-01
Gujr;320;Gujarati;goudjarâtî (gujrâtî);Gujarati;2004-05-01
Guru;310;Gurmukhi;gourmoukhî;Gurmukhi;2004-05-01
Hang;286;Hangul (Hangŭl, Hangeul);hangûl (hangŭl, hangeul);Hangul;2004-05-29
Hani;500;Han (Hanzi, Kanji, Hanja);idéogrammes han (sinogrammes);Han;2009-02-23
Hano;371;Hanunoo (Hanunóo);hanounóo;Hanunoo;2004-05-29
Hans;501;Han (Simplified variant);idéogrammes han (variante simplifiée);;2004-05-29
Hant;502;Han (Traditional variant);idéogrammes han (variante traditionnelle);;2004-05-29
Hatr;127;Hatran;hatrénien;;2012-11-01
Hebr;125;Hebrew;hébreu;Hebrew;2004-05-01
Hira;410;Hiragana;hiragana;Hiragana;2004-05-01
Hluw;080;Anatolian Hieroglyphs (Luwian Hieroglyphs, Hittite Hieroglyphs);hiéroglyphes anatoliens (hiéroglyphes louvites, hiéroglyphes hittites);;2011-12-09
Hmng;450;Pahawh Hmong;pahawh hmong;;2004-05-01
Hrkt;412;Japanese syllabaries (alias for Hiragana + Katakana);syllabaires japonais (alias pour hiragana + katakana);Katakana_Or_Hiragana;2011-06-21
Hung;176;Old Hungarian (Hungarian Runic);runes hongroises (ancien hongrois);;2012-10-16
Inds;610;Indus (Harappan);indus;;2004-05-01
Ital;210;Old Italic (Etruscan, Oscan, etc.);ancien italique (étrusque, osque, etc.);Old_Italic;2004-05-29
Java;361;Javanese;javanais;Javanese;2009-06-01
Jpan;413;Japanese (alias for Han + Hiragana + Katakana);japonais (alias pour han + hiragana + katakana);;2006-06-21
Jurc;510;Jurchen;jurchen;;2010-12-21
Kali;357;Kayah Li;kayah li;Kayah_Li;2007-07-02
Kana;411;Katakana;katakana;Katakana;2004-05-01
Khar;305;Kharoshthi;kharochthî;Kharoshthi;2006-06-21
Khmr;355;Khmer;khmer;Khmer;2004-05-29
Khoj;322;Khojki;khojkî;;2011-06-21
Knda;345;Kannada;kannara (canara);Kannada;2004-05-29
Kore;287;Korean (alias for Hangul + Han);coréen (alias pour hangûl + han);;2007-06-13
Kpel;436;Kpelle;kpèllé;;2010-03-26
Kthi;317;Kaithi;kaithî;Kaithi;2009-06-01
Lana;351;Tai Tham (Lanna);taï tham (lanna);Tai_Tham;2009-06-01
Laoo;356;Lao;laotien;Lao;2004-05-01
Latf;217;Latin (Fraktur variant);latin (variante brisée);;2004-05-01
Latg;216;Latin (Gaelic variant);latin (variante gaélique);;2004-05-01
Latn;215;Latin;latin;Latin;2004-05-01
Lepc;335;Lepcha (Róng);lepcha (róng);Lepcha;2007-07-02
Limb;336;Limbu;limbou;Limbu;2004-05-29
Lina;400;Linear A;linéaire A;;2004-05-01
Linb;401;Linear B;linéaire B;Linear_B;2004-05-29
Lisu;399;Lisu (Fraser);lisu (Fraser);Lisu;2009-06-01
Loma;437;Loma;loma;;2010-03-26
Lyci;202;Lycian;lycien;Lycian;2007-07-02
Lydi;116;Lydian;lydien;Lydian;2007-07-02
Mahj;314;Mahajani;mahâjanî;;2012-10-16
Mand;140;Mandaic, Mandaean;mandéen;Mandaic;2010-07-23
Mani;139;Manichaean;manichéen;;2007-07-15
Maya;090;Mayan hieroglyphs;hiéroglyphes mayas;;2004-05-01
Mend;438;Mende Kikakui;mendé kikakui;;2013-10-12
Merc;101;Meroitic Cursive;cursif méroïtique;Meroitic_Cursive;2012-02-06
Mero;100;Meroitic Hieroglyphs;hiéroglyphes méroïtiques;Meroitic_Hieroglyphs;2012-02-06
Mlym;347;Malayalam;malayâlam;Malayalam;2004-05-01
Modi;323;Modi, Moḍī;modî;;2013-10-12
Moon;218;Moon (Moon code, Moon script, Moon type);écriture Moon;;2006-12-11
Mong;145;Mongolian;mongol;Mongolian;2004-05-01
Mroo;199;Mro, Mru;mro;;2010-12-21
Mtei;337;Meitei Mayek (Meithei, Meetei);meitei mayek;Meetei_Mayek;2009-06-01
Mult;323; Multani;multanî;;2012-11-01
Mymr;350;Myanmar (Burmese);birman;Myanmar;2004-05-01
Narb;106;Old North Arabian (Ancient North Arabian);nord-arabique;;2010-03-26
Nbat;159;Nabataean;nabatéen;;2010-03-26
Nkgb;420;Nakhi Geba ('Na-'Khi ²Ggŏ-¹baw, Naxi Geba);nakhi géba;;2009-02-23
Nkoo;165;NKo;nko;Nko;2006-10-10
Nshu;499;Nüshu;nüshu;;2010-12-21
Ogam;212;Ogham;ogam;Ogham;2004-05-01
Olck;261;Ol Chiki (Ol Cemet, Ol, Santali);ol tchiki;Ol_Chiki;2007-07-02
Orkh;175;Old Turkic, Orkhon Runic;orkhon;Old_Turkic;2009-06-01
Orya;327;Oriya;oriyâ;Oriya;2004-05-01
Osma;260;Osmanya;osmanais;Osmanya;2004-05-01
Palm;126;Palmyrene;palmyrénien;;2010-03-26
Pauc;263;Pau Cin Hau;paou chin haou;;2013-10-12
Perm;227;Old Permic;ancien permien;;2004-05-01
Phag;331;Phags-pa;phags pa;Phags_Pa;2006-10-10
Phli;131;Inscriptional Pahlavi;pehlevi des inscriptions;Inscriptional_Pahlavi;2009-06-01
Phlp;132;Psalter Pahlavi;pehlevi des psautiers;;2007-11-26
Phlv;133;Book Pahlavi;pehlevi des livres;;2007-07-15
Phnx;115;Phoenician;phénicien;Phoenician;2006-10-10
Plrd;282;Miao (Pollard);miao (Pollard);Miao;2012-02-06
Prti;130;Inscriptional Parthian;parthe des inscriptions;Inscriptional_Parthian;2009-06-01
Qaaa;900;Reserved for private use (start);réservé à lusage privé (début);;2004-05-29
Qabx;949;Reserved for private use (end);réservé à lusage privé (fin);;2004-05-29
Rjng;363;Rejang (Redjang, Kaganga);redjang (kaganga);Rejang;2009-02-23
Roro;620;Rongorongo;rongorongo;;2004-05-01
Runr;211;Runic;runique;Runic;2004-05-01
Samr;123;Samaritan;samaritain;Samaritan;2009-06-01
Sara;292;Sarati;sarati;;2004-05-29
Sarb;105;Old South Arabian;sud-arabique, himyarite;Old_South_Arabian;2009-06-01
Saur;344;Saurashtra;saurachtra;Saurashtra;2007-07-02
Sgnw;095;SignWriting;SignÉcriture, SignWriting;;2006-10-10
Shaw;281;Shavian (Shaw);shavien (Shaw);Shavian;2004-05-01
Shrd;319;Sharada, Śāradā;charada, shard;Sharada;2012-02-06
Sidd;302;Siddham, Siddhaṃ, Siddhamātṛkā;siddham;;2013-10-12
Sind;318;Khudawadi, Sindhi;khoudawadî, sindhî;;2010-12-21
Sinh;348;Sinhala;singhalais;Sinhala;2004-05-01
Sora;398;Sora Sompeng;sora sompeng;Sora_Sompeng;2012-02-06
Sund;362;Sundanese;sundanais;Sundanese;2007-07-02
Sylo;316;Syloti Nagri;sylotî nâgrî;Syloti_Nagri;2006-06-21
Syrc;135;Syriac;syriaque;Syriac;2004-05-01
Syre;138;Syriac (Estrangelo variant);syriaque (variante estranghélo);;2004-05-01
Syrj;137;Syriac (Western variant);syriaque (variante occidentale);;2004-05-01
Syrn;136;Syriac (Eastern variant);syriaque (variante orientale);;2004-05-01
Tagb;373;Tagbanwa;tagbanoua;Tagbanwa;2004-05-01
Takr;321;Takri, Ṭākrī, Ṭāṅkrī;tâkrî;Takri;2012-02-06
Tale;353;Tai Le;taï-le;Tai_Le;2004-10-25
Talu;354;New Tai Lue;nouveau taï-lue;New_Tai_Lue;2006-06-21
Taml;346;Tamil;tamoul;Tamil;2004-05-01
Tang;520;Tangut;tangoute;;2010-12-21
Tavt;359;Tai Viet;taï viêt;Tai_Viet;2009-06-01
Telu;340;Telugu;télougou;Telugu;2004-05-01
Teng;290;Tengwar;tengwar;;2004-05-01
Tfng;120;Tifinagh (Berber);tifinagh (berbère);Tifinagh;2006-06-21
Tglg;370;Tagalog (Baybayin, Alibata);tagal (baybayin, alibata);Tagalog;2009-02-23
Thaa;170;Thaana;thâna;Thaana;2004-05-01
Thai;352;Thai;thaï;Thai;2004-05-01
Tibt;330;Tibetan;tibétain;Tibetan;2004-05-01
Tirh;326;Tirhuta;tirhouta;;2011-12-09
Ugar;040;Ugaritic;ougaritique;Ugaritic;2004-05-01
Vaii;470;Vai;vaï;Vai;2007-07-02
Visp;280;Visible Speech;parole visible;;2004-05-01
Wara;262;Warang Citi (Varang Kshiti);warang citi;;2009-11-11
Wole;480;Woleai;woléaï;;2010-12-21
Xpeo;030;Old Persian;cunéiforme persépolitain;Old_Persian;2006-06-21
Xsux;020;Cuneiform, Sumero-Akkadian;cunéiforme suméro-akkadien;Cuneiform;2006-10-10
Yiii;460;Yi;yi;Yi;2004-05-01
Zinh;994;Code for inherited script;codet pour écriture héritée;Inherited;2009-02-23
Zmth;995;Mathematical notation;notation mathématique;;2007-11-26
Zsym;996;Symbols;symboles;;2007-11-26
Zxxx;997;Code for unwritten documents;codet pour les documents non écrits;;2011-06-21
Zyyy;998;Code for undetermined script;codet pour écriture indéterminée;Common;2004-05-29
Zzzz;999;Code for uncoded script;codet pour écriture non codée;Unknown;2006-10-10
@@ -0,0 +1,474 @@
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
aar aa Afar, afar 0 0
abk ab Abkhazian 0 0
ace Achinese 0 0
ach Acoli 0 0
ada Adangme 0 0
ady adyghé 0 0
afa Afro-Asiatic (Other) 0 0
afh Afrihili 0 0
afr af Afrikaans 1 0
ain Ainu 0 0
aka ak Akan 0 0
akk Akkadian 0 0
alb sq Albanian 1 1
ale Aleut 0 0
alg Algonquian languages 0 0
alt Southern Altai 0 0
amh am Amharic 0 0
ang English, Old (ca.450-1100) 0 0
apa Apache languages 0 0
ara ar Arabic 1 1
arc Aramaic 0 0
arg an Aragonese 0 0
arm hy Armenian 1 0
arn Araucanian 0 0
arp Arapaho 0 0
art Artificial (Other) 0 0
arw Arawak 0 0
asm as Assamese 0 0
ast Asturian, Bable 0 0
ath Athapascan languages 0 0
aus Australian languages 0 0
ava av Avaric 0 0
ave ae Avestan 0 0
awa Awadhi 0 0
aym ay Aymara 0 0
aze az Azerbaijani 0 0
bad Banda 0 0
bai Bamileke languages 0 0
bak ba Bashkir 0 0
bal Baluchi 0 0
bam bm Bambara 0 0
ban Balinese 0 0
baq eu Basque 1 1
bas Basa 0 0
bat Baltic (Other) 0 0
bej Beja 0 0
bel be Belarusian 0 0
bem Bemba 0 0
ben bn Bengali 1 0
ber Berber (Other) 0 0
bho Bhojpuri 0 0
bih bh Bihari 0 0
bik Bikol 0 0
bin Bini 0 0
bis bi Bislama 0 0
bla Siksika 0 0
bnt Bantu (Other) 0 0
bos bs Bosnian 1 0
bra Braj 0 0
bre br Breton 1 0
btk Batak (Indonesia) 0 0
bua Buriat 0 0
bug Buginese 0 0
bul bg Bulgarian 1 1
bur my Burmese 1 0
byn Blin 0 0
cad Caddo 0 0
cai Central American Indian (Other) 0 0
car Carib 0 0
cat ca Catalan 1 1
cau Caucasian (Other) 0 0
ceb Cebuano 0 0
cel Celtic (Other) 0 0
cha ch Chamorro 0 0
chb Chibcha 0 0
che ce Chechen 0 0
chg Chagatai 0 0
chi zh Chinese 1 1
chk Chuukese 0 0
chm Mari 0 0
chn Chinook jargon 0 0
cho Choctaw 0 0
chp Chipewyan 0 0
chr Cherokee 0 0
chu cu Church Slavic 0 0
chv cv Chuvash 0 0
chy Cheyenne 0 0
cmc Chamic languages 0 0
cop Coptic 0 0
cor kw Cornish 0 0
cos co Corsican 0 0
cpe Creoles and pidgins, English based (Other) 0 0
cpf Creoles and pidgins, French-based (Other) 0 0
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
cre cr Cree 0 0
crh Crimean Tatar 0 0
crp Creoles and pidgins (Other) 0 0
csb Kashubian 0 0
cus Cushitic (Other)' couchitiques, autres langues 0 0
cze cs Czech 1 1
dak Dakota 0 0
dan da Danish 1 1
dar Dargwa 0 0
day Dayak 0 0
del Delaware 0 0
den Slave (Athapascan) 0 0
dgr Dogrib 0 0
din Dinka 0 0
div dv Divehi 0 0
doi Dogri 0 0
dra Dravidian (Other) 0 0
dua Duala 0 0
dum Dutch, Middle (ca.1050-1350) 0 0
dut nl Dutch 1 1
dyu Dyula 0 0
dzo dz Dzongkha 0 0
efi Efik 0 0
egy Egyptian (Ancient) 0 0
eka Ekajuk 0 0
elx Elamite 0 0
eng en English 1 1
enm English, Middle (1100-1500) 0 0
epo eo Esperanto 1 0
est et Estonian 1 1
ewe ee Ewe 0 0
ewo Ewondo 0 0
fan Fang 0 0
fao fo Faroese 0 0
fat Fanti 0 0
fij fj Fijian 0 0
fil Filipino 0 0
fin fi Finnish 1 1
fiu Finno-Ugrian (Other) 0 0
fon Fon 0 0
fre fr French 1 1
frm French, Middle (ca.1400-1600) 0 0
fro French, Old (842-ca.1400) 0 0
fry fy Frisian 0 0
ful ff Fulah 0 0
fur Friulian 0 0
gaa Ga 0 0
gay Gayo 0 0
gba Gbaya 0 0
gem Germanic (Other) 0 0
geo ka Georgian 1 1
ger de German 1 1
gez Geez 0 0
gil Gilbertese 0 0
gla gd Gaelic 0 0
gle ga Irish 0 0
glg gl Galician 1 1
glv gv Manx 0 0
gmh German, Middle High (ca.1050-1500) 0 0
goh German, Old High (ca.750-1050) 0 0
gon Gondi 0 0
gor Gorontalo 0 0
got Gothic 0 0
grb Grebo 0 0
grc Greek, Ancient (to 1453) 0 0
ell el Greek 1 1
grn gn Guarani 0 0
guj gu Gujarati 0 0
gwi Gwich´in 0 0
hai Haida 0 0
hat ht Haitian 0 0
hau ha Hausa 0 0
haw Hawaiian 0 0
heb he Hebrew 1 1
her hz Herero 0 0
hil Hiligaynon 0 0
him Himachali 0 0
hin hi Hindi 1 1
hit Hittite 0 0
hmn Hmong 0 0
hmo ho Hiri Motu 0 0
hrv hr Croatian 1 1
hun hu Hungarian 1 1
hup Hupa 0 0
iba Iban 0 0
ibo ig Igbo 0 0
ice is Icelandic 1 1
ido io Ido 0 0
iii ii Sichuan Yi 0 0
ijo Ijo 0 0
iku iu Inuktitut 0 0
ile ie Interlingue 0 0
ilo Iloko 0 0
ina ia Interlingua (International Auxiliary Language Asso 0 0
inc Indic (Other) 0 0
ind id Indonesian 1 1
ine Indo-European (Other) 0 0
inh Ingush 0 0
ipk ik Inupiaq 0 0
ira Iranian (Other) 0 0
iro Iroquoian languages 0 0
ita it Italian 1 1
jav jv Javanese 0 0
jpn ja Japanese 1 1
jpr Judeo-Persian 0 0
jrb Judeo-Arabic 0 0
kaa Kara-Kalpak 0 0
kab Kabyle 0 0
kac Kachin 0 0
kal kl Kalaallisut 0 0
kam Kamba 0 0
kan kn Kannada 0 0
kar Karen 0 0
kas ks Kashmiri 0 0
kau kr Kanuri 0 0
kaw Kawi 0 0
kaz kk Kazakh 1 0
kbd Kabardian 0 0
kha Khasi 0 0
khi Khoisan (Other) 0 0
khm km Khmer 1 1
kho Khotanese 0 0
kik ki Kikuyu 0 0
kin rw Kinyarwanda 0 0
kir ky Kirghiz 0 0
kmb Kimbundu 0 0
kok Konkani 0 0
kom kv Komi 0 0
kon kg Kongo 0 0
kor ko Korean 1 1
kos Kosraean 0 0
kpe Kpelle 0 0
krc Karachay-Balkar 0 0
kro Kru 0 0
kru Kurukh 0 0
kua kj Kuanyama 0 0
kum Kumyk 0 0
kur ku Kurdish 0 0
kut Kutenai 0 0
lad Ladino 0 0
lah Lahnda 0 0
lam Lamba 0 0
lao lo Lao 0 0
lat la Latin 0 0
lav lv Latvian 1 0
lez Lezghian 0 0
lim li Limburgan 0 0
lin ln Lingala 0 0
lit lt Lithuanian 1 0
lol Mongo 0 0
loz Lozi 0 0
ltz lb Luxembourgish 1 0
lua Luba-Lulua 0 0
lub lu Luba-Katanga 0 0
lug lg Ganda 0 0
lui Luiseno 0 0
lun Lunda 0 0
luo Luo (Kenya and Tanzania) 0 0
lus lushai 0 0
mac mk Macedonian 1 1
mad Madurese 0 0
mag Magahi 0 0
mah mh Marshallese 0 0
mai Maithili 0 0
mak Makasar 0 0
mal ml Malayalam 1 0
man Mandingo 0 0
mao mi Maori 0 0
map Austronesian (Other) 0 0
mar mr Marathi 0 0
mas Masai 0 0
may ms Malay 1 1
mdf Moksha 0 0
mdr Mandar 0 0
men Mende 0 0
mga Irish, Middle (900-1200) 0 0
mic Mi'kmaq 0 0
min Minangkabau 0 0
mis Miscellaneous languages 0 0
mkh Mon-Khmer (Other) 0 0
mlg mg Malagasy 0 0
mlt mt Maltese 0 0
mnc Manchu 0 0
mni Manipuri 0 0
mno Manobo languages 0 0
moh Mohawk 0 0
mol mo Moldavian 0 0
mon mn Mongolian 1 0
mos Mossi 0 0
mwl Mirandese 0 0
mul Multiple languages 0 0
mun Munda languages 0 0
mus Creek 0 0
mwr Marwari 0 0
myn Mayan languages 0 0
myv Erzya 0 0
nah Nahuatl 0 0
nai North American Indian 0 0
nap Neapolitan 0 0
nau na Nauru 0 0
nav nv Navajo 0 0
nbl nr Ndebele, South 0 0
nde nd Ndebele, North 0 0
ndo ng Ndonga 0 0
nds Low German 0 0
nep ne Nepali 0 0
new Nepal Bhasa 0 0
nia Nias 0 0
nic Niger-Kordofanian (Other) 0 0
niu Niuean 0 0
nno nn Norwegian Nynorsk 0 0
nob nb Norwegian Bokmal 0 0
nog Nogai 0 0
non Norse, Old 0 0
nor no Norwegian 1 1
nso Northern Sotho 0 0
nub Nubian languages 0 0
nwc Classical Newari 0 0
nya ny Chichewa 0 0
nym Nyamwezi 0 0
nyn Nyankole 0 0
nyo Nyoro 0 0
nzi Nzima 0 0
oci oc Occitan 1 1
oji oj Ojibwa 0 0
ori or Oriya 0 0
orm om Oromo 0 0
osa Osage 0 0
oss os Ossetian 0 0
ota Turkish, Ottoman (1500-1928) 0 0
oto Otomian languages 0 0
paa Papuan (Other) 0 0
pag Pangasinan 0 0
pal Pahlavi 0 0
pam Pampanga 0 0
pan pa Panjabi 0 0
pap Papiamento 0 0
pau Palauan 0 0
peo Persian, Old (ca.600-400 B.C.) 0 0
per fa Persian 1 1
phi Philippine (Other) 0 0
phn Phoenician 0 0
pli pi Pali 0 0
pol pl Polish 1 1
pon Pohnpeian 0 0
por pt Portuguese 1 1
pra Prakrit languages 0 0
pro Provençal, Old (to 1500) 0 0
pus ps Pushto 0 0
que qu Quechua 0 0
raj Rajasthani 0 0
rap Rapanui 0 0
rar Rarotongan 0 0
roa Romance (Other) 0 0
roh rm Raeto-Romance 0 0
rom Romany 0 0
run rn Rundi 0 0
rup Aromanian 0 0
rus ru Russian 1 1
sad Sandawe 0 0
sag sg Sango 0 0
sah Yakut 0 0
sai South American Indian (Other) 0 0
sal Salishan languages 0 0
sam Samaritan Aramaic 0 0
san sa Sanskrit 0 0
sas Sasak 0 0
sat Santali 0 0
scc sr Serbian 1 1
scn Sicilian 0 0
sco Scots 0 0
sel Selkup 0 0
sem Semitic (Other) 0 0
sga Irish, Old (to 900) 0 0
sgn Sign Languages 0 0
shn Shan 0 0
sid Sidamo 0 0
sin si Sinhalese 1 1
sio Siouan languages 0 0
sit Sino-Tibetan (Other) 0 0
sla Slavic (Other) 0 0
slo sk Slovak 1 1
slv sl Slovenian 1 1
sma Southern Sami 0 0
sme se Northern Sami 0 0
smi Sami languages (Other) 0 0
smj Lule Sami 0 0
smn Inari Sami 0 0
smo sm Samoan 0 0
sms Skolt Sami 0 0
sna sn Shona 0 0
snd sd Sindhi 0 0
snk Soninke 0 0
sog Sogdian 0 0
som so Somali 0 0
son Songhai 0 0
sot st Sotho, Southern 0 0
spa es Spanish 1 1
srd sc Sardinian 0 0
srr Serer 0 0
ssa Nilo-Saharan (Other) 0 0
ssw ss Swati 0 0
suk Sukuma 0 0
sun su Sundanese 0 0
sus Susu 0 0
sux Sumerian 0 0
swa sw Swahili 1 0
swe sv Swedish 1 1
syr Syriac 1 0
tah ty Tahitian 0 0
tai Tai (Other) 0 0
tam ta Tamil 1 0
tat tt Tatar 0 0
tel te Telugu 1 0
tem Timne 0 0
ter Tereno 0 0
tet Tetum 0 0
tgk tg Tajik 0 0
tgl tl Tagalog 1 1
tha th Thai 1 1
tib bo Tibetan 0 0
tig Tigre 0 0
tir ti Tigrinya 0 0
tiv Tiv 0 0
tkl Tokelau 0 0
tlh Klingon 0 0
tli Tlingit 0 0
tmh Tamashek 0 0
tog Tonga (Nyasa) 0 0
ton to Tonga (Tonga Islands) 0 0
tpi Tok Pisin 0 0
tsi Tsimshian 0 0
tsn tn Tswana 0 0
tso ts Tsonga 0 0
tuk tk Turkmen 0 0
tum Tumbuka 0 0
tup Tupi languages 0 0
tur tr Turkish 1 1
tut Altaic (Other) 0 0
tvl Tuvalu 0 0
twi tw Twi 0 0
tyv Tuvinian 0 0
udm Udmurt 0 0
uga Ugaritic 0 0
uig ug Uighur 0 0
ukr uk Ukrainian 1 1
umb Umbundu 0 0
und Undetermined 0 0
urd ur Urdu 1 0
uzb uz Uzbek 0 0
vai Vai 0 0
ven ve Venda 0 0
vie vi Vietnamese 1 1
vol vo Volapük 0 0
vot Votic 0 0
wak Wakashan languages 0 0
wal Walamo 0 0
war Waray 0 0
was Washo 0 0
wel cy Welsh 0 0
wen Sorbian languages 0 0
wln wa Walloon 0 0
wol wo Wolof 0 0
xal Kalmyk 0 0
xho xh Xhosa 0 0
yao Yao 0 0
yap Yapese 0 0
yid yi Yiddish 0 0
yor yo Yoruba 0 0
ypk Yupik languages 0 0
zap Zapotec 0 0
zen Zenaga 0 0
zha za Zhuang 0 0
znd Zande 0 0
zul zu Zulu 0 0
zun Zuni 0 0
rum ro Romanian 1 1
pob pb Brazilian 1 1
mne Montenegrin 1 0
@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
class Error(Exception):
"""Base class for all exceptions in babelfish"""
pass
class LanguageError(Error, AttributeError):
"""Base class for all language exceptions in babelfish"""
pass
class LanguageConvertError(LanguageError):
"""Exception raised by converters when :meth:`~babelfish.converters.LanguageConverter.convert` fails
:param string alpha3: alpha3 code that failed conversion
:param country: country code that failed conversion, if any
:type country: string or None
:param script: script code that failed conversion, if any
:type script: string or None
"""
def __init__(self, alpha3, country=None, script=None):
self.alpha3 = alpha3
self.country = country
self.script = script
def __str__(self):
s = self.alpha3
if self.country is not None:
s += '-' + self.country
if self.script is not None:
s += '-' + self.script
return s
class LanguageReverseError(LanguageError):
"""Exception raised by converters when :meth:`~babelfish.converters.LanguageReverseConverter.reverse` fails
:param string code: code that failed reverse conversion
"""
def __init__(self, code):
self.code = code
def __str__(self):
return repr(self.code)
class CountryError(Error, AttributeError):
"""Base class for all country exceptions in babelfish"""
pass
class CountryConvertError(CountryError):
"""Exception raised by converters when :meth:`~babelfish.converters.CountryConverter.convert` fails
:param string alpha2: alpha2 code that failed conversion
"""
def __init__(self, alpha2):
self.alpha2 = alpha2
def __str__(self):
return self.alpha2
class CountryReverseError(CountryError):
"""Exception raised by converters when :meth:`~babelfish.converters.CountryReverseConverter.reverse` fails
:param string code: code that failed reverse conversion
"""
def __init__(self, code):
self.code = code
def __str__(self):
return repr(self.code)
@@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from functools import partial
from pkg_resources import resource_stream # @UnresolvedImport
from .converters import ConverterManager
from .country import Country
from .exceptions import LanguageConvertError
from .script import Script
from . import basestr
LANGUAGES = set()
LANGUAGE_MATRIX = []
#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])
f = resource_stream('babelfish', 'data/iso-639-3.tab')
f.readline()
for l in f:
iso_language = IsoLanguage(*l.decode('utf-8').split('\t'))
LANGUAGES.add(iso_language.alpha3)
LANGUAGE_MATRIX.append(iso_language)
f.close()
class LanguageConverterManager(ConverterManager):
""":class:`~babelfish.converters.ConverterManager` for language converters"""
entry_point = 'babelfish.language_converters'
internal_converters = ['alpha2 = babelfish.converters.alpha2:Alpha2Converter',
'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
'name = babelfish.converters.name:NameConverter',
'scope = babelfish.converters.scope:ScopeConverter',
'type = babelfish.converters.type:LanguageTypeConverter',
'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter']
language_converters = LanguageConverterManager()
class LanguageMeta(type):
"""The :class:`Language` metaclass
Dynamically redirect :meth:`Language.frommycode` to :meth:`Language.fromcode` with the ``mycode`` `converter`
"""
def __getattr__(cls, name):
if name.startswith('from'):
return partial(cls.fromcode, converter=name[4:])
return type.__getattribute__(cls, name)
class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
"""A human language
A human language is composed of a language part following the ISO-639
standard and can be country-specific when a :class:`~babelfish.country.Country`
is specified.
The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)
:param string language: the language as a 3-letter ISO-639-3 code
:param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
:type country: string or :class:`~babelfish.country.Country` or None
:param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
:type script: string or :class:`~babelfish.script.Script` or None
:param unknown: the unknown language as a three-letters ISO-639-3 code to use as fallback
:type unknown: string or None
:raise: ValueError if the language could not be recognized and `unknown` is ``None``
"""
def __init__(self, language, country=None, script=None, unknown=None):
if unknown is not None and language not in LANGUAGES:
language = unknown
if language not in LANGUAGES:
raise ValueError('%r is not a valid language' % language)
self.alpha3 = language
self.country = None
if isinstance(country, Country):
self.country = country
elif country is None:
self.country = None
else:
self.country = Country(country)
self.script = None
if isinstance(script, Script):
self.script = script
elif script is None:
self.script = None
else:
self.script = Script(script)
@classmethod
def fromcode(cls, code, converter):
"""Create a :class:`Language` by its `code` using `converter` to
:meth:`~babelfish.converters.LanguageReverseConverter.reverse` it
:param string code: the code to reverse
:param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
:return: the corresponding :class:`Language` instance
:rtype: :class:`Language`
"""
return cls(*language_converters[converter].reverse(code))
@classmethod
def fromietf(cls, ietf):
"""Create a :class:`Language` by from an IETF language code
:param string ietf: the ietf code
:return: the corresponding :class:`Language` instance
:rtype: :class:`Language`
"""
subtags = ietf.split('-')
language_subtag = subtags.pop(0).lower()
if len(language_subtag) == 2:
language = cls.fromalpha2(language_subtag)
else:
language = cls(language_subtag)
while subtags:
subtag = subtags.pop(0)
if len(subtag) == 2:
language.country = Country(subtag.upper())
else:
language.script = Script(subtag.capitalize())
if language.script is not None:
if subtags:
raise ValueError('Wrong IETF format. Unmatched subtags: %r' % subtags)
break
return language
def __getstate__(self):
return self.alpha3, self.country, self.script
def __setstate__(self, state):
self.alpha3, self.country, self.script = state
def __getattr__(self, name):
alpha3 = self.alpha3
country = self.country.alpha2 if self.country is not None else None
script = self.script.code if self.script is not None else None
try:
return language_converters[name].convert(alpha3, country, script)
except KeyError:
raise AttributeError(name)
def __hash__(self):
return hash(str(self))
def __eq__(self, other):
if isinstance(other, basestr):
return str(self) == other
if not isinstance(other, Language):
return False
return (self.alpha3 == other.alpha3 and
self.country == other.country and
self.script == other.script)
def __ne__(self, other):
return not self == other
def __bool__(self):
return self.alpha3 != 'und'
__nonzero__ = __bool__
def __repr__(self):
return '<Language [%s]>' % self
def __str__(self):
try:
s = self.alpha2
except LanguageConvertError:
s = self.alpha3
if self.country is not None:
s += '-' + str(self.country)
if self.script is not None:
s += '-' + str(self.script)
return s
@@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from pkg_resources import resource_stream # @UnresolvedImport
from . import basestr
#: Script code to script name mapping
SCRIPTS = {}
#: List of countries in the ISO-15924 as namedtuple of code, number, name, french_name, pva and date
SCRIPT_MATRIX = []
#: The namedtuple used in the :data:`SCRIPT_MATRIX`
IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])
f = resource_stream('babelfish', 'data/iso15924-utf8-20131012.txt')
f.readline()
for l in f:
l = l.decode('utf-8').strip()
if not l or l.startswith('#'):
continue
script = IsoScript._make(l.split(';'))
SCRIPT_MATRIX.append(script)
SCRIPTS[script.code] = script.name
f.close()
class Script(object):
"""A human writing system
A script is represented by a 4-letter code from the ISO-15924 standard
:param string script: 4-letter ISO-15924 script code
"""
def __init__(self, script):
if script not in SCRIPTS:
raise ValueError('%r is not a valid script' % script)
#: ISO-15924 4-letter script code
self.code = script
@property
def name(self):
"""English name of the script"""
return SCRIPTS[self.code]
def __getstate__(self):
return self.code
def __setstate__(self, state):
self.code = state
def __hash__(self):
return hash(self.code)
def __eq__(self, other):
if isinstance(other, basestr):
return self.code == other
if not isinstance(other, Script):
return False
return self.code == other.code
def __ne__(self, other):
return not self == other
def __repr__(self):
return '<Script [%s]>' % self
def __str__(self):
return self.code
@@ -0,0 +1,368 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
import re
import sys
import pickle
from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
from pkg_resources import resource_stream # @UnresolvedImport
from babelfish import (LANGUAGES, Language, Country, Script, language_converters, country_converters,
LanguageReverseConverter, LanguageConvertError, LanguageReverseError, CountryReverseError)
if sys.version_info[:2] <= (2, 6):
_MAX_LENGTH = 80
def safe_repr(obj, short=False):
try:
result = repr(obj)
except Exception:
result = object.__repr__(obj)
if not short or len(result) < _MAX_LENGTH:
return result
return result[:_MAX_LENGTH] + ' [truncated]...'
class _AssertRaisesContext(object):
"""A context manager used to implement TestCase.assertRaises* methods."""
def __init__(self, expected, test_case, expected_regexp=None):
self.expected = expected
self.failureException = test_case.failureException
self.expected_regexp = expected_regexp
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, tb):
if exc_type is None:
try:
exc_name = self.expected.__name__
except AttributeError:
exc_name = str(self.expected)
raise self.failureException(
"{0} not raised".format(exc_name))
if not issubclass(exc_type, self.expected):
# let unexpected exceptions pass through
return False
self.exception = exc_value # store for later retrieval
if self.expected_regexp is None:
return True
expected_regexp = self.expected_regexp
if isinstance(expected_regexp, basestring):
expected_regexp = re.compile(expected_regexp)
if not expected_regexp.search(str(exc_value)):
raise self.failureException('"%s" does not match "%s"' %
(expected_regexp.pattern, str(exc_value)))
return True
class _Py26FixTestCase(object):
def assertIsNone(self, obj, msg=None):
"""Same as self.assertTrue(obj is None), with a nicer default message."""
if obj is not None:
standardMsg = '%s is not None' % (safe_repr(obj),)
self.fail(self._formatMessage(msg, standardMsg))
def assertIsNotNone(self, obj, msg=None):
"""Included for symmetry with assertIsNone."""
if obj is None:
standardMsg = 'unexpectedly None'
self.fail(self._formatMessage(msg, standardMsg))
def assertIn(self, member, container, msg=None):
"""Just like self.assertTrue(a in b), but with a nicer default message."""
if member not in container:
standardMsg = '%s not found in %s' % (safe_repr(member),
safe_repr(container))
self.fail(self._formatMessage(msg, standardMsg))
def assertNotIn(self, member, container, msg=None):
"""Just like self.assertTrue(a not in b), but with a nicer default message."""
if member in container:
standardMsg = '%s unexpectedly found in %s' % (safe_repr(member),
safe_repr(container))
self.fail(self._formatMessage(msg, standardMsg))
def assertIs(self, expr1, expr2, msg=None):
"""Just like self.assertTrue(a is b), but with a nicer default message."""
if expr1 is not expr2:
standardMsg = '%s is not %s' % (safe_repr(expr1),
safe_repr(expr2))
self.fail(self._formatMessage(msg, standardMsg))
def assertIsNot(self, expr1, expr2, msg=None):
"""Just like self.assertTrue(a is not b), but with a nicer default message."""
if expr1 is expr2:
standardMsg = 'unexpectedly identical: %s' % (safe_repr(expr1),)
self.fail(self._formatMessage(msg, standardMsg))
else:
class _Py26FixTestCase(object):
pass
class TestScript(TestCase, _Py26FixTestCase):
def test_wrong_script(self):
self.assertRaises(ValueError, lambda: Script('Azer'))
def test_eq(self):
self.assertEqual(Script('Latn'), Script('Latn'))
def test_ne(self):
self.assertNotEqual(Script('Cyrl'), Script('Latn'))
def test_hash(self):
self.assertEqual(hash(Script('Hira')), hash('Hira'))
def test_pickle(self):
self.assertEqual(pickle.loads(pickle.dumps(Script('Latn'))), Script('Latn'))
class TestCountry(TestCase, _Py26FixTestCase):
def test_wrong_country(self):
self.assertRaises(ValueError, lambda: Country('ZZ'))
def test_eq(self):
self.assertEqual(Country('US'), Country('US'))
def test_ne(self):
self.assertNotEqual(Country('GB'), Country('US'))
self.assertIsNotNone(Country('US'))
def test_hash(self):
self.assertEqual(hash(Country('US')), hash('US'))
def test_pickle(self):
for country in [Country('GB'), Country('US')]:
self.assertEqual(pickle.loads(pickle.dumps(country)), country)
def test_converter_name(self):
self.assertEqual(Country('US').name, 'UNITED STATES')
self.assertEqual(Country.fromname('UNITED STATES'), Country('US'))
self.assertEqual(Country.fromcode('UNITED STATES', 'name'), Country('US'))
self.assertRaises(CountryReverseError, lambda: Country.fromname('ZZZZZ'))
self.assertEqual(len(country_converters['name'].codes), 249)
class TestLanguage(TestCase, _Py26FixTestCase):
def test_languages(self):
self.assertEqual(len(LANGUAGES), 7874)
def test_wrong_language(self):
self.assertRaises(ValueError, lambda: Language('zzz'))
def test_unknown_language(self):
self.assertEqual(Language('zzzz', unknown='und'), Language('und'))
def test_converter_alpha2(self):
self.assertEqual(Language('eng').alpha2, 'en')
self.assertEqual(Language.fromalpha2('en'), Language('eng'))
self.assertEqual(Language.fromcode('en', 'alpha2'), Language('eng'))
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha2('zz'))
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)
self.assertEqual(len(language_converters['alpha2'].codes), 184)
def test_converter_alpha3b(self):
self.assertEqual(Language('fra').alpha3b, 'fre')
self.assertEqual(Language.fromalpha3b('fre'), Language('fra'))
self.assertEqual(Language.fromcode('fre', 'alpha3b'), Language('fra'))
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3b('zzz'))
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3b)
self.assertEqual(len(language_converters['alpha3b'].codes), 418)
def test_converter_alpha3t(self):
self.assertEqual(Language('fra').alpha3t, 'fra')
self.assertEqual(Language.fromalpha3t('fra'), Language('fra'))
self.assertEqual(Language.fromcode('fra', 'alpha3t'), Language('fra'))
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3t('zzz'))
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)
self.assertEqual(len(language_converters['alpha3t'].codes), 418)
def test_converter_name(self):
self.assertEqual(Language('eng').name, 'English')
self.assertEqual(Language.fromname('English'), Language('eng'))
self.assertEqual(Language.fromcode('English', 'name'), Language('eng'))
self.assertRaises(LanguageReverseError, lambda: Language.fromname('Zzzzzzzzz'))
self.assertEqual(len(language_converters['name'].codes), 7874)
def test_converter_scope(self):
self.assertEqual(language_converters['scope'].codes, set(['I', 'S', 'M']))
self.assertEqual(Language('eng').scope, 'individual')
self.assertEqual(Language('und').scope, 'special')
def test_converter_type(self):
self.assertEqual(language_converters['type'].codes, set(['A', 'C', 'E', 'H', 'L', 'S']))
self.assertEqual(Language('eng').type, 'living')
self.assertEqual(Language('und').type, 'special')
def test_converter_opensubtitles(self):
self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
# Montenegrin is not recognized as an ISO language (yet?) but for now it is
# unofficially accepted as Serbian from Montenegro
self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
self.assertRaises(LanguageReverseError, lambda: Language.fromopensubtitles('zzz'))
self.assertRaises(LanguageConvertError, lambda: Language('aaa').opensubtitles)
self.assertEqual(len(language_converters['opensubtitles'].codes), 606)
# test with all the LANGUAGES from the opensubtitles api
# downloaded from: http://www.opensubtitles.org/addons/export_languages.php
f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
f.readline()
for l in f:
idlang, alpha2, _, upload_enabled, web_enabled = l.decode('utf-8').strip().split('\t')
if not int(upload_enabled) and not int(web_enabled):
# do not test LANGUAGES that are too esoteric / not widely available
continue
self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
if alpha2:
self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
f.close()
def test_fromietf_country_script(self):
language = Language.fromietf('fra-FR-Latn')
self.assertEqual(language.alpha3, 'fra')
self.assertEqual(language.country, Country('FR'))
self.assertEqual(language.script, Script('Latn'))
def test_fromietf_country_no_script(self):
language = Language.fromietf('fra-FR')
self.assertEqual(language.alpha3, 'fra')
self.assertEqual(language.country, Country('FR'))
self.assertIsNone(language.script)
def test_fromietf_no_country_no_script(self):
language = Language.fromietf('fra-FR')
self.assertEqual(language.alpha3, 'fra')
self.assertEqual(language.country, Country('FR'))
self.assertIsNone(language.script)
def test_fromietf_no_country_script(self):
language = Language.fromietf('fra-Latn')
self.assertEqual(language.alpha3, 'fra')
self.assertIsNone(language.country)
self.assertEqual(language.script, Script('Latn'))
def test_fromietf_alpha2_language(self):
language = Language.fromietf('fr-Latn')
self.assertEqual(language.alpha3, 'fra')
self.assertIsNone(language.country)
self.assertEqual(language.script, Script('Latn'))
def test_fromietf_wrong_language(self):
self.assertRaises(ValueError, lambda: Language.fromietf('xyz-FR'))
def test_fromietf_wrong_country(self):
self.assertRaises(ValueError, lambda: Language.fromietf('fra-YZ'))
def test_fromietf_wrong_script(self):
self.assertRaises(ValueError, lambda: Language.fromietf('fra-FR-Wxyz'))
def test_eq(self):
self.assertEqual(Language('eng'), Language('eng'))
def test_ne(self):
self.assertNotEqual(Language('fra'), Language('eng'))
self.assertIsNotNone(Language('fra'))
def test_nonzero(self):
self.assertFalse(bool(Language('und')))
self.assertTrue(bool(Language('eng')))
def test_language_hasattr(self):
self.assertTrue(hasattr(Language('fra'), 'alpha3'))
self.assertTrue(hasattr(Language('fra'), 'alpha2'))
self.assertFalse(hasattr(Language('bej'), 'alpha2'))
def test_country(self):
self.assertEqual(Language('por', 'BR').country, Country('BR'))
self.assertEqual(Language('eng', Country('US')).country, Country('US'))
def test_eq_with_country(self):
self.assertEqual(Language('eng', 'US'), Language('eng', Country('US')))
def test_ne_with_country(self):
self.assertNotEqual(Language('eng', 'US'), Language('eng', Country('GB')))
def test_script(self):
self.assertEqual(Language('srp', script='Latn').script, Script('Latn'))
self.assertEqual(Language('srp', script=Script('Cyrl')).script, Script('Cyrl'))
def test_eq_with_script(self):
self.assertEqual(Language('srp', script='Latn'), Language('srp', script=Script('Latn')))
def test_ne_with_script(self):
self.assertNotEqual(Language('srp', script='Latn'), Language('srp', script=Script('Cyrl')))
def test_eq_with_country_and_script(self):
self.assertEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Latn')))
def test_ne_with_country_and_script(self):
self.assertNotEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Cyrl')))
def test_hash(self):
self.assertEqual(hash(Language('fra')), hash('fr'))
self.assertEqual(hash(Language('ace')), hash('ace'))
self.assertEqual(hash(Language('por', 'BR')), hash('pt-BR'))
self.assertEqual(hash(Language('srp', script='Cyrl')), hash('sr-Cyrl'))
self.assertEqual(hash(Language('eng', 'US', 'Latn')), hash('en-US-Latn'))
def test_pickle(self):
for lang in [Language('fra'),
Language('eng', 'US'),
Language('srp', script='Latn'),
Language('eng', 'US', 'Latn')]:
self.assertEqual(pickle.loads(pickle.dumps(lang)), lang)
def test_str(self):
self.assertEqual(Language.fromietf(str(Language('eng', 'US', 'Latn'))), Language('eng', 'US', 'Latn'))
self.assertEqual(Language.fromietf(str(Language('fra', 'FR'))), Language('fra', 'FR'))
self.assertEqual(Language.fromietf(str(Language('bel'))), Language('bel'))
def test_register_converter(self):
class TestConverter(LanguageReverseConverter):
def __init__(self):
self.to_test = {'fra': 'test1', 'eng': 'test2'}
self.from_test = {'test1': 'fra', 'test2': 'eng'}
def convert(self, alpha3, country=None, script=None):
if alpha3 not in self.to_test:
raise LanguageConvertError(alpha3, country, script)
return self.to_test[alpha3]
def reverse(self, test):
if test not in self.from_test:
raise LanguageReverseError(test)
return (self.from_test[test], None)
language = Language('fra')
self.assertFalse(hasattr(language, 'test'))
language_converters['test'] = TestConverter()
self.assertTrue(hasattr(language, 'test'))
self.assertIn('test', language_converters)
self.assertEqual(Language('fra').test, 'test1')
self.assertEqual(Language.fromtest('test2').alpha3, 'eng')
del language_converters['test']
self.assertNotIn('test', language_converters)
self.assertRaises(KeyError, lambda: Language.fromtest('test1'))
self.assertRaises(AttributeError, lambda: Language('fra').test)
def suite():
suite = TestSuite()
suite.addTest(TestLoader().loadTestsFromTestCase(TestScript))
suite.addTest(TestLoader().loadTestsFromTestCase(TestCountry))
suite.addTest(TestLoader().loadTestsFromTestCase(TestLanguage))
return suite
if __name__ == '__main__':
TextTestRunner().run(suite())
+43
View File
@@ -0,0 +1,43 @@
Behold, mortal, the origins of Beautiful Soup...
================================================
Leonard Richardson is the primary programmer.
Aaron DeVore is awesome.
Mark Pilgrim provided the encoding detection code that forms the base
of UnicodeDammit.
Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful
Soup 4 working under Python 3.
Simon Willison wrote soupselect, which was used to make Beautiful Soup
support CSS selectors.
Sam Ruby helped with a lot of edge cases.
Jonathan Ellis was awarded the prestigous Beau Potage D'Or for his
work in solving the nestable tags conundrum.
An incomplete list of people have contributed patches to Beautiful
Soup:
Istvan Albert, Andrew Lin, Anthony Baxter, Andrew Boyko, Tony Chang,
Zephyr Fang, Fuzzy, Roman Gaufman, Yoni Gilad, Richie Hindle, Peteris
Krumins, Kent Johnson, Ben Last, Robert Leftwich, Staffan Malmgren,
Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon", Ed
Oskiewicz, Greg Phillips, Giles Radford, Arthur Rudolph, Marko
Samastur, Jouni Seppänen, Alexander Schmolck, Andy Theyers, Glyn
Webster, Paul Wright, Danny Yoo
An incomplete list of people who made suggestions or found bugs or
found ways to break Beautiful Soup:
Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel,
Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes,
Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams,
warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison,
Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed
Summers, Dennis Sutch, Chris Smith, Aaron Sweep^W Swartz, Stuart
Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de
Sousa Rocha, Yichun Wei, Per Vognsen
+26
View File
@@ -0,0 +1,26 @@
Beautiful Soup is made available under the MIT license:
Copyright (c) 2004-2012 Leonard Richardson
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE, DAMMIT.
Beautiful Soup incorporates code from the html5lib library, which is
also made available under the MIT license.
File diff suppressed because it is too large Load Diff
+63
View File
@@ -0,0 +1,63 @@
= Introduction =
>>> from bs4 import BeautifulSoup
>>> soup = BeautifulSoup("<p>Some<b>bad<i>HTML")
>>> print soup.prettify()
<html>
<body>
<p>
Some
<b>
bad
<i>
HTML
</i>
</b>
</p>
</body>
</html>
>>> soup.find(text="bad")
u'bad'
>>> soup.i
<i>HTML</i>
>>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml")
>>> print soup.prettify()
<?xml version="1.0" encoding="utf-8">
<tag1>
Some
<tag2 />
bad
<tag3>
XML
</tag3>
</tag1>
= Full documentation =
The bs4/doc/ directory contains full documentation in Sphinx
format. Run "make html" in that directory to create HTML
documentation.
= Running the unit tests =
Beautiful Soup supports unit test discovery from the project root directory:
$ nosetests
$ python -m unittest discover -s bs4 # Python 2.7 and up
If you checked out the source tree, you should see a script in the
home directory called test-all-versions. This script will run the unit
tests under Python 2.7, then create a temporary Python 3 conversion of
the source and run the unit tests again under Python 3.
= Links =
Homepage: http://www.crummy.com/software/BeautifulSoup/bs4/
Documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
http://readthedocs.org/docs/beautiful-soup-4/
Discussion group: http://groups.google.com/group/beautifulsoup/
Development: https://code.launchpad.net/beautifulsoup/
Bug tracker: https://bugs.launchpad.net/beautifulsoup/
+406
View File
@@ -0,0 +1,406 @@
"""Beautiful Soup
Elixir and Tonic
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
Beautiful Soup uses a pluggable XML or HTML parser to parse a
(possibly invalid) document into a tree representation. Beautiful Soup
provides provides methods and Pythonic idioms that make it easy to
navigate, search, and modify the parse tree.
Beautiful Soup works with Python 2.6 and up. It works better if lxml
and/or html5lib is installed.
For more than you ever wanted to know about Beautiful Soup, see the
documentation:
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.3.2"
__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson"
__license__ = "MIT"
__all__ = ['BeautifulSoup']
import os
import re
import warnings
from .builder import builder_registry, ParserRejectedMarkup
from .dammit import UnicodeDammit
from .element import (
CData,
Comment,
DEFAULT_OUTPUT_ENCODING,
Declaration,
Doctype,
NavigableString,
PageElement,
ProcessingInstruction,
ResultSet,
SoupStrainer,
Tag,
)
# The very first thing we do is give a useful error if someone is
# running this code under Python 3 without converting it.
syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
class BeautifulSoup(Tag):
"""
This class defines the basic interface called by the tree builders.
These methods will be called by the parser:
reset()
feed(markup)
The tree builder may call these methods from its feed() implementation:
handle_starttag(name, attrs) # See note about return value
handle_endtag(name)
handle_data(data) # Appends to the current data node
endData(containerClass=NavigableString) # Ends the current data node
No matter how complicated the underlying parser is, you should be
able to build a tree using 'start tag' events, 'end tag' events,
'data' events, and "done with data" events.
If you encounter an empty-element tag (aka a self-closing tag,
like HTML's <br> tag), call handle_starttag and then
handle_endtag.
"""
ROOT_TAG_NAME = u'[document]'
# If the end-user gives no indication which tree builder they
# want, look for one with these features.
DEFAULT_BUILDER_FEATURES = ['html', 'fast']
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
def __init__(self, markup="", features=None, builder=None,
parse_only=None, from_encoding=None, **kwargs):
"""The Soup object is initialized as the 'root tag', and the
provided markup (which can be a string or a file-like object)
is fed into the underlying parser."""
if 'convertEntities' in kwargs:
warnings.warn(
"BS4 does not respect the convertEntities argument to the "
"BeautifulSoup constructor. Entities are always converted "
"to Unicode characters.")
if 'markupMassage' in kwargs:
del kwargs['markupMassage']
warnings.warn(
"BS4 does not respect the markupMassage argument to the "
"BeautifulSoup constructor. The tree builder is responsible "
"for any necessary markup massage.")
if 'smartQuotesTo' in kwargs:
del kwargs['smartQuotesTo']
warnings.warn(
"BS4 does not respect the smartQuotesTo argument to the "
"BeautifulSoup constructor. Smart quotes are always converted "
"to Unicode characters.")
if 'selfClosingTags' in kwargs:
del kwargs['selfClosingTags']
warnings.warn(
"BS4 does not respect the selfClosingTags argument to the "
"BeautifulSoup constructor. The tree builder is responsible "
"for understanding self-closing tags.")
if 'isHTML' in kwargs:
del kwargs['isHTML']
warnings.warn(
"BS4 does not respect the isHTML argument to the "
"BeautifulSoup constructor. You can pass in features='html' "
"or features='xml' to get a builder capable of handling "
"one or the other.")
def deprecated_argument(old_name, new_name):
if old_name in kwargs:
warnings.warn(
'The "%s" argument to the BeautifulSoup constructor '
'has been renamed to "%s."' % (old_name, new_name))
value = kwargs[old_name]
del kwargs[old_name]
return value
return None
parse_only = parse_only or deprecated_argument(
"parseOnlyThese", "parse_only")
from_encoding = from_encoding or deprecated_argument(
"fromEncoding", "from_encoding")
if len(kwargs) > 0:
arg = kwargs.keys().pop()
raise TypeError(
"__init__() got an unexpected keyword argument '%s'" % arg)
if builder is None:
if isinstance(features, basestring):
features = [features]
if features is None or len(features) == 0:
features = self.DEFAULT_BUILDER_FEATURES
builder_class = builder_registry.lookup(*features)
if builder_class is None:
raise FeatureNotFound(
"Couldn't find a tree builder with the features you "
"requested: %s. Do you need to install a parser library?"
% ",".join(features))
builder = builder_class()
self.builder = builder
self.is_xml = builder.is_xml
self.builder.soup = self
self.parse_only = parse_only
if hasattr(markup, 'read'): # It's a file-type object.
markup = markup.read()
elif len(markup) <= 256:
# Print out warnings for a couple beginner problems
# involving passing non-markup to Beautiful Soup.
# Beautiful Soup will still parse the input as markup,
# just in case that's what the user really wants.
if (isinstance(markup, unicode)
and not os.path.supports_unicode_filenames):
possible_filename = markup.encode("utf8")
else:
possible_filename = markup
is_file = False
try:
is_file = os.path.exists(possible_filename)
except Exception, e:
# This is almost certainly a problem involving
# characters not valid in filenames on this
# system. Just let it go.
pass
if is_file:
warnings.warn(
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
if markup[:5] == "http:" or markup[:6] == "https:":
# TODO: This is ugly but I couldn't get it to work in
# Python 3 otherwise.
if ((isinstance(markup, bytes) and not b' ' in markup)
or (isinstance(markup, unicode) and not u' ' in markup)):
warnings.warn(
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
for (self.markup, self.original_encoding, self.declared_html_encoding,
self.contains_replacement_characters) in (
self.builder.prepare_markup(markup, from_encoding)):
self.reset()
try:
self._feed()
break
except ParserRejectedMarkup:
pass
# Clear out the markup and remove the builder's circular
# reference to this object.
self.markup = None
self.builder.soup = None
def _feed(self):
# Convert the document to Unicode.
self.builder.reset()
self.builder.feed(self.markup)
# Close out any unfinished strings and close all the open tags.
self.endData()
while self.currentTag.name != self.ROOT_TAG_NAME:
self.popTag()
def reset(self):
Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
self.hidden = 1
self.builder.reset()
self.current_data = []
self.currentTag = None
self.tagStack = []
self.preserve_whitespace_tag_stack = []
self.pushTag(self)
def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
"""Create a new tag associated with this soup."""
return Tag(None, self.builder, name, namespace, nsprefix, attrs)
def new_string(self, s, subclass=NavigableString):
"""Create a new NavigableString associated with this soup."""
navigable = subclass(s)
navigable.setup()
return navigable
def insert_before(self, successor):
raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
def insert_after(self, successor):
raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
def popTag(self):
tag = self.tagStack.pop()
if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
self.preserve_whitespace_tag_stack.pop()
#print "Pop", tag.name
if self.tagStack:
self.currentTag = self.tagStack[-1]
return self.currentTag
def pushTag(self, tag):
#print "Push", tag.name
if self.currentTag:
self.currentTag.contents.append(tag)
self.tagStack.append(tag)
self.currentTag = self.tagStack[-1]
if tag.name in self.builder.preserve_whitespace_tags:
self.preserve_whitespace_tag_stack.append(tag)
def endData(self, containerClass=NavigableString):
if self.current_data:
current_data = u''.join(self.current_data)
# If whitespace is not preserved, and this string contains
# nothing but ASCII spaces, replace it with a single space
# or newline.
if not self.preserve_whitespace_tag_stack:
strippable = True
for i in current_data:
if i not in self.ASCII_SPACES:
strippable = False
break
if strippable:
if '\n' in current_data:
current_data = '\n'
else:
current_data = ' '
# Reset the data collector.
self.current_data = []
# Should we add this string to the tree at all?
if self.parse_only and len(self.tagStack) <= 1 and \
(not self.parse_only.text or \
not self.parse_only.search(current_data)):
return
o = containerClass(current_data)
self.object_was_parsed(o)
def object_was_parsed(self, o, parent=None, most_recent_element=None):
"""Add an object to the parse tree."""
parent = parent or self.currentTag
most_recent_element = most_recent_element or self._most_recent_element
o.setup(parent, most_recent_element)
if most_recent_element is not None:
most_recent_element.next_element = o
self._most_recent_element = o
parent.contents.append(o)
def _popToTag(self, name, nsprefix=None, inclusivePop=True):
"""Pops the tag stack up to and including the most recent
instance of the given tag. If inclusivePop is false, pops the tag
stack up to but *not* including the most recent instqance of
the given tag."""
#print "Popping to %s" % name
if name == self.ROOT_TAG_NAME:
# The BeautifulSoup object itself can never be popped.
return
most_recently_popped = None
stack_size = len(self.tagStack)
for i in range(stack_size - 1, 0, -1):
t = self.tagStack[i]
if (name == t.name and nsprefix == t.prefix):
if inclusivePop:
most_recently_popped = self.popTag()
break
most_recently_popped = self.popTag()
return most_recently_popped
def handle_starttag(self, name, namespace, nsprefix, attrs):
"""Push a start tag on to the stack.
If this method returns None, the tag was rejected by the
SoupStrainer. You should proceed as if the tag had not occured
in the document. For instance, if this was a self-closing tag,
don't call handle_endtag.
"""
# print "Start tag %s: %s" % (name, attrs)
self.endData()
if (self.parse_only and len(self.tagStack) <= 1
and (self.parse_only.text
or not self.parse_only.search_tag(name, attrs))):
return None
tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
self.currentTag, self._most_recent_element)
if tag is None:
return tag
if self._most_recent_element:
self._most_recent_element.next_element = tag
self._most_recent_element = tag
self.pushTag(tag)
return tag
def handle_endtag(self, name, nsprefix=None):
#print "End tag: " + name
self.endData()
self._popToTag(name, nsprefix)
def handle_data(self, data):
self.current_data.append(data)
def decode(self, pretty_print=False,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
formatter="minimal"):
"""Returns a string or Unicode representation of this document.
To get Unicode, pass None for encoding."""
if self.is_xml:
# Print the XML declaration
encoding_part = ''
if eventual_encoding != None:
encoding_part = ' encoding="%s"' % eventual_encoding
prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
else:
prefix = u''
if not pretty_print:
indent_level = None
else:
indent_level = 0
return prefix + super(BeautifulSoup, self).decode(
indent_level, eventual_encoding, formatter)
# Alias to make it easier to type import: 'from bs4 import _soup'
_s = BeautifulSoup
_soup = BeautifulSoup
class BeautifulStoneSoup(BeautifulSoup):
"""Deprecated interface to an XML parser."""
def __init__(self, *args, **kwargs):
kwargs['features'] = 'xml'
warnings.warn(
'The BeautifulStoneSoup class is deprecated. Instead of using '
'it, pass features="xml" into the BeautifulSoup constructor.')
super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
class StopParsing(Exception):
pass
class FeatureNotFound(ValueError):
pass
#By default, act as an HTML pretty-printer.
if __name__ == '__main__':
import sys
soup = BeautifulSoup(sys.stdin)
print soup.prettify()
@@ -0,0 +1,321 @@
from collections import defaultdict
import itertools
import sys
from bs4.element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
whitespace_re
)
__all__ = [
'HTMLTreeBuilder',
'SAXTreeBuilder',
'TreeBuilder',
'TreeBuilderRegistry',
]
# Some useful features for a TreeBuilder to have.
FAST = 'fast'
PERMISSIVE = 'permissive'
STRICT = 'strict'
XML = 'xml'
HTML = 'html'
HTML_5 = 'html5'
class TreeBuilderRegistry(object):
def __init__(self):
self.builders_for_feature = defaultdict(list)
self.builders = []
def register(self, treebuilder_class):
"""Register a treebuilder based on its advertised features."""
for feature in treebuilder_class.features:
self.builders_for_feature[feature].insert(0, treebuilder_class)
self.builders.insert(0, treebuilder_class)
def lookup(self, *features):
if len(self.builders) == 0:
# There are no builders at all.
return None
if len(features) == 0:
# They didn't ask for any features. Give them the most
# recently registered builder.
return self.builders[0]
# Go down the list of features in order, and eliminate any builders
# that don't match every feature.
features = list(features)
features.reverse()
candidates = None
candidate_set = None
while len(features) > 0:
feature = features.pop()
we_have_the_feature = self.builders_for_feature.get(feature, [])
if len(we_have_the_feature) > 0:
if candidates is None:
candidates = we_have_the_feature
candidate_set = set(candidates)
else:
# Eliminate any candidates that don't have this feature.
candidate_set = candidate_set.intersection(
set(we_have_the_feature))
# The only valid candidates are the ones in candidate_set.
# Go through the original list of candidates and pick the first one
# that's in candidate_set.
if candidate_set is None:
return None
for candidate in candidates:
if candidate in candidate_set:
return candidate
return None
# The BeautifulSoup class will take feature lists from developers and use them
# to look up builders in this registry.
builder_registry = TreeBuilderRegistry()
class TreeBuilder(object):
"""Turn a document into a Beautiful Soup object tree."""
features = []
is_xml = False
preserve_whitespace_tags = set()
empty_element_tags = None # A tag will be considered an empty-element
# tag when and only when it has no contents.
# A value for these tag/attribute combinations is a space- or
# comma-separated list of CDATA, rather than a single CDATA.
cdata_list_attributes = {}
def __init__(self):
self.soup = None
def reset(self):
pass
def can_be_empty_element(self, tag_name):
"""Might a tag with this name be an empty-element tag?
The final markup may or may not actually present this tag as
self-closing.
For instance: an HTMLBuilder does not consider a <p> tag to be
an empty-element tag (it's not in
HTMLBuilder.empty_element_tags). This means an empty <p> tag
will be presented as "<p></p>", not "<p />".
The default implementation has no opinion about which tags are
empty-element tags, so a tag will be presented as an
empty-element tag if and only if it has no contents.
"<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
be left alone.
"""
if self.empty_element_tags is None:
return True
return tag_name in self.empty_element_tags
def feed(self, markup):
raise NotImplementedError()
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None):
return markup, None, None, False
def test_fragment_to_document(self, fragment):
"""Wrap an HTML fragment to make it look like a document.
Different parsers do this differently. For instance, lxml
introduces an empty <head> tag, and html5lib
doesn't. Abstracting this away lets us write simple tests
which run HTML fragments through the parser and compare the
results against other HTML fragments.
This method should not be used outside of tests.
"""
return fragment
def set_up_substitutions(self, tag):
return False
def _replace_cdata_list_attribute_values(self, tag_name, attrs):
"""Replaces class="foo bar" with class=["foo", "bar"]
Modifies its input in place.
"""
if not attrs:
return attrs
if self.cdata_list_attributes:
universal = self.cdata_list_attributes.get('*', [])
tag_specific = self.cdata_list_attributes.get(
tag_name.lower(), None)
for attr in attrs.keys():
if attr in universal or (tag_specific and attr in tag_specific):
# We have a "class"-type attribute whose string
# value is a whitespace-separated list of
# values. Split it into a list.
value = attrs[attr]
if isinstance(value, basestring):
values = whitespace_re.split(value)
else:
# html5lib sometimes calls setAttributes twice
# for the same tag when rearranging the parse
# tree. On the second call the attribute value
# here is already a list. If this happens,
# leave the value alone rather than trying to
# split it again.
values = value
attrs[attr] = values
return attrs
class SAXTreeBuilder(TreeBuilder):
"""A Beautiful Soup treebuilder that listens for SAX events."""
def feed(self, markup):
raise NotImplementedError()
def close(self):
pass
def startElement(self, name, attrs):
attrs = dict((key[1], value) for key, value in list(attrs.items()))
#print "Start %s, %r" % (name, attrs)
self.soup.handle_starttag(name, attrs)
def endElement(self, name):
#print "End %s" % name
self.soup.handle_endtag(name)
def startElementNS(self, nsTuple, nodeName, attrs):
# Throw away (ns, nodeName) for now.
self.startElement(nodeName, attrs)
def endElementNS(self, nsTuple, nodeName):
# Throw away (ns, nodeName) for now.
self.endElement(nodeName)
#handler.endElementNS((ns, node.nodeName), node.nodeName)
def startPrefixMapping(self, prefix, nodeValue):
# Ignore the prefix for now.
pass
def endPrefixMapping(self, prefix):
# Ignore the prefix for now.
# handler.endPrefixMapping(prefix)
pass
def characters(self, content):
self.soup.handle_data(content)
def startDocument(self):
pass
def endDocument(self):
pass
class HTMLTreeBuilder(TreeBuilder):
"""This TreeBuilder knows facts about HTML.
Such as which tags are empty-element tags.
"""
preserve_whitespace_tags = set(['pre', 'textarea'])
empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
'spacer', 'link', 'frame', 'base'])
# The HTML standard defines these attributes as containing a
# space-separated list of values, not a single value. That is,
# class="foo bar" means that the 'class' attribute has two values,
# 'foo' and 'bar', not the single value 'foo bar'. When we
# encounter one of these attributes, we will parse its value into
# a list of values if possible. Upon output, the list will be
# converted back into a string.
cdata_list_attributes = {
"*" : ['class', 'accesskey', 'dropzone'],
"a" : ['rel', 'rev'],
"link" : ['rel', 'rev'],
"td" : ["headers"],
"th" : ["headers"],
"td" : ["headers"],
"form" : ["accept-charset"],
"object" : ["archive"],
# These are HTML5 specific, as are *.accesskey and *.dropzone above.
"area" : ["rel"],
"icon" : ["sizes"],
"iframe" : ["sandbox"],
"output" : ["for"],
}
def set_up_substitutions(self, tag):
# We are only interested in <meta> tags
if tag.name != 'meta':
return False
http_equiv = tag.get('http-equiv')
content = tag.get('content')
charset = tag.get('charset')
# We are interested in <meta> tags that say what encoding the
# document was originally in. This means HTML 5-style <meta>
# tags that provide the "charset" attribute. It also means
# HTML 4-style <meta> tags that provide the "content"
# attribute and have "http-equiv" set to "content-type".
#
# In both cases we will replace the value of the appropriate
# attribute with a standin object that can take on any
# encoding.
meta_encoding = None
if charset is not None:
# HTML 5 style:
# <meta charset="utf8">
meta_encoding = charset
tag['charset'] = CharsetMetaAttributeValue(charset)
elif (content is not None and http_equiv is not None
and http_equiv.lower() == 'content-type'):
# HTML 4 style:
# <meta http-equiv="content-type" content="text/html; charset=utf8">
tag['content'] = ContentMetaAttributeValue(content)
return (meta_encoding is not None)
def register_treebuilders_from(module):
"""Copy TreeBuilders from the given module into this module."""
# I'm fairly sure this is not the best way to do this.
this_module = sys.modules['bs4.builder']
for name in module.__all__:
obj = getattr(module, name)
if issubclass(obj, TreeBuilder):
setattr(this_module, name, obj)
this_module.__all__.append(name)
# Register the builder while we're at it.
this_module.builder_registry.register(obj)
class ParserRejectedMarkup(Exception):
pass
# Builders are registered in reverse order of priority, so that custom
# builder registrations will take precedence. In general, we want lxml
# to take precedence over html5lib, because it's faster. And we only
# want to use HTMLParser as a last result.
from . import _htmlparser
register_treebuilders_from(_htmlparser)
try:
from . import _html5lib
register_treebuilders_from(_html5lib)
except ImportError:
# They don't have html5lib installed.
pass
try:
from . import _lxml
register_treebuilders_from(_lxml)
except ImportError:
# They don't have lxml installed.
pass
@@ -0,0 +1,285 @@
__all__ = [
'HTML5TreeBuilder',
]
import warnings
from bs4.builder import (
PERMISSIVE,
HTML,
HTML_5,
HTMLTreeBuilder,
)
from bs4.element import NamespacedAttribute
import html5lib
from html5lib.constants import namespaces
from bs4.element import (
Comment,
Doctype,
NavigableString,
Tag,
)
class HTML5TreeBuilder(HTMLTreeBuilder):
"""Use html5lib to build a tree."""
features = ['html5lib', PERMISSIVE, HTML_5, HTML]
def prepare_markup(self, markup, user_specified_encoding):
# Store the user-specified encoding for use later on.
self.user_specified_encoding = user_specified_encoding
yield (markup, None, None, False)
# These methods are defined by Beautiful Soup.
def feed(self, markup):
if self.soup.parse_only is not None:
warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
doc = parser.parse(markup, encoding=self.user_specified_encoding)
# Set the character encoding detected by the tokenizer.
if isinstance(markup, unicode):
# We need to special-case this because html5lib sets
# charEncoding to UTF-8 if it gets Unicode input.
doc.original_encoding = None
else:
doc.original_encoding = parser.tokenizer.stream.charEncoding[0]
def create_treebuilder(self, namespaceHTMLElements):
self.underlying_builder = TreeBuilderForHtml5lib(
self.soup, namespaceHTMLElements)
return self.underlying_builder
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<html><head></head><body>%s</body></html>' % fragment
class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
def __init__(self, soup, namespaceHTMLElements):
self.soup = soup
super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
def documentClass(self):
self.soup.reset()
return Element(self.soup, self.soup, None)
def insertDoctype(self, token):
name = token["name"]
publicId = token["publicId"]
systemId = token["systemId"]
doctype = Doctype.for_name_and_ids(name, publicId, systemId)
self.soup.object_was_parsed(doctype)
def elementClass(self, name, namespace):
tag = self.soup.new_tag(name, namespace)
return Element(tag, self.soup, namespace)
def commentClass(self, data):
return TextNode(Comment(data), self.soup)
def fragmentClass(self):
self.soup = BeautifulSoup("")
self.soup.name = "[document_fragment]"
return Element(self.soup, self.soup, None)
def appendChild(self, node):
# XXX This code is not covered by the BS4 tests.
self.soup.append(node.element)
def getDocument(self):
return self.soup
def getFragment(self):
return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
class AttrList(object):
def __init__(self, element):
self.element = element
self.attrs = dict(self.element.attrs)
def __iter__(self):
return list(self.attrs.items()).__iter__()
def __setitem__(self, name, value):
"set attr", name, value
self.element[name] = value
def items(self):
return list(self.attrs.items())
def keys(self):
return list(self.attrs.keys())
def __len__(self):
return len(self.attrs)
def __getitem__(self, name):
return self.attrs[name]
def __contains__(self, name):
return name in list(self.attrs.keys())
class Element(html5lib.treebuilders._base.Node):
def __init__(self, element, soup, namespace):
html5lib.treebuilders._base.Node.__init__(self, element.name)
self.element = element
self.soup = soup
self.namespace = namespace
def appendChild(self, node):
string_child = child = None
if isinstance(node, basestring):
# Some other piece of code decided to pass in a string
# instead of creating a TextElement object to contain the
# string.
string_child = child = node
elif isinstance(node, Tag):
# Some other piece of code decided to pass in a Tag
# instead of creating an Element object to contain the
# Tag.
child = node
elif node.element.__class__ == NavigableString:
string_child = child = node.element
else:
child = node.element
if not isinstance(child, basestring) and child.parent is not None:
node.element.extract()
if (string_child and self.element.contents
and self.element.contents[-1].__class__ == NavigableString):
# We are appending a string onto another string.
# TODO This has O(n^2) performance, for input like
# "a</a>a</a>a</a>..."
old_element = self.element.contents[-1]
new_element = self.soup.new_string(old_element + string_child)
old_element.replace_with(new_element)
self.soup._most_recent_element = new_element
else:
if isinstance(node, basestring):
# Create a brand new NavigableString from this string.
child = self.soup.new_string(node)
# Tell Beautiful Soup to act as if it parsed this element
# immediately after the parent's last descendant. (Or
# immediately after the parent, if it has no children.)
if self.element.contents:
most_recent_element = self.element._last_descendant(False)
else:
most_recent_element = self.element
self.soup.object_was_parsed(
child, parent=self.element,
most_recent_element=most_recent_element)
def getAttributes(self):
return AttrList(self.element)
def setAttributes(self, attributes):
if attributes is not None and len(attributes) > 0:
converted_attributes = []
for name, value in list(attributes.items()):
if isinstance(name, tuple):
new_name = NamespacedAttribute(*name)
del attributes[name]
attributes[new_name] = value
self.soup.builder._replace_cdata_list_attribute_values(
self.name, attributes)
for name, value in attributes.items():
self.element[name] = value
# The attributes may contain variables that need substitution.
# Call set_up_substitutions manually.
#
# The Tag constructor called this method when the Tag was created,
# but we just set/changed the attributes, so call it again.
self.soup.builder.set_up_substitutions(self.element)
attributes = property(getAttributes, setAttributes)
def insertText(self, data, insertBefore=None):
if insertBefore:
text = TextNode(self.soup.new_string(data), self.soup)
self.insertBefore(data, insertBefore)
else:
self.appendChild(data)
def insertBefore(self, node, refNode):
index = self.element.index(refNode.element)
if (node.element.__class__ == NavigableString and self.element.contents
and self.element.contents[index-1].__class__ == NavigableString):
# (See comments in appendChild)
old_node = self.element.contents[index-1]
new_str = self.soup.new_string(old_node + node.element)
old_node.replace_with(new_str)
else:
self.element.insert(index, node.element)
node.parent = self
def removeChild(self, node):
node.element.extract()
def reparentChildren(self, new_parent):
"""Move all of this tag's children into another tag."""
element = self.element
new_parent_element = new_parent.element
# Determine what this tag's next_element will be once all the children
# are removed.
final_next_element = element.next_sibling
new_parents_last_descendant = new_parent_element._last_descendant(False, False)
if len(new_parent_element.contents) > 0:
# The new parent already contains children. We will be
# appending this tag's children to the end.
new_parents_last_child = new_parent_element.contents[-1]
new_parents_last_descendant_next_element = new_parents_last_descendant.next_element
else:
# The new parent contains no children.
new_parents_last_child = None
new_parents_last_descendant_next_element = new_parent_element.next_element
to_append = element.contents
append_after = new_parent.element.contents
if len(to_append) > 0:
# Set the first child's previous_element and previous_sibling
# to elements within the new parent
first_child = to_append[0]
first_child.previous_element = new_parents_last_descendant
first_child.previous_sibling = new_parents_last_child
# Fix the last child's next_element and next_sibling
last_child = to_append[-1]
last_child.next_element = new_parents_last_descendant_next_element
last_child.next_sibling = None
for child in to_append:
child.parent = new_parent_element
new_parent_element.contents.append(child)
# Now that this element has no children, change its .next_element.
element.contents = []
element.next_element = final_next_element
def cloneNode(self):
tag = self.soup.new_tag(self.element.name, self.namespace)
node = Element(tag, self.soup, self.namespace)
for key,value in self.attributes:
node.attributes[key] = value
return node
def hasContent(self):
return self.element.contents
def getNameTuple(self):
if self.namespace == None:
return namespaces["html"], self.name
else:
return self.namespace, self.name
nameTuple = property(getNameTuple)
class TextNode(Element):
def __init__(self, element, soup):
html5lib.treebuilders._base.Node.__init__(self, None)
self.element = element
self.soup = soup
def cloneNode(self):
raise NotImplementedError
@@ -0,0 +1,258 @@
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
__all__ = [
'HTMLParserTreeBuilder',
]
from HTMLParser import (
HTMLParser,
HTMLParseError,
)
import sys
import warnings
# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
# argument, which we'd like to set to False. Unfortunately,
# http://bugs.python.org/issue13273 makes strict=True a better bet
# before Python 3.2.3.
#
# At the end of this file, we monkeypatch HTMLParser so that
# strict=True works well on Python 3.2.2.
major, minor, release = sys.version_info[:3]
CONSTRUCTOR_TAKES_STRICT = (
major > 3
or (major == 3 and minor > 2)
or (major == 3 and minor == 2 and release >= 3))
from bs4.element import (
CData,
Comment,
Declaration,
Doctype,
ProcessingInstruction,
)
from bs4.dammit import EntitySubstitution, UnicodeDammit
from bs4.builder import (
HTML,
HTMLTreeBuilder,
STRICT,
)
HTMLPARSER = 'html.parser'
class BeautifulSoupHTMLParser(HTMLParser):
def handle_starttag(self, name, attrs):
# XXX namespace
attr_dict = {}
for key, value in attrs:
# Change None attribute values to the empty string
# for consistency with the other tree builders.
if value is None:
value = ''
attr_dict[key] = value
attrvalue = '""'
self.soup.handle_starttag(name, None, None, attr_dict)
def handle_endtag(self, name):
self.soup.handle_endtag(name)
def handle_data(self, data):
self.soup.handle_data(data)
def handle_charref(self, name):
# XXX workaround for a bug in HTMLParser. Remove this once
# it's fixed.
if name.startswith('x'):
real_name = int(name.lstrip('x'), 16)
elif name.startswith('X'):
real_name = int(name.lstrip('X'), 16)
else:
real_name = int(name)
try:
data = unichr(real_name)
except (ValueError, OverflowError), e:
data = u"\N{REPLACEMENT CHARACTER}"
self.handle_data(data)
def handle_entityref(self, name):
character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
if character is not None:
data = character
else:
data = "&%s;" % name
self.handle_data(data)
def handle_comment(self, data):
self.soup.endData()
self.soup.handle_data(data)
self.soup.endData(Comment)
def handle_decl(self, data):
self.soup.endData()
if data.startswith("DOCTYPE "):
data = data[len("DOCTYPE "):]
elif data == 'DOCTYPE':
# i.e. "<!DOCTYPE>"
data = ''
self.soup.handle_data(data)
self.soup.endData(Doctype)
def unknown_decl(self, data):
if data.upper().startswith('CDATA['):
cls = CData
data = data[len('CDATA['):]
else:
cls = Declaration
self.soup.endData()
self.soup.handle_data(data)
self.soup.endData(cls)
def handle_pi(self, data):
self.soup.endData()
if data.endswith("?") and data.lower().startswith("xml"):
# "An XHTML processing instruction using the trailing '?'
# will cause the '?' to be included in data." - HTMLParser
# docs.
#
# Strip the question mark so we don't end up with two
# question marks.
data = data[:-1]
self.soup.handle_data(data)
self.soup.endData(ProcessingInstruction)
class HTMLParserTreeBuilder(HTMLTreeBuilder):
is_xml = False
features = [HTML, STRICT, HTMLPARSER]
def __init__(self, *args, **kwargs):
if CONSTRUCTOR_TAKES_STRICT:
kwargs['strict'] = False
self.parser_args = (args, kwargs)
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None):
"""
:return: A 4-tuple (markup, original encoding, encoding
declared within markup, whether any characters had to be
replaced with REPLACEMENT CHARACTER).
"""
if isinstance(markup, unicode):
yield (markup, None, None, False)
return
try_encodings = [user_specified_encoding, document_declared_encoding]
dammit = UnicodeDammit(markup, try_encodings, is_html=True)
yield (dammit.markup, dammit.original_encoding,
dammit.declared_html_encoding,
dammit.contains_replacement_characters)
def feed(self, markup):
args, kwargs = self.parser_args
parser = BeautifulSoupHTMLParser(*args, **kwargs)
parser.soup = self.soup
try:
parser.feed(markup)
except HTMLParseError, e:
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
raise e
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3.
if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
import re
attrfind_tolerant = re.compile(
r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
(?:\s+ # whitespace before attribute name
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
(?:\s*=\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|\"[^\"]*\" # LIT-enclosed value
|[^'\">\s]+ # bare value
)
)?
)
)*
\s* # trailing whitespace
""", re.VERBOSE)
BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
from html.parser import tagfind, attrfind
def parse_starttag(self, i):
self.__starttag_text = None
endpos = self.check_for_whole_start_tag(i)
if endpos < 0:
return endpos
rawdata = self.rawdata
self.__starttag_text = rawdata[i:endpos]
# Now parse the data between i+1 and j into a tag and attrs
attrs = []
match = tagfind.match(rawdata, i+1)
assert match, 'unexpected call to parse_starttag()'
k = match.end()
self.lasttag = tag = rawdata[i+1:k].lower()
while k < endpos:
if self.strict:
m = attrfind.match(rawdata, k)
else:
m = attrfind_tolerant.match(rawdata, k)
if not m:
break
attrname, rest, attrvalue = m.group(1, 2, 3)
if not rest:
attrvalue = None
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
attrvalue[:1] == '"' == attrvalue[-1:]:
attrvalue = attrvalue[1:-1]
if attrvalue:
attrvalue = self.unescape(attrvalue)
attrs.append((attrname.lower(), attrvalue))
k = m.end()
end = rawdata[k:endpos].strip()
if end not in (">", "/>"):
lineno, offset = self.getpos()
if "\n" in self.__starttag_text:
lineno = lineno + self.__starttag_text.count("\n")
offset = len(self.__starttag_text) \
- self.__starttag_text.rfind("\n")
else:
offset = offset + len(self.__starttag_text)
if self.strict:
self.error("junk characters in start tag: %r"
% (rawdata[k:endpos][:20],))
self.handle_data(rawdata[i:endpos])
return endpos
if end.endswith('/>'):
# XHTML-style empty tag: <span attr="value" />
self.handle_startendtag(tag, attrs)
else:
self.handle_starttag(tag, attrs)
if tag in self.CDATA_CONTENT_ELEMENTS:
self.set_cdata_mode(tag)
return endpos
def set_cdata_mode(self, elem):
self.cdata_elem = elem.lower()
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
BeautifulSoupHTMLParser.parse_starttag = parse_starttag
BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
CONSTRUCTOR_TAKES_STRICT = True
@@ -0,0 +1,233 @@
__all__ = [
'LXMLTreeBuilderForXML',
'LXMLTreeBuilder',
]
from io import BytesIO
from StringIO import StringIO
import collections
from lxml import etree
from bs4.element import Comment, Doctype, NamespacedAttribute
from bs4.builder import (
FAST,
HTML,
HTMLTreeBuilder,
PERMISSIVE,
ParserRejectedMarkup,
TreeBuilder,
XML)
from bs4.dammit import EncodingDetector
LXML = 'lxml'
class LXMLTreeBuilderForXML(TreeBuilder):
DEFAULT_PARSER_CLASS = etree.XMLParser
is_xml = True
# Well, it's permissive by XML parser standards.
features = [LXML, XML, FAST, PERMISSIVE]
CHUNK_SIZE = 512
# This namespace mapping is specified in the XML Namespace
# standard.
DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}
def default_parser(self, encoding):
# This can either return a parser object or a class, which
# will be instantiated with default arguments.
if self._default_parser is not None:
return self._default_parser
return etree.XMLParser(
target=self, strip_cdata=False, recover=True, encoding=encoding)
def parser_for(self, encoding):
# Use the default parser.
parser = self.default_parser(encoding)
if isinstance(parser, collections.Callable):
# Instantiate the parser with default arguments
parser = parser(target=self, strip_cdata=False, encoding=encoding)
return parser
def __init__(self, parser=None, empty_element_tags=None):
# TODO: Issue a warning if parser is present but not a
# callable, since that means there's no way to create new
# parsers for different encodings.
self._default_parser = parser
if empty_element_tags is not None:
self.empty_element_tags = set(empty_element_tags)
self.soup = None
self.nsmaps = [self.DEFAULT_NSMAPS]
def _getNsTag(self, tag):
# Split the namespace URL out of a fully-qualified lxml tag
# name. Copied from lxml's src/lxml/sax.py.
if tag[0] == '{':
return tuple(tag[1:].split('}', 1))
else:
return (None, tag)
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None):
"""
:yield: A series of 4-tuples.
(markup, encoding, declared encoding,
has undergone character replacement)
Each 4-tuple represents a strategy for parsing the document.
"""
if isinstance(markup, unicode):
# We were given Unicode. Maybe lxml can parse Unicode on
# this system?
yield markup, None, document_declared_encoding, False
if isinstance(markup, unicode):
# No, apparently not. Convert the Unicode to UTF-8 and
# tell lxml to parse it as UTF-8.
yield (markup.encode("utf8"), "utf8",
document_declared_encoding, False)
# Instead of using UnicodeDammit to convert the bytestring to
# Unicode using different encodings, use EncodingDetector to
# iterate over the encodings, and tell lxml to try to parse
# the document as each one in turn.
is_html = not self.is_xml
try_encodings = [user_specified_encoding, document_declared_encoding]
detector = EncodingDetector(markup, try_encodings, is_html)
for encoding in detector.encodings:
yield (detector.markup, encoding, document_declared_encoding, False)
def feed(self, markup):
if isinstance(markup, bytes):
markup = BytesIO(markup)
elif isinstance(markup, unicode):
markup = StringIO(markup)
# Call feed() at least once, even if the markup is empty,
# or the parser won't be initialized.
data = markup.read(self.CHUNK_SIZE)
try:
self.parser = self.parser_for(self.soup.original_encoding)
self.parser.feed(data)
while len(data) != 0:
# Now call feed() on the rest of the data, chunk by chunk.
data = markup.read(self.CHUNK_SIZE)
if len(data) != 0:
self.parser.feed(data)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
raise ParserRejectedMarkup(str(e))
def close(self):
self.nsmaps = [self.DEFAULT_NSMAPS]
def start(self, name, attrs, nsmap={}):
# Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
attrs = dict(attrs)
nsprefix = None
# Invert each namespace map as it comes in.
if len(self.nsmaps) > 1:
# There are no new namespaces for this tag, but
# non-default namespaces are in play, so we need a
# separate tag stack to know when they end.
self.nsmaps.append(None)
elif len(nsmap) > 0:
# A new namespace mapping has come into play.
inverted_nsmap = dict((value, key) for key, value in nsmap.items())
self.nsmaps.append(inverted_nsmap)
# Also treat the namespace mapping as a set of attributes on the
# tag, so we can recreate it later.
attrs = attrs.copy()
for prefix, namespace in nsmap.items():
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
attrs[attribute] = namespace
# Namespaces are in play. Find any attributes that came in
# from lxml with namespaces attached to their names, and
# turn then into NamespacedAttribute objects.
new_attrs = {}
for attr, value in attrs.items():
namespace, attr = self._getNsTag(attr)
if namespace is None:
new_attrs[attr] = value
else:
nsprefix = self._prefix_for_namespace(namespace)
attr = NamespacedAttribute(nsprefix, attr, namespace)
new_attrs[attr] = value
attrs = new_attrs
namespace, name = self._getNsTag(name)
nsprefix = self._prefix_for_namespace(namespace)
self.soup.handle_starttag(name, namespace, nsprefix, attrs)
def _prefix_for_namespace(self, namespace):
"""Find the currently active prefix for the given namespace."""
if namespace is None:
return None
for inverted_nsmap in reversed(self.nsmaps):
if inverted_nsmap is not None and namespace in inverted_nsmap:
return inverted_nsmap[namespace]
return None
def end(self, name):
self.soup.endData()
completed_tag = self.soup.tagStack[-1]
namespace, name = self._getNsTag(name)
nsprefix = None
if namespace is not None:
for inverted_nsmap in reversed(self.nsmaps):
if inverted_nsmap is not None and namespace in inverted_nsmap:
nsprefix = inverted_nsmap[namespace]
break
self.soup.handle_endtag(name, nsprefix)
if len(self.nsmaps) > 1:
# This tag, or one of its parents, introduced a namespace
# mapping, so pop it off the stack.
self.nsmaps.pop()
def pi(self, target, data):
pass
def data(self, content):
self.soup.handle_data(content)
def doctype(self, name, pubid, system):
self.soup.endData()
doctype = Doctype.for_name_and_ids(name, pubid, system)
self.soup.object_was_parsed(doctype)
def comment(self, content):
"Handle comments as Comment objects."
self.soup.endData()
self.soup.handle_data(content)
self.soup.endData(Comment)
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
features = [LXML, HTML, FAST, PERMISSIVE]
is_xml = False
def default_parser(self, encoding):
return etree.HTMLParser
def feed(self, markup):
encoding = self.soup.original_encoding
try:
self.parser = self.parser_for(encoding)
self.parser.feed(markup)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
raise ParserRejectedMarkup(str(e))
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<html><body>%s</body></html>' % fragment
+829
View File
@@ -0,0 +1,829 @@
# -*- coding: utf-8 -*-
"""Beautiful Soup bonus library: Unicode, Dammit
This library converts a bytestream to Unicode through any means
necessary. It is heavily based on code from Mark Pilgrim's Universal
Feed Parser. It works best on XML and XML, but it does not rewrite the
XML or HTML to reflect a new encoding; that's the tree builder's job.
"""
import codecs
from htmlentitydefs import codepoint2name
import re
import logging
import string
# Import a library to autodetect character encodings.
chardet_type = None
try:
# First try the fast C implementation.
# PyPI package: cchardet
import cchardet
def chardet_dammit(s):
return cchardet.detect(s)['encoding']
except ImportError:
try:
# Fall back to the pure Python implementation
# Debian package: python-chardet
# PyPI package: chardet
import chardet
def chardet_dammit(s):
return chardet.detect(s)['encoding']
#import chardet.constants
#chardet.constants._debug = 1
except ImportError:
# No chardet available.
def chardet_dammit(s):
return None
# Available from http://cjkpython.i18n.org/.
try:
import iconv_codec
except ImportError:
pass
xml_encoding_re = re.compile(
'^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
html_meta_re = re.compile(
'<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
class EntitySubstitution(object):
"""Substitute XML or HTML entities for the corresponding characters."""
def _populate_class_variables():
lookup = {}
reverse_lookup = {}
characters_for_re = []
for codepoint, name in list(codepoint2name.items()):
character = unichr(codepoint)
if codepoint != 34:
# There's no point in turning the quotation mark into
# &quot;, unless it happens within an attribute value, which
# is handled elsewhere.
characters_for_re.append(character)
lookup[character] = name
# But we do want to turn &quot; into the quotation mark.
reverse_lookup[name] = character
re_definition = "[%s]" % "".join(characters_for_re)
return lookup, reverse_lookup, re.compile(re_definition)
(CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()
CHARACTER_TO_XML_ENTITY = {
"'": "apos",
'"': "quot",
"&": "amp",
"<": "lt",
">": "gt",
}
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
"&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
")")
AMPERSAND_OR_BRACKET = re.compile("([<>&])")
@classmethod
def _substitute_html_entity(cls, matchobj):
entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
return "&%s;" % entity
@classmethod
def _substitute_xml_entity(cls, matchobj):
"""Used with a regular expression to substitute the
appropriate XML entity for an XML special character."""
entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
return "&%s;" % entity
@classmethod
def quoted_attribute_value(self, value):
"""Make a value into a quoted XML attribute, possibly escaping it.
Most strings will be quoted using double quotes.
Bob's Bar -> "Bob's Bar"
If a string contains double quotes, it will be quoted using
single quotes.
Welcome to "my bar" -> 'Welcome to "my bar"'
If a string contains both single and double quotes, the
double quotes will be escaped, and the string will be quoted
using double quotes.
Welcome to "Bob's Bar" -> "Welcome to &quot;Bob's bar&quot;
"""
quote_with = '"'
if '"' in value:
if "'" in value:
# The string contains both single and double
# quotes. Turn the double quotes into
# entities. We quote the double quotes rather than
# the single quotes because the entity name is
# "&quot;" whether this is HTML or XML. If we
# quoted the single quotes, we'd have to decide
# between &apos; and &squot;.
replace_with = "&quot;"
value = value.replace('"', replace_with)
else:
# There are double quotes but no single quotes.
# We can use single quotes to quote the attribute.
quote_with = "'"
return quote_with + value + quote_with
@classmethod
def substitute_xml(cls, value, make_quoted_attribute=False):
"""Substitute XML entities for special XML characters.
:param value: A string to be substituted. The less-than sign
will become &lt;, the greater-than sign will become &gt;,
and any ampersands will become &amp;. If you want ampersands
that appear to be part of an entity definition to be left
alone, use substitute_xml_containing_entities() instead.
:param make_quoted_attribute: If True, then the string will be
quoted, as befits an attribute value.
"""
# Escape angle brackets and ampersands.
value = cls.AMPERSAND_OR_BRACKET.sub(
cls._substitute_xml_entity, value)
if make_quoted_attribute:
value = cls.quoted_attribute_value(value)
return value
@classmethod
def substitute_xml_containing_entities(
cls, value, make_quoted_attribute=False):
"""Substitute XML entities for special XML characters.
:param value: A string to be substituted. The less-than sign will
become &lt;, the greater-than sign will become &gt;, and any
ampersands that are not part of an entity defition will
become &amp;.
:param make_quoted_attribute: If True, then the string will be
quoted, as befits an attribute value.
"""
# Escape angle brackets, and ampersands that aren't part of
# entities.
value = cls.BARE_AMPERSAND_OR_BRACKET.sub(
cls._substitute_xml_entity, value)
if make_quoted_attribute:
value = cls.quoted_attribute_value(value)
return value
@classmethod
def substitute_html(cls, s):
"""Replace certain Unicode characters with named HTML entities.
This differs from data.encode(encoding, 'xmlcharrefreplace')
in that the goal is to make the result more readable (to those
with ASCII displays) rather than to recover from
errors. There's absolutely nothing wrong with a UTF-8 string
containg a LATIN SMALL LETTER E WITH ACUTE, but replacing that
character with "&eacute;" will make it more readable to some
people.
"""
return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
cls._substitute_html_entity, s)
class EncodingDetector:
"""Suggests a number of possible encodings for a bytestring.
Order of precedence:
1. Encodings you specifically tell EncodingDetector to try first
(the override_encodings argument to the constructor).
2. An encoding declared within the bytestring itself, either in an
XML declaration (if the bytestring is to be interpreted as an XML
document), or in a <meta> tag (if the bytestring is to be
interpreted as an HTML document.)
3. An encoding detected through textual analysis by chardet,
cchardet, or a similar external library.
4. UTF-8.
5. Windows-1252.
"""
def __init__(self, markup, override_encodings=None, is_html=False):
self.override_encodings = override_encodings or []
self.chardet_encoding = None
self.is_html = is_html
self.declared_encoding = None
# First order of business: strip a byte-order mark.
self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup)
def _usable(self, encoding, tried):
if encoding is not None:
encoding = encoding.lower()
if encoding not in tried:
tried.add(encoding)
return True
return False
@property
def encodings(self):
"""Yield a number of encodings that might work for this markup."""
tried = set()
for e in self.override_encodings:
if self._usable(e, tried):
yield e
# Did the document originally start with a byte-order mark
# that indicated its encoding?
if self._usable(self.sniffed_encoding, tried):
yield self.sniffed_encoding
# Look within the document for an XML or HTML encoding
# declaration.
if self.declared_encoding is None:
self.declared_encoding = self.find_declared_encoding(
self.markup, self.is_html)
if self._usable(self.declared_encoding, tried):
yield self.declared_encoding
# Use third-party character set detection to guess at the
# encoding.
if self.chardet_encoding is None:
self.chardet_encoding = chardet_dammit(self.markup)
if self._usable(self.chardet_encoding, tried):
yield self.chardet_encoding
# As a last-ditch effort, try utf-8 and windows-1252.
for e in ('utf-8', 'windows-1252'):
if self._usable(e, tried):
yield e
@classmethod
def strip_byte_order_mark(cls, data):
"""If a byte-order mark is present, strip it and return the encoding it implies."""
encoding = None
if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
and (data[2:4] != '\x00\x00'):
encoding = 'utf-16be'
data = data[2:]
elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \
and (data[2:4] != '\x00\x00'):
encoding = 'utf-16le'
data = data[2:]
elif data[:3] == b'\xef\xbb\xbf':
encoding = 'utf-8'
data = data[3:]
elif data[:4] == b'\x00\x00\xfe\xff':
encoding = 'utf-32be'
data = data[4:]
elif data[:4] == b'\xff\xfe\x00\x00':
encoding = 'utf-32le'
data = data[4:]
return data, encoding
@classmethod
def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False):
"""Given a document, tries to find its declared encoding.
An XML encoding is declared at the beginning of the document.
An HTML encoding is declared in a <meta> tag, hopefully near the
beginning of the document.
"""
if search_entire_document:
xml_endpos = html_endpos = len(markup)
else:
xml_endpos = 1024
html_endpos = max(2048, int(len(markup) * 0.05))
declared_encoding = None
declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
if not declared_encoding_match and is_html:
declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
if declared_encoding_match is not None:
declared_encoding = declared_encoding_match.groups()[0].decode(
'ascii')
if declared_encoding:
return declared_encoding.lower()
return None
class UnicodeDammit:
"""A class for detecting the encoding of a *ML document and
converting it to a Unicode string. If the source encoding is
windows-1252, can replace MS smart quotes with their HTML or XML
equivalents."""
# This dictionary maps commonly seen values for "charset" in HTML
# meta tags to the corresponding Python codec names. It only covers
# values that aren't in Python's aliases and can't be determined
# by the heuristics in find_codec.
CHARSET_ALIASES = {"macintosh": "mac-roman",
"x-sjis": "shift-jis"}
ENCODINGS_WITH_SMART_QUOTES = [
"windows-1252",
"iso-8859-1",
"iso-8859-2",
]
def __init__(self, markup, override_encodings=[],
smart_quotes_to=None, is_html=False):
self.smart_quotes_to = smart_quotes_to
self.tried_encodings = []
self.contains_replacement_characters = False
self.is_html = is_html
self.detector = EncodingDetector(markup, override_encodings, is_html)
# Short-circuit if the data is in Unicode to begin with.
if isinstance(markup, unicode) or markup == '':
self.markup = markup
self.unicode_markup = unicode(markup)
self.original_encoding = None
return
# The encoding detector may have stripped a byte-order mark.
# Use the stripped markup from this point on.
self.markup = self.detector.markup
u = None
for encoding in self.detector.encodings:
markup = self.detector.markup
u = self._convert_from(encoding)
if u is not None:
break
if not u:
# None of the encodings worked. As an absolute last resort,
# try them again with character replacement.
for encoding in self.detector.encodings:
if encoding != "ascii":
u = self._convert_from(encoding, "replace")
if u is not None:
logging.warning(
"Some characters could not be decoded, and were "
"replaced with REPLACEMENT CHARACTER.")
self.contains_replacement_characters = True
break
# If none of that worked, we could at this point force it to
# ASCII, but that would destroy so much data that I think
# giving up is better.
self.unicode_markup = u
if not u:
self.original_encoding = None
def _sub_ms_char(self, match):
"""Changes a MS smart quote character to an XML or HTML
entity, or an ASCII character."""
orig = match.group(1)
if self.smart_quotes_to == 'ascii':
sub = self.MS_CHARS_TO_ASCII.get(orig).encode()
else:
sub = self.MS_CHARS.get(orig)
if type(sub) == tuple:
if self.smart_quotes_to == 'xml':
sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
else:
sub = '&'.encode() + sub[0].encode() + ';'.encode()
else:
sub = sub.encode()
return sub
def _convert_from(self, proposed, errors="strict"):
proposed = self.find_codec(proposed)
if not proposed or (proposed, errors) in self.tried_encodings:
return None
self.tried_encodings.append((proposed, errors))
markup = self.markup
# Convert smart quotes to HTML if coming from an encoding
# that might have them.
if (self.smart_quotes_to is not None
and proposed in self.ENCODINGS_WITH_SMART_QUOTES):
smart_quotes_re = b"([\x80-\x9f])"
smart_quotes_compiled = re.compile(smart_quotes_re)
markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
try:
#print "Trying to convert document to %s (errors=%s)" % (
# proposed, errors)
u = self._to_unicode(markup, proposed, errors)
self.markup = u
self.original_encoding = proposed
except Exception as e:
#print "That didn't work!"
#print e
return None
#print "Correct encoding: %s" % proposed
return self.markup
def _to_unicode(self, data, encoding, errors="strict"):
'''Given a string and its encoding, decodes the string into Unicode.
%encoding is a string recognized by encodings.aliases'''
return unicode(data, encoding, errors)
@property
def declared_html_encoding(self):
if not self.is_html:
return None
return self.detector.declared_encoding
def find_codec(self, charset):
value = (self._codec(self.CHARSET_ALIASES.get(charset, charset))
or (charset and self._codec(charset.replace("-", "")))
or (charset and self._codec(charset.replace("-", "_")))
or (charset and charset.lower())
or charset
)
if value:
return value.lower()
return None
def _codec(self, charset):
if not charset:
return charset
codec = None
try:
codecs.lookup(charset)
codec = charset
except (LookupError, ValueError):
pass
return codec
# A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities.
MS_CHARS = {b'\x80': ('euro', '20AC'),
b'\x81': ' ',
b'\x82': ('sbquo', '201A'),
b'\x83': ('fnof', '192'),
b'\x84': ('bdquo', '201E'),
b'\x85': ('hellip', '2026'),
b'\x86': ('dagger', '2020'),
b'\x87': ('Dagger', '2021'),
b'\x88': ('circ', '2C6'),
b'\x89': ('permil', '2030'),
b'\x8A': ('Scaron', '160'),
b'\x8B': ('lsaquo', '2039'),
b'\x8C': ('OElig', '152'),
b'\x8D': '?',
b'\x8E': ('#x17D', '17D'),
b'\x8F': '?',
b'\x90': '?',
b'\x91': ('lsquo', '2018'),
b'\x92': ('rsquo', '2019'),
b'\x93': ('ldquo', '201C'),
b'\x94': ('rdquo', '201D'),
b'\x95': ('bull', '2022'),
b'\x96': ('ndash', '2013'),
b'\x97': ('mdash', '2014'),
b'\x98': ('tilde', '2DC'),
b'\x99': ('trade', '2122'),
b'\x9a': ('scaron', '161'),
b'\x9b': ('rsaquo', '203A'),
b'\x9c': ('oelig', '153'),
b'\x9d': '?',
b'\x9e': ('#x17E', '17E'),
b'\x9f': ('Yuml', ''),}
# A parochial partial mapping of ISO-Latin-1 to ASCII. Contains
# horrors like stripping diacritical marks to turn á into a, but also
# contains non-horrors like turning “ into ".
MS_CHARS_TO_ASCII = {
b'\x80' : 'EUR',
b'\x81' : ' ',
b'\x82' : ',',
b'\x83' : 'f',
b'\x84' : ',,',
b'\x85' : '...',
b'\x86' : '+',
b'\x87' : '++',
b'\x88' : '^',
b'\x89' : '%',
b'\x8a' : 'S',
b'\x8b' : '<',
b'\x8c' : 'OE',
b'\x8d' : '?',
b'\x8e' : 'Z',
b'\x8f' : '?',
b'\x90' : '?',
b'\x91' : "'",
b'\x92' : "'",
b'\x93' : '"',
b'\x94' : '"',
b'\x95' : '*',
b'\x96' : '-',
b'\x97' : '--',
b'\x98' : '~',
b'\x99' : '(TM)',
b'\x9a' : 's',
b'\x9b' : '>',
b'\x9c' : 'oe',
b'\x9d' : '?',
b'\x9e' : 'z',
b'\x9f' : 'Y',
b'\xa0' : ' ',
b'\xa1' : '!',
b'\xa2' : 'c',
b'\xa3' : 'GBP',
b'\xa4' : '$', #This approximation is especially parochial--this is the
#generic currency symbol.
b'\xa5' : 'YEN',
b'\xa6' : '|',
b'\xa7' : 'S',
b'\xa8' : '..',
b'\xa9' : '',
b'\xaa' : '(th)',
b'\xab' : '<<',
b'\xac' : '!',
b'\xad' : ' ',
b'\xae' : '(R)',
b'\xaf' : '-',
b'\xb0' : 'o',
b'\xb1' : '+-',
b'\xb2' : '2',
b'\xb3' : '3',
b'\xb4' : ("'", 'acute'),
b'\xb5' : 'u',
b'\xb6' : 'P',
b'\xb7' : '*',
b'\xb8' : ',',
b'\xb9' : '1',
b'\xba' : '(th)',
b'\xbb' : '>>',
b'\xbc' : '1/4',
b'\xbd' : '1/2',
b'\xbe' : '3/4',
b'\xbf' : '?',
b'\xc0' : 'A',
b'\xc1' : 'A',
b'\xc2' : 'A',
b'\xc3' : 'A',
b'\xc4' : 'A',
b'\xc5' : 'A',
b'\xc6' : 'AE',
b'\xc7' : 'C',
b'\xc8' : 'E',
b'\xc9' : 'E',
b'\xca' : 'E',
b'\xcb' : 'E',
b'\xcc' : 'I',
b'\xcd' : 'I',
b'\xce' : 'I',
b'\xcf' : 'I',
b'\xd0' : 'D',
b'\xd1' : 'N',
b'\xd2' : 'O',
b'\xd3' : 'O',
b'\xd4' : 'O',
b'\xd5' : 'O',
b'\xd6' : 'O',
b'\xd7' : '*',
b'\xd8' : 'O',
b'\xd9' : 'U',
b'\xda' : 'U',
b'\xdb' : 'U',
b'\xdc' : 'U',
b'\xdd' : 'Y',
b'\xde' : 'b',
b'\xdf' : 'B',
b'\xe0' : 'a',
b'\xe1' : 'a',
b'\xe2' : 'a',
b'\xe3' : 'a',
b'\xe4' : 'a',
b'\xe5' : 'a',
b'\xe6' : 'ae',
b'\xe7' : 'c',
b'\xe8' : 'e',
b'\xe9' : 'e',
b'\xea' : 'e',
b'\xeb' : 'e',
b'\xec' : 'i',
b'\xed' : 'i',
b'\xee' : 'i',
b'\xef' : 'i',
b'\xf0' : 'o',
b'\xf1' : 'n',
b'\xf2' : 'o',
b'\xf3' : 'o',
b'\xf4' : 'o',
b'\xf5' : 'o',
b'\xf6' : 'o',
b'\xf7' : '/',
b'\xf8' : 'o',
b'\xf9' : 'u',
b'\xfa' : 'u',
b'\xfb' : 'u',
b'\xfc' : 'u',
b'\xfd' : 'y',
b'\xfe' : 'b',
b'\xff' : 'y',
}
# A map used when removing rogue Windows-1252/ISO-8859-1
# characters in otherwise UTF-8 documents.
#
# Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in
# Windows-1252.
WINDOWS_1252_TO_UTF8 = {
0x80 : b'\xe2\x82\xac', # €
0x82 : b'\xe2\x80\x9a', #
0x83 : b'\xc6\x92', # ƒ
0x84 : b'\xe2\x80\x9e', # „
0x85 : b'\xe2\x80\xa6', # …
0x86 : b'\xe2\x80\xa0', # †
0x87 : b'\xe2\x80\xa1', # ‡
0x88 : b'\xcb\x86', # ˆ
0x89 : b'\xe2\x80\xb0', # ‰
0x8a : b'\xc5\xa0', # Š
0x8b : b'\xe2\x80\xb9', #
0x8c : b'\xc5\x92', # Œ
0x8e : b'\xc5\xbd', # Ž
0x91 : b'\xe2\x80\x98', #
0x92 : b'\xe2\x80\x99', #
0x93 : b'\xe2\x80\x9c', # “
0x94 : b'\xe2\x80\x9d', # ”
0x95 : b'\xe2\x80\xa2', # •
0x96 : b'\xe2\x80\x93', #
0x97 : b'\xe2\x80\x94', # —
0x98 : b'\xcb\x9c', # ˜
0x99 : b'\xe2\x84\xa2', # ™
0x9a : b'\xc5\xa1', # š
0x9b : b'\xe2\x80\xba', #
0x9c : b'\xc5\x93', # œ
0x9e : b'\xc5\xbe', # ž
0x9f : b'\xc5\xb8', # Ÿ
0xa0 : b'\xc2\xa0', #  
0xa1 : b'\xc2\xa1', # ¡
0xa2 : b'\xc2\xa2', # ¢
0xa3 : b'\xc2\xa3', # £
0xa4 : b'\xc2\xa4', # ¤
0xa5 : b'\xc2\xa5', # ¥
0xa6 : b'\xc2\xa6', # ¦
0xa7 : b'\xc2\xa7', # §
0xa8 : b'\xc2\xa8', # ¨
0xa9 : b'\xc2\xa9', # ©
0xaa : b'\xc2\xaa', # ª
0xab : b'\xc2\xab', # «
0xac : b'\xc2\xac', # ¬
0xad : b'\xc2\xad', # ­
0xae : b'\xc2\xae', # ®
0xaf : b'\xc2\xaf', # ¯
0xb0 : b'\xc2\xb0', # °
0xb1 : b'\xc2\xb1', # ±
0xb2 : b'\xc2\xb2', # ²
0xb3 : b'\xc2\xb3', # ³
0xb4 : b'\xc2\xb4', # ´
0xb5 : b'\xc2\xb5', # µ
0xb6 : b'\xc2\xb6', # ¶
0xb7 : b'\xc2\xb7', # ·
0xb8 : b'\xc2\xb8', # ¸
0xb9 : b'\xc2\xb9', # ¹
0xba : b'\xc2\xba', # º
0xbb : b'\xc2\xbb', # »
0xbc : b'\xc2\xbc', # ¼
0xbd : b'\xc2\xbd', # ½
0xbe : b'\xc2\xbe', # ¾
0xbf : b'\xc2\xbf', # ¿
0xc0 : b'\xc3\x80', # À
0xc1 : b'\xc3\x81', # Á
0xc2 : b'\xc3\x82', # Â
0xc3 : b'\xc3\x83', # Ã
0xc4 : b'\xc3\x84', # Ä
0xc5 : b'\xc3\x85', # Å
0xc6 : b'\xc3\x86', # Æ
0xc7 : b'\xc3\x87', # Ç
0xc8 : b'\xc3\x88', # È
0xc9 : b'\xc3\x89', # É
0xca : b'\xc3\x8a', # Ê
0xcb : b'\xc3\x8b', # Ë
0xcc : b'\xc3\x8c', # Ì
0xcd : b'\xc3\x8d', # Í
0xce : b'\xc3\x8e', # Î
0xcf : b'\xc3\x8f', # Ï
0xd0 : b'\xc3\x90', # Ð
0xd1 : b'\xc3\x91', # Ñ
0xd2 : b'\xc3\x92', # Ò
0xd3 : b'\xc3\x93', # Ó
0xd4 : b'\xc3\x94', # Ô
0xd5 : b'\xc3\x95', # Õ
0xd6 : b'\xc3\x96', # Ö
0xd7 : b'\xc3\x97', # ×
0xd8 : b'\xc3\x98', # Ø
0xd9 : b'\xc3\x99', # Ù
0xda : b'\xc3\x9a', # Ú
0xdb : b'\xc3\x9b', # Û
0xdc : b'\xc3\x9c', # Ü
0xdd : b'\xc3\x9d', # Ý
0xde : b'\xc3\x9e', # Þ
0xdf : b'\xc3\x9f', # ß
0xe0 : b'\xc3\xa0', # à
0xe1 : b'\xa1', # á
0xe2 : b'\xc3\xa2', # â
0xe3 : b'\xc3\xa3', # ã
0xe4 : b'\xc3\xa4', # ä
0xe5 : b'\xc3\xa5', # å
0xe6 : b'\xc3\xa6', # æ
0xe7 : b'\xc3\xa7', # ç
0xe8 : b'\xc3\xa8', # è
0xe9 : b'\xc3\xa9', # é
0xea : b'\xc3\xaa', # ê
0xeb : b'\xc3\xab', # ë
0xec : b'\xc3\xac', # ì
0xed : b'\xc3\xad', # í
0xee : b'\xc3\xae', # î
0xef : b'\xc3\xaf', # ï
0xf0 : b'\xc3\xb0', # ð
0xf1 : b'\xc3\xb1', # ñ
0xf2 : b'\xc3\xb2', # ò
0xf3 : b'\xc3\xb3', # ó
0xf4 : b'\xc3\xb4', # ô
0xf5 : b'\xc3\xb5', # õ
0xf6 : b'\xc3\xb6', # ö
0xf7 : b'\xc3\xb7', # ÷
0xf8 : b'\xc3\xb8', # ø
0xf9 : b'\xc3\xb9', # ù
0xfa : b'\xc3\xba', # ú
0xfb : b'\xc3\xbb', # û
0xfc : b'\xc3\xbc', # ü
0xfd : b'\xc3\xbd', # ý
0xfe : b'\xc3\xbe', # þ
}
MULTIBYTE_MARKERS_AND_SIZES = [
(0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF
(0xe0, 0xef, 3), # 3-byte characters start with E0-EF
(0xf0, 0xf4, 4), # 4-byte characters start with F0-F4
]
FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0]
LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1]
@classmethod
def detwingle(cls, in_bytes, main_encoding="utf8",
embedded_encoding="windows-1252"):
"""Fix characters from one encoding embedded in some other encoding.
Currently the only situation supported is Windows-1252 (or its
subset ISO-8859-1), embedded in UTF-8.
The input must be a bytestring. If you've already converted
the document to Unicode, you're too late.
The output is a bytestring in which `embedded_encoding`
characters have been converted to their `main_encoding`
equivalents.
"""
if embedded_encoding.replace('_', '-').lower() not in (
'windows-1252', 'windows_1252'):
raise NotImplementedError(
"Windows-1252 and ISO-8859-1 are the only currently supported "
"embedded encodings.")
if main_encoding.lower() not in ('utf8', 'utf-8'):
raise NotImplementedError(
"UTF-8 is the only currently supported main encoding.")
byte_chunks = []
chunk_start = 0
pos = 0
while pos < len(in_bytes):
byte = in_bytes[pos]
if not isinstance(byte, int):
# Python 2.x
byte = ord(byte)
if (byte >= cls.FIRST_MULTIBYTE_MARKER
and byte <= cls.LAST_MULTIBYTE_MARKER):
# This is the start of a UTF-8 multibyte character. Skip
# to the end.
for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES:
if byte >= start and byte <= end:
pos += size
break
elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8:
# We found a Windows-1252 character!
# Save the string up to this point as a chunk.
byte_chunks.append(in_bytes[chunk_start:pos])
# Now translate the Windows-1252 character into UTF-8
# and add it as another, one-byte chunk.
byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte])
pos += 1
chunk_start = pos
else:
# Go on to the next character.
pos += 1
if chunk_start == 0:
# The string is unchanged.
return in_bytes
else:
# Store the final chunk.
byte_chunks.append(in_bytes[chunk_start:])
return b''.join(byte_chunks)
+204
View File
@@ -0,0 +1,204 @@
"""Diagnostic functions, mainly for use when doing tech support."""
import cProfile
from StringIO import StringIO
from HTMLParser import HTMLParser
import bs4
from bs4 import BeautifulSoup, __version__
from bs4.builder import builder_registry
import os
import pstats
import random
import tempfile
import time
import traceback
import sys
import cProfile
def diagnose(data):
"""Diagnostic suite for isolating common problems."""
print "Diagnostic running on Beautiful Soup %s" % __version__
print "Python version %s" % sys.version
basic_parsers = ["html.parser", "html5lib", "lxml"]
for name in basic_parsers:
for builder in builder_registry.builders:
if name in builder.features:
break
else:
basic_parsers.remove(name)
print (
"I noticed that %s is not installed. Installing it may help." %
name)
if 'lxml' in basic_parsers:
basic_parsers.append(["lxml", "xml"])
from lxml import etree
print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
if 'html5lib' in basic_parsers:
import html5lib
print "Found html5lib version %s" % html5lib.__version__
if hasattr(data, 'read'):
data = data.read()
elif os.path.exists(data):
print '"%s" looks like a filename. Reading data from the file.' % data
data = open(data).read()
elif data.startswith("http:") or data.startswith("https:"):
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
return
print
for parser in basic_parsers:
print "Trying to parse your markup with %s" % parser
success = False
try:
soup = BeautifulSoup(data, parser)
success = True
except Exception, e:
print "%s could not parse the markup." % parser
traceback.print_exc()
if success:
print "Here's what %s did with the markup:" % parser
print soup.prettify()
print "-" * 80
def lxml_trace(data, html=True, **kwargs):
"""Print out the lxml events that occur during parsing.
This lets you see how lxml parses a document when no Beautiful
Soup code is running.
"""
from lxml import etree
for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
print("%s, %4s, %s" % (event, element.tag, element.text))
class AnnouncingParser(HTMLParser):
"""Announces HTMLParser parse events, without doing anything else."""
def _p(self, s):
print(s)
def handle_starttag(self, name, attrs):
self._p("%s START" % name)
def handle_endtag(self, name):
self._p("%s END" % name)
def handle_data(self, data):
self._p("%s DATA" % data)
def handle_charref(self, name):
self._p("%s CHARREF" % name)
def handle_entityref(self, name):
self._p("%s ENTITYREF" % name)
def handle_comment(self, data):
self._p("%s COMMENT" % data)
def handle_decl(self, data):
self._p("%s DECL" % data)
def unknown_decl(self, data):
self._p("%s UNKNOWN-DECL" % data)
def handle_pi(self, data):
self._p("%s PI" % data)
def htmlparser_trace(data):
"""Print out the HTMLParser events that occur during parsing.
This lets you see how HTMLParser parses a document when no
Beautiful Soup code is running.
"""
parser = AnnouncingParser()
parser.feed(data)
_vowels = "aeiou"
_consonants = "bcdfghjklmnpqrstvwxyz"
def rword(length=5):
"Generate a random word-like string."
s = ''
for i in range(length):
if i % 2 == 0:
t = _consonants
else:
t = _vowels
s += random.choice(t)
return s
def rsentence(length=4):
"Generate a random sentence-like string."
return " ".join(rword(random.randint(4,9)) for i in range(length))
def rdoc(num_elements=1000):
"""Randomly generate an invalid HTML document."""
tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
elements = []
for i in range(num_elements):
choice = random.randint(0,3)
if choice == 0:
# New tag.
tag_name = random.choice(tag_names)
elements.append("<%s>" % tag_name)
elif choice == 1:
elements.append(rsentence(random.randint(1,4)))
elif choice == 2:
# Close a tag.
tag_name = random.choice(tag_names)
elements.append("</%s>" % tag_name)
return "<html>" + "\n".join(elements) + "</html>"
def benchmark_parsers(num_elements=100000):
"""Very basic head-to-head performance benchmark."""
print "Comparative parser benchmark on Beautiful Soup %s" % __version__
data = rdoc(num_elements)
print "Generated a large invalid HTML document (%d bytes)." % len(data)
for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
success = False
try:
a = time.time()
soup = BeautifulSoup(data, parser)
b = time.time()
success = True
except Exception, e:
print "%s could not parse the markup." % parser
traceback.print_exc()
if success:
print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
from lxml import etree
a = time.time()
etree.HTML(data)
b = time.time()
print "Raw lxml parsed the markup in %.2fs." % (b-a)
import html5lib
parser = html5lib.HTMLParser()
a = time.time()
parser.parse(data)
b = time.time()
print "Raw html5lib parsed the markup in %.2fs." % (b-a)
def profile(num_elements=100000, parser="lxml"):
filehandle = tempfile.NamedTemporaryFile()
filename = filehandle.name
data = rdoc(num_elements)
vars = dict(bs4=bs4, data=data, parser=parser)
cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename)
stats = pstats.Stats(filename)
# stats.strip_dirs()
stats.sort_stats("cumulative")
stats.print_stats('_html5lib|bs4', 50)
if __name__ == '__main__':
diagnose(sys.stdin.read())
File diff suppressed because it is too large Load Diff
+592
View File
@@ -0,0 +1,592 @@
"""Helper classes for tests."""
import copy
import functools
import unittest
from unittest import TestCase
from bs4 import BeautifulSoup
from bs4.element import (
CharsetMetaAttributeValue,
Comment,
ContentMetaAttributeValue,
Doctype,
SoupStrainer,
)
from bs4.builder import HTMLParserTreeBuilder
default_builder = HTMLParserTreeBuilder
class SoupTest(unittest.TestCase):
@property
def default_builder(self):
return default_builder()
def soup(self, markup, **kwargs):
"""Build a Beautiful Soup object from markup."""
builder = kwargs.pop('builder', self.default_builder)
return BeautifulSoup(markup, builder=builder, **kwargs)
def document_for(self, markup):
"""Turn an HTML fragment into a document.
The details depend on the builder.
"""
return self.default_builder.test_fragment_to_document(markup)
def assertSoupEquals(self, to_parse, compare_parsed_to=None):
builder = self.default_builder
obj = BeautifulSoup(to_parse, builder=builder)
if compare_parsed_to is None:
compare_parsed_to = to_parse
self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
class HTMLTreeBuilderSmokeTest(object):
"""A basic test of a treebuilder's competence.
Any HTML treebuilder, present or future, should be able to pass
these tests. With invalid markup, there's room for interpretation,
and different parsers can handle it differently. But with the
markup in these tests, there's not much room for interpretation.
"""
def assertDoctypeHandled(self, doctype_fragment):
"""Assert that a given doctype string is handled correctly."""
doctype_str, soup = self._document_with_doctype(doctype_fragment)
# Make sure a Doctype object was created.
doctype = soup.contents[0]
self.assertEqual(doctype.__class__, Doctype)
self.assertEqual(doctype, doctype_fragment)
self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
# Make sure that the doctype was correctly associated with the
# parse tree and that the rest of the document parsed.
self.assertEqual(soup.p.contents[0], 'foo')
def _document_with_doctype(self, doctype_fragment):
"""Generate and parse a document with the given doctype."""
doctype = '<!DOCTYPE %s>' % doctype_fragment
markup = doctype + '\n<p>foo</p>'
soup = self.soup(markup)
return doctype, soup
def test_normal_doctypes(self):
"""Make sure normal, everyday HTML doctypes are handled correctly."""
self.assertDoctypeHandled("html")
self.assertDoctypeHandled(
'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
def test_empty_doctype(self):
soup = self.soup("<!DOCTYPE>")
doctype = soup.contents[0]
self.assertEqual("", doctype.strip())
def test_public_doctype_with_url(self):
doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
self.assertDoctypeHandled(doctype)
def test_system_doctype(self):
self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
def test_namespaced_system_doctype(self):
# We can handle a namespaced doctype with a system ID.
self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
def test_namespaced_public_doctype(self):
# Test a namespaced doctype with a public id.
self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
def test_real_xhtml_document(self):
"""A real XHTML document should come out more or less the same as it went in."""
markup = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>"""
soup = self.soup(markup)
self.assertEqual(
soup.encode("utf-8").replace(b"\n", b""),
markup.replace(b"\n", b""))
def test_deepcopy(self):
"""Make sure you can copy the tree builder.
This is important because the builder is part of a
BeautifulSoup object, and we want to be able to copy that.
"""
copy.deepcopy(self.default_builder)
def test_p_tag_is_never_empty_element(self):
"""A <p> tag is never designated as an empty-element tag.
Even if the markup shows it as an empty-element tag, it
shouldn't be presented that way.
"""
soup = self.soup("<p/>")
self.assertFalse(soup.p.is_empty_element)
self.assertEqual(str(soup.p), "<p></p>")
def test_unclosed_tags_get_closed(self):
"""A tag that's not closed by the end of the document should be closed.
This applies to all tags except empty-element tags.
"""
self.assertSoupEquals("<p>", "<p></p>")
self.assertSoupEquals("<b>", "<b></b>")
self.assertSoupEquals("<br>", "<br/>")
def test_br_is_always_empty_element_tag(self):
"""A <br> tag is designated as an empty-element tag.
Some parsers treat <br></br> as one <br/> tag, some parsers as
two tags, but it should always be an empty-element tag.
"""
soup = self.soup("<br></br>")
self.assertTrue(soup.br.is_empty_element)
self.assertEqual(str(soup.br), "<br/>")
def test_nested_formatting_elements(self):
self.assertSoupEquals("<em><em></em></em>")
def test_comment(self):
# Comments are represented as Comment objects.
markup = "<p>foo<!--foobar-->baz</p>"
self.assertSoupEquals(markup)
soup = self.soup(markup)
comment = soup.find(text="foobar")
self.assertEqual(comment.__class__, Comment)
# The comment is properly integrated into the tree.
foo = soup.find(text="foo")
self.assertEqual(comment, foo.next_element)
baz = soup.find(text="baz")
self.assertEqual(comment, baz.previous_element)
def test_preserved_whitespace_in_pre_and_textarea(self):
"""Whitespace must be preserved in <pre> and <textarea> tags."""
self.assertSoupEquals("<pre> </pre>")
self.assertSoupEquals("<textarea> woo </textarea>")
def test_nested_inline_elements(self):
"""Inline elements can be nested indefinitely."""
b_tag = "<b>Inside a B tag</b>"
self.assertSoupEquals(b_tag)
nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
self.assertSoupEquals(nested_b_tag)
double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
self.assertSoupEquals(nested_b_tag)
def test_nested_block_level_elements(self):
"""Block elements can be nested."""
soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
blockquote = soup.blockquote
self.assertEqual(blockquote.p.b.string, 'Foo')
self.assertEqual(blockquote.b.string, 'Foo')
def test_correctly_nested_tables(self):
"""One table can go inside another one."""
markup = ('<table id="1">'
'<tr>'
"<td>Here's another table:"
'<table id="2">'
'<tr><td>foo</td></tr>'
'</table></td>')
self.assertSoupEquals(
markup,
'<table id="1"><tr><td>Here\'s another table:'
'<table id="2"><tr><td>foo</td></tr></table>'
'</td></tr></table>')
self.assertSoupEquals(
"<table><thead><tr><td>Foo</td></tr></thead>"
"<tbody><tr><td>Bar</td></tr></tbody>"
"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
def test_deeply_nested_multivalued_attribute(self):
# html5lib can set the attributes of the same tag many times
# as it rearranges the tree. This has caused problems with
# multivalued attributes.
markup = '<table><div><div class="css"></div></div></table>'
soup = self.soup(markup)
self.assertEqual(["css"], soup.div.div['class'])
def test_angle_brackets_in_attribute_values_are_escaped(self):
self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
def test_entities_in_attributes_converted_to_unicode(self):
expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
def test_entities_in_text_converted_to_unicode(self):
expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect)
self.assertSoupEquals("<p>pi&ntilde;ata</p>", expect)
def test_quot_entity_converted_to_quotation_mark(self):
self.assertSoupEquals("<p>I said &quot;good day!&quot;</p>",
'<p>I said "good day!"</p>')
def test_out_of_range_entity(self):
expect = u"\N{REPLACEMENT CHARACTER}"
self.assertSoupEquals("&#10000000000000;", expect)
self.assertSoupEquals("&#x10000000000000;", expect)
self.assertSoupEquals("&#1000000000;", expect)
def test_multipart_strings(self):
"Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
self.assertEqual("p", soup.h2.string.next_element.name)
self.assertEqual("p", soup.p.name)
def test_basic_namespaces(self):
"""Parsers don't need to *understand* namespaces, but at the
very least they should not choke on namespaces or lose
data."""
markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>'
soup = self.soup(markup)
self.assertEqual(markup, soup.encode())
html = soup.html
self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns'])
self.assertEqual(
'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml'])
self.assertEqual(
'http://www.w3.org/2000/svg', soup.html['xmlns:svg'])
def test_multivalued_attribute_value_becomes_list(self):
markup = b'<a class="foo bar">'
soup = self.soup(markup)
self.assertEqual(['foo', 'bar'], soup.a['class'])
#
# Generally speaking, tests below this point are more tests of
# Beautiful Soup than tests of the tree builders. But parsers are
# weird, so we run these tests separately for every tree builder
# to detect any differences between them.
#
def test_can_parse_unicode_document(self):
# A seemingly innocuous document... but it's in Unicode! And
# it contains characters that can't be represented in the
# encoding found in the declaration! The horror!
markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
soup = self.soup(markup)
self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
def test_soupstrainer(self):
"""Parsers should be able to work with SoupStrainers."""
strainer = SoupStrainer("b")
soup = self.soup("A <b>bold</b> <meta/> <i>statement</i>",
parse_only=strainer)
self.assertEqual(soup.decode(), "<b>bold</b>")
def test_single_quote_attribute_values_become_double_quotes(self):
self.assertSoupEquals("<foo attr='bar'></foo>",
'<foo attr="bar"></foo>')
def test_attribute_values_with_nested_quotes_are_left_alone(self):
text = """<foo attr='bar "brawls" happen'>a</foo>"""
self.assertSoupEquals(text)
def test_attribute_values_with_double_nested_quotes_get_quoted(self):
text = """<foo attr='bar "brawls" happen'>a</foo>"""
soup = self.soup(text)
soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
self.assertSoupEquals(
soup.foo.decode(),
"""<foo attr="Brawls happen at &quot;Bob\'s Bar&quot;">a</foo>""")
def test_ampersand_in_attribute_value_gets_escaped(self):
self.assertSoupEquals('<this is="really messed up & stuff"></this>',
'<this is="really messed up &amp; stuff"></this>')
self.assertSoupEquals(
'<a href="http://example.org?a=1&b=2;3">foo</a>',
'<a href="http://example.org?a=1&amp;b=2;3">foo</a>')
def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
self.assertSoupEquals('<a href="http://example.org?a=1&amp;b=2;3"></a>')
def test_entities_in_strings_converted_during_parsing(self):
# Both XML and HTML entities are converted to Unicode characters
# during parsing.
text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
self.assertSoupEquals(text, expected)
def test_smart_quotes_converted_on_the_way_in(self):
# Microsoft smart quotes are converted to Unicode characters during
# parsing.
quote = b"<p>\x91Foo\x92</p>"
soup = self.soup(quote)
self.assertEqual(
soup.p.string,
u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
def test_non_breaking_spaces_converted_on_the_way_in(self):
soup = self.soup("<a>&nbsp;&nbsp;</a>")
self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
def test_entities_converted_on_the_way_out(self):
text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
soup = self.soup(text)
self.assertEqual(soup.p.encode("utf-8"), expected)
def test_real_iso_latin_document(self):
# Smoke test of interrelated functionality, using an
# easy-to-understand document.
# Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
# That's because we're going to encode it into ISO-Latin-1, and use
# that to test.
iso_latin_html = unicode_html.encode("iso-8859-1")
# Parse the ISO-Latin-1 HTML.
soup = self.soup(iso_latin_html)
# Encode it to UTF-8.
result = soup.encode("utf-8")
# What do we expect the result to look like? Well, it would
# look like unicode_html, except that the META tag would say
# UTF-8 instead of ISO-Latin-1.
expected = unicode_html.replace("ISO-Latin-1", "utf-8")
# And, of course, it would be in UTF-8, not Unicode.
expected = expected.encode("utf-8")
# Ta-da!
self.assertEqual(result, expected)
def test_real_shift_jis_document(self):
# Smoke test to make sure the parser can handle a document in
# Shift-JIS encoding, without choking.
shift_jis_html = (
b'<html><head></head><body><pre>'
b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
b'</pre></body></html>')
unicode_html = shift_jis_html.decode("shift-jis")
soup = self.soup(unicode_html)
# Make sure the parse tree is correctly encoded to various
# encodings.
self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8"))
self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp"))
def test_real_hebrew_document(self):
# A real-world test to make sure we can convert ISO-8859-9 (a
# Hebrew encoding) to UTF-8.
hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
soup = self.soup(
hebrew_document, from_encoding="iso8859-8")
self.assertEqual(soup.original_encoding, 'iso8859-8')
self.assertEqual(
soup.encode('utf-8'),
hebrew_document.decode("iso8859-8").encode("utf-8"))
def test_meta_tag_reflects_current_encoding(self):
# Here's the <meta> tag saying that a document is
# encoded in Shift-JIS.
meta_tag = ('<meta content="text/html; charset=x-sjis" '
'http-equiv="Content-type"/>')
# Here's a document incorporating that meta tag.
shift_jis_html = (
'<html><head>\n%s\n'
'<meta http-equiv="Content-language" content="ja"/>'
'</head><body>Shift-JIS markup goes here.') % meta_tag
soup = self.soup(shift_jis_html)
# Parse the document, and the charset is seemingly unaffected.
parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
content = parsed_meta['content']
self.assertEqual('text/html; charset=x-sjis', content)
# But that value is actually a ContentMetaAttributeValue object.
self.assertTrue(isinstance(content, ContentMetaAttributeValue))
# And it will take on a value that reflects its current
# encoding.
self.assertEqual('text/html; charset=utf8', content.encode("utf8"))
# For the rest of the story, see TestSubstitutions in
# test_tree.py.
def test_html5_style_meta_tag_reflects_current_encoding(self):
# Here's the <meta> tag saying that a document is
# encoded in Shift-JIS.
meta_tag = ('<meta id="encoding" charset="x-sjis" />')
# Here's a document incorporating that meta tag.
shift_jis_html = (
'<html><head>\n%s\n'
'<meta http-equiv="Content-language" content="ja"/>'
'</head><body>Shift-JIS markup goes here.') % meta_tag
soup = self.soup(shift_jis_html)
# Parse the document, and the charset is seemingly unaffected.
parsed_meta = soup.find('meta', id="encoding")
charset = parsed_meta['charset']
self.assertEqual('x-sjis', charset)
# But that value is actually a CharsetMetaAttributeValue object.
self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))
# And it will take on a value that reflects its current
# encoding.
self.assertEqual('utf8', charset.encode("utf8"))
def test_tag_with_no_attributes_can_have_attributes_added(self):
data = self.soup("<a>text</a>")
data.a['foo'] = 'bar'
self.assertEqual('<a foo="bar">text</a>', data.a.decode())
class XMLTreeBuilderSmokeTest(object):
def test_docstring_generated(self):
soup = self.soup("<root/>")
self.assertEqual(
soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')
def test_real_xhtml_document(self):
"""A real XHTML document should come out *exactly* the same as it went in."""
markup = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>"""
soup = self.soup(markup)
self.assertEqual(
soup.encode("utf-8"), markup)
def test_formatter_processes_script_tag_for_xml_documents(self):
doc = """
<script type="text/javascript">
</script>
"""
soup = BeautifulSoup(doc, "xml")
# lxml would have stripped this while parsing, but we can add
# it later.
soup.script.string = 'console.log("< < hey > > ");'
encoded = soup.encode()
self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)
def test_can_parse_unicode_document(self):
markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
soup = self.soup(markup)
self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
def test_popping_namespaced_tag(self):
markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
soup = self.soup(markup)
self.assertEqual(
unicode(soup.rss), markup)
def test_docstring_includes_correct_encoding(self):
soup = self.soup("<root/>")
self.assertEqual(
soup.encode("latin1"),
b'<?xml version="1.0" encoding="latin1"?>\n<root/>')
def test_large_xml_document(self):
"""A large XML document should come out the same as it went in."""
markup = (b'<?xml version="1.0" encoding="utf-8"?>\n<root>'
+ b'0' * (2**12)
+ b'</root>')
soup = self.soup(markup)
self.assertEqual(soup.encode("utf-8"), markup)
def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
self.assertSoupEquals("<p>", "<p/>")
self.assertSoupEquals("<p>foo</p>")
def test_namespaces_are_preserved(self):
markup = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
soup = self.soup(markup)
root = soup.root
self.assertEqual("http://example.com/", root['xmlns:a'])
self.assertEqual("http://example.net/", root['xmlns:b'])
def test_closing_namespaced_tag(self):
markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
soup = self.soup(markup)
self.assertEqual(unicode(soup.p), markup)
def test_namespaced_attributes(self):
markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
soup = self.soup(markup)
self.assertEqual(unicode(soup.foo), markup)
def test_namespaced_attributes_xml_namespace(self):
markup = '<foo xml:lang="fr">bar</foo>'
soup = self.soup(markup)
self.assertEqual(unicode(soup.foo), markup)
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
"""Smoke test for a tree builder that supports HTML5."""
def test_real_xhtml_document(self):
# Since XHTML is not HTML5, HTML5 parsers are not tested to handle
# XHTML documents in any particular way.
pass
def test_html_tags_have_namespace(self):
markup = "<a>"
soup = self.soup(markup)
self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace)
def test_svg_tags_have_namespace(self):
markup = '<svg><circle/></svg>'
soup = self.soup(markup)
namespace = "http://www.w3.org/2000/svg"
self.assertEqual(namespace, soup.svg.namespace)
self.assertEqual(namespace, soup.circle.namespace)
def test_mathml_tags_have_namespace(self):
markup = '<math><msqrt>5</msqrt></math>'
soup = self.soup(markup)
namespace = 'http://www.w3.org/1998/Math/MathML'
self.assertEqual(namespace, soup.math.namespace)
self.assertEqual(namespace, soup.msqrt.namespace)
def test_xml_declaration_becomes_comment(self):
markup = '<?xml version="1.0" encoding="utf-8"?><html></html>'
soup = self.soup(markup)
self.assertTrue(isinstance(soup.contents[0], Comment))
self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?')
self.assertEqual("html", soup.contents[0].next_element.name)
def skipIf(condition, reason):
def nothing(test, *args, **kwargs):
return None
def decorator(test_item):
if condition:
return nothing
else:
return test_item
return decorator
@@ -0,0 +1 @@
"The beautifulsoup tests."
@@ -0,0 +1,141 @@
"""Tests of the builder registry."""
import unittest
from bs4 import BeautifulSoup
from bs4.builder import (
builder_registry as registry,
HTMLParserTreeBuilder,
TreeBuilderRegistry,
)
try:
from bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError:
HTML5LIB_PRESENT = False
try:
from bs4.builder import (
LXMLTreeBuilderForXML,
LXMLTreeBuilder,
)
LXML_PRESENT = True
except ImportError:
LXML_PRESENT = False
class BuiltInRegistryTest(unittest.TestCase):
"""Test the built-in registry with the default builders registered."""
def test_combination(self):
if LXML_PRESENT:
self.assertEqual(registry.lookup('fast', 'html'),
LXMLTreeBuilder)
if LXML_PRESENT:
self.assertEqual(registry.lookup('permissive', 'xml'),
LXMLTreeBuilderForXML)
self.assertEqual(registry.lookup('strict', 'html'),
HTMLParserTreeBuilder)
if HTML5LIB_PRESENT:
self.assertEqual(registry.lookup('html5lib', 'html'),
HTML5TreeBuilder)
def test_lookup_by_markup_type(self):
if LXML_PRESENT:
self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
else:
self.assertEqual(registry.lookup('xml'), None)
if HTML5LIB_PRESENT:
self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
else:
self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)
def test_named_library(self):
if LXML_PRESENT:
self.assertEqual(registry.lookup('lxml', 'xml'),
LXMLTreeBuilderForXML)
self.assertEqual(registry.lookup('lxml', 'html'),
LXMLTreeBuilder)
if HTML5LIB_PRESENT:
self.assertEqual(registry.lookup('html5lib'),
HTML5TreeBuilder)
self.assertEqual(registry.lookup('html.parser'),
HTMLParserTreeBuilder)
def test_beautifulsoup_constructor_does_lookup(self):
# You can pass in a string.
BeautifulSoup("", features="html")
# Or a list of strings.
BeautifulSoup("", features=["html", "fast"])
# You'll get an exception if BS can't find an appropriate
# builder.
self.assertRaises(ValueError, BeautifulSoup,
"", features="no-such-feature")
class RegistryTest(unittest.TestCase):
"""Test the TreeBuilderRegistry class in general."""
def setUp(self):
self.registry = TreeBuilderRegistry()
def builder_for_features(self, *feature_list):
cls = type('Builder_' + '_'.join(feature_list),
(object,), {'features' : feature_list})
self.registry.register(cls)
return cls
def test_register_with_no_features(self):
builder = self.builder_for_features()
# Since the builder advertises no features, you can't find it
# by looking up features.
self.assertEqual(self.registry.lookup('foo'), None)
# But you can find it by doing a lookup with no features, if
# this happens to be the only registered builder.
self.assertEqual(self.registry.lookup(), builder)
def test_register_with_features_makes_lookup_succeed(self):
builder = self.builder_for_features('foo', 'bar')
self.assertEqual(self.registry.lookup('foo'), builder)
self.assertEqual(self.registry.lookup('bar'), builder)
def test_lookup_fails_when_no_builder_implements_feature(self):
builder = self.builder_for_features('foo', 'bar')
self.assertEqual(self.registry.lookup('baz'), None)
def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
builder1 = self.builder_for_features('foo')
builder2 = self.builder_for_features('bar')
self.assertEqual(self.registry.lookup(), builder2)
def test_lookup_fails_when_no_tree_builders_registered(self):
self.assertEqual(self.registry.lookup(), None)
def test_lookup_gets_most_recent_builder_supporting_all_features(self):
has_one = self.builder_for_features('foo')
has_the_other = self.builder_for_features('bar')
has_both_early = self.builder_for_features('foo', 'bar', 'baz')
has_both_late = self.builder_for_features('foo', 'bar', 'quux')
lacks_one = self.builder_for_features('bar')
has_the_other = self.builder_for_features('foo')
# There are two builders featuring 'foo' and 'bar', but
# the one that also features 'quux' was registered later.
self.assertEqual(self.registry.lookup('foo', 'bar'),
has_both_late)
# There is only one builder featuring 'foo', 'bar', and 'baz'.
self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
has_both_early)
def test_lookup_fails_when_cannot_reconcile_requested_features(self):
builder1 = self.builder_for_features('foo', 'bar')
builder2 = self.builder_for_features('foo', 'baz')
self.assertEqual(self.registry.lookup('bar', 'baz'), None)
@@ -0,0 +1,36 @@
"Test harness for doctests."
# pylint: disable-msg=E0611,W0142
__metaclass__ = type
__all__ = [
'additional_tests',
]
import atexit
import doctest
import os
#from pkg_resources import (
# resource_filename, resource_exists, resource_listdir, cleanup_resources)
import unittest
DOCTEST_FLAGS = (
doctest.ELLIPSIS |
doctest.NORMALIZE_WHITESPACE |
doctest.REPORT_NDIFF)
# def additional_tests():
# "Run the doc tests (README.txt and docs/*, if any exist)"
# doctest_files = [
# os.path.abspath(resource_filename('bs4', 'README.txt'))]
# if resource_exists('bs4', 'docs'):
# for name in resource_listdir('bs4', 'docs'):
# if name.endswith('.txt'):
# doctest_files.append(
# os.path.abspath(
# resource_filename('bs4', 'docs/%s' % name)))
# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
# atexit.register(cleanup_resources)
# return unittest.TestSuite((
# doctest.DocFileSuite(*doctest_files, **kwargs)))
@@ -0,0 +1,85 @@
"""Tests to ensure that the html5lib tree builder generates good trees."""
import warnings
try:
from bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError, e:
HTML5LIB_PRESENT = False
from bs4.element import SoupStrainer
from bs4.testing import (
HTML5TreeBuilderSmokeTest,
SoupTest,
skipIf,
)
@skipIf(
not HTML5LIB_PRESENT,
"html5lib seems not to be present, not testing its tree builder.")
class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
"""See ``HTML5TreeBuilderSmokeTest``."""
@property
def default_builder(self):
return HTML5TreeBuilder()
def test_soupstrainer(self):
# The html5lib tree builder does not support SoupStrainers.
strainer = SoupStrainer("b")
markup = "<p>A <b>bold</b> statement.</p>"
with warnings.catch_warnings(record=True) as w:
soup = self.soup(markup, parse_only=strainer)
self.assertEqual(
soup.decode(), self.document_for(markup))
self.assertTrue(
"the html5lib tree builder doesn't support parse_only" in
str(w[0].message))
def test_correctly_nested_tables(self):
"""html5lib inserts <tbody> tags where other parsers don't."""
markup = ('<table id="1">'
'<tr>'
"<td>Here's another table:"
'<table id="2">'
'<tr><td>foo</td></tr>'
'</table></td>')
self.assertSoupEquals(
markup,
'<table id="1"><tbody><tr><td>Here\'s another table:'
'<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
'</td></tr></tbody></table>')
self.assertSoupEquals(
"<table><thead><tr><td>Foo</td></tr></thead>"
"<tbody><tr><td>Bar</td></tr></tbody>"
"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
def test_xml_declaration_followed_by_doctype(self):
markup = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<p>foo</p>
</body>
</html>'''
soup = self.soup(markup)
# Verify that we can reach the <p> tag; this means the tree is connected.
self.assertEqual(b"<p>foo</p>", soup.p.encode())
def test_reparented_markup(self):
markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
soup = self.soup(markup)
self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
self.assertEqual(2, len(soup.find_all('p')))
def test_reparented_markup_ends_with_whitespace(self):
markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
soup = self.soup(markup)
self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
self.assertEqual(2, len(soup.find_all('p')))
@@ -0,0 +1,19 @@
"""Tests to ensure that the html.parser tree builder generates good
trees."""
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from bs4.builder import HTMLParserTreeBuilder
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
@property
def default_builder(self):
return HTMLParserTreeBuilder()
def test_namespaced_system_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one.
pass
def test_namespaced_public_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one.
pass
@@ -0,0 +1,91 @@
"""Tests to ensure that the lxml tree builder generates good trees."""
import re
import warnings
try:
import lxml.etree
LXML_PRESENT = True
LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError, e:
LXML_PRESENT = False
LXML_VERSION = (0,)
if LXML_PRESENT:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
from bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
)
from bs4.element import Comment, Doctype, SoupStrainer
from bs4.testing import skipIf
from bs4.tests import test_htmlparser
from bs4.testing import (
HTMLTreeBuilderSmokeTest,
XMLTreeBuilderSmokeTest,
SoupTest,
skipIf,
)
@skipIf(
not LXML_PRESENT,
"lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
"""See ``HTMLTreeBuilderSmokeTest``."""
@property
def default_builder(self):
return LXMLTreeBuilder()
def test_out_of_range_entity(self):
self.assertSoupEquals(
"<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
self.assertSoupEquals(
"<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
self.assertSoupEquals(
"<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
# In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
# test if an old version of lxml is installed.
@skipIf(
not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
"Skipping doctype test for old version of lxml to avoid segfault.")
def test_empty_doctype(self):
soup = self.soup("<!DOCTYPE>")
doctype = soup.contents[0]
self.assertEqual("", doctype.strip())
def test_beautifulstonesoup_is_xml_parser(self):
# Make sure that the deprecated BSS class uses an xml builder
# if one is installed.
with warnings.catch_warnings(record=True) as w:
soup = BeautifulStoneSoup("<b />")
self.assertEqual(u"<b/>", unicode(soup.b))
self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
def test_real_xhtml_document(self):
"""lxml strips the XML definition from an XHTML doc, which is fine."""
markup = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>"""
soup = self.soup(markup)
self.assertEqual(
soup.encode("utf-8").replace(b"\n", b''),
markup.replace(b'\n', b'').replace(
b'<?xml version="1.0" encoding="utf-8"?>', b''))
@skipIf(
not LXML_PRESENT,
"lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
"""See ``HTMLTreeBuilderSmokeTest``."""
@property
def default_builder(self):
return LXMLTreeBuilderForXML()
@@ -0,0 +1,434 @@
# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""
import logging
import unittest
import sys
import tempfile
from bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
)
from bs4.element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
SoupStrainer,
NamespacedAttribute,
)
import bs4.dammit
from bs4.dammit import (
EntitySubstitution,
UnicodeDammit,
)
from bs4.testing import (
SoupTest,
skipIf,
)
import warnings
try:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
LXML_PRESENT = True
except ImportError, e:
LXML_PRESENT = False
PYTHON_2_PRE_2_7 = (sys.version_info < (2,7))
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
class TestConstructor(SoupTest):
def test_short_unicode_input(self):
data = u"<h1>éé</h1>"
soup = self.soup(data)
self.assertEqual(u"éé", soup.h1.string)
def test_embedded_null(self):
data = u"<h1>foo\0bar</h1>"
soup = self.soup(data)
self.assertEqual(u"foo\0bar", soup.h1.string)
class TestDeprecatedConstructorArguments(SoupTest):
def test_parseOnlyThese_renamed_to_parse_only(self):
with warnings.catch_warnings(record=True) as w:
soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
msg = str(w[0].message)
self.assertTrue("parseOnlyThese" in msg)
self.assertTrue("parse_only" in msg)
self.assertEqual(b"<b></b>", soup.encode())
def test_fromEncoding_renamed_to_from_encoding(self):
with warnings.catch_warnings(record=True) as w:
utf8 = b"\xc3\xa9"
soup = self.soup(utf8, fromEncoding="utf8")
msg = str(w[0].message)
self.assertTrue("fromEncoding" in msg)
self.assertTrue("from_encoding" in msg)
self.assertEqual("utf8", soup.original_encoding)
def test_unrecognized_keyword_argument(self):
self.assertRaises(
TypeError, self.soup, "<a>", no_such_argument=True)
class TestWarnings(SoupTest):
def test_disk_file_warning(self):
filehandle = tempfile.NamedTemporaryFile()
filename = filehandle.name
try:
with warnings.catch_warnings(record=True) as w:
soup = self.soup(filename)
msg = str(w[0].message)
self.assertTrue("looks like a filename" in msg)
finally:
filehandle.close()
# The file no longer exists, so Beautiful Soup will no longer issue the warning.
with warnings.catch_warnings(record=True) as w:
soup = self.soup(filename)
self.assertEqual(0, len(w))
def test_url_warning(self):
with warnings.catch_warnings(record=True) as w:
soup = self.soup("http://www.crummy.com/")
msg = str(w[0].message)
self.assertTrue("looks like a URL" in msg)
with warnings.catch_warnings(record=True) as w:
soup = self.soup("http://www.crummy.com/ is great")
self.assertEqual(0, len(w))
class TestSelectiveParsing(SoupTest):
def test_parse_with_soupstrainer(self):
markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
strainer = SoupStrainer("b")
soup = self.soup(markup, parse_only=strainer)
self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")
class TestEntitySubstitution(unittest.TestCase):
"""Standalone tests of the EntitySubstitution class."""
def setUp(self):
self.sub = EntitySubstitution
def test_simple_html_substitution(self):
# Unicode characters corresponding to named HTML entites
# are substituted, and no others.
s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
self.assertEqual(self.sub.substitute_html(s),
u"foo&forall;\N{SNOWMAN}&otilde;bar")
def test_smart_quote_substitution(self):
# MS smart quotes are a common source of frustration, so we
# give them a special test.
quotes = b"\x91\x92foo\x93\x94"
dammit = UnicodeDammit(quotes)
self.assertEqual(self.sub.substitute_html(dammit.markup),
"&lsquo;&rsquo;foo&ldquo;&rdquo;")
def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
s = 'Welcome to "my bar"'
self.assertEqual(self.sub.substitute_xml(s, False), s)
def test_xml_attribute_quoting_normally_uses_double_quotes(self):
self.assertEqual(self.sub.substitute_xml("Welcome", True),
'"Welcome"')
self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
'"Bob\'s Bar"')
def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
s = 'Welcome to "my bar"'
self.assertEqual(self.sub.substitute_xml(s, True),
"'Welcome to \"my bar\"'")
def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
s = 'Welcome to "Bob\'s Bar"'
self.assertEqual(
self.sub.substitute_xml(s, True),
'"Welcome to &quot;Bob\'s Bar&quot;"')
def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
quoted = 'Welcome to "Bob\'s Bar"'
self.assertEqual(self.sub.substitute_xml(quoted), quoted)
def test_xml_quoting_handles_angle_brackets(self):
self.assertEqual(
self.sub.substitute_xml("foo<bar>"),
"foo&lt;bar&gt;")
def test_xml_quoting_handles_ampersands(self):
self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&amp;T")
def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self):
self.assertEqual(
self.sub.substitute_xml("&Aacute;T&T"),
"&amp;Aacute;T&amp;T")
def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self):
self.assertEqual(
self.sub.substitute_xml_containing_entities("&Aacute;T&T"),
"&Aacute;T&amp;T")
def test_quotes_not_html_substituted(self):
"""There's no need to do this except inside attribute values."""
text = 'Bob\'s "bar"'
self.assertEqual(self.sub.substitute_html(text), text)
class TestEncodingConversion(SoupTest):
# Test Beautiful Soup's ability to decode and encode from various
# encodings.
def setUp(self):
super(TestEncodingConversion, self).setUp()
self.unicode_data = u'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
self.utf8_data = self.unicode_data.encode("utf-8")
# Just so you know what it looks like.
self.assertEqual(
self.utf8_data,
b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>')
def test_ascii_in_unicode_out(self):
# ASCII input is converted to Unicode. The original_encoding
# attribute is set to 'utf-8', a superset of ASCII.
chardet = bs4.dammit.chardet_dammit
logging.disable(logging.WARNING)
try:
def noop(str):
return None
# Disable chardet, which will realize that the ASCII is ASCII.
bs4.dammit.chardet_dammit = noop
ascii = b"<foo>a</foo>"
soup_from_ascii = self.soup(ascii)
unicode_output = soup_from_ascii.decode()
self.assertTrue(isinstance(unicode_output, unicode))
self.assertEqual(unicode_output, self.document_for(ascii.decode()))
self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
finally:
logging.disable(logging.NOTSET)
bs4.dammit.chardet_dammit = chardet
def test_unicode_in_unicode_out(self):
# Unicode input is left alone. The original_encoding attribute
# is not set.
soup_from_unicode = self.soup(self.unicode_data)
self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!')
self.assertEqual(soup_from_unicode.original_encoding, None)
def test_utf8_in_unicode_out(self):
# UTF-8 input is converted to Unicode. The original_encoding
# attribute is set.
soup_from_utf8 = self.soup(self.utf8_data)
self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!')
def test_utf8_out(self):
# The internal data structures can be encoded as UTF-8.
soup_from_unicode = self.soup(self.unicode_data)
self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
@skipIf(
PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2,
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
def test_attribute_name_containing_unicode_characters(self):
markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
class TestUnicodeDammit(unittest.TestCase):
"""Standalone tests of UnicodeDammit."""
def test_unicode_input(self):
markup = u"I'm already Unicode! \N{SNOWMAN}"
dammit = UnicodeDammit(markup)
self.assertEqual(dammit.unicode_markup, markup)
def test_smart_quotes_to_unicode(self):
markup = b"<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup)
self.assertEqual(
dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
def test_smart_quotes_to_xml_entities(self):
markup = b"<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup, smart_quotes_to="xml")
self.assertEqual(
dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
def test_smart_quotes_to_html_entities(self):
markup = b"<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup, smart_quotes_to="html")
self.assertEqual(
dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
def test_smart_quotes_to_ascii(self):
markup = b"<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
self.assertEqual(
dammit.unicode_markup, """<foo>''""</foo>""")
def test_detect_utf8(self):
utf8 = b"\xc3\xa9"
dammit = UnicodeDammit(utf8)
self.assertEqual(dammit.unicode_markup, u'\xe9')
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
def test_convert_hebrew(self):
hebrew = b"\xed\xe5\xec\xf9"
dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
def test_dont_see_smart_quotes_where_there_are_none(self):
utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
dammit = UnicodeDammit(utf_8)
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)
def test_ignore_inappropriate_codecs(self):
utf8_data = u"Räksmörgås".encode("utf-8")
dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
def test_ignore_invalid_codecs(self):
utf8_data = u"Räksmörgås".encode("utf-8")
for bad_encoding in ['.utf8', '...', 'utF---16.!']:
dammit = UnicodeDammit(utf8_data, [bad_encoding])
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
def test_detect_html5_style_meta_tag(self):
for data in (
b'<html><meta charset="euc-jp" /></html>',
b"<html><meta charset='euc-jp' /></html>",
b"<html><meta charset=euc-jp /></html>",
b"<html><meta charset=euc-jp/></html>"):
dammit = UnicodeDammit(data, is_html=True)
self.assertEqual(
"euc-jp", dammit.original_encoding)
def test_last_ditch_entity_replacement(self):
# This is a UTF-8 document that contains bytestrings
# completely incompatible with UTF-8 (ie. encoded with some other
# encoding).
#
# Since there is no consistent encoding for the document,
# Unicode, Dammit will eventually encode the document as UTF-8
# and encode the incompatible characters as REPLACEMENT
# CHARACTER.
#
# If chardet is installed, it will detect that the document
# can be converted into ISO-8859-1 without errors. This happens
# to be the wrong encoding, but it is a consistent encoding, so the
# code we're testing here won't run.
#
# So we temporarily disable chardet if it's present.
doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
chardet = bs4.dammit.chardet_dammit
logging.disable(logging.WARNING)
try:
def noop(str):
return None
bs4.dammit.chardet_dammit = noop
dammit = UnicodeDammit(doc)
self.assertEqual(True, dammit.contains_replacement_characters)
self.assertTrue(u"\ufffd" in dammit.unicode_markup)
soup = BeautifulSoup(doc, "html.parser")
self.assertTrue(soup.contains_replacement_characters)
finally:
logging.disable(logging.NOTSET)
bs4.dammit.chardet_dammit = chardet
def test_byte_order_mark_removed(self):
# A document written in UTF-16LE will have its byte order marker stripped.
data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
dammit = UnicodeDammit(data)
self.assertEqual(u"<a>áé</a>", dammit.unicode_markup)
self.assertEqual("utf-16le", dammit.original_encoding)
def test_detwingle(self):
# Here's a UTF8 document.
utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
# Here's a Windows-1252 document.
windows_1252 = (
u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
# Through some unholy alchemy, they've been stuck together.
doc = utf8 + windows_1252 + utf8
# The document can't be turned into UTF-8:
self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")
# Unicode, Dammit thinks the whole document is Windows-1252,
# and decodes it into "☃☃☃“Hi, I like Windows!”☃☃☃"
# But if we run it through fix_embedded_windows_1252, it's fixed:
fixed = UnicodeDammit.detwingle(doc)
self.assertEqual(
u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
def test_detwingle_ignores_multibyte_characters(self):
# Each of these characters has a UTF-8 representation ending
# in \x93. \x93 is a smart quote if interpreted as
# Windows-1252. But our code knows to skip over multibyte
# UTF-8 characters, so they'll survive the process unscathed.
for tricky_unicode_char in (
u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
):
input = tricky_unicode_char.encode("utf8")
self.assertTrue(input.endswith(b'\x93'))
output = UnicodeDammit.detwingle(input)
self.assertEqual(output, input)
class TestNamedspacedAttribute(SoupTest):
def test_name_may_be_none(self):
a = NamespacedAttribute("xmlns", None)
self.assertEqual(a, "xmlns")
def test_attribute_is_equivalent_to_colon_separated_string(self):
a = NamespacedAttribute("a", "b")
self.assertEqual("a:b", a)
def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
a = NamespacedAttribute("a", "b", "c")
b = NamespacedAttribute("a", "b", "c")
self.assertEqual(a, b)
# The actual namespace is not considered.
c = NamespacedAttribute("a", "b", None)
self.assertEqual(a, c)
# But name and prefix are important.
d = NamespacedAttribute("a", "z", "c")
self.assertNotEqual(a, d)
e = NamespacedAttribute("z", "b", "c")
self.assertNotEqual(a, e)
class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):
def test_content_meta_attribute_value(self):
value = CharsetMetaAttributeValue("euc-jp")
self.assertEqual("euc-jp", value)
self.assertEqual("euc-jp", value.original_value)
self.assertEqual("utf8", value.encode("utf8"))
def test_content_meta_attribute_value(self):
value = ContentMetaAttributeValue("text/html; charset=euc-jp")
self.assertEqual("text/html; charset=euc-jp", value)
self.assertEqual("text/html; charset=euc-jp", value.original_value)
self.assertEqual("text/html; charset=utf8", value.encode("utf8"))
File diff suppressed because it is too large Load Diff
+504
View File
@@ -0,0 +1,504 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 2.1, February 1999
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
[This is the first released version of the Lesser GPL. It also counts
as the successor of the GNU Library Public License, version 2, hence
the version number 2.1.]
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
Licenses are intended to guarantee your freedom to share and change
free software--to make sure the software is free for all its users.
This license, the Lesser General Public License, applies to some
specially designated software packages--typically libraries--of the
Free Software Foundation and other authors who decide to use it. You
can use it too, but we suggest you first think carefully about whether
this license or the ordinary General Public License is the better
strategy to use in any particular case, based on the explanations below.
When we speak of free software, we are referring to freedom of use,
not price. Our General Public Licenses are designed to make sure that
you have the freedom to distribute copies of free software (and charge
for this service if you wish); that you receive source code or can get
it if you want it; that you can change the software and use pieces of
it in new free programs; and that you are informed that you can do
these things.
To protect your rights, we need to make restrictions that forbid
distributors to deny you these rights or to ask you to surrender these
rights. These restrictions translate to certain responsibilities for
you if you distribute copies of the library or if you modify it.
For example, if you distribute copies of the library, whether gratis
or for a fee, you must give the recipients all the rights that we gave
you. You must make sure that they, too, receive or can get the source
code. If you link other code with the library, you must provide
complete object files to the recipients, so that they can relink them
with the library after making changes to the library and recompiling
it. And you must show them these terms so they know their rights.
We protect your rights with a two-step method: (1) we copyright the
library, and (2) we offer you this license, which gives you legal
permission to copy, distribute and/or modify the library.
To protect each distributor, we want to make it very clear that
there is no warranty for the free library. Also, if the library is
modified by someone else and passed on, the recipients should know
that what they have is not the original version, so that the original
author's reputation will not be affected by problems that might be
introduced by others.
Finally, software patents pose a constant threat to the existence of
any free program. We wish to make sure that a company cannot
effectively restrict the users of a free program by obtaining a
restrictive license from a patent holder. Therefore, we insist that
any patent license obtained for a version of the library must be
consistent with the full freedom of use specified in this license.
Most GNU software, including some libraries, is covered by the
ordinary GNU General Public License. This license, the GNU Lesser
General Public License, applies to certain designated libraries, and
is quite different from the ordinary General Public License. We use
this license for certain libraries in order to permit linking those
libraries into non-free programs.
When a program is linked with a library, whether statically or using
a shared library, the combination of the two is legally speaking a
combined work, a derivative of the original library. The ordinary
General Public License therefore permits such linking only if the
entire combination fits its criteria of freedom. The Lesser General
Public License permits more lax criteria for linking other code with
the library.
We call this license the "Lesser" General Public License because it
does Less to protect the user's freedom than the ordinary General
Public License. It also provides other free software developers Less
of an advantage over competing non-free programs. These disadvantages
are the reason we use the ordinary General Public License for many
libraries. However, the Lesser license provides advantages in certain
special circumstances.
For example, on rare occasions, there may be a special need to
encourage the widest possible use of a certain library, so that it becomes
a de-facto standard. To achieve this, non-free programs must be
allowed to use the library. A more frequent case is that a free
library does the same job as widely used non-free libraries. In this
case, there is little to gain by limiting the free library to free
software only, so we use the Lesser General Public License.
In other cases, permission to use a particular library in non-free
programs enables a greater number of people to use a large body of
free software. For example, permission to use the GNU C Library in
non-free programs enables many more people to use the whole GNU
operating system, as well as its variant, the GNU/Linux operating
system.
Although the Lesser General Public License is Less protective of the
users' freedom, it does ensure that the user of a program that is
linked with the Library has the freedom and the wherewithal to run
that program using a modified version of the Library.
The precise terms and conditions for copying, distribution and
modification follow. Pay close attention to the difference between a
"work based on the library" and a "work that uses the library". The
former contains code derived from the library, whereas the latter must
be combined with the library in order to run.
GNU LESSER GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License Agreement applies to any software library or other
program which contains a notice placed by the copyright holder or
other authorized party saying it may be distributed under the terms of
this Lesser General Public License (also called "this License").
Each licensee is addressed as "you".
A "library" means a collection of software functions and/or data
prepared so as to be conveniently linked with application programs
(which use some of those functions and data) to form executables.
The "Library", below, refers to any such software library or work
which has been distributed under these terms. A "work based on the
Library" means either the Library or any derivative work under
copyright law: that is to say, a work containing the Library or a
portion of it, either verbatim or with modifications and/or translated
straightforwardly into another language. (Hereinafter, translation is
included without limitation in the term "modification".)
"Source code" for a work means the preferred form of the work for
making modifications to it. For a library, complete source code means
all the source code for all modules it contains, plus any associated
interface definition files, plus the scripts used to control compilation
and installation of the library.
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running a program using the Library is not restricted, and output from
such a program is covered only if its contents constitute a work based
on the Library (independent of the use of the Library in a tool for
writing it). Whether that is true depends on what the Library does
and what the program that uses the Library does.
1. You may copy and distribute verbatim copies of the Library's
complete source code as you receive it, in any medium, provided that
you conspicuously and appropriately publish on each copy an
appropriate copyright notice and disclaimer of warranty; keep intact
all the notices that refer to this License and to the absence of any
warranty; and distribute a copy of this License along with the
Library.
You may charge a fee for the physical act of transferring a copy,
and you may at your option offer warranty protection in exchange for a
fee.
2. You may modify your copy or copies of the Library or any portion
of it, thus forming a work based on the Library, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) The modified work must itself be a software library.
b) You must cause the files modified to carry prominent notices
stating that you changed the files and the date of any change.
c) You must cause the whole of the work to be licensed at no
charge to all third parties under the terms of this License.
d) If a facility in the modified Library refers to a function or a
table of data to be supplied by an application program that uses
the facility, other than as an argument passed when the facility
is invoked, then you must make a good faith effort to ensure that,
in the event an application does not supply such function or
table, the facility still operates, and performs whatever part of
its purpose remains meaningful.
(For example, a function in a library to compute square roots has
a purpose that is entirely well-defined independent of the
application. Therefore, Subsection 2d requires that any
application-supplied function or table used by this function must
be optional: if the application does not supply it, the square
root function must still compute square roots.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Library,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Library, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote
it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Library.
In addition, mere aggregation of another work not based on the Library
with the Library (or with a work based on the Library) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may opt to apply the terms of the ordinary GNU General Public
License instead of this License to a given copy of the Library. To do
this, you must alter all the notices that refer to this License, so
that they refer to the ordinary GNU General Public License, version 2,
instead of to this License. (If a newer version than version 2 of the
ordinary GNU General Public License has appeared, then you can specify
that version instead if you wish.) Do not make any other change in
these notices.
Once this change is made in a given copy, it is irreversible for
that copy, so the ordinary GNU General Public License applies to all
subsequent copies and derivative works made from that copy.
This option is useful when you wish to copy part of the code of
the Library into a program that is not a library.
4. You may copy and distribute the Library (or a portion or
derivative of it, under Section 2) in object code or executable form
under the terms of Sections 1 and 2 above provided that you accompany
it with the complete corresponding machine-readable source code, which
must be distributed under the terms of Sections 1 and 2 above on a
medium customarily used for software interchange.
If distribution of object code is made by offering access to copy
from a designated place, then offering equivalent access to copy the
source code from the same place satisfies the requirement to
distribute the source code, even though third parties are not
compelled to copy the source along with the object code.
5. A program that contains no derivative of any portion of the
Library, but is designed to work with the Library by being compiled or
linked with it, is called a "work that uses the Library". Such a
work, in isolation, is not a derivative work of the Library, and
therefore falls outside the scope of this License.
However, linking a "work that uses the Library" with the Library
creates an executable that is a derivative of the Library (because it
contains portions of the Library), rather than a "work that uses the
library". The executable is therefore covered by this License.
Section 6 states terms for distribution of such executables.
When a "work that uses the Library" uses material from a header file
that is part of the Library, the object code for the work may be a
derivative work of the Library even though the source code is not.
Whether this is true is especially significant if the work can be
linked without the Library, or if the work is itself a library. The
threshold for this to be true is not precisely defined by law.
If such an object file uses only numerical parameters, data
structure layouts and accessors, and small macros and small inline
functions (ten lines or less in length), then the use of the object
file is unrestricted, regardless of whether it is legally a derivative
work. (Executables containing this object code plus portions of the
Library will still fall under Section 6.)
Otherwise, if the work is a derivative of the Library, you may
distribute the object code for the work under the terms of Section 6.
Any executables containing that work also fall under Section 6,
whether or not they are linked directly with the Library itself.
6. As an exception to the Sections above, you may also combine or
link a "work that uses the Library" with the Library to produce a
work containing portions of the Library, and distribute that work
under terms of your choice, provided that the terms permit
modification of the work for the customer's own use and reverse
engineering for debugging such modifications.
You must give prominent notice with each copy of the work that the
Library is used in it and that the Library and its use are covered by
this License. You must supply a copy of this License. If the work
during execution displays copyright notices, you must include the
copyright notice for the Library among them, as well as a reference
directing the user to the copy of this License. Also, you must do one
of these things:
a) Accompany the work with the complete corresponding
machine-readable source code for the Library including whatever
changes were used in the work (which must be distributed under
Sections 1 and 2 above); and, if the work is an executable linked
with the Library, with the complete machine-readable "work that
uses the Library", as object code and/or source code, so that the
user can modify the Library and then relink to produce a modified
executable containing the modified Library. (It is understood
that the user who changes the contents of definitions files in the
Library will not necessarily be able to recompile the application
to use the modified definitions.)
b) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (1) uses at run time a
copy of the library already present on the user's computer system,
rather than copying library functions into the executable, and (2)
will operate properly with a modified version of the library, if
the user installs one, as long as the modified version is
interface-compatible with the version that the work was made with.
c) Accompany the work with a written offer, valid for at
least three years, to give the same user the materials
specified in Subsection 6a, above, for a charge no more
than the cost of performing this distribution.
d) If distribution of the work is made by offering access to copy
from a designated place, offer equivalent access to copy the above
specified materials from the same place.
e) Verify that the user has already received a copy of these
materials or that you have already sent this user a copy.
For an executable, the required form of the "work that uses the
Library" must include any data and utility programs needed for
reproducing the executable from it. However, as a special exception,
the materials to be distributed need not include anything that is
normally distributed (in either source or binary form) with the major
components (compiler, kernel, and so on) of the operating system on
which the executable runs, unless that component itself accompanies
the executable.
It may happen that this requirement contradicts the license
restrictions of other proprietary libraries that do not normally
accompany the operating system. Such a contradiction means you cannot
use both them and the Library together in an executable that you
distribute.
7. You may place library facilities that are a work based on the
Library side-by-side in a single library together with other library
facilities not covered by this License, and distribute such a combined
library, provided that the separate distribution of the work based on
the Library and of the other library facilities is otherwise
permitted, and provided that you do these two things:
a) Accompany the combined library with a copy of the same work
based on the Library, uncombined with any other library
facilities. This must be distributed under the terms of the
Sections above.
b) Give prominent notice with the combined library of the fact
that part of it is a work based on the Library, and explaining
where to find the accompanying uncombined form of the same work.
8. You may not copy, modify, sublicense, link with, or distribute
the Library except as expressly provided under this License. Any
attempt otherwise to copy, modify, sublicense, link with, or
distribute the Library is void, and will automatically terminate your
rights under this License. However, parties who have received copies,
or rights, from you under this License will not have their licenses
terminated so long as such parties remain in full compliance.
9. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Library or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Library (or any work based on the
Library), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Library or works based on it.
10. Each time you redistribute the Library (or any work based on the
Library), the recipient automatically receives a license from the
original licensor to copy, distribute, link with or modify the Library
subject to these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties with
this License.
11. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Library at all. For example, if a patent
license would not permit royalty-free redistribution of the Library by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Library.
If any portion of this section is held invalid or unenforceable under any
particular circumstance, the balance of the section is intended to apply,
and the section as a whole is intended to apply in other circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
12. If the distribution and/or use of the Library is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Library under this License may add
an explicit geographical distribution limitation excluding those countries,
so that distribution is permitted only in or among countries not thus
excluded. In such case, this License incorporates the limitation as if
written in the body of this License.
13. The Free Software Foundation may publish revised and/or new
versions of the Lesser General Public License from time to time.
Such new versions will be similar in spirit to the present version,
but may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Library
specifies a version number of this License which applies to it and
"any later version", you have the option of following the terms and
conditions either of that version or of any later version published by
the Free Software Foundation. If the Library does not specify a
license version number, you may choose any version ever published by
the Free Software Foundation.
14. If you wish to incorporate parts of the Library into other free
programs whose distribution conditions are incompatible with these,
write to the author to ask for permission. For software which is
copyrighted by the Free Software Foundation, write to the Free
Software Foundation; we sometimes make exceptions for this. Our
decision will be guided by the two goals of preserving the free status
of all derivatives of our free software and of promoting the sharing
and reuse of software generally.
NO WARRANTY
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Libraries
If you develop a new library, and you want it to be of the greatest
possible use to the public, we recommend making it free software that
everyone can redistribute and change. You can do so by permitting
redistribution under these terms (or, alternatively, under the terms of the
ordinary General Public License).
To apply these terms, attach the following notices to the library. It is
safest to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least the
"copyright" line and a pointer to where the full notice is found.
<one line to give the library's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Also add information on how to contact you by electronic and paper mail.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the library, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the
library `Frob' (a library for tweaking knobs) written by James Random Hacker.
<signature of Ty Coon>, 1 April 1990
Ty Coon, President of Vice
That's all there is to it!
@@ -0,0 +1,38 @@
Chardet: The Universal Character Encoding Detector
--------------------------------------------------
Detects
- ASCII, UTF-8, UTF-16 (2 variants), UTF-32 (4 variants)
- Big5, GB2312, EUC-TW, HZ-GB-2312, ISO-2022-CN (Traditional and Simplified Chinese)
- EUC-JP, SHIFT_JIS, ISO-2022-JP (Japanese)
- EUC-KR, ISO-2022-KR (Korean)
- KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, windows-1251 (Cyrillic)
- ISO-8859-2, windows-1250 (Hungarian)
- ISO-8859-5, windows-1251 (Bulgarian)
- windows-1252 (English)
- ISO-8859-7, windows-1253 (Greek)
- ISO-8859-8, windows-1255 (Visual and Logical Hebrew)
- TIS-620 (Thai)
Requires Python 2.6 or later
Command-line Tool
-----------------
chardet comes with a command-line script which reports on the encodings of one
or more files::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
About
-----
This is a continuation of Mark Pilgrim's excellent chardet. Previously, two
versions needed to be maintained: one that supported python 2.x and one that
supported python 3.x. We've recently merged with `Ian Corduscano <https://github.com/sigmavirus24>`_'s
`charade <https://github.com/sigmavirus24/charade>`_ fork, so now we have one
coherent version that works for Python 2.6+.
:maintainer: Dan Blanchard
@@ -0,0 +1,32 @@
######################## BEGIN LICENSE BLOCK ########################
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
__version__ = "2.2.1"
from sys import version_info
def detect(aBuf):
if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
(version_info >= (3, 0) and not isinstance(aBuf, bytes))):
raise ValueError('Expected a bytes object, not a unicode object')
from . import universaldetector
u = universaldetector.UniversalDetector()
u.reset()
u.feed(aBuf)
u.close()
return u.result
@@ -0,0 +1,925 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# Big5 frequency table
# by Taiwan's Mandarin Promotion Council
# <http://www.edu.tw:81/mandr/>
#
# 128 --> 0.42261
# 256 --> 0.57851
# 512 --> 0.74851
# 1024 --> 0.89384
# 2048 --> 0.97583
#
# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98
# Random Distribution Ration = 512/(5401-512)=0.105
#
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
#Char to FreqOrder table
BIG5_TABLE_SIZE = 5376
Big5CharToFreqOrder = (
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48
63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64
3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80
4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96
5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112
630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128
179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144
995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160
2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176
1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192
3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208
706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 224
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240
3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256
2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272
437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288
3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304
1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320
5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336
266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352
5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368
1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384
32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400
188, 109, 499,5032,3686,1717,1790, 888,1217,3040,4520,5033,3565,5034,3352,1520, # 416
3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432
3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448
324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464
2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480
2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496
314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 757,1162,1879,1616,3459, # 512
287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528
3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544
1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560
1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576
1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, # 592
2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608
265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624
4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640
1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656
5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672
2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688
383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704
98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720
523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736
710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752
5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768
379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784
1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800
585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816
690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832
5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848
1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864
544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880
3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896
4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912
3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928
279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944
610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 338,1282,3096, 693,2871,1411, # 960
1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976
4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992
3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008
3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024
2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040
5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 1056
3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072
5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088
1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104
2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120
1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136
78,3829,3830, 267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152
1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168
4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184
3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200
534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216
165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232
626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248
2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264
5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280
1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296
2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312
1444, 306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328
1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344
5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360
5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376
5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392
3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408
4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424
4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440
2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456
5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472
3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488
598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504
5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520
5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536
1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552
2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568
3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584
4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600
5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616
3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632
4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648
1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664
1871,2762,3004,5187, 435,5188, 343,1108, 596, 17,1751,4579,2239,3477,3709,5189, # 1680
4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696
1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712
240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728
1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744
1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760
3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776
619,5202,3154,3294, 215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792
5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808
2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824
1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840
1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856
5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872
829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888
4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904
375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920
2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936
444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952
1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968
1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984
730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000
4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016
4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032
1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048
3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064
5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080
5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096
1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112
2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128
1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144
3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160
2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176
3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192
2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208
4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224
4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240
3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256
97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272
3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288
424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304
3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320
4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336
3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352
1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368
5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384
199,2883,4038,5284,3606,2727,1753, 761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400
5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416
1702,1226, 102,1547, 62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432
391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448
4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464
4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480
397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496
2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512
2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528
3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544
1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560
4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576
2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592
1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608
1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624
2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640
3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656
1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672
5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688
1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704
4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720
1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736
135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 2752
1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768
4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784
4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800
2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816
1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832
4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848
660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864
5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880
2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896
3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912
4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928
790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944
5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960
5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976
1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992
4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008
4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024
2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040
3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056
3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072
2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088
1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104
4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120
3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136
3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152
2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168
4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184
5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200
3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216
2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232
3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248
1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264
2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280
3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296
4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312
2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328
2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344
5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360
1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376
2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392
1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408
3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424
4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440
2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456
3245, 981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472
3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488
2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504
4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520
2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536
3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552
4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568
5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584
3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600
194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616
1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632
4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648
1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664
4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680
5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696
510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712
5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728
5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744
2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760
3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776
2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792
2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808
681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824
1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840
4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856
3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872
3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888
838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904
2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920
625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936
2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952
4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968
1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984
4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000
1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016
3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032
574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048
3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064
5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080
5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096
3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112
3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128
1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144
2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160
5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176
1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192
1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208
3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224
919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240
1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 4256
4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272
5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288
2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304
3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320
516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 4336
1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 4352
2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368
2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384
5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400
5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416
5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432
2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448
2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464
1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480
4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496
3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512
3415,4749,2437, 451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528
4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544
4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560
2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576
2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592
5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608
4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624
5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640
4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656
502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672
121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688
1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704
3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720
4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736
1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752
5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768
2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784
2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800
3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816
5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832
1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848
3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864
5673,5674,3928,2466,5675, 658,1103, 225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880
1243,5680,4142, 963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896
5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912
2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928
3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944
2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960
3931, 533,4417,3660,2184, 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976
3932,1988, 618, 427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992
3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008
4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024
803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040
2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056
4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072
3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088
5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104
1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120
5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136
425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152
1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168
479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184
4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200
1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216
4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232
1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248
433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264
3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280
4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296
5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312
938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328
3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 #last 512
#Everything below is of no interest for detection purpose
2522,1613,4812,5799,3345,3945,2523,5800,4162,5801,1637,4163,2471,4813,3946,5802, # 5392
2500,3034,3800,5803,5804,2195,4814,5805,2163,5806,5807,5808,5809,5810,5811,5812, # 5408
5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828, # 5424
5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844, # 5440
5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856,5857,5858,5859,5860, # 5456
5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872,5873,5874,5875,5876, # 5472
5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888,5889,5890,5891,5892, # 5488
5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905,5906,5907,5908, # 5504
5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920,5921,5922,5923,5924, # 5520
5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936,5937,5938,5939,5940, # 5536
5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952,5953,5954,5955,5956, # 5552
5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968,5969,5970,5971,5972, # 5568
5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984,5985,5986,5987,5988, # 5584
5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004, # 5600
6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020, # 5616
6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032,6033,6034,6035,6036, # 5632
6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052, # 5648
6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068, # 5664
6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084, # 5680
6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100, # 5696
6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116, # 5712
6117,6118,6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,6132, # 5728
6133,6134,6135,6136,6137,6138,6139,6140,6141,6142,6143,6144,6145,6146,6147,6148, # 5744
6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163,6164, # 5760
6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179,6180, # 5776
6181,6182,6183,6184,6185,6186,6187,6188,6189,6190,6191,6192,6193,6194,6195,6196, # 5792
6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210,6211,6212, # 5808
6213,6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,3670,6224,6225,6226,6227, # 5824
6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241,6242,6243, # 5840
6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,6254,6255,6256,6257,6258,6259, # 5856
6260,6261,6262,6263,6264,6265,6266,6267,6268,6269,6270,6271,6272,6273,6274,6275, # 5872
6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,4815,6286,6287,6288,6289,6290, # 5888
6291,6292,4816,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,6303,6304,6305, # 5904
6306,6307,6308,6309,6310,6311,4817,4818,6312,6313,6314,6315,6316,6317,6318,4819, # 5920
6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333,6334, # 5936
6335,6336,6337,4820,6338,6339,6340,6341,6342,6343,6344,6345,6346,6347,6348,6349, # 5952
6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363,6364,6365, # 5968
6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379,6380,6381, # 5984
6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395,6396,6397, # 6000
6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,3441,6411,6412, # 6016
6413,6414,6415,6416,6417,6418,6419,6420,6421,6422,6423,6424,6425,4440,6426,6427, # 6032
6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,6439,6440,6441,6442,6443, # 6048
6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,4821,6455,6456,6457,6458, # 6064
6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472,6473,6474, # 6080
6475,6476,6477,3947,3948,6478,6479,6480,6481,3272,4441,6482,6483,6484,6485,4442, # 6096
6486,6487,6488,6489,6490,6491,6492,6493,6494,6495,6496,4822,6497,6498,6499,6500, # 6112
6501,6502,6503,6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516, # 6128
6517,6518,6519,6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532, # 6144
6533,6534,6535,6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548, # 6160
6549,6550,6551,6552,6553,6554,6555,6556,2784,6557,4823,6558,6559,6560,6561,6562, # 6176
6563,6564,6565,6566,6567,6568,6569,3949,6570,6571,6572,4824,6573,6574,6575,6576, # 6192
6577,6578,6579,6580,6581,6582,6583,4825,6584,6585,6586,3950,2785,6587,6588,6589, # 6208
6590,6591,6592,6593,6594,6595,6596,6597,6598,6599,6600,6601,6602,6603,6604,6605, # 6224
6606,6607,6608,6609,6610,6611,6612,4826,6613,6614,6615,4827,6616,6617,6618,6619, # 6240
6620,6621,6622,6623,6624,6625,4164,6626,6627,6628,6629,6630,6631,6632,6633,6634, # 6256
3547,6635,4828,6636,6637,6638,6639,6640,6641,6642,3951,2984,6643,6644,6645,6646, # 6272
6647,6648,6649,4165,6650,4829,6651,6652,4830,6653,6654,6655,6656,6657,6658,6659, # 6288
6660,6661,6662,4831,6663,6664,6665,6666,6667,6668,6669,6670,6671,4166,6672,4832, # 6304
3952,6673,6674,6675,6676,4833,6677,6678,6679,4167,6680,6681,6682,3198,6683,6684, # 6320
6685,6686,6687,6688,6689,6690,6691,6692,6693,6694,6695,6696,6697,4834,6698,6699, # 6336
6700,6701,6702,6703,6704,6705,6706,6707,6708,6709,6710,6711,6712,6713,6714,6715, # 6352
6716,6717,6718,6719,6720,6721,6722,6723,6724,6725,6726,6727,6728,6729,6730,6731, # 6368
6732,6733,6734,4443,6735,6736,6737,6738,6739,6740,6741,6742,6743,6744,6745,4444, # 6384
6746,6747,6748,6749,6750,6751,6752,6753,6754,6755,6756,6757,6758,6759,6760,6761, # 6400
6762,6763,6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777, # 6416
6778,6779,6780,6781,4168,6782,6783,3442,6784,6785,6786,6787,6788,6789,6790,6791, # 6432
4169,6792,6793,6794,6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806, # 6448
6807,6808,6809,6810,6811,4835,6812,6813,6814,4445,6815,6816,4446,6817,6818,6819, # 6464
6820,6821,6822,6823,6824,6825,6826,6827,6828,6829,6830,6831,6832,6833,6834,6835, # 6480
3548,6836,6837,6838,6839,6840,6841,6842,6843,6844,6845,6846,4836,6847,6848,6849, # 6496
6850,6851,6852,6853,6854,3953,6855,6856,6857,6858,6859,6860,6861,6862,6863,6864, # 6512
6865,6866,6867,6868,6869,6870,6871,6872,6873,6874,6875,6876,6877,3199,6878,6879, # 6528
6880,6881,6882,4447,6883,6884,6885,6886,6887,6888,6889,6890,6891,6892,6893,6894, # 6544
6895,6896,6897,6898,6899,6900,6901,6902,6903,6904,4170,6905,6906,6907,6908,6909, # 6560
6910,6911,6912,6913,6914,6915,6916,6917,6918,6919,6920,6921,6922,6923,6924,6925, # 6576
6926,6927,4837,6928,6929,6930,6931,6932,6933,6934,6935,6936,3346,6937,6938,4838, # 6592
6939,6940,6941,4448,6942,6943,6944,6945,6946,4449,6947,6948,6949,6950,6951,6952, # 6608
6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966,6967,6968, # 6624
6969,6970,6971,6972,6973,6974,6975,6976,6977,6978,6979,6980,6981,6982,6983,6984, # 6640
6985,6986,6987,6988,6989,6990,6991,6992,6993,6994,3671,6995,6996,6997,6998,4839, # 6656
6999,7000,7001,7002,3549,7003,7004,7005,7006,7007,7008,7009,7010,7011,7012,7013, # 6672
7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027,7028,7029, # 6688
7030,4840,7031,7032,7033,7034,7035,7036,7037,7038,4841,7039,7040,7041,7042,7043, # 6704
7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058,7059, # 6720
7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,2985,7071,7072,7073,7074, # 6736
7075,7076,7077,7078,7079,7080,4842,7081,7082,7083,7084,7085,7086,7087,7088,7089, # 6752
7090,7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105, # 6768
7106,7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,4450,7119,7120, # 6784
7121,7122,7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136, # 6800
7137,7138,7139,7140,7141,7142,7143,4843,7144,7145,7146,7147,7148,7149,7150,7151, # 6816
7152,7153,7154,7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167, # 6832
7168,7169,7170,7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183, # 6848
7184,7185,7186,7187,7188,4171,4172,7189,7190,7191,7192,7193,7194,7195,7196,7197, # 6864
7198,7199,7200,7201,7202,7203,7204,7205,7206,7207,7208,7209,7210,7211,7212,7213, # 6880
7214,7215,7216,7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229, # 6896
7230,7231,7232,7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245, # 6912
7246,7247,7248,7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261, # 6928
7262,7263,7264,7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277, # 6944
7278,7279,7280,7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293, # 6960
7294,7295,7296,4844,7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308, # 6976
7309,7310,7311,7312,7313,7314,7315,7316,4451,7317,7318,7319,7320,7321,7322,7323, # 6992
7324,7325,7326,7327,7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339, # 7008
7340,7341,7342,7343,7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,4173,7354, # 7024
7355,4845,7356,7357,7358,7359,7360,7361,7362,7363,7364,7365,7366,7367,7368,7369, # 7040
7370,7371,7372,7373,7374,7375,7376,7377,7378,7379,7380,7381,7382,7383,7384,7385, # 7056
7386,7387,7388,4846,7389,7390,7391,7392,7393,7394,7395,7396,7397,7398,7399,7400, # 7072
7401,7402,7403,7404,7405,3672,7406,7407,7408,7409,7410,7411,7412,7413,7414,7415, # 7088
7416,7417,7418,7419,7420,7421,7422,7423,7424,7425,7426,7427,7428,7429,7430,7431, # 7104
7432,7433,7434,7435,7436,7437,7438,7439,7440,7441,7442,7443,7444,7445,7446,7447, # 7120
7448,7449,7450,7451,7452,7453,4452,7454,3200,7455,7456,7457,7458,7459,7460,7461, # 7136
7462,7463,7464,7465,7466,7467,7468,7469,7470,7471,7472,7473,7474,4847,7475,7476, # 7152
7477,3133,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487,7488,7489,7490,7491, # 7168
7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,3347,7503,7504,7505,7506, # 7184
7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519,7520,7521,4848, # 7200
7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535,7536,7537, # 7216
7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,3801,4849,7550,7551, # 7232
7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, # 7248
7568,7569,3035,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582, # 7264
7583,7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598, # 7280
7599,7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614, # 7296
7615,7616,4850,7617,7618,3802,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628, # 7312
7629,7630,7631,7632,4851,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643, # 7328
7644,7645,7646,7647,7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659, # 7344
7660,7661,7662,7663,7664,7665,7666,7667,7668,7669,7670,4453,7671,7672,7673,7674, # 7360
7675,7676,7677,7678,7679,7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690, # 7376
7691,7692,7693,7694,7695,7696,7697,3443,7698,7699,7700,7701,7702,4454,7703,7704, # 7392
7705,7706,7707,7708,7709,7710,7711,7712,7713,2472,7714,7715,7716,7717,7718,7719, # 7408
7720,7721,7722,7723,7724,7725,7726,7727,7728,7729,7730,7731,3954,7732,7733,7734, # 7424
7735,7736,7737,7738,7739,7740,7741,7742,7743,7744,7745,7746,7747,7748,7749,7750, # 7440
3134,7751,7752,4852,7753,7754,7755,4853,7756,7757,7758,7759,7760,4174,7761,7762, # 7456
7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,7777,7778, # 7472
7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791,7792,7793,7794, # 7488
7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,4854,7806,7807,7808,7809, # 7504
7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824,7825, # 7520
4855,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7536
7841,7842,7843,7844,7845,7846,7847,3955,7848,7849,7850,7851,7852,7853,7854,7855, # 7552
7856,7857,7858,7859,7860,3444,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870, # 7568
7871,7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886, # 7584
7887,7888,7889,7890,7891,4175,7892,7893,7894,7895,7896,4856,4857,7897,7898,7899, # 7600
7900,2598,7901,7902,7903,7904,7905,7906,7907,7908,4455,7909,7910,7911,7912,7913, # 7616
7914,3201,7915,7916,7917,7918,7919,7920,7921,4858,7922,7923,7924,7925,7926,7927, # 7632
7928,7929,7930,7931,7932,7933,7934,7935,7936,7937,7938,7939,7940,7941,7942,7943, # 7648
7944,7945,7946,7947,7948,7949,7950,7951,7952,7953,7954,7955,7956,7957,7958,7959, # 7664
7960,7961,7962,7963,7964,7965,7966,7967,7968,7969,7970,7971,7972,7973,7974,7975, # 7680
7976,7977,7978,7979,7980,7981,4859,7982,7983,7984,7985,7986,7987,7988,7989,7990, # 7696
7991,7992,7993,7994,7995,7996,4860,7997,7998,7999,8000,8001,8002,8003,8004,8005, # 7712
8006,8007,8008,8009,8010,8011,8012,8013,8014,8015,8016,4176,8017,8018,8019,8020, # 7728
8021,8022,8023,4861,8024,8025,8026,8027,8028,8029,8030,8031,8032,8033,8034,8035, # 7744
8036,4862,4456,8037,8038,8039,8040,4863,8041,8042,8043,8044,8045,8046,8047,8048, # 7760
8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063,8064, # 7776
8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,8080, # 7792
8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,8096, # 7808
8097,8098,8099,4864,4177,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110, # 7824
8111,8112,8113,8114,8115,8116,8117,8118,8119,8120,4178,8121,8122,8123,8124,8125, # 7840
8126,8127,8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141, # 7856
8142,8143,8144,8145,4865,4866,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155, # 7872
8156,8157,8158,8159,8160,8161,8162,8163,8164,8165,4179,8166,8167,8168,8169,8170, # 7888
8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181,4457,8182,8183,8184,8185, # 7904
8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197,8198,8199,8200,8201, # 7920
8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213,8214,8215,8216,8217, # 7936
8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229,8230,8231,8232,8233, # 7952
8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245,8246,8247,8248,8249, # 7968
8250,8251,8252,8253,8254,8255,8256,3445,8257,8258,8259,8260,8261,8262,4458,8263, # 7984
8264,8265,8266,8267,8268,8269,8270,8271,8272,4459,8273,8274,8275,8276,3550,8277, # 8000
8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,4460,8290,8291,8292, # 8016
8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,4867, # 8032
8308,8309,8310,8311,8312,3551,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322, # 8048
8323,8324,8325,8326,4868,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337, # 8064
8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353, # 8080
8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,4869,4461,8364,8365,8366,8367, # 8096
8368,8369,8370,4870,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382, # 8112
8383,8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398, # 8128
8399,8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,4871,8411,8412,8413, # 8144
8414,8415,8416,8417,8418,8419,8420,8421,8422,4462,8423,8424,8425,8426,8427,8428, # 8160
8429,8430,8431,8432,8433,2986,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443, # 8176
8444,8445,8446,8447,8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459, # 8192
8460,8461,8462,8463,8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475, # 8208
8476,8477,8478,4180,8479,8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490, # 8224
8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506, # 8240
8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522, # 8256
8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538, # 8272
8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554, # 8288
8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,4872,8565,8566,8567,8568,8569, # 8304
8570,8571,8572,8573,4873,8574,8575,8576,8577,8578,8579,8580,8581,8582,8583,8584, # 8320
8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597,8598,8599,8600, # 8336
8601,8602,8603,8604,8605,3803,8606,8607,8608,8609,8610,8611,8612,8613,4874,3804, # 8352
8614,8615,8616,8617,8618,8619,8620,8621,3956,8622,8623,8624,8625,8626,8627,8628, # 8368
8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,2865,8639,8640,8641,8642,8643, # 8384
8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,4463,8657,8658, # 8400
8659,4875,4876,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672, # 8416
8673,8674,8675,8676,8677,8678,8679,8680,8681,4464,8682,8683,8684,8685,8686,8687, # 8432
8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, # 8448
8704,8705,8706,8707,8708,8709,2261,8710,8711,8712,8713,8714,8715,8716,8717,8718, # 8464
8719,8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,4181, # 8480
8734,8735,8736,8737,8738,8739,8740,8741,8742,8743,8744,8745,8746,8747,8748,8749, # 8496
8750,8751,8752,8753,8754,8755,8756,8757,8758,8759,8760,8761,8762,8763,4877,8764, # 8512
8765,8766,8767,8768,8769,8770,8771,8772,8773,8774,8775,8776,8777,8778,8779,8780, # 8528
8781,8782,8783,8784,8785,8786,8787,8788,4878,8789,4879,8790,8791,8792,4880,8793, # 8544
8794,8795,8796,8797,8798,8799,8800,8801,4881,8802,8803,8804,8805,8806,8807,8808, # 8560
8809,8810,8811,8812,8813,8814,8815,3957,8816,8817,8818,8819,8820,8821,8822,8823, # 8576
8824,8825,8826,8827,8828,8829,8830,8831,8832,8833,8834,8835,8836,8837,8838,8839, # 8592
8840,8841,8842,8843,8844,8845,8846,8847,4882,8848,8849,8850,8851,8852,8853,8854, # 8608
8855,8856,8857,8858,8859,8860,8861,8862,8863,8864,8865,8866,8867,8868,8869,8870, # 8624
8871,8872,8873,8874,8875,8876,8877,8878,8879,8880,8881,8882,8883,8884,3202,8885, # 8640
8886,8887,8888,8889,8890,8891,8892,8893,8894,8895,8896,8897,8898,8899,8900,8901, # 8656
8902,8903,8904,8905,8906,8907,8908,8909,8910,8911,8912,8913,8914,8915,8916,8917, # 8672
8918,8919,8920,8921,8922,8923,8924,4465,8925,8926,8927,8928,8929,8930,8931,8932, # 8688
4883,8933,8934,8935,8936,8937,8938,8939,8940,8941,8942,8943,2214,8944,8945,8946, # 8704
8947,8948,8949,8950,8951,8952,8953,8954,8955,8956,8957,8958,8959,8960,8961,8962, # 8720
8963,8964,8965,4884,8966,8967,8968,8969,8970,8971,8972,8973,8974,8975,8976,8977, # 8736
8978,8979,8980,8981,8982,8983,8984,8985,8986,8987,8988,8989,8990,8991,8992,4885, # 8752
8993,8994,8995,8996,8997,8998,8999,9000,9001,9002,9003,9004,9005,9006,9007,9008, # 8768
9009,9010,9011,9012,9013,9014,9015,9016,9017,9018,9019,9020,9021,4182,9022,9023, # 8784
9024,9025,9026,9027,9028,9029,9030,9031,9032,9033,9034,9035,9036,9037,9038,9039, # 8800
9040,9041,9042,9043,9044,9045,9046,9047,9048,9049,9050,9051,9052,9053,9054,9055, # 8816
9056,9057,9058,9059,9060,9061,9062,9063,4886,9064,9065,9066,9067,9068,9069,4887, # 8832
9070,9071,9072,9073,9074,9075,9076,9077,9078,9079,9080,9081,9082,9083,9084,9085, # 8848
9086,9087,9088,9089,9090,9091,9092,9093,9094,9095,9096,9097,9098,9099,9100,9101, # 8864
9102,9103,9104,9105,9106,9107,9108,9109,9110,9111,9112,9113,9114,9115,9116,9117, # 8880
9118,9119,9120,9121,9122,9123,9124,9125,9126,9127,9128,9129,9130,9131,9132,9133, # 8896
9134,9135,9136,9137,9138,9139,9140,9141,3958,9142,9143,9144,9145,9146,9147,9148, # 8912
9149,9150,9151,4888,9152,9153,9154,9155,9156,9157,9158,9159,9160,9161,9162,9163, # 8928
9164,9165,9166,9167,9168,9169,9170,9171,9172,9173,9174,9175,4889,9176,9177,9178, # 8944
9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194, # 8960
9195,9196,9197,9198,9199,9200,9201,9202,9203,4890,9204,9205,9206,9207,9208,9209, # 8976
9210,9211,9212,9213,9214,9215,9216,9217,9218,9219,9220,9221,9222,4466,9223,9224, # 8992
9225,9226,9227,9228,9229,9230,9231,9232,9233,9234,9235,9236,9237,9238,9239,9240, # 9008
9241,9242,9243,9244,9245,4891,9246,9247,9248,9249,9250,9251,9252,9253,9254,9255, # 9024
9256,9257,4892,9258,9259,9260,9261,4893,4894,9262,9263,9264,9265,9266,9267,9268, # 9040
9269,9270,9271,9272,9273,4467,9274,9275,9276,9277,9278,9279,9280,9281,9282,9283, # 9056
9284,9285,3673,9286,9287,9288,9289,9290,9291,9292,9293,9294,9295,9296,9297,9298, # 9072
9299,9300,9301,9302,9303,9304,9305,9306,9307,9308,9309,9310,9311,9312,9313,9314, # 9088
9315,9316,9317,9318,9319,9320,9321,9322,4895,9323,9324,9325,9326,9327,9328,9329, # 9104
9330,9331,9332,9333,9334,9335,9336,9337,9338,9339,9340,9341,9342,9343,9344,9345, # 9120
9346,9347,4468,9348,9349,9350,9351,9352,9353,9354,9355,9356,9357,9358,9359,9360, # 9136
9361,9362,9363,9364,9365,9366,9367,9368,9369,9370,9371,9372,9373,4896,9374,4469, # 9152
9375,9376,9377,9378,9379,4897,9380,9381,9382,9383,9384,9385,9386,9387,9388,9389, # 9168
9390,9391,9392,9393,9394,9395,9396,9397,9398,9399,9400,9401,9402,9403,9404,9405, # 9184
9406,4470,9407,2751,9408,9409,3674,3552,9410,9411,9412,9413,9414,9415,9416,9417, # 9200
9418,9419,9420,9421,4898,9422,9423,9424,9425,9426,9427,9428,9429,3959,9430,9431, # 9216
9432,9433,9434,9435,9436,4471,9437,9438,9439,9440,9441,9442,9443,9444,9445,9446, # 9232
9447,9448,9449,9450,3348,9451,9452,9453,9454,9455,9456,9457,9458,9459,9460,9461, # 9248
9462,9463,9464,9465,9466,9467,9468,9469,9470,9471,9472,4899,9473,9474,9475,9476, # 9264
9477,4900,9478,9479,9480,9481,9482,9483,9484,9485,9486,9487,9488,3349,9489,9490, # 9280
9491,9492,9493,9494,9495,9496,9497,9498,9499,9500,9501,9502,9503,9504,9505,9506, # 9296
9507,9508,9509,9510,9511,9512,9513,9514,9515,9516,9517,9518,9519,9520,4901,9521, # 9312
9522,9523,9524,9525,9526,4902,9527,9528,9529,9530,9531,9532,9533,9534,9535,9536, # 9328
9537,9538,9539,9540,9541,9542,9543,9544,9545,9546,9547,9548,9549,9550,9551,9552, # 9344
9553,9554,9555,9556,9557,9558,9559,9560,9561,9562,9563,9564,9565,9566,9567,9568, # 9360
9569,9570,9571,9572,9573,9574,9575,9576,9577,9578,9579,9580,9581,9582,9583,9584, # 9376
3805,9585,9586,9587,9588,9589,9590,9591,9592,9593,9594,9595,9596,9597,9598,9599, # 9392
9600,9601,9602,4903,9603,9604,9605,9606,9607,4904,9608,9609,9610,9611,9612,9613, # 9408
9614,4905,9615,9616,9617,9618,9619,9620,9621,9622,9623,9624,9625,9626,9627,9628, # 9424
9629,9630,9631,9632,4906,9633,9634,9635,9636,9637,9638,9639,9640,9641,9642,9643, # 9440
4907,9644,9645,9646,9647,9648,9649,9650,9651,9652,9653,9654,9655,9656,9657,9658, # 9456
9659,9660,9661,9662,9663,9664,9665,9666,9667,9668,9669,9670,9671,9672,4183,9673, # 9472
9674,9675,9676,9677,4908,9678,9679,9680,9681,4909,9682,9683,9684,9685,9686,9687, # 9488
9688,9689,9690,4910,9691,9692,9693,3675,9694,9695,9696,2945,9697,9698,9699,9700, # 9504
9701,9702,9703,9704,9705,4911,9706,9707,9708,9709,9710,9711,9712,9713,9714,9715, # 9520
9716,9717,9718,9719,9720,9721,9722,9723,9724,9725,9726,9727,9728,9729,9730,9731, # 9536
9732,9733,9734,9735,4912,9736,9737,9738,9739,9740,4913,9741,9742,9743,9744,9745, # 9552
9746,9747,9748,9749,9750,9751,9752,9753,9754,9755,9756,9757,9758,4914,9759,9760, # 9568
9761,9762,9763,9764,9765,9766,9767,9768,9769,9770,9771,9772,9773,9774,9775,9776, # 9584
9777,9778,9779,9780,9781,9782,4915,9783,9784,9785,9786,9787,9788,9789,9790,9791, # 9600
9792,9793,4916,9794,9795,9796,9797,9798,9799,9800,9801,9802,9803,9804,9805,9806, # 9616
9807,9808,9809,9810,9811,9812,9813,9814,9815,9816,9817,9818,9819,9820,9821,9822, # 9632
9823,9824,9825,9826,9827,9828,9829,9830,9831,9832,9833,9834,9835,9836,9837,9838, # 9648
9839,9840,9841,9842,9843,9844,9845,9846,9847,9848,9849,9850,9851,9852,9853,9854, # 9664
9855,9856,9857,9858,9859,9860,9861,9862,9863,9864,9865,9866,9867,9868,4917,9869, # 9680
9870,9871,9872,9873,9874,9875,9876,9877,9878,9879,9880,9881,9882,9883,9884,9885, # 9696
9886,9887,9888,9889,9890,9891,9892,4472,9893,9894,9895,9896,9897,3806,9898,9899, # 9712
9900,9901,9902,9903,9904,9905,9906,9907,9908,9909,9910,9911,9912,9913,9914,4918, # 9728
9915,9916,9917,4919,9918,9919,9920,9921,4184,9922,9923,9924,9925,9926,9927,9928, # 9744
9929,9930,9931,9932,9933,9934,9935,9936,9937,9938,9939,9940,9941,9942,9943,9944, # 9760
9945,9946,4920,9947,9948,9949,9950,9951,9952,9953,9954,9955,4185,9956,9957,9958, # 9776
9959,9960,9961,9962,9963,9964,9965,4921,9966,9967,9968,4473,9969,9970,9971,9972, # 9792
9973,9974,9975,9976,9977,4474,9978,9979,9980,9981,9982,9983,9984,9985,9986,9987, # 9808
9988,9989,9990,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000,10001,10002,10003, # 9824
10004,10005,10006,10007,10008,10009,10010,10011,10012,10013,10014,10015,10016,10017,10018,10019, # 9840
10020,10021,4922,10022,4923,10023,10024,10025,10026,10027,10028,10029,10030,10031,10032,10033, # 9856
10034,10035,10036,10037,10038,10039,10040,10041,10042,10043,10044,10045,10046,10047,10048,4924, # 9872
10049,10050,10051,10052,10053,10054,10055,10056,10057,10058,10059,10060,10061,10062,10063,10064, # 9888
10065,10066,10067,10068,10069,10070,10071,10072,10073,10074,10075,10076,10077,10078,10079,10080, # 9904
10081,10082,10083,10084,10085,10086,10087,4475,10088,10089,10090,10091,10092,10093,10094,10095, # 9920
10096,10097,4476,10098,10099,10100,10101,10102,10103,10104,10105,10106,10107,10108,10109,10110, # 9936
10111,2174,10112,10113,10114,10115,10116,10117,10118,10119,10120,10121,10122,10123,10124,10125, # 9952
10126,10127,10128,10129,10130,10131,10132,10133,10134,10135,10136,10137,10138,10139,10140,3807, # 9968
4186,4925,10141,10142,10143,10144,10145,10146,10147,4477,4187,10148,10149,10150,10151,10152, # 9984
10153,4188,10154,10155,10156,10157,10158,10159,10160,10161,4926,10162,10163,10164,10165,10166, #10000
10167,10168,10169,10170,10171,10172,10173,10174,10175,10176,10177,10178,10179,10180,10181,10182, #10016
10183,10184,10185,10186,10187,10188,10189,10190,10191,10192,3203,10193,10194,10195,10196,10197, #10032
10198,10199,10200,4478,10201,10202,10203,10204,4479,10205,10206,10207,10208,10209,10210,10211, #10048
10212,10213,10214,10215,10216,10217,10218,10219,10220,10221,10222,10223,10224,10225,10226,10227, #10064
10228,10229,10230,10231,10232,10233,10234,4927,10235,10236,10237,10238,10239,10240,10241,10242, #10080
10243,10244,10245,10246,10247,10248,10249,10250,10251,10252,10253,10254,10255,10256,10257,10258, #10096
10259,10260,10261,10262,10263,10264,10265,10266,10267,10268,10269,10270,10271,10272,10273,4480, #10112
4928,4929,10274,10275,10276,10277,10278,10279,10280,10281,10282,10283,10284,10285,10286,10287, #10128
10288,10289,10290,10291,10292,10293,10294,10295,10296,10297,10298,10299,10300,10301,10302,10303, #10144
10304,10305,10306,10307,10308,10309,10310,10311,10312,10313,10314,10315,10316,10317,10318,10319, #10160
10320,10321,10322,10323,10324,10325,10326,10327,10328,10329,10330,10331,10332,10333,10334,4930, #10176
10335,10336,10337,10338,10339,10340,10341,10342,4931,10343,10344,10345,10346,10347,10348,10349, #10192
10350,10351,10352,10353,10354,10355,3088,10356,2786,10357,10358,10359,10360,4189,10361,10362, #10208
10363,10364,10365,10366,10367,10368,10369,10370,10371,10372,10373,10374,10375,4932,10376,10377, #10224
10378,10379,10380,10381,10382,10383,10384,10385,10386,10387,10388,10389,10390,10391,10392,4933, #10240
10393,10394,10395,4934,10396,10397,10398,10399,10400,10401,10402,10403,10404,10405,10406,10407, #10256
10408,10409,10410,10411,10412,3446,10413,10414,10415,10416,10417,10418,10419,10420,10421,10422, #10272
10423,4935,10424,10425,10426,10427,10428,10429,10430,4936,10431,10432,10433,10434,10435,10436, #10288
10437,10438,10439,10440,10441,10442,10443,4937,10444,10445,10446,10447,4481,10448,10449,10450, #10304
10451,10452,10453,10454,10455,10456,10457,10458,10459,10460,10461,10462,10463,10464,10465,10466, #10320
10467,10468,10469,10470,10471,10472,10473,10474,10475,10476,10477,10478,10479,10480,10481,10482, #10336
10483,10484,10485,10486,10487,10488,10489,10490,10491,10492,10493,10494,10495,10496,10497,10498, #10352
10499,10500,10501,10502,10503,10504,10505,4938,10506,10507,10508,10509,10510,2552,10511,10512, #10368
10513,10514,10515,10516,3447,10517,10518,10519,10520,10521,10522,10523,10524,10525,10526,10527, #10384
10528,10529,10530,10531,10532,10533,10534,10535,10536,10537,10538,10539,10540,10541,10542,10543, #10400
4482,10544,4939,10545,10546,10547,10548,10549,10550,10551,10552,10553,10554,10555,10556,10557, #10416
10558,10559,10560,10561,10562,10563,10564,10565,10566,10567,3676,4483,10568,10569,10570,10571, #10432
10572,3448,10573,10574,10575,10576,10577,10578,10579,10580,10581,10582,10583,10584,10585,10586, #10448
10587,10588,10589,10590,10591,10592,10593,10594,10595,10596,10597,10598,10599,10600,10601,10602, #10464
10603,10604,10605,10606,10607,10608,10609,10610,10611,10612,10613,10614,10615,10616,10617,10618, #10480
10619,10620,10621,10622,10623,10624,10625,10626,10627,4484,10628,10629,10630,10631,10632,4940, #10496
10633,10634,10635,10636,10637,10638,10639,10640,10641,10642,10643,10644,10645,10646,10647,10648, #10512
10649,10650,10651,10652,10653,10654,10655,10656,4941,10657,10658,10659,2599,10660,10661,10662, #10528
10663,10664,10665,10666,3089,10667,10668,10669,10670,10671,10672,10673,10674,10675,10676,10677, #10544
10678,10679,10680,4942,10681,10682,10683,10684,10685,10686,10687,10688,10689,10690,10691,10692, #10560
10693,10694,10695,10696,10697,4485,10698,10699,10700,10701,10702,10703,10704,4943,10705,3677, #10576
10706,10707,10708,10709,10710,10711,10712,4944,10713,10714,10715,10716,10717,10718,10719,10720, #10592
10721,10722,10723,10724,10725,10726,10727,10728,4945,10729,10730,10731,10732,10733,10734,10735, #10608
10736,10737,10738,10739,10740,10741,10742,10743,10744,10745,10746,10747,10748,10749,10750,10751, #10624
10752,10753,10754,10755,10756,10757,10758,10759,10760,10761,4946,10762,10763,10764,10765,10766, #10640
10767,4947,4948,10768,10769,10770,10771,10772,10773,10774,10775,10776,10777,10778,10779,10780, #10656
10781,10782,10783,10784,10785,10786,10787,10788,10789,10790,10791,10792,10793,10794,10795,10796, #10672
10797,10798,10799,10800,10801,10802,10803,10804,10805,10806,10807,10808,10809,10810,10811,10812, #10688
10813,10814,10815,10816,10817,10818,10819,10820,10821,10822,10823,10824,10825,10826,10827,10828, #10704
10829,10830,10831,10832,10833,10834,10835,10836,10837,10838,10839,10840,10841,10842,10843,10844, #10720
10845,10846,10847,10848,10849,10850,10851,10852,10853,10854,10855,10856,10857,10858,10859,10860, #10736
10861,10862,10863,10864,10865,10866,10867,10868,10869,10870,10871,10872,10873,10874,10875,10876, #10752
10877,10878,4486,10879,10880,10881,10882,10883,10884,10885,4949,10886,10887,10888,10889,10890, #10768
10891,10892,10893,10894,10895,10896,10897,10898,10899,10900,10901,10902,10903,10904,10905,10906, #10784
10907,10908,10909,10910,10911,10912,10913,10914,10915,10916,10917,10918,10919,4487,10920,10921, #10800
10922,10923,10924,10925,10926,10927,10928,10929,10930,10931,10932,4950,10933,10934,10935,10936, #10816
10937,10938,10939,10940,10941,10942,10943,10944,10945,10946,10947,10948,10949,4488,10950,10951, #10832
10952,10953,10954,10955,10956,10957,10958,10959,4190,10960,10961,10962,10963,10964,10965,10966, #10848
10967,10968,10969,10970,10971,10972,10973,10974,10975,10976,10977,10978,10979,10980,10981,10982, #10864
10983,10984,10985,10986,10987,10988,10989,10990,10991,10992,10993,10994,10995,10996,10997,10998, #10880
10999,11000,11001,11002,11003,11004,11005,11006,3960,11007,11008,11009,11010,11011,11012,11013, #10896
11014,11015,11016,11017,11018,11019,11020,11021,11022,11023,11024,11025,11026,11027,11028,11029, #10912
11030,11031,11032,4951,11033,11034,11035,11036,11037,11038,11039,11040,11041,11042,11043,11044, #10928
11045,11046,11047,4489,11048,11049,11050,11051,4952,11052,11053,11054,11055,11056,11057,11058, #10944
4953,11059,11060,11061,11062,11063,11064,11065,11066,11067,11068,11069,11070,11071,4954,11072, #10960
11073,11074,11075,11076,11077,11078,11079,11080,11081,11082,11083,11084,11085,11086,11087,11088, #10976
11089,11090,11091,11092,11093,11094,11095,11096,11097,11098,11099,11100,11101,11102,11103,11104, #10992
11105,11106,11107,11108,11109,11110,11111,11112,11113,11114,11115,3808,11116,11117,11118,11119, #11008
11120,11121,11122,11123,11124,11125,11126,11127,11128,11129,11130,11131,11132,11133,11134,4955, #11024
11135,11136,11137,11138,11139,11140,11141,11142,11143,11144,11145,11146,11147,11148,11149,11150, #11040
11151,11152,11153,11154,11155,11156,11157,11158,11159,11160,11161,4956,11162,11163,11164,11165, #11056
11166,11167,11168,11169,11170,11171,11172,11173,11174,11175,11176,11177,11178,11179,11180,4957, #11072
11181,11182,11183,11184,11185,11186,4958,11187,11188,11189,11190,11191,11192,11193,11194,11195, #11088
11196,11197,11198,11199,11200,3678,11201,11202,11203,11204,11205,11206,4191,11207,11208,11209, #11104
11210,11211,11212,11213,11214,11215,11216,11217,11218,11219,11220,11221,11222,11223,11224,11225, #11120
11226,11227,11228,11229,11230,11231,11232,11233,11234,11235,11236,11237,11238,11239,11240,11241, #11136
11242,11243,11244,11245,11246,11247,11248,11249,11250,11251,4959,11252,11253,11254,11255,11256, #11152
11257,11258,11259,11260,11261,11262,11263,11264,11265,11266,11267,11268,11269,11270,11271,11272, #11168
11273,11274,11275,11276,11277,11278,11279,11280,11281,11282,11283,11284,11285,11286,11287,11288, #11184
11289,11290,11291,11292,11293,11294,11295,11296,11297,11298,11299,11300,11301,11302,11303,11304, #11200
11305,11306,11307,11308,11309,11310,11311,11312,11313,11314,3679,11315,11316,11317,11318,4490, #11216
11319,11320,11321,11322,11323,11324,11325,11326,11327,11328,11329,11330,11331,11332,11333,11334, #11232
11335,11336,11337,11338,11339,11340,11341,11342,11343,11344,11345,11346,11347,4960,11348,11349, #11248
11350,11351,11352,11353,11354,11355,11356,11357,11358,11359,11360,11361,11362,11363,11364,11365, #11264
11366,11367,11368,11369,11370,11371,11372,11373,11374,11375,11376,11377,3961,4961,11378,11379, #11280
11380,11381,11382,11383,11384,11385,11386,11387,11388,11389,11390,11391,11392,11393,11394,11395, #11296
11396,11397,4192,11398,11399,11400,11401,11402,11403,11404,11405,11406,11407,11408,11409,11410, #11312
11411,4962,11412,11413,11414,11415,11416,11417,11418,11419,11420,11421,11422,11423,11424,11425, #11328
11426,11427,11428,11429,11430,11431,11432,11433,11434,11435,11436,11437,11438,11439,11440,11441, #11344
11442,11443,11444,11445,11446,11447,11448,11449,11450,11451,11452,11453,11454,11455,11456,11457, #11360
11458,11459,11460,11461,11462,11463,11464,11465,11466,11467,11468,11469,4963,11470,11471,4491, #11376
11472,11473,11474,11475,4964,11476,11477,11478,11479,11480,11481,11482,11483,11484,11485,11486, #11392
11487,11488,11489,11490,11491,11492,4965,11493,11494,11495,11496,11497,11498,11499,11500,11501, #11408
11502,11503,11504,11505,11506,11507,11508,11509,11510,11511,11512,11513,11514,11515,11516,11517, #11424
11518,11519,11520,11521,11522,11523,11524,11525,11526,11527,11528,11529,3962,11530,11531,11532, #11440
11533,11534,11535,11536,11537,11538,11539,11540,11541,11542,11543,11544,11545,11546,11547,11548, #11456
11549,11550,11551,11552,11553,11554,11555,11556,11557,11558,11559,11560,11561,11562,11563,11564, #11472
4193,4194,11565,11566,11567,11568,11569,11570,11571,11572,11573,11574,11575,11576,11577,11578, #11488
11579,11580,11581,11582,11583,11584,11585,11586,11587,11588,11589,11590,11591,4966,4195,11592, #11504
11593,11594,11595,11596,11597,11598,11599,11600,11601,11602,11603,11604,3090,11605,11606,11607, #11520
11608,11609,11610,4967,11611,11612,11613,11614,11615,11616,11617,11618,11619,11620,11621,11622, #11536
11623,11624,11625,11626,11627,11628,11629,11630,11631,11632,11633,11634,11635,11636,11637,11638, #11552
11639,11640,11641,11642,11643,11644,11645,11646,11647,11648,11649,11650,11651,11652,11653,11654, #11568
11655,11656,11657,11658,11659,11660,11661,11662,11663,11664,11665,11666,11667,11668,11669,11670, #11584
11671,11672,11673,11674,4968,11675,11676,11677,11678,11679,11680,11681,11682,11683,11684,11685, #11600
11686,11687,11688,11689,11690,11691,11692,11693,3809,11694,11695,11696,11697,11698,11699,11700, #11616
11701,11702,11703,11704,11705,11706,11707,11708,11709,11710,11711,11712,11713,11714,11715,11716, #11632
11717,11718,3553,11719,11720,11721,11722,11723,11724,11725,11726,11727,11728,11729,11730,4969, #11648
11731,11732,11733,11734,11735,11736,11737,11738,11739,11740,4492,11741,11742,11743,11744,11745, #11664
11746,11747,11748,11749,11750,11751,11752,4970,11753,11754,11755,11756,11757,11758,11759,11760, #11680
11761,11762,11763,11764,11765,11766,11767,11768,11769,11770,11771,11772,11773,11774,11775,11776, #11696
11777,11778,11779,11780,11781,11782,11783,11784,11785,11786,11787,11788,11789,11790,4971,11791, #11712
11792,11793,11794,11795,11796,11797,4972,11798,11799,11800,11801,11802,11803,11804,11805,11806, #11728
11807,11808,11809,11810,4973,11811,11812,11813,11814,11815,11816,11817,11818,11819,11820,11821, #11744
11822,11823,11824,11825,11826,11827,11828,11829,11830,11831,11832,11833,11834,3680,3810,11835, #11760
11836,4974,11837,11838,11839,11840,11841,11842,11843,11844,11845,11846,11847,11848,11849,11850, #11776
11851,11852,11853,11854,11855,11856,11857,11858,11859,11860,11861,11862,11863,11864,11865,11866, #11792
11867,11868,11869,11870,11871,11872,11873,11874,11875,11876,11877,11878,11879,11880,11881,11882, #11808
11883,11884,4493,11885,11886,11887,11888,11889,11890,11891,11892,11893,11894,11895,11896,11897, #11824
11898,11899,11900,11901,11902,11903,11904,11905,11906,11907,11908,11909,11910,11911,11912,11913, #11840
11914,11915,4975,11916,11917,11918,11919,11920,11921,11922,11923,11924,11925,11926,11927,11928, #11856
11929,11930,11931,11932,11933,11934,11935,11936,11937,11938,11939,11940,11941,11942,11943,11944, #11872
11945,11946,11947,11948,11949,4976,11950,11951,11952,11953,11954,11955,11956,11957,11958,11959, #11888
11960,11961,11962,11963,11964,11965,11966,11967,11968,11969,11970,11971,11972,11973,11974,11975, #11904
11976,11977,11978,11979,11980,11981,11982,11983,11984,11985,11986,11987,4196,11988,11989,11990, #11920
11991,11992,4977,11993,11994,11995,11996,11997,11998,11999,12000,12001,12002,12003,12004,12005, #11936
12006,12007,12008,12009,12010,12011,12012,12013,12014,12015,12016,12017,12018,12019,12020,12021, #11952
12022,12023,12024,12025,12026,12027,12028,12029,12030,12031,12032,12033,12034,12035,12036,12037, #11968
12038,12039,12040,12041,12042,12043,12044,12045,12046,12047,12048,12049,12050,12051,12052,12053, #11984
12054,12055,12056,12057,12058,12059,12060,12061,4978,12062,12063,12064,12065,12066,12067,12068, #12000
12069,12070,12071,12072,12073,12074,12075,12076,12077,12078,12079,12080,12081,12082,12083,12084, #12016
12085,12086,12087,12088,12089,12090,12091,12092,12093,12094,12095,12096,12097,12098,12099,12100, #12032
12101,12102,12103,12104,12105,12106,12107,12108,12109,12110,12111,12112,12113,12114,12115,12116, #12048
12117,12118,12119,12120,12121,12122,12123,4979,12124,12125,12126,12127,12128,4197,12129,12130, #12064
12131,12132,12133,12134,12135,12136,12137,12138,12139,12140,12141,12142,12143,12144,12145,12146, #12080
12147,12148,12149,12150,12151,12152,12153,12154,4980,12155,12156,12157,12158,12159,12160,4494, #12096
12161,12162,12163,12164,3811,12165,12166,12167,12168,12169,4495,12170,12171,4496,12172,12173, #12112
12174,12175,12176,3812,12177,12178,12179,12180,12181,12182,12183,12184,12185,12186,12187,12188, #12128
12189,12190,12191,12192,12193,12194,12195,12196,12197,12198,12199,12200,12201,12202,12203,12204, #12144
12205,12206,12207,12208,12209,12210,12211,12212,12213,12214,12215,12216,12217,12218,12219,12220, #12160
12221,4981,12222,12223,12224,12225,12226,12227,12228,12229,12230,12231,12232,12233,12234,12235, #12176
4982,12236,12237,12238,12239,12240,12241,12242,12243,12244,12245,4983,12246,12247,12248,12249, #12192
4984,12250,12251,12252,12253,12254,12255,12256,12257,12258,12259,12260,12261,12262,12263,12264, #12208
4985,12265,4497,12266,12267,12268,12269,12270,12271,12272,12273,12274,12275,12276,12277,12278, #12224
12279,12280,12281,12282,12283,12284,12285,12286,12287,4986,12288,12289,12290,12291,12292,12293, #12240
12294,12295,12296,2473,12297,12298,12299,12300,12301,12302,12303,12304,12305,12306,12307,12308, #12256
12309,12310,12311,12312,12313,12314,12315,12316,12317,12318,12319,3963,12320,12321,12322,12323, #12272
12324,12325,12326,12327,12328,12329,12330,12331,12332,4987,12333,12334,12335,12336,12337,12338, #12288
12339,12340,12341,12342,12343,12344,12345,12346,12347,12348,12349,12350,12351,12352,12353,12354, #12304
12355,12356,12357,12358,12359,3964,12360,12361,12362,12363,12364,12365,12366,12367,12368,12369, #12320
12370,3965,12371,12372,12373,12374,12375,12376,12377,12378,12379,12380,12381,12382,12383,12384, #12336
12385,12386,12387,12388,12389,12390,12391,12392,12393,12394,12395,12396,12397,12398,12399,12400, #12352
12401,12402,12403,12404,12405,12406,12407,12408,4988,12409,12410,12411,12412,12413,12414,12415, #12368
12416,12417,12418,12419,12420,12421,12422,12423,12424,12425,12426,12427,12428,12429,12430,12431, #12384
12432,12433,12434,12435,12436,12437,12438,3554,12439,12440,12441,12442,12443,12444,12445,12446, #12400
12447,12448,12449,12450,12451,12452,12453,12454,12455,12456,12457,12458,12459,12460,12461,12462, #12416
12463,12464,4989,12465,12466,12467,12468,12469,12470,12471,12472,12473,12474,12475,12476,12477, #12432
12478,12479,12480,4990,12481,12482,12483,12484,12485,12486,12487,12488,12489,4498,12490,12491, #12448
12492,12493,12494,12495,12496,12497,12498,12499,12500,12501,12502,12503,12504,12505,12506,12507, #12464
12508,12509,12510,12511,12512,12513,12514,12515,12516,12517,12518,12519,12520,12521,12522,12523, #12480
12524,12525,12526,12527,12528,12529,12530,12531,12532,12533,12534,12535,12536,12537,12538,12539, #12496
12540,12541,12542,12543,12544,12545,12546,12547,12548,12549,12550,12551,4991,12552,12553,12554, #12512
12555,12556,12557,12558,12559,12560,12561,12562,12563,12564,12565,12566,12567,12568,12569,12570, #12528
12571,12572,12573,12574,12575,12576,12577,12578,3036,12579,12580,12581,12582,12583,3966,12584, #12544
12585,12586,12587,12588,12589,12590,12591,12592,12593,12594,12595,12596,12597,12598,12599,12600, #12560
12601,12602,12603,12604,12605,12606,12607,12608,12609,12610,12611,12612,12613,12614,12615,12616, #12576
12617,12618,12619,12620,12621,12622,12623,12624,12625,12626,12627,12628,12629,12630,12631,12632, #12592
12633,12634,12635,12636,12637,12638,12639,12640,12641,12642,12643,12644,12645,12646,4499,12647, #12608
12648,12649,12650,12651,12652,12653,12654,12655,12656,12657,12658,12659,12660,12661,12662,12663, #12624
12664,12665,12666,12667,12668,12669,12670,12671,12672,12673,12674,12675,12676,12677,12678,12679, #12640
12680,12681,12682,12683,12684,12685,12686,12687,12688,12689,12690,12691,12692,12693,12694,12695, #12656
12696,12697,12698,4992,12699,12700,12701,12702,12703,12704,12705,12706,12707,12708,12709,12710, #12672
12711,12712,12713,12714,12715,12716,12717,12718,12719,12720,12721,12722,12723,12724,12725,12726, #12688
12727,12728,12729,12730,12731,12732,12733,12734,12735,12736,12737,12738,12739,12740,12741,12742, #12704
12743,12744,12745,12746,12747,12748,12749,12750,12751,12752,12753,12754,12755,12756,12757,12758, #12720
12759,12760,12761,12762,12763,12764,12765,12766,12767,12768,12769,12770,12771,12772,12773,12774, #12736
12775,12776,12777,12778,4993,2175,12779,12780,12781,12782,12783,12784,12785,12786,4500,12787, #12752
12788,12789,12790,12791,12792,12793,12794,12795,12796,12797,12798,12799,12800,12801,12802,12803, #12768
12804,12805,12806,12807,12808,12809,12810,12811,12812,12813,12814,12815,12816,12817,12818,12819, #12784
12820,12821,12822,12823,12824,12825,12826,4198,3967,12827,12828,12829,12830,12831,12832,12833, #12800
12834,12835,12836,12837,12838,12839,12840,12841,12842,12843,12844,12845,12846,12847,12848,12849, #12816
12850,12851,12852,12853,12854,12855,12856,12857,12858,12859,12860,12861,4199,12862,12863,12864, #12832
12865,12866,12867,12868,12869,12870,12871,12872,12873,12874,12875,12876,12877,12878,12879,12880, #12848
12881,12882,12883,12884,12885,12886,12887,4501,12888,12889,12890,12891,12892,12893,12894,12895, #12864
12896,12897,12898,12899,12900,12901,12902,12903,12904,12905,12906,12907,12908,12909,12910,12911, #12880
12912,4994,12913,12914,12915,12916,12917,12918,12919,12920,12921,12922,12923,12924,12925,12926, #12896
12927,12928,12929,12930,12931,12932,12933,12934,12935,12936,12937,12938,12939,12940,12941,12942, #12912
12943,12944,12945,12946,12947,12948,12949,12950,12951,12952,12953,12954,12955,12956,1772,12957, #12928
12958,12959,12960,12961,12962,12963,12964,12965,12966,12967,12968,12969,12970,12971,12972,12973, #12944
12974,12975,12976,12977,12978,12979,12980,12981,12982,12983,12984,12985,12986,12987,12988,12989, #12960
12990,12991,12992,12993,12994,12995,12996,12997,4502,12998,4503,12999,13000,13001,13002,13003, #12976
4504,13004,13005,13006,13007,13008,13009,13010,13011,13012,13013,13014,13015,13016,13017,13018, #12992
13019,13020,13021,13022,13023,13024,13025,13026,13027,13028,13029,3449,13030,13031,13032,13033, #13008
13034,13035,13036,13037,13038,13039,13040,13041,13042,13043,13044,13045,13046,13047,13048,13049, #13024
13050,13051,13052,13053,13054,13055,13056,13057,13058,13059,13060,13061,13062,13063,13064,13065, #13040
13066,13067,13068,13069,13070,13071,13072,13073,13074,13075,13076,13077,13078,13079,13080,13081, #13056
13082,13083,13084,13085,13086,13087,13088,13089,13090,13091,13092,13093,13094,13095,13096,13097, #13072
13098,13099,13100,13101,13102,13103,13104,13105,13106,13107,13108,13109,13110,13111,13112,13113, #13088
13114,13115,13116,13117,13118,3968,13119,4995,13120,13121,13122,13123,13124,13125,13126,13127, #13104
4505,13128,13129,13130,13131,13132,13133,13134,4996,4506,13135,13136,13137,13138,13139,4997, #13120
13140,13141,13142,13143,13144,13145,13146,13147,13148,13149,13150,13151,13152,13153,13154,13155, #13136
13156,13157,13158,13159,4998,13160,13161,13162,13163,13164,13165,13166,13167,13168,13169,13170, #13152
13171,13172,13173,13174,13175,13176,4999,13177,13178,13179,13180,13181,13182,13183,13184,13185, #13168
13186,13187,13188,13189,13190,13191,13192,13193,13194,13195,13196,13197,13198,13199,13200,13201, #13184
13202,13203,13204,13205,13206,5000,13207,13208,13209,13210,13211,13212,13213,13214,13215,13216, #13200
13217,13218,13219,13220,13221,13222,13223,13224,13225,13226,13227,4200,5001,13228,13229,13230, #13216
13231,13232,13233,13234,13235,13236,13237,13238,13239,13240,3969,13241,13242,13243,13244,3970, #13232
13245,13246,13247,13248,13249,13250,13251,13252,13253,13254,13255,13256,13257,13258,13259,13260, #13248
13261,13262,13263,13264,13265,13266,13267,13268,3450,13269,13270,13271,13272,13273,13274,13275, #13264
13276,5002,13277,13278,13279,13280,13281,13282,13283,13284,13285,13286,13287,13288,13289,13290, #13280
13291,13292,13293,13294,13295,13296,13297,13298,13299,13300,13301,13302,3813,13303,13304,13305, #13296
13306,13307,13308,13309,13310,13311,13312,13313,13314,13315,13316,13317,13318,13319,13320,13321, #13312
13322,13323,13324,13325,13326,13327,13328,4507,13329,13330,13331,13332,13333,13334,13335,13336, #13328
13337,13338,13339,13340,13341,5003,13342,13343,13344,13345,13346,13347,13348,13349,13350,13351, #13344
13352,13353,13354,13355,13356,13357,13358,13359,13360,13361,13362,13363,13364,13365,13366,13367, #13360
5004,13368,13369,13370,13371,13372,13373,13374,13375,13376,13377,13378,13379,13380,13381,13382, #13376
13383,13384,13385,13386,13387,13388,13389,13390,13391,13392,13393,13394,13395,13396,13397,13398, #13392
13399,13400,13401,13402,13403,13404,13405,13406,13407,13408,13409,13410,13411,13412,13413,13414, #13408
13415,13416,13417,13418,13419,13420,13421,13422,13423,13424,13425,13426,13427,13428,13429,13430, #13424
13431,13432,4508,13433,13434,13435,4201,13436,13437,13438,13439,13440,13441,13442,13443,13444, #13440
13445,13446,13447,13448,13449,13450,13451,13452,13453,13454,13455,13456,13457,5005,13458,13459, #13456
13460,13461,13462,13463,13464,13465,13466,13467,13468,13469,13470,4509,13471,13472,13473,13474, #13472
13475,13476,13477,13478,13479,13480,13481,13482,13483,13484,13485,13486,13487,13488,13489,13490, #13488
13491,13492,13493,13494,13495,13496,13497,13498,13499,13500,13501,13502,13503,13504,13505,13506, #13504
13507,13508,13509,13510,13511,13512,13513,13514,13515,13516,13517,13518,13519,13520,13521,13522, #13520
13523,13524,13525,13526,13527,13528,13529,13530,13531,13532,13533,13534,13535,13536,13537,13538, #13536
13539,13540,13541,13542,13543,13544,13545,13546,13547,13548,13549,13550,13551,13552,13553,13554, #13552
13555,13556,13557,13558,13559,13560,13561,13562,13563,13564,13565,13566,13567,13568,13569,13570, #13568
13571,13572,13573,13574,13575,13576,13577,13578,13579,13580,13581,13582,13583,13584,13585,13586, #13584
13587,13588,13589,13590,13591,13592,13593,13594,13595,13596,13597,13598,13599,13600,13601,13602, #13600
13603,13604,13605,13606,13607,13608,13609,13610,13611,13612,13613,13614,13615,13616,13617,13618, #13616
13619,13620,13621,13622,13623,13624,13625,13626,13627,13628,13629,13630,13631,13632,13633,13634, #13632
13635,13636,13637,13638,13639,13640,13641,13642,5006,13643,13644,13645,13646,13647,13648,13649, #13648
13650,13651,5007,13652,13653,13654,13655,13656,13657,13658,13659,13660,13661,13662,13663,13664, #13664
13665,13666,13667,13668,13669,13670,13671,13672,13673,13674,13675,13676,13677,13678,13679,13680, #13680
13681,13682,13683,13684,13685,13686,13687,13688,13689,13690,13691,13692,13693,13694,13695,13696, #13696
13697,13698,13699,13700,13701,13702,13703,13704,13705,13706,13707,13708,13709,13710,13711,13712, #13712
13713,13714,13715,13716,13717,13718,13719,13720,13721,13722,13723,13724,13725,13726,13727,13728, #13728
13729,13730,13731,13732,13733,13734,13735,13736,13737,13738,13739,13740,13741,13742,13743,13744, #13744
13745,13746,13747,13748,13749,13750,13751,13752,13753,13754,13755,13756,13757,13758,13759,13760, #13760
13761,13762,13763,13764,13765,13766,13767,13768,13769,13770,13771,13772,13773,13774,3273,13775, #13776
13776,13777,13778,13779,13780,13781,13782,13783,13784,13785,13786,13787,13788,13789,13790,13791, #13792
13792,13793,13794,13795,13796,13797,13798,13799,13800,13801,13802,13803,13804,13805,13806,13807, #13808
13808,13809,13810,13811,13812,13813,13814,13815,13816,13817,13818,13819,13820,13821,13822,13823, #13824
13824,13825,13826,13827,13828,13829,13830,13831,13832,13833,13834,13835,13836,13837,13838,13839, #13840
13840,13841,13842,13843,13844,13845,13846,13847,13848,13849,13850,13851,13852,13853,13854,13855, #13856
13856,13857,13858,13859,13860,13861,13862,13863,13864,13865,13866,13867,13868,13869,13870,13871, #13872
13872,13873,13874,13875,13876,13877,13878,13879,13880,13881,13882,13883,13884,13885,13886,13887, #13888
13888,13889,13890,13891,13892,13893,13894,13895,13896,13897,13898,13899,13900,13901,13902,13903, #13904
13904,13905,13906,13907,13908,13909,13910,13911,13912,13913,13914,13915,13916,13917,13918,13919, #13920
13920,13921,13922,13923,13924,13925,13926,13927,13928,13929,13930,13931,13932,13933,13934,13935, #13936
13936,13937,13938,13939,13940,13941,13942,13943,13944,13945,13946,13947,13948,13949,13950,13951, #13952
13952,13953,13954,13955,13956,13957,13958,13959,13960,13961,13962,13963,13964,13965,13966,13967, #13968
13968,13969,13970,13971,13972) #13973
# flake8: noqa
@@ -0,0 +1,42 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import Big5DistributionAnalysis
from .mbcssm import Big5SMModel
class Big5Prober(MultiByteCharSetProber):
def __init__(self):
MultiByteCharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(Big5SMModel)
self._mDistributionAnalyzer = Big5DistributionAnalysis()
self.reset()
def get_charset_name(self):
return "Big5"
@@ -0,0 +1,46 @@
#!/usr/bin/env python
"""
Script which takes one or more file paths and reports on their detected
encodings
Example::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
If no paths are provided, it takes its input from stdin.
"""
from io import open
from sys import argv, stdin
from chardet.universaldetector import UniversalDetector
def description_of(file, name='stdin'):
"""Return a string describing the probable encoding of a file."""
u = UniversalDetector()
for line in file:
u.feed(line)
u.close()
result = u.result
if result['encoding']:
return '%s: %s with confidence %s' % (name,
result['encoding'],
result['confidence'])
else:
return '%s: no result' % name
def main():
if len(argv) <= 1:
print(description_of(stdin))
else:
for path in argv[1:]:
with open(path, 'rb') as f:
print(description_of(f, path))
if __name__ == '__main__':
main()
@@ -0,0 +1,231 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,
EUCTW_TYPICAL_DISTRIBUTION_RATIO)
from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,
EUCKR_TYPICAL_DISTRIBUTION_RATIO)
from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,
GB2312_TYPICAL_DISTRIBUTION_RATIO)
from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,
BIG5_TYPICAL_DISTRIBUTION_RATIO)
from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,
JIS_TYPICAL_DISTRIBUTION_RATIO)
from .compat import wrap_ord
ENOUGH_DATA_THRESHOLD = 1024
SURE_YES = 0.99
SURE_NO = 0.01
MINIMUM_DATA_THRESHOLD = 3
class CharDistributionAnalysis:
def __init__(self):
# Mapping table to get frequency order from char order (get from
# GetOrder())
self._mCharToFreqOrder = None
self._mTableSize = None # Size of above table
# This is a constant value which varies from language to language,
# used in calculating confidence. See
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
# for further detail.
self._mTypicalDistributionRatio = None
self.reset()
def reset(self):
"""reset analyser, clear any state"""
# If this flag is set to True, detection is done and conclusion has
# been made
self._mDone = False
self._mTotalChars = 0 # Total characters encountered
# The number of characters whose frequency order is less than 512
self._mFreqChars = 0
def feed(self, aBuf, aCharLen):
"""feed a character with known length"""
if aCharLen == 2:
# we only care about 2-bytes character in our distribution analysis
order = self.get_order(aBuf)
else:
order = -1
if order >= 0:
self._mTotalChars += 1
# order is valid
if order < self._mTableSize:
if 512 > self._mCharToFreqOrder[order]:
self._mFreqChars += 1
def get_confidence(self):
"""return confidence based on existing data"""
# if we didn't receive any character in our consideration range,
# return negative answer
if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:
return SURE_NO
if self._mTotalChars != self._mFreqChars:
r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)
* self._mTypicalDistributionRatio))
if r < SURE_YES:
return r
# normalize confidence (we don't want to be 100% sure)
return SURE_YES
def got_enough_data(self):
# It is not necessary to receive all data to draw conclusion.
# For charset detection, certain amount of data is enough
return self._mTotalChars > ENOUGH_DATA_THRESHOLD
def get_order(self, aBuf):
# We do not handle characters based on the original encoding string,
# but convert this encoding string to a number, here called order.
# This allows multiple encodings of a language to share one frequency
# table.
return -1
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = EUCTWCharToFreqOrder
self._mTableSize = EUCTW_TABLE_SIZE
self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, aBuf):
# for euc-TW encoding, we are interested
# first byte range: 0xc4 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char = wrap_ord(aBuf[0])
if first_char >= 0xC4:
return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1
else:
return -1
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = EUCKRCharToFreqOrder
self._mTableSize = EUCKR_TABLE_SIZE
self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, aBuf):
# for euc-KR encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char = wrap_ord(aBuf[0])
if first_char >= 0xB0:
return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1
else:
return -1
class GB2312DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = GB2312CharToFreqOrder
self._mTableSize = GB2312_TABLE_SIZE
self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, aBuf):
# for GB2312 encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
if (first_char >= 0xB0) and (second_char >= 0xA1):
return 94 * (first_char - 0xB0) + second_char - 0xA1
else:
return -1
class Big5DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = Big5CharToFreqOrder
self._mTableSize = BIG5_TABLE_SIZE
self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, aBuf):
# for big5 encoding, we are interested
# first byte range: 0xa4 -- 0xfe
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
if first_char >= 0xA4:
if second_char >= 0xA1:
return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
else:
return 157 * (first_char - 0xA4) + second_char - 0x40
else:
return -1
class SJISDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = JISCharToFreqOrder
self._mTableSize = JIS_TABLE_SIZE
self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, aBuf):
# for sjis encoding, we are interested
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
# no validation needed here. State machine has done that
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
if (first_char >= 0x81) and (first_char <= 0x9F):
order = 188 * (first_char - 0x81)
elif (first_char >= 0xE0) and (first_char <= 0xEF):
order = 188 * (first_char - 0xE0 + 31)
else:
return -1
order = order + second_char - 0x40
if second_char > 0x7F:
order = -1
return order
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = JISCharToFreqOrder
self._mTableSize = JIS_TABLE_SIZE
self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, aBuf):
# for euc-JP encoding, we are interested
# first byte range: 0xa0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
char = wrap_ord(aBuf[0])
if char >= 0xA0:
return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1
else:
return -1
@@ -0,0 +1,106 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from . import constants
import sys
from .charsetprober import CharSetProber
class CharSetGroupProber(CharSetProber):
def __init__(self):
CharSetProber.__init__(self)
self._mActiveNum = 0
self._mProbers = []
self._mBestGuessProber = None
def reset(self):
CharSetProber.reset(self)
self._mActiveNum = 0
for prober in self._mProbers:
if prober:
prober.reset()
prober.active = True
self._mActiveNum += 1
self._mBestGuessProber = None
def get_charset_name(self):
if not self._mBestGuessProber:
self.get_confidence()
if not self._mBestGuessProber:
return None
# self._mBestGuessProber = self._mProbers[0]
return self._mBestGuessProber.get_charset_name()
def feed(self, aBuf):
for prober in self._mProbers:
if not prober:
continue
if not prober.active:
continue
st = prober.feed(aBuf)
if not st:
continue
if st == constants.eFoundIt:
self._mBestGuessProber = prober
return self.get_state()
elif st == constants.eNotMe:
prober.active = False
self._mActiveNum -= 1
if self._mActiveNum <= 0:
self._mState = constants.eNotMe
return self.get_state()
return self.get_state()
def get_confidence(self):
st = self.get_state()
if st == constants.eFoundIt:
return 0.99
elif st == constants.eNotMe:
return 0.01
bestConf = 0.0
self._mBestGuessProber = None
for prober in self._mProbers:
if not prober:
continue
if not prober.active:
if constants._debug:
sys.stderr.write(prober.get_charset_name()
+ ' not active\n')
continue
cf = prober.get_confidence()
if constants._debug:
sys.stderr.write('%s confidence = %s\n' %
(prober.get_charset_name(), cf))
if bestConf < cf:
bestConf = cf
self._mBestGuessProber = prober
if not self._mBestGuessProber:
return 0.0
return bestConf
# else:
# self._mBestGuessProber = self._mProbers[0]
# return self._mBestGuessProber.get_confidence()
@@ -0,0 +1,62 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from . import constants
import re
class CharSetProber:
def __init__(self):
pass
def reset(self):
self._mState = constants.eDetecting
def get_charset_name(self):
return None
def feed(self, aBuf):
pass
def get_state(self):
return self._mState
def get_confidence(self):
return 0.0
def filter_high_bit_only(self, aBuf):
aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf)
return aBuf
def filter_without_english_letters(self, aBuf):
aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf)
return aBuf
def filter_with_english_letters(self, aBuf):
# TODO
return aBuf
@@ -0,0 +1,61 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .constants import eStart
from .compat import wrap_ord
class CodingStateMachine:
def __init__(self, sm):
self._mModel = sm
self._mCurrentBytePos = 0
self._mCurrentCharLen = 0
self.reset()
def reset(self):
self._mCurrentState = eStart
def next_state(self, c):
# for each byte we get its class
# if it is first byte, we also get byte length
# PY3K: aBuf is a byte stream, so c is an int, not a byte
byteCls = self._mModel['classTable'][wrap_ord(c)]
if self._mCurrentState == eStart:
self._mCurrentBytePos = 0
self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
# from byte's class and stateTable, we get its next state
curr_state = (self._mCurrentState * self._mModel['classFactor']
+ byteCls)
self._mCurrentState = self._mModel['stateTable'][curr_state]
self._mCurrentBytePos += 1
return self._mCurrentState
def get_current_charlen(self):
return self._mCurrentCharLen
def get_coding_state_machine(self):
return self._mModel['name']
@@ -0,0 +1,34 @@
######################## BEGIN LICENSE BLOCK ########################
# Contributor(s):
# Ian Cordasco - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import sys
if sys.version_info < (3, 0):
base_str = (str, unicode)
else:
base_str = (bytes, str)
def wrap_ord(a):
if sys.version_info < (3, 0) and isinstance(a, base_str):
return ord(a)
else:
return a
@@ -0,0 +1,39 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
_debug = 0
eDetecting = 0
eFoundIt = 1
eNotMe = 2
eStart = 0
eError = 1
eItsMe = 2
SHORTCUT_THRESHOLD = 0.95
@@ -0,0 +1,44 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCKRDistributionAnalysis
from .mbcssm import CP949SMModel
class CP949Prober(MultiByteCharSetProber):
def __init__(self):
MultiByteCharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(CP949SMModel)
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
# not different.
self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
self.reset()
def get_charset_name(self):
return "CP949"
@@ -0,0 +1,86 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from . import constants
from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
ISO2022KRSMModel)
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
from .compat import wrap_ord
class EscCharSetProber(CharSetProber):
def __init__(self):
CharSetProber.__init__(self)
self._mCodingSM = [
CodingStateMachine(HZSMModel),
CodingStateMachine(ISO2022CNSMModel),
CodingStateMachine(ISO2022JPSMModel),
CodingStateMachine(ISO2022KRSMModel)
]
self.reset()
def reset(self):
CharSetProber.reset(self)
for codingSM in self._mCodingSM:
if not codingSM:
continue
codingSM.active = True
codingSM.reset()
self._mActiveSM = len(self._mCodingSM)
self._mDetectedCharset = None
def get_charset_name(self):
return self._mDetectedCharset
def get_confidence(self):
if self._mDetectedCharset:
return 0.99
else:
return 0.00
def feed(self, aBuf):
for c in aBuf:
# PY3K: aBuf is a byte array, so c is an int, not a byte
for codingSM in self._mCodingSM:
if not codingSM:
continue
if not codingSM.active:
continue
codingState = codingSM.next_state(wrap_ord(c))
if codingState == constants.eError:
codingSM.active = False
self._mActiveSM -= 1
if self._mActiveSM <= 0:
self._mState = constants.eNotMe
return self.get_state()
elif codingState == constants.eItsMe:
self._mState = constants.eFoundIt
self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8
return self.get_state()
return self.get_state()
+242
View File
@@ -0,0 +1,242 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .constants import eStart, eError, eItsMe
HZ_cls = (
1,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,0,0,0,0, # 20 - 27
0,0,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,0,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,4,0,5,2,0, # 78 - 7f
1,1,1,1,1,1,1,1, # 80 - 87
1,1,1,1,1,1,1,1, # 88 - 8f
1,1,1,1,1,1,1,1, # 90 - 97
1,1,1,1,1,1,1,1, # 98 - 9f
1,1,1,1,1,1,1,1, # a0 - a7
1,1,1,1,1,1,1,1, # a8 - af
1,1,1,1,1,1,1,1, # b0 - b7
1,1,1,1,1,1,1,1, # b8 - bf
1,1,1,1,1,1,1,1, # c0 - c7
1,1,1,1,1,1,1,1, # c8 - cf
1,1,1,1,1,1,1,1, # d0 - d7
1,1,1,1,1,1,1,1, # d8 - df
1,1,1,1,1,1,1,1, # e0 - e7
1,1,1,1,1,1,1,1, # e8 - ef
1,1,1,1,1,1,1,1, # f0 - f7
1,1,1,1,1,1,1,1, # f8 - ff
)
HZ_st = (
eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17
5,eError, 6,eError, 5, 5, 4,eError,# 18-1f
4,eError, 4, 4, 4,eError, 4,eError,# 20-27
4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f
)
HZCharLenTable = (0, 0, 0, 0, 0, 0)
HZSMModel = {'classTable': HZ_cls,
'classFactor': 6,
'stateTable': HZ_st,
'charLenTable': HZCharLenTable,
'name': "HZ-GB-2312"}
ISO2022CN_cls = (
2,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,0,0,0,0, # 20 - 27
0,3,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,4,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
2,2,2,2,2,2,2,2, # 80 - 87
2,2,2,2,2,2,2,2, # 88 - 8f
2,2,2,2,2,2,2,2, # 90 - 97
2,2,2,2,2,2,2,2, # 98 - 9f
2,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
2,2,2,2,2,2,2,2, # e0 - e7
2,2,2,2,2,2,2,2, # e8 - ef
2,2,2,2,2,2,2,2, # f0 - f7
2,2,2,2,2,2,2,2, # f8 - ff
)
ISO2022CN_st = (
eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27
5, 6,eError,eError,eError,eError,eError,eError,# 28-2f
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37
eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f
)
ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)
ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
'classFactor': 9,
'stateTable': ISO2022CN_st,
'charLenTable': ISO2022CNCharLenTable,
'name': "ISO-2022-CN"}
ISO2022JP_cls = (
2,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,2,2, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,7,0,0,0, # 20 - 27
3,0,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
6,0,4,0,8,0,0,0, # 40 - 47
0,9,5,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
2,2,2,2,2,2,2,2, # 80 - 87
2,2,2,2,2,2,2,2, # 88 - 8f
2,2,2,2,2,2,2,2, # 90 - 97
2,2,2,2,2,2,2,2, # 98 - 9f
2,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
2,2,2,2,2,2,2,2, # e0 - e7
2,2,2,2,2,2,2,2, # e8 - ef
2,2,2,2,2,2,2,2, # f0 - f7
2,2,2,2,2,2,2,2, # f8 - ff
)
ISO2022JP_st = (
eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f
eError, 5,eError,eError,eError, 4,eError,eError,# 20-27
eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f
eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47
)
ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
'classFactor': 10,
'stateTable': ISO2022JP_st,
'charLenTable': ISO2022JPCharLenTable,
'name': "ISO-2022-JP"}
ISO2022KR_cls = (
2,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,3,0,0,0, # 20 - 27
0,4,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,5,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
2,2,2,2,2,2,2,2, # 80 - 87
2,2,2,2,2,2,2,2, # 88 - 8f
2,2,2,2,2,2,2,2, # 90 - 97
2,2,2,2,2,2,2,2, # 98 - 9f
2,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
2,2,2,2,2,2,2,2, # e0 - e7
2,2,2,2,2,2,2,2, # e8 - ef
2,2,2,2,2,2,2,2, # f0 - f7
2,2,2,2,2,2,2,2, # f8 - ff
)
ISO2022KR_st = (
eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17
eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f
eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27
)
ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)
ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
'classFactor': 6,
'stateTable': ISO2022KR_st,
'charLenTable': ISO2022KRCharLenTable,
'name': "ISO-2022-KR"}
# flake8: noqa
@@ -0,0 +1,90 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import sys
from . import constants
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCJPDistributionAnalysis
from .jpcntx import EUCJPContextAnalysis
from .mbcssm import EUCJPSMModel
class EUCJPProber(MultiByteCharSetProber):
def __init__(self):
MultiByteCharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(EUCJPSMModel)
self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
self._mContextAnalyzer = EUCJPContextAnalysis()
self.reset()
def reset(self):
MultiByteCharSetProber.reset(self)
self._mContextAnalyzer.reset()
def get_charset_name(self):
return "EUC-JP"
def feed(self, aBuf):
aLen = len(aBuf)
for i in range(0, aLen):
# PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
codingState = self._mCodingSM.next_state(aBuf[i])
if codingState == constants.eError:
if constants._debug:
sys.stderr.write(self.get_charset_name()
+ ' prober hit error at byte ' + str(i)
+ '\n')
self._mState = constants.eNotMe
break
elif codingState == constants.eItsMe:
self._mState = constants.eFoundIt
break
elif codingState == constants.eStart:
charLen = self._mCodingSM.get_current_charlen()
if i == 0:
self._mLastChar[1] = aBuf[0]
self._mContextAnalyzer.feed(self._mLastChar, charLen)
self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
else:
self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
charLen)
self._mLastChar[0] = aBuf[aLen - 1]
if self.get_state() == constants.eDetecting:
if (self._mContextAnalyzer.got_enough_data() and
(self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
self._mState = constants.eFoundIt
return self.get_state()
def get_confidence(self):
contxtCf = self._mContextAnalyzer.get_confidence()
distribCf = self._mDistributionAnalyzer.get_confidence()
return max(contxtCf, distribCf)
@@ -0,0 +1,596 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# Sampling from about 20M text materials include literature and computer technology
# 128 --> 0.79
# 256 --> 0.92
# 512 --> 0.986
# 1024 --> 0.99944
# 2048 --> 0.99999
#
# Idea Distribution Ratio = 0.98653 / (1-0.98653) = 73.24
# Random Distribution Ration = 512 / (2350-512) = 0.279.
#
# Typical Distribution Ratio
EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0
EUCKR_TABLE_SIZE = 2352
# Char to FreqOrder table ,
EUCKRCharToFreqOrder = ( \
13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87,
1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734,
945,1400,1735, 47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739,
116, 987, 813,1401, 683, 75,1204, 145,1740,1741,1742,1743, 16, 847, 667, 622,
708,1744,1745,1746, 966, 787, 304, 129,1747, 60, 820, 123, 676,1748,1749,1750,
1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856,
344,1763,1764,1765,1766, 89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205,
709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779,
1780, 337, 751,1058, 28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782, 19,
1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567,
1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797,
1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802,
1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899,
885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818,
1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409,
1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697,
1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770,
1412,1837,1838, 39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723,
544,1023,1081, 869, 91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416,
1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300,
119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083,
893,1853,1854,1855, 367, 809, 487, 671,1856, 663,1857,1858, 956, 471, 306, 857,
1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871,
282, 96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420,
1421, 268,1877,1422,1878,1879,1880, 308,1881, 2, 537,1882,1883,1215,1884,1885,
127, 791,1886,1273,1423,1887, 34, 336, 404, 643,1888, 571, 654, 894, 840,1889,
0, 886,1274, 122, 575, 260, 908, 938,1890,1275, 410, 316,1891,1892, 100,1893,
1894,1123, 48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317,
1899, 694,1900, 909, 734,1424, 572, 866,1425, 691, 85, 524,1010, 543, 394, 841,
1901,1902,1903,1026,1904,1905,1906,1907,1908,1909, 30, 451, 651, 988, 310,1910,
1911,1426, 810,1216, 93,1912,1913,1277,1217,1914, 858, 759, 45, 58, 181, 610,
269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375,
1919, 359,1920, 687,1921, 822,1922, 293,1923,1924, 40, 662, 118, 692, 29, 939,
887, 640, 482, 174,1925, 69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870,
217, 854,1163, 823,1927,1928,1929,1930, 834,1931, 78,1932, 859,1933,1063,1934,
1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888,
1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950,
1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065,
1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002,
1283,1222,1960,1961,1962,1963, 36, 383, 228, 753, 247, 454,1964, 876, 678,1965,
1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467,
50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285,
639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971, 7,
103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979,
1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985,
818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994,
1995, 560, 223,1287, 98, 8, 189, 650, 978,1288,1996,1437,1997, 17, 345, 250,
423, 277, 234, 512, 226, 97, 289, 42, 167,1998, 201,1999,2000, 843, 836, 824,
532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003,
2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008, 71,1440, 745,
619, 688,2009, 829,2010,2011, 147,2012, 33, 948,2013,2014, 74, 224,2015, 61,
191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023,
2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591, 52, 724, 246,2031,2032,
2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912,
2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224,
719,1170, 959, 440, 437, 534, 84, 388, 480,1131, 159, 220, 198, 679,2044,1012,
819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050,
2051,2052,2053, 59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681,
499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414,
1444,2064,2065, 41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068,
2069,1292,2070,2071,1445,2072,1446,2073,2074, 55, 588, 66,1447, 271,1092,2075,
1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850,
2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606,
2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449,
1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452,
949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112,
2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121,
2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130,
22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174, 73,1096, 231, 274,
962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139,
2141,2142,2143,2144, 11, 374, 844,2145, 154,1232, 46,1461,2146, 838, 830, 721,
1233, 106,2147, 90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298,
2150,1462, 761, 565,2151, 686,2152, 649,2153, 72, 173,2154, 460, 415,2155,1463,
2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747,
2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177, 23, 530, 285,
2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187,
2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193, 10,
2194, 613, 424,2195, 979, 108, 449, 589, 27, 172, 81,1031, 80, 774, 281, 350,
1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201,
2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972,
2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219,
2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233,
2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242,
2243, 521, 486, 548,2244,2245,2246,1473,1300, 53, 549, 137, 875, 76, 158,2247,
1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178,
1475,2249, 82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255,
2256, 18, 450, 206,2257, 290, 292,1142,2258, 511, 162, 99, 346, 164, 735,2259,
1476,1477, 4, 554, 343, 798,1099,2260,1100,2261, 43, 171,1303, 139, 215,2262,
2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702,
1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272, 67,2273,
295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541,
2282,2283,2284,2285,2286, 70, 852,1071,2287,2288,2289,2290, 21, 56, 509, 117,
432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187,
2294,1046,1479,2295, 340,2296, 63,1047, 230,2297,2298,1305, 763,1306, 101, 800,
808, 494,2299,2300,2301, 903,2302, 37,1072, 14, 5,2303, 79, 675,2304, 312,
2305,2306,2307,2308,2309,1480, 6,1307,2310,2311,2312, 1, 470, 35, 24, 229,
2313, 695, 210, 86, 778, 15, 784, 592, 779, 32, 77, 855, 964,2314, 259,2315,
501, 380,2316,2317, 83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484,
2320,2321,2322,2323,2324,2325,1485,2326,2327, 128, 57, 68, 261,1048, 211, 170,
1240, 31,2328, 51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335,
425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601,
1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395,
2351,1490,1491, 62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354,
1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476,
2361,2362, 332, 12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035,
416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498,
2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310,
1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389,
2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 950,2398,2399,2400,2401,1504,
1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505,
2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145,
1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624,
593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700,
2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 608,2426,2427,2428,2429, 221,
2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377,
644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448,
915, 489,2449,1514,1184,2450,2451, 515, 64, 427, 495,2452, 583,2453, 483, 485,
1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705,
1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465,
291,2466,1318,1105,2467,1517, 365,2468, 355, 951,1244,2469,1319,2470, 631,2471,
2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997,
2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486,
797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187, 65,2494,
434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771,
585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323,
2499,2500, 49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491,
95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510,
161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519,
2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532,
2533, 25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199,
704, 504, 468, 758, 657,1528, 196, 44, 839,1246, 272, 750,2543, 765, 862,2544,
2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247,
1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441,
249,1075,2556,2557,2558, 466, 743,2559,2560,2561, 92, 514, 426, 420, 526,2562,
2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362,
2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583,
2584,1532, 54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465,
3, 458, 9, 38,2588, 107, 110, 890, 209, 26, 737, 498,2589,1534,2590, 431,
202, 88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151,
974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596,
2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601, 94, 175, 197, 406,
2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611,
2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619,
1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628,
2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042,
670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256
#Everything below is of no interest for detection purpose
2643,2644,2645,2646,2647,2648,2649,2650,2651,2652,2653,2654,2655,2656,2657,2658,
2659,2660,2661,2662,2663,2664,2665,2666,2667,2668,2669,2670,2671,2672,2673,2674,
2675,2676,2677,2678,2679,2680,2681,2682,2683,2684,2685,2686,2687,2688,2689,2690,
2691,2692,2693,2694,2695,2696,2697,2698,2699,1542, 880,2700,2701,2702,2703,2704,
2705,2706,2707,2708,2709,2710,2711,2712,2713,2714,2715,2716,2717,2718,2719,2720,
2721,2722,2723,2724,2725,1543,2726,2727,2728,2729,2730,2731,2732,1544,2733,2734,
2735,2736,2737,2738,2739,2740,2741,2742,2743,2744,2745,2746,2747,2748,2749,2750,
2751,2752,2753,2754,1545,2755,2756,2757,2758,2759,2760,2761,2762,2763,2764,2765,
2766,1546,2767,1547,2768,2769,2770,2771,2772,2773,2774,2775,2776,2777,2778,2779,
2780,2781,2782,2783,2784,2785,2786,1548,2787,2788,2789,1109,2790,2791,2792,2793,
2794,2795,2796,2797,2798,2799,2800,2801,2802,2803,2804,2805,2806,2807,2808,2809,
2810,2811,2812,1329,2813,2814,2815,2816,2817,2818,2819,2820,2821,2822,2823,2824,
2825,2826,2827,2828,2829,2830,2831,2832,2833,2834,2835,2836,2837,2838,2839,2840,
2841,2842,2843,2844,2845,2846,2847,2848,2849,2850,2851,2852,2853,2854,2855,2856,
1549,2857,2858,2859,2860,1550,2861,2862,1551,2863,2864,2865,2866,2867,2868,2869,
2870,2871,2872,2873,2874,1110,1330,2875,2876,2877,2878,2879,2880,2881,2882,2883,
2884,2885,2886,2887,2888,2889,2890,2891,2892,2893,2894,2895,2896,2897,2898,2899,
2900,2901,2902,2903,2904,2905,2906,2907,2908,2909,2910,2911,2912,2913,2914,2915,
2916,2917,2918,2919,2920,2921,2922,2923,2924,2925,2926,2927,2928,2929,2930,1331,
2931,2932,2933,2934,2935,2936,2937,2938,2939,2940,2941,2942,2943,1552,2944,2945,
2946,2947,2948,2949,2950,2951,2952,2953,2954,2955,2956,2957,2958,2959,2960,2961,
2962,2963,2964,1252,2965,2966,2967,2968,2969,2970,2971,2972,2973,2974,2975,2976,
2977,2978,2979,2980,2981,2982,2983,2984,2985,2986,2987,2988,2989,2990,2991,2992,
2993,2994,2995,2996,2997,2998,2999,3000,3001,3002,3003,3004,3005,3006,3007,3008,
3009,3010,3011,3012,1553,3013,3014,3015,3016,3017,1554,3018,1332,3019,3020,3021,
3022,3023,3024,3025,3026,3027,3028,3029,3030,3031,3032,3033,3034,3035,3036,3037,
3038,3039,3040,3041,3042,3043,3044,3045,3046,3047,3048,3049,3050,1555,3051,3052,
3053,1556,1557,3054,3055,3056,3057,3058,3059,3060,3061,3062,3063,3064,3065,3066,
3067,1558,3068,3069,3070,3071,3072,3073,3074,3075,3076,1559,3077,3078,3079,3080,
3081,3082,3083,1253,3084,3085,3086,3087,3088,3089,3090,3091,3092,3093,3094,3095,
3096,3097,3098,3099,3100,3101,3102,3103,3104,3105,3106,3107,3108,1152,3109,3110,
3111,3112,3113,1560,3114,3115,3116,3117,1111,3118,3119,3120,3121,3122,3123,3124,
3125,3126,3127,3128,3129,3130,3131,3132,3133,3134,3135,3136,3137,3138,3139,3140,
3141,3142,3143,3144,3145,3146,3147,3148,3149,3150,3151,3152,3153,3154,3155,3156,
3157,3158,3159,3160,3161,3162,3163,3164,3165,3166,3167,3168,3169,3170,3171,3172,
3173,3174,3175,3176,1333,3177,3178,3179,3180,3181,3182,3183,3184,3185,3186,3187,
3188,3189,1561,3190,3191,1334,3192,3193,3194,3195,3196,3197,3198,3199,3200,3201,
3202,3203,3204,3205,3206,3207,3208,3209,3210,3211,3212,3213,3214,3215,3216,3217,
3218,3219,3220,3221,3222,3223,3224,3225,3226,3227,3228,3229,3230,3231,3232,3233,
3234,1562,3235,3236,3237,3238,3239,3240,3241,3242,3243,3244,3245,3246,3247,3248,
3249,3250,3251,3252,3253,3254,3255,3256,3257,3258,3259,3260,3261,3262,3263,3264,
3265,3266,3267,3268,3269,3270,3271,3272,3273,3274,3275,3276,3277,1563,3278,3279,
3280,3281,3282,3283,3284,3285,3286,3287,3288,3289,3290,3291,3292,3293,3294,3295,
3296,3297,3298,3299,3300,3301,3302,3303,3304,3305,3306,3307,3308,3309,3310,3311,
3312,3313,3314,3315,3316,3317,3318,3319,3320,3321,3322,3323,3324,3325,3326,3327,
3328,3329,3330,3331,3332,3333,3334,3335,3336,3337,3338,3339,3340,3341,3342,3343,
3344,3345,3346,3347,3348,3349,3350,3351,3352,3353,3354,3355,3356,3357,3358,3359,
3360,3361,3362,3363,3364,1335,3365,3366,3367,3368,3369,3370,3371,3372,3373,3374,
3375,3376,3377,3378,3379,3380,3381,3382,3383,3384,3385,3386,3387,1336,3388,3389,
3390,3391,3392,3393,3394,3395,3396,3397,3398,3399,3400,3401,3402,3403,3404,3405,
3406,3407,3408,3409,3410,3411,3412,3413,3414,1337,3415,3416,3417,3418,3419,1338,
3420,3421,3422,1564,1565,3423,3424,3425,3426,3427,3428,3429,3430,3431,1254,3432,
3433,3434,1339,3435,3436,3437,3438,3439,1566,3440,3441,3442,3443,3444,3445,3446,
3447,3448,3449,3450,3451,3452,3453,3454,1255,3455,3456,3457,3458,3459,1567,1191,
3460,1568,1569,3461,3462,3463,1570,3464,3465,3466,3467,3468,1571,3469,3470,3471,
3472,3473,1572,3474,3475,3476,3477,3478,3479,3480,3481,3482,3483,3484,3485,3486,
1340,3487,3488,3489,3490,3491,3492,1021,3493,3494,3495,3496,3497,3498,1573,3499,
1341,3500,3501,3502,3503,3504,3505,3506,3507,3508,3509,3510,3511,1342,3512,3513,
3514,3515,3516,1574,1343,3517,3518,3519,1575,3520,1576,3521,3522,3523,3524,3525,
3526,3527,3528,3529,3530,3531,3532,3533,3534,3535,3536,3537,3538,3539,3540,3541,
3542,3543,3544,3545,3546,3547,3548,3549,3550,3551,3552,3553,3554,3555,3556,3557,
3558,3559,3560,3561,3562,3563,3564,3565,3566,3567,3568,3569,3570,3571,3572,3573,
3574,3575,3576,3577,3578,3579,3580,1577,3581,3582,1578,3583,3584,3585,3586,3587,
3588,3589,3590,3591,3592,3593,3594,3595,3596,3597,3598,3599,3600,3601,3602,3603,
3604,1579,3605,3606,3607,3608,3609,3610,3611,3612,3613,3614,3615,3616,3617,3618,
3619,3620,3621,3622,3623,3624,3625,3626,3627,3628,3629,1580,3630,3631,1581,3632,
3633,3634,3635,3636,3637,3638,3639,3640,3641,3642,3643,3644,3645,3646,3647,3648,
3649,3650,3651,3652,3653,3654,3655,3656,1582,3657,3658,3659,3660,3661,3662,3663,
3664,3665,3666,3667,3668,3669,3670,3671,3672,3673,3674,3675,3676,3677,3678,3679,
3680,3681,3682,3683,3684,3685,3686,3687,3688,3689,3690,3691,3692,3693,3694,3695,
3696,3697,3698,3699,3700,1192,3701,3702,3703,3704,1256,3705,3706,3707,3708,1583,
1257,3709,3710,3711,3712,3713,3714,3715,3716,1584,3717,3718,3719,3720,3721,3722,
3723,3724,3725,3726,3727,3728,3729,3730,3731,3732,3733,3734,3735,3736,3737,3738,
3739,3740,3741,3742,3743,3744,3745,1344,3746,3747,3748,3749,3750,3751,3752,3753,
3754,3755,3756,1585,3757,3758,3759,3760,3761,3762,3763,3764,3765,3766,1586,3767,
3768,3769,3770,3771,3772,3773,3774,3775,3776,3777,3778,1345,3779,3780,3781,3782,
3783,3784,3785,3786,3787,3788,3789,3790,3791,3792,3793,3794,3795,1346,1587,3796,
3797,1588,3798,3799,3800,3801,3802,3803,3804,3805,3806,1347,3807,3808,3809,3810,
3811,1589,3812,3813,3814,3815,3816,3817,3818,3819,3820,3821,1590,3822,3823,1591,
1348,3824,3825,3826,3827,3828,3829,3830,1592,3831,3832,1593,3833,3834,3835,3836,
3837,3838,3839,3840,3841,3842,3843,3844,1349,3845,3846,3847,3848,3849,3850,3851,
3852,3853,3854,3855,3856,3857,3858,1594,3859,3860,3861,3862,3863,3864,3865,3866,
3867,3868,3869,1595,3870,3871,3872,3873,1596,3874,3875,3876,3877,3878,3879,3880,
3881,3882,3883,3884,3885,3886,1597,3887,3888,3889,3890,3891,3892,3893,3894,3895,
1598,3896,3897,3898,1599,1600,3899,1350,3900,1351,3901,3902,1352,3903,3904,3905,
3906,3907,3908,3909,3910,3911,3912,3913,3914,3915,3916,3917,3918,3919,3920,3921,
3922,3923,3924,1258,3925,3926,3927,3928,3929,3930,3931,1193,3932,1601,3933,3934,
3935,3936,3937,3938,3939,3940,3941,3942,3943,1602,3944,3945,3946,3947,3948,1603,
3949,3950,3951,3952,3953,3954,3955,3956,3957,3958,3959,3960,3961,3962,3963,3964,
3965,1604,3966,3967,3968,3969,3970,3971,3972,3973,3974,3975,3976,3977,1353,3978,
3979,3980,3981,3982,3983,3984,3985,3986,3987,3988,3989,3990,3991,1354,3992,3993,
3994,3995,3996,3997,3998,3999,4000,4001,4002,4003,4004,4005,4006,4007,4008,4009,
4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,1355,4024,
4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040,
1605,4041,4042,4043,4044,4045,4046,4047,4048,4049,4050,4051,4052,4053,4054,4055,
4056,4057,4058,4059,4060,1606,4061,4062,4063,4064,1607,4065,4066,4067,4068,4069,
4070,4071,4072,4073,4074,4075,4076,1194,4077,4078,1608,4079,4080,4081,4082,4083,
4084,4085,4086,4087,1609,4088,4089,4090,4091,4092,4093,4094,4095,4096,4097,4098,
4099,4100,4101,4102,4103,4104,4105,4106,4107,4108,1259,4109,4110,4111,4112,4113,
4114,4115,4116,4117,4118,4119,4120,4121,4122,4123,4124,1195,4125,4126,4127,1610,
4128,4129,4130,4131,4132,4133,4134,4135,4136,4137,1356,4138,4139,4140,4141,4142,
4143,4144,1611,4145,4146,4147,4148,4149,4150,4151,4152,4153,4154,4155,4156,4157,
4158,4159,4160,4161,4162,4163,4164,4165,4166,4167,4168,4169,4170,4171,4172,4173,
4174,4175,4176,4177,4178,4179,4180,4181,4182,4183,4184,4185,4186,4187,4188,4189,
4190,4191,4192,4193,4194,4195,4196,4197,4198,4199,4200,4201,4202,4203,4204,4205,
4206,4207,4208,4209,4210,4211,4212,4213,4214,4215,4216,4217,4218,4219,1612,4220,
4221,4222,4223,4224,4225,4226,4227,1357,4228,1613,4229,4230,4231,4232,4233,4234,
4235,4236,4237,4238,4239,4240,4241,4242,4243,1614,4244,4245,4246,4247,4248,4249,
4250,4251,4252,4253,4254,4255,4256,4257,4258,4259,4260,4261,4262,4263,4264,4265,
4266,4267,4268,4269,4270,1196,1358,4271,4272,4273,4274,4275,4276,4277,4278,4279,
4280,4281,4282,4283,4284,4285,4286,4287,1615,4288,4289,4290,4291,4292,4293,4294,
4295,4296,4297,4298,4299,4300,4301,4302,4303,4304,4305,4306,4307,4308,4309,4310,
4311,4312,4313,4314,4315,4316,4317,4318,4319,4320,4321,4322,4323,4324,4325,4326,
4327,4328,4329,4330,4331,4332,4333,4334,1616,4335,4336,4337,4338,4339,4340,4341,
4342,4343,4344,4345,4346,4347,4348,4349,4350,4351,4352,4353,4354,4355,4356,4357,
4358,4359,4360,1617,4361,4362,4363,4364,4365,1618,4366,4367,4368,4369,4370,4371,
4372,4373,4374,4375,4376,4377,4378,4379,4380,4381,4382,4383,4384,4385,4386,4387,
4388,4389,4390,4391,4392,4393,4394,4395,4396,4397,4398,4399,4400,4401,4402,4403,
4404,4405,4406,4407,4408,4409,4410,4411,4412,4413,4414,4415,4416,1619,4417,4418,
4419,4420,4421,4422,4423,4424,4425,1112,4426,4427,4428,4429,4430,1620,4431,4432,
4433,4434,4435,4436,4437,4438,4439,4440,4441,4442,1260,1261,4443,4444,4445,4446,
4447,4448,4449,4450,4451,4452,4453,4454,4455,1359,4456,4457,4458,4459,4460,4461,
4462,4463,4464,4465,1621,4466,4467,4468,4469,4470,4471,4472,4473,4474,4475,4476,
4477,4478,4479,4480,4481,4482,4483,4484,4485,4486,4487,4488,4489,1055,4490,4491,
4492,4493,4494,4495,4496,4497,4498,4499,4500,4501,4502,4503,4504,4505,4506,4507,
4508,4509,4510,4511,4512,4513,4514,4515,4516,4517,4518,1622,4519,4520,4521,1623,
4522,4523,4524,4525,4526,4527,4528,4529,4530,4531,4532,4533,4534,4535,1360,4536,
4537,4538,4539,4540,4541,4542,4543, 975,4544,4545,4546,4547,4548,4549,4550,4551,
4552,4553,4554,4555,4556,4557,4558,4559,4560,4561,4562,4563,4564,4565,4566,4567,
4568,4569,4570,4571,1624,4572,4573,4574,4575,4576,1625,4577,4578,4579,4580,4581,
4582,4583,4584,1626,4585,4586,4587,4588,4589,4590,4591,4592,4593,4594,4595,1627,
4596,4597,4598,4599,4600,4601,4602,4603,4604,4605,4606,4607,4608,4609,4610,4611,
4612,4613,4614,4615,1628,4616,4617,4618,4619,4620,4621,4622,4623,4624,4625,4626,
4627,4628,4629,4630,4631,4632,4633,4634,4635,4636,4637,4638,4639,4640,4641,4642,
4643,4644,4645,4646,4647,4648,4649,1361,4650,4651,4652,4653,4654,4655,4656,4657,
4658,4659,4660,4661,1362,4662,4663,4664,4665,4666,4667,4668,4669,4670,4671,4672,
4673,4674,4675,4676,4677,4678,4679,4680,4681,4682,1629,4683,4684,4685,4686,4687,
1630,4688,4689,4690,4691,1153,4692,4693,4694,1113,4695,4696,4697,4698,4699,4700,
4701,4702,4703,4704,4705,4706,4707,4708,4709,4710,4711,1197,4712,4713,4714,4715,
4716,4717,4718,4719,4720,4721,4722,4723,4724,4725,4726,4727,4728,4729,4730,4731,
4732,4733,4734,4735,1631,4736,1632,4737,4738,4739,4740,4741,4742,4743,4744,1633,
4745,4746,4747,4748,4749,1262,4750,4751,4752,4753,4754,1363,4755,4756,4757,4758,
4759,4760,4761,4762,4763,4764,4765,4766,4767,4768,1634,4769,4770,4771,4772,4773,
4774,4775,4776,4777,4778,1635,4779,4780,4781,4782,4783,4784,4785,4786,4787,4788,
4789,1636,4790,4791,4792,4793,4794,4795,4796,4797,4798,4799,4800,4801,4802,4803,
4804,4805,4806,1637,4807,4808,4809,1638,4810,4811,4812,4813,4814,4815,4816,4817,
4818,1639,4819,4820,4821,4822,4823,4824,4825,4826,4827,4828,4829,4830,4831,4832,
4833,1077,4834,4835,4836,4837,4838,4839,4840,4841,4842,4843,4844,4845,4846,4847,
4848,4849,4850,4851,4852,4853,4854,4855,4856,4857,4858,4859,4860,4861,4862,4863,
4864,4865,4866,4867,4868,4869,4870,4871,4872,4873,4874,4875,4876,4877,4878,4879,
4880,4881,4882,4883,1640,4884,4885,1641,4886,4887,4888,4889,4890,4891,4892,4893,
4894,4895,4896,4897,4898,4899,4900,4901,4902,4903,4904,4905,4906,4907,4908,4909,
4910,4911,1642,4912,4913,4914,1364,4915,4916,4917,4918,4919,4920,4921,4922,4923,
4924,4925,4926,4927,4928,4929,4930,4931,1643,4932,4933,4934,4935,4936,4937,4938,
4939,4940,4941,4942,4943,4944,4945,4946,4947,4948,4949,4950,4951,4952,4953,4954,
4955,4956,4957,4958,4959,4960,4961,4962,4963,4964,4965,4966,4967,4968,4969,4970,
4971,4972,4973,4974,4975,4976,4977,4978,4979,4980,1644,4981,4982,4983,4984,1645,
4985,4986,1646,4987,4988,4989,4990,4991,4992,4993,4994,4995,4996,4997,4998,4999,
5000,5001,5002,5003,5004,5005,1647,5006,1648,5007,5008,5009,5010,5011,5012,1078,
5013,5014,5015,5016,5017,5018,5019,5020,5021,5022,5023,5024,5025,5026,5027,5028,
1365,5029,5030,5031,5032,5033,5034,5035,5036,5037,5038,5039,1649,5040,5041,5042,
5043,5044,5045,1366,5046,5047,5048,5049,5050,5051,5052,5053,5054,5055,1650,5056,
5057,5058,5059,5060,5061,5062,5063,5064,5065,5066,5067,5068,5069,5070,5071,5072,
5073,5074,5075,5076,5077,1651,5078,5079,5080,5081,5082,5083,5084,5085,5086,5087,
5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102,5103,
5104,5105,5106,5107,5108,5109,5110,1652,5111,5112,5113,5114,5115,5116,5117,5118,
1367,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,1653,5130,5131,5132,
5133,5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,
5149,1368,5150,1654,5151,1369,5152,5153,5154,5155,5156,5157,5158,5159,5160,5161,
5162,5163,5164,5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,5176,5177,
5178,1370,5179,5180,5181,5182,5183,5184,5185,5186,5187,5188,5189,5190,5191,5192,
5193,5194,5195,5196,5197,5198,1655,5199,5200,5201,5202,1656,5203,5204,5205,5206,
1371,5207,1372,5208,5209,5210,5211,1373,5212,5213,1374,5214,5215,5216,5217,5218,
5219,5220,5221,5222,5223,5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,
5235,5236,5237,5238,5239,5240,5241,5242,5243,5244,5245,5246,5247,1657,5248,5249,
5250,5251,1658,1263,5252,5253,5254,5255,5256,1375,5257,5258,5259,5260,5261,5262,
5263,5264,5265,5266,5267,5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,
5279,5280,5281,5282,5283,1659,5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,
5294,5295,5296,5297,5298,5299,5300,1660,5301,5302,5303,5304,5305,5306,5307,5308,
5309,5310,5311,5312,5313,5314,5315,5316,5317,5318,5319,5320,5321,1376,5322,5323,
5324,5325,5326,5327,5328,5329,5330,5331,5332,5333,1198,5334,5335,5336,5337,5338,
5339,5340,5341,5342,5343,1661,5344,5345,5346,5347,5348,5349,5350,5351,5352,5353,
5354,5355,5356,5357,5358,5359,5360,5361,5362,5363,5364,5365,5366,5367,5368,5369,
5370,5371,5372,5373,5374,5375,5376,5377,5378,5379,5380,5381,5382,5383,5384,5385,
5386,5387,5388,5389,5390,5391,5392,5393,5394,5395,5396,5397,5398,1264,5399,5400,
5401,5402,5403,5404,5405,5406,5407,5408,5409,5410,5411,5412,1662,5413,5414,5415,
5416,1663,5417,5418,5419,5420,5421,5422,5423,5424,5425,5426,5427,5428,5429,5430,
5431,5432,5433,5434,5435,5436,5437,5438,1664,5439,5440,5441,5442,5443,5444,5445,
5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456,5457,5458,5459,5460,5461,
5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472,5473,5474,5475,5476,5477,
5478,1154,5479,5480,5481,5482,5483,5484,5485,1665,5486,5487,5488,5489,5490,5491,
5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504,5505,5506,5507,
5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520,5521,5522,5523,
5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536,5537,5538,5539,
5540,5541,5542,5543,5544,5545,5546,5547,5548,1377,5549,5550,5551,5552,5553,5554,
5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568,5569,5570,
1114,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584,5585,
5586,5587,5588,5589,5590,5591,5592,1378,5593,5594,5595,5596,5597,5598,5599,5600,
5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,1379,5615,
5616,5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,
5632,5633,5634,1380,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,
5647,5648,5649,1381,1056,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,
1666,5661,5662,5663,5664,5665,5666,5667,5668,1667,5669,1668,5670,5671,5672,5673,
5674,5675,5676,5677,5678,1155,5679,5680,5681,5682,5683,5684,5685,5686,5687,5688,
5689,5690,5691,5692,5693,5694,5695,5696,5697,5698,1669,5699,5700,5701,5702,5703,
5704,5705,1670,5706,5707,5708,5709,5710,1671,5711,5712,5713,5714,1382,5715,5716,
5717,5718,5719,5720,5721,5722,5723,5724,5725,1672,5726,5727,1673,1674,5728,5729,
5730,5731,5732,5733,5734,5735,5736,1675,5737,5738,5739,5740,5741,5742,5743,5744,
1676,5745,5746,5747,5748,5749,5750,5751,1383,5752,5753,5754,5755,5756,5757,5758,
5759,5760,5761,5762,5763,5764,5765,5766,5767,5768,1677,5769,5770,5771,5772,5773,
1678,5774,5775,5776, 998,5777,5778,5779,5780,5781,5782,5783,5784,5785,1384,5786,
5787,5788,5789,5790,5791,5792,5793,5794,5795,5796,5797,5798,5799,5800,1679,5801,
5802,5803,1115,1116,5804,5805,5806,5807,5808,5809,5810,5811,5812,5813,5814,5815,
5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828,5829,5830,5831,
5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844,5845,5846,5847,
5848,5849,5850,5851,5852,5853,5854,5855,1680,5856,5857,5858,5859,5860,5861,5862,
5863,5864,1681,5865,5866,5867,1682,5868,5869,5870,5871,5872,5873,5874,5875,5876,
5877,5878,5879,1683,5880,1684,5881,5882,5883,5884,1685,5885,5886,5887,5888,5889,
5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905,
5906,5907,1686,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920,
5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,1687,
5936,5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,
5952,1688,1689,5953,1199,5954,5955,5956,5957,5958,5959,5960,5961,1690,5962,5963,
5964,5965,5966,5967,5968,5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,
5980,5981,1385,5982,1386,5983,5984,5985,5986,5987,5988,5989,5990,5991,5992,5993,
5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004,6005,6006,6007,6008,6009,
6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020,6021,6022,6023,6024,6025,
6026,6027,1265,6028,6029,1691,6030,6031,6032,6033,6034,6035,6036,6037,6038,6039,
6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052,6053,6054,6055,
6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068,6069,6070,6071,
6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084,1692,6085,6086,
6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100,6101,6102,
6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116,6117,6118,
6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,1693,6132,6133,
6134,6135,6136,1694,6137,6138,6139,6140,6141,1695,6142,6143,6144,6145,6146,6147,
6148,6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163,
6164,6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179,
6180,6181,6182,6183,6184,6185,1696,6186,6187,6188,6189,6190,6191,6192,6193,6194,
6195,6196,6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210,
6211,6212,6213,6214,6215,6216,6217,6218,6219,1697,6220,6221,6222,6223,6224,6225,
6226,6227,6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241,
6242,6243,6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,1698,6254,6255,6256,
6257,6258,6259,6260,6261,6262,6263,1200,6264,6265,6266,6267,6268,6269,6270,6271, #1024
6272,6273,6274,6275,6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,6286,6287,
6288,6289,6290,6291,6292,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,1699,
6303,6304,1700,6305,6306,6307,6308,6309,6310,6311,6312,6313,6314,6315,6316,6317,
6318,6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333,
6334,6335,6336,6337,6338,6339,1701,6340,6341,6342,6343,6344,1387,6345,6346,6347,
6348,6349,6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363,
6364,6365,6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379,
6380,6381,6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395,
6396,6397,6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,6411,
6412,6413,1702,6414,6415,6416,6417,6418,6419,6420,6421,6422,1703,6423,6424,6425,
6426,6427,6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,1704,6439,6440,
6441,6442,6443,6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,6455,6456,
6457,6458,6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472,
6473,6474,6475,6476,6477,6478,6479,6480,6481,6482,6483,6484,6485,6486,6487,6488,
6489,6490,6491,6492,6493,6494,6495,6496,6497,6498,6499,6500,6501,6502,6503,1266,
6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,
6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532,6533,6534,6535,
6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548,6549,6550,6551,
1705,1706,6552,6553,6554,6555,6556,6557,6558,6559,6560,6561,6562,6563,6564,6565,
6566,6567,6568,6569,6570,6571,6572,6573,6574,6575,6576,6577,6578,6579,6580,6581,
6582,6583,6584,6585,6586,6587,6588,6589,6590,6591,6592,6593,6594,6595,6596,6597,
6598,6599,6600,6601,6602,6603,6604,6605,6606,6607,6608,6609,6610,6611,6612,6613,
6614,6615,6616,6617,6618,6619,6620,6621,6622,6623,6624,6625,6626,6627,6628,6629,
6630,6631,6632,6633,6634,6635,6636,6637,1388,6638,6639,6640,6641,6642,6643,6644,
1707,6645,6646,6647,6648,6649,6650,6651,6652,6653,6654,6655,6656,6657,6658,6659,
6660,6661,6662,6663,1708,6664,6665,6666,6667,6668,6669,6670,6671,6672,6673,6674,
1201,6675,6676,6677,6678,6679,6680,6681,6682,6683,6684,6685,6686,6687,6688,6689,
6690,6691,6692,6693,6694,6695,6696,6697,6698,6699,6700,6701,6702,6703,6704,6705,
6706,6707,6708,6709,6710,6711,6712,6713,6714,6715,6716,6717,6718,6719,6720,6721,
6722,6723,6724,6725,1389,6726,6727,6728,6729,6730,6731,6732,6733,6734,6735,6736,
1390,1709,6737,6738,6739,6740,6741,6742,1710,6743,6744,6745,6746,1391,6747,6748,
6749,6750,6751,6752,6753,6754,6755,6756,6757,1392,6758,6759,6760,6761,6762,6763,
6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777,6778,6779,
6780,1202,6781,6782,6783,6784,6785,6786,6787,6788,6789,6790,6791,6792,6793,6794,
6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806,6807,6808,6809,1711,
6810,6811,6812,6813,6814,6815,6816,6817,6818,6819,6820,6821,6822,6823,6824,6825,
6826,6827,6828,6829,6830,6831,6832,6833,6834,6835,6836,1393,6837,6838,6839,6840,
6841,6842,6843,6844,6845,6846,6847,6848,6849,6850,6851,6852,6853,6854,6855,6856,
6857,6858,6859,6860,6861,6862,6863,6864,6865,6866,6867,6868,6869,6870,6871,6872,
6873,6874,6875,6876,6877,6878,6879,6880,6881,6882,6883,6884,6885,6886,6887,6888,
6889,6890,6891,6892,6893,6894,6895,6896,6897,6898,6899,6900,6901,6902,1712,6903,
6904,6905,6906,6907,6908,6909,6910,1713,6911,6912,6913,6914,6915,6916,6917,6918,
6919,6920,6921,6922,6923,6924,6925,6926,6927,6928,6929,6930,6931,6932,6933,6934,
6935,6936,6937,6938,6939,6940,6941,6942,6943,6944,6945,6946,6947,6948,6949,6950,
6951,6952,6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966,
6967,6968,6969,6970,6971,6972,6973,6974,1714,6975,6976,6977,6978,6979,6980,6981,
6982,6983,6984,6985,6986,6987,6988,1394,6989,6990,6991,6992,6993,6994,6995,6996,
6997,6998,6999,7000,1715,7001,7002,7003,7004,7005,7006,7007,7008,7009,7010,7011,
7012,7013,7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027,
7028,1716,7029,7030,7031,7032,7033,7034,7035,7036,7037,7038,7039,7040,7041,7042,
7043,7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058,
7059,7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,7071,7072,7073,7074,
7075,7076,7077,7078,7079,7080,7081,7082,7083,7084,7085,7086,7087,7088,7089,7090,
7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105,7106,
7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,7119,7120,7121,7122,
7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136,7137,7138,
7139,7140,7141,7142,7143,7144,7145,7146,7147,7148,7149,7150,7151,7152,7153,7154,
7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167,7168,7169,7170,
7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183,7184,7185,7186,
7187,7188,7189,7190,7191,7192,7193,7194,7195,7196,7197,7198,7199,7200,7201,7202,
7203,7204,7205,7206,7207,1395,7208,7209,7210,7211,7212,7213,1717,7214,7215,7216,
7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229,7230,7231,7232,
7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245,7246,7247,7248,
7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261,7262,7263,7264,
7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277,7278,7279,7280,
7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293,7294,7295,7296,
7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308,7309,7310,7311,7312,
7313,1718,7314,7315,7316,7317,7318,7319,7320,7321,7322,7323,7324,7325,7326,7327,
7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339,7340,7341,7342,7343,
7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,7354,7355,7356,7357,7358,7359,
7360,7361,7362,7363,7364,7365,7366,7367,7368,7369,7370,7371,7372,7373,7374,7375,
7376,7377,7378,7379,7380,7381,7382,7383,7384,7385,7386,7387,7388,7389,7390,7391,
7392,7393,7394,7395,7396,7397,7398,7399,7400,7401,7402,7403,7404,7405,7406,7407,
7408,7409,7410,7411,7412,7413,7414,7415,7416,7417,7418,7419,7420,7421,7422,7423,
7424,7425,7426,7427,7428,7429,7430,7431,7432,7433,7434,7435,7436,7437,7438,7439,
7440,7441,7442,7443,7444,7445,7446,7447,7448,7449,7450,7451,7452,7453,7454,7455,
7456,7457,7458,7459,7460,7461,7462,7463,7464,7465,7466,7467,7468,7469,7470,7471,
7472,7473,7474,7475,7476,7477,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487,
7488,7489,7490,7491,7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,7503,
7504,7505,7506,7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519,
7520,7521,7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535,
7536,7537,7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,7550,7551,
7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567,
7568,7569,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582,7583,
7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598,7599,
7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614,7615,
7616,7617,7618,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628,7629,7630,7631,
7632,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643,7644,7645,7646,7647,
7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659,7660,7661,7662,7663,
7664,7665,7666,7667,7668,7669,7670,7671,7672,7673,7674,7675,7676,7677,7678,7679,
7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690,7691,7692,7693,7694,7695,
7696,7697,7698,7699,7700,7701,7702,7703,7704,7705,7706,7707,7708,7709,7710,7711,
7712,7713,7714,7715,7716,7717,7718,7719,7720,7721,7722,7723,7724,7725,7726,7727,
7728,7729,7730,7731,7732,7733,7734,7735,7736,7737,7738,7739,7740,7741,7742,7743,
7744,7745,7746,7747,7748,7749,7750,7751,7752,7753,7754,7755,7756,7757,7758,7759,
7760,7761,7762,7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775,
7776,7777,7778,7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791,
7792,7793,7794,7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,7806,7807,
7808,7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,
7824,7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,
7840,7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,
7856,7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,
7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,
7888,7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,
7904,7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,
7920,7921,7922,7923,7924,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935,
7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951,
7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967,
7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983,
7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999,
8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015,
8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031,
8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047,
8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063,
8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,
8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,
8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111,
8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127,
8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143,
8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159,
8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175,
8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191,
8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,
8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223,
8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239,
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255,
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271,
8272,8273,8274,8275,8276,8277,8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,
8288,8289,8290,8291,8292,8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,
8304,8305,8306,8307,8308,8309,8310,8311,8312,8313,8314,8315,8316,8317,8318,8319,
8320,8321,8322,8323,8324,8325,8326,8327,8328,8329,8330,8331,8332,8333,8334,8335,
8336,8337,8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,
8352,8353,8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,8364,8365,8366,8367,
8368,8369,8370,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382,8383,
8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398,8399,
8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,8411,8412,8413,8414,8415,
8416,8417,8418,8419,8420,8421,8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,
8432,8433,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443,8444,8445,8446,8447,
8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459,8460,8461,8462,8463,
8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475,8476,8477,8478,8479,
8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490,8491,8492,8493,8494,8495,
8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,
8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,
8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538,8539,8540,8541,8542,8543,
8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,
8560,8561,8562,8563,8564,8565,8566,8567,8568,8569,8570,8571,8572,8573,8574,8575,
8576,8577,8578,8579,8580,8581,8582,8583,8584,8585,8586,8587,8588,8589,8590,8591,
8592,8593,8594,8595,8596,8597,8598,8599,8600,8601,8602,8603,8604,8605,8606,8607,
8608,8609,8610,8611,8612,8613,8614,8615,8616,8617,8618,8619,8620,8621,8622,8623,
8624,8625,8626,8627,8628,8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,8639,
8640,8641,8642,8643,8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,
8656,8657,8658,8659,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,
8672,8673,8674,8675,8676,8677,8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,
8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,
8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,
8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,
8736,8737,8738,8739,8740,8741)
# flake8: noqa
@@ -0,0 +1,42 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCKRDistributionAnalysis
from .mbcssm import EUCKRSMModel
class EUCKRProber(MultiByteCharSetProber):
def __init__(self):
MultiByteCharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(EUCKRSMModel)
self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
self.reset()
def get_charset_name(self):
return "EUC-KR"
@@ -0,0 +1,428 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# EUCTW frequency table
# Converted from big5 work
# by Taiwan's Mandarin Promotion Council
# <http:#www.edu.tw:81/mandr/>
# 128 --> 0.42261
# 256 --> 0.57851
# 512 --> 0.74851
# 1024 --> 0.89384
# 2048 --> 0.97583
#
# Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98
# Random Distribution Ration = 512/(5401-512)=0.105
#
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
# Char to FreqOrder table ,
EUCTW_TABLE_SIZE = 8102
EUCTWCharToFreqOrder = (
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822
7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838
630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854
179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902
1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982
2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998
437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062
266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078
7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094
1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110
32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126
188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158
3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174
324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206
2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238
287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254
3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270
1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286
1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302
1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318
2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334
265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350
4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366
1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382
7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398
2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414
383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430
98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446
523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462
710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478
7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494
379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510
1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526
585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542
690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558
7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574
1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590
544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606
3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622
4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638
3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654
279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670
610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686
1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702
4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718
3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734
3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750
2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766
7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782
3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798
7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814
1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830
2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846
1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878
1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894
4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910
3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926
534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942
165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958
626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974
2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990
7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006
1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022
2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038
1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054
1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070
7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086
7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102
7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118
3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134
4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150
1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166
7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182
2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198
7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214
3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230
3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246
7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262
2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278
7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294
862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310
4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326
2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342
7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358
3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374
2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390
2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406
294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422
2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438
1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454
1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470
2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486
1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502
7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518
7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534
2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550
4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566
1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582
7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598
829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614
4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630
375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646
2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662
444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678
1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694
1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710
730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726
3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742
3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758
1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774
3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790
7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806
7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822
1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838
2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854
1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870
3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886
2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902
3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918
2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934
4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950
4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966
3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982
97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998
3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014
424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030
3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046
3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062
3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078
1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094
7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110
199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126
7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142
1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158
391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174
4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190
3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206
397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222
2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238
2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254
3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270
1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286
4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302
2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318
1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334
1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350
2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366
3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382
1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398
7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414
1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430
4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446
1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462
135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478
1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494
3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510
3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526
2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542
1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558
4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574
660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590
7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606
2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622
3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638
4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654
790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670
7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686
7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702
1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718
4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734
3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750
2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766
3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782
3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798
2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814
1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830
4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846
3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862
3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878
2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894
4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910
7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926
3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942
2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958
3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974
1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990
2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006
3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022
4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038
2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054
2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070
7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086
1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102
2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118
1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134
3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150
4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166
2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182
3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198
3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214
2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230
4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246
2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262
3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278
4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294
7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310
3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326
194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342
1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358
4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374
1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390
4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406
7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438
7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454
2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470
1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486
1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502
3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518
509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534
552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550
478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566
3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582
2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598
751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614
7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630
1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646
3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662
7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678
1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694
7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710
4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726
1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742
2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758
2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774
4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790
802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806
809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822
3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838
3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854
1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870
2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886
7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902
1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918
1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934
3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950
919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966
1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982
4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998
7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014
2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030
3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046
516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062
1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078
2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094
2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110
7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126
7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142
7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158
2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174
2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190
1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206
4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222
3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238
3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254
4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270
4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286
2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302
2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318
7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334
4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350
7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366
2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382
1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398
3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414
4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430
2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446
120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462
2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478
1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494
2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510
2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526
4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542
7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558
1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574
3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590
7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606
1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622
8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638
2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654
8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670
2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686
2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702
8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718
8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734
8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750
408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766
8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782
4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798
3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814
8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830
1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846
8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862
425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878
1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894
479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910
4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926
1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942
4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958
1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974
433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990
3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006
4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022
8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038
938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
#Everything below is of no interest for detection purpose
2515,1613,4582,8119,3312,3866,2516,8120,4058,8121,1637,4059,2466,4583,3867,8122, # 8118
2493,3016,3734,8123,8124,2192,8125,8126,2162,8127,8128,8129,8130,8131,8132,8133, # 8134
8134,8135,8136,8137,8138,8139,8140,8141,8142,8143,8144,8145,8146,8147,8148,8149, # 8150
8150,8151,8152,8153,8154,8155,8156,8157,8158,8159,8160,8161,8162,8163,8164,8165, # 8166
8166,8167,8168,8169,8170,8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181, # 8182
8182,8183,8184,8185,8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197, # 8198
8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213, # 8214
8214,8215,8216,8217,8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229, # 8230
8230,8231,8232,8233,8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245, # 8246
8246,8247,8248,8249,8250,8251,8252,8253,8254,8255,8256,8257,8258,8259,8260,8261, # 8262
8262,8263,8264,8265,8266,8267,8268,8269,8270,8271,8272,8273,8274,8275,8276,8277, # 8278
8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,8290,8291,8292,8293, # 8294
8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,8308,8309, # 8310
8310,8311,8312,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322,8323,8324,8325, # 8326
8326,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337,8338,8339,8340,8341, # 8342
8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353,8354,8355,8356,8357, # 8358
8358,8359,8360,8361,8362,8363,8364,8365,8366,8367,8368,8369,8370,8371,8372,8373, # 8374
8374,8375,8376,8377,8378,8379,8380,8381,8382,8383,8384,8385,8386,8387,8388,8389, # 8390
8390,8391,8392,8393,8394,8395,8396,8397,8398,8399,8400,8401,8402,8403,8404,8405, # 8406
8406,8407,8408,8409,8410,8411,8412,8413,8414,8415,8416,8417,8418,8419,8420,8421, # 8422
8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,8432,8433,8434,8435,8436,8437, # 8438
8438,8439,8440,8441,8442,8443,8444,8445,8446,8447,8448,8449,8450,8451,8452,8453, # 8454
8454,8455,8456,8457,8458,8459,8460,8461,8462,8463,8464,8465,8466,8467,8468,8469, # 8470
8470,8471,8472,8473,8474,8475,8476,8477,8478,8479,8480,8481,8482,8483,8484,8485, # 8486
8486,8487,8488,8489,8490,8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501, # 8502
8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517, # 8518
8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533, # 8534
8534,8535,8536,8537,8538,8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549, # 8550
8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,8565, # 8566
8566,8567,8568,8569,8570,8571,8572,8573,8574,8575,8576,8577,8578,8579,8580,8581, # 8582
8582,8583,8584,8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597, # 8598
8598,8599,8600,8601,8602,8603,8604,8605,8606,8607,8608,8609,8610,8611,8612,8613, # 8614
8614,8615,8616,8617,8618,8619,8620,8621,8622,8623,8624,8625,8626,8627,8628,8629, # 8630
8630,8631,8632,8633,8634,8635,8636,8637,8638,8639,8640,8641,8642,8643,8644,8645, # 8646
8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,8657,8658,8659,8660,8661, # 8662
8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672,8673,8674,8675,8676,8677, # 8678
8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,8688,8689,8690,8691,8692,8693, # 8694
8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710
8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726
8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741) # 8742
# flake8: noqa
@@ -0,0 +1,41 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCTWDistributionAnalysis
from .mbcssm import EUCTWSMModel
class EUCTWProber(MultiByteCharSetProber):
def __init__(self):
MultiByteCharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(EUCTWSMModel)
self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
self.reset()
def get_charset_name(self):
return "EUC-TW"
@@ -0,0 +1,472 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# GB2312 most frequently used character table
#
# Char to FreqOrder table , from hz6763
# 512 --> 0.79 -- 0.79
# 1024 --> 0.92 -- 0.13
# 2048 --> 0.98 -- 0.06
# 6768 --> 1.00 -- 0.02
#
# Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79
# Random Distribution Ration = 512 / (3755 - 512) = 0.157
#
# Typical Distribution Ratio about 25% of Ideal one, still much higher that RDR
GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
GB2312_TABLE_SIZE = 3760
GB2312CharToFreqOrder = (
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
249,4088,1746,1873,2047,1774, 581,1813, 358,1174,3590,1014,1561,4844,2245, 670,
1636,3112, 889,1286, 953, 556,2327,3060,1290,3141, 613, 185,3477,1367, 850,3820,
1715,2428,2642,2303,2732,3041,2562,2648,3566,3946,1349, 388,3098,2091,1360,3585,
152,1687,1539, 738,1559, 59,1232,2925,2267,1388,1249,1741,1679,2960, 151,1566,
1125,1352,4271, 924,4296, 385,3166,4459, 310,1245,2850, 70,3285,2729,3534,3575,
2398,3298,3466,1960,2265, 217,3647, 864,1909,2084,4401,2773,1010,3269,5152, 853,
3051,3121,1244,4251,1895, 364,1499,1540,2313,1180,3655,2268, 562, 715,2417,3061,
544, 336,3768,2380,1752,4075, 950, 280,2425,4382, 183,2759,3272, 333,4297,2155,
1688,2356,1444,1039,4540, 736,1177,3349,2443,2368,2144,2225, 565, 196,1482,3406,
927,1335,4147, 692, 878,1311,1653,3911,3622,1378,4200,1840,2969,3149,2126,1816,
2534,1546,2393,2760, 737,2494, 13, 447, 245,2747, 38,2765,2129,2589,1079, 606,
360, 471,3755,2890, 404, 848, 699,1785,1236, 370,2221,1023,3746,2074,2026,2023,
2388,1581,2119, 812,1141,3091,2536,1519, 804,2053, 406,1596,1090, 784, 548,4414,
1806,2264,2936,1100, 343,4114,5096, 622,3358, 743,3668,1510,1626,5020,3567,2513,
3195,4115,5627,2489,2991, 24,2065,2697,1087,2719, 48,1634, 315, 68, 985,2052,
198,2239,1347,1107,1439, 597,2366,2172, 871,3307, 919,2487,2790,1867, 236,2570,
1413,3794, 906,3365,3381,1701,1982,1818,1524,2924,1205, 616,2586,2072,2004, 575,
253,3099, 32,1365,1182, 197,1714,2454,1201, 554,3388,3224,2748, 756,2587, 250,
2567,1507,1517,3529,1922,2761,2337,3416,1961,1677,2452,2238,3153, 615, 911,1506,
1474,2495,1265,1906,2749,3756,3280,2161, 898,2714,1759,3450,2243,2444, 563, 26,
3286,2266,3769,3344,2707,3677, 611,1402, 531,1028,2871,4548,1375, 261,2948, 835,
1190,4134, 353, 840,2684,1900,3082,1435,2109,1207,1674, 329,1872,2781,4055,2686,
2104, 608,3318,2423,2957,2768,1108,3739,3512,3271,3985,2203,1771,3520,1418,2054,
1681,1153, 225,1627,2929, 162,2050,2511,3687,1954, 124,1859,2431,1684,3032,2894,
585,4805,3969,2869,2704,2088,2032,2095,3656,2635,4362,2209, 256, 518,2042,2105,
3777,3657, 643,2298,1148,1779, 190, 989,3544, 414, 11,2135,2063,2979,1471, 403,
3678, 126, 770,1563, 671,2499,3216,2877, 600,1179, 307,2805,4937,1268,1297,2694,
252,4032,1448,1494,1331,1394, 127,2256, 222,1647,1035,1481,3056,1915,1048, 873,
3651, 210, 33,1608,2516, 200,1520, 415, 102, 0,3389,1287, 817, 91,3299,2940,
836,1814, 549,2197,1396,1669,2987,3582,2297,2848,4528,1070, 687, 20,1819, 121,
1552,1364,1461,1968,2617,3540,2824,2083, 177, 948,4938,2291, 110,4549,2066, 648,
3359,1755,2110,2114,4642,4845,1693,3937,3308,1257,1869,2123, 208,1804,3159,2992,
2531,2549,3361,2418,1350,2347,2800,2568,1291,2036,2680, 72, 842,1990, 212,1233,
1154,1586, 75,2027,3410,4900,1823,1337,2710,2676, 728,2810,1522,3026,4995, 157,
755,1050,4022, 710, 785,1936,2194,2085,1406,2777,2400, 150,1250,4049,1206, 807,
1910, 534, 529,3309,1721,1660, 274, 39,2827, 661,2670,1578, 925,3248,3815,1094,
4278,4901,4252, 41,1150,3747,2572,2227,4501,3658,4902,3813,3357,3617,2884,2258,
887, 538,4187,3199,1294,2439,3042,2329,2343,2497,1255, 107, 543,1527, 521,3478,
3568, 194,5062, 15, 961,3870,1241,1192,2664, 66,5215,3260,2111,1295,1127,2152,
3805,4135, 901,1164,1976, 398,1278, 530,1460, 748, 904,1054,1966,1426, 53,2909,
509, 523,2279,1534, 536,1019, 239,1685, 460,2353, 673,1065,2401,3600,4298,2272,
1272,2363, 284,1753,3679,4064,1695, 81, 815,2677,2757,2731,1386, 859, 500,4221,
2190,2566, 757,1006,2519,2068,1166,1455, 337,2654,3203,1863,1682,1914,3025,1252,
1409,1366, 847, 714,2834,2038,3209, 964,2970,1901, 885,2553,1078,1756,3049, 301,
1572,3326, 688,2130,1996,2429,1805,1648,2930,3421,2750,3652,3088, 262,1158,1254,
389,1641,1812, 526,1719, 923,2073,1073,1902, 468, 489,4625,1140, 857,2375,3070,
3319,2863, 380, 116,1328,2693,1161,2244, 273,1212,1884,2769,3011,1775,1142, 461,
3066,1200,2147,2212, 790, 702,2695,4222,1601,1058, 434,2338,5153,3640, 67,2360,
4099,2502, 618,3472,1329, 416,1132, 830,2782,1807,2653,3211,3510,1662, 192,2124,
296,3979,1739,1611,3684, 23, 118, 324, 446,1239,1225, 293,2520,3814,3795,2535,
3116, 17,1074, 467,2692,2201, 387,2922, 45,1326,3055,1645,3659,2817, 958, 243,
1903,2320,1339,2825,1784,3289, 356, 576, 865,2315,2381,3377,3916,1088,3122,1713,
1655, 935, 628,4689,1034,1327, 441, 800, 720, 894,1979,2183,1528,5289,2702,1071,
4046,3572,2399,1571,3281, 79, 761,1103, 327, 134, 758,1899,1371,1615, 879, 442,
215,2605,2579, 173,2048,2485,1057,2975,3317,1097,2253,3801,4263,1403,1650,2946,
814,4968,3487,1548,2644,1567,1285, 2, 295,2636, 97, 946,3576, 832, 141,4257,
3273, 760,3821,3521,3156,2607, 949,1024,1733,1516,1803,1920,2125,2283,2665,3180,
1501,2064,3560,2171,1592, 803,3518,1416, 732,3897,4258,1363,1362,2458, 119,1427,
602,1525,2608,1605,1639,3175, 694,3064, 10, 465, 76,2000,4846,4208, 444,3781,
1619,3353,2206,1273,3796, 740,2483, 320,1723,2377,3660,2619,1359,1137,1762,1724,
2345,2842,1850,1862, 912, 821,1866, 612,2625,1735,2573,3369,1093, 844, 89, 937,
930,1424,3564,2413,2972,1004,3046,3019,2011, 711,3171,1452,4178, 428, 801,1943,
432, 445,2811, 206,4136,1472, 730, 349, 73, 397,2802,2547, 998,1637,1167, 789,
396,3217, 154,1218, 716,1120,1780,2819,4826,1931,3334,3762,2139,1215,2627, 552,
3664,3628,3232,1405,2383,3111,1356,2652,3577,3320,3101,1703, 640,1045,1370,1246,
4996, 371,1575,2436,1621,2210, 984,4033,1734,2638, 16,4529, 663,2755,3255,1451,
3917,2257,1253,1955,2234,1263,2951, 214,1229, 617, 485, 359,1831,1969, 473,2310,
750,2058, 165, 80,2864,2419, 361,4344,2416,2479,1134, 796,3726,1266,2943, 860,
2715, 938, 390,2734,1313,1384, 248, 202, 877,1064,2854, 522,3907, 279,1602, 297,
2357, 395,3740, 137,2075, 944,4089,2584,1267,3802, 62,1533,2285, 178, 176, 780,
2440, 201,3707, 590, 478,1560,4354,2117,1075, 30, 74,4643,4004,1635,1441,2745,
776,2596, 238,1077,1692,1912,2844, 605, 499,1742,3947, 241,3053, 980,1749, 936,
2640,4511,2582, 515,1543,2162,5322,2892,2993, 890,2148,1924, 665,1827,3581,1032,
968,3163, 339,1044,1896, 270, 583,1791,1720,4367,1194,3488,3669, 43,2523,1657,
163,2167, 290,1209,1622,3378, 550, 634,2508,2510, 695,2634,2384,2512,1476,1414,
220,1469,2341,2138,2852,3183,2900,4939,2865,3502,1211,3680, 854,3227,1299,2976,
3172, 186,2998,1459, 443,1067,3251,1495, 321,1932,3054, 909, 753,1410,1828, 436,
2441,1119,1587,3164,2186,1258, 227, 231,1425,1890,3200,3942, 247, 959, 725,5254,
2741, 577,2158,2079, 929, 120, 174, 838,2813, 591,1115, 417,2024, 40,3240,1536,
1037, 291,4151,2354, 632,1298,2406,2500,3535,1825,1846,3451, 205,1171, 345,4238,
18,1163, 811, 685,2208,1217, 425,1312,1508,1175,4308,2552,1033, 587,1381,3059,
2984,3482, 340,1316,4023,3972, 792,3176, 519, 777,4690, 918, 933,4130,2981,3741,
90,3360,2911,2200,5184,4550, 609,3079,2030, 272,3379,2736, 363,3881,1130,1447,
286, 779, 357,1169,3350,3137,1630,1220,2687,2391, 747,1277,3688,2618,2682,2601,
1156,3196,5290,4034,3102,1689,3596,3128, 874, 219,2783, 798, 508,1843,2461, 269,
1658,1776,1392,1913,2983,3287,2866,2159,2372, 829,4076, 46,4253,2873,1889,1894,
915,1834,1631,2181,2318, 298, 664,2818,3555,2735, 954,3228,3117, 527,3511,2173,
681,2712,3033,2247,2346,3467,1652, 155,2164,3382, 113,1994, 450, 899, 494, 994,
1237,2958,1875,2336,1926,3727, 545,1577,1550, 633,3473, 204,1305,3072,2410,1956,
2471, 707,2134, 841,2195,2196,2663,3843,1026,4940, 990,3252,4997, 368,1092, 437,
3212,3258,1933,1829, 675,2977,2893, 412, 943,3723,4644,3294,3283,2230,2373,5154,
2389,2241,2661,2323,1404,2524, 593, 787, 677,3008,1275,2059, 438,2709,2609,2240,
2269,2246,1446, 36,1568,1373,3892,1574,2301,1456,3962, 693,2276,5216,2035,1143,
2720,1919,1797,1811,2763,4137,2597,1830,1699,1488,1198,2090, 424,1694, 312,3634,
3390,4179,3335,2252,1214, 561,1059,3243,2295,2561, 975,5155,2321,2751,3772, 472,
1537,3282,3398,1047,2077,2348,2878,1323,3340,3076, 690,2906, 51, 369, 170,3541,
1060,2187,2688,3670,2541,1083,1683, 928,3918, 459, 109,4427, 599,3744,4286, 143,
2101,2730,2490, 82,1588,3036,2121, 281,1860, 477,4035,1238,2812,3020,2716,3312,
1530,2188,2055,1317, 843, 636,1808,1173,3495, 649, 181,1002, 147,3641,1159,2414,
3750,2289,2795, 813,3123,2610,1136,4368, 5,3391,4541,2174, 420, 429,1728, 754,
1228,2115,2219, 347,2223,2733, 735,1518,3003,2355,3134,1764,3948,3329,1888,2424,
1001,1234,1972,3321,3363,1672,1021,1450,1584, 226, 765, 655,2526,3404,3244,2302,
3665, 731, 594,2184, 319,1576, 621, 658,2656,4299,2099,3864,1279,2071,2598,2739,
795,3086,3699,3908,1707,2352,2402,1382,3136,2475,1465,4847,3496,3865,1085,3004,
2591,1084, 213,2287,1963,3565,2250, 822, 793,4574,3187,1772,1789,3050, 595,1484,
1959,2770,1080,2650, 456, 422,2996, 940,3322,4328,4345,3092,2742, 965,2784, 739,
4124, 952,1358,2498,2949,2565, 332,2698,2378, 660,2260,2473,4194,3856,2919, 535,
1260,2651,1208,1428,1300,1949,1303,2942, 433,2455,2450,1251,1946, 614,1269, 641,
1306,1810,2737,3078,2912, 564,2365,1419,1415,1497,4460,2367,2185,1379,3005,1307,
3218,2175,1897,3063, 682,1157,4040,4005,1712,1160,1941,1399, 394, 402,2952,1573,
1151,2986,2404, 862, 299,2033,1489,3006, 346, 171,2886,3401,1726,2932, 168,2533,
47,2507,1030,3735,1145,3370,1395,1318,1579,3609,4560,2857,4116,1457,2529,1965,
504,1036,2690,2988,2405, 745,5871, 849,2397,2056,3081, 863,2359,3857,2096, 99,
1397,1769,2300,4428,1643,3455,1978,1757,3718,1440, 35,4879,3742,1296,4228,2280,
160,5063,1599,2013, 166, 520,3479,1646,3345,3012, 490,1937,1545,1264,2182,2505,
1096,1188,1369,1436,2421,1667,2792,2460,1270,2122, 727,3167,2143, 806,1706,1012,
1800,3037, 960,2218,1882, 805, 139,2456,1139,1521, 851,1052,3093,3089, 342,2039,
744,5097,1468,1502,1585,2087, 223, 939, 326,2140,2577, 892,2481,1623,4077, 982,
3708, 135,2131, 87,2503,3114,2326,1106, 876,1616, 547,2997,2831,2093,3441,4530,
4314, 9,3256,4229,4148, 659,1462,1986,1710,2046,2913,2231,4090,4880,5255,3392,
3274,1368,3689,4645,1477, 705,3384,3635,1068,1529,2941,1458,3782,1509, 100,1656,
2548, 718,2339, 408,1590,2780,3548,1838,4117,3719,1345,3530, 717,3442,2778,3220,
2898,1892,4590,3614,3371,2043,1998,1224,3483, 891, 635, 584,2559,3355, 733,1766,
1729,1172,3789,1891,2307, 781,2982,2271,1957,1580,5773,2633,2005,4195,3097,1535,
3213,1189,1934,5693,3262, 586,3118,1324,1598, 517,1564,2217,1868,1893,4445,3728,
2703,3139,1526,1787,1992,3882,2875,1549,1199,1056,2224,1904,2711,5098,4287, 338,
1993,3129,3489,2689,1809,2815,1997, 957,1855,3898,2550,3275,3057,1105,1319, 627,
1505,1911,1883,3526, 698,3629,3456,1833,1431, 746, 77,1261,2017,2296,1977,1885,
125,1334,1600, 525,1798,1109,2222,1470,1945, 559,2236,1186,3443,2476,1929,1411,
2411,3135,1777,3372,2621,1841,1613,3229, 668,1430,1839,2643,2916, 195,1989,2671,
2358,1387, 629,3205,2293,5256,4439, 123,1310, 888,1879,4300,3021,3605,1003,1162,
3192,2910,2010, 140,2395,2859, 55,1082,2012,2901, 662, 419,2081,1438, 680,2774,
4654,3912,1620,1731,1625,5035,4065,2328, 512,1344, 802,5443,2163,2311,2537, 524,
3399, 98,1155,2103,1918,2606,3925,2816,1393,2465,1504,3773,2177,3963,1478,4346,
180,1113,4655,3461,2028,1698, 833,2696,1235,1322,1594,4408,3623,3013,3225,2040,
3022, 541,2881, 607,3632,2029,1665,1219, 639,1385,1686,1099,2803,3231,1938,3188,
2858, 427, 676,2772,1168,2025, 454,3253,2486,3556, 230,1950, 580, 791,1991,1280,
1086,1974,2034, 630, 257,3338,2788,4903,1017, 86,4790, 966,2789,1995,1696,1131,
259,3095,4188,1308, 179,1463,5257, 289,4107,1248, 42,3413,1725,2288, 896,1947,
774,4474,4254, 604,3430,4264, 392,2514,2588, 452, 237,1408,3018, 988,4531,1970,
3034,3310, 540,2370,1562,1288,2990, 502,4765,1147, 4,1853,2708, 207, 294,2814,
4078,2902,2509, 684, 34,3105,3532,2551, 644, 709,2801,2344, 573,1727,3573,3557,
2021,1081,3100,4315,2100,3681, 199,2263,1837,2385, 146,3484,1195,2776,3949, 997,
1939,3973,1008,1091,1202,1962,1847,1149,4209,5444,1076, 493, 117,5400,2521, 972,
1490,2934,1796,4542,2374,1512,2933,2657, 413,2888,1135,2762,2314,2156,1355,2369,
766,2007,2527,2170,3124,2491,2593,2632,4757,2437, 234,3125,3591,1898,1750,1376,
1942,3468,3138, 570,2127,2145,3276,4131, 962, 132,1445,4196, 19, 941,3624,3480,
3366,1973,1374,4461,3431,2629, 283,2415,2275, 808,2887,3620,2112,2563,1353,3610,
955,1089,3103,1053, 96, 88,4097, 823,3808,1583, 399, 292,4091,3313, 421,1128,
642,4006, 903,2539,1877,2082, 596, 29,4066,1790, 722,2157, 130, 995,1569, 769,
1485, 464, 513,2213, 288,1923,1101,2453,4316, 133, 486,2445, 50, 625, 487,2207,
57, 423, 481,2962, 159,3729,1558, 491, 303, 482, 501, 240,2837, 112,3648,2392,
1783, 362, 8,3433,3422, 610,2793,3277,1390,1284,1654, 21,3823, 734, 367, 623,
193, 287, 374,1009,1483, 816, 476, 313,2255,2340,1262,2150,2899,1146,2581, 782,
2116,1659,2018,1880, 255,3586,3314,1110,2867,2137,2564, 986,2767,5185,2006, 650,
158, 926, 762, 881,3157,2717,2362,3587, 306,3690,3245,1542,3077,2427,1691,2478,
2118,2985,3490,2438, 539,2305, 983, 129,1754, 355,4201,2386, 827,2923, 104,1773,
2838,2771, 411,2905,3919, 376, 767, 122,1114, 828,2422,1817,3506, 266,3460,1007,
1609,4998, 945,2612,4429,2274, 726,1247,1964,2914,2199,2070,4002,4108, 657,3323,
1422, 579, 455,2764,4737,1222,2895,1670, 824,1223,1487,2525, 558, 861,3080, 598,
2659,2515,1967, 752,2583,2376,2214,4180, 977, 704,2464,4999,2622,4109,1210,2961,
819,1541, 142,2284, 44, 418, 457,1126,3730,4347,4626,1644,1876,3671,1864, 302,
1063,5694, 624, 723,1984,3745,1314,1676,2488,1610,1449,3558,3569,2166,2098, 409,
1011,2325,3704,2306, 818,1732,1383,1824,1844,3757, 999,2705,3497,1216,1423,2683,
2426,2954,2501,2726,2229,1475,2554,5064,1971,1794,1666,2014,1343, 783, 724, 191,
2434,1354,2220,5065,1763,2752,2472,4152, 131, 175,2885,3434, 92,1466,4920,2616,
3871,3872,3866, 128,1551,1632, 669,1854,3682,4691,4125,1230, 188,2973,3290,1302,
1213, 560,3266, 917, 763,3909,3249,1760, 868,1958, 764,1782,2097, 145,2277,3774,
4462, 64,1491,3062, 971,2132,3606,2442, 221,1226,1617, 218, 323,1185,3207,3147,
571, 619,1473,1005,1744,2281, 449,1887,2396,3685, 275, 375,3816,1743,3844,3731,
845,1983,2350,4210,1377, 773, 967,3499,3052,3743,2725,4007,1697,1022,3943,1464,
3264,2855,2722,1952,1029,2839,2467, 84,4383,2215, 820,1391,2015,2448,3672, 377,
1948,2168, 797,2545,3536,2578,2645, 94,2874,1678, 405,1259,3071, 771, 546,1315,
470,1243,3083, 895,2468, 981, 969,2037, 846,4181, 653,1276,2928, 14,2594, 557,
3007,2474, 156, 902,1338,1740,2574, 537,2518, 973,2282,2216,2433,1928, 138,2903,
1293,2631,1612, 646,3457, 839,2935, 111, 496,2191,2847, 589,3186, 149,3994,2060,
4031,2641,4067,3145,1870, 37,3597,2136,1025,2051,3009,3383,3549,1121,1016,3261,
1301, 251,2446,2599,2153, 872,3246, 637, 334,3705, 831, 884, 921,3065,3140,4092,
2198,1944, 246,2964, 108,2045,1152,1921,2308,1031, 203,3173,4170,1907,3890, 810,
1401,2003,1690, 506, 647,1242,2828,1761,1649,3208,2249,1589,3709,2931,5156,1708,
498, 666,2613, 834,3817,1231, 184,2851,1124, 883,3197,2261,3710,1765,1553,2658,
1178,2639,2351, 93,1193, 942,2538,2141,4402, 235,1821, 870,1591,2192,1709,1871,
3341,1618,4126,2595,2334, 603, 651, 69, 701, 268,2662,3411,2555,1380,1606, 503,
448, 254,2371,2646, 574,1187,2309,1770, 322,2235,1292,1801, 305, 566,1133, 229,
2067,2057, 706, 167, 483,2002,2672,3295,1820,3561,3067, 316, 378,2746,3452,1112,
136,1981, 507,1651,2917,1117, 285,4591, 182,2580,3522,1304, 335,3303,1835,2504,
1795,1792,2248, 674,1018,2106,2449,1857,2292,2845, 976,3047,1781,2600,2727,1389,
1281, 52,3152, 153, 265,3950, 672,3485,3951,4463, 430,1183, 365, 278,2169, 27,
1407,1336,2304, 209,1340,1730,2202,1852,2403,2883, 979,1737,1062, 631,2829,2542,
3876,2592, 825,2086,2226,3048,3625, 352,1417,3724, 542, 991, 431,1351,3938,1861,
2294, 826,1361,2927,3142,3503,1738, 463,2462,2723, 582,1916,1595,2808, 400,3845,
3891,2868,3621,2254, 58,2492,1123, 910,2160,2614,1372,1603,1196,1072,3385,1700,
3267,1980, 696, 480,2430, 920, 799,1570,2920,1951,2041,4047,2540,1321,4223,2469,
3562,2228,1271,2602, 401,2833,3351,2575,5157, 907,2312,1256, 410, 263,3507,1582,
996, 678,1849,2316,1480, 908,3545,2237, 703,2322, 667,1826,2849,1531,2604,2999,
2407,3146,2151,2630,1786,3711, 469,3542, 497,3899,2409, 858, 837,4446,3393,1274,
786, 620,1845,2001,3311, 484, 308,3367,1204,1815,3691,2332,1532,2557,1842,2020,
2724,1927,2333,4440, 567, 22,1673,2728,4475,1987,1858,1144,1597, 101,1832,3601,
12, 974,3783,4391, 951,1412, 1,3720, 453,4608,4041, 528,1041,1027,3230,2628,
1129, 875,1051,3291,1203,2262,1069,2860,2799,2149,2615,3278, 144,1758,3040, 31,
475,1680, 366,2685,3184, 311,1642,4008,2466,5036,1593,1493,2809, 216,1420,1668,
233, 304,2128,3284, 232,1429,1768,1040,2008,3407,2740,2967,2543, 242,2133, 778,
1565,2022,2620, 505,2189,2756,1098,2273, 372,1614, 708, 553,2846,2094,2278, 169,
3626,2835,4161, 228,2674,3165, 809,1454,1309, 466,1705,1095, 900,3423, 880,2667,
3751,5258,2317,3109,2571,4317,2766,1503,1342, 866,4447,1118, 63,2076, 314,1881,
1348,1061, 172, 978,3515,1747, 532, 511,3970, 6, 601, 905,2699,3300,1751, 276,
1467,3725,2668, 65,4239,2544,2779,2556,1604, 578,2451,1802, 992,2331,2624,1320,
3446, 713,1513,1013, 103,2786,2447,1661, 886,1702, 916, 654,3574,2031,1556, 751,
2178,2821,2179,1498,1538,2176, 271, 914,2251,2080,1325, 638,1953,2937,3877,2432,
2754, 95,3265,1716, 260,1227,4083, 775, 106,1357,3254, 426,1607, 555,2480, 772,
1985, 244,2546, 474, 495,1046,2611,1851,2061, 71,2089,1675,2590, 742,3758,2843,
3222,1433, 267,2180,2576,2826,2233,2092,3913,2435, 956,1745,3075, 856,2113,1116,
451, 3,1988,2896,1398, 993,2463,1878,2049,1341,2718,2721,2870,2108, 712,2904,
4363,2753,2324, 277,2872,2349,2649, 384, 987, 435, 691,3000, 922, 164,3939, 652,
1500,1184,4153,2482,3373,2165,4848,2335,3775,3508,3154,2806,2830,1554,2102,1664,
2530,1434,2408, 893,1547,2623,3447,2832,2242,2532,3169,2856,3223,2078, 49,3770,
3469, 462, 318, 656,2259,3250,3069, 679,1629,2758, 344,1138,1104,3120,1836,1283,
3115,2154,1437,4448, 934, 759,1999, 794,2862,1038, 533,2560,1722,2342, 855,2626,
1197,1663,4476,3127, 85,4240,2528, 25,1111,1181,3673, 407,3470,4561,2679,2713,
768,1925,2841,3986,1544,1165, 932, 373,1240,2146,1930,2673, 721,4766, 354,4333,
391,2963, 187, 61,3364,1442,1102, 330,1940,1767, 341,3809,4118, 393,2496,2062,
2211, 105, 331, 300, 439, 913,1332, 626, 379,3304,1557, 328, 689,3952, 309,1555,
931, 317,2517,3027, 325, 569, 686,2107,3084, 60,1042,1333,2794, 264,3177,4014,
1628, 258,3712, 7,4464,1176,1043,1778, 683, 114,1975, 78,1492, 383,1886, 510,
386, 645,5291,2891,2069,3305,4138,3867,2939,2603,2493,1935,1066,1848,3588,1015,
1282,1289,4609, 697,1453,3044,2666,3611,1856,2412, 54, 719,1330, 568,3778,2459,
1748, 788, 492, 551,1191,1000, 488,3394,3763, 282,1799, 348,2016,1523,3155,2390,
1049, 382,2019,1788,1170, 729,2968,3523, 897,3926,2785,2938,3292, 350,2319,3238,
1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232,
1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624,
381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189,
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, # last 512
#Everything below is of no interest for detection purpose
5508,6484,3900,3414,3974,4441,4024,3537,4037,5628,5099,3633,6485,3148,6486,3636,
5509,3257,5510,5973,5445,5872,4941,4403,3174,4627,5873,6276,2286,4230,5446,5874,
5122,6102,6103,4162,5447,5123,5323,4849,6277,3980,3851,5066,4246,5774,5067,6278,
3001,2807,5695,3346,5775,5974,5158,5448,6487,5975,5976,5776,3598,6279,5696,4806,
4211,4154,6280,6488,6489,6490,6281,4212,5037,3374,4171,6491,4562,4807,4722,4827,
5977,6104,4532,4079,5159,5324,5160,4404,3858,5359,5875,3975,4288,4610,3486,4512,
5325,3893,5360,6282,6283,5560,2522,4231,5978,5186,5449,2569,3878,6284,5401,3578,
4415,6285,4656,5124,5979,2506,4247,4449,3219,3417,4334,4969,4329,6492,4576,4828,
4172,4416,4829,5402,6286,3927,3852,5361,4369,4830,4477,4867,5876,4173,6493,6105,
4657,6287,6106,5877,5450,6494,4155,4868,5451,3700,5629,4384,6288,6289,5878,3189,
4881,6107,6290,6495,4513,6496,4692,4515,4723,5100,3356,6497,6291,3810,4080,5561,
3570,4430,5980,6498,4355,5697,6499,4724,6108,6109,3764,4050,5038,5879,4093,3226,
6292,5068,5217,4693,3342,5630,3504,4831,4377,4466,4309,5698,4431,5777,6293,5778,
4272,3706,6110,5326,3752,4676,5327,4273,5403,4767,5631,6500,5699,5880,3475,5039,
6294,5562,5125,4348,4301,4482,4068,5126,4593,5700,3380,3462,5981,5563,3824,5404,
4970,5511,3825,4738,6295,6501,5452,4516,6111,5881,5564,6502,6296,5982,6503,4213,
4163,3454,6504,6112,4009,4450,6113,4658,6297,6114,3035,6505,6115,3995,4904,4739,
4563,4942,4110,5040,3661,3928,5362,3674,6506,5292,3612,4791,5565,4149,5983,5328,
5259,5021,4725,4577,4564,4517,4364,6298,5405,4578,5260,4594,4156,4157,5453,3592,
3491,6507,5127,5512,4709,4922,5984,5701,4726,4289,6508,4015,6116,5128,4628,3424,
4241,5779,6299,4905,6509,6510,5454,5702,5780,6300,4365,4923,3971,6511,5161,3270,
3158,5985,4100, 867,5129,5703,6117,5363,3695,3301,5513,4467,6118,6512,5455,4232,
4242,4629,6513,3959,4478,6514,5514,5329,5986,4850,5162,5566,3846,4694,6119,5456,
4869,5781,3779,6301,5704,5987,5515,4710,6302,5882,6120,4392,5364,5705,6515,6121,
6516,6517,3736,5988,5457,5989,4695,2457,5883,4551,5782,6303,6304,6305,5130,4971,
6122,5163,6123,4870,3263,5365,3150,4871,6518,6306,5783,5069,5706,3513,3498,4409,
5330,5632,5366,5458,5459,3991,5990,4502,3324,5991,5784,3696,4518,5633,4119,6519,
4630,5634,4417,5707,4832,5992,3418,6124,5993,5567,4768,5218,6520,4595,3458,5367,
6125,5635,6126,4202,6521,4740,4924,6307,3981,4069,4385,6308,3883,2675,4051,3834,
4302,4483,5568,5994,4972,4101,5368,6309,5164,5884,3922,6127,6522,6523,5261,5460,
5187,4164,5219,3538,5516,4111,3524,5995,6310,6311,5369,3181,3386,2484,5188,3464,
5569,3627,5708,6524,5406,5165,4677,4492,6312,4872,4851,5885,4468,5996,6313,5709,
5710,6128,2470,5886,6314,5293,4882,5785,3325,5461,5101,6129,5711,5786,6525,4906,
6526,6527,4418,5887,5712,4808,2907,3701,5713,5888,6528,3765,5636,5331,6529,6530,
3593,5889,3637,4943,3692,5714,5787,4925,6315,6130,5462,4405,6131,6132,6316,5262,
6531,6532,5715,3859,5716,5070,4696,5102,3929,5788,3987,4792,5997,6533,6534,3920,
4809,5000,5998,6535,2974,5370,6317,5189,5263,5717,3826,6536,3953,5001,4883,3190,
5463,5890,4973,5999,4741,6133,6134,3607,5570,6000,4711,3362,3630,4552,5041,6318,
6001,2950,2953,5637,4646,5371,4944,6002,2044,4120,3429,6319,6537,5103,4833,6538,
6539,4884,4647,3884,6003,6004,4758,3835,5220,5789,4565,5407,6540,6135,5294,4697,
4852,6320,6321,3206,4907,6541,6322,4945,6542,6136,6543,6323,6005,4631,3519,6544,
5891,6545,5464,3784,5221,6546,5571,4659,6547,6324,6137,5190,6548,3853,6549,4016,
4834,3954,6138,5332,3827,4017,3210,3546,4469,5408,5718,3505,4648,5790,5131,5638,
5791,5465,4727,4318,6325,6326,5792,4553,4010,4698,3439,4974,3638,4335,3085,6006,
5104,5042,5166,5892,5572,6327,4356,4519,5222,5573,5333,5793,5043,6550,5639,5071,
4503,6328,6139,6551,6140,3914,3901,5372,6007,5640,4728,4793,3976,3836,4885,6552,
4127,6553,4451,4102,5002,6554,3686,5105,6555,5191,5072,5295,4611,5794,5296,6556,
5893,5264,5894,4975,5466,5265,4699,4976,4370,4056,3492,5044,4886,6557,5795,4432,
4769,4357,5467,3940,4660,4290,6141,4484,4770,4661,3992,6329,4025,4662,5022,4632,
4835,4070,5297,4663,4596,5574,5132,5409,5895,6142,4504,5192,4664,5796,5896,3885,
5575,5797,5023,4810,5798,3732,5223,4712,5298,4084,5334,5468,6143,4052,4053,4336,
4977,4794,6558,5335,4908,5576,5224,4233,5024,4128,5469,5225,4873,6008,5045,4729,
4742,4633,3675,4597,6559,5897,5133,5577,5003,5641,5719,6330,6560,3017,2382,3854,
4406,4811,6331,4393,3964,4946,6561,2420,3722,6562,4926,4378,3247,1736,4442,6332,
5134,6333,5226,3996,2918,5470,4319,4003,4598,4743,4744,4485,3785,3902,5167,5004,
5373,4394,5898,6144,4874,1793,3997,6334,4085,4214,5106,5642,4909,5799,6009,4419,
4189,3330,5899,4165,4420,5299,5720,5227,3347,6145,4081,6335,2876,3930,6146,3293,
3786,3910,3998,5900,5300,5578,2840,6563,5901,5579,6147,3531,5374,6564,6565,5580,
4759,5375,6566,6148,3559,5643,6336,6010,5517,6337,6338,5721,5902,3873,6011,6339,
6567,5518,3868,3649,5722,6568,4771,4947,6569,6149,4812,6570,2853,5471,6340,6341,
5644,4795,6342,6012,5723,6343,5724,6013,4349,6344,3160,6150,5193,4599,4514,4493,
5168,4320,6345,4927,3666,4745,5169,5903,5005,4928,6346,5725,6014,4730,4203,5046,
4948,3395,5170,6015,4150,6016,5726,5519,6347,5047,3550,6151,6348,4197,4310,5904,
6571,5581,2965,6152,4978,3960,4291,5135,6572,5301,5727,4129,4026,5905,4853,5728,
5472,6153,6349,4533,2700,4505,5336,4678,3583,5073,2994,4486,3043,4554,5520,6350,
6017,5800,4487,6351,3931,4103,5376,6352,4011,4321,4311,4190,5136,6018,3988,3233,
4350,5906,5645,4198,6573,5107,3432,4191,3435,5582,6574,4139,5410,6353,5411,3944,
5583,5074,3198,6575,6354,4358,6576,5302,4600,5584,5194,5412,6577,6578,5585,5413,
5303,4248,5414,3879,4433,6579,4479,5025,4854,5415,6355,4760,4772,3683,2978,4700,
3797,4452,3965,3932,3721,4910,5801,6580,5195,3551,5907,3221,3471,3029,6019,3999,
5908,5909,5266,5267,3444,3023,3828,3170,4796,5646,4979,4259,6356,5647,5337,3694,
6357,5648,5338,4520,4322,5802,3031,3759,4071,6020,5586,4836,4386,5048,6581,3571,
4679,4174,4949,6154,4813,3787,3402,3822,3958,3215,3552,5268,4387,3933,4950,4359,
6021,5910,5075,3579,6358,4234,4566,5521,6359,3613,5049,6022,5911,3375,3702,3178,
4911,5339,4521,6582,6583,4395,3087,3811,5377,6023,6360,6155,4027,5171,5649,4421,
4249,2804,6584,2270,6585,4000,4235,3045,6156,5137,5729,4140,4312,3886,6361,4330,
6157,4215,6158,3500,3676,4929,4331,3713,4930,5912,4265,3776,3368,5587,4470,4855,
3038,4980,3631,6159,6160,4132,4680,6161,6362,3923,4379,5588,4255,6586,4121,6587,
6363,4649,6364,3288,4773,4774,6162,6024,6365,3543,6588,4274,3107,3737,5050,5803,
4797,4522,5589,5051,5730,3714,4887,5378,4001,4523,6163,5026,5522,4701,4175,2791,
3760,6589,5473,4224,4133,3847,4814,4815,4775,3259,5416,6590,2738,6164,6025,5304,
3733,5076,5650,4816,5590,6591,6165,6592,3934,5269,6593,3396,5340,6594,5804,3445,
3602,4042,4488,5731,5732,3525,5591,4601,5196,6166,6026,5172,3642,4612,3202,4506,
4798,6366,3818,5108,4303,5138,5139,4776,3332,4304,2915,3415,4434,5077,5109,4856,
2879,5305,4817,6595,5913,3104,3144,3903,4634,5341,3133,5110,5651,5805,6167,4057,
5592,2945,4371,5593,6596,3474,4182,6367,6597,6168,4507,4279,6598,2822,6599,4777,
4713,5594,3829,6169,3887,5417,6170,3653,5474,6368,4216,2971,5228,3790,4579,6369,
5733,6600,6601,4951,4746,4555,6602,5418,5475,6027,3400,4665,5806,6171,4799,6028,
5052,6172,3343,4800,4747,5006,6370,4556,4217,5476,4396,5229,5379,5477,3839,5914,
5652,5807,4714,3068,4635,5808,6173,5342,4192,5078,5419,5523,5734,6174,4557,6175,
4602,6371,6176,6603,5809,6372,5735,4260,3869,5111,5230,6029,5112,6177,3126,4681,
5524,5915,2706,3563,4748,3130,6178,4018,5525,6604,6605,5478,4012,4837,6606,4534,
4193,5810,4857,3615,5479,6030,4082,3697,3539,4086,5270,3662,4508,4931,5916,4912,
5811,5027,3888,6607,4397,3527,3302,3798,2775,2921,2637,3966,4122,4388,4028,4054,
1633,4858,5079,3024,5007,3982,3412,5736,6608,3426,3236,5595,3030,6179,3427,3336,
3279,3110,6373,3874,3039,5080,5917,5140,4489,3119,6374,5812,3405,4494,6031,4666,
4141,6180,4166,6032,5813,4981,6609,5081,4422,4982,4112,3915,5653,3296,3983,6375,
4266,4410,5654,6610,6181,3436,5082,6611,5380,6033,3819,5596,4535,5231,5306,5113,
6612,4952,5918,4275,3113,6613,6376,6182,6183,5814,3073,4731,4838,5008,3831,6614,
4888,3090,3848,4280,5526,5232,3014,5655,5009,5737,5420,5527,6615,5815,5343,5173,
5381,4818,6616,3151,4953,6617,5738,2796,3204,4360,2989,4281,5739,5174,5421,5197,
3132,5141,3849,5142,5528,5083,3799,3904,4839,5480,2880,4495,3448,6377,6184,5271,
5919,3771,3193,6034,6035,5920,5010,6036,5597,6037,6378,6038,3106,5422,6618,5423,
5424,4142,6619,4889,5084,4890,4313,5740,6620,3437,5175,5307,5816,4199,5198,5529,
5817,5199,5656,4913,5028,5344,3850,6185,2955,5272,5011,5818,4567,4580,5029,5921,
3616,5233,6621,6622,6186,4176,6039,6379,6380,3352,5200,5273,2908,5598,5234,3837,
5308,6623,6624,5819,4496,4323,5309,5201,6625,6626,4983,3194,3838,4167,5530,5922,
5274,6381,6382,3860,3861,5599,3333,4292,4509,6383,3553,5481,5820,5531,4778,6187,
3955,3956,4324,4389,4218,3945,4325,3397,2681,5923,4779,5085,4019,5482,4891,5382,
5383,6040,4682,3425,5275,4094,6627,5310,3015,5483,5657,4398,5924,3168,4819,6628,
5925,6629,5532,4932,4613,6041,6630,4636,6384,4780,4204,5658,4423,5821,3989,4683,
5822,6385,4954,6631,5345,6188,5425,5012,5384,3894,6386,4490,4104,6632,5741,5053,
6633,5823,5926,5659,5660,5927,6634,5235,5742,5824,4840,4933,4820,6387,4859,5928,
4955,6388,4143,3584,5825,5346,5013,6635,5661,6389,5014,5484,5743,4337,5176,5662,
6390,2836,6391,3268,6392,6636,6042,5236,6637,4158,6638,5744,5663,4471,5347,3663,
4123,5143,4293,3895,6639,6640,5311,5929,5826,3800,6189,6393,6190,5664,5348,3554,
3594,4749,4603,6641,5385,4801,6043,5827,4183,6642,5312,5426,4761,6394,5665,6191,
4715,2669,6643,6644,5533,3185,5427,5086,5930,5931,5386,6192,6044,6645,4781,4013,
5745,4282,4435,5534,4390,4267,6045,5746,4984,6046,2743,6193,3501,4087,5485,5932,
5428,4184,4095,5747,4061,5054,3058,3862,5933,5600,6646,5144,3618,6395,3131,5055,
5313,6396,4650,4956,3855,6194,3896,5202,4985,4029,4225,6195,6647,5828,5486,5829,
3589,3002,6648,6397,4782,5276,6649,6196,6650,4105,3803,4043,5237,5830,6398,4096,
3643,6399,3528,6651,4453,3315,4637,6652,3984,6197,5535,3182,3339,6653,3096,2660,
6400,6654,3449,5934,4250,4236,6047,6401,5831,6655,5487,3753,4062,5832,6198,6199,
6656,3766,6657,3403,4667,6048,6658,4338,2897,5833,3880,2797,3780,4326,6659,5748,
5015,6660,5387,4351,5601,4411,6661,3654,4424,5935,4339,4072,5277,4568,5536,6402,
6662,5238,6663,5349,5203,6200,5204,6201,5145,4536,5016,5056,4762,5834,4399,4957,
6202,6403,5666,5749,6664,4340,6665,5936,5177,5667,6666,6667,3459,4668,6404,6668,
6669,4543,6203,6670,4276,6405,4480,5537,6671,4614,5205,5668,6672,3348,2193,4763,
6406,6204,5937,5602,4177,5669,3419,6673,4020,6205,4443,4569,5388,3715,3639,6407,
6049,4058,6206,6674,5938,4544,6050,4185,4294,4841,4651,4615,5488,6207,6408,6051,
5178,3241,3509,5835,6208,4958,5836,4341,5489,5278,6209,2823,5538,5350,5206,5429,
6675,4638,4875,4073,3516,4684,4914,4860,5939,5603,5389,6052,5057,3237,5490,3791,
6676,6409,6677,4821,4915,4106,5351,5058,4243,5539,4244,5604,4842,4916,5239,3028,
3716,5837,5114,5605,5390,5940,5430,6210,4332,6678,5540,4732,3667,3840,6053,4305,
3408,5670,5541,6410,2744,5240,5750,6679,3234,5606,6680,5607,5671,3608,4283,4159,
4400,5352,4783,6681,6411,6682,4491,4802,6211,6412,5941,6413,6414,5542,5751,6683,
4669,3734,5942,6684,6415,5943,5059,3328,4670,4144,4268,6685,6686,6687,6688,4372,
3603,6689,5944,5491,4373,3440,6416,5543,4784,4822,5608,3792,4616,5838,5672,3514,
5391,6417,4892,6690,4639,6691,6054,5673,5839,6055,6692,6056,5392,6212,4038,5544,
5674,4497,6057,6693,5840,4284,5675,4021,4545,5609,6418,4454,6419,6213,4113,4472,
5314,3738,5087,5279,4074,5610,4959,4063,3179,4750,6058,6420,6214,3476,4498,4716,
5431,4960,4685,6215,5241,6694,6421,6216,6695,5841,5945,6422,3748,5946,5179,3905,
5752,5545,5947,4374,6217,4455,6423,4412,6218,4803,5353,6696,3832,5280,6219,4327,
4702,6220,6221,6059,4652,5432,6424,3749,4751,6425,5753,4986,5393,4917,5948,5030,
5754,4861,4733,6426,4703,6697,6222,4671,5949,4546,4961,5180,6223,5031,3316,5281,
6698,4862,4295,4934,5207,3644,6427,5842,5950,6428,6429,4570,5843,5282,6430,6224,
5088,3239,6060,6699,5844,5755,6061,6431,2701,5546,6432,5115,5676,4039,3993,3327,
4752,4425,5315,6433,3941,6434,5677,4617,4604,3074,4581,6225,5433,6435,6226,6062,
4823,5756,5116,6227,3717,5678,4717,5845,6436,5679,5846,6063,5847,6064,3977,3354,
6437,3863,5117,6228,5547,5394,4499,4524,6229,4605,6230,4306,4500,6700,5951,6065,
3693,5952,5089,4366,4918,6701,6231,5548,6232,6702,6438,4704,5434,6703,6704,5953,
4168,6705,5680,3420,6706,5242,4407,6066,3812,5757,5090,5954,4672,4525,3481,5681,
4618,5395,5354,5316,5955,6439,4962,6707,4526,6440,3465,4673,6067,6441,5682,6708,
5435,5492,5758,5683,4619,4571,4674,4804,4893,4686,5493,4753,6233,6068,4269,6442,
6234,5032,4705,5146,5243,5208,5848,6235,6443,4963,5033,4640,4226,6236,5849,3387,
6444,6445,4436,4437,5850,4843,5494,4785,4894,6709,4361,6710,5091,5956,3331,6237,
4987,5549,6069,6711,4342,3517,4473,5317,6070,6712,6071,4706,6446,5017,5355,6713,
6714,4988,5436,6447,4734,5759,6715,4735,4547,4456,4754,6448,5851,6449,6450,3547,
5852,5318,6451,6452,5092,4205,6716,6238,4620,4219,5611,6239,6072,4481,5760,5957,
5958,4059,6240,6453,4227,4537,6241,5761,4030,4186,5244,5209,3761,4457,4876,3337,
5495,5181,6242,5959,5319,5612,5684,5853,3493,5854,6073,4169,5613,5147,4895,6074,
5210,6717,5182,6718,3830,6243,2798,3841,6075,6244,5855,5614,3604,4606,5496,5685,
5118,5356,6719,6454,5960,5357,5961,6720,4145,3935,4621,5119,5962,4261,6721,6455,
4786,5963,4375,4582,6245,6246,6247,6076,5437,4877,5856,3376,4380,6248,4160,6722,
5148,6456,5211,6457,6723,4718,6458,6724,6249,5358,4044,3297,6459,6250,5857,5615,
5497,5245,6460,5498,6725,6251,6252,5550,3793,5499,2959,5396,6461,6462,4572,5093,
5500,5964,3806,4146,6463,4426,5762,5858,6077,6253,4755,3967,4220,5965,6254,4989,
5501,6464,4352,6726,6078,4764,2290,5246,3906,5438,5283,3767,4964,2861,5763,5094,
6255,6256,4622,5616,5859,5860,4707,6727,4285,4708,4824,5617,6257,5551,4787,5212,
4965,4935,4687,6465,6728,6466,5686,6079,3494,4413,2995,5247,5966,5618,6729,5967,
5764,5765,5687,5502,6730,6731,6080,5397,6467,4990,6258,6732,4538,5060,5619,6733,
4719,5688,5439,5018,5149,5284,5503,6734,6081,4607,6259,5120,3645,5861,4583,6260,
4584,4675,5620,4098,5440,6261,4863,2379,3306,4585,5552,5689,4586,5285,6735,4864,
6736,5286,6082,6737,4623,3010,4788,4381,4558,5621,4587,4896,3698,3161,5248,4353,
4045,6262,3754,5183,4588,6738,6263,6739,6740,5622,3936,6741,6468,6742,6264,5095,
6469,4991,5968,6743,4992,6744,6083,4897,6745,4256,5766,4307,3108,3968,4444,5287,
3889,4343,6084,4510,6085,4559,6086,4898,5969,6746,5623,5061,4919,5249,5250,5504,
5441,6265,5320,4878,3242,5862,5251,3428,6087,6747,4237,5624,5442,6266,5553,4539,
6748,2585,3533,5398,4262,6088,5150,4736,4438,6089,6267,5505,4966,6749,6268,6750,
6269,5288,5554,3650,6090,6091,4624,6092,5690,6751,5863,4270,5691,4277,5555,5864,
6752,5692,4720,4865,6470,5151,4688,4825,6753,3094,6754,6471,3235,4653,6755,5213,
5399,6756,3201,4589,5865,4967,6472,5866,6473,5019,3016,6757,5321,4756,3957,4573,
6093,4993,5767,4721,6474,6758,5625,6759,4458,6475,6270,6760,5556,4994,5214,5252,
6271,3875,5768,6094,5034,5506,4376,5769,6761,2120,6476,5253,5770,6762,5771,5970,
3990,5971,5557,5558,5772,6477,6095,2787,4641,5972,5121,6096,6097,6272,6763,3703,
5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978,
4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767)
# flake8: noqa
@@ -0,0 +1,41 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import GB2312DistributionAnalysis
from .mbcssm import GB2312SMModel
class GB2312Prober(MultiByteCharSetProber):
def __init__(self):
MultiByteCharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(GB2312SMModel)
self._mDistributionAnalyzer = GB2312DistributionAnalysis()
self.reset()
def get_charset_name(self):
return "GB2312"
@@ -0,0 +1,283 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Shy Shalom
# Portions created by the Initial Developer are Copyright (C) 2005
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
from .constants import eNotMe, eDetecting
from .compat import wrap_ord
# This prober doesn't actually recognize a language or a charset.
# It is a helper prober for the use of the Hebrew model probers
### General ideas of the Hebrew charset recognition ###
#
# Four main charsets exist in Hebrew:
# "ISO-8859-8" - Visual Hebrew
# "windows-1255" - Logical Hebrew
# "ISO-8859-8-I" - Logical Hebrew
# "x-mac-hebrew" - ?? Logical Hebrew ??
#
# Both "ISO" charsets use a completely identical set of code points, whereas
# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
# these code points. windows-1255 defines additional characters in the range
# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
# x-mac-hebrew defines similar additional code points but with a different
# mapping.
#
# As far as an average Hebrew text with no diacritics is concerned, all four
# charsets are identical with respect to code points. Meaning that for the
# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
# (including final letters).
#
# The dominant difference between these charsets is their directionality.
# "Visual" directionality means that the text is ordered as if the renderer is
# not aware of a BIDI rendering algorithm. The renderer sees the text and
# draws it from left to right. The text itself when ordered naturally is read
# backwards. A buffer of Visual Hebrew generally looks like so:
# "[last word of first line spelled backwards] [whole line ordered backwards
# and spelled backwards] [first word of first line spelled backwards]
# [end of line] [last word of second line] ... etc' "
# adding punctuation marks, numbers and English text to visual text is
# naturally also "visual" and from left to right.
#
# "Logical" directionality means the text is ordered "naturally" according to
# the order it is read. It is the responsibility of the renderer to display
# the text from right to left. A BIDI algorithm is used to place general
# punctuation marks, numbers and English text in the text.
#
# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
# what little evidence I could find, it seems that its general directionality
# is Logical.
#
# To sum up all of the above, the Hebrew probing mechanism knows about two
# charsets:
# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
# backwards while line order is natural. For charset recognition purposes
# the line order is unimportant (In fact, for this implementation, even
# word order is unimportant).
# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
#
# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
# specifically identified.
# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
# that contain special punctuation marks or diacritics is displayed with
# some unconverted characters showing as question marks. This problem might
# be corrected using another model prober for x-mac-hebrew. Due to the fact
# that x-mac-hebrew texts are so rare, writing another model prober isn't
# worth the effort and performance hit.
#
#### The Prober ####
#
# The prober is divided between two SBCharSetProbers and a HebrewProber,
# all of which are managed, created, fed data, inquired and deleted by the
# SBCSGroupProber. The two SBCharSetProbers identify that the text is in
# fact some kind of Hebrew, Logical or Visual. The final decision about which
# one is it is made by the HebrewProber by combining final-letter scores
# with the scores of the two SBCharSetProbers to produce a final answer.
#
# The SBCSGroupProber is responsible for stripping the original text of HTML
# tags, English characters, numbers, low-ASCII punctuation characters, spaces
# and new lines. It reduces any sequence of such characters to a single space.
# The buffer fed to each prober in the SBCS group prober is pure text in
# high-ASCII.
# The two SBCharSetProbers (model probers) share the same language model:
# Win1255Model.
# The first SBCharSetProber uses the model normally as any other
# SBCharSetProber does, to recognize windows-1255, upon which this model was
# built. The second SBCharSetProber is told to make the pair-of-letter
# lookup in the language model backwards. This in practice exactly simulates
# a visual Hebrew model using the windows-1255 logical Hebrew model.
#
# The HebrewProber is not using any language model. All it does is look for
# final-letter evidence suggesting the text is either logical Hebrew or visual
# Hebrew. Disjointed from the model probers, the results of the HebrewProber
# alone are meaningless. HebrewProber always returns 0.00 as confidence
# since it never identifies a charset by itself. Instead, the pointer to the
# HebrewProber is passed to the model probers as a helper "Name Prober".
# When the Group prober receives a positive identification from any prober,
# it asks for the name of the charset identified. If the prober queried is a
# Hebrew model prober, the model prober forwards the call to the
# HebrewProber to make the final decision. In the HebrewProber, the
# decision is made according to the final-letters scores maintained and Both
# model probers scores. The answer is returned in the form of the name of the
# charset identified, either "windows-1255" or "ISO-8859-8".
# windows-1255 / ISO-8859-8 code points of interest
FINAL_KAF = 0xea
NORMAL_KAF = 0xeb
FINAL_MEM = 0xed
NORMAL_MEM = 0xee
FINAL_NUN = 0xef
NORMAL_NUN = 0xf0
FINAL_PE = 0xf3
NORMAL_PE = 0xf4
FINAL_TSADI = 0xf5
NORMAL_TSADI = 0xf6
# Minimum Visual vs Logical final letter score difference.
# If the difference is below this, don't rely solely on the final letter score
# distance.
MIN_FINAL_CHAR_DISTANCE = 5
# Minimum Visual vs Logical model score difference.
# If the difference is below this, don't rely at all on the model score
# distance.
MIN_MODEL_DISTANCE = 0.01
VISUAL_HEBREW_NAME = "ISO-8859-8"
LOGICAL_HEBREW_NAME = "windows-1255"
class HebrewProber(CharSetProber):
def __init__(self):
CharSetProber.__init__(self)
self._mLogicalProber = None
self._mVisualProber = None
self.reset()
def reset(self):
self._mFinalCharLogicalScore = 0
self._mFinalCharVisualScore = 0
# The two last characters seen in the previous buffer,
# mPrev and mBeforePrev are initialized to space in order to simulate
# a word delimiter at the beginning of the data
self._mPrev = ' '
self._mBeforePrev = ' '
# These probers are owned by the group prober.
def set_model_probers(self, logicalProber, visualProber):
self._mLogicalProber = logicalProber
self._mVisualProber = visualProber
def is_final(self, c):
return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,
FINAL_TSADI]
def is_non_final(self, c):
# The normal Tsadi is not a good Non-Final letter due to words like
# 'lechotet' (to chat) containing an apostrophe after the tsadi. This
# apostrophe is converted to a space in FilterWithoutEnglishLetters
# causing the Non-Final tsadi to appear at an end of a word even
# though this is not the case in the original text.
# The letters Pe and Kaf rarely display a related behavior of not being
# a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'
# for example legally end with a Non-Final Pe or Kaf. However, the
# benefit of these letters as Non-Final letters outweighs the damage
# since these words are quite rare.
return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
def feed(self, aBuf):
# Final letter analysis for logical-visual decision.
# Look for evidence that the received buffer is either logical Hebrew
# or visual Hebrew.
# The following cases are checked:
# 1) A word longer than 1 letter, ending with a final letter. This is
# an indication that the text is laid out "naturally" since the
# final letter really appears at the end. +1 for logical score.
# 2) A word longer than 1 letter, ending with a Non-Final letter. In
# normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
# should not end with the Non-Final form of that letter. Exceptions
# to this rule are mentioned above in isNonFinal(). This is an
# indication that the text is laid out backwards. +1 for visual
# score
# 3) A word longer than 1 letter, starting with a final letter. Final
# letters should not appear at the beginning of a word. This is an
# indication that the text is laid out backwards. +1 for visual
# score.
#
# The visual score and logical score are accumulated throughout the
# text and are finally checked against each other in GetCharSetName().
# No checking for final letters in the middle of words is done since
# that case is not an indication for either Logical or Visual text.
#
# We automatically filter out all 7-bit characters (replace them with
# spaces) so the word boundary detection works properly. [MAP]
if self.get_state() == eNotMe:
# Both model probers say it's not them. No reason to continue.
return eNotMe
aBuf = self.filter_high_bit_only(aBuf)
for cur in aBuf:
if cur == ' ':
# We stand on a space - a word just ended
if self._mBeforePrev != ' ':
# next-to-last char was not a space so self._mPrev is not a
# 1 letter word
if self.is_final(self._mPrev):
# case (1) [-2:not space][-1:final letter][cur:space]
self._mFinalCharLogicalScore += 1
elif self.is_non_final(self._mPrev):
# case (2) [-2:not space][-1:Non-Final letter][
# cur:space]
self._mFinalCharVisualScore += 1
else:
# Not standing on a space
if ((self._mBeforePrev == ' ') and
(self.is_final(self._mPrev)) and (cur != ' ')):
# case (3) [-2:space][-1:final letter][cur:not space]
self._mFinalCharVisualScore += 1
self._mBeforePrev = self._mPrev
self._mPrev = cur
# Forever detecting, till the end or until both model probers return
# eNotMe (handled above)
return eDetecting
def get_charset_name(self):
# Make the decision: is it Logical or Visual?
# If the final letter score distance is dominant enough, rely on it.
finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore
if finalsub >= MIN_FINAL_CHAR_DISTANCE:
return LOGICAL_HEBREW_NAME
if finalsub <= -MIN_FINAL_CHAR_DISTANCE:
return VISUAL_HEBREW_NAME
# It's not dominant enough, try to rely on the model scores instead.
modelsub = (self._mLogicalProber.get_confidence()
- self._mVisualProber.get_confidence())
if modelsub > MIN_MODEL_DISTANCE:
return LOGICAL_HEBREW_NAME
if modelsub < -MIN_MODEL_DISTANCE:
return VISUAL_HEBREW_NAME
# Still no good, back to final letter distance, maybe it'll save the
# day.
if finalsub < 0.0:
return VISUAL_HEBREW_NAME
# (finalsub > 0 - Logical) or (don't know what to do) default to
# Logical.
return LOGICAL_HEBREW_NAME
def get_state(self):
# Remain active as long as any of the model probers are active.
if (self._mLogicalProber.get_state() == eNotMe) and \
(self._mVisualProber.get_state() == eNotMe):
return eNotMe
return eDetecting
@@ -0,0 +1,569 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# Sampling from about 20M text materials include literature and computer technology
#
# Japanese frequency table, applied to both S-JIS and EUC-JP
# They are sorted in order.
# 128 --> 0.77094
# 256 --> 0.85710
# 512 --> 0.92635
# 1024 --> 0.97130
# 2048 --> 0.99431
#
# Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
# Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191
#
# Typical Distribution Ratio, 25% of IDR
JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0
# Char to FreqOrder table ,
JIS_TABLE_SIZE = 4368
JISCharToFreqOrder = (
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48
2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, # 64
2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, # 80
5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, # 96
1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, # 112
5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, # 128
5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, # 144
5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, # 160
5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, # 176
5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, # 192
5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, # 208
1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, # 224
1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, # 240
1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, # 256
2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, # 272
3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, # 288
3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, # 304
4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, # 320
12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, # 336
1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, # 352
109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, # 368
5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, # 384
271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, # 400
32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, # 416
43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, # 432
280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, # 448
54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, # 464
5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, # 480
5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, # 496
5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, # 512
4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, # 528
5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, # 544
5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, # 560
5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, # 576
5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, # 592
5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, # 608
5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, # 624
5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, # 640
5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, # 656
5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, # 672
3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, # 688
5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, # 704
5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, # 720
5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, # 736
5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, # 752
5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, # 768
5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, # 784
5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, # 800
5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, # 816
5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, # 832
5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, # 848
5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, # 864
5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, # 880
5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, # 896
5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, # 912
5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, # 928
5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, # 944
5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, # 960
5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, # 976
5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, # 992
5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, # 1008
5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, # 1024
5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, # 1040
5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, # 1056
5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, # 1072
5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, # 1088
5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, # 1104
5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, # 1120
5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, # 1136
5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, # 1152
5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, # 1168
5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, # 1184
5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, # 1200
5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, # 1216
5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, # 1232
5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, # 1248
5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, # 1264
5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, # 1280
5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, # 1296
6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, # 1312
6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, # 1328
6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, # 1344
6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, # 1360
6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, # 1376
6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, # 1392
6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, # 1408
6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, # 1424
4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, # 1440
854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, # 1456
665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, # 1472
1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, # 1488
1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, # 1504
896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, # 1520
3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, # 1536
3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, # 1552
804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, # 1568
3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, # 1584
3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, # 1600
586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, # 1616
2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, # 1632
277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, # 1648
3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, # 1664
1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, # 1680
380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, # 1696
1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, # 1712
850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, # 1728
2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, # 1744
2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, # 1760
2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, # 1776
2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, # 1792
1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, # 1808
1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, # 1824
1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, # 1840
1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, # 1856
2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, # 1872
1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, # 1888
2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, # 1904
1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, # 1920
1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, # 1936
1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, # 1952
1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, # 1968
1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, # 1984
1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, # 2000
606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, # 2016
684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, # 2032
1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, # 2048
2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, # 2064
2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, # 2080
2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, # 2096
3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, # 2112
3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, # 2128
884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, # 2144
3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, # 2160
1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, # 2176
861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, # 2192
2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, # 2208
1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, # 2224
576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, # 2240
3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, # 2256
4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, # 2272
2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, # 2288
1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, # 2304
2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, # 2320
1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, # 2336
385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, # 2352
178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, # 2368
1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, # 2384
2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, # 2400
2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, # 2416
2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, # 2432
3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, # 2448
1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, # 2464
2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, # 2480
359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, # 2496
837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, # 2512
855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, # 2528
1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, # 2544
2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, # 2560
633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, # 2576
1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, # 2592
1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, # 2608
353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, # 2624
1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, # 2640
1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, # 2656
1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, # 2672
764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, # 2688
2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, # 2704
278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, # 2720
2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, # 2736
3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, # 2752
2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, # 2768
1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, # 2784
6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, # 2800
1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, # 2816
2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, # 2832
1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, # 2848
470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, # 2864
72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, # 2880
3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, # 2896
3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, # 2912
1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, # 2928
1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, # 2944
1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, # 2960
1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, # 2976
123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, # 2992
913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, # 3008
2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, # 3024
900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, # 3040
3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, # 3056
2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, # 3072
423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, # 3088
1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, # 3104
2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, # 3120
220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, # 3136
1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, # 3152
745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, # 3168
4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, # 3184
2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, # 3200
1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, # 3216
666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, # 3232
1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, # 3248
2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, # 3264
376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, # 3280
6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, # 3296
1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, # 3312
1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, # 3328
2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, # 3344
3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, # 3360
914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, # 3376
3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, # 3392
1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, # 3408
674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, # 3424
1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, # 3440
199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, # 3456
3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, # 3472
370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, # 3488
2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, # 3504
414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, # 3520
4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, # 3536
2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, # 3552
1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, # 3568
1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, # 3584
1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, # 3600
166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, # 3616
1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, # 3632
3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, # 3648
1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, # 3664
3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, # 3680
264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, # 3696
543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, # 3712
983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, # 3728
2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, # 3744
1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, # 3760
867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, # 3776
1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, # 3792
894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, # 3808
1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, # 3824
530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, # 3840
839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, # 3856
480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, # 3872
1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, # 3888
1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, # 3904
2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, # 3920
4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, # 3936
227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, # 3952
1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, # 3968
328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, # 3984
1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, # 4000
3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, # 4016
1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, # 4032
2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, # 4048
2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, # 4064
1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, # 4080
1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, # 4096
2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, # 4112
455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, # 4128
2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, # 4144
1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, # 4160
1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, # 4176
1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, # 4192
1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, # 4208
3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, # 4224
2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, # 4240
2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, # 4256
575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, # 4272
3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, # 4288
3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, # 4304
1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, # 4320
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512
#Everything below is of no interest for detection purpose
2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, # 4384
6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, # 4400
6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, # 4416
6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, # 4432
6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, # 4448
4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, # 4464
4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, # 4480
3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, # 4496
3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, # 4512
4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, # 4528
3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, # 4544
6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, # 4560
4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, # 4576
6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, # 4592
6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, # 4608
6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, # 4624
6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, # 4640
6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, # 4656
6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, # 4672
3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, # 4688
3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, # 4704
6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, # 4720
2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, # 4736
4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, # 4752
4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, # 4768
4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, # 4784
6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, # 4800
3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, # 4816
4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, # 4832
4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, # 4848
6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, # 4864
4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, # 4880
6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, # 4896
3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, # 4912
2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, # 4928
4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, # 4944
2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, # 4960
6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, # 4976
4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, # 4992
6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, # 5008
6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, # 5024
6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, # 5040
4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, # 5056
6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, # 5072
2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, # 5088
6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, # 5104
4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, # 5120
6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, # 5136
4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, # 5152
4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, # 5168
6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, # 5184
6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, # 5200
6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, # 5216
3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, # 5232
1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, # 5248
3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, # 5264
3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, # 5280
4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, # 5296
6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, # 5312
3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, # 5328
6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, # 5344
3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, # 5360
3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, # 5376
2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, # 5392
6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, # 5408
6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, # 5424
3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, # 5440
6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, # 5456
3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, # 5472
6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, # 5488
6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, # 5504
6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, # 5520
4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, # 5536
6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, # 5552
4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, # 5568
3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, # 5584
3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, # 5600
6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, # 5616
6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, # 5632
4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, # 5648
6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, # 5664
6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, # 5680
6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, # 5696
6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, # 5712
6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, # 5728
6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, # 5744
4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, # 5760
4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, # 5776
3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, # 5792
6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, # 5808
4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, # 5824
2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, # 5840
6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, # 5856
6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, # 5872
4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, # 5888
2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, # 5904
4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, # 5920
2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, # 5936
4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, # 5952
4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, # 5968
4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, # 5984
6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, # 6000
3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, # 6016
6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, # 6032
3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, # 6048
6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, # 6064
2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, # 6080
3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, # 6096
7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, # 6112
2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, # 6128
3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, # 6144
3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, # 6160
3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, # 6176
3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, # 6192
7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, # 6208
7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, # 6224
7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, # 6240
7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, # 6256
7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, # 6272
4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, # 6288
3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, # 6304
3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, # 6320
4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, # 6336
3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, # 6352
3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, # 6368
7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, # 6384
4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, # 6400
7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, # 6416
7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, # 6432
7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, # 6448
7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, # 6464
7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, # 6480
4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, # 6496
4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, # 6512
7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, # 6528
3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, # 6544
4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, # 6560
7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, # 6576
7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, # 6592
4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, # 6608
3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, # 6624
3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, # 6640
7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, # 6656
4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, # 6672
4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, # 6688
4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, # 6704
4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, # 6720
4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, # 6736
4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, # 6752
7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, # 6768
7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, # 6784
7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, # 6800
7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, # 6816
7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, # 6832
2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, # 6848
3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, # 6864
7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, # 6880
7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, # 6896
3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, # 6912
4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, # 6928
3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, # 6944
3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, # 6960
2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, # 6976
7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, # 6992
7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, # 7008
4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, # 7024
3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, # 7040
3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, # 7056
7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, # 7072
7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, # 7088
7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, # 7104
4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, # 7120
7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, # 7136
2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, # 7152
3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, # 7168
4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, # 7184
7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, # 7200
4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, # 7216
4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, # 7232
7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, # 7248
7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, # 7264
5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, # 7280
7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, # 7296
7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, # 7312
7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, # 7328
7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, # 7344
7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, # 7360
5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, # 7376
5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, # 7392
7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, # 7408
3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, # 7424
7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, # 7440
7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, # 7456
3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, # 7472
7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, # 7488
7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, # 7504
1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, # 7520
3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, # 7536
4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, # 7552
2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, # 7568
3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, # 7584
2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, # 7600
5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, # 7616
4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, # 7632
4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, # 7648
5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, # 7664
7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, # 7680
7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, # 7696
7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, # 7712
7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, # 7728
3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, # 7744
7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, # 7760
3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, # 7776
7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, # 7792
4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, # 7808
7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, # 7824
7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7840
7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, # 7856
7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, # 7872
7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, # 7888
7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, # 7904
7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, # 7920
7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, # 7936
7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, # 7952
7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, # 7968
7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, # 7984
7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, # 8000
8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, # 8016
8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, # 8032
8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, # 8048
8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, # 8064
8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, # 8080
8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, # 8096
8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, # 8112
8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, # 8128
8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, # 8144
8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, # 8160
8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, # 8176
8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, # 8192
8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, # 8208
8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, # 8224
8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, # 8240
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, # 8256
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271) # 8272
# flake8: noqa
+219
View File
@@ -0,0 +1,219 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .compat import wrap_ord
NUM_OF_CATEGORY = 6
DONT_KNOW = -1
ENOUGH_REL_THRESHOLD = 100
MAX_REL_THRESHOLD = 1000
MINIMUM_DATA_THRESHOLD = 4
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
jp2CharContext = (
(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),
(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),
(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),
(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),
(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),
(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),
(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),
(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),
(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),
(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),
(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),
(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),
(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),
(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),
(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),
(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),
(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),
(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),
(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),
(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),
(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),
(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),
(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),
(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),
(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),
(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),
(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),
(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),
(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),
(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),
(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),
(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),
(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),
(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),
(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),
(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),
(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),
(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),
(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),
(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),
(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),
(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),
(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),
(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),
(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),
(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),
(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),
(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),
(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),
(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),
(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),
(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),
(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),
(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),
(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),
(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),
(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),
(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),
(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),
(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
)
class JapaneseContextAnalysis:
def __init__(self):
self.reset()
def reset(self):
self._mTotalRel = 0 # total sequence received
# category counters, each interger counts sequence in its category
self._mRelSample = [0] * NUM_OF_CATEGORY
# if last byte in current buffer is not the last byte of a character,
# we need to know how many bytes to skip in next buffer
self._mNeedToSkipCharNum = 0
self._mLastCharOrder = -1 # The order of previous char
# If this flag is set to True, detection is done and conclusion has
# been made
self._mDone = False
def feed(self, aBuf, aLen):
if self._mDone:
return
# The buffer we got is byte oriented, and a character may span in more than one
# buffers. In case the last one or two byte in last buffer is not
# complete, we record how many byte needed to complete that character
# and skip these bytes here. We can choose to record those bytes as
# well and analyse the character once it is complete, but since a
# character will not make much difference, by simply skipping
# this character will simply our logic and improve performance.
i = self._mNeedToSkipCharNum
while i < aLen:
order, charLen = self.get_order(aBuf[i:i + 2])
i += charLen
if i > aLen:
self._mNeedToSkipCharNum = i - aLen
self._mLastCharOrder = -1
else:
if (order != -1) and (self._mLastCharOrder != -1):
self._mTotalRel += 1
if self._mTotalRel > MAX_REL_THRESHOLD:
self._mDone = True
break
self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1
self._mLastCharOrder = order
def got_enough_data(self):
return self._mTotalRel > ENOUGH_REL_THRESHOLD
def get_confidence(self):
# This is just one way to calculate confidence. It works well for me.
if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
else:
return DONT_KNOW
def get_order(self, aBuf):
return -1, 1
class SJISContextAnalysis(JapaneseContextAnalysis):
def get_order(self, aBuf):
if not aBuf:
return -1, 1
# find out current char's byte length
first_char = wrap_ord(aBuf[0])
if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
charLen = 2
else:
charLen = 1
# return its order if it is hiragana
if len(aBuf) > 1:
second_char = wrap_ord(aBuf[1])
if (first_char == 202) and (0x9F <= second_char <= 0xF1):
return second_char - 0x9F, charLen
return -1, charLen
class EUCJPContextAnalysis(JapaneseContextAnalysis):
def get_order(self, aBuf):
if not aBuf:
return -1, 1
# find out current char's byte length
first_char = wrap_ord(aBuf[0])
if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
charLen = 2
elif first_char == 0x8F:
charLen = 3
else:
charLen = 1
# return its order if it is hiragana
if len(aBuf) > 1:
second_char = wrap_ord(aBuf[1])
if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
return second_char - 0xA1, charLen
return -1, charLen
# flake8: noqa
@@ -0,0 +1,229 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# 255: Control characters that usually does not exist in any text
# 254: Carriage/Return
# 253: symbol (punctuation) that does not belong to word
# 252: 0 - 9
# Character Mapping Table:
# this table is modified base on win1251BulgarianCharToOrderMap, so
# only number <64 is sure valid
Latin5_BulgarianCharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80
210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90
81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0
31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0
62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0
)
win1251BulgarianCharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80
221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90
88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0
73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0
31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0
)
# Model Table:
# total sequences: 100%
# first 512 sequences: 96.9392%
# first 1024 sequences:3.0618%
# rest sequences: 0.2992%
# negative sequences: 0.0020%
BulgarianLangModel = (
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,
0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0,
0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0,
0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0,
0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3,
2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,
3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2,
1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0,
3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1,
1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0,
2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2,
2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0,
3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2,
1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,
2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2,
2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2,
1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0,
2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2,
2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0,
2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2,
1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0,
2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2,
1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,
3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2,
1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,
3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1,
1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0,
2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1,
1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0,
2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2,
1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,
2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1,
1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0,
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2,
1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,
2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2,
1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2,
1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2,
1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1,
1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,
1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,
0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1,
0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,
1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1,
1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
)
Latin5BulgarianModel = {
'charToOrderMap': Latin5_BulgarianCharToOrderMap,
'precedenceMatrix': BulgarianLangModel,
'mTypicalPositiveRatio': 0.969392,
'keepEnglishLetter': False,
'charsetName': "ISO-8859-5"
}
Win1251BulgarianModel = {
'charToOrderMap': win1251BulgarianCharToOrderMap,
'precedenceMatrix': BulgarianLangModel,
'mTypicalPositiveRatio': 0.969392,
'keepEnglishLetter': False,
'charsetName': "windows-1251"
}
# flake8: noqa
@@ -0,0 +1,329 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# KOI8-R language model
# Character Mapping Table:
KOI8R_CharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90
223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0
238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0
27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0
15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0
59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
)
win1251_CharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
)
latin5_CharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
)
macCyrillic_CharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
)
IBM855_CharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,
220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229,
230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,
43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
)
IBM866_CharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
)
# Model Table:
# total sequences: 100%
# first 512 sequences: 97.6601%
# first 1024 sequences: 2.3389%
# rest sequences: 0.1237%
# negative sequences: 0.0009%
RussianLangModel = (
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,
1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,
1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,
2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,
1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,
3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,
1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,
2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,
1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,
1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,
1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,
1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,
3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,
1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,
2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,
1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,
1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,
1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,
1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,
3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,
3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,
1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,
0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,
1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,
1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,
1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,
2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,
1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,
1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,
2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,
0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,
1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,
1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,
0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,
0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,
2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,
0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
)
Koi8rModel = {
'charToOrderMap': KOI8R_CharToOrderMap,
'precedenceMatrix': RussianLangModel,
'mTypicalPositiveRatio': 0.976601,
'keepEnglishLetter': False,
'charsetName': "KOI8-R"
}
Win1251CyrillicModel = {
'charToOrderMap': win1251_CharToOrderMap,
'precedenceMatrix': RussianLangModel,
'mTypicalPositiveRatio': 0.976601,
'keepEnglishLetter': False,
'charsetName': "windows-1251"
}
Latin5CyrillicModel = {
'charToOrderMap': latin5_CharToOrderMap,
'precedenceMatrix': RussianLangModel,
'mTypicalPositiveRatio': 0.976601,
'keepEnglishLetter': False,
'charsetName': "ISO-8859-5"
}
MacCyrillicModel = {
'charToOrderMap': macCyrillic_CharToOrderMap,
'precedenceMatrix': RussianLangModel,
'mTypicalPositiveRatio': 0.976601,
'keepEnglishLetter': False,
'charsetName': "MacCyrillic"
};
Ibm866Model = {
'charToOrderMap': IBM866_CharToOrderMap,
'precedenceMatrix': RussianLangModel,
'mTypicalPositiveRatio': 0.976601,
'keepEnglishLetter': False,
'charsetName': "IBM866"
}
Ibm855Model = {
'charToOrderMap': IBM855_CharToOrderMap,
'precedenceMatrix': RussianLangModel,
'mTypicalPositiveRatio': 0.976601,
'keepEnglishLetter': False,
'charsetName': "IBM855"
}
# flake8: noqa
@@ -0,0 +1,225 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# 255: Control characters that usually does not exist in any text
# 254: Carriage/Return
# 253: symbol (punctuation) that does not belong to word
# 252: 0 - 9
# Character Mapping Table:
Latin7_CharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
)
win1253_CharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
)
# Model Table:
# total sequences: 100%
# first 512 sequences: 98.2851%
# first 1024 sequences:1.7001%
# rest sequences: 0.0359%
# negative sequences: 0.0148%
GreekLangModel = (
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
)
Latin7GreekModel = {
'charToOrderMap': Latin7_CharToOrderMap,
'precedenceMatrix': GreekLangModel,
'mTypicalPositiveRatio': 0.982851,
'keepEnglishLetter': False,
'charsetName': "ISO-8859-7"
}
Win1253GreekModel = {
'charToOrderMap': win1253_CharToOrderMap,
'precedenceMatrix': GreekLangModel,
'mTypicalPositiveRatio': 0.982851,
'keepEnglishLetter': False,
'charsetName': "windows-1253"
}
# flake8: noqa
@@ -0,0 +1,201 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Simon Montagu
# Portions created by the Initial Developer are Copyright (C) 2005
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
# Shoshannah Forbes - original C code (?)
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# 255: Control characters that usually does not exist in any text
# 254: Carriage/Return
# 253: symbol (punctuation) that does not belong to word
# 252: 0 - 9
# Windows-1255 language model
# Character Mapping Table:
win1255_CharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40
78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50
253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60
66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70
124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,
215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,
34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,
106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,
30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,
238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,
12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,
)
# Model Table:
# total sequences: 100%
# first 512 sequences: 98.4004%
# first 1024 sequences: 1.5981%
# rest sequences: 0.087%
# negative sequences: 0.0015%
HebrewLangModel = (
0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,
1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,
1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,
1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,
0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,
0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,
1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,
0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,
0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,
0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,
0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,
0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,
0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,
0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,
0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,
0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,
0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,
0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,
0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,
0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,
0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,
0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,
0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,
1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,
0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,
2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,
1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,
2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,
2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,
0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
)
Win1255HebrewModel = {
'charToOrderMap': win1255_CharToOrderMap,
'precedenceMatrix': HebrewLangModel,
'mTypicalPositiveRatio': 0.984004,
'keepEnglishLetter': False,
'charsetName': "windows-1255"
}
# flake8: noqa
@@ -0,0 +1,225 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# 255: Control characters that usually does not exist in any text
# 254: Carriage/Return
# 253: symbol (punctuation) that does not belong to word
# 252: 0 - 9
# Character Mapping Table:
Latin2_HungarianCharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,
175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,
79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,
232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
)
win1250HungarianCharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,
177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,
191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,
81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,
232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
)
# Model Table:
# total sequences: 100%
# first 512 sequences: 94.7368%
# first 1024 sequences:5.2623%
# rest sequences: 0.8894%
# negative sequences: 0.0009%
HungarianLangModel = (
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,
0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,
0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,
3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,
1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,
1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,
3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,
2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,
2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,
2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,
2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,
2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,
2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,
2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,
2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,
1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,
1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,
3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,
1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,
1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,
2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,
2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,
2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,
3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,
2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,
1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,
1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,
2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,
1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,
2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,
1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,
2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,
2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,
1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,
1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,
0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,
2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,
1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,
2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,
1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,
1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,
2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,
2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,
2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,
1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,
0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
)
Latin2HungarianModel = {
'charToOrderMap': Latin2_HungarianCharToOrderMap,
'precedenceMatrix': HungarianLangModel,
'mTypicalPositiveRatio': 0.947368,
'keepEnglishLetter': True,
'charsetName': "ISO-8859-2"
}
Win1250HungarianModel = {
'charToOrderMap': win1250HungarianCharToOrderMap,
'precedenceMatrix': HungarianLangModel,
'mTypicalPositiveRatio': 0.947368,
'keepEnglishLetter': True,
'charsetName': "windows-1250"
}
# flake8: noqa
@@ -0,0 +1,200 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# 255: Control characters that usually does not exist in any text
# 254: Carriage/Return
# 253: symbol (punctuation) that does not belong to word
# 252: 0 - 9
# The following result for thai was collected from a limited sample (1M).
# Character Mapping Table:
TIS620CharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40
188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50
253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60
96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70
209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57,
49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54,
45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63,
22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,
68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
)
# Model Table:
# total sequences: 100%
# first 512 sequences: 92.6386%
# first 1024 sequences:7.3177%
# rest sequences: 1.0230%
# negative sequences: 0.0436%
ThaiLangModel = (
0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
)
TIS620ThaiModel = {
'charToOrderMap': TIS620CharToOrderMap,
'precedenceMatrix': ThaiLangModel,
'mTypicalPositiveRatio': 0.926386,
'keepEnglishLetter': False,
'charsetName': "TIS-620"
}
# flake8: noqa
@@ -0,0 +1,139 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
from .constants import eNotMe
from .compat import wrap_ord
FREQ_CAT_NUM = 4
UDF = 0 # undefined
OTH = 1 # other
ASC = 2 # ascii capital letter
ASS = 3 # ascii small letter
ACV = 4 # accent capital vowel
ACO = 5 # accent capital other
ASV = 6 # accent small vowel
ASO = 7 # accent small other
CLASS_NUM = 8 # total classes
Latin1_CharToClass = (
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87
OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F
UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97
OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF
ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7
ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF
ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7
ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF
ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7
ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF
ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7
ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
)
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
Latin1ClassModel = (
# UDF OTH ASC ASS ACV ACO ASV ASO
0, 0, 0, 0, 0, 0, 0, 0, # UDF
0, 3, 3, 3, 3, 3, 3, 3, # OTH
0, 3, 3, 3, 3, 3, 3, 3, # ASC
0, 3, 3, 3, 1, 1, 3, 3, # ASS
0, 3, 3, 3, 1, 2, 1, 2, # ACV
0, 3, 3, 3, 3, 3, 3, 3, # ACO
0, 3, 1, 3, 1, 1, 1, 3, # ASV
0, 3, 1, 3, 1, 1, 3, 3, # ASO
)
class Latin1Prober(CharSetProber):
def __init__(self):
CharSetProber.__init__(self)
self.reset()
def reset(self):
self._mLastCharClass = OTH
self._mFreqCounter = [0] * FREQ_CAT_NUM
CharSetProber.reset(self)
def get_charset_name(self):
return "windows-1252"
def feed(self, aBuf):
aBuf = self.filter_with_english_letters(aBuf)
for c in aBuf:
charClass = Latin1_CharToClass[wrap_ord(c)]
freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
+ charClass]
if freq == 0:
self._mState = eNotMe
break
self._mFreqCounter[freq] += 1
self._mLastCharClass = charClass
return self.get_state()
def get_confidence(self):
if self.get_state() == eNotMe:
return 0.01
total = sum(self._mFreqCounter)
if total < 0.01:
confidence = 0.0
else:
confidence = ((self._mFreqCounter[3] / total)
- (self._mFreqCounter[1] * 20.0 / total))
if confidence < 0.0:
confidence = 0.0
# lower the confidence of latin1 so that other more accurate
# detector can take priority.
confidence = confidence * 0.5
return confidence
@@ -0,0 +1,86 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
# Proofpoint, Inc.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import sys
from . import constants
from .charsetprober import CharSetProber
class MultiByteCharSetProber(CharSetProber):
def __init__(self):
CharSetProber.__init__(self)
self._mDistributionAnalyzer = None
self._mCodingSM = None
self._mLastChar = [0, 0]
def reset(self):
CharSetProber.reset(self)
if self._mCodingSM:
self._mCodingSM.reset()
if self._mDistributionAnalyzer:
self._mDistributionAnalyzer.reset()
self._mLastChar = [0, 0]
def get_charset_name(self):
pass
def feed(self, aBuf):
aLen = len(aBuf)
for i in range(0, aLen):
codingState = self._mCodingSM.next_state(aBuf[i])
if codingState == constants.eError:
if constants._debug:
sys.stderr.write(self.get_charset_name()
+ ' prober hit error at byte ' + str(i)
+ '\n')
self._mState = constants.eNotMe
break
elif codingState == constants.eItsMe:
self._mState = constants.eFoundIt
break
elif codingState == constants.eStart:
charLen = self._mCodingSM.get_current_charlen()
if i == 0:
self._mLastChar[1] = aBuf[0]
self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
else:
self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
charLen)
self._mLastChar[0] = aBuf[aLen - 1]
if self.get_state() == constants.eDetecting:
if (self._mDistributionAnalyzer.got_enough_data() and
(self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
self._mState = constants.eFoundIt
return self.get_state()
def get_confidence(self):
return self._mDistributionAnalyzer.get_confidence()
@@ -0,0 +1,54 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
# Proofpoint, Inc.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .charsetgroupprober import CharSetGroupProber
from .utf8prober import UTF8Prober
from .sjisprober import SJISProber
from .eucjpprober import EUCJPProber
from .gb2312prober import GB2312Prober
from .euckrprober import EUCKRProber
from .cp949prober import CP949Prober
from .big5prober import Big5Prober
from .euctwprober import EUCTWProber
class MBCSGroupProber(CharSetGroupProber):
def __init__(self):
CharSetGroupProber.__init__(self)
self._mProbers = [
UTF8Prober(),
SJISProber(),
EUCJPProber(),
GB2312Prober(),
EUCKRProber(),
CP949Prober(),
Big5Prober(),
EUCTWProber()
]
self.reset()
+575
View File
@@ -0,0 +1,575 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .constants import eStart, eError, eItsMe
# BIG5
BIG5_cls = (
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
1,1,1,1,1,1,0,0, # 08 - 0f
1,1,1,1,1,1,1,1, # 10 - 17
1,1,1,0,1,1,1,1, # 18 - 1f
1,1,1,1,1,1,1,1, # 20 - 27
1,1,1,1,1,1,1,1, # 28 - 2f
1,1,1,1,1,1,1,1, # 30 - 37
1,1,1,1,1,1,1,1, # 38 - 3f
2,2,2,2,2,2,2,2, # 40 - 47
2,2,2,2,2,2,2,2, # 48 - 4f
2,2,2,2,2,2,2,2, # 50 - 57
2,2,2,2,2,2,2,2, # 58 - 5f
2,2,2,2,2,2,2,2, # 60 - 67
2,2,2,2,2,2,2,2, # 68 - 6f
2,2,2,2,2,2,2,2, # 70 - 77
2,2,2,2,2,2,2,1, # 78 - 7f
4,4,4,4,4,4,4,4, # 80 - 87
4,4,4,4,4,4,4,4, # 88 - 8f
4,4,4,4,4,4,4,4, # 90 - 97
4,4,4,4,4,4,4,4, # 98 - 9f
4,3,3,3,3,3,3,3, # a0 - a7
3,3,3,3,3,3,3,3, # a8 - af
3,3,3,3,3,3,3,3, # b0 - b7
3,3,3,3,3,3,3,3, # b8 - bf
3,3,3,3,3,3,3,3, # c0 - c7
3,3,3,3,3,3,3,3, # c8 - cf
3,3,3,3,3,3,3,3, # d0 - d7
3,3,3,3,3,3,3,3, # d8 - df
3,3,3,3,3,3,3,3, # e0 - e7
3,3,3,3,3,3,3,3, # e8 - ef
3,3,3,3,3,3,3,3, # f0 - f7
3,3,3,3,3,3,3,0 # f8 - ff
)
BIG5_st = (
eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f
eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17
)
Big5CharLenTable = (0, 1, 1, 2, 0)
Big5SMModel = {'classTable': BIG5_cls,
'classFactor': 5,
'stateTable': BIG5_st,
'charLenTable': Big5CharLenTable,
'name': 'Big5'}
# CP949
CP949_cls = (
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f
1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f
4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f
1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f
5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f
0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f
6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f
6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af
7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf
7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
)
CP949_st = (
#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
eError,eStart, 3,eError,eStart,eStart, 4, 5,eError, 6, # eStart
eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe
eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3
eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4
eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5
eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6
)
CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
CP949SMModel = {'classTable': CP949_cls,
'classFactor': 10,
'stateTable': CP949_st,
'charLenTable': CP949CharLenTable,
'name': 'CP949'}
# EUC-JP
EUCJP_cls = (
4,4,4,4,4,4,4,4, # 00 - 07
4,4,4,4,4,4,5,5, # 08 - 0f
4,4,4,4,4,4,4,4, # 10 - 17
4,4,4,5,4,4,4,4, # 18 - 1f
4,4,4,4,4,4,4,4, # 20 - 27
4,4,4,4,4,4,4,4, # 28 - 2f
4,4,4,4,4,4,4,4, # 30 - 37
4,4,4,4,4,4,4,4, # 38 - 3f
4,4,4,4,4,4,4,4, # 40 - 47
4,4,4,4,4,4,4,4, # 48 - 4f
4,4,4,4,4,4,4,4, # 50 - 57
4,4,4,4,4,4,4,4, # 58 - 5f
4,4,4,4,4,4,4,4, # 60 - 67
4,4,4,4,4,4,4,4, # 68 - 6f
4,4,4,4,4,4,4,4, # 70 - 77
4,4,4,4,4,4,4,4, # 78 - 7f
5,5,5,5,5,5,5,5, # 80 - 87
5,5,5,5,5,5,1,3, # 88 - 8f
5,5,5,5,5,5,5,5, # 90 - 97
5,5,5,5,5,5,5,5, # 98 - 9f
5,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
0,0,0,0,0,0,0,0, # e0 - e7
0,0,0,0,0,0,0,0, # e8 - ef
0,0,0,0,0,0,0,0, # f0 - f7
0,0,0,0,0,0,0,5 # f8 - ff
)
EUCJP_st = (
3, 4, 3, 5,eStart,eError,eError,eError,#00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17
eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f
3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27
)
EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)
EUCJPSMModel = {'classTable': EUCJP_cls,
'classFactor': 6,
'stateTable': EUCJP_st,
'charLenTable': EUCJPCharLenTable,
'name': 'EUC-JP'}
# EUC-KR
EUCKR_cls = (
1,1,1,1,1,1,1,1, # 00 - 07
1,1,1,1,1,1,0,0, # 08 - 0f
1,1,1,1,1,1,1,1, # 10 - 17
1,1,1,0,1,1,1,1, # 18 - 1f
1,1,1,1,1,1,1,1, # 20 - 27
1,1,1,1,1,1,1,1, # 28 - 2f
1,1,1,1,1,1,1,1, # 30 - 37
1,1,1,1,1,1,1,1, # 38 - 3f
1,1,1,1,1,1,1,1, # 40 - 47
1,1,1,1,1,1,1,1, # 48 - 4f
1,1,1,1,1,1,1,1, # 50 - 57
1,1,1,1,1,1,1,1, # 58 - 5f
1,1,1,1,1,1,1,1, # 60 - 67
1,1,1,1,1,1,1,1, # 68 - 6f
1,1,1,1,1,1,1,1, # 70 - 77
1,1,1,1,1,1,1,1, # 78 - 7f
0,0,0,0,0,0,0,0, # 80 - 87
0,0,0,0,0,0,0,0, # 88 - 8f
0,0,0,0,0,0,0,0, # 90 - 97
0,0,0,0,0,0,0,0, # 98 - 9f
0,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,3,3,3, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,3,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
2,2,2,2,2,2,2,2, # e0 - e7
2,2,2,2,2,2,2,2, # e8 - ef
2,2,2,2,2,2,2,2, # f0 - f7
2,2,2,2,2,2,2,0 # f8 - ff
)
EUCKR_st = (
eError,eStart, 3,eError,eError,eError,eError,eError,#00-07
eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f
)
EUCKRCharLenTable = (0, 1, 2, 0)
EUCKRSMModel = {'classTable': EUCKR_cls,
'classFactor': 4,
'stateTable': EUCKR_st,
'charLenTable': EUCKRCharLenTable,
'name': 'EUC-KR'}
# EUC-TW
EUCTW_cls = (
2,2,2,2,2,2,2,2, # 00 - 07
2,2,2,2,2,2,0,0, # 08 - 0f
2,2,2,2,2,2,2,2, # 10 - 17
2,2,2,0,2,2,2,2, # 18 - 1f
2,2,2,2,2,2,2,2, # 20 - 27
2,2,2,2,2,2,2,2, # 28 - 2f
2,2,2,2,2,2,2,2, # 30 - 37
2,2,2,2,2,2,2,2, # 38 - 3f
2,2,2,2,2,2,2,2, # 40 - 47
2,2,2,2,2,2,2,2, # 48 - 4f
2,2,2,2,2,2,2,2, # 50 - 57
2,2,2,2,2,2,2,2, # 58 - 5f
2,2,2,2,2,2,2,2, # 60 - 67
2,2,2,2,2,2,2,2, # 68 - 6f
2,2,2,2,2,2,2,2, # 70 - 77
2,2,2,2,2,2,2,2, # 78 - 7f
0,0,0,0,0,0,0,0, # 80 - 87
0,0,0,0,0,0,6,0, # 88 - 8f
0,0,0,0,0,0,0,0, # 90 - 97
0,0,0,0,0,0,0,0, # 98 - 9f
0,3,4,4,4,4,4,4, # a0 - a7
5,5,1,1,1,1,1,1, # a8 - af
1,1,1,1,1,1,1,1, # b0 - b7
1,1,1,1,1,1,1,1, # b8 - bf
1,1,3,1,3,3,3,3, # c0 - c7
3,3,3,3,3,3,3,3, # c8 - cf
3,3,3,3,3,3,3,3, # d0 - d7
3,3,3,3,3,3,3,3, # d8 - df
3,3,3,3,3,3,3,3, # e0 - e7
3,3,3,3,3,3,3,3, # e8 - ef
3,3,3,3,3,3,3,3, # f0 - f7
3,3,3,3,3,3,3,0 # f8 - ff
)
EUCTW_st = (
eError,eError,eStart, 3, 3, 3, 4,eError,#00-07
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17
eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f
5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27
eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
)
EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)
EUCTWSMModel = {'classTable': EUCTW_cls,
'classFactor': 7,
'stateTable': EUCTW_st,
'charLenTable': EUCTWCharLenTable,
'name': 'x-euc-tw'}
# GB2312
GB2312_cls = (
1,1,1,1,1,1,1,1, # 00 - 07
1,1,1,1,1,1,0,0, # 08 - 0f
1,1,1,1,1,1,1,1, # 10 - 17
1,1,1,0,1,1,1,1, # 18 - 1f
1,1,1,1,1,1,1,1, # 20 - 27
1,1,1,1,1,1,1,1, # 28 - 2f
3,3,3,3,3,3,3,3, # 30 - 37
3,3,1,1,1,1,1,1, # 38 - 3f
2,2,2,2,2,2,2,2, # 40 - 47
2,2,2,2,2,2,2,2, # 48 - 4f
2,2,2,2,2,2,2,2, # 50 - 57
2,2,2,2,2,2,2,2, # 58 - 5f
2,2,2,2,2,2,2,2, # 60 - 67
2,2,2,2,2,2,2,2, # 68 - 6f
2,2,2,2,2,2,2,2, # 70 - 77
2,2,2,2,2,2,2,4, # 78 - 7f
5,6,6,6,6,6,6,6, # 80 - 87
6,6,6,6,6,6,6,6, # 88 - 8f
6,6,6,6,6,6,6,6, # 90 - 97
6,6,6,6,6,6,6,6, # 98 - 9f
6,6,6,6,6,6,6,6, # a0 - a7
6,6,6,6,6,6,6,6, # a8 - af
6,6,6,6,6,6,6,6, # b0 - b7
6,6,6,6,6,6,6,6, # b8 - bf
6,6,6,6,6,6,6,6, # c0 - c7
6,6,6,6,6,6,6,6, # c8 - cf
6,6,6,6,6,6,6,6, # d0 - d7
6,6,6,6,6,6,6,6, # d8 - df
6,6,6,6,6,6,6,6, # e0 - e7
6,6,6,6,6,6,6,6, # e8 - ef
6,6,6,6,6,6,6,6, # f0 - f7
6,6,6,6,6,6,6,0 # f8 - ff
)
GB2312_st = (
eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17
4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f
eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27
eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
)
# To be accurate, the length of class 6 can be either 2 or 4.
# But it is not necessary to discriminate between the two since
# it is used for frequency analysis only, and we are validing
# each code range there as well. So it is safe to set it to be
# 2 here.
GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)
GB2312SMModel = {'classTable': GB2312_cls,
'classFactor': 7,
'stateTable': GB2312_st,
'charLenTable': GB2312CharLenTable,
'name': 'GB2312'}
# Shift_JIS
SJIS_cls = (
1,1,1,1,1,1,1,1, # 00 - 07
1,1,1,1,1,1,0,0, # 08 - 0f
1,1,1,1,1,1,1,1, # 10 - 17
1,1,1,0,1,1,1,1, # 18 - 1f
1,1,1,1,1,1,1,1, # 20 - 27
1,1,1,1,1,1,1,1, # 28 - 2f
1,1,1,1,1,1,1,1, # 30 - 37
1,1,1,1,1,1,1,1, # 38 - 3f
2,2,2,2,2,2,2,2, # 40 - 47
2,2,2,2,2,2,2,2, # 48 - 4f
2,2,2,2,2,2,2,2, # 50 - 57
2,2,2,2,2,2,2,2, # 58 - 5f
2,2,2,2,2,2,2,2, # 60 - 67
2,2,2,2,2,2,2,2, # 68 - 6f
2,2,2,2,2,2,2,2, # 70 - 77
2,2,2,2,2,2,2,1, # 78 - 7f
3,3,3,3,3,3,3,3, # 80 - 87
3,3,3,3,3,3,3,3, # 88 - 8f
3,3,3,3,3,3,3,3, # 90 - 97
3,3,3,3,3,3,3,3, # 98 - 9f
#0xa0 is illegal in sjis encoding, but some pages does
#contain such byte. We need to be more error forgiven.
2,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
3,3,3,3,3,3,3,3, # e0 - e7
3,3,3,3,3,4,4,4, # e8 - ef
4,4,4,4,4,4,4,4, # f0 - f7
4,4,4,4,4,0,0,0 # f8 - ff
)
SJIS_st = (
eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17
)
SJISCharLenTable = (0, 1, 1, 2, 0, 0)
SJISSMModel = {'classTable': SJIS_cls,
'classFactor': 6,
'stateTable': SJIS_st,
'charLenTable': SJISCharLenTable,
'name': 'Shift_JIS'}
# UCS2-BE
UCS2BE_cls = (
0,0,0,0,0,0,0,0, # 00 - 07
0,0,1,0,0,2,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,3,0,0,0,0, # 18 - 1f
0,0,0,0,0,0,0,0, # 20 - 27
0,3,3,3,3,3,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,0,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
0,0,0,0,0,0,0,0, # 80 - 87
0,0,0,0,0,0,0,0, # 88 - 8f
0,0,0,0,0,0,0,0, # 90 - 97
0,0,0,0,0,0,0,0, # 98 - 9f
0,0,0,0,0,0,0,0, # a0 - a7
0,0,0,0,0,0,0,0, # a8 - af
0,0,0,0,0,0,0,0, # b0 - b7
0,0,0,0,0,0,0,0, # b8 - bf
0,0,0,0,0,0,0,0, # c0 - c7
0,0,0,0,0,0,0,0, # c8 - cf
0,0,0,0,0,0,0,0, # d0 - d7
0,0,0,0,0,0,0,0, # d8 - df
0,0,0,0,0,0,0,0, # e0 - e7
0,0,0,0,0,0,0,0, # e8 - ef
0,0,0,0,0,0,0,0, # f0 - f7
0,0,0,0,0,0,4,5 # f8 - ff
)
UCS2BE_st = (
5, 7, 7,eError, 4, 3,eError,eError,#00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17
6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f
6, 6, 6, 6, 5, 7, 7,eError,#20-27
5, 8, 6, 6,eError, 6, 6, 6,#28-2f
6, 6, 6, 6,eError,eError,eStart,eStart #30-37
)
UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)
UCS2BESMModel = {'classTable': UCS2BE_cls,
'classFactor': 6,
'stateTable': UCS2BE_st,
'charLenTable': UCS2BECharLenTable,
'name': 'UTF-16BE'}
# UCS2-LE
UCS2LE_cls = (
0,0,0,0,0,0,0,0, # 00 - 07
0,0,1,0,0,2,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,3,0,0,0,0, # 18 - 1f
0,0,0,0,0,0,0,0, # 20 - 27
0,3,3,3,3,3,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,0,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
0,0,0,0,0,0,0,0, # 80 - 87
0,0,0,0,0,0,0,0, # 88 - 8f
0,0,0,0,0,0,0,0, # 90 - 97
0,0,0,0,0,0,0,0, # 98 - 9f
0,0,0,0,0,0,0,0, # a0 - a7
0,0,0,0,0,0,0,0, # a8 - af
0,0,0,0,0,0,0,0, # b0 - b7
0,0,0,0,0,0,0,0, # b8 - bf
0,0,0,0,0,0,0,0, # c0 - c7
0,0,0,0,0,0,0,0, # c8 - cf
0,0,0,0,0,0,0,0, # d0 - d7
0,0,0,0,0,0,0,0, # d8 - df
0,0,0,0,0,0,0,0, # e0 - e7
0,0,0,0,0,0,0,0, # e8 - ef
0,0,0,0,0,0,0,0, # f0 - f7
0,0,0,0,0,0,4,5 # f8 - ff
)
UCS2LE_st = (
6, 6, 7, 6, 4, 3,eError,eError,#00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17
5, 5, 5,eError, 5,eError, 6, 6,#18-1f
7, 6, 8, 8, 5, 5, 5,eError,#20-27
5, 5, 5,eError,eError,eError, 5, 5,#28-2f
5, 5, 5,eError, 5,eError,eStart,eStart #30-37
)
UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)
UCS2LESMModel = {'classTable': UCS2LE_cls,
'classFactor': 6,
'stateTable': UCS2LE_st,
'charLenTable': UCS2LECharLenTable,
'name': 'UTF-16LE'}
# UTF-8
UTF8_cls = (
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
1,1,1,1,1,1,0,0, # 08 - 0f
1,1,1,1,1,1,1,1, # 10 - 17
1,1,1,0,1,1,1,1, # 18 - 1f
1,1,1,1,1,1,1,1, # 20 - 27
1,1,1,1,1,1,1,1, # 28 - 2f
1,1,1,1,1,1,1,1, # 30 - 37
1,1,1,1,1,1,1,1, # 38 - 3f
1,1,1,1,1,1,1,1, # 40 - 47
1,1,1,1,1,1,1,1, # 48 - 4f
1,1,1,1,1,1,1,1, # 50 - 57
1,1,1,1,1,1,1,1, # 58 - 5f
1,1,1,1,1,1,1,1, # 60 - 67
1,1,1,1,1,1,1,1, # 68 - 6f
1,1,1,1,1,1,1,1, # 70 - 77
1,1,1,1,1,1,1,1, # 78 - 7f
2,2,2,2,3,3,3,3, # 80 - 87
4,4,4,4,4,4,4,4, # 88 - 8f
4,4,4,4,4,4,4,4, # 90 - 97
4,4,4,4,4,4,4,4, # 98 - 9f
5,5,5,5,5,5,5,5, # a0 - a7
5,5,5,5,5,5,5,5, # a8 - af
5,5,5,5,5,5,5,5, # b0 - b7
5,5,5,5,5,5,5,5, # b8 - bf
0,0,6,6,6,6,6,6, # c0 - c7
6,6,6,6,6,6,6,6, # c8 - cf
6,6,6,6,6,6,6,6, # d0 - d7
6,6,6,6,6,6,6,6, # d8 - df
7,8,8,8,8,8,8,8, # e0 - e7
8,8,8,8,8,9,8,8, # e8 - ef
10,11,11,11,11,11,11,11, # f0 - f7
12,13,13,13,14,15,0,0 # f8 - ff
)
UTF8_st = (
eError,eStart,eError,eError,eError,eError, 12, 10,#00-07
9, 11, 8, 7, 6, 5, 4, 3,#08-0f
eError,eError,eError,eError,eError,eError,eError,eError,#10-17
eError,eError,eError,eError,eError,eError,eError,eError,#18-1f
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f
eError,eError, 5, 5, 5, 5,eError,eError,#30-37
eError,eError,eError,eError,eError,eError,eError,eError,#38-3f
eError,eError,eError, 5, 5, 5,eError,eError,#40-47
eError,eError,eError,eError,eError,eError,eError,eError,#48-4f
eError,eError, 7, 7, 7, 7,eError,eError,#50-57
eError,eError,eError,eError,eError,eError,eError,eError,#58-5f
eError,eError,eError,eError, 7, 7,eError,eError,#60-67
eError,eError,eError,eError,eError,eError,eError,eError,#68-6f
eError,eError, 9, 9, 9, 9,eError,eError,#70-77
eError,eError,eError,eError,eError,eError,eError,eError,#78-7f
eError,eError,eError,eError,eError, 9,eError,eError,#80-87
eError,eError,eError,eError,eError,eError,eError,eError,#88-8f
eError,eError, 12, 12, 12, 12,eError,eError,#90-97
eError,eError,eError,eError,eError,eError,eError,eError,#98-9f
eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7
eError,eError,eError,eError,eError,eError,eError,eError,#a8-af
eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7
eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf
eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7
eError,eError,eError,eError,eError,eError,eError,eError #c8-cf
)
UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
UTF8SMModel = {'classTable': UTF8_cls,
'classFactor': 16,
'stateTable': UTF8_st,
'charLenTable': UTF8CharLenTable,
'name': 'UTF-8'}
# flake8: noqa
@@ -0,0 +1,120 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import sys
from . import constants
from .charsetprober import CharSetProber
from .compat import wrap_ord
SAMPLE_SIZE = 64
SB_ENOUGH_REL_THRESHOLD = 1024
POSITIVE_SHORTCUT_THRESHOLD = 0.95
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
SYMBOL_CAT_ORDER = 250
NUMBER_OF_SEQ_CAT = 4
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
#NEGATIVE_CAT = 0
class SingleByteCharSetProber(CharSetProber):
def __init__(self, model, reversed=False, nameProber=None):
CharSetProber.__init__(self)
self._mModel = model
# TRUE if we need to reverse every pair in the model lookup
self._mReversed = reversed
# Optional auxiliary prober for name decision
self._mNameProber = nameProber
self.reset()
def reset(self):
CharSetProber.reset(self)
# char order of last character
self._mLastOrder = 255
self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
self._mTotalSeqs = 0
self._mTotalChar = 0
# characters that fall in our sampling range
self._mFreqChar = 0
def get_charset_name(self):
if self._mNameProber:
return self._mNameProber.get_charset_name()
else:
return self._mModel['charsetName']
def feed(self, aBuf):
if not self._mModel['keepEnglishLetter']:
aBuf = self.filter_without_english_letters(aBuf)
aLen = len(aBuf)
if not aLen:
return self.get_state()
for c in aBuf:
order = self._mModel['charToOrderMap'][wrap_ord(c)]
if order < SYMBOL_CAT_ORDER:
self._mTotalChar += 1
if order < SAMPLE_SIZE:
self._mFreqChar += 1
if self._mLastOrder < SAMPLE_SIZE:
self._mTotalSeqs += 1
if not self._mReversed:
i = (self._mLastOrder * SAMPLE_SIZE) + order
model = self._mModel['precedenceMatrix'][i]
else: # reverse the order of the letters in the lookup
i = (order * SAMPLE_SIZE) + self._mLastOrder
model = self._mModel['precedenceMatrix'][i]
self._mSeqCounters[model] += 1
self._mLastOrder = order
if self.get_state() == constants.eDetecting:
if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
cf = self.get_confidence()
if cf > POSITIVE_SHORTCUT_THRESHOLD:
if constants._debug:
sys.stderr.write('%s confidence = %s, we have a'
'winner\n' %
(self._mModel['charsetName'], cf))
self._mState = constants.eFoundIt
elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
if constants._debug:
sys.stderr.write('%s confidence = %s, below negative'
'shortcut threshhold %s\n' %
(self._mModel['charsetName'], cf,
NEGATIVE_SHORTCUT_THRESHOLD))
self._mState = constants.eNotMe
return self.get_state()
def get_confidence(self):
r = 0.01
if self._mTotalSeqs > 0:
r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
/ self._mModel['mTypicalPositiveRatio'])
r = r * self._mFreqChar / self._mTotalChar
if r >= 1.0:
r = 0.99
return r
@@ -0,0 +1,69 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .charsetgroupprober import CharSetGroupProber
from .sbcharsetprober import SingleByteCharSetProber
from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
Latin5CyrillicModel, MacCyrillicModel,
Ibm866Model, Ibm855Model)
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
from .langthaimodel import TIS620ThaiModel
from .langhebrewmodel import Win1255HebrewModel
from .hebrewprober import HebrewProber
class SBCSGroupProber(CharSetGroupProber):
def __init__(self):
CharSetGroupProber.__init__(self)
self._mProbers = [
SingleByteCharSetProber(Win1251CyrillicModel),
SingleByteCharSetProber(Koi8rModel),
SingleByteCharSetProber(Latin5CyrillicModel),
SingleByteCharSetProber(MacCyrillicModel),
SingleByteCharSetProber(Ibm866Model),
SingleByteCharSetProber(Ibm855Model),
SingleByteCharSetProber(Latin7GreekModel),
SingleByteCharSetProber(Win1253GreekModel),
SingleByteCharSetProber(Latin5BulgarianModel),
SingleByteCharSetProber(Win1251BulgarianModel),
SingleByteCharSetProber(Latin2HungarianModel),
SingleByteCharSetProber(Win1250HungarianModel),
SingleByteCharSetProber(TIS620ThaiModel),
]
hebrewProber = HebrewProber()
logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,
False, hebrewProber)
visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,
hebrewProber)
hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
self._mProbers.extend([hebrewProber, logicalHebrewProber,
visualHebrewProber])
self.reset()
@@ -0,0 +1,91 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import sys
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import SJISDistributionAnalysis
from .jpcntx import SJISContextAnalysis
from .mbcssm import SJISSMModel
from . import constants
class SJISProber(MultiByteCharSetProber):
def __init__(self):
MultiByteCharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(SJISSMModel)
self._mDistributionAnalyzer = SJISDistributionAnalysis()
self._mContextAnalyzer = SJISContextAnalysis()
self.reset()
def reset(self):
MultiByteCharSetProber.reset(self)
self._mContextAnalyzer.reset()
def get_charset_name(self):
return "SHIFT_JIS"
def feed(self, aBuf):
aLen = len(aBuf)
for i in range(0, aLen):
codingState = self._mCodingSM.next_state(aBuf[i])
if codingState == constants.eError:
if constants._debug:
sys.stderr.write(self.get_charset_name()
+ ' prober hit error at byte ' + str(i)
+ '\n')
self._mState = constants.eNotMe
break
elif codingState == constants.eItsMe:
self._mState = constants.eFoundIt
break
elif codingState == constants.eStart:
charLen = self._mCodingSM.get_current_charlen()
if i == 0:
self._mLastChar[1] = aBuf[0]
self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
charLen)
self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
else:
self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
- charLen], charLen)
self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
charLen)
self._mLastChar[0] = aBuf[aLen - 1]
if self.get_state() == constants.eDetecting:
if (self._mContextAnalyzer.got_enough_data() and
(self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
self._mState = constants.eFoundIt
return self.get_state()
def get_confidence(self):
contxtCf = self._mContextAnalyzer.get_confidence()
distribCf = self._mDistributionAnalyzer.get_confidence()
return max(contxtCf, distribCf)
@@ -0,0 +1,170 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from . import constants
import sys
import codecs
from .latin1prober import Latin1Prober # windows-1252
from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets
from .sbcsgroupprober import SBCSGroupProber # single-byte character sets
from .escprober import EscCharSetProber # ISO-2122, etc.
import re
MINIMUM_THRESHOLD = 0.20
ePureAscii = 0
eEscAscii = 1
eHighbyte = 2
class UniversalDetector:
def __init__(self):
self._highBitDetector = re.compile(b'[\x80-\xFF]')
self._escDetector = re.compile(b'(\033|~{)')
self._mEscCharSetProber = None
self._mCharSetProbers = []
self.reset()
def reset(self):
self.result = {'encoding': None, 'confidence': 0.0}
self.done = False
self._mStart = True
self._mGotData = False
self._mInputState = ePureAscii
self._mLastChar = b''
if self._mEscCharSetProber:
self._mEscCharSetProber.reset()
for prober in self._mCharSetProbers:
prober.reset()
def feed(self, aBuf):
if self.done:
return
aLen = len(aBuf)
if not aLen:
return
if not self._mGotData:
# If the data starts with BOM, we know it is UTF
if aBuf[:3] == codecs.BOM:
# EF BB BF UTF-8 with BOM
self.result = {'encoding': "UTF-8", 'confidence': 1.0}
elif aBuf[:4] == codecs.BOM_UTF32_LE:
# FF FE 00 00 UTF-32, little-endian BOM
self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
elif aBuf[:4] == codecs.BOM_UTF32_BE:
# 00 00 FE FF UTF-32, big-endian BOM
self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
elif aBuf[:4] == b'\xFE\xFF\x00\x00':
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
self.result = {
'encoding': "X-ISO-10646-UCS-4-3412",
'confidence': 1.0
}
elif aBuf[:4] == b'\x00\x00\xFF\xFE':
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
self.result = {
'encoding': "X-ISO-10646-UCS-4-2143",
'confidence': 1.0
}
elif aBuf[:2] == codecs.BOM_LE:
# FF FE UTF-16, little endian BOM
self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
elif aBuf[:2] == codecs.BOM_BE:
# FE FF UTF-16, big endian BOM
self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}
self._mGotData = True
if self.result['encoding'] and (self.result['confidence'] > 0.0):
self.done = True
return
if self._mInputState == ePureAscii:
if self._highBitDetector.search(aBuf):
self._mInputState = eHighbyte
elif ((self._mInputState == ePureAscii) and
self._escDetector.search(self._mLastChar + aBuf)):
self._mInputState = eEscAscii
self._mLastChar = aBuf[-1:]
if self._mInputState == eEscAscii:
if not self._mEscCharSetProber:
self._mEscCharSetProber = EscCharSetProber()
if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
self.result = {'encoding': self._mEscCharSetProber.get_charset_name(),
'confidence': self._mEscCharSetProber.get_confidence()}
self.done = True
elif self._mInputState == eHighbyte:
if not self._mCharSetProbers:
self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
Latin1Prober()]
for prober in self._mCharSetProbers:
if prober.feed(aBuf) == constants.eFoundIt:
self.result = {'encoding': prober.get_charset_name(),
'confidence': prober.get_confidence()}
self.done = True
break
def close(self):
if self.done:
return
if not self._mGotData:
if constants._debug:
sys.stderr.write('no data received!\n')
return
self.done = True
if self._mInputState == ePureAscii:
self.result = {'encoding': 'ascii', 'confidence': 1.0}
return self.result
if self._mInputState == eHighbyte:
proberConfidence = None
maxProberConfidence = 0.0
maxProber = None
for prober in self._mCharSetProbers:
if not prober:
continue
proberConfidence = prober.get_confidence()
if proberConfidence > maxProberConfidence:
maxProberConfidence = proberConfidence
maxProber = prober
if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
self.result = {'encoding': maxProber.get_charset_name(),
'confidence': maxProber.get_confidence()}
return self.result
if constants._debug:
sys.stderr.write('no probers hit minimum threshhold\n')
for prober in self._mCharSetProbers[0].mProbers:
if not prober:
continue
sys.stderr.write('%s confidence = %s\n' %
(prober.get_charset_name(),
prober.get_confidence()))
@@ -0,0 +1,76 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from . import constants
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
from .mbcssm import UTF8SMModel
ONE_CHAR_PROB = 0.5
class UTF8Prober(CharSetProber):
def __init__(self):
CharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(UTF8SMModel)
self.reset()
def reset(self):
CharSetProber.reset(self)
self._mCodingSM.reset()
self._mNumOfMBChar = 0
def get_charset_name(self):
return "utf-8"
def feed(self, aBuf):
for c in aBuf:
codingState = self._mCodingSM.next_state(c)
if codingState == constants.eError:
self._mState = constants.eNotMe
break
elif codingState == constants.eItsMe:
self._mState = constants.eFoundIt
break
elif codingState == constants.eStart:
if self._mCodingSM.get_current_charlen() >= 2:
self._mNumOfMBChar += 1
if self.get_state() == constants.eDetecting:
if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
self._mState = constants.eFoundIt
return self.get_state()
def get_confidence(self):
unlike = 0.99
if self._mNumOfMBChar < 6:
for i in range(0, self._mNumOfMBChar):
unlike = unlike * ONE_CHAR_PROB
return 1.0 - unlike
else:
return unlike
+27
View File
@@ -0,0 +1,27 @@
Copyright (c) 2011-2014 Mike Bayer
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author or contributors may not be used to endorse or
promote products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
@@ -0,0 +1,91 @@
dogpile.cache
=============
A caching API built around the concept of a "dogpile lock", which allows
continued access to an expiring data value while a single thread generates a
new value.
dogpile.cache builds on the `dogpile.core <http://pypi.python.org/pypi/dogpile.core>`_
locking system, which implements the idea of "allow one creator to write while
others read" in the abstract. Overall, dogpile.cache is intended as a
replacement to the `Beaker <http://beaker.groovie.org>`_ caching system, the internals
of which are written by the same author. All the ideas of Beaker which "work"
are re-implemented in dogpile.cache in a more efficient and succinct manner,
and all the cruft (Beaker's internals were first written in 2005) relegated
to the trash heap.
Features
--------
* A succinct API which encourages up-front configuration of pre-defined
"regions", each one defining a set of caching characteristics including
storage backend, configuration options, and default expiration time.
* A standard get/set/delete API as well as a function decorator API is
provided.
* The mechanics of key generation are fully customizable. The function
decorator API features a pluggable "key generator" to customize how
cache keys are made to correspond to function calls, and an optional
"key mangler" feature provides for pluggable mangling of keys
(such as encoding, SHA-1 hashing) as desired for each region.
* The dogpile lock, first developed as the core engine behind the Beaker
caching system, here vastly simplified, improved, and better tested.
Some key performance
issues that were intrinsic to Beaker's architecture, particularly that
values would frequently be "double-fetched" from the cache, have been fixed.
* Backends implement their own version of a "distributed" lock, where the
"distribution" matches the backend's storage system. For example, the
memcached backends allow all clients to coordinate creation of values
using memcached itself. The dbm file backend uses a lockfile
alongside the dbm file. New backends, such as a Redis-based backend,
can provide their own locking mechanism appropriate to the storage
engine.
* Writing new backends or hacking on the existing backends is intended to be
routine - all that's needed are basic get/set/delete methods. A distributed
lock tailored towards the backend is an optional addition, else dogpile uses
a regular thread mutex. New backends can be registered with dogpile.cache
directly or made available via setuptools entry points.
* Included backends feature three memcached backends (python-memcached, pylibmc,
bmemcached), a Redis backend, a backend based on Python's
anydbm, and a plain dictionary backend.
* Space for third party plugins, including the first which provides the
dogpile.cache engine to Mako templates.
* Python 3 compatible in place - no 2to3 required.
Synopsis
--------
dogpile.cache features a single public usage object known as the ``CacheRegion``.
This object then refers to a particular ``CacheBackend``. Typical usage
generates a region using ``make_region()``, which can then be used at the
module level to decorate functions, or used directly in code with a traditional
get/set interface. Configuration of the backend is applied to the region
using ``configure()`` or ``configure_from_config()``, allowing deferred
config-file based configuration to occur after modules have been imported::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.pylibmc',
expiration_time = 3600,
arguments = {
'url':["127.0.0.1"],
'binary':True,
'behaviors':{"tcp_nodelay": True,"ketama":True}
}
)
@region.cache_on_arguments()
def load_user_info(user_id):
return some_database.lookup_user_by_id(user_id)
Documentation
-------------
See dogpile.cache's full documentation at
`dogpile.cache documentation <http://dogpilecache.readthedocs.org>`_.
@@ -0,0 +1,6 @@
# See http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages
try:
__import__('pkg_resources').declare_namespace(__name__)
except ImportError:
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)
+3
View File
@@ -0,0 +1,3 @@
__version__ = '0.5.4'
from .region import CacheRegion, register_backend, make_region
+193
View File
@@ -0,0 +1,193 @@
import operator
from .compat import py3k
class NoValue(object):
"""Describe a missing cache value.
The :attr:`.NO_VALUE` module global
should be used.
"""
@property
def payload(self):
return self
if py3k:
def __bool__(self): #pragma NO COVERAGE
return False
else:
def __nonzero__(self): #pragma NO COVERAGE
return False
NO_VALUE = NoValue()
"""Value returned from ``get()`` that describes
a key not present."""
class CachedValue(tuple):
"""Represent a value stored in the cache.
:class:`.CachedValue` is a two-tuple of
``(payload, metadata)``, where ``metadata``
is dogpile.cache's tracking information (
currently the creation time). The metadata
and tuple structure is pickleable, if
the backend requires serialization.
"""
payload = property(operator.itemgetter(0))
"""Named accessor for the payload."""
metadata = property(operator.itemgetter(1))
"""Named accessor for the dogpile.cache metadata dictionary."""
def __new__(cls, payload, metadata):
return tuple.__new__(cls, (payload, metadata))
def __reduce__(self):
return CachedValue, (self.payload, self.metadata)
class CacheBackend(object):
"""Base class for backend implementations."""
key_mangler = None
"""Key mangling function.
May be None, or otherwise declared
as an ordinary instance method.
"""
def __init__(self, arguments): #pragma NO COVERAGE
"""Construct a new :class:`.CacheBackend`.
Subclasses should override this to
handle the given arguments.
:param arguments: The ``arguments`` parameter
passed to :func:`.make_registry`.
"""
raise NotImplementedError()
@classmethod
def from_config_dict(cls, config_dict, prefix):
prefix_len = len(prefix)
return cls(
dict(
(key[prefix_len:], config_dict[key])
for key in config_dict
if key.startswith(prefix)
)
)
def get_mutex(self, key):
"""Return an optional mutexing object for the given key.
This object need only provide an ``acquire()``
and ``release()`` method.
May return ``None``, in which case the dogpile
lock will use a regular ``threading.Lock``
object to mutex concurrent threads for
value creation. The default implementation
returns ``None``.
Different backends may want to provide various
kinds of "mutex" objects, such as those which
link to lock files, distributed mutexes,
memcached semaphores, etc. Whatever
kind of system is best suited for the scope
and behavior of the caching backend.
A mutex that takes the key into account will
allow multiple regenerate operations across
keys to proceed simultaneously, while a mutex
that does not will serialize regenerate operations
to just one at a time across all keys in the region.
The latter approach, or a variant that involves
a modulus of the given key's hash value,
can be used as a means of throttling the total
number of value recreation operations that may
proceed at one time.
"""
return None
def get(self, key): #pragma NO COVERAGE
"""Retrieve a value from the cache.
The returned value should be an instance of
:class:`.CachedValue`, or ``NO_VALUE`` if
not present.
"""
raise NotImplementedError()
def get_multi(self, keys): #pragma NO COVERAGE
"""Retrieve multiple values from the cache.
The returned value should be a list, corresponding
to the list of keys given.
.. versionadded:: 0.5.0
"""
raise NotImplementedError()
def set(self, key, value): #pragma NO COVERAGE
"""Set a value in the cache.
The key will be whatever was passed
to the registry, processed by the
"key mangling" function, if any.
The value will always be an instance
of :class:`.CachedValue`.
"""
raise NotImplementedError()
def set_multi(self, mapping): #pragma NO COVERAGE
"""Set multiple values in the cache.
The key will be whatever was passed
to the registry, processed by the
"key mangling" function, if any.
The value will always be an instance
of :class:`.CachedValue`.
.. versionadded:: 0.5.0
"""
raise NotImplementedError()
def delete(self, key): #pragma NO COVERAGE
"""Delete a value from the cache.
The key will be whatever was passed
to the registry, processed by the
"key mangling" function, if any.
The behavior here should be idempotent,
that is, can be called any number of times
regardless of whether or not the
key exists.
"""
raise NotImplementedError()
def delete_multi(self, keys): #pragma NO COVERAGE
"""Delete multiple values from the cache.
The key will be whatever was passed
to the registry, processed by the
"key mangling" function, if any.
The behavior here should be idempotent,
that is, can be called any number of times
regardless of whether or not the
key exists.
.. versionadded:: 0.5.0
"""
raise NotImplementedError()
@@ -0,0 +1,10 @@
from dogpile.cache.region import register_backend
register_backend("dogpile.cache.null", "dogpile.cache.backends.null", "NullBackend")
register_backend("dogpile.cache.dbm", "dogpile.cache.backends.file", "DBMBackend")
register_backend("dogpile.cache.pylibmc", "dogpile.cache.backends.memcached", "PylibmcBackend")
register_backend("dogpile.cache.bmemcached", "dogpile.cache.backends.memcached", "BMemcachedBackend")
register_backend("dogpile.cache.memcached", "dogpile.cache.backends.memcached", "MemcachedBackend")
register_backend("dogpile.cache.memory", "dogpile.cache.backends.memory", "MemoryBackend")
register_backend("dogpile.cache.memory_pickle", "dogpile.cache.backends.memory", "MemoryPickleBackend")
register_backend("dogpile.cache.redis", "dogpile.cache.backends.redis", "RedisBackend")
+441
View File
@@ -0,0 +1,441 @@
"""
File Backends
------------------
Provides backends that deal with local filesystem access.
"""
from __future__ import with_statement
from dogpile.cache.api import CacheBackend, NO_VALUE
from contextlib import contextmanager
from dogpile.cache import compat
from dogpile.cache import util
import os
__all__ = 'DBMBackend', 'FileLock', 'AbstractFileLock'
class DBMBackend(CacheBackend):
"""A file-backend using a dbm file to store keys.
Basic usage::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.dbm',
expiration_time = 3600,
arguments = {
"filename":"/path/to/cachefile.dbm"
}
)
DBM access is provided using the Python ``anydbm`` module,
which selects a platform-specific dbm module to use.
This may be made to be more configurable in a future
release.
Note that different dbm modules have different behaviors.
Some dbm implementations handle their own locking, while
others don't. The :class:`.DBMBackend` uses a read/write
lockfile by default, which is compatible even with those
DBM implementations for which this is unnecessary,
though the behavior can be disabled.
The DBM backend by default makes use of two lockfiles.
One is in order to protect the DBM file itself from
concurrent writes, the other is to coordinate
value creation (i.e. the dogpile lock). By default,
these lockfiles use the ``flock()`` system call
for locking; this is **only available on Unix
platforms**. An alternative lock implementation, such as one
which is based on threads or uses a third-party system
such as `portalocker <https://pypi.python.org/pypi/portalocker>`_,
can be dropped in using the ``lock_factory`` argument
in conjunction with the :class:`.AbstractFileLock` base class.
Currently, the dogpile lock is against the entire
DBM file, not per key. This means there can
only be one "creator" job running at a time
per dbm file.
A future improvement might be to have the dogpile lock
using a filename that's based on a modulus of the key.
Locking on a filename that uniquely corresponds to the
key is problematic, since it's not generally safe to
delete lockfiles as the application runs, implying an
unlimited number of key-based files would need to be
created and never deleted.
Parameters to the ``arguments`` dictionary are
below.
:param filename: path of the filename in which to
create the DBM file. Note that some dbm backends
will change this name to have additional suffixes.
:param rw_lockfile: the name of the file to use for
read/write locking. If omitted, a default name
is used by appending the suffix ".rw.lock" to the
DBM filename. If False, then no lock is used.
:param dogpile_lockfile: the name of the file to use
for value creation, i.e. the dogpile lock. If
omitted, a default name is used by appending the
suffix ".dogpile.lock" to the DBM filename. If
False, then dogpile.cache uses the default dogpile
lock, a plain thread-based mutex.
:param lock_factory: a function or class which provides
for a read/write lock. Defaults to :class:`.FileLock`.
Custom implementations need to implement context-manager
based ``read()`` and ``write()`` functions - the
:class:`.AbstractFileLock` class is provided as a base class
which provides these methods based on individual read/write lock
functions. E.g. to replace the lock with the dogpile.core
:class:`.ReadWriteMutex`::
from dogpile.core.readwrite_lock import ReadWriteMutex
from dogpile.cache.backends.file import AbstractFileLock
class MutexLock(AbstractFileLock):
def __init__(self, filename):
self.mutex = ReadWriteMutex()
def acquire_read_lock(self, wait):
ret = self.mutex.acquire_read_lock(wait)
return wait or ret
def acquire_write_lock(self, wait):
ret = self.mutex.acquire_write_lock(wait)
return wait or ret
def release_read_lock(self):
return self.mutex.release_read_lock()
def release_write_lock(self):
return self.mutex.release_write_lock()
from dogpile.cache import make_region
region = make_region().configure(
"dogpile.cache.dbm",
expiration_time=300,
arguments={
"filename": "file.dbm",
"lock_factory": MutexLock
}
)
While the included :class:`.FileLock` uses ``os.flock()``, a
windows-compatible implementation can be built using a library
such as `portalocker <https://pypi.python.org/pypi/portalocker>`_.
.. versionadded:: 0.5.2
"""
def __init__(self, arguments):
self.filename = os.path.abspath(
os.path.normpath(arguments['filename'])
)
dir_, filename = os.path.split(self.filename)
self.lock_factory = arguments.get("lock_factory", FileLock)
self._rw_lock = self._init_lock(
arguments.get('rw_lockfile'),
".rw.lock", dir_, filename)
self._dogpile_lock = self._init_lock(
arguments.get('dogpile_lockfile'),
".dogpile.lock",
dir_, filename,
util.KeyReentrantMutex.factory)
# TODO: make this configurable
if compat.py3k:
import dbm
else:
import anydbm as dbm
self.dbmmodule = dbm
self._init_dbm_file()
def _init_lock(self, argument, suffix, basedir, basefile, wrapper=None):
if argument is None:
lock = self.lock_factory(os.path.join(basedir, basefile + suffix))
elif argument is not False:
lock = self.lock_factory(
os.path.abspath(
os.path.normpath(argument)
))
else:
return None
if wrapper:
lock = wrapper(lock)
return lock
def _init_dbm_file(self):
exists = os.access(self.filename, os.F_OK)
if not exists:
for ext in ('db', 'dat', 'pag', 'dir'):
if os.access(self.filename + os.extsep + ext, os.F_OK):
exists = True
break
if not exists:
fh = self.dbmmodule.open(self.filename, 'c')
fh.close()
def get_mutex(self, key):
# using one dogpile for the whole file. Other ways
# to do this might be using a set of files keyed to a
# hash/modulus of the key. the issue is it's never
# really safe to delete a lockfile as this can
# break other processes trying to get at the file
# at the same time - so handling unlimited keys
# can't imply unlimited filenames
if self._dogpile_lock:
return self._dogpile_lock(key)
else:
return None
@contextmanager
def _use_rw_lock(self, write):
if self._rw_lock is None:
yield
elif write:
with self._rw_lock.write():
yield
else:
with self._rw_lock.read():
yield
@contextmanager
def _dbm_file(self, write):
with self._use_rw_lock(write):
dbm = self.dbmmodule.open(self.filename,
"w" if write else "r")
yield dbm
dbm.close()
def get(self, key):
with self._dbm_file(False) as dbm:
if hasattr(dbm, 'get'):
value = dbm.get(key, NO_VALUE)
else:
# gdbm objects lack a .get method
try:
value = dbm[key]
except KeyError:
value = NO_VALUE
if value is not NO_VALUE:
value = compat.pickle.loads(value)
return value
def get_multi(self, keys):
return [self.get(key) for key in keys]
def set(self, key, value):
with self._dbm_file(True) as dbm:
dbm[key] = compat.pickle.dumps(value)
def set_multi(self, mapping):
with self._dbm_file(True) as dbm:
for key,value in mapping.items():
dbm[key] = compat.pickle.dumps(value)
def delete(self, key):
with self._dbm_file(True) as dbm:
try:
del dbm[key]
except KeyError:
pass
def delete_multi(self, keys):
with self._dbm_file(True) as dbm:
for key in keys:
try:
del dbm[key]
except KeyError:
pass
class AbstractFileLock(object):
"""Coordinate read/write access to a file.
typically is a file-based lock but doesn't necessarily have to be.
The default implementation here is :class:`.FileLock`.
Implementations should provide the following methods::
* __init__()
* acquire_read_lock()
* acquire_write_lock()
* release_read_lock()
* release_write_lock()
The ``__init__()`` method accepts a single argument "filename", which
may be used as the "lock file", for those implementations that use a lock
file.
Note that multithreaded environments must provide a thread-safe
version of this lock. The recommended approach for file-descriptor-based
locks is to use a Python ``threading.local()`` so that a unique file descriptor
is held per thread. See the source code of :class:`.FileLock` for an
implementation example.
"""
def __init__(self, filename):
"""Constructor, is given the filename of a potential lockfile.
The usage of this filename is optional and no file is
created by default.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
def acquire(self, wait=True):
"""Acquire the "write" lock.
This is a direct call to :meth:`.AbstractFileLock.acquire_write_lock`.
"""
return self.acquire_write_lock(wait)
def release(self):
"""Release the "write" lock.
This is a direct call to :meth:`.AbstractFileLock.release_write_lock`.
"""
self.release_write_lock()
@contextmanager
def read(self):
"""Provide a context manager for the "read" lock.
This method makes use of :meth:`.AbstractFileLock.acquire_read_lock`
and :meth:`.AbstractFileLock.release_read_lock`
"""
self.acquire_read_lock(True)
try:
yield
finally:
self.release_read_lock()
@contextmanager
def write(self):
"""Provide a context manager for the "write" lock.
This method makes use of :meth:`.AbstractFileLock.acquire_write_lock`
and :meth:`.AbstractFileLock.release_write_lock`
"""
self.acquire_write_lock(True)
try:
yield
finally:
self.release_write_lock()
@property
def is_open(self):
"""optional method."""
raise NotImplementedError()
def acquire_read_lock(self, wait):
"""Acquire a 'reader' lock.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
def acquire_write_lock(self, wait):
"""Acquire a 'write' lock.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
def release_read_lock(self):
"""Release a 'reader' lock.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
def release_write_lock(self):
"""Release a 'writer' lock.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
class FileLock(AbstractFileLock):
"""Use lockfiles to coordinate read/write access to a file.
Only works on Unix systems, using
`fcntl.flock() <http://docs.python.org/library/fcntl.html>`_.
"""
def __init__(self, filename):
self._filedescriptor = compat.threading.local()
self.filename = filename
@util.memoized_property
def _module(self):
import fcntl
return fcntl
@property
def is_open(self):
return hasattr(self._filedescriptor, 'fileno')
def acquire_read_lock(self, wait):
return self._acquire(wait, os.O_RDONLY, self._module.LOCK_SH)
def acquire_write_lock(self, wait):
return self._acquire(wait, os.O_WRONLY, self._module.LOCK_EX)
def release_read_lock(self):
self._release()
def release_write_lock(self):
self._release()
def _acquire(self, wait, wrflag, lockflag):
wrflag |= os.O_CREAT
fileno = os.open(self.filename, wrflag)
try:
if not wait:
lockflag |= self._module.LOCK_NB
self._module.flock(fileno, lockflag)
except IOError:
os.close(fileno)
if not wait:
# this is typically
# "[Errno 35] Resource temporarily unavailable",
# because of LOCK_NB
return False
else:
raise
else:
self._filedescriptor.fileno = fileno
return True
def _release(self):
try:
fileno = self._filedescriptor.fileno
except AttributeError:
return
else:
self._module.flock(fileno, self._module.LOCK_UN)
os.close(fileno)
del self._filedescriptor.fileno
@@ -0,0 +1,332 @@
"""
Memcached Backends
------------------
Provides backends for talking to `memcached <http://memcached.org>`_.
"""
from dogpile.cache.api import CacheBackend, NO_VALUE
from dogpile.cache import compat
from dogpile.cache import util
import random
import time
__all__ = 'GenericMemcachedBackend', 'MemcachedBackend',\
'PylibmcBackend', 'BMemcachedBackend', 'MemcachedLock'
class MemcachedLock(object):
"""Simple distributed lock using memcached.
This is an adaptation of the lock featured at
http://amix.dk/blog/post/19386
"""
def __init__(self, client_fn, key):
self.client_fn = client_fn
self.key = "_lock" + key
def acquire(self, wait=True):
client = self.client_fn()
i = 0
while True:
if client.add(self.key, 1):
return True
elif not wait:
return False
else:
sleep_time = (((i+1)*random.random()) + 2**i) / 2.5
time.sleep(sleep_time)
if i < 15:
i += 1
def release(self):
client = self.client_fn()
client.delete(self.key)
class GenericMemcachedBackend(CacheBackend):
"""Base class for memcached backends.
This base class accepts a number of paramters
common to all backends.
:param url: the string URL to connect to. Can be a single
string or a list of strings. This is the only argument
that's required.
:param distributed_lock: boolean, when True, will use a
memcached-lock as the dogpile lock (see :class:`.MemcachedLock`).
Use this when multiple
processes will be talking to the same memcached instance.
When left at False, dogpile will coordinate on a regular
threading mutex.
:param memcached_expire_time: integer, when present will
be passed as the ``time`` parameter to ``pylibmc.Client.set``.
This is used to set the memcached expiry time for a value.
.. note::
This parameter is **different** from Dogpile's own
``expiration_time``, which is the number of seconds after
which Dogpile will consider the value to be expired.
When Dogpile considers a value to be expired,
it **continues to use the value** until generation
of a new value is complete, when using
:meth:`.CacheRegion.get_or_create`.
Therefore, if you are setting ``memcached_expire_time``, you'll
want to make sure it is greater than ``expiration_time``
by at least enough seconds for new values to be generated,
else the value won't be available during a regeneration,
forcing all threads to wait for a regeneration each time
a value expires.
The :class:`.GenericMemachedBackend` uses a ``threading.local()``
object to store individual client objects per thread,
as most modern memcached clients do not appear to be inherently
threadsafe.
In particular, ``threading.local()`` has the advantage over pylibmc's
built-in thread pool in that it automatically discards objects
associated with a particular thread when that thread ends.
"""
set_arguments = {}
"""Additional arguments which will be passed
to the :meth:`set` method."""
def __init__(self, arguments):
self._imports()
# using a plain threading.local here. threading.local
# automatically deletes the __dict__ when a thread ends,
# so the idea is that this is superior to pylibmc's
# own ThreadMappedPool which doesn't handle this
# automatically.
self.url = util.to_list(arguments['url'])
self.distributed_lock = arguments.get('distributed_lock', False)
self.memcached_expire_time = arguments.get(
'memcached_expire_time', 0)
def _imports(self):
"""client library imports go here."""
raise NotImplementedError()
def _create_client(self):
"""Creation of a Client instance goes here."""
raise NotImplementedError()
@util.memoized_property
def _clients(self):
backend = self
class ClientPool(compat.threading.local):
def __init__(self):
self.memcached = backend._create_client()
return ClientPool()
@property
def client(self):
"""Return the memcached client.
This uses a threading.local by
default as it appears most modern
memcached libs aren't inherently
threadsafe.
"""
return self._clients.memcached
def get_mutex(self, key):
if self.distributed_lock:
return MemcachedLock(lambda: self.client, key)
else:
return None
def get(self, key):
value = self.client.get(key)
if value is None:
return NO_VALUE
else:
return value
def get_multi(self, keys):
values = self.client.get_multi(keys)
return [
NO_VALUE if key not in values
else values[key] for key in keys
]
def set(self, key, value):
self.client.set(key,
value,
**self.set_arguments
)
def set_multi(self, mapping):
self.client.set_multi(mapping,
**self.set_arguments
)
def delete(self, key):
self.client.delete(key)
def delete_multi(self, keys):
self.client.delete_multi(keys)
class MemcacheArgs(object):
"""Mixin which provides support for the 'time' argument to set(),
'min_compress_len' to other methods.
"""
def __init__(self, arguments):
self.min_compress_len = arguments.get('min_compress_len', 0)
self.set_arguments = {}
if "memcached_expire_time" in arguments:
self.set_arguments["time"] =\
arguments["memcached_expire_time"]
if "min_compress_len" in arguments:
self.set_arguments["min_compress_len"] =\
arguments["min_compress_len"]
super(MemcacheArgs, self).__init__(arguments)
class PylibmcBackend(MemcacheArgs, GenericMemcachedBackend):
"""A backend for the
`pylibmc <http://sendapatch.se/projects/pylibmc/index.html>`_
memcached client.
A configuration illustrating several of the optional
arguments described in the pylibmc documentation::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.pylibmc',
expiration_time = 3600,
arguments = {
'url':["127.0.0.1"],
'binary':True,
'behaviors':{"tcp_nodelay": True,"ketama":True}
}
)
Arguments accepted here include those of
:class:`.GenericMemcachedBackend`, as well as
those below.
:param binary: sets the ``binary`` flag understood by
``pylibmc.Client``.
:param behaviors: a dictionary which will be passed to
``pylibmc.Client`` as the ``behaviors`` parameter.
:param min_compress_len: Integer, will be passed as the
``min_compress_len`` parameter to the ``pylibmc.Client.set``
method.
"""
def __init__(self, arguments):
self.binary = arguments.get('binary', False)
self.behaviors = arguments.get('behaviors', {})
super(PylibmcBackend, self).__init__(arguments)
def _imports(self):
global pylibmc
import pylibmc
def _create_client(self):
return pylibmc.Client(self.url,
binary=self.binary,
behaviors=self.behaviors
)
class MemcachedBackend(MemcacheArgs, GenericMemcachedBackend):
"""A backend using the standard `Python-memcached <http://www.tummy.com/Community/software/python-memcached/>`_
library.
Example::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.memcached',
expiration_time = 3600,
arguments = {
'url':"127.0.0.1:11211"
}
)
"""
def _imports(self):
global memcache
import memcache
def _create_client(self):
return memcache.Client(self.url)
class BMemcachedBackend(GenericMemcachedBackend):
"""A backend for the
`python-binary-memcached <https://github.com/jaysonsantos/python-binary-memcached>`_
memcached client.
This is a pure Python memcached client which
includes the ability to authenticate with a memcached
server using SASL.
A typical configuration using username/password::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.bmemcached',
expiration_time = 3600,
arguments = {
'url':["127.0.0.1"],
'username':'scott',
'password':'tiger'
}
)
Arguments which can be passed to the ``arguments``
dictionary include:
:param username: optional username, will be used for
SASL authentication.
:param password: optional password, will be used for
SASL authentication.
"""
def __init__(self, arguments):
self.username = arguments.get('username', None)
self.password = arguments.get('password', None)
super(BMemcachedBackend, self).__init__(arguments)
def _imports(self):
global bmemcached
import bmemcached
class RepairBMemcachedAPI(bmemcached.Client):
"""Repairs BMemcached's non-standard method
signatures, which was fixed in BMemcached
ef206ed4473fec3b639e.
"""
def add(self, key, value):
try:
return super(RepairBMemcachedAPI, self).add(key, value)
except ValueError:
return False
self.Client = RepairBMemcachedAPI
def _create_client(self):
return self.Client(self.url,
username=self.username,
password=self.password
)
def delete_multi(self, keys):
"""python-binary-memcached api does not implements delete_multi"""
for key in keys:
self.delete(key)
@@ -0,0 +1,122 @@
"""
Memory Backends
---------------
Provides simple dictionary-based backends.
The two backends are :class:`.MemoryBackend` and :class:`.MemoryPickleBackend`;
the latter applies a serialization step to cached values while the former
places the value as given into the dictionary.
"""
from dogpile.cache.api import CacheBackend, NO_VALUE
from dogpile.cache.compat import pickle
class MemoryBackend(CacheBackend):
"""A backend that uses a plain dictionary.
There is no size management, and values which
are placed into the dictionary will remain
until explicitly removed. Note that
Dogpile's expiration of items is based on
timestamps and does not remove them from
the cache.
E.g.::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.memory'
)
To use a Python dictionary of your choosing,
it can be passed in with the ``cache_dict``
argument::
my_dictionary = {}
region = make_region().configure(
'dogpile.cache.memory',
arguments={
"cache_dict":my_dictionary
}
)
"""
pickle_values = False
def __init__(self, arguments):
self._cache = arguments.pop("cache_dict", {})
def get(self, key):
value = self._cache.get(key, NO_VALUE)
if value is not NO_VALUE and self.pickle_values:
value = pickle.loads(value)
return value
def get_multi(self, keys):
ret = [self._cache.get(key, NO_VALUE)
for key in keys]
if self.pickle_values:
ret = [
pickle.loads(value)
if value is not NO_VALUE else value
for value in ret
]
return ret
def set(self, key, value):
if self.pickle_values:
value = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
self._cache[key] = value
def set_multi(self, mapping):
pickle_values = self.pickle_values
for key, value in mapping.items():
if pickle_values:
value = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
self._cache[key] = value
def delete(self, key):
self._cache.pop(key, None)
def delete_multi(self, keys):
for key in keys:
self._cache.pop(key, None)
class MemoryPickleBackend(MemoryBackend):
"""A backend that uses a plain dictionary, but serializes objects on
:meth:`.MemoryBackend.set` and deserializes :meth:`.MemoryBackend.get`.
E.g.::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.memory_pickle'
)
The usage of pickle to serialize cached values allows an object
as placed in the cache to be a copy of the original given object, so
that any subsequent changes to the given object aren't reflected
in the cached value, thus making the backend behave the same way
as other backends which make use of serialization.
The serialization is performed via pickle, and incurs the same
performance hit in doing so as that of other backends; in this way
the :class:`.MemoryPickleBackend` performance is somewhere in between
that of the pure :class:`.MemoryBackend` and the remote server oriented
backends such as that of Memcached or Redis.
Pickle behavior here is the same as that of the Redis backend, using
either ``cPickle`` or ``pickle`` and specifying ``HIGHEST_PROTOCOL``
upon serialize.
.. versionadded:: 0.5.3
"""
pickle_values = True
@@ -0,0 +1,62 @@
"""
Null Backend
-------------
The Null backend does not do any caching at all. It can be
used to test behavior without caching, or as a means of disabling
caching for a region that is otherwise used normally.
.. versionadded:: 0.5.4
"""
from dogpile.cache.api import CacheBackend, NO_VALUE
__all__ = ['NullBackend']
class NullLock(object):
def acquire(self):
pass
def release(self):
pass
class NullBackend(CacheBackend):
"""A "null" backend that effectively disables all cache operations.
Basic usage::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.null'
)
"""
def __init__(self, arguments):
pass
def get_mutex(self, key):
return NullLock()
def get(self, key):
return NO_VALUE
def get_multi(self, keys):
return [NO_VALUE for k in keys]
def set(self, key, value):
pass
def set_multi(self, mapping):
pass
def delete(self, key):
pass
def delete_multi(self, keys):
pass
@@ -0,0 +1,181 @@
"""
Redis Backends
------------------
Provides backends for talking to `Redis <http://redis.io>`_.
"""
from __future__ import absolute_import
from dogpile.cache.api import CacheBackend, NO_VALUE
from dogpile.cache.compat import pickle, u
redis = None
__all__ = 'RedisBackend',
class RedisBackend(CacheBackend):
"""A `Redis <http://redis.io/>`_ backend, using the
`redis-py <http://pypi.python.org/pypi/redis/>`_ backend.
Example configuration::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.redis',
arguments = {
'host': 'localhost',
'port': 6379,
'db': 0,
'redis_expiration_time': 60*60*2, # 2 hours
'distributed_lock':True
}
)
Arguments accepted in the arguments dictionary:
:param url: string. If provided, will override separate host/port/db
params. The format is that accepted by ``StrictRedis.from_url()``.
.. versionadded:: 0.4.1
:param host: string, default is ``localhost``.
:param password: string, default is no password.
.. versionadded:: 0.4.1
:param port: integer, default is ``6379``.
:param db: integer, default is ``0``.
:param redis_expiration_time: integer, number of seconds after setting
a value that Redis should expire it. This should be larger than dogpile's
cache expiration. By default no expiration is set.
:param distributed_lock: boolean, when True, will use a
redis-lock as the dogpile lock.
Use this when multiple
processes will be talking to the same redis instance.
When left at False, dogpile will coordinate on a regular
threading mutex.
:param lock_timeout: integer, number of seconds after acquiring a lock that
Redis should expire it. This argument is only valid when
``distributed_lock`` is ``True``.
.. versionadded:: 0.5.0
:param socket_timeout: float, seconds for socket timeout.
Default is None (no timeout).
.. versionadded:: 0.5.4
:param lock_sleep: integer, number of seconds to sleep when failed to
acquire a lock. This argument is only valid when
``distributed_lock`` is ``True``.
.. versionadded:: 0.5.0
:param connection_pool: ``redis.ConnectionPool`` object. If provided,
this object supersedes other connection arguments passed to the
``redis.StrictRedis`` instance, including url and/or host as well as
socket_timeout, and will be passed to ``redis.StrictRedis`` as the
source of connectivity.
.. versionadded:: 0.5.4
"""
def __init__(self, arguments):
self._imports()
self.url = arguments.pop('url', None)
self.host = arguments.pop('host', 'localhost')
self.password = arguments.pop('password', None)
self.port = arguments.pop('port', 6379)
self.db = arguments.pop('db', 0)
self.distributed_lock = arguments.get('distributed_lock', False)
self.socket_timeout = arguments.pop('socket_timeout', None)
self.lock_timeout = arguments.get('lock_timeout', None)
self.lock_sleep = arguments.get('lock_sleep', 0.1)
self.redis_expiration_time = arguments.pop('redis_expiration_time', 0)
self.connection_pool = arguments.get('connection_pool', None)
self.client = self._create_client()
def _imports(self):
# defer imports until backend is used
global redis
import redis
def _create_client(self):
if self.connection_pool is not None:
# the connection pool already has all other connection
# options present within, so here we disregard socket_timeout
# and others.
return redis.StrictRedis(connection_pool=self.connection_pool)
args = {}
if self.socket_timeout:
args['socket_timeout'] = self.socket_timeout
if self.url is not None:
args.update(url=self.url)
return redis.StrictRedis.from_url(**args)
else:
args.update(
host=self.host, password=self.password,
port=self.port, db=self.db
)
return redis.StrictRedis(**args)
def get_mutex(self, key):
if self.distributed_lock:
return self.client.lock(u('_lock{0}').format(key),
self.lock_timeout, self.lock_sleep)
else:
return None
def get(self, key):
value = self.client.get(key)
if value is None:
return NO_VALUE
return pickle.loads(value)
def get_multi(self, keys):
values = self.client.mget(keys)
return [pickle.loads(v) if v is not None else NO_VALUE
for v in values]
def set(self, key, value):
if self.redis_expiration_time:
self.client.setex(key, self.redis_expiration_time,
pickle.dumps(value, pickle.HIGHEST_PROTOCOL))
else:
self.client.set(key, pickle.dumps(value, pickle.HIGHEST_PROTOCOL))
def set_multi(self, mapping):
mapping = dict(
(k, pickle.dumps(v, pickle.HIGHEST_PROTOCOL))
for k, v in mapping.items()
)
if not self.redis_expiration_time:
self.client.mset(mapping)
else:
pipe = self.client.pipeline()
for key, value in mapping.items():
pipe.setex(key, self.redis_expiration_time, value)
pipe.execute()
def delete(self, key):
self.client.delete(key)
def delete_multi(self, keys):
self.client.delete(*keys)

Some files were not shown because too many files have changed in this diff Show More