Compare commits
53 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| fc2208f9e5 | |||
| 1a4eb366bb | |||
| b89c64a2c2 | |||
| 68e8f6e753 | |||
| f15cc4cb3c | |||
| 903273e3ef | |||
| 1c9b744d31 | |||
| 7c0fb29886 | |||
| 2505a7510c | |||
| 0a66db40a2 | |||
| 6c68893979 | |||
| c512eab0b6 | |||
| 3cedd4bd0f | |||
| 0759c5e4c6 | |||
| ad6cf4be79 | |||
| 23c3899fb2 | |||
| 1a6515a660 | |||
| 58815a7650 | |||
| c15ec9fefc | |||
| 0e18d59680 | |||
| 2d88efa5b4 | |||
| b3da7572f3 | |||
| 099ec4e85d | |||
| ff88a15c61 | |||
| 839791b0fa | |||
| 159a533731 | |||
| fb5835baa4 | |||
| a3f05cd597 | |||
| f3af1672f6 | |||
| c984c9849b | |||
| e28d264125 | |||
| 7166ab9502 | |||
| ab242c2ecb | |||
| 6f829dd4c7 | |||
| 3e0602cdf0 | |||
| 67cdebfb67 | |||
| 0f87973742 | |||
| 92317f7730 | |||
| ce936c2553 | |||
| b995f16c34 | |||
| 49c7adcc40 | |||
| 88eee6fe48 | |||
| cbe425d150 | |||
| 1c7d6b7bf8 | |||
| 8323608558 | |||
| 3f8a5ec125 | |||
| 464b1695a9 | |||
| d85602612b | |||
| 59440d251b | |||
| d774f09427 | |||
| 45be650db9 | |||
| d54847803f | |||
| ce3b66eda7 |
@@ -24,12 +24,11 @@ import support
|
||||
import interface
|
||||
sys.modules["interface"] = interface
|
||||
|
||||
from subliminal.cli import MutexLock
|
||||
from subzero.constants import OS_PLEX_USERAGENT, PERSONAL_MEDIA_IDENTIFIER
|
||||
from interface.menu import *
|
||||
from support.plex_media import media_to_videos, get_media_item_ids, scan_videos
|
||||
from support.subtitlehelpers import get_subtitles_from_metadata
|
||||
from support.storage import whack_missing_parts, save_subtitles, get_subtitle_storage
|
||||
from support.storage import whack_missing_parts, save_subtitles
|
||||
from support.items import is_ignored
|
||||
from support.config import config
|
||||
from support.lib import get_intent
|
||||
@@ -43,13 +42,7 @@ def Start():
|
||||
HTTP.CacheTime = 0
|
||||
HTTP.Headers['User-agent'] = OS_PLEX_USERAGENT
|
||||
|
||||
try:
|
||||
subliminal.region.configure('dogpile.cache.dbm', expiration_time=datetime.timedelta(days=30),
|
||||
arguments={'filename': os.path.join(config.data_items_path, 'subzero.dbm'),
|
||||
'lock_factory': MutexLock})
|
||||
except:
|
||||
Log.Warn("Not using file based cache!")
|
||||
subliminal.region.configure('dogpile.cache.memory')
|
||||
config.init_cache()
|
||||
|
||||
# clear expired intents
|
||||
intent = get_intent()
|
||||
@@ -224,7 +217,7 @@ class SubZeroAgent(object):
|
||||
whack_missing_parts(scanned_video_part_map)
|
||||
|
||||
if downloaded_subtitles:
|
||||
save_subtitles(scanned_video_part_map, downloaded_subtitles)
|
||||
save_subtitles(scanned_video_part_map, downloaded_subtitles, mods=config.default_mods)
|
||||
track_usage("Subtitle", "refreshed", "download", 1)
|
||||
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
|
||||
@@ -18,3 +18,6 @@ sys.modules["interface.refresh_item"] = refresh_item
|
||||
|
||||
import item_details
|
||||
sys.modules["interface.item_details"] = item_details
|
||||
|
||||
import sub_mod
|
||||
sys.modules["interface.modification"] = sub_mod
|
||||
|
||||
@@ -15,7 +15,7 @@ from support.helpers import timestamp, pad_title
|
||||
from support.config import config
|
||||
from support.lib import Plex
|
||||
from support.storage import reset_storage, log_storage
|
||||
from support.background import scheduler
|
||||
from support.scheduler import scheduler
|
||||
|
||||
|
||||
@route(PREFIX + '/advanced')
|
||||
@@ -148,24 +148,34 @@ def TriggerStorageMaintenance(randomize=None):
|
||||
|
||||
@route(PREFIX + '/get_logs_link')
|
||||
def GetLogsLink():
|
||||
if not config.plex_token:
|
||||
oc = ObjectContainer(title2="Download Logs", no_cache=True, no_history=True,
|
||||
header="Sorry, feature unavailable",
|
||||
message="Universal Plex token not available")
|
||||
return oc
|
||||
|
||||
# try getting the link base via the request in context, first, otherwise use the public ip
|
||||
req_headers = Core.sandbox.context.request.headers
|
||||
get_external_ip = True
|
||||
link_base = ""
|
||||
|
||||
if "Origin" in req_headers:
|
||||
link_base = req_headers["Origin"]
|
||||
Log.Debug("Using origin-based link_base")
|
||||
get_external_ip = False
|
||||
|
||||
elif "Referer" in req_headers:
|
||||
parsed = urlparse.urlparse(req_headers["Referer"])
|
||||
link_base = "%s://%s:%s" % (parsed.scheme, parsed.hostname, parsed.port)
|
||||
Log.Debug("Using referer-based link_base")
|
||||
get_external_ip = False
|
||||
|
||||
else:
|
||||
if get_external_ip or "plex.tv" in link_base:
|
||||
ip = Core.networking.http_request("http://www.plexapp.com/ip.php", cacheTime=7200).content.strip()
|
||||
link_base = "https://%s:32400" % ip
|
||||
Log.Debug("Using ip-based fallback link_base")
|
||||
|
||||
logs_link = "%s%s?X-Plex-Token=%s" % (link_base, PREFIX + '/logs', config.universal_plex_token)
|
||||
logs_link = "%s%s?X-Plex-Token=%s" % (link_base, PREFIX + '/logs', config.plex_token)
|
||||
oc = ObjectContainer(title2="Download Logs", no_cache=True, no_history=True,
|
||||
header="Copy this link and open this in your browser, please",
|
||||
message=logs_link)
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
# coding=utf-8
|
||||
import os
|
||||
|
||||
from subzero.constants import PREFIX
|
||||
from sub_mod import SubtitleModificationsMenu
|
||||
from menu_helpers import debounce, SubFolderObjectContainer, default_thumb, add_ignore_options, get_item_task_data, \
|
||||
set_refresh_menu_state
|
||||
|
||||
from refresh_item import RefreshItem
|
||||
from support.helpers import timestamp, cast_bool, df, get_language
|
||||
from support.items import get_item_kind_from_rating_key, get_item
|
||||
from support.plex_media import get_plex_metadata, scan_videos
|
||||
from support.lib import Plex
|
||||
from support.storage import get_subtitle_storage
|
||||
from subzero.constants import PREFIX
|
||||
from support.config import config
|
||||
from support.background import scheduler
|
||||
from support.helpers import timestamp, cast_bool, df, get_language
|
||||
from support.items import get_item_kind_from_rating_key, get_item, get_current_sub
|
||||
from support.lib import Plex
|
||||
from support.plex_media import get_plex_metadata, scan_videos
|
||||
from support.scheduler import scheduler
|
||||
from support.storage import get_subtitle_storage
|
||||
|
||||
|
||||
@route(PREFIX + '/item/{rating_key}/actions')
|
||||
@@ -89,12 +91,12 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
current_sub.score, current_sub.storage_type)
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_title=item_title, language=lang, current_id=current_sub_id,
|
||||
key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
|
||||
item_title=item_title, language=lang, language_name=lang.name, current_id=current_sub_id,
|
||||
item_type=plex_item.type, filename=filename, current_data=summary,
|
||||
randomize=timestamp(), current_provider=current_sub_provider_name,
|
||||
current_score=current_score),
|
||||
title=u"List %s subtitles" % lang.name,
|
||||
title=u"Actions for %s subtitle" % lang.name,
|
||||
summary=summary
|
||||
))
|
||||
|
||||
@@ -103,10 +105,43 @@ def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, ra
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/current_sub/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleOptionsMenu(**kwargs):
|
||||
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
language = kwargs["language"]
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
kwargs.pop("randomize")
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
|
||||
title=kwargs["title"], randomize=timestamp()),
|
||||
title=u"Back to: %s" % kwargs["title"],
|
||||
summary=kwargs["current_data"],
|
||||
thumb=default_thumb
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(ListAvailableSubsForItemMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"List %s subtitles" % kwargs["language_name"],
|
||||
summary=kwargs["current_data"]
|
||||
))
|
||||
if current_sub:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleModificationsMenu, randomize=timestamp(), **kwargs),
|
||||
title=u"Modify %s subtitle" % kwargs["language_name"],
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
))
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/search/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def ListAvailableSubsForItemMenu(rating_key=None, part_id=None, title=None, item_title=None, filename=None,
|
||||
item_type="episode", language=None, force=False, current_id=None, current_data=None,
|
||||
item_type="episode", language=None, language_name=None, force=False, current_id=None,
|
||||
current_data=None,
|
||||
current_provider=None, current_score=None, randomize=None):
|
||||
assert rating_key, part_id
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
from subzero.constants import PREFIX, TITLE, ART
|
||||
from support.config import config
|
||||
from support.helpers import pad_title, timestamp, df
|
||||
from support.background import scheduler
|
||||
from support.scheduler import scheduler
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_item_thumb, get_on_deck_items, get_all_items, get_items_info
|
||||
from menu_helpers import main_icon, debounce, SubFolderObjectContainer, default_thumb, dig_tree, add_ignore_options
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
# coding=utf-8
|
||||
import logging
|
||||
import os
|
||||
|
||||
import logger
|
||||
|
||||
from item_details import ItemDetailsMenu
|
||||
@@ -9,9 +11,9 @@ from menu_helpers import add_ignore_options, dig_tree, set_refresh_menu_state, \
|
||||
from main import fatality, IgnoreMenu
|
||||
from advanced import DispatchRestart
|
||||
from subzero.constants import ART, PREFIX, DEPENDENCY_MODULE_NAMES
|
||||
from support.background import scheduler
|
||||
from support.scheduler import scheduler
|
||||
from support.config import config
|
||||
from support.helpers import timestamp, df
|
||||
from support.helpers import timestamp, df
|
||||
from support.ignore import ignore_list
|
||||
from support.items import get_all_items, get_items_info, \
|
||||
get_item_kind_from_rating_key
|
||||
@@ -147,7 +149,6 @@ def RefreshMissing(randomize=None):
|
||||
@route(PREFIX + '/ValidatePrefs', enforce_route=True)
|
||||
def ValidatePrefs():
|
||||
Core.log.setLevel(logging.DEBUG)
|
||||
Log.Debug("Validate Prefs called.")
|
||||
|
||||
# cache the channel state
|
||||
update_dict = False
|
||||
@@ -182,9 +183,44 @@ def ValidatePrefs():
|
||||
Core.log.removeHandler(logger.console_handler)
|
||||
Log.Debug("Stop logging to console")
|
||||
|
||||
Log.Debug("Validate Prefs called.")
|
||||
|
||||
# SZ config debug
|
||||
Log.Debug("--- SZ Config-Debug ---")
|
||||
for attr in [
|
||||
"app_support_path", "data_path", "data_items_path", "enable_agent",
|
||||
"enable_channel", "permissions_ok", "missing_permissions", "fs_encoding"]:
|
||||
Log.Debug("config.%s: %s", attr, getattr(config, attr))
|
||||
|
||||
for attr in ["plugin_log_path", "server_log_path"]:
|
||||
value = getattr(config, attr)
|
||||
access = os.access(value, os.R_OK)
|
||||
if Core.runtime.os == "Windows":
|
||||
try:
|
||||
f = open(value, "r")
|
||||
f.read(1)
|
||||
f.close()
|
||||
except:
|
||||
access = False
|
||||
|
||||
Log.Debug("config.%s: %s (accessible: %s)", attr, value, access)
|
||||
|
||||
# fixme: check existance of and os access of logs
|
||||
Log.Debug("Platform: %s", Core.runtime.platform)
|
||||
Log.Debug("OS: %s", Core.runtime.os)
|
||||
Log.Debug("----- Environment -----")
|
||||
for key, value in os.environ.iteritems():
|
||||
if key.startswith("PLEX"):
|
||||
if "TOKEN" in key:
|
||||
outval = "xxxxxxxxxxxxxxxxxxx"
|
||||
|
||||
else:
|
||||
outval = value
|
||||
Log.Debug("%s: %s", key, outval)
|
||||
Log.Debug("-----------------------")
|
||||
|
||||
Log.Debug("Setting log-level to %s", Prefs["log_level"])
|
||||
logger.register_logging_handler(DEPENDENCY_MODULE_NAMES, level=Prefs["log_level"])
|
||||
Core.log.setLevel(logging.getLevelName(Prefs["log_level"]))
|
||||
|
||||
return
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ from support.ignore import ignore_list
|
||||
from support.lib import get_intent
|
||||
from support.config import config
|
||||
from subzero.constants import ICON_SUB, ICON
|
||||
from support.background import scheduler
|
||||
from support.scheduler import scheduler
|
||||
|
||||
default_thumb = R(ICON_SUB)
|
||||
main_icon = ICON if not config.is_development else "icon-dev.jpg"
|
||||
|
||||
@@ -0,0 +1,231 @@
|
||||
# coding=utf-8
|
||||
|
||||
import traceback
|
||||
import types
|
||||
|
||||
from babelfish import Language
|
||||
|
||||
from menu_helpers import debounce, SubFolderObjectContainer
|
||||
from subliminal_patch import PatchedSubtitle as Subtitle
|
||||
from subzero.modification import registry as mod_registry, SubtitleModifications
|
||||
from subzero.constants import PREFIX
|
||||
from support.plex_media import get_plex_metadata, scan_videos
|
||||
from support.storage import save_subtitles
|
||||
from support.helpers import timestamp, pad_title
|
||||
from support.items import get_current_sub
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_mods/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleModificationsMenu(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
language = kwargs["language"]
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
kwargs.pop("randomize")
|
||||
|
||||
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
|
||||
for identifier, mod in mod_registry.mods.iteritems():
|
||||
if mod.advanced:
|
||||
continue
|
||||
|
||||
if mod.exclusive and identifier in current_sub.mods:
|
||||
continue
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=identifier, mode="add", randomize=timestamp(), **kwargs),
|
||||
title=pad_title(mod.description), summary=mod.long_description or ""
|
||||
))
|
||||
|
||||
fps_mod = SubtitleModifications.get_mod_class("change_FPS")
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleFPSModMenu, randomize=timestamp(), **kwargs),
|
||||
title=pad_title(fps_mod.description), summary=fps_mod.long_description or ""
|
||||
))
|
||||
|
||||
shift_mod = SubtitleModifications.get_mod_class("shift_offset")
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
|
||||
title=pad_title(shift_mod.description), summary=shift_mod.long_description or ""
|
||||
))
|
||||
|
||||
if current_sub.mods:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=None, mode="remove_last", randomize=timestamp(), **kwargs),
|
||||
title=pad_title("Remove last applied mod (%s)" % current_sub.mods[-1]),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
))
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleListMods, randomize=timestamp(), **kwargs),
|
||||
title=pad_title("Manage applied mods"),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_sub.mods))
|
||||
))
|
||||
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=None, mode="clear", randomize=timestamp(), **kwargs),
|
||||
title=pad_title("Restore original version"),
|
||||
summary=u"Currently applied mods: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
))
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_mod_fps/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleFPSModMenu(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
item_type = kwargs["item_type"]
|
||||
|
||||
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
|
||||
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
target_fps = plex_part.fps
|
||||
|
||||
kwargs.pop("randomize")
|
||||
|
||||
for fps in ["23.976", "24.000", "25.000", "29.970", "30.000", "50.000", "59.940", "60.000"]:
|
||||
if float(fps) == float(target_fps):
|
||||
continue
|
||||
|
||||
if float(fps) > float(target_fps):
|
||||
indicator = "subs constantly getting faster"
|
||||
else:
|
||||
indicator = "subs constantly getting slower"
|
||||
|
||||
mod_ident = SubtitleModifications.get_mod_signature("change_FPS", **{"from": fps, "to": target_fps})
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs),
|
||||
title="%s fps -> %s fps (%s)" % (fps, target_fps, indicator)
|
||||
))
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
POSSIBLE_UNITS = (("ms", "milliseconds"), ("s", "seconds"), ("m", "minutes"), ("h", "hours"))
|
||||
POSSIBLE_UNITS_D = dict(POSSIBLE_UNITS)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_mod_shift_unit/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleShiftModUnitMenu(**kwargs):
|
||||
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
|
||||
|
||||
kwargs.pop("randomize")
|
||||
|
||||
for unit, title in POSSIBLE_UNITS:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleShiftModMenu, unit=unit, randomize=timestamp(), **kwargs),
|
||||
title="Adjust by %s" % title
|
||||
))
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_mod_shift/{rating_key}/{part_id}/{unit}', force=bool)
|
||||
@debounce
|
||||
def SubtitleShiftModMenu(unit=None, **kwargs):
|
||||
if unit not in POSSIBLE_UNITS_D:
|
||||
raise NotImplementedError
|
||||
|
||||
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
|
||||
|
||||
kwargs.pop("randomize")
|
||||
|
||||
rng = []
|
||||
if unit == "h":
|
||||
rng = range(-10, 11)
|
||||
elif unit in ("m", "s"):
|
||||
rng = range(-59, 60)
|
||||
elif unit == "ms":
|
||||
rng = range(-900, 1000, 100)
|
||||
|
||||
for i in rng:
|
||||
if i == 0:
|
||||
continue
|
||||
|
||||
mod_ident = SubtitleModifications.get_mod_signature("shift_offset", **{unit: i})
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=mod_ident, mode="add", randomize=timestamp(), **kwargs),
|
||||
title="%s %s" % (("%s" if i < 0 else "+%s") % i, unit)
|
||||
))
|
||||
|
||||
return oc
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_set_mods/{rating_key}/{part_id}/{mods}/{mode}', force=bool)
|
||||
@debounce
|
||||
def SubtitleSetMods(mods=None, mode=None, **kwargs):
|
||||
if not isinstance(mods, types.ListType) and mods:
|
||||
mods = [mods]
|
||||
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
lang_a2 = kwargs["language"]
|
||||
item_type = kwargs["item_type"]
|
||||
|
||||
language = Language.fromietf(lang_a2)
|
||||
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
if mode == "add":
|
||||
for mod in mods:
|
||||
identifier, args = SubtitleModifications.parse_identifier(mod)
|
||||
if identifier not in mod_registry.mods_available:
|
||||
raise NotImplementedError("Mod unknown or not registered")
|
||||
|
||||
current_sub.add_mod(mod)
|
||||
elif mode == "clear":
|
||||
current_sub.add_mod(None)
|
||||
elif mode == "remove":
|
||||
for mod in mods:
|
||||
current_sub.mods.remove(mod)
|
||||
|
||||
elif mode == "remove_last":
|
||||
if current_sub.mods:
|
||||
current_sub.mods.pop()
|
||||
else:
|
||||
raise NotImplementedError("Wrong mode given")
|
||||
storage.save(stored_subs)
|
||||
|
||||
metadata = get_plex_metadata(rating_key, part_id, item_type)
|
||||
scanned_parts = scan_videos([metadata], kind="series" if item_type == "episode" else "movie", ignore_all=True)
|
||||
video, plex_part = scanned_parts.items()[0]
|
||||
|
||||
subtitle = Subtitle(language, mods=current_sub.mods)
|
||||
subtitle.content = current_sub.content
|
||||
subtitle.plex_media_fps = plex_part.fps
|
||||
subtitle.page_link = "modify subtitles with: %s" % (", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
subtitle.language = language
|
||||
|
||||
try:
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode="m", bare_save=True)
|
||||
Log.Debug("Modified %s subtitle for: %s:%s with: %s", language.name, rating_key, part_id,
|
||||
", ".join(current_sub.mods) if current_sub.mods else "none")
|
||||
except:
|
||||
Log.Error("Something went wrong when modifying subtitle: %s", traceback.format_exc())
|
||||
|
||||
kwargs.pop("randomize")
|
||||
return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
|
||||
|
||||
|
||||
@route(PREFIX + '/item/sub_list_mods/{rating_key}/{part_id}', force=bool)
|
||||
@debounce
|
||||
def SubtitleListMods(**kwargs):
|
||||
rating_key = kwargs["rating_key"]
|
||||
part_id = kwargs["part_id"]
|
||||
language = kwargs["language"]
|
||||
current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
|
||||
|
||||
kwargs.pop("randomize")
|
||||
|
||||
oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
|
||||
for identifier in current_sub.mods:
|
||||
oc.add(DirectoryObject(
|
||||
key=Callback(SubtitleSetMods, mods=identifier, mode="remove", randomize=timestamp(), **kwargs),
|
||||
title="Remove: %s" % identifier
|
||||
))
|
||||
|
||||
return oc
|
||||
@@ -32,9 +32,9 @@ import missing_subtitles
|
||||
|
||||
sys.modules["support.missing_subtitles"] = missing_subtitles
|
||||
|
||||
import background
|
||||
import scheduler
|
||||
|
||||
sys.modules["support.background"] = background
|
||||
sys.modules["support.scheduler"] = scheduler
|
||||
|
||||
import tasks
|
||||
|
||||
|
||||
@@ -11,9 +11,9 @@ class PlexActivityManager(object):
|
||||
def start(self):
|
||||
activity_sources_enabled = None
|
||||
|
||||
if config.universal_plex_token:
|
||||
if config.plex_token:
|
||||
from plex import Plex
|
||||
Plex.configuration.defaults.authentication(config.universal_plex_token)
|
||||
Plex.configuration.defaults.authentication(config.plex_token)
|
||||
activity_sources_enabled = ["websocket"]
|
||||
Activity.on('websocket.playing', self.on_playing)
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ import datetime
|
||||
import subliminal
|
||||
import subliminal_patch
|
||||
from babelfish import Language
|
||||
from subliminal.cli import MutexLock
|
||||
from subzero.lib.io import FileIO, get_viable_encoding
|
||||
from subzero.constants import PLUGIN_NAME, PLUGIN_IDENTIFIER, MOVIE, SHOW
|
||||
from lib import Plex
|
||||
@@ -45,6 +46,7 @@ class Config(object):
|
||||
data_path = None
|
||||
data_items_path = None
|
||||
universal_plex_token = None
|
||||
plex_token = None
|
||||
is_development = False
|
||||
|
||||
enable_channel = True
|
||||
@@ -67,12 +69,15 @@ class Config(object):
|
||||
notify_executable = None
|
||||
sections = None
|
||||
enabled_sections = None
|
||||
remove_hi = False
|
||||
fix_ocr = False
|
||||
enforce_encoding = False
|
||||
chmod = None
|
||||
forced_only = False
|
||||
exotic_ext = False
|
||||
treat_und_as_first = False
|
||||
ext_match_strictness = False
|
||||
default_mods = None
|
||||
use_activities = False
|
||||
activity_mode = None
|
||||
|
||||
@@ -89,6 +94,7 @@ class Config(object):
|
||||
self.data_path = getattr(Data, "_core").storage.data_path
|
||||
self.data_items_path = os.path.join(self.data_path, "DataItems")
|
||||
self.universal_plex_token = self.get_universal_plex_token()
|
||||
self.plex_token = os.environ.get("PLEXTOKEN", self.universal_plex_token)
|
||||
|
||||
self.set_plugin_mode()
|
||||
self.set_plugin_lock()
|
||||
@@ -106,14 +112,32 @@ class Config(object):
|
||||
self.enabled_sections = self.check_enabled_sections()
|
||||
self.permissions_ok = self.check_permissions()
|
||||
self.notify_executable = self.check_notify_executable()
|
||||
self.remove_hi = cast_bool(Prefs['subtitles.remove_hi'])
|
||||
self.fix_ocr = cast_bool(Prefs['subtitles.fix_ocr'])
|
||||
self.enforce_encoding = cast_bool(Prefs['subtitles.enforce_encoding'])
|
||||
self.chmod = self.check_chmod()
|
||||
self.forced_only = cast_bool(Prefs["subtitles.only_foreign"])
|
||||
self.exotic_ext = cast_bool(Prefs["subtitles.scan.exotic_ext"])
|
||||
self.treat_und_as_first = cast_bool(Prefs["subtitles.language.treat_und_as_first"])
|
||||
self.ext_match_strictness = self.determine_ext_sub_strictness()
|
||||
self.default_mods = self.get_default_mods()
|
||||
self.initialized = True
|
||||
|
||||
def init_cache(self):
|
||||
use_fallback_cache = True
|
||||
if Core.runtime.os != "Windows":
|
||||
try:
|
||||
subliminal.region.configure('dogpile.cache.dbm', expiration_time=datetime.timedelta(days=30),
|
||||
arguments={'filename': os.path.join(config.data_items_path, 'subzero.dbm'),
|
||||
'lock_factory': MutexLock})
|
||||
use_fallback_cache = False
|
||||
except:
|
||||
pass
|
||||
|
||||
if use_fallback_cache:
|
||||
Log.Warn("Not using file based cache!")
|
||||
subliminal.region.configure('dogpile.cache.memory')
|
||||
|
||||
def set_log_paths(self):
|
||||
# find log handler
|
||||
for handler in Core.log.handlers:
|
||||
@@ -140,6 +164,8 @@ class Config(object):
|
||||
else:
|
||||
Log("Did NOT find Preferences file - please check logfile and hierarchy. Aborting!")
|
||||
|
||||
# fixme: windows
|
||||
|
||||
def set_plugin_mode(self):
|
||||
if Prefs["plugin_mode"] == "only agent":
|
||||
self.enable_channel = False
|
||||
@@ -404,6 +430,15 @@ class Config(object):
|
||||
return "loose"
|
||||
return "strict"
|
||||
|
||||
def get_default_mods(self):
|
||||
mods = []
|
||||
if self.remove_hi:
|
||||
mods.append("remove_HI")
|
||||
if self.fix_ocr:
|
||||
mods.append("OCR_fixes")
|
||||
|
||||
return mods
|
||||
|
||||
def set_activity_modes(self):
|
||||
val = Prefs["activity.on_playback"]
|
||||
if val == "never":
|
||||
|
||||
@@ -110,9 +110,9 @@ def str_pad(s, length, align='left', pad_char=' ', trim=False):
|
||||
raise ValueError("Unknown align type, expected either 'left' or 'right'")
|
||||
|
||||
|
||||
def pad_title(value):
|
||||
def pad_title(value, width=49):
|
||||
"""Pad a title to 30 characters to force the 'details' view."""
|
||||
return str_pad(value, 49, pad_char=' ')
|
||||
return str_pad(value, width, pad_char=' ')
|
||||
|
||||
|
||||
def get_plex_item_display_title(item, kind, parent=None, parent_title=None, section_title=None,
|
||||
|
||||
@@ -283,3 +283,13 @@ def refresh_item(rating_key, force=False, timeout=8000, refresh_kind=None, paren
|
||||
for key in refresh:
|
||||
Log.Info("%s item %s", "Refreshing" if not force else "Forced-refreshing", key)
|
||||
Plex["library/metadata"].refresh(key)
|
||||
|
||||
|
||||
def get_current_sub(rating_key, part_id, language):
|
||||
from support.storage import get_subtitle_storage
|
||||
|
||||
item = get_item(rating_key)
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
stored_subs = subtitle_storage.load_or_new(item)
|
||||
current_sub = stored_subs.get_any(part_id, language)
|
||||
return current_sub, stored_subs, subtitle_storage
|
||||
Executable → Regular
@@ -6,16 +6,17 @@ import pprint
|
||||
import copy
|
||||
|
||||
import subliminal
|
||||
from items import get_item
|
||||
from subzero.subtitle_storage import StoredSubtitlesManager
|
||||
|
||||
from subtitlehelpers import force_utf8
|
||||
from config import config
|
||||
from helpers import notify_executable, get_title_for_video_metadata, cast_bool, force_unicode
|
||||
from plex_media import PMSMediaProxy
|
||||
from support.items import get_item
|
||||
|
||||
|
||||
get_subtitle_storage = lambda: StoredSubtitlesManager(Data, get_item)
|
||||
def get_subtitle_storage():
|
||||
return StoredSubtitlesManager(Data, get_item)
|
||||
|
||||
|
||||
def whack_missing_parts(scanned_video_part_map, existing_parts=None):
|
||||
@@ -156,9 +157,29 @@ def save_subtitles_to_metadata(videos, subtitles):
|
||||
return True
|
||||
|
||||
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a"):
|
||||
def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a", bare_save=False, mods=None):
|
||||
"""
|
||||
|
||||
:param scanned_video_part_map:
|
||||
:param downloaded_subtitles:
|
||||
:param mode:
|
||||
:param bare_save: don't trigger anything; don't store information
|
||||
:param mods: enabled mods
|
||||
:return:
|
||||
"""
|
||||
meta_fallback = False
|
||||
save_successful = False
|
||||
|
||||
if mods:
|
||||
for video, video_subtitles in downloaded_subtitles.items():
|
||||
if not video_subtitles:
|
||||
continue
|
||||
|
||||
for subtitle in video_subtitles:
|
||||
Log.Info("Applying mods: %s to %s", mods, subtitle)
|
||||
subtitle.mods = mods
|
||||
subtitle.plex_media_fps = video.fps
|
||||
|
||||
storage = "metadata"
|
||||
if Prefs['subtitles.save.filesystem']:
|
||||
storage = "filesystem"
|
||||
@@ -180,8 +201,9 @@ def save_subtitles(scanned_video_part_map, downloaded_subtitles, mode="a"):
|
||||
Log.Debug("Using metadata as subtitle storage")
|
||||
save_successful = save_subtitles_to_metadata(scanned_video_part_map, downloaded_subtitles)
|
||||
|
||||
if save_successful and config.notify_executable:
|
||||
if not bare_save and save_successful and config.notify_executable:
|
||||
notify_executable(config.notify_executable, scanned_video_part_map, downloaded_subtitles, storage)
|
||||
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode)
|
||||
if not bare_save:
|
||||
store_subtitle_info(scanned_video_part_map, downloaded_subtitles, storage, mode=mode)
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ from subliminal import list_subtitles as list_all_subtitles
|
||||
from babelfish import Language
|
||||
|
||||
from missing_subtitles import items_get_all_missing_subs, refresh_item
|
||||
from background import scheduler
|
||||
from scheduler import scheduler
|
||||
from storage import save_subtitles, whack_missing_parts, get_subtitle_storage
|
||||
from support.config import config
|
||||
from support.items import get_recent_items, is_ignored, get_item
|
||||
@@ -80,6 +80,7 @@ class Task(object):
|
||||
return
|
||||
|
||||
def run(self):
|
||||
Log.Info(u"Task: running: %s", self.name)
|
||||
self.time_start = datetime.datetime.now()
|
||||
|
||||
def post_run(self, data_holder):
|
||||
@@ -88,6 +89,7 @@ class Task(object):
|
||||
if self.time_start:
|
||||
self.last_run_time = self.last_run - self.time_start
|
||||
self.time_start = None
|
||||
Log.Info(u"Task: ran: %s", self.name)
|
||||
|
||||
|
||||
class SearchAllRecentlyAddedMissing(Task):
|
||||
@@ -248,7 +250,7 @@ class DownloadSubtitleMixin(object):
|
||||
if subtitle.content:
|
||||
try:
|
||||
whack_missing_parts(scanned_parts)
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode=mode)
|
||||
save_subtitles(scanned_parts, {video: [subtitle]}, mode=mode, mods=config.default_mods)
|
||||
Log.Debug("Manually downloaded subtitle for: %s", rating_key)
|
||||
download_successful = True
|
||||
refresh_item(rating_key)
|
||||
@@ -362,13 +364,21 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
return
|
||||
|
||||
now = datetime.datetime.now()
|
||||
min_score_series = int(Prefs["subtitles.search.minimumTVScore2"].strip())
|
||||
min_score_movies = int(Prefs["subtitles.search.minimumMovieScore2"].strip())
|
||||
|
||||
subtitle_storage = get_subtitle_storage()
|
||||
recent_subs = subtitle_storage.load_recent_files(age_days=max_search_days)
|
||||
|
||||
for fn, stored_subs in recent_subs.iteritems():
|
||||
video_id = stored_subs.video_id
|
||||
cutoff = self.series_cutoff if stored_subs.item_type == "episode" else self.movies_cutoff
|
||||
|
||||
if stored_subs.item_type == "episode":
|
||||
cutoff = self.series_cutoff
|
||||
min_score = min_score_series
|
||||
else:
|
||||
cutoff = self.movies_cutoff
|
||||
min_score = min_score_movies
|
||||
|
||||
# don't search for better subtitles until at least 30 minutes have passed
|
||||
if stored_subs.added_at + datetime.timedelta(minutes=30) > now:
|
||||
@@ -420,7 +430,7 @@ class FindBetterSubtitles(DownloadSubtitleMixin, SubtitleListingMixin, Task):
|
||||
better_downloaded = False
|
||||
better_tried_download = 0
|
||||
for sub in subs:
|
||||
if sub.score > current_score:
|
||||
if sub.score > current_score and sub.score > min_score:
|
||||
Log.Debug("Better subtitle found for %s, downloading", video_id)
|
||||
better_tried_download += 1
|
||||
ret = self.download_subtitle(sub, video_id, mode="b")
|
||||
|
||||
@@ -395,6 +395,18 @@
|
||||
],
|
||||
"default": "don't prefer"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.remove_hi",
|
||||
"label": "Remove Hearing Impaired tags from downloaded subtitles",
|
||||
"type": "bool",
|
||||
"default": "false"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.fix_ocr",
|
||||
"label": "Fix common OCR errors in downloaded subtitles",
|
||||
"type": "bool",
|
||||
"default": "true"
|
||||
},
|
||||
{
|
||||
"id": "subtitles.enforce_encoding",
|
||||
"label": "Normalize subtitle encoding to UTF-8",
|
||||
|
||||
+2
-2
@@ -13,7 +13,7 @@
|
||||
<key>CFBundleSignature</key>
|
||||
<string>????</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>2.0.0.9</string>
|
||||
<string>2.0.0.13</string>
|
||||
<key>PlexFrameworkVersion</key>
|
||||
<string>2</string>
|
||||
<key>PlexPluginClass</key>
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
<h1>Sub-Zero for Plex</h1><i>Subtitles done right</i>
|
||||
|
||||
Version 2.0.0.9 DEV
|
||||
Version 2.0.0.13 DEV
|
||||
|
||||
Originally based on @bramwalet's awesome <a href="https://github.com/bramwalet/Subliminal.bundle">Subliminal.bundle</a>
|
||||
|
||||
|
||||
@@ -369,7 +369,8 @@ class Chapter(object):
|
||||
if chapterdisplays:
|
||||
string = chapterdisplays[0].get('ChapString')
|
||||
language = chapterdisplays[0].get('ChapLanguage')
|
||||
return cls(start, hidden, enabled, end, string, language)
|
||||
return cls(start, hidden, enabled, end, string, language)
|
||||
return cls(start, hidden, enabled, end)
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s [%s, enabled=%s]>' % (self.__class__.__name__, self.start, self.enabled)
|
||||
|
||||
@@ -515,7 +515,7 @@ def save_subtitles(video, subtitles, single=False, directory=None, encoding=None
|
||||
|
||||
# save normalized subtitle if encoder or no encoding is given
|
||||
if has_encoder or encoding is None:
|
||||
content = encode_with(subtitle.text) if has_encoder else subtitle.content
|
||||
content = encode_with(subtitle.get_modified_text()) if has_encoder else subtitle.get_modified_content()
|
||||
with io.open(subtitle_path, 'wb') as f:
|
||||
f.write(content)
|
||||
|
||||
@@ -530,7 +530,7 @@ def save_subtitles(video, subtitles, single=False, directory=None, encoding=None
|
||||
# save subtitle if encoding given
|
||||
if encoding is not None:
|
||||
with io.open(subtitle_path, 'w', encoding=encoding) as f:
|
||||
f.write(subtitle.text)
|
||||
f.write(subtitle.get_modified_text())
|
||||
|
||||
# change chmod if requested
|
||||
if chmod:
|
||||
|
||||
@@ -74,7 +74,7 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
|
||||
logger.info("Only searching for foreign/forced subtitles")
|
||||
|
||||
super(OpenSubtitlesProvider, self).__init__()
|
||||
self.server = ServerProxy('https://api.opensubtitles.org/xml-rpc', TimeoutSafeTransport(10))
|
||||
self.server = ServerProxy('https://api.opensubtitles.org/xml-rpc', TimeoutSafeTransport(4))
|
||||
|
||||
def initialize(self):
|
||||
logger.info('Logging in')
|
||||
|
||||
@@ -7,6 +7,7 @@ import chardet
|
||||
import pysrt
|
||||
import pysubs2
|
||||
from bs4 import UnicodeDammit
|
||||
from subzero.modification import SubtitleModifications
|
||||
from subliminal import Subtitle
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -17,6 +18,13 @@ class PatchedSubtitle(Subtitle):
|
||||
release_info = None
|
||||
matches = None
|
||||
hash_verifiable = False
|
||||
mods = None
|
||||
plex_media_fps = None
|
||||
|
||||
def __init__(self, language, hearing_impaired=False, page_link=None, encoding=None, mods=None):
    """
    Initialise the subtitle and remember the modifications requested for it.

    Every argument except ``mods`` is forwarded unchanged to the parent initialiser.

    :param mods: stored as ``self.mods``; presumably an iterable of modification identifiers (verify against
                 callers)
    """
    super(PatchedSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link,
                                          encoding=encoding)
    self.mods = mods
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s %r [%s]>' % (
|
||||
@@ -126,4 +134,26 @@ class PatchedSubtitle(Subtitle):
|
||||
logger.exception("Couldn't convert subtitle %s to .srt format", self)
|
||||
return False
|
||||
|
||||
return True
|
||||
return True
|
||||
|
||||
def get_modified_content(self):
    """
    Return the subtitle content with ``self.mods`` applied.

    Without mods ``self.content`` is handed back untouched. Otherwise the subtitle text is loaded into a
    ``SubtitleModifications`` instance, the mods are run over it, and the result is rendered as SRT and
    encoded with the encoding reported by ``guess_encoding()``.

    :return: string
    """
    if self.mods:
        guessed_encoding = self.guess_encoding()

        modifier = SubtitleModifications()
        modifier.load(content=self.text, language=self.language)
        modifier.modify(*self.mods)

        rendered = modifier.to_string("srt", encoding=guessed_encoding)
        return rendered.encode(encoding=guessed_encoding)

    return self.content
|
||||
|
||||
def get_modified_text(self):
    """
    Return the modified subtitle as text.

    The bytes produced by ``get_modified_content()`` are decoded with the encoding reported by
    ``guess_encoding()``.

    :return: unicode
    """
    raw = self.get_modified_content()
    return raw.decode(encoding=self.guess_encoding())
|
||||
|
||||
@@ -1,8 +1,24 @@
|
||||
# coding=utf-8
|
||||
|
||||
from subzero.modification import SubMod, HearingImpaired
|
||||
import logging
|
||||
import sys
|
||||
|
||||
submod = SubMod("test.srt")
|
||||
submod.modify(HearingImpaired)
|
||||
from babelfish import Language
|
||||
|
||||
print submod.f.to_string("srt")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from subzero.modification import SubMod
|
||||
|
||||
fn = sys.argv[1]
|
||||
debug = "--debug" in sys.argv
|
||||
|
||||
if debug:
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
submod = SubMod(debug=debug)
|
||||
submod.load(fn, language=Language.fromietf("eng"))
|
||||
#submod.modify("remove_HI", "OCR_fixes")
|
||||
#submod.modify("OCR_fixes")
|
||||
submod.modify("change_FPS(from=24,to=25)")
|
||||
|
||||
#print submod.f.to_string("srt")
|
||||
|
||||
@@ -1,167 +0,0 @@
|
||||
# coding=utf-8
|
||||
|
||||
import re
|
||||
import traceback
|
||||
|
||||
import pysubs2
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SubtitleModifications(object):
    """
    Load a subtitle through pysubs2 and run modifications over its lines.

    The loaded pysubs2 file is kept in ``self.f``.

    NOTE(review): when loading fails, or when neither ``fn`` nor ``content`` is given, ``self.f`` is never
    assigned, so the other methods raise ``AttributeError`` - confirm whether callers rely on that.
    """

    def __init__(self, fn=None, content=None, fps=None):
        """
        :param fn: path of the subtitle file to load; takes precedence over ``content``
        :param content: subtitle data as a string, used when ``fn`` is not given
        :param fps: passed through to pysubs2 as ``fps``
        """
        try:
            if fn:
                self.f = pysubs2.load(fn, fps=fps)
            elif content:
                self.f = pysubs2.SSAFile.from_string(content, fps=fps)
        except (IOError,
                UnicodeDecodeError,
                pysubs2.exceptions.UnknownFPSError,
                pysubs2.exceptions.UnknownFormatIdentifierError,
                pysubs2.exceptions.FormatAutodetectionError):
            # loading problems are logged, not raised
            if fn:
                logger.exception("Couldn't load subtitle: %s: %s", fn, traceback.format_exc())
            elif content:
                logger.exception("Couldn't load subtitle: %s", traceback.format_exc())

    def modify(self, *mods):
        """
        Run every mod over every line, in order, and drop lines that end up empty.

        Each mod receives the text produced by the previous one. A line is kept exactly once, and it is
        removed as soon as any mod returns an empty result. Without mods the file is left untouched.

        :param mods: objects exposing ``modify(text)`` which return the new text or a falsy value
        """
        if not mods:
            return

        kept = []
        for line in self.f:
            text = line.text
            for mod in mods:
                text = mod.modify(text)
                if not text:
                    # nothing left to feed into the remaining mods
                    break

            if not text:
                logger.debug("deleting %s", line)
                continue

            line.text = text
            kept.append(line)

        self.f.events = kept

    def to_string(self, format="srt"):
        """
        :param format: pysubs2 format identifier
        :return: the subtitle serialised by pysubs2 in the given format
        """
        return self.f.to_string(format)

    def save(self, fn):
        """
        Write the subtitle to ``fn`` via pysubs2.
        """
        self.f.save(fn)
|
||||
|
||||
|
||||
SubMod = SubtitleModifications
|
||||
|
||||
|
||||
class Processor(object):
    """
    Common base of all processors; this default implementation changes nothing.
    """

    def process(self, content):
        """
        :param content: text to process
        :return: ``content`` exactly as it was passed in
        """
        untouched = content
        return untouched
|
||||
|
||||
|
||||
class StringProcessor(Processor):
    """
    Processor doing a plain (non-regex) string replacement.
    """

    def __init__(self, search, replace):
        """
        :param search: substring to look for
        :param replace: text put in place of every occurrence of ``search``
        """
        self.search, self.replace = search, replace

    def process(self, content):
        """
        :return: ``content`` with every occurrence of ``self.search`` replaced by ``self.replace``
        """
        needle, substitute = self.search, self.replace
        return content.replace(needle, substitute)
|
||||
|
||||
|
||||
class ReProcessor(Processor):
    """
    Processor substituting matches of a compiled regular expression.
    """
    # compiled pattern and its replacement; both are set per instance
    pattern = None
    replace_with = None

    def __init__(self, pattern, replace_with):
        """
        :param pattern: object offering ``sub(replacement, text)``, e.g. a compiled regex
        :param replace_with: replacement handed to ``pattern.sub``
        """
        self.pattern, self.replace_with = pattern, replace_with

    def process(self, content):
        """
        :return: result of ``self.pattern.sub(self.replace_with, content)``
        """
        substitute = self.pattern.sub
        return substitute(self.replace_with, content)
|
||||
|
||||
|
||||
class NReProcessor(ReProcessor):
    """
    Regex processor working on one subtitle line at a time.

    The content is split on the literal two-character separator ``\\N``, each part is run through the regex
    on its own, parts that come back empty are dropped, and the rest is joined with ``\\N`` again.
    """

    def process(self, content):
        """
        :return: the re-joined, individually processed parts of ``content``
        """
        run_regex = super(NReProcessor, self).process
        processed = (run_regex(part) for part in content.split(r"\N"))
        return r"\N".join(part for part in processed if part)
|
||||
|
||||
|
||||
class SubtitleModification(object):
    """
    A modification made of three processor lists, applied in the order pre, main, post.

    Subclasses fill ``pre_processors``, ``processors`` and ``post_processors`` with objects exposing
    ``process(content)``.
    """
    pre_processors = []
    processors = []
    post_processors = []

    @classmethod
    def _process(cls, content, processors):
        """
        Feed ``content`` through ``processors`` one after another.

        :return: the processed content, or None when ``content`` is empty to begin with
        """
        if content:
            result = content
            for step in processors:
                result = step.process(result)
            return result
        return None

    @classmethod
    def pre_process(cls, content):
        """Apply ``cls.pre_processors``."""
        return cls._process(content, cls.pre_processors)

    @classmethod
    def process(cls, content):
        """Apply ``cls.processors``."""
        return cls._process(content, cls.processors)

    @classmethod
    def post_process(cls, content):
        """Apply ``cls.post_processors``."""
        return cls._process(content, cls.post_processors)

    @classmethod
    def modify(cls, content):
        """
        Run the full pipeline over ``content``.

        :return: content after the pre, main and post stages; None once a stage received empty content
        """
        after_pre = cls.pre_process(content)
        after_main = cls.process(after_pre)
        return cls.post_process(after_main)
|
||||
|
||||
|
||||
class SubtitleTextModification(SubtitleModification):
    """
    Base for text modifications; its post processors tidy up what the main processors leave behind.
    """
    post_processors = [
        # empty tag: an opening and a closing tag with only whitespace/punctuation in between.
        # The dash is escaped so ",-_" is not read as a character range (which would include digits and
        # upper-case letters), letters are spelled out as A-Za-z because "A-z" also covers [ \ ] ^ _ and `,
        # and the closing tag accepts multi-letter names just like the opening one.
        ReProcessor(re.compile(r'(<[A-Za-z]+[^>]*>)[\s.,\-_!?]+(</[A-Za-z]+>)'), ""),

        # empty line (needed?)
        ReProcessor(re.compile(r'(?m)^\s+$'), ""),

        # empty dash line (needed?)
        ReProcessor(re.compile(r'(?m)(^[\s]*[\-]+[\s]*)$'), ""),

        # clean whitespace at start and end; the lazy group keeps inner whitespace, so content made of
        # several words is stripped as well
        ReProcessor(re.compile(r'(?s)^\s*(.*?)\s*$'), r"\1"),
    ]
|
||||
|
||||
|
||||
class HearingImpaired(SubtitleTextModification):
    """
    Removes hearing-impaired annotations: bracketed text, speaker labels and all-caps lines.
    """
    processors = [
        # brackets; lazy so that each bracketed span is removed on its own instead of everything between
        # the first opening and the last closing bracket
        ReProcessor(re.compile(r'(?sux)[([{].+?[)\]}]'), ""),

        # text before colon (and possible dash in front); the lookahead refuses a digit right after the
        # colon without consuming the character that follows
        ReProcessor(re.compile(r'(?mu)(^[A-Za-z\-]+[\w\s]*:(?![0-9])[\s]*)'), ""),

        # all caps line (at least 3 chars)
        NReProcessor(re.compile(r'(?mu)(^[A-Z\.\-_]{3,}$)'), ""),
    ]
|
||||
@@ -0,0 +1,5 @@
|
||||
# coding=utf-8
|
||||
|
||||
from registry import registry
|
||||
from mods import hearing_impaired, ocr_fixes, fps, offset
|
||||
from main import SubtitleModifications, SubMod
|
||||
@@ -0,0 +1,3 @@
|
||||
# coding=utf-8
|
||||
|
||||
from data import data
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,59 @@
|
||||
# coding=utf-8
|
||||
|
||||
import re
|
||||
import os
|
||||
import pprint
|
||||
from collections import OrderedDict
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
TEMPLATE = """\
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
data = """
|
||||
|
||||
TEMPLATE_END = """\
|
||||
|
||||
for lang, grps in data.iteritems():
|
||||
for grp in grps.iterkeys():
|
||||
if data[lang][grp]["pattern"]:
|
||||
data[lang][grp]["pattern"] = re.compile(data[lang][grp]["pattern"])
|
||||
"""
|
||||
|
||||
if __name__ == "__main__":
    # Build data.py next to this script from every xml/<lang>_OCRFixReplaceList.xml file.
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    xml_dir = os.path.join(cur_dir, "xml")
    file_list = os.listdir(xml_dir)

    def _alternation(d):
        # One non-capturing group holding every key as a whole word; the group makes an anchor placed
        # before or after it apply to all alternatives instead of only the first or last one.
        return u"(?:" + u"|".join([u"\\b" + re.escape(k) + u"\\b" for k in d.keys()]) + u")"

    # "(?um)" is the inline flag group (UNICODE + MULTILINE); a bare "(um)" would be a capture group
    # matching the literal text "um".
    fetch_data = (
        # group, item_name, pattern
        ("WholeLines", "Line", None),
        ("WholeWords", "Word", lambda d: (u"(?um)" + _alternation(d)) if d else None),
        ("PartialWordsAlways", "WordPart", None),
        ("PartialLines", "LinePart", None),
        ("BeginLines", "Beginning", lambda d: (u"(?um)^" + _alternation(d)) if d else None),
        ("EndLines", "Ending", lambda d: (u"(?um)" + _alternation(d) + u"$") if d else None),
    )

    data = {}

    for fn in file_list:
        if not fn.endswith("_OCRFixReplaceList.xml"):
            continue

        # the language code is the part of the file name in front of the first underscore
        lang = fn.split("_")[0]
        with open(os.path.join(xml_dir, fn)) as xml_file:
            soup = BeautifulSoup(xml_file, "xml")

        data[lang] = dict((grp, {"data": OrderedDict(), "pattern": None}) for grp, item_name, pattern in fetch_data)

        for grp, item_name, pattern in fetch_data:
            # collect the from -> to pairs of this group in document order
            for grp_data in soup.find_all(grp):
                for line in grp_data.find_all(item_name):
                    data[lang][grp]["data"][line["from"]] = line["to"]

            if pattern:
                # stored as a string here; the code in TEMPLATE_END compiles it when data.py is imported
                data[lang][grp]["pattern"] = pattern(data[lang][grp]["data"])

    with open(os.path.join(cur_dir, "data.py"), "w+") as f:
        f.write(TEMPLATE)
        f.write(pprint.pformat(data, width=1))
        f.write(TEMPLATE_END)
|
||||
@@ -0,0 +1,10 @@
|
||||
# coding=utf-8
|
||||
|
||||
from babelfish import Language
|
||||
from data import data
|
||||
|
||||
#for lang, data in data.iteritems():
|
||||
# print Language.fromietf(lang).alpha2
|
||||
|
||||
# Debug helper: print every key/value pair stored for Danish ("dan").
# NOTE(review): the names suggest find/replace pairs, but the entries may be per-group dicts - confirm
# against the generated data module.
for find, rep in data["dan"].iteritems():
    print find, rep
|
||||
+638
@@ -0,0 +1,638 @@
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="Haner" to="Han er" />
|
||||
<Word from="JaveL" to="Javel" />
|
||||
<Word from="Pa//e" to="Palle" />
|
||||
<Word from="bffte" to="bitte" />
|
||||
<Word from="Utro//gt" to="Utroligt" />
|
||||
<Word from="Kommerdu" to="Kommer du" />
|
||||
<Word from="smi/er" to="smiler" />
|
||||
<Word from="/eg" to="leg" />
|
||||
<Word from="harvinger" to="har vinger" />
|
||||
<Word from="/et" to="let" />
|
||||
<Word from="erjeres" to="er jeres" />
|
||||
<Word from="hardet" to="har det" />
|
||||
<Word from="tænktjer" to="tænkt jer" />
|
||||
<Word from="erjo" to="er jo" />
|
||||
<Word from="sti/" to="stil" />
|
||||
<Word from="Iappe" to="lappe" />
|
||||
<Word from="Beklagelç" to="Beklager," />
|
||||
<Word from="vardet" to="var det" />
|
||||
<Word from="afden" to="af den" />
|
||||
<Word from="snupperjeg" to="snupper jeg" />
|
||||
<Word from="ikkejeg" to="ikke jeg" />
|
||||
<Word from="bliverjeg" to="bliver jeg" />
|
||||
<Word from="hartravit" to="har travlt" />
|
||||
<Word from="pandekagef/ag" to="pandekageflag" />
|
||||
<Word from="Stormvarsell" to="Stormvarsel!" />
|
||||
<Word from="stormvejn" to="stormvejr." />
|
||||
<Word from="morgenkomp/et" to="morgenkomplet" />
|
||||
<Word from="/yv" to="lyv" />
|
||||
<Word from="varjo" to="var jo" />
|
||||
<Word from="/eger" to="leger" />
|
||||
<Word from="harjeg" to="har jeg" />
|
||||
<Word from="havdejeg" to="havde jeg" />
|
||||
<Word from="hvorjeg" to="hvor jeg" />
|
||||
<Word from="nårjeg" to="når jeg" />
|
||||
<Word from="gårvi" to="går vi" />
|
||||
<Word from="atjeg" to="at jeg" />
|
||||
<Word from="isine" to="i sine" />
|
||||
<Word from="fårjeg" to="får jeg" />
|
||||
<Word from="kærtighed" to="kærlighed" />
|
||||
<Word from="skullejeg" to="skulle jeg" />
|
||||
<Word from="laest" to="læst" />
|
||||
<Word from="laese" to="læse" />
|
||||
<Word from="gørjeg" to="gør jeg" />
|
||||
<Word from="gørvi" to="gør vi" />
|
||||
<Word from="angrerjo" to="angrer jo" />
|
||||
<Word from="Hvergang" to="Hver gang" />
|
||||
<Word from="erder" to="er der" />
|
||||
<Word from="villetilgive" to="ville tilgive" />
|
||||
<Word from="fieme" to="fjerne" />
|
||||
<Word from="genopståri" to="genopstår i" />
|
||||
<Word from="svigtejer" to="svigte jer" />
|
||||
<Word from="kommernu" to="kommer nu" />
|
||||
<Word from="nårman" to="når man" />
|
||||
<Word from="erfire" to="er fire" />
|
||||
<Word from="Hvorforfinderdu" to="Hvorfor finder du" />
|
||||
<Word from="undertigt" to="underligt" />
|
||||
<Word from="itroen" to="i troen" />
|
||||
<Word from="erløgnt" to="er løgn!" />
|
||||
<Word from="gørden" to="gør den" />
|
||||
<Word from="forhelvede" to="for helvede" />
|
||||
<Word from="hjpe" to="hjælpe" />
|
||||
<Word from="togeti" to="toget i" />
|
||||
<Word from="Måjeg" to="Må jeg" />
|
||||
<Word from="savnerjer" to="savner jer" />
|
||||
<Word from="erjeg" to="er jeg" />
|
||||
<Word from="vaere" to="være" />
|
||||
<Word from="geme" to="gerne" />
|
||||
<Word from="trorpå" to="tror på" />
|
||||
<Word from="forham" to="for ham" />
|
||||
<Word from="afham" to="af ham" />
|
||||
<Word from="harjo" to="har jo" />
|
||||
<Word from="ovemafiet" to="overnattet" />
|
||||
<Word from="begaefiighed" to="begærlighed" />
|
||||
<Word from="sy’g" to="syg" />
|
||||
<Word from="Imensjeg" to="Imens jeg" />
|
||||
<Word from="bliverdu" to="bliver du" />
|
||||
<Word from="fiser" to="fiser" />
|
||||
<Word from="manipuierer" to="manipulerer" />
|
||||
<Word from="forjeg" to="for jeg" />
|
||||
<Word from="iivgivendefor" to="livgivende for" />
|
||||
<Word from="formig" to="for mig" />
|
||||
<Word from="Hardu" to="Har du" />
|
||||
<Word from="fornold" to="forhold" />
|
||||
<Word from="defrelste" to="de frelste" />
|
||||
<Word from="Såjeg" to="Så jeg" />
|
||||
<Word from="varjeg" to="var jeg" />
|
||||
<Word from="gørved" to="gør ved" />
|
||||
<Word from="kalderjeg" to="kalder jeg" />
|
||||
<Word from="flytte" to="flytte" />
|
||||
<Word from="handlerdet" to="handler det" />
|
||||
<Word from="trorjeg" to="tror jeg" />
|
||||
<Word from="flytter" to="flytter" />
|
||||
<Word from="soverjeg" to="sover jeg" />
|
||||
<Word from="finderud" to="finder ud" />
|
||||
<Word from="naboerpå" to="naboer på" />
|
||||
<Word from="ervildt" to="er vildt" />
|
||||
<Word from="væreher" to="være her" />
|
||||
<Word from="hyggerjer" to="hygger jer" />
|
||||
<Word from="borjo" to="bor jo" />
|
||||
<Word from="kommerikke" to="kommer ikke" />
|
||||
<Word from="folkynde" to="forkynde" />
|
||||
<Word from="farglad" to="far glad" />
|
||||
<Word from="misterjeg" to="mister jeg" />
|
||||
<Word from="fint" to="fint" />
|
||||
<Word from="Harl" to="Har I" />
|
||||
<Word from="bedejer" to="bede jer" />
|
||||
<Word from="synesjeg" to="synes jeg" />
|
||||
<Word from="vartil" to="var til" />
|
||||
<Word from="eren" to="er en" />
|
||||
<Word from="\Al" to="Vil" />
|
||||
<Word from="\A" to="Vi" />
|
||||
<Word from="fjeme" to="fjerne" />
|
||||
<Word from="Iigefyldt" to="lige fyldt" />
|
||||
<Word from="ertil" to="er til" />
|
||||
<Word from="fafiigt" to="farligt" />
|
||||
<Word from="finder" to="finder" />
|
||||
<Word from="findes" to="findes" />
|
||||
<Word from="irettesaefielse" to="irettesættelse" />
|
||||
<Word from="ermed" to="er med" />
|
||||
<Word from="èn" to="én" />
|
||||
<Word from="gikjoi" to="gik jo i" />
|
||||
<Word from="Hvisjeg" to="Hvis jeg" />
|
||||
<Word from="ovemafier" to="overnatter" />
|
||||
<Word from="hoident" to="holdent" />
|
||||
<Word from="\Adne" to="Vidne" />
|
||||
<Word from="fori" to="for i" />
|
||||
<Word from="vei" to="vel" />
|
||||
<Word from="savnerjerjo" to="savner jer jo" />
|
||||
<Word from="elskerjer" to="elsker jer" />
|
||||
<Word from="harløjet" to="har løjet" />
|
||||
<Word from="eri" to="er i" />
|
||||
<Word from="fiende" to="fjende" />
|
||||
<Word from="derjo" to="der jo" />
|
||||
<Word from="sigerjo" to="siger jo" />
|
||||
<Word from="menerjeg" to="mener jeg" />
|
||||
<Word from="Harjeg" to="Har jeg" />
|
||||
<Word from="sigerjeg" to="siger jeg" />
|
||||
<Word from="splitterjeg" to="splitter jeg" />
|
||||
<Word from="erjournalist" to="er journalist" />
|
||||
<Word from="erjoumalist" to="er journalist" />
|
||||
<Word from="Forjeg" to="For jeg" />
|
||||
<Word from="gârjeg" to="går jeg" />
|
||||
<Word from="Nârjeg" to="Når jeg" />
|
||||
<Word from="afllom" to="afkom" />
|
||||
<Word from="farerjo" to="farer jo" />
|
||||
<Word from="tagerjeg" to="tager jeg" />
|
||||
<Word from="Virkerjeg" to="Virker jeg" />
|
||||
<Word from="morerjer" to="morer jer" />
|
||||
<Word from="kommerjo" to="kommer jo" />
|
||||
<Word from="istand" to="i stand" />
|
||||
<Word from="bøm" to="børn" />
|
||||
<Word from="frygterjeg" to="frygter jeg" />
|
||||
<Word from="kommerjeg" to="kommer jeg" />
|
||||
<Word from="eriournalistelev" to="er journalistelev" />
|
||||
<Word from="harfat" to="har fat" />
|
||||
<Word from="fårfingre" to="får fingre" />
|
||||
<Word from="slârjeg" to="slår jeg" />
|
||||
<Word from="bam" to="barn" />
|
||||
<Word from="erjournalistelev" to="er journalistelev" />
|
||||
<Word from="politietjo" to="politiet jo" />
|
||||
<Word from="elskerjo" to="elsker jo" />
|
||||
<Word from="vari" to="var i" />
|
||||
<Word from="fornemmerjeres" to="fornemmer jeres" />
|
||||
<Word from="udklækketl" to="udklækket!" />
|
||||
<Word from="í" to="i" />
|
||||
<Word from="nyi" to="ny i" />
|
||||
<Word from="Iumijelse" to="fornøjelse" />
|
||||
<Word from="vures" to="vores" />
|
||||
<Word from="I/Vashíngtan" to="Washington" />
|
||||
<Word from="opleverjeg" to="oplever jeg" />
|
||||
<Word from="PANTELÃNER" to="PANTELÅNER" />
|
||||
<Word from="Gudmurgen" to="Godmorgen" />
|
||||
<Word from="SKYDEVÃBEN" to="SKYDEVÅBEN" />
|
||||
<Word from="PÃLIDELIG" to="PÅLIDELIG" />
|
||||
<Word from="avertalte" to="overtalte" />
|
||||
<Word from="Omsíder" to="Omsider" />
|
||||
<Word from="lurtebåd" to="lortebåd" />
|
||||
<Word from="Telrslning" to="Tekstning" />
|
||||
<Word from="miUø" to="miljø" />
|
||||
<Word from="gåri" to="går i" />
|
||||
<Word from="Fan/el" to="Farvel" />
|
||||
<Word from="abefiæs" to="abefjæs" />
|
||||
<Word from="hartalt" to="har talt" />
|
||||
<Word from="\Årkelig" to="Virkelig" />
|
||||
<Word from="beklagerjeg" to="beklager jeg" />
|
||||
<Word from="Nårjeg" to="Når jeg" />
|
||||
<Word from="rnaend" to="mænd" />
|
||||
<Word from="vaskebjorn" to="vaskebjørn" />
|
||||
<Word from="Ivil" to="I vil" />
|
||||
<Word from="besog" to="besøg" />
|
||||
<Word from="Vaer" to="Vær" />
|
||||
<Word from="Undersogte" to="Undersøgte" />
|
||||
<Word from="modte" to="mødte" />
|
||||
<Word from="toj" to="tøj" />
|
||||
<Word from="fodt" to="født" />
|
||||
<Word from="gore" to="gøre" />
|
||||
<Word from="provede" to="prøvede" />
|
||||
<Word from="forste" to="første" />
|
||||
<Word from="igang" to="i gang" />
|
||||
<Word from="ligenu" to="lige nu" />
|
||||
<Word from="clet" to="det" />
|
||||
<Word from="Strombell" to="Strombel!" />
|
||||
<Word from="tmvlt" to="travlt" />
|
||||
<Word from="studererjournalistik" to="studerer journalistik" />
|
||||
<Word from="inforrnererjeg" to="informerer jeg" />
|
||||
<Word from="omkfing" to="omkring" />
|
||||
<Word from="tilAsgård" to="til Asgård" />
|
||||
<Word from="Kederjeg" to="Keder jeg" />
|
||||
<Word from="jaettetamp" to="jættetamp" />
|
||||
<Word from="erjer" to="er jer" />
|
||||
<Word from="atjulehygge" to="at julehygge" />
|
||||
<Word from="Ueneste" to="tjeneste" />
|
||||
<Word from="foltsaetter" to="fortsætter" />
|
||||
<Word from="A/ice" to="Alice" />
|
||||
<Word from="tvivlerjeg" to="tvivler jeg" />
|
||||
<Word from="henterjer" to="henter jer" />
|
||||
<Word from="forstårjeg" to="forstår jeg" />
|
||||
<Word from="hvisjeg" to="hvis jeg" />
|
||||
<Word from="/ært" to="lært" />
|
||||
<Word from="vfgtrgt" to="vigtigt" />
|
||||
<Word from="hurtigtjeg" to="hurtigt jeg" />
|
||||
<Word from="kenderjo" to="kender jo" />
|
||||
<Word from="seiv" to="selv" />
|
||||
<Word from="/ægehuset" to="lægehuset" />
|
||||
<Word from="herjo" to="her jo" />
|
||||
<Word from="stolerjeg" to="stoler jeg" />
|
||||
<Word from="digi" to="dig i" />
|
||||
<Word from="taberi" to="taber i" />
|
||||
<Word from="slårjeres" to="slår jeres" />
|
||||
<Word from="laere" to="lære" />
|
||||
<Word from="trænerwushu" to="træner wushu" />
|
||||
<Word from="efterjeg" to="efter jeg" />
|
||||
<Word from="efier" to="efter" />
|
||||
<Word from="dui" to="du i" />
|
||||
<Word from="afien" to="aften" />
|
||||
<Word from="bliveri" to="bliver i" />
|
||||
<Word from="acceptererjer" to="accepterer jer" />
|
||||
<Word from="drikkerjo" to="drikker jo" />
|
||||
<Word from="fianjin" to="Tianjin" />
|
||||
<Word from="erlænge" to="er længe" />
|
||||
<Word from="erikke" to="er ikke" />
|
||||
<Word from="medjer" to="med jer" />
|
||||
<Word from="Tmykke" to="Tillykke" />
|
||||
<Word from="'fianjins" to="Tianjins" />
|
||||
<Word from="Mesteri" to="Mester i" />
|
||||
<Word from="sagdetil" to="sagde til" />
|
||||
<Word from="indei" to="inde i" />
|
||||
<Word from="ofie" to="ofte" />
|
||||
<Word from="'filgiv" to="Tilgiv" />
|
||||
<Word from="Lfår" to="I får" />
|
||||
<Word from="viserjer" to="viser jer" />
|
||||
<Word from="Rejsjerblot" to="Rejs jer blot" />
|
||||
<Word from="'fillad" to="Tillad" />
|
||||
<Word from="iiiiefinger" to="lillefinger" />
|
||||
<Word from="VILOMFATTE" to="VIL OMFATTE" />
|
||||
<Word from="mofio" to="motto" />
|
||||
<Word from="gørjer" to="gør jer" />
|
||||
<Word from="gifi" to="gift" />
|
||||
<Word from="hardu" to="har du" />
|
||||
<Word from="gifi" to="gift" />
|
||||
<Word from="Iaeggerjeg" to="lægger jeg" />
|
||||
<Word from="iet" to="i et" />
|
||||
<Word from="sv/yte" to="svigte" />
|
||||
<Word from="ti/" to="til" />
|
||||
<Word from="Wdal" to="Vidal" />
|
||||
<Word from="fiået" to="fået" />
|
||||
<Word from="Hvo/for" to="Hvorfor" />
|
||||
<Word from="hellerikke" to="heller ikke" />
|
||||
<Word from="Wlle" to="Ville" />
|
||||
<Word from="dr/ver" to="driver" />
|
||||
<Word from="V\fllliam" to="William" />
|
||||
<Word from="V\fllliams" to="Williams" />
|
||||
<Word from="Vkfilliam" to="William" />
|
||||
<Word from="vådejakke" to="våde jakke" />
|
||||
<Word from="kæfll" to="kæft!" />
|
||||
<Word from="sagdejeg" to="sagde jeg" />
|
||||
<Word from="oven/ejet" to="overvejet" />
|
||||
<Word from="karameisauce" to="karamelsauce" />
|
||||
<Word from="Lfølgejødisk" to="Ifølge jødisk" />
|
||||
<Word from="blevjo" to="blev jo" />
|
||||
<Word from="asiateri" to="asiater i" />
|
||||
<Word from="erV\fllliam" to="er William" />
|
||||
<Word from="lidtflov" to="lidt flov" />
|
||||
<Word from="sagdejo" to="sagde jo" />
|
||||
<Word from="erlige" to="er lige" />
|
||||
<Word from="Vtfilliam" to="William" />
|
||||
<Word from="WfiII" to="Will" />
|
||||
<Word from="afldarede" to="afklarede" />
|
||||
<Word from="hjæiperjeg" to="hjælper jeg" />
|
||||
<Word from="laderjeg" to="lader jeg" />
|
||||
<Word from="Hândledsbeskyttere" to="Håndledsbeskyttere" />
|
||||
<Word from="Lsabels" to="Isabels" />
|
||||
<Word from="Gørjeg" to="Gør jeg" />
|
||||
<Word from="mâjeg" to="må jeg" />
|
||||
<Word from="ogjeg" to="og jeg" />
|
||||
<Word from="gjordejeg" to="gjorde jeg" />
|
||||
<Word from="villejeg" to="ville jeg" />
|
||||
<Word from="Vlfllliams" to="Williams" />
|
||||
<Word from="Dajeg" to="Da jeg" />
|
||||
<Word from="iorden" to="i orden" />
|
||||
<Word from="fandtjeg" to="fandt jeg" />
|
||||
<Word from="Tilykke" to="Tillykke" />
|
||||
<Word from="kørerjer" to="kører jer" />
|
||||
<Word from="gøfjeg" to="gør jeg" />
|
||||
<Word from="Selvflgelig" to="Selvfølgelig" />
|
||||
<Word from="fdder" to="fadder" />
|
||||
<Word from="bnfaldt" to="bønfaldt" />
|
||||
<Word from="t\/ehovedede" to="tvehovedede" />
|
||||
<Word from="EIler" to="Eller" />
|
||||
<Word from="ringerjeg" to="ringer jeg" />
|
||||
<Word from="blevvæk" to="blev væk" />
|
||||
<Word from="stárjeg" to="står jeg" />
|
||||
<Word from="varforbi" to="var forbi" />
|
||||
<Word from="harfortalt" to="har fortalt" />
|
||||
<Word from="iflere" to="i flere" />
|
||||
<Word from="tørjeg" to="tør jeg" />
|
||||
<Word from="kunnejeg" to="kunne jeg" />
|
||||
<Word from="má" to="må" />
|
||||
<Word from="hartænkt" to="har tænkt" />
|
||||
<Word from="Fárjeg" to="Får jeg" />
|
||||
<Word from="afdelingervar" to="afdelinger var" />
|
||||
<Word from="0rd" to="ord" />
|
||||
<Word from="pástá" to="påstå" />
|
||||
<Word from="gráharet" to="gråharet" />
|
||||
<Word from="varforbløffende" to="var forbløffende" />
|
||||
<Word from="holdtjeg" to="holdt jeg" />
|
||||
<Word from="hængerjo" to="hænger jo" />
|
||||
<Word from="fikjeg" to="fik jeg" />
|
||||
<Word from="fár" to="får" />
|
||||
<Word from="Hvorforfølerjeg" to="Hvorfor føler jeg" />
|
||||
<Word from="harfeber" to="har feber" />
|
||||
<Word from="ándssvagt" to="åndssvagt" />
|
||||
<Word from="0g" to="Og" />
|
||||
<Word from="vartre" to="var tre" />
|
||||
<Word from="abner" to="åbner" />
|
||||
<Word from="garjeg" to="går jeg" />
|
||||
<Word from="sertil" to="ser til" />
|
||||
<Word from="hvorfin" to="hvor fin" />
|
||||
<Word from="harfri" to="har fri" />
|
||||
<Word from="forstarjeg" to="forstår jeg" />
|
||||
<Word from="Sä" to="Så" />
|
||||
<Word from="hvorfint" to="hvor fint" />
|
||||
<Word from="mærkerjeg" to="mærker jeg" />
|
||||
<Word from="ogsa" to="også" />
|
||||
<Word from="nárjeg" to="når jeg" />
|
||||
<Word from="Jasá" to="Jaså" />
|
||||
<Word from="bándoptager" to="båndoptager" />
|
||||
<Word from="bedárende" to="bedårende" />
|
||||
<Word from="sá" to="så" />
|
||||
<Word from="nár" to="når" />
|
||||
<Word from="kunnejo" to="kunne jo" />
|
||||
<Word from="Brammertil" to="Brammer til" />
|
||||
<Word from="serjeg" to="ser jeg" />
|
||||
<Word from="gikjeg" to="gik jeg" />
|
||||
<Word from="udholderjeg" to="udholder jeg" />
|
||||
<Word from="máneder" to="måneder" />
|
||||
<Word from="vartræt" to="var træt" />
|
||||
<Word from="dárligt" to="dårligt" />
|
||||
<Word from="klaretjer" to="klaret jer" />
|
||||
<Word from="pavirkelig" to="påvirkelig" />
|
||||
<Word from="spekulererjeg" to="spekulerer jeg" />
|
||||
<Word from="forsøgerjeg" to="forsøger jeg" />
|
||||
<Word from="huskerjeg" to="husker jeg" />
|
||||
<Word from="ifavnen" to="i favnen" />
|
||||
<Word from="skullejo" to="skulle jo" />
|
||||
<Word from="vartung" to="var tung" />
|
||||
<Word from="varfuldstændig" to="var fuldstændig" />
|
||||
<Word from="Paskedag" to="Påskedag" />
|
||||
<Word from="turi" to="tur i" />
|
||||
<Word from="spillerschumanns" to="spiller Schumanns" />
|
||||
<Word from="forstárjeg" to="forstår jeg" />
|
||||
<Word from="istedet" to="i stedet" />
|
||||
<Word from="nárfrem" to="når frem" />
|
||||
<Word from="habertrods" to="håber trods" />
|
||||
<Word from="forførste" to="for første" />
|
||||
<Word from="varto" to="var to" />
|
||||
<Word from="overtil" to="over til" />
|
||||
<Word from="forfem" to="for fem" />
|
||||
<Word from="holdtjo" to="holdt jo" />
|
||||
<Word from="passerjo" to="passer jo" />
|
||||
<Word from="ellerto" to="eller to" />
|
||||
<Word from="hartrods" to="har trods" />
|
||||
<Word from="harfuldstændig" to="har fuldstændig" />
|
||||
<Word from="gårjeg" to="går jeg" />
|
||||
<Word from="giderjeg" to="gider jeg" />
|
||||
<Word from="forjer" to="for jer" />
|
||||
<Word from="erindrerjeg" to="erindrer jeg" />
|
||||
<Word from="tænkerjeg" to="tænker jeg" />
|
||||
<Word from="GAEt" to="GÅET" />
|
||||
<Word from="hørerjo" to="hører jo" />
|
||||
<Word from="forladerjeg" to="forlader jeg" />
|
||||
<Word from="kosterjo" to="koster jo" />
|
||||
<Word from="fortællerjeg" to="fortæller jeg" />
|
||||
<Word from="Forstyrrerjeg" to="Forstyrrer jeg" />
|
||||
<Word from="tjekkerjeg" to="tjekker jeg" />
|
||||
<Word from="erjurist" to="er jurist" />
|
||||
<Word from="tlLBUD" to="TILBUD" />
|
||||
<Word from="serjo" to="ser jo" />
|
||||
<Word from="bederjeg" to="beder jeg" />
|
||||
<Word from="bilderjeg" to="bilder jeg" />
|
||||
<Word from="ULVEtlME" to="ULVETIME" />
|
||||
<Word from="skærerjo" to="skærer jo" />
|
||||
<Word from="afjer" to="af jer" />
|
||||
<Word from="ordnerjeg" to="ordner jeg" />
|
||||
<Word from="giverjeg" to="giver jeg" />
|
||||
<Word from="rejservi" to="rejser vi" />
|
||||
<Word from="fangerjeg" to="fanger jeg" />
|
||||
<Word from="erjaloux" to="er jaloux" />
|
||||
<Word from="glemmerjeg" to="glemmer jeg" />
|
||||
<Word from="Behøverjeg" to="Behøver jeg" />
|
||||
<Word from="harvi" to="har vi" />
|
||||
<Word from="ertyndere" to="er tyndere" />
|
||||
<Word from="fårtordenvejr" to="får tordenvejr" />
|
||||
<Word from="varfærdig" to="var færdig" />
|
||||
<Word from="hørerfor" to="hører for" />
|
||||
<Word from="varvel" to="var vel" />
|
||||
<Word from="erforbi" to="er forbi" />
|
||||
<Word from="AIle" to="Alle" />
|
||||
<Word from="læserjo" to="læser jo" />
|
||||
<Word from="Edgarer" to="Edgar er" />
|
||||
<Word from="hartaget" to="har taget" />
|
||||
<Word from="derer" to="der er" />
|
||||
<Word from="stikkerfrem" to="stikker frem" />
|
||||
<Word from="haraldrig" to="har aldrig" />
|
||||
<Word from="ellerfar" to="eller far" />
|
||||
<Word from="erat" to="er at" />
|
||||
<Word from="turtil" to="tur til" />
|
||||
<Word from="erfærdig" to="er færdig" />
|
||||
<Word from="følerjeg" to="føler jeg" />
|
||||
<Word from="jerfra" to="jer fra" />
|
||||
<Word from="eralt" to="er alt" />
|
||||
<Word from="harfaktisk" to="har faktisk" />
|
||||
<Word from="harfundet" to="har fundet" />
|
||||
<Word from="harvendt" to="har vendt" />
|
||||
<Word from="Kunstneraf" to="Kunstner af" />
|
||||
<Word from="ervel" to="er vel" />
|
||||
<Word from="ståransigt" to="står ansigt" />
|
||||
<Word from="Erjeg" to="Er jeg" />
|
||||
<Word from="venterjeg" to="venter jeg" />
|
||||
<Word from="Hvorvar" to="Hvor var" />
|
||||
<Word from="varfint" to="var fint" />
|
||||
<Word from="ervarmt" to="er varmt" />
|
||||
<Word from="gårfint" to="går fint" />
|
||||
<Word from="flyverforbi" to="flyver forbi" />
|
||||
<Word from="Dervar" to="Der var" />
|
||||
<Word from="dervar" to="der var" />
|
||||
<Word from="meneråndeligt" to="mener åndeligt" />
|
||||
<Word from="forat" to="for at" />
|
||||
<Word from="herovertil" to="herover til" />
|
||||
<Word from="soverfor" to="sover for" />
|
||||
<Word from="begyndtejeg" to="begyndte jeg" />
|
||||
<Word from="vendertilbage" to="vender tilbage" />
|
||||
<Word from="erforfærdelig" to="er forfærdelig" />
|
||||
<Word from="gøraltid" to="gør altid" />
|
||||
<Word from="ertilbage" to="er tilbage" />
|
||||
<Word from="harværet" to="har været" />
|
||||
<Word from="bagoverellertil" to="bagover eller til" />
|
||||
<Word from="hertaler" to="her taler" />
|
||||
<Word from="vågnerjeg" to="vågner jeg" />
|
||||
<Word from="vartomt" to="var tomt" />
|
||||
<Word from="gårfrem" to="går frem" />
|
||||
<Word from="talertil" to="taler til" />
|
||||
<Word from="ertryg" to="er tryg" />
|
||||
<Word from="ansigtervendes" to="ansigter vendes" />
|
||||
<Word from="hervirkeligt" to="her virkeligt" />
|
||||
<Word from="herer" to="her er" />
|
||||
<Word from="drømmerjo" to="drømmer jo" />
|
||||
<Word from="erfuldkommen" to="er fuldkommen" />
|
||||
<Word from="hveren" to="hver en" />
|
||||
<Word from="erfej" to="er fej" />
|
||||
<Word from="datterforgæves" to="datter forgæves" />
|
||||
<Word from="forsøgerjo" to="forsøger jo" />
|
||||
<Word from="ertom" to="er tom" />
|
||||
<Word from="vareftermiddag" to="var eftermiddag" />
|
||||
<Word from="vartom" to="var tom" />
|
||||
<Word from="angerellerforventninger" to="anger eller forventninger" />
|
||||
<Word from="kørtejeg" to="kørte jeg" />
|
||||
<Word from="Hvorforfortæller" to="Hvorfor fortæller" />
|
||||
<Word from="gårtil" to="går til" />
|
||||
<Word from="ringerefter" to="ringer efter" />
|
||||
<Word from="søgertilflugt" to="søger tilflugt" />
|
||||
<Word from="ertvunget" to="er tvunget" />
|
||||
<Word from="megetjeg" to="meget jeg" />
|
||||
<Word from="varikke" to="var ikke" />
|
||||
<Word from="Derermange" to="Der er mange" />
|
||||
<Word from="dervilhindre" to="der vil hindre" />
|
||||
<Word from="erså" to="er så" />
|
||||
<Word from="DetforstårLeggodt" to="Det forstår jeg godt" />
|
||||
<Word from="ergodt" to="er godt" />
|
||||
<Word from="vorventen" to="vor venten" />
|
||||
<Word from="tagerfejl" to="tager fejl" />
|
||||
<Word from="ellerer" to="eller er" />
|
||||
<Word from="laverjeg" to="laver jeg" />
|
||||
<Word from="0mgang" to="omgang" />
|
||||
<Word from="afstár" to="afstår" />
|
||||
<Word from="pá" to="på" />
|
||||
<Word from="rejserjeg" to="rejser jeg" />
|
||||
<Word from="ellertage" to="eller tage" />
|
||||
<Word from="takkerjeg" to="takker jeg" />
|
||||
<Word from="ertilfældigvis" to="er tilfældigvis" />
|
||||
<Word from="fremstar" to="fremstår" />
|
||||
<Word from="ertæt" to="er tæt" />
|
||||
<Word from="ijeres" to="i jeres" />
|
||||
<Word from="Sagdejeg" to="Sagde jeg" />
|
||||
<Word from="overi" to="over i" />
|
||||
<Word from="plukkerjordbær" to="plukker jordbær" />
|
||||
<Word from="klarerjeg" to="klarer jeg" />
|
||||
<Word from="jerfire" to="jer fire" />
|
||||
<Word from="tábeligste" to="tåbeligste" />
|
||||
<Word from="sigertvillingerne" to="siger tvillingerne" />
|
||||
<Word from="erfaktisk" to="er faktisk" />
|
||||
<Word from="gár" to="går" />
|
||||
<Word from="harvasket" to="har vasket" />
|
||||
<Word from="harplukketjordbærtil" to="har plukket jordbær til" />
|
||||
<Word from="plukketjordbær" to="plukket jordbær" />
|
||||
<Word from="klaverfirehændigt" to="klaver firehændigt" />
|
||||
<Word from="erjævnaldrende" to="er jævnaldrende" />
|
||||
<Word from="tierjeg" to="tier jeg" />
|
||||
<Word from="Hvorerden" to="Hvor er den" />
|
||||
<Word from="0veraltjeg" to="overalt jeg" />
|
||||
<Word from="gårpå" to="går på" />
|
||||
<Word from="finderjeg" to="finder jeg" />
|
||||
<Word from="serhans" to="ser hans" />
|
||||
<Word from="tiderbliver" to="tider bliver" />
|
||||
<Word from="ellertrist" to="eller trist" />
|
||||
<Word from="forstårjeres" to="forstår jeres" />
|
||||
<Word from="Hvorsjælen" to="Hvor sjælen" />
|
||||
<Word from="finderro" to="finder ro" />
|
||||
<Word from="sidderjeg" to="sidder jeg" />
|
||||
<Word from="tagerjo" to="tager jo" />
|
||||
<Word from="efterjeres" to="efter jeres" />
|
||||
<Word from="10O" to="100" />
|
||||
<Word from="besluttedejeg" to="besluttede jeg" />
|
||||
<Word from="varsket" to="var sket" />
|
||||
<Word from="uadskillige" to="uadskillelige" />
|
||||
<Word from="harjetlag" to="har jetlag" />
|
||||
<Word from="lkke" to="Ikke" />
|
||||
<Word from="lntet" to="Intet" />
|
||||
<Word from="afslørerjeg" to="afslører jeg" />
|
||||
<Word from="måjeg" to="må jeg" />
|
||||
<Word from="Vl" to="VI" />
|
||||
<Word from="atbygge" to="at bygge" />
|
||||
<Word from="detmakabre" to="det makabre" />
|
||||
<Word from="vilikke" to="vil ikke" />
|
||||
<Word from="talsmandbekræfter" to="talsmand bekræfter" />
|
||||
<Word from="vedatrenovere" to="ved at renovere" />
|
||||
<Word from="forsøgeratforstå" to="forsøger at forstå" />
|
||||
<Word from="ersket" to="er sket" />
|
||||
<Word from="morderpå" to="morder på" />
|
||||
<Word from="frifodiRosewood" to="fri fod i Rosewood" />
|
||||
<Word from="holdtpressemøde" to="holdt pressemøde" />
|
||||
<Word from="lngen" to="Ingen" />
|
||||
<Word from="lND" to="IND" />
|
||||
<Word from="henterjeg" to="henter jeg" />
|
||||
<Word from="lsabel" to="Isabel" />
|
||||
<Word from="lsabels" to="Isabels" />
|
||||
<Word from="vinderjo" to="vinder jo" />
|
||||
<Word from="rødmerjo" to="rødmer jo" />
|
||||
<Word from="etjakkesæt" to="et jakkesæt" />
|
||||
<Word from="glæderjeg" to="glæder jeg" />
|
||||
<Word from="lgen" to="Igen" />
|
||||
<Word from="lsær" to="Især" />
|
||||
<Word from="iparken" to="i parken" />
|
||||
<Word from="nårl" to="når I" />
|
||||
<Word from="tilA1" to="til A1" />
|
||||
<Word from="FBl" to="FBI" />
|
||||
<Word from="viljo" to="vil jo" />
|
||||
<Word from="detpå" to="det på" />
|
||||
<Word from="KIar" to="Klar" />
|
||||
<Word from="PIan" to="Plan" />
|
||||
<Word from="EIIer" to="Eller" />
|
||||
<Word from="FIot" to="Flot" />
|
||||
<Word from="AIIe" to="Alle" />
|
||||
<Word from="AIt" to="Alt" />
|
||||
<Word from="KIap" to="Klap" />
|
||||
<Word from="PIaza" to="Plaza" />
|
||||
<Word from="SIap" to="Slap" />
|
||||
<Word from="Iå" to="lå" />
|
||||
<Word from="BIing" to="Bling" />
|
||||
<Word from="GIade" to="Glade" />
|
||||
<Word from="Iejrbålssange" to="lejrbålssange" />
|
||||
<Word from="bedtjer" to="bedt jer" />
|
||||
<Word from="hørerjeg" to="hører jeg" />
|
||||
<Word from="Fårjeg" to="Får jeg" />
|
||||
<Word from="fikJames" to="fik James" />
|
||||
<Word from="atsnakke" to="at snakke" />
|
||||
<Word from="varkun" to="var kun" />
|
||||
<Word from="retterjeg" to="retter jeg" />
|
||||
<Word from="ernormale" to="er normale" />
|
||||
<Word from="viljeg" to="vil jeg" />
|
||||
<Word from="Sætjer" to="Sæt jer" />
|
||||
<Word from="udsatham" to="udsat ham" />
|
||||
</WholeWords>
|
||||
<PartialWordsAlways>
|
||||
<WordPart from="¤" to="o" />
|
||||
<WordPart from="IVI" to="M" />
|
||||
<WordPart from="lVI" to="M" />
|
||||
<WordPart from="IVl" to="M" />
|
||||
<WordPart from="lVl" to="M" />
|
||||
</PartialWordsAlways>
|
||||
<PartialWords>
|
||||
<!-- Will be used to check words not in dictionary -->
|
||||
<!-- If the resulting word(s) exist in the spelling dictionary, they are accepted -->
|
||||
<WordPart from="ﬁ" to="fi" />
|
||||
<WordPart from="ﬂ" to="fl" />
|
||||
<WordPart from="/" to="l" />
|
||||
<WordPart from="vv" to="w" />
|
||||
<WordPart from="m" to="rn" />
|
||||
<WordPart from="l" to="i" />
|
||||
<WordPart from="€" to="e" />
|
||||
<WordPart from="I" to="l" />
|
||||
<WordPart from="c" to="o" />
|
||||
<WordPart from="i" to="t" />
|
||||
<WordPart from="cc" to="oo" />
|
||||
<WordPart from="ii" to="tt" />
|
||||
<WordPart from="n/" to="ry" />
|
||||
<WordPart from="ae" to="æ" />
|
||||
<!-- "f " will be two words -->
|
||||
<WordPart from="f" to="f " />
|
||||
<WordPart from="c" to="e" />
|
||||
<WordPart from="o" to="e" />
|
||||
<WordPart from="I" to="t" />
|
||||
<WordPart from="n" to="o" />
|
||||
<WordPart from="s" to="e" />
|
||||
<WordPart from="\A" to="Vi" />
|
||||
<WordPart from="n/" to="rv" />
|
||||
<WordPart from="Ã" to="Å" />
|
||||
<WordPart from="í" to="i" />
|
||||
</PartialWords>
|
||||
<PartialLines />
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
||||
+6865
File diff suppressed because it is too large
Load Diff
+2338
File diff suppressed because it is too large
Load Diff
+1032
File diff suppressed because it is too large
Load Diff
+270
@@ -0,0 +1,270 @@
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="@immatriculation" to="d'immatriculation" />
|
||||
<Word from="acquer" to="acquér" />
|
||||
<Word from="acteurjoue" to="acteur joue" />
|
||||
<Word from="aerien" to="aérien" />
|
||||
<Word from="agreable" to="agréable" />
|
||||
<Word from="aientjamais" to="aient jamais" />
|
||||
<Word from="AII" to="All" />
|
||||
<Word from="aitjamais" to="ait jamais" />
|
||||
<Word from="aitjus" to="ait jus" />
|
||||
<Word from="alle" to="allé" />
|
||||
<Word from="alles" to="allés" />
|
||||
<Word from="appele" to="appelé" />
|
||||
<Word from="apres" to="après" />
|
||||
<Word from="aujourdhui" to="aujourd'hui" />
|
||||
<Word from="aupres" to="auprès" />
|
||||
<Word from="beaute" to="beauté" />
|
||||
<Word from="cabossee" to="cabossée" />
|
||||
<Word from="carj'" to="car j'" />
|
||||
<Word from="Carj'" to="Car j'" />
|
||||
<Word from="carla" to="car la" />
|
||||
<Word from="CEdipe" to="Œdipe" />
|
||||
<Word from="Cest" to="C'est" />
|
||||
<Word from="c'etaient" to="c'étaient" />
|
||||
<Word from="Cétaient" to="C'étaient" />
|
||||
<Word from="c'etait" to="c'était" />
|
||||
<Word from="C'etait" to="C'était" />
|
||||
<Word from="Cétait" to="C'était" />
|
||||
<Word from="choregraphiee" to="chorégraphiée" />
|
||||
<Word from="cinema" to="cinéma" />
|
||||
<Word from="cl'AIcatraz" to="d'Alcatraz" />
|
||||
<Word from="cles" to="clés" />
|
||||
<Word from="cœurjoie" to="cœur-joie" />
|
||||
<Word from="completer" to="compléter" />
|
||||
<Word from="costumiere" to="costumière" />
|
||||
<Word from="cree" to="créé" />
|
||||
<Word from="daccord" to="d'accord" />
|
||||
<Word from="d'AIbert" to="d'Albert" />
|
||||
<Word from="d'AIdous" to="d'Aldous" />
|
||||
<Word from="d'AIec" to="d'Alec" />
|
||||
<Word from="danniversaire" to="d'anniversaire" />
|
||||
<Word from="d'Arra'bida" to="d'Arrabida" />
|
||||
<Word from="d'autodérision" to="d'auto-dérision" />
|
||||
<Word from="dautres" to="d'autres" />
|
||||
<Word from="debattait" to="débattait" />
|
||||
<Word from="decor" to="décor" />
|
||||
<Word from="decorateurs" to="décorateurs" />
|
||||
<Word from="decors" to="décors" />
|
||||
<Word from="defi" to="défi" />
|
||||
<Word from="dejà" to="déjà" />
|
||||
<Word from="déjàm" to="déjà..." />
|
||||
<Word from="dejeunait" to="déjeunait" />
|
||||
<Word from="dengager" to="d'engager" />
|
||||
<Word from="déquipement" to="d'équipement" />
|
||||
<Word from="dérnièré" to="dernière" />
|
||||
<Word from="Desole" to="Désolé" />
|
||||
<Word from="dessayage" to="d'essayage" />
|
||||
<Word from="dessence" to="d'essence" />
|
||||
<Word from="détaient" to="c'étaient" />
|
||||
<Word from="detail" to="détail" />
|
||||
<Word from="dexcellents" to="d'excellents" />
|
||||
<Word from="dexpérience" to="d'expérience" />
|
||||
<Word from="dexpériences" to="d'expériences" />
|
||||
<Word from="d'héro'l'ne" to="d'héroïne" />
|
||||
<Word from="d'idees" to="d'idées" />
|
||||
<Word from="d'intensite" to="d'intensité" />
|
||||
<Word from="dontj" to="dont j" />
|
||||
<Word from="doublaitAlfo" to="doublait Alfo" />
|
||||
<Word from="DrNo" to="Dr No" />
|
||||
<Word from="e'" to="é" />
|
||||
<Word from="ecrit" to="écrit" />
|
||||
<Word from="elegant" to="élégant" />
|
||||
<Word from="Ellé" to="Elle" />
|
||||
<Word from="én" to="en" />
|
||||
<Word from="equipe" to="équipe" />
|
||||
<Word from="erjus" to="er jus" />
|
||||
<Word from="estjamais" to="est jamais" />
|
||||
<Word from="ét" to="et" />
|
||||
<Word from="etaient" to="étaient" />
|
||||
<Word from="etait" to="était" />
|
||||
<Word from="ete" to="été" />
|
||||
<Word from="etiez" to="étiez" />
|
||||
<Word from="etj'" to="et j'" />
|
||||
<Word from="Etj'" to="Et j'" />
|
||||
<Word from="etje" to="et je" />
|
||||
<Word from="Etje" to="Et je" />
|
||||
<Word from="EtsouvenL" to="Et souvent" />
|
||||
<Word from="eviter" to="éviter" />
|
||||
<Word from="Fabsence" to="l'absence" />
|
||||
<Word from="fadapter" to="t'adapter" />
|
||||
<Word from="fadore" to="j'adore" />
|
||||
<Word from="Fâge" to="l'âge" />
|
||||
<Word from="Fagent" to="l'agent" />
|
||||
<Word from="faiessayé" to="j'ai essayé" />
|
||||
<Word from="Failure" to="l'allure" />
|
||||
<Word from="Fambiance" to="l'ambiance" />
|
||||
<Word from="Famener" to="l'amener" />
|
||||
<Word from="Fanniversaire" to="l'anniversaire" />
|
||||
<Word from="Fapparence" to="l'apparence" />
|
||||
<Word from="Fapres" to="l'apres" />
|
||||
<Word from="Faprès" to="l'après" />
|
||||
<Word from="Farmée" to="l'armée" />
|
||||
<Word from="Farrière" to="l'arrière" />
|
||||
<Word from="Farrivée" to="l'arrivée" />
|
||||
<Word from="Fascenseur" to="l'ascenseur" />
|
||||
<Word from="Fascension" to="l'ascension" />
|
||||
<Word from="Fassaut" to="l'assaut" />
|
||||
<Word from="Fassomme" to="l'assomme" />
|
||||
<Word from="Fatmosphère" to="l'atmosphère" />
|
||||
<Word from="Fattention" to="l'attention" />
|
||||
<Word from="Favalanche" to="l'avalanche" />
|
||||
<Word from="Féclairage" to="l'éclairage" />
|
||||
<Word from="Fécran" to="l'écran" />
|
||||
<Word from="Fémotion" to="l'émotion" />
|
||||
<Word from="Femplacement" to="l'emplacement" />
|
||||
<Word from="Fendroit" to="l'endroit" />
|
||||
<Word from="Fenseigne" to="l'enseigne" />
|
||||
<Word from="Fensemble" to="l'ensemble" />
|
||||
<Word from="Fentouraient" to="l'entouraient" />
|
||||
<Word from="Fentrée" to="l'entrée" />
|
||||
<Word from="Fépaisseur" to="l'épaisseur" />
|
||||
<Word from="Fépoque" to="l'époque" />
|
||||
<Word from="Féquipe" to="l'équipe" />
|
||||
<Word from="Fespace" to="l'espace" />
|
||||
<Word from="fespérais" to="j'espérais" />
|
||||
<Word from="Fespère" to="l'espère" />
|
||||
<Word from="Festhétique" to="l'esthétique" />
|
||||
<Word from="Fetranger" to="l'etranger" />
|
||||
<Word from="Févasion" to="l'évasion" />
|
||||
<Word from="Févoque" to="l'évoque" />
|
||||
<Word from="Fexpérience" to="l'expérience" />
|
||||
<Word from="Fexplique" to="l'explique" />
|
||||
<Word from="Fexplosion" to="l'explosion" />
|
||||
<Word from="Fextérieur" to="l'extérieur" />
|
||||
<Word from="Fhabituelle" to="l'habituelle" />
|
||||
<Word from="Fhélicoptère" to="l'hélicoptère" />
|
||||
<Word from="Fhéliport" to="l'héliport" />
|
||||
<Word from="Fhélistation" to="l'hélistation" />
|
||||
<Word from="Fhonneur" to="l'honneur" />
|
||||
<Word from="Fhorloge" to="l'horloge" />
|
||||
<Word from="Fidée" to="l'idée" />
|
||||
<Word from="Fimage" to="l'image" />
|
||||
<Word from="Fimportance" to="l'importance" />
|
||||
<Word from="Fimpression" to="l'impression" />
|
||||
<Word from="Finfluence" to="l'influence" />
|
||||
<Word from="Finscription" to="l'inscription" />
|
||||
<Word from="Fintérieur" to="l'intérieur" />
|
||||
<Word from="Fintrigue" to="l'intrigue" />
|
||||
<Word from="Fobjectif" to="l'objectif" />
|
||||
<Word from="Foccasion" to="l'occasion" />
|
||||
<Word from="Fordre" to="l'ordre" />
|
||||
<Word from="Forigine" to="l'origine" />
|
||||
<Word from="frêre" to="frère" />
|
||||
<Word from="gaylns" to="gaijins" />
|
||||
<Word from="general" to="général" />
|
||||
<Word from="hawaïennel" to="hawaïenne" />
|
||||
<Word from="hawa'l'en" to="hawaïen" />
|
||||
<Word from="Ia" to="la" />
|
||||
<Word from="Ià" to="là" />
|
||||
<Word from="Iaryngotomie" to="laryngotomie" />
|
||||
<Word from="idee" to="idée" />
|
||||
<Word from="idees" to="idées" />
|
||||
<Word from="Ie" to="le" />
|
||||
<Word from="Ies" to="les" />
|
||||
<Word from="Iester" to="Lester" />
|
||||
<Word from="II" to="Il" />
|
||||
<Word from="Iimit" to="limit" />
|
||||
<Word from="IIs" to="Ils" />
|
||||
<Word from="immediatement" to="immédiatement" />
|
||||
<Word from="insufflee" to="insufflée" />
|
||||
<Word from="integrer" to="intégrer" />
|
||||
<Word from="interessante" to="intéressante" />
|
||||
<Word from="Iogions" to="logions" />
|
||||
<Word from="Iorsqu" to="lorsqu" />
|
||||
<Word from="isee" to="isée" />
|
||||
<Word from="Iumiere" to="lumiere" />
|
||||
<Word from="Iynchage" to="lynchage" />
|
||||
<Word from="J'espere" to="J'espère" />
|
||||
<Word from="Jessaie" to="J'essaie" />
|
||||
<Word from="j'etais" to="j'étais" />
|
||||
<Word from="J'etais" to="J'étais" />
|
||||
<Word from="latéralémént" to="latéralement" />
|
||||
<Word from="lci" to="Ici" />
|
||||
<Word from="Lci" to="Ici" />
|
||||
<Word from="lé-" to="là-" />
|
||||
<Word from="lepidopteres" to="lépidoptères" />
|
||||
<Word from="litteraire" to="littéraire" />
|
||||
<Word from="ll" to="il" />
|
||||
<Word from="Ll" to="Il" />
|
||||
<Word from="lls" to="ils" />
|
||||
<Word from="Lls" to="Ils" />
|
||||
<Word from="maintenanu" to="maintenant" />
|
||||
<Word from="maniere" to="manière" />
|
||||
<Word from="mariee" to="mariée" />
|
||||
<Word from="Mayer/ing" to="Mayerling" />
|
||||
<Word from="meilleurjour" to="meilleur jour" />
|
||||
<Word from="melange" to="mélange" />
|
||||
<Word from="n'avaiént" to="n'avaient" />
|
||||
<Word from="n'etait" to="n'était" />
|
||||
<Word from="oitjamais" to="oit jamais" />
|
||||
<Word from="oitjus" to="oit jus" />
|
||||
<Word from="ontete" to="ont été" />
|
||||
<Word from="operateur" to="opérateur" />
|
||||
<Word from="ouvérté" to="ouverte" />
|
||||
<Word from="Pépreuve" to="l'épreuve" />
|
||||
<Word from="pere" to="père" />
|
||||
<Word from="plateforme" to="plate-forme" />
|
||||
<Word from="pourjouer" to="pour jouer" />
|
||||
<Word from="precipice" to="précipice" />
|
||||
<Word from="preferes" to="préférés" />
|
||||
<Word from="premierjour" to="premier jour" />
|
||||
<Word from="presenter" to="présenter" />
|
||||
<Word from="prevu" to="prévu" />
|
||||
<Word from="prevue" to="prévue" />
|
||||
<Word from="propriete" to="propriété" />
|
||||
<Word from="protègeraient" to="protégeraient" />
|
||||
<Word from="qué" to="que" />
|
||||
<Word from="qwangoissé" to="qu'angoissé" />
|
||||
<Word from="realisateur" to="réalisateur" />
|
||||
<Word from="reception" to="réception" />
|
||||
<Word from="reévalu" to="réévalu" />
|
||||
<Word from="repute" to="réputé" />
|
||||
<Word from="reussi" to="réussi" />
|
||||
<Word from="s'arrétait" to="s'arrêtait" />
|
||||
<Word from="s'ave'rer" to="s'avérer" />
|
||||
<Word from="scenario" to="scénario" />
|
||||
<Word from="scene" to="scène" />
|
||||
<Word from="scenes" to="scènes" />
|
||||
<Word from="seances" to="séances" />
|
||||
<Word from="sequence" to="séquence" />
|
||||
<Word from="sflécrasa" to="s'écrasa" />
|
||||
<Word from="speciale" to="spéciale" />
|
||||
<Word from="Supen" to="Super" />
|
||||
<Word from="torturee" to="torturée" />
|
||||
<Word from="Uadmirable" to="L'admirable" />
|
||||
<Word from="Uensemblier" to="L'ensemblier" />
|
||||
<Word from="Uexplosion" to="L'explosion" />
|
||||
<Word from="Uouvre" to="L'ouvre" />
|
||||
<Word from="Vaise" to="l'aise" />
|
||||
<Word from="vecu" to="vécu" />
|
||||
<Word from="vehicules" to="véhicules" />
|
||||
<Word from="Ÿappréciais" to="J'appréciais" />
|
||||
<Word from="Ÿespère" to="J'espère" />
|
||||
<Word from="ÿétrangle" to="s'étrangle" />
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<PartialLines>
|
||||
<LinePart from=" I'" to=" l'" />
|
||||
<LinePart from=" |'" to=" l'" />
|
||||
</PartialLines>
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines>
|
||||
<Line from="&quot;D'ac:c:ord.&quot;" to="&quot;D'accord.&quot;" />
|
||||
<Line from="“i QUÎ gagne, qui perd," to="ni qui gagne, qui perd," />
|
||||
<Line from="L'ac:c:ent est mis 
 
 sur son trajet jusqu'en Suisse." to="L'accent est mis 
 
 sur son trajet jusqu'en Suisse." />
|
||||
<Line from="C'est la plus gentille chose 
 
 qu'Hitchc:oc:k m'ait jamais dite." to="C'est la plus gentille chose 
 
 qu'Hitchcock m'ait jamais dite." />
|
||||
<Line from="Tout le monde, en revanche, qualifie 
 
 Goldfinger d'aventu re structurée," to="Tout le monde, en revanche, qualifie 
 
 Goldfinger d'aventure structurée," />
|
||||
<Line from="et le film Shadow of a man 
 
 a lancé sa carrière au cinéma." to="et le film <i>Shadow of a man</i> 
 
 a lancé sa carrière au cinéma." />
|
||||
<Line from="En 1948, Young est passé à la réalisation 
 
 avec One night with you." to="En 1948, Young est passé à la réalisation 
 
 avec <i>One night with you</i>." />
|
||||
<Line from="Il a construit tous ces véhicules 
 
 à C)c:ala, en Floride." to="Il a construit tous ces véhicules 
 
 à Ocala, en Floride." />
|
||||
<Line from="Tokyo Pop et A Taxing Woman? Return." to="Tokyo Pop et A Taxing Woman's Return." />
|
||||
<Line from="Peter H u nt." to="Peter Hunt." />
|
||||
<Line from=""C'est bien mieux dans Peau. 
 
 On peut sfléclabousser, faire du bruit."" to=""C'est bien mieux dans l'eau. 
 
 On peut s'éclabousser, faire du bruit."" />
|
||||
</WholeLines>
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
||||
+2266
File diff suppressed because it is too large
Load Diff
+1442
File diff suppressed because it is too large
Load Diff
+25
@@ -0,0 +1,25 @@
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords />
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<PartialLines />
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions>
|
||||
<!-- nagy I-l javítások -->
|
||||
<RegEx find="([\x41-\x5a\x61-\x7a\xc1-\xfc])II" replaceWith="$1ll" />
|
||||
<RegEx find="II([\x61-\x7a\xe1-\xfc])" replaceWith="ll$1" />
|
||||
<RegEx find="([\x61-\x7a\xe1-\xfc])I" replaceWith="$1l" />
|
||||
<RegEx find="([\x20])I([^aeou\x41-\x5a\xc1-\xdc])" replaceWith="$1l$2" />
|
||||
<RegEx find="\bl([bcdfghjklmnpqrstvwxz])" replaceWith="I$1" />
|
||||
<RegEx find="([\x41-\x5a\xc1-\xdc])I([\x61-\x7a\xe1-\xfc])" replaceWith="$1l$2" />
|
||||
<RegEx find="([\x61-\x7a\xe1-\xfc][\-])I([\x61-\x7a\xe1-\xfc])" replaceWith="$1l$2" />
|
||||
<RegEx find="([\x41-\x5a\xc1-\xdc])I([\-][\x41-\x5a\xc1-\xdc][\x61-\x7a\xe1-\xfc])" replaceWith="$1l$2" />
|
||||
<RegEx find="\b([AEÜÓ])I([^\x41-\x5a\xc1-\xdc])" replaceWith="$1l$2" />
|
||||
<RegEx find="\bI([aáeéiíoóöuúüy\xf5\xfb])" replaceWith="l$1" />
|
||||
<RegEx find="\b(?:II|ll)" replaceWith="Il" />
|
||||
<RegEx find="([\xf5\xfb])I" replaceWith="$1l" />
|
||||
</RegularExpressions>
|
||||
</OCRFixReplaceList>
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="ls" to="Is" />
|
||||
<Word from="ln" to="In" />
|
||||
<Word from="lk" to="Ik" />
|
||||
<Word from="ledereen" to="Iedereen" />
|
||||
<Word from="ledere" to="Iedere" />
|
||||
<Word from="lemand" to="Iemand" />
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<PartialLines />
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions>
|
||||
<RegEx find="\blk(?=\p{Ll}{2})" replaceWith="Ik" />
|
||||
<RegEx find="\bln(?=\p{Ll}{2})" replaceWith="In" />
|
||||
<RegEx find="\bls(?=\p{Ll}{2})" replaceWith="Is" />
|
||||
<RegEx find="\beIk" replaceWith="elk" />
|
||||
<RegEx find="\bler(land|se|s|)\b" replaceWith="Ier$1" />
|
||||
</RegularExpressions>
|
||||
</OCRFixReplaceList>
|
||||
+43
@@ -0,0 +1,43 @@
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords />
|
||||
<PartialWordsAlways />
|
||||
<PartialWords>
|
||||
<!-- Will be used to check words not in dictionary -->
|
||||
<!-- If the resulting word(s) exist in the spelling dictionary, they are accepted -->
|
||||
<WordPart from="¤" to="o" />
|
||||
<WordPart from="ﬁ" to="fi" />
|
||||
<WordPart from="ﬂ" to="fl" />
|
||||
<WordPart from="/" to="l" />
|
||||
<WordPart from="vv" to="w" />
|
||||
<WordPart from="IVI" to="M" />
|
||||
<WordPart from="lVI" to="M" />
|
||||
<WordPart from="IVl" to="M" />
|
||||
<WordPart from="lVl" to="M" />
|
||||
<WordPart from="m" to="rn" />
|
||||
<WordPart from="l" to="i" />
|
||||
<WordPart from="€" to="e" />
|
||||
<WordPart from="I" to="l" />
|
||||
<WordPart from="c" to="o" />
|
||||
<WordPart from="i" to="t" />
|
||||
<WordPart from="cc" to="oo" />
|
||||
<WordPart from="ii" to="tt" />
|
||||
<WordPart from="n/" to="ry" />
|
||||
<WordPart from="ae" to="æ" />
|
||||
<!-- "f " will be two words -->
|
||||
<WordPart from="f" to="f " />
|
||||
<WordPart from="c" to="e" />
|
||||
<WordPart from="I" to="t" />
|
||||
<WordPart from="n" to="o" />
|
||||
<WordPart from="s" to="e" />
|
||||
<WordPart from="\A" to="Vi" />
|
||||
<WordPart from="n/" to="rv" />
|
||||
<WordPart from="Ã" to="Å" />
|
||||
<WordPart from="í" to="i" />
|
||||
</PartialWords>
|
||||
<PartialLines />
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
||||
+508
@@ -0,0 +1,508 @@
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="abitual" to="habitual" />
|
||||
<Word from="àcerca" to="acerca" />
|
||||
<Word from="acessor" to="assessor" />
|
||||
<Word from="acólico" to="acólito" />
|
||||
<Word from="açoreano" to="açoriano" />
|
||||
<Word from="actuacao" to="actuação" />
|
||||
<Word from="acucar" to="açúcar" />
|
||||
<Word from="açucar" to="açúcar" />
|
||||
<Word from="advinhar" to="adivinhar" />
|
||||
<Word from="africa" to="África" />
|
||||
<Word from="ajuisar" to="ajuizar" />
|
||||
<Word from="album" to="álbum" />
|
||||
<Word from="alcoolémia" to="alcoolemia" />
|
||||
<Word from="aldião" to="aldeão" />
|
||||
<Word from="algerino" to="argelino" />
|
||||
<Word from="ameixeal" to="ameixial" />
|
||||
<Word from="amiaça" to="ameaça" />
|
||||
<Word from="analizar" to="analisar" />
|
||||
<Word from="andáste" to="andaste" />
|
||||
<Word from="anemona" to="anémona" />
|
||||
<Word from="antartico" to="antárctico" />
|
||||
<Word from="antártico" to="antárctico" />
|
||||
<Word from="antepôr" to="antepor" />
|
||||
<Word from="apárte" to="aparte" />
|
||||
<Word from="apiadeiro" to="apeadeiro" />
|
||||
<Word from="apiar" to="apear" />
|
||||
<Word from="apreciacao" to="apreciação" />
|
||||
<Word from="arctico" to="árctico" />
|
||||
<Word from="arrazar" to="arrasar" />
|
||||
<Word from="ártico" to="árctico" />
|
||||
<Word from="artifice" to="artífice" />
|
||||
<Word from="artifícial" to="artificial" />
|
||||
<Word from="ascenção" to="ascensão" />
|
||||
<!-- <Word from="assucar" to="açúcar" /> assucar é uma palavra existente no dicionário -->
|
||||
<Word from="assúcar" to="açúcar" />
|
||||
<Word from="aste" to="haste" />
|
||||
<Word from="asterístico" to="asterisco" />
|
||||
<Word from="averção" to="aversão" />
|
||||
<Word from="avizar" to="avisar" />
|
||||
<Word from="avulsso" to="avulso" />
|
||||
<Word from="baínha" to="bainha" />
|
||||
<Word from="banca-rota" to="bancarrota" />
|
||||
<Word from="bandeija" to="bandeja" />
|
||||
<Word from="bébé" to="bebé" />
|
||||
<Word from="beige" to="bege" />
|
||||
<Word from="benção" to="bênção" />
|
||||
<Word from="beneficiência" to="beneficência" />
|
||||
<Word from="beneficiente" to="beneficente" />
|
||||
<Word from="benvinda" to="bem-vinda" />
|
||||
<Word from="benvindo" to="bem-vindo" />
|
||||
<Word from="boasvindas" to="boas-vindas" />
|
||||
<Word from="borborinho" to="burburinho" />
|
||||
<Word from="Brazil" to="Brasil" />
|
||||
<Word from="bussula" to="bússola" />
|
||||
<Word from="cabo-verdeano" to="cabo-verdiano" />
|
||||
<Word from="caimbras" to="cãibras" />
|
||||
<Word from="calcáreo" to="calcário" />
|
||||
<Word from="calsado" to="calçado" />
|
||||
<Word from="calvíce" to="calvície" />
|
||||
<Word from="camoneano" to="camoniano" />
|
||||
<Word from="campião" to="campeão" />
|
||||
<Word from="cançacos" to="cansaços" />
|
||||
<Word from="caracter" to="carácter" />
|
||||
<Word from="caractéres" to="caracteres" />
|
||||
<Word from="catequeze" to="catequese" />
|
||||
<Word from="catequisador" to="catequizador" />
|
||||
<Word from="catequisar" to="catequizar" />
|
||||
<Word from="chícara" to="xícara" />
|
||||
<Word from="ciclano" to="sicrano" />
|
||||
<Word from="cicrano" to="sicrano" />
|
||||
<Word from="cidadães" to="cidadãos" />
|
||||
<Word from="cidadões" to="cidadãos" />
|
||||
<Word from="cincoenta" to="cinquenta" />
|
||||
<Word from="cinseiro" to="cinzeiro" />
|
||||
<Word from="cinsero" to="sincero" />
|
||||
<Word from="citacoes" to="citações" />
|
||||
<Word from="coalizão" to="colisão" />
|
||||
<Word from="côdia" to="côdea" />
|
||||
<Word from="combóio" to="comboio" />
|
||||
<Word from="compôr" to="compor" />
|
||||
<Word from="concerteza" to="com certeza" />
|
||||
<Word from="constituia" to="constituía" />
|
||||
<Word from="constituíu" to="constituiu" />
|
||||
<Word from="contato" to="contacto" />
|
||||
<Word from="contensão" to="contenção" />
|
||||
<Word from="contribuicoes" to="contribuições" />
|
||||
<Word from="côr" to="cor" />
|
||||
<Word from="corassão" to="coração" />
|
||||
<Word from="corçario" to="corsário" />
|
||||
<Word from="corçário" to="corsário" />
|
||||
<Word from="cornprimidosinbo" to="comprimidozinho" />
|
||||
<!-- <Word from="cota-parte" to="quota-parte" /> é uma palavra existente no dicionário -->
|
||||
<Word from="crâneo" to="crânio" />
|
||||
<Word from="dE" to="de" />
|
||||
<Word from="defenição" to="definição" />
|
||||
<Word from="defenido" to="definido" />
|
||||
<Word from="defenir" to="definir" />
|
||||
<Word from="deficite" to="défice" />
|
||||
<Word from="degladiar" to="digladiar" />
|
||||
<Word from="deiche" to="deixe" />
|
||||
<Word from="desinteria" to="disenteria" />
|
||||
<Word from="despendio" to="dispêndio" />
|
||||
<Word from="despêndio" to="dispêndio" />
|
||||
<Word from="desplicência" to="displicência" />
|
||||
<Word from="dificulidade" to="dificuldade" />
|
||||
<Word from="dispender" to="despender" />
|
||||
<Word from="dispendio" to="dispêndio" />
|
||||
<Word from="distribuido" to="distribuído" />
|
||||
<Word from="druída" to="druida" />
|
||||
<Word from="écrã" to="ecrã" />
|
||||
<Word from="ecran" to="ecrã" />
|
||||
<Word from="écran" to="ecrã" />
|
||||
<Word from="êle" to="ele" />
|
||||
<Word from="elice" to="hélice" />
|
||||
<Word from="élice" to="hélice" />
|
||||
<Word from="emiratos" to="emirados" />
|
||||
<Word from="engolis-te" to="engoliste" />
|
||||
<Word from="engulir" to="engolir" />
|
||||
<Word from="enguliste" to="engoliste" />
|
||||
<Word from="entertido" to="entretido" />
|
||||
<Word from="entitular" to="intitular" />
|
||||
<Word from="entreterimento" to="entretenimento" />
|
||||
<Word from="entreti-me" to="entretive-me" />
|
||||
<Word from="envólucro" to="invólucro" />
|
||||
<Word from="erói" to="herói" />
|
||||
<Word from="escluir" to="excluir" />
|
||||
<Word from="esclusão" to="exclusão" />
|
||||
<Word from="escrivões" to="escrivães" />
|
||||
<Word from="esqueiro" to="isqueiro" />
|
||||
<Word from="esquesito" to="esquisito" />
|
||||
<Word from="estacoes" to="estações" />
|
||||
<Word from="esteje" to="esteja" />
|
||||
<Word from="excavação" to="escavação" />
|
||||
<Word from="excavar" to="escavar" />
|
||||
<Word from="exdrúxula" to="esdrúxula" />
|
||||
<Word from="exdrúxulas" to="esdrúxulas" />
|
||||
<Word from="exitar" to="hesitar" />
|
||||
<Word from="explicacoes" to="explicações" />
|
||||
<Word from="exquisito" to="esquisito" />
|
||||
<Word from="extende" to="estende" />
|
||||
<Word from="extender" to="estender" />
|
||||
<Word from="fàcilmenfe" to="facilmente" />
|
||||
<Word from="fàcilmente" to="facilmente" />
|
||||
<Word from="fariam-lhe" to="far-lhe-iam" />
|
||||
<Word from="FARMÁClAS" to="FARMÁCIAS" />
|
||||
<Word from="farmecêutico" to="farmacêutico" />
|
||||
<Word from="fassa" to="faça" />
|
||||
<Word from="fébre" to="febre" />
|
||||
<Word from="fecula" to="fécula" />
|
||||
<Word from="fémea" to="fêmea" />
|
||||
<Word from="femenino" to="feminino" />
|
||||
<Word from="femininismo" to="feminismo" />
|
||||
<Word from="físiologista" to="fisiologista" />
|
||||
<Word from="fizémos" to="fizemos" />
|
||||
<Word from="fizes-te" to="fizeste" />
|
||||
<Word from="flôr" to="flor" />
|
||||
<Word from="forão" to="foram" />
|
||||
<Word from="formalisar" to="formalizar" />
|
||||
<Word from="fôro" to="foro" />
|
||||
<Word from="fos-te" to="foste" />
|
||||
<Word from="fragância" to="fragrância" />
|
||||
<Word from="françês" to="francês" />
|
||||
<Word from="frasqutnho" to="frasquinho" />
|
||||
<Word from="frustado" to="frustrado" />
|
||||
<Word from="furá" to="furar" />
|
||||
<Word from="gaz" to="gás" />
|
||||
<Word from="gáz" to="gás" />
|
||||
<Word from="geito" to="jeito" />
|
||||
<Word from="geneceu" to="gineceu" />
|
||||
<Word from="geropiga" to="jeropiga" />
|
||||
<Word from="glicémia" to="glicemia" />
|
||||
<Word from="gorgeta" to="gorjeta" />
|
||||
<Word from="grangear" to="granjear" />
|
||||
<Word from="guizar" to="guisar" />
|
||||
<Word from="hectar" to="hectare" />
|
||||
<Word from="herméticamente" to="hermeticamente" />
|
||||
<Word from="hernia" to="hérnia" />
|
||||
<Word from="higiéne" to="higiene" />
|
||||
<Word from="hilariedade" to="hilaridade" />
|
||||
<Word from="hiperacídez" to="hiperacidez" />
|
||||
<Word from="hontem" to="ontem" />
|
||||
<Word from="igiene" to="higiene" />
|
||||
<Word from="igienico" to="higiénico" />
|
||||
<Word from="igiénico" to="higiénico" />
|
||||
<Word from="igreija" to="igreja" />
|
||||
<Word from="iguasu" to="iguaçu" />
|
||||
<Word from="ilacção" to="ilação" />
|
||||
<Word from="imbigo" to="umbigo" />
|
||||
<Word from="impecilho" to="empecilho" />
|
||||
<Word from="íncas" to="incas" />
|
||||
<Word from="incêsto" to="incesto" />
|
||||
<Word from="inclusivé" to="inclusive" />
|
||||
<Word from="incômodos" to="incómodos" />
|
||||
<Word from="incontestávelmente" to="incontestavelmente" />
|
||||
<Word from="incontestàvelmente" to="incontestavelmente" />
|
||||
<Word from="indespensáveis" to="indispensáveis" />
|
||||
<Word from="indespensável" to="indispensável" />
|
||||
<Word from="India" to="Índia" />
|
||||
<Word from="indiguinação" to="indignação" />
|
||||
<Word from="indiguinado" to="indignado" />
|
||||
<Word from="indiguinar" to="indignar" />
|
||||
<Word from="inflacção" to="inflação" />
|
||||
<Word from="ingreja" to="igreja" />
|
||||
<Word from="INSCRICOES" to="INSCRIÇÕES" />
|
||||
<Word from="intensão" to="intenção" />
|
||||
<Word from="intertido" to="entretido" />
|
||||
<Word from="intoxica" to="Intoxica" />
|
||||
<Word from="intrega" to="entrega" />
|
||||
<Word from="inverosímel" to="inverosímil" />
|
||||
<Word from="iorgute" to="iogurte" />
|
||||
<Word from="ipopótamo" to="hipopótamo" />
|
||||
<Word from="ipsilon" to="ípsilon" />
|
||||
<Word from="ipslon" to="ípsilon" />
|
||||
<Word from="isquesito" to="esquisito" />
|
||||
<Word from="juíz" to="juiz" />
|
||||
<Word from="juiza" to="juíza" />
|
||||
<Word from="júniores" to="juniores" />
|
||||
<Word from="justanzente" to="justamente" />
|
||||
<Word from="juz" to="jus" />
|
||||
<Word from="kilo" to="quilo" />
|
||||
<Word from="laboratório-porque" to="laboratório porque" />
|
||||
<Word from="ladravaz" to="ladrava" />
|
||||
<Word from="lamentàvelmente" to="lamentavelmente" />
|
||||
<Word from="lampeão" to="lampião" />
|
||||
<Word from="largartixa" to="lagartixa" />
|
||||
<Word from="largarto" to="lagarto" />
|
||||
<Word from="lêm" to="lêem" />
|
||||
<Word from="leucémia" to="leucemia" />
|
||||
<Word from="licensa" to="licença" />
|
||||
<Word from="linguísta" to="linguista" />
|
||||
<Word from="lisongear" to="lisonjear" />
|
||||
<Word from="logista" to="lojista" />
|
||||
<Word from="maçajar" to="massajar" />
|
||||
<Word from="Macfadden-o" to="Macfadden o" />
|
||||
<Word from="mae" to="mãe" />
|
||||
<Word from="magestade" to="majestade" />
|
||||
<Word from="mãgua" to="mágoa" />
|
||||
<Word from="mangerico" to="manjerico" />
|
||||
<Word from="mangerona" to="manjerona" />
|
||||
<Word from="manteem-se" to="mantêm-se" />
|
||||
<Word from="mantega" to="manteiga" />
|
||||
<Word from="mantem-se" to="mantém-se" />
|
||||
<Word from="massiço" to="maciço" />
|
||||
<Word from="massisso" to="maciço" />
|
||||
<Word from="médica-Rio" to="médica Rio" />
|
||||
<Word from="menistro" to="ministro" />
|
||||
<Word from="merciaria" to="mercearia" />
|
||||
<Word from="metrelhadora" to="metralhadora" />
|
||||
<Word from="miscegenação" to="miscigenação" />
|
||||
<Word from="misogenia" to="misoginia" />
|
||||
<Word from="misogeno" to="misógino" />
|
||||
<Word from="misógeno" to="misógino" />
|
||||
<Word from="mº" to="º" />
|
||||
<Word from="môlho" to="molho" />
|
||||
<Word from="monumentânea" to="momentânea" />
|
||||
<Word from="mortandela" to="mortadela" />
|
||||
<Word from="morteIa" to="mortela" />
|
||||
<Word from="muinto" to="muito" />
|
||||
<Word from="nasaias" to="nasais" />
|
||||
<Word from="nêle" to="nele" />
|
||||
<Word from="nest" to="neste" />
|
||||
<Word from="Nivea" to="Nívea" />
|
||||
<Word from="nonagessimo" to="nonagésimo" />
|
||||
<Word from="nonagéssimo" to="nonagésimo" />
|
||||
<Word from="nornal" to="normal" />
|
||||
<Word from="notàvelmente" to="notavelmente" />
|
||||
<Word from="obcessão" to="obsessão" />
|
||||
<Word from="obesidae" to="obesidade" />
|
||||
<Word from="óbviamente" to="obviamente" />
|
||||
<Word from="òbviamente" to="obviamente" />
|
||||
<Word from="ofecina" to="oficina" />
|
||||
<Word from="oje" to="hoje" />
|
||||
<Word from="omem" to="homem" />
|
||||
<Word from="opcoes" to="opções" />
|
||||
<Word from="opóbrio" to="opróbrio" />
|
||||
<Word from="opróbio" to="opróbrio" />
|
||||
<Word from="orfão" to="órfão" />
|
||||
<Word from="organigrama" to="organograma" />
|
||||
<Word from="organisar" to="organizar" />
|
||||
<Word from="orgão" to="órgão" />
|
||||
<Word from="orta" to="horta" />
|
||||
<Word from="ótima" to="óptima" />
|
||||
<Word from="ótimos" to="óptimos" />
|
||||
<Word from="paralização" to="paralisação" />
|
||||
<Word from="paralizado" to="paralisado" />
|
||||
<Word from="paralizar" to="paralisar" />
|
||||
<Word from="paráste" to="paraste" />
|
||||
<Word from="Pátria" to="pátria" />
|
||||
<Word from="paúl" to="Paul" />
|
||||
<Word from="pecalço" to="percalço" />
|
||||
<Word from="pêga" to="pega" />
|
||||
<Word from="periodo" to="período" />
|
||||
<Word from="pertubar" to="perturbar" />
|
||||
<Word from="perú" to="peru" />
|
||||
<Word from="piqueno" to="pequeno" />
|
||||
<Word from="pirinéus" to="Pirenéus" />
|
||||
<Word from="poblema" to="problema" />
|
||||
<Word from="pobrema" to="problema" />
|
||||
<Word from="poden" to="podem" />
|
||||
<Word from="poder-mos" to="pudermos" />
|
||||
<Word from="ponteagudo" to="pontiagudo" />
|
||||
<Word from="pontuacoes" to="pontuações" />
|
||||
<Word from="prazeiroso" to="prazeroso" />
|
||||
<Word from="precaridade" to="precariedade" />
|
||||
<Word from="precizar" to="precisar" />
|
||||
<Word from="preserverança" to="perseverança" />
|
||||
<Word from="previlégio" to="privilégio" />
|
||||
<Word from="primária-que" to="primária que" />
|
||||
<Word from="priúdo" to="período" />
|
||||
<Word from="probalidade" to="probabilidade" />
|
||||
<Word from="progreso" to="progresso" />
|
||||
<Word from="proibído" to="proibido" />
|
||||
<Word from="proíbido" to="proibido" />
|
||||
<Word from="própia" to="própria" />
|
||||
<Word from="propiedade" to="propriedade" />
|
||||
<Word from="propio" to="próprio" />
|
||||
<Word from="própio" to="próprio" />
|
||||
<Word from="provocacoes" to="provocações" />
|
||||
<Word from="prsença" to="presença" />
|
||||
<Word from="prustituta" to="prostituta" />
|
||||
<Word from="pudérmos" to="pudermos" />
|
||||
<Word from="púlico" to="público" />
|
||||
<Word from="pús" to="pus" />
|
||||
<Word from="pusémos" to="pusemos" />
|
||||
<Word from="quadricomia" to="quadricromia" />
|
||||
<Word from="quadriplicado" to="quadruplicado" />
|
||||
<Word from="quaisqueres" to="quaisquer" />
|
||||
<Word from="quer-a" to="quere-a" />
|
||||
<Word from="quere-se" to="quer-se" />
|
||||
<Word from="quer-o" to="quere-o" />
|
||||
<Word from="químco" to="químico" />
|
||||
<Word from="quises-te" to="quiseste" />
|
||||
<Word from="quizer" to="quiser" />
|
||||
<Word from="quizeram" to="quiseram" />
|
||||
<Word from="quizesse" to="quisesse" />
|
||||
<Word from="quizessem" to="quisessem" />
|
||||
<Word from="raínha" to="rainha" />
|
||||
<Word from="raíz" to="raiz" />
|
||||
<Word from="raizes" to="raízes" />
|
||||
<Word from="ratato" to="retrato" />
|
||||
<Word from="raúl" to="Raul" />
|
||||
<Word from="razar" to="rasar" />
|
||||
<Word from="rectaguarda" to="retaguarda" />
|
||||
<Word from="rédia" to="rédea" />
|
||||
<Word from="reestabelecer" to="restabelecer" />
|
||||
<Word from="refeicoes" to="refeições" />
|
||||
<Word from="refêrencia" to="referência" />
|
||||
<Word from="regeitar" to="rejeitar" />
|
||||
<Word from="regurjitar" to="regurgitar" />
|
||||
<Word from="reinvidicação" to="reivindicação" />
|
||||
<Word from="reinvidicar" to="reivindicar" />
|
||||
<Word from="requer-a" to="requere-a" />
|
||||
<Word from="requere-se" to="requer-se" />
|
||||
<Word from="requer-o" to="requere-o" />
|
||||
<Word from="requesito" to="requisito" />
|
||||
<Word from="requisicoes" to="requisições" />
|
||||
<Word from="RESIDENCIA" to="RESIDÊNCIA" />
|
||||
<Word from="respiraçáo" to="respiração" />
|
||||
<Word from="restablecer" to="restabelecer" />
|
||||
<Word from="réstea" to="réstia" />
|
||||
<Word from="ruborisar" to="ruborizar" />
|
||||
<Word from="rúbrica" to="rubrica" />
|
||||
<Word from="sàdia" to="sadia" />
|
||||
<Word from="saiem" to="saem" />
|
||||
<Word from="salchicha" to="salsicha" />
|
||||
<Word from="salchichas" to="salsichas" />
|
||||
<Word from="saloice" to="saloiice" />
|
||||
<Word from="salvé" to="salve" />
|
||||
<Word from="salve-raínha" to="salve-rainha" />
|
||||
<Word from="salvé-rainha" to="salve-rainha" />
|
||||
<Word from="salvé-raínha" to="salve-rainha" />
|
||||
<Word from="sao" to="são" />
|
||||
<Word from="sargeta" to="sarjeta" />
|
||||
<Word from="seções" to="secções" />
|
||||
<Word from="seija" to="seja" />
|
||||
<Word from="seissentos" to="seiscentos" />
|
||||
<Word from="seje" to="seja" />
|
||||
<Word from="semiar" to="semear" />
|
||||
<Word from="séniores" to="seniores" />
|
||||
<Word from="sensibilidadc" to="sensibilidade" />
|
||||
<Word from="sensívelmente" to="sensivelmente" />
|
||||
<Word from="setessentos" to="setecentos" />
|
||||
<Word from="siclano" to="sicrano" />
|
||||
<Word from="Sifilis" to="Sífilis" />
|
||||
<Word from="sifílis" to="sífilis" />
|
||||
<Word from="sinão" to="senão" />
|
||||
<Word from="sinmtoma" to="sintoma" />
|
||||
<Word from="sintéticamente" to="sinteticamente" />
|
||||
<Word from="sintetisa" to="sintetiza" />
|
||||
<Word from="SÓ" to="só" />
|
||||
<Word from="sôfra" to="sofra" />
|
||||
<Word from="sôfregamente" to="sofregamente" />
|
||||
<Word from="somáste" to="somaste" />
|
||||
<Word from="sombracelha" to="sobrancelha" />
|
||||
<Word from="sombrancelha" to="sobrancelha" />
|
||||
<Word from="sombrancelhas" to="sobrancelhas" />
|
||||
<Word from="suavisar" to="suavizar" />
|
||||
<Word from="substituido" to="substituído" />
|
||||
<Word from="suburbio" to="subúrbio" />
|
||||
<!-- <Word from="sues" to="seus" /> sues existe "Cuidado, não sues muito." -->
|
||||
<Word from="suI" to="sul" />
|
||||
<Word from="Suiça" to="Suíça" />
|
||||
<Word from="suiças" to="suíças" />
|
||||
<Word from="suiço" to="suíço" />
|
||||
<Word from="suiços" to="suíços" />
|
||||
<Word from="supôr" to="supor" />
|
||||
<Word from="tabeliões" to="tabeliães" />
|
||||
<Word from="taínha" to="tainha" />
|
||||
<Word from="tava" to="estava" />
|
||||
<Word from="têem" to="têm" />
|
||||
<Word from="telemovel" to="telemóvel" />
|
||||
<Word from="telémovel" to="telemóvel" />
|
||||
<Word from="terminacoes" to="terminações" />
|
||||
<Word from="toráxico" to="torácico" />
|
||||
<Word from="tou" to="estou" />
|
||||
<Word from="transpôr" to="transpor" />
|
||||
<Word from="trasnporte" to="transporte" />
|
||||
<Word from="tumors" to="tumores" />
|
||||
<Word from="úmida" to="húmida" />
|
||||
<Word from="umidade" to="humidade" />
|
||||
<Word from="vai-vem" to="vaivém" />
|
||||
<Word from="vegilância" to="vigilância" />
|
||||
<Word from="vegilante" to="vigilante" />
|
||||
<Word from="ventoínha" to="ventoinha" />
|
||||
<Word from="verosímel" to="verosímil" />
|
||||
<Word from="video" to="vídeo" />
|
||||
<Word from="virus" to="vírus" />
|
||||
<Word from="visiense" to="viseense" />
|
||||
<Word from="voçe" to="você" />
|
||||
<Word from="voçê" to="você" />
|
||||
<Word from="vôo" to="voo" />
|
||||
<Word from="xadrês" to="xadrez" />
|
||||
<Word from="xafariz" to="chafariz" />
|
||||
<Word from="xéxé" to="xexé" />
|
||||
<Word from="xilindró" to="chilindró" />
|
||||
<Word from="zaíre" to="Zaire" />
|
||||
<Word from="zepelin" to="zepelim" />
|
||||
<Word from="zig-zag" to="ziguezague" />
|
||||
<Word from="zoô" to="zoo" />
|
||||
<Word from="zôo" to="zoo" />
|
||||
<Word from="zuar" to="zoar" />
|
||||
<Word from="zum-zum" to="zunzum" />
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<PartialLines>
|
||||
<LinePart from="IN 6-E" to="N 6 E" />
|
||||
<LinePart from="in tegrar-se" to="integrar-se" />
|
||||
<LinePart from="in teresse" to="interesse" />
|
||||
<LinePart from="in testinos" to="intestinos" />
|
||||
<LinePart from="indica ção" to="indicação" />
|
||||
<LinePart from="inte tino" to="intestino" />
|
||||
<LinePart from="intes tinos" to="intestinos" />
|
||||
<LinePart from="L da" to="Lda" />
|
||||
<LinePart from="mal estar" to="mal-estar" />
|
||||
<LinePart from="mastiga çáo" to="mastigação" />
|
||||
<LinePart from="médi cas" to="médicas" />
|
||||
<LinePart from="mineo rais" to="minerais" />
|
||||
<LinePart from="mola res" to="molares" />
|
||||
<LinePart from="movi mentos" to="movimentos" />
|
||||
<LinePart from="movimen to" to="movimento" />
|
||||
<LinePart from="N 5-Estendido" to="Nº 5 Estendido" />
|
||||
<LinePart from="oxigé nio" to="oxigénio" />
|
||||
<LinePart from="pod mos" to="podemos" />
|
||||
<LinePart from="poder-se ia" to="poder-se-ia" />
|
||||
<LinePart from="pos sibilidade" to="possibilidade" />
|
||||
<LinePart from="possibi lidades" to="possibilidades" />
|
||||
<LinePart from="pro duto" to="produto" />
|
||||
<LinePart from="procu rar" to="procurar" />
|
||||
<LinePart from="Q u e" to="Que" />
|
||||
<LinePart from="qualifi cam" to="qualificam" />
|
||||
<LinePart from="R egião" to="Região" />
|
||||
<LinePart from="unsuficien temente" to="insuficientemente" />
|
||||
</PartialLines>
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions>
|
||||
<!-- <RegEx find="\bi\b" replaceWith="I" /> just an example - do not use this regex -->
|
||||
<RegEx find="([0-9]) +º" replaceWith="$1º" />
|
||||
<RegEx find="\Bcao\b" replaceWith="ção" />
|
||||
<RegEx find="\Bcoes\b" replaceWith="ções" />
|
||||
<!-- <RegEx find="\Bccao\b" replaceWith="cção" /> não faz sentido ter este e ter a linha de cima -->
|
||||
<!-- <RegEx find="\Bccoes\b" replaceWith="cções" /> não faz sentido ter este e ter a linha de cima -->
|
||||
<RegEx find="\b(m|M)ae\b" replaceWith="$1ãe" />
|
||||
<RegEx find="\Bdmnis\B" replaceWith="dminis" />
|
||||
<RegEx find="\Blcól\B" replaceWith="lcoól" />
|
||||
<RegEx find="\b(t|T)a[nm]b[eé]m\b" replaceWith="$1ambém" />
|
||||
<RegEx find="\bzeppeli[mn]\b" replaceWith="zepelim" />
|
||||
<RegEx find="\b(s|S)ufe?ciente\b" replaceWith="$1uficiente" />
|
||||
<RegEx find="\b(n|N)ao\b" replaceWith="$1ão" />
|
||||
<RegEx find="\b(B|b)elem\b" replaceWith="$1elém" />
|
||||
<RegEx find="\b(s|S)u[íi]sso(s)?\b" replaceWith="$1uíço$2" />
|
||||
<RegEx find="\b(s|S)u[íi]ssa(s)?\b" replaceWith="$1uíça$2" />
|
||||
<RegEx find="\b(p|P)rivelig[ie]\p{Ll}d" replaceWith="$1rivilegiad" />
|
||||
<RegEx find="\bpud(?:és|e-)se\b" replaceWith="pudesse" />
|
||||
<RegEx find="\biquilíbr(?:e|i)o\b" replaceWith="equilíbrio" />
|
||||
<RegEx find="\b(c|C)orregi\B" replaceWith="$1orrigi" />
|
||||
<RegEx find="(?<=A|a)ssociacao" replaceWith="ssociação" />
|
||||
<RegEx find="(?<=N|n)inguem" replaceWith="inguém" />
|
||||
<RegEx find="(?<=g|G)rat(?:uí|úi)to" replaceWith="ratuito" />
|
||||
<RegEx find="(?<=d|D)esiquilíbr[ei]o" replaceWith="esequilíbrio" />
|
||||
<RegEx find="\b[kK]il(ogramas?|ómetros?)" replaceWith="quil$1" />
|
||||
</RegularExpressions>
|
||||
</OCRFixReplaceList>
|
||||
+257
@@ -0,0 +1,257 @@
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="НЄЙ" to="НЕЙ" />
|
||||
<Word from="ОРГЗНИЗМОБ" to="ОРГАНИЗМОВ" />
|
||||
<Word from="Чї0" to="ЧТО" />
|
||||
<Word from="НЭ" to="НА" />
|
||||
<Word from="СОСЄДНЮЮ" to="СОСЕДНЮЮ" />
|
||||
<Word from="ПЛЗНЄТУ" to="ПЛАНЕТУ" />
|
||||
<Word from="ЗЗГЭДОК" to="ЗАГАДОК" />
|
||||
<Word from="СОТВОРЄНИЯ" to="СОТВОРЕНИЯ" />
|
||||
<Word from="МИРЭ" to="МИРА" />
|
||||
<Word from="ПОЯБЛЄНИЯ" to="ПОЯВЛЕНИЯ" />
|
||||
<Word from="ЗЄМЛЄ" to="ЗЕМЛЕ" />
|
||||
<Word from="ЄЩЄ" to="ЕЩЁ" />
|
||||
<Word from="ТЄМНЬІХ" to="ТЕМНЫХ" />
|
||||
<Word from="СЄРЬЄЗНЬІМ" to="СЕРЬЕЗНЫМ" />
|
||||
<Word from="ПОШІІ0" to="ПОШЛО" />
|
||||
<Word from="Пр0ИЗ0ШЄЛ" to="ПРОИЗОШЕЛ" />
|
||||
<Word from="СЄКРЄТЭМИ" to="СЕКРЕТАМИ" />
|
||||
<Word from="МЭТЄРИЗЛЬІ" to="МАТЕРИАЛЫ" />
|
||||
<Word from="ПЯТЄН" to="ПЯТЕН" />
|
||||
<Word from="ПЛаНЄїЄ" to="ПЛАНЕТЕ" />
|
||||
<Word from="КЗТЭКЛИЗМ" to="КАТАКЛИЗМ" />
|
||||
<Word from="ОКЗЗЗЛСЯ" to="ОКАЗАЛСЯ" />
|
||||
<Word from="ДЭЛЬШЕ" to="ДАЛЬШЕ" />
|
||||
<Word from="ТВК" to="ТАК" />
|
||||
<Word from="ПЛЗНЄТЗ" to="ПЛАНЕТА" />
|
||||
<Word from="ЧЄГО" to="ЧЕГО" />
|
||||
<Word from="УЗНЭТЬ" to="УЗНАТЬ" />
|
||||
<Word from="ПЛЭНЄТЄ" to="ПЛАНЕТЕ" />
|
||||
<Word from="НЄМ" to="НЕМ" />
|
||||
<Word from="БОЗМОЖНЗ" to="ВОЗМОЖНА" />
|
||||
<Word from="СОБЄРШЄННО" to="СОВЕРШЕННО" />
|
||||
<Word from="ИНЭЧЄ" to="ИНАЧЕ" />
|
||||
<Word from="БСЄ" to="ВСЕ" />
|
||||
<Word from="НЕДОСТЗТКИ" to="НЕДОСТАТКИ" />
|
||||
<Word from="НОВЬІЄ" to="НОВЫЕ" />
|
||||
<Word from="ВЄЛИКОЛЄПНЭЯ" to="ВЕЛИКОЛЕПНАЯ" />
|
||||
<Word from="ОСТЭІІОСЬ" to="ОСТАЛОСЬ" />
|
||||
<Word from="НЗЛИЧИЄ" to="НАЛИЧИЕ" />
|
||||
<Word from="бЫ" to="бы" />
|
||||
<Word from="ПРОЦВЕТВТЬ" to="ПРОЦВЕТАТЬ" />
|
||||
<Word from="КЗК" to="КАК" />
|
||||
<Word from="ВОДЗ" to="ВОДА" />
|
||||
<Word from="НЗШЕЛ" to="НАШЕЛ" />
|
||||
<Word from="НЄ" to="НЕ" />
|
||||
<Word from="ТОЖЄ" to="ТОЖЕ" />
|
||||
<Word from="ВУЛКЭНИЧЄСКОЙ" to="ВУЛКАНИЧЕСКОЙ" />
|
||||
<Word from="ЭКТИБНОСТИ" to="АКТИВНОСТИ" />
|
||||
<Word from="ПОЯВИЛЗСЬ" to="ПОЯВИЛАСЬ" />
|
||||
<Word from="НОВЗЯ" to="НОВАЯ" />
|
||||
<Word from="СТРЭТЄГИЯ" to="СТРАТЕГИЯ" />
|
||||
<Word from="УСПЄШН0" to="УСПЕШНО" />
|
||||
<Word from="ПОСЗДКУ" to="ПОСАДКУ" />
|
||||
<Word from="ГОТОБЫ" to="ГОТОВЫ" />
|
||||
<Word from="НЗЧЗТЬ" to="НАЧАТЬ" />
|
||||
<Word from="ОХОТЭ" to="ОХОТА" />
|
||||
<Word from="ПРИЗНЗКЗМИ" to="ПРИЗНАКАМИ" />
|
||||
<Word from="Пр0ШЛОМ" to="ПРОШЛОМ" />
|
||||
<Word from="НЭСТОЯЩЄМ" to="НАСТОЯЩЕМ" />
|
||||
<Word from="ПУСТОТЗХ" to="ПУСТОТАХ" />
|
||||
<Word from="БЛЗЖНОЙ" to="ВЛАЖНОЙ" />
|
||||
<Word from="ПОЧБЄ" to="ПОЧВЕ" />
|
||||
<Word from="МЬІ" to="МЫ" />
|
||||
<Word from="СЄЙЧЗС" to="СЕЙЧАС" />
|
||||
<Word from="ЄСЛИ" to="ЕСЛИ" />
|
||||
<Word from="ЗЗТРОНЕМ" to="ЗАТРОНЕМ" />
|
||||
<Word from="ОПЗСЗЄМСЯ" to="ОПАСАЕМСЯ" />
|
||||
<Word from="СИЛЬН0" to="СИЛЬНО" />
|
||||
<Word from="ОТЛИЧЗЄТСЯ" to="ОТЛИЧАЕТСЯ" />
|
||||
<Word from="РЭНЬШЄ" to="РАНЬШЕ" />
|
||||
<Word from="НЗЗЬІВЗЮТ" to="НАЗЫВАЮТ" />
|
||||
<Word from="ТЄКЛ3" to="ТЕКЛА" />
|
||||
<Word from="ОСЗДОЧНЫМИ" to="ОСАДОЧНЫМИ" />
|
||||
<Word from="ПОСТЄПЄНН0" to="ПОСТЕПЕННО" />
|
||||
<Word from="ИСПЭРЯЛЗСЬ" to="ИСПАРЯЛАСЬ" />
|
||||
<Word from="ЄОЛЬШОЄ" to="БОЛЬШОЕ" />
|
||||
<Word from="КОЛИЧЄСТБО" to="КОЛИЧЕСТВО" />
|
||||
<Word from="ГЄМЗТИТЕ" to="ГЕМАТИТА" />
|
||||
<Word from="ПОЛУЧЭЄТ" to="ПОЛУЧАЕТ" />
|
||||
<Word from="НЄДОСТЗЧН0" to="НЕДОСТАТОЧНО" />
|
||||
<Word from="ПИТЭНИЯ" to="ПИТАНИЯ" />
|
||||
<Word from="ПОКЗ" to="ПОКА" />
|
||||
<Word from="БЬІХОДИЛИ" to="ВЫХОДИЛИ" />
|
||||
<Word from="ЗЄМІІЄ" to="ЗЕМЛЕ" />
|
||||
<Word from="ВЄСЬІИЗ" to="ВЕСЬМА" />
|
||||
<Word from="ЗЄМЛИ" to="ЗЕМЛИ" />
|
||||
<Word from="бЬІЛО" to="БЫЛО" />
|
||||
<Word from="КИЗНИ" to="ЖИЗНИ" />
|
||||
<Word from="СТЗНОВИЛЗСЬ" to="СТАНОВИЛАСЬ" />
|
||||
<Word from="СОЛЄНЄЄ" to="СОЛЁНЕЕ" />
|
||||
<Word from="МЭГНИТНЫМ" to="МАГНИТНЫМ" />
|
||||
<Word from="ЧТОбЬІ" to="ЧТОБЫ" />
|
||||
<Word from="СОЗДЕТЬ" to="СОЗДАТЬ" />
|
||||
<Word from="МЗГНИТНОЄ" to="МАГНИТНОЕ" />
|
||||
<Word from="КЭЖУТСЯ" to="КАЖУТСЯ" />
|
||||
<Word from="ОЗНЗЧЗЄТ" to="ОЗНАЧАЕТ" />
|
||||
<Word from="МОГЛЗ" to="МОГЛА" />
|
||||
<Word from="ИМЄТЬ" to="ИМЕТЬ" />
|
||||
<Word from="КОСМОСЭ" to="КОСМОСА" />
|
||||
<Word from="СОЛНЄЧНЗЯ" to="СОЛНЕЧНАЯ" />
|
||||
<Word from="СИСТЄМЗ" to="СИСТЕМА" />
|
||||
<Word from="ПОСІІУЖИЛО" to="ПОСЛУЖИЛО" />
|
||||
<Word from="МЗГНИТНОГО" to="МАГНИТНОГО" />
|
||||
<Word from="ПЛВНЄТЫ" to="ПЛАНЕТЫ" />
|
||||
<Word from="ЛОКЗЛЬНЬІХ" to="ЛОКАЛЬНЫХ" />
|
||||
<Word from="ПОЛЄЙ" to="ПОЛЕЙ" />
|
||||
<Word from="КЗЖУТСЯ" to="КАЖУТСЯ" />
|
||||
<Word from="КЗКОГО" to="КАКОГО" />
|
||||
<Word from="СТРЗШНОГО" to="СТРАШНОГО" />
|
||||
<Word from="СТОЛКНОЕЄНИЯ" to="СТОЛКНОВЕНИЯ" />
|
||||
<Word from="МЕСТЗМИ" to="МЕСТАМИ" />
|
||||
<Word from="СДЄЛЗТЬ" to="СДЕЛАТЬ" />
|
||||
<Word from="СТЗЛО" to="СТАЛО" />
|
||||
<Word from="МЭГНИТНОГО" to="МАГНИТНОГО" />
|
||||
<Word from="ЗЗКЛЮЧЗВШЄЙСЯ" to="ЗАКЛЮЧАВШЕЙСЯ" />
|
||||
<Word from="ЄГО" to="ЕГО" />
|
||||
<Word from="ЯДРЄ" to="ЯДРЕ" />
|
||||
<Word from="НЗ" to="НА" />
|
||||
<Word from="ИСЧЄЗЛ3" to="ИСЧЕЗЛА" />
|
||||
<Word from="СЧИТЗЮ" to="СЧИТАЮ" />
|
||||
<Word from="ШЭНСЫ" to="ШАНСЫ" />
|
||||
<Word from="ИНЗЧЄ" to="ИНАЧЕ" />
|
||||
<Word from="СТЗЛ" to="СТАЛ" />
|
||||
<Word from="ТРЗТИТЬ" to="ТРАТИТЬ" />
|
||||
<Word from="НЗПРЗВЛЯЄТСЯ" to="НАПРАВЛЯЕТСЯ" />
|
||||
<Word from="ОБЛЭСТИ" to="ОБЛАСТИ" />
|
||||
<Word from="ЯВЛЯІОТСЯ" to="ЯВЛЯЮТСЯ" />
|
||||
<Word from="ГЛЭВНОЙ" to="ГЛАВНОЙ" />
|
||||
<Word from="ДОКЗЗЗТЄЛЬСТВ" to="ДОКАЗАТЕЛЬСТВ" />
|
||||
<Word from="КИСЛОТЭМИ" to="КИСЛОТАМИ" />
|
||||
<Word from="ОНЭ" to="ОНА" />
|
||||
<Word from="ПРЗКТИЧЄСКИ" to="ПРАКТИЧЕСКИ" />
|
||||
<Word from="ЛЄСУ" to="ЛЕСУ" />
|
||||
<Word from="УСЛОБИЯМ" to="УСЛОВИЯМ" />
|
||||
<Word from="СПЗСТИСЬ" to="СПАСТИСЬ" />
|
||||
<Word from="РЗЗВИВЗЮЩИЄСЯ" to="РАЗВИВАЮЩИЕСЯ" />
|
||||
<Word from="ШЭПКИ" to="ШАПКИ" />
|
||||
<Word from="ЗНЗЄМ" to="ЗНАЕМ" />
|
||||
<Word from="СООИРЭЄМСЯ" to="СОБИРАЕМСЯ" />
|
||||
<Word from="БЫЯСНИТЬ" to="ВЫЯСНИТЬ" />
|
||||
<Word from="СЗМ" to="САМ" />
|
||||
<Word from="РЗСПОЗНЗТЬ" to="РАСПОЗНАТЬ" />
|
||||
<Word from="УЗНЗТЬ" to="УЗНАТЬ" />
|
||||
<Word from="КЭЖЄТСЯ" to="КАЖЕТСЯ" />
|
||||
<Word from="ОРЄИТЗЛЬНЬІЄ" to="ОРБИТАЛЬНЫЕ" />
|
||||
<Word from="ЛЄТЭТЄЛЬНЬІЄ" to="ЛЕТАТЕЛЬНЫЕ" />
|
||||
<Word from="ЗППЗРЕТЬІ" to="АППАРАТЫ" />
|
||||
<Word from="ЖЄ" to="ЖЕ" />
|
||||
<Word from="ТЗКЗЯ" to="ТАКАЯ" />
|
||||
<Word from="МЗЛЄНЬКЗЯ" to="МАЛЕНЬКАЯ" />
|
||||
<Word from="ПЛЭНЄТЗ" to="ПЛАНЕТА" />
|
||||
<Word from="СПЗІІЬКО" to="СТОЛЬКО" />
|
||||
<Word from="бЬІЛ3" to="БЫЛА" />
|
||||
<Word from="ЁЕСЧИСЛЄННОЄ" to="БЕСЧИСЛЕННОЕ" />
|
||||
<Word from="МЗГНИїНЬІХ" to="МАГНИТНЫХ" />
|
||||
<Word from="ПОСТраД3Л" to="ПОСТРАДАЛ" />
|
||||
<Word from="ДЗЖЄ" to="ДАЖЕ" />
|
||||
<Word from="РЗЗНЬІМИ" to="РАЗНЫМИ" />
|
||||
<Word from="СУЩЄСТБОВЭНИЄ" to="СУЩЕСТВОВАНИЕ" />
|
||||
<Word from="ПЛаНЄїЬІ" to="ПЛАНЕТЫ" />
|
||||
<Word from="ПОДВЄРГЛЗСЬ" to="ПОДВЕРГЛАСЬ" />
|
||||
<Word from="ОПЗСІ-ІОСТИ" to="ОПАСНОСТИ" />
|
||||
<Word from="ПЛЗНЄТЄ" to="ПЛАНЕТЕ" />
|
||||
<Word from="Н0" to="НО" />
|
||||
<Word from="бЬІ" to="БЫ" />
|
||||
<Word from="ОТДЗЛЄННЫЄ" to="ОТДАЛЁННЫЕ" />
|
||||
<Word from="ПОЛЯРНЬІЄ" to="ПОЛЯРНЫЕ" />
|
||||
<Word from="ЦЄЛЬІ-О" to="ЦЕЛЬЮ" />
|
||||
<Word from="ПЄЩЄРЗХ" to="ПЕЩЕРАХ" />
|
||||
<Word from="НЗПОЛНЄННЬІХ" to="НАПОЛНЕННЫХ" />
|
||||
<Word from="ИСПЗРЄНИЯМИ" to="ИСПАРЕНИЯМИ" />
|
||||
<Word from="МИНИЗТЮРНЬІЄ" to="МИНИАТЮРНЫЕ" />
|
||||
<Word from="ТЭКЗЯ" to="ТАКАЯ" />
|
||||
<Word from="ПрИСП0СОбИТЬСЯ" to="ПРИСПОСОБИТЬСЯ" />
|
||||
<Word from="НЄОЄХОДИМЬІЄ" to="НЕОБХОДИМЫЕ" />
|
||||
<Word from="ОРГВНИЧЄСКИЄ" to="ОРГАНИЧЕСКИЕ" />
|
||||
<Word from="МЗРСИЗНСКИЄ" to="МАРСИАНСКИЕ" />
|
||||
<Word from="МЄСТЄ" to="МЕСТЕ" />
|
||||
<Word from="І\/ІАККЕЙШ" to="МАККЕЙН" />
|
||||
<Word from="НЗХОДЯЩИЄСЯ" to="НАХОДЯЩИЕСЯ" />
|
||||
<Word from="НЄЗКТИВНОМ" to="НЕАКТИВНОМ" />
|
||||
<Word from="ЗЭСНЯТЬ" to="ЗАСНЯТЬ" />
|
||||
<Word from="ОРГЗНИЗМЬІ" to="ОРГАНИЗМЫ" />
|
||||
<Word from="ВЗЕИМОДЄЙСТВОВЕТЬ" to="ВЗАИМОДЕЙСТВОВАТЬ" />
|
||||
<Word from="ПУТЄШЄСТБИЄ" to="ПУТЕШЕСТВИЕ" />
|
||||
<Word from="ПуСїЬІННЫХ" to="ПУСТЫННЫХ" />
|
||||
<Word from="ТЗКИХ" to="ТАКИХ" />
|
||||
<Word from="ПЄРЄТЗСКИВЗЄМ" to="ПЕРЕТАСКИВАЕМ" />
|
||||
<Word from="ЧТ0" to="ЧТО" />
|
||||
<Word from="ВЄСЬМЗ" to="ВЕСЬМА" />
|
||||
<Word from="ПОЛОСЗМИ" to="ПОЛОСАМИ" />
|
||||
<Word from="ОрїЭНИЗМЬІ" to="ОРГАНИЗМЫ" />
|
||||
<Word from="ОЁЛЗСТИ" to="ОБЛАСТИ" />
|
||||
<Word from="ЯБЛЯЮТСЯ" to="ЯВЛЯЮТСЯ" />
|
||||
<Word from="ЦЄЛЬЮ" to="ЦЕЛЬЮ" />
|
||||
<Word from="ПОИСКОБ" to="ПОИСКОВ" />
|
||||
<Word from="ДОКЗЗЗТЄІІЬСТВ" to="ДОКАЗАТЕЛЬСТВ" />
|
||||
<Word from="МОЖЄТ" to="МОЖЕТ" />
|
||||
<Word from="НЭХОДИТЬСЯ" to="НАХОДИТЬСЯ" />
|
||||
<Word from="ОЧЄНЬ" to="ОЧЕНЬ" />
|
||||
<Word from="СРЗВНИТЬ" to="СРАВНИТЬ" />
|
||||
<Word from="ОЄНЗРУЖИЛ" to="ОБНАРУЖИЛ" />
|
||||
<Word from="ЛЬДЗ" to="ЛЬДА" />
|
||||
<Word from="ПОТЄПЛЄНИЄІИ" to="ПОТЕПЛЕНИЕМ" />
|
||||
<Word from="ПОХОЛОДЗНИЄБД" to="ПОХОЛОДАНИЕМ" />
|
||||
<Word from="КЭК" to="КАК" />
|
||||
<Word from="ТЄЛО" to="ТЕЛО" />
|
||||
<Word from="бОЛЬШЄ" to="БОЛЬШЕ" />
|
||||
<Word from="НЭКЛОНЯЄТСЯ" to="НАКЛОНЯЕТСЯ" />
|
||||
<Word from="СОІІНЦУ" to="СОЛНЦУ" />
|
||||
<Word from="СТ3бИЛИЗИрОБЗТЬ" to="СТАБИЛИЗИРОВАТЬ" />
|
||||
<Word from="СТЭБИЛЬНЭ" to="СТАБИЛЬНА" />
|
||||
<Word from="МИЛІІИОНОВ" to="МИЛЛИОНОВ" />
|
||||
<Word from="НЗЗЭД" to="НАЗАД" />
|
||||
<Word from="ТЄПЛ0" to="ТЕПЛО" />
|
||||
<Word from="ПОІІЯРНЫХ" to="ПОЛЯРНЫХ" />
|
||||
<Word from="СОІІЕНЫМИ" to="СОЛЕНЫМИ" />
|
||||
<Word from="КЕКИМИ" to="КАКИМИ" />
|
||||
<Word from="кислютнюсггь" to="кислотность" />
|
||||
<Word from="ТЗМ" to="ТАМ" />
|
||||
<Word from="ОРГЗНИЗМЫ" to="ОРГАНИЗМЫ" />
|
||||
<Word from="СУЩЄСТВОВЄТЬ" to="СУЩЕСТВОВАТЬ" />
|
||||
<Word from="ВНИМЗНИЄ" to="ВНИМАНИЕ" />
|
||||
<Word from="СДЄЛЗЄТ" to="СДЕЛАЕТ" />
|
||||
<Word from="ПОЗНЭКОМИТЬСЯ" to="ПОЗНАКОМИТЬСЯ" />
|
||||
<Word from="НЭШИМ" to="НАШИМ" />
|
||||
<Word from="ДОКЗЗЭТЄЛЬСТБО" to="ДОКАЗАТЕЛЬСТВО" />
|
||||
<Word from="ЩЗЗЩЄНИЯ" to="ВРАЩЕНИЯ" />
|
||||
<Word from="бЬІЛ0" to="БЫЛО" />
|
||||
<Word from="ОЄЛЕСТЯХ" to="ОБЛАСТЯХ" />
|
||||
<Word from="бЬІЛИ" to="БЫЛИ" />
|
||||
<Word from="РЭЗМЬІШЛЯІІИ" to="РАЗМЫШЛЯЛИ" />
|
||||
<Word from="КОЛИЧЄСТБЄ" to="КОЛИЧЕСТВЕ" />
|
||||
<Word from="ЩЄІІОЧНЫЄ" to="ЩЕЛОЧНЫЕ" />
|
||||
<Word from="НЄКОТЩЗЬІЄ" to="НЕКОТОРЫЕ" />
|
||||
<Word from="ПрИБІ1ЕКуї" to="ПРИВЛЕКУТ" />
|
||||
<Word from="НЗЗЬІВЭЄМЫЄ" to="НАЗЫВАЕМЫЕ" />
|
||||
<Word from="Чї06Ы" to="ЧТОБЫ" />
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords>
|
||||
<WordPart from="Є" to="Е" />
|
||||
<WordPart from="ЬІ" to="Ы" />
|
||||
<WordPart from="КЗ" to="КА" />
|
||||
<WordPart from="ЛЗ" to="ЛА" />
|
||||
<WordPart from="НЗ" to="НА" />
|
||||
<WordPart from="ШЗ" to="ША" />
|
||||
<WordPart from="І\/І" to="М" />
|
||||
</PartialWords>
|
||||
<PartialLines />
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
||||
+946
@@ -0,0 +1,946 @@
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<!-- Abreviaturas simples -->
|
||||
<Word from="KBs" to="kB" />
|
||||
<Word from="Vd" to="Ud" />
|
||||
<Word from="N°" to="N.°" />
|
||||
<Word from="n°" to="n.°" />
|
||||
<Word from="nro." to="n.°" />
|
||||
<Word from="Nro." to="N.°" />
|
||||
<!-- Ortografía básica -->
|
||||
<Word from="aca" to="acá" />
|
||||
<Word from="actuas" to="actúas" />
|
||||
<Word from="actues" to="actúes" />
|
||||
<Word from="adios" to="adiós" />
|
||||
<Word from="agarrenla" to="agárrenla" />
|
||||
<Word from="agarrenlo" to="agárrenlo" />
|
||||
<Word from="agarrandose" to="agarrándose" />
|
||||
<Word from="algun" to="algún" />
|
||||
<Word from="alli" to="allí" />
|
||||
<Word from="alla" to="allá" />
|
||||
<Word from="alejate" to="aléjate" />
|
||||
<Word from="ahi" to="ahí" />
|
||||
<Word from="angel" to="ángel" />
|
||||
<Word from="angeles" to="ángeles" />
|
||||
<Word from="apagala" to="apágala" />
|
||||
<Word from="aqui" to="aquí" />
|
||||
<Word from="asi" to="así" />
|
||||
<Word from="bahia" to="bahía" />
|
||||
<Word from="busqueda" to="búsqueda" />
|
||||
<Word from="busquedas" to="búsquedas" />
|
||||
<Word from="callate" to="cállate" />
|
||||
<Word from="carcel" to="cárcel" />
|
||||
<Word from="camara" to="cámara" />
|
||||
<Word from="caido" to="caído" />
|
||||
<Word from="cabron" to="cabrón" />
|
||||
<Word from="camion" to="camión" />
|
||||
<Word from="codigo" to="código" />
|
||||
<Word from="codigos" to="códigos" />
|
||||
<Word from="comence" to="comencé" />
|
||||
<Word from="comprate" to="cómprate" />
|
||||
<Word from="consegui" to="conseguí" />
|
||||
<Word from="confias" to="confías" />
|
||||
<Word from="convertira" to="convertirá" />
|
||||
<Word from="corazon" to="corazón" />
|
||||
<Word from="crei" to="creí" />
|
||||
<Word from="creia" to="creía" />
|
||||
<Word from="creido" to="creído" />
|
||||
<Word from="creiste" to="creíste" />
|
||||
<Word from="cubrenos" to="cúbrenos" />
|
||||
<Word from="comio" to="comió" />
|
||||
<Word from="dara" to="dará" />
|
||||
<Word from="dia" to="día" />
|
||||
<Word from="dias" to="días" />
|
||||
<Word from="debio" to="debió" />
|
||||
<Word from="demelo" to="démelo" />
|
||||
<Word from="dimelo" to="dímelo" />
|
||||
<Word from="denoslo" to="dénoslo" />
|
||||
<Word from="deselo" to="déselo" />
|
||||
<Word from="decia" to="decía" />
|
||||
<Word from="decian" to="decían" />
|
||||
<Word from="detras" to="detrás" />
|
||||
<Word from="deberia" to="debería" />
|
||||
<Word from="deberas" to="deberás" />
|
||||
<Word from="deberias" to="deberías" />
|
||||
<Word from="deberian" to="deberían" />
|
||||
<Word from="deberiamos" to="deberíamos" />
|
||||
<Word from="dejame" to="déjame" />
|
||||
<Word from="dejate" to="déjate" />
|
||||
<Word from="dejalo" to="déjalo" />
|
||||
<Word from="dejarian" to="dejarían" />
|
||||
<Word from="damela" to="dámela" />
|
||||
<Word from="despues" to="después" />
|
||||
<Word from="diciendome" to="diciéndome" />
|
||||
<Word from="dificil" to="difícil" />
|
||||
<Word from="dificiles" to="difíciles" />
|
||||
<Word from="disculpate" to="discúlpate" />
|
||||
<Word from="dolares" to="dólares" />
|
||||
<Word from="hechar" to="echar" />
|
||||
<Word from="examenes" to="exámenes" />
|
||||
<Word from="empezo" to="empezó" />
|
||||
<Word from="empujon" to="empujón" />
|
||||
<Word from="empujalo" to="empújalo" />
|
||||
<Word from="escondanme" to="escóndanme" />
|
||||
<Word from="esperame" to="espérame" />
|
||||
<Word from="estara" to="estará" />
|
||||
<Word from="estare" to="estaré" />
|
||||
<Word from="estaria" to="estaría" />
|
||||
<Word from="estan" to="están" />
|
||||
<Word from="estaran" to="estarán" />
|
||||
<Word from="estabamos" to="estábamos" />
|
||||
<Word from="estuvieramos" to="estuviéramos" />
|
||||
<Word from="exito" to="éxito" />
|
||||
<Word from="facil" to="fácil" />
|
||||
<Word from="fiscalia" to="fiscalía" />
|
||||
<Word from="fragil" to="frágil" />
|
||||
<Word from="fragiles" to="frágiles" />
|
||||
<Word from="frances" to="francés" />
|
||||
<Word from="gustaria" to="gustaría" />
|
||||
<Word from="habia" to="había" />
|
||||
<Word from="habias" to="habías" />
|
||||
<Word from="habian" to="habían" />
|
||||
<Word from="habrian" to="habrían" />
|
||||
<Word from="habrias" to="habrías" />
|
||||
<Word from="hagalo" to="hágalo" />
|
||||
<Word from="haria" to="haría" />
|
||||
<Word from="increible" to="increíble" />
|
||||
<Word from="incredulo" to="incrédulo" />
|
||||
<Word from="intentalo" to="inténtalo" />
|
||||
<Word from="ire" to="iré" />
|
||||
<Word from="jovenes" to="jóvenes" />
|
||||
<Word from="ladron" to="ladrón" />
|
||||
<Word from="linea" to="línea" />
|
||||
<Word from="llamame" to="llámame" />
|
||||
<Word from="llevalo" to="llévalo" />
|
||||
<Word from="mama" to="mamá" />
|
||||
<Word from="maricon" to="maricón" />
|
||||
<Word from="mayoria" to="mayoría" />
|
||||
<Word from="metodo" to="método" />
|
||||
<Word from="metodos" to="métodos" />
|
||||
<Word from="mio" to="mío" />
|
||||
<Word from="mostro" to="mostró" />
|
||||
<Word from="morira" to="morirá" />
|
||||
<Word from="muevete" to="muévete" />
|
||||
<Word from="murio" to="murió" />
|
||||
<Word from="numero" to="número" />
|
||||
<Word from="numeros" to="números" />
|
||||
<Word from="ningun" to="ningún" />
|
||||
<Word from="oido" to="oído" />
|
||||
<Word from="oidos" to="oídos" />
|
||||
<Word from="oimos" to="oímos" />
|
||||
<Word from="oiste" to="oíste" />
|
||||
<Word from="pasale" to="pásale" />
|
||||
<Word from="pasame" to="pásame" />
|
||||
<Word from="paraiso" to="paraíso" />
|
||||
<Word from="parate" to="párate" />
|
||||
<Word from="pense" to="pensé" />
|
||||
<Word from="peluqueria" to="peluquería" />
|
||||
<Word from="platano" to="plátano" />
|
||||
<Word from="plastico" to="plástico" />
|
||||
<Word from="plasticos" to="plásticos" />
|
||||
<Word from="policia" to="policía" />
|
||||
<Word from="policias" to="policías" />
|
||||
<Word from="poster" to="póster" />
|
||||
<Word from="podia" to="podía" />
|
||||
<Word from="podias" to="podías" />
|
||||
<Word from="podria" to="podría" />
|
||||
<Word from="podrian" to="podrían" />
|
||||
<Word from="podrias" to="podrías" />
|
||||
<Word from="podriamos" to="podríamos" />
|
||||
<Word from="prometio" to="prometió" />
|
||||
<Word from="proposito" to="propósito" />
|
||||
<Word from="pideselo" to="pídeselo" />
|
||||
<Word from="ponganse" to="pónganse" />
|
||||
<Word from="prometeme" to="prométeme" />
|
||||
<Word from="publico" to="público" />
|
||||
<Word from="publicos" to="públicos" />
|
||||
<Word from="publicamente" to="públicamente" />
|
||||
<Word from="quedate" to="quédate" />
|
||||
<Word from="queria" to="quería" />
|
||||
<Word from="querrias" to="querrías" />
|
||||
<Word from="querian" to="querían" />
|
||||
<Word from="rapido" to="rápido" />
|
||||
<Word from="rapidamente" to="rápidamente" />
|
||||
<Word from="razon" to="razón" />
|
||||
<Word from="rehusen" to="rehúsen" />
|
||||
<Word from="rie" to="ríe" />
|
||||
<Word from="rias" to="rías" />
|
||||
<Word from="rindete" to="ríndete" />
|
||||
<Word from="sacame" to="sácame" />
|
||||
<Word from="sentian" to="sentían" />
|
||||
<Word from="sientate" to="siéntate" />
|
||||
<Word from="sera" to="será" />
|
||||
<Word from="soplon" to="soplón" />
|
||||
<Word from="sueltalo" to="suéltalo" />
|
||||
<Word from="tambien" to="también" />
|
||||
<Word from="teoria" to="teoría" />
|
||||
<Word from="tendra" to="tendrá" />
|
||||
<Word from="telefono" to="teléfono" />
|
||||
<Word from="tipica" to="típica" />
|
||||
<Word from="todavia" to="todavía" />
|
||||
<Word from="tomalo" to="tómalo" />
|
||||
<Word from="tonterias" to="tonterías" />
|
||||
<Word from="torci" to="torcí" />
|
||||
<Word from="traelos" to="tráelos" />
|
||||
<Word from="traiganlo" to="tráiganlo" />
|
||||
<Word from="traiganlos" to="tráiganlos" />
|
||||
<Word from="trio" to="trío" />
|
||||
<Word from="tuvieramos" to="tuviéramos" />
|
||||
<Word from="union" to="unión" />
|
||||
<Word from="ultimo" to="último" />
|
||||
<Word from="ultima" to="última" />
|
||||
<Word from="ultimos" to="últimos" />
|
||||
<Word from="ultimas" to="últimas" />
|
||||
<Word from="unica" to="única" />
|
||||
<Word from="unico" to="único" />
|
||||
<Word from="vamonos" to="vámonos" />
|
||||
<Word from="vayanse" to="váyanse" />
|
||||
<Word from="victima" to="víctima" />
|
||||
<Word from="vivira" to="vivirá" />
|
||||
<Word from="volvio" to="volvió" />
|
||||
<Word from="volvia" to="volvía" />
|
||||
<Word from="volvian" to="volvían" />
|
||||
<!-- Palabras con eír/oír más usadas -->
|
||||
<Word from="reir" to="reír" />
|
||||
<Word from="freir" to="freír" />
|
||||
<Word from="sonreir" to="sonreír" />
|
||||
<Word from="hazmerreir" to="hazmerreír" />
|
||||
<Word from="oir" to="oír" />
|
||||
<Word from="oirlo" to="oírlo" />
|
||||
<Word from="oirte" to="oírte" />
|
||||
<Word from="oirse" to="oírse" />
|
||||
<Word from="oirme" to="oírme" />
|
||||
<Word from="oirle" to="oírle" />
|
||||
<Word from="oirla" to="oírla" />
|
||||
<Word from="oirles" to="oírles" />
|
||||
<Word from="oirnos" to="oírnos" />
|
||||
<Word from="oirlas" to="oírlas" />
|
||||
<!-- Palabras que no llevan acento -->
|
||||
<Word from="bién" to="bien" />
|
||||
<Word from="crímen" to="crimen" />
|
||||
<Word from="fué" to="fue" />
|
||||
<Word from="fuí" to="fui" />
|
||||
<Word from="quiéres" to="quieres" />
|
||||
<Word from="tí" to="ti" />
|
||||
<Word from="dí" to="di" />
|
||||
<Word from="vá" to="va" />
|
||||
<Word from="vé" to="ve" />
|
||||
<Word from="ví" to="vi" />
|
||||
<Word from="vió" to="vio" />
|
||||
<Word from="ó" to="o" />
|
||||
<Word from="clón" to="clon" />
|
||||
<Word from="dió" to="dio" />
|
||||
<Word from="guión" to="guion" />
|
||||
<Word from="dón" to="don" />
|
||||
<Word from="fé" to="fe" />
|
||||
<Word from="áquel" to="aquel" />
|
||||
<!-- Palabras donde se puede prescindir de la tilde diacrítica -->
|
||||
<Word from="éste" to="este" />
|
||||
<Word from="ésta" to="esta" />
|
||||
<Word from="éstos" to="estos" />
|
||||
<Word from="éstas" to="estas" />
|
||||
<Word from="ése" to="ese" />
|
||||
<Word from="ésa" to="esa" />
|
||||
<Word from="ésos" to="esos" />
|
||||
<Word from="ésas" to="esas" />
|
||||
<Word from="sólo" to="solo" />
|
||||
<!-- Errores no relacionados con los tildes -->
|
||||
<Word from="coktel" to="cóctel" />
|
||||
<Word from="cocktel" to="cóctel" />
|
||||
<Word from="conciente" to="consciente" />
|
||||
<Word from="comenzé" to="comencé" />
|
||||
<Word from="desilucionarte" to="desilusionarte" />
|
||||
<Word from="dijieron" to="dijeron" />
|
||||
<Word from="empezé" to="empecé" />
|
||||
<Word from="hize" to="hice" />
|
||||
<Word from="ilucionarte" to="ilusionarte" />
|
||||
<Word from="inconciente" to="inconsciente" />
|
||||
<Word from="quize" to="quise" />
|
||||
<Word from="quizo" to="quiso" />
|
||||
<Word from="verguenza" to="vergüenza" />
|
||||
<!-- Errores en nombres propios o de países -->
|
||||
<Word from="Nuñez" to="Núñez" />
|
||||
<Word from="Ivan" to="Iván" />
|
||||
<Word from="Japon" to="Japón" />
|
||||
<Word from="Monica" to="Mónica" />
|
||||
<Word from="Maria" to="María" />
|
||||
<Word from="Jose" to="José" />
|
||||
<Word from="Ramon" to="Ramón" />
|
||||
<Word from="Garcia" to="García" />
|
||||
<Word from="Gonzalez" to="González" />
|
||||
<Word from="Jesus" to="Jesús" />
|
||||
<Word from="Alvarez" to="Álvarez" />
|
||||
<Word from="Damian" to="Damián" />
|
||||
<Word from="Rene" to="René" />
|
||||
<Word from="Nicolas" to="Nicolás" />
|
||||
<Word from="Jonas" to="Jonás" />
|
||||
<Word from="Lopez" to="López" />
|
||||
<Word from="Hernandez" to="Hernández" />
|
||||
<Word from="Bermudez" to="Bermúdez" />
|
||||
<Word from="Fernandez" to="Fernández" />
|
||||
<Word from="Suarez" to="Suárez" />
|
||||
<Word from="Sofia" to="Sofía" />
|
||||
<Word from="Seneca" to="Séneca" />
|
||||
<Word from="Tokyo" to="Tokio" />
|
||||
<Word from="Canada" to="Canadá" />
|
||||
<Word from="Paris" to="París" />
|
||||
<Word from="Turquia" to="Turquía" />
|
||||
<Word from="Mexico" to="México" />
|
||||
<Word from="Mejico" to="México" />
|
||||
<Word from="Matias" to="Matías" />
|
||||
<Word from="Valentin" to="Valentín" />
|
||||
<Word from="mejicano" to="mexicano" />
|
||||
<Word from="mejicanos" to="mexicanos" />
|
||||
<Word from="mejicana" to="mexicana" />
|
||||
<Word from="mejicanas" to="mexicanas" />
|
||||
<!-- Creados por SE -->
|
||||
<Word from="io" to="lo" />
|
||||
<Word from="ia" to="la" />
|
||||
<Word from="ie" to="le" />
|
||||
<Word from="Io" to="lo" />
|
||||
<Word from="Ia" to="la" />
|
||||
<Word from="AI" to="Al" />
|
||||
<Word from="Ie" to="le" />
|
||||
<Word from="EI" to="El" />
|
||||
<Word from="subafluente" to="subafluente" />
|
||||
<Word from="aflójalo" to="aflójalo" />
|
||||
<Word from="Aflójalo" to="Aflójalo" />
|
||||
<Word from="perdi" to="perdí" />
|
||||
<Word from="Podria" to="Podría" />
|
||||
<Word from="confia" to="confía" />
|
||||
<Word from="pasaria" to="pasaría" />
|
||||
<Word from="Podias" to="Podías" />
|
||||
<Word from="responsabke" to="responsable" />
|
||||
<Word from="Todavia" to="Todavía" />
|
||||
<Word from="envien" to="envíen" />
|
||||
<Word from="Queria" to="Quería" />
|
||||
<Word from="tio" to="tío" />
|
||||
<Word from="traido" to="traído" />
|
||||
<Word from="Asi" to="Así" />
|
||||
<Word from="elegi" to="elegí" />
|
||||
<Word from="habria" to="habría" />
|
||||
<Word from="encantaria" to="encantaría" />
|
||||
<Word from="leido" to="leído" />
|
||||
<Word from="conocias" to="conocías" />
|
||||
<Word from="harias" to="harías" />
|
||||
<Word from="Aqui" to="Aquí" />
|
||||
<Word from="decidi" to="decidí" />
|
||||
<Word from="mia" to="mía" />
|
||||
<Word from="Crei" to="Creí" />
|
||||
<Word from="podiamos" to="podíamos" />
|
||||
<Word from="avisame" to="avísame" />
|
||||
<Word from="debia" to="debía" />
|
||||
<Word from="pensarias" to="pensarías" />
|
||||
<Word from="reuniamos" to="reuníamos" />
|
||||
<Word from="POÏ" to="por" />
|
||||
<Word from="vendria" to="vendría" />
|
||||
<Word from="caida" to="caída" />
|
||||
<Word from="venian" to="venían" />
|
||||
<Word from="compañias" to="compañías" />
|
||||
<Word from="leiste" to="leíste" />
|
||||
<Word from="Leiste" to="Leíste" />
|
||||
<Word from="fiaria" to="fiaría" />
|
||||
<Word from="Hungria" to="Hungría" />
|
||||
<Word from="fotografia" to="fotografía" />
|
||||
<Word from="cafeteria" to="cafetería" />
|
||||
<Word from="Digame" to="Dígame" />
|
||||
<Word from="debias" to="debías" />
|
||||
<Word from="tendria" to="tendría" />
|
||||
<Word from="CÏGO" to="creo" />
|
||||
<Word from="anteg" to="antes" />
|
||||
<Word from="SóIo" to="Solo" />
|
||||
<Word from="Ilamándola" to="llamándola" />
|
||||
<Word from="Cáflaté" to="Cállate" />
|
||||
<Word from="Ilamaste" to="llamaste" />
|
||||
<Word from="daria" to="daría" />
|
||||
<Word from="Iargaba" to="largaba" />
|
||||
<Word from="Yati" to="Y a ti" />
|
||||
<Word from="querias" to="querías" />
|
||||
<Word from="Iimpiarlo" to="limpiarlo" />
|
||||
<Word from="Iargado" to="largado" />
|
||||
<Word from="galeria" to="galería" />
|
||||
<Word from="Bartomeu" to="Bertomeu" />
|
||||
<Word from="Iocalizarlo" to="localizarlo" />
|
||||
<Word from="Ilámame" to="llámame" />
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<PartialLines>
|
||||
<!-- Varios -->
|
||||
<LinePart from="de gratis" to="gratis" />
|
||||
<LinePart from="si quiera" to="siquiera" />
|
||||
<LinePart from="Cada una de los" to="Cada uno de los" />
|
||||
<LinePart from="Cada uno de las" to="Cada una de las" />
|
||||
<!-- Uso incorrecto de haber / a ver -->
|
||||
<LinePart from="haber que" to="a ver qué" />
|
||||
<LinePart from="haber qué" to="a ver qué" />
|
||||
<LinePart from="Haber si" to="A ver si" />
|
||||
<!-- Pronombres exclamativos o interrogativos Parte 1 -->
|
||||
<LinePart from=" que hora" to=" qué hora" />
|
||||
<LinePart from="yo que se" to="yo qué sé" />
|
||||
<LinePart from="Yo que se" to="Yo qué sé" />
|
||||
<!-- Acentos al final de los signos de exclamación -->
|
||||
<LinePart from=" tu!" to=" tú!" />
|
||||
<LinePart from=" si!" to=" sí!" />
|
||||
<LinePart from=" mi!" to=" mí!" />
|
||||
<LinePart from=" el!" to=" él!" />
|
||||
<!-- Acentos al final de los signos de interrogación -->
|
||||
<LinePart from=" tu?" to=" tú?" />
|
||||
<LinePart from=" si?" to=" sí?" />
|
||||
<LinePart from=" mi?" to=" mí?" />
|
||||
<LinePart from=" el?" to=" él?" />
|
||||
<LinePart from=" aun?" to=" aún?" />
|
||||
<LinePart from=" mas?" to=" más?" />
|
||||
<LinePart from=" que?" to=" qué?" />
|
||||
<LinePart from=" paso?" to=" pasó?" />
|
||||
<LinePart from=" cuando?" to=" cuándo?" />
|
||||
<LinePart from=" cuanto?" to=" cuánto?" />
|
||||
<LinePart from=" cuanta?" to=" cuánta?" />
|
||||
<LinePart from=" cuantas?" to=" cuántas?" />
|
||||
<LinePart from=" cuantos?" to=" cuántos?" />
|
||||
<LinePart from=" donde?" to=" dónde?" />
|
||||
<LinePart from=" quien?" to=" quién?" />
|
||||
<LinePart from=" como?" to=" cómo?" />
|
||||
<LinePart from=" adonde?" to=" adónde?" />
|
||||
<LinePart from=" cual?" to=" cuál?" />
|
||||
<!-- Acentos en los signos de interrogación completos -->
|
||||
<LinePart from="¿Si?" to="¿Sí?" />
|
||||
<LinePart from="¿esta bien?" to="¿está bien?" />
|
||||
<!-- Enunciados que son a la vez interrogativos y exclamativos -->
|
||||
<LinePart from="¿Pero qué haces?" to="¡¿Pero qué haces?!" />
|
||||
<LinePart from="¿pero qué haces?" to="¡¿pero qué haces?!" />
|
||||
<LinePart from="¿Es que no me has escuchado?" to="¡¿Es que no me has escuchado?!" />
|
||||
<LinePart from="¿es que no me has escuchado?" to="¡¿es que no me has escuchado?!" />
|
||||
<!-- Acentos al principio de los signos de interrogación con minúsculas -->
|
||||
<LinePart from="¿aun" to="¿aún" />
|
||||
<LinePart from="¿tu " to="¿tú " />
|
||||
<LinePart from="¿que " to="¿qué " />
|
||||
<LinePart from="¿sabes que" to="¿sabes qué" />
|
||||
<LinePart from="¿sabes adonde" to="¿sabes adónde" />
|
||||
<LinePart from="¿sabes cual" to="¿sabes cuál" />
|
||||
<LinePart from="¿sabes quien" to="¿sabes quién" />
|
||||
<LinePart from="¿sabes como" to="¿sabes cómo" />
|
||||
<LinePart from="¿sabes cuan" to="¿sabes cuán" />
|
||||
<LinePart from="¿sabes cuanto" to="¿sabes cuánto" />
|
||||
<LinePart from="¿sabes cuanta" to="¿sabes cuánta" />
|
||||
<LinePart from="¿sabes cuantos" to="¿sabes cuántos" />
|
||||
<LinePart from="¿sabes cuantas" to="¿sabes cuántas" />
|
||||
<LinePart from="¿sabes cuando" to="¿sabes cuándo" />
|
||||
<LinePart from="¿sabes donde" to="¿sabes dónde" />
|
||||
<LinePart from="¿sabe que" to="¿sabe qué" />
|
||||
<LinePart from="¿sabe adonde" to="¿sabe adónde" />
|
||||
<LinePart from="¿sabe cual" to="¿sabe cuál" />
|
||||
<LinePart from="¿sabe quien" to="¿sabe quién" />
|
||||
<LinePart from="¿sabe como" to="¿sabe cómo" />
|
||||
<LinePart from="¿sabe cuan" to="¿sabe cuán" />
|
||||
<LinePart from="¿sabe cuanto" to="¿sabe cuánto" />
|
||||
<LinePart from="¿sabe cuanta" to="¿sabe cuánta" />
|
||||
<LinePart from="¿sabe cuantos" to="¿sabe cuántos" />
|
||||
<LinePart from="¿sabe cuantas" to="¿sabe cuántas" />
|
||||
<LinePart from="¿sabe cuando" to="¿sabe cuándo" />
|
||||
<LinePart from="¿sabe donde" to="¿sabe dónde" />
|
||||
<LinePart from="¿saben que" to="¿saben qué" />
|
||||
<LinePart from="¿saben adonde" to="¿saben adónde" />
|
||||
<LinePart from="¿saben cual" to="¿saben cuál" />
|
||||
<LinePart from="¿saben quien" to="¿saben quién" />
|
||||
<LinePart from="¿saben como" to="¿saben cómo" />
|
||||
<LinePart from="¿saben cuan" to="¿saben cuán" />
|
||||
<LinePart from="¿saben cuanto" to="¿saben cuánto" />
|
||||
<LinePart from="¿saben cuanta" to="¿saben cuánta" />
|
||||
<LinePart from="¿saben cuantos" to="¿saben cuántos" />
|
||||
<LinePart from="¿saben cuantas" to="¿saben cuántas" />
|
||||
<LinePart from="¿saben cuando" to="¿saben cuándo" />
|
||||
<LinePart from="¿saben donde" to="¿saben dónde" />
|
||||
<LinePart from="¿de que" to="¿de qué" />
|
||||
<LinePart from="¿de donde" to="¿de dónde" />
|
||||
<LinePart from="¿de cual" to="¿de cuál" />
|
||||
<LinePart from="¿de quien" to="¿de quién" />
|
||||
<LinePart from="¿de cuanto" to="¿de cuánto" />
|
||||
<LinePart from="¿de cuanta" to="¿de cuánta" />
|
||||
<LinePart from="¿de cuantos" to="¿de cuántos" />
|
||||
<LinePart from="¿de cuantas" to="¿de cuántas" />
|
||||
<LinePart from="¿de cuando" to="¿de cuándo" />
|
||||
<LinePart from="¿sobre que" to="¿sobre qué" />
|
||||
<LinePart from="¿como " to="¿cómo " />
|
||||
<LinePart from="¿cual " to="¿cuál " />
|
||||
<LinePart from="¿en cual" to="¿en cuál" />
|
||||
<LinePart from="¿cuando" to="¿cuándo" />
|
||||
<LinePart from="¿hasta cual" to="¿hasta cuál" />
|
||||
<LinePart from="¿hasta quien" to="¿hasta quién" />
|
||||
<LinePart from="¿hasta cuanto" to="¿hasta cuánto" />
|
||||
<LinePart from="¿hasta cuantas" to="¿hasta cuántas" />
|
||||
<LinePart from="¿hasta cuantos" to="¿hasta cuántos" />
|
||||
<LinePart from="¿hasta cuando" to="¿hasta cuándo" />
|
||||
<LinePart from="¿hasta donde" to="¿hasta dónde" />
|
||||
<LinePart from="¿hasta que" to="¿hasta qué" />
|
||||
<LinePart from="¿hasta adonde" to="¿hasta adónde" />
|
||||
<LinePart from="¿desde que" to="¿desde qué" />
|
||||
<LinePart from="¿desde cuando" to="¿desde cuándo" />
|
||||
<LinePart from="¿desde quien" to="¿desde quién" />
|
||||
<LinePart from="¿desde donde" to="¿desde dónde" />
|
||||
<LinePart from="¿cuanto" to="¿cuánto" />
|
||||
<LinePart from="¿cuantos" to="¿cuántos" />
|
||||
<LinePart from="¿donde" to="¿dónde" />
|
||||
<LinePart from="¿adonde" to="¿adónde" />
|
||||
<LinePart from="¿con que" to="¿con qué" />
|
||||
<LinePart from="¿con cual" to="¿con cuál" />
|
||||
<LinePart from="¿con quien" to="¿con quién" />
|
||||
<LinePart from="¿con cuantos" to="¿con cuántos" />
|
||||
<LinePart from="¿con cuantas" to="¿con cuántas" />
|
||||
<LinePart from="¿con cuanta" to="¿con cuánta" />
|
||||
<LinePart from="¿con cuanto" to="¿con cuánto" />
|
||||
<LinePart from="¿para donde" to="¿para dónde" />
|
||||
<LinePart from="¿para adonde" to="¿para adónde" />
|
||||
<LinePart from="¿para cuando" to="¿para cuándo" />
|
||||
<LinePart from="¿para que" to="¿para qué" />
|
||||
<LinePart from="¿para quien" to="¿para quién" />
|
||||
<LinePart from="¿para cuanto" to="¿para cuánto" />
|
||||
<LinePart from="¿para cuanta" to="¿para cuánta" />
|
||||
<LinePart from="¿para cuantos" to="¿para cuántos" />
|
||||
<LinePart from="¿para cuantas" to="¿para cuántas" />
|
||||
<LinePart from="¿a donde" to="¿a dónde" />
|
||||
<LinePart from="¿a que" to="¿a qué" />
|
||||
<LinePart from="¿a cual" to="¿a cuál" />
|
||||
<LinePart from="¿a quien" to="¿a quién" />
|
||||
<LinePart from="¿a como" to="¿a cómo" />
|
||||
<LinePart from="¿a cuanto" to="¿a cuánto" />
|
||||
<LinePart from="¿a cuanta" to="¿a cuánta" />
|
||||
<LinePart from="¿a cuantos" to="¿a cuántos" />
|
||||
<LinePart from="¿a cuantas" to="¿a cuántas" />
|
||||
<LinePart from="¿por que" to="¿por qué" />
|
||||
<LinePart from="¿por cual" to="¿por cuál" />
|
||||
<LinePart from="¿por quien" to="¿por quién" />
|
||||
<LinePart from="¿por cuanto" to="¿por cuánto" />
|
||||
<LinePart from="¿por cuanta" to="¿por cuánta" />
|
||||
<LinePart from="¿por cuantos" to="¿por cuántos" />
|
||||
<LinePart from="¿por cuantas" to="¿por cuántas" />
|
||||
<LinePart from="¿por donde" to="¿por dónde" />
|
||||
<LinePart from="¿porque" to="¿por qué" />
|
||||
<LinePart from="¿porqué" to="¿por qué" />
|
||||
<LinePart from="¿y que" to="¿y qué" />
|
||||
<LinePart from="¿y como" to="¿y cómo" />
|
||||
<LinePart from="¿y cuando" to="¿y cuándo" />
|
||||
<LinePart from="¿y cual" to="¿y cuál" />
|
||||
<LinePart from="¿y quien" to="¿y quién" />
|
||||
<LinePart from="¿y cuanto" to="¿y cuánto" />
|
||||
<LinePart from="¿y cuanta" to="¿y cuánta" />
|
||||
<LinePart from="¿y cuantos" to="¿y cuántos" />
|
||||
<LinePart from="¿y cuantas" to="¿y cuántas" />
|
||||
<LinePart from="¿y donde" to="¿y dónde" />
|
||||
<LinePart from="¿y adonde" to="¿y adónde" />
|
||||
<LinePart from="¿quien " to="¿quién " />
|
||||
<LinePart from="¿esta " to="¿está " />
|
||||
<LinePart from="¿estas " to="¿estás " />
|
||||
<!-- Acentos al principio de los signos de interrogación con mayúsculas -->
|
||||
<LinePart from="¿Aun" to="¿Aún" />
|
||||
<LinePart from="¿Que " to="¿Qué " />
|
||||
<LinePart from="¿Sabes que" to="¿Sabes qué" />
|
||||
<LinePart from="¿Sabes adonde" to="¿Sabes adónde" />
|
||||
<LinePart from="¿Sabes cual" to="¿Sabes cuál" />
|
||||
<LinePart from="¿Sabes quien" to="¿Sabes quién" />
|
||||
<LinePart from="¿Sabes como" to="¿Sabes cómo" />
|
||||
<LinePart from="¿Sabes cuan" to="¿Sabes cuán" />
|
||||
<LinePart from="¿Sabes cuanto" to="¿Sabes cuánto" />
|
||||
<LinePart from="¿Sabes cuanta" to="¿Sabes cuánta" />
|
||||
<LinePart from="¿Sabes cuantos" to="¿Sabes cuántos" />
|
||||
<LinePart from="¿Sabes cuantas" to="¿Sabes cuántas" />
|
||||
<LinePart from="¿Sabes cuando" to="¿Sabes cuándo" />
|
||||
<LinePart from="¿Sabes donde" to="¿Sabes dónde" />
|
||||
<LinePart from="¿Sabe que" to="¿Sabe qué" />
|
||||
<LinePart from="¿Sabe adonde" to="¿Sabe adónde" />
|
||||
<LinePart from="¿Sabe cual" to="¿Sabe cuál" />
|
||||
<LinePart from="¿Sabe quien" to="¿Sabe quién" />
|
||||
<LinePart from="¿Sabe como" to="¿Sabe cómo" />
|
||||
<LinePart from="¿Sabe cuan" to="¿Sabe cuán" />
|
||||
<LinePart from="¿Sabe cuanto" to="¿Sabe cuánto" />
|
||||
<LinePart from="¿Sabe cuanta" to="¿Sabe cuánta" />
|
||||
<LinePart from="¿Sabe cuantos" to="¿Sabe cuántos" />
|
||||
<LinePart from="¿Sabe cuantas" to="¿Sabe cuántas" />
|
||||
<LinePart from="¿Sabe cuando" to="¿Sabe cuándo" />
|
||||
<LinePart from="¿Sabe donde" to="¿Sabe dónde" />
|
||||
<LinePart from="¿Saben que" to="¿Saben qué" />
|
||||
<LinePart from="¿Saben adonde" to="¿Saben adónde" />
|
||||
<LinePart from="¿Saben cual" to="¿Saben cuál" />
|
||||
<LinePart from="¿Saben quien" to="¿Saben quién" />
|
||||
<LinePart from="¿Saben como" to="¿Saben cómo" />
|
||||
<LinePart from="¿Saben cuan" to="¿Saben cuán" />
|
||||
<LinePart from="¿Saben cuanto" to="¿Saben cuánto" />
|
||||
<LinePart from="¿Saben cuanta" to="¿Saben cuánta" />
|
||||
<LinePart from="¿Saben cuantos" to="¿Saben cuántos" />
|
||||
<LinePart from="¿Saben cuantas" to="¿Saben cuántas" />
|
||||
<LinePart from="¿Saben cuando" to="¿Saben cuándo" />
|
||||
<LinePart from="¿Saben donde" to="¿Saben dónde" />
|
||||
<LinePart from="¿De que" to="¿De qué" />
|
||||
<LinePart from="¿De donde" to="¿De dónde" />
|
||||
<LinePart from="¿De cual" to="¿De cuál" />
|
||||
<LinePart from="¿De quien" to="¿De quién" />
|
||||
<LinePart from="¿De cuanto" to="¿De cuánto" />
|
||||
<LinePart from="¿De cuanta" to="¿De cuánta" />
|
||||
<LinePart from="¿De cuantos" to="¿De cuántos" />
|
||||
<LinePart from="¿De cuantas" to="¿De cuántas" />
|
||||
<LinePart from="¿De cuando" to="¿De cuándo" />
|
||||
<LinePart from="¿Desde que" to="¿Desde qué" />
|
||||
<LinePart from="¿Desde cuando" to="¿Desde cuándo" />
|
||||
<LinePart from="¿Desde quien" to="¿Desde quién" />
|
||||
<LinePart from="¿Desde donde" to="¿Desde dónde" />
|
||||
<LinePart from="¿Sobre que" to="¿Sobre qué" />
|
||||
<LinePart from="¿Como " to="¿Cómo " />
|
||||
<LinePart from="¿Cual " to="¿Cuál " />
|
||||
<LinePart from="¿En cual" to="¿En cuál" />
|
||||
<LinePart from="¿Cuando" to="¿Cuándo" />
|
||||
<LinePart from="¿Hasta cual" to="¿Hasta cuál" />
|
||||
<LinePart from="¿Hasta quien" to="¿Hasta quién" />
|
||||
<LinePart from="¿Hasta cuanto" to="¿Hasta cuánto" />
|
||||
<LinePart from="¿Hasta cuantas" to="¿Hasta cuántas" />
|
||||
<LinePart from="¿Hasta cuantos" to="¿Hasta cuántos" />
|
||||
<LinePart from="¿Hasta cuando" to="¿Hasta cuándo" />
|
||||
<LinePart from="¿Hasta donde" to="¿Hasta dónde" />
|
||||
<LinePart from="¿Hasta que" to="¿Hasta qué" />
|
||||
<LinePart from="¿Hasta adonde" to="¿Hasta adónde" />
|
||||
<LinePart from="¿Cuanto" to="¿Cuánto" />
|
||||
<LinePart from="¿Cuantos" to="¿Cuántos" />
|
||||
<LinePart from="¿Donde" to="¿Dónde" />
|
||||
<LinePart from="¿Adonde" to="¿Adónde" />
|
||||
<LinePart from="¿Con que" to="¿Con qué" />
|
||||
<LinePart from="¿Con cual" to="¿Con cuál" />
|
||||
<LinePart from="¿Con quien" to="¿Con quién" />
|
||||
<LinePart from="¿Con cuantos" to="¿Con cuántos" />
|
||||
<LinePart from="¿Con cuantas" to="¿Con cuántas" />
|
||||
<LinePart from="¿Con cuanta" to="¿Con cuánta" />
|
||||
<LinePart from="¿Con cuanto" to="¿Con cuánto" />
|
||||
<LinePart from="¿Para donde" to="¿Para dónde" />
|
||||
<LinePart from="¿Para adonde" to="¿Para adónde" />
|
||||
<LinePart from="¿Para cuando" to="¿Para cuándo" />
|
||||
<LinePart from="¿Para que" to="¿Para qué" />
|
||||
<LinePart from="¿Para quien" to="¿Para quién" />
|
||||
<LinePart from="¿Para cuanto" to="¿Para cuánto" />
|
||||
<LinePart from="¿Para cuanta" to="¿Para cuánta" />
|
||||
<LinePart from="¿Para cuantos" to="¿Para cuántos" />
|
||||
<LinePart from="¿Para cuantas" to="¿Para cuántas" />
|
||||
<LinePart from="¿A donde" to="¿A dónde" />
|
||||
<LinePart from="¿A que" to="¿A qué" />
|
||||
<LinePart from="¿A cual" to="¿A cuál" />
|
||||
<LinePart from="¿A quien" to="¿A quién" />
|
||||
<LinePart from="¿A como" to="¿A cómo" />
|
||||
<LinePart from="¿A cuanto" to="¿A cuánto" />
|
||||
<LinePart from="¿A cuanta" to="¿A cuánta" />
|
||||
<LinePart from="¿A cuantos" to="¿A cuántos" />
|
||||
<LinePart from="¿A cuantas" to="¿A cuántas" />
|
||||
<LinePart from="¿Por que" to="¿Por qué" />
|
||||
<LinePart from="¿Por cual" to="¿Por cuál" />
|
||||
<LinePart from="¿Por quien" to="¿Por quién" />
|
||||
<LinePart from="¿Por cuanto" to="¿Por cuánto" />
|
||||
<LinePart from="¿Por cuanta" to="¿Por cuánta" />
|
||||
<LinePart from="¿Por cuantos" to="¿Por cuántos" />
|
||||
<LinePart from="¿Por cuantas" to="¿Por cuántas" />
|
||||
<LinePart from="¿Por donde" to="¿Por dónde" />
|
||||
<LinePart from="¿Porque" to="¿Por qué" />
|
||||
<LinePart from="¿Porqué" to="¿Por qué" />
|
||||
<LinePart from="¿Y que" to="¿Y qué" />
|
||||
<LinePart from="¿Y como" to="¿Y cómo" />
|
||||
<LinePart from="¿Y cuando" to="¿Y cuándo" />
|
||||
<LinePart from="¿Y cual" to="¿Y cuál" />
|
||||
<LinePart from="¿Y quien" to="¿Y quién" />
|
||||
<LinePart from="¿Y cuanto" to="¿Y cuánto" />
|
||||
<LinePart from="¿Y cuanta" to="¿Y cuánta" />
|
||||
<LinePart from="¿Y cuantos" to="¿Y cuántos" />
|
||||
<LinePart from="¿Y cuantas" to="¿Y cuántas" />
|
||||
<LinePart from="¿Y donde" to="¿Y dónde" />
|
||||
<LinePart from="¿Y adonde" to="¿Y adónde" />
|
||||
<LinePart from="¿Quien " to="¿Quién " />
|
||||
<LinePart from="¿Esta " to="¿Está " />
|
||||
<!-- Tilde diacrítica en oraciones interrogativas o exclamativas indirectas -->
|
||||
<LinePart from="el porque" to="el porqué" />
|
||||
<LinePart from="su porque" to="su porqué" />
|
||||
<LinePart from="los porques" to="los porqués" />
|
||||
<!-- aún -->
|
||||
<LinePart from="aun," to="aún," />
|
||||
<LinePart from="aun no" to="aún no" />
|
||||
<!-- dé -->
|
||||
<LinePart from=" de y " to=" dé y " />
|
||||
<LinePart from=" nos de " to=" nos dé " />
|
||||
<!-- tú -->
|
||||
<LinePart from=" tu ya " to=" tú ya " />
|
||||
<LinePart from="Tu ya " to="Tú ya " />
|
||||
<!-- casos específicos antes de la coma -->
|
||||
<LinePart from=" de, " to=" dé, " />
|
||||
<LinePart from=" mi, " to=" mí, " />
|
||||
<LinePart from=" tu, " to=" tú, " />
|
||||
<LinePart from=" el, " to=" él, " />
|
||||
<LinePart from=" te, " to=" té, " />
|
||||
<LinePart from=" mas, " to=" más, " />
|
||||
<LinePart from=" quien, " to=" quién, " />
|
||||
<LinePart from=" cual," to=" cuál," />
|
||||
<LinePart from="porque, " to="porqué, " />
|
||||
<LinePart from="cuanto, " to="cuánto, " />
|
||||
<LinePart from="cuando, " to="cuándo," />
|
||||
<!-- sé -->
|
||||
<LinePart from=" se," to=" sé," />
|
||||
<LinePart from="se donde" to="sé dónde" />
|
||||
<LinePart from="se cuando" to="sé cuándo" />
|
||||
<LinePart from="se adonde" to="sé adónde" />
|
||||
<LinePart from="se como" to="sé cómo" />
|
||||
<LinePart from="se cual" to="sé cuál" />
|
||||
<LinePart from="se quien" to="sé quién" />
|
||||
<LinePart from="se cuanto" to="sé cuánto" />
|
||||
<LinePart from="se cuanta" to="sé cuánta" />
|
||||
<LinePart from="se cuantos" to="sé cuántos" />
|
||||
<LinePart from="se cuantas" to="sé cuántas" />
|
||||
<LinePart from="se cuan" to="sé cuán" />
|
||||
<!-- si/sí -->
|
||||
<LinePart from=" el si " to=" el sí " />
|
||||
<LinePart from="si mismo" to="sí mismo" />
|
||||
<LinePart from="si misma" to="sí misma" />
|
||||
<!-- Errores de "l" en vez de "i" en casos específicos -->
|
||||
<LinePart from=" llegal" to=" ilegal" />
|
||||
<LinePart from=" lluminar" to=" iluminar" />
|
||||
<LinePart from="sllbato" to="silbato" />
|
||||
<LinePart from="sllenclo" to="silencio" />
|
||||
<LinePart from="clemencla" to="clemencia" />
|
||||
<LinePart from="socledad" to="sociedad" />
|
||||
<LinePart from="tlene" to="tiene" />
|
||||
<LinePart from="tlempo" to="tiempo" />
|
||||
<LinePart from="equlvocaba" to="equivocaba" />
|
||||
<LinePart from="qulnce" to="quince" />
|
||||
<LinePart from="comlen" to="comien" />
|
||||
<LinePart from="historl" to="histori" />
|
||||
<LinePart from="misterl" to="misteri" />
|
||||
<LinePart from="vivencl" to="vivenci" />
|
||||
</PartialLines>
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines>
|
||||
<Ending from=".»." to="»." />
|
||||
</EndLines>
|
||||
<WholeLines>
|
||||
<!-- Todas las líneas -->
|
||||
<Line from="No" to="No." />
|
||||
</WholeLines>
|
||||
<RegularExpressions>
|
||||
<!-- Abreviaturas compuestas -->
|
||||
<RegEx find="\b[Ss](r|ra|rta)\b\.?" replaceWith="S$1." />
|
||||
<RegEx find="\b[Dd](r|ra)\b\.?" replaceWith="D$1." />
|
||||
<RegEx find="\b[Uu](d|ds)\b\.?" replaceWith="U$1." />
|
||||
<RegEx find="(\d)(\s){0,1}([Aa])(\.){0,1}([Mm])(\.){0,1}(\W){0,1}" replaceWith="$1 a. m.$7" />
|
||||
<RegEx find="(\d)(\s){0,1}([Pp])(\.){0,1}([Mm])(\.){0,1}(\W){0,1}" replaceWith="$1 p. m.$7" />
|
||||
<RegEx find="(\d)(\s){0,1}(h)(s\b|r\b|rs\b){0,1}(\.){0,1}(\W){0,1}" replaceWith="$1 $3$6" />
|
||||
<RegEx find="(\d)(\s){0,1}([Kk])(m\b|ms\b)(\.){0,1}(\W){0,1}" replaceWith="$1 km$6" />
|
||||
<RegEx find="(\d)(\s){0,1}(s)(g\b|eg\b){0,1}(\.){0,1}(\W){0,1}" replaceWith="$1 s$6" />
|
||||
<RegEx find="(\d)(\s){0,1}([Kk])(g\b|gs\b)(\.){0,1}(\W){0,1}" replaceWith="$1 kg$6" />
|
||||
<RegEx find="(\d)(\s){0,1}(m)(t\b|ts\b){0,1}(\.){0,1}(\W){0,1}" replaceWith="$1 m$6" />
|
||||
<RegEx find="(\d)KBs(\W){0,1}" replaceWith="$1 kB$2" />
|
||||
<RegEx find="([Nn])°(\s){0,1}(\d)" replaceWith="$1.° $3" />
|
||||
<RegEx find="([Nn])ro(\.){0,1}(\s){0,1}(\d)" replaceWith="$1.° $4" />
|
||||
<!-- Signos invertidos -->
|
||||
<RegEx find="\?¿(\W|\w)" replaceWith="? ¿$1" />
|
||||
<RegEx find="\!¡(\W|\w)" replaceWith="! ¡$1" />
|
||||
<RegEx find="\?¿¿(\W|\w)" replaceWith="? ¿$1" />
|
||||
<RegEx find="\!¡¡(\W|\w)" replaceWith="! ¡$1" />
|
||||
<!-- Inicio de línea -->
|
||||
<RegEx find="^_(\s)" replaceWith="-$1" />
|
||||
<RegEx find="^_(\w)" replaceWith="- $1" />
|
||||
<!-- Uso de comillas según la recomendación de la RAE y la Wikipedia -->
|
||||
<RegEx find="(«[^“«»]+)«" replaceWith="$1“" />
|
||||
<RegEx find="(“[^«»”]+)»" replaceWith="$1”" />
|
||||
<RegEx find="`" replaceWith="‘" />
|
||||
<RegEx find="´" replaceWith="’" />
|
||||
<RegEx find="([\wá-ú])(\.)(«|»)" replaceWith="$1»." />
|
||||
<RegEx find="«(\?)" replaceWith="»?" />
|
||||
<RegEx find="«(\!)" replaceWith="»!" />
|
||||
<RegEx find="«\s" replaceWith="» " />
|
||||
<RegEx find="«(\))" replaceWith="»)" />
|
||||
<RegEx find="(\?)«" replaceWith="?»" />
|
||||
<RegEx find="(\!)«" replaceWith="!»" />
|
||||
<RegEx find="«(,)" replaceWith="»," />
|
||||
<RegEx find="«(;)" replaceWith="»;" />
|
||||
<RegEx find="«(:)" replaceWith="»:" />
|
||||
<RegEx find="(¿)»" replaceWith="¿«" />
|
||||
<RegEx find="(¡)»" replaceWith="¡«" />
|
||||
<!-- Uso de comillas (ANSI) según la recomendación de la RAE («\x22» es el carácter «"») -->
|
||||
<RegEx find="([\wá-ú])([\.,]) ?[\x22»]" replaceWith="$1»$2" />
|
||||
<RegEx find="([\wá-ú])\?[\x22»](\s|$)" replaceWith="$1?».$2" />
|
||||
<RegEx find="^(\.\.\.)(\s){0,1}\x22" replaceWith="$1«" />
|
||||
<RegEx find="«\x22" replaceWith="«" />
|
||||
<RegEx find="\x22»" replaceWith="»" />
|
||||
<RegEx find="^\x22{2,}" replaceWith="«" />
|
||||
<RegEx find="\x22{2,}$" replaceWith="»" />
|
||||
<RegEx find="\x22\r" replaceWith="»" />
|
||||
<RegEx find="^\x22" replaceWith="«" />
|
||||
<RegEx find="\x22$" replaceWith="»." />
|
||||
<RegEx find="([\wá-ú])\.[\x22»]" replaceWith="$1»." />
|
||||
<RegEx find="\s\x22" replaceWith=" «" />
|
||||
<RegEx find="\x22\s" replaceWith="» " />
|
||||
<RegEx find="\x22(,)" replaceWith="»," />
|
||||
<RegEx find="\x22(\.)" replaceWith="»." />
|
||||
<RegEx find="\x22(;)" replaceWith="»;" />
|
||||
<RegEx find="\x22(:)" replaceWith="»:" />
|
||||
<RegEx find="(\!)\x22" replaceWith="!»" />
|
||||
<RegEx find="\x22(\!)" replaceWith="»!" />
|
||||
<RegEx find="(\?)\x22" replaceWith="?»" />
|
||||
<RegEx find="\x22(\?)" replaceWith="»?" />
|
||||
<RegEx find="\x22(¿)" replaceWith="«¿" />
|
||||
<RegEx find="(¿)\x22" replaceWith="¿«" />
|
||||
<RegEx find="\x22(¡)" replaceWith="«¡" />
|
||||
<RegEx find="(¡)\x22" replaceWith="¡«" />
|
||||
<RegEx find="\x22(\))" replaceWith="»)" />
|
||||
<RegEx find="(\))\x22" replaceWith=")»" />
|
||||
<RegEx find="(\()\x22" replaceWith="(«" />
|
||||
<!-- Uso de comillas (Unicode) según la recomendación de la RAE («\u0022» es el carácter «"») -->
|
||||
<RegEx find="^(\.\.\.)(\s){0,1}\u0022" replaceWith="$1«" />
|
||||
<RegEx find="^\u0022{2,}" replaceWith="«" />
|
||||
<RegEx find="\u0022{2,}$" replaceWith="»" />
|
||||
<RegEx find="\u0022\r" replaceWith="»" />
|
||||
<RegEx find="^\u0022" replaceWith="«" />
|
||||
<RegEx find="\u0022$" replaceWith="»" />
|
||||
<RegEx find="(\w)(\.)\u0022" replaceWith="$1»." />
|
||||
<RegEx find="\s\u0022" replaceWith=" «" />
|
||||
<RegEx find="\u0022\s" replaceWith="» " />
|
||||
<RegEx find="\u0022(,)" replaceWith="»," />
|
||||
<RegEx find="\u0022(\.)" replaceWith="»." />
|
||||
<RegEx find="\u0022(;)" replaceWith="»;" />
|
||||
<RegEx find="\u0022(:)" replaceWith="»:" />
|
||||
<RegEx find="(\!)\u0022" replaceWith="!»" />
|
||||
<RegEx find="\u0022(\!)" replaceWith="»!" />
|
||||
<RegEx find="(\?)\u0022" replaceWith="?»" />
|
||||
<RegEx find="\u0022(\?)" replaceWith="»?" />
|
||||
<RegEx find="\u0022(¿)" replaceWith="«¿" />
|
||||
<RegEx find="(¿)\u0022" replaceWith="¿«" />
|
||||
<RegEx find="\u0022(¡)" replaceWith="«¡" />
|
||||
<RegEx find="(¡)\u0022" replaceWith="¡«" />
|
||||
<RegEx find="\u0022(\))" replaceWith="»)" />
|
||||
<RegEx find="(\))\u0022" replaceWith=")»" />
|
||||
<RegEx find="(\()\u0022" replaceWith="(«" />
|
||||
<!-- Numeración -->
|
||||
<RegEx find="([0-9])\.([0-9])\b" replaceWith="$1,$2" />
|
||||
<RegEx find="(^|\s|[¡¿«])([0-9])(,|\.)?([0-9]{3})\b" replaceWith="$1$2$4" />
|
||||
<RegEx find="(\d)\s(?=\d{2}\b)" replaceWith="$1-" />
|
||||
<!-- "1 :", "2 :"... "n :" a "n:" -->
|
||||
<RegEx find="(\d) ([:;])" replaceWith="$1$2" />
|
||||
<!-- Corregir las comas y puntos por ej. «, ,» por «,» & «,,,» o similar por «...» -->
|
||||
<RegEx find="(\.\.\.+)$" replaceWith="..." />
|
||||
<RegEx find="(, ,+)$" replaceWith="," />
|
||||
<RegEx find="(,\s),+\s" replaceWith="$1" />
|
||||
<RegEx find="(\.\.\.),$" replaceWith="$1" />
|
||||
<RegEx find="([\wá-ú])(\.\.)$" replaceWith="$1." />
|
||||
<!-- Puntos innecesarios (complemento) -->
|
||||
<RegEx find="([\w\W]\.{3})([¡¿])" replaceWith="$1 $2" />
|
||||
<RegEx find="(\w)\.\.(\s)" replaceWith="$1.$2" />
|
||||
<RegEx find="([\wá-ú\x22»])\.([\?\!])" replaceWith="$1$2" />
|
||||
<RegEx find="([\:\;])\." replaceWith="$1" />
|
||||
<RegEx find="\.([\:\;])" replaceWith="$1" />
|
||||
<RegEx find="\:+" replaceWith=":" />
|
||||
<!-- Terminaciones ción/sión -->
|
||||
<RegEx find="([sc]i)o(n)\b" replaceWith="$1ó$2" />
|
||||
<RegEx find="([SC]I)O(N)\b" replaceWith="$1Ó$2" />
|
||||
<!-- "i" en vez de "l" en terminaciones «clón» -->
|
||||
<RegEx find="clón\b" replaceWith="ción" />
|
||||
<!-- "si" en vez de "sl" -->
|
||||
<RegEx find="\b([Ss])(l)\b" replaceWith="$1i" />
|
||||
<!-- Para corregir por ej. raclones, perforaclones, opclones, etc -->
|
||||
<RegEx find="([Rr]ac)l(o)" replaceWith="$1i$2" />
|
||||
<RegEx find="([Oo]pc)l(o)" replaceWith="$1i$2" />
|
||||
<!-- Para corregir por ej. tenldo, víctlmas, olvldarlo, legítlmo, etc -->
|
||||
<RegEx find="([BbCcDdFfHhMmNnRrSsTtVv])l([bcdhmnrstv])" replaceWith="$1i$2" />
|
||||
<!-- Corrige los errores en el ripeo de la «o» mayúscula por el cero «0» y viceversa -->
|
||||
<RegEx find="(?&lt;=\d)O" replaceWith="0" />
|
||||
<RegEx find="(\d)[,\.]O" replaceWith="$1.0" />
|
||||
<RegEx find="([A-Z])0" replaceWith="$1O" />
|
||||
<RegEx find="\b0([A-Za-z])" replaceWith="O$1" />
|
||||
<!-- Signos musicales -->
|
||||
<RegEx find="[♪♫☺☹♥©☮☯Σ∞≡⇒π#](\r\n)[♪♫☺☹♥©☮☯Σ∞≡⇒π#]" replaceWith="$1" />
|
||||
<!-- Tilde diacrítica antes del punto -->
|
||||
<RegEx find="(\s)([dst])e\.(\s|$)" replaceWith="$1$2é.$3" />
|
||||
<RegEx find="(\s)mi\.(\s|$)" replaceWith="$1mí.$2" />
|
||||
<RegEx find="(\s)el\.(\s|$)" replaceWith="$1él.$2" />
|
||||
<RegEx find="(\s)tu\.(\s|$)" replaceWith="$1tú.$2" />
|
||||
<RegEx find="(\s)si\.(\s|$)" replaceWith="$1sí.$2" />
|
||||
<RegEx find="(\s)aun\.(\s|$)" replaceWith="$1aún.$2" />
|
||||
<RegEx find="(\s)mas\.(\s|$)" replaceWith="$1más.$2" />
|
||||
<RegEx find="(\s)quien\.(\s|$)" replaceWith="$1quién.$2" />
|
||||
<RegEx find="(\s)cual\.(\s|$)" replaceWith="$1cuál.$2" />
|
||||
<RegEx find="(\s)que\.(\s|$)" replaceWith="$1qué.$2" />
|
||||
<RegEx find="(\s)porque\.(\s|$)" replaceWith="$1porqué.$2" />
|
||||
<RegEx find="(\s)cuanto\.(\s|$)" replaceWith="$1cuánto.$2" />
|
||||
<RegEx find="(\s)cuando\.(\s|$)" replaceWith="$1cuándo.$2" />
|
||||
<!-- Prefijos; palabras compuestas (simple) -->
|
||||
<RegEx find="(\b[Ee]x|\b[Ss]uper|\b[Aa]nti|\b[Pp]os|\b[Pp]re|\b[Pp]ro|\b[Vv]ice)[\s\x2D]([a-zá-ú]{3,20})(\b)" replaceWith="$1$2" />
|
||||
<!-- Prefijos; palabras compuestas (números) -->
|
||||
<RegEx find="(\b[Ss]ub|\b[Ss]uper)[\s\x2D](\d{2})(\b)" replaceWith="$1-$2$3" />
|
||||
<!-- Prefijos; palabras compuestas (mayúsculas) -->
|
||||
<RegEx find="(\b[Aa]nti|\b[Mm]ini|\b[Pp]os|\b[Pp]ro)\s([A-Z]{1,10})([A-Z][a-zá-ú]){0,10}(\b)" replaceWith="$1-$2$3" />
|
||||
<!-- Casos de mayúsculas con dos puntos -->
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(a)" replaceWith="$1A" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(b)" replaceWith="$1B" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(c)" replaceWith="$1C" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(d)" replaceWith="$1D" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(e)" replaceWith="$1E" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(f)" replaceWith="$1F" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(g)" replaceWith="$1G" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(h)" replaceWith="$1H" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(i)" replaceWith="$1I" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(j)" replaceWith="$1J" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(k)" replaceWith="$1K" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(l)" replaceWith="$1L" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(m)" replaceWith="$1M" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(n)" replaceWith="$1N" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(ñ)" replaceWith="$1Ñ" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(o)" replaceWith="$1O" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(p)" replaceWith="$1P" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(q)" replaceWith="$1Q" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(r)" replaceWith="$1R" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(s)" replaceWith="$1S" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(t)" replaceWith="$1T" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(u)" replaceWith="$1U" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(v)" replaceWith="$1V" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(w)" replaceWith="$1W" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(x)" replaceWith="$1X" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(y)" replaceWith="$1Y" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(z)" replaceWith="$1Z" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(á)" replaceWith="$1Á" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(é)" replaceWith="$1É" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(í)" replaceWith="$1Í" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(ó)" replaceWith="$1Ó" />
|
||||
<RegEx find="([\wá-ú]:\s[«\x22]?)(ú)" replaceWith="$1Ú" />
|
||||
<!-- Usos correctos de la coma -->
|
||||
<RegEx find="(\b[Pp]ero),(\s)([¡¿])" replaceWith="$1$2$3" />
|
||||
<RegEx find="(\b[Aa]unque),(\s|$)" replaceWith="$1$2" />
|
||||
<!-- Vocativos -->
|
||||
<RegEx find="(\bHola|\bBueno|\bBien|\bVen|\bVen acá|\besto|\bBuenos días|\bFeliz cumpleaños|\bsiento)\s([A-Z][a-zá-ú]{3,12}\b|seño(r|ra|rita)\b|hij(o|a) mío\b|amig(o|a)\b)" replaceWith="$1, $2" />
|
||||
<!-- «aún» cuando son sinónimos de «incluso» o «hasta» -->
|
||||
<RegEx find="(\W|^)(\b[Aa])ú(n)(\s)(así\b|cuando\b|los\b|las\b|negar(te|se)\b)" replaceWith="$1$2u$3$4$5" />
|
||||
<RegEx find="(\b[Nn]i)(\s)(a)ú(n)(\W|$)" replaceWith="$1$2$3u$4$5" />
|
||||
<!-- «sí» -->
|
||||
<RegEx find="\b([Ss])i(:|;|\.)" replaceWith="$1í$2" />
|
||||
<!-- «sé» -->
|
||||
<RegEx find="(\b[Ll]o|\b[Ll]a|\b[Ll]e)(\s)se(\W|$)" replaceWith="$1$2sé$3" />
|
||||
<RegEx find="[Ss]e\s(dónde\b|cuándo\b|adónde\b|cómo\b|cuál\b|quién\b|cuánto\b|cuánta\b|cuántos\b|cuántas\b|cuán\b)" replaceWith="sé $1" />
|
||||
<!-- «té» -->
|
||||
<RegEx find="\b([Tt])e\s(verde\b|negro\b|perla\b|de manzanilla\b|de lim[óo]n\b|de jazm[íi]n\b)" replaceWith="$1é $2" />
|
||||
<!-- Apóstrofo -->
|
||||
<RegEx find="(\b[A-Z][a-zá-ú]{3,12})\s(’|')(\d\d(\s|$))" replaceWith="$1 $3" />
|
||||
<!-- Acronym plurals take no apostrophe (e.g. DVD's to DVDs; spelled out: «devedés») -->
<RegEx find="(\b[A-Z]{2,5})(’|')(s)" replaceWith="$1$3" />
|
||||
<RegEx find="(\b\d{1,2})(’|')(\d{2})\s(s|m)(\W|$)" replaceWith="$1,$3 $4$5" />
|
||||
<RegEx find="(\b\d{1,2})(’|')(\d{2})\s(h)(\W|$)" replaceWith="$1:$3 $4$5" />
|
||||
<!-- Porcentaje (debe llevar espacio) -->
|
||||
<RegEx find="(\b\d{1,3})%(\W)" replaceWith="$1 %$2" />
|
||||
<!-- Haz/has -->
|
||||
<RegEx find="(\b)([Hh])as\s(la\b|lo\b|clic\b)(\W)" replaceWith="$1$2az $3$4" />
|
||||
<RegEx find="(\b)([Hh])az\s(de\b)(\W)" replaceWith="$1$2as $3$4" />
|
||||
<RegEx find="(\b)([Hh])as(le\b|nos\b|me\b)(\W)" replaceWith="$1$2az$3$4" />
|
||||
<!-- Quitar itálicas en 3 o menos letras -->
|
||||
<RegEx find="\x3ci\x3e(.{1,3})\x3c\/i\x3e" replaceWith="$1" />
|
||||
<!-- Miscelánea -->
|
||||
<RegEx find="(\b[Cc]erca|\b[Ee]ncima|\b[Dd]ebajo|\b[Dd]etrás|\b[Dd]elante)(\s)mío" replaceWith="$1 de mí" />
|
||||
<RegEx find="(\b[Cc]erca|\b[Ee]ncima|\b[Dd]ebajo|\b[Dd]etrás|\b[Dd]elante)(\s)tuyo" replaceWith="$1 de ti" />
|
||||
<!-- Punto antes de «¿» y «¡» -->
|
||||
<RegEx find="([\wá-ú»])\s(?=(¿|¡)[A-ZÁ-Ú])" replaceWith="$1. " />
|
||||
<!-- Espacios después del guión -->
|
||||
<RegEx find="(^|\n)(-)([^\s])" replaceWith="$1$2 $3" />
|
||||
<!-- Punto antes del guión -->
|
||||
<RegEx find="([^\.\?\!]) - " replaceWith="$1. - " />
|
||||
<!-- Terminaciones en «ólogo», «ílogo» y «álogo» -->
|
||||
<RegEx find="\Bo(log[ao]s?\b)" replaceWith="ó$1" />
|
||||
<RegEx find="\Ba(log[ao]s?\b)" replaceWith="á$1" />
|
||||
<RegEx find="\Bi(log[ao]s?\b)" replaceWith="í$1" />
|
||||
</RegularExpressions>
|
||||
</OCRFixReplaceList>
|
||||
+234
@@ -0,0 +1,234 @@
|
||||
<!-- Credit goes to: MilanRS [http://www.prijevodi-online.org] -->
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="ču" to="ću" />
|
||||
<Word from="češ" to="ćeš" />
|
||||
<Word from="če" to="će" />
|
||||
<Word from="ćš" to="ćeš" />
|
||||
<Word from="ćmo" to="ćemo" />
|
||||
<Word from="ćte" to="ćete" />
|
||||
<Word from="čemo" to="ćemo" />
|
||||
<Word from="čete" to="ćete" />
|
||||
<Word from="djete" to="dijete" />
|
||||
<Word from="Hey" to="Hej" />
|
||||
<Word from="hey" to="hej" />
|
||||
<Word from="htjeo" to="htio" />
|
||||
<Word from="Hočeš" to="Hoćeš" />
|
||||
<Word from="hočeš" to="hoćeš" />
|
||||
<Word from="iči" to="ići" />
|
||||
<Word from="jel" to="je l'" />
|
||||
<Word from="Jel" to="Je l'" />
|
||||
<Word from="nedaj" to="ne daj" />
|
||||
<Word from="Rješit" to="Riješit" />
|
||||
<Word from="smjeo" to="smio" />
|
||||
<Word from="uopče" to="uopće" />
|
||||
<Word from="valda" to="valjda" />
|
||||
<Word from="želila" to="željela" />
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords>
|
||||
<WordPart from="¤" to="o" />
|
||||
<WordPart from="vv" to="w" />
|
||||
<WordPart from="IVI" to="M" />
|
||||
<WordPart from="lVI" to="M" />
|
||||
<WordPart from="IVl" to="M" />
|
||||
<WordPart from="lVl" to="M" />
|
||||
</PartialWords>
|
||||
<PartialLines>
|
||||
<LinePart from="bi smo" to="bismo" />
|
||||
<LinePart from="dali je" to="da li je" />
|
||||
<LinePart from="dali si" to="da li si" />
|
||||
<LinePart from="Dali si" to="Da li si" />
|
||||
<LinePart from="Jel sam ti" to="Jesam li ti" />
|
||||
<LinePart from="Jel si" to="Jesi li" />
|
||||
<LinePart from="Jel' si" to="Jesi li" />
|
||||
<LinePart from="Je I'" to="Jesi li" />
|
||||
<LinePart from="Jel si to" to="Jesi li to" />
|
||||
<LinePart from="Jel' si to" to="Da li si to" />
|
||||
<LinePart from="jel si to" to="da li si to" />
|
||||
<LinePart from="jel' si to" to="jesi li to" />
|
||||
<LinePart from="Jel si ti" to="Da li si ti" />
|
||||
<LinePart from="Jel' si ti" to="Da li si ti" />
|
||||
<LinePart from="jel si ti" to="da li si ti" />
|
||||
<LinePart from="jel' si ti" to="da li si ti" />
|
||||
<LinePart from="jel ste " to="jeste li " />
|
||||
<LinePart from="Jel ste" to="Jeste li" />
|
||||
<LinePart from="jel' ste " to="jeste li " />
|
||||
<LinePart from="Jel' ste " to="Jeste li " />
|
||||
<LinePart from="Jel su " to="Jesu li " />
|
||||
<LinePart from="Jel da " to="Zar ne " />
|
||||
<LinePart from="jel da " to="zar ne " />
|
||||
<LinePart from="jel'da " to="zar ne " />
|
||||
<LinePart from="Jeli sve " to="Je li sve " />
|
||||
<LinePart from="Jeli on " to="Je li on " />
|
||||
<LinePart from="Jeli ti " to="Je li ti " />
|
||||
<LinePart from="jeli ti " to="je li ti " />
|
||||
<LinePart from="Jeli to " to="Je li to " />
|
||||
<LinePart from="Nebrini" to="Ne brini" />
|
||||
<LinePart from="nedaj" to="ne daj" />
|
||||
<LinePart from="ne ću" to="neću" />
|
||||
<LinePart from="Nemogu" to="Ne mogu" />
|
||||
<LinePart from="nemogu" to="ne mogu" />
|
||||
<LinePart from="Nemoraš" to="Ne moraš" />
|
||||
<LinePart from="od kako" to="otkako" />
|
||||
<LinePart from="Si dobro" to="Jesi li dobro" />
|
||||
<LinePart from="Svo vreme" to="Sve vrijeme" />
|
||||
<LinePart from="Svo vrijeme" to="Sve vrijeme" />
|
||||
<LinePart from="Cijelo vrijeme" to="Sve vrijeme" />
|
||||
</PartialLines>
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions>
|
||||
<RegEx find="đž" replaceWith="dž" />
|
||||
<RegEx find="ajsmiješnij" replaceWith="ajsmješnij" />
|
||||
<RegEx find="boži[čć]([aeiu]|em|ima)?\b" replaceWith="Božić$1" />
|
||||
<RegEx find=" g-dine\.$" replaceWith=" gospodine." />
|
||||
<RegEx find=" g-dine +(?=[A-ZČĐŠŽ])" replaceWith=" g. " />
|
||||
<RegEx find="([gG])dine? +(?=[A-ZČĐŠŽ])" replaceWith="$1. " />
|
||||
<RegEx find="([gG])-đo +(?=[A-ZČĐŠŽ])" replaceWith="$1gđo " />
|
||||
<RegEx find="gdina +(?=[A-ZČĐŠŽ])" replaceWith="g. " />
|
||||
<RegEx find=" gosp +" replaceWith=" g. " />
|
||||
<RegEx find="Jel si sigur" replaceWith="Jesi li sigur" />
|
||||
<RegEx find="Jel' si sigur" replaceWith="Jesi li sigur" />
|
||||
<RegEx find="\b([jJ])el\?" replaceWith="$1e l'?" />
|
||||
<RegEx find="\bJel'" replaceWith="Je l'" />
|
||||
<RegEx find="([kK]alib(?:ar|r[aeui]))\. *([0-9])" replaceWith="$1 .$2" />
|
||||
<RegEx find="([mM])jenjati" replaceWith="$1ijenjati" />
|
||||
<RegEx find="([mM])oguč" replaceWith="$1oguć" />
|
||||
<RegEx find="\b([nN])ebih?" replaceWith="$1e bi" />
|
||||
<RegEx find="\b([nN])eč([ue]š?|emo|ete)\b" replaceWith="$1eć$2" />
|
||||
<RegEx find="\b([nN])emože(mo|š|te)?\b" replaceWith="$1e može$2" />
|
||||
<RegEx find="\b([nN])ezna([šm]o?|t[ei]|ju|jući|vši)?\b" replaceWith="$1e zna$2" />
|
||||
<RegEx find="najcijenjen" replaceWith="najcjenjen" />
|
||||
<RegEx find="N[jJ]u Jork" replaceWith="Njujork" />
|
||||
<RegEx find="([oO])d([kp])" replaceWith="$1t$2" />
|
||||
<RegEx find="([oO])ružij([aeu])" replaceWith="$1ružj$2" />
|
||||
<RegEx find="([oO])sječa" replaceWith="$1sjeća" />
|
||||
<RegEx find="([pPdD])onje([lt])" replaceWith="$1onije$2" />
|
||||
<RegEx find="([pP])objedi([mšto])" replaceWith="$1obijedi$2" />
|
||||
<RegEx find="redamnom" replaceWith="reda mnom" />
|
||||
<RegEx find="redpostav" replaceWith="retpostav" />
|
||||
<RegEx find="([pP])rimjeti" replaceWith="$1rimijeti" />
|
||||
<RegEx find="([pP])romjeni([mštol])" replaceWith="$1romijeni$2" />
|
||||
<RegEx find="([rR])azumijeć" replaceWith="$1azumjeć" />
|
||||
<RegEx find="rascjepljen" replaceWith="rascijepljen" />
|
||||
<RegEx find="redhodn" replaceWith="rethodn" />
|
||||
<RegEx find="rimjenjen" replaceWith="rimijenjen" />
|
||||
<RegEx find="([^d])rješit" replaceWith="$1riješit" />
|
||||
<RegEx find="([sSzZ])amnom" replaceWith="$1a mnom" />
|
||||
<RegEx find="([sS])lijede[čć]([aeiu]|e[mg])" replaceWith="$1ljedeć$2" />
|
||||
<RegEx find="([sS])mješno" replaceWith="$1miješno" />
|
||||
<RegEx find="([uU])mijesto" replaceWith="$1mjesto" />
|
||||
<RegEx find="([uU])spijeh" replaceWith="$1spjeh" />
|
||||
<RegEx find="([uU])spiješ(an|n[aeiou]|no[mgj])" replaceWith="$1spješ$2" />
|
||||
<RegEx find="([uU])vjek" replaceWith="$1vijek" />
|
||||
<RegEx find="\b([vV])eč([aeiou])" replaceWith="$1eć$2" />
|
||||
<RegEx find="([zZ])ahtijeva" replaceWith="$1ahtjeva" />
|
||||
<RegEx find="([zZ])ahtjeva([ojlmšt])" replaceWith="$1ahtijeva$2" />
|
||||
<RegEx find="([ks]ao)\.:" replaceWith="$1:" />
|
||||
<RegEx find="(?&lt;=[a-zčđšž])Ij(?=[a-zčđšž])" replaceWith="lj" />
|
||||
<RegEx find="(?&lt;=[^A-ZČĐŠŽa-zčđšž])Iju(?=bav|d|t)" replaceWith="lju" />
|
||||
<!-- kad ima razmak između tagova </i> <i> -->
|
||||
<!-- <RegEx find="(>) +(<)" replaceWith="$1$2" /> -->
|
||||
<!-- ',"' to '",' -->
|
||||
<RegEx find="(?&lt;=\w),&quot;(?=\s|$)" replaceWith="&quot;," />
|
||||
<RegEx find=",\.{3}|\.{3},|\.{2} \." replaceWith="..." />
|
||||
<!-- "1 :", "2 :"... "n :" to "n:" -->
|
||||
<RegEx find="([0-9]) +: +(\D)" replaceWith="$1: $2" />
|
||||
<!-- Two or more consecutive "," to "..." -->
|
||||
<RegEx find=",{2,}" replaceWith="..." />
|
||||
<!-- Two or more consecutive "-" to "..." -->
|
||||
<RegEx find="-{2,}" replaceWith="..." />
|
||||
<RegEx find="([^().])\.{2}([^().:])" replaceWith="$1...$2" />
|
||||
<!-- separator stotica i decimalnog ostatka 1,499,000.00 -> 1.499.000,00 -->
|
||||
<RegEx find="([0-9]{3})\.([0-9]{2}[^0-9])" replaceWith="$1,$2" />
|
||||
<RegEx find="([0-9]),([0-9]{3}\D)" replaceWith="$1.$2" />
|
||||
<!-- Apostrophes -->
|
||||
<RegEx find="´´" replaceWith="&quot;" />
|
||||
<!-- <RegEx find="[´`]" replaceWith="'" /> -->
|
||||
<!-- <RegEx find="[“”]" replaceWith=""" /> -->
|
||||
<RegEx find="''" replaceWith="&quot;" />
|
||||
<!-- Two or more consecutive '"' to one '"' -->
|
||||
<RegEx find="&quot;{2,}" replaceWith="&quot;" />
|
||||
<!-- Fix zero and capital 'o' ripping mistakes -->
|
||||
<RegEx find="(?&lt;=[0-9]\.?)O" replaceWith="0" />
|
||||
<RegEx find="\b0(?=[A-ZČĐŠŽa-zčđšž])" replaceWith="O" />
|
||||
<!-- Brisanje crte - na početku 1. reda (i kada ima dva reda) -->
|
||||
<RegEx find="\A- ?([A-ZČĐŠŽa-zčđšž0-9„'&quot;]|\.{3})" replaceWith="$1" />
|
||||
<RegEx find="\A(&lt;[ibu]&gt;)- ?" replaceWith="$1" />
|
||||
<RegEx find=" - " replaceWith=" -" />
|
||||
<!-- Brisanje razmaka iza crte - na početku 2. reda -->
|
||||
<RegEx find="(?&lt;=\n(&lt;[ibu]&gt;)?)- (?=[A-ZČĐŠŽčš0-9„'&quot;&lt;])" replaceWith="-" />
|
||||
<!-- Korigovanje crte - kad je u sredini prvog reda -->
|
||||
<RegEx find="([.!?&quot;&gt;]) - ([A-ZČĐŠŽčš'&quot;&lt;])" replaceWith="$1 -$2" />
|
||||
<!-- Zatvoren tag pa razmak poslije crtice -->
|
||||
<RegEx find="(&gt;) - ([A-ZČĐŠŽčš„'&quot;])" replaceWith="$1 -$2" />
|
||||
<!-- Zatvoren tag pa crtica razmak -->
|
||||
<RegEx find="(&gt;)- ([A-ZČĐŠŽčš„'&quot;])" replaceWith="$1-$2" />
|
||||
<!-- Zagrada pa crtica razmak -->
|
||||
<RegEx find="\(- ([A-ZČĐŠŽčš„'&quot;])" replaceWith="(-$1" />
|
||||
<!-- Smart space after dot -->
|
||||
<!-- osim kad je zadnje t (riječ kolt) -->
|
||||
<RegEx find="(?&lt;=[a-su-zá-úñä-ü])\.(?=[^\s\n().:?!*^“”'&quot;&lt;])" replaceWith=". " />
|
||||
<!-- Oznaka za kalibar. Npr. "Colt .45" -->
|
||||
<!-- Da bi radilo, da bi ovaj razmak bio dozvoljen, odčekirajte "Razmaci ispred tačke" -->
|
||||
<RegEx find="t\.(?=[0-9]{2})" replaceWith="t ." />
|
||||
<!-- Joey(j)a -->
|
||||
<RegEx find="(?&lt;=\b[A-Z][a-z])eyj(?=[a-z])" replaceWith="ey" />
|
||||
<!-- Sređuje zarez sa razmakom -->
|
||||
<RegEx find="(?&lt;=[A-ZČĐŠŽa-zčđšžá-úñä-ü&quot;]),(?=[^\s(),?!“&lt;])" replaceWith=", " />
|
||||
<RegEx find=" +,(?=[A-ZČĐŠŽa-zčđšž])" replaceWith=", " />
|
||||
<RegEx find=" +, +" replaceWith=", " />
|
||||
<RegEx find=" +,$" replaceWith="," />
|
||||
<RegEx find="([?!])-" replaceWith="$1 -" />
|
||||
<!-- Space after last of some consecutive dots (eg. "...") -->
|
||||
<RegEx find="(?&lt;=[a-zčđšž])(\.{3}|!)(?=[a-zčđšž])" replaceWith="$1 " />
|
||||
<!-- Delete space after "..." that is at the beginning of the line. You may delete this line if you don't like it -->
|
||||
<!-- <RegEx find="^\.{3} +" replaceWith="..." /> -->
|
||||
<!-- "tekst ... tekst" mijenja u "tekst... tekst" -->
|
||||
<RegEx find="(?&lt;=[A-ZČĐŠŽa-zčđšž]) +\.{3} +" replaceWith="... " />
|
||||
<RegEx find="(?&lt;=\S)\. +&quot;" replaceWith=".&quot;" />
|
||||
<RegEx find="&quot; +\." replaceWith="&quot;." />
|
||||
<RegEx find="(?&lt;=\S\.{3}) +&quot;(?=\s|$)" replaceWith="&quot;" />
|
||||
<RegEx find=" +\.{3}$" replaceWith="..." />
|
||||
<RegEx find="(?&lt;=[a-zčđšž])(?: +\.{3}|\.{2}$)" replaceWith="..." />
|
||||
<!-- Razmak ispred zagrade -->
|
||||
<RegEx find="(?&lt;=[A-ZČĐŠŽa-zčđšž])\(" replaceWith=" (" />
|
||||
<!-- Razmak iza upitnika -->
|
||||
<RegEx find="\?(?=[A-ZČĐŠŽčš])" replaceWith="? " />
|
||||
<RegEx find="(?&lt;=^|&gt;)\.{3} +(?=[A-ZČĐŠŽčš])" replaceWith="..." />
|
||||
<!-- Brise ... kad je na poč. reda "... -->
|
||||
<RegEx find="^&quot;\.{3} +" replaceWith="&quot;" />
|
||||
<RegEx find="(?&lt;=[0-9])\$" replaceWith=" $$" />
|
||||
<!-- ti š -> t š by Strider -->
|
||||
<!-- Zamijeni sva "**ti šu*" s "**t šu*" i "**ti še*" s "**t še*" -->
|
||||
<!-- <RegEx find="([a-z])ti (š+[eu])" replaceWith="$1t $2" /> -->
|
||||
<!-- <RegEx find="([A-Za-z])ti( |\r?\n)(š[eu])" replaceWith="$1t$2$3" /> -->
|
||||
<!-- <RegEx find="(?i)\b(ni)t (š[eu])" replaceWith="$1ti $2" /> -->
|
||||
<!-- <RegEx find="\. +Mr. " replaceWith=". G. " /> -->
|
||||
<!-- <RegEx find="\. +Mrs. " replaceWith=". Gđa " /> -->
|
||||
<!-- <RegEx find="\. +Miss " replaceWith=". Gđica " /> -->
|
||||
<!-- <RegEx find=", +Mrs. " replaceWith=", gđo " /> -->
|
||||
<!-- <RegEx find=", +Miss " replaceWith=", gđice " /> -->
|
||||
<!-- Razmak poslije <i> i poslije .. -->
|
||||
<RegEx find="^(&lt;[ibu]&gt;) +" replaceWith="$1" />
|
||||
<RegEx find="^\.{2} +" replaceWith="..." />
|
||||
<!-- Razmak ? "</i> -->
|
||||
<RegEx find="([.?!]) +(&quot;&lt;)" replaceWith="$1$2" />
|
||||
<!-- Bez razmaka kod Npr.: -->
|
||||
<RegEx find="(?&lt;=[Nn]pr\.) *: *" replaceWith=": " />
|
||||
<RegEx find="\. ," replaceWith=".," />
|
||||
<RegEx find="([?!])\." replaceWith="$1" />
|
||||
<!-- Da ne kvari potpise sa ..:: -->
|
||||
<RegEx find="\.{3}::" replaceWith="..::" />
|
||||
<RegEx find="::\.{3}" replaceWith="::.." />
|
||||
<RegEx find="\.{2} +::" replaceWith="..::" />
|
||||
<!-- Skracenice bez razmaka -->
|
||||
<RegEx find="d\. o\.o\." replaceWith="d.o.o." />
|
||||
<!-- Kad red počinje sa ...pa malo slovo -->
|
||||
<!-- <RegEx find="^\.{3}([a-zčđšž"<])" replaceWith="$1" /> -->
|
||||
<!-- <RegEx find=" +([.?!])" replaceWith="$1" /> -->
|
||||
</RegularExpressions>
|
||||
</OCRFixReplaceList>
|
||||
+405
@@ -0,0 +1,405 @@
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="lârt" to="lärt" />
|
||||
<Word from="hedervårda" to="hedervärda" />
|
||||
<Word from="stormâstare" to="stormästare" />
|
||||
<Word from="Avfârd" to="Avfärd" />
|
||||
<Word from="tâlten" to="tälten" />
|
||||
<Word from="ârjag" to="är jag" />
|
||||
<Word from="ärjag" to="är jag" />
|
||||
<Word from="jâmlikar" to="jämlikar" />
|
||||
<Word from="Riskakofl" to="Riskakor" />
|
||||
<Word from="Karamellen/" to="Karamellen" />
|
||||
<Word from="Lngenüng" to="Ingenting" />
|
||||
<Word from="ärju" to="är ju" />
|
||||
<Word from="Sá" to="Så" />
|
||||
<Word from="närjag" to="när jag" />
|
||||
<Word from="alltjag" to="allt jag" />
|
||||
<Word from="görjag" to="gör jag" />
|
||||
<Word from="trorjag" to="tror jag" />
|
||||
<Word from="varju" to="var ju" />
|
||||
<Word from="görju" to="gör ju" />
|
||||
<Word from="kanju" to="kan ju" />
|
||||
<Word from="blirjag" to="blir jag" />
|
||||
<Word from="sägerjag" to="säger jag" />
|
||||
<Word from="behållerjag" to="behåller jag" />
|
||||
<Word from="prøblem" to="problem" />
|
||||
<Word from="räddadeju" to="räddade ju" />
|
||||
<Word from="honøm" to="honom" />
|
||||
<Word from="Ln" to="In" />
|
||||
<Word from="svårflörtad" to="svårflörtad" />
|
||||
<Word from="øch" to="och" />
|
||||
<Word from="flörtar" to="flörtar" />
|
||||
<Word from="kännerjag" to="känner jag" />
|
||||
<Word from="flickan" to="flickan" />
|
||||
<Word from="snø" to="snö" />
|
||||
<Word from="gerju" to="ger ju" />
|
||||
<Word from="køntakter" to="kontakter" />
|
||||
<Word from="ølycka" to="olycka" />
|
||||
<Word from="nølla" to="nolla" />
|
||||
<Word from="sinnenajublar" to="sinnena jublar" />
|
||||
<Word from="ijobbet" to="i jobbet" />
|
||||
<Word from="Fårjag" to="Får jag" />
|
||||
<Word from="Ar" to="Är" />
|
||||
<Word from="liggerju" to="ligger ju" />
|
||||
<Word from="um" to="om" />
|
||||
<Word from="lbland" to="Ibland" />
|
||||
<Word from="skjuterjag" to="skjuter jag" />
|
||||
<Word from="Vaddå" to="Vad då" />
|
||||
<Word from="pratarjämt" to="pratar jämt" />
|
||||
<Word from="harju" to="har ju" />
|
||||
<Word from="sitterjag" to="sitter jag" />
|
||||
<Word from="häfla" to="härja" />
|
||||
<Word from="sfiäl" to="stjäl" />
|
||||
<Word from="FÖU" to="Följ" />
|
||||
<Word from="varförjag" to="varför jag" />
|
||||
<Word from="sfiärna" to="stjärna" />
|
||||
<Word from="böflar" to="börjar" />
|
||||
<Word from="böflan" to="början" />
|
||||
<Word from="stäri" to="står" />
|
||||
<Word from="pä" to="på" />
|
||||
<Word from="harjag" to="har jag" />
|
||||
<Word from="attjag" to="att jag" />
|
||||
<Word from="Verkarjag" to="Verkar jag" />
|
||||
<Word from="Kännerjag" to="Känner jag" />
|
||||
<Word from="därjag" to="där jag" />
|
||||
<Word from="tufi" to="tuff" />
|
||||
<Word from="lurarjag" to="lurar jag" />
|
||||
<Word from="varjättebra" to="var jättebra" />
|
||||
<Word from="allvan" to="allvar" />
|
||||
<Word from="dethär" to="det här" />
|
||||
<Word from="vafle" to="varje" />
|
||||
<Word from="FöUer" to="Följer" />
|
||||
<Word from="personalmötetl" to="personalmötet!" />
|
||||
<Word from="harjust" to="har just" />
|
||||
<Word from="ärjätteduktig" to="är jätteduktig" />
|
||||
<Word from="därja" to="där ja" />
|
||||
<Word from="lngenüng" to="Ingenting" />
|
||||
<Word from="iluften" to="i luften" />
|
||||
<Word from="ösen" to="öser" />
|
||||
<Word from="tvâ" to="två" />
|
||||
<Word from="Uejerna" to="Tjejerna" />
|
||||
<Word from="hån*" to="hårt" />
|
||||
<Word from="Ärjag" to="Är jag" />
|
||||
<Word from="keL" to="Okej" />
|
||||
<Word from="Förjag" to="För jag" />
|
||||
<Word from="varjättekul" to="var jättekul" />
|
||||
<Word from="kämpan" to="kämpar" />
|
||||
<Word from="mycketjobb" to="mycket jobb" />
|
||||
<Word from="Uus" to="ljus" />
|
||||
<Word from="serjag" to="ser jag" />
|
||||
<Word from="vetjag" to="vet jag" />
|
||||
<Word from="fårjag" to="får jag" />
|
||||
<Word from="hurjag" to="hur jag" />
|
||||
<Word from="försökerjag" to="försöker jag" />
|
||||
<Word from="tánagel" to="tånagel" />
|
||||
<Word from="vaüe" to="varje" />
|
||||
<Word from="Uudet" to="ljudet" />
|
||||
<Word from="amhopa" to="allihopa" />
|
||||
<Word from="Väü" to="Välj" />
|
||||
<Word from="gäri" to="går" />
|
||||
<Word from="rödüus" to="rödljus" />
|
||||
<Word from="Uuset" to="ljuset" />
|
||||
<Word from="Ridàn" to="Ridån" />
|
||||
<Word from="viüa" to="vilja" />
|
||||
<Word from="gåri" to="går i" />
|
||||
<Word from="Hurdå" to="Hur då" />
|
||||
<Word from="inter\/juar" to="intervjuar" />
|
||||
<Word from="menarjag" to="menar jag" />
|
||||
<Word from="spyrjag" to="spyr jag" />
|
||||
<Word from="briüera" to="briljera" />
|
||||
<Word from="Närjag" to="När jag" />
|
||||
<Word from="ner\/ös" to="nervös" />
|
||||
<Word from="ilivets" to="i livets" />
|
||||
<Word from="nägot" to="något" />
|
||||
<Word from="pà" to="på" />
|
||||
<Word from="Lnnan" to="Innan" />
|
||||
<Word from="Uf" to="Ut" />
|
||||
<Word from="lnnan" to="Innan" />
|
||||
<Word from="Dàren" to="Dåren" />
|
||||
<Word from="Fàrjag" to="Får jag" />
|
||||
<Word from="VadärdetdäL" to="Vad är det där" />
|
||||
<Word from="smàtjuv" to="småtjuv" />
|
||||
<Word from="tàgrånare" to="tågrånare" />
|
||||
<Word from="ditàt" to="ditåt" />
|
||||
<Word from="sä" to="så" />
|
||||
<Word from="vàrdslösa" to="vårdslösa" />
|
||||
<Word from="nàn" to="nån" />
|
||||
<Word from="kommerjag" to="kommer jag" />
|
||||
<Word from="ärjättebra" to="är jättebra" />
|
||||
<Word from="ärjävligt" to="är jävligt" />
|
||||
<Word from="àkerjag" to="åker jag" />
|
||||
<Word from="ellerjapaner" to="eller japaner" />
|
||||
<Word from="attjaga" to="att jaga" />
|
||||
<Word from="eften" to="efter" />
|
||||
<Word from="hästan" to="hästar" />
|
||||
<Word from="Lntensivare" to="Intensivare" />
|
||||
<Word from="fràgarjag" to="frågar jag" />
|
||||
<Word from="pen/ers" to="pervers" />
|
||||
<Word from="ràbarkade" to="råbarkade" />
|
||||
<Word from="styrkon" to="styrkor" />
|
||||
<Word from="Difåf" to="Ditåt" />
|
||||
<Word from="händen" to="händer" />
|
||||
<Word from="föfia" to="följa" />
|
||||
<Word from="Idioten/" to="Idioter!" />
|
||||
<Word from="Varförjagade" to="Varför jagade" />
|
||||
<Word from="därförjag" to="därför jag" />
|
||||
<Word from="forjag" to="for jag" />
|
||||
<Word from="Iivsgladje" to="livsglädje" />
|
||||
<Word from="narjag" to="när jag" />
|
||||
<Word from="sajag" to="sa jag" />
|
||||
<Word from="genastja" to="genast ja" />
|
||||
<Word from="rockumentàren" to="rockumentären" />
|
||||
<Word from="turne" to="turné" />
|
||||
<Word from="fickjag" to="fick jag" />
|
||||
<Word from="sager" to="säger" />
|
||||
<Word from="Ijushårig" to="ljushårig" />
|
||||
<Word from="tradgårdsolycka" to="trädgårdsolycka" />
|
||||
<Word from="kvavdes" to="kvävdes" />
|
||||
<Word from="dàrja" to="där ja" />
|
||||
<Word from="hedersgaster" to="hedersgäster" />
|
||||
<Word from="Nar" to="När" />
|
||||
<Word from="smakiösa" to="smaklösa" />
|
||||
<Word from="lan" to="Ian" />
|
||||
<Word from="Lan" to="Ian" />
|
||||
<Word from="eri" to="er i" />
|
||||
<Word from="universitetsamne" to="universitetsämne" />
|
||||
<Word from="garna" to="gärna" />
|
||||
<Word from="ar" to="är" />
|
||||
<Word from="baltdjur" to="bältdjur" />
|
||||
<Word from="varjag" to="var jag" />
|
||||
<Word from="àr" to="är" />
|
||||
<Word from="förförstàrkare" to="förförstärkare" />
|
||||
<Word from="arjattespeciell" to="är jättespeciell" />
|
||||
<Word from="hàrgår" to="här går" />
|
||||
<Word from="Ia" to="la" />
|
||||
<Word from="Iimousinen" to="limousinen" />
|
||||
<Word from="krickettra" to="kricketträ" />
|
||||
<Word from="hårdrockvàrlden" to="hårdrockvärlden" />
|
||||
<Word from="tràbit" to="träbit" />
|
||||
<Word from="Mellanvastern" to="Mellanvästern" />
|
||||
<Word from="arju" to="är ju" />
|
||||
<Word from="turnen" to="turnén" />
|
||||
<Word from="kanns" to="känns" />
|
||||
<Word from="battre" to="bättre" />
|
||||
<Word from="vàrldsturne" to="världsturne" />
|
||||
<Word from="dar" to="där" />
|
||||
<Word from="sjàlvantànder" to="självantänder" />
|
||||
<Word from="jattelange" to="jättelänge" />
|
||||
<Word from="berattade" to="berättade" />
|
||||
<Word from="Sä" to="Så" />
|
||||
<Word from="vandpunkten" to="vändpunkten" />
|
||||
<Word from="Nàrjag" to="När jag" />
|
||||
<Word from="lasa" to="läsa" />
|
||||
<Word from="skitlàskigt" to="skitläskigt" />
|
||||
<Word from="sambandsvàg" to="sambandsväg" />
|
||||
<Word from="valdigt" to="väldigt" />
|
||||
<Word from="Stamgafiel" to="Stämgaffel" />
|
||||
<Word from="àrjag" to="är jag" />
|
||||
<Word from="tajming" to="tajmning" />
|
||||
<Word from="utgäng" to="utgång" />
|
||||
<Word from="Hàråt" to="Häråt" />
|
||||
<Word from="hàråt" to="häråt" />
|
||||
<Word from="anvander" to="använder" />
|
||||
<Word from="harjobbat" to="har jobbat" />
|
||||
<Word from="imageide" to="imageidé" />
|
||||
<Word from="klafien" to="klaffen" />
|
||||
<Word from="sjalv" to="själv" />
|
||||
<Word from="dvarg" to="dvärg" />
|
||||
<Word from="detjag" to="det jag" />
|
||||
<Word from="dvargarna" to="dvärgarna" />
|
||||
<Word from="fantasivàrld" to="fantasivärld" />
|
||||
<Word from="fiolliga" to="Fjolliga" />
|
||||
<Word from="mandoiinstràngar" to="mandolinsträngar" />
|
||||
<Word from="mittjobb" to="mitt jobb" />
|
||||
<Word from="Skajag" to="Ska jag" />
|
||||
<Word from="landari" to="landar i" />
|
||||
<Word from="gang" to="gäng" />
|
||||
<Word from="Detjag" to="Det jag" />
|
||||
<Word from="Narmre" to="Närmre" />
|
||||
<Word from="Iåtjavelni" to="låtjäveln" />
|
||||
<Word from="Hållerjag" to="Håller jag" />
|
||||
<Word from="visionarer" to="visionärer" />
|
||||
<Word from="Tülvad" to="Till vad" />
|
||||
<Word from="militàrbas" to="militärbas" />
|
||||
<Word from="jattegiada" to="jätteglada" />
|
||||
<Word from="Fastjag" to="Fast jag" />
|
||||
<Word from="såjag" to="så jag" />
|
||||
<Word from="rockvarlden" to="rockvärlden" />
|
||||
<Word from="saknarjag" to="saknar jag" />
|
||||
<Word from="allafall" to="alla fall" />
|
||||
<Word from="fianta" to="fjanta" />
|
||||
<Word from="Kràma" to="Kräma" />
|
||||
<Word from="stammer" to="stämmer" />
|
||||
<Word from="budbàrare" to="budbärare" />
|
||||
<Word from="Iivsfiiosofi" to="livsfilosofi" />
|
||||
<Word from="förjämnan" to="för jämnan" />
|
||||
<Word from="gillarjag" to="gillar jag" />
|
||||
<Word from="Iarvat" to="larvat" />
|
||||
<Word from="klararjag" to="klarar jag" />
|
||||
<Word from="hattafi'àr" to="hattaffär" />
|
||||
<Word from="Dà" to="Då" />
|
||||
<Word from="uppfinna" to="uppfinna" />
|
||||
<Word from="Ràttfåglar" to="Råttfåglar" />
|
||||
<Word from="Sväüboda" to="Sväljboda" />
|
||||
<Word from="Påböflar" to="Påbörjar" />
|
||||
<Word from="slutarju" to="slutar ju" />
|
||||
<Word from="nifiskebuüken" to="i fiskebutiken" />
|
||||
<Word from="härjäkeln" to="här jäkeln" />
|
||||
<Word from="Hßppa" to="Hoppa" />
|
||||
<Word from="förstörds" to="förstördes" />
|
||||
<Word from="varjättegoda" to="var jättegoda" />
|
||||
<Word from="Kor\/" to="Korv" />
|
||||
<Word from="brüléel" to="brülée!" />
|
||||
<Word from="Hei" to="Hej" />
|
||||
<Word from="älskarjordgubbsglass" to="älskar jordgubbsglass" />
|
||||
<Word from="Snöbom" to="Snöboll" />
|
||||
<Word from="SnöboH" to="Snöboll" />
|
||||
<Word from="Snöbol" to="Snöboll" />
|
||||
<Word from="snöboH" to="snöboll" />
|
||||
<Word from="Läggerpå" to="Lägger på" />
|
||||
<Word from="lngefl" to="Inget!" />
|
||||
<Word from="Sägerjättesmarta" to="Säger jättesmarta" />
|
||||
<Word from="dopplen/äderradar" to="dopplerväderradar" />
|
||||
<Word from="säkertjättefin" to="säkert jättefin" />
|
||||
<Word from="ärjättefin" to="är jättefin" />
|
||||
<Word from="verkarju" to="verkar ju" />
|
||||
<Word from="blirju" to="blir ju" />
|
||||
<Word from="kor\/" to="korv" />
|
||||
<Word from="naturkatastrofi" to="naturkatastrof!" />
|
||||
<Word from="stickerjag" to="sticker jag" />
|
||||
<Word from="jättebufié" to="jättebuffé" />
|
||||
<Word from="befinner" to="befinner" />
|
||||
<Word from="Spflng" to="Spring" />
|
||||
<Word from="trecfie" to="tredje" />
|
||||
<Word from="ryckerjag" to="rycker jag" />
|
||||
<Word from="skullejag" to="skulle jag" />
|
||||
<Word from="vetju" to="vet ju" />
|
||||
<Word from="afljag" to="att jag" />
|
||||
<Word from="flnns" to="finns" />
|
||||
<Word from="ärlång" to="är lång" />
|
||||
<Word from="kåra" to="kära" />
|
||||
<Word from="ärfina" to="är fina" />
|
||||
<Word from="äri" to="är i" />
|
||||
<Word from="hörden" to="hör den" />
|
||||
<Word from="ättjäg" to="att jäg" />
|
||||
<Word from="gär" to="går" />
|
||||
<Word from="föri" to="för i" />
|
||||
<Word from="Hurvisste" to="Hur visste" />
|
||||
<Word from="fick" to="fick" />
|
||||
<Word from="finns" to="finns" />
|
||||
<Word from="fin" to="fin" />
|
||||
<Word from="Fa" to="Bra." />
|
||||
<Word from="bori" to="bor i" />
|
||||
<Word from="fiendeplanl" to="fiendeplan!" />
|
||||
<Word from="iförnamn" to="i förnamn" />
|
||||
<Word from="detju" to="det ju" />
|
||||
<Word from="Nüd" to="Niki" />
|
||||
<Word from="hatarjag" to="hatar jag" />
|
||||
<Word from="Klararjag" to="Klarar jag" />
|
||||
<Word from="detafier" to="detaljer" />
|
||||
<Word from="vä/" to="väl" />
|
||||
<Word from="smakarju" to="smakar ju" />
|
||||
<Word from="Teachefl" to="Teacher!" />
|
||||
<Word from="imorse" to="i morse" />
|
||||
<Word from="drickerjag" to="dricker jag" />
|
||||
<Word from="ståri" to="står i" />
|
||||
<Word from="Harjag" to="Har jag" />
|
||||
<Word from="Talarjag" to="Talar jag" />
|
||||
<Word from="undrarjag" to="undrar jag" />
|
||||
<Word from="ålderjag" to="ålder jag" />
|
||||
<Word from="vafie" to="varje" />
|
||||
<Word from="förfalskningl" to="förfalskning!" />
|
||||
<Word from="Vifiiiiam" to="William" />
|
||||
<Word from="V\filliams" to="Williams" />
|
||||
<Word from="attjobba" to="att jobba" />
|
||||
<Word from="intei" to="inte i" />
|
||||
<Word from="närV\filliam" to="när William" />
|
||||
<Word from="V\filliam" to="William" />
|
||||
<Word from="Efiersom" to="Eftersom" />
|
||||
<Word from="Vlfilliam" to="William" />
|
||||
<Word from="Iängejag" to="länge jag" />
|
||||
<Word from="'fidigare" to="Tidigare" />
|
||||
<Word from="börjadei" to="började i" />
|
||||
<Word from="merjust" to="mer just" />
|
||||
<Word from="efieråt" to="efteråt" />
|
||||
<Word from="gjordejag" to="gjorde jag" />
|
||||
<Word from="hadeju" to="hade ju" />
|
||||
<Word from="gårvi" to="går vi" />
|
||||
<Word from="köperjag" to="köper jag" />
|
||||
<Word from="Måstejag" to="Måste jag" />
|
||||
<Word from="kännerju" to="känner ju" />
|
||||
<Word from="fln" to="fin" />
|
||||
<Word from="treviig" to="trevlig" />
|
||||
<Word from="Grattisl" to="Grattis!" />
|
||||
<Word from="kande" to="kände" />
|
||||
<Word from="'llden" to="Tiden" />
|
||||
<Word from="sakjag" to="sak jag" />
|
||||
<Word from="klartjag" to="klart jag" />
|
||||
<Word from="häfiigt" to="häftigt" />
|
||||
<Word from="Iämnarjag" to="lämnar jag" />
|
||||
<Word from="gickju" to="gick ju" />
|
||||
<Word from="skajag" to="ska jag" />
|
||||
<Word from="Görjag" to="Gör jag" />
|
||||
<Word from="måstejag" to="måste jag" />
|
||||
<Word from="gra\/iditet" to="graviditet" />
|
||||
<Word from="hittadqdin" to="hittade din" />
|
||||
<Word from="ärjobbigt" to="är jobbigt" />
|
||||
<Word from="Overdrivet" to="Överdrivet" />
|
||||
<Word from="hOgtidlig" to="högtidlig" />
|
||||
<Word from="Overtyga" to="Övertyga" />
|
||||
<Word from="SKILSMASSA" to="SKILSMÄSSA" />
|
||||
<Word from="brukarju" to="brukar ju" />
|
||||
<Word from="lsabel" to="Isabel" />
|
||||
<Word from="kundejag" to="kunde jag" />
|
||||
<Word from="ärläget" to="är läget" />
|
||||
<Word from="blirinte" to="blir inte" />
|
||||
<Word from="l'm" to="I'm" />
|
||||
<Word from="lt's" to="It's" />
|
||||
<Word from="ijakt" to="i jakt" />
|
||||
<Word from="avjordens" to="av jordens" />
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords>
|
||||
<!-- Will be used to check words not in dictionary -->
|
||||
<!-- If new word(s) exists in spelling dictionary, it(they) is accepted -->
|
||||
<WordPart from="¤" to="o" />
|
||||
<WordPart from="fi" to="fi" />
|
||||
<WordPart from="â" to="ä" />
|
||||
<WordPart from="/" to="l" />
|
||||
<WordPart from="vv" to="w" />
|
||||
<WordPart from="IVI" to="M" />
|
||||
<WordPart from="lVI" to="M" />
|
||||
<WordPart from="IVl" to="M" />
|
||||
<WordPart from="lVl" to="M" />
|
||||
<WordPart from="m" to="rn" />
|
||||
<WordPart from="l" to="i" />
|
||||
<WordPart from="€" to="e" />
|
||||
<WordPart from="I" to="l" />
|
||||
<WordPart from="c" to="o" />
|
||||
<WordPart from="i" to="t" />
|
||||
<WordPart from="cc" to="oo" />
|
||||
<WordPart from="ii" to="tt" />
|
||||
<WordPart from="n/" to="ry" />
|
||||
<WordPart from="ae" to="æ" />
|
||||
<!-- "f " will be two words -->
|
||||
<WordPart from="f" to="f " />
|
||||
<WordPart from="c" to="e" />
|
||||
<WordPart from="o" to="e" />
|
||||
<WordPart from="I" to="t" />
|
||||
<WordPart from="n" to="o" />
|
||||
<WordPart from="s" to="e" />
|
||||
<WordPart from="å" to="ä" />
|
||||
<WordPart from="à" to="å" />
|
||||
<WordPart from="n/" to="rv" />
|
||||
</PartialWords>
|
||||
<PartialLines />
|
||||
<PartialLinesAlways />
|
||||
<BeginLines>
|
||||
<Beginning from="Ln " to="In " />
|
||||
<Beginning from="U ppfattat" to="Uppfattat" />
|
||||
</BeginLines>
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
||||
@@ -0,0 +1,130 @@
|
||||
# coding=utf-8
|
||||
|
||||
import traceback
|
||||
import pysubs2
|
||||
import logging
|
||||
|
||||
from registry import registry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SubtitleModifications(object):
    """
    Loads a subtitle via pysubs2 and applies registered modifications to it.

    Mods are addressed by identifier, either plain (``remove_HI``) or with
    parameters (``shift_offset(h=1,ms=500)``).
    """
    debug = False
    language = None
    # parsed subtitle file; stays None until load() succeeds
    f = None
    initialized_mods = {}

    def __init__(self, debug=False):
        self.debug = debug
        # per-instance cache of instantiated mods, shadows the class-level dict
        self.initialized_mods = {}

    def load(self, fn=None, content=None, language=None):
        """
        Parse a subtitle from a file or from a string.

        Loading errors are logged, not raised; ``self.f`` is then left untouched.

        :param language: babelfish.Language language of the subtitle
        :param fn: filename
        :param content: unicode
        :return:
        """
        self.language = language
        # mods may depend on the language, so drop the ones built for the previous subtitle
        self.initialized_mods = {}
        try:
            if fn:
                self.f = pysubs2.load(fn)
            elif content:
                self.f = pysubs2.SSAFile.from_string(content)
        except (IOError,
                UnicodeDecodeError,
                pysubs2.exceptions.UnknownFPSError,
                pysubs2.exceptions.UnknownFormatIdentifierError,
                pysubs2.exceptions.FormatAutodetectionError):
            if fn:
                logger.exception("Couldn't load subtitle: %s: %s", fn, traceback.format_exc())
            elif content:
                logger.exception("Couldn't load subtitle: %s", traceback.format_exc())

    @classmethod
    def parse_identifier(cls, identifier):
        """
        Split a mod identifier into its name and its parameters.

        ``"name"`` yields ``("name", {})``; ``"name(a=1,b=2)"`` yields
        ``("name", {"a": "1", "b": "2"})``. Parameter values stay strings.

        :param identifier: plain or parametrized mod identifier
        :return: tuple of (identifier, dict of parameters)
        :raises ValueError: if a parameter has no ``=``
        """
        paren_pos = identifier.find("(")

        # simple identifier; whether it is actually registered is checked by the caller
        if paren_pos == -1:
            return identifier, {}

        # identifier with params; identifier(param=value)
        args = {}
        for pair in identifier[paren_pos + 1:-1].split(","):
            # "name()" and trailing commas produce empty chunks
            if not pair.strip():
                continue
            # only split on the first "=", the value may contain further ones
            key, value = pair.split("=", 1)
            args[key] = value
        return identifier[:paren_pos], args

    @classmethod
    def get_mod_class(cls, identifier):
        """
        Return the registered mod class for a plain or parametrized identifier.
        """
        identifier, _ = cls.parse_identifier(identifier)
        return registry.mods[identifier]

    @classmethod
    def get_mod_signature(cls, identifier, **kwargs):
        """
        Return the signature string of the mod behind `identifier` for the given parameters.
        """
        return cls.get_mod_class(identifier).get_signature(**kwargs)

    def modify(self, *mods):
        """
        Apply the given mods to the loaded subtitle.

        Whole-file mods run first, once each. Line mods then run on every
        entry; an entry for which a mod returns empty content is removed.

        :param mods: plain or parametrized mod identifiers
        :raises NotImplementedError: if a mod is not registered
        """
        parsed_mods = [SubtitleModifications.parse_identifier(mod) for mod in mods]
        line_mods = []
        non_line_mods = []

        for identifier, args in parsed_mods:
            if identifier not in registry.mods:
                raise NotImplementedError("Mod %s not loaded" % identifier)

            mod_cls = registry.mods[identifier]
            if mod_cls.modifies_whole_file:
                non_line_mods.append((identifier, args))
            else:
                line_mods.append((identifier, args))

            if identifier not in self.initialized_mods:
                self.initialized_mods[identifier] = mod_cls(self)

        # apply file mods
        for identifier, args in non_line_mods:
            mod = self.initialized_mods[identifier]
            mod.modify(None, debug=self.debug, parent=self, **args)

        # without line mods the entries must stay as they are
        if not line_mods:
            return

        # apply line mods
        new_f = []
        for line in self.f:
            applied_mods = []
            deleted = False
            for identifier, args in line_mods:
                mod = self.initialized_mods[identifier]

                # don't bother reapplying exclusive mods multiple times
                if mod.exclusive and identifier in applied_mods:
                    continue

                if not mod.processors:
                    continue

                new_content = mod.modify(line.text, debug=self.debug, parent=self, **args)
                if not new_content:
                    if self.debug:
                        logger.debug("%s: deleting %s", identifier, line)
                    # nothing left for the remaining mods to work on
                    deleted = True
                    break

                line.text = new_content
                applied_mods.append(identifier)

            if not deleted:
                new_f.append(line)

        self.f.events = new_f

    def to_string(self, format="srt", encoding="utf-8"):
        """
        Serialize the loaded subtitle in the given format and encoding.
        """
        return self.f.to_string(format, encoding=encoding)

    def save(self, fn):
        """
        Write the loaded subtitle to the file `fn`.
        """
        self.f.save(fn)


SubMod = SubtitleModifications
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
# coding=utf-8
|
||||
import re
|
||||
import logging
|
||||
|
||||
from subzero.modification.processors.re_processor import ReProcessor, NReProcessor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SubtitleModification(object):
    """
    Base class for a subtitle modification ("mod").

    A mod runs its content through three processor chains in order:
    pre_processors, processors and post_processors. Each processor needs a
    ``process(content, debug=False)`` method returning the new content.
    """
    identifier = None
    description = None
    long_description = None
    exclusive = False
    advanced = False  # has parameters
    modifies_whole_file = False  # operates on the whole file, not individual entries
    # NOTE: these lists live on the class; subclasses should assign their own instead of appending
    pre_processors = []
    processors = []
    post_processors = []

    def __init__(self, parent):
        return

    def _process(self, content, processors, debug=False, parent=None, **kwargs):
        """
        Run `content` through `processors` in order.

        Stops at the first processor that returns empty content.

        :param content: text to process; empty content is not processed
        :param processors: iterable of processor objects
        :param debug: log every change a processor makes
        :param parent: accepted for signature compatibility, not used here
        :return: the processed content, or None if `content` was empty
        """
        if not content:
            return

        new_content = content
        for processor in processors:
            old_content = new_content
            new_content = processor.process(new_content, debug=debug)
            if not new_content:
                if debug:
                    logger.debug("Processor returned empty line: %s", processor)
                break

            if debug and old_content != new_content:
                logger.debug("%s: %s -> %s", processor, old_content, new_content)

        return new_content

    def pre_process(self, content, debug=False, parent=None, **kwargs):
        """
        Run `content` through the pre_processors chain.
        """
        return self._process(content, self.pre_processors, debug=debug, parent=parent, **kwargs)

    def process(self, content, debug=False, parent=None, **kwargs):
        """
        Run `content` through the processors chain.
        """
        return self._process(content, self.processors, debug=debug, parent=parent, **kwargs)

    def post_process(self, content, debug=False, parent=None, **kwargs):
        """
        Run `content` through the post_processors chain.
        """
        return self._process(content, self.post_processors, debug=debug, parent=parent, **kwargs)

    def modify(self, content, debug=False, parent=None, **kwargs):
        """
        Apply pre_process, process and post_process to `content`, in that order.

        :return: the modified content; None once any stage received empty content
        """
        new_content = content
        for method in ("pre_process", "process", "post_process"):
            new_content = getattr(self, method)(new_content, debug=debug, parent=parent, **kwargs)

        return new_content

    @classmethod
    def get_signature(cls, **kwargs):
        """
        Build the parametrized identifier of this mod, e.g. ``identifier(a=1,b=2)``.

        Parameters are emitted sorted by name so that equal parameters always
        give the same signature.
        """
        string_args = ",".join(["%s=%s" % (key, value) for key, value in sorted(kwargs.items())])
        return "%s(%s)" % (cls.identifier, string_args)
|
||||
|
||||
|
||||
class SubtitleTextModification(SubtitleModification):
    """
    Base class for mods that change the text of subtitle entries.

    Adds a post-processing chain that cleans up leftovers of the actual processors.
    """
    post_processors = [
        # empty tag: a "{\<word>1}" tag followed by nothing but whitespace/punctuation and a "{\<word>0}" tag.
        # The dash is escaped; an unescaped ",-_" is a range that also covers digits and A-Z.
        ReProcessor(re.compile(r'({\\\w+1})[\s.,\-_!?]+({\\\w+0})'), "", name="empty_tag"),

        # empty line (needed?)
        NReProcessor(re.compile(r'^\s+$'), "", name="empty_line"),

        # empty dash line (needed?)
        NReProcessor(re.compile(r'(^[\s]*[\-]+[\s]*)$'), "", name="empty_dash_line"),

        # clean whitespace at start and end; strips both ends independently so
        # text containing inner whitespace gets trimmed as well
        ReProcessor(re.compile(r'^\s+|\s+$'), "", name="surrounding_whitespace"),
    ]
|
||||
@@ -0,0 +1,27 @@
|
||||
# coding=utf-8
|
||||
|
||||
import logging
|
||||
|
||||
from subzero.modification.mods import SubtitleModification
|
||||
from subzero.modification import registry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ChangeFPS(SubtitleModification):
    """
    Whole-file mod that converts the subtitle from one framerate to another.

    Takes the parameters ``from`` and ``to``; both are converted with float().
    """
    identifier = "change_FPS"
    description = "Change the FPS of the subtitle"
    exclusive = True
    advanced = True
    modifies_whole_file = True

    long_description = """\
Re-syncs the subtitle to the framerate of the current media file.
"""

    def modify(self, content, debug=False, parent=None, **kwargs):
        """
        Transform the framerate of ``parent.f``; `content` and `debug` are not used.
        """
        source_fps = float(kwargs.get("from"))
        target_fps = float(kwargs.get("to"))
        parent.f.transform_framerate(source_fps, target_fps)


registry.register(ChangeFPS)
|
||||
@@ -0,0 +1,33 @@
|
||||
# coding=utf-8
|
||||
import re
|
||||
|
||||
from subzero.modification.mods import SubtitleTextModification
|
||||
from subzero.modification.processors.re_processor import NReProcessor
|
||||
from subzero.modification import registry
|
||||
|
||||
|
||||
class HearingImpaired(SubtitleTextModification):
    """
    Line mod that strips hearing impaired annotations from subtitle text.

    Every processor works on the individual lines of an entry (NReProcessor).
    """
    identifier = "remove_HI"
    description = "Remove Hearing Impaired tags"
    exclusive = True

    long_description = """\
Removes tags, text and characters from subtitles that are meant for hearing impaired people
"""

    processors = [
        # brackets; non-greedy so text between two separate bracket groups survives
        NReProcessor(re.compile(r'(?sux)[(\[].+?[)\]]'), "", name="HI_brackets"),

        # text before colon (and possible dash in front), max 11 chars after the first whitespace (if any);
        # the lookahead keeps colons that are followed by a digit without eating the character behind the colon
        NReProcessor(re.compile(r'(?u)(^[A-Za-z\-\'"_]+[\w\s]{0,11}:(?![0-9])[\s]*)'), "", name="HI_before_colon"),

        # all caps line (at least 3 chars)
        NReProcessor(re.compile(r'(?u)(^[A-Z]{3,}$)'), "", name="HI_all_caps"),

        # dash in front
        NReProcessor(re.compile(r'(?u)^\s*-\s*'), "", name="HI_starting_dash"),
    ]


registry.register(HearingImpaired)
|
||||
@@ -0,0 +1,47 @@
|
||||
# coding=utf-8
|
||||
import logging
|
||||
|
||||
from subzero.modification.mods import SubtitleTextModification
|
||||
from subzero.modification.processors.string_processor import MultipleLineProcessor
|
||||
from subzero.modification.processors.re_processor import MultipleWordReProcessor
|
||||
from subzero.modification import registry
|
||||
from subzero.modification.dictionaries.data import data as OCR_fix_data
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FixOCR(SubtitleTextModification):
    """
    Line mod that applies per-language search and replace data to fix OCR errors.

    The data is looked up by the alpha3t code of the parent's language; without
    data the mod has no processors.
    """
    identifier = "OCR_fixes"
    description = "Fix common OCR issues"
    exclusive = True
    data_dict = None

    long_description = """\
Fix issues that happen when a subtitle gets converted from bitmap to text through OCR
"""

    def __init__(self, parent):
        super(FixOCR, self).__init__(parent)

        # the subtitle may have been loaded without a language
        language = getattr(parent, "language", None)
        if language is None:
            logger.debug("No language set, can't look up SnR-data")
            return

        data_dict = OCR_fix_data.get(language.alpha3t)
        if not data_dict:
            logger.debug("No SnR-data available for language %s", parent.language)
            return

        self.data_dict = data_dict
        self.processors = self.get_processors()

    def get_processors(self):
        """
        Build the processor chain from the sections of the loaded data.

        :return: list of processors, empty if no data is available
        """
        if not self.data_dict:
            return []

        return [
            MultipleLineProcessor(self.data_dict["WholeLines"], name="SE_replace_line"),
            MultipleWordReProcessor(self.data_dict["WholeWords"], name="SE_replace_word"),
            MultipleWordReProcessor(self.data_dict["BeginLines"], name="SE_replace_beginline"),
            MultipleWordReProcessor(self.data_dict["EndLines"], name="SE_replace_endline"),
            MultipleLineProcessor(self.data_dict["PartialLines"], name="SE_replace_partialline"),
            MultipleLineProcessor(self.data_dict["PartialWordsAlways"], name="SE_replace_partialwordsalways")
        ]


registry.register(FixOCR)
|
||||
@@ -0,0 +1,27 @@
|
||||
# coding=utf-8
|
||||
|
||||
import logging
|
||||
|
||||
from subzero.modification.mods import SubtitleModification
|
||||
from subzero.modification import registry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ShiftOffset(SubtitleModification):
    """
    Whole-file mod that shifts the subtitle timing by a given offset.

    Takes the optional parameters ``h``, ``m``, ``s`` and ``ms``; each is
    converted with int() and defaults to 0.
    """
    identifier = "shift_offset"
    description = "Change the timing of the subtitle"
    exclusive = True
    advanced = True
    modifies_whole_file = True

    long_description = """\
Adds or substracts a certain amount of time from the whole subtitle to match your media
"""

    def modify(self, content, debug=False, parent=None, **kwargs):
        """
        Shift ``parent.f`` by the given offset; `content` and `debug` are not used.
        """
        offset = dict((unit, int(kwargs.get(unit, 0))) for unit in ("h", "m", "s", "ms"))
        parent.f.shift(**offset)


registry.register(ShiftOffset)
|
||||
@@ -0,0 +1,29 @@
|
||||
# coding=utf-8
|
||||
|
||||
|
||||
class Processor(object):
    """
    Base class of all processors; its process() returns the content unchanged.
    """
    name = None
    parent = None

    def __init__(self, name=None, parent=None):
        self.name, self.parent = name, parent

    @property
    def info(self):
        """
        Short description used in the representation; the name by default.
        """
        return self.name

    def process(self, content, debug=False):
        """
        Return the processed content; the base implementation changes nothing.
        """
        return content

    def __repr__(self):
        class_name = type(self).__name__
        return "Processor <%s %s>" % (class_name, self.info)

    def __str__(self):
        return self.__repr__()

    def __unicode__(self):
        return unicode(self.__repr__())
|
||||
@@ -0,0 +1,62 @@
|
||||
# coding=utf-8
|
||||
import re
|
||||
import logging
|
||||
|
||||
from subzero.modification.processors import Processor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ReProcessor(Processor):
    """
    Processor that substitutes every match of a compiled regex in the content.
    """
    pattern = None
    replace_with = None

    def __init__(self, pattern, replace_with, name=None):
        super(ReProcessor, self).__init__(name=name)
        self.pattern, self.replace_with = pattern, replace_with

    def process(self, content, debug=False):
        """
        Return `content` with all matches of the pattern replaced by `replace_with`.
        """
        regex = self.pattern
        return regex.sub(self.replace_with, content)
|
||||
|
||||
|
||||
class NReProcessor(ReProcessor):
    """
    Regex processor that treats every line of the content separately.

    The content is split on the literal two-character sequence ``\\N``; lines
    that are empty after the substitution are dropped.
    """

    def process(self, content, debug=False):
        """
        Apply the regex to each line and re-join the non-empty results with ``\\N``.
        """
        substitute = super(NReProcessor, self).process
        processed = [substitute(part, debug=debug) for part in content.split(r"\N")]
        return r"\N".join([part for part in processed if part])
|
||||
|
||||
|
||||
class MultipleWordReProcessor(ReProcessor):
    """
    Expects a dictionary in the form of:
    dict = {
        "data": {"old_value": "new_value"},
        "pattern": compiled re object that matches data.keys()
    }
    replaces found key in pattern with the corresponding value in data
    """
    def __init__(self, snr_dict, name=None, parent=None):
        # deliberately skips ReProcessor.__init__, which requires pattern/replace_with;
        # parent is handed on instead of being dropped
        super(ReProcessor, self).__init__(name=name, parent=parent)
        self.snr_dict = snr_dict

    def process(self, content, debug=False):
        """
        Replace every pattern match in each line of `content` with its value from "data".

        :return: the processed content; unchanged if "data" is empty
        :raises KeyError: if the pattern matches text that is not a key of "data"
        """
        data = self.snr_dict["data"]
        if not data:
            return content

        pattern = self.snr_dict["pattern"]
        # u"\\N" is the same literal as the Python 2 only ur"\N"
        separator = u"\\N"
        out = []
        for a in content.split(separator):
            out.append(pattern.sub(lambda x: data[x.group(0)], a))
        return separator.join(out)
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
# coding=utf-8
|
||||
|
||||
import logging
|
||||
|
||||
from subzero.modification.processors import Processor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class StringProcessor(Processor):
    """
    Base processor for plain (non-regex) string replacement.

    :param search: substring to look for
    :param replace: text that takes the place of ``search``
    :param name: optional name, forwarded to the parent ``__init__``
    :param parent: accepted but not used by this class
    """

    def __init__(self, search, replace, name=None, parent=None):
        super(StringProcessor, self).__init__(name=name)
        self.replace = replace
        self.search = search

    def process(self, content, debug=False):
        """
        Return ``content.replace(search, replace)``.

        ``debug`` is not used by this implementation.
        """
        needle, substitute = self.search, self.replace
        return content.replace(needle, substitute)
|
||||
|
||||
|
||||
class MultipleLineProcessor(Processor):
    """
    Replaces substrings line by line using a search/replace table.

    Takes a search/replace dict as first argument.
    Expects a dictionary in the form of:
    dict = {
        "data": {"old_value": "new_value"}
    }

    :param snr_dict: search-and-replace dictionary as described above
    :param name: optional name, forwarded to the parent ``__init__``
    :param parent: accepted but not used by this class
    """
    def __init__(self, snr_dict, name=None, parent=None):
        super(MultipleLineProcessor, self).__init__(name=name)
        self.snr_dict = snr_dict

    def process(self, content, debug=False):
        """
        Return ``content`` with every ``data`` key replaced by its value.

        The content is split on the two-character line-break marker (backslash
        followed by "N"); within each line the replacements are applied one
        after another in the table's iteration order, so a later replacement
        sees the output of earlier ones. Returns ``content`` untouched when
        the ``data`` table is empty.

        :param debug: when true, log each replacement that is about to apply
        """
        replacements = self.snr_dict["data"]
        if not replacements:
            return content

        # u"\\N" has the same value as the former Python-2-only ur"\N" literal
        separator = u"\\N"
        out = []
        for line in content.split(separator):
            # items() works on Python 2 and 3 alike (iteritems() is Python 2 only)
            for search, replacement in replacements.items():
                if debug and search in line:
                    logger.debug(u"Replacing '%s' with '%s' in '%s'", search, replacement, line)

                line = line.replace(search, replacement)
            out.append(line)

        return separator.join(out)
|
||||
|
||||
|
||||
class MultipleWordProcessor(MultipleLineProcessor):
    """
    Replaces whole, space-separated words using a search/replace table.

    Takes a search/replace dict as first argument.
    Expects a dictionary in the form of:
    dict = {
        "data": {"old_value": "new_value"}
    }
    """
    def process(self, content, debug=False):
        """
        Return ``content`` with every word found in ``data`` replaced.

        Lines are separated by the two-character line-break marker (backslash
        followed by "N") and words by a single space; a word is replaced only
        when it equals a ``data`` key exactly. Returns ``content`` untouched
        when the ``data`` table is empty. ``debug`` is not used here.
        """
        # Look words up in the "data" mapping, not in the wrapper dict itself:
        # the wrapper only holds the "data" key, so a lookup there never
        # replaces anything (and would swap the word "data" for a dict).
        replacements = self.snr_dict["data"]
        if not replacements:
            return content

        new_lines = []
        for line in content.split(u"\\N"):
            new_words = [replacements.get(word, word) for word in line.split(u" ")]
            new_lines.append(u" ".join(new_words))

        return u"\\N".join(new_lines)
|
||||
@@ -0,0 +1,17 @@
|
||||
# coding=utf-8
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class SubtitleModRegistry(object):
    """
    Keeps track of the registered subtitle modifications.

    ``mods`` maps each mod's ``identifier`` to the mod object, in registration
    order; ``mods_available`` lists the identifiers in that same order.
    """
    # class-level placeholders; real containers are created per instance
    mods = None
    mods_available = None

    def __init__(self):
        self.mods = OrderedDict()
        self.mods_available = []

    def register(self, mod):
        """
        Register ``mod`` under ``mod.identifier``.

        Registering an identifier again replaces the stored mod but keeps its
        original position, and does not add a second entry to
        ``mods_available``, so both containers stay in sync.
        """
        identifier = mod.identifier
        if identifier not in self.mods:
            self.mods_available.append(identifier)
        self.mods[identifier] = mod


# module-level registry instance
registry = SubtitleModRegistry()
|
||||
@@ -6,7 +6,6 @@ import logging
|
||||
import traceback
|
||||
|
||||
from constants import mode_map
|
||||
from modification import SubtitleModifications
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -22,7 +21,8 @@ class StoredSubtitle(object):
|
||||
content = None
|
||||
mods = None
|
||||
|
||||
def __init__(self, score, storage_type, hash, provider_name, id, date_added=None, mode="a", content=None):
|
||||
def __init__(self, score, storage_type, hash, provider_name, id, date_added=None, mode="a", content=None,
|
||||
mods=None):
|
||||
self.score = int(score)
|
||||
self.storage_type = storage_type
|
||||
self.hash = hash
|
||||
@@ -31,19 +31,20 @@ class StoredSubtitle(object):
|
||||
self.date_added = date_added or datetime.datetime.now()
|
||||
self.mode = mode
|
||||
self.content = content
|
||||
self.mods = []
|
||||
self.mods = mods or []
|
||||
|
||||
def add_mod(self, identifier):
|
||||
self.mods = self.mods or []
|
||||
if identifier is None:
|
||||
self.mods = []
|
||||
return
|
||||
|
||||
self.mods.append(identifier)
|
||||
|
||||
@property
|
||||
def mode_verbose(self):
|
||||
return mode_map.get(self.mode, "Unknown")
|
||||
|
||||
def get_modified_content(self, fps=None):
|
||||
if not self.mods:
|
||||
return self.content
|
||||
submods = SubtitleModifications(content=self.content, fps=fps)
|
||||
submods.modify(*self.mods)
|
||||
return submods.to_string("srt")
|
||||
|
||||
|
||||
class StoredVideoSubtitles(object):
|
||||
"""
|
||||
@@ -80,7 +81,7 @@ class StoredVideoSubtitles(object):
|
||||
sub_key = self.get_sub_key(subtitle.provider_name, subtitle.id)
|
||||
subs[sub_key] = StoredSubtitle(subtitle.score, storage_type, hashlib.md5(subtitle.content).hexdigest(),
|
||||
subtitle.provider_name, subtitle.id, date_added=date_added, mode=mode,
|
||||
content=subtitle.content)
|
||||
content=subtitle.content, mods=subtitle.mods)
|
||||
subs["current"] = sub_key
|
||||
|
||||
return True
|
||||
|
||||
@@ -14,19 +14,19 @@ of signal, drawing the Tardis off course.
|
||||
|
||||
4
|
||||
00:00:16,099 --> 00:00:17,224
|
||||
Where are we?
|
||||
this is a subtitle test with a text before colons: Where are we?
|
||||
|
||||
5
|
||||
00:00:17,225 --> 00:00:19,684
|
||||
Earth. Utah, North America.
|
||||
less text before colons: Earth. Utah, North America.
|
||||
|
||||
6
|
||||
00:00:19,686 --> 00:00:21,103
|
||||
About half a mile underground.
|
||||
Ithinkyou're About half a mile underground.
|
||||
|
||||
7
|
||||
00:00:21,103 --> 00:00:23,603
|
||||
And when are we?
|
||||
lrn gonna And when are we?
|
||||
|
||||
8
|
||||
00:00:24,274 --> 00:00:26,649
|
||||
@@ -55,7 +55,7 @@ ROSE: Like a great big museum.
|
||||
|
||||
14
|
||||
00:00:40,414 --> 00:00:42,914
|
||||
DOCTOR: An alien museum.
|
||||
DOCTOR's MOM: An alien museum.
|
||||
|
||||
15
|
||||
00:00:43,542 --> 00:00:46,042
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user