First pass at Dutch IPA

Working for English
2019-05-19 20:52:43 -04:00 · 2019-05-19 15:12:15 -04:00
11 changed files with 205 additions and 109 deletions
@@ -363,7 +363,7 @@ def api_pronounce() -> Union[Response, str]:

    if pronounce_type == "phonemes":
        # Convert from Sphinx to espeak phonemes
-        espeak_str = core.get_word_phonemes(pronounce_str).phonemes
+        espeak_str = core.get_word_phonemes(pronounce_str).phonemes["espeak"]
    else:
        # Speak word directly
        espeak_str = pronounce_str
@@ -47,26 +47,30 @@ sudo apt-get install -y python3 python3-pip python3-venv python3-dev \
     gfortran \
     sphinxbase-utils sphinxtrain pocketsphinx \
     jq checkinstall unzip xz-utils \
-     curl libffi-dev
+     curl

 # Download dependencies
 echo "Downloading dependencies"
-bash download-dependencies.sh "${CPU_ARCH}"
+bash download-dependencies.sh

 # -----------------------------------------------------------------------------
 # OpenFST
 # -----------------------------------------------------------------------------

 case $CPU_ARCH in
-    x86_64|armv7l|arm64v8)
-        # Use pre-built packages
-        openfst_file="${download_dir}/openfst_1.6.9-1_${FRIENDLY_ARCH}.deb"
-        echo "Installing OpenFST (${openfst_file})"
-        sudo dpkg -i "${openfst_file}"
+    armv7l|arm64v8)
+        # Build from source
+        openfst_file="${download_dir}/openfst-1.6.2.tar.gz"
+        echo "Building OpenFST (${openfst_file})"
+        tar -C "${temp_dir}" -xzf "${openfst_file}" && \
+            cd "${temp_dir}/openfst-1.6.2" && \
+            ./configure --enable-static --enable-shared --enable-far --enable-ngram-fsts && \
+            make -j 4 && \
+            sudo make install
        ;;

    *)
-        # Use system packages
+        # Use pre-built packages
        sudo apt-get install -y libfst-dev libfst-tools
 esac

@@ -84,7 +88,7 @@ if [[ -z "$(which python3.6)" ]]; then
    python_file="${download_dir}/Python-3.6.8.tar.xz"
    if [[ ! -f "${python_file}" ]]; then
        python_url='https://www.python.org/ftp/python/3.6.8/Python-3.6.8.tar.xz'
-        curl -sSfL -o "${python_file}" "${python_url}"
+        curl -sSfL -o "${python_file}" "${python_url}"
    fi

    tar -C "${temp_dir}" -xf "${python_file}"
@@ -114,7 +118,6 @@ mkdir -p "${VENV_PATH}"
 # shellcheck source=/dev/null
 source "${VENV_PATH}/bin/activate"
 "${PYTHON}" -m pip install wheel
-"${PYTHON}" -m pip install requests

 case $CPU_ARCH in
    armv7l|arm64v8)
@@ -145,7 +148,7 @@ case $CPU_ARCH in
        if [[ ! -f "${snowboy_file}" ]]; then
            snowboy_url='https://github.com/Kitt-AI/snowboy/archive/v1.3.0.tar.gz'
            echo "Downloading snowboy (${snowboy_url})"
-            curl -sSfL -o "${snowboy_file}" "${snowboy_url}"
+            curl -sSfL -o "${snowboy_file}" "${snowboy_url}"
        fi

        "${PYTHON}" -m pip install "${snowboy_file}"
@@ -178,25 +181,14 @@ fi
 # -----------------------------------------------------------------------------

 if [[ -z "$(which ngramcount)" ]]; then
-    case $CPU_ARCH in
-        x86_64|armv7l|arm64v8)
-            # Use pre-built packages
-            opengrm_file="${download_dir}/opengrm_1.3.4-1_${FRIENDLY_ARCH}.deb"
-            echo "Installing opengrm (${opengrm_file})"
-            sudo dpkg -i "${opengrm_file}"
-            ;;
-
-        *)
-            # Build from source
-            opengrm_file="${download_dir}/opengrm-ngram-1.3.3.tar.gz"
-            echo "Building Opengrm ${opengrm_file}"
-            tar -C "${temp_dir}" -xf "${opengrm_file}" && \
-                cd "${temp_dir}/opengrm-ngram-1.3.3" && \
-                ./configure && \
-                make -j 4 && \
-                sudo make install && \
-                sudo ldconfig
-    esac
+    opengrm_file="${download_dir}/opengrm-ngram-1.3.3.tar.gz"
+    echo "Building Opengrm ${opengrm_file}"
+    tar -C "${temp_dir}" -xf "${opengrm_file}" && \
+        cd "${temp_dir}/opengrm-ngram-1.3.3" && \
+        ./configure && \
+        make -j 4 && \
+        sudo make install && \
+        sudo ldconfig
 fi

 # -----------------------------------------------------------------------------
@@ -8,32 +8,25 @@ DIR="$( cd "$( dirname "$0" )" && pwd )"
 download_dir="${DIR}/download"
 mkdir -p "${download_dir}"

+# CPU architecture
+CPU_ARCHS=("x86_64" "armv7l" "arm64v8")
+FRIENDLY_ARCHS=("amd64" "armhf" "aarch64")
+
 declare -A CPU_TO_FRIENDLY
 CPU_TO_FRIENDLY["x86_64"]="amd64"
 CPU_TO_FRIENDLY["armv7l"]="armhf"
 CPU_TO_FRIENDLY["arm64v8"]="aarch64"

-# CPU architecture
-CPU_ARCHS=("x86_64" "armv7l" "arm64v8")
-FRIENDLY_ARCHS=("amd64" "armhf" "aarch64")
-
-if [[ ! -z "$1" ]]; then
-    CPU_ARCHS=("$1")
-    FRIENDLY_ARCHS=(${CPU_TO_FRIENDLY["$1"]})
-fi
-
 # -----------------------------------------------------------------------------
 # OpenFST
 # -----------------------------------------------------------------------------

-for FRIENDLY_ARCH in "${FRIENDLY_ARCHS[@]}"; do
-    openfst_file="${download_dir}/openfst_1.6.9-1_${FRIENDLY_ARCH}.deb"
-    if [[ ! -f "${openfst_file}" ]]; then
-        openfst_url="https://github.com/synesthesiam/docker-opengrm/releases/download/v1.3.4-${FRIENDLY_ARCH}/openfst_1.6.9-1_${FRIENDLY_ARCH}.deb"
-        echo "Downloading OpenFST pre-built binary (${openfst_url})"
-        curl -sSfL -o "${openfst_file}" "${openfst_url}"
-    fi
-done
+openfst_file="${download_dir}/openfst-1.6.2.tar.gz"
+if [[ ! -f "${openfst_file}" ]]; then
+    openfst_url='http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.2.tar.gz'
+    echo "Downloading OpenFST source (${openfst_url})"
+    curl -sSfL -o "${openfst_file}" "${openfst_url}"
+fi

 # -----------------------------------------------------------------------------
 # Pocketsphinx for Python
@@ -87,23 +80,12 @@ done
 # -----------------------------------------------------------------------------

 if [[ -z "$(which ngramcount)" ]]; then
-    # Download source
    opengrm_file="${download_dir}/opengrm-ngram-1.3.3.tar.gz"
    if [[ ! -f "${opengrm_file}" ]]; then
        opengrm_url='https://www.opengrm.org/twiki/pub/GRM/NGramDownload/opengrm-ngram-1.3.3.tar.gz'
        echo "Download Opengrm (${opengrm_url})"
        curl -sSfLk -o "${opengrm_file}" "${opengrm_url}"
    fi
-
-    # Download pre-built packages
-    for FRIENDLY_ARCH in "${FRIENDLY_ARCHS[@]}"; do
-        opengrm_file="${download_dir}/opengrm_1.3.4-1_${FRIENDLY_ARCH}.deb"
-        if [[ ! -f "${opengrm_file}" ]]; then
-            opengrm_url="https://github.com/synesthesiam/docker-opengrm/releases/download/v1.3.4-${FRIENDLY_ARCH}/opengrm_1.3.4-1_${FRIENDLY_ARCH}.deb"
-            echo "Downloading opengrm pre-built binary (${opengrm_url})"
-            curl -sSfL -o "${opengrm_file}" "${opengrm_url}"
-        fi
-    done
 fi

 # -----------------------------------------------------------------------------
@@ -159,7 +159,8 @@
            "program": ""
        },
        "espeak": {
-            "phoneme_map": "espeak_phonemes.txt"
+            "phoneme_map": "espeak_phonemes.txt",
+            "ipa_map": "ipa_phonemes.txt"
        },
        "flite": {
            "voice": "kal16"
@@ -0,0 +1,25 @@
+a abre  əbɹˈɛ
+b baja  bˈɑhɑ
+ch ocho  ˈɒtʃəʊ
+d anda  ˈændə
+e aire  ˈɛə
+f café  kæfˈeɪ
+g agua  ˈæɡjʉːə
+gn años  sˈænsəːn
+i allí  ˈælɪ
+j bajo  bˈɑdʒəʊ
+k acto  ˈæktəʊ
+l alex  ˈælɪks
+ll allá  əlˈɑ
+m alma  ˈɒlmə
+n andy  ˈændɪ
+o algo  ˈælɡəʊ
+p copa  kˈəʊpə
+r amor  ˈæmɚ
+rr raro  ɹˈæɹəʊ
+s base  bˈeɪs
+t alta  ˈɔːltə
+u auto  ˈɔːtəʊ
+x sexo  sˈiksəʊ
+y ayer  ˈaɪɚ
+z azul  æzˈʉːl
@@ -0,0 +1,25 @@
+a abre  a
+b baja  b
+ch ocho  tʃ
+d anda  d
+e aire  e
+f café  f
+g agua  g
+gn años ɲ
+i allí  i
+j bajo  j
+k acto  k
+l alex  l
+ll allá  ʎ
+m alma  m
+n andy  n
+o algo  o
+p copa  p
+r amor  r
+rr raro ɾ
+s base  s
+t alta  t
+u auto  u
+x sexo  x
+y ayer  ʝ
+z azul  z
@@ -1,22 +1,25 @@
-A A A
-B VEZ B E Z
-CH MUCHO M U CH O
-D DE D E
-E EL E L
-F FUE F U E
-G ALGO A L G O
-GN AñOS A GN O S
-I Y I
-K CON K O N
-L AL A L
-LL ELLA E LL A
-M ME M E
-N EN E N
-O O O
-P POR P O R
-R ERA E R A
-S ES E S
-T TE T E
-U SU S U
-Y YA Y A
-Z HACE A Z E
+a abre a b r e
+b baja b a j a
+ch ocho o ch o
+d anda a n d a
+e aire a i r e
+f café k a f e
+g agua a g u a
+gn años a gn o s
+i allí a ll i
+j bajo b a j o
+k acto a k t o
+l alex a l e x
+ll allá a ll a
+m alma a l m a
+n andy a n d i
+o algo a l g o
+p copa k o p a
+r amor a m o r
+rr raro rr a r o
+s base b a s e
+t alta a l t a
+u auto a u t o
+x sexo s e x o
+y ayer a y e r
+z azul a z u l
@@ -0,0 +1,38 @@
+@ adem  ə
+a adam  aː
+aa acht  ɑ
+b baan  b
+d andy  d
+e baby  eː
+ee alex  ɛ
+ei blij  ɛi
+er albert  ɛː
+eu deur  øː
+f café  f
+g chicago  g
+gg hoge  ɟ
+h haag  ɦ
+i bier  i
+ii bill  ɪ
+j doei  j
+k bank  k
+l alle  l
+m amen  m
+n anna  n
+nn bang  ŋ
+o auto  oː
+oo bond  ɔ
+ou bouw  ʌu
+p club  p
+r arme  r
+s arts  s
+ss chef  ʃ
+t band  t
+u boek  u
+ui buik  œy
+v dave  v
+w eeuw  ʋ
+x berg  x
+y duur  y
+yy brug  ʒ
+z deze  z
@@ -1035,7 +1035,8 @@ def word2wav(core: RhasspyCore, profile: Profile, args: Any) -> None:
    word_pronunciations = all_pronunciations[args.word]["pronunciations"]

    # Convert from CMU phonemes to eSpeak phonemes
-    espeak_str = core.get_word_phonemes(word_pronunciations[0]).phonemes
+    sphinx = word_pronunciations[0]
+    espeak_str = all_pronunciations[args.word]["phonemes"][sphinx]["espeak"]

    # Pronounce as WAV
    wav_data = core.speak_word(espeak_str).wav_data
@@ -5,6 +5,7 @@ import logging
 import subprocess
 import tempfile
 from typing import Dict, Tuple, List, Optional, Any
+from collections import defaultdict

 from .actor import RhasspyActor
 from .utils import read_dict, load_phoneme_map
@@ -35,7 +36,7 @@ class GetWordPhonemes:


 class WordPhonemes:
-    def __init__(self, word: str, phonemes: str) -> None:
+    def __init__(self, word: str, phonemes: Dict[str, str]) -> None:
        self.word = word
        self.phonemes = phonemes

@@ -98,8 +99,11 @@ class PhonetisaurusPronounce(RhasspyActor):
                self._logger.exception("pronounce")
                self.send(message.receiver or sender, PronunciationFailed(repr(e)))
        elif isinstance(message, GetWordPhonemes):
-            phonemes = self.translate_phonemes(message.word)
-            self.send(message.receiver or sender, WordPhonemes(message.word, phonemes))
+            phonemes = self.translate_phonemes([message.word])
+            self.send(
+                message.receiver or sender,
+                WordPhonemes(message.word, phonemes[message.word]),
+            )

    # -------------------------------------------------------------------------

@@ -129,20 +133,49 @@ class PhonetisaurusPronounce(RhasspyActor):

    # -------------------------------------------------------------------------

-    def translate_phonemes(self, phonemes: str) -> str:
-        # Load map from Sphinx to eSpeak phonemes
+    def translate_phonemes(self, sphinxes: List[str]) -> Dict[str, Dict[str, str]]:
+        from .lexconvert import convert
+
+        # Load map from Sphinx to IPA
        map_path = self.profile.read_path(
-            self.profile.get("text_to_speech.espeak.phoneme_map")
+            self.profile.get(
+                "text_to_speech.espeak.ipa_map", "ipa_phonemes.txt"
+            )
        )

-        phoneme_map = load_phoneme_map(map_path)
+        if os.path.exists(map_path):
+            # Compute Sphinx <-> eSpeak map using IPA
+            ipa_map = load_phoneme_map(map_path)
+            phoneme_map = {
+                sphinx: convert(ipa, "unicode-ipa", "espeak")
+                for sphinx, ipa in ipa_map.items()
+            }
+        else:
+            # Fall back to Sphinx <-> eSpeak map
+            map_path = self.profile.read_path(
+                self.profile.get(
+                    "text_to_speech.espeak.phoneme_map", "espeak_phonemes.txt"
+                )
+            )

-        # Convert from Sphinx to espeak phonemes
-        espeak_str = "[['%s]]" % "".join(
-            phoneme_map.get(p, p) for p in phonemes.split()
-        )
+            phoneme_map = load_phoneme_map(map_path)
+            ipa_map = {
+                sphinx: convert(espeak, "espeak", "unicode-ipa")
+                for sphinx, espeak in phoneme_map.items()
+            }

-        return espeak_str
+        phonemes = defaultdict(dict)
+        for sphinx in sphinxes:
+            sphinx_parts = sphinx.split()
+
+            # Convert from Sphinx to espeak phonemes
+            phonemes[sphinx]["espeak"] = "[['%s]]" % "".join(
+                phoneme_map.get(p, p) for p in sphinx_parts
+            )
+
+            phonemes[sphinx]["ipa"] = "".join(ipa_map.get(p, p) for p in sphinx_parts)
+
+        return phonemes

    # -------------------------------------------------------------------------

@@ -168,20 +201,16 @@ class PhonetisaurusPronounce(RhasspyActor):
                    read_dict(dictionary_file, word_dict)

        pronunciations = self._lookup_words(words, word_dict, n)
+        all_pronunciations = []
+        for pron in pronunciations.values():
+            all_pronunciations.extend(pron["pronunciations"])

-        # Get phonemes from eSpeak
-        for word in words:
-            espeak_command = ["espeak", "-q", "-x"]
+        phonemes = self.translate_phonemes(all_pronunciations)

-            voice = self._get_voice()
-            if voice is not None:
-                espeak_command.extend(["-v", voice])
-
-            espeak_command.append(word)
-
-            self._logger.debug(repr(espeak_command))
-            espeak_str = subprocess.check_output(espeak_command).decode().strip()
-            pronunciations[word]["phonemes"] = espeak_str
+        for word in pronunciations.keys():
+            pronunciations[word]["phonemes"] = {}
+            for pron in pronunciations[word]["pronunciations"]:
+                pronunciations[word]["phonemes"][pron] = phonemes[pron]

        return pronunciations

@@ -40,7 +40,7 @@
                    <button type="button" class="btn btn-success" title="Add this pronunciation to your custom words" @click="addToCustomWords">Add</button>
                </div>
                <div class="col-xs-auto">
-                    <input id="espeak-phonemes" title="eSpeak Phonemes" class="form-control" type="text" v-model="espeakPhonemes" readonly>
+                    <input id="espeak-phonemes" title="IPA/eSpeak Phonemes" class="form-control" type="text" v-model="espeakPhonemes" readonly>
                </div>
                <div class="col-xs-auto">
                    <button type="button" class="btn btn-secondary" @click="pronouncePhonemesOrWord(phonemes)"
@@ -176,7 +176,7 @@
                         this.phonemes = this.pronunciations[0];
                     }

-                     this.espeakPhonemes = request.data.phonemes
+                     this.espeakPhonemes = request.data.phonemes[this.phonemes]['ipa']
                 })
                 .then(() => this.$parent.endAsync())
                 .catch(err => this.$parent.error(err))
Author	SHA1	Message	Date
Michael Hansen	6f4992e310	First pass at Dutch IPA	2019-05-19 20:52:43 -04:00
Michael Hansen	8245155fab	Working for English	2019-05-19 15:12:15 -04:00