Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6f4992e310 | |||
| 8245155fab |
@@ -1,175 +1,18 @@
|
||||
*
|
||||
!etc/qemu-*
|
||||
.git/
|
||||
.venv/
|
||||
node_modules/
|
||||
test/
|
||||
etc/homeassistant/config/.storage
|
||||
examples/typical/home-assistant/config/.storage
|
||||
examples/typical-intent/home-assistant/config/.storage
|
||||
examples/client-server/home-assistant/config/.storage
|
||||
examples/mqtt-hermes/home-assistant/config/.storage
|
||||
|
||||
!download/rhasspy-tools*
|
||||
!download/pocketsphinx-python.tar.gz
|
||||
!download/snowboy*
|
||||
!download/kaldi*
|
||||
base_dictionary.txt
|
||||
base_language_model.txt
|
||||
acoustic_model/
|
||||
|
||||
!requirements.txt
|
||||
!dist/
|
||||
!etc/wav
|
||||
profiles/en-kaldi/
|
||||
profiles/en-zamia/
|
||||
|
||||
!docker/run.sh
|
||||
!docker/rhasspy
|
||||
|
||||
!profiles/defaults.json
|
||||
|
||||
!profiles/zh/profile.json
|
||||
!profiles/zh/custom_words.txt
|
||||
!profiles/zh/espeak_phonemes.txt
|
||||
!profiles/zh/phoneme_examples.txt
|
||||
!profiles/zh/frequent_words.txt
|
||||
!profiles/zh/sentences.ini
|
||||
!profiles/zh/stop_words.txt
|
||||
!profiles/zh/slots
|
||||
!profiles/zh/slot_programs
|
||||
|
||||
!profiles/hi/profile.json
|
||||
!profiles/hi/custom_words.txt
|
||||
!profiles/hi/espeak_phonemes.txt
|
||||
!profiles/hi/phoneme_examples.txt
|
||||
!profiles/hi/frequent_words.txt
|
||||
!profiles/hi/sentences.ini
|
||||
!profiles/hi/stop_words.txt
|
||||
!profiles/hi/slots
|
||||
!profiles/hi/slot_programs
|
||||
|
||||
!profiles/el/profile.json
|
||||
!profiles/el/custom_words.txt
|
||||
!profiles/el/espeak_phonemes.txt
|
||||
!profiles/el/phoneme_examples.txt
|
||||
!profiles/el/frequent_words.txt
|
||||
!profiles/el/sentences.ini
|
||||
!profiles/el/stop_words.txt
|
||||
!profiles/el/slots
|
||||
!profiles/el/slot_programs
|
||||
|
||||
!profiles/es/profile.json
|
||||
!profiles/es/custom_words.txt
|
||||
!profiles/es/espeak_phonemes.txt
|
||||
!profiles/es/phoneme_examples.txt
|
||||
!profiles/es/frequent_words.txt
|
||||
!profiles/es/sentences.ini
|
||||
!profiles/es/stop_words.txt
|
||||
!profiles/es/slots
|
||||
!profiles/es/slot_programs
|
||||
|
||||
!profiles/it/profile.json
|
||||
!profiles/it/custom_words.txt
|
||||
!profiles/it/espeak_phonemes.txt
|
||||
!profiles/it/phoneme_examples.txt
|
||||
!profiles/it/frequent_words.txt
|
||||
!profiles/it/sentences.ini
|
||||
!profiles/it/stop_words.txt
|
||||
!profiles/it/slots
|
||||
!profiles/it/slot_programs
|
||||
|
||||
!profiles/ru/profile.json
|
||||
!profiles/ru/custom_words.txt
|
||||
!profiles/ru/espeak_phonemes.txt
|
||||
!profiles/ru/phoneme_examples.txt
|
||||
!profiles/ru/frequent_words.txt
|
||||
!profiles/ru/sentences.ini
|
||||
!profiles/ru/stop_words.txt
|
||||
!profiles/ru/slots
|
||||
!profiles/ru/slot_programs
|
||||
|
||||
!profiles/pt/profile.json
|
||||
!profiles/pt/custom_words.txt
|
||||
!profiles/pt/espeak_phonemes.txt
|
||||
!profiles/pt/phoneme_examples.txt
|
||||
!profiles/pt/frequent_words.txt
|
||||
!profiles/pt/sentences.ini
|
||||
!profiles/pt/stop_words.txt
|
||||
!profiles/pt/slots
|
||||
!profiles/pt/slot_programs
|
||||
|
||||
!profiles/sv/profile.json
|
||||
!profiles/sv/custom_words.txt
|
||||
!profiles/sv/espeak_phonemes.txt
|
||||
!profiles/sv/phoneme_examples.txt
|
||||
!profiles/sv/frequent_words.txt
|
||||
!profiles/sv/sentences.ini
|
||||
!profiles/sv/stop_words.txt
|
||||
!profiles/sv/slots
|
||||
!profiles/sv/slot_programs
|
||||
|
||||
!profiles/vi/profile.json
|
||||
!profiles/vi/custom_words.txt
|
||||
!profiles/vi/espeak_phonemes.txt
|
||||
!profiles/vi/phoneme_examples.txt
|
||||
!profiles/vi/frequent_words.txt
|
||||
!profiles/vi/sentences.ini
|
||||
!profiles/vi/stop_words.txt
|
||||
!profiles/vi/slots
|
||||
!profiles/vi/slot_programs
|
||||
|
||||
!profiles/ca/profile.json
|
||||
!profiles/ca/custom_words.txt
|
||||
!profiles/ca/espeak_phonemes.txt
|
||||
!profiles/ca/phoneme_examples.txt
|
||||
!profiles/ca/frequent_words.txt
|
||||
!profiles/ca/sentences.ini
|
||||
!profiles/ca/stop_words.txt
|
||||
!profiles/ca/slots
|
||||
!profiles/ca/slot_programs
|
||||
|
||||
!profiles/nl/profile.json
|
||||
!profiles/nl/custom_words.txt
|
||||
!profiles/nl/espeak_phonemes.txt
|
||||
!profiles/nl/phoneme_examples.txt
|
||||
!profiles/nl/frequent_words.txt
|
||||
!profiles/nl/sentences.ini
|
||||
!profiles/nl/stop_words.txt
|
||||
!profiles/nl/slots
|
||||
!profiles/nl/slot_programs
|
||||
!profiles/nl/kaldi/custom_words.txt
|
||||
!profiles/nl/kaldi/espeak_phonemes.txt
|
||||
!profiles/nl/kaldi/phoneme_examples.txt
|
||||
|
||||
!profiles/de/profile.json
|
||||
!profiles/de/custom_words.txt
|
||||
!profiles/de/espeak_phonemes.txt
|
||||
!profiles/de/phoneme_examples.txt
|
||||
!profiles/de/frequent_words.txt
|
||||
!profiles/de/sentences.ini
|
||||
!profiles/de/stop_words.txt
|
||||
!profiles/de/slots
|
||||
!profiles/de/slot_programs
|
||||
!profiles/de/kaldi/custom_words.txt
|
||||
!profiles/de/kaldi/espeak_phonemes.txt
|
||||
!profiles/de/kaldi/phoneme_examples.txt
|
||||
|
||||
!profiles/fr/profile.json
|
||||
!profiles/fr/custom_words.txt
|
||||
!profiles/fr/espeak_phonemes.txt
|
||||
!profiles/fr/phoneme_examples.txt
|
||||
!profiles/fr/frequent_words.txt
|
||||
!profiles/fr/sentences.ini
|
||||
!profiles/fr/stop_words.txt
|
||||
!profiles/fr/slots
|
||||
!profiles/fr/slot_programs
|
||||
!profiles/fr/kaldi/custom_words.txt
|
||||
!profiles/fr/kaldi/espeak_phonemes.txt
|
||||
!profiles/fr/kaldi/phoneme_examples.txt
|
||||
|
||||
!profiles/en/profile.json
|
||||
!profiles/en/custom_words.txt
|
||||
!profiles/en/espeak_phonemes.txt
|
||||
!profiles/en/phoneme_examples.txt
|
||||
!profiles/en/frequent_words.txt
|
||||
!profiles/en/sentences.ini
|
||||
!profiles/en/stop_words.txt
|
||||
!profiles/en/slots
|
||||
!profiles/en/slot_programs
|
||||
!profiles/en/kaldi/custom_words.txt
|
||||
!profiles/en/kaldi/espeak_phonemes.txt
|
||||
!profiles/en/kaldi/phoneme_examples.txt
|
||||
|
||||
!rhasspy/profile_schema.json
|
||||
!rhasspy/*.py
|
||||
!rhasspy/train/*.py
|
||||
!rhasspy/train/jsgf2fst/*.py
|
||||
!*.py
|
||||
!VERSION
|
||||
profiles/*/download/
|
||||
@@ -4,7 +4,6 @@ __pycache__/
|
||||
.venv/
|
||||
.ipynb_checkpoints/
|
||||
download/
|
||||
build/
|
||||
|
||||
# QEMU
|
||||
etc/qemu-arm-static
|
||||
@@ -27,11 +26,6 @@ profiles/*/*.umdl
|
||||
profiles/*/flair/
|
||||
profiles/*/*.pb
|
||||
profiles/*/*.pb.params
|
||||
profiles/*/snowboy/
|
||||
profiles/*/precise/
|
||||
profiles/*/porcupine/
|
||||
.doit.db
|
||||
vocab.txt
|
||||
|
||||
adapt_config.json
|
||||
tagged_sentences.md
|
||||
@@ -47,11 +41,6 @@ grammars/
|
||||
sentences/
|
||||
record/
|
||||
|
||||
# Tools
|
||||
tools/dist/
|
||||
tools/etc/
|
||||
opt/
|
||||
|
||||
# Third party
|
||||
etc/*.tar.gz
|
||||
etc/*.tar.xz
|
||||
@@ -64,15 +53,5 @@ tts/
|
||||
*.log
|
||||
.HA_VERSION
|
||||
|
||||
# Tests
|
||||
etc/test/pt/Google/
|
||||
etc/test/pt/PT*
|
||||
etc/test/pt/original/
|
||||
etc/test/vi/2*
|
||||
|
||||
# Misc
|
||||
workbench.xmi
|
||||
|
||||
# Debian
|
||||
site/
|
||||
debian/rhasspy-server*
|
||||
workbench.xmi
|
||||
@@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 Michael Hansen
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -1,13 +1,11 @@
|
||||
.PHONY: web-dist docker manifest docs-uml g2p check
|
||||
.PHONY: web-dist docker manifest docs-uml g2p
|
||||
SHELL := bash
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Docker
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
docker: web-dist docker-amd64 docker-armhf docker-aarch64
|
||||
|
||||
docker-deploy: docker-push manifest
|
||||
docker: web-dist docker-amd64 docker-armhf docker-aarch64 docker-push manifest
|
||||
|
||||
docker-amd64:
|
||||
docker build . -f docker/templates/dockerfiles/Dockerfile.prebuilt.alsa.all \
|
||||
@@ -24,7 +22,7 @@ docker-armhf:
|
||||
-t synesthesiam/rhasspy-server:armhf
|
||||
|
||||
docker-aarch64:
|
||||
docker build . -f docker/templates/dockerfiles/Dockerfile.prebuilt.alsa.all \
|
||||
docker build . -f docker/templates/dockerfiles/Dockerfile.from-source.alsa.all \
|
||||
--build-arg BUILD_ARCH=aarch64 \
|
||||
--build-arg CPU_ARCH=arm64v8 \
|
||||
--build-arg BUILD_FROM=arm64v8/ubuntu:bionic \
|
||||
@@ -51,9 +49,6 @@ manifest:
|
||||
|
||||
web-dist:
|
||||
yarn build
|
||||
mkdir -p download
|
||||
rm -f download/rhasspy-web-dist.tar.gz
|
||||
tar -czf download/rhasspy-web-dist.tar.gz dist/
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Documentation
|
||||
@@ -78,12 +73,3 @@ g2p: $(G2P_MODELS)
|
||||
|
||||
%/g2p.fst: %/base_dictionary.txt
|
||||
./make-g2p.sh $< $@
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Testing
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
check:
|
||||
flake8 --exclude=lexconvert.py app.py test.py rhasspy/*.py
|
||||
pylint --ignore=lexconvert.py app.py test.py rhasspy/*.py
|
||||
mypy app.py test.py rhasspy/*.py
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
Rhasspy (pronounced RAH-SPEE) is an offline, [multilingual](#supported-languages) voice assistant toolkit inspired by [Jasper](https://jasperproject.github.io/) that works well with [Home Assistant](https://www.home-assistant.io/), [Hass.io](https://www.home-assistant.io/hassio/), and [Node-RED](https://nodered.org).
|
||||
|
||||
* [Documentation](https://rhasspy.readthedocs.io/)
|
||||
* [Discussion](https://community.rhasspy.org)
|
||||
* [Video Introduction](https://www.youtube.com/watch?v=ijKTR_GqWwA)
|
||||
* [Hass.IO Add-On Repository](https://github.com/synesthesiam/hassio-addons)
|
||||
* [Discussion](https://community.home-assistant.io/t/rhasspy-offline-voice-assistant-toolkit/60862)
|
||||
|
||||
Rhasspy transcribes voice commands into [JSON](https://json.org) events that can trigger actions in home automation software, like [Home Assistant automations](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) or [Node-RED flows](https://rhasspy.readthedocs.io/en/latest/usage/#node-red). You define custom voice commands in a [profile](https://rhasspy.readthedocs.io/en/latest/profiles/) using a [specialized template syntax](https://rhasspy.readthedocs.io/en/latest/training/#sentencesini), and Rhasspy takes care of the rest.
|
||||
Rhasspy transforms voice commands into [JSON](https://json.org) events that can trigger actions in home automation software, like [Home Assistant automations](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) or [Node-RED flows](https://rhasspy.readthedocs.io/en/latest/usage/#node-red). You define custom voice commands in a [profile](profiles.md) using a [specialized template syntax](https://rhasspy.readthedocs.io/en/latest/training/#sentencesini), and Rhasspy takes care of the rest.
|
||||
|
||||
To run Rhasspy with the English (en) profile using Docker:
|
||||
|
||||
@@ -38,29 +38,26 @@ Rhasspy currently supports the following languages:
|
||||
* Mandarin (`zh`)
|
||||
* Vietnamese (`vi`)
|
||||
* Portuguese (`pt`)
|
||||
* Swedish (`sv`)
|
||||
* Catalan (`ca`)
|
||||
|
||||
The table below summarizes language support across the various supporting technologies that Rhasspy uses:
|
||||
|
||||
| Category | Name | Offline? | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt | sv | ca |
|
||||
| -------- | ------ | -------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| **Wake Word** | [pocketsphinx](https://rhasspy.readthedocs.io/en/latest/wake-word/#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | ✓ |
|
||||
| | [porcupine](https://rhasspy.readthedocs.io/en/latest/wake-word/#porcupine) | ✓ | ✓ | | | | | | | | | | | | | |
|
||||
| | [snowboy](https://rhasspy.readthedocs.io/en/latest/wake-word/#snowboy) | *requires account* | ✓ | • | • | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| | [precise](https://rhasspy.readthedocs.io/en/latest/wake-word/#mycroft-precise) | ✓ | ✓ | • | • | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| **Speech to Text** | [pocketsphinx](https://rhasspy.readthedocs.io/en/latest/speech-to-text/#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | ✓ |
|
||||
| | [kaldi](https://rhasspy.readthedocs.io/en/latest/speech-to-text/#kaldi) | ✓ | | | | | | | | | | | ✓ | | ✓ | |
|
||||
| **Intent Recognition** | [fsticuffs](https://rhasspy.readthedocs.io/en/latest/intent-recognition/#fsticuffs) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [fuzzywuzzy](https://rhasspy.readthedocs.io/en/latest/intent-recognition/#fuzzywuzzy) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [adapt](https://rhasspy.readthedocs.io/en/latest/intent-recognition/#mycroft-adapt) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flair](https://rhasspy.readthedocs.io/en/latest/intent-recognition/#flair) | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | | | | | ✓ | | ✓ |
|
||||
| | [rasaNLU](https://rhasspy.readthedocs.io/en/latest/intent-recognition/#rasanlu) | *needs extra software* | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| **Text to Speech** | [espeak](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#espeak) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flite](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#flite) | ✓ | ✓ | | | | | | | | ✓ | | | | | |
|
||||
| | [picotts](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#picotts) | ✓ | ✓ | | | | | | | | | | | | | |
|
||||
| | [marytts](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#marytts) | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | | | | | | | |
|
||||
| | [wavenet](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#google-wavenet) | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | ✓ | |
|
||||
| Category | Name | Offline? | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt |
|
||||
| -------- | ------ | -------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| **Wake Word** | [pocketsphinx](https://rhasspy.readthedocs.io/en/latest/wake-word/#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | |
|
||||
| | [snowboy](https://rhasspy.readthedocs.io/en/latest/wake-word/#snowboy) | *requires account* | ✓ | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| | [precise](https://rhasspy.readthedocs.io/en/latest/wake-word/#mycroft-precise) | ✓ | ✓ | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| **Speech to Text** | [pocketsphinx](https://rhasspy.readthedocs.io/en/latest/speech-to-text/#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | |
|
||||
| | [kaldi](https://rhasspy.readthedocs.io/en/latest/speech-to-text/#kaldi) | ✓ | | | | | | | | | | | ✓ | ✓ |
|
||||
| **Intent Recognition** | [fsticuffs](https://rhasspy.readthedocs.io/en/latest/intent-recognition/#fsticuffs) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [fuzzywuzzy](https://rhasspy.readthedocs.io/en/latest/intent-recognition/#fuzzywuzzy) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [adapt](https://rhasspy.readthedocs.io/en/latest/intent-recognition/#mycroft-adapt) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flair](https://rhasspy.readthedocs.io/en/latest/intent-recognition/#flair) | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | | | | | ✓ |
|
||||
| | [rasaNLU](https://rhasspy.readthedocs.io/en/latest/intent-recognition/#rasanlu) | *needs extra software* | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| **Text to Speech** | [espeak](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#espeak) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flite](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#flite) | ✓ | ✓ | | | | | | | | ✓ | | | |
|
||||
| | [picotts](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#picotts) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [marytts](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#marytts) | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | | | | | |
|
||||
| | [wavenet](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#google-wavenet) | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ |
|
||||
|
||||
• - yes, but requires training/customization
|
||||
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
defaults:
|
||||
-
|
||||
scope:
|
||||
path: ""
|
||||
values:
|
||||
render_with_liquid: false
|
||||
@@ -36,7 +36,7 @@ def main():
|
||||
|
||||
# Load dictionary
|
||||
word_dict = {}
|
||||
logging.info("Loading dictionary from %s", args.dictionary)
|
||||
logging.info("Loading dictionary from %s" % args.dictionary)
|
||||
with open(args.dictionary, "r") as dict_file:
|
||||
read_dict(dict_file, word_dict)
|
||||
|
||||
@@ -53,7 +53,7 @@ def main():
|
||||
all_words.append(word)
|
||||
|
||||
assert len(phonemes) == len(phoneme_words), "Not enough words to cover phonemes"
|
||||
logging.debug("Phonemes: %s", ", ".join(phoneme_words))
|
||||
logging.debug("Phonemes: %s" % ", ".join(phoneme_words.keys()))
|
||||
|
||||
phoneme_hyps = defaultdict(lambda: defaultdict(float))
|
||||
|
||||
@@ -66,7 +66,7 @@ def main():
|
||||
phoneme_hyps[phoneme][hyp] = count
|
||||
|
||||
# Sample words from the dictionary
|
||||
logging.info("Starting %s sample(s)", args.samples)
|
||||
logging.info("Starting %s sample(s)" % args.samples)
|
||||
phoneme_futures = {}
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Schedule eSpeak word samples
|
||||
@@ -80,7 +80,7 @@ def main():
|
||||
for i, future in enumerate(as_completed(phoneme_futures)):
|
||||
if i % len(phonemes) == 0:
|
||||
logging.info(
|
||||
"Sample %s of %s", (i // len(phonemes) + 1), args.samples
|
||||
"Sample %s of %s" % ((i // len(phonemes) + 1), args.samples)
|
||||
)
|
||||
|
||||
phoneme = phoneme_futures[future]
|
||||
@@ -113,14 +113,14 @@ def main():
|
||||
best = {}
|
||||
todo = set(phonemes)
|
||||
used = set()
|
||||
while todo:
|
||||
while len(todo) > 0:
|
||||
for phoneme in list(todo):
|
||||
best_to_worst = sorted(
|
||||
phoneme_hyps[phoneme].items(), key=lambda kv: kv[1], reverse=True
|
||||
)
|
||||
|
||||
for hyp, count in best_to_worst:
|
||||
if hyp not in used:
|
||||
if not hyp in used:
|
||||
best[phoneme] = hyp
|
||||
used.add(hyp)
|
||||
todo.remove(phoneme)
|
||||
@@ -165,7 +165,7 @@ def read_dict(dict_file, word_dict):
|
||||
"""
|
||||
for line in dict_file:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
if len(line) == 0:
|
||||
continue
|
||||
|
||||
word, pronounce = re.split("[ ]+", line, maxsplit=1)
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import calendar
|
||||
import json
|
||||
import locale
|
||||
from pathlib import Path
|
||||
|
||||
def main():
    """Generate day-name and month-name slot files for every profile.

    Command line: one positional argument, ``profiles_dir``.

    For each sub-directory of ``profiles_dir`` this reads ``profile.json``,
    switches the process locale to ``<profile["locale"]>.UTF-8``, prints that
    locale name, and writes the day and month names (one per line) to
    ``slots/rhasspy/days`` and ``slots/rhasspy/months`` inside the profile
    directory. Entries that are not directories are skipped.

    Errors from the underlying calls propagate unchanged, e.g. a missing
    ``profile.json`` or a locale that ``locale.setlocale`` rejects.
    """
    parser = argparse.ArgumentParser("generate-slots")
    parser.add_argument("profiles_dir")
    args = parser.parse_args()

    for profile_dir in Path(args.profiles_dir).glob("*"):
        if not profile_dir.is_dir():
            continue

        # Read the profile as UTF-8 explicitly: the process locale has not
        # been switched yet for the first profile, so the platform default
        # encoding cannot be relied on to decode non-ASCII JSON.
        with open(profile_dir / "profile.json", "r", encoding="utf-8") as profile_file:
            profile = json.load(profile_file)

        locale_name = profile["locale"] + ".UTF-8"
        locale.setlocale(locale.LC_ALL, locale_name)
        print(locale_name)

        slots_dir = profile_dir / "slots" / "rhasspy"
        slots_dir.mkdir(parents=True, exist_ok=True)

        # Day names (read after setlocale above)
        (slots_dir / "days").write_text(
            "\n".join(calendar.day_name), encoding="utf-8"
        )

        # Month names; filter(None, ...) drops empty entries
        (slots_dir / "months").write_text(
            "\n".join(filter(None, calendar.month_name)), encoding="utf-8"
        )
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -28,8 +28,6 @@ def main():
|
||||
"hin": "hi",
|
||||
"ell": "el",
|
||||
"por": "pt",
|
||||
"swe": "sv",
|
||||
"cat": "ca",
|
||||
}
|
||||
|
||||
for language in languages:
|
||||
@@ -38,7 +36,9 @@ def main():
|
||||
|
||||
if not os.path.exists(html_path):
|
||||
# Download
|
||||
url = f"https://www.ezglot.com/most-frequently-used-words.php?l={language}&submit=Select"
|
||||
url = "https://www.ezglot.com/most-frequently-used-words.php?l={0}&submit=Select".format(
|
||||
language
|
||||
)
|
||||
print(f"Downloading from {url}")
|
||||
|
||||
with open(html_path, "w") as html_file:
|
||||
|
||||
@@ -26,10 +26,10 @@ def main():
|
||||
with open(args.dictionary, "r") as dict_file:
|
||||
for line in dict_file:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
if len(line) == 0:
|
||||
continue
|
||||
|
||||
parts = re.split(r"[\t ]+", line)
|
||||
parts = re.split(r"[ ]+", line)
|
||||
word = parts[0]
|
||||
|
||||
if "(" in word:
|
||||
@@ -44,11 +44,11 @@ def main():
|
||||
|
||||
# Pick unique example words for every phoneme
|
||||
used_words = set()
|
||||
for phoneme in sorted(examples):
|
||||
for phoneme in sorted(examples.keys()):
|
||||
# Choose the shortest, unused example word for this phoneme.
|
||||
# Exclude words with 3 or fewer letters.
|
||||
for word, pron in sorted(examples[phoneme], key=lambda kv: len(kv[0])):
|
||||
if len(word) > 3 and (word not in used_words):
|
||||
if len(word) > 3 and (not word in used_words):
|
||||
# Output format is:
|
||||
# phoneme word pronunciation
|
||||
print(phoneme, word, " ".join(pron))
|
||||
|
||||
@@ -31,10 +31,10 @@ def main():
|
||||
with open(args.dictionary, "r") as dict_file:
|
||||
for line in dict_file:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
if len(line) == 0:
|
||||
continue
|
||||
|
||||
parts = re.split(r"[\t ]+", line)
|
||||
parts = re.split(r"[ ]+", line)
|
||||
word = parts[0].lower()
|
||||
|
||||
if ("(" in word) or (word in freq_phonemes):
|
||||
@@ -70,7 +70,7 @@ def main():
|
||||
with open(args.frequent_phones, "r") as freq_phones_file:
|
||||
for line in freq_phones_file:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
if len(line) == 0:
|
||||
continue
|
||||
|
||||
parts = re.split(r"[ ]+", line, maxsplit=1)
|
||||
@@ -82,7 +82,7 @@ def main():
|
||||
mappings = []
|
||||
bad_espeak = (":", ";", "-", "#")
|
||||
for word, espeak in freq_espeak.items():
|
||||
if word not in freq_phonemes:
|
||||
if not word in freq_phonemes:
|
||||
# No pronunciation
|
||||
continue
|
||||
|
||||
@@ -134,7 +134,7 @@ def main():
|
||||
m = 4
|
||||
for p in all_phonemes:
|
||||
candidate_counts = [
|
||||
(e, phoneme_counts[(cp, e)]) for (cp, e) in phoneme_counts if cp == p
|
||||
(e, phoneme_counts[(cp, e)]) for (cp, e) in phoneme_counts.keys() if cp == p
|
||||
]
|
||||
candidate_counts = [ec for ec in candidate_counts if ec[1] > n]
|
||||
candidate_counts = sorted(candidate_counts, key=lambda x: x[1], reverse=True)
|
||||
@@ -213,7 +213,7 @@ assign(P, E) :- maybe_assign(P, E).
|
||||
predicates = []
|
||||
for line in proc.stdout.splitlines():
|
||||
line = line.decode().strip()
|
||||
if not line:
|
||||
if len(line) == 0:
|
||||
continue
|
||||
elif line.startswith("OPTIMUM FOUND"):
|
||||
break
|
||||
|
||||
@@ -20,7 +20,7 @@ def main():
|
||||
with open(dict_path, "r") as dict_file:
|
||||
for line in dict_file:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
if len(line) == 0:
|
||||
continue
|
||||
|
||||
parts = re.split(r"[ ]+", line)
|
||||
|
||||
@@ -12,7 +12,7 @@ def main():
|
||||
with open(sys.argv[1], "r") as dict_file:
|
||||
for line in dict_file:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
if len(line) == 0:
|
||||
continue
|
||||
|
||||
parts = re.split(r"[ ]+", line)
|
||||
|
||||
@@ -1,29 +1,14 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
# Directory of *this* script
|
||||
this_dir="$( cd "$( dirname "$0" )" && pwd )"
|
||||
DIR="$( cd "$( dirname "$0" )" && pwd )"
|
||||
|
||||
venv="${this_dir}/../.venv"
|
||||
if [[ ! -d "${venv}" ]]; then
|
||||
echo "Missing virtual environment at ${venv}"
|
||||
echo "Did you run create-venv.sh?"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
source "${venv}/bin/activate"
|
||||
|
||||
# Force .venv/lib to be used
|
||||
export LD_LIBRARY_PATH="${venv}/lib:${LD_LIBRARY_PATH}"
|
||||
|
||||
# Use local Kaldi
|
||||
if [[ -d "${this_dir}/opt/kaldi" ]]; then
|
||||
export KALDI_PREFIX="${this_dir}/opt"
|
||||
fi
|
||||
cd "$DIR/.."
|
||||
source .venv/bin/activate
|
||||
|
||||
# Path to sphinxtrain tools
|
||||
if [[ -d "/usr/lib/sphinxtrain" ]]; then
|
||||
export PATH="/usr/lib/sphinxtrain:${PATH}"
|
||||
export PATH="/usr/lib/sphinxtrain:$PATH"
|
||||
fi
|
||||
|
||||
cd "${this_dir}/.." && python3 -m rhasspy "$@"
|
||||
python3 -m rhasspy "$@"
|
||||
|
||||
@@ -1,411 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
this_dir="$( cd "$( dirname "$0" )" && pwd )"
|
||||
CPU_ARCH="$(uname --m)"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Command-line Arguments
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
. "${this_dir}/etc/shflags"
|
||||
|
||||
DEFINE_string 'venv' "${this_dir}/.venv" 'Path to create virtual environment'
|
||||
DEFINE_string 'download-dir' "${this_dir}/download" 'Directory to cache downloaded files'
|
||||
DEFINE_string 'build-dir' "${this_dir}/build_${CPU_ARCH}" 'Directory to build dependencies in'
|
||||
DEFINE_boolean 'system' true 'Install system dependencies'
|
||||
DEFINE_boolean 'flair' false 'Install flair'
|
||||
DEFINE_boolean 'precise' false 'Install Mycroft Precise'
|
||||
DEFINE_boolean 'adapt' false 'Install Mycroft Adapt'
|
||||
DEFINE_boolean 'google' false 'Install Google Text to Speech'
|
||||
DEFINE_boolean 'kaldi' false 'Install Kaldi'
|
||||
DEFINE_boolean 'offline' false "Don't download anything"
|
||||
DEFINE_boolean 'web' true "Build Vue web interface with yarn"
|
||||
DEFINE_boolean 'sudo' true "Use sudo for apt"
|
||||
DEFINE_integer 'make-threads' 4 'Number of threads to use with make' 'j'
|
||||
DEFINE_string 'python' 'python3' 'Path to Python executable'
|
||||
|
||||
FLAGS "$@" || exit $?
|
||||
eval set -- "${FLAGS_ARGV}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Default Settings
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
set -e
|
||||
|
||||
python="${FLAGS_python}"
|
||||
venv="${FLAGS_venv}"
|
||||
|
||||
download_dir="${FLAGS_download_dir}"
|
||||
mkdir -p "${download_dir}"
|
||||
echo "Download directory: ${download_dir}"
|
||||
|
||||
build_dir="${FLAGS_build_dir}"
|
||||
mkdir -p "${build_dir}"
|
||||
echo "Build directory: ${build_dir}"
|
||||
|
||||
if [[ "${FLAGS_system}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_system='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_flair}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_flair='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_precise}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_precise='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_adapt}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_adapt='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_kaldi}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_kaldi='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_google}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_google='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_offline}" -eq "${FLAGS_TRUE}" ]]; then
|
||||
offline='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_web}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_web='true'
|
||||
fi
|
||||
|
||||
# Define run_sudo once, based on the 'sudo' flag: it either runs its
# arguments through sudo or runs them directly.
if [[ "${FLAGS_sudo}" -ne "${FLAGS_TRUE}" ]]; then
    # sudo flag is off: run the command as the current user
    run_sudo() {
        "$@"
    }
else
    # sudo flag is on: prefix the command with sudo
    run_sudo() {
        sudo "$@"
    }
fi
|
||||
|
||||
make_threads="${FLAGS_make_threads}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Create a temporary directory for building stuff
|
||||
temp_dir="$(mktemp -d)"
|
||||
|
||||
# Delete the temporary build directory named by ${temp_dir}.
cleanup() {
    rm -rf "${temp_dir}"
}
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
# Download URL $1 to path $2 unless a non-empty file already exists at $2.
#
# Arguments:
#   $1 - URL to fetch
#   $2 - destination file path (parent directories are created as needed)
#
# Exits the script with status 1 when the file is missing and the "offline"
# variable is set, or when curl reports a failure.
function maybe_download {
    if [[ ! -s "$2" ]]; then
        if [[ -n "${offline}" ]]; then
            echo "Need to download $1 but offline."
            exit 1
        fi

        mkdir -p "$(dirname "$2")"

        # Fetch into a sibling temporary file and rename it only on success,
        # so an interrupted transfer cannot leave a truncated file at $2 that
        # the -s check above would accept as complete on the next run.
        curl -sSfL -o "$2.part" "$1" || { rm -f "$2.part"; echo "Can't download $1"; exit 1; }
        mv "$2.part" "$2"
        echo "$1 => $2"
    fi
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Checking required programs"
|
||||
|
||||
if [[ -z "${no_web}" ]]; then
|
||||
if [[ ! -n "$(command -v yarn)" ]]; then
|
||||
echo "Please install yarn to continue (https://yarnpkg.com)"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_system}" ]]; then
|
||||
echo "Installing system dependencies"
|
||||
|
||||
run_sudo apt-get update
|
||||
run_sudo apt-get install --no-install-recommends \
|
||||
python3 python3-pip python3-venv python3-dev \
|
||||
python \
|
||||
build-essential autoconf autoconf-archive libtool automake bison \
|
||||
sox espeak flite swig portaudio19-dev \
|
||||
libatlas-base-dev \
|
||||
gfortran \
|
||||
sphinxbase-utils sphinxtrain pocketsphinx \
|
||||
jq checkinstall unzip xz-utils \
|
||||
curl \
|
||||
lame
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Downloading dependencies"
|
||||
|
||||
# Python-Pocketsphinx
|
||||
pocketsphinx_file="${download_dir}/pocketsphinx-python.tar.gz"
|
||||
if [[ ! -s "${pocketsphinx_file}" ]]; then
|
||||
pocketsphinx_url='https://github.com/synesthesiam/pocketsphinx-python/releases/download/v1.0/pocketsphinx-python.tar.gz'
|
||||
echo "Downloading pocketsphinx (${pocketsphinx_url})"
|
||||
maybe_download "${pocketsphinx_url}" "${pocketsphinx_file}"
|
||||
fi
|
||||
|
||||
# OpenFST
|
||||
openfst_dir="${build_dir}/openfst-1.6.9"
|
||||
if [[ ! -d "${openfst_dir}/build" ]]; then
|
||||
openfst_file="${download_dir}/openfst-1.6.9.tar.gz"
|
||||
|
||||
if [[ ! -s "${openfst_file}" ]]; then
|
||||
openfst_url='http://openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.9.tar.gz'
|
||||
echo "Downloading openfst (${openfst_url})"
|
||||
maybe_download "${openfst_url}" "${openfst_file}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Opengrm
|
||||
opengrm_dir="${build_dir}/opengrm-ngram-1.3.4"
|
||||
if [[ ! -d "${opengrm_dir}/build" ]]; then
|
||||
opengrm_file="${download_dir}/opengrm-ngram-1.3.4.tar.gz"
|
||||
|
||||
if [[ ! -s "${opengrm_file}" ]]; then
|
||||
opengrm_url='http://www.opengrm.org/twiki/pub/GRM/NGramDownload/opengrm-ngram-1.3.4.tar.gz'
|
||||
echo "Downloading opengrm (${opengrm_url})"
|
||||
maybe_download "${opengrm_url}" "${opengrm_file}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Phonetisaurus
|
||||
phonetisaurus_dir="${build_dir}/phonetisaurus"
|
||||
if [[ ! -d "${phonetisaurus_dir}/build" ]]; then
|
||||
phonetisaurus_file="${download_dir}/phonetisaurus-2019.tar.gz"
|
||||
|
||||
if [[ ! -s "${phonetisaurus_file}" ]]; then
|
||||
phonetisaurus_url='https://github.com/synesthesiam/docker-phonetisaurus/raw/master/download/phonetisaurus-2019.tar.gz'
|
||||
echo "Downloading phonetisaurus (${phonetisaurus_url})"
|
||||
maybe_download "${phonetisaurus_url}" "${phonetisaurus_file}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Kaldi
|
||||
kaldi_dir="${this_dir}/opt/kaldi"
|
||||
# Fetch the Kaldi sources only when Kaldi is enabled and its directory does
# not exist yet.
if [[ -z "${no_kaldi}" && ! -d "${kaldi_dir}" ]]; then
    # Kaldi's ATLAS-based build needs these packages. A bare `install` runs
    # coreutils install(1) rather than the package manager, so go through
    # apt-get via run_sudo like the system dependency step does.
    run_sudo apt-get install --no-install-recommends \
        libatlas-base-dev libatlas3-base gfortran
    run_sudo ldconfig
    kaldi_file="${download_dir}/kaldi-2019.tar.gz"

    if [[ ! -s "${kaldi_file}" ]]; then
        kaldi_url='https://github.com/kaldi-asr/kaldi/archive/master.tar.gz'
        echo "Downloading kaldi (${kaldi_url})"
        maybe_download "${kaldi_url}" "${kaldi_file}"
    fi
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Re-create virtual environment
|
||||
echo "Creating virtual environment"
|
||||
rm -rf "${venv}"
|
||||
"${python}" -m venv "${venv}"
|
||||
source "${venv}/bin/activate"
|
||||
pip3 install wheel setuptools
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# openfst
|
||||
# http://www.openfst.org
|
||||
#
|
||||
# Required to build language models and do intent recognition.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ ! -d "${openfst_dir}/build" ]]; then
|
||||
echo "Building openfst (${openfst_file})"
|
||||
tar -C "${build_dir}" -xf "${openfst_file}" && \
|
||||
cd "${openfst_dir}" && \
|
||||
./configure "--prefix=${openfst_dir}/build" \
|
||||
--enable-far \
|
||||
--disable-static \
|
||||
--enable-shared \
|
||||
--enable-ngram-fsts && \
|
||||
make -j "${make_threads}" && \
|
||||
make install
|
||||
fi
|
||||
|
||||
# Copy build artifacts into virtual environment
|
||||
cp -R "${openfst_dir}"/build/include/* "${venv}/include/"
|
||||
cp -R "${openfst_dir}"/build/lib/*.so* "${venv}/lib/"
|
||||
cp -R "${openfst_dir}"/build/bin/* "${venv}/bin/"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# opengrm
|
||||
# http://www.opengrm.org/twiki/bin/view/GRM/NGramLibrary
|
||||
#
|
||||
# Required to build language models.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# opengrm
|
||||
if [[ ! -d "${opengrm_dir}/build" ]]; then
|
||||
echo "Building opengrm (${opengrm_file})"
|
||||
export CXXFLAGS="-I${venv}/include"
|
||||
export LDFLAGS="-L${venv}/lib"
|
||||
tar -C "${build_dir}" -xf "${opengrm_file}" && \
|
||||
cd "${opengrm_dir}" && \
|
||||
./configure "--prefix=${opengrm_dir}/build" && \
|
||||
make -j "${make_threads}" && \
|
||||
make install
|
||||
fi
|
||||
|
||||
# Copy build artifacts into virtual environment
|
||||
cp -R "${opengrm_dir}"/build/bin/* "${venv}/bin/"
|
||||
cp -R "${opengrm_dir}"/build/include/* "${venv}/include/"
|
||||
cp -R "${opengrm_dir}"/build/lib/*.so* "${venv}/lib/"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# phonetisaurus
|
||||
# https://github.com/AdolfVonKleist/Phonetisaurus
|
||||
#
|
||||
# Required to guess word pronunciations.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ ! -d "${phonetisaurus_dir}/build" ]]; then
|
||||
echo "Installing phonetisaurus (${phonetisaurus_file})"
|
||||
tar -C "${build_dir}" -xf "${phonetisaurus_file}" && \
|
||||
cd "${phonetisaurus_dir}" && \
|
||||
./configure "--prefix=${phonetisaurus_dir}/build" \
|
||||
--with-openfst-includes="${venv}/include" \
|
||||
--with-openfst-libs="${venv}/lib" && \
|
||||
make -j "${make_threads}" && \
|
||||
make install
|
||||
fi
|
||||
|
||||
# Copy build artifacts into virtual environment
|
||||
cp -R "${phonetisaurus_dir}"/build/bin/* "${venv}/bin/"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# kaldi
|
||||
# https://kaldi-asr.org
|
||||
#
|
||||
# Required for speech recognition with Kaldi-based profiles.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_kaldi}" && ! -f "${kaldi_dir}/src/online2bin/online2-wav-nnet3-latgen-faster" ]]; then
|
||||
echo "Installing kaldi (${kaldi_file})"
|
||||
|
||||
# armhf
|
||||
if [[ -f '/usr/lib/arm-linux-gnueabihf/libatlas.so' ]]; then
|
||||
# Kaldi install doesn't check here, despite it being in ldconfig
|
||||
export ATLASLIBDIR='/usr/lib/arm-linux-gnueabihf'
|
||||
fi
|
||||
|
||||
# aarch64
|
||||
if [[ -f '/usr/lib/aarch64-linux-gnu/libatlas.so' ]]; then
|
||||
# Kaldi install doesn't check here, despite it being in ldconfig
|
||||
export ATLASLIBDIR='/usr/lib/aarch64-linux-gnu'
|
||||
fi
|
||||
|
||||
tar -C "${build_dir}" -xf "${kaldi_file}" && \
|
||||
cp "${this_dir}/etc/linux_atlas_aarch64.mk" "${kaldi_dir}/src/makefiles/" && \
|
||||
cd "${kaldi_dir}/tools" && \
|
||||
make -j "${make_threads}" && \
|
||||
cd "${kaldi_dir}/src" && \
|
||||
./configure --shared --mathlib=ATLAS --use-cuda=no && \
|
||||
make depend -j "${make_threads}" && \
|
||||
make -j "${make_threads}"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Python requirements
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Installing Python requirements"
|
||||
|
||||
"${python}" -m pip install requests
|
||||
|
||||
# pytorch is not available on ARM
|
||||
case "${CPU_ARCH}" in
|
||||
armv7l|arm64v8)
|
||||
no_flair="true" ;;
|
||||
esac
|
||||
|
||||
requirements_file="${temp_dir}/requirements.txt"
|
||||
temp_requirements_file="${temp_dir}/temp_requirements.txt"
|
||||
cp "${this_dir}/requirements.txt" "${requirements_file}"
|
||||
|
||||
# Exclude requirements
|
||||
if [[ -n "${no_flair}" ]]; then
|
||||
echo "Excluding flair from virtual environment"
|
||||
sed '/^flair/d' "${requirements_file}" > "${temp_requirements_file}" &&
|
||||
mv "${temp_requirements_file}" "${requirements_file}"
|
||||
|
||||
fi
|
||||
|
||||
if [[ -n "${no_precise}" ]]; then
|
||||
echo "Excluding Mycroft Precise from virtual environment"
|
||||
sed '/^precise-runner/d' "${requirements_file}" > "${temp_requirements_file}" &&
|
||||
mv "${temp_requirements_file}" "${requirements_file}"
|
||||
|
||||
fi
|
||||
|
||||
if [[ -n "${no_adapt}" ]]; then
|
||||
echo "Excluding Mycroft Adapt from virtual environment"
|
||||
sed '/^adapt-parser/d' "${requirements_file}" > "${temp_requirements_file}" &&
|
||||
mv "${temp_requirements_file}" "${requirements_file}"
|
||||
|
||||
fi
|
||||
|
||||
if [[ -n "${no_google}" ]]; then
|
||||
echo "Excluding Google Text to Speech from virtual environment"
|
||||
sed '/^google-cloud-texttospeech/d' "${requirements_file}" > "${temp_requirements_file}" &&
|
||||
mv "${temp_requirements_file}" "${requirements_file}"
|
||||
fi
|
||||
|
||||
# Install everything except openfst first
|
||||
sed '/^openfst/d' "${requirements_file}" > "${temp_requirements_file}" &&
|
||||
mv "${temp_requirements_file}" "${requirements_file}"
|
||||
|
||||
"${python}" -m pip install -r "${requirements_file}"
|
||||
|
||||
echo "Installing Python openfst wrapper"
|
||||
"${python}" -m pip install \
|
||||
--global-option=build_ext \
|
||||
--global-option="-I${venv}/include" \
|
||||
--global-option="-L${venv}/lib" \
|
||||
-r <(grep '^openfst' "${this_dir}/requirements.txt")
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Pocketsphinx for Python
|
||||
# https://github.com/cmusphinx/pocketsphinx
|
||||
#
|
||||
# Speech to text for most profiles.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
pocketsphinx_file="${download_dir}/pocketsphinx-python.tar.gz"
|
||||
echo "Installing Python pocketsphinx (${pocketsphinx_file})"
|
||||
|
||||
"${python}" -m pip install "${pocketsphinx_file}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Snowboy
|
||||
# https://snowboy.kitt.ai
|
||||
#
|
||||
# Wake word system
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
case "${CPU_ARCH}" in
|
||||
x86_64|armv7l)
|
||||
snowboy_file="${download_dir}/snowboy-1.3.0.tar.gz"
|
||||
echo "Installing snowboy (${snowboy_file})"
|
||||
"${python}" -m pip install "${snowboy_file}"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Not installing snowboy (${CPU_ARCH} not supported)"
|
||||
esac
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_web}" ]]; then
|
||||
echo "Building web interface"
|
||||
cd "${this_dir}" && yarn install && yarn build
|
||||
fi
|
||||
@@ -1,157 +1,16 @@
|
||||
#!/usr/bin/env bash
|
||||
this_dir="$( cd "$( dirname "$0" )" && pwd )"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Command-line Arguments
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
. "${this_dir}/etc/shflags"
|
||||
|
||||
DEFINE_string 'venv' "${this_dir}/.venv" 'Path to create virtual environment'
|
||||
DEFINE_string 'download-dir' "${this_dir}/download" 'Directory to cache downloaded files'
|
||||
DEFINE_boolean 'system' true 'Install system dependencies'
|
||||
DEFINE_boolean 'flair' false 'Install flair'
|
||||
DEFINE_boolean 'precise' false 'Install Mycroft Precise'
|
||||
DEFINE_boolean 'adapt' true 'Install Mycroft Adapt'
|
||||
DEFINE_boolean 'google' false 'Install Google Text to Speech'
|
||||
DEFINE_boolean 'kaldi' true 'Install Kaldi'
|
||||
DEFINE_boolean 'offline' false "Don't download anything"
|
||||
DEFINE_integer 'make-threads' 4 'Number of threads to use with make' 'j'
|
||||
DEFINE_string 'python' '' 'Path to Python executable'
|
||||
|
||||
FLAGS "$@" || exit $?
|
||||
eval set -- "${FLAGS_ARGV}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Default Settings
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
set -e
|
||||
|
||||
venv="${FLAGS_venv}"
|
||||
download_dir="${FLAGS_download_dir}"
|
||||
# Directory of *this* script
|
||||
DIR="$( cd "$( dirname "$0" )" && pwd )"
|
||||
|
||||
# Place where downloaded artifacts are stored
|
||||
download_dir="${DIR}/download"
|
||||
mkdir -p "${download_dir}"
|
||||
|
||||
if [[ "${FLAGS_system}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_system='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_flair}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_flair='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_precise}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_precise='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_adapt}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_adapt='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_kaldi}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_kaldi='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_google}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_google='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_offline}" -eq "${FLAGS_TRUE}" ]]; then
|
||||
offline='true'
|
||||
fi
|
||||
|
||||
make_threads="${FLAGS_make_threads}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Create a temporary directory for building stuff
|
||||
temp_dir="$(mktemp -d)"
|
||||
|
||||
function cleanup {
|
||||
rm -rf "${temp_dir}"
|
||||
}
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Download URL $1 to path $2 unless a non-empty file already exists at $2.
#
# Arguments:
#   $1 - URL to fetch
#   $2 - destination file path (parent directories are created as needed)
#
# Exits the script with status 1 when the file is missing and the "offline"
# variable is set, or when curl reports a failure.
function maybe_download {
    if [[ ! -s "$2" ]]; then
        if [[ -n "${offline}" ]]; then
            echo "Need to download $1 but offline."
            exit 1
        fi

        mkdir -p "$(dirname "$2")"

        # Fetch into a sibling temporary file and rename it only on success,
        # so an interrupted transfer cannot leave a truncated file at $2 that
        # the -s check above would accept as complete on the next run.
        curl -sSfL -o "$2.part" "$1" || { rm -f "$2.part"; echo "Can't download $1"; exit 1; }
        mv "$2.part" "$2"
        echo "$1 => $2"
    fi
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Debian dependencies
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_system}" ]]; then
|
||||
echo "Installing system dependencies"
|
||||
sudo apt-get update
|
||||
sudo apt-get install --no-install-recommends \
|
||||
python3 python3-pip python3-venv python3-dev \
|
||||
python \
|
||||
build-essential autoconf autoconf-archive libtool automake bison \
|
||||
sox espeak flite swig portaudio19-dev \
|
||||
libatlas-base-dev \
|
||||
gfortran \
|
||||
sphinxbase-utils sphinxtrain pocketsphinx \
|
||||
jq checkinstall unzip xz-utils \
|
||||
curl \
|
||||
lame
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Python >= 3.6
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${FLAGS_python}" ]]; then
|
||||
# Auto-detect Python
|
||||
if [[ -n "$(command -v python3.8)" ]]; then
|
||||
PYTHON='python3.8'
|
||||
elif [[ -n "$(command -v python3.7)" ]]; then
|
||||
PYTHON='python3.7'
|
||||
elif [[ -n "$(command -v python3.6)" ]]; then
|
||||
PYTHON='python3.6'
|
||||
else
|
||||
echo "Installing Python 3.6 from source. This is going to take a LONG time."
|
||||
sudo apt-get install --no-install-recommends \
|
||||
tk-dev libncurses5-dev libncursesw5-dev \
|
||||
libreadline6-dev libdb5.3-dev libgdbm-dev \
|
||||
libsqlite3-dev libssl-dev libbz2-dev \
|
||||
libexpat1-dev liblzma-dev zlib1g-dev
|
||||
|
||||
python_file="${download_dir}/Python-3.6.8.tar.xz"
|
||||
python_url='https://www.python.org/ftp/python/3.6.8/Python-3.6.8.tar.xz'
|
||||
maybe_download "${python_url}" "${python_file}"
|
||||
|
||||
tar -C "${temp_dir}" -xf "${python_file}"
|
||||
cd "${temp_dir}/Python-3.6.8" && \
|
||||
./configure && \
|
||||
make -j "${make_threads}" && \
|
||||
sudo make altinstall
|
||||
|
||||
PYTHON='python3.6'
|
||||
fi
|
||||
else
|
||||
# User-provided Python
|
||||
PYTHON="${FLAGS_python}"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Download dependencies
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# CPU architecture
|
||||
CPU_ARCH="$(uname --m)"
|
||||
case "${CPU_ARCH}" in
|
||||
CPU_ARCH="$(lscpu | awk '/^Architecture/{print $2}')"
|
||||
case $CPU_ARCH in
|
||||
x86_64)
|
||||
FRIENDLY_ARCH=amd64
|
||||
;;
|
||||
@@ -163,104 +22,115 @@ case "${CPU_ARCH}" in
|
||||
arm64v8)
|
||||
FRIENDLY_ARCH=aarch64
|
||||
;;
|
||||
|
||||
*)
|
||||
FRIENDLY_ARCH="${CPU_ARCH}"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Create a temporary directory for building stuff
|
||||
temp_dir="$(mktemp -d)"
|
||||
|
||||
function cleanup {
|
||||
rm -rf "${temp_dir}"
|
||||
}
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Debian dependencies
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Installing system dependencies (${FRIENDLY_ARCH})"
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y python3 python3-pip python3-venv python3-dev \
|
||||
python \
|
||||
build-essential autoconf autoconf-archive libtool automake bison \
|
||||
sox espeak flite swig portaudio19-dev \
|
||||
libatlas-base-dev \
|
||||
gfortran \
|
||||
sphinxbase-utils sphinxtrain pocketsphinx \
|
||||
jq checkinstall unzip xz-utils \
|
||||
curl
|
||||
|
||||
# Download dependencies
|
||||
echo "Downloading dependencies"
|
||||
download_args=()
|
||||
if [[ -n "${offline}" ]]; then
|
||||
download_args+=('--offline')
|
||||
fi
|
||||
bash download-dependencies.sh
|
||||
|
||||
if [[ -n "${no_precise}" ]]; then
|
||||
download_args+=('--noprecise')
|
||||
fi
|
||||
# -----------------------------------------------------------------------------
|
||||
# OpenFST
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -n "${no_kaldi}" ]]; then
|
||||
download_args+=('--nokaldi')
|
||||
fi
|
||||
case $CPU_ARCH in
|
||||
armv7l|arm64v8)
|
||||
# Build from source
|
||||
openfst_file="${download_dir}/openfst-1.6.2.tar.gz"
|
||||
echo "Building OpenFST (${openfst_file})"
|
||||
tar -C "${temp_dir}" -xzf "${openfst_file}" && \
|
||||
cd "${temp_dir}/openfst-1.6.2" && \
|
||||
./configure --enable-static --enable-shared --enable-far --enable-ngram-fsts && \
|
||||
make -j 4 && \
|
||||
sudo make install
|
||||
;;
|
||||
|
||||
bash download-dependencies.sh "${download_args[@]}"
|
||||
*)
|
||||
# Use pre-built packages
|
||||
sudo apt-get install -y libfst-dev libfst-tools
|
||||
esac
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Python 3.6
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "$(which python3.6)" ]]; then
|
||||
echo "Installing Python 3.6 from source. This is going to take a LONG time."
|
||||
sudo apt-get install -y tk-dev libncurses5-dev libncursesw5-dev \
|
||||
libreadline6-dev libdb5.3-dev libgdbm-dev \
|
||||
libsqlite3-dev libssl-dev libbz2-dev \
|
||||
libexpat1-dev liblzma-dev zlib1g-dev
|
||||
|
||||
python_file="${download_dir}/Python-3.6.8.tar.xz"
|
||||
# Fetch the Python source tarball unless it is already in the download cache.
if [[ ! -f "${python_file}" ]]; then
    python_url='https://www.python.org/ftp/python/3.6.8/Python-3.6.8.tar.xz'
    # -o must be its own argument: written as "-sSfL-o" the whole token is one
    # option cluster and the output file name is never bound to -o.
    curl -sSfL -o "${python_file}" "${python_url}"
fi
|
||||
|
||||
tar -C "${temp_dir}" -xf "${python_file}"
|
||||
cd "${temp_dir}/Python-3.6.8" && \
|
||||
./configure && \
|
||||
make -j 4 && \
|
||||
sudo make altinstall
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Virtual environment
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
cd "${this_dir}"
|
||||
cd "${DIR}"
|
||||
|
||||
echo "${venv}"
|
||||
PYTHON="python3.6"
|
||||
VENV_PATH="${DIR}/.venv"
|
||||
echo "${VENV_PATH}"
|
||||
|
||||
if [[ -d "${venv}" ]]; then
|
||||
echo "Removing existing virtual environment"
|
||||
rm -rf "${venv}"
|
||||
fi
|
||||
echo "Removing existing virtual environment"
|
||||
rm -rf "${VENV_PATH}"
|
||||
|
||||
echo "Creating new virtual environment"
|
||||
mkdir -p "${venv}"
|
||||
"${PYTHON}" -m venv "${venv}"
|
||||
|
||||
# Extract Rhasspy tools
|
||||
rhasspy_tools_file="${download_dir}/rhasspy-tools_${FRIENDLY_ARCH}.tar.gz"
|
||||
echo "Extracting tools (${rhasspy_tools_file})"
|
||||
tar -C "${venv}" -xf "${rhasspy_tools_file}"
|
||||
|
||||
# Force .venv/lib to be used
|
||||
export LD_LIBRARY_PATH="${venv}/lib:${LD_LIBRARY_PATH}"
|
||||
mkdir -p "${VENV_PATH}"
|
||||
"${PYTHON}" -m venv "${VENV_PATH}"
|
||||
|
||||
# shellcheck source=/dev/null
|
||||
source "${venv}/bin/activate"
|
||||
source "${VENV_PATH}/bin/activate"
|
||||
"${PYTHON}" -m pip install wheel
|
||||
|
||||
echo "Upgrading pip"
|
||||
"${PYTHON}" -m pip install --upgrade pip
|
||||
|
||||
echo "Installing Python requirements"
|
||||
"${PYTHON}" -m pip install wheel setuptools
|
||||
"${PYTHON}" -m pip install requests
|
||||
|
||||
# pytorch is not available on ARM
|
||||
case "${CPU_ARCH}" in
|
||||
case $CPU_ARCH in
|
||||
armv7l|arm64v8)
|
||||
no_flair="true" ;;
|
||||
# Exclude flair
|
||||
grep -v flair requirements.txt > "${temp_dir}/requirements-noflair.txt"
|
||||
"${PYTHON}" -m pip install -r "${temp_dir}/requirements-noflair.txt"
|
||||
;;
|
||||
|
||||
*)
|
||||
# Install all requirements
|
||||
"${PYTHON}" -m pip install -r requirements.txt
|
||||
esac
|
||||
|
||||
requirements_file="${temp_dir}/requirements.txt"
|
||||
cp "${this_dir}/requirements.txt" "${requirements_file}"
|
||||
|
||||
# Exclude requirements
|
||||
if [[ -n "${no_flair}" ]]; then
|
||||
echo "Excluding flair from virtual environment"
|
||||
sed -i '/^flair/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
if [[ -n "${no_precise}" ]]; then
|
||||
echo "Excluding Mycroft Precise from virtual environment"
|
||||
sed -i '/^precise-runner/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
if [[ -n "${no_adapt}" ]]; then
|
||||
echo "Excluding Mycroft Adapt from virtual environment"
|
||||
sed -i '/^adapt-parser/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
if [[ -n "${no_google}" ]]; then
|
||||
echo "Excluding Google Text to Speech from virtual environment"
|
||||
sed -i '/^google-cloud-texttospeech/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
# Install everything except openfst first
|
||||
sed -i '/^openfst/d' "${requirements_file}"
|
||||
python3 -m pip install -r "${requirements_file}"
|
||||
|
||||
# Install Python openfst wrapper
|
||||
"${PYTHON}" -m pip install \
|
||||
--global-option=build_ext \
|
||||
--global-option="-I${venv}/include" \
|
||||
--global-option="-L${venv}/lib" \
|
||||
-r <(grep '^openfst' "${this_dir}/requirements.txt")
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Pocketsphinx for Python
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -272,9 +142,15 @@ pocketsphinx_file="${download_dir}/pocketsphinx-python.tar.gz"
|
||||
# Snowboy
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
case "${CPU_ARCH}" in
|
||||
case $CPU_ARCH in
|
||||
x86_64|armv7l)
|
||||
snowboy_file="${download_dir}/snowboy-1.3.0.tar.gz"
|
||||
# Fetch the snowboy source tarball unless it is already in the download cache.
if [[ ! -f "${snowboy_file}" ]]; then
    snowboy_url='https://github.com/Kitt-AI/snowboy/archive/v1.3.0.tar.gz'
    echo "Downloading snowboy (${snowboy_url})"
    # -o must be its own argument: written as "-sSfL-o" the whole token is one
    # option cluster and the output file name is never bound to -o.
    curl -sSfL -o "${snowboy_file}" "${snowboy_url}"
fi
|
||||
|
||||
"${PYTHON}" -m pip install "${snowboy_file}"
|
||||
;;
|
||||
|
||||
@@ -286,14 +162,13 @@ esac
|
||||
# Mycroft Precise
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_precise}" && -z "$(command -v precise-engine)" ]]; then
|
||||
case "${CPU_ARCH}" in
|
||||
if [[ -z "$(which precise-engine)" ]]; then
|
||||
case $CPU_ARCH in
|
||||
x86_64|armv7l)
|
||||
echo "Installing Mycroft Precise"
|
||||
precise_file="${download_dir}/precise-engine_0.3.0_${CPU_ARCH}.tar.gz"
|
||||
precise_install="${venv}/lib"
|
||||
tar -C "${precise_install}" -xf "${precise_file}"
|
||||
ln -s "${precise_install}/precise-engine/precise-engine" "${venv}/bin/precise-engine"
|
||||
precise_install='/usr/lib'
|
||||
sudo tar -C "${precise_install}" -xf "${precise_file}"
|
||||
sudo ln -s "${precise_install}/precise-engine/precise-engine" '/usr/bin/precise-engine'
|
||||
;;
|
||||
|
||||
*)
|
||||
@@ -302,23 +177,71 @@ if [[ -z "${no_precise}" && -z "$(command -v precise-engine)" ]]; then
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Kaldi
|
||||
# Opengrm
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_kaldi}" ]]; then
|
||||
kaldi_file="${download_dir}/kaldi_${FRIENDLY_ARCH}.tar.gz"
|
||||
echo "Installing Kaldi (${kaldi_file})"
|
||||
mkdir -p "${this_dir}/opt"
|
||||
tar -C "${this_dir}/opt" -xf "${kaldi_file}"
|
||||
if [[ -z "$(which ngramcount)" ]]; then
|
||||
opengrm_file="${download_dir}/opengrm-ngram-1.3.3.tar.gz"
|
||||
echo "Building Opengrm ${opengrm_file}"
|
||||
tar -C "${temp_dir}" -xf "${opengrm_file}" && \
|
||||
cd "${temp_dir}/opengrm-ngram-1.3.3" && \
|
||||
./configure && \
|
||||
make -j 4 && \
|
||||
sudo make install && \
|
||||
sudo ldconfig
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Phonetisaurus
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "$(which phonetisaurus-apply)" ]]; then
|
||||
case $CPU_ARCH in
|
||||
x86_64|armv7l|arm64v8)
|
||||
# Install pre-built package
|
||||
phonetisaurus_file="${download_dir}/phonetisaurus-2019_${FRIENDLY_ARCH}.deb"
|
||||
echo "Installing phonetisaurus (${phonetisaurus_file})"
|
||||
sudo dpkg -i "${phonetisaurus_file}"
|
||||
;;
|
||||
|
||||
*)
|
||||
# Build from source
|
||||
phonetisaurus_file="${download_dir}/phonetisaurus-2019.zip"
|
||||
echo "Building phonetisaurus (${phonetisaurus_file})"
|
||||
unzip -d "${temp_dir}" "${phonetisaurus_file}" && \
|
||||
cd "${temp_dir}/phonetisaurus" && \
|
||||
./configure && \
|
||||
make -j 4 && \
|
||||
sudo make install
|
||||
esac
|
||||
fi
|
||||
|
||||
# Add /usr/local/lib to LD_LIBRARY_PATH
|
||||
sudo ldconfig
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# NodeJS / Yarn
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "$(which node)" ]]; then
|
||||
echo "Installing nodejs"
|
||||
sudo apt-get install -y nodejs
|
||||
fi
|
||||
|
||||
if [[ -z "$(which yarn)" ]]; then
|
||||
echo "Installing yarn"
|
||||
curl -o- -L https://yarnpkg.com/install.sh | bash
|
||||
|
||||
# Need to re-source .bashrc so yarn is in the path
|
||||
source "${HOME}/.bashrc"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Web Interface
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
rhasspy_web_file="${download_dir}/rhasspy-web-dist.tar.gz"
|
||||
echo "Extracting web interface (${rhasspy_web_file})"
|
||||
tar -C "${this_dir}" -xf "${rhasspy_web_file}"
|
||||
echo "Building web interface"
|
||||
cd "${DIR}" && yarn && yarn build
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
Package: rhasspy-server
|
||||
Version: 2.4.10
|
||||
Section: utils
|
||||
Priority: optional
|
||||
Depends: sox,alsa-utils,espeak,libstdc++6,jq,xz-utils,unzip,curl,sphinxbase-utils,sphinxtrain,flite,libatlas-base-dev,gfortran
|
||||
Architecture: ${architecture}
|
||||
Maintainer: Michael Hansen
|
||||
Description: Offline voice assistant
|
||||
@@ -1,11 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
if [[ -z "${RHASSPY_BASE_DIR}" ]]; then
|
||||
export RHASSPY_BASE_DIR="/usr/lib/rhasspy"
|
||||
fi
|
||||
|
||||
if [[ -z "${KALDI_PREFIX}" ]]; then
|
||||
export KALDI_PREFIX="${RHASSPY_BASE_DIR}"
|
||||
fi
|
||||
|
||||
cd "${RHASSPY_BASE_DIR}" && rhasspy/rhasspy "$@"
|
||||
@@ -1,177 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
rhasspy_version="2.4.10"
|
||||
|
||||
this_dir="$( cd "$( dirname "$0" )" && pwd )"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Command-line Arguments
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
. "${this_dir}/etc/shflags"
|
||||
|
||||
DEFINE_string 'architecture' '' 'Debian architecture'
|
||||
DEFINE_string 'version' "${rhasspy_version}" 'Package version'
|
||||
DEFINE_boolean 'package' true 'Create debian package (.deb)'
|
||||
|
||||
FLAGS "$@" || exit $?
|
||||
eval set -- "${FLAGS_ARGV}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Settings
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
export architecture="${FLAGS_architecture}"
|
||||
version="${FLAGS_version}"
|
||||
debian_dir="${this_dir}/debian"
|
||||
|
||||
set -e
|
||||
|
||||
if [[ -z "${architecture}" ]]; then
|
||||
# Guess architecture
|
||||
architecture="$(dpkg-architecture | grep 'DEB_BUILD_ARCH=' | sed 's/^[^=]\+=//')"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Activate virtual environment
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
venv="${this_dir}/.venv"
|
||||
|
||||
if [[ ! -d "${venv}" ]]; then
|
||||
echo "Missing virtual environment at ${venv}"
|
||||
echo "Did you run create-venv.sh?"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cd "${this_dir}"
|
||||
source "${venv}/bin/activate"
|
||||
|
||||
if [[ -z "$(command -v pyinstaller)" ]]; then
|
||||
echo "Missing PyInstaller"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Run PyInstaller
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Running PyInstaller"
|
||||
package_name="rhasspy-server_${version}_${architecture}"
|
||||
package_dir="${debian_dir}/${package_name}"
|
||||
output_dir="${package_dir}/usr/lib/rhasspy"
|
||||
share_dir="${package_dir}/usr/share/rhasspy"
|
||||
|
||||
pyinstaller\
|
||||
-y \
|
||||
--workpath "pyinstaller/build" \
|
||||
--distpath "${output_dir}" \
|
||||
"${this_dir}/rhasspy.spec"
|
||||
|
||||
# Remove all symbols (Lintian warning)
|
||||
strip --strip-all "${output_dir}/rhasspy"/*.so* || true
|
||||
|
||||
# Remove executable bit from shared libs (Lintian warning)
|
||||
chmod -x "${output_dir}/rhasspy"/*.so* || true
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Copy Rhasspy
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Profiles
|
||||
mkdir -p "${output_dir}/profiles"
|
||||
rsync -av \
|
||||
--delete \
|
||||
--exclude 'acoustic_model' \
|
||||
--exclude 'download' \
|
||||
--exclude 'flair' \
|
||||
--exclude 'base_dictionary.txt' \
|
||||
--exclude 'base_language_model.txt' \
|
||||
--exclude 'g2p.fst' \
|
||||
--exclude 'HCLG.fst' \
|
||||
--exclude 'final.mdl' \
|
||||
--exclude '*.umdl' \
|
||||
"${this_dir}/profiles/" \
|
||||
"${output_dir}/profiles/"
|
||||
|
||||
# Sounds
|
||||
mkdir -p "${output_dir}/etc/wav"
|
||||
rsync -av \
|
||||
--delete \
|
||||
"${this_dir}/etc/wav/" \
|
||||
"${output_dir}/etc/wav/"
|
||||
|
||||
# Web
|
||||
mkdir -p "${output_dir}/dist"
|
||||
rsync -av \
|
||||
--delete \
|
||||
"${this_dir}/dist/" \
|
||||
"${output_dir}/dist/"
|
||||
|
||||
# Documentation
|
||||
mkdocs build
|
||||
mkdir -p "${share_dir}/docs"
|
||||
rsync -av \
|
||||
--delete \
|
||||
"${this_dir}/site/" \
|
||||
"${share_dir}/docs/"
|
||||
|
||||
# Source code
|
||||
mkdir -p "${share_dir}/src"
|
||||
rsync -av \
|
||||
--delete \
|
||||
--exclude '.mypy_cache' \
|
||||
--exclude '__pycache__' \
|
||||
"${this_dir}/rhasspy/" \
|
||||
"${share_dir}/src/rhasspy/"
|
||||
|
||||
cp "${this_dir}/app.py" "${share_dir}/src/"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Copy Kaldi
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Copying Kaldi"
|
||||
kaldi_src="${this_dir}/opt/kaldi"
|
||||
if [[ ! -d "${kaldi_src}" ]]; then
|
||||
echo "Missing Kaldi at ${kaldi_src}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
kaldi_dest="${output_dir}/kaldi"
|
||||
mkdir -p "${kaldi_dest}"
|
||||
rsync -av --delete "${kaldi_src}/" "${kaldi_dest}/"
|
||||
|
||||
# Avoid link recursion
|
||||
rm -f "${kaldi_dest}/egs/wsj/s5/utils/utils"
|
||||
|
||||
# Turn duplicate .so files into symbolic links.
#
# For every "<name>.so" in the given directory, each sibling whose name starts
# with "<name>.so." is removed and re-created as a relative symbolic link that
# points at "<name>.so".
#
# Arguments:
#   $1 - directory containing the shared libraries
function fix_library_links {
    local lib_dir="$1"
    local lib lib_base lib_link

    for lib in "${lib_dir}"/*.so; do
        # An unmatched glob is left as the literal pattern; skip it so no
        # bogus "*.so.*" link is ever created.
        [[ -e "${lib}" || -L "${lib}" ]] || continue

        # Quote the path so directories containing whitespace are handled.
        lib_base="$(basename -- "${lib}")"

        for lib_link in "${lib_dir}/${lib_base}".*; do
            # Same guard for the inner glob; -L keeps dangling links eligible
            # for replacement.
            [[ -e "${lib_link}" || -L "${lib_link}" ]] || continue

            rm -f -- "${lib_link}"
            ln -s -- "${lib_base}" "${lib_link}"
        done
    done
}
|
||||
|
||||
fix_library_links "${kaldi_dest}/tools/openfst/lib"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Create Debian package
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Creating Debian package"
|
||||
mkdir -p "${package_dir}/DEBIAN"
|
||||
cat "${debian_dir}/DEBIAN/control" | \
|
||||
envsubst > "${package_dir}/DEBIAN/control"
|
||||
|
||||
mkdir -p "${package_dir}/usr/bin"
|
||||
cp "${debian_dir}/bin/rhasspy-server" "${package_dir}/usr/bin/"
|
||||
|
||||
if [[ "${FLAGS_package}" -eq "${FLAGS_TRUE}" ]]; then
|
||||
# Actually build the package
|
||||
cd 'debian' && fakeroot dpkg --build "${package_name}"
|
||||
fi
|
||||
@@ -14,7 +14,9 @@ QEMU
|
||||
|
||||
SYSTEM_DEPENDENCIES
|
||||
|
||||
RHASSPY_TOOLS
|
||||
PHONETISAURUS
|
||||
|
||||
OPENGRM
|
||||
|
||||
PYTHON_REQUIREMENTS
|
||||
|
||||
@@ -22,6 +24,8 @@ PYTHON_POCKETSPHINX
|
||||
|
||||
SNOWBOY
|
||||
|
||||
MYCROFT_PRECISE
|
||||
|
||||
TTS
|
||||
|
||||
KALDI
|
||||
@@ -32,8 +36,6 @@ RUN ldconfig
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
GSTREAMER
|
||||
|
||||
PULSEAUDIO
|
||||
|
||||
# Copy script to run
|
||||
@@ -47,6 +49,5 @@ PROFILES
|
||||
RHASSPY_CODE
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -1 +1,120 @@
|
||||
COPY profiles/ ${RHASSPY_APP}/profiles/
|
||||
COPY profiles/zh/profile.json \
|
||||
profiles/zh/custom_words.txt \
|
||||
profiles/zh/download-profile.sh \
|
||||
profiles/zh/check-profile.sh \
|
||||
profiles/zh/espeak_phonemes.txt \
|
||||
profiles/zh/phoneme_examples.txt \
|
||||
profiles/zh/frequent_words.txt \
|
||||
profiles/zh/sentences.ini \
|
||||
profiles/zh/stop_words.txt ${RHASSPY_APP}/profiles/zh/
|
||||
|
||||
COPY profiles/hi/ \
|
||||
profiles/hi/profile.json \
|
||||
profiles/hi/custom_words.txt \
|
||||
profiles/hi/download-profile.sh \
|
||||
profiles/hi/check-profile.sh \
|
||||
profiles/hi/espeak_phonemes.txt \
|
||||
profiles/hi/phoneme_examples.txt \
|
||||
profiles/hi/frequent_words.txt \
|
||||
profiles/hi/sentences.ini \
|
||||
profiles/hi/stop_words.txt ${RHASSPY_APP}/profiles/hi/
|
||||
|
||||
COPY profiles/el/profile.json \
|
||||
profiles/el/custom_words.txt \
|
||||
profiles/el/download-profile.sh \
|
||||
profiles/el/check-profile.sh \
|
||||
profiles/el/espeak_phonemes.txt \
|
||||
profiles/el/phoneme_examples.txt \
|
||||
profiles/el/frequent_words.txt \
|
||||
profiles/el/sentences.ini \
|
||||
profiles/el/stop_words.txt ${RHASSPY_APP}/profiles/el/
|
||||
|
||||
COPY profiles/de/profile.json \
|
||||
profiles/de/custom_words.txt \
|
||||
profiles/de/download-profile.sh \
|
||||
profiles/de/check-profile.sh \
|
||||
profiles/de/espeak_phonemes.txt \
|
||||
profiles/de/phoneme_examples.txt \
|
||||
profiles/de/frequent_words.txt \
|
||||
profiles/de/sentences.ini \
|
||||
profiles/de/stop_words.txt ${RHASSPY_APP}/profiles/de/
|
||||
|
||||
COPY profiles/it/profile.json \
|
||||
profiles/it/custom_words.txt \
|
||||
profiles/it/download-profile.sh \
|
||||
profiles/it/check-profile.sh \
|
||||
profiles/it/espeak_phonemes.txt \
|
||||
profiles/it/phoneme_examples.txt \
|
||||
profiles/it/frequent_words.txt \
|
||||
profiles/it/sentences.ini \
|
||||
profiles/it/stop_words.txt ${RHASSPY_APP}/profiles/it/
|
||||
|
||||
COPY profiles/es/profile.json \
|
||||
profiles/es/custom_words.txt \
|
||||
profiles/es/download-profile.sh \
|
||||
profiles/es/check-profile.sh \
|
||||
profiles/es/espeak_phonemes.txt \
|
||||
profiles/es/phoneme_examples.txt \
|
||||
profiles/es/frequent_words.txt \
|
||||
profiles/es/sentences.ini \
|
||||
profiles/es/stop_words.txt ${RHASSPY_APP}/profiles/es/
|
||||
|
||||
COPY profiles/fr/profile.json \
|
||||
profiles/fr/custom_words.txt \
|
||||
profiles/fr/download-profile.sh \
|
||||
profiles/fr/check-profile.sh \
|
||||
profiles/fr/espeak_phonemes.txt \
|
||||
profiles/fr/phoneme_examples.txt \
|
||||
profiles/fr/frequent_words.txt \
|
||||
profiles/fr/sentences.ini \
|
||||
profiles/fr/stop_words.txt ${RHASSPY_APP}/profiles/fr/
|
||||
|
||||
COPY profiles/ru/profile.json \
|
||||
profiles/ru/custom_words.txt \
|
||||
profiles/ru/download-profile.sh \
|
||||
profiles/ru/check-profile.sh \
|
||||
profiles/ru/espeak_phonemes.txt \
|
||||
profiles/ru/phoneme_examples.txt \
|
||||
profiles/ru/frequent_words.txt \
|
||||
profiles/ru/sentences.ini \
|
||||
profiles/ru/stop_words.txt ${RHASSPY_APP}/profiles/ru/
|
||||
|
||||
COPY profiles/nl/profile.json \
|
||||
profiles/nl/custom_words.txt \
|
||||
profiles/nl/download-profile.sh \
|
||||
profiles/nl/check-profile.sh \
|
||||
profiles/nl/espeak_phonemes.txt \
|
||||
profiles/nl/phoneme_examples.txt \
|
||||
profiles/nl/frequent_words.txt \
|
||||
profiles/nl/sentences.ini \
|
||||
profiles/nl/stop_words.txt ${RHASSPY_APP}/profiles/nl/
|
||||
|
||||
COPY profiles/vi/profile.json \
|
||||
profiles/vi/custom_words.txt \
|
||||
profiles/vi/download-profile.sh \
|
||||
profiles/vi/check-profile.sh \
|
||||
profiles/vi/espeak_phonemes.txt \
|
||||
profiles/vi/phoneme_examples.txt \
|
||||
profiles/vi/frequent_words.txt \
|
||||
profiles/vi/sentences.ini \
|
||||
profiles/vi/stop_words.txt ${RHASSPY_APP}/profiles/vi/
|
||||
|
||||
COPY profiles/pt/profile.json \
|
||||
profiles/pt/custom_words.txt \
|
||||
profiles/pt/download-profile.sh \
|
||||
profiles/pt/check-profile.sh \
|
||||
profiles/pt/espeak_phonemes.txt \
|
||||
profiles/pt/phoneme_examples.txt \
|
||||
profiles/pt/frequent_words.txt \
|
||||
profiles/pt/sentences.ini \
|
||||
profiles/pt/stop_words.txt ${RHASSPY_APP}/profiles/pt/
|
||||
|
||||
COPY profiles/en/profile.json \
|
||||
profiles/en/custom_words.txt \
|
||||
profiles/en/download-profile.sh \
|
||||
profiles/en/check-profile.sh \
|
||||
profiles/en/espeak_phonemes.txt \
|
||||
profiles/en/phoneme_examples.txt \
|
||||
profiles/en/frequent_words.txt \
|
||||
profiles/en/sentences.ini \
|
||||
profiles/en/stop_words.txt ${RHASSPY_APP}/profiles/en/
|
||||
|
||||
@@ -14,30 +14,48 @@ COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
apt-get install -y bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/openfst-1.6.2-1_${BUILD_ARCH}.deb /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
apt-get install --no-install-recommends --yes libfst-dev libfst-tools; \
|
||||
apt-get install -y libfst-dev libfst-tools; \
|
||||
else \
|
||||
dpkg -i /openfst-1.6.2-1_${BUILD_ARCH}.deb; \
|
||||
rm /openfst*.deb; \
|
||||
fi
|
||||
|
||||
RHASSPY_TOOLS
|
||||
COPY download/phonetisaurus-2019.tar.gz /phonetisaurus.tar.gz
|
||||
RUN cd / && tar -xf phonetisaurus.tar.gz
|
||||
RUN cd /phonetisaurus && \
|
||||
./configure && \
|
||||
make -j $MAKE_THREADS && \
|
||||
make install && \
|
||||
rm -rf /phonetisaurus*
|
||||
|
||||
# Install Opengrm
|
||||
COPY download/opengrm-ngram-1.3.3.tar.gz /
|
||||
RUN cd / && tar -xf opengrm-ngram-1.3.3.tar.gz && \
|
||||
cd opengrm-ngram-1.3.3 && \
|
||||
./configure && \
|
||||
make -j $MAKE_THREADS && \
|
||||
make install && \
|
||||
ldconfig && \
|
||||
rm -rf /opengrm*
|
||||
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
RUN python3 -m pip install --no-cache-dir wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY download/jsgf2fst-0.1.0.tar.gz \
|
||||
/download/
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
@@ -54,7 +72,15 @@ RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
# Install Mycroft Precise
|
||||
COPY download/precise-engine_0.3.0_${CPU_ARCH}.tar.gz /precise-engine.tar.gz
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
cd / && tar -xzf /precise-engine.tar.gz && \
|
||||
ln -s /precise-engine/precise-engine /usr/bin/precise-engine && \
|
||||
rm /precise-engine.tar.gz; \
|
||||
fi
|
||||
|
||||
RUN apt-get install -y flite libttspico-utils
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
@@ -67,9 +93,6 @@ RUN ldconfig
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
|
||||
ENV RHASSPY_APP /usr/share/rhasspy
|
||||
|
||||
# Copy script to run
|
||||
@@ -80,6 +103,8 @@ RUN chmod +x /run.sh
|
||||
|
||||
COPY profiles/zh/profile.json \
|
||||
profiles/zh/custom_words.txt \
|
||||
profiles/zh/download-profile.sh \
|
||||
profiles/zh/check-profile.sh \
|
||||
profiles/zh/espeak_phonemes.txt \
|
||||
profiles/zh/phoneme_examples.txt \
|
||||
profiles/zh/frequent_words.txt \
|
||||
@@ -89,6 +114,8 @@ COPY profiles/zh/profile.json \
|
||||
COPY profiles/hi/ \
|
||||
profiles/hi/profile.json \
|
||||
profiles/hi/custom_words.txt \
|
||||
profiles/hi/download-profile.sh \
|
||||
profiles/hi/check-profile.sh \
|
||||
profiles/hi/espeak_phonemes.txt \
|
||||
profiles/hi/phoneme_examples.txt \
|
||||
profiles/hi/frequent_words.txt \
|
||||
@@ -97,6 +124,8 @@ COPY profiles/hi/ \
|
||||
|
||||
COPY profiles/el/profile.json \
|
||||
profiles/el/custom_words.txt \
|
||||
profiles/el/download-profile.sh \
|
||||
profiles/el/check-profile.sh \
|
||||
profiles/el/espeak_phonemes.txt \
|
||||
profiles/el/phoneme_examples.txt \
|
||||
profiles/el/frequent_words.txt \
|
||||
@@ -105,6 +134,8 @@ COPY profiles/el/profile.json \
|
||||
|
||||
COPY profiles/de/profile.json \
|
||||
profiles/de/custom_words.txt \
|
||||
profiles/de/download-profile.sh \
|
||||
profiles/de/check-profile.sh \
|
||||
profiles/de/espeak_phonemes.txt \
|
||||
profiles/de/phoneme_examples.txt \
|
||||
profiles/de/frequent_words.txt \
|
||||
@@ -113,6 +144,8 @@ COPY profiles/de/profile.json \
|
||||
|
||||
COPY profiles/it/profile.json \
|
||||
profiles/it/custom_words.txt \
|
||||
profiles/it/download-profile.sh \
|
||||
profiles/it/check-profile.sh \
|
||||
profiles/it/espeak_phonemes.txt \
|
||||
profiles/it/phoneme_examples.txt \
|
||||
profiles/it/frequent_words.txt \
|
||||
@@ -121,6 +154,8 @@ COPY profiles/it/profile.json \
|
||||
|
||||
COPY profiles/es/profile.json \
|
||||
profiles/es/custom_words.txt \
|
||||
profiles/es/download-profile.sh \
|
||||
profiles/es/check-profile.sh \
|
||||
profiles/es/espeak_phonemes.txt \
|
||||
profiles/es/phoneme_examples.txt \
|
||||
profiles/es/frequent_words.txt \
|
||||
@@ -129,6 +164,8 @@ COPY profiles/es/profile.json \
|
||||
|
||||
COPY profiles/fr/profile.json \
|
||||
profiles/fr/custom_words.txt \
|
||||
profiles/fr/download-profile.sh \
|
||||
profiles/fr/check-profile.sh \
|
||||
profiles/fr/espeak_phonemes.txt \
|
||||
profiles/fr/phoneme_examples.txt \
|
||||
profiles/fr/frequent_words.txt \
|
||||
@@ -137,6 +174,8 @@ COPY profiles/fr/profile.json \
|
||||
|
||||
COPY profiles/ru/profile.json \
|
||||
profiles/ru/custom_words.txt \
|
||||
profiles/ru/download-profile.sh \
|
||||
profiles/ru/check-profile.sh \
|
||||
profiles/ru/espeak_phonemes.txt \
|
||||
profiles/ru/phoneme_examples.txt \
|
||||
profiles/ru/frequent_words.txt \
|
||||
@@ -145,6 +184,8 @@ COPY profiles/ru/profile.json \
|
||||
|
||||
COPY profiles/nl/profile.json \
|
||||
profiles/nl/custom_words.txt \
|
||||
profiles/nl/download-profile.sh \
|
||||
profiles/nl/check-profile.sh \
|
||||
profiles/nl/espeak_phonemes.txt \
|
||||
profiles/nl/phoneme_examples.txt \
|
||||
profiles/nl/frequent_words.txt \
|
||||
@@ -153,6 +194,8 @@ COPY profiles/nl/profile.json \
|
||||
|
||||
COPY profiles/vi/profile.json \
|
||||
profiles/vi/custom_words.txt \
|
||||
profiles/vi/download-profile.sh \
|
||||
profiles/vi/check-profile.sh \
|
||||
profiles/vi/espeak_phonemes.txt \
|
||||
profiles/vi/phoneme_examples.txt \
|
||||
profiles/vi/frequent_words.txt \
|
||||
@@ -161,30 +204,18 @@ COPY profiles/vi/profile.json \
|
||||
|
||||
COPY profiles/pt/profile.json \
|
||||
profiles/pt/custom_words.txt \
|
||||
profiles/pt/download-profile.sh \
|
||||
profiles/pt/check-profile.sh \
|
||||
profiles/pt/espeak_phonemes.txt \
|
||||
profiles/pt/phoneme_examples.txt \
|
||||
profiles/pt/frequent_words.txt \
|
||||
profiles/pt/sentences.ini \
|
||||
profiles/pt/stop_words.txt ${RHASSPY_APP}/profiles/pt/
|
||||
|
||||
COPY profiles/sv/profile.json \
|
||||
profiles/sv/custom_words.txt \
|
||||
profiles/sv/espeak_phonemes.txt \
|
||||
profiles/sv/phoneme_examples.txt \
|
||||
profiles/sv/frequent_words.txt \
|
||||
profiles/sv/sentences.ini \
|
||||
profiles/sv/stop_words.txt ${RHASSPY_APP}/profiles/sv/
|
||||
|
||||
COPY profiles/ca/profile.json \
|
||||
profiles/ca/custom_words.txt \
|
||||
profiles/ca/espeak_phonemes.txt \
|
||||
profiles/ca/phoneme_examples.txt \
|
||||
profiles/ca/frequent_words.txt \
|
||||
profiles/ca/sentences.ini \
|
||||
profiles/ca/stop_words.txt ${RHASSPY_APP}/profiles/ca/
|
||||
|
||||
COPY profiles/en/profile.json \
|
||||
profiles/en/custom_words.txt \
|
||||
profiles/en/download-profile.sh \
|
||||
profiles/en/check-profile.sh \
|
||||
profiles/en/espeak_phonemes.txt \
|
||||
profiles/en/phoneme_examples.txt \
|
||||
profiles/en/frequent_words.txt \
|
||||
@@ -196,12 +227,9 @@ COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -14,30 +14,48 @@ COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
apt-get install -y bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/openfst-1.6.2-1_${BUILD_ARCH}.deb /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
apt-get install --no-install-recommends --yes libfst-dev libfst-tools; \
|
||||
apt-get install -y libfst-dev libfst-tools; \
|
||||
else \
|
||||
dpkg -i /openfst-1.6.2-1_${BUILD_ARCH}.deb; \
|
||||
rm /openfst*.deb; \
|
||||
fi
|
||||
|
||||
RHASSPY_TOOLS
|
||||
COPY download/phonetisaurus-2019.tar.gz /phonetisaurus.tar.gz
|
||||
RUN cd / && tar -xf phonetisaurus.tar.gz
|
||||
RUN cd /phonetisaurus && \
|
||||
./configure && \
|
||||
make -j $MAKE_THREADS && \
|
||||
make install && \
|
||||
rm -rf /phonetisaurus*
|
||||
|
||||
# Install Opengrm
|
||||
COPY download/opengrm-ngram-1.3.3.tar.gz /
|
||||
RUN cd / && tar -xf opengrm-ngram-1.3.3.tar.gz && \
|
||||
cd opengrm-ngram-1.3.3 && \
|
||||
./configure && \
|
||||
make -j $MAKE_THREADS && \
|
||||
make install && \
|
||||
ldconfig && \
|
||||
rm -rf /opengrm*
|
||||
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
RUN python3 -m pip install --no-cache-dir wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY download/jsgf2fst-0.1.0.tar.gz \
|
||||
/download/
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
@@ -54,7 +72,15 @@ RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
# Install Mycroft Precise
|
||||
COPY download/precise-engine_0.3.0_${CPU_ARCH}.tar.gz /precise-engine.tar.gz
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
cd / && tar -xzf /precise-engine.tar.gz && \
|
||||
ln -s /precise-engine/precise-engine /usr/bin/precise-engine && \
|
||||
rm /precise-engine.tar.gz; \
|
||||
fi
|
||||
|
||||
RUN apt-get install -y flite libttspico-utils
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
@@ -67,9 +93,6 @@ RUN ldconfig
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
|
||||
ENV RHASSPY_APP /usr/share/rhasspy
|
||||
|
||||
# Copy script to run
|
||||
@@ -85,12 +108,9 @@ COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -14,30 +14,48 @@ COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
apt-get install -y bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/openfst-1.6.2-1_${BUILD_ARCH}.deb /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
apt-get install --no-install-recommends --yes libfst-dev libfst-tools; \
|
||||
apt-get install -y libfst-dev libfst-tools; \
|
||||
else \
|
||||
dpkg -i /openfst-1.6.2-1_${BUILD_ARCH}.deb; \
|
||||
rm /openfst*.deb; \
|
||||
fi
|
||||
|
||||
RHASSPY_TOOLS
|
||||
COPY download/phonetisaurus-2019.tar.gz /phonetisaurus.tar.gz
|
||||
RUN cd / && tar -xf phonetisaurus.tar.gz
|
||||
RUN cd /phonetisaurus && \
|
||||
./configure && \
|
||||
make -j $MAKE_THREADS && \
|
||||
make install && \
|
||||
rm -rf /phonetisaurus*
|
||||
|
||||
# Install Opengrm
|
||||
COPY download/opengrm-ngram-1.3.3.tar.gz /
|
||||
RUN cd / && tar -xf opengrm-ngram-1.3.3.tar.gz && \
|
||||
cd opengrm-ngram-1.3.3 && \
|
||||
./configure && \
|
||||
make -j $MAKE_THREADS && \
|
||||
make install && \
|
||||
ldconfig && \
|
||||
rm -rf /opengrm*
|
||||
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
RUN python3 -m pip install --no-cache-dir wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY download/jsgf2fst-0.1.0.tar.gz \
|
||||
/download/
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
@@ -54,7 +72,15 @@ RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
# Install Mycroft Precise
|
||||
COPY download/precise-engine_0.3.0_${CPU_ARCH}.tar.gz /precise-engine.tar.gz
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
cd / && tar -xzf /precise-engine.tar.gz && \
|
||||
ln -s /precise-engine/precise-engine /usr/bin/precise-engine && \
|
||||
rm /precise-engine.tar.gz; \
|
||||
fi
|
||||
|
||||
RUN apt-get install -y flite libttspico-utils
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
@@ -67,9 +93,6 @@ RUN ldconfig
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
|
||||
RUN apt-get install -y pulseaudio
|
||||
COPY etc/pulseaudio.client.conf /etc/pulse/client.conf
|
||||
|
||||
@@ -102,12 +125,9 @@ COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -14,24 +14,37 @@ COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
apt-get install -y bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libfst-dev libfst-tools \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/rhasspy-tools_${BUILD_ARCH}.tar.gz /
|
||||
RUN tar -C /usr -xvf /rhasspy-tools_${BUILD_ARCH}.tar.gz && \
|
||||
rm -f /rhasspy-tools_${BUILD_ARCH}.tar.gz
|
||||
COPY download/phonetisaurus-2019_${BUILD_ARCH}.deb /phonetisaurus.deb
|
||||
RUN dpkg -i /phonetisaurus.deb && \
|
||||
rm /phonetisaurus.deb
|
||||
|
||||
# Install Opengrm
|
||||
COPY download/opengrm-ngram-1.3.3.tar.gz /
|
||||
RUN cd / && tar -xf opengrm-ngram-1.3.3.tar.gz && \
|
||||
cd opengrm-ngram-1.3.3 && \
|
||||
./configure && \
|
||||
make -j $MAKE_THREADS && \
|
||||
make install && \
|
||||
ldconfig && \
|
||||
rm -rf /opengrm*
|
||||
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
RUN python3 -m pip install --no-cache-dir wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY download/jsgf2fst-0.1.0.tar.gz \
|
||||
/download/
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
@@ -48,7 +61,15 @@ RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils lame
|
||||
# Install Mycroft Precise
|
||||
COPY download/precise-engine_0.3.0_${CPU_ARCH}.tar.gz /precise-engine.tar.gz
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
cd / && tar -xzf /precise-engine.tar.gz && \
|
||||
ln -s /precise-engine/precise-engine /usr/bin/precise-engine && \
|
||||
rm /precise-engine.tar.gz; \
|
||||
fi
|
||||
|
||||
RUN apt-get install -y flite libttspico-utils
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
@@ -61,9 +82,6 @@ RUN ldconfig
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
|
||||
ENV RHASSPY_APP /usr/share/rhasspy
|
||||
|
||||
# Copy script to run
|
||||
@@ -72,20 +90,135 @@ RUN chmod +x /run.sh
|
||||
|
||||
|
||||
|
||||
COPY profiles/ ${RHASSPY_APP}/profiles/
|
||||
COPY profiles/zh/profile.json \
|
||||
profiles/zh/custom_words.txt \
|
||||
profiles/zh/download-profile.sh \
|
||||
profiles/zh/check-profile.sh \
|
||||
profiles/zh/espeak_phonemes.txt \
|
||||
profiles/zh/phoneme_examples.txt \
|
||||
profiles/zh/frequent_words.txt \
|
||||
profiles/zh/sentences.ini \
|
||||
profiles/zh/stop_words.txt ${RHASSPY_APP}/profiles/zh/
|
||||
|
||||
COPY profiles/hi/ \
|
||||
profiles/hi/profile.json \
|
||||
profiles/hi/custom_words.txt \
|
||||
profiles/hi/download-profile.sh \
|
||||
profiles/hi/check-profile.sh \
|
||||
profiles/hi/espeak_phonemes.txt \
|
||||
profiles/hi/phoneme_examples.txt \
|
||||
profiles/hi/frequent_words.txt \
|
||||
profiles/hi/sentences.ini \
|
||||
profiles/hi/stop_words.txt ${RHASSPY_APP}/profiles/hi/
|
||||
|
||||
COPY profiles/el/profile.json \
|
||||
profiles/el/custom_words.txt \
|
||||
profiles/el/download-profile.sh \
|
||||
profiles/el/check-profile.sh \
|
||||
profiles/el/espeak_phonemes.txt \
|
||||
profiles/el/phoneme_examples.txt \
|
||||
profiles/el/frequent_words.txt \
|
||||
profiles/el/sentences.ini \
|
||||
profiles/el/stop_words.txt ${RHASSPY_APP}/profiles/el/
|
||||
|
||||
COPY profiles/de/profile.json \
|
||||
profiles/de/custom_words.txt \
|
||||
profiles/de/download-profile.sh \
|
||||
profiles/de/check-profile.sh \
|
||||
profiles/de/espeak_phonemes.txt \
|
||||
profiles/de/phoneme_examples.txt \
|
||||
profiles/de/frequent_words.txt \
|
||||
profiles/de/sentences.ini \
|
||||
profiles/de/stop_words.txt ${RHASSPY_APP}/profiles/de/
|
||||
|
||||
COPY profiles/it/profile.json \
|
||||
profiles/it/custom_words.txt \
|
||||
profiles/it/download-profile.sh \
|
||||
profiles/it/check-profile.sh \
|
||||
profiles/it/espeak_phonemes.txt \
|
||||
profiles/it/phoneme_examples.txt \
|
||||
profiles/it/frequent_words.txt \
|
||||
profiles/it/sentences.ini \
|
||||
profiles/it/stop_words.txt ${RHASSPY_APP}/profiles/it/
|
||||
|
||||
COPY profiles/es/profile.json \
|
||||
profiles/es/custom_words.txt \
|
||||
profiles/es/download-profile.sh \
|
||||
profiles/es/check-profile.sh \
|
||||
profiles/es/espeak_phonemes.txt \
|
||||
profiles/es/phoneme_examples.txt \
|
||||
profiles/es/frequent_words.txt \
|
||||
profiles/es/sentences.ini \
|
||||
profiles/es/stop_words.txt ${RHASSPY_APP}/profiles/es/
|
||||
|
||||
COPY profiles/fr/profile.json \
|
||||
profiles/fr/custom_words.txt \
|
||||
profiles/fr/download-profile.sh \
|
||||
profiles/fr/check-profile.sh \
|
||||
profiles/fr/espeak_phonemes.txt \
|
||||
profiles/fr/phoneme_examples.txt \
|
||||
profiles/fr/frequent_words.txt \
|
||||
profiles/fr/sentences.ini \
|
||||
profiles/fr/stop_words.txt ${RHASSPY_APP}/profiles/fr/
|
||||
|
||||
COPY profiles/ru/profile.json \
|
||||
profiles/ru/custom_words.txt \
|
||||
profiles/ru/download-profile.sh \
|
||||
profiles/ru/check-profile.sh \
|
||||
profiles/ru/espeak_phonemes.txt \
|
||||
profiles/ru/phoneme_examples.txt \
|
||||
profiles/ru/frequent_words.txt \
|
||||
profiles/ru/sentences.ini \
|
||||
profiles/ru/stop_words.txt ${RHASSPY_APP}/profiles/ru/
|
||||
|
||||
COPY profiles/nl/profile.json \
|
||||
profiles/nl/custom_words.txt \
|
||||
profiles/nl/download-profile.sh \
|
||||
profiles/nl/check-profile.sh \
|
||||
profiles/nl/espeak_phonemes.txt \
|
||||
profiles/nl/phoneme_examples.txt \
|
||||
profiles/nl/frequent_words.txt \
|
||||
profiles/nl/sentences.ini \
|
||||
profiles/nl/stop_words.txt ${RHASSPY_APP}/profiles/nl/
|
||||
|
||||
COPY profiles/vi/profile.json \
|
||||
profiles/vi/custom_words.txt \
|
||||
profiles/vi/download-profile.sh \
|
||||
profiles/vi/check-profile.sh \
|
||||
profiles/vi/espeak_phonemes.txt \
|
||||
profiles/vi/phoneme_examples.txt \
|
||||
profiles/vi/frequent_words.txt \
|
||||
profiles/vi/sentences.ini \
|
||||
profiles/vi/stop_words.txt ${RHASSPY_APP}/profiles/vi/
|
||||
|
||||
COPY profiles/pt/profile.json \
|
||||
profiles/pt/custom_words.txt \
|
||||
profiles/pt/download-profile.sh \
|
||||
profiles/pt/check-profile.sh \
|
||||
profiles/pt/espeak_phonemes.txt \
|
||||
profiles/pt/phoneme_examples.txt \
|
||||
profiles/pt/frequent_words.txt \
|
||||
profiles/pt/sentences.ini \
|
||||
profiles/pt/stop_words.txt ${RHASSPY_APP}/profiles/pt/
|
||||
|
||||
COPY profiles/en/profile.json \
|
||||
profiles/en/custom_words.txt \
|
||||
profiles/en/download-profile.sh \
|
||||
profiles/en/check-profile.sh \
|
||||
profiles/en/espeak_phonemes.txt \
|
||||
profiles/en/phoneme_examples.txt \
|
||||
profiles/en/frequent_words.txt \
|
||||
profiles/en/sentences.ini \
|
||||
profiles/en/stop_words.txt ${RHASSPY_APP}/profiles/en/
|
||||
|
||||
COPY profiles/defaults.json ${RHASSPY_APP}/profiles/
|
||||
COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
COPY VERSION ${RHASSPY_APP}/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -14,25 +14,37 @@ COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libfst-dev libfst-tools \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
apt-get install -y bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libfst-dev libfst-tools \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/rhasspy-tools_${BUILD_ARCH}.tar.gz /
|
||||
RUN tar -C /usr -xvf /rhasspy-tools_${BUILD_ARCH}.tar.gz && \
|
||||
rm -f /rhasspy-tools_${BUILD_ARCH}.tar.gz
|
||||
COPY download/phonetisaurus-2019_${BUILD_ARCH}.deb /phonetisaurus.deb
|
||||
RUN dpkg -i /phonetisaurus.deb && \
|
||||
rm /phonetisaurus.deb
|
||||
|
||||
# Install Opengrm
|
||||
COPY download/opengrm-ngram-1.3.3.tar.gz /
|
||||
RUN cd / && tar -xf opengrm-ngram-1.3.3.tar.gz && \
|
||||
cd opengrm-ngram-1.3.3 && \
|
||||
./configure && \
|
||||
make -j $MAKE_THREADS && \
|
||||
make install && \
|
||||
ldconfig && \
|
||||
rm -rf /opengrm*
|
||||
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
RUN python3 -m pip install --no-cache-dir wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY download/jsgf2fst-0.1.0.tar.gz \
|
||||
/download/
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
@@ -49,7 +61,15 @@ RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
# Install Mycroft Precise
|
||||
COPY download/precise-engine_0.3.0_${CPU_ARCH}.tar.gz /precise-engine.tar.gz
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
cd / && tar -xzf /precise-engine.tar.gz && \
|
||||
ln -s /precise-engine/precise-engine /usr/bin/precise-engine && \
|
||||
rm /precise-engine.tar.gz; \
|
||||
fi
|
||||
|
||||
RUN apt-get install -y flite libttspico-utils
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
@@ -62,9 +82,6 @@ RUN ldconfig
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
|
||||
ENV RHASSPY_APP /usr/share/rhasspy
|
||||
|
||||
# Copy script to run
|
||||
@@ -80,12 +97,9 @@ COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -14,25 +14,37 @@ COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libfst-dev libfst-tools \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
apt-get install -y bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libfst-dev libfst-tools \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/rhasspy-tools_${BUILD_ARCH}.tar.gz /
|
||||
RUN tar -C /usr -xvf /rhasspy-tools_${BUILD_ARCH}.tar.gz && \
|
||||
rm -f /rhasspy-tools_${BUILD_ARCH}.tar.gz
|
||||
COPY download/phonetisaurus-2019_${BUILD_ARCH}.deb /phonetisaurus.deb
|
||||
RUN dpkg -i /phonetisaurus.deb && \
|
||||
rm /phonetisaurus.deb
|
||||
|
||||
# Install Opengrm
|
||||
COPY download/opengrm-ngram-1.3.3.tar.gz /
|
||||
RUN cd / && tar -xf opengrm-ngram-1.3.3.tar.gz && \
|
||||
cd opengrm-ngram-1.3.3 && \
|
||||
./configure && \
|
||||
make -j $MAKE_THREADS && \
|
||||
make install && \
|
||||
ldconfig && \
|
||||
rm -rf /opengrm*
|
||||
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
RUN python3 -m pip install --no-cache-dir wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY download/jsgf2fst-0.1.0.tar.gz \
|
||||
/download/
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
@@ -49,7 +61,15 @@ RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
# Install Mycroft Precise
|
||||
COPY download/precise-engine_0.3.0_${CPU_ARCH}.tar.gz /precise-engine.tar.gz
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
cd / && tar -xzf /precise-engine.tar.gz && \
|
||||
ln -s /precise-engine/precise-engine /usr/bin/precise-engine && \
|
||||
rm /precise-engine.tar.gz; \
|
||||
fi
|
||||
|
||||
RUN apt-get install -y flite libttspico-utils
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
@@ -62,9 +82,6 @@ RUN ldconfig
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
|
||||
RUN apt-get install -y pulseaudio
|
||||
COPY etc/pulseaudio.client.conf /etc/pulse/client.conf
|
||||
|
||||
@@ -97,12 +114,9 @@ COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -1,17 +1,16 @@
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
apt-get install -y bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/openfst-1.6.2-1_${BUILD_ARCH}.deb /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
apt-get install --no-install-recommends --yes libfst-dev libfst-tools; \
|
||||
apt-get install -y libfst-dev libfst-tools; \
|
||||
else \
|
||||
dpkg -i /openfst-1.6.2-1_${BUILD_ARCH}.deb; \
|
||||
rm /openfst*.deb; \
|
||||
|
||||
@@ -39,28 +39,28 @@ function set_variables {
|
||||
#------------
|
||||
# From source
|
||||
#------------
|
||||
# set_variables "$DIR/shared/" "$DIR/from-source/" \
|
||||
# "$DIR/alsa/" "$DIR/en_profile/" \
|
||||
# | cat - "$template" | m4 > "$out/Dockerfile.from-source.alsa.en"
|
||||
set_variables "$DIR/shared/" "$DIR/from-source/" \
|
||||
"$DIR/alsa/" "$DIR/en_profile/" \
|
||||
| cat - "$template" | m4 > "$out/Dockerfile.from-source.alsa.en"
|
||||
|
||||
# set_variables "$DIR/shared/" "$DIR/from-source/" \
|
||||
# "$DIR/pulseaudio/" "$DIR/en_profile/" \
|
||||
# | cat - "$template" | m4 > "$out/Dockerfile.from-source.pulseaudio.en"
|
||||
set_variables "$DIR/shared/" "$DIR/from-source/" \
|
||||
"$DIR/pulseaudio/" "$DIR/en_profile/" \
|
||||
| cat - "$template" | m4 > "$out/Dockerfile.from-source.pulseaudio.en"
|
||||
|
||||
# set_variables "$DIR/shared/" "$DIR/from-source/" \
|
||||
# "$DIR/alsa/" "$DIR/all_profiles/" \
|
||||
# | cat - "$template" | m4 > "$out/Dockerfile.from-source.alsa.all"
|
||||
set_variables "$DIR/shared/" "$DIR/from-source/" \
|
||||
"$DIR/alsa/" "$DIR/all_profiles/" \
|
||||
| cat - "$template" | m4 > "$out/Dockerfile.from-source.alsa.all"
|
||||
|
||||
#-----------
|
||||
# Pre-built
|
||||
#-----------
|
||||
# set_variables "$DIR/shared/" "$DIR/prebuilt/" \
|
||||
# "$DIR/alsa/" "$DIR/en_profile/" \
|
||||
# | cat - "$template" | m4 > "$out/Dockerfile.prebuilt.alsa.en"
|
||||
set_variables "$DIR/shared/" "$DIR/prebuilt/" \
|
||||
"$DIR/alsa/" "$DIR/en_profile/" \
|
||||
| cat - "$template" | m4 > "$out/Dockerfile.prebuilt.alsa.en"
|
||||
|
||||
# set_variables "$DIR/shared/" "$DIR/prebuilt/" \
|
||||
# "$DIR/pulseaudio/" "$DIR/en_profile/" \
|
||||
# | cat - "$template" | m4 > "$out/Dockerfile.prebuilt.pulseaudio.en"
|
||||
set_variables "$DIR/shared/" "$DIR/prebuilt/" \
|
||||
"$DIR/pulseaudio/" "$DIR/en_profile/" \
|
||||
| cat - "$template" | m4 > "$out/Dockerfile.prebuilt.pulseaudio.en"
|
||||
|
||||
set_variables "$DIR/shared/" "$DIR/prebuilt/" \
|
||||
"$DIR/alsa/" "$DIR/all_profiles/" \
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
COPY download/rhasspy-tools_${BUILD_ARCH}.tar.gz /
|
||||
RUN tar -C /usr -xvf /rhasspy-tools_${BUILD_ARCH}.tar.gz && \
|
||||
rm -f /rhasspy-tools_${BUILD_ARCH}.tar.gz
|
||||
@@ -1,10 +1,10 @@
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
apt-get install -y bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libfst-dev libfst-tools \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
@@ -1,5 +1,5 @@
|
||||
# Install mitlm
|
||||
RUN apt-get install --no-install-recommends --yes gfortran
|
||||
RUN apt-get install -y gfortran
|
||||
COPY download/mitlm-0.4.2.tar.xz /
|
||||
RUN cd / && tar -xf mitlm-0.4.2.tar.xz && cd mitlm-0.4.2/ && \
|
||||
./configure && \
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
RUN python3 -m pip install --no-cache-dir wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY download/jsgf2fst-0.1.0.tar.gz \
|
||||
/download/
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
|
||||
@@ -3,8 +3,5 @@ COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
COPY VERSION ${RHASSPY_APP}/
|
||||
|
||||
@@ -1 +1 @@
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils lame
|
||||
RUN apt-get install -y flite libttspico-utils
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
theme: jekyll-theme-cayman
|
||||
@@ -2,49 +2,15 @@
|
||||
|
||||
Rhasspy was created and is currently maintained by [Michael Hansen](https://synesthesiam.com/).
|
||||
|
||||
<img src="../img/mike-head.png" style="max-height: 100px;" title="Mike head">
|
||||

|
||||
|
||||
Special thanks to:
|
||||
|
||||
* [Romkabouter](https://github.com/Romkabouter)
|
||||
* [koenvervloesem](https://github.com/koenvervloesem)
|
||||
* [FunkyBoT](https://community.home-assistant.io/u/FunkyBoT)
|
||||
* [fastjack](https://community.rhasspy.org/u/fastjack)
|
||||
* [S_n_Nguy_n](https://community.home-assistant.io/u/S_n_Nguy_n)
|
||||
|
||||
## Motivation
|
||||
|
||||
A typical voice assistant (Alexa, Google Home, etc.) solves a number of important problems:
|
||||
|
||||
1. Deciding when to record audio ([wake word](wake-word.md))
|
||||
2. Listening for voice commands ([command listener](command-listener.md))
|
||||
3. Transcribing command/question ([speech to text](speech-to-text.md))
|
||||
4. Interpreting the speaker's **intent** from the text ([intent recognition](intent-recognition.md))
|
||||
5. Fulfilling the speaker's intent ([intent handling](intent-handling.md))
|
||||
|
||||
Rhasspy provides **offline, private solutions** to problems 1-4 using off-the-shelf tools. These tools are:
|
||||
|
||||
* **Wake word**
|
||||
* [Pocketsphinx keyphrase](https://cmusphinx.github.io/wiki/tutoriallm/#using-keyword-lists-with-pocketsphinx)
|
||||
* [Mycroft Precise](https://github.com/MycroftAI/mycroft-precise)
|
||||
* [snowboy](https://snowboy.kitt.ai)
|
||||
* [porcupine](https://github.com/Picovoice/Porcupine)
|
||||
* **Command listener**
|
||||
* [webrtcvad](https://github.com/wiseman/py-webrtcvad)
|
||||
* **Speech to text**
|
||||
* [Pocketsphinx](https://github.com/cmusphinx/pocketsphinx)
|
||||
* [Kaldi](https://kaldi-asr.org)
|
||||
* **Intent recognition**
|
||||
* [OpenFST](https://www.openfst.org)
|
||||
* [fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy)
|
||||
* [Mycroft Adapt](https://github.com/MycroftAI/adapt)
|
||||
* [flair](http://github.com/zalandoresearch/flair)
|
||||
* [Rasa NLU](https://rasa.com/)
|
||||
|
||||
For problem 5 (fulfilling the speaker's intent), Rhasspy works with external home automation software, such as Home Assistant's built-in [automation capability](https://www.home-assistant.io/docs/automation/) or a [Node-RED flow](https://nodered.org).
|
||||
|
||||
For each intent you define, Rhasspy emits a JSON event that can do anything Home Assistant can do (toggle switches, call REST services, etc.). This means that Rhasspy will do very little out of the box compared to other voice assistants, but there are also *no limits* to what can be done.
|
||||
|
||||
## Supporting Tools
|
||||
|
||||
The following tools/libraries help to support Rhasspy:
|
||||
@@ -62,13 +28,12 @@ The following tools/libraries help to support Rhasspy:
|
||||
* [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) (word pronunciations)
|
||||
* [PicoTTS](https://en.wikipedia.org/wiki/SVOX) (text to speech)
|
||||
* [Pocketsphinx](https://github.com/cmusphinx/pocketsphinx) (speech to text, wake word)
|
||||
* [porcupine](https://github.com/Picovoice/Porcupine) (wake word)
|
||||
* [PyAudio](https://people.csail.mit.edu/hubert/pyaudio/) (microphone)
|
||||
* [pyjsgf](https://github.com/Danesprite/pyjsgf) (JSGF grammar parsing)
|
||||
* [Python 3](https://www.python.org)
|
||||
* [OpenFST](http://www.openfst.org) (intent recognition)
|
||||
* [Opengrm](http://www.opengrm.org/twiki/bin/view/GRM/NGramLibrary) (language modeling)
|
||||
* [Rasa NLU](https://rasa.com/) (intent recognition)
|
||||
* [RasaNLU](https://rasa.com/) (intent recognition)
|
||||
* [sphinxtrain](https://github.com/cmusphinx/sphinxtrain) (acoustic model tuning)
|
||||
* [snowboy](https://snowboy.kitt.ai) (wake word)
|
||||
* [Sox](http://sox.sourceforge.net) (WAV conversion)
|
||||
|
||||
@@ -22,11 +22,11 @@ Add to your [profile](profiles.md):
|
||||
```
|
||||
|
||||
Set `microphone.pyaudio.device` to a PyAudio device number or leave blank for the default device.
|
||||
Streams 30ms chunks of 16-bit, 16 kHz mono audio by default (480 frames).
|
||||
Streams 30ms chunks of 16-bit, 16 Khz mono audio by default (480 frames).
|
||||
|
||||
See `rhasspy.audio_recorder.PyAudioRecorder` for details.
|
||||
|
||||
## ALSA
|
||||
## ALSA
|
||||
|
||||
Starts an `arecord` process locally and reads audio data from its standard out.
|
||||
Works best with [ALSA](https://www.alsa-project.org/main/index.php/Main_Page).
|
||||
@@ -42,7 +42,7 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Set `microphone.arecord.device` to the name of the ALSA device to use (`-D` flag
|
||||
to `arecord`) or leave blank for the default device.
|
||||
By default, calls `arecord -t raw -r 16000 -f S16_LE -c 1` and reads 30ms (960
|
||||
@@ -52,9 +52,9 @@ See `rhasspy.audio_recorder.ARecordAudioRecorder` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Listens to the `hermes/audioServer/<SITE_ID>/audioFrame` topic for WAV data ([Hermes protocol](https://docs.snips.ai/reference/hermes)).
|
||||
Listens to the `hermes/audioServer/<SITE_ID>/audioFrame` topic for WAV data ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol)).
|
||||
This allows Rhasspy to receive audio from [Snips.AI](https://snips.ai/).
|
||||
Audio data is automatically converted to 16-bit, 16 kHz mono with [sox](http://sox.sourceforge.net).
|
||||
Audio data is automatically converted to 16-bit, 16Khz mono with [sox](http://sox.sourceforge.net).
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -72,72 +72,12 @@ Add to your [profile](profiles.md):
|
||||
"site_id": "default"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Adjust the `mqtt` configuration to connect to your MQTT broker.
|
||||
Set `mqtt.site_id` to match your Snips.AI siteId.
|
||||
|
||||
See `rhasspy.audio_recorder.HermesAudioRecorder` for details.
|
||||
|
||||
## HTTP Stream
|
||||
|
||||
Accepts chunks of 16-bit 16 kHz mono audio via an HTTP POST stream (assumes [chunked transfer encoding](https://en.wikipedia.org/wiki/Chunked_transfer_encoding)).
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"microphone": {
|
||||
"system": "http",
|
||||
"http": {
|
||||
"host": "127.0.0.1",
|
||||
"port": 12333,
|
||||
"stop_after": "never"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Set `microphone.http.stop_after` to one of "never", "text", or "intent". When set to "never", you can continuously stream (chunked) audio into Rhasspy across multiple voice commands. When set to "text" or "intent", the stream will be closed when the first voice command has been transcribed ("text") or recognized ("intent"). Once closed, you can perform an HTTP GET request to the stream URL to retrieve the result (text for transcriptions or JSON for intent).
|
||||
|
||||
Note that `microphone.http.port` must be different than Rhasspy's webserver port (usually 12101).
|
||||
|
||||
See `rhasspy.audio_recorder.HTTPAudioRecorder` for details.
|
||||
|
||||
## GStreamer
|
||||
|
||||
Receives audio chunks via stdout from a [GStreamer](https://gstreamer.freedesktop.org/) pipeline.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"microphone": {
|
||||
"system": "gstreamer",
|
||||
"gstreamer": {
|
||||
"pipeline": "...",
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Set `microphone.gstreamer.pipeline` to your GStreamer pipeline **without a sink** (this will be added by Rhasspy). By default, the pipeline is:
|
||||
|
||||
```
|
||||
udpsrc port=12333 ! rawaudioparse use-sink-caps=false format=pcm pcm-format=s16le sample-rate=16000 num-channels=1 ! queue ! audioconvert ! audioresample
|
||||
```
|
||||
|
||||
which "simply" receives raw 16-bit 16 kHz audio chunks via UDP port 12333. You could stream microphone audio to Rhasspy from another machine by running the following terminal command:
|
||||
|
||||
```bash
|
||||
gst-launch-1.0 \
|
||||
autoaudiosrc ! \
|
||||
audioconvert ! \
|
||||
audioresample ! \
|
||||
audio/x-raw, rate=16000, channels=1, format=S16LE ! \
|
||||
udpsink host=RHASSPY_SERVER port=12333
|
||||
```
|
||||
|
||||
where `RHASSPY_SERVER` is the hostname of your Rhasspy server (e.g., `localhost`).
|
||||
|
||||
The Rhasspy Docker images contain the ["good" plugin](https://gstreamer.freedesktop.org/data/doc/gstreamer/head/gst-plugins-good-plugins/html/) set for GStreamer, which includes a wide variety of ways to stream/transform audio.
|
||||
|
||||
See `rhasspy.audio_recorder.GStreamerAudioRecorder` for details.
|
||||
|
||||
## Dummy
|
||||
|
||||
@@ -152,3 +92,4 @@ Add to your [profile](profiles.md):
|
||||
```
|
||||
|
||||
See `rhasspy.audio_recorder.DummyAudioRecorder` for details.
|
||||
|
||||
|
||||
@@ -9,44 +9,41 @@ Plays WAV files on the local device by calling the `aplay` command. Should work
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"sounds": {
|
||||
"system": "aplay",
|
||||
"aplay": {
|
||||
"device": ""
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
"sounds": {
|
||||
"system": "aplay",
|
||||
"aplay": {
|
||||
"device": ""
|
||||
}
|
||||
}
|
||||
|
||||
If provided, `sounds.aplay.device` is passed to `aplay` with the `-D` argument.
|
||||
Leave it blank to use the default device.
|
||||
|
||||
See `rhasspy.audio_player.APlayAudioPlayer` for details.
|
||||
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Publishes WAV data to the `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>` topic ([Hermes protocol](https://docs.snips.ai/reference/hermes)).
|
||||
Publishes WAV data to the `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>` topic ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol)).
|
||||
This allows Rhasspy to send audio to [Snips.AI](https://snips.ai/).
|
||||
|
||||
Rhasspy will by default send 16 kHz, 16-bit mono audio, unless specified otherwise.
|
||||
Rhasspy will always try to send 16Khz, 16-bit mono audio.
|
||||
The request id is generated each time a sound is played using `uuid.uuid4`.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"sounds": {
|
||||
"system": "hermes"
|
||||
},
|
||||
|
||||
"mqtt": {
|
||||
"enabled": true,
|
||||
"host": "localhost",
|
||||
"username": "",
|
||||
"port": 1883,
|
||||
"password": "",
|
||||
"site_id": "default"
|
||||
}
|
||||
```
|
||||
"sounds": {
|
||||
"system": "hermes"
|
||||
},
|
||||
|
||||
"mqtt": {
|
||||
"enabled": true,
|
||||
"host": "localhost",
|
||||
"username": "",
|
||||
"port": 1883,
|
||||
"password": "",
|
||||
"site_id": "default"
|
||||
}
|
||||
|
||||
Adjust the `mqtt` configuration to connect to your MQTT broker.
|
||||
Set `mqtt.site_id` to match your Snips.AI siteId.
|
||||
|
||||
@@ -11,6 +11,7 @@ You can also make Rhasspy record a voice command using the [HTTP API](usage.md#h
|
||||
2. Speaking your voice command
|
||||
3. POST-ing to `/api/stop-recording`. Rhasspy will stop recording and process the voice command.
|
||||
|
||||
|
||||
## WebRTCVAD
|
||||
|
||||
Listens for voice commands using [webrtcvad](https://github.com/wiseman/py-webrtcvad) to detect speech and silence.
|
||||
@@ -32,11 +33,11 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
This system listens for up to `timeout_sec` for a voice command. The first few frames of audio data are discarded (`throwaway_buffers`) to avoid clicks from the microphone being engaged. When speech is detected for some number of successive frames (`speech_buffers`), the voice command is considered to have *started*. After `min_sec`, Rhasspy will start listening for silence. If at least `silence_sec` goes by without any speech detected, the command is considered *finished*, and the recorded WAV data is sent to the [speech recognition system](speech-to-text.md).
|
||||
|
||||
You may want to adjust `min_sec`, `silence_sec`, and `vad_mode` for your environment.
|
||||
These control how short a voice command can be (`min_sec`), how much silence is required before Rhasspy stops listening (`silence_sec`), and how aggressive the voice activity filter is (`vad_mode`, an integer between 0 and 3, where 0 is the least aggressive about filtering out non-speech and 3 is the most aggressive).
|
||||
These control how short a voice command can be (`min_sec`), how much silence is required before Rhasspy stops listening (`silence_sec`), and how sensitive the voice activity detector is (`vad_mode`, higher is more sensitive).
|
||||
|
||||
**NOTE**: you must set `chunk_size` such that (relative to sample rate) it produces 10, 20, or 30 millisecond buffers. This is required by `webrtcvad`.
|
||||
|
||||
@@ -59,15 +60,15 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
See `rhasspy.command_listener.OneShotCommandListener` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Subscribes to the `hermes/asr/startListening` and `hermes/asr/stopListening` topics ([Hermes protocol](https://docs.snips.ai/reference/hermes)).
|
||||
Subscribes to the `hermes/asr/startListening` and `hermes/asr/stopListening` topics ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol)).
|
||||
This allows Rhasspy to be controlled by [Snips.AI](https://snips.ai/).
|
||||
|
||||
Wakes up Rhasspy when `startListening` is received and starts recording. Stops recording when `stopListening` is received and processes the voice command.
|
||||
Wakes up Rhasspy when `startListening` is received and starts recording. Stops recording when `stopListening` is received and processes the voice command.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -95,16 +96,12 @@ Set `mqtt.site_id` to match your Snips.AI siteId.
|
||||
|
||||
Using [mosquitto_pub](https://mosquitto.org/man/mosquitto_pub-1.html), wake up Rhasspy with:
|
||||
|
||||
```bash
|
||||
mosquitto_pub -t 'hermes/asr/startListening' -m '{ "siteId": "default" }'
|
||||
```
|
||||
|
||||
mosquitto_pub -t 'hermes/asr/startListening' -m '{ "siteId": "default" }'
|
||||
|
||||
Say your voice command, then stop recording with:
|
||||
|
||||
```bash
|
||||
mosquitto_pub -t 'hermes/asr/stopListening' -m '{ "siteId": "default" }'
|
||||
```
|
||||
|
||||
mosquitto_pub -t 'hermes/asr/stopListening' -m '{ "siteId": "default" }'
|
||||
|
||||
Rhasspy should process your voice command.
|
||||
|
||||
See `rhasspy.command_listener.HermesCommandListener` for details.
|
||||
@@ -127,7 +124,7 @@ Add to your [profile](profiles.md):
|
||||
|
||||
When awake, Rhasspy normally listens for voice commands from the microphone and waits for silence by using [webrtcvad](https://github.com/wiseman/py-webrtcvad). You can call a custom program that will listen for a voice command and simply return the recorded WAV audio data to Rhasspy.
|
||||
|
||||
When Rhasspy wakes up, your program will be called with the given arguments. The program's output should be WAV data with the recorded voice command (Rhasspy will automatically convert this to 16-bit 16 kHz mono if necessary).
|
||||
When Rhasspy wakes up, your program will be called with the given arguments. The program's output should be WAV data with the recorded voice command (Rhasspy will automatically convert this to 16-bit 16 kHz mono if necessary).
|
||||
|
||||
The following environment variables are available to your program:
|
||||
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
# Development
|
||||
|
||||
Rhasspy's code can be found [on GitHub](https://github.com/synesthesiam/rhasspy).
|
||||
|
||||
## Set up your development environment
|
||||
|
||||
If you want to start developing on Rhasspy, [fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the repository, and clone your fork:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/<your_username>/rhasspy.git
|
||||
cd rhasspy
|
||||
```
|
||||
|
||||
Add the original repository as an [upstream remote](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/configuring-a-remote-for-a-fork):
|
||||
|
||||
```bash
|
||||
git remote add upstream https://github.com/synesthesiam/rhasspy.git
|
||||
```
|
||||
|
||||
Then follow the installation steps for a [virtual environment](installation.md#virtual-environment). If the `create-venv.sh` script fails, please [report an issue](https://github.com/synesthesiam/rhasspy/issues) before proceeding.
|
||||
|
||||
If you pull changes, make sure to re-download and extract `rhasspy-web-dist.tar.gz` from [the releases page](https://github.com/synesthesiam/rhasspy/releases/tag/v2.0). This contains the pre-compiled web artifacts. Alternatively, you can install [yarn](https://yarnpkg.com) and run `yarn build` in the `rhasspy` directory after a `git pull`.
|
||||
|
||||
## Run the unit tests
|
||||
|
||||
A good start to check whether your development environment is set up correctly (or to find some bugs) is to run the unit tests:
|
||||
|
||||
```bash
|
||||
./run-tests.sh
|
||||
```
|
||||
|
||||
This will run tests against pre-recorded WAV files in `rhasspy/etc/test` for specific languages. You can run tests only for a specific language (profile) like this:
|
||||
|
||||
```bash
|
||||
./run-tests.sh -p en
|
||||
```
|
||||
|
||||
It’s good practice to run the unit tests before and after you work on something, to be sure your changes don't accidentally break something.
|
||||
|
||||
## Keeping your fork synchronized
|
||||
|
||||
When the upstream repository has new commits, you should [synchronize your fork](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/syncing-a-fork):
|
||||
|
||||
```bash
|
||||
git fetch upstream
|
||||
git checkout master
|
||||
git merge upstream/master
|
||||
```
|
||||
|
||||
Then [update your fork on GitHub](https://help.github.com/en/github/using-git/pushing-commits-to-a-remote-repository):
|
||||
|
||||
```bash
|
||||
git push
|
||||
```
|
||||
|
||||
Your fork is now synchronized to the original repository.
|
||||
|
||||
## Development practices
|
||||
|
||||
* Before starting significant work, please propose it and discuss it first on the [issue tracker](https://github.com/synesthesiam/rhasspy/issues) on GitHub. Other people may have suggestions, will want to collaborate and will wish to review your code.
|
||||
* Please work on one piece of conceptual work at a time. Keep each narrative of work in a different branch.
|
||||
* As much as possible, have each commit solve one problem.
|
||||
* A commit must not leave the project in a non-functional state.
|
||||
* Run the unit tests before you create a commit.
|
||||
* Treat code, tests and documentation as one.
|
||||
* Create a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork) from your fork.
|
||||
|
||||
## Development workflow
|
||||
|
||||
If you want to start working on a specific feature or bug fix, this is an example workflow:
|
||||
|
||||
* Synchronize your fork with the upstream repository.
|
||||
* Create a new branch: `git checkout -b <nameofbranch>`
|
||||
* Create your changes.
|
||||
* Add the changed files with `git add <files>`.
|
||||
* Commit your changes with `git commit`.
|
||||
* Push your changes to your fork on GitHub.
|
||||
* Create a pull request from your fork.
|
||||
|
||||
## License of contributions
|
||||
|
||||
By submitting patches to this project, you agree to allow them to be redistributed under the project’s [license](license.md) according to the normal forms and usages of the open source community.
|
||||
|
||||
It is your responsibility to make sure you have all the necessary rights to contribute to the project.
|
||||
@@ -4,36 +4,12 @@ Rhasspy is designed to be run on different kinds of hardware, such as:
|
||||
|
||||
* Raspberry Pi 2-3 B/B+ (`armhf`/`aarch64`)
|
||||
* Desktop/laptop/server (`amd64`)
|
||||
* Raspberry Pi Zero (`armv6l`)
|
||||
* You must use a [virtual environment](installation.md#virtual-environment)
|
||||
* The [Kaldi speech recognizer](speech-to-text.md#kaldi) is **not** supported
|
||||
|
||||
The table below summarizes architecture compatibility with Rhasspy's components:
|
||||
|
||||
| Category | Name | amd64 | armhf | aarch64 |
|
||||
| -------- | ------ | ------- | ------- | ------- |
|
||||
| **Wake Word** | [pocketsphinx](wake-word.md#pocketsphinx) | ✓ | ✓ | ✓ |
|
||||
| | [snowboy](wake-word.md#snowboy) | ✓ | ✓ | |
|
||||
| | [precise](wake-word.md#mycroft-precise) | ✓ | ✓ | |
|
||||
| | [porcupine](wake-word.md#porcupine) | ✓ | ✓ | ✓ |
|
||||
| **Speech to Text** | [pocketsphinx](speech-to-text.md#pocketsphinx) | ✓ | ✓ | ✓ |
|
||||
| | [kaldi](speech-to-text.md#kaldi) | ✓ | ✓ | ✓ |
|
||||
| **Intent Recognition** | [fsticuffs](intent-recognition.md#fsticuffs) | ✓ | ✓ | ✓ |
|
||||
| | [fuzzywuzzy](intent-recognition.md#fuzzywuzzy) | ✓ | ✓ | ✓ |
|
||||
| | [adapt](intent-recognition.md#mycroft-adapt) | ✓ | ✓ | ✓ |
|
||||
| | [flair](intent-recognition.md#flair) | ✓ | | |
|
||||
| | [rasaNLU](intent-recognition.md#rasanlu) | ✓ | ✓ | ✓ |
|
||||
| **Text to Speech** | [espeak](text-to-speech.md#espeak) | ✓ | ✓ | ✓ |
|
||||
| | [flite](text-to-speech.md#flite) | ✓ | ✓ | ✓ |
|
||||
| | [picotts](text-to-speech.md#picotts) | ✓ | ✓ | ✓ |
|
||||
| | [marytts](text-to-speech.md#marytts) | ✓ | ✓ | ✓ |
|
||||
| | [wavenet](text-to-speech.md#google-wavenet) | ✓ | ✓ | ✓ |
|
||||
|
||||
## Raspberry Pi
|
||||
|
||||
To run Rhasspy on a Raspberry Pi, you'll need at least a 4 GB SD card and a good power supply. I highly recommend the [CanaKit Starter Kit](https://www.amazon.com/CanaKit-Raspberry-Starter-Premium-Black/dp/B07BCC8PK7), which includes a 32 GB SD card, a 2.5 A power supply, and a case.
|
||||
|
||||
Some components of Rhasspy will not work on the Raspberry Pi 3 B+ model with a 64-bit operating system (`aarch64`). As of the time of this writing, these are:
|
||||
Some components of Rhasspy will not work on the Raspberry Pi 3 B+ model (`aarch64`). As of the time of this writing, these are:
|
||||
|
||||
* [snowboy](wake-word.md#snowboy) (wake word)
|
||||
* [Mycroft Precise](wake-word.md#mycroft-precise) (wake word)
|
||||
|
||||
|
Before Width: | Height: | Size: 20 KiB |
@@ -1,140 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="149.42726mm"
|
||||
height="36.848656mm"
|
||||
viewBox="0 0 149.42726 36.848656"
|
||||
version="1.1"
|
||||
id="svg860"
|
||||
inkscape:version="0.92.3 (2405546, 2018-03-11)"
|
||||
sodipodi:docname="rhasspy-discourse-logo.svg"
|
||||
inkscape:export-filename="./rhasspy-discourse-logo.png"
|
||||
inkscape:export-xdpi="82.716721"
|
||||
inkscape:export-ydpi="82.716721">
|
||||
<defs
|
||||
id="defs854" />
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="0.9899495"
|
||||
inkscape:cx="268.11251"
|
||||
inkscape:cy="139.11788"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="false"
|
||||
fit-margin-top="0"
|
||||
fit-margin-left="0"
|
||||
fit-margin-right="0"
|
||||
fit-margin-bottom="0"
|
||||
inkscape:window-width="1440"
|
||||
inkscape:window-height="755"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="0"
|
||||
inkscape:window-maximized="1" />
|
||||
<metadata
|
||||
id="metadata857">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"
|
||||
transform="translate(47.552776,-100.1735)">
|
||||
<circle
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="path1476"
|
||||
cx="-29.128448"
|
||||
cy="118.59783"
|
||||
r="18.174328" />
|
||||
<g
|
||||
transform="matrix(0.80207931,0,0,0.80207931,-74.139422,96.215375)"
|
||||
id="g2275">
|
||||
<g
|
||||
id="text817"
|
||||
style="font-style:normal;font-weight:normal;font-size:41.37965775px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1.03449142"
|
||||
transform="rotate(-45)"
|
||||
aria-label="R">
|
||||
<path
|
||||
sodipodi:nodetypes="ccccccccccccccccssccccccccc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path819"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:41.38083267px;font-family:'CC Adamantium';-inkscape-font-specification:'CC Adamantium, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:1.03449142"
|
||||
d="M 31.509252,62.491941 31.16794,75.667305 H 30.83455 L 28.604505,65.027061 15.316738,59.73528 13.781121,75.667305 H 13.286086 L 11.528138,71.268365 10.110857,66.929543 9.526899,61.048002 6.9616279,56.651114 8.7144255,54.299084 6.356732,51.899807 8.2521138,50.246675 6.1006224,45.789404 9.891565,42.358563 c 2.435726,-1.492588 4.806268,-0.545105 7.30443,-1.317335 4.174203,-1.290327 7.29492,-1.792422 11.275957,5.059621 0.756691,1.302392 3.239334,1.578749 4.130578,3.198298 -0.882306,1.555823 -2.064327,2.923061 -3.546063,4.101714 -1.481735,1.171918 -3.152055,2.175457 -5.01096,3.010617 -1.852169,0.828425 -3.852512,1.535617 -6.001029,2.121576 z M 25.51612,48.388298 c -6.142518,4.42909 -6.341445,-0.106922 -8.663766,-3.716207 l -1.283048,13.860963 c 5.545523,-1.913183 8.340713,-6.051669 9.946814,-10.144756 z" />
|
||||
</g>
|
||||
<ellipse
|
||||
ry="0.93544334"
|
||||
rx="0.33408689"
|
||||
cy="21.859995"
|
||||
cx="52.059788"
|
||||
id="path2115"
|
||||
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1" />
|
||||
<ellipse
|
||||
transform="rotate(-45)"
|
||||
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1"
|
||||
id="ellipse2117"
|
||||
cx="18.873178"
|
||||
cy="50.914211"
|
||||
rx="0.33408689"
|
||||
ry="0.93544334" />
|
||||
<path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2119"
|
||||
d="m 64.331743,23.950737 -0.788701,-2.167883 0.785662,-0.376444 0.715334,2.441702 z"
|
||||
style="fill:#000000;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
style="fill:#000000;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 69.630908,29.701094 1.48309,-1.766977 0.718843,0.492181 -1.75691,1.840348 z"
|
||||
id="path2121"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
sodipodi:nodetypes="cscc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2123"
|
||||
d="m 47.978861,19.145376 c -0.0362,0.284741 -0.632118,0.443544 -1.331028,0.354698 -0.698909,-0.08885 -1.236142,-0.391701 -1.199944,-0.676442 z"
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1" />
|
||||
<path
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1"
|
||||
d="m 49.344113,18.846496 c 0.224679,0.178626 0.762248,-0.123625 1.200693,-0.675116 0.438451,-0.55148 0.611752,-1.143345 0.387075,-1.321972 z"
|
||||
id="path2126"
|
||||
inkscape:connector-curvature="0"
|
||||
sodipodi:nodetypes="cscc" />
|
||||
<path
|
||||
sodipodi:nodetypes="ccc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2128"
|
||||
d="m 43.707615,19.788656 8.68626,10.557147 c 2.944473,-4.699489 1.792375,-9.398979 -0.200452,-14.098468"
|
||||
style="opacity:1;fill:none;stroke:#808080;stroke-width:0.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
</g>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:30.12816238px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5"
|
||||
x="-5.9640822"
|
||||
y="128.49496"
|
||||
id="text824"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan822"
|
||||
x="-5.9640822"
|
||||
y="128.49496"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:30.12828064px;font-family:'Sansus Webissimo';-inkscape-font-specification:'Sansus Webissimo, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#ffffff;stroke:#000000;stroke-width:0.5">RHASSPY</tspan></text>
|
||||
</g>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 6.9 KiB |
|
Before Width: | Height: | Size: 16 KiB |
|
Before Width: | Height: | Size: 38 KiB |
@@ -1,123 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="36.848656mm"
|
||||
height="36.848656mm"
|
||||
viewBox="0 0 36.848656 36.848656"
|
||||
version="1.1"
|
||||
id="svg860"
|
||||
inkscape:version="0.92.3 (2405546, 2018-03-11)"
|
||||
sodipodi:docname="rhasspy-raven-square.svg"
|
||||
inkscape:export-filename="./rhasspy-discourse-square-logo-nocircle.png"
|
||||
inkscape:export-xdpi="352.92468"
|
||||
inkscape:export-ydpi="352.92468">
|
||||
<defs
|
||||
id="defs854" />
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="1.979899"
|
||||
inkscape:cx="-98.08577"
|
||||
inkscape:cy="43.808495"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="false"
|
||||
fit-margin-top="0"
|
||||
fit-margin-left="0"
|
||||
fit-margin-right="0"
|
||||
fit-margin-bottom="0"
|
||||
inkscape:window-width="1440"
|
||||
inkscape:window-height="755"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="0"
|
||||
inkscape:window-maximized="1" />
|
||||
<metadata
|
||||
id="metadata857">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"
|
||||
transform="translate(47.552776,-100.1735)">
|
||||
<g
|
||||
transform="matrix(0.80207931,0,0,0.80207931,-74.139422,96.215375)"
|
||||
id="g2275">
|
||||
<g
|
||||
id="text817"
|
||||
style="font-style:normal;font-weight:normal;font-size:41.37965775px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1.03449142"
|
||||
transform="rotate(-45)"
|
||||
aria-label="R">
|
||||
<path
|
||||
sodipodi:nodetypes="ccccccccccccccccssccccccccc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path819"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:41.38083267px;font-family:'CC Adamantium';-inkscape-font-specification:'CC Adamantium, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:1.03449142"
|
||||
d="M 31.509252,62.491941 31.16794,75.667305 H 30.83455 L 28.604505,65.027061 15.316738,59.73528 13.781121,75.667305 H 13.286086 L 11.528138,71.268365 10.110857,66.929543 9.526899,61.048002 6.9616279,56.651114 8.7144255,54.299084 6.356732,51.899807 8.2521138,50.246675 6.1006224,45.789404 9.891565,42.358563 c 2.435726,-1.492588 4.806268,-0.545105 7.30443,-1.317335 4.174203,-1.290327 7.29492,-1.792422 11.275957,5.059621 0.756691,1.302392 3.239334,1.578749 4.130578,3.198298 -0.882306,1.555823 -2.064327,2.923061 -3.546063,4.101714 -1.481735,1.171918 -3.152055,2.175457 -5.01096,3.010617 -1.852169,0.828425 -3.852512,1.535617 -6.001029,2.121576 z M 25.51612,48.388298 c -6.142518,4.42909 -6.341445,-0.106922 -8.663766,-3.716207 l -1.283048,13.860963 c 5.545523,-1.913183 8.340713,-6.051669 9.946814,-10.144756 z" />
|
||||
</g>
|
||||
<ellipse
|
||||
ry="0.93544334"
|
||||
rx="0.33408689"
|
||||
cy="21.859995"
|
||||
cx="52.059788"
|
||||
id="path2115"
|
||||
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1" />
|
||||
<ellipse
|
||||
transform="rotate(-45)"
|
||||
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1"
|
||||
id="ellipse2117"
|
||||
cx="18.873178"
|
||||
cy="50.914211"
|
||||
rx="0.33408689"
|
||||
ry="0.93544334" />
|
||||
<path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2119"
|
||||
d="m 64.331743,23.950737 -0.788701,-2.167883 0.785662,-0.376444 0.715334,2.441702 z"
|
||||
style="fill:#000000;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
style="fill:#000000;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 69.630908,29.701094 1.48309,-1.766977 0.718843,0.492181 -1.75691,1.840348 z"
|
||||
id="path2121"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
sodipodi:nodetypes="cscc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2123"
|
||||
d="m 47.978861,19.145376 c -0.0362,0.284741 -0.632118,0.443544 -1.331028,0.354698 -0.698909,-0.08885 -1.236142,-0.391701 -1.199944,-0.676442 z"
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1" />
|
||||
<path
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1"
|
||||
d="m 49.344113,18.846496 c 0.224679,0.178626 0.762248,-0.123625 1.200693,-0.675116 0.438451,-0.55148 0.611752,-1.143345 0.387075,-1.321972 z"
|
||||
id="path2126"
|
||||
inkscape:connector-curvature="0"
|
||||
sodipodi:nodetypes="cscc" />
|
||||
<path
|
||||
sodipodi:nodetypes="ccc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2128"
|
||||
d="m 43.707615,19.788656 8.68626,10.557147 c 2.944473,-4.699489 1.792375,-9.398979 -0.200452,-14.098468"
|
||||
style="opacity:1;fill:none;stroke:#808080;stroke-width:0.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 5.8 KiB |
|
Before Width: | Height: | Size: 181 KiB |
|
Before Width: | Height: | Size: 38 KiB After Width: | Height: | Size: 65 KiB |
|
Before Width: | Height: | Size: 96 KiB |
|
Before Width: | Height: | Size: 42 KiB After Width: | Height: | Size: 76 KiB |
|
Before Width: | Height: | Size: 45 KiB After Width: | Height: | Size: 94 KiB |
|
Before Width: | Height: | Size: 37 KiB |
|
Before Width: | Height: | Size: 45 KiB After Width: | Height: | Size: 73 KiB |
|
Before Width: | Height: | Size: 16 KiB |
|
Before Width: | Height: | Size: 33 KiB After Width: | Height: | Size: 50 KiB |
|
Before Width: | Height: | Size: 55 KiB After Width: | Height: | Size: 85 KiB |
@@ -1,55 +1,41 @@
|
||||
<img src="img/rhasspy.svg" style="max-height: 200px;" title="Rhasspy logo">
|
||||

|
||||
|
||||
Rhasspy (pronounced RAH-SPEE) is an [open source](https://github.com/synesthesiam/rhasspy), fully offline voice assistant toolkit for [many languages](#supported-languages) that works well with [Home Assistant](https://www.home-assistant.io/), [Hass.io](https://www.home-assistant.io/hassio/), and [Node-RED](https://nodered.org).
|
||||
Rhasspy (pronounced RAH-SPEE) is an offline, [multilingual](#supported-languages) voice assistant toolkit inspired by [Jasper](https://jasperproject.github.io/) that works well with [Home Assistant](https://www.home-assistant.io/), [Hass.io](https://www.home-assistant.io/hassio/), and [Node-RED](https://nodered.org).
|
||||
|
||||
You specify voice commands in a [template language](training.md):
|
||||
Rhasspy transforms voice commands into [JSON](https://json.org) events that can trigger actions in home automation software, like [Home Assistant automations](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) or [Node-RED flows](usage.md#node-red). You define custom voice commands in a [profile](profiles.md) using a [specialized template syntax](training.md), and Rhasspy takes care of the rest.
|
||||
|
||||
```
|
||||
[LightState]
|
||||
states = (on | off)
|
||||
turn (<states>){state} [the] light
|
||||
```
|
||||
## Motivation
|
||||
|
||||
and Rhasspy will produce [JSON](https://json.org) events that can trigger actions in [home automation software](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) or [Node-RED flows](usage.md#node-red):
|
||||
A typical voice assistant (Alexa, Google Home, etc.) solves a number of important problems:
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "turn on the light",
|
||||
"intent": {
|
||||
"name": "LightState"
|
||||
},
|
||||
"slots": {
|
||||
"state": "on"
|
||||
}
|
||||
}
|
||||
```
|
||||
1. Deciding when to record audio ([wake word](wake-word.md))
|
||||
2. Listening for voice commands ([command listener](command-listener.md))
|
||||
3. Transcribing command/question ([speech to text](speech-to-text.md))
|
||||
4. Interpreting the speaker's **intent** from the text ([intent recognition](intent-recognition.md))
|
||||
5. Fulfilling the speaker's intent ([intent handling](intent-handling.md))
|
||||
|
||||
Rhasspy is <strong>optimized for</strong>:
|
||||
Rhasspy provides **offline, private solutions** to problems 1-4 using off-the-shelf tools. These tools are:
|
||||
|
||||
* Working with external services via [MQTT](usage.md#mqtt), [HTTP](usage.md#http-api), and [Websockets](usage.md#websocket-events)
|
||||
* Home Assistant and Hass.IO have [built-in support](usage.md#home-assistant)
|
||||
* Pre-specified voice commands that are described well [by a grammar](training.md#sentencesini)
|
||||
* You can also do [open-ended speech recognition](speech-to-text.md#open-transcription)
|
||||
* Voice commands with [uncommon words or pronunciations](usage.md#words-tab)
|
||||
* New words are added phonetically with [automated assistance](https://github.com/AdolfVonKleist/Phonetisaurus)
|
||||
* **Wake word**
|
||||
* [Pocketsphinx keyphrase](https://cmusphinx.github.io/wiki/tutoriallm/#using-keyword-lists-with-pocketsphinx)
|
||||
* [Mycroft Precise](https://github.com/MycroftAI/mycroft-precise)
|
||||
* [snowboy](https://snowboy.kitt.ai)
|
||||
* **Command listener**
|
||||
* [webrtcvad](https://github.com/wiseman/py-webrtcvad)
|
||||
* **Speech to text**
|
||||
* [Pocketsphinx](https://github.com/cmusphinx/pocketsphinx)
|
||||
* [Kaldi](https://kaldi-asr.org)
|
||||
* **Intent recognition**
|
||||
* [OpenFST](https://www.openfst.org)
|
||||
* [fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy)
|
||||
* [Mycroft Adapt](https://github.com/MycroftAI/adapt)
|
||||
* [RasaNLU](https://rasa.com/)
|
||||
|
||||
## Getting Started
|
||||
|
||||
Ready to try Rhasspy? Follow the steps below and check out the [tutorials](tutorials.md).
|
||||
|
||||
1. Make sure you have the [necessary hardware](hardware.md)
|
||||
2. Choose an [installation method](installation.md)
|
||||
3. Access the [web interface](usage.md#web-interface) to download a profile
|
||||
4. Author your [custom voice commands](training.md) and train Rhasspy
|
||||
5. Connect Rhasspy to [Home Assistant](usage.md#home-assistant) or a [Node-RED](usage.md#node-red) flow
|
||||
|
||||
## Getting Help
|
||||
|
||||
If you have problems, please stop by the [Rhasspy community site](https://community.rhasspy.org) or [open a GitHub issue](https://github.com/synesthesiam/rhasspy/issues).
|
||||
For problem 5 (fulfilling the speaker's intent), Rhasspy works with external home automation software, such as Home Assistant's built-in [automation capability](https://www.home-assistant.io/docs/automation/) or a [Node-RED flow](https://nodered.org). For each intent you define, Rhasspy emits a JSON event that can, for example, be used to do anything Home Assistant can do (toggle switches, call REST services, etc.). This means that Rhasspy will do very little out of the box compared to other voice assistants, but there are also *no limits* to what can be done.
|
||||
|
||||
## Supported Languages
|
||||
|
||||
Rhasspy supports the following languages:
|
||||
Rhasspy currently supports the following languages:
|
||||
|
||||
* English (`en`)
|
||||
* German (`de`)
|
||||
@@ -63,10 +49,124 @@ Rhasspy supports the following languages:
|
||||
* Mandarin (`zh`)
|
||||
* Vietnamese (`vi`)
|
||||
* Portuguese (`pt`)
|
||||
* Swedish (`sv`)
|
||||
* Catalan (`ca`)
|
||||
|
||||
## Intended Audience
|
||||
Support for these languages comes directly from existing [CMU Sphinx](https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/) and [Kaldi](https://montreal-forced-aligner.readthedocs.io/en/latest/pretrained_models.html) acoustic models.
|
||||
|
||||
It is possible to extend Rhasspy to new languages with only:
|
||||
|
||||
* A [phonetic dictionary](https://cmusphinx.github.io/wiki/tutorialdict/#using-g2p-seq2seq-to-extend-the-dictionary)
|
||||
* A trained [acoustic model](https://cmusphinx.github.io/wiki/tutorialam/)
|
||||
* A [grapheme to phoneme model](https://github.com/AdolfVonKleist/Phonetisaurus)
|
||||
|
||||
The table below summarizes language support across the various supporting technologies that Rhasspy uses:
|
||||
|
||||
| Category | Name | Offline? | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt |
|
||||
| -------- | ------ | -------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| **Wake Word** | [pocketsphinx](wake-word.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | |
|
||||
| | [snowboy](wake-word.md#snowboy) | *requires account* | ✓ | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| | [precise](wake-word.md#mycroft-precise) | ✓ | ✓ | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| **Speech to Text** | [pocketsphinx](speech-to-text.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | |
|
||||
| | [kaldi](speech-to-text.md#kaldi) | ✓ | | | | | | | | | | | ✓ | ✓ |
|
||||
| **Intent Recognition** | [fsticuffs](intent-recognition.md#fsticuffs) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [fuzzywuzzy](intent-recognition.md#fuzzywuzzy) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [adapt](intent-recognition.md#mycroft-adapt) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flair](intent-recognition.md#flair) | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | | | | | ✓ |
|
||||
| | [rasaNLU](intent-recognition.md#rasanlu) | *needs extra software* | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| **Text to Speech** | [espeak](text-to-speech.md#espeak) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flite](text-to-speech.md#flite) | ✓ | ✓ | | | | | | | | ✓ | | | |
|
||||
| | [picotts](text-to-speech.md#picotts) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [marytts](text-to-speech.md#marytts) | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | | | | | |
|
||||
| | [wavenet](text-to-speech.md#google-wavenet) | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ |
|
||||
|
||||
• - yes, but requires training/customization
|
||||
|
||||
## How It Works
|
||||
|
||||
Rhasspy starts off asleep, listening for a [wake word](wake-word.md). Once awoken, it listens for a [voice command](command-listener.md). After recording the command, it's transcribed with the [speech to text](speech-to-text.md) system into text, which is then run through an [intent recognizer](intent-recognition.md). Finally, the recognized intent is used to generate an event that can be [handled by Home Assistant or Node-RED](intent-handling.md).
|
||||
|
||||

|
||||
|
||||
## Customization
|
||||
|
||||
Every step of Rhasspy's processing pipeline can be customized, including using a remote Rhasspy server via its [HTTP API](usage.md#http-api) for [speech to text](speech-to-text.md#remote-http-server) and [intent recognition](intent-recognition.md#remote-http-server). Some useful Rhasspy API endpoints are:
|
||||
|
||||
* `/api/listen-for-command`
|
||||
* POST to wake Rhasspy up and start listening for a voice command
|
||||
* `/api/train`
|
||||
* POST to re-train your profile
|
||||
* `/api/speech-to-intent`
|
||||
* POST a WAV file and have Rhasspy process it as a voice command
|
||||
* `/api/text-to-intent`
|
||||
* POST text and have Rhasspy process it as a command
|
||||
* `/api/text-to-speech`
|
||||
* POST text and have Rhasspy speak it
|
||||
|
||||
Additionally, you can call out to a custom external program for [wake word detection](wake-word.md#command), [voice command listening](command-listener.md#command), [speech recognition](speech-to-text.md#command), [intent recognition](intent-recognition.md#command), and event [intent handling](intent-handling.md#command)! This means that you can use Rhasspy as a general voice command toolkit, with or without Home Assistant.
|
||||
|
||||
## RGB Light Example
|
||||
|
||||
Let's say you have an RGB light of some kind in your bedroom that's [hooked up already to Home Assistant](https://www.home-assistant.io/components/light.mqtt). You'd like to be able to say things like "*set the bedroom light to red*" to change its color. To start, let's write a [Home Assistant automation](https://www.home-assistant.io/docs/automation/action/) to help you out:
|
||||
|
||||
automation:
|
||||
# Change the light in the bedroom to red.
|
||||
trigger:
|
||||
...
|
||||
action:
|
||||
service: light.turn_on
|
||||
data:
|
||||
rgb_color: [255, 0, 0]
|
||||
entity_id: light.bedroom
|
||||
|
||||
Now you just need the trigger! Rhasspy will send events that can be caught with the [event trigger platform](https://www.home-assistant.io/docs/automation/trigger/#event-trigger). A different event will be sent for each *intent* that you define, with slot values corresponding to important parts of the command (like light name and color). Let's start by defining an intent in Rhasspy called `ChangeLightColor` that can be said a few different ways:
|
||||
|
||||
[ChangeLightColor]
|
||||
colors = (red | green | blue) {color}
|
||||
set [the] (bedroom){name} [to] <colors>
|
||||
|
||||
This is a [simplified JSGF grammar](doc/sentences/md) that will generate the following sentences:
|
||||
|
||||
* set the bedroom to red
|
||||
* set the bedroom to green
|
||||
* set the bedroom to blue
|
||||
* set the bedroom red
|
||||
* set the bedroom green
|
||||
* set the bedroom blue
|
||||
* set bedroom to red
|
||||
* set bedroom to green
|
||||
* set bedroom to blue
|
||||
* set bedroom red
|
||||
* set bedroom green
|
||||
* set bedroom blue
|
||||
|
||||
Rhasspy uses these sentences to create an [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) for speech recognition, and also to train an intent recognizer that can extract relevant parts of the command. The `{color}` tag in the `colors` rule will make Rhasspy put a `color` property in each event with the name of the recognized color (red, green, or blue). Likewise, the `{name}` tag on `bedroom` will add a `name` property to the event.
|
||||
|
||||
If trained on these sentences, Rhasspy will now recognize commands like "*set the bedroom light to red*" and send a `rhasspy_ChangeLightColor` event to Home Assistant with the following data:
|
||||
|
||||
{
|
||||
"name": "bedroom",
|
||||
"color": "red"
|
||||
}
|
||||
|
||||
You can now fill in the rest of the Home Assistant automation:
|
||||
|
||||
automation:
|
||||
# Change the light in the bedroom to red.
|
||||
trigger:
|
||||
platform: event
|
||||
event_type: rhasspy_ChangeLightColor
|
||||
event_data:
|
||||
name: bedroom
|
||||
color: red
|
||||
action:
|
||||
service: light.turn_on
|
||||
data:
|
||||
rgb_color: [255, 0, 0]
|
||||
entity_id: light.bedroom
|
||||
|
||||
This will handle the specific case of setting the bedroom light to red, but not any other color. You can either add additional automations to handle these, or make use of [automation templating](https://www.home-assistant.io/docs/automation/templating/) to do it all at once.
|
||||
|
||||
Intended Audience
|
||||
---------------------
|
||||
|
||||
Rhasspy is intended for advanced users that want to have a voice interface to Home Assistant, but value **privacy** and **freedom** above all else. There are many other voice assistants, but none (to my knowledge) that:
|
||||
|
||||
|
||||
@@ -2,22 +2,20 @@
|
||||
|
||||
Rhasspy should run in a variety of software environments, including:
|
||||
|
||||
* Within a [Docker](#docker) container
|
||||
* As a [Hass.io add-on](#hassio)
|
||||
* Inside a [Python virtual environment](#virtual-environment)
|
||||
* Running as a [service](#running-as-a-service)
|
||||
* Build [from source](#build-from-source)
|
||||
* Within a [Docker](https://www.docker.com/) container
|
||||
* As a [Hass.IO add-on](https://www.home-assistant.io/addons/)
|
||||
* Inside a [Python virtual environment](https://docs.python-guide.org/dev/virtualenvs/)
|
||||
|
||||
## Docker
|
||||
### Docker
|
||||
|
||||
The easiest way to try Rhasspy is with Docker. To get started, make sure you have [Docker installed](https://docs.docker.com/install/):
|
||||
|
||||
curl -sSL https://get.docker.com | sh
|
||||
|
||||
|
||||
and that your user is part of the `docker` group:
|
||||
|
||||
sudo usermod -a -G docker $USER
|
||||
|
||||
|
||||
**Be sure to reboot** after adding yourself to the `docker` group!
|
||||
|
||||
Next, start the [Rhasspy Docker image](https://hub.docker.com/r/synesthesiam/rhasspy-server) in the background:
|
||||
@@ -29,9 +27,9 @@ Next, start the [Rhasspy Docker image](https://hub.docker.com/r/synesthesiam/rha
|
||||
synesthesiam/rhasspy-server:latest \
|
||||
--user-profiles /profiles \
|
||||
--profile en
|
||||
|
||||
|
||||
This will start Rhasspy with the English profile (`en`) in the background (`-d`) on port 12101 (`-p`) and give Rhasspy access to your microphone (`--device`). Any changes you make to [your profile](profiles.md) will be saved to `~/.config/rhasspy`.
|
||||
|
||||
|
||||
Once it starts, Rhasspy's web interface should be accessible at [http://localhost:12101](http://localhost:12101). If something went wrong, try running docker with `-it` instead of `-d` to see the output.
|
||||
|
||||
If you're using [docker compose](https://docs.docker.com/compose/), add the following to your `docker-compose.yml` file:
|
||||
@@ -46,25 +44,10 @@ If you're using [docker compose](https://docs.docker.com/compose/), add the foll
|
||||
devices:
|
||||
- "/dev/snd:/dev/snd"
|
||||
command: --user-profiles /profiles --profile en
|
||||
|
||||
### Updating Docker Image
|
||||
|
||||
To update your Rhasspy Docker image, just run:
|
||||
### Hass.IO
|
||||
|
||||
```bash
|
||||
docker pull synesthesiam/rhasspy-server:latest
|
||||
```
|
||||
on your Rhasspy server and restart the Docker container. This may require running something like:
|
||||
|
||||
```bash
|
||||
docker rm <container-name>
|
||||
```
|
||||
|
||||
before doing a `docker run...`
|
||||
|
||||
## Hass.io
|
||||
|
||||
The second easiest way to install Rhasspy is as a [Hass.io add-on](https://www.home-assistant.io/addons/). Follow the [installation instructions for Hass.io](https://www.home-assistant.io/hassio/installation/) before proceeding.
|
||||
The second easiest way to install Rhasspy is as a [Hass.IO add-on](https://www.home-assistant.io/addons/). Follow the [installation instructions for Hass.IO](https://www.home-assistant.io/hassio/installation/) before proceeding.
|
||||
|
||||
To install the add-on, add my [Hass.IO Add-On Repository](https://github.com/synesthesiam/hassio-addons) in the Add-On Store, refresh, then install the "Rhasspy Assistant" under “Synesthesiam Hass.IO Add-Ons” (all the way at the bottom of the Add-On Store screen).
|
||||
|
||||
@@ -76,129 +59,37 @@ Watch the system log for a message like `Build 8e35c251/armhf-addon-rhasspy:2.11
|
||||
|
||||
Before starting the add-on, make sure to give it access to your microphone and speakers:
|
||||
|
||||

|
||||

|
||||
|
||||
### Updating Hass.IO Add-On
|
||||
|
||||
You should receive notifications when a new version of Rhasspy is available for Hass.IO. Follow the instructions from Hass.IO on how to update the add-on.
|
||||
|
||||
## Virtual Environment
|
||||
### Virtual Environment
|
||||
|
||||
Rhasspy can be installed into a Python virtual environment, though there are a number of requirements. This may be desirable, however, if you have trouble getting Rhasspy to access your microphone from within a Docker container. To start, clone the repo somewhere:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/synesthesiam/rhasspy.git
|
||||
```
|
||||
|
||||
git clone https://github.com/synesthesiam/rhasspy.git
|
||||
|
||||
Then run the `download-dependencies.sh` and `create-venv.sh` scripts (assumes a Debian distribution):
|
||||
|
||||
```bash
|
||||
cd rhasspy/
|
||||
./download-dependencies.sh
|
||||
./create-venv.sh
|
||||
```
|
||||
|
||||
cd rhasspy/
|
||||
./download-dependencies.sh
|
||||
./create-venv.sh
|
||||
|
||||
Once the installation finishes (5-10 minutes on a Raspberry Pi 3), you can use the `run-venv.sh` script to start Rhasspy:
|
||||
|
||||
```bash
|
||||
./run-venv.sh --profile en
|
||||
```
|
||||
|
||||
./run-venv.sh --profile en
|
||||
|
||||
If all is well, the web interface will be available at [http://localhost:12101](http://localhost:12101)
|
||||
|
||||
### Updating Virtual Environment
|
||||
### Software Requirements
|
||||
|
||||
To update your Rhasspy virtual environment to the latest version, run:
|
||||
|
||||
```bash
|
||||
git pull origin master
|
||||
```
|
||||
|
||||
in your `rhasspy` directory, and then update your Python dependencies:
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate
|
||||
pip3 install -r requirements.txt
|
||||
```
|
||||
|
||||
You should also re-build the web interface:
|
||||
|
||||
1. Install [yarn](https://yarnpkg.com) on your system
|
||||
2. Run `yarn build` in the `rhasspy` directory
|
||||
3. Restart any running instances of Rhasspy
|
||||
|
||||
### Running as a Service
|
||||
|
||||
Once installed, Rhasspy can be run as a [systemd service](https://systemd.io/). An [example unit file](https://github.com/synesthesiam/rhasspy/blob/master/etc/rhasspy.service) is available (thanks [UnderpantsGnome](https://github.com/UnderpantsGnome)):
|
||||
|
||||
```
|
||||
[Unit]
|
||||
Description=Rhasspy
|
||||
After=syslog.target network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/home/<USER>/path/to/rhasspy
|
||||
ExecStart=/bin/bash -lc './run-venv.sh --profile <LANGUAGE>'
|
||||
|
||||
RestartSec=1
|
||||
Restart=on-failure
|
||||
|
||||
StandardOutput=syslog
|
||||
StandardError=syslog
|
||||
|
||||
SyslogIdentifier=rhasspy
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
* Replace `/home/<USER>/path/to/rhasspy` with the full path to your Rhasspy installation (where `run-venv.sh` is).
|
||||
* Replace `<LANGUAGE>` with your profile language (e.g., `en`)
|
||||
|
||||
Create a file named `rhasspy.service` in the `/home/<USER>/.config/systemd/user` directory (you may need to create the directory itself). Once the file has been saved, run:
|
||||
|
||||
```bash
|
||||
systemctl --user daemon-reload
|
||||
```
|
||||
|
||||
Then, you can start Rhasspy with:
|
||||
|
||||
```bash
|
||||
systemctl --user start rhasspy
|
||||
```
|
||||
|
||||
If you'd like Rhasspy to start on boot, run:
|
||||
|
||||
```bash
|
||||
systemctl --user enable --now rhasspy
|
||||
```
|
||||
|
||||
## Build From Source
|
||||
|
||||
The `create-venv.sh` script uses [pre-compiled binaries](https://github.com/synesthesiam/rhasspy/releases/tag/v2.0) for Rhasspy's required tools:
|
||||
|
||||
* [OpenFST](https://www.openfst.org)
|
||||
* [Opengrm](http://www.opengrm.org/twiki/bin/view/GRM/NGramLibrary)
|
||||
* [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus)
|
||||
* [Kaldi](https://kaldi-asr.org)
|
||||
|
||||
The [build-from-source.sh](https://github.com/synesthesiam/rhasspy/blob/master/build-from-source.sh) attempts to build all of these tools from source. The binary artifacts (command-line tools, shared libraries) are installed into the `bin` and `lib` directories of a Python virtual environment. The `run-venv.sh` script automatically adds these directories to `PATH` and `LD_LIBRARY_PATH` before starting Rhasspy.
|
||||
|
||||
### Swap Size
|
||||
|
||||
On low memory devices like the Raspberry Pi, building the tools above can quickly consume the entire RAM. Before building, it's highly recommended that you increase the available swap space by several gigabytes:
|
||||
|
||||
1. Edit `/etc/dphys-swapfile`
|
||||
2. Change `CONF_SWAPSIZE` to something large, like 2048 (2GB)
|
||||
3. Reboot
|
||||
|
||||
### Kaldi
|
||||
|
||||
You can skip building Kaldi if you plan to just [use Pocketsphinx](speech-to-text.md#pocketsphinx) for speech recognition.
|
||||
|
||||
### Updating Source Install
|
||||
|
||||
Follow the same instructions as [updating a virtual environment](#updating-virtual-environment).
|
||||
At its core, Rhasspy requires:
|
||||
|
||||
* Linux
|
||||
* Python 3.6
|
||||
* [Flask](https://pypi.org/project/Flask/) web server, including
|
||||
* [flask-swagger-ui](https://pypi.org/project/flask-swagger-ui/) for HTTP API documentation
|
||||
* [Flask-Cors](https://pypi.org/project/Flask-Cors/) for [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) stuff
|
||||
* [Flask-Sockets](https://pypi.org/project/Flask-Sockets/) for websocket support
|
||||
* [pydash](https://pypi.org/project/pydash/) utility library
|
||||
|
||||
Actually using any components, however, requires a lot of [extra software](about.md#supporting-tools).
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
# Intent Handling
|
||||
|
||||
After a voice command has been transcribed and your intent has been successfully recognized, Rhasspy is ready to send a JSON event to another system like Home Assistant or Node-RED.
|
||||
|
||||
* [Home Assistant](#home-assistant)
|
||||
* [Remote Server](#remote-server)
|
||||
* [Command](#command)
|
||||
After a voice command has been transcribed and your intent has been successfully recognized, Rhasspy is ready to send a JSON event to Home Assistant or Node-RED.
|
||||
|
||||
Regardless of which intent handling system you choose, Rhasspy emits JSON events [over a websocket connection](usage.md#websocket-events).
|
||||
|
||||
@@ -116,60 +112,10 @@ Set `home_assistant.pem_file` to the full path to your <a href="http://docs.pyth
|
||||
|
||||
Use the environment variable `RHASSPY_PROFILE_DIR` to reference your current profile's directory. For example, `$RHASSPY_PROFILE_DIR/my.pem` will tell Rhasspy to use a file named `my.pem` in your profile directory when verifying your self-signed certificate.
|
||||
|
||||
## Remote Server
|
||||
|
||||
Rhasspy can POST the intent JSON to a remote URL.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"handle": {
|
||||
"system": "remote",
|
||||
"remote": {
|
||||
"url": "http://<address>:<port>/path/to/endpoint"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
When an intent is recognized, Rhasspy will POST to `handle.remote.url` with the intent JSON. You should **return JSON** back, optionally with additional information. If `handle.forward_to_hass` is `true`, Rhasspy will look for a `hass_event` property of the returned JSON with the following structure:
|
||||
|
||||
```json
|
||||
{
|
||||
// rest of input JSON
|
||||
// ...
|
||||
"hass_event": {
|
||||
"event_type": "...",
|
||||
"event_data": {
|
||||
"key": "value",
|
||||
// ...
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Rhasspy will create the Home Assistant event based on this information. If it is **not** present, the remaining intent information will be used to construct the event as normal (i.e., `intent` and `entities`). If `handle.forward_to_hass` is `false`, the output of your program is not used.
|
||||
|
||||
### Speech
|
||||
|
||||
If the returned JSON contains a "speech" key like this:
|
||||
|
||||
```json
|
||||
{
|
||||
...
|
||||
"speech": {
|
||||
"text": "Some text to speak."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
then Rhasspy will forward `speech.text` to the configured [text to speech](text-to-speech.md) system.
|
||||
|
||||
See `rhasspy.intent_handler.RemoteIntentHandler` for details.
|
||||
|
||||
## Command
|
||||
|
||||
Once an intent is successfully recognized, Rhasspy will send an event to Home Assistant with the details. You can call a custom program instead *or in addition* to this behavior.
|
||||
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
@@ -198,7 +144,7 @@ When an intent is recognized, Rhasspy will call your custom program with the int
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Rhasspy will create the Home Assistant event based on this information. If it is **not** present, the remaining intent information will be used to construct the event as normal (i.e., `intent` and `entities`). If `handle.forward_to_hass` is `false`, the output of your program is not used.
|
||||
|
||||
The following environment variables are available to your program:
|
||||
@@ -209,21 +155,6 @@ The following environment variables are available to your program:
|
||||
|
||||
See [handle.sh](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/handle.sh) for an example program.
|
||||
|
||||
### Speech
|
||||
|
||||
If the returned JSON contains a "speech" key like this:
|
||||
|
||||
```json
|
||||
{
|
||||
...
|
||||
"speech": {
|
||||
"text": "Some text to speak."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
then Rhasspy will forward `speech.text` to the configured [text to speech](text-to-speech.md) system.
|
||||
|
||||
See `rhasspy.intent_handler.CommandIntentHandler` for details.
|
||||
|
||||
## Dummy
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Intent Recognition
|
||||
|
||||
After your voice command has been transcribed by the [speech to text](speech-to-text.md) system, the next step is to recognize your intent.
|
||||
After your voice command has been transcribed by the [speech to text](speech-to-text.md) system, the next step is to recognize your intent.
|
||||
The end result is a JSON event with information about the intent.
|
||||
|
||||
The following table summarizes the trade-offs of using each intent recognizer:
|
||||
@@ -24,14 +24,11 @@ Add to your [profile](profiles.md):
|
||||
"system": "fsticuffs",
|
||||
"fsticuffs": {
|
||||
"intent_fst": "intent.fst",
|
||||
"ignore_unknown_words": true,
|
||||
"fuzzy": true
|
||||
"ignore_unknown_words": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
By default, fuzzy matching is enabled (`fuzzy` is true). This allows `fsticuffs` to be less strict when matching text, skipping over any words in `stop_words.txt`, and handling repeated words gracefully. Words must still appear in the correct order according to `sentences.ini`, but additional words will not cause a recognition failure.
|
||||
|
||||
When `ignore_unknown_words` is true, any word outside of `sentences.ini` is simply ignored. This allows a lot more sentences to be accepted, but may cause unexpected results when used with arbitrary input from text chat.
|
||||
|
||||
See `rhasspy.intent.FsticuffsRecognizer` for details.
|
||||
@@ -61,7 +58,7 @@ Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"intent": {
|
||||
"system": "adapt",
|
||||
"system": "adapt",
|
||||
"adapt": {
|
||||
"stop_words": "stop_words.txt"
|
||||
}
|
||||
@@ -80,7 +77,7 @@ Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"intent": {
|
||||
"system": "flair",
|
||||
"system": "flair",
|
||||
"flair": {
|
||||
"data_dir": "flair_data",
|
||||
"max_epochs": 25,
|
||||
@@ -98,7 +95,7 @@ See `rhasspy.intent.FlairRecognizer` for details.
|
||||
|
||||
## RasaNLU
|
||||
|
||||
Recognizes intents **remotely** using a [Rasa NLU](https://rasa.com/) server. You must [install a Rasa NLU server](https://rasa.com/docs/rasa/user-guide/installation/) somewhere that Rhasspy can access. Works well when you have a large number of sentences (thousands to hundreds of thousands) and need to handle sentences *and* words not seen during training. This needs Rasa 1.0 or higher.
|
||||
Recognizes intents **remotely** using a [rasaNLU](https://rasa.com/) server. You must [install a rasaNLU server](https://rasa.com/docs/nlu/installation) somewhere that Rhasspy can access. Works well when you have a large number of sentences (thousands to hundreds of thousands) and need to handle sentences *and* words not seen during training.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -108,7 +105,7 @@ Add to your [profile](profiles.md):
|
||||
"rasa": {
|
||||
"examples_markdown": "intent_examples.md",
|
||||
"project_name": "rhasspy",
|
||||
"url": "http://localhost:5005/"
|
||||
"url": "http://localhost:5000/"
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -132,35 +129,6 @@ Add to your [profile](profiles.md):
|
||||
|
||||
See `rhasspy.intent.RemoteRecognizer` for details.
|
||||
|
||||
## Home Assistant Conversation
|
||||
|
||||
Sends transcriptions from [speech to text](speech-to-text.md) to [Home Assistant's conversation API](https://www.home-assistant.io/integrations/conversation/). If the response contains speech, Rhasspy can optionally speak it.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"intent": {
|
||||
"system": "conversation",
|
||||
"conversation": {
|
||||
"handle_speech": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
When `handle_speech` is `true`, Rhasspy will forward the returned speech to your [text to speech](text-to-speech.md) system.
|
||||
|
||||
The settings from your profile's `home_assistant` section are automatically used (URL, access token, etc.).
|
||||
|
||||
Because Home Assistant will already handle your intent (probably using an [intent script](https://www.home-assistant.io/integrations/intent_script/)), Rhasspy will always generate an empty intent with this recognizer.
|
||||
|
||||
See `rhasspy.intent.HomeAssistantConversationRecognizer` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Publishes intent recognitions/failures to `hermes/intent/<INTENT_NAME>` or `hermes/nlu/intentNotRecognized` ([Hermes protocol](https://docs.snips.ai/reference/hermes)).
|
||||
|
||||
This is enabled by default and controlled by the `mqtt.publish_intents` setting in your [profile](profiles.md).
|
||||
|
||||
## Command
|
||||
|
||||
Recognizes intents from text using a custom external program.
|
||||
@@ -177,7 +145,7 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
```
|
||||
|
||||
Rhasspy recognizes intents from text using one of several systems, such as [fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy) or [Rasa NLU](https://rasa.com/). You can call a custom program that does intent recognition from a text command.
|
||||
Rhasspy recognizes intents from text using one of several systems, such as [fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy) or [rasaNLU](https://rasa.com/). You can call a custom program that does intent recognition from a text command.
|
||||
|
||||
When a voice command is successfully transcribed, your program will be called with the text transcription printed to standard in. Your program should return JSON on standard out, something like:
|
||||
|
||||
@@ -196,7 +164,7 @@ When a voice command is successfully transcribed, your program will be called wi
|
||||
"text": "set the bedroom light to red"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
The following environment variables are available to your program:
|
||||
|
||||
* `$RHASSPY_BASE_DIR` - path to the directory where Rhasspy is running from
|
||||
|
||||
@@ -23,7 +23,7 @@ The default location for each of these directories is:
|
||||
* User profile location is `$HOME/.config/rhasspy/profiles`
|
||||
* Docker
|
||||
* System profile location is either `/usr/share/rhasspy/profiles` (ALSA) or `/home/rhasspy/profiles` (PulseAudio)
|
||||
* User profile location **must** be explicitly set and mapped to a volume:
|
||||
* User profile location **must** be explicitly set and mapped to a volume:
|
||||
* `docker run ... -v /path/to/profiles:/profiles synesthesiam/rhasspy-server --user-profiles /profiles`
|
||||
|
||||
### Example
|
||||
@@ -40,8 +40,189 @@ If you need to install Rhasspy onto a machine that is not connected to the inter
|
||||
2. `fr-g2p.tar.gz`
|
||||
3. `fr-small.lm.gz`
|
||||
|
||||
If your user profile directory is `$HOME/.config/rhasspy/profiles`, then you should download/copy all three artifacts to `$HOME/.config/rhasspy/profiles/fr/download` on the offline machine. Now, when Rhasspy loads the `fr` profile and you click "Download", it will extract the files in the `download` directory without going out to the internet.
|
||||
If your user profile directory is `$HOME/.config/rhasspy/profiles`, then you should download/copy all three artifacts to `$HOME/.config/rhasspy/profiles/fr/download` on the offline machine. Now, when Rhasspy loads the `fr` profile and you click "Download", it will extract the files in the `download` directory without going out to the internet.
|
||||
|
||||
If you want to know precisely which files Rhasspy is looking for for a given profile, visit the `profiles` directory in [the source code](https://github.com/synesthesiam/rhasspy/tree/master/profiles) and examine these scripts in that profile's directory:
|
||||
|
||||
* `download-profile.sh`
|
||||
* Downloads and extracts all required binary artifacts. Uses cache in `download` directory unless `--delete` option is given.
|
||||
* `check-profile.sh`
|
||||
* Verifies that required binary artifacts are present. Returns non-zero exit code if download is required.
|
||||
|
||||
## Available Settings
|
||||
|
||||
See [the reference](reference.md#profile-settings) for all available profile settings.
|
||||
All available profile sections and settings are listed below:
|
||||
|
||||
* `rhasspy` - configuration for Rhasspy assistant
|
||||
* `preload_profile` - true if speech/intent recognizers should be loaded immediately for default profile (default: `true`)
|
||||
* `listen_on_start` - true if Rhasspy should listen for wake word at startup (default: `true`)
|
||||
* `load_timeout_sec` - number of seconds to wait for internal actors before proceeding with start up
|
||||
* `home_assistant` - how to communicate with Home Assistant/Hass.io
|
||||
* `url` - Base URL of Home Assistant server (no `/api`)
|
||||
* `access_token` - long-lived access token for Home Assistant (Hass.io token is used automatically)
|
||||
* `api_password` - Password, if you have that enabled (deprecated)
|
||||
* `pem_file` - Full path to your <a href="http://docs.python-requests.org/en/latest/user/advanced/#ssl-cert-verification">CA_BUNDLE file or a directory with certificates of trusted CAs</a>
|
||||
* `event_type_format` - Python format string used to create event type from intent type (`{0}`)
|
||||
* `speech_to_text` - transcribing [voice commands to text](speech-to-text.md)
|
||||
* `system` - name of speech to text system (`pocketsphinx`, `remote`, `command`, or `dummy`)
|
||||
* `pocketsphinx` - configuration for [Pocketsphinx](speech-to-text.md#pocketsphinx)
|
||||
* `compatible` - true if profile can use pocketsphinx for speech recognition
|
||||
* `acoustic_model` - directory with CMU 16 kHz acoustic model
|
||||
* `base_dictionary` - large text file with word pronunciations (read only)
|
||||
* `custom_words` - small text file with words/pronunciations added by user
|
||||
* `dictionary` - text file with all words/pronunciations needed for example sentences
|
||||
* `unknown_words` - small text file with guessed word pronunciations (from phonetisaurus)
|
||||
* `language_model` - text file with trigram [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) built from example sentences
|
||||
* `mllr_matrix` - MLLR matrix from [acoustic model tuning](https://cmusphinx.github.io/wiki/tutorialtuning/)
|
||||
* `mix_weight` - how much of the base language model to [mix in during training](training.md#language-model-mixing) (0-1)
|
||||
* `mix_fst` - path to save mixed ngram FST model
|
||||
* `kaldi` - configuration for [Kaldi](speech-to-text.md#kaldi)
|
||||
* `compatible` - true if profile can use Kaldi for speech recognition
|
||||
* `kaldi_dir` - absolute path to Kaldi root directory
|
||||
* `model_dir` - directory where Kaldi model is stored (relative to profile directory)
|
||||
* `graph` - directory where HCLG.fst is located (relative to `model_dir`)
|
||||
* `base_dictionary` - large text file with word pronunciations (read only)
|
||||
* `custom_words` - small text file with words/pronunciations added by user
|
||||
* `dictionary` - text file with all words/pronunciations needed for example sentences
|
||||
* `unknown_words` - small text file with guessed word pronunciations (from phonetisaurus)
|
||||
* `mix_weight` - how much of the base language model to [mix in during training](training.md#language-model-mixing) (0-1)
|
||||
* `mix_fst` - path to save mixed ngram FST model
|
||||
* `remote` - configuration for [remote Rhasspy server](speech-to-text.md#remote-http-server)
|
||||
* `url` - URL to POST WAV data for transcription (e.g., `http://your-rhasspy-server:12101/api/speech-to-text`)
|
||||
* `command` - configuration for [external speech-to-text program](speech-to-text.md#command)
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `sentences_ini` - Ini file with example [sentences/JSGF templates](training.md#sentencesini) grouped by intent
|
||||
* `g2p_model` - finite-state transducer for phonetisaurus to guess word pronunciations
|
||||
* `g2p_casing` - casing to force for g2p model (`upper`, `lower`, or blank)
|
||||
* `dictionary_casing` - casing to force for dictionary words (`upper`, `lower`, or blank)
|
||||
* `grammars_dir` - directory to write generated JSGF grammars from sentences ini file
|
||||
* `fsts_dir` - directory to write generated finite state transducers from JSGF grammars
|
||||
* `intent` - transforming text commands to intents
|
||||
* `system` - intent recognition system (`fsticuffs`, `fuzzywuzzy`, `rasa`, `remote`, `adapt`, `command`, or `dummy`)
|
||||
* `fsticuffs` - configuration for [OpenFST-based](https://www.openfst.org) intent recognizer
|
||||
* `intent_fst` - path to generated finite state transducer with all intents combined
|
||||
* `ignore_unknown_words` - true if words not in the FST symbol table should be ignored
|
||||
* `fuzzywuzzy` - configuration for simplistic [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) based intent recognizer
|
||||
* `examples_json` - JSON file with intents/example sentences
|
||||
* `min_confidence` - minimum confidence required for intent to be converted to a JSON event (0-1)
|
||||
* `remote` - configuration for remote Rhasspy server
|
||||
* `url` - URL to POST text to for intent recognition (e.g., `http://your-rhasspy-server:12101/api/text-to-intent`)
|
||||
* `rasa` - configuration for [rasaNLU](https://rasa.com/) based intent recognizer
|
||||
* `url` - URL of remote rasaNLU server (e.g., `http://localhost:5000/`)
|
||||
* `examples_markdown` - Markdown file to generate with intents/example sentences
|
||||
* `project_name` - name of project to generate during training
|
||||
* `adapt` - configuration for [Mycroft Adapt](https://github.com/MycroftAI/adapt) based intent recognizer
|
||||
* `stop_words` - text file with words to ignore in training sentences
|
||||
* `command` - configuration for external intent recognition program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `text_to_speech` - pronouncing words
|
||||
* `system` - text to speech system (`espeak`, `flite`, `picotts`, `marytts`, `command`, or `dummy`)
|
||||
* `espeak` - configuration for [eSpeak](http://espeak.sourceforge.net)
|
||||
* `phoneme_map` - text file mapping CMU phonemes to eSpeak phonemes
|
||||
* `flite` - configuration for [flite](http://www.festvox.org/flite)
|
||||
* `voice` - name of voice to use (e.g., `kal16`, `rms`, `awb`)
|
||||
* `picotts` - configuration for [PicoTTS](https://en.wikipedia.org/wiki/SVOX)
|
||||
* `language` - language to use (default if not present)
|
||||
* `marytts` - configuration for [MaryTTS](http://mary.dfki.de)
|
||||
* `url` - address:port of MaryTTS server (port is usually 59125)
|
||||
* `voice` - name of voice to use (e.g., `cmu-slt`). Default if not present.
|
||||
* `locale` - name of locale to use (e.g., `en-US`). Default if not present.
|
||||
* `phoneme_examples` - text file with examples for each CMU phoneme
|
||||
* `training` - training speech/intent recognizers
|
||||
* `dictionary_number_duplicates` - true if duplicate words in dictionary should be suffixed by `(2)`, `(3)`, etc.
|
||||
* `tokenizer` - system used to break sentences into words (`regex` only for now)
|
||||
* `regex` - configuration for regex tokenizer
|
||||
* `replace` - list of dictionaries with patterns/replacements used on each example sentence
|
||||
* `split` - pattern used to break sentences into words
|
||||
* `unknown_words` - configuration for dealing with words not in base/custom dictionaries
|
||||
* `fail_when_present` - true if Rhasspy should halt training when unknown words are found
|
||||
* `guess_pronunciations` - true if [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) should be used to guess how an unknown word is pronounced
|
||||
* `speech_to_text` - training for speech decoder
|
||||
* `system` - speech to text training system (`auto`, `pocketsphinx`, `kaldi`, `command`, or `dummy`)
|
||||
* `command` - configuration for external speech-to-text training program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `intent` - training for intent recognizer
|
||||
* `system` - intent recognizer training system (`auto`, `fsticuffs`, `fuzzywuzzy`, `rasa`, `adapt`, `command`, or `dummy`)
|
||||
* `command` - configuration for external intent recognizer training program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `wake` - waking Rhasspy up for speech input
|
||||
* `system` - wake word recognition system (`pocketsphinx`, `snowboy`, `precise`, `command`, or `dummy`)
|
||||
* `pocketsphinx` - configuration for Pocketsphinx wake word recognizer
|
||||
* `keyphrase` - phrase to wake up on (3-4 syllables recommended)
|
||||
* `threshold` - sensitivity of detection (recommended range 1e-50 to 1e-5)
|
||||
* `chunk_size` - number of bytes per chunk to feed to Pocketsphinx (default 960)
|
||||
* `snowboy` - configuration for [snowboy](https://snowboy.kitt.ai)
|
||||
* `model` - path to model file (in profile directory)
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `audio_gain` - audio gain (default 1)
|
||||
* `chunk_size` - number of bytes per chunk to feed to snowboy (default 960)
|
||||
* `precise` - configuration for [Mycroft Precise](https://github.com/MycroftAI/mycroft-precise)
|
||||
* `engine_path` - path to the precise-engine binary
|
||||
* `model` - path to model file (in profile directory)
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `trigger_level` - number of events to trigger activation (default 3)
|
||||
* `chunk_size` - number of bytes per chunk to feed to Precise (default 2048)
|
||||
* `command` - configuration for external wake word program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `microphone` - configuration for audio recording
|
||||
* `system` - audio recording system (`pyaudio`, `arecord`, `hermes`, or `dummy`)
|
||||
* `pyaudio` - configuration for [PyAudio](https://people.csail.mit.edu/hubert/pyaudio/) microphone
|
||||
* `device` - index of device to use or empty for default device
|
||||
* `frames_per_buffer` - number of frames to read at a time (default 480)
|
||||
* `arecord` - configuration for ALSA microphone
|
||||
* `device` - name of ALSA device (see `arecord -L`) to use or empty for default device
|
||||
* `chunk_size` - number of bytes to read at a time (default 960)
|
||||
* `hermes` - configuration for MQTT "microphone" ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* Subscribes to WAV data from `hermes/audioServer/<SITE_ID>/audioFrame`
|
||||
* Requires MQTT to be enabled
|
||||
* `sounds` - configuration for feedback sounds from Rhasspy
|
||||
* `system` - which sound output system to use (`aplay`, `hermes`, or `dummy`)
|
||||
* `wake` - path to WAV file to play when Rhasspy wakes up
|
||||
* `recorded` - path to WAV file to play when a command finishes recording
|
||||
* `aplay` - configuration for ALSA speakers
|
||||
* `device` - name of ALSA device (see `aplay -L`) to use or empty for default device
|
||||
* `hermes` - configuration for MQTT "speakers" ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* WAV data published to `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>`
|
||||
* Requires MQTT to be enabled
|
||||
* `command`
|
||||
* `system` - which voice command listener system to use (`webrtcvad`, `oneshot`, `hermes`, or `dummy`)
|
||||
* `webrtcvad` - configuration for [webrtcvad](https://github.com/wiseman/py-webrtcvad) system
|
||||
* `sample_rate` - sample rate of input audio
|
||||
* `chunk_size` - bytes per buffer (must correspond to 10, 20, or 30 ms of audio)
|
||||
* `vad_mode` - sensitivity of `webrtcvad` (0-3)
|
||||
* `min_sec` - minimum number of seconds in a command
|
||||
* `silence_sec` - number of seconds of silence after a voice command before stopping
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `throwaway_buffers` - number of buffers to drop when recording starts
|
||||
* `speech_buffers` - number of buffers with speech before command starts
|
||||
* `oneshot` - configuration for voice command system that takes first audio frame as entire command
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `command` - configuration for external voice command program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `hermes` - configuration for MQTT-based voice command system that listens between `startListening` and `stopListening` commands ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `handle`
|
||||
* `system` - which intent handling system to use (`hass`, `command`, or `dummy`)
|
||||
* `forward_to_hass` - true if intents are always forwarded to Home Assistant (even if `system` is `command`)
|
||||
* `command` - configuration for external intent handling program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `mqtt` - configuration for MQTT ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* `enabled` - true if MQTT client should be started
|
||||
* `host` - MQTT host
|
||||
* `port` - MQTT port
|
||||
* `username` - MQTT username (blank for anonymous)
|
||||
* `password` - MQTT password
|
||||
* `reconnect_sec` - number of seconds before client will reconnect
|
||||
* `site_id` - ID of site ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* `publish_intents` - true if intents are published to MQTT
|
||||
* `tuning` - configuration for acoustic model tuning
|
||||
* `system` - system for tuning (currently only `sphinxtrain`)
|
||||
* `sphinxtrain` - configuration for [sphinxtrain](https://github.com/cmusphinx/sphinxtrain) based acoustic model tuning
|
||||
* `mllr_matrix` - name of generated MLLR matrix (should match `speech_to_text.pocketsphinx.mllr_matrix`)
|
||||
|
||||
@@ -1,601 +0,0 @@
|
||||
# Reference
|
||||
|
||||
* [Supported Languages](#supported-languages)
|
||||
* [HTTP API](#http-api)
|
||||
* [Websocket API](#websocket-api)
|
||||
* [MQTT API](#mqtt-api)
|
||||
* [Command Line](#command-line)
|
||||
* [Profile Settings](#profile-settings)
|
||||
|
||||
## Supported Languages
|
||||
|
||||
The table below lists which components are compatible with Rhasspy's supported languages.
|
||||
|
||||
| Category | Name | Offline? | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt | sv | ca |
|
||||
| -------- | ------ | -------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| **Wake Word** | [pocketsphinx](wake-word.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | | | |
|
||||
| | [porcupine](wake-word.md#porcupine) | ✓ | ✓ | | | | | | | | | | | | | |
|
||||
| | [snowboy](wake-word.md#snowboy) | *requires account* | ✓ | • | • | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| | [precise](wake-word.md#mycroft-precise) | ✓ | ✓ | • | • | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| **Speech to Text** | [pocketsphinx](speech-to-text.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | ✓ |
|
||||
| | [kaldi](speech-to-text.md#kaldi) | ✓ | ✓ | ✓ | | ✓ | | ✓ | | | | | ✓ | | ✓ | |
|
||||
| **Intent Recognition** | [fsticuffs](intent-recognition.md#fsticuffs) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [fuzzywuzzy](intent-recognition.md#fuzzywuzzy) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [adapt](intent-recognition.md#mycroft-adapt) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flair](intent-recognition.md#flair) | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | | | | | ✓ | | ✓ |
|
||||
| | [rasaNLU](intent-recognition.md#rasanlu) | *needs extra software* | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| **Text to Speech** | [espeak](text-to-speech.md#espeak) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flite](text-to-speech.md#flite) | ✓ | ✓ | | | | | | | | ✓ | | | | | |
|
||||
| | [picotts](text-to-speech.md#picotts) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | | | | | | | | |
|
||||
| | [marytts](text-to-speech.md#marytts) | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | | | | | | | |
|
||||
| | [wavenet](text-to-speech.md#google-wavenet) | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | ✓ | |
|
||||
|
||||
• - yes, but requires training/customization
|
||||
|
||||
## HTTP API
|
||||
|
||||
Rhasspy's HTTP endpoints are documented below. You can also visit `/api/` in your Rhasspy server (note the final slash) to try out each endpoint.
|
||||
|
||||
Application authors may want to use the [rhasspy-client](https://pypi.org/project/rhasspy-client/), which provides a high-level interface to a remote Rhasspy server.
|
||||
|
||||
### Endpoints
|
||||
|
||||
* `/api/custom-words`
|
||||
* GET custom word dictionary as plain text, or POST to overwrite it
|
||||
* See `custom_words.txt` in your profile directory
|
||||
* `/api/download-profile`
|
||||
* Force Rhasspy to re-download profile
|
||||
* `?delete=true` - clear download cache
|
||||
* `/api/listen-for-command`
|
||||
* POST to wake Rhasspy up and start listening for a voice command
|
||||
* Returns intent JSON when command is finished
|
||||
* `?nohass=true` - stop Rhasspy from handling the intent
|
||||
* `?timeout=<seconds>` - override default command timeout
|
||||
* `?entity=<entity>&value=<value>` - set custom entity/value in recognized intent
|
||||
* `/api/listen-for-wake-word`
|
||||
* POST to wake Rhasspy up and return immediately
|
||||
* `/api/lookup`
|
||||
* POST word as plain text to look up or guess pronunciation
|
||||
* `?n=<number>` - return at most `n` guessed pronunciations
|
||||
* `/api/microphones`
|
||||
* GET list of available microphones
|
||||
* `/api/phonemes`
|
||||
* GET example phonemes from speech recognizer for your profile
|
||||
* See `phoneme_examples.txt` in your profile directory
|
||||
* `/api/play-wav`
|
||||
* POST to play WAV data
|
||||
* `/api/profile`
|
||||
* GET the JSON for your profile, or POST to overwrite it
|
||||
* `?layers=profile` to only see settings different from `defaults.json`
|
||||
* See `profile.json` in your profile directory
|
||||
* `/api/restart`
|
||||
* Restart Rhasspy server
|
||||
* `/api/sentences`
|
||||
* GET voice command templates or POST to overwrite
|
||||
* Set `Accept: application/json` to GET JSON with all sentence files
|
||||
* Set `Content-Type: application/json` to POST JSON with sentences for multiple files
|
||||
* See `sentences.ini` and `intents` directory in your profile
|
||||
* `/api/slots`
|
||||
* GET slot values as JSON or POST to add to/overwrite them
|
||||
* `?overwrite_all=true` to clear slots in JSON before writing
|
||||
* `/api/speakers`
|
||||
* GET list of available audio output devices
|
||||
* `/api/speech-to-intent`
|
||||
* POST a WAV file and have Rhasspy process it as a voice command
|
||||
* Returns intent JSON when command is finished
|
||||
* `?nohass=true` - stop Rhasspy from handling the intent
|
||||
* `/api/speech-to-text`
|
||||
* POST a WAV file and have Rhasspy return the text transcription
|
||||
* Set `Accept: application/json` to receive JSON with more details
|
||||
* `?noheader=true` - send raw 16-bit 16 kHz mono audio without a WAV header
|
||||
* `/api/start-recording`
|
||||
* POST to have Rhasspy start recording a voice command
|
||||
* `/api/stop-recording`
|
||||
* POST to have Rhasspy stop recording and process recorded data as a voice command
|
||||
* Returns intent JSON when command has been processed
|
||||
* `?nohass=true` - stop Rhasspy from handling the intent
|
||||
* `/api/test-microphones`
|
||||
* GET list of available microphones and if they're working
|
||||
* `/api/text-to-intent`
|
||||
* POST text and have Rhasspy process it as command
|
||||
* Returns intent JSON when command has been processed
|
||||
* `?nohass=true` - stop Rhasspy from handling the intent
|
||||
* `/api/text-to-speech`
|
||||
* POST text and have Rhasspy speak it
|
||||
* `?play=false` - get WAV data instead of having Rhasspy speak
|
||||
* `?voice=<voice>` - override default TTS voice
|
||||
* `?language=<language>` - override default TTS language or locale
|
||||
* `?repeat=true` - have Rhasspy repeat the last sentence it spoke
|
||||
* `/api/train`
|
||||
* POST to re-train your profile
|
||||
* `?nocache=true` - re-train profile from scratch
|
||||
* `/api/unknown-words`
|
||||
* GET words that Rhasspy doesn't know in your sentences
|
||||
* See `unknown_words.txt` in your profile directory
|
||||
|
||||
## Websocket API
|
||||
|
||||
* `/api/events/intent`
|
||||
* Listen for recognized intents published as JSON
|
||||
* `/api/events/log`
|
||||
* Listen for log messages published as plain text
|
||||
|
||||
## MQTT API
|
||||
|
||||
Rhasspy implements part of the [Hermes](https://docs.snips.ai/reference/hermes) protocol. Various services of Rhasspy can be configured to pass along MQTT messages or to react to MQTT messages following the Hermes protocol.
|
||||
|
||||
* `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>`
|
||||
* Rhasspy publishes audio in WAV format to this topic. By default it is 16 kHz, 16-bit mono for compatibility reasons, but other types are possible too.
|
||||
* `SITE_ID` is set in Rhasspy's `mqtt` configuration.
|
||||
* `REQUEST_ID` is generated using `uuid.uuid4` each time a sound is played.
|
||||
* `hermes/audioServer/<SITE_ID>/audioFrame`
|
||||
* Rhasspy listens to this topic for WAV data. Audio is automatically converted to 16 kHz, 16-bit mono audio and played.
|
||||
* `SITE_ID` is set in Rhasspy's `mqtt` configuration.
|
||||
* `hermes/asr/startListening`
|
||||
* Rhasspy wakes up and starts recording on receiving this topic.
|
||||
* The payload is a JSON object with a `siteId` key that holds Rhasspy's site ID.
|
||||
* `hermes/asr/stopListening`
|
||||
* Rhasspy stops recording and processes the voice command on receiving this topic.
|
||||
* The payload is a JSON object with a `siteId` key that holds Rhasspy's site ID.
|
||||
* `hermes/intent/<INTENT_NAME>`
|
||||
* Rhasspy publishes a message to this topic on recognition of an intent.
|
||||
* The payload is a JSON object with the recognized intent, entities and text.
|
||||
* `hermes/nlu/intentNotRecognized`
|
||||
* Rhasspy publishes a message to this topic when it doesn't recognize an intent.
|
||||
* `hermes/asr/textCaptured`
|
||||
* Rhasspy publishes a transcription to this topic each time a voice command is recognized.
|
||||
* `hermes/hotword/<WAKEWORD_ID>/detected`
|
||||
* Rhasspy wakes up when a message is received on this topic.
|
||||
|
||||
## Command Line
|
||||
|
||||
Rhasspy provides a powerful [command-line interface](usage.md#command-line) called `rhasspy-cli`.
|
||||
|
||||
For `rhasspy-cli --profile <PROFILE_NAME> <COMMAND> <ARGUMENTS>`, `<COMMAND>` can be:
|
||||
|
||||
* `info`
|
||||
* Print profile JSON to standard out
|
||||
* Add `--defaults` to only print settings from `defaults.json`
|
||||
* `wav2text`
|
||||
* Convert WAV file(s) to text
|
||||
* `wav2intent`
|
||||
* Convert WAV file(s) to intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* `text2intent`
|
||||
* Convert text command(s) to intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* `train`
|
||||
* Re-train your profile
|
||||
* `mic2wav`
|
||||
* Listen for a voice command and output WAV data
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `mic2text`
|
||||
* Listen for a voice command and convert it to text
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `mic2intent`
|
||||
* Listen for a voice command and output intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `word2phonemes`
|
||||
* Print the CMU phonemes for a word (possibly unknown)
|
||||
* Add `-n <COUNT>` to control the maximum number of guessed pronunciations
|
||||
* `word2wav`
|
||||
* Pronounce a word (possibly unknown) and output WAV data
|
||||
* `text2speech`
|
||||
* Speaks one or more sentences using Rhasspy's text to speech system
|
||||
* `text2wav`
|
||||
* Converts a single sentence to WAV using Rhasspy's text to speech system
|
||||
* `sleep`
|
||||
* Run Rhasspy and wait until wake word is spoken
|
||||
* `download`
|
||||
* Download necessary profile files from the internet
|
||||
|
||||
### Profile Operations
|
||||
|
||||
Print the complete JSON for the English profile with:
|
||||
|
||||
rhasspy-cli --profile en info
|
||||
|
||||
You can combine this with other commands, such as `jq` to get at specific pieces:
|
||||
|
||||
rhasspy-cli --profile en info | jq .wake.pocketsphinx.keyphrase
|
||||
|
||||
Output (JSON):
|
||||
|
||||
"okay rhasspy"
|
||||
|
||||
### Training
|
||||
|
||||
Retrain the English profile with:
|
||||
|
||||
rhasspy-cli --profile en train
|
||||
|
||||
Add `--debug` before `train` for more information.
|
||||
|
||||
### Speech to Text/Intent
|
||||
|
||||
Convert a WAV file to text from stdin:
|
||||
|
||||
rhasspy-cli --profile en wav2text < what-time-is-it.wav
|
||||
|
||||
Output (text):
|
||||
|
||||
what time is it
|
||||
|
||||
Convert multiple WAV files:
|
||||
|
||||
rhasspy-cli --profile en wav2text what-time-is-it.wav turn-on-the-living-room-lamp.wav
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"what-time-is-it.wav": "what time is it",
|
||||
"turn-on-the-living-room-lamp.wav": "turn on the living room lamp"
|
||||
}
|
||||
```
|
||||
|
||||
Convert multiple WAV file(s) to intents **and** handle them:
|
||||
|
||||
rhasspy-cli --profile en wav2intent --handle what-time-is-it.wav turn-on-the-living-room-lamp.wav
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"what_time_is_it.wav": {
|
||||
"text": "what time is it",
|
||||
"intent": {
|
||||
"name": "GetTime",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": []
|
||||
},
|
||||
"turn_on_living_room_lamp.wav": {
|
||||
"text": "turn on the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "on"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Text to Intent
|
||||
|
||||
Handle a command as if it was spoken:
|
||||
|
||||
rhasspy-cli --profile en text2intent --handle "turn off the living room lamp"
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"turn off the living room lamp": {
|
||||
"text": "turn off the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "off"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Record Your Voice
|
||||
|
||||
Save a voice command to a WAV:
|
||||
|
||||
rhasspy-cli --profile en mic2wav > my-voice-command.wav
|
||||
|
||||
You can listen to it with:
|
||||
|
||||
aplay my-voice-command.wav
|
||||
|
||||
### Test Your Wake Word
|
||||
|
||||
Start Rhasspy and wait for wake word:
|
||||
|
||||
rhasspy-cli --profile en sleep
|
||||
|
||||
Rhasspy should exit and print the wake word when it's spoken.
|
||||
|
||||
### Text to Speech
|
||||
|
||||
Have Rhasspy speak one or more sentences:
|
||||
|
||||
rhasspy-cli --profile en text2speech "We ride at dawn!"
|
||||
|
||||
Use a different text to speech system and voice:
|
||||
|
||||
rhasspy-cli --profile en \
|
||||
--set 'text_to_speech.system' 'flite' \
|
||||
--set 'text_to_speech.flite.voice' 'slt' \
|
||||
text2speech "We ride at dawn!"
|
||||
|
||||
### Pronounce Words
|
||||
|
||||
Speak words Rhasspy doesn't know!
|
||||
|
||||
rhasspy-cli --profile en word2wav raxacoricofallapatorius | aplay
|
||||
|
||||
### Text to Speech to Text to Intent
|
||||
|
||||
Use the miracle of Unix pipes to have Rhasspy interpret voice commands from itself:
|
||||
|
||||
rhasspy-cli --profile en \
|
||||
--set 'text_to_speech.system' 'picotts' \
|
||||
text2wav "turn on the living room lamp" | \
|
||||
rhasspy-cli --profile en wav2text | \
|
||||
rhasspy-cli --profile en text2intent
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"turn on the living room lamp": {
|
||||
"text": "turn on the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "on"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
],
|
||||
"speech_confidence": 1,
|
||||
"slots": {
|
||||
"state": "on",
|
||||
"name": "living room lamp"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Profile Settings
|
||||
|
||||
All available profile sections and settings are listed below:
|
||||
|
||||
* `rhasspy` - configuration for Rhasspy assistant
|
||||
* `preload_profile` - true if speech/intent recognizers should be loaded immediately for default profile (default: `true`)
|
||||
* `listen_on_start` - true if Rhasspy should listen for wake word at startup (default: `true`)
|
||||
* `load_timeout_sec` - number of seconds to wait for internal actors before proceeding with start up
|
||||
* `home_assistant` - how to communicate with Home Assistant/Hass.io
|
||||
* `url` - Base URL of Home Assistant server (no `/api`)
|
||||
* `access_token` - long-lived access token for Home Assistant (Hass.io token is used automatically)
|
||||
* `api_password` - Password, if you have that enabled (deprecated)
|
||||
* `pem_file` - Full path to your <a href="http://docs.python-requests.org/en/latest/user/advanced/#ssl-cert-verification">CA_BUNDLE file or a directory with certificates of trusted CAs</a>
|
||||
* `event_type_format` - Python format string used to create event type from intent type (`{0}`)
|
||||
* `speech_to_text` - transcribing [voice commands to text](speech-to-text.md)
|
||||
* `system` - name of speech to text system (`pocketsphinx`, `kaldi`, `remote`, `command`, or `dummy`)
|
||||
* `pocketsphinx` - configuration for [Pocketsphinx](speech-to-text.md#pocketsphinx)
|
||||
* `compatible` - true if profile can use pocketsphinx for speech recognition
|
||||
* `acoustic_model` - directory with CMU 16 kHz acoustic model
|
||||
* `base_dictionary` - large text file with word pronunciations (read only)
|
||||
* `custom_words` - small text file with words/pronunciations added by user
|
||||
* `dictionary` - text file with all words/pronunciations needed for example sentences
|
||||
* `unknown_words` - small text file with guessed word pronunciations (from phonetisaurus)
|
||||
* `language_model` - text file with trigram [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) built from example sentences
|
||||
* `open_transcription` - true if general language model should be used (custom voice commands ignored)
|
||||
* `base_language_model` - large general language model (read only)
|
||||
* `mllr_matrix` - MLLR matrix from [acoustic model tuning](https://cmusphinx.github.io/wiki/tutorialtuning/)
|
||||
* `mix_weight` - how much of the base language model to [mix in during training](training.md#language-model-mixing) (0-1)
|
||||
* `mix_fst` - path to save mixed ngram FST model
|
||||
* `kaldi` - configuration for [Kaldi](speech-to-text.md#kaldi)
|
||||
* `compatible` - true if profile can use Kaldi for speech recognition
|
||||
* `kaldi_dir` - absolute path to Kaldi root directory
|
||||
* `model_dir` - directory where Kaldi model is stored (relative to profile directory)
|
||||
* `graph` - directory where HCLG.fst is located (relative to `model_dir`)
|
||||
* `base_graph` - directory where large general HCLG.fst is located (relative to `model_dir`)
|
||||
* `base_dictionary` - large text file with word pronunciations (read only)
|
||||
* `custom_words` - small text file with words/pronunciations added by user
|
||||
* `dictionary` - text file with all words/pronunciations needed for example sentences
|
||||
* `open_transcription` - true if general language model should be used (custom voice commands ignored)
|
||||
* `unknown_words` - small text file with guessed word pronunciations (from phonetisaurus)
|
||||
* `mix_weight` - how much of the base language model to [mix in during training](training.md#language-model-mixing) (0-1)
|
||||
* `mix_fst` - path to save mixed ngram FST model
|
||||
* `remote` - configuration for [remote Rhasspy server](speech-to-text.md#remote-http-server)
|
||||
* `url` - URL to POST WAV data for transcription (e.g., `http://your-rhasspy-server:12101/api/speech-to-text`)
|
||||
* `command` - configuration for [external speech-to-text program](speech-to-text.md#command)
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `sentences_ini` - Ini file with example [sentences/JSGF templates](training.md#sentencesini) grouped by intent
|
||||
* `sentences_dir` - Directory with additional sentence templates (default: `intents`)
|
||||
* `g2p_model` - finite-state transducer for phonetisaurus to guess word pronunciations
|
||||
* `g2p_casing` - casing to force for g2p model (`upper`, `lower`, or blank)
|
||||
* `dictionary_casing` - casing to force for dictionary words (`upper`, `lower`, or blank)
|
||||
* `slots_dir` - directory to look for [slots lists](training.md#slots-lists) (default: `slots`)
|
||||
* `slot_programs` - directory to look for [slot programs](training.md#slot-programs) (default `slot_programs`)
|
||||
* `fsts_dir` - directory to write generated finite state transducers from JSGF grammars
|
||||
* `intent` - transforming text commands to intents
|
||||
* `system` - intent recognition system (`fsticuffs`, `fuzzywuzzy`, `rasa`, `remote`, `adapt`, `command`, or `dummy`)
|
||||
* `fsticuffs` - configuration for [OpenFST-based](https://www.openfst.org) intent recognizer
|
||||
* `intent_fst` - path to generated finite state transducer with all intents combined
|
||||
* `converters_dir` - directory to look for [converter](training.md#converters) programs (default: `converters`)
|
||||
* `ignore_unknown_words` - true if words not in the FST symbol table should be ignored
|
||||
* `fuzzy` - true if text is matched in a fuzzy manner, skipping words in `stop_words.txt`
|
||||
* `fuzzywuzzy` - configuration for simplistic [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) based intent recognizer
|
||||
* `examples_json` - JSON file with intents/example sentences
|
||||
* `min_confidence` - minimum confidence required for intent to be converted to a JSON event (0-1)
|
||||
* `remote` - configuration for remote Rhasspy server
|
||||
* `url` - URL to POST text to for intent recognition (e.g., `http://your-rhasspy-server:12101/api/text-to-intent`)
|
||||
* `rasa` - configuration for [Rasa NLU](https://rasa.com/) based intent recognizer
|
||||
* `url` - URL of remote Rasa NLU server (e.g., `http://localhost:5005/`)
|
||||
* `examples_markdown` - Markdown file to generate with intents/example sentences
|
||||
* `project_name` - name of project to generate during training
|
||||
* `adapt` - configuration for [Mycroft Adapt](https://github.com/MycroftAI/adapt) based intent recognizer
|
||||
* `stop_words` - text file with words to ignore in training sentences
|
||||
* `command` - configuration for external intent recognition program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `replace_numbers` - if true, automatically replace number ranges (`N..M`) or numbers (`N`) with words
|
||||
* `text_to_speech` - pronouncing words
|
||||
* `system` - text to speech system (`espeak`, `flite`, `picotts`, `marytts`, `command`, or `dummy`)
|
||||
* `espeak` - configuration for [eSpeak](http://espeak.sourceforge.net)
|
||||
* `phoneme_map` - text file mapping CMU phonemes to eSpeak phonemes
|
||||
* `flite` - configuration for [flite](http://www.festvox.org/flite)
|
||||
* `voice` - name of voice to use (e.g., `kal16`, `rms`, `awb`)
|
||||
* `picotts` - configuration for [PicoTTS](https://en.wikipedia.org/wiki/SVOX)
|
||||
* `language` - language to use (default if not present)
|
||||
* `marytts` - configuration for [MaryTTS](http://mary.dfki.de)
|
||||
* `url` - address:port of MaryTTS server (port is usually 59125)
|
||||
* `voice` - name of voice to use (e.g., `cmu-slt`). Default if not present.
|
||||
* `locale` - name of locale to use (e.g., `en-US`). Default if not present.
|
||||
* `wavenet` - configuration for Google's [WaveNet](https://cloud.google.com/text-to-speech/docs/wavenet)
|
||||
* `cache_dir` - path to directory in your profile where WAV files are cached
|
||||
* `credentials_json` - path to the JSON credentials file (generated online)
|
||||
* `gender` - gender of speaker (`MALE` or `FEMALE`)
|
||||
* `language_code` - language/locale (e.g., `en-US`)
|
||||
* `sample_rate` - WAV sample rate (default: 22050)
|
||||
* `url` - URL of WaveNet endpoint
|
||||
* `voice` - voice to use (e.g., `Wavenet-C`)
|
||||
* `fallback_tts` - text to speech system to use when offline or error occurs (e.g., `espeak`)
|
||||
* `phoneme_examples` - text file with examples for each CMU phoneme
|
||||
* `training` - training speech/intent recognizers
|
||||
* `dictionary_number_duplicates` - true if duplicate words in dictionary should be suffixed by `(2)`, `(3)`, etc.
|
||||
* `tokenizer` - system used to break sentences into words (`regex` only for now)
|
||||
* `regex` - configuration for regex tokenizer
|
||||
* `replace` - list of dictionaries with patterns/replacements used on each example sentence
|
||||
* `split` - pattern used to break sentences into words
|
||||
* `unknown_words` - configuration for dealing with words not in base/custom dictionaries
|
||||
* `fail_when_present` - true if Rhasspy should halt training when unknown words are found
|
||||
* `guess_pronunciations` - true if [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) should be used to guess how an unknown word is pronounced
|
||||
* `speech_to_text` - training for speech decoder
|
||||
* `system` - speech to text training system (`auto`, `pocketsphinx`, `kaldi`, `command`, or `dummy`)
|
||||
* `command` - configuration for external speech-to-text training program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `intent` - training for intent recognizer
|
||||
* `system` - intent recognizer training system (`auto`, `fsticuffs`, `fuzzywuzzy`, `rasa`, `adapt`, `command`, or `dummy`)
|
||||
* `command` - configuration for external intent recognizer training program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `wake` - waking Rhasspy up for speech input
|
||||
* `system` - wake word recognition system (`pocketsphinx`, `snowboy`, `precise`, `porcupine`, `command`, or `dummy`)
|
||||
* `pocketsphinx` - configuration for Pocketsphinx wake word recognizer
|
||||
* `keyphrase` - phrase to wake up on (3-4 syllables recommended)
|
||||
* `threshold` - sensitivity of detection (recommended range 1e-50 to 1e-5)
|
||||
* `chunk_size` - number of bytes per chunk to feed to Pocketsphinx (default 960)
|
||||
* `snowboy` - configuration for [snowboy](https://snowboy.kitt.ai)
|
||||
* `model` - path to model file(s), separated by commas (in profile directory)
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `audio_gain` - audio gain (default 1)
|
||||
* `apply_frontend` - true if ApplyFrontend should be set
|
||||
* `chunk_size` - number of bytes per chunk to feed to snowboy (default 960)
|
||||
* `model_settings` - settings for each snowboy model path (e.g., `snowboy/snowboy.umdl`)
|
||||
* `<MODEL_PATH>`
|
||||
* `sensitivity` - model sensitivity
|
||||
* `audio_gain` - audio gain
|
||||
* `apply_frontend` - true if ApplyFrontend should be set
|
||||
* `precise` - configuration for [Mycroft Precise](https://github.com/MycroftAI/mycroft-precise)
|
||||
* `engine_path` - path to the precise-engine binary
|
||||
* `model` - path to model file (in profile directory)
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `trigger_level` - number of events to trigger activation (default 3)
|
||||
* `chunk_size` - number of bytes per chunk to feed to Precise (default 2048)
|
||||
* `porcupine` - configuration for [PicoVoice's Porcupine](https://github.com/Picovoice/Porcupine)
|
||||
* `library_path` - path to `libpv_porcupine.so` for your platform/architecture
|
||||
* `model_path` - path to the `porcupine_params.pv` (lib/common)
|
||||
* `keyword_path` - path to the `.ppn` keyword file
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `command` - configuration for external wake word detection program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `microphone` - configuration for audio recording
|
||||
* `system` - audio recording system (`pyaudio`, `arecord`, `hermes`, `gstreamer`, `http`, or `dummy`)
|
||||
* `pyaudio` - configuration for [PyAudio](https://people.csail.mit.edu/hubert/pyaudio/) microphone
|
||||
* `device` - index of device to use or empty for default device
|
||||
* `frames_per_buffer` - number of frames to read at a time (default 480)
|
||||
* `arecord` - configuration for ALSA microphone
|
||||
* `device` - name of ALSA device (see `arecord -L`) to use or empty for default device
|
||||
* `chunk_size` - number of bytes to read at a time (default 960)
|
||||
* `http` - configuration for HTTP audio stream
|
||||
* `host` - hostname or IP address of HTTP audio server (default 127.0.0.1)
|
||||
* `port` - port to receive audio stream on (default 12333)
|
||||
* `stop_after` - one of "never", "text", or "intent" ([see documentation](audio-input.md#http-stream))
|
||||
* `gstreamer` - configuration for GStreamer audio recorder
|
||||
* `pipeline` - GStreamer pipeline (e.g., `FILTER ! FILTER ! ...`) without sink
|
||||
* `hermes` - configuration for MQTT "microphone" ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* Subscribes to WAV data from `hermes/audioServer/<SITE_ID>/audioFrame`
|
||||
* Requires MQTT to be enabled
|
||||
* `sounds` - configuration for feedback sounds from Rhasspy
|
||||
* `system` - which sound output system to use (`aplay`, `hermes`, or `dummy`)
|
||||
* `wake` - path to WAV file to play when Rhasspy wakes up
|
||||
* `recorded` - path to WAV file to play when a command finishes recording
|
||||
* `aplay` - configuration for ALSA speakers
|
||||
* `device` - name of ALSA device (see `aplay -L`) to use or empty for default device
|
||||
* `hermes` - configuration for MQTT "speakers" ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* WAV data published to `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>`
|
||||
* Requires MQTT to be enabled
|
||||
* `command`
|
||||
* `system` - which voice command listener system to use (`webrtcvad`, `oneshot`, `hermes`, or `dummy`)
|
||||
* `webrtcvad` - configuration for [webrtcvad](https://github.com/wiseman/py-webrtcvad) system
|
||||
* `sample_rate` - sample rate of input audio
|
||||
* `chunk_size` - bytes per buffer (must be 10, 20, or 30 ms)
|
||||
* `vad_mode` - sensitivity of `webrtcvad` (0-3)
|
||||
* `min_sec` - minimum number of seconds in a command
|
||||
* `silence_sec` - number of seconds of silence after voice command before stopping
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `throwaway_buffers` - number of buffers to drop when recording starts
|
||||
* `speech_buffers` - number of buffers with speech before command starts
|
||||
* `oneshot` - configuration for voice command system that takes first audio frame as entire command
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `command` - configuration for external voice command program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `hermes` - configuration for MQTT-based voice command system that listens between `startListening` and `stopListening` commands ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `handle`
|
||||
* `system` - which intent handling system to use (`hass`, `command`, or `dummy`)
|
||||
* `forward_to_hass` - true if intents are always forwarded to Home Assistant (even if `system` is `command` or `remote`)
|
||||
* `command` - configuration for external intent handling program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `remote` - configuration for remote HTTP intent handler
|
||||
* `url` - URL to POST intent JSON to and receive response JSON from
|
||||
* `mqtt` - configuration for MQTT ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* `enabled` - true if MQTT client should be started
|
||||
* `host` - MQTT host
|
||||
* `port` - MQTT port
|
||||
* `username` - MQTT username (blank for anonymous)
|
||||
* `password` - MQTT password
|
||||
* `reconnect_sec` - number of seconds before client will reconnect
|
||||
* `site_id` - ID of site ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* `publish_intents` - true if intents are published to MQTT
|
||||
* `download` - configuration for profile file downloading
|
||||
* `cache_dir` - directory in your profile where downloaded files are cached
|
||||
* `conditions` - profile settings that will trigger file downloads
|
||||
* keys are profile setting paths (e.g., `wake.system`)
|
||||
* values are dictionaries whose keys are profile settings values (e.g., `snowboy`)
|
||||
* settings may have the form `<=N` or `!X` to mean "less than or equal to N" or "not X"
|
||||
* leaf nodes are dictionaries whose keys are destination file paths and whose values reference the `files` dictionary
|
||||
* `files` - locations, etc. of files to download
|
||||
* keys are names of files
|
||||
* values are dictionaries with:
|
||||
* `url` - URL of file to download
|
||||
* `cache` - `false` if file should be downloaded directly into profile (skipping cache)
|
||||
@@ -4,10 +4,10 @@ Rhasspy's primary function is convert voice commands to JSON events. The first s
|
||||
|
||||
The following table summarizes language support for the various speech to text systems:
|
||||
|
||||
| System | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt | ca |
|
||||
| ------ | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| [pocketsphinx](speech-to-text.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ |
|
||||
| [kaldi](speech-to-text.md#kaldi) | ✓ | ✓ | | ✓ | | ✓ | | | | | ✓ | | |
|
||||
| System | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt |
|
||||
| ------ | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| [pocketsphinx](speech-to-text.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | |
|
||||
| [kaldi](speech-to-text.md#kaldi) | | | | | | | | | | | ✓ | ✓ |
|
||||
|
||||
## Pocketsphinx
|
||||
|
||||
@@ -24,7 +24,9 @@ Add to your [profile](profiles.md):
|
||||
"base_dictionary": "base_dictionary.txt",
|
||||
"custom_words": "custom_words.txt",
|
||||
"dictionary": "dictionary.txt",
|
||||
"language_model": "language_model.txt"
|
||||
"language_model": "language_model.txt",
|
||||
"unknown_words": "unknown_words.txt",
|
||||
"mllr_matrix": "acoustic_model_mllr"
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -36,10 +38,11 @@ When Rhasspy starts, it creates a pocketsphinx decoder with the following attrib
|
||||
* `hmm` - `speech_to_text.pocketsphinx.acoustic_model` (directory)
|
||||
* `dict` - `speech_to_text.pocketsphinx.dictionary` (file)
|
||||
* `lm` - `speech_to_text.pocketsphinx.language_model` (file)
|
||||
* `mllr` - `speech_to_text.pocketsphinx.mllr_matrix` (file, optional)
|
||||
|
||||
### Open Transcription
|
||||
The `mllr_matrix` file is intended for advanced users who want to [tune/adapt their acoustic models](https://cmusphinx.github.io/wiki/tutorialadapt). This can increase the performance of Rhasspy's speech recognition for a specific user/microphone/acoustic environment.
|
||||
|
||||
If you just want to use Rhasspy for general speech to text, you can set `speech_to_text.pocketsphinx.open_transcription` to `true` in your profile. This will use the included general language model (much slower) and ignore any custom voice commands you've specified. For English, German, and Dutch, you may want to use [Kaldi](#kaldi) instead for better results.
|
||||
Pocketsphinx allows Rhasspy to support English (en), German (de), Dutch (nl), Spanish (es), Italian (it), French (fr), Greek (el), Russian (ru), Hindi (hi), and Mandarin (zh).
|
||||
|
||||
See `rhasspy.stt.PocketsphinxDecoder` for details.
|
||||
|
||||
@@ -73,10 +76,6 @@ This requires Kaldi to be installed, which is...challenging. The [Docker image o
|
||||
|
||||
Rhasspy expects a Kaldi-compatible profile to contain a `model` directory with a `train.sh` and `decode.sh` script. See the Vietnamese (vi) or Portuguese (pt) [profile](https://github.com/synesthesiam/rhasspy-profiles/releases) for an example.
|
||||
|
||||
### Open Transcription
|
||||
|
||||
If you just want to use Rhasspy for general speech to text, you can set `speech_to_text.kaldi.open_transcription` to `true` in your profile. This will use the included general language model (much slower) and ignore any custom voice commands you've specified.
|
||||
|
||||
## Remote HTTP Server
|
||||
|
||||
Uses a remote HTTP server to transform speech (WAV) to text.
|
||||
@@ -94,43 +93,10 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
```
|
||||
|
||||
During speech recognition, 16-bit 16 kHz mono WAV data will be POST-ed to the endpoint with the `Content-Type` set to `audio/wav`. A `text/plain` response with the transcription is expected back. An additional `profile` query argument is sent with the current profile name, so the POST URL is effectively something like `http://remote-server:12101/api/speech-to-text?profile=en`.
|
||||
During speech recognition, 16-bit 16Khz mono WAV data will be POST-ed to the endpoint with the `Content-Type` set to `audio/wav`. A `text/plain` response with the transcription is expected back. An additional `profile` query argument is sent with the current profile name, so the POST URL is effectively something like `http://remote-server:12101/api/speech-to-text?profile=en`.
|
||||
|
||||
See `rhasspy.stt.RemoteDecoder` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Publishes transcriptions to `hermes/asr/textCaptured` ([Hermes protocol](https://docs.snips.ai/reference/hermes)) each time a voice command is spoken.
|
||||
|
||||
This is enabled by default.
|
||||
|
||||
## Home Assistant STT Platform
|
||||
|
||||
Use an [STT platform](https://www.home-assistant.io/integrations/stt) on your Home Assistant server.
|
||||
This is the same way [Ada](https://github.com/home-assistant/ada) sends speech to Home Assistant.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"speech_to_text": {
|
||||
"system": "hass_stt",
|
||||
"hass_stt": {
|
||||
"platform": "...",
|
||||
"sample_rate": 16000,
|
||||
"bit_size": 16,
|
||||
"channels": 1,
|
||||
"language": "en-US"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The settings from your profile's `home_assistant` section are automatically used (URL, access token, etc.).
|
||||
|
||||
Rhasspy will convert audio to the configured format before streaming it to Home Assistant.
|
||||
In the future, this will be auto-detected from the STT platform API.
|
||||
|
||||
See `rhasspy.stt.HomeAssistantSTTIntegration` for details.
|
||||
|
||||
## Command
|
||||
|
||||
Calls a custom external program to do speech recognition.
|
||||
@@ -149,7 +115,7 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
```
|
||||
|
||||
When a voice command is received, Rhasspy will call your program and push the recorded WAV data (16-bit 16 kHz mono) to standard in. Your program should print the text transcription to standard out.
|
||||
When a voice command is received, Rhasspy will call your program and push the recorded WAV data (16-bit 16 Khz mono) to standard in. Your program should print the text transcription to standard out.
|
||||
|
||||
The following environment variables are available to your program:
|
||||
|
||||
|
||||
@@ -4,13 +4,13 @@ After you voice command has been [handled](intent-handling.md), it's common to p
|
||||
|
||||
The following table summarizes language support for the various text to speech systems:
|
||||
|
||||
| System | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt | ca |
|
||||
| ------ | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| [espeak](text-to-speech.md#espeak) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| [flite](text-to-speech.md#flite) | ✓ | | | | | | | | ✓ | | | | |
|
||||
| [picotts](text-to-speech.md#picotts) | ✓ | ✓ | ✓ | ✓ | ✓ | | | | | | | | |
|
||||
| [marytts](text-to-speech.md#marytts) | ✓ | ✓ | | ✓ | ✓ | | ✓ | | | | | | |
|
||||
| [wavenet](text-to-speech.md#google-wavenet) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | |
|
||||
| System | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt |
|
||||
| ------ | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| [espeak](text-to-speech.md#espeak) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| [flite](text-to-speech.md#flite) | ✓ | | | | | | | | ✓ | | | |
|
||||
| [picotts](text-to-speech.md#picotts) | ✓ | | | | | | | | | | | |
|
||||
| [marytts](text-to-speech.md#marytts) | ✓ | ✓ | | ✓ | ✓ | | ✓ | | | | | |
|
||||
| [wavenet](text-to-speech.md#google-wavenet) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ |
|
||||
|
||||
## eSpeak
|
||||
|
||||
@@ -89,24 +89,8 @@ To run the Docker image, simply execute:
|
||||
```bash
|
||||
docker run -it -p 59125:59125 synesthesiam/marytts:5.2
|
||||
```
|
||||
|
||||
and visit [http://localhost:59125](http://localhost:59125) after it starts.
|
||||
|
||||
If you're using [docker compose](https://docs.docker.com/compose/), add the following to your docker-compose.yml file:
|
||||
|
||||
marytts:
|
||||
image: synesthesiam/marytts:5.2
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "59125:59125"
|
||||
|
||||
When using docker-compose, set `marytts.url` in your profile to be `http://marytts:59125`. This will allow rhasspy, from within
|
||||
its docker container, to resolve and connect to marytts (its sibling container).
|
||||
|
||||
|
||||
### Adding Voices
|
||||
|
||||
For more English voices, run the following commands in a Bash shell:
|
||||
|
||||
and visit [http://localhost:59125](http://localhost:59125) after it starts. For more English voices, run the following commands in a Bash shell:
|
||||
|
||||
```bash
|
||||
mkdir -p marytts-5.2/download
|
||||
@@ -127,37 +111,6 @@ Change the first line to select the voice you'd like to add. It's not recommende
|
||||
|
||||
See `rhasspy.tts.MaryTTSSentenceSpeaker` for details.
|
||||
|
||||
### Audio Effects
|
||||
|
||||
MaryTTS is capable of applying several audio effects when producing speech. See the web interface at [http://localhost:59125](http://localhost:59125)
|
||||
to experiment with this.
|
||||
|
||||
|
||||
To use these effects within Rhasspy, set `text_to_speech.marytts.effects` within your profile, for example:
|
||||
|
||||
```json
|
||||
"text_to_speech": {
|
||||
"system": "marytts",
|
||||
"marytts": {
|
||||
"url": "http://localhost:59125",
|
||||
"effects": {
|
||||
"effect_Volume_selected": "on",
|
||||
"effect_Volume_parameters": "amount=0.9;",
|
||||
"effect_TractScaler_selected": "on",
|
||||
"effect_TractScaler_parameters": "amount:1.2;",
|
||||
"effect_F0Add_selected": "on",
|
||||
"effect_F0Add_parameters": "f0Add:-50.0;",
|
||||
"effect_Robot_selected": "on",
|
||||
"effect_Robot_parameters": "amount=50.0;"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can determine the names of the parameters by examining the web interface [http://localhost:59125](http://localhost:59125)
|
||||
using your browser's Developer Tools.
|
||||
|
||||
|
||||
## Google WaveNet
|
||||
|
||||
Uses Google's [WaveNet](https://cloud.google.com/text-to-speech/docs/wavenet) text to speech system. This **requires a Google account and an internet connection to function**. Rhasspy will cache WAV files for previously spoken sentences, but you will be sending Google information for every new sentence that Rhasspy speaks.
|
||||
@@ -190,25 +143,6 @@ Contributed by [Romkabouter](https://github.com/Romkabouter).
|
||||
|
||||
See `rhasspy.tts.GoogleWaveNetSentenceSpeaker` for details.
|
||||
|
||||
## Home Assistant TTS Platform
|
||||
|
||||
Use a [TTS platform](https://www.home-assistant.io/integrations/tts) on your Home Assistant server.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"text_to_speech": {
|
||||
"system": "hass_tts",
|
||||
"hass_tts": {
|
||||
"platform": "..."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The settings from your profile's `home_assistant` section are automatically used (URL, access token, etc.).
|
||||
|
||||
See `rhasspy.tts.HomeAssistantSentenceSpeaker` for details.
|
||||
|
||||
## Command
|
||||
|
||||
You can extend Rhasspy easily with your own external text to speech system. When a sentence needs to be spoken, Rhasspy will call your custom program with the text given on standard in. Your program should return the corresponding WAV data on standard out.
|
||||
@@ -225,7 +159,7 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
```
|
||||
|
||||
For compatibility with other services and Rhasspy components, it's best to return 16 kHz, 16-bit mono WAV data.
|
||||
For compatibility with other services and Rhasspy components, it's best to return 16 Khz, 16-bit mono WAV data.
|
||||
|
||||
See `rhasspy.tts.CommandSentenceSpeaker` for details.
|
||||
|
||||
|
||||
@@ -1,319 +1,48 @@
|
||||
# Training
|
||||
|
||||
Rhasspy is designed to recognize voice commands [in a template language](#sentencesini). These commands are categorized by **intent**, and may contain [slots](#slots-lists) or [named entities](#tags), such as the color and name of a light.
|
||||
Rhasspy is designed to recognize only the specific set of voice commands that [you provide](#sentencesini). These commands are categorized by **intent**, and may contain variable **slots** or **entities**, such as the color and name of a light.
|
||||
|
||||
* Intent Recognition
|
||||
* [Basic Syntax](#basic-syntax)
|
||||
* [Named Entities](#tags)
|
||||
* [Number Ranges](#number-ranges)
|
||||
* [Slots](#slots-lists)
|
||||
* [Slot Synonyms](#slot-synonyms)
|
||||
* [Slot Programs](#slot-programs)
|
||||
* [Converters](#converters)
|
||||
* Speech Recognition
|
||||
* [Custom Words](#custom-words)
|
||||
* [Language Model Mixing](#language-model-mixing)
|
||||
During the training process, Rhasspy simultaneously trains *both* a speech and intent recognizer. The speech recognizer converts voice commands to text, and the intent recognizer converts text to JSON events. Combined, they enable a low power, offline system like a Raspberry Pi to understand and respond to your voice commands.
|
||||
|
||||
## How It Works
|
||||
|
||||
Recognizing voice commands typically involves two main steps:
|
||||
|
||||
1. Speech to text (transcription)
|
||||
2. Text to intent (recognition)
|
||||
|
||||
For step (1), Rhasspy uses [pocketsphinx](https://github.com/cmusphinx/pocketsphinx) or [Kaldi](https://kaldi-asr.org), and generates a custom [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) during the training process. Specifically, the steps are:
|
||||
|
||||
1. Convert the grammar from your [sentences.ini](#sentencesini) file to a [finite state transducer](https://www.openfst.org)
|
||||
2. (Optionally) generate all possible sentences that can be spoken with entities tagged (e.g., `name` is `bedroom light`, `color` is `red`)
|
||||
3. Use the [opengrm](http://www.opengrm.org/twiki/bin/view/GRM/NGramLibrary) toolkit to create a custom language model
|
||||
4. Train an intent recognizer with the tagged sentences
|
||||
|
||||
Additionally, a custom [CMU phonetic dictionary](https://cmusphinx.github.io/wiki/tutorialdict/) is generated with *only* the words in your voice commands (and wake word, if you're using a [pocketsphinx keyphrase](wake-word.md#pocketsphinx)). If the pronunciation of a word is not known, Rhasspy calls out to [phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) to get a guess, and then halts training. Once you've confirmed the pronunciations by adding them to your [custom words](#custom-words), training can continue.
|
||||
|
||||
For step (4), Rhasspy can use a [variety of intent recognition systems](intent-recognition.md). However, most are trained from the **tagged sentences** generated from [sentences.ini](#sentencesini), e.g., `turn [on](state) the [living room lamp](name)`. These sentences are transformed into JSON, like:
|
||||
|
||||
{
|
||||
"ChangeLightState": [
|
||||
{
|
||||
"text": "turn on the living room lamp",
|
||||
"entities": [
|
||||
{ "entity": "state", "value": "on" },
|
||||
{ "entity": "name", "value": "living room lamp" }
|
||||
]
|
||||
},
|
||||
...
|
||||
],
|
||||
...
|
||||
}
|
||||
|
||||
and provided as training material to the intent recognition system. The [fuzzywuzzy](intent-recognition.md#fuzzywuzzy) system, for example, simply saves the JSON file and, during recognition, finds the closest matching sentence according to the [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance). The [default intent recognizer](intent-recognition.md#fsticuffs) interacts directly with the finite state transducer(s) generated in step (1) and, while less tolerant of errors than `fuzzywuzzy`, is significantly faster for large sets of voice commands (i.e., millions).
|
||||
|
||||
More sophisticated systems like [rasaNLU](intent-recognition.md#rasanlu) use machine learning techniques to classify sentences by intent and assign slot (entity) values. These systems are much better at recognizing sentences not seen during training, but can take minutes to hours to train.
|
||||
|
||||
## sentences.ini
|
||||
|
||||
Voice commands are stored in an [ini file](https://docs.python.org/3/library/configparser.html) whose "sections" are intents and "values" are sentence templates.
|
||||
|
||||
### Basic Syntax
|
||||
|
||||
To get started, simply list your intents (surrounded by brackets) and the possible ways of invoking them below:
|
||||
|
||||
```
|
||||
[TestIntent1]
|
||||
this is a sentence
|
||||
this is another sentence for the same intent
|
||||
|
||||
[TestIntent2]
|
||||
this is a sentence for a different intent
|
||||
```
|
||||
|
||||
If you say "this is a sentence" after hitting the `Train` button, it will generate a `TestIntent1`.
|
||||
|
||||
### Groups
|
||||
|
||||
You can group multiple words together using `(parentheses)` like:
|
||||
|
||||
```
|
||||
turn on the (living room lamp)
|
||||
```
|
||||
|
||||
Groups (sometimes called sequences) can be [tagged](#tags) and [substituted](#substitutions) like single words. They may also contain [alternatives](#alternatives).
|
||||
|
||||
### Optional Words
|
||||
|
||||
Within a sentence template, you can specify optional word(s) by surrounding them `[with brackets]`. For example:
|
||||
|
||||
```
|
||||
[an] example sentence [with] some optional words
|
||||
```
|
||||
|
||||
will match:
|
||||
|
||||
* `an example sentence with some optional words`
|
||||
* `example sentence with some optional words`
|
||||
* `an example sentence some optional words`
|
||||
* `example sentence some optional words`
|
||||
|
||||
### Alternatives
|
||||
|
||||
A set of items where only one is matched at a time is `(specified | like | this)`. For N items, there will be N matched sentences (unless you nest optional words, etc.). The template:
|
||||
|
||||
```
|
||||
set the light to (red | green | blue)
|
||||
```
|
||||
|
||||
will match:
|
||||
|
||||
* `set the light to red`
|
||||
* `set the light to green`
|
||||
* `set the light to blue`
|
||||
|
||||
### Tags
|
||||
|
||||
Named entities are marked in your sentence templates with `{tags}`. The name of the `{entity}` is between the curly braces, while the `(value of the){entity}` comes immediately before:
|
||||
|
||||
```
|
||||
[SetLightColor]
|
||||
set the light to (red | green | blue){color}
|
||||
```
|
||||
|
||||
With the `{color}` tag attached to `(red | green | blue)`, Rhasspy will match:
|
||||
|
||||
* `set the light to [red](color)`
|
||||
* `set the light to [green](color)`
|
||||
* `set the light to [blue](color)`
|
||||
|
||||
When the `SetLightColor` intent is recognized, the JSON event will contain a `color` property whose value is either "red", "green" or "blue".
|
||||
|
||||
#### Tag Synonyms
|
||||
|
||||
Tag/named entity values can be [substituted](#substitutions) using the colon (`:`) inside the `{curly:braces}` like:
|
||||
|
||||
```
|
||||
turn on the (living room lamp){name:light_1}
|
||||
```
|
||||
|
||||
Now the `name` property of the intent JSON event will contain "light_1" instead of "living room lamp".
|
||||
|
||||
### Substitutions
|
||||
|
||||
The colon (`:`) is used to put something different than what's spoken into the recognized intent JSON. The left-hand side of the `:` is what Rhasspy expects to hear, while the right-hand side is what gets put into the intent:
|
||||
|
||||
```
|
||||
turn on the (living room lamp):light_1
|
||||
```
|
||||
|
||||
In this example, the spoken phrase "living room lamp" will be replaced by "light_1" in the recognized intent. Substitutions work for single words, [groups](#groups), [alternatives](#alternatives), and [tags](#tags):
|
||||
|
||||
```
|
||||
turn on the living room lamp:light
|
||||
(turn | switch):switch on the living room lamp
|
||||
turn (on){action:activate} the living room lamp
|
||||
```
|
||||
|
||||
See [tag synonyms](#tag-synonyms) for more details on tag substitution.
|
||||
|
||||
You can leave the left-hand or right-hand side (or both!) of the `:` empty:
|
||||
|
||||
```
|
||||
these: words: will: be: dropped:
|
||||
:these :will :be :added
|
||||
```
|
||||
|
||||
When the right-hand side is empty (`dropped:`), the spoken word will not appear in the intent. An empty left-hand side (`:added`) means the word is *not* spoken, but will appear in the intent.
|
||||
|
||||
Leaving **both** sides empty does nothing unless you attach a [tag](#tags) to it. This allows you to embed a named entity in a voice command without matching specific words:
|
||||
|
||||
```
|
||||
turn on the living room lamp (:){domain:light}
|
||||
```
|
||||
|
||||
An intent from the example above will contain a `domain` entity whose value is `light`.
|
||||
|
||||
### Rules
|
||||
|
||||
Rules allow you to reuse parts of your sentence templates. They're defined by `rule_name = ...` alongside other sentences and referenced by `<rule_name>`. For example:
|
||||
|
||||
```
|
||||
colors = (red | green | blue)
|
||||
set the light to <colors>
|
||||
```
|
||||
|
||||
which is equivalent to:
|
||||
|
||||
```
|
||||
set the light to (red | green | blue)
|
||||
```
|
||||
|
||||
You can **share rules** across intents by referencing them as `<IntentName.rule_name>` like:
|
||||
|
||||
[SetLightColor]
|
||||
colors = (red | green | blue)
|
||||
set the light to <colors>
|
||||
|
||||
[GetLightColor]
|
||||
is the light <SetLightColor.colors>
|
||||
|
||||
The second intent (`GetLightColor`) references the `colors` rule from `SetLightColor`. Rule references without a dot must exist in the current intent.
|
||||
|
||||
### Number Ranges
|
||||
|
||||
Rhasspy supports using number literals (`75`) and number ranges (`1..10`) directly in your sentence templates. During training, the [num2words](https://pypi.org/project/num2words) package is used to generate words that the speech recognizer can handle ("seventy five"). For example:
|
||||
|
||||
```
|
||||
[SetBrightness]
|
||||
set brightness to (0..100){brightness}
|
||||
```
|
||||
|
||||
The `brightness` property of the recognized `SetBrightness` intent will automatically be [converted](#converters) to an integer for you. You can optionally add a step to the integer range:
|
||||
|
||||
```
|
||||
evens = 0..100,2
|
||||
odds = 1..100,2
|
||||
```
|
||||
|
||||
Under the hood, number ranges are actually references to the `rhasspy/number` [slot program](#slot-programs). You can override this behavior by creating your `slot_programs/rhasspy/number` program or disable it entirely by setting `intent.replace_numbers` to `false` in [your profile](profiles.md).
|
||||
|
||||
### Slots Lists
|
||||
|
||||
Large [alternatives](#alternatives) can become unwieldy quickly. For example, say you have a list of movie names:
|
||||
|
||||
```
|
||||
movies = ("Primer" | "Moon" | "Chronicle" | "Timecrimes" | "Mulholland Drive" | ... )
|
||||
```
|
||||
|
||||
Rather than keep this list in `sentences.ini`, you may put each movie name on a separate line in a file named `slots/movies` (no file extension) and reference it as `$movies`. Rhasspy automatically loads all files in the `slots` directory of your [profile](profiles.md) and makes them available as slots lists.
|
||||
|
||||
For the example above, the file `slots/movies` should contain:
|
||||
|
||||
```
|
||||
Primer
|
||||
Moon
|
||||
Chronicle
|
||||
Timecrimes
|
||||
Mulholland Drive
|
||||
```
|
||||
|
||||
Now you can simply use the placeholder `$movies` in your sentence templates:
|
||||
|
||||
```
|
||||
[PlayMovie]
|
||||
play ($movies){movie_name}
|
||||
```
|
||||
|
||||
When matched, the `PlayMovie` intent JSON will contain a `movie_name` property with either "Primer", "Moon", etc.
|
||||
|
||||
Make sure to **re-train** Rhasspy whenever you update your slot values!
|
||||
|
||||
#### Slot Directories
|
||||
|
||||
Slot files can be put in **sub-directories** under `slots`. A list in `slots/foo/bar` should be referenced in `sentences.ini` as `$foo/bar`.
|
||||
|
||||
#### Slot Synonyms
|
||||
|
||||
Slot values are themselves sentence templates! So you can use all of the familiar syntax from above. Slot "synonyms" can be created simply using [substitutions](#substitutions). So a file named `slots/rooms` may contain:
|
||||
|
||||
```
|
||||
[the:] (den | playroom | downstairs):den
|
||||
```
|
||||
|
||||
which is referenced by `$rooms` and will match:
|
||||
|
||||
* the den
|
||||
* den
|
||||
* the playroom
|
||||
* playroom
|
||||
* the downstairs
|
||||
* downstairs
|
||||
|
||||
This will always output just "den" because `[the:]` optionally matches "the" and then drops the word.
|
||||
|
||||
#### Slot Programs
|
||||
|
||||
Slot lists are great if your slot values always stay the same and are easily written out by hand. If you have slot values that you need to be generated *each time Rhasspy is trained*, you can use slot programs.
|
||||
|
||||
Create a directory named `slot_programs` in your profile (e.g., `$HOME/.config/rhasspy/profiles/en/slot_programs`):
|
||||
|
||||
```bash
|
||||
slot_programs="${HOME}/.config/rhasspy/profiles/en/slot_programs"
|
||||
mkdir -p "${slot_programs}"
|
||||
```
|
||||
|
||||
Add a file in `slot_programs` with the name of your slot, e.g. `colors`. Write a program in this file, such as a bash script. Make sure to include the [shebang](https://en.wikipedia.org/wiki/Shebang_(Unix)) and mark the file as executable:
|
||||
|
||||
```bash
|
||||
cat <<EOF > "${slot_programs}/colors"
|
||||
#!/usr/bin/env bash
|
||||
echo 'red'
|
||||
echo 'green'
|
||||
echo 'blue'
|
||||
EOF
|
||||
|
||||
chmod +x "${slot_programs}/colors"
|
||||
```
|
||||
|
||||
Now, when you reference `$colors` in your `sentences.ini`, Rhasspy will run the program you wrote and collect the slot values from each line. Note that you can output all the same things as regular [slots lists](#slots-lists), including optional words, alternatives, etc.
|
||||
|
||||
You can pass **arguments** to your program using the syntax `$name,arg1,arg2,...` in `sentences.ini` (no spaces). Arguments will be passed on the command-line, so `arg1` and `arg2` will be `$1` and `$2` in a bash script.
|
||||
|
||||
Like regular slots lists, slot programs can also be put in sub-directories under `slot_programs`. A program in `slot_programs/foo/bar` should be referenced in `sentences.ini` as `$foo/bar`.
|
||||
|
||||
#### Built-in Slots
|
||||
|
||||
Rhasspy includes a few built-in slots for each language:
|
||||
|
||||
* `$rhasspy/days` - day names of the week
|
||||
* `$rhasspy/months` - month names of the year
|
||||
|
||||
### Converters
|
||||
|
||||
By default, all named entity values in a recognized intent's JSON are strings. If you need a different data type, such as an integer or float, or want to do some kind of complex *conversion*, use a converter:
|
||||
|
||||
```
|
||||
[SetBrightness]
|
||||
set brightness to (low:0 | medium:0.5 | high:1){brightness!float}
|
||||
```
|
||||
|
||||
The `!name` syntax calls a converter by name. Rhasspy includes several built-in converters:
|
||||
|
||||
* int - convert to integer
|
||||
* float - convert to real
|
||||
* bool - convert to boolean
|
||||
* lower - lower-case
|
||||
* upper - upper-case
|
||||
|
||||
You can define your own converters by placing a file in the `converters` directory of your profile. Like [slot programs](#slot-programs), this file should contain a [shebang](https://en.wikipedia.org/wiki/Shebang_(Unix)) and be marked as executable (`chmod +x`). A file named `converters/foo/bar` should be referenced as `!foo/bar` in `sentences.ini`.
|
||||
|
||||
Your custom converter will receive the value to convert on standard in (`stdin`) encoded as JSON. You should print a converted JSON value to standard out (`stdout`). The example below demonstrates converting a string value into an integer:
|
||||
|
||||
```python
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import json
|
||||
|
||||
value = json.load(sys.stdin)
|
||||
print(int(value))
|
||||
```
|
||||
|
||||
Converters can be *chained*, so `!foo!bar` will call the `foo` converter and then pass the result to `bar`.
|
||||
|
||||
### Special Cases
|
||||
|
||||
If one of your sentences happens to start with an optional word (e.g., `[the]`), this can lead to a problem:
|
||||
|
||||
[SomeIntent]
|
||||
[the] problem sentence
|
||||
|
||||
Python's [configparser](https://docs.python.org/3/library/configparser.html) will interpret `[the]` as a new section header, which will produce a new intent, grammar, etc. Rhasspy handles this special case by using a backslash escape sequence (`\[`):
|
||||
|
||||
[SomeIntent]
|
||||
\[the] problem sentence
|
||||
|
||||
Now `[the]` will be properly interpreted as a sentence under `[SomeIntent]`. You only need to escape a `[` if it's the **very first** character in your sentence.
|
||||
Voice commands are recognized by Rhasspy from a set of sentences that you define in your [profile](profiles.md). These are stored in an [ini file](https://docs.python.org/3/library/configparser.html) whose "values" are simplified [JSGF grammars](https://www.w3.org/TR/jsgf/). The set of all sentences *generated* from these grammars is used to train an [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) and an intent recognizer.
|
||||
|
||||
### Motivation
|
||||
|
||||
@@ -336,7 +65,163 @@ Compared to JSON, YAML, etc., there is minimal syntactic overhead for the purpos
|
||||
3. You cannot share commonly *repeated phrases* across sentences or intents.
|
||||
4. There is no way to *tag phrases* so the intent recognizer knows the values for an intent's slots (e.g., color).
|
||||
|
||||
Each of these shortcomings is addressed by considering the space between intent headings (`[Intent 1]`, etc.) as a **grammar** that represents many possible voice commands. The possible sentences, stripped of their tags, are used as input to [opengrm](https://www.opengrm.org) to produce a standard ARPA language model for [pocketsphinx](https://github.com/cmusphinx/pocketsphinx) or [Kaldi](https://kaldi-asr.org). The tagged sentences are then used to train an intent recognizer.
|
||||
Each of these shortcomings is addressed by considering the space between intent headings (`[Intent 1]`, etc.) as a **grammar** that will *generate* tagged sentences in [rasaNLU's training data format](https://rasa.com/docs/nlu/dataformat/#markdown-format). The generated sentences, stripped of their tags, are used as input to [opengrm](https://www.opengrm.org) to produce a language model for [pocketsphinx](https://github.com/cmusphinx/pocketsphinx) or [Kaldi](https://kaldi-asr.org). The tagged sentences are then used to train an intent recognizer.
|
||||
|
||||
### Optional Words
|
||||
|
||||
Within a sentence, you can specify optional word(s) by surrounding them `[with brackets]`. These will generate at least two sentences: one with the optional word(s), and one without. So the following sentence template:
|
||||
|
||||
[an] example sentence [with] some optional words
|
||||
|
||||
will generate 4 concrete sentences:
|
||||
|
||||
1. `an example sentence with some optional words`
|
||||
2. `example sentence with some optional words`
|
||||
3. `an example sentence some optional words`
|
||||
4. `example sentence some optional words`
|
||||
|
||||
### Alternatives
|
||||
|
||||
A set of items, where only one is present at a time, is `(specified | like | this)`. For N items, there will be N sentences generated (unless you nest optional words, etc.). The template:
|
||||
|
||||
set the light to (red | green | blue)
|
||||
|
||||
will generate:
|
||||
|
||||
1. `set the light to red`
|
||||
2. `set the light to green`
|
||||
3. `set the light to blue`
|
||||
|
||||
### Rules
|
||||
|
||||
Rules allow you to reuse common phrases, alternatives, etc. Rules are defined by `rule_name = ...` alongside your sentences and referenced by `<rule_name>`. The template above with colors could be rewritten as:
|
||||
|
||||
colors = (red | green | blue)
|
||||
set the light to <colors>
|
||||
|
||||
which will generate the same 3 sentences as above. Importantly, you can **share rules** across intents by prefixing the rule's name with the intent name followed by a dot:
|
||||
|
||||
[SetLightColor]
|
||||
colors = (red | green | blue)
|
||||
set the light to <colors>
|
||||
|
||||
[GetLightColor]
|
||||
is the light <SetLightColor.colors>
|
||||
|
||||
The second intent (`GetLightColor`) references the `colors` rule from `SetLightColor`.
|
||||
|
||||
### Tags
|
||||
|
||||
The example templates above will generate sentences for training the speech recognizer, but using them to train the intent recognizer will not be satisfactory. The `SetLightColor` intent, when recognized, will result in a Home Assistant event called `rhasspy_SetLightColor`. But the actual *color* will not be provided because the intent recognizer is not aware that a `color` slot should exist (and has the values `red`, `green`, and `blue`).
|
||||
|
||||
Luckily, JSGF has a [tag feature](https://www.w3.org/TR/jsgf/#15057) that lets you annotate portions of sentences/rules. Rhasspy assumes that the tags themselves are *slot/entity names* and the tagged portions of the sentence are *slot/entity values*. The `SetLightColor` example can be extended with tags like this:
|
||||
|
||||
[SetLightColor]
|
||||
colors = (red | green | blue){color}
|
||||
set the light to <colors>
|
||||
|
||||
With the `{color}` tag attached to the `(red | green | blue)` alternative set, each color name will carry the tag. This is the same as typing `((red){color} | (green){color} | (blue){color})`, but less verbose. Rhasspy will now generate the following **tagged sentences**:
|
||||
|
||||
1. `set the light to [red](color)`
|
||||
2. `set the light to [green](color)`
|
||||
3. `set the light to [blue](color)`
|
||||
|
||||
When the `SetLightColor` intent is recognized now, the corresponding JSON event (`rhasspy_SetLightColor` in Home Assistant) will have the following properties:
|
||||
|
||||
{
|
||||
"color": "red"
|
||||
}
|
||||
|
||||
|
||||
A Home Assistant [automation](https://www.home-assistant.io/docs/automation) can use the slot values to take an appropriate action, such as [setting an RGB light's color](https://www.home-assistant.io/docs/automation/action/) to `[255,0,0]` (red).
|
||||
|
||||
#### Tag Synonyms
|
||||
|
||||
There are times where you want to match a particular part of your sentence with a tag, but want the actual *value* of the tag to be something different than the matched text. This is needed if you want to talk about entities in Home Assistant, for example, with phrases like "the living room lamp", but want to pass the appropriate entity id (say `lamp_1`) to Home Assistant instead.
|
||||
|
||||
Normally, you would tag part of a sentence like this:
|
||||
|
||||
[ChangeLightState]
|
||||
turn on the (living room lamp){name}
|
||||
|
||||
When this intent is activated, Rhasspy will send a JSON event (named `rhasspy_ChangeLightState` in Home Assistant) with:
|
||||
|
||||
{
|
||||
"name": "living room lamp"
|
||||
}
|
||||
|
||||
You can catch this event in a Home Assistant automation, match the `name` "living room lamp", and do something with the `lamp_1` entity. That's fine for one instance, but would require a separate rule for every `name`! Instead, let's add a tag **synonym**:
|
||||
|
||||
[ChangeLightState]
|
||||
turn on the (living room lamp){name:lamp_1}
|
||||
|
||||
The tag label and synonym are separated by a ":". When this sentence is spoken and the intent is activated, the same `rhasspy_ChangeLightState` event will be sent to Home Assistant, but with the following data:
|
||||
|
||||
{
|
||||
"name": "lamp_1"
|
||||
}
|
||||
|
||||
Now in your Home Assistant automation, you could use [templating](https://www.home-assistant.io/docs/automation/templating/) to plug the `name` directly into the `entity_id` field of an action. One rule to rule them all.
|
||||
|
||||
This same technique could be used to replace number words with digits, like:
|
||||
|
||||
[SetTimer]
|
||||
set a timer for (ten){number:10} seconds
|
||||
|
||||
which would generate an event like this when recognized:
|
||||
|
||||
{
|
||||
"number": "10"
|
||||
}
|
||||
|
||||
### Slots Lists
|
||||
|
||||
In the `SetLightColor` example above, the color names are stored in `sentences.ini` as a rule:
|
||||
|
||||
colors = (red | green | blue)
|
||||
|
||||
This is convenient when the list of colors is small, changes infrequently, and does not depend on an external service.
|
||||
But what if this was a list of movie names that were stored on your [Kodi Home Theater](https://kodi.tv)?
|
||||
|
||||
movies = ("Primer" | "Moon" | "Chronicle" | "Timecrimes" | "Mulholland Drive" | ... )
|
||||
|
||||
It would be much easier if this list was stored externally, but could be *referenced* in the appropriate places in the grammar.
|
||||
This is possible in Rhasspy by placing text files in the `speech_to_text.slots_dir` directory specified in your [profile](profiles.md) ("slots" by default).
|
||||
|
||||
If you're using the English (`en`) profile, for example, create the file `profiles/en/slots/movies` and add the following content:
|
||||
|
||||
Primer
|
||||
Moon
|
||||
Chronicle
|
||||
Timecrimes
|
||||
Mulholland Drive
|
||||
|
||||
This list of movies can now be referenced as `$movies` in your `sentences.ini` file! Something like:
|
||||
|
||||
[PlayMovie]
|
||||
play ($movies){movie_name}
|
||||
|
||||
will generate `rhasspy_PlayMovie` events like:
|
||||
|
||||
{
|
||||
"movie_name": "Primer"
|
||||
}
|
||||
|
||||
If you update the `movies` file, make sure to re-train Rhasspy in order to pick up the new movie names.
|
||||
|
||||
### Special Cases
|
||||
|
||||
If one of your sentences happens to start with an optional word (e.g., `[the]`), this can lead to a problem:
|
||||
|
||||
[SomeIntent]
|
||||
[the] problem sentence
|
||||
|
||||
Python's [configparser](https://docs.python.org/3/library/configparser.html) will interpret `[the]` as a new section header, which will produce a new intent, grammar, etc. Rhasspy handles this special case by using a backslash escape sequence (`\[`):
|
||||
|
||||
[SomeIntent]
|
||||
\[the] problem sentence
|
||||
|
||||
Now `[the]` will be properly interpreted as a sentence under `[SomeIntent]`. You only need to escape a `[` if it's the **very first** character in your sentence.
|
||||
|
||||
## Custom Words
|
||||
|
||||
@@ -347,15 +232,174 @@ Rhasspy looks for words you've defined outside of your profile's base dictionary
|
||||
|
||||
You can use the [Words tab](usage.md#words-tab) in Rhasspy's web interface to generate this dictionary. During training, Rhasspy will merge `custom_words.txt` into your `dictionary.txt` file so the [speech to text](speech-to-text.md) system knows how the words in your voice commands are pronounced.
|
||||
|
||||
## Speech to Text
|
||||
|
||||
By default, Rhasspy generates training sentences from your [sentences.ini](#sentencesini) file, and then trains a custom language model using [opengrm](https://www.opengrm.org). You can call a **custom program** instead if you want to use a different language modeling toolkit or your custom speech to text system needs special training.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"training": {
|
||||
"speech_to_text": {
|
||||
"system": "command",
|
||||
"command": {
|
||||
"program": "/path/to/program",
|
||||
"arguments": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
When training, your program will be called with all of the training sentences grouped by intent in JSON to standard in. No output is expected from your program besides a successful exit code. **NOTE**: Rhasspy will not generate `dictionary.txt` or `language_model.txt` if you use a custom program.
|
||||
|
||||
The input JSON is an object where each key is the name of an intent and the values are lists of training sentence objects. Each sentence object has the text of the sentence, all tagged entities, and the tokens of the sentence.
|
||||
|
||||
Example input:
|
||||
|
||||
{
|
||||
"GetTime": [
|
||||
{
|
||||
"sentence": "what time is it",
|
||||
"entities": [],
|
||||
"tokens": [
|
||||
"what",
|
||||
"time",
|
||||
"is",
|
||||
"it"
|
||||
]
|
||||
},
|
||||
{
|
||||
"sentence": "tell me the time",
|
||||
"entities": [],
|
||||
"tokens": [
|
||||
"tell",
|
||||
"me",
|
||||
"the",
|
||||
"time"
|
||||
]
|
||||
}
|
||||
],
|
||||
"ChangeLightColor": [
|
||||
{
|
||||
"sentence": "set the bedroom light to red",
|
||||
"entities": [
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "bedroom light"
|
||||
},
|
||||
{
|
||||
"entity": "color",
|
||||
"value": "red"
|
||||
}
|
||||
],
|
||||
"tokens": [
|
||||
"set",
|
||||
"the",
|
||||
"bedroom",
|
||||
"light",
|
||||
"to",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
See [train-stt.sh](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/train-stt.sh) for an example program.
|
||||
|
||||
## Intent Recognition
|
||||
|
||||
During training, Rhasspy uses the sentences generated from [sentences.ini](#sentencesini) as training material for the selected intent recognition system. If your intent recognition system requires some special training, you can call a **custom program** here.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"training": {
|
||||
"intent": {
|
||||
"system": "command",
|
||||
"command": {
|
||||
"program": "/path/to/program",
|
||||
"arguments": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
During training, Rhasspy will call your program with the training sentences grouped by intent in JSON printed to standard in. No output is expected, besides a successful exit code.
|
||||
|
||||
The input JSON is an object where each key is the name of an intent and the values are lists of training sentence objects. Each sentence object has the text of the sentence, all tagged entities, and the tokens of the sentence.
|
||||
|
||||
Example input:
|
||||
|
||||
```json
|
||||
{
|
||||
"GetTime": [
|
||||
{
|
||||
"sentence": "what time is it",
|
||||
"entities": [],
|
||||
"tokens": [
|
||||
"what",
|
||||
"time",
|
||||
"is",
|
||||
"it"
|
||||
]
|
||||
},
|
||||
{
|
||||
"sentence": "tell me the time",
|
||||
"entities": [],
|
||||
"tokens": [
|
||||
"tell",
|
||||
"me",
|
||||
"the",
|
||||
"time"
|
||||
]
|
||||
}
|
||||
],
|
||||
"ChangeLightColor": [
|
||||
{
|
||||
"sentence": "set the bedroom light to red",
|
||||
"entities": [
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "bedroom light"
|
||||
},
|
||||
{
|
||||
"entity": "color",
|
||||
"value": "red"
|
||||
}
|
||||
],
|
||||
"tokens": [
|
||||
"set",
|
||||
"the",
|
||||
"bedroom",
|
||||
"light",
|
||||
"to",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
The following environment variables are available to your program:
|
||||
|
||||
* `$RHASSPY_BASE_DIR` - path to the directory where Rhasspy is running from
|
||||
* `$RHASSPY_PROFILE` - name of the current profile (e.g., "en")
|
||||
* `$RHASSPY_PROFILE_DIR` - directory of the current profile (where `profile.json` is)
|
||||
|
||||
See [train-intent.sh](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/train-intent.sh) for an example program.
|
||||
|
||||
|
||||
## Language Model Mixing
|
||||
|
||||
Rhasspy is designed to only respond to the voice commands you specify in [sentences.ini](training.md#sentencesini), but both the Pocketsphinx and Kaldi speech to text systems are capable of transcribing open ended speech. While this will never be as good as a cloud-based system, Rhasspy [offers it as an option](speech-to-text.md#open-transcription).
|
||||
Rhasspy is designed to only respond to the voice commands you specify in [sentences.ini](training.md#sentencesini), but both the Pocketsphinx and Kaldi speech to text systems are capable of transcribing open ended speech. While this will never be as good as a cloud-based system, Rhasspy offers it as an option.
|
||||
|
||||
A middle ground between open transcription and custom voice commands is **language model mixing**. During training, Rhasspy can mix a (large) pre-built language model with the custom-generated one. You specify a **mixture weight** (0-1), which controls how much of an influence the large language model has; a mixture weight of 0 makes Rhasspy sensitive *only* to your voice commands, which is the default.
|
||||
Open ended speech is achieved in Rhasspy by the inclusion of `base_dictionary.txt` and `base_language_model.txt` files in every profile. The former is a dictionary containing the pronunciations of all possible words. The latter is a large language model trained on a very large corpus of text in the profile's language (usually books and web pages).
|
||||
|
||||
During training, Rhasspy can **mix** this large, open ended language model with the one generated specifically for your voice commands. You specify a **mixture weight**, which controls how much of an influence the large language model has; a mixture weight of 0 makes Rhasspy sensitive *only* to your voice commands, which is the default.
|
||||
|
||||

|
||||
|
||||
To see the effect of language model mixing, consider a simple `sentences.ini` file:
|
||||
To see the effect of language model mixing, consider a simple `sentences.ini` file:
|
||||
|
||||
```
|
||||
[ChangeLightState]
|
||||
@@ -389,7 +433,7 @@ $ rhasspy-cli --profile en \
|
||||
OK
|
||||
```
|
||||
|
||||
Note that training will take **significantly** longer because of the size of the base language model. Now, let's test our two WAV files:
|
||||
Note that training will take **significantly** longer because of the size of the base language model. Now, let's test our two WAV files:
|
||||
|
||||
```
|
||||
$ rhasspy-cli --profile en wav2text < turn_on_living_room_lamp.wav
|
||||
@@ -424,6 +468,15 @@ $ echo 'would you please turn on the living room lamp' | \
|
||||
"value": "on"
|
||||
}
|
||||
],
|
||||
"tokens": [
|
||||
"turn",
|
||||
"on",
|
||||
"the",
|
||||
"living",
|
||||
"room",
|
||||
"lamp"
|
||||
],
|
||||
"speech_confidence": 1,
|
||||
"slots": {
|
||||
"state": "on"
|
||||
}
|
||||
@@ -433,6 +486,7 @@ $ echo 'would you please turn on the living room lamp' | \
|
||||
|
||||
But this works only because the default intent recognizer ([fsticuffs](intent-recognition.md#fsticuffs)) ignores unknown words by default, so "would you please" is not interpreted. Changing "lamp" to "light" in the input sentence will reveal the problem:
|
||||
|
||||
|
||||
```
|
||||
$ echo 'would you please turn on the living room light' | \
|
||||
rhasspy-cli --profile en text2intent
|
||||
@@ -445,6 +499,7 @@ $ echo 'would you please turn on the living room light | \
|
||||
"confidence": 0
|
||||
},
|
||||
"entities": [],
|
||||
"speech_confidence": 1,
|
||||
"slots": {}
|
||||
}
|
||||
}
|
||||
@@ -480,6 +535,7 @@ $ echo 'would you please turn on the living room light' | \
|
||||
"value": "on"
|
||||
}
|
||||
],
|
||||
"speech_confidence": 1,
|
||||
"slots": {
|
||||
"state": "on"
|
||||
}
|
||||
@@ -489,4 +545,4 @@ $ echo 'would you please turn on the living room light' | \
|
||||
|
||||
This works well for our toy example, but will not scale well when there are thousands of voice commands represented in `sentences.ini` or if the words used are significantly different than in the training set ("light" and "lamp" are close enough for `fuzzywuzzy`).
|
||||
|
||||
A machine learning-based intent recognizer, like [flair](intent-recognition.md#flair) or [Rasa](intent-recognition.md#rasanlu), would be a better choice for open ended speech.
|
||||
A machine learning-based intent recognizer, like [flair](intent-recognition.md#flair), would be a better choice for open ended speech.
|
||||
|
||||
@@ -1,309 +0,0 @@
|
||||
# Tutorials
|
||||
|
||||
* [RGB Light Example](#rgb-light-example)
|
||||
* [Client/Server Setup](#clientserver-setup)
|
||||
|
||||
## RGB Light Example
|
||||
|
||||
Let's say you have an RGB light of some kind in your bedroom that's [hooked up already to Home Assistant](https://www.home-assistant.io/components/light.mqtt). You'd like to be able to say things like "*set the bedroom light to red*" to change its color. To start, let's write a [Home Assistant automation](https://www.home-assistant.io/docs/automation/action/) to help you out:
|
||||
|
||||
automation:
|
||||
# Change the light in the bedroom to red.
|
||||
trigger:
|
||||
...
|
||||
action:
|
||||
service: light.turn_on
|
||||
data:
|
||||
rgb_color: [255, 0, 0]
|
||||
entity_id: light.bedroom
|
||||
|
||||
Now you just need the trigger! Rhasspy will send events that can be caught with the [event trigger platform](https://www.home-assistant.io/docs/automation/trigger/#event-trigger). A different event will be sent for each *intent* that you define, with slot values corresponding to important parts of the command (like light name and color). Let's start by defining an intent in Rhasspy called `ChangeLightState` that can be said a few different ways:
|
||||
|
||||
[ChangeLightState]
|
||||
colors = (red | green | blue) {color}
|
||||
set [the] (bedroom){name} [to] <colors>
|
||||
|
||||
This is a [simplified JSGF grammar](training.md#sentencesini) that will generate the following sentences:
|
||||
|
||||
* set the bedroom to red
|
||||
* set the bedroom to green
|
||||
* set the bedroom to blue
|
||||
* set the bedroom red
|
||||
* set the bedroom green
|
||||
* set the bedroom blue
|
||||
* set bedroom to red
|
||||
* set bedroom to green
|
||||
* set bedroom to blue
|
||||
* set bedroom red
|
||||
* set bedroom green
|
||||
* set bedroom blue
|
||||
|
||||
Rhasspy uses these sentences to create an [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) for speech recognition, and also train an intent recognizer that can extract relevant parts of the command. The `{color}` tag in the `colors` rule will make Rhasspy put a `color` property in each event with the name of the recognized color (red, green, or blue). Likewise, the `{name}` tag on `bedroom` will add a `name` property to the event.
|
||||
|
||||
If trained on these sentences, Rhasspy will now recognize commands like "*set the bedroom light to red*" and send a `rhasspy_ChangeLightState` to Home Assistant with the following data:
|
||||
|
||||
{
|
||||
"name": "bedroom",
|
||||
"color": "red"
|
||||
}
|
||||
|
||||
You can now fill in the rest of the Home Assistant automation:
|
||||
|
||||
automation:
|
||||
# Change the light in the bedroom to red.
|
||||
trigger:
|
||||
platform: event
|
||||
event_type: rhasspy_ChangeLightState
|
||||
event_data:
|
||||
name: bedroom
|
||||
color: red
|
||||
action:
|
||||
service: light.turn_on
|
||||
data:
|
||||
rgb_color: [255, 0, 0]
|
||||
entity_id: light.bedroom
|
||||
|
||||
This will handle the specific case of setting the bedroom light to red, but not any other color. You can either add additional automations to handle these, or make use of [automation templating](https://www.home-assistant.io/docs/automation/templating/) to do it all at once. See the [Home Assistant Template Example](#home-assistant-template-example) below.
|
||||
|
||||
### Home Assistant Template Example
|
||||
|
||||
Using the following additions, you can get Home Assistant to respond to turning on / off *ANY* light in your setup.
|
||||
|
||||
#### Slots
|
||||
|
||||
Add the following JSON to the Slots tab in your Rhasspy web interface:
|
||||
|
||||
```json
|
||||
{
|
||||
"lights": [
|
||||
"(living room wall):light.bulb_3",
|
||||
"(living room desk):switch.m4",
|
||||
"(living room floor):switch.sonoff",
|
||||
"(bar lights):switch.maxcio1",
|
||||
"(entry wall):light.bulb_4",
|
||||
"(guest wall):light.bulb_6",
|
||||
"(guest floor):switch.m5",
|
||||
"(bedroom wall):light.bulb_5",
|
||||
"(bedroom desk):light.bulb_1",
|
||||
"(bedroom floor):light.bulb_2"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### Sentences
|
||||
|
||||
A simple sentence to turn any of the lights in the slots file on or off.
|
||||
Note the use of the `<state>` rule and the slot `$lights`
|
||||
|
||||
```
|
||||
[ChangeLightState]
|
||||
state = (on | off) {light_state}
|
||||
turn [the] ($lights) {light_name} <state>
|
||||
```
|
||||
|
||||
#### Home Assistant
|
||||
|
||||
In your Home Assistant `automations.yaml` file, use a `data_template` to get the Rhasspy event data with `trigger.event.data.<your property name>` and then pass those along to a script:
|
||||
|
||||
```yaml
|
||||
- id: '1577164768008'
|
||||
alias: Rhasspy Light States
|
||||
description: Voice Control on/off states for all lights
|
||||
trigger:
|
||||
- event_data: {}
|
||||
event_type: rhasspy_ChangeLightState
|
||||
platform: event
|
||||
condition: []
|
||||
action:
|
||||
- alias: ''
|
||||
data_template:
|
||||
light_name: "{{ trigger.event.data.light_name }}"
|
||||
light_state: "{{ trigger.event.data.light_state }}"
|
||||
service: script.rhasspy_light_state
|
||||
|
||||
```
|
||||
|
||||
In `scripts.yaml`, the `service_template` casts the `light_state` into a string and checks to see if you said 'on' or 'off'. The homeassistant-service can toggle both lights and switches, which is helpful if you have a combination of "light" types:
|
||||
|
||||
```yaml
|
||||
rhasspy_light_state:
|
||||
alias: change_light_state
|
||||
fields:
|
||||
light_name:
|
||||
description: "Light Entity"
|
||||
example: light.bulb_1
|
||||
light_state:
|
||||
description: "State to change the light to"
|
||||
example: on
|
||||
sequence:
|
||||
- service_template: >
|
||||
{% set this_state = light_state | string %}
|
||||
{% if this_state == 'on' %}
|
||||
homeassistant.turn_on
|
||||
{%else %}
|
||||
homeassistant.turn_off
|
||||
{% endif %}
|
||||
|
||||
data_template:
|
||||
entity_id: "{{ light_name }}"
|
||||
```
|
||||
|
||||
## Client/Server Setup
|
||||
|
||||
Contributed by [jaburges](https://community.home-assistant.io/u/jaburges)
|
||||
|
||||
* Hardware used:
|
||||
* Raspberry Pi 3B w/ 8GB SD card
|
||||
* [Seeed 4 Mic Array](https://www.amazon.com/seeed-Studio-ReSpeaker-4-Mic-Raspberry/dp/B076SSR1W1)
|
||||
* Software used:
|
||||
* [Raspbian Buster Lite](https://downloads.raspberrypi.org/raspbian_lite_latest)
|
||||
* [Etcher](https://www.balena.io/etcher/)
|
||||
* Docker ([install Docker](installation.md#docker))
|
||||
|
||||
### Server Steps
|
||||
|
||||
1. Assuming you already have docker running, create a directory for Rhasspy, and subdirectory called profiles.
|
||||
2. Pull and Run docker image:
|
||||
|
||||
docker run -p 12101:12101 \
|
||||
--restart unless-stopped \
|
||||
--name rhasspy \
|
||||
-v "/<PATH_TO>/rhasspy/profiles:/profiles" \
|
||||
synesthesiam/rhasspy-server:latest \
|
||||
--user-profiles /profiles \
|
||||
--profile en
|
||||
|
||||
3. Go to server URL `http://<Server_IP>:12101` (you may be asked to download files)
|
||||
4. Go to settings and check configuration (and save along the way):
|
||||
|
||||
[Rhasspy]
|
||||
Listen for wake word on Startup = UNchecked
|
||||
|
||||
[Intent Handling]
|
||||
Do not handle intent on this device
|
||||
#There is no harm in having the Server handle Intents, but the Client must handle Intents
|
||||
|
||||
[Wake Word]
|
||||
No Wake word on this device
|
||||
|
||||
[Voice Detection]
|
||||
No voice communication on this device
|
||||
|
||||
[Speech Recognition]
|
||||
Do Speech recognition with pocketsphinx
|
||||
|
||||
[Intent Recognition]
|
||||
Do intent recognition with fuzzywuzzy
|
||||
|
||||
[Text to Speech]
|
||||
No Text to speech on this device
|
||||
|
||||
[Audio Recording]
|
||||
No recording on this device
|
||||
|
||||
[Audio Playing]
|
||||
No Playback on this device
|
||||
|
||||
5. Check Slots, and Sentences tabs and make sure to hit `Train` and then `Restart`
|
||||
|
||||
### Client Steps
|
||||
|
||||
1. Flash 8Gb MicroSD Card with [Buster](https://downloads.raspberrypi.org/raspbian_lite_latest) with [Etcher](https://www.balena.io/etcher/).
|
||||
2. Remove and re-insert MicroSD card and add files to the root directory (for headless setup - meaning no screen needed). You only need `wpa_supplicant` if you plan to use WiFi.
|
||||
* a file simply called `ssh`
|
||||
* `wpa_supplicant.conf` ([example here](https://pastebin.com/cDhyhQLs))
|
||||
3. Insert the MicroSD card in the Pi, use a proper Power Supply and check your router for the IP address it gets.
|
||||
4. SSH into the Pi using that IP address (I use [Putty](https://the.earth.li/~sgtatham/putty/latest/w64/putty-64bit-0.73-installer.msi)) using pi default user/pass = pi/raspberry.
|
||||
You are going to want to change that in the future!
|
||||
5. Install git:
|
||||
|
||||
sudo apt install git
|
||||
|
||||
6. Install Seeed mic array based on info [here](https://github.com/respeaker/seeed-voicecard)
|
||||
|
||||
git clone https://github.com/respeaker/seeed-voicecard
|
||||
cd seeed-voicecard
|
||||
sudo ./install.sh
|
||||
sudo reboot
|
||||
|
||||
7. Plug in Seeed speaker and check install was successful against expected result here 5:
|
||||
|
||||
arecord -L
|
||||
|
||||
8. Install docker:
|
||||
|
||||
curl -sSL https://get.docker.com | sh
|
||||
|
||||
9. Modify user permissions to access docker without using `sudo` all the time ;)
|
||||
|
||||
sudo usermod -a -G docker pi
|
||||
|
||||
10. Close SSH, and relaunch SSH connection to use new permissions.
|
||||
11. Create directories for Rhasspy Docker image to use:
|
||||
|
||||
cd /home/pi
|
||||
mkdir rhasspy
|
||||
cd rhasspy
|
||||
mkdir profiles
|
||||
|
||||
12. Pull and run docker image:
|
||||
|
||||
docker run -p 12101:12101 \
|
||||
--restart unless-stopped \
|
||||
--name rhasspy \
|
||||
-v "/home/pi/rhasspy/profiles:/profiles" \
|
||||
--device /dev/snd:/dev/snd \
|
||||
synesthesiam/rhasspy-server:latest \
|
||||
--user-profiles /profiles \
|
||||
--profile en
|
||||
|
||||
13. Go to Client URL `http://<Pi_IP_address>:12101` (you will be asked to download some files)
|
||||
(At time of writing I put Wakeword, voice detection and recognition on the client)
|
||||
14. Under settings ensure the following is selected, Save along the way. You will need to Train once also.
|
||||
|
||||
[Rhasspy]
|
||||
Listen for wake word on Startup = checked
|
||||
|
||||
[Home Assistant]
|
||||
Enable Intent Handling on this device
|
||||
#Do not use Home Assistant if using Node-Red
|
||||
|
||||
[Wake Word]
|
||||
Use snowboy (this should trigger a download of more files)
|
||||
|
||||
[Voice Detection]
|
||||
Use webrtcvad and listen for silence
|
||||
|
||||
[Speech Recognition]
|
||||
Use Remote Rhasspy server for speech recognition:
|
||||
URL = http://<SERVER_IP>:12101/api/speech-to-text
|
||||
|
||||
[Intent Recognition]
|
||||
Use Remote Rhasspy server for intent recognition:
|
||||
URL = http://<SERVER_IP>:12101/api/text-to-intent
|
||||
|
||||
[Text to Speech]
|
||||
No Text to speech on this device
|
||||
|
||||
[Audio Recording]
|
||||
Use PyAudio (default)
|
||||
Input Device = seeed-4mic-voicecard (you can test this if you want)
|
||||
|
||||
[Audio Playing]
|
||||
No Playback on this device
|
||||
|
||||
### Node-Red Config
|
||||
|
||||
1. Import [this flow](https://github.com/synesthesiam/rhasspy/blob/cda3a02775865d49b52d32a3af7264b7cbd69472/examples/nodered/time-light-flow.js) from the Rhasspy examples
|
||||
2. Attach a debug node to the websocket in and configure it to show full msg object.
|
||||
3. I edited light text node to take this:
|
||||
|
||||
{
|
||||
"domain": "light",
|
||||
"service": "turn_{{slots.state}}",
|
||||
"entity_id": "{{slots.name}}"
|
||||
}
|
||||
|
||||
4. Add a call service node after the light text and leave it blank. Deploy and Enjoy offline voice assistant.
|
||||
|
||||
Pick a light (that is a light domain not a switch), and say "Snowboy, turn bedroom light off" :)
|
||||
@@ -1,31 +1,11 @@
|
||||
# Usage
|
||||
|
||||
You can interact with Rhasspy in more ways than your voice:
|
||||
|
||||
* [Web Interface](#web-interface)
|
||||
* [Home Assistant](#home-assistant)
|
||||
* [Node-RED with Websockets](#node-red)
|
||||
* [MQTT and Snips](#mqtt-and-snips)
|
||||
* [HTTP API](#http-api)
|
||||
* [Command Line](#command-line)
|
||||
You can interact with Rhasspy in different ways besides just your voice. Rhasspy includes a [web interface](#web-interface), typically hosted on port 12101. There is also an [HTTP API](#http-api) that lets you programmatically manipulate Rhasspy from external programs or services. A [command-line interface](#command-line) is available as well to allow for Rhasspy to be easily included in shell scripts. Lastly, Rhasspy subscribes and publishes to specific [MQTT topics](#mqtt) in accordance with (a portion of) the [Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol).
|
||||
|
||||
## Web Interface
|
||||
|
||||
A browser-based interface for Rhasspy is available on port 12101 by default ([http://localhost:12101](http://localhost:12101) if running locally). From this interface, you can test voice commands, add new voice commands, re-train, and edit your profile.
|
||||
|
||||
### Top Bar
|
||||
|
||||
The top bar of the web interface lets you perform some global actions on Rhasspy, regardless of which tab you have selected.
|
||||
|
||||

|
||||
|
||||
* Click the Rhasspy logo to reload the page
|
||||
* Click the version number to test the [HTTP API](#http-api)
|
||||
* The green `Train` button will re-train your profile
|
||||
* Use the `Clear Cache` drop down to train from scratch
|
||||
* The yellow `Wake` button will wake Rhasspy up and start listening for a voice command
|
||||
* The red `Restart` button forces Rhasspy to restart
|
||||
|
||||
### Speech Tab
|
||||
|
||||
Test voice and text commands.
|
||||
@@ -34,28 +14,17 @@ Test voice and text commands.
|
||||
|
||||
* Record a voice command with `Hold to Record` or `Tap to Record`
|
||||
* Upload a WAV file with a voice command
|
||||
* Enter a text command and either execute it (`Get Intent`) or `Speak` the sentence
|
||||
* Enter a text command and execute it
|
||||
* Uncheck `Send to Home Assistant` if you **don't** want Rhasspy to send events to Home Assistant
|
||||
|
||||
### Sentences Tab
|
||||
|
||||
Add new voice commands to Rhasspy using the [template syntax](training.md#sentencesini).
|
||||
Add new voice commands to Rhasspy.
|
||||
|
||||

|
||||
|
||||
* Edits `sentences.ini` by default
|
||||
* Use the `Add File` button to create additional sentence template files
|
||||
* These should be prefixed by the `sentences_dir` in your [profile](profiles.md). For example, `intents/more-commands.ini`
|
||||
* The drop down can be used to switch editing between different template files
|
||||
|
||||
### Slots Tab
|
||||
|
||||
Edit your [slots lists](training.md#slots-lists) as JSON (keys = slot names, values = lists of slot values).
|
||||
|
||||

|
||||
|
||||
* New slot values will overwrite previous ones
|
||||
* Delete a slot by providing an empty list for its JSON key
|
||||
See documentation on [sentences.ini](training.md#sentencesini) for more information.
|
||||
Make sure to re-train after saving!
|
||||
|
||||
### Words Tab
|
||||
|
||||
@@ -88,59 +57,52 @@ Direct interface for editing your [profile](profiles.md).
|
||||
|
||||

|
||||
|
||||
### Log Tab
|
||||
## HTTP API
|
||||
|
||||
Streams Rhasspy's log output over a websocket.
|
||||
Rhasspy features a comprehensive HTTP API available at `/api`, documented with [OpenAPI 3](https://github.com/OAI/OpenAPI-Specification) (Swagger). Some notable endpoints are:
|
||||
|
||||

|
||||
* `/api/profile`
|
||||
* GET the JSON for your profile, or POST to overwrite it
|
||||
* `/api/listen-for-command`
|
||||
* POST to wake Rhasspy up and start listening for a voice command
|
||||
* `/api/start-recording`
|
||||
* POST to have Rhasspy start recording a voice command
|
||||
* `/api/stop-recording`
|
||||
* POST to have Rhasspy stop recording and process recorded data as a voice command
|
||||
* `/api/train`
|
||||
* POST to re-train your profile
|
||||
* `/api/speech-to-intent`
|
||||
* POST a WAV file and have Rhasspy process it as a voice command
|
||||
* `/api/text-to-intent`
|
||||
* POST text and have Rhasspy process it as a command
|
||||
* `/api/text-to-speech`
|
||||
* POST text and have Rhasspy speak it
|
||||
|
||||
See `public/swagger.yaml` in Rhasspy's repository for all available endpoints, or visit `/api` on your Rhasspy web server (e.g., [http://localhost:12101/api](http://localhost:12101/api)).
|
||||
|
||||
## Home Assistant
|
||||
## Secure Hosting with HTTPS
|
||||
|
||||
Rhasspy communicates with Home Assistant directly over its [REST API](https://developers.home-assistant.io/docs/en/external_api_rest.html).
|
||||
Specifically, Rhasspy intents are POST-ed to the [events endpoint](https://developers.home-assistant.io/docs/en/external_api_rest.html#post-api-events-lt-event_type).
|
||||
If you need to access Rhasspy's web interface/API through HTTPS (formerly SSL), you can provide a certificate and key file via command-line parameters or the Hass.IO configuration.
|
||||
|
||||
If you have a Rhasspy intent named `ChangeLightColor` with `name` and `color` slots like in the [RGB light example](index.md#rgb-light-example), then Home Assistant will receive an event of type `rhasspy_ChangeLightColor` whose event data is:
|
||||
If you're running Rhasspy via Docker or in a virtual environment, add `--ssl <CERT_FILE> <KEY_FILE>` to the command-line arguments where `<CERT_FILE>` is your SSL certificate and `<KEY_FILE>` is your SSL key file.
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "bedroom",
|
||||
"color": "red"
|
||||
}
|
||||
```
|
||||
You can generate a self-signed certificate with the following command:
|
||||
|
||||
when you say "set the bedroom to red". You should write a custom [automation with an event trigger](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) to do something when this event arrives. Catching the example event would look like:
|
||||
openssl req -x509 -newkey rsa:4096 -nodes -out cert.pem -keyout key.pem -days 365
|
||||
|
||||
After answering the series of questions, you should have `cert.pem` and `key.pem` in your current directory. Then run Rhasspy with:
|
||||
|
||||
```yaml
|
||||
automation:
|
||||
trigger:
|
||||
platform: event
|
||||
event_type: rhasspy_ChangeLightColor
|
||||
event_data:
|
||||
color: red
|
||||
action:
|
||||
...
|
||||
```
|
||||
<RHASSPY COMMAND> --ssl cert.pem key.pem
|
||||
|
||||
The web interface will now be available at [https://localhost:12101](https://localhost:12101) and the web socket events at `wss://localhost:12101/api/events/intent`
|
||||
|
||||
You've now added offline, private voice commands to your Home Assistant. Happy automating!
|
||||
In Hass.IO, you will need to set the following options via the web interface or in your JSON configuration:
|
||||
|
||||
### Getting the Spoken Text
|
||||
* `ssl`: `true`
|
||||
* `certfile`: `cert.pem`
|
||||
* `keyfile`: `key.pem`
|
||||
|
||||
The Home Assistant event will contain two extra slots besides the ones you specify:
|
||||
|
||||
* `_text` - spoken voice command text with [substitutions](training.md#substitutions)
|
||||
* `_raw_text` - literal transcription of voice command
|
||||
|
||||
## Node-RED
|
||||
|
||||
Rhasspy can interact directly with [Node-RED](https://nodered.org) through [websockets](usage.md#websocket-events).
|
||||
Simply add a websocket input and set the path to `ws://<rhasspy>:12101/api/events/intent` where `<rhasspy>` is the hostname or IP address of your Rhasspy server.
|
||||
Make sure to also set send/receive to "entire message".
|
||||
|
||||

|
||||
|
||||
More example flows are available [on Github](https://github.com/synesthesiam/rhasspy/tree/master/examples/nodered).
|
||||
|
||||
### WebSocket Events
|
||||
## WebSocket Events
|
||||
|
||||
Whenever a voice command is recognized, Rhasspy emits JSON events over a websocket connection available at `ws://rhasspy:12101/api/events/intent` (replace `ws://` with `wss://` if you're using [secure hosting](usage.md#secure-hosting-with-https)).
|
||||
You can listen to these events in a [Node-RED](https://nodered.org) flow, and easily add offline, private voice commands to your home automation set up!
|
||||
@@ -171,41 +133,44 @@ For the `ChangLightState` intent from the [RGB Light Example](index.md#rgb-light
|
||||
}
|
||||
```
|
||||
|
||||
## MQTT and Snips
|
||||
## Home Assistant
|
||||
|
||||
Rhasspy is able to interoperate with Snips.AI services using the [Hermes protocol](https://docs.snips.ai/reference/hermes) over [MQTT](http://mqtt.org). The following components are Snips/Hermes compatible:
|
||||
Rhasspy communicates with Home Assistant directly over its [REST API](https://developers.home-assistant.io/docs/en/external_api_rest.html).
|
||||
Specifically, Rhasspy intents are POST-ed to the [events endpoint](https://developers.home-assistant.io/docs/en/external_api_rest.html#post-api-events-lt-event_type).
|
||||
|
||||
* [Microphone input](audio-input.md#mqtthermes)
|
||||
* [Wake word](wake-word.md#mqtthermes)
|
||||
* [Speech to text](speech-to-text.md#mqtthermes)
|
||||
* [Intent recognition](intent-recognition.md#mqtthermes)
|
||||
* [Audio output](audio-output.md#mqtthermes)
|
||||
If you have a Rhasspy intent named `ChangeLightColor` with `name` and `color` slots like in the [RGB light example](index.md#rgb-light-example), then Home Assistant will receive an event of type `rhasspy_ChangeLightColor` whose event data is:
|
||||
|
||||
## HTTP API
|
||||
```json
|
||||
{
|
||||
"name": "bedroom",
|
||||
"color": "red"
|
||||
}
|
||||
```
|
||||
|
||||
Rhasspy features a comprehensive HTTP API available at `/api/`, documented with [OpenAPI 3](https://github.com/OAI/OpenAPI-Specification) (Swagger). See the [HTTP API reference](reference.md#http-api) for more details.
|
||||
when you say "set the bedroom to red". You should write a custom [automation with an event trigger](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) to do something when this event arrives. Catching the example event would look like:
|
||||
|
||||
### Secure Hosting with HTTPS
|
||||
```yaml
|
||||
automation:
|
||||
trigger:
|
||||
platform: event
|
||||
event_type: rhasspy_ChangeLightColor
|
||||
event_data:
|
||||
color: red
|
||||
action:
|
||||
...
|
||||
```
|
||||
|
||||
If you need to access Rhasspy's web interface/API through HTTPS (formerly SSL), you can provide a certificate and key file via command-line parameters or the Hass.io configuration.
|
||||
You've now added offline, private voice commands to your Home Assistant. Happy automating!
|
||||
|
||||
If you're running Rhasspy via Docker or in a virtual environment, add `--ssl <CERT_FILE> <KEY_FILE>` to the command-line arguments where `<CERT_FILE>` is your SSL certificate and `<KEY_FILE>` is your SSL key file.
|
||||
## Node-RED
|
||||
|
||||
You can generate a self-signed certificate with the following command:
|
||||
Rhasspy can interact directly with [Node-RED](https://nodered.org) through [websockets](usage.md#websocket-events).
|
||||
Simply add a websocket input and set the path to `ws://<rhasspy>:12101/api/events/intent` where `<rhasspy>` is the hostname or IP address of your Rhasspy server.
|
||||
Make sure to also set send/receive to "entire message".
|
||||
|
||||
openssl req -x509 -newkey rsa:4096 -nodes -out cert.pem -keyout key.pem -days 365
|
||||

|
||||
|
||||
After answering the series of questions, you should have `cert.pem` and `key.pem` in your current directory. Then run Rhasspy with:
|
||||
|
||||
<RHASSPY COMMAND> --ssl cert.pem key.pem
|
||||
|
||||
The web interface will now be available at [https://localhost:12101](https://localhost:12101) and the web socket events at `wss://localhost:12101/api/events/intent`
|
||||
|
||||
In Hass.io, you will need to set the following options via the web interface or in your JSON configuration:
|
||||
|
||||
* `ssl`: `true`
|
||||
* `certfile`: `cert.pem`
|
||||
* `keyfile`: `key.pem`
|
||||
More example flows are available [on Github](https://github.com/synesthesiam/rhasspy/tree/master/examples/nodered).
|
||||
|
||||
## Command Line
|
||||
|
||||
@@ -213,18 +178,18 @@ You can access portions of Rhasspy's functionality without running a web server
|
||||
The `rhasspy` Python module runs this interface in its `__main__`, so it's accessible from Rhasspy's source code directory by running:
|
||||
|
||||
python3 -m rhasspy <COMMAND> <ARGUMENTS>
|
||||
|
||||
|
||||
This will only work inside a properly set up [virtual environment](installation.md#virtual-environment), however.
|
||||
If you run Rhasspy through [Docker](installation.md#docker), the [rhasspy-cli](https://github.com/synesthesiam/rhasspy/blob/master/bin/rhasspy-cli) script should be used instead:
|
||||
|
||||
wget https://github.com/synesthesiam/rhasspy/blob/master/bin/rhasspy-cli
|
||||
chmod +x rhasspy-cli
|
||||
./rhasspy-cli --help
|
||||
|
||||
|
||||
Put this script in your `~/bin` directory so that you can refer to it as `rhasspy-cli` from any directory.
|
||||
By default, it will look for profiles in `$XDG_CONFIG_HOME/rhasspy/profiles`, which is probably `~/.config/rhasspy/profiles` (see [XDG specification](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) for more information).
|
||||
|
||||
**Beware**: the `rhasspy-cli` script runs under your user account and grants Rhasspy **write access to your home directory**.
|
||||
|
||||
**Beware**: the `rhasspy-cli` script runs under your user account and grants Rhasspy **write access to your home directory**.
|
||||
This is needed to save files during the training process, and to avoid those files being owned by `root`.
|
||||
The [rhasspy-cli-ro](https://github.com/synesthesiam/rhasspy/blob/master/bin/rhasspy-cli-ro) script can be used for read only operations, such as speech to text or intent handling, but cannot make any changes to your file system.
|
||||
|
||||
@@ -233,13 +198,240 @@ The [rhasspy-cli-ro](https://github.com/synesthesiam/rhasspy/blob/master/bin/rha
|
||||
The `rhasspy-cli` script takes a command and a set of arguments:
|
||||
|
||||
rhasspy-cli --profile <PROFILE_NAME> <COMMAND> <ARGUMENTS>
|
||||
|
||||
|
||||
Adding `--debug` before the command will print additional information to the console:
|
||||
|
||||
rhasspy-cli --debug --profile <PROFILE_NAME> <COMMAND> <ARGUMENTS>
|
||||
|
||||
|
||||
You can override profile settings with `--set` like this:
|
||||
|
||||
rhasspy-cli --profile <PROFILE_NAME> --set <SETTING_NAME> <SETTING_VALUE> ... <COMMAND> <ARGUMENTS>
|
||||
|
||||
### Available Commands
|
||||
|
||||
See the [command-line reference](reference.md#command-line) for available commands.
|
||||
For `rhasspy-cli --profile <PROFILE_NAME> <COMMAND> <ARGUMENTS>`, `<COMMAND>` can be:
|
||||
|
||||
* `info`
|
||||
* Print profile JSON to standard out
|
||||
* Add `--defaults` to only print settings from `defaults.json`
|
||||
* `wav2text`
|
||||
* Convert WAV file(s) to text
|
||||
* `wav2intent`
|
||||
* Convert WAV file(s) to intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* `text2intent`
|
||||
* Convert text command(s) to intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* `train`
|
||||
* Re-train your profile
|
||||
* `mic2wav`
|
||||
* Listen for a voice command and output WAV data
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `mic2text`
|
||||
* Listen for a voice command and convert it to text
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `mic2intent`
|
||||
* Listen for a voice command and output intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `word2phonemes`
|
||||
* Print the CMU phonemes for a word (possibly unknown)
|
||||
* Add `-n <COUNT>` to control the maximum number of guessed pronunciations
|
||||
* `word2wav`
|
||||
* Pronounce a word (possibly unknown) and output WAV data
|
||||
* `text2speech`
|
||||
* Speaks one or more sentences using Rhasspy's text to speech system
|
||||
* `text2wav`
|
||||
* Converts a single sentence to WAV using Rhasspy's text to speech system
|
||||
* `sleep`
|
||||
* Run Rhasspy and wait until wake word is spoken
|
||||
* `download`
|
||||
* Download necessary profile files from the internet
|
||||
|
||||
### Profile Operations
|
||||
|
||||
Print the complete JSON for the English profile with:
|
||||
|
||||
rhasspy-cli --profile en info
|
||||
|
||||
You can combine this with other commands, such as `jq` to get at specific pieces:
|
||||
|
||||
rhasspy-cli info --profile en | jq .wake.pocketsphinx.keyphrase
|
||||
|
||||
Output (JSON):
|
||||
|
||||
"okay rhasspy"
|
||||
|
||||
### Training
|
||||
|
||||
Retrain the English profile with:
|
||||
|
||||
rhasspy-cli --profile en train
|
||||
|
||||
Add `--debug` before `train` for more information.
|
||||
|
||||
### Speech to Text/Intent
|
||||
|
||||
Convert a WAV file to text from stdin:
|
||||
|
||||
rhasspy-cli --profile en wav2text < what-time-is-it.wav
|
||||
|
||||
Output (text):
|
||||
|
||||
what time is it
|
||||
|
||||
Convert multiple WAV files:
|
||||
|
||||
rhasspy-cli --profile en wav2text what-time-is-it.wav turn-on-the-living-room-lamp.wav
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"what-time-is-it.wav": "what time is it",
|
||||
"turn-on-the-living-room-lamp.wav": "turn on the living room lamp"
|
||||
}
|
||||
```
|
||||
|
||||
Convert multiple WAV file(s) to intents **and** handle them:
|
||||
|
||||
rhasspy-cli --profile en wav2intent --handle what-time-is-it.wav turn-on-the-living-room-lamp.wav
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"what_time_is_it.wav": {
|
||||
"text": "what time is it",
|
||||
"intent": {
|
||||
"name": "GetTime",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": []
|
||||
},
|
||||
"turn_on_living_room_lamp.wav": {
|
||||
"text": "turn on the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "on"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Text to Intent
|
||||
|
||||
Handle a command as if it was spoken:
|
||||
|
||||
rhasspy-cli --profile en text2intent --handle "turn off the living room lamp"
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"turn off the living room lamp": {
|
||||
"text": "turn off the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "off"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Record Your Voice
|
||||
|
||||
Save a voice command to a WAV:
|
||||
|
||||
rhasspy-cli --profile en mic2wav > my-voice-command.wav
|
||||
|
||||
You can listen to it with:
|
||||
|
||||
aplay my-voice-command.wav
|
||||
|
||||
### Test Your Wake Word
|
||||
|
||||
Start Rhasspy and wait for wake word:
|
||||
|
||||
rhasspy-cli --profile en sleep
|
||||
|
||||
Rhasspy should exit and print the wake word when it's spoken.
|
||||
|
||||
### Text to Speech
|
||||
|
||||
Have Rhasspy speak one or more sentences:
|
||||
|
||||
rhasspy-cli --profile en text2speech "We ride at dawn!"
|
||||
|
||||
Use a different text to speech system and voice:
|
||||
|
||||
rhasspy-cli --profile en \
|
||||
--set 'text_to_speech.system' 'flite' \
|
||||
--set 'text_to_speech.flite.voice' 'slt' \
|
||||
text2speech "We ride at dawn!"
|
||||
|
||||
### Pronounce Words
|
||||
|
||||
Speak words Rhasspy doesn't know!
|
||||
|
||||
rhasspy-cli --profile en word2wav raxacoricofallapatorius | aplay
|
||||
|
||||
### Text to Speech to Text to Intent
|
||||
|
||||
Use the miracle of Unix pipes to have Rhasspy interpret voice commands from itself:
|
||||
|
||||
rhasspy-cli --profile en \
|
||||
--set 'text_to_speech.system' 'picotts' \
|
||||
text2wav "turn on the living room lamp" | \
|
||||
rhasspy-cli --profile en wav2text | \
|
||||
rhasspy-cli --profile en text2intent
|
||||
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"turn on the living room lamp": {
|
||||
"text": "turn on the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "on"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
],
|
||||
"speech_confidence": 1,
|
||||
"slots": {
|
||||
"state": "on",
|
||||
"name": "living room lamp"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -6,102 +6,11 @@ You can also wake Rhasspy up using the [HTTP API](usage.md#http-api) by POST-ing
|
||||
|
||||
The following table summarizes the key characteristics of each wake word system:
|
||||
|
||||
| System | Performance | Training to Customize | Online Sign Up |
|
||||
| ------ | ----------- | ----------------- | ----------------------- |
|
||||
| [porcupine](wake-word.md#porcupine) | excellent | yes, offline | no |
|
||||
| [snowboy](wake-word.md#snowboy) | good | yes, online | yes |
|
||||
| [pocketsphinx](wake-word.md#pocketsphinx) | poor | no | no |
|
||||
| [precise](wake-word.md#mycroft-precise) | moderate | yes, offline | no |
|
||||
|
||||
## Porcupine
|
||||
|
||||
Listens for a wake word with [porcupine](https://github.com/Picovoice/Porcupine). This system has the best performance out of the box. If you want a custom wake word, however, you will need to re-run their optimizer tool every 30 days.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"wake": {
|
||||
"system": "porcupine",
|
||||
"porcupine": {
|
||||
"library_path": "porcupine/libpv_porcupine.so",
|
||||
"model_path": "porcupine/porcupine_params.pv",
|
||||
"keyword_path": "porcupine/porcupine.ppn",
|
||||
"sensitivity": 0.5
|
||||
}
|
||||
},
|
||||
|
||||
"rhasspy": {
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
There are a lot of [keyword files](https://github.com/Picovoice/Porcupine/tree/master/resources/keyword_files) available for download. Use the `linux` platform if you're on desktop/laptop (`amd64`) and the `raspberrypi` platform if you're using a Raspberry Pi (`armhf`/`aarch64`). The `.ppn` files should go in the `porcupine` directory inside your profile (referenced by `keyword_path`).
|
||||
|
||||
If you want to create a custom wake word, you will need to use the [Picovoice Console](https://github.com/Picovoice/porcupine#picovoice-console). **NOTE**: the generated keyword file is only valid for 30 days, though you can always just re-run the optimizer.
|
||||
|
||||
See `rhasspy.wake.PorcupineWakeListener` for details.
|
||||
|
||||
## Snowboy
|
||||
|
||||
Listens for one or more wake words with [snowboy](https://snowboy.kitt.ai). This system has good performance out of the box, but requires an online service to train.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"wake": {
|
||||
"system": "snowboy",
|
||||
"hermes": {
|
||||
"wakeword_id": "default"
|
||||
},
|
||||
"snowboy": {
|
||||
"model": "snowboy/snowboy.umdl",
|
||||
"audio_gain": 1,
|
||||
"sensitivity": "0.5",
|
||||
"apply_frontend": false
|
||||
}
|
||||
},
|
||||
|
||||
"rhasspy": {
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
If your hotword model has multiple embedded hotwords (such as `jarvis.umdl`), the "sensitivity" parameter should contain sensitivities for each embedded hotword separated by commas (e.g., "0.5,0.5").
|
||||
|
||||
Visit [the snowboy website](https://snowboy.kitt.ai) to train your own wake word model (requires linking to a GitHub/Google/Facebook account). This *personal* model will end with `.pmdl`, and should go in your profile directory. Then, set `wake.snowboy.model` to the name of that file.
|
||||
|
||||
You also have the option of using a pre-trained *universal* model (`.umdl`) from [Kitt.AI](https://github.com/Kitt-AI/snowboy/tree/master/resources/models).
|
||||
|
||||
### Multiple Wake Words
|
||||
|
||||
You can have `snowboy` listen for multiple wake words with different models, each with their own settings. You will need to download each model file to the `snowboy` directory in your profile.
|
||||
|
||||
For example, to use both the `snowboy.umdl` and `jarvis.umdl` models, add this to your profile:
|
||||
|
||||
```json
|
||||
"wake": {
|
||||
"system": "snowboy",
|
||||
"snowboy": {
|
||||
"model": "snowboy/snowboy.umdl,snowboy/jarvis.umdl",
|
||||
"model_settings": {
|
||||
"snowboy/snowboy.umdl": {
|
||||
"sensitivity": "0.5",
|
||||
"audio_gain": 1,
|
||||
"apply_frontend": false
|
||||
},
|
||||
"snowboy/jarvis.umdl": {
|
||||
"sensitivity": "0.5,0.5",
|
||||
"audio_gain": 1,
|
||||
"apply_frontend": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Make sure to include all models you want in the `model` setting (separated by commas). Each model may have different settings in `model_settings`. If a setting is not present, the default values under `snowboy` will be used.
|
||||
|
||||
See `rhasspy.wake.SnowboyWakeListener` for details.
|
||||
| System | Performance | Requires Training | Requires Online Sign Up |
|
||||
| ------ | ----------- | ----------------- | ----------------------- |
|
||||
| [pocketsphinx](wake-word.md#pocketsphinx) | poor | no | no |
|
||||
| [precise](wake-word.md#mycroft-precise) | moderate | yes, offline | no |
|
||||
| [snowboy](wake-word.md#snowboy) | good | yes, online | yes |
|
||||
|
||||
## Pocketsphinx
|
||||
|
||||
@@ -123,7 +32,7 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Set `wake.pocketsphinx.keyphrase` to whatever you like, though 3-4 syllables is recommended. Make sure to [train](training.md) and restart Rhasspy whenever you change the keyphrase.
|
||||
|
||||
The `wake.pocketsphinx.threshold` should be in the range 1e-50 to 1e-5. The smaller the number, the less likely the keyphrase is to be observed. At least one person has written a script to [automatically tune the threshold](https://medium.com/@PankajB96/automatic-tuning-of-keyword-spotting-thresholds-a27256869d31).
|
||||
@@ -151,14 +60,45 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Follow [the instructions from Mycroft AI](https://github.com/MycroftAI/mycroft-precise/wiki/Training-your-own-wake-word#how-to-train-your-own-wake-word) to train your own wake word model. When you're finished, place **both** the `.pb` and `.pb.params` files in your profile directory, and set `wake.precise.model` to the name of the `.pb` file.
|
||||
|
||||
|
||||
See `rhasspy.wake.PreciseWakeListener` for details.
|
||||
|
||||
## Snowboy
|
||||
|
||||
Listens for a wake word with [snowboy](https://snowboy.kitt.ai). This system has the best performance out of the box, but requires an online service to train.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"wake": {
|
||||
"system": "snowboy",
|
||||
"hermes": {
|
||||
"wakeword_id": "default"
|
||||
},
|
||||
"snowboy": {
|
||||
"model": "model-name-in-profile.(u|p)mdl",
|
||||
"audio_gain": 1,
|
||||
"sensitivity": 0.5,
|
||||
"chunk_size": 960
|
||||
}
|
||||
},
|
||||
|
||||
"rhasspy": {
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
Visit [the snowboy website](https://snowboy.kitt.ai) to train your own wake word model (requires linking to a GitHub/Google/Facebook account). This *personal* model will end with `.pmdl`, and should go in your profile directory. Then, set `wake.snowboy.model` to the name of that file.
|
||||
|
||||
You also have the option of using a pre-trained *universal* model (`.umdl`) from [Kitt.AI](https://github.com/Kitt-AI/snowboy/tree/master/resources/models). I've received errors using anything but `snowboy.umdl`, but YMMV.
|
||||
|
||||
See `rhasspy.wake.SnowboyWakeListener` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Subscribes to the `hermes/hotword/<WAKEWORD_ID>/detected` topic, and wakes Rhasspy up when a message is received ([Hermes protocol](https://docs.snips.ai/reference/hermes)). This allows Rhasspy to use the wake word functionality in [Snips.AI](https://snips.ai/).
|
||||
Subscribes to the `hermes/hotword/<WAKEWORD_ID>/detected` topic, and wakes Rhasspy up when a message is received ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol)). This allows Rhasspy to use the wake word functionality in [Snips.AI](https://snips.ai/).
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -184,7 +124,7 @@ Add to your [profile](profiles.md):
|
||||
"site_id": "default"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Adjust the `mqtt` configuration to connect to your MQTT broker.
|
||||
Set `mqtt.site_id` to match your Snips.AI siteId and `wake.hermes.wakeword_id` to match your Snips.AI wakewordId.
|
||||
|
||||
@@ -209,7 +149,7 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
When Rhasspy starts, your program will be called with the given arguments. Once your program detects the wake word, it should print it to standard out and exit. Rhasspy will call your program again when it goes back to sleep. If the empty string is printed, Rhasspy will **not** wake up and your program will be called again.
|
||||
|
||||
The following environment variables are available to your program:
|
||||
|
||||
@@ -1,137 +1,130 @@
|
||||
#!/usr/bin/env bash
|
||||
this_dir="$( cd "$( dirname "$0" )" && pwd )"
|
||||
cpu_arch=$(uname --m)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Command-line Arguments
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
. "${this_dir}/etc/shflags"
|
||||
|
||||
DEFINE_string 'download-dir' "${this_dir}/download" 'Directory to cache downloaded files'
|
||||
DEFINE_boolean 'precise' true 'Install Mycroft Precise'
|
||||
DEFINE_boolean 'kaldi' true 'Install Kaldi'
|
||||
DEFINE_boolean 'offline' false "Don't download anything"
|
||||
DEFINE_boolean 'all-cpu' false 'Download dependencies for all CPU architectures'
|
||||
DEFINE_string 'cpu-arch' "${cpu_arch}" 'CPU architecture (x86_64, armv7l, arm64v8, armv6l)'
|
||||
|
||||
FLAGS "$@" || exit $?
|
||||
eval set -- "${FLAGS_ARGV}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Default Settings
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
set -e
|
||||
|
||||
cpu_arch="${FLAGS_cpu_arch}"
|
||||
download_dir="${FLAGS_download_dir}"
|
||||
# Directory of *this* script
|
||||
DIR="$( cd "$( dirname "$0" )" && pwd )"
|
||||
|
||||
# Place where downloaded artifacts are stored
|
||||
download_dir="${DIR}/download"
|
||||
mkdir -p "${download_dir}"
|
||||
|
||||
if [[ "${FLAGS_offline}" -eq "${FLAGS_TRUE}" ]]; then
|
||||
offline='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_all_cpu}" -eq "${FLAGS_TRUE}" ]]; then
|
||||
all_cpu='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_precise}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_precise='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_kaldi}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_kaldi='true'
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
function maybe_download {
|
||||
if [[ ! -s "$2" ]]; then
|
||||
if [[ -n "${offline}" ]]; then
|
||||
echo "Need to download $1 but offline."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$(dirname "$2")"
|
||||
curl -sSfL -o "$2" "$1" || { echo "Can't download $1"; exit 1; }
|
||||
echo "$1 => $2"
|
||||
fi
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# CPU architecture
|
||||
CPU_ARCHS=("x86_64" "armv7l" "arm64v8")
|
||||
FRIENDLY_ARCHS=("amd64" "armhf" "aarch64")
|
||||
|
||||
declare -A CPU_TO_FRIENDLY
|
||||
CPU_TO_FRIENDLY["x86_64"]="amd64"
|
||||
CPU_TO_FRIENDLY["armv7l"]="armhf"
|
||||
CPU_TO_FRIENDLY["arm64v8"]="aarch64"
|
||||
CPU_TO_FRIENDLY["armv6l"]="armv6l"
|
||||
|
||||
# CPU architecture
|
||||
if [[ -n "${all_cpu}" ]]; then
|
||||
CPU_ARCHS=("x86_64" "armv7l" "arm64v8")
|
||||
FRIENDLY_ARCHS=("amd64" "armhf" "aarch64")
|
||||
else
|
||||
CPU_ARCHS=("${cpu_arch}")
|
||||
FRIENDLY_ARCHS=("${CPU_TO_FRIENDLY[${cpu_arch}]}")
|
||||
# -----------------------------------------------------------------------------
|
||||
# OpenFST
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
openfst_file="${download_dir}/openfst-1.6.2.tar.gz"
|
||||
if [[ ! -f "${openfst_file}" ]]; then
|
||||
openfst_url='http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.2.tar.gz'
|
||||
echo "Downloading OpenFST source (${openfst_url})"
|
||||
curl -sSfL -o "${openfst_file}" "${openfst_url}"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Rhasspy
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
for FRIENDLY_ARCH in "${FRIENDLY_ARCHS[@]}"; do
|
||||
rhasspy_files=("rhasspy-tools_${FRIENDLY_ARCH}.tar.gz" "rhasspy-web-dist.tar.gz")
|
||||
for rhasspy_file_name in "${rhasspy_files[@]}"; do
|
||||
rhasspy_file="${download_dir}/${rhasspy_file_name}"
|
||||
rhasspy_file_url="https://github.com/synesthesiam/rhasspy/releases/download/v2.0/${rhasspy_file_name}"
|
||||
maybe_download "${rhasspy_file_url}" "${rhasspy_file}"
|
||||
done
|
||||
done
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Pocketsphinx for Python
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
pocketsphinx_file="${download_dir}/pocketsphinx-python.tar.gz"
|
||||
pocketsphinx_url='https://github.com/synesthesiam/pocketsphinx-python/releases/download/v1.0/pocketsphinx-python.tar.gz'
|
||||
maybe_download "${pocketsphinx_url}" "${pocketsphinx_file}"
|
||||
if [[ ! -f "${pocketsphinx_file}" ]]; then
|
||||
pocketsphinx_url='https://github.com/synesthesiam/pocketsphinx-python/releases/download/v1.0/pocketsphinx-python.tar.gz'
|
||||
echo "Downloading pocketsphinx (${pocketsphinx_url})"
|
||||
curl -sSfL -o "${pocketsphinx_file}" "${pocketsphinx_url}"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# jsgf2fst
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
jsgf2fst_file="${download_dir}/jsgf2fst-0.1.0.tar.gz"
|
||||
if [[ ! -f "${jsgf2fst_file}" ]]; then
|
||||
jsgf2fst_url='https://github.com/synesthesiam/jsgf2fst/releases/download/v0.1.0/jsgf2fst-0.1.0.tar.gz'
|
||||
echo "Downloading jsgf2fst (${jsgf2fst_url})"
|
||||
curl -sSfL -o "${jsgf2fst_file}" "${jsgf2fst_url}"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Snowboy
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
snowboy_file="${download_dir}/snowboy-1.3.0.tar.gz"
|
||||
snowboy_url='https://github.com/Kitt-AI/snowboy/archive/v1.3.0.tar.gz'
|
||||
maybe_download "${snowboy_url}" "${snowboy_file}"
|
||||
if [[ ! -f "${snowboy_file}" ]]; then
|
||||
snowboy_url='https://github.com/Kitt-AI/snowboy/archive/v1.3.0.tar.gz'
|
||||
echo "Downloading snowboy (${snowboy_url})"
|
||||
curl -sSfL -o "${snowboy_file}" "${snowboy_url}"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Mycroft Precise
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_precise}" ]]; then
|
||||
for CPU_ARCH in "${CPU_ARCHS[@]}"; do
|
||||
case $CPU_ARCH in
|
||||
x86_64|armv7l)
|
||||
precise_file="${download_dir}/precise-engine_0.3.0_${CPU_ARCH}.tar.gz"
|
||||
precise_url="https://github.com/MycroftAI/mycroft-precise/releases/download/v0.3.0/precise-engine_0.3.0_${CPU_ARCH}.tar.gz"
|
||||
maybe_download "${precise_url}" "${precise_file}"
|
||||
esac
|
||||
done
|
||||
for CPU_ARCH in "x86_64" "armv7l"
|
||||
do
|
||||
precise_file="${download_dir}/precise-engine_0.3.0_${CPU_ARCH}.tar.gz"
|
||||
if [[ ! -f "${precise_file}" ]]; then
|
||||
precise_url="https://github.com/MycroftAI/mycroft-precise/releases/download/v0.3.0/precise-engine_0.3.0_${CPU_ARCH}.tar.gz"
|
||||
echo "Downloading Mycroft Precise (${precise_url})"
|
||||
curl -sSfL -o "${precise_file}" "${precise_url}"
|
||||
fi
|
||||
done
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Opengrm
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "$(which ngramcount)" ]]; then
|
||||
opengrm_file="${download_dir}/opengrm-ngram-1.3.3.tar.gz"
|
||||
if [[ ! -f "${opengrm_file}" ]]; then
|
||||
opengrm_url='https://www.opengrm.org/twiki/pub/GRM/NGramDownload/opengrm-ngram-1.3.3.tar.gz'
|
||||
echo "Download Opengrm (${opengrm_url})"
|
||||
curl -sSfLk -o "${opengrm_file}" "${opengrm_url}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Phonetisaurus
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
for FRIENDLY_ARCH in "${FRIENDLY_ARCHS[@]}"
|
||||
do
|
||||
# Install pre-built package
|
||||
phonetisaurus_file="${download_dir}/phonetisaurus-2019_${FRIENDLY_ARCH}.deb"
|
||||
if [[ ! -f "${phonetisaurus_file}" ]]; then
|
||||
phonetisaurus_url="https://github.com/synesthesiam/phonetisaurus-2019/releases/download/v1.0/phonetisaurus-2019_${FRIENDLY_ARCH}.deb"
|
||||
echo "Downloading phonetisaurus (${phonetisaurus_url})"
|
||||
curl -sSfL -o "${phonetisaurus_file}" "${phonetisaurus_url}"
|
||||
fi
|
||||
done
|
||||
|
||||
# Build from source
|
||||
phonetisaurus_file="${download_dir}/phonetisaurus-2019.zip"
|
||||
if [[ ! -f "${phonetisaurus_file}" ]]; then
|
||||
phonetisaurus_url="https://github.com/synesthesiam/phonetisaurus-2019/releases/download/v1.0/phonetisaurus-2019_${FRIENDLY_ARCH}.deb"
|
||||
echo "Downloading phonetisaurus source (${phonetisaurus_url})"
|
||||
curl -sSfL -o "${phonetisaurus_file}" "${phonetisaurus_url}"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Kaldi
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_kaldi}" ]]; then
|
||||
for FRIENDLY_ARCH in "${FRIENDLY_ARCHS[@]}"; do
|
||||
# Install pre-built package
|
||||
kaldi_file="${download_dir}/kaldi_${FRIENDLY_ARCH}.tar.gz"
|
||||
for FRIENDLY_ARCH in "${FRIENDLY_ARCHS[@]}"
|
||||
do
|
||||
# Install pre-built package
|
||||
kaldi_file="${download_dir}/kaldi_${FRIENDLY_ARCH}.tar.gz"
|
||||
if [[ ! -f "${kaldi_file}" ]]; then
|
||||
kaldi_url="https://github.com/synesthesiam/kaldi-docker/releases/download/v1.0/kaldi_${FRIENDLY_ARCH}.tar.gz"
|
||||
maybe_download "${kaldi_url}" "${kaldi_file}"
|
||||
done
|
||||
fi
|
||||
echo "Downloading kaldi (${kaldi_url})"
|
||||
curl -sSfL -o "${kaldi_file}" "${kaldi_url}"
|
||||
fi
|
||||
done
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ switch:
|
||||
command_on: "echo 'Living room lamp ON'"
|
||||
command_off: "echo 'Living room lamp OFF'"
|
||||
garage_light:
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_off: "echo 'Garage light OFF'"
|
||||
|
||||
# Doors
|
||||
@@ -53,7 +53,7 @@ binary_sensor:
|
||||
command: "bash -c 'sec=$(date +%s); [[ $(($sec % 2)) -eq 0 ]] && echo open || echo closed'"
|
||||
payload_on: "closed"
|
||||
payload_off: "open"
|
||||
|
||||
|
||||
# Temperature
|
||||
sensor:
|
||||
- platform: command_line
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
default_view:
|
||||
view: true
|
||||
view: yes
|
||||
entities:
|
||||
- group.inside
|
||||
- group.garage
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
# ATLAS specific Linux ARM configuration
|
||||
|
||||
ifndef DOUBLE_PRECISION
|
||||
$(error DOUBLE_PRECISION not defined.)
|
||||
endif
|
||||
ifndef OPENFSTINC
|
||||
$(error OPENFSTINC not defined.)
|
||||
endif
|
||||
ifndef OPENFSTLIBS
|
||||
$(error OPENFSTLIBS not defined.)
|
||||
endif
|
||||
ifndef ATLASINC
|
||||
$(error ATLASINC not defined.)
|
||||
endif
|
||||
ifndef ATLASLIBS
|
||||
$(error ATLASLIBS not defined.)
|
||||
endif
|
||||
|
||||
CXXFLAGS = -std=c++11 -I.. -isystem $(OPENFSTINC) -O1 $(EXTRA_CXXFLAGS) \
|
||||
-Wall -Wno-sign-compare -Wno-unused-local-typedefs \
|
||||
-Wno-deprecated-declarations -Winit-self \
|
||||
-DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \
|
||||
-DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \
|
||||
-ftree-vectorize -pthread \
|
||||
-g # -O0 -DKALDI_PARANOID
|
||||
|
||||
ifeq ($(KALDI_FLAVOR), dynamic)
|
||||
CXXFLAGS += -fPIC
|
||||
endif
|
||||
|
||||
# Compiler specific flags
|
||||
COMPILER = $(shell $(CXX) -v 2>&1)
|
||||
ifeq ($(findstring clang,$(COMPILER)),clang)
|
||||
# Suppress annoying clang warnings that are perfectly valid per spec.
|
||||
CXXFLAGS += -Wno-mismatched-tags
|
||||
endif
|
||||
|
||||
LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic
|
||||
LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl
|
||||
@@ -1,19 +0,0 @@
|
||||
[Unit]
|
||||
Description=Rhasspy
|
||||
After=syslog.target network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/home/<USER>/path/to/rhasspy
|
||||
ExecStart=/bin/bash -lc './run-venv.sh --profile <LANGUAGE>'
|
||||
|
||||
RestartSec=1
|
||||
Restart=on-failure
|
||||
|
||||
StandardOutput=syslog
|
||||
StandardError=syslog
|
||||
|
||||
SyslogIdentifier=rhasspy
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,11 +1,11 @@
|
||||
{
|
||||
"text": "σβήσε το φως του γκαράζ",
|
||||
"text": "ενεργοποιήστε τη λάμπα του καθιστικού",
|
||||
"intent": {
|
||||
"name": "ChangeLightState"
|
||||
},
|
||||
"entities": {
|
||||
"name": "γκαράζ",
|
||||
"state": "σβήσε"
|
||||
"name": "καθιστικού",
|
||||
"state": "ενεργοποιήστε"
|
||||
},
|
||||
"words": {
|
||||
"known": {
|
||||
|
||||
@@ -10,11 +10,11 @@
|
||||
"words": {
|
||||
"known": {
|
||||
"word": "tempo",
|
||||
"phonemes": "t e~ p u"
|
||||
"phonemes": "T E~+ P UX"
|
||||
},
|
||||
"unknown": {
|
||||
"word": "tempoada",
|
||||
"phonemes": "t e~ p o a d a"
|
||||
"phonemes": "T E~ P O A+ D AX"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"text": "är garage dörren öppen",
|
||||
"intent": {
|
||||
"name": "GetGarageState"
|
||||
},
|
||||
"entities": {
|
||||
},
|
||||
"words": {
|
||||
"known": {
|
||||
"word": "klockan",
|
||||
"phonemes": "k l ol s k abl n"
|
||||
},
|
||||
"unknown": {
|
||||
"word": "klockanklacken",
|
||||
"phonemes": "k l ol s k abl n k l a k el n"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -27,7 +27,7 @@
|
||||
"intent": {
|
||||
"system": "remote",
|
||||
"rasa": {
|
||||
"url": "http://localhost:5005/"
|
||||
"url": "http://localhost:5000/"
|
||||
},
|
||||
"remote": {
|
||||
"url": "http://server:12101/api/text-to-intent"
|
||||
@@ -70,4 +70,4 @@
|
||||
"username": "",
|
||||
"site_id": "default"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -40,7 +40,7 @@ switch:
|
||||
command_on: "echo 'Living room lamp ON'"
|
||||
command_off: "echo 'Living room lamp OFF'"
|
||||
garage_light:
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_off: "echo 'Garage light OFF'"
|
||||
|
||||
# Doors
|
||||
@@ -50,7 +50,7 @@ binary_sensor:
|
||||
command: "bash -c 'sec=$(date +%s); [[ $(($sec % 2)) -eq 0 ]] && echo open || echo closed'"
|
||||
payload_on: "closed"
|
||||
payload_off: "open"
|
||||
|
||||
|
||||
# Temperature
|
||||
sensor:
|
||||
- platform: command_line
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
"intent": {
|
||||
"system": "fuzzywuzzy",
|
||||
"rasa": {
|
||||
"url": "http://localhost:5005/"
|
||||
"url": "http://localhost:5000/"
|
||||
},
|
||||
"remote": {
|
||||
"url": "http://my-server:12101/api/text-to-intent"
|
||||
@@ -73,4 +73,4 @@
|
||||
"username": "",
|
||||
"site_id": "default"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -75,7 +75,7 @@ switch:
|
||||
command_on: "echo 'Living room lamp ON'"
|
||||
command_off: "echo 'Living room lamp OFF'"
|
||||
garage_light:
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_off: "echo 'Garage light OFF'"
|
||||
|
||||
# Doors
|
||||
@@ -85,7 +85,7 @@ binary_sensor:
|
||||
command: "bash -c 'sec=$(date +%s); [[ $(($sec % 2)) -eq 0 ]] && echo open || echo closed'"
|
||||
payload_on: "closed"
|
||||
payload_off: "open"
|
||||
|
||||
|
||||
# Temperature
|
||||
sensor:
|
||||
- platform: command_line
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
"intent": {
|
||||
"system": "fuzzywuzzy",
|
||||
"rasa": {
|
||||
"url": "http://localhost:5005/"
|
||||
"url": "http://localhost:5000/"
|
||||
},
|
||||
"remote": {
|
||||
"url": "http://my-server:12101/api/text-to-intent"
|
||||
|
||||
@@ -42,7 +42,7 @@ switch:
|
||||
command_on: "echo 'Living room lamp ON'"
|
||||
command_off: "echo 'Living room lamp OFF'"
|
||||
garage_light:
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_off: "echo 'Garage light OFF'"
|
||||
|
||||
# Doors
|
||||
@@ -52,7 +52,7 @@ binary_sensor:
|
||||
command: "bash -c 'sec=$(date +%s); [[ $(($sec % 2)) -eq 0 ]] && echo open || echo closed'"
|
||||
payload_on: "closed"
|
||||
payload_off: "open"
|
||||
|
||||
|
||||
# Temperature
|
||||
sensor:
|
||||
- platform: command_line
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
"intent": {
|
||||
"system": "fuzzywuzzy",
|
||||
"rasa": {
|
||||
"url": "http://localhost:5005/"
|
||||
"url": "http://localhost:5000/"
|
||||
},
|
||||
"remote": {
|
||||
"url": "http://my-server:12101/api/text-to-intent"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
if [[ -z "$(command -v phonetisaurus-train)" ]]; then
|
||||
if [[ -z "$(which phonetisaurus-train)" ]]; then
|
||||
echo "Phonetisaurus not installed!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -4,7 +4,6 @@ nav:
|
||||
- Home: index.md
|
||||
- Hardware: hardware.md
|
||||
- Installation: installation.md
|
||||
- Tutorials: tutorials.md
|
||||
- Usage: usage.md
|
||||
- Profiles: profiles.md
|
||||
- Training: training.md
|
||||
@@ -16,7 +15,5 @@ nav:
|
||||
- Intent Recognition: intent-recognition.md
|
||||
- Intent Handling: intent-handling.md
|
||||
- Text to Speech: text-to-speech.md
|
||||
- Reference: reference.md
|
||||
- Development: development.md
|
||||
- License: license.md
|
||||
- About: about.md
|
||||
|
||||
@@ -52,31 +52,4 @@ ignore_missing_imports = True
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-cerberus.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-flair.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-google.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-networkx.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-num2words.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-doit.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-json5.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-quart.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-quart_cors.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-swagger_ui.*]
|
||||
ignore_missing_imports = True
|
||||
@@ -1,168 +0,0 @@
|
||||
#
|
||||
# Copyright 2018 Picovoice Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import os
|
||||
from ctypes import *
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class Porcupine(object):
    """Python binding for Picovoice's wake word detection (aka Porcupine) library."""

    class PicovoiceStatuses(Enum):
        """Status codes corresponding to 'pv_status_t' defined in 'include/picovoice.h'"""

        SUCCESS = 0
        OUT_OF_MEMORY = 1
        IO_ERROR = 2
        INVALID_ARGUMENT = 3

    # Exception type raised for each non-success status returned by the library.
    _PICOVOICE_STATUS_TO_EXCEPTION = {
        PicovoiceStatuses.OUT_OF_MEMORY: MemoryError,
        PicovoiceStatuses.IO_ERROR: IOError,
        PicovoiceStatuses.INVALID_ARGUMENT: ValueError
    }

    class CPorcupine(Structure):
        # Declared without fields: instances are only handled through pointers.
        pass

    def __init__(
            self,
            library_path,
            model_file_path,
            keyword_file_path=None,
            sensitivity=None,
            keyword_file_paths=None,
            sensitivities=None):
        """
        Loads Porcupine's shared library and creates an instance of wake word detection object.

        :param library_path: Absolute path to Porcupine's shared library.
        :param model_file_path: Absolute path to file containing model parameters.
        :param keyword_file_path: Absolute path to keyword file containing hyper-parameters. If not present then
        'keyword_file_paths' will be used.
        :param sensitivity: Sensitivity parameter. A higher sensitivity value lowers miss rate at the cost of increased
        false alarm rate. For more information regarding this parameter refer to 'include/pv_porcupine.h'. If not
        present then 'sensitivities' is used.
        :param keyword_file_paths: List of absolute paths to keyword files. Intended to be used for multiple keyword
        scenario. This parameter is used only when 'keyword_file_path' is not set.
        :param sensitivities: List of sensitivity parameters. Intended to be used for multiple keyword scenario. This
        parameter is used only when 'sensitivity' is not set.
        :raises IOError: If the library, the model file or any keyword file cannot be found, or if the library
        reports an I/O error during initialization.
        :raises ValueError: If a sensitivity is outside [0, 1], if the keyword/sensitivity lists differ in length,
        if neither a single nor a multiple keyword configuration is given, or if the library rejects an argument.
        :raises MemoryError: If the library reports an out-of-memory condition during initialization.
        """

        if not os.path.exists(library_path):
            raise IOError(f"Could not find Porcupine's library at '{library_path}'")

        library = cdll.LoadLibrary(library_path)

        if not os.path.exists(model_file_path):
            raise IOError(f"Could not find model file at '{model_file_path}'")

        if sensitivity is not None and keyword_file_path is not None:
            # Expand '~' before checking, exactly as the multiple keyword branch and the
            # call into the library do; otherwise a '~/...' path is rejected here although
            # it would be accepted when passed through 'keyword_file_paths'.
            if not os.path.exists(os.path.expanduser(keyword_file_path)):
                raise IOError(f"Could not find keyword file at '{keyword_file_path}'")
            keyword_file_paths = [keyword_file_path]

            if not (0 <= sensitivity <= 1):
                raise ValueError('Sensitivity should be within [0, 1]')
            sensitivities = [sensitivity]
        elif sensitivities is not None and keyword_file_paths is not None:
            if len(keyword_file_paths) != len(sensitivities):
                raise ValueError("Different number of sensitivity and keyword file path parameters are provided.")

            for x in keyword_file_paths:
                if not os.path.exists(os.path.expanduser(x)):
                    raise IOError(f"Could not find keyword file at '{x}'")

            for x in sensitivities:
                if not (0 <= x <= 1):
                    raise ValueError('Sensitivity should be within [0, 1]')
        else:
            raise ValueError("Sensitivity and/or keyword file path is missing")

        self._num_keywords = len(keyword_file_paths)

        init_func = library.pv_porcupine_multiple_keywords_init
        init_func.argtypes = [
            c_char_p,
            c_int,
            POINTER(c_char_p),
            POINTER(c_float),
            POINTER(POINTER(self.CPorcupine))]
        # ctypes calls 'restype' with the raw integer result, turning it into an enum member.
        init_func.restype = self.PicovoiceStatuses

        self._handle = POINTER(self.CPorcupine)()

        status = init_func(
            model_file_path.encode(),
            self._num_keywords,
            (c_char_p * self._num_keywords)(*[os.path.expanduser(x).encode() for x in keyword_file_paths]),
            (c_float * self._num_keywords)(*sensitivities),
            byref(self._handle))
        if status is not self.PicovoiceStatuses.SUCCESS:
            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Initialization failed')

        self.process_func = library.pv_porcupine_multiple_keywords_process
        self.process_func.argtypes = [POINTER(self.CPorcupine), POINTER(c_short), POINTER(c_int)]
        self.process_func.restype = self.PicovoiceStatuses

        self._delete_func = library.pv_porcupine_delete
        self._delete_func.argtypes = [POINTER(self.CPorcupine)]
        self._delete_func.restype = None

        self._sample_rate = library.pv_sample_rate()
        self._frame_length = library.pv_porcupine_frame_length()

    @property
    def sample_rate(self):
        """Audio sample rate accepted by Porcupine library."""

        return self._sample_rate

    @property
    def frame_length(self):
        """Number of audio samples per frame expected by C library."""

        return self._frame_length

    def process(self, pcm):
        """
        Monitors incoming audio stream for given wake word(s).

        :param pcm: An array (or array-like) of consecutive audio samples. For more information regarding required audio
        properties (i.e. sample rate, number of channels encoding, and number of samples per frame) please refer to
        'include/pv_porcupine.h'.
        :return: For a single wake-word use case True if wake word is detected, False otherwise. For multiple
        wake-word use case it returns the index of detected wake-word. Indexing is 0-based and according to ordering
        of input keyword file paths. It returns -1 when no keyword is detected.
        :raises MemoryError, IOError, ValueError: If the library reports the corresponding failure status.
        """

        result = c_int()
        status = self.process_func(self._handle, (c_short * len(pcm))(*pcm), byref(result))
        if status is not self.PicovoiceStatuses.SUCCESS:
            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Processing failed')

        keyword_index = result.value

        if self._num_keywords == 1:
            return keyword_index == 0
        else:
            return keyword_index

    def delete(self):
        """Releases resources acquired by Porcupine's library."""

        self._delete_func(self._handle)
|
||||
@@ -1,37 +0,0 @@
|
||||
A abby A B BV I
|
||||
AE adéu AE DH E W
|
||||
AO això AE Y SH AO
|
||||
B baix B A Y SH
|
||||
BV alba A L BV AE
|
||||
C guia C I AE
|
||||
CH boig B O CH
|
||||
D andy A N D I
|
||||
DH cada K A DH AE
|
||||
E cent S E N
|
||||
EA acte A K T EA
|
||||
EE aneu AE N EE W
|
||||
F cafè K AE F EE
|
||||
G èxit EE G Z I T
|
||||
GH agut AE GH U T
|
||||
I ahir AE I
|
||||
J boja B O J AE
|
||||
K amic AE M I K
|
||||
L ales A L EA S
|
||||
LY allí AE LY I
|
||||
M amor AE M O R
|
||||
N anam A N AE M
|
||||
NG banc B A NG K
|
||||
NY anys A NY S
|
||||
O avió AE BV Y O
|
||||
P capa K A P AE
|
||||
R aire A Y R EA
|
||||
RR arma A RR M AE
|
||||
S avís AE BV I S
|
||||
SH així AE Y SH I
|
||||
T alta A L T AE
|
||||
U algú AE L GH U
|
||||
UO alto A L T UO
|
||||
V vagi V A GH I
|
||||
W aqui A K W I
|
||||
Y avui AE BV U Y
|
||||
Z base B A Z EA
|
||||
@@ -1,29 +0,0 @@
|
||||
{
|
||||
"language": "ca",
|
||||
"name": "ca",
|
||||
"locale": "ca_ES",
|
||||
"speech_to_text": {
|
||||
"system": "pocketsphinx",
|
||||
"dictionary_casing": "lower"
|
||||
},
|
||||
"download": {
|
||||
"conditions": {
|
||||
"speech_to_text.system": {
|
||||
"pocketsphinx": {
|
||||
"acoustic_model": "ca-es-models-v0.4.0.zip:ca-es/acoustic-model",
|
||||
"base_dictionary.txt": "ca-es-models-v0.4.0.zip:ca-es/pronounciation-dictionary.dict",
|
||||
"g2p.fst": "ca-g2p.tar.gz:g2p.fst"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
"files": {
|
||||
"ca-es-models-v0.4.0.zip": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-ca/ca-es-models-v0.4.0.zip"
|
||||
},
|
||||
"ca-g2p.tar.gz": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-ca/ca-g2p.tar.gz"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
[GetTime]
|
||||
quina hora es
|
||||
|
||||
[GetTemperature]
|
||||
quina és la temperatura
|
||||
|
||||
[GetGarageState]
|
||||
la porta del garatge està oberta
|
||||
està la porta del garatge tancada
|
||||
|
||||
[ChangeLightState]
|
||||
light_name = (làmpada de la sala | llum del garatge) {name}
|
||||
light_state = (encén:on | apagueu:off | engegueu:on) {state}
|
||||
|
||||
<light_state> [la] <light_name>
|
||||
@@ -1,29 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
|
||||
def main():
    """Print each integer from the lower to the upper bound (inclusive), one per line.

    Command-line arguments:
        lower: Lower bound.
        upper: Upper bound (inclusive).
        step (optional): Increment between printed numbers; taken from the first
            extra argument and defaults to 1.

    The bounds are swapped when given in descending order, so output is always
    ascending. A negative step therefore prints nothing. A step that is zero or
    not an integer is reported as a usage error (exit status 2) instead of
    surfacing as an uncaught ValueError.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")
    args, rest_args = parser.parse_known_args()

    lower = args.lower
    upper = args.upper
    step = 1

    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error("step must be an integer, got {!r}".format(rest_args[0]))

        # range() rejects a zero step; fail with a readable message instead.
        if step == 0:
            parser.error("step must not be zero")

    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
|
||||