Compare commits
283 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0c37c88e61 | |||
| 50f05506a2 | |||
| b62068da69 | |||
| f69d7484ba | |||
| e478765b4e | |||
| 8bf673c412 | |||
| 8854f26dc1 | |||
| cf8f2319b8 | |||
| 782721fab3 | |||
| 43dc7a07c9 | |||
| be42447620 | |||
| 3a884888c4 | |||
| 3133666390 | |||
| ff0aea4177 | |||
| fa418d3129 | |||
| d4314a85d9 | |||
| 3d42551bfc | |||
| 6411924cf2 | |||
| 77d2196f00 | |||
| 3c53cd0236 | |||
| 46a75ec83e | |||
| 3e9c4bef68 | |||
| 465d1e61e0 | |||
| 563fcfcd49 | |||
| 0ff965a965 | |||
| 998eff8708 | |||
| 1dcf984669 | |||
| 21c2b2ef9d | |||
| 958c12cfb4 | |||
| 04efe9f151 | |||
| 6dbc8dc35e | |||
| 2cac48292a | |||
| a4528d40fb | |||
| 4cd8b307c7 | |||
| de0824e49a | |||
| 74ba8c1c4a | |||
| 9428abdd40 | |||
| a6c425d65f | |||
| 015c37fa5d | |||
| 32f7e37657 | |||
| 54063feb03 | |||
| 3bc36f2fb1 | |||
| e0401d4c18 | |||
| 73124075ae | |||
| c410f4eea7 | |||
| 3f0545ed0f | |||
| 331138f300 | |||
| 33b847b828 | |||
| d770679373 | |||
| 86e695a7a4 | |||
| 07d1cc4e43 | |||
| b68e5caf01 | |||
| da4d994e75 | |||
| 627e6e8b3d | |||
| f0ec0486f7 | |||
| 1febc3d1d8 | |||
| 2879802f2f | |||
| 21a2a8f9b4 | |||
| a96f80237e | |||
| fc68d04f29 | |||
| e00c1448cb | |||
| f04ad3bfeb | |||
| eb11f90cab | |||
| 2c612ee669 | |||
| dfe92f9d0e | |||
| c59e7b42ab | |||
| 948705a87b | |||
| 6b0b5c1799 | |||
| 9553691e88 | |||
| 997456631e | |||
| 104165198b | |||
| f405b827f4 | |||
| 089568cf9f | |||
| a92d88ff8f | |||
| c60030b48f | |||
| b400d651f6 | |||
| 2dfa9aa782 | |||
| bd2c065415 | |||
| 1b95144b05 | |||
| 3f60936471 | |||
| 9a3c2f8a3f | |||
| 1fb75f24d7 | |||
| 6c0187e606 | |||
| 16262ec896 | |||
| 9d1303ed21 | |||
| a12e537110 | |||
| 63fb3cf046 | |||
| f5e6666931 | |||
| 44a9c84bc7 | |||
| 9a076936c5 | |||
| 102b29ecf6 | |||
| 51455bfd97 | |||
| 5bf6086164 | |||
| 08ebaf0914 | |||
| 4b3f26c12f | |||
| 707c31e4d3 | |||
| 509d47ea0f | |||
| 1d2b08df6e | |||
| 0608443482 | |||
| 9a1a41385c | |||
| 2d8095f0e1 | |||
| 8f3c1c5d61 | |||
| deb742d768 | |||
| fa24588ea4 | |||
| ed581ecf9d | |||
| f8aedd4ef5 | |||
| 14c1386496 | |||
| 153b642057 | |||
| dec32102dd | |||
| f365c69265 | |||
| 1724c328b7 | |||
| 6db4a8d341 | |||
| b70e8a8569 | |||
| 2e4828da06 | |||
| 96cfe69753 | |||
| 3e8e246c1c | |||
| 80a5008b93 | |||
| e26ecf82f1 | |||
| e4db52f845 | |||
| 846313e236 | |||
| b68a3fac4a | |||
| 7459f0d9d9 | |||
| 617b789d89 | |||
| bb20cd280b | |||
| ce780feb74 | |||
| 2225262a53 | |||
| 5b5529339b | |||
| 90f5c5aef7 | |||
| 78f263582d | |||
| 6c8608f1a1 | |||
| 3e5437856b | |||
| 15aaea2810 | |||
| 61d8930e38 | |||
| 5748b2dc3a | |||
| 8f7158f7cc | |||
| 97226286e3 | |||
| 896b3ddfba | |||
| 1772f6e740 | |||
| b5dfd6518b | |||
| 2730c131d0 | |||
| 05ded030c8 | |||
| 3b90383145 | |||
| 1bb5462150 | |||
| 95a354e2a3 | |||
| d203a3ed75 | |||
| 59d473b931 | |||
| 17737f7fed | |||
| 76cf173849 | |||
| 55d1cfacdd | |||
| 4f6d02169c | |||
| 74761b942f | |||
| b88acb3a34 | |||
| 7b323a08bb | |||
| ec55dbfa5b | |||
| 15af0ae3c1 | |||
| f8542f7ac1 | |||
| de67b3318c | |||
| b47dca03aa | |||
| 89a1921c3e | |||
| e5fe2a31b3 | |||
| afdd241c57 | |||
| bea38cc64f | |||
| c2562aa674 | |||
| 7dec472ec4 | |||
| 007ea4266e | |||
| a627f8746c | |||
| 13f183afd4 | |||
| 130cbeb7a8 | |||
| 358e7b087e | |||
| 8e2d2f2352 | |||
| ac3c92e24a | |||
| a501c52954 | |||
| f8f0b48140 | |||
| 2a8972fb99 | |||
| ef0211505a | |||
| c961fc8814 | |||
| cbbfc23395 | |||
| 0c2a1931f6 | |||
| cfa90ea5d5 | |||
| 414457f150 | |||
| 0dbb84b355 | |||
| b1ff836c4e | |||
| 640be7b0ac | |||
| 421f59518a | |||
| 3eb4368b37 | |||
| a2df6149bb | |||
| 5b60b17dd3 | |||
| 62626cc6a1 | |||
| 2e09b75f52 | |||
| aa658fb29b | |||
| ad2e208fc2 | |||
| f32cd5c93a | |||
| 927123d491 | |||
| 9cc9e2efe6 | |||
| b9ef100721 | |||
| da5b9b2fb5 | |||
| 27c681f758 | |||
| 2af6130d22 | |||
| 268c2c5295 | |||
| 797615acf7 | |||
| ec786fd5db | |||
| bcda393d7b | |||
| e7a67ad2be | |||
| 306a7e62bd | |||
| 99b35270c8 | |||
| a69b445d51 | |||
| 59ee156c1d | |||
| afcc2c59d4 | |||
| 25afcc3559 | |||
| 8cd3a10299 | |||
| 67a841c080 | |||
| 28e098330a | |||
| 91f6571662 | |||
| d9ef37c005 | |||
| f93dfe8a4e | |||
| 1322ba3a3b | |||
| 163cd9670c | |||
| 580ea54b42 | |||
| c4a9b60990 | |||
| 89875f1644 | |||
| 6f6924cee4 | |||
| 19830d8d39 | |||
| a9ccba37ee | |||
| 21ab4d4be2 | |||
| 72383facae | |||
| d663c880f7 | |||
| cad172b450 | |||
| 713b199669 | |||
| 8fdbddb2b8 | |||
| df974c4faf | |||
| 72fd9ced65 | |||
| a82ecb8b52 | |||
| d052be5290 | |||
| 481bd3883f | |||
| 97c68d1d0d | |||
| b0500afa3f | |||
| b356d2218f | |||
| a5ce7e6ef3 | |||
| b5109850ae | |||
| 974784bb4f | |||
| 81680f00d2 | |||
| 4718800ec0 | |||
| a9f4122875 | |||
| 11d311f7ed | |||
| 7c50ea5790 | |||
| 18dca38e9a | |||
| 94b59c16bc | |||
| 7921287040 | |||
| 38211e06ba | |||
| 2269bebf33 | |||
| 565506b1df | |||
| cc7e1b9a25 | |||
| c76c78674c | |||
| 3a925952e2 | |||
| 9f6420e4cc | |||
| 2e720ffa67 | |||
| 55a4788cc6 | |||
| 6cf18735c5 | |||
| f27e333ac8 | |||
| 547a63ab59 | |||
| 45ea5996ce | |||
| 37cf6c85da | |||
| 4383145401 | |||
| f7ed88de8b | |||
| b1d7695a4c | |||
| f58a2451cf | |||
| 049a173b14 | |||
| c02ff73be8 | |||
| cda3a02775 | |||
| 292a2fdf10 | |||
| 07dcbebf79 | |||
| 59ba6e5dda | |||
| d08b62148d | |||
| 91bce4cb8b | |||
| d8d6486508 | |||
| 3f1d0946be | |||
| e744330761 | |||
| 5e6030818d | |||
| 8d0f6f37a2 | |||
| 396531f6ec | |||
| 51d9bc0c8f | |||
| bea3f30789 | |||
| 9f8babff34 |
@@ -1,26 +1,176 @@
|
||||
.git/
|
||||
.venv/
|
||||
node_modules/
|
||||
__pycache__/
|
||||
test/
|
||||
tools/
|
||||
etc/test/
|
||||
download/precise-engine/
|
||||
download/kaldi/
|
||||
opt/
|
||||
*
|
||||
|
||||
etc/homeassistant/config/.storage
|
||||
examples/typical/home-assistant/config/.storage
|
||||
examples/typical-intent/home-assistant/config/.storage
|
||||
examples/client-server/home-assistant/config/.storage
|
||||
examples/mqtt-hermes/home-assistant/config/.storage
|
||||
!download/
|
||||
|
||||
profiles/*/base_dictionary.txt
|
||||
profiles/*/base_language_model.txt
|
||||
profiles/*/acoustic_model/
|
||||
profiles/*/g2p.fst
|
||||
!requirements.txt
|
||||
!dist/
|
||||
!etc/wav
|
||||
!etc/shflags
|
||||
!create-venv.sh
|
||||
!download-dependencies.sh
|
||||
|
||||
profiles/en-kaldi/
|
||||
profiles/en-zamia/
|
||||
!docker/run.sh
|
||||
!docker/rhasspy
|
||||
|
||||
profiles/*/download/
|
||||
!profiles/defaults.json
|
||||
|
||||
!profiles/zh/profile.json
|
||||
!profiles/zh/custom_words.txt
|
||||
!profiles/zh/espeak_phonemes.txt
|
||||
!profiles/zh/phoneme_examples.txt
|
||||
!profiles/zh/frequent_words.txt
|
||||
!profiles/zh/sentences.ini
|
||||
!profiles/zh/stop_words.txt
|
||||
!profiles/zh/slots
|
||||
!profiles/zh/slot_programs
|
||||
|
||||
!profiles/hi/profile.json
|
||||
!profiles/hi/custom_words.txt
|
||||
!profiles/hi/espeak_phonemes.txt
|
||||
!profiles/hi/phoneme_examples.txt
|
||||
!profiles/hi/frequent_words.txt
|
||||
!profiles/hi/sentences.ini
|
||||
!profiles/hi/stop_words.txt
|
||||
!profiles/hi/slots
|
||||
!profiles/hi/slot_programs
|
||||
|
||||
!profiles/el/profile.json
|
||||
!profiles/el/custom_words.txt
|
||||
!profiles/el/espeak_phonemes.txt
|
||||
!profiles/el/phoneme_examples.txt
|
||||
!profiles/el/frequent_words.txt
|
||||
!profiles/el/sentences.ini
|
||||
!profiles/el/stop_words.txt
|
||||
!profiles/el/slots
|
||||
!profiles/el/slot_programs
|
||||
|
||||
!profiles/es/profile.json
|
||||
!profiles/es/custom_words.txt
|
||||
!profiles/es/espeak_phonemes.txt
|
||||
!profiles/es/phoneme_examples.txt
|
||||
!profiles/es/frequent_words.txt
|
||||
!profiles/es/sentences.ini
|
||||
!profiles/es/stop_words.txt
|
||||
!profiles/es/slots
|
||||
!profiles/es/slot_programs
|
||||
|
||||
!profiles/it/profile.json
|
||||
!profiles/it/custom_words.txt
|
||||
!profiles/it/espeak_phonemes.txt
|
||||
!profiles/it/phoneme_examples.txt
|
||||
!profiles/it/frequent_words.txt
|
||||
!profiles/it/sentences.ini
|
||||
!profiles/it/stop_words.txt
|
||||
!profiles/it/slots
|
||||
!profiles/it/slot_programs
|
||||
|
||||
!profiles/ru/profile.json
|
||||
!profiles/ru/custom_words.txt
|
||||
!profiles/ru/espeak_phonemes.txt
|
||||
!profiles/ru/phoneme_examples.txt
|
||||
!profiles/ru/frequent_words.txt
|
||||
!profiles/ru/sentences.ini
|
||||
!profiles/ru/stop_words.txt
|
||||
!profiles/ru/slots
|
||||
!profiles/ru/slot_programs
|
||||
|
||||
!profiles/pt/profile.json
|
||||
!profiles/pt/custom_words.txt
|
||||
!profiles/pt/espeak_phonemes.txt
|
||||
!profiles/pt/phoneme_examples.txt
|
||||
!profiles/pt/frequent_words.txt
|
||||
!profiles/pt/sentences.ini
|
||||
!profiles/pt/stop_words.txt
|
||||
!profiles/pt/slots
|
||||
!profiles/pt/slot_programs
|
||||
|
||||
!profiles/sv/profile.json
|
||||
!profiles/sv/custom_words.txt
|
||||
!profiles/sv/espeak_phonemes.txt
|
||||
!profiles/sv/phoneme_examples.txt
|
||||
!profiles/sv/frequent_words.txt
|
||||
!profiles/sv/sentences.ini
|
||||
!profiles/sv/stop_words.txt
|
||||
!profiles/sv/slots
|
||||
!profiles/sv/slot_programs
|
||||
|
||||
!profiles/vi/profile.json
|
||||
!profiles/vi/custom_words.txt
|
||||
!profiles/vi/espeak_phonemes.txt
|
||||
!profiles/vi/phoneme_examples.txt
|
||||
!profiles/vi/frequent_words.txt
|
||||
!profiles/vi/sentences.ini
|
||||
!profiles/vi/stop_words.txt
|
||||
!profiles/vi/slots
|
||||
!profiles/vi/slot_programs
|
||||
|
||||
!profiles/ca/profile.json
|
||||
!profiles/ca/custom_words.txt
|
||||
!profiles/ca/espeak_phonemes.txt
|
||||
!profiles/ca/phoneme_examples.txt
|
||||
!profiles/ca/frequent_words.txt
|
||||
!profiles/ca/sentences.ini
|
||||
!profiles/ca/stop_words.txt
|
||||
!profiles/ca/slots
|
||||
!profiles/ca/slot_programs
|
||||
|
||||
!profiles/nl/profile.json
|
||||
!profiles/nl/custom_words.txt
|
||||
!profiles/nl/espeak_phonemes.txt
|
||||
!profiles/nl/phoneme_examples.txt
|
||||
!profiles/nl/frequent_words.txt
|
||||
!profiles/nl/sentences.ini
|
||||
!profiles/nl/stop_words.txt
|
||||
!profiles/nl/slots
|
||||
!profiles/nl/slot_programs
|
||||
!profiles/nl/kaldi/custom_words.txt
|
||||
!profiles/nl/kaldi/espeak_phonemes.txt
|
||||
!profiles/nl/kaldi/phoneme_examples.txt
|
||||
|
||||
!profiles/de/profile.json
|
||||
!profiles/de/custom_words.txt
|
||||
!profiles/de/espeak_phonemes.txt
|
||||
!profiles/de/phoneme_examples.txt
|
||||
!profiles/de/frequent_words.txt
|
||||
!profiles/de/sentences.ini
|
||||
!profiles/de/stop_words.txt
|
||||
!profiles/de/slots
|
||||
!profiles/de/slot_programs
|
||||
!profiles/de/kaldi/custom_words.txt
|
||||
!profiles/de/kaldi/espeak_phonemes.txt
|
||||
!profiles/de/kaldi/phoneme_examples.txt
|
||||
|
||||
!profiles/fr/profile.json
|
||||
!profiles/fr/custom_words.txt
|
||||
!profiles/fr/espeak_phonemes.txt
|
||||
!profiles/fr/phoneme_examples.txt
|
||||
!profiles/fr/frequent_words.txt
|
||||
!profiles/fr/sentences.ini
|
||||
!profiles/fr/stop_words.txt
|
||||
!profiles/fr/slots
|
||||
!profiles/fr/slot_programs
|
||||
!profiles/fr/kaldi/custom_words.txt
|
||||
!profiles/fr/kaldi/espeak_phonemes.txt
|
||||
!profiles/fr/kaldi/phoneme_examples.txt
|
||||
|
||||
!profiles/en/profile.json
|
||||
!profiles/en/custom_words.txt
|
||||
!profiles/en/espeak_phonemes.txt
|
||||
!profiles/en/phoneme_examples.txt
|
||||
!profiles/en/frequent_words.txt
|
||||
!profiles/en/sentences.ini
|
||||
!profiles/en/stop_words.txt
|
||||
!profiles/en/slots
|
||||
!profiles/en/slot_programs
|
||||
!profiles/en/kaldi/custom_words.txt
|
||||
!profiles/en/kaldi/espeak_phonemes.txt
|
||||
!profiles/en/kaldi/phoneme_examples.txt
|
||||
|
||||
!rhasspy/profile_schema.json
|
||||
!rhasspy/*.py
|
||||
!rhasspy/train/*.py
|
||||
!rhasspy/train/jsgf2fst/*.py
|
||||
!*.py
|
||||
!VERSION
|
||||
|
||||
!pip
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
## [2.4.20] - 2020 Apr 10
|
||||
|
||||
### Added
|
||||
|
||||
- libasound2-plugins to Docker image (for Hass.IO)
|
||||
- MQTT TLS support (thanks https://github.com/ofekd)
|
||||
- Mycroft Precise 0.3.0 added to Docker image
|
||||
|
||||
### Changed
|
||||
|
||||
- Properly accept websocket connections
|
||||
- Don't error out on missing porcupine files
|
||||
- Fix rawValue in MQTT messages
|
||||
|
||||
## [2.4.19] - 2020 Mar 04
|
||||
|
||||
### Added
|
||||
|
||||
- Support for Google Cloud speech to text
|
||||
- Rasa NLU minimum confidence parameter
|
||||
|
||||
### Changed
|
||||
|
||||
- Using tagged version of porcupine wake models to avoid incompatibilities
|
||||
- Fix Rasa NLU first entity only bug
|
||||
- Fix siteId null bug
|
||||
|
||||
## [2.4.18] - 2020 Feb 07
|
||||
|
||||
### Added
|
||||
|
||||
- /api/listen-for-wake accepts "on" and "off" as POST data to enable/disable wake word
|
||||
- /api/events/wake websocket endpoint reports wake up events
|
||||
- /api/events/text websocket endpoint reports transcription events
|
||||
- Rhasspy logo changes in web UI when wake word is detected
|
||||
- espeak arguments list for text to speech
|
||||
|
||||
### Changed
|
||||
|
||||
- STT output casing is fixed outside of HTTP API calls
|
||||
- All voice commands show up in web UI test page
|
||||
- Play last voice command button in web UI works for any command
|
||||
- Fixed commas in numbers with thousand separators
|
||||
- Words from Pocketsphinx wake keyphrase are added to dictionary
|
||||
- Pocketsphinx wake word keyphrase casing is fixed
|
||||
|
||||
## [2.4.17] - 2020 Jan 21
|
||||
|
||||
### Added
|
||||
|
||||
- Button to web UI to play last recorded voice command
|
||||
- RHASSPY_LOG_LEVEL environment variable
|
||||
- Web UI feedback during download
|
||||
- Add "asoundrc" config option to Hass.IO add-on
|
||||
|
||||
### Changed
|
||||
|
||||
- Moved $profile/kaldi/custom_words.txt to $profile/kaldi_custom_words.txt
|
||||
- Slot substitution casing is kept during training/recognition
|
||||
- Fixed fuzzywuzzy and other intent recognizer training after addition of converters
|
||||
- Fix thread max count issue
|
||||
- Hide web UI alerts after 10 seconds
|
||||
- Delete partially downloaded profile files
|
||||
- Force slot programs to run each training cycle
|
||||
- Fix _raw_text in Hass event being same as _text
|
||||
|
||||
### Removed
|
||||
|
||||
- Flair intent recognizer
|
||||
|
||||
## [2.4.16] - 2020 Jan 5
|
||||
|
||||
### Added
|
||||
|
||||
- Number ranges (0..100)
|
||||
- Converters for transforming JSON values in intents (!int)
|
||||
- Slot programs for generating slot values
|
||||
- $rhasspy/days and $rhasspy/months built-in slots
|
||||
|
||||
## [2.4.15] - 2019 Dec 27
|
||||
|
||||
### Added
|
||||
|
||||
- Preliminary support for Raspberry Pi Zero (no Kaldi)
|
||||
- Play error sound when intent not recognized
|
||||
- _text and _raw_text to Home Assistant events
|
||||
|
||||
### Changed
|
||||
|
||||
- Disable wake word when TTS is speaking
|
||||
- Use json5 library to parse profile
|
||||
- Remove picotts pop sound
|
||||
- Don't open/close microphone after wake-up
|
||||
|
||||
## [2.4.14] - 2019 Dec 19
|
||||
|
||||
### Added
|
||||
|
||||
- Ability to split sentences across multiple .ini files in intents directory
|
||||
- Support (future) /api/intent for Home Assistant
|
||||
- Support for Home Assistant TTS system
|
||||
- Emulate MaryTTS /process API in web API
|
||||
- Include wakeId/siteId in JSON intent (MQTT/Websocket)
|
||||
- ?voice and ?language query parameters to /api/text-to-speech
|
||||
@@ -0,0 +1,80 @@
|
||||
FROM ubuntu:eoan as build
|
||||
ARG TARGETPLATFORM
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
ENV LANG C.UTF-8
|
||||
ENV RHASSPY_APP /usr/share/rhasspy
|
||||
ENV RHASSPY_VENV ${RHASSPY_APP}/.venv
|
||||
|
||||
WORKDIR /
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
python3 python3-dev python3-setuptools python3-pip python3-venv \
|
||||
build-essential swig portaudio19-dev libatlas-base-dev
|
||||
|
||||
COPY etc/shflags ${RHASSPY_APP}/etc/
|
||||
COPY download/rhasspy-tools_*.tar.gz \
|
||||
download/kaldi_*.tar.gz \
|
||||
download/pocketsphinx-python.tar.gz \
|
||||
download/snowboy-1.3.0.tar.gz \
|
||||
download/precise-engine_0.3.0_*.tar.gz \
|
||||
${RHASSPY_APP}/download/
|
||||
COPY create-venv.sh download-dependencies.sh requirements.txt ${RHASSPY_APP}/
|
||||
RUN cd ${RHASSPY_APP} && ./create-venv.sh --nosystem --noweb
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
FROM ubuntu:eoan
|
||||
ARG TARGETPLATFORM
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
ENV LANG C.UTF-8
|
||||
ENV RHASSPY_APP /usr/share/rhasspy
|
||||
ENV RHASSPY_VENV ${RHASSPY_APP}/.venv
|
||||
|
||||
WORKDIR /
|
||||
|
||||
COPY --from=build ${RHASSPY_VENV} ${RHASSPY_VENV}
|
||||
COPY --from=build ${RHASSPY_APP}/opt/kaldi/ ${RHASSPY_APP}/opt/kaldi/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
python3 python3-dev python3-setuptools python3-pip python3-venv \
|
||||
bash jq unzip curl perl \
|
||||
libportaudio2 libatlas3-base \
|
||||
libgfortran4 ca-certificates \
|
||||
sox espeak flite libttspico-utils alsa-utils lame \
|
||||
libasound2-plugins \
|
||||
libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev \
|
||||
gstreamer1.0-tools gstreamer1.0-plugins-good \
|
||||
mosquitto-clients
|
||||
|
||||
# Web interface
|
||||
ADD download/rhasspy-web-dist.tar.gz ${RHASSPY_APP}/
|
||||
|
||||
RUN ldconfig
|
||||
|
||||
# Copy script to run
|
||||
COPY docker/run.sh /run.sh
|
||||
RUN chmod +x /run.sh
|
||||
|
||||
COPY profiles/ ${RHASSPY_APP}/profiles/
|
||||
|
||||
COPY profiles/defaults.json ${RHASSPY_APP}/profiles/
|
||||
COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
COPY VERSION ${RHASSPY_APP}/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX ${RHASSPY_APP}/opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -1,47 +1,19 @@
|
||||
.PHONY: web-dist docker manifest docs-uml g2p
|
||||
.PHONY: web-dist docker manifest docs-uml g2p check
|
||||
SHELL := bash
|
||||
|
||||
DOCKER_PLATFORMS = linux/amd64,linux/arm64,linux/arm/v7
|
||||
|
||||
all: docker
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Docker
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
docker: web-dist docker-amd64 docker-armhf docker-aarch64 docker-push manifest
|
||||
|
||||
docker-amd64:
|
||||
docker build . -f docker/templates/dockerfiles/Dockerfile.prebuilt.alsa.all \
|
||||
--build-arg BUILD_ARCH=amd64 \
|
||||
--build-arg CPU_ARCH=x86_64 \
|
||||
--build-arg BUILD_FROM=ubuntu:bionic \
|
||||
-t synesthesiam/rhasspy-server:amd64
|
||||
|
||||
docker-armhf:
|
||||
docker build . -f docker/templates/dockerfiles/Dockerfile.prebuilt.alsa.all \
|
||||
--build-arg BUILD_ARCH=armhf \
|
||||
--build-arg CPU_ARCH=armv7l \
|
||||
--build-arg BUILD_FROM=arm32v7/ubuntu:bionic \
|
||||
-t synesthesiam/rhasspy-server:armhf
|
||||
|
||||
docker-aarch64:
|
||||
docker build . -f docker/templates/dockerfiles/Dockerfile.prebuilt.alsa.all \
|
||||
--build-arg BUILD_ARCH=aarch64 \
|
||||
--build-arg CPU_ARCH=arm64v8 \
|
||||
--build-arg BUILD_FROM=arm64v8/ubuntu:bionic \
|
||||
-t synesthesiam/rhasspy-server:aarch64
|
||||
|
||||
docker-push:
|
||||
docker push synesthesiam/rhasspy-server:amd64
|
||||
docker push synesthesiam/rhasspy-server:armhf
|
||||
docker push synesthesiam/rhasspy-server:aarch64
|
||||
|
||||
manifest:
|
||||
docker manifest push --purge synesthesiam/rhasspy-server:latest
|
||||
docker manifest create --amend synesthesiam/rhasspy-server:latest \
|
||||
synesthesiam/rhasspy-server:amd64 \
|
||||
synesthesiam/rhasspy-server:armhf \
|
||||
synesthesiam/rhasspy-server:aarch64
|
||||
docker manifest annotate synesthesiam/rhasspy-server:latest synesthesiam/rhasspy-server:armhf --os linux --arch arm
|
||||
docker manifest annotate synesthesiam/rhasspy-server:latest synesthesiam/rhasspy-server:aarch64 --os linux --arch arm64
|
||||
docker manifest push synesthesiam/rhasspy-server:latest
|
||||
docker: web-dist
|
||||
docker buildx build . \
|
||||
--platform $(DOCKER_PLATFORMS) \
|
||||
--tag synesthesiam/rhasspy-server:latest \
|
||||
--push
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Yarn (Vue)
|
||||
@@ -81,5 +53,7 @@ g2p: $(G2P_MODELS)
|
||||
# Testing
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
mypy:
|
||||
mypy app.py rhasspy
|
||||
check:
|
||||
flake8 --exclude=lexconvert.py app.py test.py rhasspy/*.py
|
||||
pylint --ignore=lexconvert.py app.py test.py rhasspy/*.py
|
||||
mypy app.py test.py rhasspy/*.py
|
||||
|
||||
@@ -1,13 +1,19 @@
|
||||

|
||||
# Rhasspy has [moved and improved!](https://github.com/rhasspy/rhasspy)
|
||||
|
||||
Rhasspy (pronounced RAH-SPEE) is an offline, [multilingual](#supported-languages) voice assistant toolkit inspired by [Jasper](https://jasperproject.github.io/) that works well with [Home Assistant](https://www.home-assistant.io/), [Hass.io](https://www.home-assistant.io/hassio/), and [Node-RED](https://nodered.org).
|
||||
This repository contains code for an older version of Rhasspy (2.4).
|
||||
|
||||
* [Documentation](https://rhasspy.readthedocs.io/)
|
||||
---
|
||||
|
||||
Rhasspy (pronounced RAH-SPEE) is an offline voice assistant toolkit inspired by [Jasper](https://jasperproject.github.io/) that [supports many languages](#supported-languages). It works well with [Home Assistant](https://www.home-assistant.io/), [Hass.io](https://www.home-assistant.io/hassio/), and [Node-RED](https://nodered.org).
|
||||
|
||||
**A newer version of Rhasspy (2.5) is available at [https://github.com/rhasspy/rhasspy](https://github.com/rhasspy/rhasspy)**
|
||||
|
||||
* [Documentation](https://rhasspy.readthedocs.io/en/v2.4.20/)
|
||||
* [Discussion](https://community.rhasspy.org)
|
||||
* [Video Introduction](https://www.youtube.com/watch?v=ijKTR_GqWwA)
|
||||
* [Hass.IO Add-On Repository](https://github.com/synesthesiam/hassio-addons)
|
||||
* [Discussion](https://community.home-assistant.io/t/rhasspy-offline-voice-assistant-toolkit/60862)
|
||||
|
||||
Rhasspy transcribes voice commands into [JSON](https://json.org) events that can trigger actions in home automation software, like [Home Assistant automations](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) or [Node-RED flows](https://rhasspy.readthedocs.io/en/latest/usage/#node-red). You define custom voice commands in a [profile](https://rhasspy.readthedocs.io/en/latest/profiles/) using a [specialized template syntax](https://rhasspy.readthedocs.io/en/latest/training/#sentencesini), and Rhasspy takes care of the rest.
|
||||
Rhasspy transcribes voice commands into [JSON](https://json.org) events that can trigger actions in home automation software, like [Home Assistant automations](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) or [Node-RED flows](https://rhasspy.readthedocs.io/en/latest/usage/#node-red). You define custom voice commands in a [profile](https://rhasspy.readthedocs.io/en/latest/profiles/) using a [specialized template syntax](https://rhasspy.readthedocs.io/en/latest/training/#sentencesini), and Rhasspy takes care of the rest.
|
||||
|
||||
To run Rhasspy with the English (en) profile using Docker:
|
||||
|
||||
@@ -58,7 +64,7 @@ The table below summarizes language support across the various supporting techno
|
||||
| | [rasaNLU](https://rhasspy.readthedocs.io/en/latest/intent-recognition/#rasanlu) | *needs extra software* | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| **Text to Speech** | [espeak](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#espeak) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flite](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#flite) | ✓ | ✓ | | | | | | | | ✓ | | | | | |
|
||||
| | [picotts](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#picotts) | ✓ | ✓ | | | | | | | | | | | | | |
|
||||
| | [picotts](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#picotts) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | | | | | | | | |
|
||||
| | [marytts](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#marytts) | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | | | | | | | |
|
||||
| | [wavenet](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#google-wavenet) | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | ✓ | |
|
||||
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
defaults:
|
||||
-
|
||||
scope:
|
||||
path: ""
|
||||
values:
|
||||
render_with_liquid: false
|
||||
@@ -2,15 +2,20 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import atexit
|
||||
import concurrent.futures
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import time
|
||||
from functools import wraps
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Tuple, Union
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
||||
from uuid import uuid4
|
||||
|
||||
import attr
|
||||
import json5
|
||||
from quart import (
|
||||
Quart,
|
||||
Response,
|
||||
@@ -22,26 +27,33 @@ from quart import (
|
||||
websocket,
|
||||
)
|
||||
from quart_cors import cors
|
||||
from swagger_ui import quart_api_doc
|
||||
|
||||
from rhasspy.actor import ActorSystem, ConfigureEvent, RhasspyActor
|
||||
from rhasspy.core import RhasspyCore
|
||||
from rhasspy.dialogue import ProfileTrainingFailed
|
||||
from rhasspy.intent import IntentRecognized
|
||||
from rhasspy.events import (
|
||||
IntentRecognized,
|
||||
ProfileTrainingFailed,
|
||||
VoiceCommand,
|
||||
WakeWordDetected,
|
||||
WavTranscription,
|
||||
)
|
||||
from rhasspy.utils import (
|
||||
FunctionLoggingHandler,
|
||||
buffer_to_wav,
|
||||
load_phoneme_examples,
|
||||
recursive_remove,
|
||||
get_all_intents,
|
||||
get_ini_paths,
|
||||
get_wav_duration,
|
||||
load_phoneme_examples,
|
||||
read_dict,
|
||||
recursive_remove,
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Flask Web App Setup
|
||||
# Quart Web App Setup
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logging.root.setLevel(logging.DEBUG)
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
@@ -49,6 +61,10 @@ app = Quart("rhasspy")
|
||||
app.secret_key = str(uuid4())
|
||||
app = cors(app)
|
||||
|
||||
# WAV data from last voice command
|
||||
last_voice_wav: Optional[bytes] = None
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Parse Arguments
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -82,8 +98,19 @@ parser.add_argument(
|
||||
parser.add_argument(
|
||||
"--ssl", nargs=2, help="Use SSL with <CERT_FILE <KEY_FILE>", default=None
|
||||
)
|
||||
parser.add_argument("--log-level", default="DEBUG", help="Set logging level")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Set log level
|
||||
if "RHASSPY_LOG_LEVEL" in os.environ:
|
||||
log_level = os.environ["RHASSPY_LOG_LEVEL"]
|
||||
else:
|
||||
log_level = args.log_level
|
||||
|
||||
logging.basicConfig(level=getattr(logging, log_level.upper()))
|
||||
|
||||
|
||||
logger.debug(args)
|
||||
|
||||
system_profiles_dir = os.path.abspath(args.system_profiles)
|
||||
@@ -147,6 +174,15 @@ async def start_rhasspy() -> None:
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.route("/api/version")
|
||||
async def api_version() -> Response:
|
||||
"""Get Rhasspy version."""
|
||||
return await send_file(Path("VERSION"))
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.route("/api/profiles")
|
||||
async def api_profiles() -> Response:
|
||||
"""Get list of available profiles and verify necessary files."""
|
||||
@@ -166,7 +202,7 @@ async def api_profiles() -> Response:
|
||||
return jsonify(
|
||||
{
|
||||
"default_profile": core.profile.name,
|
||||
"profiles": sorted(list(profile_names)),
|
||||
"profiles": sorted(profile_names),
|
||||
"downloaded": downloaded,
|
||||
"missing_files": missing_files,
|
||||
}
|
||||
@@ -186,6 +222,14 @@ async def api_download_profile() -> str:
|
||||
return "OK"
|
||||
|
||||
|
||||
@app.route("/api/download-status", methods=["GET"])
|
||||
async def api_download_status() -> str:
|
||||
"""Get status of profile download"""
|
||||
assert core is not None
|
||||
|
||||
return "\n".join(core.download_status)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -236,8 +280,11 @@ async def api_speakers() -> Response:
|
||||
async def api_listen_for_wake() -> str:
|
||||
"""Make Rhasspy listen for a wake word"""
|
||||
assert core is not None
|
||||
core.listen_for_wake()
|
||||
return "OK"
|
||||
enabled_str = (await request.data).decode().strip().lower()
|
||||
enabled = enabled_str not in ["false", "off"]
|
||||
core.listen_for_wake(enabled)
|
||||
|
||||
return str(enabled)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -258,6 +305,10 @@ async def api_listen_for_command() -> Response:
|
||||
entity = request.args.get("entity")
|
||||
value = request.args.get("value")
|
||||
|
||||
# Emulate wake
|
||||
wake_json = json.dumps({"wakewordId": "default", "siteId": core.siteId})
|
||||
await add_ws_event("wake", wake_json)
|
||||
|
||||
return jsonify(
|
||||
await core.listen_for_command(
|
||||
handle=(not no_hass), timeout=timeout, entity=entity, value=value
|
||||
@@ -276,14 +327,14 @@ async def api_profile() -> Union[str, Response]:
|
||||
|
||||
if request.method == "POST":
|
||||
# Ensure that JSON is valid
|
||||
profile_json = await request.json
|
||||
profile_json = json5.loads(await request.data)
|
||||
recursive_remove(core.profile.system_json, profile_json)
|
||||
|
||||
profile_path = Path(core.profile.write_path("profile.json"))
|
||||
with open(profile_path, "w") as profile_file:
|
||||
json.dump(profile_json, profile_file, indent=4)
|
||||
|
||||
msg = "Wrote profile to %s" % profile_path
|
||||
msg = f"Wrote profile to {profile_path}"
|
||||
logger.debug(msg)
|
||||
return msg
|
||||
|
||||
@@ -294,7 +345,7 @@ async def api_profile() -> Union[str, Response]:
|
||||
if layers == "profile":
|
||||
# Local settings only
|
||||
profile_path = Path(core.profile.read_path("profile.json"))
|
||||
return send_file(profile_path) # , mimetype="application/json")
|
||||
return await send_file(profile_path)
|
||||
|
||||
return jsonify(core.profile.json)
|
||||
|
||||
@@ -349,7 +400,7 @@ async def api_pronounce() -> Union[Response, str]:
|
||||
|
||||
if download:
|
||||
# Return WAV
|
||||
return Response(wav_data) # , mimetype="audio/wav")
|
||||
return Response(wav_data, mimetype="audio/wav")
|
||||
|
||||
# Play through speakers
|
||||
core.play_wav_data(wav_data)
|
||||
@@ -415,7 +466,35 @@ async def api_sentences():
|
||||
assert core is not None
|
||||
|
||||
if request.method == "POST":
|
||||
# Update sentences
|
||||
# POST
|
||||
if request.mimetype == "application/json":
|
||||
# Update multiple ini files at once. Paths as keys (relative to
|
||||
# profile directory), sentences as values.
|
||||
num_chars = 0
|
||||
paths_written = []
|
||||
|
||||
sentences_dict = json5.loads(await request.data)
|
||||
for sentences_path, sentences_text in sentences_dict.items():
|
||||
# Path is relative to profile directory
|
||||
sentences_path = Path(core.profile.write_path(sentences_path))
|
||||
|
||||
if sentences_text.strip():
|
||||
# Overwrite file
|
||||
logger.debug("Writing %s", sentences_path)
|
||||
|
||||
sentences_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
sentences_path.write_text(sentences_text)
|
||||
|
||||
num_chars += len(sentences_text)
|
||||
paths_written.append(sentences_path)
|
||||
elif sentences_path.is_file():
|
||||
# Remove file
|
||||
logger.debug("Removing %s", sentences_path)
|
||||
sentences_path.unlink()
|
||||
|
||||
return f"Wrote {num_chars} char(s) to {[str(p) for p in paths_written]}"
|
||||
|
||||
# Update sentences.ini only
|
||||
sentences_path = Path(
|
||||
core.profile.write_path(core.profile.get("speech_to_text.sentences_ini"))
|
||||
)
|
||||
@@ -423,18 +502,48 @@ async def api_sentences():
|
||||
data = await request.data
|
||||
with open(sentences_path, "wb") as sentences_file:
|
||||
sentences_file.write(data)
|
||||
return "Wrote %s byte(s) to %s" % (len(data), sentences_path)
|
||||
return f"Wrote {len(data)} byte(s) to {sentences_path}"
|
||||
|
||||
# Return sentences
|
||||
sentences_path = Path(
|
||||
core.profile.read_path(core.profile.get("speech_to_text.sentences_ini"))
|
||||
# GET
|
||||
sentences_path_rel = core.profile.read_path(
|
||||
core.profile.get("speech_to_text.sentences_ini")
|
||||
)
|
||||
sentences_path = Path(sentences_path_rel)
|
||||
|
||||
if prefers_json():
|
||||
# Return multiple .ini files, keyed by path relative to profile
|
||||
# directory.
|
||||
sentences_dict = {}
|
||||
if sentences_path.is_file():
|
||||
try:
|
||||
# Try user profile dir first
|
||||
profile_dir = Path(core.profile.user_profiles_dir) / core.profile.name
|
||||
key = str(sentences_path.relative_to(profile_dir))
|
||||
except Exception:
|
||||
# Fall back to system profile dir
|
||||
profile_dir = Path(core.profile.system_profiles_dir) / core.profile.name
|
||||
key = str(sentences_path.relative_to(profile_dir))
|
||||
|
||||
sentences_dict[key] = sentences_path.read_text()
|
||||
|
||||
ini_dir = Path(
|
||||
core.profile.read_path(core.profile.get("speech_to_text.sentences_dir"))
|
||||
)
|
||||
|
||||
# Add all .ini files from sentences_dir
|
||||
if ini_dir.is_dir():
|
||||
for ini_path in ini_dir.glob("*.ini"):
|
||||
key = str(ini_path.relative_to(core.profile.read_path()))
|
||||
sentences_dict[key] = ini_path.read_text()
|
||||
|
||||
return jsonify(sentences_dict)
|
||||
|
||||
# Return sentences.ini contents only
|
||||
if not sentences_path.is_file():
|
||||
return "" # no sentences yet
|
||||
|
||||
# Return file contents
|
||||
return await send_file(sentences_path) # , mimetype="text/plain")
|
||||
return await send_file(sentences_path)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -446,10 +555,32 @@ async def api_custom_words():
|
||||
assert core is not None
|
||||
speech_system = core.profile.get("speech_to_text.system", "pocketsphinx")
|
||||
|
||||
# Temporary fix for kaldi/custom_words -> kaldi_custom_words.txt
|
||||
old_kaldi_words_path = Path(core.profile.read_path("kaldi/custom_words.txt"))
|
||||
if old_kaldi_words_path.is_file():
|
||||
new_kaldi_words_path = Path(
|
||||
core.profile.write_path(
|
||||
core.profile.get(
|
||||
"speech_to_text.kaldi.custom_words", "custom_words.txt"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
if (
|
||||
new_kaldi_words_path != old_kaldi_words_path
|
||||
and not new_kaldi_words_path.is_file()
|
||||
):
|
||||
logger.warning(
|
||||
"Moving %s to %s", str(old_kaldi_words_path), str(new_kaldi_words_path)
|
||||
)
|
||||
shutil.move(old_kaldi_words_path, new_kaldi_words_path)
|
||||
|
||||
if request.method == "POST":
|
||||
custom_words_path = Path(
|
||||
core.profile.write_path(
|
||||
core.profile.get(f"speech_to_text.{speech_system}.custom_words")
|
||||
core.profile.get(
|
||||
f"speech_to_text.{speech_system}.custom_words", "custom_words.txt"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@@ -466,11 +597,13 @@ async def api_custom_words():
|
||||
print(line, file=custom_words_file)
|
||||
lines_written += 1
|
||||
|
||||
return "Wrote %s line(s) to %s" % (lines_written, custom_words_path)
|
||||
return f"Wrote {lines_written} line(s) to {custom_words_path}"
|
||||
|
||||
custom_words_path = Path(
|
||||
core.profile.read_path(
|
||||
core.profile.get(f"speech_to_text.{speech_system}.custom_words")
|
||||
core.profile.get(
|
||||
f"speech_to_text.{speech_system}.custom_words", "custom_words.txt"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@@ -536,6 +669,7 @@ async def api_restart() -> str:
|
||||
@app.route("/api/speech-to-text", methods=["POST"])
|
||||
async def api_speech_to_text() -> str:
|
||||
"""Transcribe speech from WAV file."""
|
||||
global last_voice_wav
|
||||
no_header = request.args.get("noheader", "false").lower() == "true"
|
||||
assert core is not None
|
||||
|
||||
@@ -545,10 +679,20 @@ async def api_speech_to_text() -> str:
|
||||
# Wrap in WAV
|
||||
wav_data = buffer_to_wav(wav_data)
|
||||
|
||||
last_voice_wav = wav_data
|
||||
|
||||
start_time = time.perf_counter()
|
||||
result = await core.transcribe_wav(wav_data)
|
||||
end_time = time.perf_counter()
|
||||
|
||||
# Send to websocket
|
||||
await add_ws_event(
|
||||
"transcription",
|
||||
json.dumps(
|
||||
{"text": result.text, "wakewordId": "default", "siteId": core.siteId}
|
||||
),
|
||||
)
|
||||
|
||||
if prefers_json():
|
||||
return jsonify(
|
||||
{
|
||||
@@ -583,7 +727,7 @@ async def api_text_to_intent():
|
||||
|
||||
intent_json = json.dumps(intent)
|
||||
logger.debug(intent_json)
|
||||
await add_ws_event(WS_EVENT_INTENT, intent_json)
|
||||
await add_ws_event("intent", intent_json)
|
||||
|
||||
if not no_hass:
|
||||
# Send intent to Home Assistant
|
||||
@@ -598,11 +742,13 @@ async def api_text_to_intent():
|
||||
@app.route("/api/speech-to-intent", methods=["POST"])
|
||||
async def api_speech_to_intent() -> Response:
|
||||
"""Transcribe speech, recognize intent, and optionally handle."""
|
||||
global last_voice_wav
|
||||
assert core is not None
|
||||
no_hass = request.args.get("nohass", "false").lower() == "true"
|
||||
|
||||
# Prefer 16-bit 16Khz mono, but will convert with sox if needed
|
||||
wav_data = await request.data
|
||||
last_voice_wav = wav_data
|
||||
|
||||
# speech -> text
|
||||
start_time = time.time()
|
||||
@@ -610,6 +756,12 @@ async def api_speech_to_intent() -> Response:
|
||||
text = transcription.text
|
||||
logger.debug(text)
|
||||
|
||||
# Send to websocket
|
||||
await add_ws_event(
|
||||
"transcription",
|
||||
json.dumps({"text": text, "wakewordId": "default", "siteId": core.siteId}),
|
||||
)
|
||||
|
||||
# text -> intent
|
||||
intent = (await core.recognize_intent(text)).intent
|
||||
intent["speech_confidence"] = transcription.confidence
|
||||
@@ -619,7 +771,7 @@ async def api_speech_to_intent() -> Response:
|
||||
|
||||
intent_json = json.dumps(intent)
|
||||
logger.debug(intent_json)
|
||||
await add_ws_event(WS_EVENT_INTENT, intent_json)
|
||||
await add_ws_event("intent", intent_json)
|
||||
|
||||
if not no_hass:
|
||||
# Send intent to Home Assistant
|
||||
@@ -644,6 +796,7 @@ async def api_start_recording() -> str:
|
||||
@app.route("/api/stop-recording", methods=["POST"])
|
||||
async def api_stop_recording() -> Response:
|
||||
"""End recording voice command. Transcribe and handle."""
|
||||
global last_voice_wav
|
||||
assert core is not None
|
||||
no_hass = request.args.get("nohass", "false").lower() == "true"
|
||||
|
||||
@@ -657,20 +810,43 @@ async def api_stop_recording() -> Response:
|
||||
text = transcription.text
|
||||
logger.debug(text)
|
||||
|
||||
# Send to websocket
|
||||
await add_ws_event(
|
||||
"transcription",
|
||||
json.dumps({"text": text, "wakewordId": "default", "siteId": core.siteId}),
|
||||
)
|
||||
|
||||
intent = (await core.recognize_intent(text)).intent
|
||||
intent["speech_confidence"] = transcription.confidence
|
||||
|
||||
intent_json = json.dumps(intent)
|
||||
logger.debug(intent_json)
|
||||
await add_ws_event(WS_EVENT_INTENT, intent_json)
|
||||
await add_ws_event("intent", intent_json)
|
||||
|
||||
if not no_hass:
|
||||
# Send intent to Home Assistant
|
||||
intent = (await core.handle_intent(intent)).intent
|
||||
|
||||
# Save last voice command WAV data
|
||||
last_voice_wav = wav_data
|
||||
|
||||
return jsonify(intent)
|
||||
|
||||
|
||||
@app.route("/api/play-recording", methods=["POST"])
async def api_play_recording() -> str:
    """Play the last recorded voice command through the configured audio output.

    Nothing is played when no voice command has been stored yet; the endpoint
    still answers normally in that case.

    Returns:
        The literal string "OK".
    """
    assert core is not None

    # The module-level buffer is only read here, never rebound, so no
    # ``global`` statement is needed.
    if last_voice_wav:
        # Play through speakers
        logger.debug("Playing %s byte(s)", len(last_voice_wav))
        core.play_wav_data(last_voice_wav)

    return "OK"
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -682,7 +858,9 @@ async def api_unknown_words() -> Response:
|
||||
unknown_words = {}
|
||||
unknown_path = Path(
|
||||
core.profile.read_path(
|
||||
core.profile.get(f"speech_to_text.{speech_system}.unknown_words")
|
||||
core.profile.get(
|
||||
f"speech_to_text.{speech_system}.unknown_words", "unknown_words.txt"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@@ -702,18 +880,28 @@ last_sentence = ""
|
||||
|
||||
|
||||
@app.route("/api/text-to-speech", methods=["POST"])
|
||||
async def api_text_to_speech() -> str:
|
||||
async def api_text_to_speech() -> Union[bytes, str]:
|
||||
"""Speak a sentence with text to speech system."""
|
||||
global last_sentence
|
||||
repeat = request.args.get("repeat", "false").strip().lower() == "true"
|
||||
play = request.args.get("play", "true").strip().lower() == "true"
|
||||
language = request.args.get("language")
|
||||
voice = request.args.get("voice")
|
||||
siteId = request.args.get("siteId")
|
||||
data = await request.data
|
||||
sentence = last_sentence if repeat else data.decode().strip()
|
||||
|
||||
assert core is not None
|
||||
await core.speak_sentence(sentence)
|
||||
result = await core.speak_sentence(
|
||||
sentence, play=play, language=language, voice=voice, siteId=siteId
|
||||
)
|
||||
|
||||
last_sentence = sentence
|
||||
|
||||
if not play:
|
||||
# Return WAV data instead of speaking
|
||||
return Response(result.wav_data, mimetype="audio/wav")
|
||||
|
||||
return sentence
|
||||
|
||||
|
||||
@@ -725,17 +913,19 @@ async def api_slots() -> Union[str, Response]:
|
||||
"""Get the values of all slots."""
|
||||
assert core is not None
|
||||
|
||||
slots_dir = Path(
|
||||
core.profile.read_path(core.profile.get("speech_to_text.slots_dir"))
|
||||
)
|
||||
|
||||
if request.method == "POST":
|
||||
overwrite_all = request.args.get("overwrite_all", "false").lower() == "true"
|
||||
new_slot_values = await request.json
|
||||
new_slot_values = json5.loads(await request.data)
|
||||
|
||||
slots_dir = Path(
|
||||
core.profile.write_path(
|
||||
core.profile.get("speech_to_text.slots_dir", "slots")
|
||||
)
|
||||
)
|
||||
|
||||
if overwrite_all:
|
||||
# Remove existing values first
|
||||
for name in new_slot_values.keys():
|
||||
for name in new_slot_values:
|
||||
slots_path = safe_join(slots_dir, f"{name}")
|
||||
if slots_path.is_file():
|
||||
try:
|
||||
@@ -747,32 +937,41 @@ async def api_slots() -> Union[str, Response]:
|
||||
if isinstance(values, str):
|
||||
values = [values]
|
||||
|
||||
slots_path = Path(
|
||||
core.profile.write_path(
|
||||
core.profile.get("speech_to_text.slots_dir", "slots"), f"{name}"
|
||||
)
|
||||
)
|
||||
slots_path = slots_dir / name
|
||||
|
||||
# Create directories
|
||||
slots_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Write data
|
||||
with open(slots_path, "w") as slots_file:
|
||||
for value in values:
|
||||
value = value.strip()
|
||||
if value:
|
||||
print(value, file=slots_file)
|
||||
# Merge with existing values
|
||||
values = {v.strip() for v in values}
|
||||
if slots_path.is_file():
|
||||
values.update(
|
||||
line.strip() for line in slots_path.read_text().splitlines()
|
||||
)
|
||||
|
||||
# Write merged values
|
||||
if values:
|
||||
with open(slots_path, "w") as slots_file:
|
||||
for value in values:
|
||||
if value:
|
||||
print(value, file=slots_file)
|
||||
|
||||
return "OK"
|
||||
|
||||
# Read slots into dictionary
|
||||
slots_dir = Path(
|
||||
core.profile.read_path(core.profile.get("speech_to_text.slots_dir", "slots"))
|
||||
)
|
||||
|
||||
slots_dict = {}
|
||||
for slot_file_path in slots_dir.glob("*"):
|
||||
if slot_file_path.is_file():
|
||||
slot_name = slot_file_path.name
|
||||
slots_dict[slot_name] = [
|
||||
line.strip() for line in slot_file_path.read_text().splitlines()
|
||||
]
|
||||
|
||||
if slots_dir.is_dir():
|
||||
for slot_file_path in slots_dir.glob("*"):
|
||||
if slot_file_path.is_file():
|
||||
slot_name = slot_file_path.name
|
||||
slots_dict[slot_name] = [
|
||||
line.strip() for line in slot_file_path.read_text().splitlines()
|
||||
]
|
||||
|
||||
return jsonify(slots_dict)
|
||||
|
||||
@@ -824,6 +1023,73 @@ def api_slots_by_name(name: str) -> Union[str, Response]:
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.route("/api/intents")
def api_intents():
    """Return JSON describing the parsed sentences of every intent.

    The sentences are loaded from the profile's sentences.ini file and
    sentences directory.  Each sentence is converted to a dictionary with
    ``attr.asdict`` and every (sub-)expression dictionary gets an extra
    ``item_type`` field holding the class name of the original object.
    """
    assert core is not None

    def profile_path(setting: str) -> Path:
        """Resolve a profile setting to a readable path."""
        return Path(core.profile.read_path(core.profile.get(setting)))

    ini_file = profile_path("speech_to_text.sentences_ini")
    ini_dir = profile_path("speech_to_text.sentences_dir")

    # Load all .ini files and parse
    parsed: Dict[str, Any] = get_all_intents(get_ini_paths(ini_file, ini_dir))

    def annotate(node, node_dict: Dict[str, Any]) -> None:
        """Recursively record the class name of node in its dictionary."""
        node_dict["item_type"] = type(node).__name__

        if hasattr(node, "items"):
            # Group, alternative, etc.
            for child, child_dict in zip(node.items, node_dict["items"]):
                annotate(child, child_dict)
            return

        if hasattr(node, "rule_body"):
            # Rule
            annotate(node.rule_body, node_dict["rule_body"])

    def describe(sentence) -> Dict[str, Any]:
        """Convert one sentence to a dictionary with item_type fields."""
        as_dict = attr.asdict(sentence)
        annotate(sentence, as_dict)
        return as_dict

    # Convert to dictionary, then to JSON
    return jsonify(
        {
            intent_name: [describe(sentence) for sentence in sentences]
            for intent_name, sentences in parsed.items()
        }
    )
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.route("/process", methods=["GET"])
async def marytts_process() -> Response:
    """Emulate the MaryTTS /process API.

    Query arguments:
        INPUT_TEXT: sentence to speak (defaults to an empty string).
        VOICE: optional voice, passed through to the text to speech system.
        LOCALE: optional locale, passed through as the language.

    Returns:
        A response containing the synthesized WAV data ("audio/wav").
    """
    assert core is not None

    sentence = request.args.get("INPUT_TEXT", "")
    voice = request.args.get("VOICE")
    locale = request.args.get("LOCALE")

    # play=False: the WAV data goes back to the caller in the response
    spoken = await core.speak_sentence(
        sentence, play=False, voice=voice, language=locale
    )

    return Response(spoken.wav_data, mimetype="audio/wav")
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.errorhandler(Exception)
|
||||
async def handle_error(err) -> Tuple[str, int]:
|
||||
"""Return error as text."""
|
||||
@@ -835,31 +1101,38 @@ async def handle_error(err) -> Tuple[str, int]:
|
||||
# Static Routes
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
web_dir = os.path.join(os.getcwd(), "dist")
|
||||
web_dir = Path("dist")
|
||||
assert web_dir.is_dir(), f"Missing web directory {web_dir}"
|
||||
|
||||
|
||||
css_dir = web_dir / "css"
|
||||
js_dir = web_dir / "js"
|
||||
img_dir = web_dir / "img"
|
||||
webfonts_dir = web_dir / "webfonts"
|
||||
|
||||
|
||||
@app.route("/css/<path:filename>", methods=["GET"])
|
||||
async def css(filename) -> Response:
|
||||
"""CSS static endpoint."""
|
||||
return await send_from_directory(os.path.join(web_dir, "css"), filename)
|
||||
return await send_from_directory(css_dir, filename)
|
||||
|
||||
|
||||
@app.route("/js/<path:filename>", methods=["GET"])
|
||||
async def js(filename) -> Response:
|
||||
"""Javascript static endpoint."""
|
||||
return await send_from_directory(os.path.join(web_dir, "js"), filename)
|
||||
return await send_from_directory(js_dir, filename)
|
||||
|
||||
|
||||
@app.route("/img/<path:filename>", methods=["GET"])
|
||||
async def img(filename) -> Response:
|
||||
"""Image static endpoint."""
|
||||
return await send_from_directory(os.path.join(web_dir, "img"), filename)
|
||||
return await send_from_directory(img_dir, filename)
|
||||
|
||||
|
||||
@app.route("/webfonts/<path:filename>", methods=["GET"])
|
||||
async def webfonts(filename) -> Response:
|
||||
"""Web font static endpoint."""
|
||||
return await send_from_directory(os.path.join(web_dir, "webfonts"), filename)
|
||||
return await send_from_directory(webfonts_dir, filename)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
@@ -870,39 +1143,39 @@ async def webfonts(filename) -> Response:
|
||||
@app.route("/", methods=["GET"])
|
||||
async def index() -> Response:
|
||||
"""Render main web page."""
|
||||
return await send_file(os.path.join(web_dir, "index.html"))
|
||||
return await send_file(web_dir / "index.html")
|
||||
|
||||
|
||||
@app.route("/swagger.yaml", methods=["GET"])
|
||||
async def swagger_yaml() -> Response:
|
||||
"""OpenAPI static endpoint."""
|
||||
return await send_file(os.path.join(web_dir, "swagger.yaml"))
|
||||
return await send_file(web_dir / "swagger.yaml")
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# WebSocket API
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
WS_EVENT_INTENT = 0
|
||||
WS_EVENT_LOG = 1
|
||||
|
||||
ws_queues: List[List[asyncio.Queue]] = [[], []]
|
||||
ws_locks: List[asyncio.Lock] = [asyncio.Lock(), asyncio.Lock()]
|
||||
user_queues: Set[asyncio.Queue] = set()
|
||||
logging_queues: Set[asyncio.Queue] = set()
|
||||
|
||||
|
||||
async def add_ws_event(event_type: int, text: str):
|
||||
"""Send text out to all websockets for a specific event."""
|
||||
async with ws_locks[event_type]:
|
||||
for q in ws_queues[event_type]:
|
||||
await q.put(text)
|
||||
async def add_ws_event(message_type: str, text: str):
|
||||
"""Send text out to all user websockets for a specific event."""
|
||||
for q in user_queues:
|
||||
await q.put((message_type, text))
|
||||
|
||||
|
||||
async def log_ws_event(text: str):
    """Put a logging message on the queue of every logging websocket."""
    for log_queue in logging_queues:
        await log_queue.put(text)
|
||||
|
||||
|
||||
# Send logging messages out to websocket
|
||||
logging.root.addHandler(
|
||||
FunctionLoggingHandler(
|
||||
lambda msg: asyncio.run_coroutine_threadsafe(
|
||||
add_ws_event(WS_EVENT_LOG, msg), loop
|
||||
)
|
||||
lambda msg: asyncio.run_coroutine_threadsafe(log_ws_event(msg), loop)
|
||||
)
|
||||
)
|
||||
|
||||
@@ -912,6 +1185,8 @@ class WebSocketObserver(RhasspyActor):
|
||||
|
||||
def in_started(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in started state."""
|
||||
global last_voice_wav
|
||||
|
||||
if isinstance(message, IntentRecognized):
|
||||
# Add slots
|
||||
intent_slots = {}
|
||||
@@ -923,62 +1198,124 @@ class WebSocketObserver(RhasspyActor):
|
||||
# Convert to JSON
|
||||
intent_json = json.dumps(message.intent)
|
||||
self._logger.debug(intent_json)
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
add_ws_event(WS_EVENT_INTENT, intent_json), loop
|
||||
asyncio.run_coroutine_threadsafe(add_ws_event("intent", intent_json), loop)
|
||||
elif isinstance(message, WakeWordDetected):
|
||||
assert core is not None
|
||||
wake_json = json.dumps({"wakewordId": message.name, "siteId": core.siteId})
|
||||
asyncio.run_coroutine_threadsafe(add_ws_event("wake", wake_json), loop)
|
||||
elif isinstance(message, WavTranscription):
|
||||
assert core is not None
|
||||
transcription_json = json.dumps(
|
||||
{
|
||||
"text": message.text,
|
||||
"wakewordId": message.wakewordId,
|
||||
"siteId": core.siteId,
|
||||
}
|
||||
)
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
add_ws_event("transcription_json", transcription_json), loop
|
||||
)
|
||||
elif isinstance(message, VoiceCommand):
|
||||
# Save last voice command
|
||||
last_voice_wav = buffer_to_wav(message.data)
|
||||
|
||||
|
||||
def api_websocket(func):
    """Wrap a websocket route so that it receives its own user event queue.

    For every call, a fresh ``asyncio.Queue`` is added to ``user_queues`` and
    passed to ``func`` as its first positional argument.  The queue is always
    removed again when ``func`` finishes, whether it returned or raised.

    Exceptions raised by ``func`` are logged and swallowed, in which case the
    wrapper returns ``None``.
    """

    @wraps(func)
    async def wrapper(*_args, **kwargs):
        # user_queues is only mutated (add/discard), never rebound, so no
        # ``global`` statement is needed.
        queue: asyncio.Queue = asyncio.Queue()
        user_queues.add(queue)
        try:
            return await func(queue, *_args, **kwargs)
        except Exception:
            logger.exception("api_websocket")
        finally:
            user_queues.discard(queue)

    return wrapper
|
||||
|
||||
|
||||
@app.websocket("/api/events/intent")
|
||||
async def api_events_intent() -> None:
|
||||
@api_websocket
|
||||
async def api_events_intent(queue) -> None:
|
||||
"""Websocket endpoint to receive intents as JSON."""
|
||||
# Add new queue for websocket
|
||||
q: asyncio.Queue = asyncio.Queue()
|
||||
async with ws_locks[WS_EVENT_INTENT]:
|
||||
ws_queues[WS_EVENT_INTENT].append(q)
|
||||
await websocket.accept()
|
||||
|
||||
try:
|
||||
while True:
|
||||
text = await q.get()
|
||||
while True:
|
||||
message_type, text = await queue.get()
|
||||
if message_type == "intent":
|
||||
await websocket.send(text)
|
||||
except Exception:
|
||||
logger.exception("api_events_intent")
|
||||
|
||||
# Remove queue
|
||||
async with ws_locks[WS_EVENT_INTENT]:
|
||||
ws_queues[WS_EVENT_INTENT].remove(q)
|
||||
|
||||
@app.websocket("/api/events/text")
@api_websocket
async def api_events_text(queue) -> None:
    """Websocket endpoint that forwards transcription events."""
    await websocket.accept()

    while True:
        kind, payload = await queue.get()
        if kind != "transcription":
            # Event of another type; not sent on this endpoint
            continue

        await websocket.send(payload)
|
||||
|
||||
|
||||
@app.websocket("/api/events/wake")
@api_websocket
async def api_events_wake(queue) -> None:
    """Websocket endpoint that forwards wake events."""
    await websocket.accept()

    while True:
        kind, payload = await queue.get()
        if kind != "wake":
            # Event of another type; not sent on this endpoint
            continue

        await websocket.send(payload)
|
||||
|
||||
|
||||
@app.websocket("/api/events/log")
|
||||
async def api_events_log() -> None:
|
||||
"""Websocket endpoint to receive logging messages as text."""
|
||||
await websocket.accept()
|
||||
|
||||
# Add new queue for websocket
|
||||
q: asyncio.Queue = asyncio.Queue()
|
||||
async with ws_locks[WS_EVENT_LOG]:
|
||||
ws_queues[WS_EVENT_LOG].append(q)
|
||||
logging_queues.add(q)
|
||||
|
||||
try:
|
||||
while True:
|
||||
text = await q.get()
|
||||
await websocket.send(text)
|
||||
except Exception:
|
||||
logger.exception("api_events_log")
|
||||
except concurrent.futures.CancelledError:
|
||||
pass
|
||||
|
||||
# Remove queue
|
||||
async with ws_locks[WS_EVENT_LOG]:
|
||||
ws_queues[WS_EVENT_LOG].remove(q)
|
||||
logging_queues.discard(q)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Swagger UI
|
||||
quart_api_doc(
|
||||
app, config_path=(web_dir / "swagger.yaml"), url_prefix="/api", title="Rhasspy API"
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def prefers_json() -> bool:
    """Tell whether the client ranks JSON strictly above plain text.

    Compares the Accept qualities of "application/json" and "text/plain" for
    the current request.
    """
    json_quality = quality(request.accept_mimetypes, "application/json")
    text_quality = quality(request.accept_mimetypes, "text/plain")
    return json_quality > text_quality
|
||||
|
||||
|
||||
def quality(accept, key: str) -> float:
    """Look up the Accept quality of a media type.

    Returns the quality of the first option in ``accept.options`` whose value
    matches ``key``, or 0.0 when no option matches.
    """
    # pylint: disable=W0212
    matching = (
        option.quality
        for option in accept.options
        if accept._values_match(key, option.value)
    )
    return next(matching, 0.0)
|
||||
@@ -991,6 +1328,9 @@ loop.run_until_complete(start_rhasspy())
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Disable useless logging messages
|
||||
logging.getLogger("wsproto").setLevel(logging.CRITICAL)
|
||||
|
||||
# Start web server
|
||||
if args.ssl is not None:
|
||||
logger.debug("Using SSL with certfile, keyfile = %s", args.ssl)
|
||||
|
||||
@@ -36,7 +36,7 @@ def main():
|
||||
|
||||
# Load dictionary
|
||||
word_dict = {}
|
||||
logging.info("Loading dictionary from %s" % args.dictionary)
|
||||
logging.info("Loading dictionary from %s", args.dictionary)
|
||||
with open(args.dictionary, "r") as dict_file:
|
||||
read_dict(dict_file, word_dict)
|
||||
|
||||
@@ -53,7 +53,7 @@ def main():
|
||||
all_words.append(word)
|
||||
|
||||
assert len(phonemes) == len(phoneme_words), "Not enough words to cover phonemes"
|
||||
logging.debug("Phonemes: %s" % ", ".join(phoneme_words.keys()))
|
||||
logging.debug("Phonemes: %s", ", ".join(phoneme_words))
|
||||
|
||||
phoneme_hyps = defaultdict(lambda: defaultdict(float))
|
||||
|
||||
@@ -66,7 +66,7 @@ def main():
|
||||
phoneme_hyps[phoneme][hyp] = count
|
||||
|
||||
# Sample words from the dictionary
|
||||
logging.info("Starting %s sample(s)" % args.samples)
|
||||
logging.info("Starting %s sample(s)", args.samples)
|
||||
phoneme_futures = {}
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Schedule eSpeak word samples
|
||||
@@ -80,7 +80,7 @@ def main():
|
||||
for i, future in enumerate(as_completed(phoneme_futures)):
|
||||
if i % len(phonemes) == 0:
|
||||
logging.info(
|
||||
"Sample %s of %s" % ((i // len(phonemes) + 1), args.samples)
|
||||
"Sample %s of %s", (i // len(phonemes) + 1), args.samples
|
||||
)
|
||||
|
||||
phoneme = phoneme_futures[future]
|
||||
@@ -113,14 +113,14 @@ def main():
|
||||
best = {}
|
||||
todo = set(phonemes)
|
||||
used = set()
|
||||
while len(todo) > 0:
|
||||
while todo:
|
||||
for phoneme in list(todo):
|
||||
best_to_worst = sorted(
|
||||
phoneme_hyps[phoneme].items(), key=lambda kv: kv[1], reverse=True
|
||||
)
|
||||
|
||||
for hyp, count in best_to_worst:
|
||||
if not hyp in used:
|
||||
if hyp not in used:
|
||||
best[phoneme] = hyp
|
||||
used.add(hyp)
|
||||
todo.remove(phoneme)
|
||||
@@ -165,7 +165,7 @@ def read_dict(dict_file, word_dict):
|
||||
"""
|
||||
for line in dict_file:
|
||||
line = line.strip()
|
||||
if len(line) == 0:
|
||||
if not line:
|
||||
continue
|
||||
|
||||
word, pronounce = re.split("[ ]+", line, maxsplit=1)
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import calendar
|
||||
import json
|
||||
import locale
|
||||
from pathlib import Path
|
||||
|
||||
def main():
    """Write localized day and month name slots for every profile.

    Takes one command-line argument, ``profiles_dir``.  For each directory
    inside it, the profile's ``locale`` is read from ``profile.json``, the
    process locale is switched to ``<locale>.UTF-8``, and the day and month
    names are written to ``slots/rhasspy/days`` and ``slots/rhasspy/months``
    (one name per line).
    """
    parser = argparse.ArgumentParser("generate-slots")
    parser.add_argument("profiles_dir")
    args = parser.parse_args()

    for profile_dir in Path(args.profiles_dir).glob("*"):
        if not profile_dir.is_dir():
            continue

        # Explicit UTF-8: do not depend on the platform default encoding
        with open(
            profile_dir / "profile.json", "r", encoding="utf-8"
        ) as profile_file:
            profile = json.load(profile_file)

        # calendar.day_name / month_name follow the current locale
        locale_name = profile["locale"] + ".UTF-8"
        locale.setlocale(locale.LC_ALL, locale_name)
        print(locale_name)

        slots_dir = profile_dir / "slots" / "rhasspy"
        slots_dir.mkdir(parents=True, exist_ok=True)

        # Day names
        (slots_dir / "days").write_text(
            "\n".join(calendar.day_name), encoding="utf-8"
        )

        # Month names (month_name[0] is an empty placeholder)
        (slots_dir / "months").write_text(
            "\n".join(filter(None, calendar.month_name)), encoding="utf-8"
        )
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -38,9 +38,7 @@ def main():
|
||||
|
||||
if not os.path.exists(html_path):
|
||||
# Download
|
||||
url = "https://www.ezglot.com/most-frequently-used-words.php?l={0}&submit=Select".format(
|
||||
language
|
||||
)
|
||||
url = f"https://www.ezglot.com/most-frequently-used-words.php?l={language}&submit=Select"
|
||||
print(f"Downloading from {url}")
|
||||
|
||||
with open(html_path, "w") as html_file:
|
||||
|
||||
@@ -26,7 +26,7 @@ def main():
|
||||
with open(args.dictionary, "r") as dict_file:
|
||||
for line in dict_file:
|
||||
line = line.strip()
|
||||
if len(line) == 0:
|
||||
if not line:
|
||||
continue
|
||||
|
||||
parts = re.split(r"[\t ]+", line)
|
||||
@@ -44,11 +44,11 @@ def main():
|
||||
|
||||
# Pick unique example words for every phoneme
|
||||
used_words = set()
|
||||
for phoneme in sorted(examples.keys()):
|
||||
for phoneme in sorted(examples):
|
||||
# Choose the shortest, unused example word for this phoneme.
|
||||
# Exclude words with 3 or fewer letters.
|
||||
for word, pron in sorted(examples[phoneme], key=lambda kv: len(kv[0])):
|
||||
if len(word) > 3 and (not word in used_words):
|
||||
if len(word) > 3 and (word not in used_words):
|
||||
# Output format is:
|
||||
# phoneme word pronunciation
|
||||
print(phoneme, word, " ".join(pron))
|
||||
|
||||
@@ -31,7 +31,7 @@ def main():
|
||||
with open(args.dictionary, "r") as dict_file:
|
||||
for line in dict_file:
|
||||
line = line.strip()
|
||||
if len(line) == 0:
|
||||
if not line:
|
||||
continue
|
||||
|
||||
parts = re.split(r"[\t ]+", line)
|
||||
@@ -70,7 +70,7 @@ def main():
|
||||
with open(args.frequent_phones, "r") as freq_phones_file:
|
||||
for line in freq_phones_file:
|
||||
line = line.strip()
|
||||
if len(line) == 0:
|
||||
if not line:
|
||||
continue
|
||||
|
||||
parts = re.split(r"[ ]+", line, maxsplit=1)
|
||||
@@ -82,7 +82,7 @@ def main():
|
||||
mappings = []
|
||||
bad_espeak = (":", ";", "-", "#")
|
||||
for word, espeak in freq_espeak.items():
|
||||
if not word in freq_phonemes:
|
||||
if word not in freq_phonemes:
|
||||
# No pronunciation
|
||||
continue
|
||||
|
||||
@@ -134,7 +134,7 @@ def main():
|
||||
m = 4
|
||||
for p in all_phonemes:
|
||||
candidate_counts = [
|
||||
(e, phoneme_counts[(cp, e)]) for (cp, e) in phoneme_counts.keys() if cp == p
|
||||
(e, phoneme_counts[(cp, e)]) for (cp, e) in phoneme_counts if cp == p
|
||||
]
|
||||
candidate_counts = [ec for ec in candidate_counts if ec[1] > n]
|
||||
candidate_counts = sorted(candidate_counts, key=lambda x: x[1], reverse=True)
|
||||
@@ -213,7 +213,7 @@ assign(P, E) :- maybe_assign(P, E).
|
||||
predicates = []
|
||||
for line in proc.stdout.splitlines():
|
||||
line = line.decode().strip()
|
||||
if len(line) == 0:
|
||||
if not line:
|
||||
continue
|
||||
elif line.startswith("OPTIMUM FOUND"):
|
||||
break
|
||||
|
||||
@@ -20,7 +20,7 @@ def main():
|
||||
with open(dict_path, "r") as dict_file:
|
||||
for line in dict_file:
|
||||
line = line.strip()
|
||||
if len(line) == 0:
|
||||
if not line:
|
||||
continue
|
||||
|
||||
parts = re.split(r"[ ]+", line)
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import json
|
||||
import random
|
||||
import datetime
|
||||
|
||||
|
||||
def speech(text):
    """Attach a spoken reply to the intent being processed.

    Stores ``{"text": text}`` under the ``"speech"`` key of the module-level
    intent dict ``o``, replacing any previous value.
    """
    # Item assignment mutates the existing dict in place, so no ``global``
    # declaration is required to reach the module-level ``o``.
    o["speech"] = dict(text=text)
|
||||
|
||||
|
||||
# Read the intent JSON from stdin and load it into a Python dict
o = json.loads(sys.stdin.read())

intent = o["intent"]["name"]

if intent == "GetTime":
    now = datetime.datetime.now()
    # %p (AM/PM) is only meaningful with a 12-hour clock. Pairing it with the
    # 24-hour %H yields replies such as "It's 14 30 PM.", so convert the hour
    # to 1-12 (0 and 12 both map to 12).
    hour_12 = now.hour % 12 or 12
    speech("It's %d %d %s." % (hour_12, now.minute, now.strftime("%p")))

elif intent == "Hello":
    replies = ["Hi!", "Hello!", "Hey there!", "Greetings."]
    speech(random.choice(replies))

# Any other intent is passed through unchanged.
# Convert the dict back to JSON and print it to stdout
print(json.dumps(o))
|
||||
@@ -12,7 +12,7 @@ def main():
|
||||
with open(sys.argv[1], "r") as dict_file:
|
||||
for line in dict_file:
|
||||
line = line.strip()
|
||||
if len(line) == 0:
|
||||
if not line:
|
||||
continue
|
||||
|
||||
parts = re.split(r"[ ]+", line)
|
||||
|
||||
@@ -0,0 +1,411 @@
|
||||
#!/usr/bin/env bash
|
||||
this_dir="$( cd "$( dirname "$0" )" && pwd )"
|
||||
CPU_ARCH="$(uname --m)"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Command-line Arguments
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
. "${this_dir}/etc/shflags"
|
||||
|
||||
DEFINE_string 'venv' "${this_dir}/.venv" 'Path to create virtual environment'
|
||||
DEFINE_string 'download-dir' "${this_dir}/download" 'Directory to cache downloaded files'
|
||||
DEFINE_string 'build-dir' "${this_dir}/build_${CPU_ARCH}" 'Directory to build dependencies in'
|
||||
DEFINE_boolean 'system' true 'Install system dependencies'
|
||||
DEFINE_boolean 'flair' false 'Install flair'
|
||||
DEFINE_boolean 'precise' false 'Install Mycroft Precise'
|
||||
DEFINE_boolean 'adapt' false 'Install Mycroft Adapt'
|
||||
DEFINE_boolean 'google' false 'Install Google Text to Speech'
|
||||
DEFINE_boolean 'kaldi' false 'Install Kaldi'
|
||||
DEFINE_boolean 'offline' false "Don't download anything"
|
||||
DEFINE_boolean 'web' true "Build Vue web interface with yarn"
|
||||
DEFINE_boolean 'sudo' true "Use sudo for apt"
|
||||
DEFINE_integer 'make-threads' 4 'Number of threads to use with make' 'j'
|
||||
DEFINE_string 'python' 'python3' 'Path to Python executable'
|
||||
|
||||
FLAGS "$@" || exit $?
|
||||
eval set -- "${FLAGS_ARGV}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Default Settings
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
set -e
|
||||
|
||||
python="${FLAGS_python}"
|
||||
venv="${FLAGS_venv}"
|
||||
|
||||
download_dir="${FLAGS_download_dir}"
|
||||
mkdir -p "${download_dir}"
|
||||
echo "Download directory: ${download_dir}"
|
||||
|
||||
build_dir="${FLAGS_build_dir}"
|
||||
mkdir -p "${build_dir}"
|
||||
echo "Build directory: ${build_dir}"
|
||||
|
||||
if [[ "${FLAGS_system}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_system='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_flair}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_flair='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_precise}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_precise='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_adapt}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_adapt='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_kaldi}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_kaldi='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_google}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_google='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_offline}" -eq "${FLAGS_TRUE}" ]]; then
|
||||
offline='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_web}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_web='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_sudo}" -eq "${FLAGS_TRUE}" ]]; then
|
||||
function run_sudo {
|
||||
sudo "$@"
|
||||
}
|
||||
else
|
||||
function run_sudo {
|
||||
"$@"
|
||||
}
|
||||
fi
|
||||
|
||||
make_threads="${FLAGS_make_threads}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Create a temporary directory for building stuff
|
||||
temp_dir="$(mktemp -d)"
|
||||
|
||||
function cleanup {
|
||||
rm -rf "${temp_dir}"
|
||||
}
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
# maybe_download URL DEST
#
# Downloads URL to DEST unless DEST already exists and is non-empty.
# Exits the script with status 1 when a download is required but the
# "offline" variable is set, or when curl fails.
function maybe_download {
    local url="$1"
    local dest="$2"

    if [[ ! -s "${dest}" ]]; then
        if [[ -n "${offline}" ]]; then
            echo "Need to download ${url} but offline."
            exit 1
        fi

        mkdir -p "$(dirname "${dest}")"

        # Download to a side file and rename only on success. Writing straight
        # to DEST would leave a truncated, non-empty file behind after an
        # interrupted transfer, and the -s check above would then treat it as
        # a valid cached download on the next run.
        local partial="${dest}.part"
        if ! curl -sSfL -o "${partial}" "${url}"; then
            rm -f "${partial}"
            echo "Can't download ${url}"
            exit 1
        fi

        mv "${partial}" "${dest}"
        echo "${url} => ${dest}"
    fi
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Checking required programs"
|
||||
|
||||
if [[ -z "${no_web}" ]]; then
|
||||
if [[ ! -n "$(command -v yarn)" ]]; then
|
||||
echo "Please install yarn to continue (https://yarnpkg.com)"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_system}" ]]; then
|
||||
echo "Installing system dependencies"
|
||||
|
||||
run_sudo apt-get update
|
||||
run_sudo apt-get install --no-install-recommends \
|
||||
python3 python3-pip python3-venv python3-dev \
|
||||
python \
|
||||
build-essential autoconf autoconf-archive libtool automake bison \
|
||||
sox espeak flite swig portaudio19-dev \
|
||||
libatlas-base-dev \
|
||||
gfortran \
|
||||
sphinxbase-utils sphinxtrain pocketsphinx \
|
||||
jq checkinstall unzip xz-utils \
|
||||
curl \
|
||||
lame
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Downloading dependencies"
|
||||
|
||||
# Python-Pocketsphinx
|
||||
pocketsphinx_file="${download_dir}/pocketsphinx-python.tar.gz"
|
||||
if [[ ! -s "${pocketsphinx_file}" ]]; then
|
||||
pocketsphinx_url='https://github.com/synesthesiam/pocketsphinx-python/releases/download/v1.0/pocketsphinx-python.tar.gz'
|
||||
echo "Downloading pocketsphinx (${pocketsphinx_url})"
|
||||
maybe_download "${pocketsphinx_url}" "${pocketsphinx_file}"
|
||||
fi
|
||||
|
||||
# OpenFST
|
||||
openfst_dir="${build_dir}/openfst-1.6.9"
|
||||
if [[ ! -d "${openfst_dir}/build" ]]; then
|
||||
openfst_file="${download_dir}/openfst-1.6.9.tar.gz"
|
||||
|
||||
if [[ ! -s "${openfst_file}" ]]; then
|
||||
openfst_url='http://openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.9.tar.gz'
|
||||
echo "Downloading openfst (${openfst_url})"
|
||||
maybe_download "${openfst_url}" "${openfst_file}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Opengrm
|
||||
opengrm_dir="${build_dir}/opengrm-ngram-1.3.4"
|
||||
if [[ ! -d "${opengrm_dir}/build" ]]; then
|
||||
opengrm_file="${download_dir}/opengrm-ngram-1.3.4.tar.gz"
|
||||
|
||||
if [[ ! -s "${opengrm_file}" ]]; then
|
||||
opengrm_url='http://www.opengrm.org/twiki/pub/GRM/NGramDownload/opengrm-ngram-1.3.4.tar.gz'
|
||||
echo "Downloading opengrm (${opengrm_url})"
|
||||
maybe_download "${opengrm_url}" "${opengrm_file}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Phonetisaurus
|
||||
phonetisaurus_dir="${build_dir}/phonetisaurus"
|
||||
if [[ ! -d "${phonetisaurus_dir}/build" ]]; then
|
||||
phonetisaurus_file="${download_dir}/phonetisaurus-2019.tar.gz"
|
||||
|
||||
if [[ ! -s "${phonetisaurus_file}" ]]; then
|
||||
phonetisaurus_url='https://github.com/synesthesiam/docker-phonetisaurus/raw/master/download/phonetisaurus-2019.tar.gz'
|
||||
echo "Downloading phonetisaurus (${phonetisaurus_url})"
|
||||
maybe_download "${phonetisaurus_url}" "${phonetisaurus_file}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Kaldi
# Only fetched when Kaldi was requested and is not already present.
kaldi_dir="${this_dir}/opt/kaldi"
if [[ -z "${no_kaldi}" && ! -d "${kaldi_dir}" ]]; then
    # These are apt packages. A bare `install` would invoke the coreutils
    # file-copy tool, which fails on these arguments and aborts the script
    # under `set -e`.
    run_sudo apt-get install --no-install-recommends \
        libatlas-base-dev libatlas3-base gfortran
    run_sudo ldconfig
    kaldi_file="${download_dir}/kaldi-2019.tar.gz"

    if [[ ! -s "${kaldi_file}" ]]; then
        kaldi_url='https://github.com/kaldi-asr/kaldi/archive/master.tar.gz'
        echo "Downloading kaldi (${kaldi_url})"
        maybe_download "${kaldi_url}" "${kaldi_file}"
    fi
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Re-create virtual environment
|
||||
echo "Creating virtual environment"
|
||||
rm -rf "${venv}"
|
||||
"${python}" -m venv "${venv}"
|
||||
source "${venv}/bin/activate"
|
||||
pip3 install wheel setuptools
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# openfst
|
||||
# http://www.openfst.org
|
||||
#
|
||||
# Required to build language models and do intent recognition.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ ! -d "${openfst_dir}/build" ]]; then
|
||||
echo "Building openfst (${openfst_file})"
|
||||
tar -C "${build_dir}" -xf "${openfst_file}" && \
|
||||
cd "${openfst_dir}" && \
|
||||
./configure "--prefix=${openfst_dir}/build" \
|
||||
--enable-far \
|
||||
--disable-static \
|
||||
--enable-shared \
|
||||
--enable-ngram-fsts && \
|
||||
make -j "${make_threads}" && \
|
||||
make install
|
||||
fi
|
||||
|
||||
# Copy build artifacts into virtual environment
|
||||
cp -R "${openfst_dir}"/build/include/* "${venv}/include/"
|
||||
cp -R "${openfst_dir}"/build/lib/*.so* "${venv}/lib/"
|
||||
cp -R "${openfst_dir}"/build/bin/* "${venv}/bin/"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# opengrm
|
||||
# http://www.opengrm.org/twiki/bin/view/GRM/NGramLibrary
|
||||
#
|
||||
# Required to build language models.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# opengrm
|
||||
if [[ ! -d "${opengrm_dir}/build" ]]; then
|
||||
echo "Building opengrm (${opengrm_file})"
|
||||
export CXXFLAGS="-I${venv}/include"
|
||||
export LDFLAGS="-L${venv}/lib"
|
||||
tar -C "${build_dir}" -xf "${opengrm_file}" && \
|
||||
cd "${opengrm_dir}" && \
|
||||
./configure "--prefix=${opengrm_dir}/build" && \
|
||||
make -j "${make_threads}" && \
|
||||
make install
|
||||
fi
|
||||
|
||||
# Copy build artifacts into virtual environment
|
||||
cp -R "${opengrm_dir}"/build/bin/* "${venv}/bin/"
|
||||
cp -R "${opengrm_dir}"/build/include/* "${venv}/include/"
|
||||
cp -R "${opengrm_dir}"/build/lib/*.so* "${venv}/lib/"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# phonetisaurus
|
||||
# https://github.com/AdolfVonKleist/Phonetisaurus
|
||||
#
|
||||
# Required to guess word pronunciations.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ ! -d "${phonetisaurus_dir}/build" ]]; then
|
||||
echo "Installing phonetisaurus (${phonetisaurus_file})"
|
||||
tar -C "${build_dir}" -xf "${phonetisaurus_file}" && \
|
||||
cd "${phonetisaurus_dir}" && \
|
||||
./configure "--prefix=${phonetisaurus_dir}/build" \
|
||||
--with-openfst-includes="${venv}/include" \
|
||||
--with-openfst-libs="${venv}/lib" && \
|
||||
make -j "${make_threads}" && \
|
||||
make install
|
||||
fi
|
||||
|
||||
# Copy build artifacts into virtual environment
|
||||
cp -R "${phonetisaurus_dir}"/build/bin/* "${venv}/bin/"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# kaldi
|
||||
# https://kaldi-asr.org
|
||||
#
|
||||
# Required for speech recognition with Kaldi-based profiles.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_kaldi}" && ! -f "${kaldi_dir}/src/online2bin/online2-wav-nnet3-latgen-faster" ]]; then
|
||||
echo "Installing kaldi (${kaldi_file})"
|
||||
|
||||
# armhf
|
||||
if [[ -f '/usr/lib/arm-linux-gnueabihf/libatlas.so' ]]; then
|
||||
# Kaldi install doesn't check here, despite it being in ldconfig
|
||||
export ATLASLIBDIR='/usr/lib/arm-linux-gnueabihf'
|
||||
fi
|
||||
|
||||
# aarch64
|
||||
if [[ -f '/usr/lib/aarch64-linux-gnu/libatlas.so' ]]; then
|
||||
# Kaldi install doesn't check here, despite it being in ldconfig
|
||||
export ATLASLIBDIR='/usr/lib/aarch64-linux-gnu'
|
||||
fi
|
||||
|
||||
tar -C "${build_dir}" -xf "${kaldi_file}" && \
|
||||
cp "${this_dir}/etc/linux_atlas_aarch64.mk" "${kaldi_dir}/src/makefiles/" && \
|
||||
cd "${kaldi_dir}/tools" && \
|
||||
make -j "${make_threads}" && \
|
||||
cd "${kaldi_dir}/src" && \
|
||||
./configure --shared --mathlib=ATLAS --use-cuda=no && \
|
||||
make depend -j "${make_threads}" && \
|
||||
make -j "${make_threads}"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Python requirements
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Installing Python requirements"
|
||||
|
||||
"${python}" -m pip install requests
|
||||
|
||||
# pytorch is not available on ARM, so flair is always excluded there.
# CPU_ARCH comes from `uname`, which reports 64-bit ARM as "aarch64";
# "arm64v8" is retained from the original pattern.
case "${CPU_ARCH}" in
    armv7l|aarch64|arm64v8)
        no_flair='true'
        ;;
esac
|
||||
|
||||
requirements_file="${temp_dir}/requirements.txt"
|
||||
temp_requirements_file="${temp_dir}/temp_requirements.txt"
|
||||
cp "${this_dir}/requirements.txt" "${requirements_file}"
|
||||
|
||||
# Exclude requirements
|
||||
if [[ -n "${no_flair}" ]]; then
|
||||
echo "Excluding flair from virtual environment"
|
||||
sed '/^flair/d' "${requirements_file}" > "${temp_requirements_file}" &&
|
||||
mv "${temp_requirements_file}" "${requirements_file}"
|
||||
|
||||
fi
|
||||
|
||||
if [[ -n "${no_precise}" ]]; then
|
||||
echo "Excluding Mycroft Precise from virtual environment"
|
||||
sed '/^precise-runner/d' "${requirements_file}" > "${temp_requirements_file}" &&
|
||||
mv "${temp_requirements_file}" "${requirements_file}"
|
||||
|
||||
fi
|
||||
|
||||
if [[ -n "${no_adapt}" ]]; then
|
||||
echo "Excluding Mycroft Adapt from virtual environment"
|
||||
sed '/^adapt-parser/d' "${requirements_file}" > "${temp_requirements_file}" &&
|
||||
mv "${temp_requirements_file}" "${requirements_file}"
|
||||
|
||||
fi
|
||||
|
||||
if [[ -n "${no_google}" ]]; then
|
||||
echo "Excluding Google Text to Speech from virtual environment"
|
||||
sed '/^google-cloud-texttospeech/d' "${requirements_file}" > "${temp_requirements_file}" &&
|
||||
mv "${temp_requirements_file}" "${requirements_file}"
|
||||
fi
|
||||
|
||||
# Install everything except openfst first
|
||||
sed '/^openfst/d' "${requirements_file}" > "${temp_requirements_file}" &&
|
||||
mv "${temp_requirements_file}" "${requirements_file}"
|
||||
|
||||
"${python}" -m pip install -r "${requirements_file}"
|
||||
|
||||
echo "Installing Python openfst wrapper"
|
||||
"${python}" -m pip install \
|
||||
--global-option=build_ext \
|
||||
--global-option="-I${venv}/include" \
|
||||
--global-option="-L${venv}/lib" \
|
||||
-r <(grep '^openfst' "${this_dir}/requirements.txt")
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Pocketsphinx for Python
|
||||
# https://github.com/cmusphinx/pocketsphinx
|
||||
#
|
||||
# Speech to text for most profiles.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
pocketsphinx_file="${download_dir}/pocketsphinx-python.tar.gz"
|
||||
echo "Installing Python pocketsphinx (${pocketsphinx_file})"
|
||||
|
||||
"${python}" -m pip install "${pocketsphinx_file}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Snowboy
|
||||
# https://snowboy.kitt.ai
|
||||
#
|
||||
# Wake word system
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
case "${CPU_ARCH}" in
|
||||
x86_64|armv7l)
|
||||
snowboy_file="${download_dir}/snowboy-1.3.0.tar.gz"
|
||||
echo "Installing snowboy (${snowboy_file})"
|
||||
"${python}" -m pip install "${snowboy_file}"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Not installing snowboy (${CPU_ARCH} not supported)"
|
||||
esac
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_web}" ]]; then
|
||||
echo "Building web interface"
|
||||
cd "${this_dir}" && yarn install && yarn build
|
||||
fi
|
||||
@@ -11,12 +11,15 @@ DEFINE_string 'venv' "${this_dir}/.venv" 'Path to create virtual environment'
|
||||
DEFINE_string 'download-dir' "${this_dir}/download" 'Directory to cache downloaded files'
|
||||
DEFINE_boolean 'system' true 'Install system dependencies'
|
||||
DEFINE_boolean 'flair' false 'Install flair'
|
||||
DEFINE_boolean 'precise' false 'Install Mycroft Precise'
|
||||
DEFINE_boolean 'precise' true 'Install Mycroft Precise'
|
||||
DEFINE_boolean 'adapt' true 'Install Mycroft Adapt'
|
||||
DEFINE_boolean 'google' false 'Install Google Text to Speech'
|
||||
DEFINE_boolean 'google' true 'Install Google Text to Speech'
|
||||
DEFINE_boolean 'kaldi' true 'Install Kaldi'
|
||||
DEFINE_boolean 'tools' true 'Install Rhasspy tools'
|
||||
DEFINE_boolean 'web' true 'Install web UI'
|
||||
DEFINE_boolean 'offline' false "Don't download anything"
|
||||
DEFINE_integer 'make-threads' 4 'Number of threads to use with make' 'j'
|
||||
DEFINE_string 'python' '' 'Path to Python executable'
|
||||
|
||||
FLAGS "$@" || exit $?
|
||||
eval set -- "${FLAGS_ARGV}"
|
||||
@@ -59,6 +62,14 @@ if [[ "${FLAGS_offline}" -eq "${FLAGS_TRUE}" ]]; then
|
||||
offline='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_tools}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_tools='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_web}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_web='true'
|
||||
fi
|
||||
|
||||
make_threads="${FLAGS_make_threads}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -75,14 +86,14 @@ trap cleanup EXIT
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
function maybe_download {
|
||||
if [[ ! -f "$2" ]]; then
|
||||
if [[ ! -z "${offline}" ]]; then
|
||||
if [[ ! -s "$2" ]]; then
|
||||
if [[ -n "${offline}" ]]; then
|
||||
echo "Need to download $1 but offline."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$(dirname "$2")"
|
||||
curl -sSfL -o "$2" "$1"
|
||||
curl -sSfL -o "$2" "$1" || { echo "Can't download $1"; exit 1; }
|
||||
echo "$1 => $2"
|
||||
fi
|
||||
}
|
||||
@@ -94,7 +105,7 @@ function maybe_download {
|
||||
if [[ -z "${no_system}" ]]; then
|
||||
echo "Installing system dependencies"
|
||||
sudo apt-get update
|
||||
sudo apt-get install --no-install-recommends --yes \
|
||||
sudo apt-get install --no-install-recommends \
|
||||
python3 python3-pip python3-venv python3-dev \
|
||||
python \
|
||||
build-essential autoconf autoconf-archive libtool automake bison \
|
||||
@@ -103,38 +114,45 @@ if [[ -z "${no_system}" ]]; then
|
||||
gfortran \
|
||||
sphinxbase-utils sphinxtrain pocketsphinx \
|
||||
jq checkinstall unzip xz-utils \
|
||||
curl
|
||||
curl \
|
||||
lame
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Python >= 3.6
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ ! -z "$(which python3.8)" ]]; then
|
||||
PYTHON='python3.8'
|
||||
elif [[ ! -z "$(which python3.7)" ]]; then
|
||||
PYTHON='python3.7'
|
||||
elif [[ ! -z "$(which python3.6)" ]]; then
|
||||
PYTHON='python3.6'
|
||||
if [[ -z "${FLAGS_python}" ]]; then
|
||||
# Auto-detect Python
|
||||
if [[ -n "$(command -v python3.8)" ]]; then
|
||||
PYTHON='python3.8'
|
||||
elif [[ -n "$(command -v python3.7)" ]]; then
|
||||
PYTHON='python3.7'
|
||||
elif [[ -n "$(command -v python3.6)" ]]; then
|
||||
PYTHON='python3.6'
|
||||
else
|
||||
echo "Installing Python 3.6 from source. This is going to take a LONG time."
|
||||
sudo apt-get install --no-install-recommends \
|
||||
tk-dev libncurses5-dev libncursesw5-dev \
|
||||
libreadline6-dev libdb5.3-dev libgdbm-dev \
|
||||
libsqlite3-dev libssl-dev libbz2-dev \
|
||||
libexpat1-dev liblzma-dev zlib1g-dev
|
||||
|
||||
python_file="${download_dir}/Python-3.6.8.tar.xz"
|
||||
python_url='https://www.python.org/ftp/python/3.6.8/Python-3.6.8.tar.xz'
|
||||
maybe_download "${python_url}" "${python_file}"
|
||||
|
||||
tar -C "${temp_dir}" -xf "${python_file}"
|
||||
cd "${temp_dir}/Python-3.6.8" && \
|
||||
./configure && \
|
||||
make -j "${make_threads}" && \
|
||||
sudo make altinstall
|
||||
|
||||
PYTHON='python3.6'
|
||||
fi
|
||||
else
|
||||
echo "Installing Python 3.6 from source. This is going to take a LONG time."
|
||||
sudo apt-get install --no-install-recommends --yes \
|
||||
tk-dev libncurses5-dev libncursesw5-dev \
|
||||
libreadline6-dev libdb5.3-dev libgdbm-dev \
|
||||
libsqlite3-dev libssl-dev libbz2-dev \
|
||||
libexpat1-dev liblzma-dev zlib1g-dev
|
||||
|
||||
python_file="${download_dir}/Python-3.6.8.tar.xz"
|
||||
python_url='https://www.python.org/ftp/python/3.6.8/Python-3.6.8.tar.xz'
|
||||
maybe_download "${python_url}" "${python_file}"
|
||||
|
||||
tar -C "${temp_dir}" -xf "${python_file}"
|
||||
cd "${temp_dir}/Python-3.6.8" && \
|
||||
./configure && \
|
||||
make -j "${make_threads}" && \
|
||||
sudo make altinstall
|
||||
|
||||
PYTHON='python3.6'
|
||||
# User-provided Python
|
||||
PYTHON="${FLAGS_python}"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -152,25 +170,37 @@ case "${CPU_ARCH}" in
|
||||
FRIENDLY_ARCH=armhf
|
||||
;;
|
||||
|
||||
arm64v8)
|
||||
aarch64|arm64v8)
|
||||
FRIENDLY_ARCH=aarch64
|
||||
;;
|
||||
|
||||
*)
|
||||
FRIENDLY_ARCH="${CPU_ARCH}"
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "Downloading dependencies"
|
||||
download_args=()
|
||||
if [[ ! -z "${offline}" ]]; then
|
||||
if [[ -n "${offline}" ]]; then
|
||||
download_args+=('--offline')
|
||||
fi
|
||||
|
||||
if [[ ! -z "${no_precise}" ]]; then
|
||||
if [[ -n "${no_precise}" ]]; then
|
||||
download_args+=('--noprecise')
|
||||
fi
|
||||
|
||||
if [[ ! -z "${no_kaldi}" ]]; then
|
||||
if [[ -n "${no_kaldi}" ]]; then
|
||||
download_args+=('--nokaldi')
|
||||
fi
|
||||
|
||||
if [[ -n "${no_tools}" ]]; then
|
||||
download_args+=('--notools')
|
||||
fi
|
||||
|
||||
if [[ -n "${no_web}" ]]; then
|
||||
download_args+=('--noweb')
|
||||
fi
|
||||
|
||||
bash download-dependencies.sh "${download_args[@]}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -190,10 +220,12 @@ echo "Creating new virtual environment"
|
||||
mkdir -p "${venv}"
|
||||
"${PYTHON}" -m venv "${venv}"
|
||||
|
||||
# Extract Rhasspy tools
|
||||
rhasspy_tools_file="${download_dir}/rhasspy-tools_${FRIENDLY_ARCH}.tar.gz"
|
||||
echo "Extracting tools (${rhasspy_tools_file})"
|
||||
tar -C "${venv}" -xf "${rhasspy_tools_file}"
|
||||
if [[ -z "${no_tools}" ]]; then
|
||||
# Extract Rhasspy tools
|
||||
rhasspy_tools_file="${download_dir}/rhasspy-tools_${FRIENDLY_ARCH}.tar.gz"
|
||||
echo "Extracting tools (${rhasspy_tools_file})"
|
||||
tar -C "${venv}" -xf "${rhasspy_tools_file}"
|
||||
fi
|
||||
|
||||
# Force .venv/lib to be used
|
||||
export LD_LIBRARY_PATH="${venv}/lib:${LD_LIBRARY_PATH}"
|
||||
@@ -201,52 +233,60 @@ export LD_LIBRARY_PATH="${venv}/lib:${LD_LIBRARY_PATH}"
|
||||
# shellcheck source=/dev/null
|
||||
source "${venv}/bin/activate"
|
||||
|
||||
echo "Upgrading pip"
|
||||
python3 -m pip install --upgrade pip
|
||||
|
||||
echo "Installing Python requirements"
|
||||
"${PYTHON}" -m pip install wheel setuptools
|
||||
"${PYTHON}" -m pip install requests
|
||||
python3 -m pip install wheel setuptools
|
||||
python3 -m pip install requests
|
||||
|
||||
# pytorch is not available on ARM
|
||||
case "${CPU_ARCH}" in
|
||||
armv7l|arm64v8)
|
||||
no_flair="true" ;;
|
||||
no_flair="true" ;;
|
||||
esac
|
||||
|
||||
requirements_file="${temp_dir}/requirements.txt"
|
||||
cp "${this_dir}/requirements.txt" "${requirements_file}"
|
||||
|
||||
# Exclude requirements
|
||||
if [[ ! -z "${no_flair}" ]]; then
|
||||
if [[ -n "${no_flair}" ]]; then
|
||||
echo "Excluding flair from virtual environment"
|
||||
sed -i '/^flair/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
if [[ ! -z "${no_precise}" ]]; then
|
||||
if [[ -n "${no_precise}" ]]; then
|
||||
echo "Excluding Mycroft Precise from virtual environment"
|
||||
sed -i '/^precise-runner/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
if [[ ! -z "${no_adapt}" ]]; then
|
||||
if [[ -n "${no_adapt}" ]]; then
|
||||
echo "Excluding Mycroft Adapt from virtual environment"
|
||||
sed -i '/^adapt-parser/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
if [[ ! -z "${no_google}" ]]; then
|
||||
if [[ -n "${no_google}" ]]; then
|
||||
echo "Excluding Google Text to Speech from virtual environment"
|
||||
sed -i '/^google-cloud-texttospeech/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
"${PYTHON}" -m pip install \
|
||||
--global-option=build_ext \
|
||||
--global-option="-I${venv}/include" \
|
||||
--global-option="-L${venv}/lib" \
|
||||
-r "${requirements_file}"
|
||||
# Install everything except openfst first
|
||||
sed -i '/^openfst/d' "${requirements_file}"
|
||||
python3 -m pip install -r "${requirements_file}"
|
||||
|
||||
# Install Python openfst wrapper
|
||||
python3 -m pip install \
|
||||
--global-option=build_ext \
|
||||
--global-option="-I${venv}/include" \
|
||||
--global-option="-L${venv}/lib" \
|
||||
-r <(grep '^openfst' "${this_dir}/requirements.txt")
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Pocketsphinx for Python
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
pocketsphinx_file="${download_dir}/pocketsphinx-python.tar.gz"
|
||||
"${PYTHON}" -m pip install "${pocketsphinx_file}"
|
||||
python3 -m pip install "${pocketsphinx_file}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Snowboy
|
||||
@@ -255,7 +295,7 @@ pocketsphinx_file="${download_dir}/pocketsphinx-python.tar.gz"
|
||||
case "${CPU_ARCH}" in
|
||||
x86_64|armv7l)
|
||||
snowboy_file="${download_dir}/snowboy-1.3.0.tar.gz"
|
||||
"${PYTHON}" -m pip install "${snowboy_file}"
|
||||
python3 -m pip install "${snowboy_file}"
|
||||
;;
|
||||
|
||||
*)
|
||||
@@ -266,9 +306,9 @@ esac
|
||||
# Mycroft Precise
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_precise}" && -z "$(which precise-engine)" ]]; then
|
||||
if [[ -z "${no_precise}" && -z "$(command -v precise-engine)" ]]; then
|
||||
case "${CPU_ARCH}" in
|
||||
x86_64|armv7l)
|
||||
x86_64|armv7l|aarch64)
|
||||
echo "Installing Mycroft Precise"
|
||||
precise_file="${download_dir}/precise-engine_0.3.0_${CPU_ARCH}.tar.gz"
|
||||
precise_install="${venv}/lib"
|
||||
@@ -296,9 +336,11 @@ fi
|
||||
# Web Interface
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
rhasspy_web_file="${download_dir}/rhasspy-web-dist.tar.gz"
|
||||
echo "Extracting web interface (${rhasspy_web_file})"
|
||||
tar -C "${this_dir}" -xf "${rhasspy_web_file}"
|
||||
if [[ -z "${no_web}" ]]; then
|
||||
rhasspy_web_file="${download_dir}/rhasspy-web-dist.tar.gz"
|
||||
echo "Extracting web interface (${rhasspy_web_file})"
|
||||
tar -C "${this_dir}" -xf "${rhasspy_web_file}"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
Package: rhasspy-server
|
||||
Version: 2.4.8
|
||||
Version: 2.4.10
|
||||
Section: utils
|
||||
Priority: optional
|
||||
Depends: sox,alsa-utils,espeak,libstdc++6,jq,xz-utils,unzip,curl,sphinxbase-utils,sphinxtrain,flite,libatlas-base-dev,gfortran
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
rhasspy_version="2.4.8"
|
||||
rhasspy_version="2.4.10"
|
||||
|
||||
this_dir="$( cd "$( dirname "$0" )" && pwd )"
|
||||
|
||||
@@ -46,7 +46,7 @@ fi
|
||||
cd "${this_dir}"
|
||||
source "${venv}/bin/activate"
|
||||
|
||||
if [[ -z "$(which pyinstaller)" ]]; then
|
||||
if [[ -z "$(command -v pyinstaller)" ]]; then
|
||||
echo "Missing PyInstaller"
|
||||
exit 1
|
||||
fi
|
||||
@@ -131,7 +131,7 @@ cp "${this_dir}/app.py" "${share_dir}/src/"
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Copying Kaldi"
|
||||
kaldi_src="${venv}/kaldi"
|
||||
kaldi_src="${this_dir}/opt/kaldi"
|
||||
if [[ ! -d "${kaldi_src}" ]]; then
|
||||
echo "Missing Kaldi at ${kaldi_src}"
|
||||
exit 1
|
||||
@@ -145,7 +145,7 @@ rsync -av --delete "${kaldi_src}/" "${kaldi_dest}/"
|
||||
rm -f "${kaldi_dest}/egs/wsj/s5/utils/utils"
|
||||
|
||||
# Turn duplicate .so files into symbolic links
|
||||
function fix_library_links() {
|
||||
function fix_library_links {
|
||||
lib_dir="$1"
|
||||
|
||||
for lib in "${lib_dir}"/*.so; do
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env bash
|
||||
DIR="$( cd "$( dirname "$0" )" && pwd )"
|
||||
|
||||
# Try to determine where Rhasspy is located
|
||||
if [[ -z "${RHASSPY_APP}" ]]; then
|
||||
@@ -16,13 +15,35 @@ if [[ -f "${CONFIG_PATH}" ]]; then
|
||||
# Hass.IO configuration
|
||||
profile_name="$(jq --raw-output '.profile_name' "${CONFIG_PATH}")"
|
||||
profile_dir="$(jq --raw-output '.profile_dir' "${CONFIG_PATH}")"
|
||||
RHASSPY_ARGS="--profile \"${profile_name}\" --user-profiles \"${profile_dir}\""
|
||||
RHASSPY_ARGS=('--profile' "${profile_name}" '--user-profiles' "${profile_dir}")
|
||||
|
||||
# Copy user-defined asoundrc to root
|
||||
asoundrc="$(jq --raw-output '.asoundrc' "${CONFIG_PATH}")"
|
||||
if [[ ! -z "${asoundrc}" ]]; then
|
||||
echo "${asoundrc}" > /root/.asoundrc
|
||||
fi
|
||||
|
||||
# Add SSL settings
|
||||
ssl="$(jq --raw-output '.ssl' "${CONFIG_PATH}")"
|
||||
if [[ "${ssl}" == 'true' ]]; then
|
||||
certfile="$(jq --raw-output '.certfile' "${CONFIG_PATH}")"
|
||||
keyfile="$(jq --raw-output '.keyfile' "${CONFIG_PATH}")"
|
||||
RHASSPY_ARGS+=('--ssl' "/ssl/${certfile}" "/ssl/${keyfile}")
|
||||
fi
|
||||
fi
|
||||
|
||||
cd "${RHASSPY_APP}"
|
||||
RHASSPY_VENV="${RHASSPY_APP}/.venv"
|
||||
if [[ -d "${RHASSPY_VENV}" ]]; then
|
||||
source "${RHASSPY_VENV}/bin/activate"
|
||||
|
||||
if [[ -z "${RHASSPY_ARGS}" ]]; then
|
||||
# Force .venv/lib to be used
|
||||
export LD_LIBRARY_PATH="${RHASSPY_VENV}/lib:${LD_LIBRARY_PATH}"
|
||||
fi
|
||||
|
||||
cd "${RHASSPY_APP}" || exit 1
|
||||
|
||||
if [[ -z "${RHASSPY_ARGS[*]}" ]]; then
|
||||
python3 app.py "$@"
|
||||
else
|
||||
python3 app.py "${RHASSPY_ARGS}" "$@"
|
||||
python3 app.py "${RHASSPY_ARGS[@]}" "$@"
|
||||
fi
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
ARG BUILD_FROM
|
||||
FROM $BUILD_FROM
|
||||
LABEL maintainer="Michael Hansen <hansen.mike@gmail.com>"
|
||||
|
||||
ARG BUILD_ARCH
|
||||
ARG CPU_ARCH
|
||||
ENV LANG C.UTF-8
|
||||
|
||||
ARG MAKE_THREADS=4
|
||||
|
||||
WORKDIR /
|
||||
|
||||
QEMU
|
||||
|
||||
SYSTEM_DEPENDENCIES
|
||||
|
||||
RHASSPY_TOOLS
|
||||
|
||||
PYTHON_REQUIREMENTS
|
||||
|
||||
PYTHON_POCKETSPHINX
|
||||
|
||||
SNOWBOY
|
||||
|
||||
TTS
|
||||
|
||||
KALDI
|
||||
|
||||
RUN ldconfig
|
||||
|
||||
# Copy bw and mllr_solve to /usr/bin
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
GSTREAMER
|
||||
|
||||
PULSEAUDIO
|
||||
|
||||
# Copy script to run
|
||||
COPY docker/run.sh /run.sh
|
||||
RUN chmod +x /run.sh
|
||||
|
||||
RHASSPY_USER
|
||||
|
||||
PROFILES
|
||||
|
||||
RHASSPY_CODE
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -1,127 +0,0 @@
|
||||
COPY profiles/zh/profile.json \
|
||||
profiles/zh/custom_words.txt \
|
||||
profiles/zh/espeak_phonemes.txt \
|
||||
profiles/zh/phoneme_examples.txt \
|
||||
profiles/zh/frequent_words.txt \
|
||||
profiles/zh/sentences.ini \
|
||||
profiles/zh/stop_words.txt ${RHASSPY_APP}/profiles/zh/
|
||||
|
||||
COPY profiles/hi/ \
|
||||
profiles/hi/profile.json \
|
||||
profiles/hi/custom_words.txt \
|
||||
profiles/hi/espeak_phonemes.txt \
|
||||
profiles/hi/phoneme_examples.txt \
|
||||
profiles/hi/frequent_words.txt \
|
||||
profiles/hi/sentences.ini \
|
||||
profiles/hi/stop_words.txt ${RHASSPY_APP}/profiles/hi/
|
||||
|
||||
COPY profiles/el/profile.json \
|
||||
profiles/el/custom_words.txt \
|
||||
profiles/el/espeak_phonemes.txt \
|
||||
profiles/el/phoneme_examples.txt \
|
||||
profiles/el/frequent_words.txt \
|
||||
profiles/el/sentences.ini \
|
||||
profiles/el/stop_words.txt ${RHASSPY_APP}/profiles/el/
|
||||
|
||||
COPY profiles/de/profile.json \
|
||||
profiles/de/custom_words.txt \
|
||||
profiles/de/espeak_phonemes.txt \
|
||||
profiles/de/phoneme_examples.txt \
|
||||
profiles/de/frequent_words.txt \
|
||||
profiles/de/sentences.ini \
|
||||
profiles/de/stop_words.txt ${RHASSPY_APP}/profiles/de/
|
||||
|
||||
COPY profiles/de/kaldi/custom_words.txt \
|
||||
profiles/de/kaldi/espeak_phonemes.txt \
|
||||
profiles/de/kaldi/phoneme_examples.txt \
|
||||
${RHASSPY_APP}/profiles/de/kaldi/
|
||||
|
||||
COPY profiles/it/profile.json \
|
||||
profiles/it/custom_words.txt \
|
||||
profiles/it/espeak_phonemes.txt \
|
||||
profiles/it/phoneme_examples.txt \
|
||||
profiles/it/frequent_words.txt \
|
||||
profiles/it/sentences.ini \
|
||||
profiles/it/stop_words.txt ${RHASSPY_APP}/profiles/it/
|
||||
|
||||
COPY profiles/es/profile.json \
|
||||
profiles/es/custom_words.txt \
|
||||
profiles/es/espeak_phonemes.txt \
|
||||
profiles/es/phoneme_examples.txt \
|
||||
profiles/es/frequent_words.txt \
|
||||
profiles/es/sentences.ini \
|
||||
profiles/es/stop_words.txt ${RHASSPY_APP}/profiles/es/
|
||||
|
||||
COPY profiles/fr/profile.json \
|
||||
profiles/fr/custom_words.txt \
|
||||
profiles/fr/espeak_phonemes.txt \
|
||||
profiles/fr/phoneme_examples.txt \
|
||||
profiles/fr/frequent_words.txt \
|
||||
profiles/fr/sentences.ini \
|
||||
profiles/fr/stop_words.txt ${RHASSPY_APP}/profiles/fr/
|
||||
|
||||
COPY profiles/ru/profile.json \
|
||||
profiles/ru/custom_words.txt \
|
||||
profiles/ru/espeak_phonemes.txt \
|
||||
profiles/ru/phoneme_examples.txt \
|
||||
profiles/ru/frequent_words.txt \
|
||||
profiles/ru/sentences.ini \
|
||||
profiles/ru/stop_words.txt ${RHASSPY_APP}/profiles/ru/
|
||||
|
||||
COPY profiles/nl/profile.json \
|
||||
profiles/nl/custom_words.txt \
|
||||
profiles/nl/espeak_phonemes.txt \
|
||||
profiles/nl/phoneme_examples.txt \
|
||||
profiles/nl/frequent_words.txt \
|
||||
profiles/nl/sentences.ini \
|
||||
profiles/nl/stop_words.txt ${RHASSPY_APP}/profiles/nl/
|
||||
|
||||
COPY profiles/nl/kaldi/custom_words.txt \
|
||||
profiles/nl/kaldi/espeak_phonemes.txt \
|
||||
profiles/nl/kaldi/phoneme_examples.txt \
|
||||
${RHASSPY_APP}/profiles/nl/kaldi/
|
||||
|
||||
COPY profiles/vi/profile.json \
|
||||
profiles/vi/custom_words.txt \
|
||||
profiles/vi/espeak_phonemes.txt \
|
||||
profiles/vi/phoneme_examples.txt \
|
||||
profiles/vi/frequent_words.txt \
|
||||
profiles/vi/sentences.ini \
|
||||
profiles/vi/stop_words.txt ${RHASSPY_APP}/profiles/vi/
|
||||
|
||||
COPY profiles/pt/profile.json \
|
||||
profiles/pt/custom_words.txt \
|
||||
profiles/pt/espeak_phonemes.txt \
|
||||
profiles/pt/phoneme_examples.txt \
|
||||
profiles/pt/frequent_words.txt \
|
||||
profiles/pt/sentences.ini \
|
||||
profiles/pt/stop_words.txt ${RHASSPY_APP}/profiles/pt/
|
||||
|
||||
COPY profiles/sv/profile.json \
|
||||
profiles/sv/custom_words.txt \
|
||||
profiles/sv/espeak_phonemes.txt \
|
||||
profiles/sv/phoneme_examples.txt \
|
||||
profiles/sv/frequent_words.txt \
|
||||
profiles/sv/sentences.ini \
|
||||
profiles/sv/stop_words.txt ${RHASSPY_APP}/profiles/sv/
|
||||
|
||||
COPY profiles/ca/profile.json \
|
||||
profiles/ca/custom_words.txt \
|
||||
profiles/ca/espeak_phonemes.txt \
|
||||
profiles/ca/phoneme_examples.txt \
|
||||
profiles/ca/frequent_words.txt \
|
||||
profiles/ca/sentences.ini \
|
||||
profiles/ca/stop_words.txt ${RHASSPY_APP}/profiles/ca/
|
||||
|
||||
COPY profiles/en/profile.json \
|
||||
profiles/en/custom_words.txt \
|
||||
profiles/en/espeak_phonemes.txt \
|
||||
profiles/en/phoneme_examples.txt \
|
||||
profiles/en/frequent_words.txt \
|
||||
profiles/en/sentences.ini \
|
||||
profiles/en/stop_words.txt ${RHASSPY_APP}/profiles/en/
|
||||
|
||||
COPY profiles/en/kaldi/custom_words.txt \
|
||||
profiles/en/kaldi/espeak_phonemes.txt \
|
||||
profiles/en/kaldi/phoneme_examples.txt \
|
||||
${RHASSPY_APP}/profiles/en/kaldi/
|
||||
@@ -1 +0,0 @@
|
||||
ENV RHASSPY_APP /usr/share/rhasspy
|
||||
@@ -1,207 +0,0 @@
|
||||
ARG BUILD_FROM
|
||||
FROM $BUILD_FROM
|
||||
LABEL maintainer="Michael Hansen <hansen.mike@gmail.com>"
|
||||
|
||||
ARG BUILD_ARCH
|
||||
ARG CPU_ARCH
|
||||
ENV LANG C.UTF-8
|
||||
|
||||
ARG MAKE_THREADS=4
|
||||
|
||||
WORKDIR /
|
||||
|
||||
COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/openfst-1.6.2-1_${BUILD_ARCH}.deb /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
apt-get install --no-install-recommends --yes libfst-dev libfst-tools; \
|
||||
else \
|
||||
dpkg -i /openfst-1.6.2-1_${BUILD_ARCH}.deb; \
|
||||
rm /openfst*.deb; \
|
||||
fi
|
||||
|
||||
RHASSPY_TOOLS
|
||||
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
mv /requirements-noflair.txt /requirements.txt; \
|
||||
fi
|
||||
RUN python3 -m pip install --no-cache-dir -r /requirements.txt
|
||||
|
||||
# Install Pocketsphinx Python module with no sound
|
||||
COPY download/pocketsphinx-python.tar.gz /
|
||||
RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
rm -rf /pocketsphinx-python*
|
||||
|
||||
# Install snowboy
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
tar -C /opt -xzf /kaldi.tar.gz && \
|
||||
rm /kaldi.tar.gz
|
||||
|
||||
RUN ldconfig
|
||||
|
||||
# Copy bw and mllr_solve to /usr/bin
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
|
||||
ENV RHASSPY_APP /usr/share/rhasspy
|
||||
|
||||
# Copy script to run
|
||||
COPY docker/run.sh /run.sh
|
||||
RUN chmod +x /run.sh
|
||||
|
||||
|
||||
|
||||
COPY profiles/zh/profile.json \
|
||||
profiles/zh/custom_words.txt \
|
||||
profiles/zh/espeak_phonemes.txt \
|
||||
profiles/zh/phoneme_examples.txt \
|
||||
profiles/zh/frequent_words.txt \
|
||||
profiles/zh/sentences.ini \
|
||||
profiles/zh/stop_words.txt ${RHASSPY_APP}/profiles/zh/
|
||||
|
||||
COPY profiles/hi/ \
|
||||
profiles/hi/profile.json \
|
||||
profiles/hi/custom_words.txt \
|
||||
profiles/hi/espeak_phonemes.txt \
|
||||
profiles/hi/phoneme_examples.txt \
|
||||
profiles/hi/frequent_words.txt \
|
||||
profiles/hi/sentences.ini \
|
||||
profiles/hi/stop_words.txt ${RHASSPY_APP}/profiles/hi/
|
||||
|
||||
COPY profiles/el/profile.json \
|
||||
profiles/el/custom_words.txt \
|
||||
profiles/el/espeak_phonemes.txt \
|
||||
profiles/el/phoneme_examples.txt \
|
||||
profiles/el/frequent_words.txt \
|
||||
profiles/el/sentences.ini \
|
||||
profiles/el/stop_words.txt ${RHASSPY_APP}/profiles/el/
|
||||
|
||||
COPY profiles/de/profile.json \
|
||||
profiles/de/custom_words.txt \
|
||||
profiles/de/espeak_phonemes.txt \
|
||||
profiles/de/phoneme_examples.txt \
|
||||
profiles/de/frequent_words.txt \
|
||||
profiles/de/sentences.ini \
|
||||
profiles/de/stop_words.txt ${RHASSPY_APP}/profiles/de/
|
||||
|
||||
COPY profiles/it/profile.json \
|
||||
profiles/it/custom_words.txt \
|
||||
profiles/it/espeak_phonemes.txt \
|
||||
profiles/it/phoneme_examples.txt \
|
||||
profiles/it/frequent_words.txt \
|
||||
profiles/it/sentences.ini \
|
||||
profiles/it/stop_words.txt ${RHASSPY_APP}/profiles/it/
|
||||
|
||||
COPY profiles/es/profile.json \
|
||||
profiles/es/custom_words.txt \
|
||||
profiles/es/espeak_phonemes.txt \
|
||||
profiles/es/phoneme_examples.txt \
|
||||
profiles/es/frequent_words.txt \
|
||||
profiles/es/sentences.ini \
|
||||
profiles/es/stop_words.txt ${RHASSPY_APP}/profiles/es/
|
||||
|
||||
COPY profiles/fr/profile.json \
|
||||
profiles/fr/custom_words.txt \
|
||||
profiles/fr/espeak_phonemes.txt \
|
||||
profiles/fr/phoneme_examples.txt \
|
||||
profiles/fr/frequent_words.txt \
|
||||
profiles/fr/sentences.ini \
|
||||
profiles/fr/stop_words.txt ${RHASSPY_APP}/profiles/fr/
|
||||
|
||||
COPY profiles/ru/profile.json \
|
||||
profiles/ru/custom_words.txt \
|
||||
profiles/ru/espeak_phonemes.txt \
|
||||
profiles/ru/phoneme_examples.txt \
|
||||
profiles/ru/frequent_words.txt \
|
||||
profiles/ru/sentences.ini \
|
||||
profiles/ru/stop_words.txt ${RHASSPY_APP}/profiles/ru/
|
||||
|
||||
COPY profiles/nl/profile.json \
|
||||
profiles/nl/custom_words.txt \
|
||||
profiles/nl/espeak_phonemes.txt \
|
||||
profiles/nl/phoneme_examples.txt \
|
||||
profiles/nl/frequent_words.txt \
|
||||
profiles/nl/sentences.ini \
|
||||
profiles/nl/stop_words.txt ${RHASSPY_APP}/profiles/nl/
|
||||
|
||||
COPY profiles/vi/profile.json \
|
||||
profiles/vi/custom_words.txt \
|
||||
profiles/vi/espeak_phonemes.txt \
|
||||
profiles/vi/phoneme_examples.txt \
|
||||
profiles/vi/frequent_words.txt \
|
||||
profiles/vi/sentences.ini \
|
||||
profiles/vi/stop_words.txt ${RHASSPY_APP}/profiles/vi/
|
||||
|
||||
COPY profiles/pt/profile.json \
|
||||
profiles/pt/custom_words.txt \
|
||||
profiles/pt/espeak_phonemes.txt \
|
||||
profiles/pt/phoneme_examples.txt \
|
||||
profiles/pt/frequent_words.txt \
|
||||
profiles/pt/sentences.ini \
|
||||
profiles/pt/stop_words.txt ${RHASSPY_APP}/profiles/pt/
|
||||
|
||||
COPY profiles/sv/profile.json \
|
||||
profiles/sv/custom_words.txt \
|
||||
profiles/sv/espeak_phonemes.txt \
|
||||
profiles/sv/phoneme_examples.txt \
|
||||
profiles/sv/frequent_words.txt \
|
||||
profiles/sv/sentences.ini \
|
||||
profiles/sv/stop_words.txt ${RHASSPY_APP}/profiles/sv/
|
||||
|
||||
COPY profiles/ca/profile.json \
|
||||
profiles/ca/custom_words.txt \
|
||||
profiles/ca/espeak_phonemes.txt \
|
||||
profiles/ca/phoneme_examples.txt \
|
||||
profiles/ca/frequent_words.txt \
|
||||
profiles/ca/sentences.ini \
|
||||
profiles/ca/stop_words.txt ${RHASSPY_APP}/profiles/ca/
|
||||
|
||||
COPY profiles/en/profile.json \
|
||||
profiles/en/custom_words.txt \
|
||||
profiles/en/espeak_phonemes.txt \
|
||||
profiles/en/phoneme_examples.txt \
|
||||
profiles/en/frequent_words.txt \
|
||||
profiles/en/sentences.ini \
|
||||
profiles/en/stop_words.txt ${RHASSPY_APP}/profiles/en/
|
||||
|
||||
COPY profiles/defaults.json ${RHASSPY_APP}/profiles/
|
||||
COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -1,96 +0,0 @@
|
||||
ARG BUILD_FROM
|
||||
FROM $BUILD_FROM
|
||||
LABEL maintainer="Michael Hansen <hansen.mike@gmail.com>"
|
||||
|
||||
ARG BUILD_ARCH
|
||||
ARG CPU_ARCH
|
||||
ENV LANG C.UTF-8
|
||||
|
||||
ARG MAKE_THREADS=4
|
||||
|
||||
WORKDIR /
|
||||
|
||||
COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/openfst-1.6.2-1_${BUILD_ARCH}.deb /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
apt-get install --no-install-recommends --yes libfst-dev libfst-tools; \
|
||||
else \
|
||||
dpkg -i /openfst-1.6.2-1_${BUILD_ARCH}.deb; \
|
||||
rm /openfst*.deb; \
|
||||
fi
|
||||
|
||||
RHASSPY_TOOLS
|
||||
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
mv /requirements-noflair.txt /requirements.txt; \
|
||||
fi
|
||||
RUN python3 -m pip install --no-cache-dir -r /requirements.txt
|
||||
|
||||
# Install Pocketsphinx Python module with no sound
|
||||
COPY download/pocketsphinx-python.tar.gz /
|
||||
RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
rm -rf /pocketsphinx-python*
|
||||
|
||||
# Install snowboy
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
tar -C /opt -xzf /kaldi.tar.gz && \
|
||||
rm /kaldi.tar.gz
|
||||
|
||||
RUN ldconfig
|
||||
|
||||
# Copy bw and mllr_solve to /usr/bin
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
|
||||
ENV RHASSPY_APP /usr/share/rhasspy
|
||||
|
||||
# Copy script to run
|
||||
COPY docker/run.sh /run.sh
|
||||
RUN chmod +x /run.sh
|
||||
|
||||
|
||||
|
||||
COPY profiles/en/ ${RHASSPY_APP}/profiles/en/
|
||||
|
||||
COPY profiles/defaults.json ${RHASSPY_APP}/profiles/
|
||||
COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -1,113 +0,0 @@
|
||||
ARG BUILD_FROM
|
||||
FROM $BUILD_FROM
|
||||
LABEL maintainer="Michael Hansen <hansen.mike@gmail.com>"
|
||||
|
||||
ARG BUILD_ARCH
|
||||
ARG CPU_ARCH
|
||||
ENV LANG C.UTF-8
|
||||
|
||||
ARG MAKE_THREADS=4
|
||||
|
||||
WORKDIR /
|
||||
|
||||
COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/openfst-1.6.2-1_${BUILD_ARCH}.deb /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
apt-get install --no-install-recommends --yes libfst-dev libfst-tools; \
|
||||
else \
|
||||
dpkg -i /openfst-1.6.2-1_${BUILD_ARCH}.deb; \
|
||||
rm /openfst*.deb; \
|
||||
fi
|
||||
|
||||
RHASSPY_TOOLS
|
||||
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
mv /requirements-noflair.txt /requirements.txt; \
|
||||
fi
|
||||
RUN python3 -m pip install --no-cache-dir -r /requirements.txt
|
||||
|
||||
# Install Pocketsphinx Python module with no sound
|
||||
COPY download/pocketsphinx-python.tar.gz /
|
||||
RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
rm -rf /pocketsphinx-python*
|
||||
|
||||
# Install snowboy
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
tar -C /opt -xzf /kaldi.tar.gz && \
|
||||
rm /kaldi.tar.gz
|
||||
|
||||
RUN ldconfig
|
||||
|
||||
# Copy bw and mllr_solve to /usr/bin
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
|
||||
RUN apt-get install -y pulseaudio
|
||||
COPY etc/pulseaudio.client.conf /etc/pulse/client.conf
|
||||
|
||||
# Create new user
|
||||
ENV UNAME=rhasspy
|
||||
RUN export UNAME=$UNAME UID=1000 GID=1000 && \
|
||||
mkdir -p "/home/${UNAME}" && \
|
||||
echo "${UNAME}:x:${UID}:${GID}:${UNAME} User,,,:/home/${UNAME}:/bin/bash" >> /etc/passwd && \
|
||||
echo "${UNAME}:x:${UID}:" >> /etc/group && \
|
||||
mkdir -p /etc/sudoers.d && \
|
||||
echo "${UNAME} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/${UNAME} && \
|
||||
chmod 0440 /etc/sudoers.d/${UNAME} && \
|
||||
chown ${UID}:${GID} -R /home/${UNAME} && \
|
||||
gpasswd -a ${UNAME} audio
|
||||
|
||||
ENV RHASSPY_APP /home/${UNAME}
|
||||
|
||||
# Copy script to run
|
||||
COPY docker/run.sh /run.sh
|
||||
RUN chmod +x /run.sh
|
||||
|
||||
# Switch to new user
|
||||
USER $UNAME
|
||||
ENV HOME /home/${UNAME}
|
||||
|
||||
COPY profiles/en/ ${RHASSPY_APP}/profiles/en/
|
||||
|
||||
COPY profiles/defaults.json ${RHASSPY_APP}/profiles/
|
||||
COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -1,216 +0,0 @@
|
||||
ARG BUILD_FROM
|
||||
FROM $BUILD_FROM
|
||||
LABEL maintainer="Michael Hansen <hansen.mike@gmail.com>"
|
||||
|
||||
ARG BUILD_ARCH
|
||||
ARG CPU_ARCH
|
||||
ENV LANG C.UTF-8
|
||||
|
||||
ARG MAKE_THREADS=4
|
||||
|
||||
WORKDIR /
|
||||
|
||||
COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/rhasspy-tools_${BUILD_ARCH}.tar.gz /
|
||||
RUN tar -C /usr -xvf /rhasspy-tools_${BUILD_ARCH}.tar.gz && \
|
||||
rm -f /rhasspy-tools_${BUILD_ARCH}.tar.gz
|
||||
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
mv /requirements-noflair.txt /requirements.txt; \
|
||||
fi
|
||||
RUN python3 -m pip install --no-cache-dir -r /requirements.txt
|
||||
|
||||
# Install Pocketsphinx Python module with no sound
|
||||
COPY download/pocketsphinx-python.tar.gz /
|
||||
RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
rm -rf /pocketsphinx-python*
|
||||
|
||||
# Install snowboy
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
tar -C /opt -xzf /kaldi.tar.gz && \
|
||||
rm /kaldi.tar.gz
|
||||
|
||||
RUN ldconfig
|
||||
|
||||
# Copy bw and mllr_solve to /usr/bin
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
|
||||
ENV RHASSPY_APP /usr/share/rhasspy
|
||||
|
||||
# Copy script to run
|
||||
COPY docker/run.sh /run.sh
|
||||
RUN chmod +x /run.sh
|
||||
|
||||
|
||||
|
||||
COPY profiles/zh/profile.json \
|
||||
profiles/zh/custom_words.txt \
|
||||
profiles/zh/espeak_phonemes.txt \
|
||||
profiles/zh/phoneme_examples.txt \
|
||||
profiles/zh/frequent_words.txt \
|
||||
profiles/zh/sentences.ini \
|
||||
profiles/zh/stop_words.txt ${RHASSPY_APP}/profiles/zh/
|
||||
|
||||
COPY profiles/hi/ \
|
||||
profiles/hi/profile.json \
|
||||
profiles/hi/custom_words.txt \
|
||||
profiles/hi/espeak_phonemes.txt \
|
||||
profiles/hi/phoneme_examples.txt \
|
||||
profiles/hi/frequent_words.txt \
|
||||
profiles/hi/sentences.ini \
|
||||
profiles/hi/stop_words.txt ${RHASSPY_APP}/profiles/hi/
|
||||
|
||||
COPY profiles/el/profile.json \
|
||||
profiles/el/custom_words.txt \
|
||||
profiles/el/espeak_phonemes.txt \
|
||||
profiles/el/phoneme_examples.txt \
|
||||
profiles/el/frequent_words.txt \
|
||||
profiles/el/sentences.ini \
|
||||
profiles/el/stop_words.txt ${RHASSPY_APP}/profiles/el/
|
||||
|
||||
COPY profiles/de/profile.json \
|
||||
profiles/de/custom_words.txt \
|
||||
profiles/de/espeak_phonemes.txt \
|
||||
profiles/de/phoneme_examples.txt \
|
||||
profiles/de/frequent_words.txt \
|
||||
profiles/de/sentences.ini \
|
||||
profiles/de/stop_words.txt ${RHASSPY_APP}/profiles/de/
|
||||
|
||||
COPY profiles/de/kaldi/custom_words.txt \
|
||||
profiles/de/kaldi/espeak_phonemes.txt \
|
||||
profiles/de/kaldi/phoneme_examples.txt \
|
||||
${RHASSPY_APP}/profiles/de/kaldi/
|
||||
|
||||
COPY profiles/it/profile.json \
|
||||
profiles/it/custom_words.txt \
|
||||
profiles/it/espeak_phonemes.txt \
|
||||
profiles/it/phoneme_examples.txt \
|
||||
profiles/it/frequent_words.txt \
|
||||
profiles/it/sentences.ini \
|
||||
profiles/it/stop_words.txt ${RHASSPY_APP}/profiles/it/
|
||||
|
||||
COPY profiles/es/profile.json \
|
||||
profiles/es/custom_words.txt \
|
||||
profiles/es/espeak_phonemes.txt \
|
||||
profiles/es/phoneme_examples.txt \
|
||||
profiles/es/frequent_words.txt \
|
||||
profiles/es/sentences.ini \
|
||||
profiles/es/stop_words.txt ${RHASSPY_APP}/profiles/es/
|
||||
|
||||
COPY profiles/fr/profile.json \
|
||||
profiles/fr/custom_words.txt \
|
||||
profiles/fr/espeak_phonemes.txt \
|
||||
profiles/fr/phoneme_examples.txt \
|
||||
profiles/fr/frequent_words.txt \
|
||||
profiles/fr/sentences.ini \
|
||||
profiles/fr/stop_words.txt ${RHASSPY_APP}/profiles/fr/
|
||||
|
||||
COPY profiles/ru/profile.json \
|
||||
profiles/ru/custom_words.txt \
|
||||
profiles/ru/espeak_phonemes.txt \
|
||||
profiles/ru/phoneme_examples.txt \
|
||||
profiles/ru/frequent_words.txt \
|
||||
profiles/ru/sentences.ini \
|
||||
profiles/ru/stop_words.txt ${RHASSPY_APP}/profiles/ru/
|
||||
|
||||
COPY profiles/nl/profile.json \
|
||||
profiles/nl/custom_words.txt \
|
||||
profiles/nl/espeak_phonemes.txt \
|
||||
profiles/nl/phoneme_examples.txt \
|
||||
profiles/nl/frequent_words.txt \
|
||||
profiles/nl/sentences.ini \
|
||||
profiles/nl/stop_words.txt ${RHASSPY_APP}/profiles/nl/
|
||||
|
||||
COPY profiles/nl/kaldi/custom_words.txt \
|
||||
profiles/nl/kaldi/espeak_phonemes.txt \
|
||||
profiles/nl/kaldi/phoneme_examples.txt \
|
||||
${RHASSPY_APP}/profiles/nl/kaldi/
|
||||
|
||||
COPY profiles/vi/profile.json \
|
||||
profiles/vi/custom_words.txt \
|
||||
profiles/vi/espeak_phonemes.txt \
|
||||
profiles/vi/phoneme_examples.txt \
|
||||
profiles/vi/frequent_words.txt \
|
||||
profiles/vi/sentences.ini \
|
||||
profiles/vi/stop_words.txt ${RHASSPY_APP}/profiles/vi/
|
||||
|
||||
COPY profiles/pt/profile.json \
|
||||
profiles/pt/custom_words.txt \
|
||||
profiles/pt/espeak_phonemes.txt \
|
||||
profiles/pt/phoneme_examples.txt \
|
||||
profiles/pt/frequent_words.txt \
|
||||
profiles/pt/sentences.ini \
|
||||
profiles/pt/stop_words.txt ${RHASSPY_APP}/profiles/pt/
|
||||
|
||||
COPY profiles/sv/profile.json \
|
||||
profiles/sv/custom_words.txt \
|
||||
profiles/sv/espeak_phonemes.txt \
|
||||
profiles/sv/phoneme_examples.txt \
|
||||
profiles/sv/frequent_words.txt \
|
||||
profiles/sv/sentences.ini \
|
||||
profiles/sv/stop_words.txt ${RHASSPY_APP}/profiles/sv/
|
||||
|
||||
COPY profiles/ca/profile.json \
|
||||
profiles/ca/custom_words.txt \
|
||||
profiles/ca/espeak_phonemes.txt \
|
||||
profiles/ca/phoneme_examples.txt \
|
||||
profiles/ca/frequent_words.txt \
|
||||
profiles/ca/sentences.ini \
|
||||
profiles/ca/stop_words.txt ${RHASSPY_APP}/profiles/ca/
|
||||
|
||||
COPY profiles/en/profile.json \
|
||||
profiles/en/custom_words.txt \
|
||||
profiles/en/espeak_phonemes.txt \
|
||||
profiles/en/phoneme_examples.txt \
|
||||
profiles/en/frequent_words.txt \
|
||||
profiles/en/sentences.ini \
|
||||
profiles/en/stop_words.txt ${RHASSPY_APP}/profiles/en/
|
||||
|
||||
COPY profiles/en/kaldi/custom_words.txt \
|
||||
profiles/en/kaldi/espeak_phonemes.txt \
|
||||
profiles/en/kaldi/phoneme_examples.txt \
|
||||
${RHASSPY_APP}/profiles/en/kaldi/
|
||||
|
||||
COPY profiles/defaults.json ${RHASSPY_APP}/profiles/
|
||||
COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -1,91 +0,0 @@
|
||||
ARG BUILD_FROM
|
||||
FROM $BUILD_FROM
|
||||
LABEL maintainer="Michael Hansen <hansen.mike@gmail.com>"
|
||||
|
||||
ARG BUILD_ARCH
|
||||
ARG CPU_ARCH
|
||||
ENV LANG C.UTF-8
|
||||
|
||||
ARG MAKE_THREADS=4
|
||||
|
||||
WORKDIR /
|
||||
|
||||
COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libfst-dev libfst-tools \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/rhasspy-tools_${BUILD_ARCH}.tar.gz /
|
||||
RUN tar -C /usr -xvf /rhasspy-tools_${BUILD_ARCH}.tar.gz && \
|
||||
rm -f /rhasspy-tools_${BUILD_ARCH}.tar.gz
|
||||
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
mv /requirements-noflair.txt /requirements.txt; \
|
||||
fi
|
||||
RUN python3 -m pip install --no-cache-dir -r /requirements.txt
|
||||
|
||||
# Install Pocketsphinx Python module with no sound
|
||||
COPY download/pocketsphinx-python.tar.gz /
|
||||
RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
rm -rf /pocketsphinx-python*
|
||||
|
||||
# Install snowboy
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
tar -C /opt -xzf /kaldi.tar.gz && \
|
||||
rm /kaldi.tar.gz
|
||||
|
||||
RUN ldconfig
|
||||
|
||||
# Copy bw and mllr_solve to /usr/bin
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
|
||||
ENV RHASSPY_APP /usr/share/rhasspy
|
||||
|
||||
# Copy script to run
|
||||
COPY docker/run.sh /run.sh
|
||||
RUN chmod +x /run.sh
|
||||
|
||||
|
||||
|
||||
COPY profiles/en/ ${RHASSPY_APP}/profiles/en/
|
||||
|
||||
COPY profiles/defaults.json ${RHASSPY_APP}/profiles/
|
||||
COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -1,108 +0,0 @@
|
||||
ARG BUILD_FROM
|
||||
FROM $BUILD_FROM
|
||||
LABEL maintainer="Michael Hansen <hansen.mike@gmail.com>"
|
||||
|
||||
ARG BUILD_ARCH
|
||||
ARG CPU_ARCH
|
||||
ENV LANG C.UTF-8
|
||||
|
||||
ARG MAKE_THREADS=4
|
||||
|
||||
WORKDIR /
|
||||
|
||||
COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libfst-dev libfst-tools \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/rhasspy-tools_${BUILD_ARCH}.tar.gz /
|
||||
RUN tar -C /usr -xvf /rhasspy-tools_${BUILD_ARCH}.tar.gz && \
|
||||
rm -f /rhasspy-tools_${BUILD_ARCH}.tar.gz
|
||||
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
mv /requirements-noflair.txt /requirements.txt; \
|
||||
fi
|
||||
RUN python3 -m pip install --no-cache-dir -r /requirements.txt
|
||||
|
||||
# Install Pocketsphinx Python module with no sound
|
||||
COPY download/pocketsphinx-python.tar.gz /
|
||||
RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
rm -rf /pocketsphinx-python*
|
||||
|
||||
# Install snowboy
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
tar -C /opt -xzf /kaldi.tar.gz && \
|
||||
rm /kaldi.tar.gz
|
||||
|
||||
RUN ldconfig
|
||||
|
||||
# Copy bw and mllr_solve to /usr/bin
|
||||
RUN find / -name bw -exec cp '{}' /usr/bin/ \;
|
||||
RUN find / -name mllr_solve -exec cp '{}' /usr/bin/ \;
|
||||
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
|
||||
RUN apt-get install -y pulseaudio
|
||||
COPY etc/pulseaudio.client.conf /etc/pulse/client.conf
|
||||
|
||||
# Create new user
|
||||
ENV UNAME=rhasspy
|
||||
RUN export UNAME=$UNAME UID=1000 GID=1000 && \
|
||||
mkdir -p "/home/${UNAME}" && \
|
||||
echo "${UNAME}:x:${UID}:${GID}:${UNAME} User,,,:/home/${UNAME}:/bin/bash" >> /etc/passwd && \
|
||||
echo "${UNAME}:x:${UID}:" >> /etc/group && \
|
||||
mkdir -p /etc/sudoers.d && \
|
||||
echo "${UNAME} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/${UNAME} && \
|
||||
chmod 0440 /etc/sudoers.d/${UNAME} && \
|
||||
chown ${UID}:${GID} -R /home/${UNAME} && \
|
||||
gpasswd -a ${UNAME} audio
|
||||
|
||||
ENV RHASSPY_APP /home/${UNAME}
|
||||
|
||||
# Copy script to run
|
||||
COPY docker/run.sh /run.sh
|
||||
RUN chmod +x /run.sh
|
||||
|
||||
# Switch to new user
|
||||
USER $UNAME
|
||||
ENV HOME /home/${UNAME}
|
||||
|
||||
COPY profiles/en/ ${RHASSPY_APP}/profiles/en/
|
||||
|
||||
COPY profiles/defaults.json ${RHASSPY_APP}/profiles/
|
||||
COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
@@ -1 +0,0 @@
|
||||
COPY profiles/en/ ${RHASSPY_APP}/profiles/en/
|
||||
@@ -1,7 +0,0 @@
|
||||
COPY download/phonetisaurus-2019.tar.gz /phonetisaurus.tar.gz
|
||||
RUN cd / && tar -xf phonetisaurus.tar.gz
|
||||
RUN cd /phonetisaurus && \
|
||||
./configure && \
|
||||
make -j $MAKE_THREADS && \
|
||||
make install && \
|
||||
rm -rf /phonetisaurus*
|
||||
@@ -1,18 +0,0 @@
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
|
||||
COPY download/openfst-1.6.2-1_${BUILD_ARCH}.deb /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
apt-get install --no-install-recommends --yes libfst-dev libfst-tools; \
|
||||
else \
|
||||
dpkg -i /openfst-1.6.2-1_${BUILD_ARCH}.deb; \
|
||||
rm /openfst*.deb; \
|
||||
fi
|
||||
@@ -1,67 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Directory of *this* script
|
||||
# Absolute path of the directory that contains this script: cd into the
# directory part of $0 and print the resulting working directory.
DIR="$( cd "$( dirname "$0" )" && pwd )"
|
||||
# Template file (next to this script) that is later piped through m4.
template="$DIR/Dockerfile.template"
|
||||
# Directory that receives the generated Dockerfiles.
out="$DIR/dockerfiles"
|
||||
# Create the output directory; -p makes this a no-op when it already exists.
mkdir -p "$out"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Uppercases an input string
|
||||
function upper {
    # Filter: reads text from standard input and writes it to standard output
    # with every lowercase letter converted to its uppercase counterpart.
    # All other characters pass through unchanged.
    #
    # The POSIX classes are given to tr without an extra pair of brackets;
    # '[[:lower:]]' would also add the literal '[' and ']' characters to both
    # translation sets, which only works by accident.
    tr '[:lower:]' '[:upper:]'
}
|
||||
|
||||
# Creates m4 "define" statements from text files in one or more directories.
|
||||
# The contents of dir/file.txt will be the value of variable FILE.
|
||||
function set_variables {
    # Prints m4 "define" statements for text files to standard output.
    #
    # Arguments: any number of paths. A directory contributes every regular
    # file below it whose name ends in ".txt"; a regular file contributes
    # itself. Arguments that are empty or name neither a directory nor a file
    # are skipped (an empty argument no longer ends processing of the
    # remaining ones).
    #
    # For each file, the variable name is the file's basename without ".txt",
    # uppercased via the upper function, and the value is the file's contents
    # (trailing newlines are dropped by the command substitution).
    #
    # The defines are wrapped in divert(-1) ... divert(0)dnl so that m4
    # discards any output produced while the definitions are being read.
    local arg var_file var_name

    echo "divert(-1)"

    for arg in "$@"; do
        # File names are passed NUL-delimited so that paths containing spaces
        # or newlines are not split into several words.
        while IFS= read -r -d '' var_file; do
            var_name=$(basename "$var_file" .txt | upper)
            echo "define(\`$var_name', \`$(cat "$var_file")')"
        done < <(
            if [[ -d "$arg" ]]; then
                find "$arg" -type f -name "*.txt" -print0
            elif [[ -f "$arg" ]]; then
                printf '%s\0' "$arg"
            fi
        )
    done

    echo "divert(0)dnl"
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
#------------
|
||||
# From source
|
||||
#------------
|
||||
# set_variables "$DIR/shared/" "$DIR/from-source/" \
|
||||
# "$DIR/alsa/" "$DIR/en_profile/" \
|
||||
# | cat - "$template" | m4 > "$out/Dockerfile.from-source.alsa.en"
|
||||
|
||||
# set_variables "$DIR/shared/" "$DIR/from-source/" \
|
||||
# "$DIR/pulseaudio/" "$DIR/en_profile/" \
|
||||
# | cat - "$template" | m4 > "$out/Dockerfile.from-source.pulseaudio.en"
|
||||
|
||||
# set_variables "$DIR/shared/" "$DIR/from-source/" \
|
||||
# "$DIR/alsa/" "$DIR/all_profiles/" \
|
||||
# | cat - "$template" | m4 > "$out/Dockerfile.from-source.alsa.all"
|
||||
|
||||
#-----------
|
||||
# Pre-built
|
||||
#-----------
|
||||
# set_variables "$DIR/shared/" "$DIR/prebuilt/" \
|
||||
# "$DIR/alsa/" "$DIR/en_profile/" \
|
||||
# | cat - "$template" | m4 > "$out/Dockerfile.prebuilt.alsa.en"
|
||||
|
||||
# set_variables "$DIR/shared/" "$DIR/prebuilt/" \
|
||||
# "$DIR/pulseaudio/" "$DIR/en_profile/" \
|
||||
# | cat - "$template" | m4 > "$out/Dockerfile.prebuilt.pulseaudio.en"
|
||||
|
||||
# Emit m4 defines for the shared/, prebuilt/, alsa/ and all_profiles/
# directories, prepend them to the template ("cat -" reads the defines from
# stdin), and let m4 expand the result into Dockerfile.prebuilt.alsa.all
# inside the output directory.
set_variables "$DIR/shared/" "$DIR/prebuilt/" \
|
||||
"$DIR/alsa/" "$DIR/all_profiles/" \
|
||||
| cat - "$template" | m4 > "$out/Dockerfile.prebuilt.alsa.all"
|
||||
@@ -1,3 +0,0 @@
|
||||
COPY download/phonetisaurus-2019_${BUILD_ARCH}.deb /phonetisaurus.deb
|
||||
RUN dpkg -i /phonetisaurus.deb && \
|
||||
rm /phonetisaurus.deb
|
||||
@@ -1,3 +0,0 @@
|
||||
COPY download/rhasspy-tools_${BUILD_ARCH}.tar.gz /
|
||||
RUN tar -C /usr -xvf /rhasspy-tools_${BUILD_ARCH}.tar.gz && \
|
||||
rm -f /rhasspy-tools_${BUILD_ARCH}.tar.gz
|
||||
@@ -1,10 +0,0 @@
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends --yes \
|
||||
bash jq unzip \
|
||||
python3 python3-pip python3-dev \
|
||||
build-essential portaudio19-dev swig \
|
||||
libatlas-base-dev \
|
||||
sox espeak flite alsa-utils \
|
||||
git curl \
|
||||
autoconf libtool automake bison \
|
||||
sphinxbase-utils sphinxtrain
|
||||
@@ -1,16 +0,0 @@
|
||||
RUN apt-get install -y pulseaudio
|
||||
COPY etc/pulseaudio.client.conf /etc/pulse/client.conf
|
||||
|
||||
# Create new user
|
||||
ENV UNAME=rhasspy
|
||||
RUN export UNAME=$UNAME UID=1000 GID=1000 && \
|
||||
mkdir -p "/home/${UNAME}" && \
|
||||
echo "${UNAME}:x:${UID}:${GID}:${UNAME} User,,,:/home/${UNAME}:/bin/bash" >> /etc/passwd && \
|
||||
echo "${UNAME}:x:${UID}:" >> /etc/group && \
|
||||
mkdir -p /etc/sudoers.d && \
|
||||
echo "${UNAME} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/${UNAME} && \
|
||||
chmod 0440 /etc/sudoers.d/${UNAME} && \
|
||||
chown ${UID}:${GID} -R /home/${UNAME} && \
|
||||
gpasswd -a ${UNAME} audio
|
||||
|
||||
ENV RHASSPY_APP /home/${UNAME}
|
||||
@@ -1,3 +0,0 @@
|
||||
# Switch to new user
|
||||
USER $UNAME
|
||||
ENV HOME /home/${UNAME}
|
||||
@@ -1,2 +0,0 @@
|
||||
# Install gstreamer and plugins
|
||||
RUN apt-get install --no-install-recommends --yes gstreamer1.0-tools gstreamer1.0-plugins-good
|
||||
@@ -1,4 +0,0 @@
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
tar -C /opt -xzf /kaldi.tar.gz && \
|
||||
rm /kaldi.tar.gz
|
||||
@@ -1,9 +0,0 @@
|
||||
# Install mitlm
|
||||
RUN apt-get install --no-install-recommends --yes gfortran
|
||||
COPY download/mitlm-0.4.2.tar.xz /
|
||||
RUN cd / && tar -xf mitlm-0.4.2.tar.xz && cd mitlm-0.4.2/ && \
|
||||
./configure && \
|
||||
make -j $MAKE_THREADS && \
|
||||
make install && \
|
||||
rm -rf /mitlm-0.4.2*
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
# Install Mycroft Precise
|
||||
COPY download/precise-engine_0.3.0_${CPU_ARCH}.tar.gz /precise-engine.tar.gz
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then \
|
||||
cd / && tar -xzf /precise-engine.tar.gz && \
|
||||
ln -s /precise-engine/precise-engine /usr/bin/precise-engine && \
|
||||
rm /precise-engine.tar.gz; \
|
||||
fi
|
||||
@@ -1,9 +0,0 @@
|
||||
# Install Opengrm
|
||||
COPY download/opengrm-ngram-1.3.3.tar.gz /
|
||||
RUN cd / && tar -xf opengrm-ngram-1.3.3.tar.gz && \
|
||||
cd opengrm-ngram-1.3.3 && \
|
||||
./configure && \
|
||||
make -j $MAKE_THREADS && \
|
||||
make install && \
|
||||
ldconfig && \
|
||||
rm -rf /opengrm*
|
||||
@@ -1,4 +0,0 @@
|
||||
# Install Pocketsphinx Python module with no sound
|
||||
COPY download/pocketsphinx-python.tar.gz /
|
||||
RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
rm -rf /pocketsphinx-python*
|
||||
@@ -1,10 +0,0 @@
|
||||
# Install Python dependencies
|
||||
RUN python3 -m pip install --no-cache-dir setuptools wheel
|
||||
|
||||
RUN apt-get install -y libfreetype6-dev libpng-dev pkg-config libffi-dev libssl-dev
|
||||
COPY requirements.txt /requirements.txt
|
||||
RUN if [ "$BUILD_ARCH" != "amd64" ]; then \
|
||||
grep -v flair /requirements.txt > /requirements-noflair.txt; \
|
||||
mv /requirements-noflair.txt /requirements.txt; \
|
||||
fi
|
||||
RUN python3 -m pip install --no-cache-dir -r /requirements.txt
|
||||
@@ -1,2 +0,0 @@
|
||||
COPY etc/qemu-arm-static /usr/bin/
|
||||
COPY etc/qemu-aarch64-static /usr/bin/
|
||||
@@ -1,9 +0,0 @@
|
||||
COPY profiles/defaults.json ${RHASSPY_APP}/profiles/
|
||||
COPY docker/rhasspy ${RHASSPY_APP}/bin/
|
||||
COPY dist/ ${RHASSPY_APP}/dist/
|
||||
COPY etc/wav/* ${RHASSPY_APP}/etc/wav/
|
||||
COPY rhasspy/profile_schema.json ${RHASSPY_APP}/rhasspy/
|
||||
COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
@@ -1,3 +0,0 @@
|
||||
# Install snowboy
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
@@ -1 +0,0 @@
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
@@ -1 +1 @@
|
||||
theme: jekyll-theme-cayman
|
||||
theme: jekyll-theme-cayman
|
||||
|
||||
@@ -2,15 +2,49 @@
|
||||
|
||||
Rhasspy was created and is currently maintained by [Michael Hansen](https://synesthesiam.com/).
|
||||
|
||||

|
||||
<img src="../img/mike-head.png" style="max-height: 100px;" title="Mike head">
|
||||
|
||||
Special thanks to:
|
||||
|
||||
* [Romkabouter](https://github.com/Romkabouter)
|
||||
* [koenvervloesem](https://github.com/koenvervloesem)
|
||||
* [FunkyBoT](https://community.home-assistant.io/u/FunkyBoT)
|
||||
* [fastjack](https://community.rhasspy.org/u/fastjack)
|
||||
* [S_n_Nguy_n](https://community.home-assistant.io/u/S_n_Nguy_n)
|
||||
|
||||
## Motivation
|
||||
|
||||
A typical voice assistant (Alexa, Google Home, etc.) solves a number of important problems:
|
||||
|
||||
1. Deciding when to record audio ([wake word](wake-word.md))
|
||||
2. Listening for voice commands ([command listener](command-listener.md))
|
||||
3. Transcribing command/question ([speech to text](speech-to-text.md))
|
||||
4. Interpreting the speaker's **intent** from the text ([intent recognition](intent-recognition.md))
|
||||
5. Fulfilling the speaker's intent ([intent handling](intent-handling.md))
|
||||
|
||||
Rhasspy provides **offline, private solutions** to problems 1-4 using off-the-shelf tools. These tools are:
|
||||
|
||||
* **Wake word**
|
||||
* [Pocketsphinx keyphrase](https://cmusphinx.github.io/wiki/tutoriallm/#using-keyword-lists-with-pocketsphinx)
|
||||
* [Mycroft Precise](https://github.com/MycroftAI/mycroft-precise)
|
||||
* [snowboy](https://snowboy.kitt.ai)
|
||||
* [porcupine](https://github.com/Picovoice/Porcupine)
|
||||
* **Command listener**
|
||||
* [webrtcvad](https://github.com/wiseman/py-webrtcvad)
|
||||
* **Speech to text**
|
||||
* [Pocketsphinx](https://github.com/cmusphinx/pocketsphinx)
|
||||
* [Kaldi](https://kaldi-asr.org)
|
||||
* **Intent recognition**
|
||||
* [OpenFST](https://www.openfst.org)
|
||||
* [fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy)
|
||||
* [Mycroft Adapt](https://github.com/MycroftAI/adapt)
|
||||
* [flair](http://github.com/zalandoresearch/flair)
|
||||
* [Rasa NLU](https://rasa.com/)
|
||||
|
||||
For problem 5 (fulfilling the speaker's intent), Rhasspy works with external home automation software, such as Home Assistant's built-in [automation capability](https://www.home-assistant.io/docs/automation/) or a [Node-RED flow](https://nodered.org).
|
||||
|
||||
For each intent you define, Rhasspy emits a JSON event that can do anything Home Assistant can do (toggle switches, call REST services, etc.). This means that Rhasspy will do very little out of the box compared to other voice assistants, but there are also *no limits* to what can be done.
|
||||
|
||||
## Supporting Tools
|
||||
|
||||
The following tools/libraries help to support Rhasspy:
|
||||
|
||||
@@ -22,11 +22,11 @@ Add to your [profile](profiles.md):
|
||||
```
|
||||
|
||||
Set `microphone.pyaudio.device` to a PyAudio device number or leave blank for the default device.
|
||||
Streams 30ms chunks of 16-bit, 16 Khz mono audio by default (480 frames).
|
||||
Streams 30ms chunks of 16-bit, 16 kHz mono audio by default (480 frames).
|
||||
|
||||
See `rhasspy.audio_recorder.PyAudioRecorder` for details.
|
||||
|
||||
## ALSA
|
||||
## ALSA
|
||||
|
||||
Starts an `arecord` process locally and reads audio data from its standard out.
|
||||
Works best with [ALSA](https://www.alsa-project.org/main/index.php/Main_Page).
|
||||
@@ -42,7 +42,7 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Set `microphone.arecord.device` to the name of the ALSA device to use (`-D` flag
|
||||
to `arecord`) or leave blank for the default device.
|
||||
By default, calls `arecord -t raw -r 16000 -f S16_LE -c 1` and reads 30ms (960
|
||||
@@ -52,7 +52,7 @@ See `rhasspy.audio_recorder.ARecordAudioRecorder` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Listens to the `hermes/audioServer/<SITE_ID>/audioFrame` topic for WAV data ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol)).
|
||||
Listens to the `hermes/audioServer/<SITE_ID>/audioFrame` topic for WAV data ([Hermes protocol](https://docs.snips.ai/reference/hermes)).
|
||||
This allows Rhasspy to receive audio from [Snips.AI](https://snips.ai/).
|
||||
Audio data is automatically converted to 16-bit, 16 kHz mono with [sox](http://sox.sourceforge.net).
|
||||
|
||||
@@ -69,10 +69,18 @@ Add to your [profile](profiles.md):
|
||||
"username": "",
|
||||
"port": 1883,
|
||||
"password": "",
|
||||
"site_id": "default"
|
||||
"site_id": "default",
|
||||
"tls": {
|
||||
"enabled": false,
|
||||
"ca_certs": "",
|
||||
"cert_reqs": "CERT_REQUIRED",
|
||||
"certfile": "",
|
||||
"ciphers": "",
|
||||
"keyfile": ""
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Adjust the `mqtt` configuration to connect to your MQTT broker.
|
||||
Set `mqtt.site_id` to match your Snips.AI siteId.
|
||||
|
||||
@@ -80,7 +88,7 @@ See `rhasspy.audio_recorder.HermesAudioRecorder` for details.
|
||||
|
||||
## HTTP Stream
|
||||
|
||||
Accepts chunks of 16-bit 16Khz mono audio via an HTTP POST stream (assumes [chunked transfer encoding](https://en.wikipedia.org/wiki/Chunked_transfer_encoding)).
|
||||
Accepts chunks of 16-bit 16 kHz mono audio via an HTTP POST stream (assumes [chunked transfer encoding](https://en.wikipedia.org/wiki/Chunked_transfer_encoding)).
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -95,7 +103,7 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
```
|
||||
|
||||
Set `microphone.http.stop_after` to one of "never", "text", or "intent". When set to "never", you can continously stream (chunked) audio into Rhasspy across multiple voice commands. When set to "text" or "intent", the stream will be closed when the first voice command has been transcribed ("text") or recognized ("intent"). Once closed, you can perform an HTTP GET request to the stream URL to retrieve the result (text for transcriptions or JSON for intent).
|
||||
Set `microphone.http.stop_after` to one of "never", "text", or "intent". When set to "never", you can continuously stream (chunked) audio into Rhasspy across multiple voice commands. When set to "text" or "intent", the stream will be closed when the first voice command has been transcribed ("text") or recognized ("intent"). Once closed, you can perform an HTTP GET request to the stream URL to retrieve the result (text for transcriptions or JSON for intent).
|
||||
|
||||
Note that `microphone.http.port` must be different than Rhasspy's webserver port (usually 12101).
|
||||
|
||||
@@ -122,7 +130,7 @@ Set `microphone.gstreamer.pipeline` to your GStreamer pipeline **without a sink*
|
||||
udpsrc port=12333 ! rawaudioparse use-sink-caps=false format=pcm pcm-format=s16le sample-rate=16000 num-channels=1 ! queue ! audioconvert ! audioresample
|
||||
```
|
||||
|
||||
which "simply" receives raw 16-bit 16khz audio chunks via UDP port 12333. You could stream microphone audio to Rhasspy from another machine by running the following terminal command:
|
||||
which "simply" receives raw 16-bit 16 kHz audio chunks via UDP port 12333. You could stream microphone audio to Rhasspy from another machine by running the following terminal command:
|
||||
|
||||
```bash
|
||||
gst-launch-1.0 \
|
||||
@@ -152,4 +160,3 @@ Add to your [profile](profiles.md):
|
||||
```
|
||||
|
||||
See `rhasspy.audio_recorder.DummyAudioRecorder` for details.
|
||||
|
||||
|
||||
@@ -9,41 +9,52 @@ Plays WAV files on the local device by calling the `aplay` command. Should work
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
"sounds": {
|
||||
"system": "aplay",
|
||||
"aplay": {
|
||||
"device": ""
|
||||
}
|
||||
}
|
||||
|
||||
```json
|
||||
"sounds": {
|
||||
"system": "aplay",
|
||||
"aplay": {
|
||||
"device": ""
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If provided, `sounds.aplay.device` is passed to `aplay` with the `-D` argument.
|
||||
Leave it blank to use the default device.
|
||||
|
||||
See `rhasspy.audio_player.APlayAudioPlayer` for details.
|
||||
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Publishes WAV data to the `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>` topic ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol)).
|
||||
Publishes WAV data to the `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>` topic ([Hermes protocol](https://docs.snips.ai/reference/hermes)).
|
||||
This allows Rhasspy to send audio to [Snips.AI](https://snips.ai/).
|
||||
|
||||
Rhasspy will always try to send 16 kHz, 16-bit mono audio.
|
||||
Rhasspy will by default send 16 kHz, 16-bit mono audio, unless specified otherwise.
|
||||
The request id is generated each time a sound is played using `uuid.uuid4`.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
"sounds": {
|
||||
"system": "hermes"
|
||||
},
|
||||
|
||||
"mqtt": {
|
||||
"enabled": true,
|
||||
"host": "localhost",
|
||||
"username": "",
|
||||
"port": 1883,
|
||||
"password": "",
|
||||
"site_id": "default"
|
||||
}
|
||||
```json
|
||||
"sounds": {
|
||||
"system": "hermes"
|
||||
},
|
||||
|
||||
"mqtt": {
|
||||
"enabled": true,
|
||||
"host": "localhost",
|
||||
"username": "",
|
||||
"port": 1883,
|
||||
"password": "",
|
||||
"site_id": "default",
|
||||
"tls": {
|
||||
"enabled": false,
|
||||
"ca_certs": "",
|
||||
"cert_reqs": "CERT_REQUIRED",
|
||||
"certfile": "",
|
||||
"ciphers": "",
|
||||
"keyfile": ""
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Adjust the `mqtt` configuration to connect to your MQTT broker.
|
||||
Set `mqtt.site_id` to match your Snips.AI siteId.
|
||||
|
||||
@@ -11,7 +11,6 @@ You can also make Rhasspy record a voice command using the [HTTP API](usage.md#h
|
||||
2. Speaking your voice command
|
||||
3. POST-ing to `/api/stop-recording`. Rhasspy will stop recording and process the voice command.
|
||||
|
||||
|
||||
## WebRTCVAD
|
||||
|
||||
Listens for voice commands using [webrtcvad](https://github.com/wiseman/py-webrtcvad) to detect speech and silence.
|
||||
@@ -33,11 +32,11 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
This system listens for up to `timeout_sec` for a voice command. The first few frames of audio data are discarded (`throwaway_buffers`) to avoid clicks from the microphone being engaged. When speech is detected for some number of successive frames (`speech_buffers`), the voice command is considered to have *started*. After `min_sec`, Rhasspy will start listening for silence. If at least `silence_sec` goes by without any speech detected, the command is considered *finished*, and the recorded WAV data is sent to the [speech recognition system](speech-to-text.md).
|
||||
|
||||
You may want to adjust `min_sec`, `silence_sec`, and `vad_mode` for your environment.
|
||||
These control how short a voice command can be (`min_sec`), how much silence is required before Rhasspy stops listening (`silence_sec`), and how sensitive the voice activity detector is (`vad_mode`, higher is more sensitive).
|
||||
These control how short a voice command can be (`min_sec`), how much silence is required before Rhasspy stops listening (`silence_sec`), and how aggressive the voice activity filter `vad_mode` is: this is an integer between 0 and 3. 0 is the least aggressive about filtering out non-speech, 3 is the most aggressive.
|
||||
|
||||
**NOTE**: you must set `chunk_size` such that (relative to sample rate) it produces 10, 20, or 30 millisecond buffers. This is required by `webrtcvad`.
|
||||
|
||||
@@ -60,15 +59,15 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
See `rhasspy.command_listener.OneShotCommandListener` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Subscribes to the `hermes/asr/startListening` and `hermes/asr/stopListening` topics ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol)).
|
||||
Subscribes to the `hermes/asr/startListening` and `hermes/asr/stopListening` topics ([Hermes protocol](https://docs.snips.ai/reference/hermes)).
|
||||
This allows Rhasspy to be controlled by [Snips.AI](https://snips.ai/).
|
||||
|
||||
Wakes up Rhasspy when `startListening` is received and starts recording. Stops recording when `stopListening` is received and processes the voice command.
|
||||
Wakes up Rhasspy when `startListening` is received and starts recording. Stops recording when `stopListening` is received and processes the voice command.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -87,7 +86,15 @@ Add to your [profile](profiles.md):
|
||||
"username": "",
|
||||
"port": 1883,
|
||||
"password": "",
|
||||
"site_id": "default"
|
||||
"site_id": "default",
|
||||
"tls": {
|
||||
"enabled": false,
|
||||
"ca_certs": "",
|
||||
"cert_reqs": "CERT_REQUIRED",
|
||||
"certfile": "",
|
||||
"ciphers": "",
|
||||
"keyfile": ""
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -96,12 +103,16 @@ Set `mqtt.site_id` to match your Snips.AI siteId.
|
||||
|
||||
Using [mosquitto_pub](https://mosquitto.org/man/mosquitto_pub-1.html), wake up Rhasspy with:
|
||||
|
||||
mosquitto_pub -t 'hermes/asr/startListening' -m '{ "siteId": "default" }'
|
||||
|
||||
```bash
|
||||
mosquitto_pub -t 'hermes/asr/startListening' -m '{ "siteId": "default" }'
|
||||
```
|
||||
|
||||
Say your voice command, then stop recording with:
|
||||
|
||||
mosquitto_pub -t 'hermes/asr/stopListening' -m '{ "siteId": "default" }'
|
||||
|
||||
```bash
|
||||
mosquitto_pub -t 'hermes/asr/stopListening' -m '{ "siteId": "default" }'
|
||||
```
|
||||
|
||||
Rhasspy should process your voice command.
|
||||
|
||||
See `rhasspy.command_listener.HermesCommandListener` for details.
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
# Development
|
||||
|
||||
Rhasspy's code can be found [on GitHub](https://github.com/synesthesiam/rhasspy).
|
||||
|
||||
## Set up your development environment
|
||||
|
||||
If you want to start developing on Rhasspy, [fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the repository, and clone your fork:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/<your_username>/rhasspy.git
|
||||
cd rhasspy
|
||||
```
|
||||
|
||||
Add the original repository as an [upstream remote](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/configuring-a-remote-for-a-fork):
|
||||
|
||||
```bash
|
||||
git remote add upstream https://github.com/synesthesiam/rhasspy.git
|
||||
```
|
||||
|
||||
Then follow the installation steps for a [virtual environment](installation.md#virtual-environment). If the `create-venv.sh` script fails, please [report an issue](https://github.com/synesthesiam/rhasspy/issues) before proceeding.
|
||||
|
||||
If you pull changes, make sure to re-download and extract `rhasspy-web-dist.tar.gz` from [the releases page](https://github.com/synesthesiam/rhasspy/releases/tag/v2.0). This contains the pre-compiled web artifacts. Alternatively, you can install [yarn](https://yarnpkg.com) and run `yarn build` in the `rhasspy` directory after a `git pull`.
|
||||
|
||||
## Run the unit tests
|
||||
|
||||
A good start to check whether your development environment is set up correctly (or to find some bugs) is to run the unit tests:
|
||||
|
||||
```bash
|
||||
./run-tests.sh
|
||||
```
|
||||
|
||||
This will run tests against pre-recorded WAV files in `rhasspy/etc/test` for specific languages. You can run tests only for a specific language (profile) like this:
|
||||
|
||||
```bash
|
||||
./run-tests.sh -p en
|
||||
```
|
||||
|
||||
It’s good practice to run the unit tests before and after you work on something, to be sure your changes don't accidentally break something.
|
||||
|
||||
## Keeping your fork synchronized
|
||||
|
||||
When the upstream repository has new commits, you should [synchronize your fork](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/syncing-a-fork):
|
||||
|
||||
```bash
|
||||
git fetch upstream
|
||||
git checkout master
|
||||
git merge upstream/master
|
||||
```
|
||||
|
||||
Then [update your fork on GitHub](https://help.github.com/en/github/using-git/pushing-commits-to-a-remote-repository):
|
||||
|
||||
```bash
|
||||
git push
|
||||
```
|
||||
|
||||
Your fork is now synchronized to the original repository.
|
||||
|
||||
## Development practices
|
||||
|
||||
* Before starting significant work, please propose it and discuss it first on the [issue tracker](https://github.com/synesthesiam/rhasspy/issues) on GitHub. Other people may have suggestions, will want to collaborate and will wish to review your code.
|
||||
* Please work on one piece of conceptual work at a time. Keep each narrative of work in a different branch.
|
||||
* As much as possible, have each commit solve one problem.
|
||||
* A commit must not leave the project in a non-functional state.
|
||||
* Run the unit tests before you create a commit.
|
||||
* Treat code, tests and documentation as one.
|
||||
* Create a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork) from your fork.
|
||||
|
||||
## Development workflow
|
||||
|
||||
If you want to start working on a specific feature or bug fix, this is an example workflow:
|
||||
|
||||
* Synchronize your fork with the upstream repository.
|
||||
* Create a new branch: `git checkout -b <nameofbranch>`
|
||||
* Create your changes.
|
||||
* Add the changed files with `git add <files>`.
|
||||
* Commit your changes with `git commit`.
|
||||
* Push your changes to your fork on GitHub.
|
||||
* Create a pull request from your fork.
|
||||
|
||||
## License of contributions
|
||||
|
||||
By submitting patches to this project, you agree to allow them to be redistributed under the project’s [license](license.md) according to the normal forms and usages of the open source community.
|
||||
|
||||
It is your responsibility to make sure you have all the necessary rights to contribute to the project.
|
||||
@@ -4,6 +4,9 @@ Rhasspy is designed to be run on different kinds of hardware, such as:
|
||||
|
||||
* Raspberry Pi 2-3 B/B+ (`armhf`/`aarch64`)
|
||||
* Desktop/laptop/server (`amd64`)
|
||||
* Raspberry Pi Zero (`armv6l`)
|
||||
* You must use a [virtual environment](installation.md#virtual-environment)
|
||||
* The [Kaldi speech recognizer](speech-to-text.md#kaldi) is **not** supported
|
||||
|
||||
The table below summarizes architecture compatibility with Rhasspy's components:
|
||||
|
||||
@@ -30,7 +33,7 @@ The table below summarizes architecture compatibility with Rhasspy's components:
|
||||
|
||||
To run Rhasspy on a Raspberry Pi, you'll need at least a 4 GB SD card and a good power supply. I highly recommend the [CanaKit Starter Kit](https://www.amazon.com/CanaKit-Raspberry-Starter-Premium-Black/dp/B07BCC8PK7), which includes a 32 GB SD card, a 2.5 A power supply, and a case.
|
||||
|
||||
Some components of Rhasspy will not work on the Raspberry Pi 3 B+ model (`aarch64`). As of the time of this writing, these are:
|
||||
Some components of Rhasspy will not work on the Raspberry Pi 3 B+ model with a 64-bit operating system (`aarch64`). As of the time of this writing, these are:
|
||||
|
||||
* [snowboy](wake-word.md#snowboy) (wake word)
|
||||
* [Mycroft Precise](wake-word.md#mycroft-precise) (wake word)
|
||||
|
||||
|
After Width: | Height: | Size: 20 KiB |
@@ -0,0 +1,140 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="149.42726mm"
|
||||
height="36.848656mm"
|
||||
viewBox="0 0 149.42726 36.848656"
|
||||
version="1.1"
|
||||
id="svg860"
|
||||
inkscape:version="0.92.3 (2405546, 2018-03-11)"
|
||||
sodipodi:docname="rhasspy-discourse-logo.svg"
|
||||
inkscape:export-filename="./rhasspy-discourse-logo.png"
|
||||
inkscape:export-xdpi="82.716721"
|
||||
inkscape:export-ydpi="82.716721">
|
||||
<defs
|
||||
id="defs854" />
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="0.9899495"
|
||||
inkscape:cx="268.11251"
|
||||
inkscape:cy="139.11788"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="false"
|
||||
fit-margin-top="0"
|
||||
fit-margin-left="0"
|
||||
fit-margin-right="0"
|
||||
fit-margin-bottom="0"
|
||||
inkscape:window-width="1440"
|
||||
inkscape:window-height="755"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="0"
|
||||
inkscape:window-maximized="1" />
|
||||
<metadata
|
||||
id="metadata857">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"
|
||||
transform="translate(47.552776,-100.1735)">
|
||||
<circle
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="path1476"
|
||||
cx="-29.128448"
|
||||
cy="118.59783"
|
||||
r="18.174328" />
|
||||
<g
|
||||
transform="matrix(0.80207931,0,0,0.80207931,-74.139422,96.215375)"
|
||||
id="g2275">
|
||||
<g
|
||||
id="text817"
|
||||
style="font-style:normal;font-weight:normal;font-size:41.37965775px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1.03449142"
|
||||
transform="rotate(-45)"
|
||||
aria-label="R">
|
||||
<path
|
||||
sodipodi:nodetypes="ccccccccccccccccssccccccccc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path819"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:41.38083267px;font-family:'CC Adamantium';-inkscape-font-specification:'CC Adamantium, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:1.03449142"
|
||||
d="M 31.509252,62.491941 31.16794,75.667305 H 30.83455 L 28.604505,65.027061 15.316738,59.73528 13.781121,75.667305 H 13.286086 L 11.528138,71.268365 10.110857,66.929543 9.526899,61.048002 6.9616279,56.651114 8.7144255,54.299084 6.356732,51.899807 8.2521138,50.246675 6.1006224,45.789404 9.891565,42.358563 c 2.435726,-1.492588 4.806268,-0.545105 7.30443,-1.317335 4.174203,-1.290327 7.29492,-1.792422 11.275957,5.059621 0.756691,1.302392 3.239334,1.578749 4.130578,3.198298 -0.882306,1.555823 -2.064327,2.923061 -3.546063,4.101714 -1.481735,1.171918 -3.152055,2.175457 -5.01096,3.010617 -1.852169,0.828425 -3.852512,1.535617 -6.001029,2.121576 z M 25.51612,48.388298 c -6.142518,4.42909 -6.341445,-0.106922 -8.663766,-3.716207 l -1.283048,13.860963 c 5.545523,-1.913183 8.340713,-6.051669 9.946814,-10.144756 z" />
|
||||
</g>
|
||||
<ellipse
|
||||
ry="0.93544334"
|
||||
rx="0.33408689"
|
||||
cy="21.859995"
|
||||
cx="52.059788"
|
||||
id="path2115"
|
||||
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1" />
|
||||
<ellipse
|
||||
transform="rotate(-45)"
|
||||
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1"
|
||||
id="ellipse2117"
|
||||
cx="18.873178"
|
||||
cy="50.914211"
|
||||
rx="0.33408689"
|
||||
ry="0.93544334" />
|
||||
<path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2119"
|
||||
d="m 64.331743,23.950737 -0.788701,-2.167883 0.785662,-0.376444 0.715334,2.441702 z"
|
||||
style="fill:#000000;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
style="fill:#000000;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 69.630908,29.701094 1.48309,-1.766977 0.718843,0.492181 -1.75691,1.840348 z"
|
||||
id="path2121"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
sodipodi:nodetypes="cscc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2123"
|
||||
d="m 47.978861,19.145376 c -0.0362,0.284741 -0.632118,0.443544 -1.331028,0.354698 -0.698909,-0.08885 -1.236142,-0.391701 -1.199944,-0.676442 z"
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1" />
|
||||
<path
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1"
|
||||
d="m 49.344113,18.846496 c 0.224679,0.178626 0.762248,-0.123625 1.200693,-0.675116 0.438451,-0.55148 0.611752,-1.143345 0.387075,-1.321972 z"
|
||||
id="path2126"
|
||||
inkscape:connector-curvature="0"
|
||||
sodipodi:nodetypes="cscc" />
|
||||
<path
|
||||
sodipodi:nodetypes="ccc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2128"
|
||||
d="m 43.707615,19.788656 8.68626,10.557147 c 2.944473,-4.699489 1.792375,-9.398979 -0.200452,-14.098468"
|
||||
style="opacity:1;fill:none;stroke:#808080;stroke-width:0.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
</g>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:30.12816238px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5"
|
||||
x="-5.9640822"
|
||||
y="128.49496"
|
||||
id="text824"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan822"
|
||||
x="-5.9640822"
|
||||
y="128.49496"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:30.12828064px;font-family:'Sansus Webissimo';-inkscape-font-specification:'Sansus Webissimo, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#ffffff;stroke:#000000;stroke-width:0.5">RHASSPY</tspan></text>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 6.9 KiB |
|
After Width: | Height: | Size: 16 KiB |
|
After Width: | Height: | Size: 38 KiB |
@@ -0,0 +1,123 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="36.848656mm"
|
||||
height="36.848656mm"
|
||||
viewBox="0 0 36.848656 36.848656"
|
||||
version="1.1"
|
||||
id="svg860"
|
||||
inkscape:version="0.92.3 (2405546, 2018-03-11)"
|
||||
sodipodi:docname="rhasspy-raven-square.svg"
|
||||
inkscape:export-filename="./rhasspy-discourse-square-logo-nocircle.png"
|
||||
inkscape:export-xdpi="352.92468"
|
||||
inkscape:export-ydpi="352.92468">
|
||||
<defs
|
||||
id="defs854" />
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="1.979899"
|
||||
inkscape:cx="-98.08577"
|
||||
inkscape:cy="43.808495"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="false"
|
||||
fit-margin-top="0"
|
||||
fit-margin-left="0"
|
||||
fit-margin-right="0"
|
||||
fit-margin-bottom="0"
|
||||
inkscape:window-width="1440"
|
||||
inkscape:window-height="755"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="0"
|
||||
inkscape:window-maximized="1" />
|
||||
<metadata
|
||||
id="metadata857">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"
|
||||
transform="translate(47.552776,-100.1735)">
|
||||
<g
|
||||
transform="matrix(0.80207931,0,0,0.80207931,-74.139422,96.215375)"
|
||||
id="g2275">
|
||||
<g
|
||||
id="text817"
|
||||
style="font-style:normal;font-weight:normal;font-size:41.37965775px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1.03449142"
|
||||
transform="rotate(-45)"
|
||||
aria-label="R">
|
||||
<path
|
||||
sodipodi:nodetypes="ccccccccccccccccssccccccccc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path819"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:41.38083267px;font-family:'CC Adamantium';-inkscape-font-specification:'CC Adamantium, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:1.03449142"
|
||||
d="M 31.509252,62.491941 31.16794,75.667305 H 30.83455 L 28.604505,65.027061 15.316738,59.73528 13.781121,75.667305 H 13.286086 L 11.528138,71.268365 10.110857,66.929543 9.526899,61.048002 6.9616279,56.651114 8.7144255,54.299084 6.356732,51.899807 8.2521138,50.246675 6.1006224,45.789404 9.891565,42.358563 c 2.435726,-1.492588 4.806268,-0.545105 7.30443,-1.317335 4.174203,-1.290327 7.29492,-1.792422 11.275957,5.059621 0.756691,1.302392 3.239334,1.578749 4.130578,3.198298 -0.882306,1.555823 -2.064327,2.923061 -3.546063,4.101714 -1.481735,1.171918 -3.152055,2.175457 -5.01096,3.010617 -1.852169,0.828425 -3.852512,1.535617 -6.001029,2.121576 z M 25.51612,48.388298 c -6.142518,4.42909 -6.341445,-0.106922 -8.663766,-3.716207 l -1.283048,13.860963 c 5.545523,-1.913183 8.340713,-6.051669 9.946814,-10.144756 z" />
|
||||
</g>
|
||||
<ellipse
|
||||
ry="0.93544334"
|
||||
rx="0.33408689"
|
||||
cy="21.859995"
|
||||
cx="52.059788"
|
||||
id="path2115"
|
||||
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1" />
|
||||
<ellipse
|
||||
transform="rotate(-45)"
|
||||
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1"
|
||||
id="ellipse2117"
|
||||
cx="18.873178"
|
||||
cy="50.914211"
|
||||
rx="0.33408689"
|
||||
ry="0.93544334" />
|
||||
<path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2119"
|
||||
d="m 64.331743,23.950737 -0.788701,-2.167883 0.785662,-0.376444 0.715334,2.441702 z"
|
||||
style="fill:#000000;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
style="fill:#000000;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 69.630908,29.701094 1.48309,-1.766977 0.718843,0.492181 -1.75691,1.840348 z"
|
||||
id="path2121"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
sodipodi:nodetypes="cscc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2123"
|
||||
d="m 47.978861,19.145376 c -0.0362,0.284741 -0.632118,0.443544 -1.331028,0.354698 -0.698909,-0.08885 -1.236142,-0.391701 -1.199944,-0.676442 z"
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1" />
|
||||
<path
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1"
|
||||
d="m 49.344113,18.846496 c 0.224679,0.178626 0.762248,-0.123625 1.200693,-0.675116 0.438451,-0.55148 0.611752,-1.143345 0.387075,-1.321972 z"
|
||||
id="path2126"
|
||||
inkscape:connector-curvature="0"
|
||||
sodipodi:nodetypes="cscc" />
|
||||
<path
|
||||
sodipodi:nodetypes="ccc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2128"
|
||||
d="m 43.707615,19.788656 8.68626,10.557147 c 2.944473,-4.699489 1.792375,-9.398979 -0.200452,-14.098468"
|
||||
style="opacity:1;fill:none;stroke:#808080;stroke-width:0.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 5.8 KiB |
|
After Width: | Height: | Size: 181 KiB |
|
Before Width: | Height: | Size: 65 KiB After Width: | Height: | Size: 38 KiB |
|
After Width: | Height: | Size: 96 KiB |
|
Before Width: | Height: | Size: 76 KiB After Width: | Height: | Size: 42 KiB |
|
Before Width: | Height: | Size: 94 KiB After Width: | Height: | Size: 45 KiB |
|
After Width: | Height: | Size: 37 KiB |
|
Before Width: | Height: | Size: 73 KiB After Width: | Height: | Size: 45 KiB |
|
After Width: | Height: | Size: 16 KiB |
|
Before Width: | Height: | Size: 50 KiB After Width: | Height: | Size: 33 KiB |
|
Before Width: | Height: | Size: 85 KiB After Width: | Height: | Size: 55 KiB |
@@ -1,41 +1,51 @@
|
||||

|
||||
<img src="img/rhasspy.svg" style="max-height: 200px;" title="Rhasspy logo">
|
||||
|
||||
Rhasspy (pronounced RAH-SPEE) is an [open source](https://github.com/synesthesiam/rhasspy), fully offline voice assistant toolkit for [many languages](#supported-languages) that works well with [Home Assistant](https://www.home-assistant.io/), [Hass.io](https://www.home-assistant.io/hassio/), and [Node-RED](https://nodered.org).
|
||||
|
||||
Rhasspy transforms voice commands into [JSON](https://json.org) events that can trigger actions in home automation software, like [Home Assistant automations](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) or [Node-RED flows](usage.md#node-red). You define custom voice commands in a [profile](profiles.md) using a [specialized template syntax](training.md), and Rhasspy takes care of the rest.
|
||||
You specify voice commands in a [template language](training.md):
|
||||
|
||||
## Motivation
|
||||
```
|
||||
[LightState]
|
||||
states = (on | off)
|
||||
turn (<states>){state} [the] light
|
||||
```
|
||||
|
||||
A typical voice assistant (Alexa, Google Home, etc.) solves a number of important problems:
|
||||
and Rhasspy will produce [JSON](https://json.org) events that can trigger actions in [home automation software](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) or [Node-RED flows](usage.md#node-red):
|
||||
|
||||
1. Deciding when to record audio ([wake word](wake-word.md))
|
||||
2. Listening for voice commands ([command listener](command-listener.md))
|
||||
3. Transcribing command/question ([speech to text](speech-to-text.md))
|
||||
4. Interpreting the speaker's **intent** from the text ([intent recognition](intent-recognition.md))
|
||||
5. Fulfilling the speaker's intent ([intent handling](intent-handling.md))
|
||||
```json
|
||||
{
|
||||
"text": "turn on the light",
|
||||
"intent": {
|
||||
"name": "LightState"
|
||||
},
|
||||
"slots": {
|
||||
"state": "on"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Rhasspy provides **offline, private solutions** to problems 1-4 using off-the-shelf tools. These tools are:
|
||||
Rhasspy is <strong>optimized for</strong>:
|
||||
|
||||
* **Wake word**
|
||||
* [Pocketsphinx keyphrase](https://cmusphinx.github.io/wiki/tutoriallm/#using-keyword-lists-with-pocketsphinx)
|
||||
* [Mycroft Precise](https://github.com/MycroftAI/mycroft-precise)
|
||||
* [snowboy](https://snowboy.kitt.ai)
|
||||
* [porcupine](https://github.com/Picovoice/Porcupine)
|
||||
* **Command listener**
|
||||
* [webrtcvad](https://github.com/wiseman/py-webrtcvad)
|
||||
* **Speech to text**
|
||||
* [Pocketsphinx](https://github.com/cmusphinx/pocketsphinx)
|
||||
* [Kaldi](https://kaldi-asr.org)
|
||||
* **Intent recognition**
|
||||
* [OpenFST](https://www.openfst.org)
|
||||
* [fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy)
|
||||
* [Mycroft Adapt](https://github.com/MycroftAI/adapt)
|
||||
* [flair](http://github.com/zalandoresearch/flair)
|
||||
* [Rasa NLU](https://rasa.com/)
|
||||
* Working with external services via [MQTT](usage.md#mqtt), [HTTP](usage.md#http-api), and [Websockets](usage.md#websocket-events)
|
||||
* Home Assistant and Hass.IO have [built-in support](usage.md#home-assistant)
|
||||
* Pre-specified voice commands that are described well [by a grammar](training.md#sentencesini)
|
||||
* You can also do [open-ended speech recognition](speech-to-text.md#open-transcription)
|
||||
* Voice commands with [uncommon words or pronunciations](usage.md#words-tab)
|
||||
* New words are added phonetically with [automated assistance](https://github.com/AdolfVonKleist/Phonetisaurus)
|
||||
|
||||
For problem 5 (fulfilling the speaker's intent), Rhasspy works with external home automation software, such as Home Assistant's built-in [automation capability](https://www.home-assistant.io/docs/automation/) or a [Node-RED flow](https://nodered.org).
|
||||
## Getting Started
|
||||
|
||||
For each intent you define, Rhasspy emits a JSON event that can do anything Home Assistant can do (toggle switches, call REST services, etc.). This means that Rhasspy will do very little out of the box compared to other voice assistants, but there are also *no limits* to what can be done.
|
||||
Ready to try Rhasspy? Follow the steps below and check out the [tutorials](tutorials.md).
|
||||
|
||||
1. Make sure you have the [necessary hardware](hardware.md)
|
||||
2. Choose an [installation method](installation.md)
|
||||
3. Access the [web interface](usage.md#web-interface) to download a profile
|
||||
4. Author your [custom voice commands](training.md) and train Rhasspy
|
||||
5. Connect Rhasspy to [Home Assistant](usage.md#home-assistant) or a [Node-RED](usage.md#node-red) flow
|
||||
|
||||
## Getting Help
|
||||
|
||||
If you have problems, please stop by the [Rhasspy community site](https://community.rhasspy.org) or [open a GitHub issue](https://github.com/synesthesiam/rhasspy/issues).
|
||||
|
||||
## Supported Languages
|
||||
|
||||
@@ -56,124 +66,7 @@ Rhasspy supports the following languages:
|
||||
* Swedish (`sv`)
|
||||
* Catalan (`ca`)
|
||||
|
||||
Support for these languages comes directly from existing [CMU Sphinx](https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/) and [Kaldi](https://montreal-forced-aligner.readthedocs.io/en/latest/pretrained_models.html) acoustic models.
|
||||
|
||||
It is possible to extend Rhasspy to new languages with only:
|
||||
|
||||
* A [phonetic dictionary](https://cmusphinx.github.io/wiki/tutorialdict/#using-g2p-seq2seq-to-extend-the-dictionary)
|
||||
* A trained [acoustic model](https://cmusphinx.github.io/wiki/tutorialam/)
|
||||
* A [grapheme to phoneme model](https://github.com/AdolfVonKleist/Phonetisaurus)
|
||||
|
||||
The table below summarizes language support across the various supporting technologies that Rhasspy uses:
|
||||
|
||||
| Category | Name | Offline? | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt | sv | ca |
|
||||
| -------- | ------ | -------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| **Wake Word** | [pocketsphinx](wake-word.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | | | |
|
||||
| | [porcupine](wake-word.md#porcupine) | ✓ | ✓ | | | | | | | | | | | | | |
|
||||
| | [snowboy](wake-word.md#snowboy) | *requires account* | ✓ | • | • | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| | [precise](wake-word.md#mycroft-precise) | ✓ | ✓ | • | • | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| **Speech to Text** | [pocketsphinx](speech-to-text.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | ✓ |
|
||||
| | [kaldi](speech-to-text.md#kaldi) | ✓ | | | | | | | | | | | ✓ | | ✓ | |
|
||||
| **Intent Recognition** | [fsticuffs](intent-recognition.md#fsticuffs) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [fuzzywuzzy](intent-recognition.md#fuzzywuzzy) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [adapt](intent-recognition.md#mycroft-adapt) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flair](intent-recognition.md#flair) | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | | | | | ✓ | | ✓ |
|
||||
| | [rasaNLU](intent-recognition.md#rasanlu) | *needs extra software* | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| **Text to Speech** | [espeak](text-to-speech.md#espeak) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flite](text-to-speech.md#flite) | ✓ | ✓ | | | | | | | | ✓ | | | | | |
|
||||
| | [picotts](text-to-speech.md#picotts) | ✓ | ✓ | | | | | | | | | | | | | |
|
||||
| | [marytts](text-to-speech.md#marytts) | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | | | | | | | |
|
||||
| | [wavenet](text-to-speech.md#google-wavenet) | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | ✓ | |
|
||||
|
||||
• - yes, but requires training/customization
|
||||
|
||||
## How It Works
|
||||
|
||||
Rhasspy starts off asleep, listening for a [wake word](wake-word.md). Once awoken, it listens for a [voice command](command-listener.md). After recording the command, it's transcribed with the [speech to text](speech-to-text.md) system into text, which is then run through an [intent recognizer](intent-recognition.md). Finally, the recognized intent is used to generate an event that can be [handled by Home Assistant or Node-RED](intent-handling.md).
|
||||
|
||||

|
||||
|
||||
## Customization
|
||||
|
||||
Every step of Rhasspy's processing pipeline can be customized, including using a remote Rhasspy server via its [HTTP API](usage.md#http-api) for [speech to text](speech-to-text.md#remote-http-server) and [intent recognition](intent-recognition.md#remote-http-server). Some useful Rhasspy API endpoints are:
|
||||
|
||||
* `/api/listen-for-command`
|
||||
* POST to wake Rhasspy up and start listening for a voice command
|
||||
* `/api/train`
|
||||
* POST to re-train your profile
|
||||
* `/api/speech-to-intent`
|
||||
* POST a WAV file and have Rhasspy process it as a voice command
|
||||
* `/api/text-to-intent`
|
||||
* POST text and have Rhasspy process it as a command
|
||||
* `/api/text-to-speech`
|
||||
* POST text and have Rhasspy speak it
|
||||
|
||||
Additionally, you can call out to a custom external program for [wake word detection](wake-word.md#command), [voice command listening](command-listener.md#command), [speech recognition](speech-to-text.md#command), [intent recognition](intent-recognition.md#command), and event [intent handling](intent-handling.md#command)! This means that you can use Rhasspy as a general voice command toolkit, with or without Home Assistant.
|
||||
|
||||
## RGB Light Example
|
||||
|
||||
Let's say you have an RGB light of some kind in your bedroom that's [hooked up already to Home Assistant](https://www.home-assistant.io/components/light.mqtt). You'd like to be able to say things like "*set the bedroom light to red*" to change its color. To start, let's write a [Home Assistant automation](https://www.home-assistant.io/docs/automation/action/) to help you out:
|
||||
|
||||
automation:
|
||||
# Change the light in the bedroom to red.
|
||||
trigger:
|
||||
...
|
||||
action:
|
||||
service: light.turn_on
|
||||
data:
|
||||
rgb_color: [255, 0, 0]
|
||||
entity_id: light.bedroom
|
||||
|
||||
Now you just need the trigger! Rhasspy will send events that can be caught with the [event trigger platform](https://www.home-assistant.io/docs/automation/trigger/#event-trigger). A different event will be sent for each *intent* that you define, with slot values corresponding to important parts of the command (like light name and color). Let's start by defining an intent in Rhasspy called `ChangeLightColor` that can be said a few different ways:
|
||||
|
||||
[ChangeLightColor]
|
||||
colors = (red | green | blue) {color}
|
||||
set [the] (bedroom){name} [to] <colors>
|
||||
|
||||
This is a [simplified JSGF grammar](training.md#sentencesini) that will generate the following sentences:
|
||||
|
||||
* set the bedroom to red
|
||||
* set the bedroom to green
|
||||
* set the bedroom to blue
|
||||
* set the bedroom red
|
||||
* set the bedroom green
|
||||
* set the bedroom blue
|
||||
* set bedroom to red
|
||||
* set bedroom to green
|
||||
* set bedroom to blue
|
||||
* set bedroom red
|
||||
* set bedroom green
|
||||
* set bedroom blue
|
||||
|
||||
Rhasspy uses these sentences to create an [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) for speech recognition, and also train an intent recognizer that can extract relevant parts of the command. The `{color}` tag in the `colors` rule will make Rhasspy put a `color` property in each event with the name of the recognized color (red, green, or blue). Likewise, the `{name}` tag on `bedroom` will add a `name` property to the event.
|
||||
|
||||
If trained on these sentences, Rhasspy will now recognize commands like "*set the bedroom to red*" and send a `rhasspy_ChangeLightColor` event to Home Assistant with the following data:
|
||||
|
||||
{
|
||||
"name": "bedroom",
|
||||
"color": "red"
|
||||
}
|
||||
|
||||
You can now fill in the rest of the Home Assistant automation:
|
||||
|
||||
automation:
|
||||
# Change the light in the bedroom to red.
|
||||
trigger:
|
||||
platform: event
|
||||
event_type: rhasspy_ChangeLightColor
|
||||
event_data:
|
||||
name: bedroom
|
||||
color: red
|
||||
action:
|
||||
service: light.turn_on
|
||||
data:
|
||||
rgb_color: [255, 0, 0]
|
||||
entity_id: light.bedroom
|
||||
|
||||
This will handle the specific case of setting the bedroom light to red, but not any other color. You can either add additional automations to handle these, or make use of [automation templating](https://www.home-assistant.io/docs/automation/templating/) to do it all at once.
|
||||
|
||||
Intended Audience
|
||||
---------------------
|
||||
## Intended Audience
|
||||
|
||||
Rhasspy is intended for advanced users that want to have a voice interface to Home Assistant, but value **privacy** and **freedom** above all else. There are many other voice assistants, but none (to my knowledge) that:
|
||||
|
||||
|
||||
@@ -2,20 +2,22 @@
|
||||
|
||||
Rhasspy should run in a variety of software environments, including:
|
||||
|
||||
* Within a [Docker](https://www.docker.com/) container
|
||||
* As a [Hass.io add-on](https://www.home-assistant.io/addons/)
|
||||
* Inside a [Python virtual environment](https://docs.python-guide.org/dev/virtualenvs/)
|
||||
* Within a [Docker](#docker) container
|
||||
* As a [Hass.io add-on](#hassio)
|
||||
* Inside a [Python virtual environment](#virtual-environment)
|
||||
* Running as a [service](#running-as-a-service)
|
||||
* Build [from source](#build-from-source)
|
||||
|
||||
### Docker
|
||||
## Docker
|
||||
|
||||
The easiest way to try Rhasspy is with Docker. To get started, make sure you have [Docker installed](https://docs.docker.com/install/):
|
||||
|
||||
curl -sSL https://get.docker.com | sh
|
||||
|
||||
|
||||
and that your user is part of the `docker` group:
|
||||
|
||||
sudo usermod -a -G docker $USER
|
||||
|
||||
|
||||
**Be sure to reboot** after adding yourself to the `docker` group!
|
||||
|
||||
Next, start the [Rhasspy Docker image](https://hub.docker.com/r/synesthesiam/rhasspy-server) in the background:
|
||||
@@ -27,9 +29,9 @@ Next, start the [Rhasspy Docker image](https://hub.docker.com/r/synesthesiam/rha
|
||||
synesthesiam/rhasspy-server:latest \
|
||||
--user-profiles /profiles \
|
||||
--profile en
|
||||
|
||||
|
||||
This will start Rhasspy with the English profile (`en`) in the background (`-d`) on port 12101 (`-p`) and give Rhasspy access to your microphone (`--device`). Any changes you make to [your profile](profiles.md) will be saved to `~/.config/rhasspy`.
|
||||
|
||||
|
||||
Once it starts, Rhasspy's web interface should be accessible at [http://localhost:12101](http://localhost:12101). If something went wrong, try running docker with `-it` instead of `-d` to see the output.
|
||||
|
||||
If you're using [docker compose](https://docs.docker.com/compose/), add the following to your `docker-compose.yml` file:
|
||||
@@ -44,10 +46,25 @@ If you're using [docker compose](https://docs.docker.com/compose/), add the foll
|
||||
devices:
|
||||
- "/dev/snd:/dev/snd"
|
||||
command: --user-profiles /profiles --profile en
|
||||
|
||||
### Updating Docker Image
|
||||
|
||||
### Hass.io
|
||||
To update your Rhasspy Docker image, just run:
|
||||
|
||||
The second easiest was to install Rhasspy is as a [Hass.io add-on](https://www.home-assistant.io/addons/). Following the [installation instructions for Hass.io](https://www.home-assistant.io/hassio/installation/) before proceeding.
|
||||
```bash
|
||||
docker pull synesthesiam/rhasspy-server:latest
|
||||
```
|
||||
on your Rhasspy server and restart the Docker container. This may require running something like:
|
||||
|
||||
```bash
|
||||
docker rm <container-name>
|
||||
```
|
||||
|
||||
before doing a `docker run...`
|
||||
|
||||
## Hass.io
|
||||
|
||||
The second easiest way to install Rhasspy is as a [Hass.io add-on](https://www.home-assistant.io/addons/). Follow the [installation instructions for Hass.io](https://www.home-assistant.io/hassio/installation/) before proceeding.
|
||||
|
||||
To install the add-on, add my [Hass.IO Add-On Repository](https://github.com/synesthesiam/hassio-addons) in the Add-On Store, refresh, then install the "Rhasspy Assistant" under “Synesthesiam Hass.IO Add-Ons” (all the way at the bottom of the Add-On Store screen).
|
||||
|
||||
@@ -61,35 +78,127 @@ Before starting the add-on, make sure to give it access to your microphone and s
|
||||
|
||||

|
||||
|
||||
### Updating Hass.IO Add-On
|
||||
|
||||
### Virtual Environment
|
||||
You should receive notifications when a new version of Rhasspy is available for Hass.IO. Follow the instructions from Hass.IO on how to update the add-on.
|
||||
|
||||
## Virtual Environment
|
||||
|
||||
Rhasspy can be installed into a Python virtual environment, though there are a number of requirements. This may be desirable, however, if you have trouble getting Rhasspy to access your microphone from within a Docker container. To start, clone the repo somewhere:
|
||||
|
||||
git clone https://github.com/synesthesiam/rhasspy.git
|
||||
|
||||
```bash
|
||||
git clone https://github.com/synesthesiam/rhasspy.git
|
||||
```
|
||||
|
||||
Then run the `download-dependencies.sh` and `create-venv.sh` scripts (assumes a Debian distribution):
|
||||
|
||||
cd rhasspy/
|
||||
./download-dependencies.sh
|
||||
./create-venv.sh
|
||||
|
||||
```bash
|
||||
cd rhasspy/
|
||||
./download-dependencies.sh
|
||||
./create-venv.sh
|
||||
```
|
||||
|
||||
Once the installation finishes (5-10 minutes on a Raspberry Pi 3), you can use the `run-venv.sh` script to start Rhasspy:
|
||||
|
||||
./run-venv.sh --profile en
|
||||
|
||||
```bash
|
||||
./run-venv.sh --profile en
|
||||
```
|
||||
|
||||
If all is well, the web interface will be available at [http://localhost:12101](http://localhost:12101)
|
||||
|
||||
### Software Requirements
|
||||
### Updating Virtual Environment
|
||||
|
||||
At its core, Rhasspy requires:
|
||||
To update your Rhasspy virtual environment to the latest version, run:
|
||||
|
||||
```bash
|
||||
git pull origin master
|
||||
```
|
||||
|
||||
in your `rhasspy` directory, and then update your Python dependencies:
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate
|
||||
pip3 install -r requirements.txt
|
||||
```
|
||||
|
||||
You should also re-build the web interface:
|
||||
|
||||
1. Install [yarn](https://yarnpkg.com) on your system
|
||||
2. Run `yarn install && yarn build` in the `rhasspy` directory
|
||||
3. Restart any running instances of Rhasspy
|
||||
|
||||
### Running as a Service
|
||||
|
||||
Once installed, Rhasspy can be run as a [systemd service](https://systemd.io/). An [example unit file](https://github.com/synesthesiam/rhasspy/blob/master/etc/rhasspy.service) is available (thanks [UnderpantsGnome](https://github.com/UnderpantsGnome)):
|
||||
|
||||
```
|
||||
[Unit]
|
||||
Description=Rhasspy
|
||||
After=syslog.target network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/home/<USER>/path/to/rhasspy
|
||||
ExecStart=/bin/bash -lc './run-venv.sh --profile <LANGUAGE>'
|
||||
|
||||
RestartSec=1
|
||||
Restart=on-failure
|
||||
|
||||
StandardOutput=syslog
|
||||
StandardError=syslog
|
||||
|
||||
SyslogIdentifier=rhasspy
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
* Replace `/home/<USER>/path/to/rhasspy` with the full path to your Rhasspy installation (where `run-venv.sh` is).
|
||||
* Replace `<LANGUAGE>` with your profile language (e.g., `en`)
|
||||
|
||||
Create a file named `rhasspy.service` in the `/home/<USER>/.config/systemd/user` directory (you may need to create the directory itself). Once the file has been saved, run:
|
||||
|
||||
```bash
|
||||
systemctl --user daemon-reload
|
||||
```
|
||||
|
||||
Then, you can start Rhasspy with:
|
||||
|
||||
```bash
|
||||
systemctl --user start rhasspy
|
||||
```
|
||||
|
||||
If you'd like Rhasspy to start on boot, run:
|
||||
|
||||
```bash
|
||||
systemctl --user enable --now rhasspy
|
||||
```
|
||||
|
||||
## Build From Source
|
||||
|
||||
The `create-venv.sh` script uses [pre-compiled binaries](https://github.com/synesthesiam/rhasspy/releases/tag/v2.0) for Rhasspy's required tools:
|
||||
|
||||
* [OpenFST](https://www.openfst.org)
|
||||
* [Opengrm](http://www.opengrm.org/twiki/bin/view/GRM/NGramLibrary)
|
||||
* [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus)
|
||||
* [Kaldi](https://kaldi-asr.org)
|
||||
|
||||
The [build-from-source.sh](https://github.com/synesthesiam/rhasspy/blob/master/build-from-source.sh) script attempts to build all of these tools from source. The binary artifacts (command-line tools, shared libraries) are installed into the `bin` and `lib` directories of a Python virtual environment. The `run-venv.sh` script automatically adds these directories to `PATH` and `LD_LIBRARY_PATH` before starting Rhasspy.
|
||||
|
||||
### Swap Size
|
||||
|
||||
On low memory devices like the Raspberry Pi, building the tools above can quickly consume the entire RAM. Before building, it's highly recommended that you increase the available swap space by several gigabytes:
|
||||
|
||||
1. Edit `/etc/dphys-swapfile`
|
||||
2. Change `CONF_SWAPSIZE` to something large, like 2048 (2GB)
|
||||
3. Reboot
|
||||
|
||||
### Kaldi
|
||||
|
||||
You can skip building Kaldi if you plan to just [use Pocketsphinx](speech-to-text.md#pocketsphinx) for speech recognition.
|
||||
|
||||
### Updating Source Install
|
||||
|
||||
Follow the same instructions as [updating a virtual environment](#updating-virtual-environment).
|
||||
|
||||
* Linux
|
||||
* Python 3.6
|
||||
* [Flask](https://pypi.org/project/Flask/) web server, including
|
||||
* [flask-swagger-ui](https://pypi.org/project/flask-swagger-ui/) for HTTP API documentation
|
||||
* [Flask-Cors](https://pypi.org/project/Flask-Cors/) for [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) stuff
|
||||
* [Flask-Sockets](https://pypi.org/project/Flask-Sockets/) for websocket support
|
||||
* [pydash](https://pypi.org/project/pydash/) utility library
|
||||
|
||||
To actually use any components, however, requires a lot of [extra software](about.md#supporting-tools).
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
# Intent Handling
|
||||
|
||||
After a voice command has been transcribed and your intent has been successfully recognized, Rhasspy is ready to send a JSON event to Home Assistant or Node-RED.
|
||||
After a voice command has been transcribed and your intent has been successfully recognized, Rhasspy is ready to send a JSON event to another system like Home Assistant or Node-RED.
|
||||
|
||||
* [Home Assistant](#home-assistant)
|
||||
* [Remote Server](#remote-server)
|
||||
* [Command](#command)
|
||||
|
||||
Regardless of which intent handling system you choose, Rhasspy emits JSON events [over a websocket connection](usage.md#websocket-events).
|
||||
|
||||
@@ -61,6 +65,25 @@ In order to do something with the `rhasspy_ChangeLightColor` event, create an au
|
||||
|
||||
See the documentation on [actions](https://www.home-assistant.io/docs/automation/action/) for the different things you can do with Home Assistant.
|
||||
|
||||
### Intents
|
||||
|
||||
More recent versions of Home Assistant can accept intents directly. Add the following to your `configuration.yaml` file:
|
||||
|
||||
```yaml
|
||||
intent:
|
||||
```
|
||||
|
||||
This will enable intents over the HTTP API. Next, write [intent scripts](https://www.home-assistant.io/integrations/intent_script) to handle each Rhasspy intent:
|
||||
|
||||
```yaml
|
||||
intent_script:
|
||||
ChangeLightColor:
|
||||
action:
|
||||
...
|
||||
```
|
||||
|
||||
The possible [actions](https://www.home-assistant.io/docs/automation/action/) are the same as in automations.
|
||||
|
||||
### MQTT
|
||||
|
||||
In addition to events, Rhasspy can also publish intents through MQTT ([Hermes protocol](https://docs.snips.ai/reference/dialogue#intent)).
|
||||
@@ -70,14 +93,22 @@ Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"mqtt": {
|
||||
"enabled": true,
|
||||
"host": "localhost",
|
||||
"username": "",
|
||||
"password": "",
|
||||
"port": 1883,
|
||||
"reconnect_sec": 5,
|
||||
"site_id": "default",
|
||||
"publish_intents": true
|
||||
"enabled": true,
|
||||
"host": "localhost",
|
||||
"username": "",
|
||||
"password": "",
|
||||
"port": 1883,
|
||||
"reconnect_sec": 5,
|
||||
"site_id": "default",
|
||||
"publish_intents": true,
|
||||
"tls": {
|
||||
"enabled": false,
|
||||
"ca_certs": "",
|
||||
"cert_reqs": "CERT_REQUIRED",
|
||||
"certfile": "",
|
||||
"ciphers": "",
|
||||
"keyfile": ""
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -112,10 +143,60 @@ Set `home_assistant.pem_file` to the full path to your <a href="http://docs.pyth
|
||||
|
||||
Use the environment variable `RHASSPY_PROFILE_DIR` to reference your current profile's directory. For example, `$RHASSPY_PROFILE_DIR/my.pem` will tell Rhasspy to use a file named `my.pem` in your profile directory when verifying your self-signed certificate.
|
||||
|
||||
## Remote Server
|
||||
|
||||
Rhasspy can POST the intent JSON to a remote URL.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"handle": {
|
||||
"system": "remote",
|
||||
"remote": {
|
||||
"url": "http://<address>:<port>/path/to/endpoint"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
When an intent is recognized, Rhasspy will POST to `handle.remote.url` with the intent JSON. You should **return JSON** back, optionally with additional information. If `handle.forward_to_hass` is `true`, Rhasspy will look for a `hass_event` property of the returned JSON with the following structure:
|
||||
|
||||
```json
|
||||
{
|
||||
// rest of input JSON
|
||||
// ...
|
||||
"hass_event": {
|
||||
"event_type": "...",
|
||||
"event_data": {
|
||||
"key": "value",
|
||||
// ...
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Rhasspy will create the Home Assistant event based on this information. If it is **not** present, the remaining intent information will be used to construct the event as normal (i.e., `intent` and `entities`). If `handle.forward_to_hass` is `false`, the output of your program is not used.
|
||||
|
||||
### Speech
|
||||
|
||||
If the returned JSON contains a "speech" key like this:
|
||||
|
||||
```json
|
||||
{
|
||||
...
|
||||
"speech": {
|
||||
"text": "Some text to speak."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
then Rhasspy will forward `speech.text` to the configured [text to speech](text-to-speech.md) system.
|
||||
|
||||
See `rhasspy.intent_handler.RemoteIntentHandler` for details.
|
||||
|
||||
## Command
|
||||
|
||||
Once an intent is successfully recognized, Rhasspy will send an event to Home Assistant with the details. You can call a custom program instead of, *or in addition to*, this behavior.
|
||||
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
@@ -144,7 +225,7 @@ When an intent is recognized, Rhasspy will call your custom program with the int
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Rhasspy will create the Home Assistant event based on this information. If it is **not** present, the remaining intent information will be used to construct the event as normal (i.e., `intent` and `entities`). If `handle.forward_to_hass` is `false`, the output of your program is not used.
|
||||
|
||||
The following environment variables are available to your program:
|
||||
@@ -153,7 +234,22 @@ The following environment variables are available to your program:
|
||||
* `$RHASSPY_PROFILE` - name of the current profile (e.g., "en")
|
||||
* `$RHASSPY_PROFILE_DIR` - directory of the current profile (where `profile.json` is)
|
||||
|
||||
See [handle.sh](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/handle.sh) for an example program.
|
||||
See [handle.sh](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/handle.sh) or [handle.py](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/handle.py) for example programs.
|
||||
|
||||
### Speech
|
||||
|
||||
If the returned JSON contains a "speech" key like this:
|
||||
|
||||
```json
|
||||
{
|
||||
...
|
||||
"speech": {
|
||||
"text": "Some text to speak."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
then Rhasspy will forward `speech.text` to the configured [text to speech](text-to-speech.md) system.
|
||||
|
||||
See `rhasspy.intent_handler.CommandIntentHandler` for details.
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Intent Recognition
|
||||
|
||||
After your voice command has been transcribed by the [speech to text](speech-to-text.md) system, the next step is to recognize your intent.
|
||||
After your voice command has been transcribed by the [speech to text](speech-to-text.md) system, the next step is to recognize your intent.
|
||||
The end result is a JSON event with information about the intent.
|
||||
|
||||
The following table summarizes the trade-offs of using each intent recognizer:
|
||||
@@ -10,8 +10,8 @@ The following table summarizes the trade-offs of using each intent recognizer:
|
||||
| [fsticuffs](intent-recognition.md#fsticuffs) | 1M+ | very fast | very fast | ignores unknown words |
|
||||
| [fuzzywuzzy](intent-recognition.md#fuzzywuzzy) | 12-100 | fast | fast | fuzzy string matching |
|
||||
| [adapt](intent-recognition.md#mycroft-adapt) | 100-1K | moderate | fast | ignores unknown words |
|
||||
| [flair](intent-recognition.md#flair) | 1K-100K | very slow | moderate | handles unseen words |
|
||||
| [rasaNLU](intent-recognition.md#rasanlu) | 1K-100K | very slow | moderate | handles unseen words |
|
||||
| [flair](intent-recognition.md#flair) | 1K-100K | very slow | moderate | handles unseen words |
|
||||
|
||||
## Fsticuffs
|
||||
|
||||
@@ -55,13 +55,13 @@ See `rhasspy.intent.FuzzyWuzzyRecognizer` for details.
|
||||
|
||||
## Mycroft Adapt
|
||||
|
||||
Recognizes intents using [Mycroft Adapt](https://github.com/MycroftAI/adapt). Works best when you have a medium number of sentences (hundreds to thousands) and need to be able to recognize sentences not seen during training (no new words, though).
|
||||
Recognizes intents using [Mycroft Adapt](https://github.com/MycroftAI/adapt). Works best when you have a medium number of sentences (hundreds to thousands) and need to be able to recognize sentences not seen during training (no new words, though). This recognizer does not support converters, i.e. numbers are not converted back to integers.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"intent": {
|
||||
"system": "adapt",
|
||||
"system": "adapt",
|
||||
"adapt": {
|
||||
"stop_words": "stop_words.txt"
|
||||
}
|
||||
@@ -80,7 +80,7 @@ Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"intent": {
|
||||
"system": "flair",
|
||||
"system": "flair",
|
||||
"flair": {
|
||||
"data_dir": "flair_data",
|
||||
"max_epochs": 25,
|
||||
@@ -155,6 +155,12 @@ Because Home Assistant will already handle your intent (probably using an [inten
|
||||
|
||||
See `rhasspy.intent.HomeAssistantConversationRecognizer` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Publishes intent recognitions/failures to `hermes/intent/<INTENT_NAME>` or `hermes/nlu/intentNotRecognized` ([Hermes protocol](https://docs.snips.ai/reference/hermes)).
|
||||
|
||||
This is enabled by default and controlled by the `mqtt.publish_intents` setting in your [profile](profiles.md).
|
||||
|
||||
## Command
|
||||
|
||||
Recognizes intents from text using a custom external program.
|
||||
@@ -190,7 +196,7 @@ When a voice command is successfully transcribed, your program will be called wi
|
||||
"text": "set the bedroom light to red"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
The following environment variables are available to your program:
|
||||
|
||||
* `$RHASSPY_BASE_DIR` - path to the directory where Rhasspy is running from
|
||||
|
||||
@@ -40,226 +40,8 @@ If you need to install Rhasspy onto a machine that is not connected to the inter
|
||||
2. `fr-g2p.tar.gz`
|
||||
3. `fr-small.lm.gz`
|
||||
|
||||
If your user profile directory is `$HOME/.config/rhasspy/profiles`, then you should download/copy all three artifacts to `$HOME/.config/rhasspy/profiles/fr/download` on the offline machine. Now, when Rhasspy loads the `fr` profile and you click "Download", it will extract the files in the `download` directory without going out to the internet.
|
||||
|
||||
If you want to know precisely which files Rhasspy is looking for for a given profile, visit the `profiles` directory in [the source code](https://github.com/synesthesiam/rhasspy/tree/master/profiles) and examine these scripts in that profile's directory:
|
||||
|
||||
* `download-profile.sh`
|
||||
* Downloads and extracts all required binary artifacts. Uses cache in `download` directory unless `--delete` option is given.
|
||||
* `check-profile.sh`
|
||||
* Verifies that required binary artifacts are present. Returns non-zero exit code if download is required.
|
||||
If your user profile directory is `$HOME/.config/rhasspy/profiles`, then you should download/copy all three artifacts to `$HOME/.config/rhasspy/profiles/fr/download` on the offline machine. Now, when Rhasspy loads the `fr` profile and you click "Download", it will extract the files in the `download` directory without going out to the internet.
|
||||
|
||||
## Available Settings
|
||||
|
||||
All available profile sections and settings are listed below:
|
||||
|
||||
* `rhasspy` - configuration for Rhasspy assistant
|
||||
* `preload_profile` - true if speech/intent recognizers should be loaded immediately for default profile (default: `true`)
|
||||
* `listen_on_start` - true if Rhasspy should listen for wake word at startup (default: `true`)
|
||||
* `load_timeout_sec` - number of seconds to wait for internal actors before proceeding with start up
|
||||
* `home_assistant` - how to communicate with Home Assistant/Hass.io
|
||||
* `url` - Base URL of Home Assistant server (no `/api`)
|
||||
* `access_token` - long-lived access token for Home Assistant (Hass.io token is used automatically)
|
||||
* `api_password` - Password, if you have that enabled (deprecated)
|
||||
* `pem_file` - Full path to your <a href="http://docs.python-requests.org/en/latest/user/advanced/#ssl-cert-verification">CA_BUNDLE file or a directory with certificates of trusted CAs</a>
|
||||
* `event_type_format` - Python format string used to create event type from intent type (`{0}`)
|
||||
* `speech_to_text` - transcribing [voice commands to text](speech-to-text.md)
|
||||
* `system` - name of speech to text system (`pocketsphinx`, `remote`, `command`, or `dummy`)
|
||||
* `pocketsphinx` - configuration for [Pocketsphinx](speech-to-text.md#pocketsphinx)
|
||||
* `compatible` - true if profile can use pocketsphinx for speech recognition
|
||||
* `acoustic_model` - directory with CMU 16Khz acoustic model
|
||||
* `base_dictionary` - large text file with word pronunciations (read only)
|
||||
* `custom_words` - small text file with words/pronunciations added by user
|
||||
* `dictionary` - text file with all words/pronunciations needed for example sentences
|
||||
* `unknown_words` - small text file with guessed word pronunciations (from phonetisaurus)
|
||||
* `language_model` - text file with trigram [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) built from example sentences
|
||||
* `open_transcription` - true if general language model should be used (custom voice commands ignored)
|
||||
* `base_language_model` - large general language model (read only)
|
||||
* `mllr_matrix` - MLLR matrix from [acoustic model tuning](https://cmusphinx.github.io/wiki/tutorialtuning/)
|
||||
* `mix_weight` - how much of the base language model to [mix in during training](training.md#language-model-mixing) (0-1)
|
||||
* `mix_fst` - path to save mixed ngram FST model
|
||||
* `kaldi` - configuration for [Kaldi](speech-to-text.md#kaldi)
|
||||
* `compatible` - true if profile can use Kaldi for speech recognition
|
||||
* `kaldi_dir` - absolute path to Kaldi root directory
|
||||
* `model_dir` - directory where Kaldi model is stored (relative to profile directory)
|
||||
* `graph` - directory where HCLG.fst is located (relative to `model_dir`)
|
||||
* `base_graph` - directory where large general HCLG.fst is located (relative to `model_dir`)
|
||||
* `base_dictionary` - large text file with word pronunciations (read only)
|
||||
* `custom_words` - small text file with words/pronunciations added by user
|
||||
* `dictionary` - text file with all words/pronunciations needed for example sentences
|
||||
* `open_transcription` - true if general language model should be used (custom voice commands ignored)
|
||||
* `unknown_words` - small text file with guessed word pronunciations (from phonetisaurus)
|
||||
* `mix_weight` - how much of the base language model to [mix in during training](training.md#language-model-mixing) (0-1)
|
||||
* `mix_fst` - path to save mixed ngram FST model
|
||||
* `remote` - configuration for [remote Rhasspy server](speech-to-text.md#remote-http-server)
|
||||
* `url` - URL to POST WAV data for transcription (e.g., `http://your-rhasspy-server:12101/api/speech-to-text`)
|
||||
* `command` - configuration for [external speech-to-text program](speech-to-text.md#command)
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `sentences_ini` - Ini file with example [sentences/JSGF templates](training.md#sentencesini) grouped by intent
|
||||
* `g2p_model` - finite-state transducer for phonetisaurus to guess word pronunciations
|
||||
* `g2p_casing` - casing to force for g2p model (`upper`, `lower`, or blank)
|
||||
* `dictionary_casing` - casing to force for dictionary words (`upper`, `lower`, or blank)
|
||||
* `grammars_dir` - directory to write generated JSGF grammars from sentences ini file
|
||||
* `fsts_dir` - directory to write generated finite state transducers from JSGF grammars
|
||||
* `intent` - transforming text commands to intents
|
||||
* `system` - intent recognition system (`fsticuffs`, `fuzzywuzzy`, `rasa`, `remote`, `adapt`, `command`, or `dummy`)
|
||||
* `fsticuffs` - configuration for [OpenFST-based](https://www.openfst.org) intent recognizer
|
||||
* `intent_fst` - path to generated finite state transducer with all intents combined
|
||||
* `ignore_unknown_words` - true if words not in the FST symbol table should be ignored
|
||||
* `fuzzy` - true if text is matching in a fuzzy manner, skipping words in `stop_words.txt`
|
||||
* `fuzzywuzzy` - configuration for simplistic [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) based intent recognizer
|
||||
* `examples_json` - JSON file with intents/example sentences
|
||||
* `min_confidence` - minimum confidence required for intent to be converted to a JSON event (0-1)
|
||||
* `remote` - configuration for remote Rhasspy server
|
||||
* `url` - URL to POST text to for intent recognition (e.g., `http://your-rhasspy-server:12101/api/text-to-intent`)
|
||||
* `rasa` - configuration for [Rasa NLU](https://rasa.com/) based intent recognizer
|
||||
* `url` - URL of remote Rasa NLU server (e.g., `http://localhost:5005/`)
|
||||
* `examples_markdown` - Markdown file to generate with intents/example sentences
|
||||
* `project_name` - name of project to generate during training
|
||||
* `adapt` - configuration for [Mycroft Adapt](https://github.com/MycroftAI/adapt) based intent recognizer
|
||||
* `stop_words` - text file with words to ignore in training sentences
|
||||
* `command` - configuration for external intent recognition program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `text_to_speech` - pronouncing words
|
||||
* `system` - text to speech system (`espeak`, `flite`, `picotts`, `marytts`, `command`, or `dummy`)
|
||||
* `espeak` - configuration for [eSpeak](http://espeak.sourceforge.net)
|
||||
* `phoneme_map` - text file mapping CMU phonemes to eSpeak phonemes
|
||||
* `flite` - configuration for [flite](http://www.festvox.org/flite)
|
||||
* `voice` - name of voice to use (e.g., `kal16`, `rms`, `awb`)
|
||||
* `picotts` - configuration for [PicoTTS](https://en.wikipedia.org/wiki/SVOX)
|
||||
* `language` - language to use (default if not present)
|
||||
* `marytts` - configuration for [MaryTTS](http://mary.dfki.de)
|
||||
* `url` - address:port of MaryTTS server (port is usually 59125)
|
||||
* `voice` - name of voice to use (e.g., `cmu-slt`). Default if not present.
|
||||
* `locale` - name of locale to use (e.g., `en-US`). Default if not present.
|
||||
* `wavenet` - configuration for Google's [WaveNet](https://cloud.google.com/text-to-speech/docs/wavenet)
|
||||
* `cache_dir` - path to directory in your profile where WAV files are cached
|
||||
* `credentials_json` - path to the JSON credentials file (generated online)
|
||||
* `gender` - gender of speaker (`MALE` `FEMALE`)
|
||||
* `language_code` - language/locale e.g. `en-US`,
|
||||
* `sample_rate` - WAV sample rate (default: 22050)
|
||||
* `url` - URL of WaveNet endpoint
|
||||
* `voice` - voice to use (e.g., `Wavenet-C`)
|
||||
* `fallback_tts` - text to speech system to use when offline or error occurs (e.g., `espeak`)
|
||||
* `phoneme_examples` - text file with examples for each CMU phoneme
|
||||
* `training` - training speech/intent recognizers
|
||||
* `dictionary_number_duplicates` - true if duplicate words in dictionary should be suffixed by `(2)`, `(3)`, etc.
|
||||
* `tokenizer` - system used to break sentences into words (`regex` only for now)
|
||||
* `regex` - configuration for regex tokenizer
|
||||
* `replace` - list of dictionaries with patterns/replacements used on each example sentence
|
||||
* `split` - pattern used to break sentences into words
|
||||
* `unknown_words` - configuration for dealing with words not in base/custom dictionaries
|
||||
* `fail_when_present` - true if Rhasspy should halt training when unknown words are found
|
||||
* `guess_pronunciations` - true if [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) should be used to guess how an unknown word is pronounced
|
||||
* `speech_to_text` - training for speech decoder
|
||||
* `system` - speech to text training system (`auto`, `pocketsphinx`, `kaldi`, `command`, or `dummy`)
|
||||
* `command` - configuration for external speech-to-text training program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `intent` - training for intent recognizer
|
||||
* `system` - intent recognizer training system (`auto`, `fsticuffs`, `fuzzywuzzy`, `rasa`, `adapt`, `command`, or `dummy`)
|
||||
* `command` - configuration for external intent recognizer training program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `wake` - waking Rhasspy up for speech input
|
||||
* `system` - wake word recognition system (`pocketsphinx`, `snowboy`, `precise`, `porcupine`, `command`, or `dummy`)
|
||||
* `pocketsphinx` - configuration for Pocketsphinx wake word recognizer
|
||||
* `keyphrase` - phrase to wake up on (3-4 syllables recommended)
|
||||
* `threshold` - sensitivity of detection (recommended range 1e-50 to 1e-5)
|
||||
* `chunk_size` - number of bytes per chunk to feed to Pocketsphinx (default 960)
|
||||
* `snowboy` - configuration for [snowboy](https://snowboy.kitt.ai)
|
||||
* `model` - path to model file (in profile directory)
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `audio_gain` - audio gain (default 1)
|
||||
* `chunk_size` - number of bytes per chunk to feed to snowboy (default 960)
|
||||
* `precise` - configuration for [Mycroft Precise](https://github.com/MycroftAI/mycroft-precise)
|
||||
* `engine_path` - path to the precise-engine binary
|
||||
* `model` - path to model file (in profile directory)
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `trigger_level` - number of events to trigger activation (default 3)
|
||||
* `chunk_size` - number of bytes per chunk to feed to Precise (default 2048)
|
||||
* `porcupine` - configuration for [PicoVoice's Porcupine](https://github.com/Picovoice/Porcupine)
|
||||
* `library_path` - path to `libpv_porcupine.so` for your platform/architecture
|
||||
* `model_path` - path to the `porcupine_params.pv` (lib/common)
|
||||
* `keyword_path` - path to the `.ppn` keyword file
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `command` - configuration for external wake word program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `microphone` - configuration for audio recording
|
||||
* `system` - audio recording system (`pyaudio`, `arecord`, `hermes`, `http`, or `dummy`)
|
||||
* `pyaudio` - configuration for [PyAudio](https://people.csail.mit.edu/hubert/pyaudio/) microphone
|
||||
* `device` - index of device to use or empty for default device
|
||||
* `frames_per_buffer` - number of frames to read at a time (default 480)
|
||||
* `arecord` - configuration for ALSA microphone
|
||||
* `device` - name of ALSA device (see `arecord -L`) to use or empty for default device
|
||||
* `chunk_size` - number of bytes to read at a time (default 960)
|
||||
* `http` - configuration for HTTP audio stream
|
||||
* `host` - hostname or IP address of HTTP audio server (default 127.0.0.1)
|
||||
* `port` - port to receive audio stream on (default 12333)
|
||||
* `stop_after` - one of "never", "text", or "intent" ([see documentation](audio-input.md#http-stream))
|
||||
* `gstreamer` - configuration for GStreamer audio recorder
|
||||
* `pipeline` - GStreamer pipeline (e.g., `FILTER ! FILTER ! ...`) without sink
|
||||
* `hermes` - configuration for MQTT "microphone" ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* Subscribes to WAV data from `hermes/audioServer/<SITE_ID>/audioFrame`
|
||||
* Requires MQTT to be enabled
|
||||
* `sounds` - configuration for feedback sounds from Rhasspy
|
||||
* `system` - which sound output system to use (`aplay`, `hermes`, or `dummy`)
|
||||
* `wake` - path to WAV file to play when Rhasspy wakes up
|
||||
* `recorded` - path to WAV file to play when a command finishes recording
|
||||
* `aplay` - configuration for ALSA speakers
|
||||
* `device` - name of ALSA device (see `aplay -L`) to use or empty for default device
|
||||
* `hermes` - configuration for MQTT "speakers" ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* WAV data published to `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>`
|
||||
* Requires MQTT to be enabled
|
||||
* `command`
|
||||
* `system` - which voice command listener system to use (`webrtcvad`, `oneshot`, `hermes`, or `dummy`)
|
||||
* `webrtcvad` - configuration for [webrtcvad](https://github.com/wiseman/py-webrtcvad) system
|
||||
* `sample_rate` - sample rate of input audio
|
||||
* `chunk_size` - bytes per buffer (must be 10,20,30 ms)
|
||||
* `vad_mode` - sensitivity of `webrtcvad` (0-3)
|
||||
* `min_sec` - minimum number of seconds in a command
|
||||
* `silence_sec` - number of seconds of silence after voice command before stopping
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `throwaway_buffers` - number of buffers to drop when recording starts
|
||||
* `speech_buffers` - number of buffers with speech before command starts
|
||||
* `oneshot` - configuration for voice command system that takes first audio frame as entire command
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `command` - configuration for external voice command program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `hermes` - configuration for MQTT-based voice command system that listens between `startListening` and `stopListening` commands ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `handle`
|
||||
* `system` - which intent handling system to use (`hass`, `command`, or `dummy`)
|
||||
* `forward_to_hass` - true if intents are always forwarded to Home Assistant (even if `system` is `command`)
|
||||
* `command` - configuration for external intent handling program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `mqtt` - configuration for MQTT ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* `enabled` - true if MQTT client should be started
|
||||
* `host` - MQTT host
|
||||
* `port` - MQTT port
|
||||
* `username` - MQTT username (blank for anonymous)
|
||||
* `password` - MQTT password
|
||||
* `reconnect_sec` - number of seconds before client will reconnect
|
||||
* `site_id` - ID of site ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* `publish_intents` - true if intents are published to MQTT
|
||||
* `tuning` - configuration for acoustic model tuning
|
||||
* `system` - system for tuning (currently only `sphinxtrain`)
|
||||
* `sphinxtrain` - configuration for [sphinxtrain](https://github.com/cmusphinx/sphinxtrain) based acoustic model tuning
|
||||
* `mllr_matrix` - name of generated MLLR matrix (should match `speech_to_text.pocketsphinx.mllr_matrix`)
|
||||
* `download` - configuration for profile file downloading
|
||||
* `cache_dir` - directory in your profile where downloaded files are cached
|
||||
* `conditions` - profile settings that will trigger file downloads
|
||||
* keys are profile setting paths (e.g., `wake.system`)
|
||||
* values are dictionaries whose keys are profile settings values (e.g., `snowboy`)
|
||||
* settings may have the form `<=N` or `!X` to mean "less than or equal to N" or "not X"
|
||||
* leaf nodes are dictionaries whose keys are destination file paths and whose values reference the `files` dictionary
|
||||
* `files` - locations, etc. of files to download
|
||||
* keys are names of files
|
||||
* values are dictionaries with:
|
||||
* `url` - URL of file to download
|
||||
* `cache` - `false` if file should be downloaded directly into profile (skipping cache)
|
||||
See [the reference](reference.md#profile-settings) for all available profile settings.
|
||||
|
||||
@@ -0,0 +1,602 @@
|
||||
# Reference
|
||||
|
||||
* [Supported Languages](#supported-languages)
|
||||
* [HTTP API](#http-api)
|
||||
* [Websocket API](#websocket-api)
|
||||
* [MQTT API](#mqtt-api)
|
||||
* [Command Line](#command-line)
|
||||
* [Profile Settings](#profile-settings)
|
||||
|
||||
## Supported Languages
|
||||
|
||||
The table below lists which components are compatible with each of Rhasspy's supported languages.
|
||||
|
||||
| Category | Name | Offline? | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt | sv | ca |
|
||||
| -------- | ------ | -------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| **Wake Word** | [pocketsphinx](wake-word.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | | | |
|
||||
| | [porcupine](wake-word.md#porcupine) | ✓ | ✓ | | | | | | | | | | | | | |
|
||||
| | [snowboy](wake-word.md#snowboy) | *requires account* | ✓ | • | • | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| | [precise](wake-word.md#mycroft-precise) | ✓ | ✓ | • | • | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| **Speech to Text** | [pocketsphinx](speech-to-text.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | ✓ |
|
||||
| | [kaldi](speech-to-text.md#kaldi) | ✓ | ✓ | ✓ | | ✓ | | ✓ | | | | | ✓ | | ✓ | |
|
||||
| **Intent Recognition** | [fsticuffs](intent-recognition.md#fsticuffs) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [fuzzywuzzy](intent-recognition.md#fuzzywuzzy) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [adapt](intent-recognition.md#mycroft-adapt) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flair](intent-recognition.md#flair) | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | | | | | ✓ | | ✓ |
|
||||
| | [rasaNLU](intent-recognition.md#rasanlu) | *needs extra software* | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| **Text to Speech** | [espeak](text-to-speech.md#espeak) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flite](text-to-speech.md#flite) | ✓ | ✓ | | | | | | | | ✓ | | | | | |
|
||||
| | [picotts](text-to-speech.md#picotts) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | | | | | | | | |
|
||||
| | [marytts](text-to-speech.md#marytts) | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | | | | | | | |
|
||||
| | [wavenet](text-to-speech.md#google-wavenet) | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | ✓ | |
|
||||
|
||||
• - yes, but requires training/customization
|
||||
|
||||
## HTTP API
|
||||
|
||||
Rhasspy's HTTP endpoints are documented below. You can also visit `/api/` in your Rhasspy server (note the final slash) to try out each endpoint.
|
||||
|
||||
Application authors may want to use the [rhasspy-client](https://pypi.org/project/rhasspy-client/), which provides a high-level interface to a remote Rhasspy server.
|
||||
|
||||
### Endpoints
|
||||
|
||||
* `/api/custom-words`
|
||||
* GET custom word dictionary as plain text, or POST to overwrite it
|
||||
* See `custom_words.txt` in your profile directory
|
||||
* `/api/download-profile`
|
||||
* Force Rhasspy to re-download profile
|
||||
* `?delete=true` - clear download cache
|
||||
* `/api/listen-for-command`
|
||||
* POST to wake Rhasspy up and start listening for a voice command
|
||||
* Returns intent JSON when command is finished
|
||||
* `?nohass=true` - stop Rhasspy from handling the intent
|
||||
* `?timeout=<seconds>` - override default command timeout
|
||||
* `?entity=<entity>&value=<value>` - set custom entity/value in recognized intent
|
||||
* `/api/listen-for-wake`
|
||||
* POST "on" to have Rhasspy listen for a wake word
|
||||
* POST "off" to disable wake word
|
||||
* `/api/lookup`
|
||||
* POST word as plain text to look up or guess pronunciation
|
||||
* `?n=<number>` - return at most `n` guessed pronunciations
|
||||
* `/api/microphones`
|
||||
* GET list of available microphones
|
||||
* `/api/phonemes`
|
||||
* GET example phonemes from speech recognizer for your profile
|
||||
* See `phoneme_examples.txt` in your profile directory
|
||||
* `/api/play-wav`
|
||||
* POST to play WAV data
|
||||
* `/api/profile`
|
||||
* GET the JSON for your profile, or POST to overwrite it
|
||||
* `?layers=profile` to only see settings different from `defaults.json`
|
||||
* See `profile.json` in your profile directory
|
||||
* `/api/restart`
|
||||
* Restart Rhasspy server
|
||||
* `/api/sentences`
|
||||
* GET voice command templates or POST to overwrite
|
||||
* Set `Accept: application/json` to GET JSON with all sentence files
|
||||
* Set `Content-Type: application/json` to POST JSON with sentences for multiple files
|
||||
* See `sentences.ini` and `intents` directory in your profile
|
||||
* `/api/slots`
|
||||
* GET slot values as JSON or POST to add to/overwrite them
|
||||
* `?overwrite_all=true` to clear slots in JSON before writing
|
||||
* `/api/speakers`
|
||||
* GET list of available audio output devices
|
||||
* `/api/speech-to-intent`
|
||||
* POST a WAV file and have Rhasspy process it as a voice command
|
||||
* Returns intent JSON when command is finished
|
||||
* `?nohass=true` - stop Rhasspy from handling the intent
|
||||
* `/api/speech-to-text`
|
||||
* POST a WAV file and have Rhasspy return the text transcription
|
||||
* Set `Accept: application/json` to receive JSON with more details
|
||||
* `?noheader=true` - send raw 16-bit 16 kHz mono audio without a WAV header
|
||||
* `/api/start-recording`
|
||||
* POST to have Rhasspy start recording a voice command
|
||||
* `/api/stop-recording`
|
||||
* POST to have Rhasspy stop recording and process recorded data as a voice command
|
||||
* Returns intent JSON when command has been processed
|
||||
* `?nohass=true` - stop Rhasspy from handling the intent
|
||||
* `/api/test-microphones`
|
||||
* GET list of available microphones and if they're working
|
||||
* `/api/text-to-intent`
|
||||
* POST text and have Rhasspy process it as a command
|
||||
* Returns intent JSON when command has been processed
|
||||
* `?nohass=true` - stop Rhasspy from handling the intent
|
||||
* `/api/text-to-speech`
|
||||
* POST text and have Rhasspy speak it
|
||||
* `?play=false` - get WAV data instead of having Rhasspy speak
|
||||
* `?voice=<voice>` - override default TTS voice
|
||||
* `?language=<language>` - override default TTS language or locale
|
||||
* `?repeat=true` - have Rhasspy repeat the last sentence it spoke
|
||||
* `/api/train`
|
||||
* POST to re-train your profile
|
||||
* `?nocache=true` - re-train profile from scratch
|
||||
* `/api/unknown-words`
|
||||
* GET words that Rhasspy doesn't know in your sentences
|
||||
* See `unknown_words.txt` in your profile directory
|
||||
|
||||
## Websocket API
|
||||
|
||||
* `/api/events/intent`
|
||||
* Listen for recognized intents published as JSON
|
||||
* `/api/events/log`
|
||||
* Listen for log messages published as plain text
|
||||
|
||||
## MQTT API
|
||||
|
||||
Rhasspy implements part of the [Hermes](https://docs.snips.ai/reference/hermes) protocol. Various services of Rhasspy can be configured to pass along MQTT messages or to react to MQTT messages following the Hermes protocol.
|
||||
|
||||
* `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>`
|
||||
* Rhasspy publishes audio in WAV format to this topic. By default it is 16 kHz, 16-bit mono for compatibility reasons, but other types are possible too.
|
||||
* `SITE_ID` is set in Rhasspy's `mqtt` configuration.
|
||||
* `REQUEST_ID` is generated using `uuid.uuid4` each time a sound is played.
|
||||
* `hermes/audioServer/<SITE_ID>/audioFrame`
|
||||
* Rhasspy listens to this topic for WAV data. Audio is automatically converted to 16 kHz, 16-bit mono audio and played.
|
||||
* `SITE_ID` is set in Rhasspy's `mqtt` configuration.
|
||||
* `hermes/asr/startListening`
|
||||
* Rhasspy wakes up and starts recording on receiving this topic.
|
||||
* The payload is a JSON object with a `siteId` key that holds Rhasspy's site ID.
|
||||
* `hermes/asr/stopListening`
|
||||
* Rhasspy stops recording and processes the voice command on receiving this topic.
|
||||
* The payload is a JSON object with a `siteId` key that holds Rhasspy's site ID.
|
||||
* `hermes/intent/<INTENT_NAME>`
|
||||
* Rhasspy publishes a message to this topic on recognition of an intent.
|
||||
* The payload is a JSON object with the recognized intent, entities and text.
|
||||
* `hermes/nlu/intentNotRecognized`
|
||||
* Rhasspy publishes a message to this topic when it doesn't recognize an intent.
|
||||
* `hermes/asr/textCaptured`
|
||||
* Rhasspy publishes a transcription to this topic each time a voice command is recognized.
|
||||
* `hermes/hotword/<WAKEWORD_ID>/detected`
|
||||
* Rhasspy wakes up when a message is received on this topic.
|
||||
|
||||
## Command Line
|
||||
|
||||
Rhasspy provides a powerful [command-line interface](usage.md#command-line) called `rhasspy-cli`.
|
||||
|
||||
For `rhasspy-cli --profile <PROFILE_NAME> <COMMAND> <ARGUMENTS>`, `<COMMAND>` can be:
|
||||
|
||||
* `info`
|
||||
* Print profile JSON to standard out
|
||||
* Add `--defaults` to only print settings from `defaults.json`
|
||||
* `wav2text`
|
||||
* Convert WAV file(s) to text
|
||||
* `wav2intent`
|
||||
* Convert WAV file(s) to intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* `text2intent`
|
||||
* Convert text command(s) to intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* `train`
|
||||
* Re-train your profile
|
||||
* `mic2wav`
|
||||
* Listen for a voice command and output WAV data
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `mic2text`
|
||||
* Listen for a voice command and convert it to text
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `mic2intent`
|
||||
* Listen for a voice command and output intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `word2phonemes`
|
||||
* Print the CMU phonemes for a word (possibly unknown)
|
||||
* Add `-n <COUNT>` to control the maximum number of guessed pronunciations
|
||||
* `word2wav`
|
||||
* Pronounce a word (possibly unknown) and output WAV data
|
||||
* `text2speech`
|
||||
* Speaks one or more sentences using Rhasspy's text to speech system
|
||||
* `text2wav`
|
||||
* Converts a single sentence to WAV using Rhasspy's text to speech system
|
||||
* `sleep`
|
||||
* Run Rhasspy and wait until wake word is spoken
|
||||
* `download`
|
||||
* Download necessary profile files from the internet
|
||||
|
||||
### Profile Operations
|
||||
|
||||
Print the complete JSON for the English profile with:
|
||||
|
||||
rhasspy-cli --profile en info
|
||||
|
||||
You can combine this with other commands, such as `jq` to get at specific pieces:
|
||||
|
||||
rhasspy-cli --profile en info | jq .wake.pocketsphinx.keyphrase
|
||||
|
||||
Output (JSON):
|
||||
|
||||
"okay rhasspy"
|
||||
|
||||
### Training
|
||||
|
||||
Retrain the English profile with:
|
||||
|
||||
rhasspy-cli --profile en train
|
||||
|
||||
Add `--debug` before `train` for more information.
|
||||
|
||||
### Speech to Text/Intent
|
||||
|
||||
Convert a WAV file to text from stdin:
|
||||
|
||||
rhasspy-cli --profile en wav2text < what-time-is-it.wav
|
||||
|
||||
Output (text):
|
||||
|
||||
what time is it
|
||||
|
||||
Convert multiple WAV files:
|
||||
|
||||
rhasspy-cli --profile en wav2text what-time-is-it.wav turn-on-the-living-room-lamp.wav
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"what-time-is-it.wav": "what time is it",
|
||||
"turn-on-the-living-room-lamp.wav": "turn on the living room lamp"
|
||||
}
|
||||
```
|
||||
|
||||
Convert multiple WAV files to intents **and** handle them:
|
||||
|
||||
rhasspy-cli --profile en wav2intent --handle what-time-is-it.wav turn-on-the-living-room-lamp.wav
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"what_time_is_it.wav": {
|
||||
"text": "what time is it",
|
||||
"intent": {
|
||||
"name": "GetTime",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": []
|
||||
},
|
||||
"turn_on_living_room_lamp.wav": {
|
||||
"text": "turn on the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "on"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Text to Intent
|
||||
|
||||
Handle a command as if it was spoken:
|
||||
|
||||
rhasspy-cli --profile en text2intent --handle "turn off the living room lamp"
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"turn off the living room lamp": {
|
||||
"text": "turn off the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "off"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Record Your Voice
|
||||
|
||||
Save a voice command to a WAV:
|
||||
|
||||
rhasspy-cli --profile en mic2wav > my-voice-command.wav
|
||||
|
||||
You can listen to it with:
|
||||
|
||||
aplay my-voice-command.wav
|
||||
|
||||
### Test Your Wake Word
|
||||
|
||||
Start Rhasspy and wait for wake word:
|
||||
|
||||
rhasspy-cli --profile en sleep
|
||||
|
||||
Rhasspy should exit and print the wake word when it's spoken.
|
||||
|
||||
### Text to Speech
|
||||
|
||||
Have Rhasspy speak one or more sentences:
|
||||
|
||||
rhasspy-cli --profile en text2speech "We ride at dawn!"
|
||||
|
||||
Use a different text to speech system and voice:
|
||||
|
||||
rhasspy-cli --profile en \
|
||||
--set 'text_to_speech.system' 'flite' \
|
||||
--set 'text_to_speech.flite.voice' 'slt' \
|
||||
text2speech "We ride at dawn!"
|
||||
|
||||
### Pronounce Words
|
||||
|
||||
Speak words Rhasspy doesn't know!
|
||||
|
||||
rhasspy-cli --profile en word2wav raxacoricofallapatorius | aplay
|
||||
|
||||
### Text to Speech to Text to Intent
|
||||
|
||||
Use the miracle of Unix pipes to have Rhasspy interpret voice commands from itself:
|
||||
|
||||
rhasspy-cli --profile en \
|
||||
--set 'text_to_speech.system' 'picotts' \
|
||||
text2wav "turn on the living room lamp" | \
|
||||
rhasspy-cli --profile en wav2text | \
|
||||
rhasspy-cli --profile en text2intent
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"turn on the living room lamp": {
|
||||
"text": "turn on the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "on"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
],
|
||||
"speech_confidence": 1,
|
||||
"slots": {
|
||||
"state": "on",
|
||||
"name": "living room lamp"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Profile Settings
|
||||
|
||||
All available profile sections and settings are listed below:
|
||||
|
||||
* `rhasspy` - configuration for Rhasspy assistant
|
||||
* `preload_profile` - true if speech/intent recognizers should be loaded immediately for default profile (default: `true`)
|
||||
* `listen_on_start` - true if Rhasspy should listen for wake word at startup (default: `true`)
|
||||
* `load_timeout_sec` - number of seconds to wait for internal actors before proceeding with start up
|
||||
* `home_assistant` - how to communicate with Home Assistant/Hass.io
|
||||
* `url` - Base URL of Home Assistant server (no `/api`)
|
||||
* `access_token` - long-lived access token for Home Assistant (Hass.io token is used automatically)
|
||||
* `api_password` - Password, if you have that enabled (deprecated)
|
||||
* `pem_file` - Full path to your <a href="http://docs.python-requests.org/en/latest/user/advanced/#ssl-cert-verification">CA_BUNDLE file or a directory with certificates of trusted CAs</a>
|
||||
* `event_type_format` - Python format string used to create event type from intent type (`{0}`)
|
||||
* `speech_to_text` - transcribing [voice commands to text](speech-to-text.md)
|
||||
* `system` - name of speech to text system (`pocketsphinx`, `kaldi`, `remote`, `command`, or `dummy`)
|
||||
* `pocketsphinx` - configuration for [Pocketsphinx](speech-to-text.md#pocketsphinx)
|
||||
* `compatible` - true if profile can use pocketsphinx for speech recognition
|
||||
* `acoustic_model` - directory with CMU 16 kHz acoustic model
|
||||
* `base_dictionary` - large text file with word pronunciations (read only)
|
||||
* `custom_words` - small text file with words/pronunciations added by user
|
||||
* `dictionary` - text file with all words/pronunciations needed for example sentences
|
||||
* `unknown_words` - small text file with guessed word pronunciations (from phonetisaurus)
|
||||
* `language_model` - text file with trigram [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) built from example sentences
|
||||
* `open_transcription` - true if general language model should be used (custom voice commands ignored)
|
||||
* `base_language_model` - large general language model (read only)
|
||||
* `mllr_matrix` - MLLR matrix from [acoustic model tuning](https://cmusphinx.github.io/wiki/tutorialtuning/)
|
||||
* `mix_weight` - how much of the base language model to [mix in during training](training.md#language-model-mixing) (0-1)
|
||||
* `mix_fst` - path to save mixed ngram FST model
|
||||
* `kaldi` - configuration for [Kaldi](speech-to-text.md#kaldi)
|
||||
* `compatible` - true if profile can use Kaldi for speech recognition
|
||||
* `kaldi_dir` - absolute path to Kaldi root directory
|
||||
* `model_dir` - directory where Kaldi model is stored (relative to profile directory)
|
||||
* `graph` - directory where HCLG.fst is located (relative to `model_dir`)
|
||||
* `base_graph` - directory where large general HCLG.fst is located (relative to `model_dir`)
|
||||
* `base_dictionary` - large text file with word pronunciations (read only)
|
||||
* `custom_words` - small text file with words/pronunciations added by user
|
||||
* `dictionary` - text file with all words/pronunciations needed for example sentences
|
||||
* `open_transcription` - true if general language model should be used (custom voice commands ignored)
|
||||
* `unknown_words` - small text file with guessed word pronunciations (from phonetisaurus)
|
||||
* `mix_weight` - how much of the base language model to [mix in during training](training.md#language-model-mixing) (0-1)
|
||||
* `mix_fst` - path to save mixed ngram FST model
|
||||
* `remote` - configuration for [remote Rhasspy server](speech-to-text.md#remote-http-server)
|
||||
* `url` - URL to POST WAV data for transcription (e.g., `http://your-rhasspy-server:12101/api/speech-to-text`)
|
||||
* `command` - configuration for [external speech-to-text program](speech-to-text.md#command)
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `sentences_ini` - Ini file with example [sentences/JSGF templates](training.md#sentencesini) grouped by intent
|
||||
* `sentences_dir` - Directory with additional sentence templates (default: `intents`)
|
||||
* `g2p_model` - finite-state transducer for phonetisaurus to guess word pronunciations
|
||||
* `g2p_casing` - casing to force for g2p model (`upper`, `lower`, or blank)
|
||||
* `dictionary_casing` - casing to force for dictionary words (`upper`, `lower`, or blank)
|
||||
* `slots_dir` - directory to look for [slots lists](training.md#slots-lists) (default: `slots`)
|
||||
* `slot_programs` - directory to look for [slot programs](training.md#slot-programs) (default: `slot_programs`)
|
||||
* `fsts_dir` - directory to write generated finite state transducers from JSGF grammars
|
||||
* `intent` - transforming text commands to intents
|
||||
* `system` - intent recognition system (`fsticuffs`, `fuzzywuzzy`, `rasa`, `remote`, `adapt`, `command`, or `dummy`)
|
||||
* `fsticuffs` - configuration for [OpenFST-based](https://www.openfst.org) intent recognizer
|
||||
* `intent_fst` - path to generated finite state transducer with all intents combined
|
||||
* `converters_dir` - directory to look for [converter](training.md#converters) programs (default: `converters`)
|
||||
* `ignore_unknown_words` - true if words not in the FST symbol table should be ignored
|
||||
* `fuzzy` - true if text is matched in a fuzzy manner, skipping words in `stop_words.txt`
|
||||
* `fuzzywuzzy` - configuration for simplistic [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) based intent recognizer
|
||||
* `examples_json` - JSON file with intents/example sentences
|
||||
* `min_confidence` - minimum confidence required for intent to be converted to a JSON event (0-1)
|
||||
* `remote` - configuration for remote Rhasspy server
|
||||
* `url` - URL to POST text to for intent recognition (e.g., `http://your-rhasspy-server:12101/api/text-to-intent`)
|
||||
* `rasa` - configuration for [Rasa NLU](https://rasa.com/) based intent recognizer
|
||||
* `url` - URL of remote Rasa NLU server (e.g., `http://localhost:5005/`)
|
||||
* `examples_markdown` - Markdown file to generate with intents/example sentences
|
||||
* `project_name` - name of project to generate during training
|
||||
* `adapt` - configuration for [Mycroft Adapt](https://github.com/MycroftAI/adapt) based intent recognizer
|
||||
* `stop_words` - text file with words to ignore in training sentences
|
||||
* `command` - configuration for external intent recognition program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `replace_numbers` - if true, automatically replace number ranges (`N..M`) or numbers (`N`) with words
|
||||
* `text_to_speech` - pronouncing words
|
||||
* `system` - text to speech system (`espeak`, `flite`, `picotts`, `marytts`, `wavenet`, `command`, or `dummy`)
|
||||
* `espeak` - configuration for [eSpeak](http://espeak.sourceforge.net)
|
||||
* `phoneme_map` - text file mapping CMU phonemes to eSpeak phonemes
|
||||
* `flite` - configuration for [flite](http://www.festvox.org/flite)
|
||||
* `voice` - name of voice to use (e.g., `kal16`, `rms`, `awb`)
|
||||
* `picotts` - configuration for [PicoTTS](https://en.wikipedia.org/wiki/SVOX)
|
||||
* `language` - language to use (default if not present)
|
||||
* `marytts` - configuration for [MaryTTS](http://mary.dfki.de)
|
||||
* `url` - address:port of MaryTTS server (port is usually 59125)
|
||||
* `voice` - name of voice to use (e.g., `cmu-slt`). Default if not present.
|
||||
* `locale` - name of locale to use (e.g., `en-US`). Default if not present.
|
||||
* `wavenet` - configuration for Google's [WaveNet](https://cloud.google.com/text-to-speech/docs/wavenet)
|
||||
* `cache_dir` - path to directory in your profile where WAV files are cached
|
||||
* `credentials_json` - path to the JSON credentials file (generated online)
|
||||
* `gender` - gender of speaker (`MALE` or `FEMALE`)
|
||||
* `language_code` - language/locale (e.g., `en-US`)
|
||||
* `sample_rate` - WAV sample rate (default: 22050)
|
||||
* `url` - URL of WaveNet endpoint
|
||||
* `voice` - voice to use (e.g., `Wavenet-C`)
|
||||
* `fallback_tts` - text to speech system to use when offline or error occurs (e.g., `espeak`)
|
||||
* `phoneme_examples` - text file with examples for each CMU phoneme
|
||||
* `training` - training speech/intent recognizers
|
||||
* `dictionary_number_duplicates` - true if duplicate words in dictionary should be suffixed by `(2)`, `(3)`, etc.
|
||||
* `tokenizer` - system used to break sentences into words (`regex` only for now)
|
||||
* `regex` - configuration for regex tokenizer
|
||||
* `replace` - list of dictionaries with patterns/replacements used on each example sentence
|
||||
* `split` - pattern used to break sentences into words
|
||||
* `unknown_words` - configuration for dealing with words not in base/custom dictionaries
|
||||
* `fail_when_present` - true if Rhasspy should halt training when unknown words are found
|
||||
* `guess_pronunciations` - true if [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) should be used to guess how an unknown word is pronounced
|
||||
* `speech_to_text` - training for speech decoder
|
||||
* `system` - speech to text training system (`auto`, `pocketsphinx`, `kaldi`, `command`, or `dummy`)
|
||||
* `command` - configuration for external speech-to-text training program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `intent` - training for intent recognizer
|
||||
* `system` - intent recognizer training system (`auto`, `fsticuffs`, `fuzzywuzzy`, `rasa`, `adapt`, `command`, or `dummy`)
|
||||
* `command` - configuration for external intent recognizer training program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `wake` - waking Rhasspy up for speech input
|
||||
* `system` - wake word recognition system (`pocketsphinx`, `snowboy`, `precise`, `porcupine`, `command`, or `dummy`)
|
||||
* `pocketsphinx` - configuration for Pocketsphinx wake word recognizer
|
||||
* `keyphrase` - phrase to wake up on (3-4 syllables recommended)
|
||||
* `threshold` - sensitivity of detection (recommended range 1e-50 to 1e-5)
|
||||
* `chunk_size` - number of bytes per chunk to feed to Pocketsphinx (default 960)
|
||||
* `snowboy` - configuration for [snowboy](https://snowboy.kitt.ai)
|
||||
* `model` - path to model file(s), separated by commas (in profile directory)
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `audio_gain` - audio gain (default 1)
|
||||
* `apply_frontend` - true if ApplyFrontend should be set
|
||||
* `chunk_size` - number of bytes per chunk to feed to snowboy (default 960)
|
||||
* `model_settings` - settings for each snowboy model path (e.g., `snowboy/snowboy.umdl`)
|
||||
* `<MODEL_PATH>`
|
||||
* `sensitivity` - model sensitivity
|
||||
* `audio_gain` - audio gain
|
||||
* `apply_frontend` - true if ApplyFrontend should be set
|
||||
* `precise` - configuration for [Mycroft Precise](https://github.com/MycroftAI/mycroft-precise)
|
||||
* `engine_path` - path to the precise-engine binary
|
||||
* `model` - path to model file (in profile directory)
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `trigger_level` - number of events to trigger activation (default 3)
|
||||
* `chunk_size` - number of bytes per chunk to feed to Precise (default 2048)
|
||||
* `porcupine` - configuration for [PicoVoice's Porcupine](https://github.com/Picovoice/Porcupine)
|
||||
* `library_path` - path to `libpv_porcupine.so` for your platform/architecture
|
||||
* `model_path` - path to the `porcupine_params.pv` (lib/common)
|
||||
* `keyword_path` - path to the `.ppn` keyword file
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `command` - configuration for external wake word program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `microphone` - configuration for audio recording
|
||||
* `system` - audio recording system (`pyaudio`, `arecord`, `hermes`, `gstreamer`, `http`, or `dummy`)
|
||||
* `pyaudio` - configuration for [PyAudio](https://people.csail.mit.edu/hubert/pyaudio/) microphone
|
||||
* `device` - index of device to use or empty for default device
|
||||
* `frames_per_buffer` - number of frames to read at a time (default 480)
|
||||
* `arecord` - configuration for ALSA microphone
|
||||
* `device` - name of ALSA device (see `arecord -L`) to use or empty for default device
|
||||
* `chunk_size` - number of bytes to read at a time (default 960)
|
||||
* `http` - configuration for HTTP audio stream
|
||||
* `host` - hostname or IP address of HTTP audio server (default 127.0.0.1)
|
||||
* `port` - port to receive audio stream on (default 12333)
|
||||
* `stop_after` - one of "never", "text", or "intent" ([see documentation](audio-input.md#http-stream))
|
||||
* `gstreamer` - configuration for GStreamer audio recorder
|
||||
* `pipeline` - GStreamer pipeline (e.g., `FILTER ! FILTER ! ...`) without sink
|
||||
* `hermes` - configuration for MQTT "microphone" ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* Subscribes to WAV data from `hermes/audioServer/<SITE_ID>/audioFrame`
|
||||
* Requires MQTT to be enabled
|
||||
* `sounds` - configuration for feedback sounds from Rhasspy
|
||||
* `system` - which sound output system to use (`aplay`, `hermes`, or `dummy`)
|
||||
* `wake` - path to WAV file to play when Rhasspy wakes up
|
||||
* `recorded` - path to WAV file to play when a command finishes recording
|
||||
* `aplay` - configuration for ALSA speakers
|
||||
* `device` - name of ALSA device (see `aplay -L`) to use or empty for default device
|
||||
* `hermes` - configuration for MQTT "speakers" ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* WAV data published to `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>`
|
||||
* Requires MQTT to be enabled
|
||||
* `command`
|
||||
* `system` - which voice command listener system to use (`webrtcvad`, `oneshot`, `command`, `hermes`, or `dummy`)
|
||||
* `webrtcvad` - configuration for [webrtcvad](https://github.com/wiseman/py-webrtcvad) system
|
||||
* `sample_rate` - sample rate of input audio
|
||||
* `chunk_size` - bytes per buffer (must be 10, 20, or 30 ms)
|
||||
* `vad_mode` - sensitivity of `webrtcvad` (0-3)
|
||||
* `min_sec` - minimum number of seconds in a command
|
||||
* `silence_sec` - number of seconds of silence after voice command before stopping
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `throwaway_buffers` - number of buffers to drop when recording starts
|
||||
* `speech_buffers` - number of buffers with speech before command starts
|
||||
* `oneshot` - configuration for voice command system that takes first audio frame as entire command
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `command` - configuration for external voice command program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `hermes` - configuration for MQTT-based voice command system that listens between `startListening` and `stopListening` commands ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `handle`
|
||||
* `system` - which intent handling system to use (`hass`, `command`, `remote`, or `dummy`)
|
||||
* `forward_to_hass` - true if intents are always forwarded to Home Assistant (even if `system` is `command` or `remote`)
|
||||
* `command` - configuration for external intent handling program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `remote` - configuration for remote HTTP intent handler
|
||||
* `url` - URL to POST intent JSON to and receive response JSON from
|
||||
* `mqtt` - configuration for MQTT ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* `enabled` - true if MQTT client should be started
|
||||
* `host` - MQTT host
|
||||
* `port` - MQTT port
|
||||
* `username` - MQTT username (blank for anonymous)
|
||||
* `password` - MQTT password
|
||||
* `reconnect_sec` - number of seconds before client will reconnect
|
||||
* `site_id` - ID of site ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* `publish_intents` - true if intents are published to MQTT
|
||||
* `download` - configuration for profile file downloading
|
||||
* `cache_dir` - directory in your profile where downloaded files are cached
|
||||
* `conditions` - profile settings that will trigger file downloads
|
||||
* keys are profile setting paths (e.g., `wake.system`)
|
||||
* values are dictionaries whose keys are profile settings values (e.g., `snowboy`)
|
||||
* settings may have the form `<=N` or `!X` to mean "less than or equal to N" or "not X"
|
||||
* leaf nodes are dictionaries whose keys are destination file paths and whose values reference the `files` dictionary
|
||||
* `files` - locations, etc. of files to download
|
||||
* keys are names of files
|
||||
* values are dictionaries with:
|
||||
* `url` - URL of file to download
|
||||
* `cache` - `false` if file should be downloaded directly into profile (skipping cache)
|
||||
@@ -7,7 +7,8 @@ The following table summarizes language support for the various speech to text s
|
||||
| System | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt | ca |
|
||||
| ------ | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| [pocketsphinx](speech-to-text.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ |
|
||||
| [kaldi](speech-to-text.md#kaldi) | ✓ | ✓ | | | | ✓ | | | | | ✓ | | |
|
||||
| [kaldi](speech-to-text.md#kaldi) | ✓ | ✓ | | ✓ | | ✓ | | | | | ✓ | | |
|
||||
| [google](speech-to-text.md#google-cloud) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
|
||||
## Pocketsphinx
|
||||
|
||||
@@ -77,6 +78,29 @@ Rhasspy expects a Kaldi-compatible profile to contain a `model` directory with a
|
||||
|
||||
If you just want to use Rhasspy for general speech to text, you can set `speech_to_text.kaldi.open_transcription` to `true` in your profile. This will use the included general language model (much slower) and ignore any custom voice commands you've specified.
|
||||
|
||||
## Google Cloud
|
||||
|
||||
Does speech recognition using [Google Cloud Speech-to-Text](https://cloud.google.com/speech-to-text) service.
|
||||
You will need an active Google Cloud subscription and a JSON private key connected to a service account enabled to use
|
||||
the speech-to-text API. The locale configured in your profile will be used for speech recognition.
|
||||
|
||||
```json
|
||||
{
|
||||
"locale": "en_US",
|
||||
"speech_to_text": {
|
||||
"system": "google",
|
||||
"google": {
|
||||
"credentials": "api-project-xxxxxxxx-abcdef.json",
|
||||
"min_confidence": 0.7
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Please note that this module sends the recorded audio after it's completed, so no streaming support.
|
||||
|
||||
See `rhasspy.stt.GoogleCloudDecoder` for details.
|
||||
|
||||
## Remote HTTP Server
|
||||
|
||||
Uses a remote HTTP server to transform speech (WAV) to text.
|
||||
@@ -98,6 +122,39 @@ During speech recognition, 16-bit 16 kHz mono WAV data will be POST-ed to the en
|
||||
|
||||
See `rhasspy.stt.RemoteDecoder` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Publishes transcriptions to `hermes/asr/textCaptured` ([Hermes protocol](https://docs.snips.ai/reference/hermes)) each time a voice command is spoken.
|
||||
|
||||
This is enabled by default.
|
||||
|
||||
## Home Assistant STT Platform
|
||||
|
||||
Use an [STT platform](https://www.home-assistant.io/integrations/stt) on your Home Assistant server.
|
||||
This is the same way [Ada](https://github.com/home-assistant/ada) sends speech to Home Assistant.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"speech_to_text": {
|
||||
"system": "hass_stt",
|
||||
"hass_stt": {
|
||||
"platform": "...",
|
||||
"sample_rate": 16000,
|
||||
"bit_size": 16,
|
||||
"channels": 1,
|
||||
"language": "en-US"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The settings from your profile's `home_assistant` section are automatically used (URL, access token, etc.).
|
||||
|
||||
Rhasspy will convert audio to the configured format before streaming it to Home Assistant.
|
||||
In the future, this will be auto-detected from the STT platform API.
|
||||
|
||||
See `rhasspy.stt.HomeAssistantSTTIntegration` for details.
|
||||
|
||||
## Command
|
||||
|
||||
Calls a custom external program to do speech recognition.
|
||||
|
||||
@@ -29,6 +29,19 @@ Add to your [profile](profiles.md):
|
||||
|
||||
Remove the `voice` option to have `espeak` use your profile's language automatically.
|
||||
|
||||
You may also pass additional arguments to the `espeak` command. For example,
|
||||
|
||||
```json
|
||||
"text_to_speech": {
|
||||
"system": "espeak",
|
||||
"espeak": {
|
||||
"arguments": ["-s", "80"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
will speak the sentence more slowly.
|
||||
|
||||
See `rhasspy.tts.EspeakSentenceSpeaker` for more details.
|
||||
|
||||
## Flite
|
||||
@@ -52,7 +65,9 @@ See `rhasspy.tts.FliteSentenceSpeaker` for details.
|
||||
|
||||
## PicoTTS
|
||||
|
||||
Uses SVOX's [picotts](https://en.wikipedia.org/wiki/SVOX) for text to speech. Sounds a bit better (to me) than `flite` or `espeak`, but only has a single English voice.
|
||||
Uses SVOX's [picotts](https://en.wikipedia.org/wiki/SVOX) for text to speech. Sounds a bit better (to me) than `flite` or `espeak`.
|
||||
|
||||
Included languages are `en-US`, `en-GB`, `de-DE`, `es-ES`, `fr-FR` and `it-IT`.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -89,8 +104,24 @@ To run the Docker image, simply execute:
|
||||
```bash
|
||||
docker run -it -p 59125:59125 synesthesiam/marytts:5.2
|
||||
```
|
||||
|
||||
and visit [http://localhost:59125](http://localhost:59125) after it starts. For more English voices, run the following commands in a Bash shell:
|
||||
|
||||
and visit [http://localhost:59125](http://localhost:59125) after it starts.
|
||||
|
||||
If you're using [docker compose](https://docs.docker.com/compose/), add the following to your docker-compose.yml file:
|
||||
|
||||
marytts:
|
||||
image: synesthesiam/marytts:5.2
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "59125:59125"
|
||||
|
||||
When using docker-compose, set `marytts.url` in your profile to be `http://marytts:59125`. This will allow rhasspy, from within
|
||||
its docker container, to resolve and connect to marytts (its sibling container).
|
||||
|
||||
|
||||
### Adding Voices
|
||||
|
||||
For more English voices, run the following commands in a Bash shell:
|
||||
|
||||
```bash
|
||||
mkdir -p marytts-5.2/download
|
||||
@@ -111,6 +142,37 @@ Change the first line to select the voice you'd like to add. It's not recommende
|
||||
|
||||
See `rhasspy.tts.MaryTTSSentenceSpeaker` for details.
|
||||
|
||||
### Audio Effects
|
||||
|
||||
MaryTTS is capable of applying several audio effects when producing speech. See the web interface at [http://localhost:59125](http://localhost:59125)
|
||||
to experiment with this.
|
||||
|
||||
|
||||
To use these effects within Rhasspy, set `text_to_speech.marytts.effects` within your profile, for example:
|
||||
|
||||
```json
|
||||
"text_to_speech": {
|
||||
"system": "marytts",
|
||||
"marytts": {
|
||||
"url": "http://localhost:59125",
|
||||
"effects": {
|
||||
"effect_Volume_selected": "on",
|
||||
"effect_Volume_parameters": "amount=0.9;",
|
||||
"effect_TractScaler_selected": "on",
|
||||
"effect_TractScaler_parameters": "amount:1.2;",
|
||||
"effect_F0Add_selected": "on",
|
||||
"effect_F0Add_parameters": "f0Add:-50.0;",
|
||||
"effect_Robot_selected": "on",
|
||||
"effect_Robot_parameters": "amount=50.0;"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can determine the names of the parameters by examining the web interface [http://localhost:59125](http://localhost:59125)
|
||||
using your browser's Developer Tools.
|
||||
|
||||
|
||||
## Google WaveNet
|
||||
|
||||
Uses Google's [WaveNet](https://cloud.google.com/text-to-speech/docs/wavenet) text to speech system. This **requires a Google account and an internet connection to function**. Rhasspy will cache WAV files for previously spoken sentences, but you will be sending Google information for every new sentence that Rhasspy speaks.
|
||||
@@ -143,6 +205,25 @@ Contributed by [Romkabouter](https://github.com/Romkabouter).
|
||||
|
||||
See `rhasspy.tts.GoogleWaveNetSentenceSpeaker` for details.
|
||||
|
||||
## Home Assistant TTS Platform
|
||||
|
||||
Use a [TTS platform](https://www.home-assistant.io/integrations/tts) on your Home Assistant server.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"text_to_speech": {
|
||||
"system": "hass_tts",
|
||||
"hass_tts": {
|
||||
"platform": "..."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The settings from your profile's `home_assistant` section are automatically used (URL, access token, etc.).
|
||||
|
||||
See `rhasspy.tts.HomeAssistantSentenceSpeaker` for details.
|
||||
|
||||
## Command
|
||||
|
||||
You can extend Rhasspy easily with your own external text to speech system. When a sentence needs to be spoken, Rhasspy will call your custom program with the text given on standard in. Your program should return the corresponding WAV data on standard out.
|
||||
|
||||
@@ -1,48 +1,319 @@
|
||||
# Training
|
||||
|
||||
Rhasspy is designed to recognize voice commands that [you provide](#sentencesini). These commands are categorized by **intent**, and may contain variable **slots** or **entities**, such as the color and name of a light.
|
||||
Rhasspy is designed to recognize voice commands [in a template language](#sentencesini). These commands are categorized by **intent**, and may contain [slots](#slots-lists) or [named entities](#tags), such as the color and name of a light.
|
||||
|
||||
During the training process, Rhasspy simultaneously trains *both* a speech and intent recognizer. The speech recognizer converts voice commands to text, and the intent recognizer converts text to JSON events. Combined, they enable a low power, offline system like a Raspberry Pi to understand and respond to your voice commands.
|
||||
|
||||
## How It Works
|
||||
|
||||
Recognizing voice commands typically involves two main steps:
|
||||
|
||||
1. Speech to text (transcription)
|
||||
2. Text to intent (recognition)
|
||||
|
||||
For step (1), Rhasspy uses [pocketsphinx](https://github.com/cmusphinx/pocketsphinx) or [Kaldi](https://kaldi-asr.org), and generates a custom [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) during the training process. Specifically, the steps are:
|
||||
|
||||
1. Convert the grammar from your [sentences.ini](#sentencesini) file to a [finite state transducer](https://www.openfst.org)
|
||||
2. (Optionally) generate all possible sentences that can be spoken with entities tagged (e.g., `name` is `bedroom light`, `color` is `red`)
|
||||
3. Use the [opengrm](http://www.opengrm.org/twiki/bin/view/GRM/NGramLibrary) toolkit to create a custom language model
|
||||
4. Train an intent recognizer with the tagged sentences
|
||||
|
||||
Additionally, a custom [CMU phonetic dictionary](https://cmusphinx.github.io/wiki/tutorialdict/) is generated with *only* the words in your voice commands (and wake word, if you're using a [pocketsphinx keyphrase](wake-word.md#pocketsphinx)). If the pronunciation of a word is not known, Rhasspy calls out to [phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) to get a guess, and then halts training. Once you've confirmed the pronunciations by adding them to your [custom words](#custom-words), training can continue.
|
||||
|
||||
For step (4), Rhasspy can use a [variety of intent recognition systems](intent-recognition.md). However, most are trained from the **tagged sentences** generated from [sentences.ini](#sentencesini), e.g., `turn [on](state) the [living room lamp](name)`. These sentences are transformed into JSON, like:
|
||||
|
||||
{
|
||||
"ChangeLightState": [
|
||||
{
|
||||
"text": "turn on the living room lamp",
|
||||
"entities": [
|
||||
{ "entity": "state", "value": "on" },
|
||||
{ "entity": "name", "value": "living room lamp" }
|
||||
]
|
||||
},
|
||||
...
|
||||
],
|
||||
...
|
||||
}
|
||||
|
||||
and provided as training material to the intent recognition system. The [fuzzywuzzy](intent-recognition.md#fuzzywuzzy) system, for example, simply saves the JSON file and, during recognition, finds the closest matching sentence according to the [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance). The [default intent recognizer](intent-recognition.md#fsticuffs) interacts directly with the finite state transducer(s) generated in step (1) and, while less tolerant of errors than `fuzzywuzzy`, is significantly faster for large sets of voice commands (i.e., millions).
|
||||
|
||||
More sophisticated systems like [Rasa NLU](intent-recognition.md#rasanlu) use machine learning techniques to classify sentences by intent and assign slot (entity) values. These systems are much better at recognizing sentences not seen during training, but can take minutes to hours to train.
|
||||
* Intent Recognition
|
||||
* [Basic Syntax](#basic-syntax)
|
||||
* [Named Entities](#tags)
|
||||
* [Number Ranges](#number-ranges)
|
||||
* [Slots](#slots-lists)
|
||||
* [Slot Synonyms](#slot-synonyms)
|
||||
* [Slot Programs](#slot-programs)
|
||||
* [Converters](#converters)
|
||||
* Speech Recognition
|
||||
* [Custom Words](#custom-words)
|
||||
* [Language Model Mixing](#language-model-mixing)
|
||||
|
||||
## sentences.ini
|
||||
|
||||
Voice commands are recognized by Rhasspy from a set of sentences that you define in your [profile](profiles.md). These are stored in an [ini file](https://docs.python.org/3/library/configparser.html) whose "values" are simplified [JSGF grammars](https://www.w3.org/TR/jsgf/). The set of all sentences *generated* from these grammars is used to train an [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) and an intent recognizer.
|
||||
Voice commands are stored in an [ini file](https://docs.python.org/3/library/configparser.html) whose "sections" are intents and "values" are sentence templates.
|
||||
|
||||
### Basic Syntax
|
||||
|
||||
To get started, simply list your intents (surrounded by brackets) and the possible ways of invoking them below:
|
||||
|
||||
```
|
||||
[TestIntent1]
|
||||
this is a sentence
|
||||
this is another sentence for the same intent
|
||||
|
||||
[TestIntent2]
|
||||
this is a sentence for a different intent
|
||||
```
|
||||
|
||||
If you say "this is a sentence" after hitting the `Train` button, it will generate a `TestIntent1`.
|
||||
|
||||
### Groups
|
||||
|
||||
You can group multiple words together using `(parentheses)` like:
|
||||
|
||||
```
|
||||
turn on the (living room lamp)
|
||||
```
|
||||
|
||||
Groups (sometimes called sequences) can be [tagged](#tags) and [substituted](#substitutions) like single words. They may also contain [alternatives](#alternatives).
|
||||
|
||||
### Optional Words
|
||||
|
||||
Within a sentence template, you can specify optional word(s) by surrounding them `[with brackets]`. For example:
|
||||
|
||||
```
|
||||
[an] example sentence [with] some optional words
|
||||
```
|
||||
|
||||
will match:
|
||||
|
||||
* `an example sentence with some optional words`
|
||||
* `example sentence with some optional words`
|
||||
* `an example sentence some optional words`
|
||||
* `example sentence some optional words`
|
||||
|
||||
### Alternatives
|
||||
|
||||
A set of items where only one is matched at a time is `(specified | like | this)`. For N items, there will be N matched sentences (unless you nest optional words, etc.). The template:
|
||||
|
||||
```
|
||||
set the light to (red | green | blue)
|
||||
```
|
||||
|
||||
will match:
|
||||
|
||||
* `set the light to red`
|
||||
* `set the light to green`
|
||||
* `set the light to blue`
|
||||
|
||||
### Tags
|
||||
|
||||
Named entities are marked in your sentence templates with `{tags}`. The name of the `{entity}` is between the curly braces, while the `(value of the){entity}` comes immediately before:
|
||||
|
||||
```
|
||||
[SetLightColor]
|
||||
set the light to (red | green | blue){color}
|
||||
```
|
||||
|
||||
With the `{color}` tag attached to `(red | green | blue)`, Rhasspy will match:
|
||||
|
||||
* `set the light to [red](color)`
|
||||
* `set the light to [green](color)`
|
||||
* `set the light to [blue](color)`
|
||||
|
||||
When the `SetLightColor` intent is recognized, the JSON event will contain a `color` property whose value is either "red", "green" or "blue".
|
||||
|
||||
#### Tag Synonyms
|
||||
|
||||
Tag/named entity values can be [substituted](#substitutions) using the colon (`:`) inside the `{curly:braces}` like:
|
||||
|
||||
```
|
||||
turn on the (living room lamp){name:light_1}
|
||||
```
|
||||
|
||||
Now the `name` property of the intent JSON event will contain "light_1" instead of "living room lamp".
|
||||
|
||||
### Substitutions
|
||||
|
||||
The colon (`:`) is used to put something different than what's spoken into the recognized intent JSON. The left-hand side of the `:` is what Rhasspy expects to hear, while the right-hand side is what gets put into the intent:
|
||||
|
||||
```
|
||||
turn on the (living room lamp):light_1
|
||||
```
|
||||
|
||||
In this example, the spoken phrase "living room lamp" will be replaced by "light_1" in the recognized intent. Substitutions work for single words, [groups](#groups), [alternatives](#alternatives), and [tags](#tags):
|
||||
|
||||
```
|
||||
turn on the living room lamp:light
|
||||
(turn | switch):switch on the living room lamp
|
||||
turn (on){action:activate} the living room lamp
|
||||
```
|
||||
|
||||
See [tag synonyms](#tag-synonyms) for more details on tag substitution.
|
||||
|
||||
You can leave the left-hand or right-hand side (or both!) of the `:` empty:
|
||||
|
||||
```
|
||||
these: words: will: be: dropped:
|
||||
:these :will :be :added
|
||||
```
|
||||
|
||||
When the right-hand side is empty (`dropped:`), the spoken word will not appear in the intent. An empty left-hand side (`:added`) means the word is *not* spoken, but will appear in the intent.
|
||||
|
||||
Leaving **both** sides empty does nothing unless you attach a [tag](#tags) to it. This allows you to embed a named entity in a voice command without matching specific words:
|
||||
|
||||
```
|
||||
turn on the living room lamp (:){domain:light}
|
||||
```
|
||||
|
||||
An intent from the example above will contain a `domain` entity whose value is `light`.
|
||||
|
||||
### Rules
|
||||
|
||||
Rules allow you to reuse parts of your sentence templates. They're defined by `rule_name = ...` alongside other sentences and referenced by `<rule_name>`. For example:
|
||||
|
||||
```
|
||||
colors = (red | green | blue)
|
||||
set the light to <colors>
|
||||
```
|
||||
|
||||
which is equivalent to:
|
||||
|
||||
```
|
||||
set the light to (red | green | blue)
|
||||
```
|
||||
|
||||
You can **share rules** across intents by referencing them as `<IntentName.rule_name>` like:
|
||||
|
||||
[SetLightColor]
|
||||
colors = (red | green | blue)
|
||||
set the light to <colors>
|
||||
|
||||
[GetLightColor]
|
||||
is the light <SetLightColor.colors>
|
||||
|
||||
The second intent (`GetLightColor`) references the `colors` rule from `SetLightColor`. Rule references without a dot must exist in the current intent.
|
||||
|
||||
### Number Ranges
|
||||
|
||||
Rhasspy supports using number literals (`75`) and number ranges (`1..10`) directly in your sentence templates. During training, the [num2words](https://pypi.org/project/num2words) package is used to generate words that the speech recognizer can handle ("seventy five"). For example:
|
||||
|
||||
```
|
||||
[SetBrightness]
|
||||
set brightness to (0..100){brightness}
|
||||
```
|
||||
|
||||
The `brightness` property of the recognized `SetBrightness` intent will automatically be [converted](#converters) to an integer for you. You can optionally add a step to the integer range:
|
||||
|
||||
```
|
||||
evens = 0..100,2
|
||||
odds = 1..100,2
|
||||
```
|
||||
|
||||
Under the hood, number ranges are actually references to the `rhasspy/number` [slot program](#slot-programs). You can override this behavior by creating your `slot_programs/rhasspy/number` program or disable it entirely by setting `intent.replace_numbers` to `false` in [your profile](profiles.md).
|
||||
|
||||
### Slots Lists
|
||||
|
||||
Large [alternatives](#alternatives) can become unwieldy quickly. For example, say you have a list of movie names:
|
||||
|
||||
```
|
||||
movies = ("Primer" | "Moon" | "Chronicle" | "Timecrimes" | "Mulholland Drive" | ... )
|
||||
```
|
||||
|
||||
Rather than keep this list in `sentences.ini`, you may put each movie name on a separate line in a file named `slots/movies` (no file extension) and reference it as `$movies`. Rhasspy automatically loads all files in the `slots` directory of your [profile](profiles.md) and makes them available as slots lists.
|
||||
|
||||
For the example above, the file `slots/movies` should contain:
|
||||
|
||||
```
|
||||
Primer
|
||||
Moon
|
||||
Chronicle
|
||||
Timecrimes
|
||||
Mulholland Drive
|
||||
```
|
||||
|
||||
Now you can simply use the placeholder `$movies` in your sentence templates:
|
||||
|
||||
```
|
||||
[PlayMovie]
|
||||
play ($movies){movie_name}
|
||||
```
|
||||
|
||||
When matched, the `PlayMovie` intent JSON will contain a `movie_name` property with either "Primer", "Moon", etc.
|
||||
|
||||
Make sure to **re-train** Rhasspy whenever you update your slot values!
|
||||
|
||||
#### Slot Directories
|
||||
|
||||
Slot files can be put in **sub-directories** under `slots`. A list in `slots/foo/bar` should be referenced in `sentences.ini` as `$foo/bar`.
|
||||
|
||||
#### Slot Synonyms
|
||||
|
||||
Slot values are themselves sentence templates! So you can use all of the familiar syntax from above. Slot "synonyms" can be created simply using [substitutions](#substitutions). So a file named `slots/rooms` may contain:
|
||||
|
||||
```
|
||||
[the:] (den | playroom | downstairs):den
|
||||
```
|
||||
|
||||
which is referenced by `$rooms` and will match:
|
||||
|
||||
* the den
|
||||
* den
|
||||
* the playroom
|
||||
* playroom
|
||||
* the downstairs
|
||||
* downstairs
|
||||
|
||||
This will always output just "den" because `[the:]` optionally matches "the" and then drops the word.
|
||||
|
||||
#### Slot Programs
|
||||
|
||||
Slot lists are great if your slot values always stay the same and are easily written out by hand. If you have slot values that you need to be generated *each time Rhasspy is trained*, you can use slot programs.
|
||||
|
||||
Create a directory named `slot_programs` in your profile (e.g., `$HOME/.config/rhasspy/profiles/en/slot_programs`):
|
||||
|
||||
```bash
|
||||
slot_programs="${HOME}/.config/rhasspy/profiles/en/slot_programs"
|
||||
mkdir -p "${slot_programs}"
|
||||
```
|
||||
|
||||
Add a file in `slot_programs` with the name of your slot, e.g. `colors`. Write a program in this file, such as a bash script. Make sure to include the [shebang](https://en.wikipedia.org/wiki/Shebang_(Unix)) and mark the file as executable:
|
||||
|
||||
```bash
|
||||
cat <<EOF > "${slot_programs}/colors"
|
||||
#!/usr/bin/env bash
|
||||
echo 'red'
|
||||
echo 'green'
|
||||
echo 'blue'
|
||||
EOF
|
||||
|
||||
chmod +x "${slot_programs}/colors"
|
||||
```
|
||||
|
||||
Now, when you reference `$colors` in your `sentences.ini`, Rhasspy will run the program you wrote and collect the slot values from each line. Note that you can output all the same things as regular [slots lists](#slots-lists), including optional words, alternatives, etc.
|
||||
|
||||
You can pass **arguments** to your program using the syntax `$name,arg1,arg2,...` in `sentences.ini` (no spaces). Arguments will be passed on the command-line, so `arg1` and `arg2` will be `$1` and `$2` in a bash script.
|
||||
|
||||
Like regular slots lists, slot programs can also be put in sub-directories under `slot_programs`. A program in `slot_programs/foo/bar` should be referenced in `sentences.ini` as `$foo/bar`.
|
||||
|
||||
#### Built-in Slots
|
||||
|
||||
Rhasspy includes a few built-in slots for each language:
|
||||
|
||||
* `$rhasspy/days` - day names of the week
|
||||
* `$rhasspy/months` - month names of the year
|
||||
|
||||
### Converters
|
||||
|
||||
By default, all named entity values in a recognized intent's JSON are strings. If you need a different data type, such as an integer or float, or want to do some kind of complex *conversion*, use a converter:
|
||||
|
||||
```
|
||||
[SetBrightness]
|
||||
set brightness to (low:0 | medium:0.5 | high:1){brightness!float}
|
||||
```
|
||||
|
||||
The `!name` syntax calls a converter by name. Rhasspy includes several built-in converters:
|
||||
|
||||
* int - convert to integer
|
||||
* float - convert to real
|
||||
* bool - convert to boolean
|
||||
* lower - lower-case
|
||||
* upper - upper-case
|
||||
|
||||
You can define your own converters by placing a file in the `converters` directory of your profile. Like [slot programs](#slot-programs), this file should contain a [shebang](https://en.wikipedia.org/wiki/Shebang_(Unix)) and be marked as executable (`chmod +x`). A file named `converters/foo/bar` should be referenced as `!foo/bar` in `sentences.ini`.
|
||||
|
||||
Your custom converter will receive the value to convert on standard in (`stdin`) encoded as JSON. You should print a converted JSON value to standard out (`stdout`). The example below demonstrates converting a string value into an integer:
|
||||
|
||||
```python
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import json
|
||||
|
||||
value = json.load(sys.stdin)
|
||||
print(int(value))
|
||||
```
|
||||
|
||||
Converters can be *chained*, so `!foo!bar` will call the `foo` converter and then pass the result to `bar`.
|
||||
|
||||
### Special Cases
|
||||
|
||||
If one of your sentences happens to start with an optional word (e.g., `[the]`), this can lead to a problem:
|
||||
|
||||
[SomeIntent]
|
||||
[the] problem sentence
|
||||
|
||||
Python's [configparser](https://docs.python.org/3/library/configparser.html) will interpret `[the]` as a new section header, which will produce a new intent, grammar, etc. Rhasspy handles this special case by using a backslash escape sequence (`\[`):
|
||||
|
||||
[SomeIntent]
|
||||
\[the] problem sentence
|
||||
|
||||
Now `[the]` will be properly interpreted as a sentence under `[SomeIntent]`. You only need to escape a `[` if it's the **very first** character in your sentence.
|
||||
|
||||
### Motivation
|
||||
|
||||
@@ -67,162 +338,6 @@ Compared to JSON, YAML, etc., there is minimal syntactic overhead for the purpos
|
||||
|
||||
Each of these shortcomings is addressed by considering the space between intent headings (`[Intent 1]`, etc.) as a **grammar** that represents many possible voice commands. The possible sentences, stripped of their tags, are used as input to [opengrm](https://www.opengrm.org) to produce a standard ARPA language model for [pocketsphinx](https://github.com/cmusphinx/pocketsphinx) or [Kaldi](https://kaldi-asr.org). The tagged sentences are then used to train an intent recognizer.
|
||||
|
||||
### Optional Words
|
||||
|
||||
Within a sentence, you can specify optional word(s) by surrounding them `[with brackets]`. These will generate at least two sentences: one with the optional word(s), and one without. So the following sentence template:
|
||||
|
||||
[an] example sentence [with] some optional words
|
||||
|
||||
will generate 4 concrete sentences:
|
||||
|
||||
1. `an example sentence with some optional words`
|
||||
2. `example sentence with some optional words`
|
||||
3. `an example sentence some optional words`
|
||||
4. `example sentence some optional words`
|
||||
|
||||
### Alternatives
|
||||
|
||||
A set of items, where only one is present at a time, is `(specified | like | this)`. For N items, there will be N sentences generated (unless you nest optional words, etc.). The template:
|
||||
|
||||
set the light to (red | green | blue)
|
||||
|
||||
will generate:
|
||||
|
||||
1. `set the light to red`
|
||||
2. `set the light to green`
|
||||
3. `set the light to blue`
|
||||
|
||||
### Rules
|
||||
|
||||
Rules allow you to reuse common phrases, alternatives, etc. Rules are defined by `rule_name = ...` alongside your sentences and referenced by `<rule_name>`. The template above with colors could be rewritten as:
|
||||
|
||||
colors = (red | green | blue)
|
||||
set the light to <colors>
|
||||
|
||||
which will generate the same 3 sentences as above. Importantly, you can **share rules** across intents by prefixing the rule's name with the intent name followed by a dot:
|
||||
|
||||
[SetLightColor]
|
||||
colors = (red | green | blue)
|
||||
set the light to <colors>
|
||||
|
||||
[GetLightColor]
|
||||
is the light <SetLightColor.colors>
|
||||
|
||||
The second intent (`GetLightColor`) references the `colors` rule from `SetLightColor`.
|
||||
|
||||
### Tags
|
||||
|
||||
The example templates above will generate sentences for training the speech recognizer, but using them to train the intent recognizer will not be satisfactory. The `SetLightColor` intent, when recognized, will result in a Home Assistant event called `rhasspy_SetLightColor`. But the actual *color* will not be provided because the intent recognizer is not aware that a `color` slot should exist (and has the values `red`, `green`, and `blue`).
|
||||
|
||||
Luckily, JSGF has a [tag feature](https://www.w3.org/TR/jsgf/#15057) that lets you annotate portions of sentences/rules. Rhasspy assumes that the tags themselves are *slot/entity names* and the tagged portions of the sentence are *slot/entity values*. The `SetLightColor` example can be extended with tags like this:
|
||||
|
||||
[SetLightColor]
|
||||
colors = (red | green | blue){color}
|
||||
set the light to <colors>
|
||||
|
||||
With the `{color}` tag attached to the `(red | green | blue)` alternative set, each color name will carry the tag. This is the same as typing `((red){color} | (green){color} | (blue){color})`, but less verbose. Rhasspy will now generate the following **tagged sentences**:
|
||||
|
||||
1. `set the light to [red](color)`
|
||||
2. `set the light to [green](color)`
|
||||
3. `set the light to [blue](color)`
|
||||
|
||||
When the `SetLightColor` intent is recognized now, the corresponding JSON event (`rhasspy_SetLightColor` in Home Assistant) will have the following properties:
|
||||
|
||||
{
|
||||
"color": "red"
|
||||
}
|
||||
|
||||
|
||||
A Home Assistant [automation](https://www.home-assistant.io/docs/automation) can use the slot values to take an appropriate action, such as [setting an RGB light's color](https://www.home-assistant.io/docs/automation/action/) to `[255,0,0]` (red).
|
||||
|
||||
#### Tag Synonyms
|
||||
|
||||
There are times where you want to match a particular part of your sentence with a tag, but want the actual *value* of the tag to be something different than the matched text. This is needed if you want to talk about entities in Home Assistant, for example, with phrases like "the living room lamp", but want to pass the appropriate entity id (say `lamp_1`) to Home Assistant instead.
|
||||
|
||||
Normally, you would tag part of a sentence like this:
|
||||
|
||||
[ChangeLightState]
|
||||
turn on the (living room lamp){name}
|
||||
|
||||
When this intent is activated, Rhasspy will send a JSON event (named `rhasspy_ChangeLightState` in Home Assistant) with:
|
||||
|
||||
{
|
||||
"name": "living room lamp"
|
||||
}
|
||||
|
||||
You can catch this event in a Home Assistant automation, match the `name` "living room lamp", and do something with the `lamp_1` entity. That's fine for one instance, but would require a separate rule for every `name`! Instead, let's add a tag **synonym**:
|
||||
|
||||
[ChangeLightState]
|
||||
turn on the (living room lamp){name:lamp_1}
|
||||
|
||||
The tag label and synonym are separated by a ":". When this sentence is spoken and the intent is activated, the same `rhasspy_ChangeLightState` event will be sent to Home Assistant, but with the following data:
|
||||
|
||||
{
|
||||
"name": "lamp_1"
|
||||
}
|
||||
|
||||
Now in your Home Assistant automation, you could use [templating](https://www.home-assistant.io/docs/automation/templating/) to plug the `name` directly into the `entity_id` field of an action. One rule to rule them all.
|
||||
|
||||
This same technique could be used to replace number words with digits, like:
|
||||
|
||||
[SetTimer]
|
||||
set a timer for (ten){number:10} seconds
|
||||
|
||||
which would generate an event like this when recognized:
|
||||
|
||||
{
|
||||
"number": "10"
|
||||
}
|
||||
|
||||
### Slots Lists
|
||||
|
||||
In the `SetLightColor` example above, the color names are stored in `sentences.ini` as a rule:
|
||||
|
||||
colors = (red | green | blue)
|
||||
|
||||
This is convenient when the list of colors is small, changes infrequently, and does not depend on an external service.
|
||||
But what if this was a list of movie names that were stored on your [Kodi Home Theater](https://kodi.tv)?
|
||||
|
||||
movies = ("Primer" | "Moon" | "Chronicle" | "Timecrimes" | "Mulholland Drive" | ... )
|
||||
|
||||
It would be much easier if this list was stored externally, but could be *referenced* in the appropriate places in the grammar.
|
||||
This is possible in Rhasspy by placing text files in the `speech_to_text.slots_dir` directory specified in your [profile](profiles.md) ("slots" by default).
|
||||
|
||||
If you're using the English (`en`) profile, for example, create the file `profiles/en/slots/movies` and add the following content:
|
||||
|
||||
Primer
|
||||
Moon
|
||||
Chronicle
|
||||
Timecrimes
|
||||
Mulholland Drive
|
||||
|
||||
This list of movies can now be referenced as `$movies` in your `sentences.ini` file! Something like:
|
||||
|
||||
[PlayMovie]
|
||||
play ($movies){movie_name}
|
||||
|
||||
will generate `rhasspy_PlayMovie` events like:
|
||||
|
||||
{
|
||||
"movie_name": "Primer"
|
||||
}
|
||||
|
||||
If you update the `movies` file, make sure to re-train Rhasspy in order to pick up the new movie names.
|
||||
|
||||
### Special Cases
|
||||
|
||||
If one of your sentences happens to start with an optional word (e.g., `[the]`), this can lead to a problem:
|
||||
|
||||
[SomeIntent]
|
||||
[the] problem sentence
|
||||
|
||||
Python's [configparser](https://docs.python.org/3/library/configparser.html) will interpret `[the]` as a new section header, which will produce a new intent, grammar, etc. Rhasspy handles this special case by using a backslash escape sequence (`\[`):
|
||||
|
||||
[SomeIntent]
|
||||
\[the] problem sentence
|
||||
|
||||
Now `[the]` will be properly interpreted as a sentence under `[SomeIntent]`. You only need to escape a `[` if it's the **very first** character in your sentence.
|
||||
|
||||
## Custom Words
|
||||
|
||||
Rhasspy looks for words you've defined outside of your profile's base dictionary (typically `base_dictionary.txt`) in a custom words file (typically `custom_words.txt`). This is just a [CMU phonetic dictionary](https://cmusphinx.github.io/wiki/tutorialdict/) with words/pronunciations separated by newlines:
|
||||
@@ -232,170 +347,11 @@ Rhasspy looks for words you've defined outside of your profile's base dictionary
|
||||
|
||||
You can use the [Words tab](usage.md#words-tab) in Rhasspy's web interface to generate this dictionary. During training, Rhasspy will merge `custom_words.txt` into your `dictionary.txt` file so the [speech to text](speech-to-text.md) system knows how the words in your voice commands are pronounced.
|
||||
|
||||
## Speech to Text
|
||||
|
||||
By default, Rhasspy generates training sentences from your [sentences.ini](#sentencesini) file, and then trains a custom language model using [opengrm](https://www.opengrm.org). You can call a **custom program** instead if you want to use a different language modeling toolkit or your custom speech to text system needs special training.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"training": {
|
||||
"speech_to_text": {
|
||||
"system": "command",
|
||||
"command": {
|
||||
"program": "/path/to/program",
|
||||
"arguments": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
When training, your program will be called with all of the training sentences grouped by intent in JSON to standard in. No output is expected from your program besides a successful exit code. **NOTE**: Rhasspy will not generate `dictionary.txt` or `language_model.txt` if you use a custom program.
|
||||
|
||||
The input JSON is an object where each key is the name of an intent and the values are lists of training sentence objects. Each sentence object has the text of the sentence, all tagged entities, and the tokens of the sentence.
|
||||
|
||||
Example input:
|
||||
|
||||
{
|
||||
"GetTime": [
|
||||
{
|
||||
"sentence": "what time is it",
|
||||
"entities": [],
|
||||
"tokens": [
|
||||
"what",
|
||||
"time",
|
||||
"is",
|
||||
"it"
|
||||
]
|
||||
},
|
||||
{
|
||||
"sentence": "tell me the time",
|
||||
"entities": [],
|
||||
"tokens": [
|
||||
"tell",
|
||||
"me",
|
||||
"the",
|
||||
"time"
|
||||
]
|
||||
}
|
||||
],
|
||||
"ChangeLightColor": [
|
||||
{
|
||||
"sentence": "set the bedroom light to red",
|
||||
"entities": [
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "bedroom light"
|
||||
},
|
||||
{
|
||||
"entity": "color",
|
||||
"value": "red"
|
||||
}
|
||||
],
|
||||
"tokens": [
|
||||
"set",
|
||||
"the",
|
||||
"bedroom",
|
||||
"light",
|
||||
"to",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
See [train-stt.sh](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/train-stt.sh) for an example program.
|
||||
|
||||
## Intent Recognition
|
||||
|
||||
During training, Rhasspy uses the sentences generated from [sentences.ini](#sentencesini) as training material for the selected intent recognition system. If your intent recognition system requires some special training, you can call a **custom program** here.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"training": {
|
||||
"intent": {
|
||||
"system": "command",
|
||||
"command": {
|
||||
"program": "/path/to/program",
|
||||
"arguments": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
During training, Rhasspy will call your program with the training sentences grouped by intent in JSON printed to standard in. No output is expected, besides a successful exit code.
|
||||
|
||||
The input JSON is an object where each key is the name of an intent and the values are lists of training sentence objects. Each sentence object has the text of the sentence, all tagged entities, and the tokens of the sentence.
|
||||
|
||||
Example input:
|
||||
|
||||
```json
|
||||
{
|
||||
"GetTime": [
|
||||
{
|
||||
"sentence": "what time is it",
|
||||
"entities": [],
|
||||
"tokens": [
|
||||
"what",
|
||||
"time",
|
||||
"is",
|
||||
"it"
|
||||
]
|
||||
},
|
||||
{
|
||||
"sentence": "tell me the time",
|
||||
"entities": [],
|
||||
"tokens": [
|
||||
"tell",
|
||||
"me",
|
||||
"the",
|
||||
"time"
|
||||
]
|
||||
}
|
||||
],
|
||||
"ChangeLightColor": [
|
||||
{
|
||||
"sentence": "set the bedroom light to red",
|
||||
"entities": [
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "bedroom light"
|
||||
},
|
||||
{
|
||||
"entity": "color",
|
||||
"value": "red"
|
||||
}
|
||||
],
|
||||
"tokens": [
|
||||
"set",
|
||||
"the",
|
||||
"bedroom",
|
||||
"light",
|
||||
"to",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The following environment variables are available to your program:
|
||||
|
||||
* `$RHASSPY_BASE_DIR` - path to the directory where Rhasspy is running from
|
||||
* `$RHASSPY_PROFILE` - name of the current profile (e.g., "en")
|
||||
* `$RHASSPY_PROFILE_DIR` - directory of the current profile (where `profile.json` is)
|
||||
|
||||
See [train-intent.sh](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/train-intent.sh) for an example program.
|
||||
|
||||
|
||||
## Language Model Mixing
|
||||
|
||||
Rhasspy is designed to only respond to the voice commands you specify in [sentences.ini](training.md#sentencesini), but both the Pocketsphinx and Kaldi speech to text systems are capable of transcribing open ended speech. While this will never be as good as a cloud-based system, Rhasspy offers it as an option.
|
||||
Rhasspy is designed to only respond to the voice commands you specify in [sentences.ini](training.md#sentencesini), but both the Pocketsphinx and Kaldi speech to text systems are capable of transcribing open ended speech. While this will never be as good as a cloud-based system, Rhasspy [offers it as an option](speech-to-text.md#open-transcription).
|
||||
|
||||
Open ended speech is achieved in Rhasspy by the inclusion of `base_dictionary.txt` and `base_language_model.txt` files in every profile. The former is a dictionary containing the pronunciations of all possible words. The latter is a large language model trained on a very large corpus of text in the profile's language (usually books and web pages).
|
||||
|
||||
During training, Rhasspy can **mix** this large, open ended language model with the one generated specifically for your voice commands. You specify a **mixture weight**, which controls how much of an influence the large language model has; a mixture weight of 0 makes Rhasspy sensitive *only* to your voice commands, which is the default.
|
||||
A middle ground between open transcription and custom voice commands is **language model mixing**. During training, Rhasspy can mix a (large) pre-built language model with the custom-generated one. You specify a **mixture weight** (0-1), which controls how much of an influence the large language model has; a mixture weight of 0 makes Rhasspy sensitive *only* to your voice commands, which is the default.
|
||||
|
||||

|
||||
|
||||
@@ -468,15 +424,6 @@ $ echo 'would you please turn on the living room lamp' | \
|
||||
"value": "on"
|
||||
}
|
||||
],
|
||||
"tokens": [
|
||||
"turn",
|
||||
"on",
|
||||
"the",
|
||||
"living",
|
||||
"room",
|
||||
"lamp"
|
||||
],
|
||||
"speech_confidence": 1,
|
||||
"slots": {
|
||||
"state": "on"
|
||||
}
|
||||
@@ -486,7 +433,6 @@ $ echo 'would you please turn on the living room lamp' | \
|
||||
|
||||
But this works only because the default intent recognizer ([fsticuffs](intent-recognition.md#fsticuffs)) ignores unknown words by default, so "would you please" is not interpreted. Changing "lamp" to "light" in the input sentence will reveal the problem:
|
||||
|
||||
|
||||
```
|
||||
$ echo 'would you please turn on the living room light' | \
|
||||
rhasspy-cli --profile en text2intent
|
||||
@@ -499,7 +445,6 @@ $ echo 'would you please turn on the living room light | \
|
||||
"confidence": 0
|
||||
},
|
||||
"entities": [],
|
||||
"speech_confidence": 1,
|
||||
"slots": {}
|
||||
}
|
||||
}
|
||||
@@ -535,7 +480,6 @@ $ echo 'would you please turn on the living room light' | \
|
||||
"value": "on"
|
||||
}
|
||||
],
|
||||
"speech_confidence": 1,
|
||||
"slots": {
|
||||
"state": "on"
|
||||
}
|
||||
@@ -545,4 +489,4 @@ $ echo 'would you please turn on the living room light' | \
|
||||
|
||||
This works well for our toy example, but will not scale well when there are thousands of voice commands represented in `sentences.ini` or if the words used are significantly different than in the training set ("light" and "lamp" are close enough for `fuzzywuzzy`).
|
||||
|
||||
A machine learning-based intent recognizer, like [flar](intent-recognition.md#flair), would be a better choice for open ended speech.
|
||||
A machine learning-based intent recognizer, like [flair](intent-recognition.md#flair) or [Rasa](intent-recognition.md#rasanlu), would be a better choice for open ended speech.
|
||||
|
||||
@@ -0,0 +1,314 @@
|
||||
# Tutorials
|
||||
|
||||
* [RGB Light Example](#rgb-light-example)
|
||||
* [Client/Server Setup](#clientserver-setup)
|
||||
* MATRIX Labs
|
||||
* [Rhasspy Voice Assistant on MATRIX Voice and MATRIX Creator](https://www.hackster.io/matrix-labs/rhasspy-voice-assistant-on-matrix-voice-and-matrix-creator-97f92e)
|
||||
* [Adding Intents for Rhasspy Offline Voice Assistant](https://www.hackster.io/matrix-labs/adding-intents-for-rhasspy-offline-voice-assistant-faa221)
|
||||
* Rendered Obsolete
|
||||
* [Home Assistant Voice Recognition with Rhasspy](https://rendered-obsolete.github.io/2020/01/02/rhasspy.html)
|
||||
|
||||
## RGB Light Example
|
||||
|
||||
Let's say you have an RGB light of some kind in your bedroom that's [hooked up already to Home Assistant](https://www.home-assistant.io/components/light.mqtt). You'd like to be able to say things like "*set the bedroom light to red*" to change its color. To start, let's write a [Home Assistant automation](https://www.home-assistant.io/docs/automation/action/) to help you out:
|
||||
|
||||
automation:
|
||||
# Change the light in the bedroom to red.
|
||||
trigger:
|
||||
...
|
||||
action:
|
||||
service: light.turn_on
|
||||
data:
|
||||
rgb_color: [255, 0, 0]
|
||||
entity_id: light.bedroom
|
||||
|
||||
Now you just need the trigger! Rhasspy will send events that can be caught with the [event trigger platform](https://www.home-assistant.io/docs/automation/trigger/#event-trigger). A different event will be sent for each *intent* that you define, with slot values corresponding to important parts of the command (like light name and color). Let's start by defining an intent in Rhasspy called `ChangeLightState` that can be said a few different ways:
|
||||
|
||||
[ChangeLightState]
|
||||
colors = (red | green | blue) {color}
|
||||
set [the] (bedroom){name} [to] <colors>
|
||||
|
||||
This is a [simplified JSGF grammar](training.md#sentencesini) that will generate the following sentences:
|
||||
|
||||
* set the bedroom to red
|
||||
* set the bedroom to green
|
||||
* set the bedroom to blue
|
||||
* set the bedroom red
|
||||
* set the bedroom green
|
||||
* set the bedroom blue
|
||||
* set bedroom to red
|
||||
* set bedroom to green
|
||||
* set bedroom to blue
|
||||
* set bedroom red
|
||||
* set bedroom green
|
||||
* set bedroom blue
|
||||
|
||||
Rhasspy uses these sentences to create an [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) for speech recognition, and also train an intent recognizer that can extract relevant parts of the command. The `{color}` tag in the `colors` rule will make Rhasspy put a `color` property in each event with the name of the recognized color (red, green, or blue). Likewise, the `{name}` tag on `bedroom` will add a `name` property to the event.
|
||||
|
||||
If trained on these sentences, Rhasspy will now recognize commands like "*set the bedroom light to red*" and send a `rhasspy_ChangeLightState` event to Home Assistant with the following data:
|
||||
|
||||
{
|
||||
"name": "bedroom",
|
||||
"color": "red"
|
||||
}
|
||||
|
||||
You can now fill in the rest of the Home Assistant automation:
|
||||
|
||||
automation:
|
||||
# Change the light in the bedroom to red.
|
||||
trigger:
|
||||
platform: event
|
||||
event_type: rhasspy_ChangeLightState
|
||||
event_data:
|
||||
name: bedroom
|
||||
color: red
|
||||
action:
|
||||
service: light.turn_on
|
||||
data:
|
||||
rgb_color: [255, 0, 0]
|
||||
entity_id: light.bedroom
|
||||
|
||||
This will handle the specific case of setting the bedroom light to red, but not any other color. You can either add additional automations to handle these, or make use of [automation templating](https://www.home-assistant.io/docs/automation/templating/) to do it all at once. See the [Home Assistant Template Example](#home-assistant-template-example) below.
|
||||
|
||||
### Home Assistant Template Example
|
||||
|
||||
Using the following additions, you can get Home Assistant to respond to turning on / off *ANY* light in your setup.
|
||||
|
||||
#### Slots
|
||||
|
||||
Add the following JSON to the Slots tab in your Rhasspy web interface:
|
||||
|
||||
```json
|
||||
{
|
||||
"lights": [
|
||||
"(living room wall):light.bulb_3",
|
||||
"(living room desk):switch.m4",
|
||||
"(living room floor):switch.sonoff",
|
||||
"(bar lights):switch.maxcio1",
|
||||
"(entry wall):light.bulb_4",
|
||||
"(guest wall):light.bulb_6",
|
||||
"(guest floor):switch.m5",
|
||||
"(bedroom wall):light.bulb_5",
|
||||
"(bedroom desk):light.bulb_1",
|
||||
"(bedroom floor):light.bulb_2"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### Sentences
|
||||
|
||||
A simple sentence to turn any of the lights in the slots file on or off.
|
||||
Note the use of the `<state>` rule and the slot `$lights`
|
||||
|
||||
```
|
||||
[ChangeLightState]
|
||||
state = (on | off) {light_state}
|
||||
turn [the] ($lights) {light_name} <state>
|
||||
```
|
||||
|
||||
#### Home Assistant
|
||||
|
||||
In your Home Assistant `automations.yaml` file, use a `data_template` to get the Rhasspy event data with `trigger.event.data.<your property name>` and then pass those along to a script:
|
||||
|
||||
```yaml
|
||||
- id: '1577164768008'
|
||||
alias: Rhasspy Light States
|
||||
description: Voice Control on/off states for all lights
|
||||
trigger:
|
||||
- event_data: {}
|
||||
event_type: rhasspy_ChangeLightState
|
||||
platform: event
|
||||
condition: []
|
||||
action:
|
||||
- alias: ''
|
||||
data_template:
|
||||
light_name: "{{ trigger.event.data.light_name }}"
|
||||
light_state: "{{ trigger.event.data.light_state }}"
|
||||
service: script.rhasspy_light_state
|
||||
|
||||
```
|
||||
|
||||
In `scripts.yaml`, the `service_template` casts the `light_state` into a string and checks to see if you said 'on' or 'off'. The homeassistant-service can toggle both lights and switches, which is helpful if you have a combination of "light" types:
|
||||
|
||||
```yaml
|
||||
rhasspy_light_state:
|
||||
alias: change_light_state
|
||||
fields:
|
||||
light_name:
|
||||
description: "Light Entity"
|
||||
example: light.bulb_1
|
||||
light_state:
|
||||
description: "State to change the light to"
|
||||
example: on
|
||||
sequence:
|
||||
- service_template: >
|
||||
{% set this_state = light_state | string %}
|
||||
{% if this_state == 'on' %}
|
||||
homeassistant.turn_on
|
||||
{%else %}
|
||||
homeassistant.turn_off
|
||||
{% endif %}
|
||||
|
||||
data_template:
|
||||
entity_id: "{{ light_name }}"
|
||||
```
|
||||
|
||||
## Client/Server Setup
|
||||
|
||||
Contributed by [jaburges](https://community.home-assistant.io/u/jaburges)
|
||||
|
||||
* Hardware used:
|
||||
* Raspberry Pi 3B w/ 8GB SD card
|
||||
* [Seeed 4 Mic Array](https://www.amazon.com/seeed-Studio-ReSpeaker-4-Mic-Raspberry/dp/B076SSR1W1)
|
||||
* Software used:
|
||||
* [Raspbian Buster Lite](https://downloads.raspberrypi.org/raspbian_lite_latest)
|
||||
* [Etcher](https://www.balena.io/etcher/)
|
||||
* Docker ([install Docker](installation.md#docker))
|
||||
|
||||
### Server Steps
|
||||
|
||||
1. Assuming you already have Docker running, create a directory for Rhasspy, and a subdirectory called `profiles`.
|
||||
2. Pull and Run docker image:
|
||||
|
||||
docker run -p 12101:12101 \
|
||||
--restart unless-stopped \
|
||||
--name rhasspy \
|
||||
-v "/<PATH_TO>/rhasspy/profiles:/profiles" \
|
||||
synesthesiam/rhasspy-server:latest \
|
||||
--user-profiles /profiles \
|
||||
--profile en
|
||||
|
||||
3. Go to server URL `http://<Server_IP>:12101` (you may be asked to download files)
|
||||
4. Go to settings and check configuration (and save along the way):
|
||||
|
||||
[Rhasspy]
|
||||
Listen for wake word on Startup = UNchecked
|
||||
|
||||
[Intent Handling]
|
||||
Do not handle intent on this device
|
||||
#There is no harm in having the Server handle Intents, but the Client must handle Intents
|
||||
|
||||
[Wake Word]
|
||||
No Wake word on this device
|
||||
|
||||
[Voice Detection]
|
||||
No voice communication on this device
|
||||
|
||||
[Speech Recognition]
|
||||
Do Speech recognition with pocketsphinx
|
||||
|
||||
[Intent Recognition]
|
||||
Do intent recognition with fuzzywuzzy
|
||||
|
||||
[Text to Speech]
|
||||
No Text to speech on this device
|
||||
|
||||
[Audio Recording]
|
||||
No recording on this device
|
||||
|
||||
[Audio Playing]
|
||||
No Playback on this device
|
||||
|
||||
5. Check Slots, and Sentences tabs and make sure to hit `Train` and then `Restart`
|
||||
|
||||
### Client Steps
|
||||
|
||||
1. Flash an 8GB MicroSD card with [Buster](https://downloads.raspberrypi.org/raspbian_lite_latest) using [Etcher](https://www.balena.io/etcher/).
|
||||
2. Remove and re-insert MicroSD card and add files to the root directory (for headless setup - meaning no screen needed). You only need `wpa_supplicant` if you plan to use WiFi.
|
||||
* a file simply called `ssh`
|
||||
* `wpa_supplicant.conf` ([example here](https://pastebin.com/cDhyhQLs))
|
||||
3. Insert the MicroSD card in the Pi, use a proper Power Supply and check your router for the IP address it gets.
|
||||
4. SSH into the Pi using that IP address (I use [Putty](https://the.earth.li/~sgtatham/putty/latest/w64/putty-64bit-0.73-installer.msi)) using pi default user/pass = pi/raspberry.
|
||||
You are going to want to change that in the future!
|
||||
5. Install git:
|
||||
|
||||
sudo apt install git
|
||||
|
||||
6. Install Seeed mic array based on info [here](https://github.com/respeaker/seeed-voicecard)
|
||||
|
||||
git clone https://github.com/respeaker/seeed-voicecard
|
||||
cd seeed-voicecard
|
||||
sudo ./install.sh
|
||||
sudo reboot
|
||||
|
||||
7. Plug in the Seeed speaker and check that the install was successful against the expected result in the [seeed-voicecard README](https://github.com/respeaker/seeed-voicecard):
|
||||
|
||||
arecord -L
|
||||
|
||||
8. Install docker:
|
||||
|
||||
curl -sSL https://get.docker.com | sh
|
||||
|
||||
9. Modify user permissions to access docker without using `sudo` all the time ;)
|
||||
|
||||
sudo usermod -a -G docker pi
|
||||
|
||||
10. Close SSH, and relaunch SSH connection to use new permissions.
|
||||
11. Create directories for Rhasspy Docker image to use:
|
||||
|
||||
cd /home/pi
|
||||
mkdir rhasspy
|
||||
cd rhasspy
|
||||
mkdir profiles
|
||||
|
||||
12. Pull and run docker image:
|
||||
|
||||
docker run -p 12101:12101 \
|
||||
--restart unless-stopped \
|
||||
--name rhasspy \
|
||||
-v "/home/pi/rhasspy/profiles:/profiles" \
|
||||
--device /dev/snd:/dev/snd \
|
||||
synesthesiam/rhasspy-server:latest \
|
||||
--user-profiles /profiles \
|
||||
--profile en
|
||||
|
||||
13. Go to Client URL `http://<Pi_IP_address>:12101` (you will be asked to download some files)
|
||||
(At time of writing I put Wakeword, voice detection and recognition on the client)
|
||||
14. Under settings ensure the following is selected, Save along the way. You will need to Train once also.
|
||||
|
||||
[Rhasspy]
|
||||
Listen for wake word on Startup = checked
|
||||
|
||||
[Home Assistant]
|
||||
Enable Intent Handling on this device
|
||||
#Do not use Home Assistant if using Node-Red
|
||||
|
||||
[Wake Word]
|
||||
Use snowboy (this should trigger a download of more files)
|
||||
|
||||
[Voice Detection]
|
||||
Use webrtcvad and listen for silence
|
||||
|
||||
[Speech Recognition]
|
||||
Use Remote Rhasspy server for speech recognition:
|
||||
URL = http://<SERVER_IP>:12101/api/speech-to-text
|
||||
|
||||
[Intent Recognition]
|
||||
Use Remote Rhasspy server for intent recognition:
|
||||
URL = http://<SERVER_IP>:12101/api/text-to-intent
|
||||
|
||||
[Text to Speech]
|
||||
No Text to speech on this device
|
||||
|
||||
[Audio Recording]
|
||||
Use PyAudio (default)
|
||||
Input Device = seeed-4mic-voicecard (you can test this if you want)
|
||||
|
||||
[Audio Playing]
|
||||
No Playback on this device
|
||||
|
||||
### Node-Red Config
|
||||
|
||||
1. Import [this flow](https://github.com/synesthesiam/rhasspy/blob/cda3a02775865d49b52d32a3af7264b7cbd69472/examples/nodered/time-light-flow.js) from the Rhasspy examples
|
||||
2. Attach a debug node to the websocket in and configure it to show full msg object.
|
||||
3. I edited light text node to take this:
|
||||
|
||||
{
|
||||
"domain": "light",
|
||||
"service": "turn_{{slots.state}}",
|
||||
"entity_id": "{{slots.name}}"
|
||||
}
|
||||
|
||||
4. Add a call service node after the light text and leave it blank. Deploy and Enjoy offline voice assistant.
|
||||
|
||||
Pick a light (that is a light domain not a switch), and say "Snowboy, turn bedroom light off" :)
|
||||
@@ -1,11 +1,31 @@
|
||||
# Usage
|
||||
|
||||
You can interact with Rhasspy in different ways besides just your voice. Rhasspy includes a [web interface](#web-inteface), typically hosted on port 12101. There is also an [HTTP API](#http-api) that lets you programmatically manipulate Rhasspy from external programs or services. A [command-line interface](#command-line) is available as well to allow for Rhasspy to be easily included in shell scripts. Lastly, Rhasspy subscribes and publishes to specific [MQTT topics](#mqtt) in accordance with (a portion of) the [Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol).
|
||||
You can interact with Rhasspy in more ways than your voice:
|
||||
|
||||
* [Web Interface](#web-interface)
|
||||
* [Home Assistant](#home-assistant)
|
||||
* [Node-RED with Websockets](#node-red)
|
||||
* [MQTT and Snips](#mqtt-and-snips)
|
||||
* [HTTP API](#http-api)
|
||||
* [Command Line](#command-line)
|
||||
|
||||
## Web Interface
|
||||
|
||||
A browser-based interface for Rhasspy is available on port 12101 by default ([http://localhost:12101](http://localhost:12101) if running locally). From this interface, you can test voice commands, add new voice commands, re-train, and edit your profile.
|
||||
|
||||
### Top Bar
|
||||
|
||||
The top bar of the web interface lets you perform some global actions on Rhasspy, regardless of which tab you have selected.
|
||||
|
||||

|
||||
|
||||
* Click the Rhasspy logo to reload the page
|
||||
* Click the version number to test the [HTTP API](#http-api)
|
||||
* The green `Train` button will re-train your profile
|
||||
* Use the `Clear Cache` drop down to train from scratch
|
||||
* The yellow `Wake` button will wake Rhasspy up and start listening for a voice command
|
||||
* The red `Restart` button forces Rhasspy to restart
|
||||
|
||||
### Speech Tab
|
||||
|
||||
Test voice and text commands.
|
||||
@@ -14,17 +34,28 @@ Test voice and text commands.
|
||||
|
||||
* Record a voice command with `Hold to Record` or `Tap to Record`
|
||||
* Upload a WAV file with a voice command
|
||||
* Enter a text command and execute it
|
||||
* Enter a text command and either execute it (`Get Intent`) or `Speak` the sentence
|
||||
* Uncheck `Send to Home Assistant` if you **don't** want Rhasspy to send events to Home Assistant
|
||||
|
||||
### Sentences Tab
|
||||
|
||||
Add new voice commands to Rhasspy.
|
||||
Add new voice commands to Rhasspy using the [template syntax](training.md#sentencesini).
|
||||
|
||||

|
||||
|
||||
See documentation on [sentences.ini](training.md#sentencesini) for more information.
|
||||
Make sure to re-train after saving!
|
||||
* Edits `sentences.ini` by default
|
||||
* Use the `Add File` button to create additional sentence template files
|
||||
* These should be prefixed by the `sentences_dir` in your [profile](profiles.md). For example, `intents/more-commands.ini`
|
||||
* The drop down can be used to switch editing between different template files
|
||||
|
||||
### Slots Tab
|
||||
|
||||
Edit your [slots lists](training.md#slots-lists) as JSON (keys = slot names, values = lists of slot values).
|
||||
|
||||

|
||||
|
||||
* New slot values will overwrite previous ones
|
||||
* Delete a slot by providing an empty list for its JSON key
|
||||
|
||||
### Words Tab
|
||||
|
||||
@@ -57,83 +88,11 @@ Direct interface for editing your [profile](profiles.md).
|
||||
|
||||

|
||||
|
||||
## HTTP API
|
||||
### Log Tab
|
||||
|
||||
Rhasspy features a comprehensive HTTP API available at `/api`, documented with [OpenAPI 3](https://github.com/OAI/OpenAPI-Specification) (Swagger). Some notable endpoints are:
|
||||
Streams Rhasspy's log output over a websocket.
|
||||
|
||||
* `/api/profile`
|
||||
* GET the JSON for your profile, or POST to overwrite it
|
||||
* `/api/listen-for-command`
|
||||
* POST to wake Rhasspy up and start listening for a voice command
|
||||
* `/api/start-recording`
|
||||
* POST to have Rhasspy start recording a voice command
|
||||
* `/api/stop-recording`
|
||||
* POST to have Rhasspy stop recording and process recorded data as a voice command
|
||||
* `/api/train`
|
||||
* POST to re-train your profile
|
||||
* `/api/speech-to-intent`
|
||||
* POST a WAV file and have Rhasspy process it as a voice command
|
||||
* `/api/text-to-intent`
|
||||
* POST text and have Rhasspy process it as command
|
||||
* `/api/text-to-speech`
|
||||
* POST text and have Rhasspy speak it
|
||||
* `/api/slots`
|
||||
* POST JSON to update [slot values](training.md#slots-lists)
|
||||
|
||||
See `public/swagger.yaml` in Rhasspy's repository for all available endpoints, or visit `/api` on your Rhasspy web server (e.g., [http://localhost:12101/api](http://localhost:12101/api)).
|
||||
|
||||
## Secure Hosting with HTTPS
|
||||
|
||||
If you need to access Rhasspy's web interface/API through HTTPS (formerly SSL), you can provide a certificate and key file via command-line parameters or the Hass.io configuration.
|
||||
|
||||
If you're running Rhasspy via Docker or in a virtual environment, add `--ssl <CERT_FILE> <KEY_FILE>` to the command-line arguments where `<CERT_FILE>` is your SSL certificate and `<KEY_FILE>` is your SSL key file.
|
||||
|
||||
You can generate a self-signed certificate with the following command:
|
||||
|
||||
openssl req -x509 -newkey rsa:4096 -nodes -out cert.pem -keyout key.pem -days 365
|
||||
|
||||
After answering the series of questions, you should have `cert.pem` and `key.pem` in your current directory. Then run Rhasspy with:
|
||||
|
||||
<RHASSPY COMMAND> --ssl cert.pem key.pem
|
||||
|
||||
The web interface will now be available at [https://localhost:12101](https://localhost:12101) and the web socket events at `wss://localhost:12101/api/events/intent`
|
||||
|
||||
In Hass.io, you will need to set the following options via the web interface or in your JSON configuration:
|
||||
|
||||
* `ssl`: `true`
|
||||
* `certfile`: `cert.pem`
|
||||
* `keyfile`: `key.pem`
|
||||
|
||||
## WebSocket Events
|
||||
|
||||
Whenever a voice command is recognized, Rhasspy emits JSON events over a websocket connection available at `ws://rhasspy:12101/api/events/intent` (replace `ws://` with `wss://` if you're using [secure hosting](usage.md#secure-hosting-with-https)).
|
||||
You can listen to these events in a [Node-RED](https://nodered.org) flow, and easily add offline, private voice commands to your home automation set up!
|
||||
|
||||
For the `ChangeLightState` intent from the [RGB Light Example](index.md#rgb-light-example), Rhasspy will emit a JSON event like this over the websocket:
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "set the bedroom light to red",
|
||||
"intent": {
|
||||
"name": "ChangeLightColor",
|
||||
"confidence": 1
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "bedroom"
|
||||
},
|
||||
{
|
||||
"entity": "color",
|
||||
"value": "red"
|
||||
}
|
||||
],
|
||||
"slots": {
|
||||
"name": "bedroom",
|
||||
"color": "red"
|
||||
}
|
||||
}
|
||||
```
|
||||

|
||||
|
||||
## Home Assistant
|
||||
|
||||
@@ -164,6 +123,13 @@ automation:
|
||||
|
||||
You've now added offline, private voice commands to your Home Assistant. Happy automating!
|
||||
|
||||
### Getting the Spoken Text
|
||||
|
||||
The Home Assistant event will contain two extra slots besides the ones you specify:
|
||||
|
||||
* `_text` - spoken voice command text with [substitutions](training.md#substitutions)
|
||||
* `_raw_text` - literal transcription of voice command
|
||||
|
||||
## Node-RED
|
||||
|
||||
Rhasspy can interact directly with [Node-RED](https://nodered.org) through [websockets](usage.md#websocket-events).
|
||||
@@ -174,23 +140,128 @@ Make sure to also set send/receive to "entire message".
|
||||
|
||||
More example flows are available [on Github](https://github.com/synesthesiam/rhasspy/tree/master/examples/nodered).
|
||||
|
||||
### WebSocket Events
|
||||
|
||||
Rhasspy supports multiple websocket event endpoints:
|
||||
|
||||
* `/api/events/intent`
|
||||
* Intent recognized or not
|
||||
* `/api/events/wake`
|
||||
* Wake word detected
|
||||
* `/api/events/text`
|
||||
* Speech transcription
|
||||
|
||||
#### WebSocket Intents
|
||||
|
||||
Whenever a voice command is recognized, Rhasspy emits JSON events over a websocket connection available at `ws://YOUR_SERVER:12101/api/events/intent` (replace `ws://` with `wss://` if you're using [secure hosting](usage.md#secure-hosting-with-https)).
|
||||
You can listen to these events in a [Node-RED](https://nodered.org) flow, and easily add offline, private voice commands to your home automation set up!
|
||||
|
||||
For the `ChangeLightState` intent from the [RGB Light Example](index.md#rgb-light-example), Rhasspy will emit a JSON event like this over the websocket:
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "set the bedroom light to red",
|
||||
"intent": {
|
||||
"name": "ChangeLightColor",
|
||||
"confidence": 1
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "bedroom"
|
||||
},
|
||||
{
|
||||
"entity": "color",
|
||||
"value": "red"
|
||||
}
|
||||
],
|
||||
"slots": {
|
||||
"name": "bedroom",
|
||||
"color": "red"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### WebSocket Wake
|
||||
|
||||
When the wake word is detected, or Rhasspy is woken up via the `/api/listen-for-command` HTTP endpoint, a JSON event is emitted at `ws://YOUR_SERVER:12101/api/events/wake` (`wss://` if using HTTPS) like:
|
||||
|
||||
```json
|
||||
{
|
||||
"wakewordId": "default",
|
||||
"siteId": "default"
|
||||
}
|
||||
```
|
||||
|
||||
The `wakewordId` is set using the model or file name of your wakeword model (e.g., `porcupine` for `porcupine.ppn`). The `siteId` comes from your `mqtt.siteId` profile setting.
|
||||
|
||||
#### WebSocket Transcriptions
|
||||
|
||||
Each time a voice command is transcribed, Rhasspy emits a JSON event at `ws://YOUR_SERVER:12101/api/events/text` (`wss://` if using HTTPS) like:
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "text from voice command",
|
||||
"wakewordId": "default",
|
||||
"siteId": "default"
|
||||
}
|
||||
```
|
||||
|
||||
The transcription is contained in the `text` property. `wakewordId` is the id of the wakeword that initiated the voice command (or `default`). The `siteId` comes from your `mqtt.siteId` profile setting.
|
||||
|
||||
## MQTT and Snips
|
||||
|
||||
Rhasspy is able to interoperate with Snips.AI services using the [Hermes protocol](https://docs.snips.ai/reference/hermes) over [MQTT](http://mqtt.org). The following components are Snips/Hermes compatible:
|
||||
|
||||
* [Microphone input](audio-input.md#mqtthermes)
|
||||
* [Wake word](wake-word.md#mqtthermes)
|
||||
* [Speech to text](speech-to-text.md#mqtthermes)
|
||||
* [Intent recognition](intent-recognition.md#mqtthermes)
|
||||
* [Audio output](audio-output.md#mqtthermes)
|
||||
|
||||
## HTTP API
|
||||
|
||||
Rhasspy features a comprehensive HTTP API available at `/api/`, documented with [OpenAPI 3](https://github.com/OAI/OpenAPI-Specification) (Swagger). See the [HTTP API reference](reference.md#http-api) for more details.
|
||||
|
||||
### Secure Hosting with HTTPS
|
||||
|
||||
If you need to access Rhasspy's web interface/API through HTTPS (formerly SSL), you can provide a certificate and key file via command-line parameters or the Hass.io configuration.
|
||||
|
||||
If you're running Rhasspy via Docker or in a virtual environment, add `--ssl <CERT_FILE> <KEY_FILE>` to the command-line arguments where `<CERT_FILE>` is your SSL certificate and `<KEY_FILE>` is your SSL key file.
|
||||
|
||||
You can generate a self-signed certificate with the following command:
|
||||
|
||||
openssl req -x509 -newkey rsa:4096 -nodes -out cert.pem -keyout key.pem -days 365
|
||||
|
||||
After answering the series of questions, you should have `cert.pem` and `key.pem` in your current directory. Then run Rhasspy with:
|
||||
|
||||
<RHASSPY COMMAND> --ssl cert.pem key.pem
|
||||
|
||||
The web interface will now be available at [https://localhost:12101](https://localhost:12101) and the web socket events at `wss://localhost:12101/api/events/intent`
|
||||
|
||||
In Hass.io, you will need to set the following options via the web interface or in your JSON configuration:
|
||||
|
||||
* `ssl`: `true`
|
||||
* `certfile`: `cert.pem`
|
||||
* `keyfile`: `key.pem`
|
||||
|
||||
## Command Line
|
||||
|
||||
You can access portions of Rhasspy's functionality without running a web server through the command-line interface.
|
||||
The `rhasspy` Python module runs this interface in its `__main__`, so it's accessible from Rhasspy's source code directory by running:
|
||||
|
||||
python3 -m rhasspy <COMMAND> <ARGUMENTS>
|
||||
|
||||
|
||||
This will only work inside a properly set up [virtual environment](installation.md#virtual-environment), however.
|
||||
If you run Rhasspy through [Docker](installation.md#docker), the [rhasspy-cli](https://github.com/synesthesiam/rhasspy/blob/master/bin/rhasspy-cli) script should be used instead:
|
||||
|
||||
wget https://raw.githubusercontent.com/synesthesiam/rhasspy/master/bin/rhasspy-cli
|
||||
chmod +x rhasspy-cli
|
||||
./rhasspy-cli --help
|
||||
|
||||
|
||||
Put this script in your `~/bin` directory so that you can refer to it as `rhasspy-cli` from any directory.
|
||||
By default, it will look for profiles in `$XDG_CONFIG_HOME/rhasspy/profiles`, which is probably `~/.config/rhasspy/profiles` (see [XDG specification](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) for more information).
|
||||
|
||||
|
||||
**Beware**: the `rhasspy-cli` script runs under your user account and grants Rhasspy **write access to your home directory**.
|
||||
This is needed to save files during the training process, and to avoid those files being owned by `root`.
|
||||
The [rhasspy-cli-ro](https://github.com/synesthesiam/rhasspy/blob/master/bin/rhasspy-cli-ro) script can be used for read only operations, such as speech to text or intent handling, but cannot make any changes to your file system.
|
||||
@@ -200,240 +271,13 @@ The [rhasspy-cli-ro](https://github.com/synesthesiam/rhasspy/blob/master/bin/rha
|
||||
The `rhasspy-cli` script takes a command and a set of arguments:
|
||||
|
||||
rhasspy-cli --profile <PROFILE_NAME> <COMMAND> <ARGUMENTS>
|
||||
|
||||
|
||||
Adding `--debug` before the command will print additional information to the console:
|
||||
|
||||
rhasspy-cli --debug --profile <PROFILE_NAME> <COMMAND> <ARGUMENTS>
|
||||
|
||||
|
||||
You can override profile settings with `--set` like this:
|
||||
|
||||
rhasspy-cli --profile <PROFILE_NAME> --set <SETTING_NAME> <SETTING_VALUE> ... <COMMAND> <ARGUMENTS>
|
||||
|
||||
### Available Commands
|
||||
|
||||
For `rhasspy-cli --profile <PROFILE_NAME> <COMMAND> <ARGUMENTS>`, `<COMMAND>` can be:
|
||||
|
||||
* `info`
|
||||
* Print profile JSON to standard out
|
||||
* Add `--defaults` to only print settings from `defaults.json`
|
||||
* `wav2text`
|
||||
* Convert WAV file(s) to text
|
||||
* `wav2intent`
|
||||
* Convert WAV file(s) to intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* `text2intent`
|
||||
* Convert text command(s) to intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* `train`
|
||||
* Re-train your profile
|
||||
* `mic2wav`
|
||||
* Listen for a voice command and output WAV data
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `mic2text`
|
||||
* Listen for a voice command and convert it to text
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `mic2intent`
|
||||
* Listen for a voice command and output intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `word2phonemes`
|
||||
* Print the CMU phonemes for a word (possibly unknown)
|
||||
* Add `-n <COUNT>` to control the maximum number of guessed pronunciations
|
||||
* `word2wav`
|
||||
* Pronounce a word (possibly unknown) and output WAV data
|
||||
* `text2speech`
|
||||
* Speaks one or more sentences using Rhasspy's text to speech system
|
||||
* `text2wav`
|
||||
* Converts a single sentence to WAV using Rhasspy's text to speech system
|
||||
* `sleep`
|
||||
* Run Rhasspy and wait until wake word is spoken
|
||||
* `download`
|
||||
* Download necessary profile files from the internet
|
||||
|
||||
### Profile Operations
|
||||
|
||||
Print the complete JSON for the English profile with:
|
||||
|
||||
rhasspy-cli --profile en info
|
||||
|
||||
You can combine this with other commands, such as `jq` to get at specific pieces:
|
||||
|
||||
rhasspy-cli info --profile en | jq .wake.pocketsphinx.keyphrase
|
||||
|
||||
Output (JSON):
|
||||
|
||||
"okay rhasspy"
|
||||
|
||||
### Training
|
||||
|
||||
Retrain the English profile with:
|
||||
|
||||
rhasspy-cli --profile en train
|
||||
|
||||
Add `--debug` before `train` for more information.
|
||||
|
||||
### Speech to Text/Intent
|
||||
|
||||
Convert a WAV file to text from stdin:
|
||||
|
||||
rhasspy-cli --profile en wav2text < what-time-is-it.wav
|
||||
|
||||
Output (text):
|
||||
|
||||
what time is it
|
||||
|
||||
Convert multiple WAV files:
|
||||
|
||||
rhasspy-cli --profile en wav2text what-time-is-it.wav turn-on-the-living-room-lamp.wav
|
||||
|
||||
Output (JSON)
|
||||
|
||||
```json
|
||||
{
|
||||
"what-time-is-it.wav": "what time is it",
|
||||
"turn-on-the-living-room-lamp.wav": "turn on the living room lamp"
|
||||
}
|
||||
```
|
||||
|
||||
Convert multiple WAV file(s) to intents **and** handle them:
|
||||
|
||||
rhasspy-cli --profile en wav2intent --handle what-time-is-it.wav turn-on-the-living-room-lamp.wav
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"what_time_is_it.wav": {
|
||||
"text": "what time is it",
|
||||
"intent": {
|
||||
"name": "GetTime",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": []
|
||||
},
|
||||
"turn_on_living_room_lamp.wav": {
|
||||
"text": "turn on the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "on"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Text to Intent
|
||||
|
||||
Handle a command as if it was spoken:
|
||||
|
||||
rhasspy-cli --profile en text2intent --handle "turn off the living room lamp"
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"turn off the living room lamp": {
|
||||
"text": "turn off the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "off"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Record Your Voice
|
||||
|
||||
Save a voice command to a WAV:
|
||||
|
||||
rhasspy-cli --profile en mic2wav > my-voice-command.wav
|
||||
|
||||
You can listen to it with:
|
||||
|
||||
aplay my-voice-command.wav
|
||||
|
||||
### Test Your Wake Word
|
||||
|
||||
Start Rhasspy and wait for wake word:
|
||||
|
||||
rhasspy-cli --profile en sleep
|
||||
|
||||
Rhasspy should exit and print the wake word when it's spoken.
|
||||
|
||||
### Text to Speech
|
||||
|
||||
Have Rhasspy speak one or more sentences:
|
||||
|
||||
rhasspy-cli --profile en text2speech "We ride at dawn!"
|
||||
|
||||
Use a different text to speech system and voice:
|
||||
|
||||
rhasspy-cli --profile en \
|
||||
--set 'text_to_speech.system' 'flite' \
|
||||
--set 'text_to_speech.flite.voice' 'slt' \
|
||||
text2speech "We ride at dawn!"
|
||||
|
||||
### Pronounce Words
|
||||
|
||||
Speak words Rhasspy doesn't know!
|
||||
|
||||
rhasspy-cli --profile en word2wav raxacoricofallapatorius | aplay
|
||||
|
||||
### Text to Speech to Text to Intent
|
||||
|
||||
Use the miracle of Unix pipes to have Rhasspy interpret voice commands from itself:
|
||||
|
||||
rhasspy-cli --profile en \
|
||||
--set 'text_to_speech.system' 'picotts' \
|
||||
text2wav "turn on the living room lamp" | \
|
||||
rhasspy-cli --profile en wav2text | \
|
||||
rhasspy-cli --profile en text2intent
|
||||
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"turn on the living room lamp": {
|
||||
"text": "turn on the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "on"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
],
|
||||
"speech_confidence": 1,
|
||||
"slots": {
|
||||
"state": "on",
|
||||
"name": "living room lamp"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
See the [command-line reference](reference.md#command-line) for available commands.
|
||||
|
||||
@@ -34,16 +34,16 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
There are a lot of [keyword files](https://github.com/Picovoice/Porcupine/tree/master/resources/keyword_files) available for download. Use the `linux` platform if you're on desktop/laptop (`amd64`) and the `raspberrypi` platform if you're using a Raspberry Pi (`armhf`/`aarch64`). The `.ppn` files should go in the `porcupine` directory inside your profile (referenced by `keyword_path`).
|
||||
|
||||
If you want to create a custom wake word, you will need to run the [Porcupine Optimizer](https://github.com/Picovoice/Porcupine/tree/master/tools/optimizer). **NOTE**: the generated keyword file is only valid for 30 days, though you can always just re-run the optimizer.
|
||||
If you want to create a custom wake word, you will need to use the [Picovoice Console](https://github.com/Picovoice/porcupine#picovoice-console). **NOTE**: the generated keyword file is only valid for 30 days, though you can always just re-run the optimizer.
|
||||
|
||||
See `rhasspy.wake.PorcupineWakeListener` for details.
|
||||
|
||||
## Snowboy
|
||||
|
||||
Listens for a wake word with [snowboy](https://snowboy.kitt.ai). This system has good performance out of the box, but requires an online service to train.
|
||||
Listens for one or more wake words with [snowboy](https://snowboy.kitt.ai). This system has good performance out of the box, but requires an online service to train.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -54,10 +54,10 @@ Add to your [profile](profiles.md):
|
||||
"wakeword_id": "default"
|
||||
},
|
||||
"snowboy": {
|
||||
"model": "model-name-in-profile.(u|p)mdl",
|
||||
"model": "snowboy/snowboy.umdl",
|
||||
"audio_gain": 1,
|
||||
"sensitivity": 0.5,
|
||||
"chunk_size": 960
|
||||
"sensitivity": "0.5",
|
||||
"apply_frontend": false
|
||||
}
|
||||
},
|
||||
|
||||
@@ -65,10 +65,41 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
If your hotword model has multiple embedded hotwords (such as `jarvis.umdl`), the "sensitivity" parameter should contain sensitivities for each embedded hotword separated by commas (e.g., "0.5,0.5").
|
||||
|
||||
Visit [the snowboy website](https://snowboy.kitt.ai) to train your own wake word model (requires linking to a GitHub/Google/Facebook account). This *personal* model will end with `.pmdl`, and should go in your profile directory. Then, set `wake.snowboy.model` to the name of that file.
|
||||
|
||||
You also have the option of using a pre-trained *universal* model (`.umdl`) from [Kitt.AI](https://github.com/Kitt-AI/snowboy/tree/master/resources/models). I've received errors using anything but `snowboy.umdl`, but YMMV.
|
||||
You also have the option of using a pre-trained *universal* model (`.umdl`) from [Kitt.AI](https://github.com/Kitt-AI/snowboy/tree/master/resources/models).
|
||||
|
||||
### Multiple Wake Words
|
||||
|
||||
You can have `snowboy` listen for multiple wake words with different models, each with their own settings. You will need to download each model file to the `snowboy` directory in your profile.
|
||||
|
||||
For example, to use both the `snowboy.umdl` and `jarvis.umdl` models, add this to your profile:
|
||||
|
||||
```json
|
||||
"wake": {
|
||||
"system": "snowboy",
|
||||
"snowboy": {
|
||||
"model": "snowboy/snowboy.umdl,snowboy/jarvis.umdl",
|
||||
"model_settings": {
|
||||
"snowboy/snowboy.umdl": {
|
||||
"sensitivity": "0.5",
|
||||
"audio_gain": 1,
|
||||
"apply_frontend": false
|
||||
},
|
||||
"snowboy/jarvis.umdl": {
|
||||
"sensitivity": "0.5,0.5",
|
||||
"audio_gain": 1,
|
||||
"apply_frontend": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Make sure to include all models you want in the `model` setting (separated by commas). Each model may have different settings in `model_settings`. If a setting is not present, the default values under `snowboy` will be used.
|
||||
|
||||
See `rhasspy.wake.SnowboyWakeListener` for details.
|
||||
|
||||
@@ -92,7 +123,7 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Set `wake.pocketsphinx.keyphrase` to whatever you like, though 3-4 syllables is recommended. Make sure to [train](training.md) and restart Rhasspy whenever you change the keyphrase.
|
||||
|
||||
The `wake.pocketsphinx.threshold` should be in the range 1e-50 to 1e-5. The smaller the number, the less likely the keyphrase is to be observed. At least one person has written a script to [automatically tune the threshold](https://medium.com/@PankajB96/automatic-tuning-of-keyword-spotting-thresholds-a27256869d31).
|
||||
@@ -120,14 +151,14 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Follow [the instructions from Mycroft AI](https://github.com/MycroftAI/mycroft-precise/wiki/Training-your-own-wake-word#how-to-train-your-own-wake-word) to train your own wake word model. When you're finished, place **both** the `.pb` and `.pb.params` files in your profile directory, and set `wake.precise.model` to the name of the `.pb` file.
|
||||
|
||||
|
||||
See `rhasspy.wake.PreciseWakeListener` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Subscribes to the `hermes/hotword/<WAKEWORD_ID>/detected` topic, and wakes Rhasspy up when a message is received ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol)). This allows Rhasspy to use the wake word functionality in [Snips.AI](https://snips.ai/).
|
||||
Subscribes to the `hermes/hotword/<WAKEWORD_ID>/detected` topic, and wakes Rhasspy up when a message is received ([Hermes protocol](https://docs.snips.ai/reference/hermes)). This allows Rhasspy to use the wake word functionality in [Snips.AI](https://snips.ai/).
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -150,10 +181,18 @@ Add to your [profile](profiles.md):
|
||||
"username": "",
|
||||
"port": 1883,
|
||||
"password": "",
|
||||
"site_id": "default"
|
||||
"site_id": "default",
|
||||
"tls": {
|
||||
"enabled": false,
|
||||
"ca_certs": "",
|
||||
"cert_reqs": "CERT_REQUIRED",
|
||||
"certfile": "",
|
||||
"ciphers": "",
|
||||
"keyfile": ""
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Adjust the `mqtt` configuration to connect to your MQTT broker.
|
||||
Set `mqtt.site_id` to match your Snips.AI siteId and `wake.hermes.wakeword_id` to match your Snips.AI wakewordId.
|
||||
|
||||
@@ -178,7 +217,7 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
When Rhasspy starts, your program will be called with the given arguments. Once your program detects the wake word, it should print it to standard out and exit. Rhasspy will call your program again when it goes back to sleep. If the empty string is printed, Rhasspy will **not** wake up and your program will be called again.
|
||||
|
||||
The following environment variables are available to your program:
|
||||
|
||||
@@ -11,9 +11,10 @@ cpu_arch=$(uname --m)
|
||||
DEFINE_string 'download-dir' "${this_dir}/download" 'Directory to cache downloaded files'
|
||||
DEFINE_boolean 'precise' true 'Install Mycroft Precise'
|
||||
DEFINE_boolean 'kaldi' true 'Install Kaldi'
|
||||
DEFINE_boolean 'web' true "Install web UI"
|
||||
DEFINE_boolean 'offline' false "Don't download anything"
|
||||
DEFINE_boolean 'all-cpu' false 'Download dependencies for all CPU architectures'
|
||||
DEFINE_string 'cpu-arch' "${cpu_arch}" 'CPU architecture (x86_64, armv7l, arm64v8)'
|
||||
DEFINE_string 'cpu-arch' "${cpu_arch}" 'CPU architecture (x86_64, armv7l, arm64v8, armv6l)'
|
||||
|
||||
FLAGS "$@" || exit $?
|
||||
eval set -- "${FLAGS_ARGV}"
|
||||
@@ -44,17 +45,21 @@ if [[ "${FLAGS_kaldi}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_kaldi='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_web}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_web='true'
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
function maybe_download {
|
||||
if [[ ! -f "$2" ]]; then
|
||||
if [[ ! -z "${offline}" ]]; then
|
||||
if [[ ! -s "$2" ]]; then
|
||||
if [[ -n "${offline}" ]]; then
|
||||
echo "Need to download $1 but offline."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$(dirname "$2")"
|
||||
curl -sSfL -o "$2" "$1"
|
||||
curl -sSfL -o "$2" "$1" || { echo "Can't download $1"; exit 1; }
|
||||
echo "$1 => $2"
|
||||
fi
|
||||
}
|
||||
@@ -65,9 +70,17 @@ declare -A CPU_TO_FRIENDLY
|
||||
CPU_TO_FRIENDLY["x86_64"]="amd64"
|
||||
CPU_TO_FRIENDLY["armv7l"]="armhf"
|
||||
CPU_TO_FRIENDLY["arm64v8"]="aarch64"
|
||||
CPU_TO_FRIENDLY["aarch64"]="aarch64"
|
||||
CPU_TO_FRIENDLY["armv6l"]="armv6l"
|
||||
|
||||
declare -A FRIENDLY_TO_DOCKER
|
||||
FRIENDLY_TO_DOCKER["amd64"]="amd64"
|
||||
FRIENDLY_TO_DOCKER["armhf"]="armv7"
|
||||
FRIENDLY_TO_DOCKER["aarch64"]="arm64"
|
||||
FRIENDLY_TO_DOCKER["armv6l"]="armv6"
|
||||
|
||||
# CPU architecture
|
||||
if [[ ! -z "${all_cpu}" ]]; then
|
||||
if [[ -n "${all_cpu}" ]]; then
|
||||
CPU_ARCHS=("x86_64" "armv7l" "arm64v8")
|
||||
FRIENDLY_ARCHS=("amd64" "armhf" "aarch64")
|
||||
else
|
||||
@@ -79,14 +92,33 @@ fi
|
||||
# Rhasspy
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
for FRIENDLY_ARCH in "${FRIENDLY_ARCHS[@]}";
|
||||
do
|
||||
rhasspy_files=("rhasspy-tools_${FRIENDLY_ARCH}.tar.gz" "rhasspy-web-dist.tar.gz")
|
||||
for rhasspy_file_name in "${rhasspy_files}"; do
|
||||
for FRIENDLY_ARCH in "${FRIENDLY_ARCHS[@]}"; do
|
||||
rhasspy_files=()
|
||||
|
||||
if [[ -z "${no_tools}" ]]; then
|
||||
# Install Rhasspy tools
|
||||
rhasspy_files+=("rhasspy-tools_${FRIENDLY_ARCH}.tar.gz")
|
||||
fi
|
||||
|
||||
if [[ -z "${no_web}" ]]; then
|
||||
# Install web UI
|
||||
rhasspy_files+=('rhasspy-web-dist.tar.gz')
|
||||
fi
|
||||
|
||||
for rhasspy_file_name in "${rhasspy_files[@]}"; do
|
||||
rhasspy_file="${download_dir}/${rhasspy_file_name}"
|
||||
rhasspy_file_url="https://github.com/synesthesiam/rhasspy/releases/download/v2.0/${rhasspy_file_name}"
|
||||
maybe_download "${rhasspy_file_url}" "${rhasspy_file}"
|
||||
done
|
||||
|
||||
if [[ -z "${no_tools}" ]]; then
|
||||
# Create link for docker buildx
|
||||
DOCKER_ARCH="${FRIENDLY_TO_DOCKER[${FRIENDLY_ARCH}]}"
|
||||
if [[ "${FRIENDLY_ARCH}" != "${DOCKER_ARCH}" ]]; then
|
||||
ln -f "${download_dir}/rhasspy-tools_${FRIENDLY_ARCH}.tar.gz" \
|
||||
"${download_dir}/rhasspy-tools_${DOCKER_ARCH}.tar.gz"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -110,13 +142,20 @@ maybe_download "${snowboy_url}" "${snowboy_file}"
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_precise}" ]]; then
|
||||
for CPU_ARCH in "${CPU_ARCHS}";
|
||||
do
|
||||
for CPU_ARCH in "${CPU_ARCHS[@]}"; do
|
||||
case $CPU_ARCH in
|
||||
x86_64|armv7l)
|
||||
x86_64|armv7l|aarch64)
|
||||
precise_file="${download_dir}/precise-engine_0.3.0_${CPU_ARCH}.tar.gz"
|
||||
precise_url="https://github.com/MycroftAI/mycroft-precise/releases/download/v0.3.0/precise-engine_0.3.0_${CPU_ARCH}.tar.gz"
|
||||
maybe_download "${precise_url}" "${precise_file}"
|
||||
|
||||
# Create link for docker buildx
|
||||
FRIENDLY_ARCH="${CPU_TO_FRIENDLY[${CPU_ARCH}]}"
|
||||
DOCKER_ARCH="${FRIENDLY_TO_DOCKER[${FRIENDLY_ARCH}]}"
|
||||
if [[ "${CPU_ARCH}" != "${DOCKER_ARCH}" ]]; then
|
||||
ln -f "${download_dir}/precise-engine_0.3.0_${CPU_ARCH}.tar.gz" \
|
||||
"${download_dir}/precise-engine_0.3.0_${DOCKER_ARCH}.tar.gz"
|
||||
fi
|
||||
esac
|
||||
done
|
||||
fi
|
||||
@@ -126,12 +165,20 @@ fi
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_kaldi}" ]]; then
|
||||
for FRIENDLY_ARCH in "${FRIENDLY_ARCHS}"
|
||||
do
|
||||
# Install pre-built package
|
||||
kaldi_file="${download_dir}/kaldi_${FRIENDLY_ARCH}.tar.gz"
|
||||
kaldi_url="https://github.com/synesthesiam/kaldi-docker/releases/download/v1.0/kaldi_${FRIENDLY_ARCH}.tar.gz"
|
||||
maybe_download "${kaldi_url}" "${kaldi_file}"
|
||||
for FRIENDLY_ARCH in "${FRIENDLY_ARCHS[@]}"; do
|
||||
if [[ "${FRIENDLY_ARCH}" != "armv6l" ]]; then
|
||||
# Install pre-built package
|
||||
kaldi_file="${download_dir}/kaldi_${FRIENDLY_ARCH}.tar.gz"
|
||||
kaldi_url="https://github.com/synesthesiam/kaldi-docker/releases/download/v1.0/kaldi_${FRIENDLY_ARCH}.tar.gz"
|
||||
maybe_download "${kaldi_url}" "${kaldi_file}"
|
||||
|
||||
# Create link for docker buildx
|
||||
DOCKER_ARCH="${FRIENDLY_TO_DOCKER[${FRIENDLY_ARCH}]}"
|
||||
if [[ "${FRIENDLY_ARCH}" != "${DOCKER_ARCH}" ]]; then
|
||||
ln -f "${download_dir}/kaldi_${FRIENDLY_ARCH}.tar.gz" \
|
||||
"${download_dir}/kaldi_${DOCKER_ARCH}.tar.gz"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ switch:
|
||||
command_on: "echo 'Living room lamp ON'"
|
||||
command_off: "echo 'Living room lamp OFF'"
|
||||
garage_light:
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_off: "echo 'Garage light OFF'"
|
||||
|
||||
# Doors
|
||||
@@ -53,7 +53,7 @@ binary_sensor:
|
||||
command: "bash -c 'sec=$(date +%s); [[ $(($sec % 2)) -eq 0 ]] && echo open || echo closed'"
|
||||
payload_on: "closed"
|
||||
payload_off: "open"
|
||||
|
||||
|
||||
# Temperature
|
||||
sensor:
|
||||
- platform: command_line
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
default_view:
|
||||
view: yes
|
||||
view: true
|
||||
entities:
|
||||
- group.inside
|
||||
- group.garage
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
# ATLAS specific Linux ARM configuration
|
||||
|
||||
ifndef DOUBLE_PRECISION
|
||||
$(error DOUBLE_PRECISION not defined.)
|
||||
endif
|
||||
ifndef OPENFSTINC
|
||||
$(error OPENFSTINC not defined.)
|
||||
endif
|
||||
ifndef OPENFSTLIBS
|
||||
$(error OPENFSTLIBS not defined.)
|
||||
endif
|
||||
ifndef ATLASINC
|
||||
$(error ATLASINC not defined.)
|
||||
endif
|
||||
ifndef ATLASLIBS
|
||||
$(error ATLASLIBS not defined.)
|
||||
endif
|
||||
|
||||
CXXFLAGS = -std=c++11 -I.. -isystem $(OPENFSTINC) -O1 $(EXTRA_CXXFLAGS) \
|
||||
-Wall -Wno-sign-compare -Wno-unused-local-typedefs \
|
||||
-Wno-deprecated-declarations -Winit-self \
|
||||
-DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \
|
||||
-DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \
|
||||
-ftree-vectorize -pthread \
|
||||
-g # -O0 -DKALDI_PARANOID
|
||||
|
||||
ifeq ($(KALDI_FLAVOR), dynamic)
|
||||
CXXFLAGS += -fPIC
|
||||
endif
|
||||
|
||||
# Compiler specific flags
|
||||
COMPILER = $(shell $(CXX) -v 2>&1)
|
||||
ifeq ($(findstring clang,$(COMPILER)),clang)
|
||||
# Suppress annoying clang warnings that are perfectly valid per spec.
|
||||
CXXFLAGS += -Wno-mismatched-tags
|
||||
endif
|
||||
|
||||
LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic
|
||||
LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl
|
||||
@@ -0,0 +1,19 @@
|
||||
[Unit]
|
||||
Description=Rhasspy
|
||||
After=syslog.target network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/home/<USER>/path/to/rhasspy
|
||||
ExecStart=/bin/bash -lc './run-venv.sh --profile <LANGUAGE>'
|
||||
|
||||
RestartSec=1
|
||||
Restart=on-failure
|
||||
|
||||
StandardOutput=syslog
|
||||
StandardError=syslog
|
||||
|
||||
SyslogIdentifier=rhasspy
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -40,7 +40,7 @@ switch:
|
||||
command_on: "echo 'Living room lamp ON'"
|
||||
command_off: "echo 'Living room lamp OFF'"
|
||||
garage_light:
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_off: "echo 'Garage light OFF'"
|
||||
|
||||
# Doors
|
||||
@@ -50,7 +50,7 @@ binary_sensor:
|
||||
command: "bash -c 'sec=$(date +%s); [[ $(($sec % 2)) -eq 0 ]] && echo open || echo closed'"
|
||||
payload_on: "closed"
|
||||
payload_off: "open"
|
||||
|
||||
|
||||
# Temperature
|
||||
sensor:
|
||||
- platform: command_line
|
||||
|
||||
@@ -75,7 +75,7 @@ switch:
|
||||
command_on: "echo 'Living room lamp ON'"
|
||||
command_off: "echo 'Living room lamp OFF'"
|
||||
garage_light:
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_off: "echo 'Garage light OFF'"
|
||||
|
||||
# Doors
|
||||
@@ -85,7 +85,7 @@ binary_sensor:
|
||||
command: "bash -c 'sec=$(date +%s); [[ $(($sec % 2)) -eq 0 ]] && echo open || echo closed'"
|
||||
payload_on: "closed"
|
||||
payload_off: "open"
|
||||
|
||||
|
||||
# Temperature
|
||||
sensor:
|
||||
- platform: command_line
|
||||
|
||||
@@ -42,7 +42,7 @@ switch:
|
||||
command_on: "echo 'Living room lamp ON'"
|
||||
command_off: "echo 'Living room lamp OFF'"
|
||||
garage_light:
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_off: "echo 'Garage light OFF'"
|
||||
|
||||
# Doors
|
||||
@@ -52,7 +52,7 @@ binary_sensor:
|
||||
command: "bash -c 'sec=$(date +%s); [[ $(($sec % 2)) -eq 0 ]] && echo open || echo closed'"
|
||||
payload_on: "closed"
|
||||
payload_off: "open"
|
||||
|
||||
|
||||
# Temperature
|
||||
sensor:
|
||||
- platform: command_line
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
if [[ -z "$(which phonetisaurus-train)" ]]; then
|
||||
if [[ -z "$(command -v phonetisaurus-train)" ]]; then
|
||||
echo "Phonetisaurus not installed!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -4,6 +4,7 @@ nav:
|
||||
- Home: index.md
|
||||
- Hardware: hardware.md
|
||||
- Installation: installation.md
|
||||
- Tutorials: tutorials.md
|
||||
- Usage: usage.md
|
||||
- Profiles: profiles.md
|
||||
- Training: training.md
|
||||
@@ -15,5 +16,7 @@ nav:
|
||||
- Intent Recognition: intent-recognition.md
|
||||
- Intent Handling: intent-handling.md
|
||||
- Text to Speech: text-to-speech.md
|
||||
- Reference: reference.md
|
||||
- Development: development.md
|
||||
- License: license.md
|
||||
- About: about.md
|
||||
|
||||
@@ -58,4 +58,25 @@ ignore_missing_imports = True
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-google.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-networkx.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-num2words.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-doit.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-json5.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-quart.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-quart_cors.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-swagger_ui.*]
|
||||
ignore_missing_imports = True
|
||||
@@ -1,17 +1,12 @@
|
||||
#
|
||||
# Copyright 2018 Picovoice Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
|
||||
# file accompanying this source.
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations under the License.
|
||||
#
|
||||
|
||||
import os
|
||||
@@ -20,7 +15,7 @@ from enum import Enum
|
||||
|
||||
|
||||
class Porcupine(object):
|
||||
"""Python binding for Picovoice's wake word detection (aka Porcupine) library."""
|
||||
"""Python binding for Picovoice's wake word detection (Porcupine) engine."""
|
||||
|
||||
class PicovoiceStatuses(Enum):
|
||||
"""Status codes corresponding to 'pv_status_t' defined in 'include/picovoice.h'"""
|
||||
@@ -29,11 +24,17 @@ class Porcupine(object):
|
||||
OUT_OF_MEMORY = 1
|
||||
IO_ERROR = 2
|
||||
INVALID_ARGUMENT = 3
|
||||
STOP_ITERATION = 4
|
||||
KEY_ERROR = 5
|
||||
INVALID_STATE = 6
|
||||
|
||||
_PICOVOICE_STATUS_TO_EXCEPTION = {
|
||||
PicovoiceStatuses.OUT_OF_MEMORY: MemoryError,
|
||||
PicovoiceStatuses.IO_ERROR: IOError,
|
||||
PicovoiceStatuses.INVALID_ARGUMENT: ValueError
|
||||
PicovoiceStatuses.INVALID_ARGUMENT: ValueError,
|
||||
PicovoiceStatuses.STOP_ITERATION: StopIteration,
|
||||
PicovoiceStatuses.KEY_ERROR: KeyError,
|
||||
PicovoiceStatuses.INVALID_STATE: ValueError,
|
||||
}
|
||||
|
||||
class CPorcupine(Structure):
|
||||
@@ -48,9 +49,9 @@ class Porcupine(object):
|
||||
keyword_file_paths=None,
|
||||
sensitivities=None):
|
||||
"""
|
||||
Loads Porcupine's shared library and creates an instance of wake word detection object.
|
||||
Constructor.
|
||||
|
||||
:param library_path: Absolute path to Porcupine's shared library.
|
||||
:param library_path: Absolute path to Porcupine's dynamic library.
|
||||
:param model_file_path: Absolute path to file containing model parameters.
|
||||
:param keyword_file_path: Absolute path to keyword file containing hyper-parameters. If not present then
|
||||
'keyword_file_paths' will be used.
|
||||
@@ -64,38 +65,38 @@ class Porcupine(object):
|
||||
"""
|
||||
|
||||
if not os.path.exists(library_path):
|
||||
raise IOError("Could not find Porcupine's library at '%s'" % library_path)
|
||||
raise IOError("could'nt find Porcupine's library at '%s'" % library_path)
|
||||
|
||||
library = cdll.LoadLibrary(library_path)
|
||||
|
||||
if not os.path.exists(model_file_path):
|
||||
raise IOError("Could not find model file at '%s'" % model_file_path)
|
||||
raise IOError("could'nt find model file at '%s'" % model_file_path)
|
||||
|
||||
if sensitivity is not None and keyword_file_path is not None:
|
||||
if not os.path.exists(keyword_file_path):
|
||||
raise IOError("Could not find keyword file at '%s'" % keyword_file_path)
|
||||
raise IOError("could'nt' find keyword file at '%s'" % keyword_file_path)
|
||||
keyword_file_paths = [keyword_file_path]
|
||||
|
||||
if not (0 <= sensitivity <= 1):
|
||||
raise ValueError('Sensitivity should be within [0, 1]')
|
||||
raise ValueError('sensitivity should be within [0, 1]')
|
||||
sensitivities = [sensitivity]
|
||||
elif sensitivities is not None and keyword_file_paths is not None:
|
||||
if len(keyword_file_paths) != len(sensitivities):
|
||||
raise ValueError("Different number of sensitivity and keyword file path parameters are provided.")
|
||||
raise ValueError("different number of sensitivity and keyword file path parameters are provided.")
|
||||
|
||||
for x in keyword_file_paths:
|
||||
if not os.path.exists(os.path.expanduser(x)):
|
||||
raise IOError("Could not find keyword file at '%s'" % x)
|
||||
raise IOError("could not find keyword file at '%s'" % x)
|
||||
|
||||
for x in sensitivities:
|
||||
if not (0 <= x <= 1):
|
||||
raise ValueError('Sensitivity should be within [0, 1]')
|
||||
raise ValueError('sensitivity should be within [0, 1]')
|
||||
else:
|
||||
raise ValueError("Sensitivity and/or keyword file path is missing")
|
||||
raise ValueError("sensitivity and/or keyword file path is missing")
|
||||
|
||||
self._num_keywords = len(keyword_file_paths)
|
||||
|
||||
init_func = library.pv_porcupine_multiple_keywords_init
|
||||
init_func = library.pv_porcupine_init
|
||||
init_func.argtypes = [
|
||||
c_char_p,
|
||||
c_int,
|
||||
@@ -107,44 +108,43 @@ class Porcupine(object):
|
||||
self._handle = POINTER(self.CPorcupine)()
|
||||
|
||||
status = init_func(
|
||||
model_file_path.encode(),
|
||||
model_file_path.encode('utf-8'),
|
||||
self._num_keywords,
|
||||
(c_char_p * self._num_keywords)(*[os.path.expanduser(x).encode() for x in keyword_file_paths]),
|
||||
(c_char_p * self._num_keywords)(*[os.path.expanduser(x).encode('utf-8') for x in keyword_file_paths]),
|
||||
(c_float * self._num_keywords)(*sensitivities),
|
||||
byref(self._handle))
|
||||
if status is not self.PicovoiceStatuses.SUCCESS:
|
||||
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Initialization failed')
|
||||
|
||||
self.process_func = library.pv_porcupine_multiple_keywords_process
|
||||
self.process_func.argtypes = [POINTER(self.CPorcupine), POINTER(c_short), POINTER(c_int)]
|
||||
self.process_func.restype = self.PicovoiceStatuses
|
||||
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('initialization failed')
|
||||
|
||||
self._delete_func = library.pv_porcupine_delete
|
||||
self._delete_func.argtypes = [POINTER(self.CPorcupine)]
|
||||
self._delete_func.restype = None
|
||||
|
||||
self._sample_rate = library.pv_sample_rate()
|
||||
self.process_func = library.pv_porcupine_process
|
||||
self.process_func.argtypes = [POINTER(self.CPorcupine), POINTER(c_short), POINTER(c_int)]
|
||||
self.process_func.restype = self.PicovoiceStatuses
|
||||
|
||||
version_func = library.pv_porcupine_version
|
||||
version_func.argtypes = []
|
||||
version_func.restype = c_char_p
|
||||
self._version = version_func().decode('utf-8')
|
||||
|
||||
self._frame_length = library.pv_porcupine_frame_length()
|
||||
|
||||
@property
|
||||
def sample_rate(self):
|
||||
"""Audio sample rate accepted by Porcupine library."""
|
||||
self._sample_rate = library.pv_sample_rate()
|
||||
|
||||
return self._sample_rate
|
||||
def delete(self):
|
||||
"""Releases resources acquired by Porcupine's library."""
|
||||
|
||||
@property
|
||||
def frame_length(self):
|
||||
"""Number of audio samples per frame expected by C library."""
|
||||
|
||||
return self._frame_length
|
||||
self._delete_func(self._handle)
|
||||
|
||||
def process(self, pcm):
|
||||
"""
|
||||
Monitors incoming audio stream for given wake word(s).
|
||||
Processes a frame of the incoming audio stream and emits the detection result.
|
||||
|
||||
:param pcm: An array (or array-like) of consecutive audio samples. For more information regarding required audio
|
||||
properties (i.e. sample rate, number of channels encoding, and number of samples per frame) please refer to
|
||||
'include/pv_porcupine.h'.
|
||||
:param pcm: A frame of audio samples. The number of samples per frame can be attained by calling
|
||||
'.frame_length'. The incoming audio needs to have a sample rate equal to '.sample_rate' and be 16-bit
|
||||
linearly-encoded. Porcupine operates on single-channel audio.
|
||||
:return: For a single wake-word use case True if wake word is detected. For multiple wake-word use case it
|
||||
returns the index of detected wake-word. Indexing is 0-based and according to ordering of input keyword file
|
||||
paths. It returns -1 when no keyword is detected.
|
||||
@@ -153,7 +153,7 @@ class Porcupine(object):
|
||||
result = c_int()
|
||||
status = self.process_func(self._handle, (c_short * len(pcm))(*pcm), byref(result))
|
||||
if status is not self.PicovoiceStatuses.SUCCESS:
|
||||
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Processing failed')
|
||||
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]()
|
||||
|
||||
keyword_index = result.value
|
||||
|
||||
@@ -162,7 +162,20 @@ class Porcupine(object):
|
||||
else:
|
||||
return keyword_index
|
||||
|
||||
def delete(self):
|
||||
"""Releases resources acquired by Porcupine's library."""
|
||||
@property
|
||||
def version(self):
|
||||
"""Getter for version"""
|
||||
|
||||
self._delete_func(self._handle)
|
||||
return self._version
|
||||
|
||||
@property
|
||||
def frame_length(self):
|
||||
"""Getter for number of audio samples per frame."""
|
||||
|
||||
return self._frame_length
|
||||
|
||||
@property
|
||||
def sample_rate(self):
|
||||
"""Audio sample rate accepted by Picovoice."""
|
||||
|
||||
return self._sample_rate
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"language": "ca",
|
||||
"name": "ca",
|
||||
"locale": "ca_ES",
|
||||
"speech_to_text": {
|
||||
"system": "pocketsphinx",
|
||||
"dictionary_casing": "lower"
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
|
||||
def main(argv=None):
    """Print every integer from a lower to an upper bound, one per line.

    The two positional arguments are ``lower`` and ``upper`` (inclusive).
    The first extra argument, when present, is used as the step; any further
    extra arguments are ignored. Bounds given in descending order are
    swapped, so the output always counts upwards.

    :param argv: Argument list to parse. ``None`` (the default) makes
        argparse read ``sys.argv[1:]``, which keeps ``main()`` working as
        the script entry point.
    :raises SystemExit: With status 2 (raised by argparse) when the bounds
        are missing or not integers, or when the step is not a positive
        integer.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")
    args, rest_args = parser.parse_known_args(argv)

    step = 1
    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error("step must be an integer, got {!r}".format(rest_args[0]))

        # range() raises on a zero step and yields nothing for a negative one
        # (the bounds are normalised to ascending order below), so reject both
        # with a usage error instead of a traceback or silently empty output.
        if step < 1:
            parser.error("step must be a positive integer, got {}".format(step))

    # Accept the bounds in either order.
    lower, upper = sorted((args.lower, args.upper))

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,7 @@
|
||||
dilluns
|
||||
dimarts
|
||||
dimecres
|
||||
dijous
|
||||
divendres
|
||||
dissabte
|
||||
diumenge
|
||||
@@ -0,0 +1,12 @@
|
||||
de gener
|
||||
de febrer
|
||||
de març
|
||||
d’abril
|
||||
de maig
|
||||
de juny
|
||||
de juliol
|
||||
d’agost
|
||||
de setembre
|
||||
d’octubre
|
||||
de novembre
|
||||
de desembre
|
||||