119 Commits

Author SHA1 Message Date
Michael Hansen e00c1448cb Fix CHANGELOG date 2020-02-07 20:39:03 -05:00
Michael Hansen f04ad3bfeb Add more tutorials to docs 2020-02-07 17:14:01 -05:00
Michael Hansen eb11f90cab Add espeak arguments for text to speech 2020-02-07 17:00:33 -05:00
Michael Hansen 2c612ee669 Pocketsphinx wake keyphrase words added to dictionary 2020-02-07 16:39:06 -05:00
Michael Hansen dfe92f9d0e Fix STT casing outside of HTTP calls 2020-02-07 16:25:40 -05:00
Michael Hansen c59e7b42ab Update docs 2020-02-07 15:55:51 -05:00
Michael Hansen 948705a87b Add wake/text websocket endpoints 2020-02-07 15:45:37 -05:00
Michael Hansen 6b0b5c1799 Merge branch 'master' of https://github.com/synesthesiam/rhasspy 2020-02-06 16:50:51 -05:00
Michael Hansen 9553691e88 Working on wake websocket 2020-02-06 16:49:33 -05:00
Michael Hansen 997456631e Update /api/listen-for-wake to enable/disable wake word 2020-02-05 22:00:06 -05:00
Michael Hansen 104165198b Bump to rhasspy-nlu 0.1.5 2020-01-28 21:38:05 -05:00
Michael Hansen f405b827f4 Add Hass.IO change to CHANGELOG 2020-01-22 21:20:35 -05:00
Michael Hansen 089568cf9f Fix version in CHANGELOG 2020-01-22 21:14:49 -05:00
Michael Hansen a92d88ff8f Rename Add File button in web UI on sentences page 2020-01-22 17:03:21 -05:00
Michael Hansen c60030b48f Add download feedback to web UI 2020-01-22 17:02:26 -05:00
Michael Hansen b400d651f6 Add RHASSPY_LOG_LEVEL environment variable 2020-01-22 16:40:57 -05:00
Michael Hansen 2dfa9aa782 Fix _raw_text in Hass event being same as _text 2020-01-22 16:36:07 -05:00
Michael Hansen bd2c065415 Force slot programs to run each training cycle 2020-01-22 16:24:58 -05:00
Michael Hansen 1b95144b05 Add CHANGELOG and bump version 2020-01-21 15:56:09 -05:00
Michael Hansen 3f60936471 Add web button to play last recorded voice command 2020-01-21 15:55:01 -05:00
Michael Hansen 9a3c2f8a3f Move kaldi/custom_words.txt to kaldi_custom_words.txt 2020-01-21 15:39:22 -05:00
Michael Hansen 1fb75f24d7 Delete partial downloads of profile files 2020-01-21 15:39:08 -05:00
Michael Hansen 6c0187e606 Hide web notifications after 10 seconds 2020-01-21 15:38:52 -05:00
Michael Hansen 16262ec896 Keep slot substitution casing during training/recognition 2020-01-21 14:57:00 -05:00
Michael Hansen 9d1303ed21 Merge pull request #164 from alexkn/fix-device-preselection
fix microphone/sound device preselection
2020-01-20 08:38:57 -05:00
Michael Hansen a12e537110 Merge pull request #165 from alexkn/docs-picotts
update pico-tts languages
2020-01-20 08:37:39 -05:00
Alexander Knöbel 63fb3cf046 update pico-tts languages 2020-01-19 13:06:20 +01:00
Alexander Knöbel f5e6666931 fix microphone/sound device preselection 2020-01-19 01:04:02 +01:00
Michael Hansen 44a9c84bc7 Merge pull request #158 from drhirn/master
Added exclamation mark to shebang
2020-01-14 22:23:56 -05:00
Michael Hansen 9a076936c5 Merge pull request #160 from alexkn/docs-yarn-install
Add yarn install before build
2020-01-14 22:23:01 -05:00
Alexander Knöbel 102b29ecf6 Add yarn install before build 2020-01-14 19:02:08 +01:00
drhirn 51455bfd97 Added exclamation mark to shebang
The shebang in the code for the slot_program was missing an exclamation mark
2020-01-14 15:38:36 +01:00
Michael Hansen 5bf6086164 Merge pull request #156 from mzoeller/patch-3
Add missing space
2020-01-12 18:59:51 -05:00
mzoeller 08ebaf0914 Add missing space 2020-01-12 23:50:40 +01:00
Michael Hansen 4b3f26c12f Merge pull request #153 from daniele-athome/patch-1
Give a hint to lame about mp3 files
2020-01-12 13:29:34 -05:00
Michael Hansen 707c31e4d3 Merge pull request #155 from mzoeller/patch-2
Update intent-handling.md
2020-01-12 13:29:01 -05:00
Michael Hansen 509d47ea0f Merge pull request #154 from mzoeller/patch-1
Example command handler in python
2020-01-12 13:28:42 -05:00
mzoeller 1d2b08df6e Update intent-handling.md 2020-01-12 18:05:20 +01:00
mzoeller 0608443482 Example command handler in python 2020-01-12 17:56:00 +01:00
Daniele Ricci 9a1a41385c Give a hint to lame about mp3 files
Apparently, when given audio through stdin, lame can't detect the file type correctly some times. A quick fix is to add --mp3input to the command line (we have checked for file extension anyway, so...)
2020-01-12 17:24:43 +01:00
Michael Hansen 2d8095f0e1 Merge /media/hansenm/BAC6B44DC6B40C1F/rhasspy 2020-01-08 16:11:36 -05:00
Michael Hansen 8f3c1c5d61 Fix dictionary issue with multiple pronunciations 2020-01-07 21:08:17 -05:00
Michael Hansen deb742d768 Return WAV mimetype 2020-01-07 16:37:47 -05:00
Michael Hansen fa24588ea4 Removed flair intent recognition. Fixes for adapt/rasa. 2020-01-06 11:23:32 -05:00
Michael Hansen ed581ecf9d Fixing fuzzywuzzy and others with converters 2020-01-05 20:43:15 -05:00
Michael Hansen f8aedd4ef5 Update Docker update docs 2020-01-05 16:55:37 -05:00
Michael Hansen 14c1386496 Possible fix for threading issues 2020-01-05 16:46:14 -05:00
Michael Hansen 153b642057 Add Rpi Zero to docs 2020-01-05 15:14:45 -05:00
Michael Hansen dec32102dd Merge pull request #146 from esdeboer/freebsd
sed -i is not POSIX compliant, instead make a temp copy and rename to…
2020-01-05 14:50:38 -05:00
Michael Hansen f365c69265 Merge pull request #142 from maxbachmann/cleanup
code cleanup
2020-01-05 14:50:02 -05:00
Michael Hansen 1724c328b7 Trying to fix Docker image 2020-01-05 11:14:56 -05:00
Eric de Boer 6db4a8d341 sed -i is not POSIX compliant, instead make a temp copy and rename to original. 2020-01-05 16:53:38 +01:00
Michael Hansen b70e8a8569 Copying profiles in Docker 2020-01-04 22:41:04 -05:00
Michael Hansen 2e4828da06 Fix dockerignore 2020-01-04 22:34:15 -05:00
Michael Hansen 96cfe69753 Re-generated Dockerfile 2020-01-04 22:02:31 -05:00
maxbachmann 3e8e246c1c swap vars without temp var 2020-01-05 01:02:35 +01:00
Michael Hansen 80a5008b93 Copy built-in slots to Docker 2020-01-04 16:55:38 -05:00
Michael Hansen e26ecf82f1 Bump rhasspy-nly to 0.1.4.1 2020-01-04 16:41:10 -05:00
Michael Hansen e4db52f845 Merge pull request #138 from esdeboer/master
Install dependencies before running yarn build.
2020-01-04 16:38:41 -05:00
Michael Hansen 846313e236 Documented slot programs, number ranges, converters 2020-01-04 16:28:03 -05:00
Michael Hansen b68a3fac4a Add rhasspy/days and rhasspy/months slots 2020-01-04 16:15:34 -05:00
Michael Hansen 7459f0d9d9 Add rhasspy/number 2020-01-04 15:53:30 -05:00
Michael Hansen 617b789d89 Add locales to profiles 2020-01-04 15:53:13 -05:00
Michael Hansen bb20cd280b Transforming number ranges to rhasspy/number 2020-01-04 15:04:52 -05:00
Michael Hansen ce780feb74 Add en rhasspy/number and rhasspy/days slots 2020-01-04 12:31:46 -05:00
Michael Hansen 2225262a53 Add system slots/slot programs 2020-01-04 12:28:29 -05:00
Michael Hansen 5b5529339b Minor clean up in tutorial 2020-01-04 10:46:02 -05:00
Michael Hansen 90f5c5aef7 Touch up tutorials 2020-01-04 10:42:55 -05:00
Eric de Boer 78f263582d Install dependencies before running yarn build. 2020-01-04 09:44:02 +01:00
Michael Hansen 6c8608f1a1 Merge pull request #136 from esdeboer/master
Only download Kaldi when it is requested to be installed.
2020-01-03 17:21:57 -05:00
Michael Hansen 3e5437856b Merge pull request #137 from xLAva/Feature_Fuzzywuzzy_SpeedUp
Fuzzywuzzy: major speed improvement by disabling the debug log spam
2020-01-03 17:21:17 -05:00
Michael Hansen 15aaea2810 Support siteId in /api/text-to-speech 2020-01-03 17:19:27 -05:00
xLAva 61d8930e38 Fuzzywuzzy: major speed improvement by disabling the debug log spam 2020-01-03 22:43:26 +01:00
Eric de Boer 5748b2dc3a Only download Kaldi when it is requested to be installed. 2020-01-03 20:57:21 +01:00
Michael Hansen 8f7158f7cc Only allow a single hotword to be detected by snowboy (single_detection) 2020-01-03 14:11:01 -05:00
Michael Hansen 97226286e3 Fix phonetisaurus download link in build-from-source.sh 2020-01-03 11:46:41 -05:00
Michael Hansen 896b3ddfba Run isort 2020-01-03 11:18:45 -05:00
Michael Hansen 1772f6e740 Add slot programs 2020-01-03 11:17:42 -05:00
Michael Hansen b5dfd6518b Add converter args 2020-01-03 10:36:06 -05:00
Michael Hansen 2730c131d0 Merge pull request #133 from maxbachmann/cleanup
do some code cleanup
2020-01-03 09:37:41 -05:00
maxbachmann 05ded030c8 do some code cleanup 2020-01-03 08:52:49 +01:00
Michael Hansen 3b90383145 Trying to fix Jekyll build errors 2020-01-02 23:22:02 -05:00
Michael Hansen 1bb5462150 Merge pull request #130 from kroka/patch-1
add missing ASR fields for Hermes MQTT publishing
2020-01-02 23:13:26 -05:00
Michael Hansen 95a354e2a3 Merge pull request #132 from maxbachmann/master
correct spelling mistake
2020-01-02 23:13:08 -05:00
Michael Hansen d203a3ed75 Add custom converters (programs) 2020-01-02 17:16:34 -05:00
Michael Hansen 59d473b931 Add number ranges 2020-01-02 16:37:16 -05:00
Michael Hansen 17737f7fed Bump version 2020-01-02 16:22:29 -05:00
Michael Hansen 76cf173849 Doing int conversion with built-in number conversion 2020-01-02 16:20:05 -05:00
maxbachmann 55d1cfacdd correct spelling mistake 2020-01-02 18:52:14 +01:00
Michael Hansen 4f6d02169c Force casing on slot inputs 2020-01-02 11:40:04 -05:00
Michael Hansen 74761b942f Fix overwrite_all in slot params 2020-01-02 10:57:10 -05:00
Michael Hansen b88acb3a34 Parse JSON from requests with json5 2020-01-02 10:47:58 -05:00
kroka 7b323a08bb add missing fields for Hermes publishing
prevents a null pointer access in hermes-python
2020-01-02 16:42:28 +01:00
Michael Hansen ec55dbfa5b Remove --yes from apt-get install commands 2020-01-01 22:37:23 -05:00
Michael Hansen 15af0ae3c1 Merge pull request #121 from jthomasdewald/master
Home Assistant Template Example
2020-01-01 22:35:06 -05:00
Michael Hansen f8542f7ac1 Merge pull request #126 from Romkabouter/remove-x-hassio-key
Change X-HASSIO-KEY to Authorization
2020-01-01 09:06:14 -05:00
Paul Romkes de67b3318c Change X-HASSIO-KEY to Authorization 2020-01-01 11:32:09 +01:00
jthomasdewald b47dca03aa Update command-listener doc
Changed vad_mode description to match webrtcvad docs
2019-12-31 15:03:47 -08:00
Michael Hansen 89a1921c3e Merge 2019-12-31 13:00:48 -05:00
Michael Hansen e5fe2a31b3 Add mypy to check, code cleanup 2019-12-31 12:54:10 -05:00
Michael Hansen afdd241c57 Add awake webhook 2019-12-31 12:40:56 -05:00
Michael Hansen bea38cc64f Fix wakeword issues on TTS pause 2019-12-30 21:49:20 -05:00
Michael Hansen c2562aa674 Don't disable wake system by default with TTS 2019-12-30 17:09:52 -05:00
Michael Hansen 7dec472ec4 Add update instructions to docs 2019-12-28 20:51:07 -05:00
Michael Hansen 007ea4266e Bump version 2019-12-27 22:12:38 -05:00
Michael Hansen a627f8746c Code cleanup 2019-12-27 21:19:46 -05:00
Michael Hansen 13f183afd4 Reset PyAudio on error 2019-12-27 21:17:03 -05:00
Michael Hansen 130cbeb7a8 Consolidate actor events. Stop wake on TTS speak. 2019-12-27 21:07:00 -05:00
jthomasdewald 358e7b087e Home Assistant Template Example
Example files to get Rhasspy to control any light with only one automation and one script.
2019-12-27 15:25:36 -08:00
Michael Hansen 8e2d2f2352 Play error sound when intent not recognized 2019-12-27 11:16:38 -05:00
Michael Hansen ac3c92e24a Fix pop sound in pico2wave 2019-12-27 10:59:52 -05:00
Michael Hansen a501c52954 Add /api/speech-to-text endpoint to docs 2019-12-26 22:29:22 -05:00
Michael Hansen f8f0b48140 Setting text and raw_text when intent is not recognized 2019-12-26 22:11:58 -05:00
Michael Hansen 2a8972fb99 Merge branch 'master' of https://github.com/synesthesiam/rhasspy 2019-12-26 11:10:37 -05:00
Michael Hansen cbbfc23395 Merge pull request #118 from jthomasdewald/master
Clarify intent handling for sever / client setup
2019-12-25 18:04:10 -05:00
Michael Hansen 0c2a1931f6 Merge pull request #115 from frkos/patch-1
Porcupine optimizer tool is deprecated
2019-12-25 18:02:52 -05:00
jthomasdewald 414457f150 Clarify intent handling for sever / client setup 2019-12-25 10:08:18 -08:00
frkos 640be7b0ac Update wake-word.md 2019-12-24 13:02:26 +03:00
frkos 421f59518a Update Porcupine wake-word docs
Porcupine optimizer tool is retired, so the link doesn't work
Accroding to the link https://github.com/Picovoice/porcupine#picovoice-console :
The console succeeds the (now retired) optimizer tool, as it can be used to train custom wake-words (Porcupine .ppn files).
2019-12-24 12:58:52 +03:00
134 changed files with 3457 additions and 1770 deletions
+173 -22
View File
@@ -1,26 +1,177 @@
.git/
.venv/
node_modules/
__pycache__/
test/
tools/
etc/test/
download/precise-engine/
download/kaldi/
opt/
*
!etc/qemu-*
etc/homeassistant/config/.storage
examples/typical/home-assistant/config/.storage
examples/typical-intent/home-assistant/config/.storage
examples/client-server/home-assistant/config/.storage
examples/mqtt-hermes/home-assistant/config/.storage
!download/rhasspy-tools*
!download/pocketsphinx-python.tar.gz
!download/snowboy*
!download/kaldi*
profiles/*/base_dictionary.txt
profiles/*/base_language_model.txt
profiles/*/acoustic_model/
profiles/*/g2p.fst
!requirements.txt
!dist/
!etc/wav
profiles/en-kaldi/
profiles/en-zamia/
!docker/run.sh
!docker/rhasspy
profiles/*/download/
!profiles/defaults.json
!profiles/zh/profile.json
!profiles/zh/custom_words.txt
!profiles/zh/espeak_phonemes.txt
!profiles/zh/phoneme_examples.txt
!profiles/zh/frequent_words.txt
!profiles/zh/sentences.ini
!profiles/zh/stop_words.txt
!profiles/zh/slots
!profiles/zh/slot_programs
!profiles/hi/profile.json
!profiles/hi/custom_words.txt
!profiles/hi/espeak_phonemes.txt
!profiles/hi/phoneme_examples.txt
!profiles/hi/frequent_words.txt
!profiles/hi/sentences.ini
!profiles/hi/stop_words.txt
!profiles/hi/slots
!profiles/hi/slot_programs
!profiles/el/profile.json
!profiles/el/custom_words.txt
!profiles/el/espeak_phonemes.txt
!profiles/el/phoneme_examples.txt
!profiles/el/frequent_words.txt
!profiles/el/sentences.ini
!profiles/el/stop_words.txt
!profiles/el/slots
!profiles/el/slot_programs
!profiles/es/profile.json
!profiles/es/custom_words.txt
!profiles/es/espeak_phonemes.txt
!profiles/es/phoneme_examples.txt
!profiles/es/frequent_words.txt
!profiles/es/sentences.ini
!profiles/es/stop_words.txt
!profiles/es/slots
!profiles/es/slot_programs
!profiles/it/profile.json
!profiles/it/custom_words.txt
!profiles/it/espeak_phonemes.txt
!profiles/it/phoneme_examples.txt
!profiles/it/frequent_words.txt
!profiles/it/sentences.ini
!profiles/it/stop_words.txt
!profiles/it/slots
!profiles/it/slot_programs
!profiles/ru/profile.json
!profiles/ru/custom_words.txt
!profiles/ru/espeak_phonemes.txt
!profiles/ru/phoneme_examples.txt
!profiles/ru/frequent_words.txt
!profiles/ru/sentences.ini
!profiles/ru/stop_words.txt
!profiles/ru/slots
!profiles/ru/slot_programs
!profiles/pt/profile.json
!profiles/pt/custom_words.txt
!profiles/pt/espeak_phonemes.txt
!profiles/pt/phoneme_examples.txt
!profiles/pt/frequent_words.txt
!profiles/pt/sentences.ini
!profiles/pt/stop_words.txt
!profiles/pt/slots
!profiles/pt/slot_programs
!profiles/sv/profile.json
!profiles/sv/custom_words.txt
!profiles/sv/espeak_phonemes.txt
!profiles/sv/phoneme_examples.txt
!profiles/sv/frequent_words.txt
!profiles/sv/sentences.ini
!profiles/sv/stop_words.txt
!profiles/sv/slots
!profiles/sv/slot_programs
!profiles/vi/profile.json
!profiles/vi/custom_words.txt
!profiles/vi/espeak_phonemes.txt
!profiles/vi/phoneme_examples.txt
!profiles/vi/frequent_words.txt
!profiles/vi/sentences.ini
!profiles/vi/stop_words.txt
!profiles/vi/slots
!profiles/vi/slot_programs
!profiles/ca/profile.json
!profiles/ca/custom_words.txt
!profiles/ca/espeak_phonemes.txt
!profiles/ca/phoneme_examples.txt
!profiles/ca/frequent_words.txt
!profiles/ca/sentences.ini
!profiles/ca/stop_words.txt
!profiles/ca/slots
!profiles/ca/slot_programs
!profiles/nl/profile.json
!profiles/nl/custom_words.txt
!profiles/nl/espeak_phonemes.txt
!profiles/nl/phoneme_examples.txt
!profiles/nl/frequent_words.txt
!profiles/nl/sentences.ini
!profiles/nl/stop_words.txt
!profiles/nl/slots
!profiles/nl/slot_programs
!profiles/nl/kaldi/custom_words.txt
!profiles/nl/kaldi/espeak_phonemes.txt
!profiles/nl/kaldi/phoneme_examples.txt
!profiles/de/profile.json
!profiles/de/custom_words.txt
!profiles/de/espeak_phonemes.txt
!profiles/de/phoneme_examples.txt
!profiles/de/frequent_words.txt
!profiles/de/sentences.ini
!profiles/de/stop_words.txt
!profiles/de/slots
!profiles/de/slot_programs
!profiles/de/kaldi/custom_words.txt
!profiles/de/kaldi/espeak_phonemes.txt
!profiles/de/kaldi/phoneme_examples.txt
!profiles/fr/profile.json
!profiles/fr/custom_words.txt
!profiles/fr/espeak_phonemes.txt
!profiles/fr/phoneme_examples.txt
!profiles/fr/frequent_words.txt
!profiles/fr/sentences.ini
!profiles/fr/stop_words.txt
!profiles/fr/slots
!profiles/fr/slot_programs
!profiles/fr/kaldi/custom_words.txt
!profiles/fr/kaldi/espeak_phonemes.txt
!profiles/fr/kaldi/phoneme_examples.txt
!profiles/en/profile.json
!profiles/en/custom_words.txt
!profiles/en/espeak_phonemes.txt
!profiles/en/phoneme_examples.txt
!profiles/en/frequent_words.txt
!profiles/en/sentences.ini
!profiles/en/stop_words.txt
!profiles/en/slots
!profiles/en/slot_programs
!profiles/en/kaldi/custom_words.txt
!profiles/en/kaldi/espeak_phonemes.txt
!profiles/en/kaldi/phoneme_examples.txt
!rhasspy/profile_schema.json
!rhasspy/*.py
!rhasspy/train/*.py
!rhasspy/train/jsgf2fst/*.py
!*.py
!VERSION
!pip
+77
View File
@@ -0,0 +1,77 @@
## [2.4.18] - 2020 Feb 07
### Added
- /api/listen-for-wake accepts "on" and "off" as POST data to enable/disable wake word
- /api/events/wake websocket endpoint reports wake up events
- /api/events/text websocket endpoint reports transcription events
- Rhasspy logo changes in web UI when wake word is detected
- espeak arguments list for text to speech
### Changed
- STT output casing is fixed outside of HTTP API calls
- All voice commands show up in web UI test page
- Play last voice command button in web UI works for any command
- Fixed commas in numbers with thousand separators
- Words from Pocketsphinx wake keyphrase are added to dictionary
- Pocketsphinx wake word keyphrase casing is fixed
## [2.4.17] - 2020 Jan 21
### Added
- Button to web UI to play last recorded voice command
- RHASSPY_LOG_LEVEL environment variable
- Web UI feedback during download
- Add "asoundrc" config option to Hass.IO add-on
### Changed
- Moved $profile/kaldi/custom_words.txt to $profile/kaldi_custom_words.txt
- Slot substitution casing is kept during training/recognition
- Fixed fuzzywuzzy and other intent recognizer training after addition of converters
- Fix thread max count issue
- Hide web UI alerts after 10 seconds
- Delete partially downloaded profile files
- Force slot programs to run each training cycle
- Fix _raw_text in Hass event being same as _text
### Removed
- Flair intent recognizer
## [2.4.16] - 2020 Jan 5
### Added
- Number ranges (0..100)
- Converters for transforming JSON values in intents (!int)
- Slot programs for generating slot values
- $rhasspy/days and $rhasspy/months built-in slots
## [2.4.15] - 2019 Dec 27
### Added
- Preliminary support for Raspberry Pi Zero (no Kaldi)
- Play error sound when intent not recognized
- _text and _raw_text to Home Assistant events
### Changed
- Disable wake word when TTS is speaking
- Use json5 library to parse profile
- Remove picotts pop sound
- Don't open/close microphone after wake-up
## [2.4.14] - 2019 Dec 19
### Added
- Ability to split sentences across multiple .ini files in intents directory
- Support (future) /api/intent for Home Assistant
- Support for Home Assistant TTS system
- Emulate MaryTTS /process API in web API
- Include wakeId/siteId in JSON intent (MQTT/Websocket)
- ?voice and ?language query parameters to /api/text-to-speech
+4 -4
View File
@@ -5,7 +5,9 @@ SHELL := bash
# Docker
# -----------------------------------------------------------------------------
docker: web-dist docker-amd64 docker-armhf docker-aarch64 docker-push manifest
docker: web-dist docker-amd64 docker-armhf docker-aarch64
docker-deploy: docker-push manifest
docker-amd64:
docker build . -f docker/templates/dockerfiles/Dockerfile.prebuilt.alsa.all \
@@ -81,9 +83,7 @@ g2p: $(G2P_MODELS)
# Testing
# -----------------------------------------------------------------------------
mypy:
mypy app.py rhasspy
check:
flake8 --exclude=lexconvert.py app.py test.py rhasspy/*.py
pylint --ignore=lexconvert.py app.py test.py rhasspy/*.py
mypy app.py test.py rhasspy/*.py
+2 -2
View File
@@ -1,6 +1,6 @@
![Rhasspy logo](docs/img/rhasspy.svg)
Rhasspy (pronounced RAH-SPEE) is an offline, [multilingual](#supported-languages) voice assistant toolkit inspired by [Jasper](https://jasperproject.github.io/) that works well with [Home Assistant](https://www.home-assistant.io/), [Hass.io](https://www.home-assistant.io/hassio/), and [Node-RED](https://nodered.org).
Rhasspy (pronounced RAH-SPEE) is an offline voice assistant toolkit inspired by [Jasper](https://jasperproject.github.io/) that [supports many languages](#supported-languages). It works well with [Home Assistant](https://www.home-assistant.io/), [Hass.io](https://www.home-assistant.io/hassio/), and [Node-RED](https://nodered.org).
* [Documentation](https://rhasspy.readthedocs.io/)
* [Discussion](https://community.rhasspy.org)
@@ -58,7 +58,7 @@ The table below summarizes language support across the various supporting techno
| | [rasaNLU](https://rhasspy.readthedocs.io/en/latest/intent-recognition/#rasanlu) | *needs extra software* | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| **Text to Speech** | [espeak](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#espeak) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| | [flite](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#flite) | ✓ | ✓ | | | | | | | | ✓ | | | | | |
| | [picotts](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#picotts) | ✓ | ✓ | | | | | | | | | | | | | |
| | [picotts](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#picotts) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | | | | | | | | |
| | [marytts](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#marytts) | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | | | | | | | |
| | [wavenet](https://rhasspy.readthedocs.io/en/latest/text-to-speech/#google-wavenet) | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | ✓ | |
+1 -1
View File
@@ -1 +1 @@
2.4.14
2.4.18
+6
View File
@@ -0,0 +1,6 @@
defaults:
-
scope:
path: ""
values:
render_with_liquid: false
+207 -63
View File
@@ -2,16 +2,20 @@
import argparse
import asyncio
import atexit
import concurrent.futures
import json
import logging
import os
import re
import shutil
import time
from functools import wraps
from pathlib import Path
from typing import Any, Dict, List, Tuple, Union
from typing import Any, Dict, List, Optional, Set, Tuple, Union
from uuid import uuid4
import attr
import json5
from quart import (
Quart,
Response,
@@ -27,8 +31,13 @@ from swagger_ui import quart_api_doc
from rhasspy.actor import ActorSystem, ConfigureEvent, RhasspyActor
from rhasspy.core import RhasspyCore
from rhasspy.dialogue import ProfileTrainingFailed
from rhasspy.intent import IntentRecognized
from rhasspy.events import (
IntentRecognized,
ProfileTrainingFailed,
VoiceCommand,
WakeWordDetected,
WavTranscription,
)
from rhasspy.utils import (
FunctionLoggingHandler,
buffer_to_wav,
@@ -52,6 +61,10 @@ app = Quart("rhasspy")
app.secret_key = str(uuid4())
app = cors(app)
# WAV data from last voice command
last_voice_wav: Optional[bytes] = None
# -----------------------------------------------------------------------------
# Parse Arguments
# -----------------------------------------------------------------------------
@@ -90,8 +103,12 @@ parser.add_argument("--log-level", default="DEBUG", help="Set logging level")
args = parser.parse_args()
# Set log level
log_level = getattr(logging, args.log_level.upper())
logging.basicConfig(level=log_level)
if "RHASSPY_LOG_LEVEL" in os.environ:
log_level = os.environ["RHASSPY_LOG_LEVEL"]
else:
log_level = args.log_level
logging.basicConfig(level=getattr(logging, log_level.upper()))
logger.debug(args)
@@ -185,7 +202,7 @@ async def api_profiles() -> Response:
return jsonify(
{
"default_profile": core.profile.name,
"profiles": sorted(list(profile_names)),
"profiles": sorted(profile_names),
"downloaded": downloaded,
"missing_files": missing_files,
}
@@ -205,6 +222,14 @@ async def api_download_profile() -> str:
return "OK"
@app.route("/api/download-status", methods=["GET"])
async def api_download_status() -> str:
    """Return the profile download status.

    The entries of ``core.download_status`` are joined into a single
    string, one entry per line.
    """
    assert core is not None

    status_lines = core.download_status
    return "\n".join(status_lines)
# -----------------------------------------------------------------------------
@@ -255,8 +280,11 @@ async def api_speakers() -> Response:
async def api_listen_for_wake() -> str:
"""Make Rhasspy listen for a wake word"""
assert core is not None
core.listen_for_wake()
return "OK"
enabled_str = (await request.data).decode().strip().lower()
enabled = enabled_str not in ["false", "off"]
core.listen_for_wake(enabled)
return str(enabled)
# -----------------------------------------------------------------------------
@@ -277,6 +305,10 @@ async def api_listen_for_command() -> Response:
entity = request.args.get("entity")
value = request.args.get("value")
# Emulate wake
wake_json = json.dumps({"wakewordId": "default", "siteId": core.siteId})
await add_ws_event("wake", wake_json)
return jsonify(
await core.listen_for_command(
handle=(not no_hass), timeout=timeout, entity=entity, value=value
@@ -295,14 +327,14 @@ async def api_profile() -> Union[str, Response]:
if request.method == "POST":
# Ensure that JSON is valid
profile_json = await request.json
profile_json = json5.loads(await request.data)
recursive_remove(core.profile.system_json, profile_json)
profile_path = Path(core.profile.write_path("profile.json"))
with open(profile_path, "w") as profile_file:
json.dump(profile_json, profile_file, indent=4)
msg = "Wrote profile to %s" % profile_path
msg = f"Wrote profile to {profile_path}"
logger.debug(msg)
return msg
@@ -368,7 +400,7 @@ async def api_pronounce() -> Union[Response, str]:
if download:
# Return WAV
return Response(wav_data) # , mimetype="audio/wav")
return Response(wav_data, mimetype="audio/wav")
# Play through speakers
core.play_wav_data(wav_data)
@@ -441,7 +473,7 @@ async def api_sentences():
num_chars = 0
paths_written = []
sentences_dict = await request.json
sentences_dict = json5.loads(await request.data)
for sentences_path, sentences_text in sentences_dict.items():
# Path is relative to profile directory
sentences_path = Path(core.profile.write_path(sentences_path))
@@ -460,9 +492,7 @@ async def api_sentences():
logger.debug("Removing %s", sentences_path)
sentences_path.unlink()
return "Wrote {} char(s) to {}".format(
num_chars, [str(p) for p in paths_written]
)
return f"Wrote {num_chars} char(s) to {[str(p) for p in paths_written]}"
# Update sentences.ini only
sentences_path = Path(
@@ -472,7 +502,7 @@ async def api_sentences():
data = await request.data
with open(sentences_path, "wb") as sentences_file:
sentences_file.write(data)
return "Wrote {} byte(s) to {}".format(len(data), sentences_path)
return f"Wrote {len(data)} byte(s) to {sentences_path}"
# GET
sentences_path_rel = core.profile.read_path(
@@ -525,6 +555,26 @@ async def api_custom_words():
assert core is not None
speech_system = core.profile.get("speech_to_text.system", "pocketsphinx")
# Temporary fix for kaldi/custom_words -> kaldi_custom_words.txt
old_kaldi_words_path = Path(core.profile.read_path("kaldi/custom_words.txt"))
if old_kaldi_words_path.is_file():
new_kaldi_words_path = Path(
core.profile.write_path(
core.profile.get(
"speech_to_text.kaldi.custom_words", "custom_words.txt"
)
)
)
if (
new_kaldi_words_path != old_kaldi_words_path
and not new_kaldi_words_path.is_file()
):
logger.warning(
"Moving %s to %s", str(old_kaldi_words_path), str(new_kaldi_words_path)
)
shutil.move(old_kaldi_words_path, new_kaldi_words_path)
if request.method == "POST":
custom_words_path = Path(
core.profile.write_path(
@@ -547,7 +597,7 @@ async def api_custom_words():
print(line, file=custom_words_file)
lines_written += 1
return "Wrote %s line(s) to %s" % (lines_written, custom_words_path)
return f"Wrote {lines_written} line(s) to {custom_words_path}"
custom_words_path = Path(
core.profile.read_path(
@@ -619,6 +669,7 @@ async def api_restart() -> str:
@app.route("/api/speech-to-text", methods=["POST"])
async def api_speech_to_text() -> str:
"""Transcribe speech from WAV file."""
global last_voice_wav
no_header = request.args.get("noheader", "false").lower() == "true"
assert core is not None
@@ -628,10 +679,20 @@ async def api_speech_to_text() -> str:
# Wrap in WAV
wav_data = buffer_to_wav(wav_data)
last_voice_wav = wav_data
start_time = time.perf_counter()
result = await core.transcribe_wav(wav_data)
end_time = time.perf_counter()
# Send to websocket
await add_ws_event(
"transcription",
json.dumps(
{"text": result.text, "wakewordId": "default", "siteId": core.siteId}
),
)
if prefers_json():
return jsonify(
{
@@ -666,7 +727,7 @@ async def api_text_to_intent():
intent_json = json.dumps(intent)
logger.debug(intent_json)
await add_ws_event(WS_EVENT_INTENT, intent_json)
await add_ws_event("intent", intent_json)
if not no_hass:
# Send intent to Home Assistant
@@ -681,11 +742,13 @@ async def api_text_to_intent():
@app.route("/api/speech-to-intent", methods=["POST"])
async def api_speech_to_intent() -> Response:
"""Transcribe speech, recognize intent, and optionally handle."""
global last_voice_wav
assert core is not None
no_hass = request.args.get("nohass", "false").lower() == "true"
# Prefer 16-bit 16Khz mono, but will convert with sox if needed
wav_data = await request.data
last_voice_wav = wav_data
# speech -> text
start_time = time.time()
@@ -693,6 +756,12 @@ async def api_speech_to_intent() -> Response:
text = transcription.text
logger.debug(text)
# Send to websocket
await add_ws_event(
"transcription",
json.dumps({"text": text, "wakewordId": "default", "siteId": core.siteId}),
)
# text -> intent
intent = (await core.recognize_intent(text)).intent
intent["speech_confidence"] = transcription.confidence
@@ -702,7 +771,7 @@ async def api_speech_to_intent() -> Response:
intent_json = json.dumps(intent)
logger.debug(intent_json)
await add_ws_event(WS_EVENT_INTENT, intent_json)
await add_ws_event("intent", intent_json)
if not no_hass:
# Send intent to Home Assistant
@@ -727,6 +796,7 @@ async def api_start_recording() -> str:
@app.route("/api/stop-recording", methods=["POST"])
async def api_stop_recording() -> Response:
"""End recording voice command. Transcribe and handle."""
global last_voice_wav
assert core is not None
no_hass = request.args.get("nohass", "false").lower() == "true"
@@ -740,20 +810,43 @@ async def api_stop_recording() -> Response:
text = transcription.text
logger.debug(text)
# Send to websocket
await add_ws_event(
"transcription",
json.dumps({"text": text, "wakewordId": "default", "siteId": core.siteId}),
)
intent = (await core.recognize_intent(text)).intent
intent["speech_confidence"] = transcription.confidence
intent_json = json.dumps(intent)
logger.debug(intent_json)
await add_ws_event(WS_EVENT_INTENT, intent_json)
await add_ws_event("intent", intent_json)
if not no_hass:
# Send intent to Home Assistant
intent = (await core.handle_intent(intent)).intent
# Save last voice command WAV data
last_voice_wav = wav_data
return jsonify(intent)
@app.route("/api/play-recording", methods=["POST"])
async def api_play_recording() -> str:
    """Play the last stored voice command WAV through ``core.play_wav_data``.

    Always returns "OK", even when no voice command has been stored yet.
    """
    assert core is not None

    # Module-level value is only read here, so no ``global`` declaration
    # is required.
    recording = last_voice_wav
    if not recording:
        # Nothing stored (None or empty) - nothing to play
        return "OK"

    logger.debug("Playing %s byte(s)", len(recording))
    core.play_wav_data(recording)
    return "OK"
# -----------------------------------------------------------------------------
@@ -794,19 +887,20 @@ async def api_text_to_speech() -> Union[bytes, str]:
play = request.args.get("play", "true").strip().lower() == "true"
language = request.args.get("language")
voice = request.args.get("voice")
siteId = request.args.get("siteId")
data = await request.data
sentence = last_sentence if repeat else data.decode().strip()
assert core is not None
result = await core.speak_sentence(
sentence, play=play, language=language, voice=voice
sentence, play=play, language=language, voice=voice, siteId=siteId
)
last_sentence = sentence
if not play:
# Return WAV data instead of speaking
return result.wav_data
return Response(result.wav_data, mimetype="audio/wav")
return sentence
@@ -821,7 +915,7 @@ async def api_slots() -> Union[str, Response]:
if request.method == "POST":
overwrite_all = request.args.get("overwrite_all", "false").lower() == "true"
new_slot_values = await request.json
new_slot_values = json5.loads(await request.data)
slots_dir = Path(
core.profile.write_path(
@@ -831,7 +925,7 @@ async def api_slots() -> Union[str, Response]:
if overwrite_all:
# Remove existing values first
for name in new_slot_values.keys():
for name in new_slot_values:
slots_path = safe_join(slots_dir, f"{name}")
if slots_path.is_file():
try:
@@ -849,15 +943,16 @@ async def api_slots() -> Union[str, Response]:
slots_path.parent.mkdir(parents=True, exist_ok=True)
# Merge with existing values
values = set(values)
values = {v.strip() for v in values}
if slots_path.is_file():
values.update(line for line in slots_path.read_text().splitlines())
values.update(
line.strip() for line in slots_path.read_text().splitlines()
)
# Write merged values
if values:
with open(slots_path, "w") as slots_file:
for value in values:
value = value.strip()
if value:
print(value, file=slots_file)
@@ -977,7 +1072,7 @@ def api_intents():
@app.route("/process", methods=["GET"])
async def marytts_process():
async def marytts_process() -> Response:
"""Emulate MaryTTS /process API"""
global last_sentence
@@ -989,7 +1084,7 @@ async def marytts_process():
sentence, play=False, voice=voice, language=locale
)
return spoken.wav_data
return Response(spoken.wav_data, mimetype="audio/wav")
# -----------------------------------------------------------------------------
@@ -1061,26 +1156,26 @@ async def swagger_yaml() -> Response:
# WebSocket API
# -----------------------------------------------------------------------------
WS_EVENT_INTENT = 0
WS_EVENT_LOG = 1
ws_queues: List[List[asyncio.Queue]] = [[], []]
ws_locks: List[asyncio.Lock] = [asyncio.Lock(), asyncio.Lock()]
user_queues: Set[asyncio.Queue] = set()
logging_queues: Set[asyncio.Queue] = set()
async def add_ws_event(event_type: int, text: str):
"""Send text out to all websockets for a specific event."""
async with ws_locks[event_type]:
for q in ws_queues[event_type]:
await q.put(text)
async def add_ws_event(message_type: str, text: str):
    """Queue a (message_type, text) event on every registered user websocket queue."""
    event = (message_type, text)
    for user_queue in user_queues:
        await user_queue.put(event)
async def log_ws_event(text: str):
    """Queue a logging message on every registered logging websocket queue."""
    for log_queue in logging_queues:
        await log_queue.put(text)
# Send logging messages out to websocket
logging.root.addHandler(
FunctionLoggingHandler(
lambda msg: asyncio.run_coroutine_threadsafe(
add_ws_event(WS_EVENT_LOG, msg), loop
)
lambda msg: asyncio.run_coroutine_threadsafe(log_ws_event(msg), loop)
)
)
@@ -1090,6 +1185,8 @@ class WebSocketObserver(RhasspyActor):
def in_started(self, message: Any, sender: RhasspyActor) -> None:
"""Handle messages in started state."""
global last_voice_wav
if isinstance(message, IntentRecognized):
# Add slots
intent_slots = {}
@@ -1101,29 +1198,75 @@ class WebSocketObserver(RhasspyActor):
# Convert to JSON
intent_json = json.dumps(message.intent)
self._logger.debug(intent_json)
asyncio.run_coroutine_threadsafe(
add_ws_event(WS_EVENT_INTENT, intent_json), loop
asyncio.run_coroutine_threadsafe(add_ws_event("intent", intent_json), loop)
elif isinstance(message, WakeWordDetected):
assert core is not None
wake_json = json.dumps({"wakewordId": message.name, "siteId": core.siteId})
asyncio.run_coroutine_threadsafe(add_ws_event("wake", wake_json), loop)
elif isinstance(message, WavTranscription):
assert core is not None
transcription_json = json.dumps(
{
"text": message.text,
"wakewordId": message.wakewordId,
"siteId": core.siteId,
}
)
asyncio.run_coroutine_threadsafe(
add_ws_event("transcription_json", transcription_json), loop
)
elif isinstance(message, VoiceCommand):
# Save last voice command
last_voice_wav = buffer_to_wav(message.data)
def api_websocket(func):
    """Wrap a websocket route so it receives its own user event queue.

    The wrapper creates a fresh ``asyncio.Queue``, registers it in
    ``user_queues`` for the lifetime of the call, and passes it to ``func``
    as the first positional argument. The queue is always unregistered when
    the call ends.

    Returns whatever ``func`` returns. Exceptions raised by ``func`` are
    logged and swallowed (the wrapper then returns ``None``), except task
    cancellation, which is propagated.
    """

    @wraps(func)
    async def wrapper(*_args, **kwargs):
        queue = asyncio.Queue()
        user_queues.add(queue)

        try:
            return await func(queue, *_args, **kwargs)
        except asyncio.CancelledError:
            # Cancellation is a normal way for a handler to end. On
            # Python <= 3.7 CancelledError derives from Exception, so without
            # this clause it would be logged as an error and swallowed.
            raise
        except Exception:
            logger.exception("api_websocket")
        finally:
            user_queues.discard(queue)

    return wrapper
@app.websocket("/api/events/intent")
async def api_events_intent() -> None:
@api_websocket
async def api_events_intent(queue) -> None:
"""Websocket endpoint to receive intents as JSON."""
# Add new queue for websocket
q: asyncio.Queue = asyncio.Queue()
async with ws_locks[WS_EVENT_INTENT]:
ws_queues[WS_EVENT_INTENT].append(q)
try:
while True:
text = await q.get()
while True:
message_type, text = await queue.get()
if message_type == "intent":
await websocket.send(text)
except Exception:
logger.exception("api_events_intent")
# Remove queue
async with ws_locks[WS_EVENT_INTENT]:
ws_queues[WS_EVENT_INTENT].remove(q)
@app.websocket("/api/events/text")
@api_websocket
async def api_events_text(queue) -> None:
    """Websocket endpoint that forwards transcription events to the client."""
    wanted = "transcription"
    while True:
        # Every queue item is a (message_type, text) pair
        kind, payload = await queue.get()
        if kind != wanted:
            continue
        await websocket.send(payload)
@app.websocket("/api/events/wake")
@api_websocket
async def api_events_wake(queue) -> None:
    """Websocket endpoint that forwards wake events to the client."""
    wanted = "wake"
    while True:
        # Every queue item is a (message_type, text) pair
        kind, payload = await queue.get()
        if kind != wanted:
            continue
        await websocket.send(payload)
@app.websocket("/api/events/log")
@@ -1131,19 +1274,17 @@ async def api_events_log() -> None:
"""Websocket endpoint to receive logging messages as text."""
# Add new queue for websocket
q: asyncio.Queue = asyncio.Queue()
async with ws_locks[WS_EVENT_LOG]:
ws_queues[WS_EVENT_LOG].append(q)
logging_queues.add(q)
try:
while True:
text = await q.get()
await websocket.send(text)
except Exception:
logger.exception("api_events_log")
except concurrent.futures.CancelledError:
pass
# Remove queue
async with ws_locks[WS_EVENT_LOG]:
ws_queues[WS_EVENT_LOG].remove(q)
logging_queues.discard(q)
# -----------------------------------------------------------------------------
@@ -1179,6 +1320,9 @@ loop.run_until_complete(start_rhasspy())
# -----------------------------------------------------------------------------
# Disable useless logging messages
logging.getLogger("wsproto").setLevel(logging.CRITICAL)
# Start web server
if args.ssl is not None:
logger.debug("Using SSL with certfile, keyfile = %s", args.ssl)
+7 -7
View File
@@ -36,7 +36,7 @@ def main():
# Load dictionary
word_dict = {}
logging.info("Loading dictionary from %s" % args.dictionary)
logging.info("Loading dictionary from %s", args.dictionary)
with open(args.dictionary, "r") as dict_file:
read_dict(dict_file, word_dict)
@@ -53,7 +53,7 @@ def main():
all_words.append(word)
assert len(phonemes) == len(phoneme_words), "Not enough words to cover phonemes"
logging.debug("Phonemes: %s" % ", ".join(phoneme_words.keys()))
logging.debug("Phonemes: %s", ", ".join(phoneme_words))
phoneme_hyps = defaultdict(lambda: defaultdict(float))
@@ -66,7 +66,7 @@ def main():
phoneme_hyps[phoneme][hyp] = count
# Sample words from the dictionary
logging.info("Starting %s sample(s)" % args.samples)
logging.info("Starting %s sample(s)", args.samples)
phoneme_futures = {}
with ProcessPoolExecutor() as executor:
# Schedule eSpeak word samples
@@ -80,7 +80,7 @@ def main():
for i, future in enumerate(as_completed(phoneme_futures)):
if i % len(phonemes) == 0:
logging.info(
"Sample %s of %s" % ((i // len(phonemes) + 1), args.samples)
"Sample %s of %s", (i // len(phonemes) + 1), args.samples
)
phoneme = phoneme_futures[future]
@@ -113,14 +113,14 @@ def main():
best = {}
todo = set(phonemes)
used = set()
while len(todo) > 0:
while todo:
for phoneme in list(todo):
best_to_worst = sorted(
phoneme_hyps[phoneme].items(), key=lambda kv: kv[1], reverse=True
)
for hyp, count in best_to_worst:
if not hyp in used:
if hyp not in used:
best[phoneme] = hyp
used.add(hyp)
todo.remove(phoneme)
@@ -165,7 +165,7 @@ def read_dict(dict_file, word_dict):
"""
for line in dict_file:
line = line.strip()
if len(line) == 0:
if not line:
continue
word, pronounce = re.split("[ ]+", line, maxsplit=1)
+36
View File
@@ -0,0 +1,36 @@
#!/usr/bin/env python3
import argparse
import calendar
import json
import locale
from pathlib import Path
def main():
    """Write localized day- and month-name slot files for every profile.

    Takes one command-line argument, ``profiles_dir``. For each sub-directory
    it reads ``profile.json``, switches the process locale to the profile's
    ``locale`` value with ``.UTF-8`` appended, prints that locale name, and
    writes ``slots/rhasspy/days`` and ``slots/rhasspy/months`` with one name
    per line.

    Raises whatever the underlying calls raise, e.g. ``FileNotFoundError``
    for a missing ``profile.json`` or ``locale.Error`` when the locale is not
    available on this system.
    """
    parser = argparse.ArgumentParser("generate-slots")
    parser.add_argument("profiles_dir")
    args = parser.parse_args()

    for profile_dir in Path(args.profiles_dir).glob("*"):
        if not profile_dir.is_dir():
            continue

        # Read as UTF-8 explicitly so parsing does not depend on whichever
        # locale the process happens to be in at this point.
        profile_path = profile_dir / "profile.json"
        with open(profile_path, "r", encoding="utf-8") as profile_file:
            profile = json.load(profile_file)

        # calendar.day_name / month_name follow the current locale
        locale_name = profile["locale"] + ".UTF-8"
        locale.setlocale(locale.LC_ALL, locale_name)
        print(locale_name)

        slots_dir = profile_dir / "slots" / "rhasspy"
        slots_dir.mkdir(parents=True, exist_ok=True)

        # Day names
        (slots_dir / "days").write_text(
            "\n".join(calendar.day_name), encoding="utf-8"
        )

        # Month names (filter drops the empty placeholder entry)
        (slots_dir / "months").write_text(
            "\n".join(filter(None, calendar.month_name)), encoding="utf-8"
        )


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+1 -3
View File
@@ -38,9 +38,7 @@ def main():
if not os.path.exists(html_path):
# Download
url = "https://www.ezglot.com/most-frequently-used-words.php?l={0}&submit=Select".format(
language
)
url = f"https://www.ezglot.com/most-frequently-used-words.php?l={language}&submit=Select"
print(f"Downloading from {url}")
with open(html_path, "w") as html_file:
+3 -3
View File
@@ -26,7 +26,7 @@ def main():
with open(args.dictionary, "r") as dict_file:
for line in dict_file:
line = line.strip()
if len(line) == 0:
if not line:
continue
parts = re.split(r"[\t ]+", line)
@@ -44,11 +44,11 @@ def main():
# Pick unique example words for every phoneme
used_words = set()
for phoneme in sorted(examples.keys()):
for phoneme in sorted(examples):
# Choose the shortest, unused example word for this phoneme.
# Exclude words with 3 or fewer letters.
for word, pron in sorted(examples[phoneme], key=lambda kv: len(kv[0])):
if len(word) > 3 and (not word in used_words):
if len(word) > 3 and (word not in used_words):
# Output format is:
# phoneme word pronunciation
print(phoneme, word, " ".join(pron))
+5 -5
View File
@@ -31,7 +31,7 @@ def main():
with open(args.dictionary, "r") as dict_file:
for line in dict_file:
line = line.strip()
if len(line) == 0:
if not line:
continue
parts = re.split(r"[\t ]+", line)
@@ -70,7 +70,7 @@ def main():
with open(args.frequent_phones, "r") as freq_phones_file:
for line in freq_phones_file:
line = line.strip()
if len(line) == 0:
if not line:
continue
parts = re.split(r"[ ]+", line, maxsplit=1)
@@ -82,7 +82,7 @@ def main():
mappings = []
bad_espeak = (":", ";", "-", "#")
for word, espeak in freq_espeak.items():
if not word in freq_phonemes:
if word not in freq_phonemes:
# No pronunciation
continue
@@ -134,7 +134,7 @@ def main():
m = 4
for p in all_phonemes:
candidate_counts = [
(e, phoneme_counts[(cp, e)]) for (cp, e) in phoneme_counts.keys() if cp == p
(e, phoneme_counts[(cp, e)]) for (cp, e) in phoneme_counts if cp == p
]
candidate_counts = [ec for ec in candidate_counts if ec[1] > n]
candidate_counts = sorted(candidate_counts, key=lambda x: x[1], reverse=True)
@@ -213,7 +213,7 @@ assign(P, E) :- maybe_assign(P, E).
predicates = []
for line in proc.stdout.splitlines():
line = line.decode().strip()
if len(line) == 0:
if not line:
continue
elif line.startswith("OPTIMUM FOUND"):
break
+1 -1
View File
@@ -20,7 +20,7 @@ def main():
with open(dict_path, "r") as dict_file:
for line in dict_file:
line = line.strip()
if len(line) == 0:
if not line:
continue
parts = re.split(r"[ ]+", line)
+28
View File
@@ -0,0 +1,28 @@
#!/usr/bin/env python
"""Example command-line intent handler.

Reads an intent as JSON from stdin, attaches a spoken reply for the intents
it knows about ("GetTime" and "Hello"), and prints the resulting JSON to
stdout. Unknown intents are passed through unchanged.
"""
import sys
import json
import random
import datetime

# Canned replies for the "Hello" intent; one is chosen at random.
HELLO_REPLIES = ["Hi!", "Hello!", "Hey there!", "Greetings."]


def time_sentence(now):
    """Return the spoken reply for the time *now* (a ``datetime``).

    The hour is given on a 12-hour clock so that it agrees with the AM/PM
    marker that follows it.
    """
    # 0 o'clock and 12 o'clock are both spoken as 12
    hour = now.hour % 12 or 12
    return "It's %d %d %s." % (hour, now.minute, now.strftime("%p"))


def handle(o, now=None):
    """Add a ``speech`` entry to the intent dict *o* and return it.

    *o* must contain ``o["intent"]["name"]``. *now* overrides the current
    time for the "GetTime" intent; it defaults to ``datetime.datetime.now()``.
    """
    intent = o["intent"]["name"]

    if intent == "GetTime":
        if now is None:
            now = datetime.datetime.now()
        o["speech"] = {"text": time_sentence(now)}
    elif intent == "Hello":
        o["speech"] = {"text": random.choice(HELLO_REPLIES)}

    return o


def main():
    """Run the handler as a stdin -> stdout filter."""
    # get json from stdin and load into python dict
    o = json.loads(sys.stdin.read())

    # convert dict to json and print to stdout
    print(json.dumps(handle(o)))


if __name__ == "__main__":
    main()
+1 -1
View File
@@ -12,7 +12,7 @@ def main():
with open(sys.argv[1], "r") as dict_file:
for line in dict_file:
line = line.strip()
if len(line) == 0:
if not line:
continue
parts = re.split(r"[ ]+", line)
+18 -9
View File
@@ -128,7 +128,7 @@ if [[ -z "${no_system}" ]]; then
echo "Installing system dependencies"
run_sudo apt-get update
run_sudo apt-get install --no-install-recommends --yes \
run_sudo apt-get install --no-install-recommends \
python3 python3-pip python3-venv python3-dev \
python \
build-essential autoconf autoconf-archive libtool automake bison \
@@ -183,7 +183,7 @@ if [[ ! -d "${phonetisaurus_dir}/build" ]]; then
phonetisaurus_file="${download_dir}/phonetisaurus-2019.tar.gz"
if [[ ! -s "${phonetisaurus_file}" ]]; then
phonetisaurus_url='https://github.com/synesthesiam/phonetisaurus-2019/releases/download/v1.0/phonetisaurus-2019.tar.gz'
phonetisaurus_url='https://github.com/synesthesiam/docker-phonetisaurus/raw/master/download/phonetisaurus-2019.tar.gz'
echo "Downloading phonetisaurus (${phonetisaurus_url})"
maybe_download "${phonetisaurus_url}" "${phonetisaurus_file}"
fi
@@ -191,7 +191,7 @@ fi
# Kaldi
kaldi_dir="${this_dir}/opt/kaldi"
if [[ ! -d "${kaldi_dir}" ]]; then
if [[ -z "${no_kaldi}" && ! -d "${kaldi_dir}" ]]; then
install libatlas-base-dev libatlas3-base gfortran
run_sudo ldconfig
kaldi_file="${download_dir}/kaldi-2019.tar.gz"
@@ -329,31 +329,40 @@ case "${CPU_ARCH}" in
esac
requirements_file="${temp_dir}/requirements.txt"
temp_requirements_file="${temp_dir}/temp_requirements.txt"
cp "${this_dir}/requirements.txt" "${requirements_file}"
# Exclude requirements
if [[ -n "${no_flair}" ]]; then
echo "Excluding flair from virtual environment"
sed -i '/^flair/d' "${requirements_file}"
sed '/^flair/d' "${requirements_file}" > "${temp_requirements_file}" &&
mv "${temp_requirements_file}" "${requirements_file}"
fi
if [[ -n "${no_precise}" ]]; then
echo "Excluding Mycroft Precise from virtual environment"
sed -i '/^precise-runner/d' "${requirements_file}"
sed '/^precise-runner/d' "${requirements_file}" > "${temp_requirements_file}" &&
mv "${temp_requirements_file}" "${requirements_file}"
fi
if [[ -n "${no_adapt}" ]]; then
echo "Excluding Mycroft Adapt from virtual environment"
sed -i '/^adapt-parser/d' "${requirements_file}"
sed '/^adapt-parser/d' "${requirements_file}" > "${temp_requirements_file}" &&
mv "${temp_requirements_file}" "${requirements_file}"
fi
if [[ -n "${no_google}" ]]; then
echo "Excluding Google Text to Speech from virtual environment"
sed -i '/^google-cloud-texttospeech/d' "${requirements_file}"
sed '/^google-cloud-texttospeech/d' "${requirements_file}" > "${temp_requirements_file}" &&
mv "${temp_requirements_file}" "${requirements_file}"
fi
# Install everything except openfst first
sed -i '/^openfst/d' "${requirements_file}"
sed '/^openfst/d' "${requirements_file}" > "${temp_requirements_file}" &&
mv "${temp_requirements_file}" "${requirements_file}"
"${python}" -m pip install -r "${requirements_file}"
@@ -398,5 +407,5 @@ esac
if [[ -z "${no_web}" ]]; then
echo "Building web interface"
cd "${this_dir}" && yarn build
cd "${this_dir}" && yarn install && yarn build
fi
+2 -2
View File
@@ -95,7 +95,7 @@ function maybe_download {
if [[ -z "${no_system}" ]]; then
echo "Installing system dependencies"
sudo apt-get update
sudo apt-get install --no-install-recommends --yes \
sudo apt-get install --no-install-recommends \
python3 python3-pip python3-venv python3-dev \
python \
build-essential autoconf autoconf-archive libtool automake bison \
@@ -122,7 +122,7 @@ if [[ -z "${FLAGS_python}" ]]; then
PYTHON='python3.6'
else
echo "Installing Python 3.6 from source. This is going to take a LONG time."
sudo apt-get install --no-install-recommends --yes \
sudo apt-get install --no-install-recommends \
tk-dev libncurses5-dev libncursesw5-dev \
libreadline6-dev libdb5.3-dev libgdbm-dev \
libsqlite3-dev libssl-dev libbz2-dev \
+1 -132
View File
@@ -1,132 +1 @@
COPY profiles/zh/profile.json \
profiles/zh/custom_words.txt \
profiles/zh/espeak_phonemes.txt \
profiles/zh/phoneme_examples.txt \
profiles/zh/frequent_words.txt \
profiles/zh/sentences.ini \
profiles/zh/stop_words.txt ${RHASSPY_APP}/profiles/zh/
COPY profiles/hi/ \
profiles/hi/profile.json \
profiles/hi/custom_words.txt \
profiles/hi/espeak_phonemes.txt \
profiles/hi/phoneme_examples.txt \
profiles/hi/frequent_words.txt \
profiles/hi/sentences.ini \
profiles/hi/stop_words.txt ${RHASSPY_APP}/profiles/hi/
COPY profiles/el/profile.json \
profiles/el/custom_words.txt \
profiles/el/espeak_phonemes.txt \
profiles/el/phoneme_examples.txt \
profiles/el/frequent_words.txt \
profiles/el/sentences.ini \
profiles/el/stop_words.txt ${RHASSPY_APP}/profiles/el/
COPY profiles/de/profile.json \
profiles/de/custom_words.txt \
profiles/de/espeak_phonemes.txt \
profiles/de/phoneme_examples.txt \
profiles/de/frequent_words.txt \
profiles/de/sentences.ini \
profiles/de/stop_words.txt ${RHASSPY_APP}/profiles/de/
COPY profiles/de/kaldi/custom_words.txt \
profiles/de/kaldi/espeak_phonemes.txt \
profiles/de/kaldi/phoneme_examples.txt \
${RHASSPY_APP}/profiles/de/kaldi/
COPY profiles/it/profile.json \
profiles/it/custom_words.txt \
profiles/it/espeak_phonemes.txt \
profiles/it/phoneme_examples.txt \
profiles/it/frequent_words.txt \
profiles/it/sentences.ini \
profiles/it/stop_words.txt ${RHASSPY_APP}/profiles/it/
COPY profiles/es/profile.json \
profiles/es/custom_words.txt \
profiles/es/espeak_phonemes.txt \
profiles/es/phoneme_examples.txt \
profiles/es/frequent_words.txt \
profiles/es/sentences.ini \
profiles/es/stop_words.txt ${RHASSPY_APP}/profiles/es/
COPY profiles/fr/profile.json \
profiles/fr/custom_words.txt \
profiles/fr/espeak_phonemes.txt \
profiles/fr/phoneme_examples.txt \
profiles/fr/frequent_words.txt \
profiles/fr/sentences.ini \
profiles/fr/stop_words.txt ${RHASSPY_APP}/profiles/fr/
COPY profiles/fr/kaldi/custom_words.txt \
profiles/fr/kaldi/espeak_phonemes.txt \
profiles/fr/kaldi/phoneme_examples.txt \
${RHASSPY_APP}/profiles/fr/kaldi/
COPY profiles/ru/profile.json \
profiles/ru/custom_words.txt \
profiles/ru/espeak_phonemes.txt \
profiles/ru/phoneme_examples.txt \
profiles/ru/frequent_words.txt \
profiles/ru/sentences.ini \
profiles/ru/stop_words.txt ${RHASSPY_APP}/profiles/ru/
COPY profiles/nl/profile.json \
profiles/nl/custom_words.txt \
profiles/nl/espeak_phonemes.txt \
profiles/nl/phoneme_examples.txt \
profiles/nl/frequent_words.txt \
profiles/nl/sentences.ini \
profiles/nl/stop_words.txt ${RHASSPY_APP}/profiles/nl/
COPY profiles/nl/kaldi/custom_words.txt \
profiles/nl/kaldi/espeak_phonemes.txt \
profiles/nl/kaldi/phoneme_examples.txt \
${RHASSPY_APP}/profiles/nl/kaldi/
COPY profiles/vi/profile.json \
profiles/vi/custom_words.txt \
profiles/vi/espeak_phonemes.txt \
profiles/vi/phoneme_examples.txt \
profiles/vi/frequent_words.txt \
profiles/vi/sentences.ini \
profiles/vi/stop_words.txt ${RHASSPY_APP}/profiles/vi/
COPY profiles/pt/profile.json \
profiles/pt/custom_words.txt \
profiles/pt/espeak_phonemes.txt \
profiles/pt/phoneme_examples.txt \
profiles/pt/frequent_words.txt \
profiles/pt/sentences.ini \
profiles/pt/stop_words.txt ${RHASSPY_APP}/profiles/pt/
COPY profiles/sv/profile.json \
profiles/sv/custom_words.txt \
profiles/sv/espeak_phonemes.txt \
profiles/sv/phoneme_examples.txt \
profiles/sv/frequent_words.txt \
profiles/sv/sentences.ini \
profiles/sv/stop_words.txt ${RHASSPY_APP}/profiles/sv/
COPY profiles/ca/profile.json \
profiles/ca/custom_words.txt \
profiles/ca/espeak_phonemes.txt \
profiles/ca/phoneme_examples.txt \
profiles/ca/frequent_words.txt \
profiles/ca/sentences.ini \
profiles/ca/stop_words.txt ${RHASSPY_APP}/profiles/ca/
COPY profiles/en/profile.json \
profiles/en/custom_words.txt \
profiles/en/espeak_phonemes.txt \
profiles/en/phoneme_examples.txt \
profiles/en/frequent_words.txt \
profiles/en/sentences.ini \
profiles/en/stop_words.txt ${RHASSPY_APP}/profiles/en/
COPY profiles/en/kaldi/custom_words.txt \
profiles/en/kaldi/espeak_phonemes.txt \
profiles/en/kaldi/phoneme_examples.txt \
${RHASSPY_APP}/profiles/en/kaldi/
COPY profiles/ ${RHASSPY_APP}/profiles/
@@ -72,138 +72,7 @@ RUN chmod +x /run.sh
COPY profiles/zh/profile.json \
profiles/zh/custom_words.txt \
profiles/zh/espeak_phonemes.txt \
profiles/zh/phoneme_examples.txt \
profiles/zh/frequent_words.txt \
profiles/zh/sentences.ini \
profiles/zh/stop_words.txt ${RHASSPY_APP}/profiles/zh/
COPY profiles/hi/ \
profiles/hi/profile.json \
profiles/hi/custom_words.txt \
profiles/hi/espeak_phonemes.txt \
profiles/hi/phoneme_examples.txt \
profiles/hi/frequent_words.txt \
profiles/hi/sentences.ini \
profiles/hi/stop_words.txt ${RHASSPY_APP}/profiles/hi/
COPY profiles/el/profile.json \
profiles/el/custom_words.txt \
profiles/el/espeak_phonemes.txt \
profiles/el/phoneme_examples.txt \
profiles/el/frequent_words.txt \
profiles/el/sentences.ini \
profiles/el/stop_words.txt ${RHASSPY_APP}/profiles/el/
COPY profiles/de/profile.json \
profiles/de/custom_words.txt \
profiles/de/espeak_phonemes.txt \
profiles/de/phoneme_examples.txt \
profiles/de/frequent_words.txt \
profiles/de/sentences.ini \
profiles/de/stop_words.txt ${RHASSPY_APP}/profiles/de/
COPY profiles/de/kaldi/custom_words.txt \
profiles/de/kaldi/espeak_phonemes.txt \
profiles/de/kaldi/phoneme_examples.txt \
${RHASSPY_APP}/profiles/de/kaldi/
COPY profiles/it/profile.json \
profiles/it/custom_words.txt \
profiles/it/espeak_phonemes.txt \
profiles/it/phoneme_examples.txt \
profiles/it/frequent_words.txt \
profiles/it/sentences.ini \
profiles/it/stop_words.txt ${RHASSPY_APP}/profiles/it/
COPY profiles/es/profile.json \
profiles/es/custom_words.txt \
profiles/es/espeak_phonemes.txt \
profiles/es/phoneme_examples.txt \
profiles/es/frequent_words.txt \
profiles/es/sentences.ini \
profiles/es/stop_words.txt ${RHASSPY_APP}/profiles/es/
COPY profiles/fr/profile.json \
profiles/fr/custom_words.txt \
profiles/fr/espeak_phonemes.txt \
profiles/fr/phoneme_examples.txt \
profiles/fr/frequent_words.txt \
profiles/fr/sentences.ini \
profiles/fr/stop_words.txt ${RHASSPY_APP}/profiles/fr/
COPY profiles/fr/kaldi/custom_words.txt \
profiles/fr/kaldi/espeak_phonemes.txt \
profiles/fr/kaldi/phoneme_examples.txt \
${RHASSPY_APP}/profiles/fr/kaldi/
COPY profiles/ru/profile.json \
profiles/ru/custom_words.txt \
profiles/ru/espeak_phonemes.txt \
profiles/ru/phoneme_examples.txt \
profiles/ru/frequent_words.txt \
profiles/ru/sentences.ini \
profiles/ru/stop_words.txt ${RHASSPY_APP}/profiles/ru/
COPY profiles/nl/profile.json \
profiles/nl/custom_words.txt \
profiles/nl/espeak_phonemes.txt \
profiles/nl/phoneme_examples.txt \
profiles/nl/frequent_words.txt \
profiles/nl/sentences.ini \
profiles/nl/stop_words.txt ${RHASSPY_APP}/profiles/nl/
COPY profiles/nl/kaldi/custom_words.txt \
profiles/nl/kaldi/espeak_phonemes.txt \
profiles/nl/kaldi/phoneme_examples.txt \
${RHASSPY_APP}/profiles/nl/kaldi/
COPY profiles/vi/profile.json \
profiles/vi/custom_words.txt \
profiles/vi/espeak_phonemes.txt \
profiles/vi/phoneme_examples.txt \
profiles/vi/frequent_words.txt \
profiles/vi/sentences.ini \
profiles/vi/stop_words.txt ${RHASSPY_APP}/profiles/vi/
COPY profiles/pt/profile.json \
profiles/pt/custom_words.txt \
profiles/pt/espeak_phonemes.txt \
profiles/pt/phoneme_examples.txt \
profiles/pt/frequent_words.txt \
profiles/pt/sentences.ini \
profiles/pt/stop_words.txt ${RHASSPY_APP}/profiles/pt/
COPY profiles/sv/profile.json \
profiles/sv/custom_words.txt \
profiles/sv/espeak_phonemes.txt \
profiles/sv/phoneme_examples.txt \
profiles/sv/frequent_words.txt \
profiles/sv/sentences.ini \
profiles/sv/stop_words.txt ${RHASSPY_APP}/profiles/sv/
COPY profiles/ca/profile.json \
profiles/ca/custom_words.txt \
profiles/ca/espeak_phonemes.txt \
profiles/ca/phoneme_examples.txt \
profiles/ca/frequent_words.txt \
profiles/ca/sentences.ini \
profiles/ca/stop_words.txt ${RHASSPY_APP}/profiles/ca/
COPY profiles/en/profile.json \
profiles/en/custom_words.txt \
profiles/en/espeak_phonemes.txt \
profiles/en/phoneme_examples.txt \
profiles/en/frequent_words.txt \
profiles/en/sentences.ini \
profiles/en/stop_words.txt ${RHASSPY_APP}/profiles/en/
COPY profiles/en/kaldi/custom_words.txt \
profiles/en/kaldi/espeak_phonemes.txt \
profiles/en/kaldi/phoneme_examples.txt \
${RHASSPY_APP}/profiles/en/kaldi/
COPY profiles/ ${RHASSPY_APP}/profiles/
COPY profiles/defaults.json ${RHASSPY_APP}/profiles/
COPY docker/rhasspy ${RHASSPY_APP}/bin/
+1 -1
View File
@@ -36,7 +36,7 @@ Add to your [profile](profiles.md):
This system listens for up to `timeout_sec` for a voice command. The first few frames of audio data are discarded (`throwaway_buffers`) to avoid clicks from the microphone being engaged. When speech is detected for some number of successive frames (`speech_buffers`), the voice command is considered to have *started*. After `min_sec`, Rhasspy will start listening for silence. If at least `silence_sec` goes by without any speech detected, the command is considered *finished*, and the recorded WAV data is sent to the [speech recognition system](speech-to-text.md).
You may want to adjust `min_sec`, `silence_sec`, and `vad_mode` for your environment.
These control how short a voice command can be (`min_sec`), how much silence is required before Rhasspy stops listening (`silence_sec`), and how sensitive the voice activity detector is (`vad_mode`, higher is more sensitive).
These control how short a voice command can be (`min_sec`), how much silence is required before Rhasspy stops listening (`silence_sec`), and how aggressively the voice activity detector filters out non-speech (`vad_mode`). `vad_mode` is an integer between 0 and 3: 0 is the least aggressive about filtering out non-speech, and 3 is the most aggressive.
**NOTE**: you must set `chunk_size` such that (relative to sample rate) it produces 10, 20, or 30 millisecond buffers. This is required by `webrtcvad`.
+4 -1
View File
@@ -4,6 +4,9 @@ Rhasspy is designed to be run on different kinds of hardware, such as:
* Raspberry Pi 2-3 B/B+ (`armhf`/`aarch64`)
* Desktop/laptop/server (`amd64`)
* Raspberry Pi Zero (`armv6l`)
* You must use a [virtual environment](installation.md#virtual-environment)
* The [Kaldi speech recognizer](speech-to-text.md#kaldi) is **not** supported
The table below summarizes architecture compatibility with Rhasspy's components:
@@ -30,7 +33,7 @@ The table below summarizes architecture compatibility with Rhasspy's components:
To run Rhasspy on a Raspberry Pi, you'll need at least a 4 GB SD card and a good power supply. I highly recommend the [CanaKit Starter Kit](https://www.amazon.com/CanaKit-Raspberry-Starter-Premium-Black/dp/B07BCC8PK7), which includes a 32 GB SD card, a 2.5 A power supply, and a case.
Some components of Rhasspy will not work on the Raspberry Pi 3 B+ model (`aarch64`). As of the time of this writing, these are:
Some components of Rhasspy will not work on the Raspberry Pi 3 B+ model with a 64-bit operating system (`aarch64`). As of the time of this writing, these are:
* [snowboy](wake-word.md#snowboy) (wake word)
* [Mycroft Precise](wake-word.md#mycroft-precise) (wake word)
+56 -6
View File
@@ -46,10 +46,25 @@ If you're using [docker compose](https://docs.docker.com/compose/), add the foll
devices:
- "/dev/snd:/dev/snd"
command: --user-profiles /profiles --profile en
### Updating Docker Image
To update your Rhasspy Docker image, just run:
```bash
docker pull synesthesiam/rhasspy-server:latest
```
on your Rhasspy server and restart the Docker container. This may require running something like:
```bash
docker rm <container-name>
```
before doing a `docker run...`
## Hass.io
The second easiest was to install Rhasspy is as a [Hass.io add-on](https://www.home-assistant.io/addons/). Following the [installation instructions for Hass.io](https://www.home-assistant.io/hassio/installation/) before proceeding.
The second easiest way to install Rhasspy is as a [Hass.io add-on](https://www.home-assistant.io/addons/). Follow the [installation instructions for Hass.io](https://www.home-assistant.io/hassio/installation/) before proceeding.
To install the add-on, add my [Hass.IO Add-On Repository](https://github.com/synesthesiam/hassio-addons) in the Add-On Store, refresh, then install the "Rhasspy Assistant" under “Synesthesiam Hass.IO Add-Ons” (all the way at the bottom of the Add-On Store screen).
@@ -63,24 +78,55 @@ Before starting the add-on, make sure to give it access to your microphone and s
![Audio settings for Hass.io](img/hass-io-audio.png)
### Updating Hass.IO Add-On
You should receive notifications when a new version of Rhasspy is available for Hass.IO. Follow the instructions from Hass.IO on how to update the add-on.
## Virtual Environment
Rhasspy can be installed into a Python virtual environment, though there are a number of requirements. This may be desirable, however, if you have trouble getting Rhasspy to access your microphone from within a Docker container. To start, clone the repo somewhere:
git clone https://github.com/synesthesiam/rhasspy.git
```bash
git clone https://github.com/synesthesiam/rhasspy.git
```
Then run the `download-dependencies.sh` and `create-venv.sh` scripts (assumes a Debian distribution):
cd rhasspy/
./download-dependencies.sh
./create-venv.sh
```bash
cd rhasspy/
./download-dependencies.sh
./create-venv.sh
```
Once the installation finishes (5-10 minutes on a Raspberry Pi 3), you can use the `run-venv.sh` script to start Rhasspy:
./run-venv.sh --profile en
```bash
./run-venv.sh --profile en
```
If all is well, the web interface will be available at [http://localhost:12101](http://localhost:12101)
### Updating Virtual Environment
To update your Rhasspy virtual environment to the latest version, run:
```bash
git pull origin master
```
in your `rhasspy` directory, and then update your Python dependencies:
```bash
source .venv/bin/activate
pip3 install -r requirements.txt
```
You should also re-build the web interface:
1. Install [yarn](https://yarnpkg.com) on your system
2. Run `yarn install && yarn build` in the `rhasspy` directory
3. Restart any running instances of Rhasspy
### Running as a Service
Once installed, Rhasspy can be run as a [systemd service](https://systemd.io/). An [example unit file](https://github.com/synesthesiam/rhasspy/blob/master/etc/rhasspy.service) is available (thanks [UnderpantsGnome](https://github.com/UnderpantsGnome)):
@@ -151,4 +197,8 @@ On low memory devices like the Raspberry Pi, building the tools above can quickl
You can skip building Kaldi if you plan to just [use Pocketsphinx](speech-to-text.md#pocketsphinx) for speech recognition.
### Updating Source Install
Follow the same instructions as [updating a virtual environment](#updating-virtual-environment).
+1 -1
View File
@@ -207,7 +207,7 @@ The following environment variables are available to your program:
* `$RHASSPY_PROFILE` - name of the current profile (e.g., "en")
* `$RHASSPY_PROFILE_DIR` - directory of the current profile (where `profile.json` is)
See [handle.sh](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/handle.sh) for an example program.
See [handle.sh](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/handle.sh) or [handle.py](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/handle.py) for example programs.
### Speech
+1 -1
View File
@@ -10,8 +10,8 @@ The following table summarizes the trade-offs of using each intent recognizer:
| [fsticuffs](intent-recognition.md#fsticuffs) | 1M+ | very fast | very fast | ignores unknown words |
| [fuzzywuzzy](intent-recognition.md#fuzzywuzzy) | 12-100 | fast | fast | fuzzy string matching |
| [adapt](intent-recognition.md#mycroft-adapt) | 100-1K | moderate | fast | ignores unknown words |
| [flair](intent-recognition.md#flair) | 1K-100K | very slow | moderate | handles unseen words |
| [rasaNLU](intent-recognition.md#rasanlu) | 1K-100K | very slow | moderate | handles unseen words |
| [flair](intent-recognition.md#flair) | 1K-100K | very slow | moderate | handles unseen words |
## Fsticuffs
+10 -2
View File
@@ -53,7 +53,8 @@ Application authors may want to use the [rhasspy-client](https://pypi.org/projec
* `?timeout=<seconds>` - override default command timeout
* `?entity=<entity>&value=<value>` - set custom entity/value in recognized intent
* `/api/listen-for-wake-word`
* POST to wake Rhasspy up and return immediately
* POST "on" to have Rhasspy listen for a wake word
* POST "off" to disable wake word
* `/api/lookup`
* POST word as plain text to look up or guess pronunciation
* `?n=<number>` - return at most `n` guessed pronunciations
@@ -84,6 +85,10 @@ Application authors may want to use the [rhasspy-client](https://pypi.org/projec
* POST a WAV file and have Rhasspy process it as a voice command
* Returns intent JSON when command is finished
* `?nohass=true` - stop Rhasspy from handling the intent
* `/api/speech-to-text`
* POST a WAV file and have Rhasspy return the text transcription
* Set `Accept: application/json` to receive JSON with more details
* `?noheader=true` - send raw 16-bit 16 kHz mono audio without a WAV header
* `/api/start-recording`
* POST to have Rhasspy start recording a voice command
* `/api/stop-recording`
@@ -425,12 +430,14 @@ All available profile sections and settings are listed below:
* `g2p_model` - finite-state transducer for phonetisaurus to guess word pronunciations
* `g2p_casing` - casing to force for g2p model (`upper`, `lower`, or blank)
* `dictionary_casing` - casing to force for dictionary words (`upper`, `lower`, or blank)
* `grammars_dir` - directory to write generated JSGF grammars from sentences ini file
* `slots_dir` - directory to look for [slots lists](training.md#slots-lists) (default: `slots`)
* `slot_programs_dir` - directory to look for [slot programs](training.md#slot-programs) (default: `slot_programs`)
* `fsts_dir` - directory to write generated finite state transducers from JSGF grammars
* `intent` - transforming text commands to intents
* `system` - intent recognition system (`fsticuffs`, `fuzzywuzzy`, `rasa`, `remote`, `adapt`, `command`, or `dummy`)
* `fsticuffs` - configuration for [OpenFST-based](https://www.openfst.org) intent recognizer
* `intent_fst` - path to generated finite state transducer with all intents combined
* `converters_dir` - directory to look for [converter](training.md#converters) programs (default: `converters`)
* `ignore_unknown_words` - true if words not in the FST symbol table should be ignored
* `fuzzy` - true if text is matched in a fuzzy manner, skipping words in `stop_words.txt`
* `fuzzywuzzy` - configuration for simplistic [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) based intent recognizer
@@ -447,6 +454,7 @@ All available profile sections and settings are listed below:
* `command` - configuration for external speech-to-text program
* `program` - path to executable
* `arguments` - list of arguments to pass to program
* `replace_numbers` - if true, automatically replace number ranges (`N..M`) or numbers (`N`) with words
* `text_to_speech` - pronouncing words
* `system` - text to speech system (`espeak`, `flite`, `picotts`, `marytts`, `command`, or `dummy`)
* `espeak` - configuration for [eSpeak](http://espeak.sourceforge.net)
+16 -1
View File
@@ -29,6 +29,19 @@ Add to your [profile](profiles.md):
Remove the `voice` option to have `espeak` use your profile's language automatically.
You may also pass additional arguments to the `espeak` command. For example,
```json
"text_to_speech": {
"system": "espeak",
"espeak": {
"arguments": ["-s", "80"]
}
}
```
will speak the sentence more slowly.
See `rhasspy.tts.EspeakSentenceSpeaker` for more details.
## Flite
@@ -52,7 +65,9 @@ See `rhasspy.tts.FliteSentenceSpeaker` for details.
## PicoTTS
Uses SVOX's [picotts](https://en.wikipedia.org/wiki/SVOX) for text to speech. Sounds a bit better (to me) than `flite` or `espeak`, but only has a single English voice.
Uses SVOX's [picotts](https://en.wikipedia.org/wiki/SVOX) for text to speech. Sounds a bit better (to me) than `flite` or `espeak`.
Included languages are `en-US`, `en-GB`, `de-DE`, `es-ES`, `fr-FR` and `it-IT`.
Add to your [profile](profiles.md):
+96 -1
View File
@@ -5,7 +5,11 @@ Rhasspy is designed to recognize voice commands [in a template language](#senten
* Intent Recognition
* [Basic Syntax](#basic-syntax)
* [Named Entities](#tags)
* [Number Ranges](#number-ranges)
* [Slots](#slots-lists)
* [Slot Synonyms](#slot-synonyms)
* [Slot Programs](#slot-programs)
* [Converters](#converters)
* Speech Recognition
* [Custom Words](#custom-words)
* [Language Model Mixing](#language-model-mixing)
@@ -156,6 +160,24 @@ You can **share rules** across intents by referencing them as `<IntentName.rule_
The second intent (`GetLightColor`) references the `colors` rule from `SetLightColor`. Rule references without a dot must exist in the current intent.
### Number Ranges
Rhasspy supports using number literals (`75`) and number ranges (`1..10`) directly in your sentence templates. During training, the [num2words](https://pypi.org/project/num2words) package is used to generate words that the speech recognizer can handle ("seventy five"). For example:
```
[SetBrightness]
set brightness to (0..100){brightness}
```
The `brightness` property of the recognized `SetBrightness` intent will automatically be [converted](#converters) to an integer for you. You can optionally add a step to the integer range:
```
evens = 0..100,2
odds = 1..100,2
```
Under the hood, number ranges are actually references to the `rhasspy/number` [slot program](#slot-programs). You can override this behavior by creating your own `slot_programs/rhasspy/number` program, or disable it entirely by setting `intent.replace_numbers` to `false` in [your profile](profiles.md).
### Slots Lists
Large [alternatives](#alternatives) can become unwieldy quickly. For example, say you have a list of movie names:
@@ -185,7 +207,11 @@ play ($movies){movie_name}
When matched, the `PlayMovie` intent JSON will contain `movie_name` property with either "Primer", "Moon", etc.
Make sure to **re-train** Rhasspy whenever you update your slot values.
Make sure to **re-train** Rhasspy whenever you update your slot values!
#### Slot Directories
Slot files can be put in **sub-directories** under `slots`. A list in `slots/foo/bar` should be referenced in `sentences.ini` as `$foo/bar`.
#### Slot Synonyms
@@ -206,6 +232,75 @@ which is referenced by `$rooms` and will match:
This will always output just "den" because `[the:]` optionally matches "the" and then drops the word.
#### Slot Programs
Slot lists are great if your slot values always stay the same and are easily written out by hand. If you have slot values that you need to be generated *each time Rhasspy is trained*, you can use slot programs.
Create a directory named `slot_programs` in your profile (e.g., `$HOME/.config/rhasspy/profiles/en/slot_programs`):
```bash
slot_programs="${HOME}/.config/rhasspy/profiles/en/slot_programs"
mkdir -p "${slot_programs}"
```
Add a file in `slot_programs` with the name of your slot, e.g. `colors`. Write a program in this file, such as a bash script. Make sure to include the [shebang](https://en.wikipedia.org/wiki/Shebang_(Unix)) and mark the file as executable:
```bash
cat <<EOF > "${slot_programs}/colors"
#!/usr/bin/env bash
echo 'red'
echo 'green'
echo 'blue'
EOF
chmod +x "${slot_programs}/colors"
```
Now, when you reference `$colors` in your `sentences.ini`, Rhasspy will run the program you wrote and collect the slot values from each line. Note that you can output all the same things as regular [slots lists](#slots-lists), including optional words, alternatives, etc.
You can pass **arguments** to your program using the syntax `$name,arg1,arg2,...` in `sentences.ini` (no spaces). Arguments will be passed on the command-line, so `arg1` and `arg2` will be `$1` and `$2` in a bash script.
Like regular slots lists, slot programs can also be put in sub-directories under `slot_programs`. A program in `slot_programs/foo/bar` should be referenced in `sentences.ini` as `$foo/bar`.
#### Built-in Slots
Rhasspy includes a few built-in slots for each language:
* `$rhasspy/days` - day names of the week
* `$rhasspy/months` - month names of the year
### Converters
By default, all named entity values in a recognized intent's JSON are strings. If you need a different data type, such as an integer or float, or want to do some kind of complex *conversion*, use a converter:
```
[SetBrightness]
set brightness to (low:0 | medium:0.5 | high:1){brightness!float}
```
The `!name` syntax calls a converter by name. Rhasspy includes several built-in converters:
* int - convert to integer
* float - convert to real
* bool - convert to boolean
* lower - lower-case
* upper - upper-case
You can define your own converters by placing a file in the `converters` directory of your profile. Like [slot programs](#slot-programs), this file should contain a [shebang](https://en.wikipedia.org/wiki/Shebang_(Unix)) and be marked as executable (`chmod +x`). A file named `converters/foo/bar` should be referenced as `!foo/bar` in `sentences.ini`.
Your custom converter will receive the value to convert on standard in (`stdin`) encoded as JSON. You should print a converted JSON value to standard out (`stdout`). The example below demonstrates converting a string value into an integer:
```python
#!/usr/bin/env python3
import sys
import json
value = json.load(sys.stdin)
print(int(value))
```
Converters can be *chained*, so `!foo!bar` will call the `foo` converter and then pass the result to `bar`.
### Special Cases
If one of your sentences happens to start with an optional word (e.g., `[the]`), this can lead to a problem:
+95 -5
View File
@@ -2,6 +2,11 @@
* [RGB Light Example](#rgb-light-example)
* [Client/Server Setup](#clientserver-setup)
* MATRIX Labs
* [Rhasspy Voice Assistant on MATRIX Voice and MATRIX Creator](https://www.hackster.io/matrix-labs/rhasspy-voice-assistant-on-matrix-voice-and-matrix-creator-97f92e)
* [Adding Intents for Rhasspy Offline Voice Assistant](https://www.hackster.io/matrix-labs/adding-intents-for-rhasspy-offline-voice-assistant-faa221)
* Rendered Obsolete
* [Home Assistant Voice Recognition with Rhasspy](https://rendered-obsolete.github.io/2020/01/02/rhasspy.html)
## RGB Light Example
@@ -63,7 +68,90 @@ You can now fill in the rest of the Home Assistant automation:
rgb_color: [255, 0, 0]
entity_id: light.bedroom
This will handle the specific case of setting the bedroom light to red, but not any other color. You can either add additional automations to handle these, or make use of [automation templating](https://www.home-assistant.io/docs/automation/templating/) to do it all at once.
This will handle the specific case of setting the bedroom light to red, but not any other color. You can either add additional automations to handle these, or make use of [automation templating](https://www.home-assistant.io/docs/automation/templating/) to do it all at once (see the [Home Assistant Template Example](#home-assistant-template-example)).
### Home Assistant Template Example
Using the following additions, you can get Home Assistant to respond to turning on / off *ANY* light in your setup.
#### Slots
Add the following JSON to the Slots tab in your Rhasspy web interface:
```json
{
"lights": [
"(living room wall):light.bulb_3",
"(living room desk):switch.m4",
"(living room floor):switch.sonoff",
"(bar lights):switch.maxcio1",
"(entry wall):light.bulb_4",
"(guest wall):light.bulb_6",
"(guest floor):switch.m5",
"(bedroom wall):light.bulb_5",
"(bedroom desk):light.bulb_1",
"(bedroom floor):light.bulb_2"
]
}
```
#### Sentences
A simple sentence to turn any of the lights in the slots file on or off.
Note the use of the `<state>` rule and the slot `$lights`
```
[ChangeLightState]
state = (on | off) {light_state}
turn [the] ($lights) {light_name} <state>
```
#### Home Assistant
In your Home Assistant `automations.yaml` file, use a `data_template` to get the Rhasspy event data with `trigger.event.data.<your property name>` and then pass those along to a script:
```yaml
- id: '1577164768008'
alias: Rhasspy Light States
description: Voice Control on/off states for all lights
trigger:
- event_data: {}
event_type: rhasspy_ChangeLightState
platform: event
condition: []
action:
- alias: ''
data_template:
light_name: "{{ trigger.event.data.light_name }}"
light_state: "{{ trigger.event.data.light_state }}"
service: script.rhasspy_light_state
```
In `scripts.yaml`, the `service_template` casts the `light_state` into a string and checks to see if you said 'on' or 'off'. The homeassistant-service can toggle both lights and switches, which is helpful if you have a combination of "light" types:
```yaml
rhasspy_light_state:
alias: change_light_state
fields:
light_name:
description: "Light Entity"
example: light.bulb_1
light_state:
description: "State to change the light to"
example: on
sequence:
- service_template: >
{% set this_state = light_state | string %}
{% if this_state == 'on' %}
homeassistant.turn_on
{%else %}
homeassistant.turn_off
{% endif %}
data_template:
entity_id: "{{ light_name }}"
```
## Client/Server Setup
@@ -96,9 +184,10 @@ Contributed by [jaburges](https://community.home-assistant.io/u/jaburges)
[Rhasspy]
Listen for wake word on Startup = UNchecked
[Home Assistant]
Do not use Home Assistant (note you obviously can instead of Node-Red)
[Intent Handling]
Do not handle intent on this device
#There is no harm in having the Server handle Intents, but the Client must handle Intents
[Wake Word]
No Wake word on this device
@@ -181,7 +270,8 @@ Contributed by [jaburges](https://community.home-assistant.io/u/jaburges)
Listen for wake word on Startup = checked
[Home Assistant]
Do not use Home Assistant (note you obviously can instead of Node-Red)
Enable Intent Handling on this device
#Do not use Home Assistant if using Node-Red
[Wake Word]
Use snowboy (this should trigger a download of more files)
+39 -1
View File
@@ -142,7 +142,18 @@ More example flows are available [on Github](https://github.com/synesthesiam/rha
### WebSocket Events
Whenever a voice command is recognized, Rhasspy emits JSON events over a websocket connection available at `ws://rhasspy:12101/api/events/intent` (replace `ws://` with `wss://` if you're using [secure hosting](usage.md#secure-hosting-with-https)).
Rhasspy supports multiple websocket event endpoints:
* `/api/events/intent`
* Intent recognized or not
* `/api/events/wake`
* Wake word detected
* `/api/events/text`
* Speech transcription
#### WebSocket Intents
Whenever a voice command is recognized, Rhasspy emits JSON events over a websocket connection available at `ws://YOUR_SERVER:12101/api/events/intent` (replace `ws://` with `wss://` if you're using [secure hosting](usage.md#secure-hosting-with-https)).
You can listen to these events in a [Node-RED](https://nodered.org) flow, and easily add offline, private voice commands to your home automation set up!
For the `ChangeLightState` intent from the [RGB Light Example](index.md#rgb-light-example), Rhasspy will emit a JSON event like this over the websocket:
@@ -171,6 +182,33 @@ For the `ChangLightState` intent from the [RGB Light Example](index.md#rgb-light
}
```
#### WebSocket Wake
When the wake word is detected, or Rhasspy is woken up via the `/api/listen-for-command` HTTP endpoint, a JSON event is emitted at `ws://YOUR_SERVER:12101/api/events/wake` (`wss://` if using HTTPS) like:
```json
{
"wakewordId": "default",
"siteId": "default"
}
```
The `wakewordId` is set using the model or file name of your wakeword model (e.g., `porcupine` for `porcupine.ppn`). The `siteId` comes from your `mqtt.siteId` profile setting.
#### WebSocket Transcriptions
Each time a voice command is transcribed, Rhasspy emits a JSON event at `ws://YOUR_SERVER:12101/api/events/text` (`wss://` if using HTTPS) like:
```json
{
"text": "text from voice command",
"wakewordId": "default",
"siteId": "default"
}
```
The transcription is contained in the `text` property. `wakewordId` is the id of the wakeword that initiated the voice command (or `default`). The `siteId` comes from your `mqtt.siteId` profile setting.
## MQTT and Snips
Rhasspy is able to interoperate with Snips.AI services using the [Hermes protocol](https://docs.snips.ai/reference/hermes) over [MQTT](http://mqtt.org). The following components are Snips/Hermes compatible:
+1 -1
View File
@@ -37,7 +37,7 @@ Add to your [profile](profiles.md):
There are a lot of [keyword files](https://github.com/Picovoice/Porcupine/tree/master/resources/keyword_files) available for download. Use the `linux` platform if you're on desktop/laptop (`amd64`) and the `raspberrypi` platform if you're using a Raspberry Pi (`armhf`/`aarch64`). The `.ppn` files should go in the `porcupine` directory inside your profile (referenced by `keyword_path`).
If you want to create a custom wake word, you will need to run the [Porcupine Optimizer](https://github.com/Picovoice/Porcupine/tree/master/tools/optimizer). **NOTE**: the generated keyword file is only valid for 30 days, though you can always just re-run the optimizer.
If you want to create a custom wake word, you will need to use the [Picovoice Console](https://github.com/Picovoice/porcupine#picovoice-console). **NOTE**: the generated keyword file is only valid for 30 days, though you can always generate a new one in the console.
See `rhasspy.wake.PorcupineWakeListener` for details.
+3
View File
@@ -72,6 +72,9 @@ ignore_missing_imports = True
[mypy-json5.*]
ignore_missing_imports = True
[mypy-quart.*]
ignore_missing_imports = True
[mypy-quart_cors.*]
ignore_missing_imports = True
+4 -4
View File
@@ -64,16 +64,16 @@ class Porcupine(object):
"""
if not os.path.exists(library_path):
raise IOError("Could not find Porcupine's library at '%s'" % library_path)
raise IOError(f"Could not find Porcupine's library at '{library_path}'")
library = cdll.LoadLibrary(library_path)
if not os.path.exists(model_file_path):
raise IOError("Could not find model file at '%s'" % model_file_path)
raise IOError(f"Could not find model file at '{model_file_path}'")
if sensitivity is not None and keyword_file_path is not None:
if not os.path.exists(keyword_file_path):
raise IOError("Could not find keyword file at '%s'" % keyword_file_path)
raise IOError(f"Could not find keyword file at '{keyword_file_path}'")
keyword_file_paths = [keyword_file_path]
if not (0 <= sensitivity <= 1):
@@ -85,7 +85,7 @@ class Porcupine(object):
for x in keyword_file_paths:
if not os.path.exists(os.path.expanduser(x)):
raise IOError("Could not find keyword file at '%s'" % x)
raise IOError(f"Could not find keyword file at '{x}'")
for x in sensitivities:
if not (0 <= x <= 1):
+1
View File
@@ -1,6 +1,7 @@
{
"language": "ca",
"name": "ca",
"locale": "ca_ES",
"speech_to_text": {
"system": "pocketsphinx",
"dictionary_casing": "lower"
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main():
    """Print every integer from lower to upper (inclusive), one per line.

    The two required command-line arguments are the bounds of the range; they
    are swapped when given in descending order.  An optional third argument
    is the step between consecutive numbers (default: 1).  Any further
    arguments are ignored.

    Exits through ``parser.error`` (usage message, status 2) when the step is
    not a positive integer.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")
    # The step is optional, so it is picked out of the leftover arguments.
    args, rest_args = parser.parse_known_args()

    lower = args.lower
    upper = args.upper
    step = 1

    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises on a zero step and yields nothing for a negative one
    # once the bounds are ordered, so reject both with a clear message.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
dilluns
dimarts
dimecres
dijous
divendres
dissabte
diumenge
+12
View File
@@ -0,0 +1,12 @@
de gener
de febrer
de març
dabril
de maig
de juny
de juliol
dagost
de setembre
doctubre
de novembre
de desembre
+2 -1
View File
@@ -1,6 +1,7 @@
{
"language": "de",
"name": "de",
"locale": "de_DE",
"speech_to_text": {
"system": "pocketsphinx",
"dictionary_casing": "lower",
@@ -9,7 +10,7 @@
"base_language_model": "kaldi/base_language_model.txt",
"base_language_model_fst": "kaldi/base_language_model.fst",
"compatible": true,
"custom_words": "kaldi/custom_words.txt",
"custom_words": "kaldi_custom_words.txt",
"dictionary": "kaldi/dictionary.txt",
"graph": "graph",
"language_model": "kaldi/language_model.txt",
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main():
    """Print every integer from lower to upper (inclusive), one per line.

    The two required command-line arguments are the bounds of the range; they
    are swapped when given in descending order.  An optional third argument
    is the step between consecutive numbers (default: 1).  Any further
    arguments are ignored.

    Exits through ``parser.error`` (usage message, status 2) when the step is
    not a positive integer.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")
    # The step is optional, so it is picked out of the leftover arguments.
    args, rest_args = parser.parse_known_args()

    lower = args.lower
    upper = args.upper
    step = 1

    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises on a zero step and yields nothing for a negative one
    # once the bounds are ordered, so reject both with a clear message.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
Montag
Dienstag
Mittwoch
Donnerstag
Freitag
Samstag
Sonntag
+12
View File
@@ -0,0 +1,12 @@
Januar
Februar
März
April
Mai
Juni
Juli
August
September
Oktober
November
Dezember
+13 -5
View File
@@ -28,10 +28,10 @@
"program": ""
},
"forward_to_hass": false,
"system": "dummy",
"system": "dummy",
"remote": {
"url": "http://my-server:port/endpoint"
},
}
},
"home_assistant": {
"access_token": "",
@@ -52,14 +52,17 @@
"conversation": {
"handle_speech": true
},
"error_sound": true,
"fuzzywuzzy": {
"examples_json": "intent_examples.json",
"min_confidence": 0
},
"fsticuffs": {
"intent_fst": "intent.fst",
"intent_graph": "intent.json",
"ignore_unknown_words": true,
"fuzzy": true
"fuzzy": true,
"converters_dir": "converters"
},
"flair": {
"cache_dir": "flair/cache",
@@ -73,7 +76,8 @@
"rasa": {
"examples_markdown": "intent_examples.md",
"project_name": "rhasspy",
"url": "http://localhost:5005/"
"url": "http://localhost:5005/",
"model_dir": "/app/models"
},
"remote": {
"url": "http://my-server:12101/api/text-to-intent"
@@ -125,7 +129,8 @@
"sounds": {
"recorded": "${RHASSPY_BASE_DIR}/etc/wav/beep_lo.wav",
"system": "aplay",
"wake": "${RHASSPY_BASE_DIR}/etc/wav/beep_hi.wav"
"wake": "${RHASSPY_BASE_DIR}/etc/wav/beep_hi.wav",
"error": "${RHASSPY_BASE_DIR}/etc/wav/beep_error.wav"
},
"speech_to_text": {
"command": {
@@ -186,6 +191,7 @@
"sentences_ini": "sentences.ini",
"sentences_dir": "intents",
"slots_dir": "slots",
"slot_programs_dir": "slot_programs",
"system": "dummy"
},
"text_to_speech": {
@@ -193,6 +199,7 @@
"arguments": [],
"program": ""
},
"disable_wake": false,
"espeak": {},
"flite": {
"voice": "kal16"
@@ -284,6 +291,7 @@
},
"system": "dummy"
},
"webhooks": {},
"download": {
"cache_dir": "download",
"conditions": {
+1
View File
@@ -1,6 +1,7 @@
{
"language": "el",
"name": "el",
"locale": "el_GR",
"speech_to_text": {
"g2p_casing": "lower",
"system": "pocketsphinx",
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main():
    """Print every integer from lower to upper (inclusive), one per line.

    The two required command-line arguments are the bounds of the range; they
    are swapped when given in descending order.  An optional third argument
    is the step between consecutive numbers (default: 1).  Any further
    arguments are ignored.

    Exits through ``parser.error`` (usage message, status 2) when the step is
    not a positive integer.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")
    # The step is optional, so it is picked out of the leftover arguments.
    args, rest_args = parser.parse_known_args()

    lower = args.lower
    upper = args.upper
    step = 1

    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises on a zero step and yields nothing for a negative one
    # once the bounds are ordered, so reject both with a clear message.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
Δευτέρα
Τρίτη
Τετάρτη
Πέμπτη
Παρασκευή
Σάββατο
Κυριακή
+12
View File
@@ -0,0 +1,12 @@
Ιανουαρίου
Φεβρουαρίου
Μαρτίου
Απριλίου
Μαΐου
Ιουνίου
Ιουλίου
Αυγούστου
Σεπτεμβρίου
Οκτωβρίου
Νοεμβρίου
Δεκεμβρίου
+2 -1
View File
@@ -1,6 +1,7 @@
{
"language": "en",
"name": "en",
"locale": "en_US",
"speech_to_text": {
"system": "pocketsphinx",
"dictionary_casing": "lower",
@@ -9,7 +10,7 @@
"base_language_model": "kaldi/base_language_model.txt",
"base_language_model_fst": "kaldi/base_language_model.fst",
"compatible": true,
"custom_words": "kaldi/custom_words.txt",
"custom_words": "kaldi_custom_words.txt",
"dictionary": "kaldi/dictionary.txt",
"graph": "graph",
"language_model": "kaldi/language_model.txt",
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main():
    """Print every integer from lower to upper (inclusive), one per line.

    The two required command-line arguments are the bounds of the range; they
    are swapped when given in descending order.  An optional third argument
    is the step between consecutive numbers (default: 1).  Any further
    arguments are ignored.

    Exits through ``parser.error`` (usage message, status 2) when the step is
    not a positive integer.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")
    # The step is optional, so it is picked out of the leftover arguments.
    args, rest_args = parser.parse_known_args()

    lower = args.lower
    upper = args.upper
    step = 1

    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises on a zero step and yields nothing for a negative one
    # once the bounds are ordered, so reject both with a clear message.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
Monday
Tuesday
Wednesday
Thursday
Friday
Saturday
Sunday
+12
View File
@@ -0,0 +1,12 @@
January
February
March
April
May
June
July
August
September
October
November
December
+1
View File
@@ -1,6 +1,7 @@
{
"language": "es",
"name": "es",
"locale": "es_ES",
"speech_to_text": {
"system": "pocketsphinx",
"dictionary_casing": "lower"
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main():
    """Print every integer from lower to upper (inclusive), one per line.

    The two required command-line arguments are the bounds of the range; they
    are swapped when given in descending order.  An optional third argument
    is the step between consecutive numbers (default: 1).  Any further
    arguments are ignored.

    Exits through ``parser.error`` (usage message, status 2) when the step is
    not a positive integer.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")
    # The step is optional, so it is picked out of the leftover arguments.
    args, rest_args = parser.parse_known_args()

    lower = args.lower
    upper = args.upper
    step = 1

    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises on a zero step and yields nothing for a negative one
    # once the bounds are ordered, so reject both with a clear message.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
lunes
martes
miércoles
jueves
viernes
sábado
domingo
+12
View File
@@ -0,0 +1,12 @@
enero
febrero
marzo
abril
mayo
junio
julio
agosto
septiembre
octubre
noviembre
diciembre
+2 -1
View File
@@ -1,6 +1,7 @@
{
"language": "fr",
"name": "fr",
"locale": "fr_FR",
"speech_to_text": {
"system": "pocketsphinx",
"dictionary_casing": "lower",
@@ -9,7 +10,7 @@
"base_language_model": "kaldi/base_language_model.txt",
"base_language_model_fst": "kaldi/base_language_model.fst",
"compatible": true,
"custom_words": "kaldi/custom_words.txt",
"custom_words": "kaldi_custom_words.txt",
"dictionary": "kaldi/dictionary.txt",
"graph": "graph",
"language_model": "kaldi/language_model.txt",
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main():
    """Print every integer from lower to upper (inclusive), one per line.

    The two required command-line arguments are the bounds of the range; they
    are swapped when given in descending order.  An optional third argument
    is the step between consecutive numbers (default: 1).  Any further
    arguments are ignored.

    Exits through ``parser.error`` (usage message, status 2) when the step is
    not a positive integer.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")
    # The step is optional, so it is picked out of the leftover arguments.
    args, rest_args = parser.parse_known_args()

    lower = args.lower
    upper = args.upper
    step = 1

    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises on a zero step and yields nothing for a negative one
    # once the bounds are ordered, so reject both with a clear message.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
lundi
mardi
mercredi
jeudi
vendredi
samedi
dimanche
+12
View File
@@ -0,0 +1,12 @@
janvier
février
mars
avril
mai
juin
juillet
août
septembre
octobre
novembre
décembre
+1
View File
@@ -1,6 +1,7 @@
{
"language": "hi",
"name": "hi",
"locale": "hi_IN",
"speech_to_text": {
"system": "pocketsphinx",
"dictionary_casing": "lower"
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main():
    """Print every integer from lower to upper (inclusive), one per line.

    The two required command-line arguments are the bounds of the range; they
    are swapped when given in descending order.  An optional third argument
    is the step between consecutive numbers (default: 1).  Any further
    arguments are ignored.

    Exits through ``parser.error`` (usage message, status 2) when the step is
    not a positive integer.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")
    # The step is optional, so it is picked out of the leftover arguments.
    args, rest_args = parser.parse_known_args()

    lower = args.lower
    upper = args.upper
    step = 1

    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises on a zero step and yields nothing for a negative one
    # once the bounds are ordered, so reject both with a clear message.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
सोमवार
मंगलवार
बुधवार
गुरुवार
शुक्रवार
शनिवार
रविवार
+12
View File
@@ -0,0 +1,12 @@
जनवरी
फ़रवरी
मार्च
अप्रैल
मई
जून
जुलाई
अगस्त
सितंबर
अक्तूबर
नवंबर
दिसंबर
+1
View File
@@ -1,6 +1,7 @@
{
"name": "it",
"language": "it",
"locale": "it_IT",
"speech_to_text": {
"system": "pocketsphinx",
"dictionary_casing": "lower"
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main():
    """Print every integer from lower to upper (inclusive), one per line.

    The two required command-line arguments are the bounds of the range; they
    are swapped when given in descending order.  An optional third argument
    is the step between consecutive numbers (default: 1).  Any further
    arguments are ignored.

    Exits through ``parser.error`` (usage message, status 2) when the step is
    not a positive integer.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")
    # The step is optional, so it is picked out of the leftover arguments.
    args, rest_args = parser.parse_known_args()

    lower = args.lower
    upper = args.upper
    step = 1

    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises on a zero step and yields nothing for a negative one
    # once the bounds are ordered, so reject both with a clear message.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
lunedì
martedì
mercoledì
giovedì
venerdì
sabato
domenica
+12
View File
@@ -0,0 +1,12 @@
gennaio
febbraio
marzo
aprile
maggio
giugno
luglio
agosto
settembre
ottobre
novembre
dicembre
+2 -1
View File
@@ -1,6 +1,7 @@
{
"language": "nl",
"name": "nl",
"locale": "nl_NL",
"speech_to_text": {
"system": "pocketsphinx",
"dictionary_casing": "lower",
@@ -8,7 +9,7 @@
"base_dictionary": "kaldi/base_dictionary.txt",
"base_language_model": "kaldi/base_language_model.txt",
"compatible": true,
"custom_words": "kaldi/custom_words.txt",
"custom_words": "kaldi_custom_words.txt",
"dictionary": "kaldi/dictionary.txt",
"graph": "graph",
"language_model": "kaldi/language_model.txt",
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main():
    """Print every integer from lower to upper (inclusive), one per line.

    The two required command-line arguments are the bounds of the range; they
    are swapped when given in descending order.  An optional third argument
    is the step between consecutive numbers (default: 1).  Any further
    arguments are ignored.

    Exits through ``parser.error`` (usage message, status 2) when the step is
    not a positive integer.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")
    # The step is optional, so it is picked out of the leftover arguments.
    args, rest_args = parser.parse_known_args()

    lower = args.lower
    upper = args.upper
    step = 1

    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises on a zero step and yields nothing for a negative one
    # once the bounds are ordered, so reject both with a clear message.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
maandag
dinsdag
woensdag
donderdag
vrijdag
zaterdag
zondag
+12
View File
@@ -0,0 +1,12 @@
januari
februari
maart
april
mei
juni
juli
augustus
september
oktober
november
december
+1
View File
@@ -1,6 +1,7 @@
{
"language": "pt",
"name": "pt",
"locale": "pt_BR",
"speech_to_text": {
"system": "pocketsphinx",
"dictionary_casing": "lower"
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main():
    """Print every integer from lower to upper (inclusive), one per line.

    The two required command-line arguments are the bounds of the range; they
    are swapped when given in descending order.  An optional third argument
    is the step between consecutive numbers (default: 1).  Any further
    arguments are ignored.

    Exits through ``parser.error`` (usage message, status 2) when the step is
    not a positive integer.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")
    # The step is optional, so it is picked out of the leftover arguments.
    args, rest_args = parser.parse_known_args()

    lower = args.lower
    upper = args.upper
    step = 1

    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises on a zero step and yields nothing for a negative one
    # once the bounds are ordered, so reject both with a clear message.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
segunda
terça
quarta
quinta
sexta
sábado
domingo
+12
View File
@@ -0,0 +1,12 @@
janeiro
fevereiro
março
abril
maio
junho
julho
agosto
setembro
outubro
novembro
dezembro
+1 -1
View File
@@ -1,7 +1,7 @@
{
"language": "ru",
"name": "ru",
"locale": "ru_RU",
"speech_to_text": {
"system": "pocketsphinx",
"dictionary_casing": "lower"
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main(argv=None):
    """Print every integer between two bounds, one per line.

    Command-line arguments:
        lower: first bound (int).
        upper: second bound (int, inclusive).
        step:  optional third positional argument (int >= 1, default 1).
               Any arguments after it are ignored.

    The bounds may be given in either order; output is always ascending.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Raises:
        SystemExit: with status 2 when an argument is missing, is not an
            integer, or the step is smaller than 1.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")

    # parse_known_args keeps unrecognized trailing arguments in rest_args
    # instead of rejecting them; the first one is used as the step.
    args, rest_args = parser.parse_known_args(argv)

    lower = args.lower
    upper = args.upper

    step = 1
    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises ValueError for a zero step and yields nothing for a
    # negative step once the bounds are ascending, so reject both up front.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    # Accept the bounds in either order.
    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
Понедельник
Вторник
Среда
Четверг
Пятница
Суббота
Воскресенье
+12
View File
@@ -0,0 +1,12 @@
января
февраля
марта
апреля
мая
июня
июля
августа
сентября
октября
ноября
декабря
+1
View File
@@ -1,6 +1,7 @@
{
"language": "sv",
"name": "sv",
"locale": "sv_SE",
"speech_to_text": {
"kaldi": {
"compatible": true
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main(argv=None):
    """Print every integer between two bounds, one per line.

    Command-line arguments:
        lower: first bound (int).
        upper: second bound (int, inclusive).
        step:  optional third positional argument (int >= 1, default 1).
               Any arguments after it are ignored.

    The bounds may be given in either order; output is always ascending.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Raises:
        SystemExit: with status 2 when an argument is missing, is not an
            integer, or the step is smaller than 1.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")

    # parse_known_args keeps unrecognized trailing arguments in rest_args
    # instead of rejecting them; the first one is used as the step.
    args, rest_args = parser.parse_known_args(argv)

    lower = args.lower
    upper = args.upper

    step = 1
    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises ValueError for a zero step and yields nothing for a
    # negative step once the bounds are ascending, so reject both up front.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    # Accept the bounds in either order.
    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
måndag
tisdag
onsdag
torsdag
fredag
lördag
söndag
+12
View File
@@ -0,0 +1,12 @@
januari
februari
mars
april
maj
juni
juli
augusti
september
oktober
november
december
+1
View File
@@ -1,6 +1,7 @@
{
"language": "vi",
"name": "vi",
"locale": "vi_VN",
"speech_to_text": {
"kaldi": {
"compatible": true
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main(argv=None):
    """Print every integer between two bounds, one per line.

    Command-line arguments:
        lower: first bound (int).
        upper: second bound (int, inclusive).
        step:  optional third positional argument (int >= 1, default 1).
               Any arguments after it are ignored.

    The bounds may be given in either order; output is always ascending.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Raises:
        SystemExit: with status 2 when an argument is missing, is not an
            integer, or the step is smaller than 1.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")

    # parse_known_args keeps unrecognized trailing arguments in rest_args
    # instead of rejecting them; the first one is used as the step.
    args, rest_args = parser.parse_known_args(argv)

    lower = args.lower
    upper = args.upper

    step = 1
    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises ValueError for a zero step and yields nothing for a
    # negative step once the bounds are ascending, so reject both up front.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    # Accept the bounds in either order.
    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
Thứ hai
Thứ ba
Thứ tư
Thứ năm
Thứ sáu
Thứ bảy
Chủ nhật
+12
View File
@@ -0,0 +1,12 @@
Tháng 1
Tháng 2
Tháng 3
Tháng 4
Tháng 5
Tháng 6
Tháng 7
Tháng 8
Tháng 9
Tháng 10
Tháng 11
Tháng 12
+1
View File
@@ -1,6 +1,7 @@
{
"name": "zh",
"language": "zh",
"locale": "zh_CN",
"speech_to_text": {
"g2p_casing": "n/a",
"dictionary_casing": "n/a"
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import argparse
import sys
def main(argv=None):
    """Print every integer between two bounds, one per line.

    Command-line arguments:
        lower: first bound (int).
        upper: second bound (int, inclusive).
        step:  optional third positional argument (int >= 1, default 1).
               Any arguments after it are ignored.

    The bounds may be given in either order; output is always ascending.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Raises:
        SystemExit: with status 2 when an argument is missing, is not an
            integer, or the step is smaller than 1.
    """
    parser = argparse.ArgumentParser("number")
    parser.add_argument("lower", type=int, help="Lower bound")
    parser.add_argument("upper", type=int, help="Upper bound (inclusive)")

    # parse_known_args keeps unrecognized trailing arguments in rest_args
    # instead of rejecting them; the first one is used as the step.
    args, rest_args = parser.parse_known_args(argv)

    lower = args.lower
    upper = args.upper

    step = 1
    if rest_args:
        try:
            step = int(rest_args[0])
        except ValueError:
            parser.error(f"step must be an integer, got {rest_args[0]!r}")

    # range() raises ValueError for a zero step and yields nothing for a
    # negative step once the bounds are ascending, so reject both up front.
    if step < 1:
        parser.error(f"step must be a positive integer, got {step}")

    # Accept the bounds in either order.
    if upper < lower:
        lower, upper = upper, lower

    for n in range(lower, upper + 1, step):
        print(n)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()
+7
View File
@@ -0,0 +1,7 @@
星期一
星期二
星期三
星期四
星期五
星期六
星期日
+12
View File
@@ -0,0 +1,12 @@
一月
二月
三月
四月
五月
六月
七月
八月
九月
十月
十一月
十二月
+5
View File
@@ -13,6 +13,11 @@ body {
z-index: 9999;
}
#logo {
border-color: red;
border-width: 0;
}
.response {
text-align: center;
}
+17
View File
@@ -383,6 +383,11 @@ paths:
schema:
type: boolean
default: true
- in: query
name: siteId
description: 'Hermes siteId to use in playBytes'
schema:
type: string
responses:
'200':
description: OK
@@ -533,3 +538,15 @@ paths:
description: intents
schema:
type: object
/api/play-recording:
post:
summary: 'Play the last recorded voice command from web API'
produces:
- text/plain
responses:
'200':
description: OK
content:
text/plain:
schema:
type: string
+2 -2
View File
@@ -4,7 +4,7 @@ doit==0.31.1
fuzzywuzzy[speedup]==0.17.0
google-cloud-texttospeech==0.5.0
html5lib==1.0.1
json5==0.8.5
json5==0.7.0
multidict==4.6.1
networkx>=2.0
num2words==0.5.10
@@ -15,6 +15,6 @@ pydash==4.7.6
quart==0.6.15
quart-cors==0.1.3
requests==2.22.0
rhasspy-nlu==0.1.3
rhasspy-nlu==0.1.6
swagger-ui-py==0.1.7
webrtcvad==2.0.10
+2 -1
View File
@@ -15,11 +15,12 @@ pydash==4.7.6
quart==0.6.15
quart-cors==0.1.3
requests==2.22.0
rhasspy-nlu==0.1.3
rhasspy-nlu==0.1.4.1
swagger-ui-py==0.1.7
webrtcvad==2.0.10
flake8==3.7.9
pylint==2.4.4
pyinstaller==3.5
mypy==0.700
mkdocs==1.0.4
+1 -1
View File
@@ -12,6 +12,6 @@ pydash==4.7.6
quart==0.6.15
quart-cors==0.1.3
requests==2.22.0
rhasspy-nlu==0.1.1
rhasspy-nlu==0.1.4
swagger-ui-py==0.1.7
webrtcvad==2.0.10
+13 -12
View File
@@ -4,7 +4,6 @@ import asyncio
import io
import json
import logging
# Configure logging
import logging.config
import os
@@ -302,11 +301,11 @@ async def main() -> None:
if not args.no_check and (args.command not in ["check", "download"]):
# Verify that profile has necessary files
missing_files = core.check_profile()
if len(missing_files) > 0:
if missing_files:
logger.fatal(
"Missing required files for %s: %s. Please run download command and try again.",
profile.name,
missing_files.keys(),
list(missing_files),
)
sys.exit(1)
@@ -335,7 +334,7 @@ async def main() -> None:
async def wav2text(core: RhasspyCore, profile: Profile, args: Any) -> None:
"""Transcribe WAV file(s)"""
if len(args.wav_files) > 0:
if args.wav_files:
# Read WAV paths from argument list
transcriptions = {}
for wav_path in args.wav_files:
@@ -361,7 +360,7 @@ async def wav2text(core: RhasspyCore, profile: Profile, args: Any) -> None:
async def text2intent(core: RhasspyCore, profile: Profile, args: Any) -> None:
"""Parse sentences from command line or stdin"""
intents = {}
sentences = args.sentences if len(args.sentences) > 0 else sys.stdin
sentences = args.sentences or sys.stdin
for sentence in sentences:
sentence = sentence.strip()
intent = (await core.recognize_intent(sentence)).intent
@@ -382,7 +381,7 @@ async def text2intent(core: RhasspyCore, profile: Profile, args: Any) -> None:
async def wav2intent(core: RhasspyCore, profile: Profile, args: Any) -> None:
"""Recognize intent from WAV file(s)"""
if len(args.wav_files) > 0:
if args.wav_files:
# Read WAV paths from argument list
transcriptions = {}
for wav_path in args.wav_files:
@@ -494,7 +493,9 @@ async def mic2intent(core: RhasspyCore, profile: Profile, args: Any) -> None:
async def word2phonemes(core: RhasspyCore, profile: Profile, args: Any) -> None:
"""Get pronunciation(s) for word(s)"""
words = args.words if len(args.words) > 0 else sys.stdin
words = args.words
if not words:
words = [w.strip() for w in sys.stdin if w.strip()]
# Get pronunciations for all words
pronunciations = (
@@ -558,9 +559,9 @@ def _send_frame(
async def wav2mqtt(core: RhasspyCore, profile: Profile, args: Any) -> None:
"""Publish WAV to MQTT as audio frames"""
# hermes/audioServer/<SITE_ID>/audioFrame
topic = "hermes/audioServer/%s/audioFrame" % args.site_id
topic = f"hermes/audioServer/{args.site_id}/audioFrame"
if len(args.wav_files) > 0:
if args.wav_files:
# Read WAV paths from argument list
for wav_path in args.wav_files:
with wave.open(wav_path, "rb") as wav_file:
@@ -585,7 +586,7 @@ async def wav2mqtt(core: RhasspyCore, profile: Profile, args: Any) -> None:
# Read actual audio data
audio_data = wav_file.readframes(args.frames)
while len(audio_data) > 0:
while audio_data:
_send_frame(core, topic, audio_data, rate, width, channels)
time.sleep(args.pause)
@@ -617,7 +618,7 @@ async def wav2mqtt(core: RhasspyCore, profile: Profile, args: Any) -> None:
async def text2wav(core: RhasspyCore, profile: Profile, args: Any) -> None:
"""Speak a sentence and output WAV data"""
result = await core.speak_sentence(args)
result = await core.speak_sentence(args.sentence)
sys.stdout.buffer.write(result.wav_data)
@@ -629,7 +630,7 @@ async def text2wav(core: RhasspyCore, profile: Profile, args: Any) -> None:
async def text2speech(core: RhasspyCore, profile: Profile, args: Any) -> None:
"""Speak sentences"""
sentences = args.sentences
if len(sentences) == 0:
if not sentences:
sentences = sys.stdin
for sentence in sentences:
+10 -4
View File
@@ -116,9 +116,6 @@ class RhasspyActor:
def stop(self, block=True):
"""Stop this actor and its children."""
for child_actor in self._actors:
child_actor.stop(block=block)
self.send(self, ActorExitRequest())
if block:
self._thread.join()
@@ -127,6 +124,15 @@ class RhasspyActor:
"""Main loop for this actor."""
while self._running:
message_dict = self._queue.get()
message = message_dict.get("message")
if isinstance(message, ActorExitRequest):
for child in self._actors:
self.send(child, ActorExitRequest())
self._running = False
self.transition("stopped")
self.send(self._parent, ChildActorExited(self))
self.on_receive(message_dict)
@property
@@ -296,7 +302,7 @@ class InboxActor(RhasspyActor):
return self
def __exit__(self, *args):
self.stop(block=False)
self.stop(block=True)
class ActorSystem:
+15 -37
View File
@@ -6,43 +6,14 @@ import uuid
from typing import Any, Dict, List, Optional, Type
from rhasspy.actor import RhasspyActor
from rhasspy.mqtt import MqttPublish
# -----------------------------------------------------------------------------
# Events
# -----------------------------------------------------------------------------
class PlayWavFile:
"""Play a WAV file."""
def __init__(self, wav_path: str, receiver: Optional[RhasspyActor] = None) -> None:
self.wav_path = wav_path
self.receiver = receiver
class PlayWavData:
"""Play a WAV buffer."""
def __init__(
self, wav_data: bytes, receiver: Optional[RhasspyActor] = None
) -> None:
self.wav_data = wav_data
self.receiver = receiver
class WavPlayed:
"""Response to PlayWavFile or PlayWavData."""
pass
from rhasspy.events import MqttPublish, PlayWavData, PlayWavFile, WavPlayed
# -----------------------------------------------------------------------------
def get_sound_class(system: str) -> Type[RhasspyActor]:
"""Get class type for profile audio player."""
assert system in ["aplay", "hermes", "dummy"], "Unknown sound system: %s" % system
assert system in ["aplay", "hermes", "dummy"], f"Unknown sound system: {system}"
if system == "aplay":
return APlayAudioPlayer
@@ -180,29 +151,36 @@ class HermesAudioPlayer(RhasspyActor):
def in_started(self, message: Any, sender: RhasspyActor) -> None:
"""Handle messages in started state."""
if isinstance(message, PlayWavFile):
self.play_file(message.wav_path)
self.play_file(message.wav_path, siteId=message.siteId)
self.send(message.receiver or sender, WavPlayed())
elif isinstance(message, PlayWavData):
self.play_data(message.wav_data)
self.play_data(message.wav_data, siteId=message.siteId)
self.send(message.receiver or sender, WavPlayed())
# -------------------------------------------------------------------------
def play_file(self, path: str) -> None:
def play_file(self, path: str, siteId: Optional[str] = None) -> None:
"""Send WAV file over MQTT."""
if not os.path.exists(path):
self._logger.warning("Path does not exist: %s", path)
return
with open(path, "rb") as wav_file:
self.play_data(wav_file.read())
self.play_data(wav_file.read(), siteId=siteId)
def play_data(self, wav_data: bytes) -> None:
def play_data(self, wav_data: bytes, siteId: Optional[str] = None) -> None:
"""Send WAV buffer over MQTT."""
request_id = str(uuid.uuid4())
if siteId:
# Send to a specific site id
publish_sites = [siteId]
else:
# Send to all site ids
publish_sites = self.site_ids
# Send to all site ids
for site_id in self.site_ids:
for site_id in publish_sites:
topic = f"hermes/audioServer/{site_id}/playBytes/{request_id}"
self.send(self.mqtt, MqttPublish(topic, wav_data))
+33 -70
View File
@@ -14,55 +14,12 @@ from http.server import BaseHTTPRequestHandler, HTTPServer
from typing import Any, Dict, List, Optional, Type
from rhasspy.actor import RhasspyActor
from rhasspy.intent import IntentRecognized
from rhasspy.mqtt import MqttMessage, MqttSubscribe
from rhasspy.stt import WavTranscription
from rhasspy.events import (AudioData, IntentRecognized, MqttMessage,
MqttSubscribe, StartRecordingToBuffer,
StartStreaming, StopRecordingToBuffer,
StopStreaming, WavTranscription)
from rhasspy.utils import convert_wav
# -----------------------------------------------------------------------------
# Events
# -----------------------------------------------------------------------------
class AudioData:
"""Raw 16-bit 16Khz audio data."""
def __init__(self, data: bytes, **kwargs: Any) -> None:
self.data = data
self.info = kwargs
class StartStreaming:
"""Tells microphone to begin recording. Emits AudioData chunks."""
def __init__(self, receiver: Optional[RhasspyActor] = None) -> None:
self.receiver = receiver
class StopStreaming:
"""Tells microphone to stop recording."""
def __init__(self, receiver: Optional[RhasspyActor] = None) -> None:
self.receiver = receiver
class StartRecordingToBuffer:
"""Tells microphone to record audio data to named buffer."""
def __init__(self, buffer_name: str) -> None:
self.buffer_name = buffer_name
class StopRecordingToBuffer:
"""Tells microphone to stop recording to buffer and emit AudioData."""
def __init__(
self, buffer_name: str, receiver: Optional[RhasspyActor] = None
) -> None:
self.buffer_name = buffer_name
self.receiver = receiver
# -----------------------------------------------------------------------------
@@ -76,7 +33,7 @@ def get_microphone_class(system: str) -> Type[RhasspyActor]:
"stdin",
"http",
"gstreamer",
], ("Unknown microphone system: %s" % system)
], f"Unknown microphone system: {system}"
if system == "arecord":
# Use arecord locally
@@ -191,7 +148,7 @@ class PyAudioRecorder(RhasspyActor):
# Start audio system
def stream_callback(data, frame_count, time_info, status):
if len(data) > 0:
if data:
# Send to this actor to avoid threading issues
self.send(self.myAddress, AudioData(data))
@@ -221,6 +178,7 @@ class PyAudioRecorder(RhasspyActor):
)
except Exception:
self._logger.exception("to_recording")
self._stop_microphone()
self.transition("started")
# -------------------------------------------------------------------------
@@ -257,8 +215,8 @@ class PyAudioRecorder(RhasspyActor):
# Check to see if anyone is still listening
if (
(not self.keep_device_open)
and (len(self.receivers) == 0)
and (len(self.buffers) == 0)
and not self.receivers
and not self.buffers
):
# Terminate audio recording
if self.mic is not None:
@@ -274,6 +232,11 @@ class PyAudioRecorder(RhasspyActor):
def to_stopped(self, from_state: str) -> None:
"""Transition to stopped state."""
self._stop_microphone()
# -------------------------------------------------------------------------
def _stop_microphone(self) -> None:
try:
if self.mic is not None:
self.mic.stop_stream()
@@ -284,7 +247,7 @@ class PyAudioRecorder(RhasspyActor):
self.audio.terminate()
self.audio = None
except Exception:
self._logger.exception("to_stopped")
self._logger.exception("_stop_microphone")
# -------------------------------------------------------------------------
@@ -342,7 +305,7 @@ class PyAudioRecorder(RhasspyActor):
finally:
pyaudio_stream.close()
except Exception:
result[device_index] = "%s (error)" % device_name
result[device_index] = f"{device_name} (error)"
continue
# compute RMS of debiased audio
@@ -353,9 +316,9 @@ class PyAudioRecorder(RhasspyActor):
)
if debiased_energy > 30: # probably actually audio
result[device_index] = "%s (working!)" % device_name
result[device_index] = f"{device_name} (working!)"
else:
result[device_index] = "%s (no sound)" % device_name
result[device_index] = f"{device_name} (no sound)"
finally:
audio.terminate()
@@ -390,7 +353,7 @@ class ARecordAudioRecorder(RhasspyActor):
if self.device_name is not None:
self.device_name = str(self.device_name)
if len(self.device_name) == 0:
if not self.device_name:
self.device_name = None
self.chunk_size = int(
@@ -438,7 +401,7 @@ class ARecordAudioRecorder(RhasspyActor):
while self.is_recording:
# Pull from process STDOUT
data = self.record_proc.stdout.read(self.chunk_size)
if len(data) > 0:
if data:
# Send to this actor to avoid threading issues
self.send(self.myAddress, AudioData(data))
else:
@@ -495,8 +458,8 @@ class ARecordAudioRecorder(RhasspyActor):
# Check to see if anyone is still listening
if (
(not self.keep_device_open)
and (len(self.receivers) == 0)
and (len(self.buffers) == 0)
and not self.receivers
and not self.buffers
):
# Terminate audio recording
self.is_recording = False
@@ -573,7 +536,7 @@ class ARecordAudioRecorder(RhasspyActor):
buffer = proc.stdout.read(chunk_size * 2)
proc.terminate()
except Exception:
result[device_id] = "%s (error)" % device_name
result[device_id] = f"{device_name} (error)"
continue
# compute RMS of debiased audio
@@ -584,9 +547,9 @@ class ARecordAudioRecorder(RhasspyActor):
)
if debiased_energy > 30: # probably actually audio
result[device_id] = "%s (working!)" % device_name
result[device_id] = f"{device_name} (working!)"
else:
result[device_id] = "%s (no sound)" % device_name
result[device_id] = f"{device_name} (no sound)"
return result
@@ -613,11 +576,11 @@ class HermesAudioRecorder(RhasspyActor):
"""Transition to started state."""
self.mqtt = self.config["mqtt"]
self.site_ids = self.profile.get("mqtt.site_id", "default").split(",")
if len(self.site_ids) > 0:
if self.site_ids:
self.site_id = self.site_ids[0]
else:
self.site_id = "default"
self.topic_audio_frame = "hermes/audioServer/%s/audioFrame" % self.site_id
self.topic_audio_frame = f"hermes/audioServer/{self.site_id}/audioFrame"
self.send(self.mqtt, MqttSubscribe(self.topic_audio_frame))
def in_started(self, message: Any, sender: RhasspyActor) -> None:
@@ -760,7 +723,7 @@ class StdinAudioRecorder(RhasspyActor):
self.send(message.receiver or sender, AudioData(buffer))
# Check to see if anyone is still listening
if (len(self.receivers) == 0) and (len(self.buffers) == 0):
if not self.receivers and not self.buffers:
# Terminate audio recording
self.is_recording = False
self.transition("started")
@@ -778,7 +741,7 @@ class StdinAudioRecorder(RhasspyActor):
"""Forward single audio chunk."""
while True:
data = sys.stdin.buffer.read(self.chunk_size)
if self.is_recording and (len(data) > 0):
if self.is_recording and data:
# Actor will forward
self.send(self.myAddress, AudioData(data))
@@ -827,7 +790,7 @@ class HTTPStreamServer(BaseHTTPRequestHandler):
while True:
# Assume chunked transfer encoding
chunk_size_str = self.rfile.readline().decode().strip()
if len(chunk_size_str) == 0:
if not chunk_size_str:
break
chunk_size = int(chunk_size_str, 16)
@@ -952,7 +915,7 @@ class HTTPAudioRecorder(RhasspyActor):
self.send(message.receiver or sender, AudioData(buffer))
# Check to see if anyone is still listening
if (len(self.receivers) == 0) and (len(self.buffers) == 0):
if not self.receivers and not self.buffers:
self.transition("started")
def to_stopped(self, from_state: str) -> None:
@@ -1049,7 +1012,7 @@ class GStreamerAudioRecorder(RhasspyActor):
while True:
chunk = self.gstreamer_proc.stdout.read(self.chunk_size)
if len(chunk) > 0:
if chunk:
if first_audio:
self._logger.debug("Receiving audio")
first_audio = False
@@ -1111,7 +1074,7 @@ class GStreamerAudioRecorder(RhasspyActor):
self.send(message.receiver or sender, AudioData(buffer))
# Check to see if anyone is still listening
if (len(self.receivers) == 0) and (len(self.buffers) == 0):
if not self.receivers and not self.buffers:
self.transition("started")
def to_stopped(self, from_state: str) -> None:
+4 -31
View File
@@ -11,45 +11,18 @@ from typing import Any, Dict, List, Optional, Type
import webrtcvad
from rhasspy.actor import RhasspyActor, WakeupMessage
from rhasspy.audio_recorder import AudioData, StartStreaming, StopStreaming
from rhasspy.mqtt import MqttMessage, MqttSubscribe
from rhasspy.events import (AudioData, ListenForCommand, MqttMessage,
MqttSubscribe, StartStreaming, StopStreaming,
VoiceCommand)
from rhasspy.utils import convert_wav
# -----------------------------------------------------------------------------
class ListenForCommand:
"""Tell Rhasspy to listen for a voice command."""
def __init__(
self,
receiver: Optional[RhasspyActor] = None,
handle: bool = True,
timeout: Optional[float] = None,
entities: List[Dict[str, Any]] = None,
) -> None:
self.receiver = receiver
self.handle = handle
self.timeout = timeout
self.entities = entities or []
class VoiceCommand:
"""Response to ListenForCommand."""
def __init__(self, data: bytes, timeout: bool = False, handle: bool = True) -> None:
self.data = data
self.timeout = timeout
self.handle = handle
# -----------------------------------------------------------------------------
def get_command_class(system: str) -> Type[RhasspyActor]:
"""Return class type for profile command listener."""
assert system in ["dummy", "webrtcvad", "command", "oneshot", "hermes"], (
"Unknown voice command system: %s" % system
f"Unknown voice command system: {system}"
)
if system == "webrtcvad":
+59 -21
View File
@@ -14,13 +14,9 @@ import aiohttp
# Internal imports
from rhasspy.actor import ActorSystem, ConfigureEvent, RhasspyActor
from rhasspy.audio_recorder import (
from rhasspy.dialogue import DialogueManager
from rhasspy.events import (
AudioData,
StartRecordingToBuffer,
StopRecordingToBuffer,
)
from rhasspy.dialogue import (
DialogueManager,
GetActorStates,
GetMicrophones,
GetProblems,
@@ -29,6 +25,8 @@ from rhasspy.dialogue import (
GetWordPhonemes,
GetWordPronunciations,
HandleIntent,
IntentHandled,
IntentRecognized,
ListenForCommand,
ListenForWakeWord,
MqttPublish,
@@ -38,21 +36,24 @@ from rhasspy.dialogue import (
ProfileTrainingComplete,
ProfileTrainingFailed,
RecognizeIntent,
SentenceSpoken,
SpeakSentence,
SpeakWord,
StopListeningForWakeWord,
StartRecordingToBuffer,
StopRecordingToBuffer,
TestMicrophones,
TrainProfile,
TranscribeWav,
VoiceCommand,
WakeWordDetected,
WakeWordNotDetected,
WavTranscription,
WordPhonemes,
WordPronunciations,
WordSpoken,
)
from rhasspy.intent import IntentRecognized
from rhasspy.intent_handler import IntentHandled
from rhasspy.profiles import Profile
from rhasspy.pronounce import WordPhonemes, WordPronunciations, WordSpoken
from rhasspy.stt import WavTranscription
from rhasspy.tts import SentenceSpoken
from rhasspy.utils import numbers_to_words
# -----------------------------------------------------------------------------
@@ -88,6 +89,8 @@ class RhasspyCore:
self._session: Optional[aiohttp.ClientSession] = aiohttp.ClientSession()
self.dialogue_manager: Optional[RhasspyActor] = None
self.download_status: List[str] = []
# -------------------------------------------------------------------------
@property
@@ -96,6 +99,14 @@ class RhasspyCore:
assert self._session is not None
return self._session
@property
def siteId(self) -> str:
    """Get default MQTT siteId.

    Reads the comma-separated ``mqtt.site_id`` profile setting and returns
    its first entry.

    Returns:
        The first configured site id, or ``"default"`` when the setting is
        empty or cannot be read as a string.
    """
    try:
        # NOTE(review): key spelled "mqtt.site_id" to agree with the other
        # profile lookup for this setting in the code base (the original
        # read "mqtt.siteId") - confirm against the profile schema.
        site_ids = self.profile.get("mqtt.site_id", "default").split(",")
    except Exception:
        # Best effort: any problem reading the profile falls back to default
        return "default"

    # The original assigned the value but never returned it, so the property
    # evaluated to None on the success path.
    return site_ids[0].strip() or "default"
# -------------------------------------------------------------------------
async def start(
@@ -160,10 +171,14 @@ class RhasspyCore:
# -------------------------------------------------------------------------
def listen_for_wake(self) -> None:
def listen_for_wake(self, enabled: bool = True) -> None:
"""Tell Rhasspy to start listening for a wake word."""
assert self.actor_system is not None
self.actor_system.tell(self.dialogue_manager, ListenForWakeWord())
if enabled:
self.actor_system.tell(self.dialogue_manager, ListenForWakeWord())
else:
self.actor_system.tell(self.dialogue_manager, StopListeningForWakeWord())
async def listen_for_command(
self,
@@ -322,13 +337,16 @@ class RhasspyCore:
play: bool = True,
language: Optional[str] = None,
voice: Optional[str] = None,
siteId: Optional[str] = None,
) -> SentenceSpoken:
"""Speak an entire sentence using text to speech system."""
assert self.actor_system is not None
with self.actor_system.private() as sys:
result = await sys.async_ask(
self.dialogue_manager,
SpeakSentence(sentence, play=play, language=language, voice=voice),
SpeakSentence(
sentence, play=play, language=language, voice=voice, siteId=siteId
),
)
assert isinstance(result, SentenceSpoken), result
return result
@@ -341,9 +359,10 @@ class RhasspyCore:
"""Generate speech/intent artifacts for profile."""
if no_cache:
# Delete doit database
db_path = Path(self.profile.write_path(".doit.db"))
if db_path.is_file():
db_path.unlink()
profile_dir = Path(self.profile.write_path())
for db_path in profile_dir.glob(".doit.db*"):
if db_path.is_file():
db_path.unlink()
assert self.actor_system is not None
with self.actor_system.private() as sys:
@@ -477,6 +496,8 @@ class RhasspyCore:
async def download_profile(self, delete=False, chunk_size=4096) -> None:
"""Download all necessary profile files from the internet and extract them."""
self.download_status = []
output_dir = Path(self.profile.write_path())
download_dir = Path(
self.profile.write_path(self.profile.get("download.cache_dir", "download"))
@@ -497,7 +518,9 @@ class RhasspyCore:
async def download_file(url, filename):
try:
self._logger.debug("Downloading %s to %s", url, filename)
status = f"Downloading {url} to {filename}"
self.download_status.append(status)
self._logger.debug(status)
os.makedirs(os.path.dirname(filename), exist_ok=True)
async with self.session.get(url) as response:
@@ -505,10 +528,21 @@ class RhasspyCore:
async for chunk in response.content.iter_chunked(chunk_size):
out_file.write(chunk)
self._logger.debug("Downloaded %s", filename)
status = f"Downloaded {filename}"
self.download_status.append(status)
self._logger.debug(status)
except Exception:
self._logger.exception(url)
# Try to delete partially downloaded file
try:
status = f"Failed to download {filename}"
self.download_status.append(status)
self._logger.debug(status)
os.unlink(filename)
except Exception:
pass
# Check conditions
machine_type = platform.machine()
download_tasks = []
@@ -592,7 +626,9 @@ class RhasspyCore:
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
# Copy file/directory as is
self._logger.debug("Copying %s to %s", src_path, dest_path)
status = f"Copying {src_path} to {dest_path}"
self.download_status.append(status)
self._logger.debug(status)
if os.path.isdir(src_path):
shutil.copytree(src_path, dest_path)
else:
@@ -665,7 +701,9 @@ class RhasspyCore:
extract_path = os.path.join(temp_dir, src_extract)
# Copy specific file/directory
self._logger.debug("Copying %s to %s", extract_path, dest_path)
status = f"Copying {extract_path} to {dest_path}"
self.download_status.append(status)
self._logger.debug(status)
if os.path.isdir(extract_path):
if src_exclude:
# Ignore some files

Some files were not shown because too many files have changed in this diff Show More