Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e00c1448cb | |||
| f04ad3bfeb | |||
| eb11f90cab | |||
| 2c612ee669 | |||
| dfe92f9d0e | |||
| c59e7b42ab | |||
| 948705a87b | |||
| 6b0b5c1799 | |||
| 9553691e88 | |||
| 997456631e | |||
| 104165198b | |||
| f405b827f4 | |||
| 089568cf9f |
+3
-1
@@ -172,4 +172,6 @@
|
||||
!rhasspy/train/*.py
|
||||
!rhasspy/train/jsgf2fst/*.py
|
||||
!*.py
|
||||
!VERSION
|
||||
!VERSION
|
||||
|
||||
!pip
|
||||
|
||||
@@ -1,12 +1,33 @@
|
||||
## [Unreleased] - 2020 Jan 21
|
||||
## [2.4.18] - 2020 Feb 07
|
||||
|
||||
### Added
|
||||
|
||||
- Button to web UI to play last recorded voice command
|
||||
- RHASSPY_LOG_LEVEL environment variable
|
||||
- Web UI feedback during download
|
||||
- /api/listen-for-wake accepts "on" and "off" as POST data to enable/disable wake word
|
||||
- /api/events/wake websocket endpoint reports wake up events
|
||||
- /api/events/text websocket endpoint reports transcription events
|
||||
- Rhasspy logo changes in web UI when wake word is detected
|
||||
- espeak arguments list for text to speech
|
||||
|
||||
### Changed
|
||||
|
||||
- STT output casing is fixed outside of HTTP API calls
|
||||
- All voice commands show up in web UI test page
|
||||
- Play last voice command button in web UI works for any command
|
||||
- Fixed commas in numbers with thousand separators
|
||||
- Words from Pocketsphinx wake keyphrase are added to dictionary
|
||||
- Pocketsphinx wake word keyphrase casing is fixed
|
||||
|
||||
## [2.4.17] - 2020 Jan 21
|
||||
|
||||
### Added
|
||||
|
||||
- Button to web UI to play last recorded voice command
|
||||
- RHASSPY_LOG_LEVEL environment variable
|
||||
- Web UI feedback during download
|
||||
- Add "asoundrc" config option to Hass.IO add-on
|
||||
|
||||
### Changed
|
||||
|
||||
- Moved $profile/kaldi/custom_words.txt to $profile/kaldi_custom_words.txt
|
||||
- Slot substitution casing is kept during training/recognition
|
||||
- Fixed fuzzywuzzy and other intent recognizer training after addition of converters
|
||||
@@ -33,7 +54,7 @@
|
||||
|
||||
### Added
|
||||
|
||||
- Preliminary support for Rasperry Pi Zero (no Kaldi)
|
||||
- Preliminary support for Raspberry Pi Zero (no Kaldi)
|
||||
- Play error sound when intent not recognized
|
||||
- _text and _raw_text to Home Assistant events
|
||||
|
||||
@@ -53,4 +74,4 @@
|
||||
- Support for Home Assistant TTS system
|
||||
- Emulate MaryTTS /process API in web API
|
||||
- Include wakeId/siteId in JSON intent (MQTT/Websocket)
|
||||
- ?voice and ?language query parameters to /api/text-to-speech
|
||||
- ?voice and ?language query parameters to /api/text-to-speech
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||

|
||||
|
||||
Rhasspy (pronounced RAH-SPEE) is an offline, [multilingual](#supported-languages) voice assistant toolkit inspired by [Jasper](https://jasperproject.github.io/) that works well with [Home Assistant](https://www.home-assistant.io/), [Hass.io](https://www.home-assistant.io/hassio/), and [Node-RED](https://nodered.org).
|
||||
Rhasspy (pronounced RAH-SPEE) is an offline voice assistant toolkit inspired by [Jasper](https://jasperproject.github.io/) that [supports many languages](#supported-languages). It works well with [Home Assistant](https://www.home-assistant.io/), [Hass.io](https://www.home-assistant.io/hassio/), and [Node-RED](https://nodered.org).
|
||||
|
||||
* [Documentation](https://rhasspy.readthedocs.io/)
|
||||
* [Discussion](https://community.rhasspy.org)
|
||||
|
||||
@@ -9,8 +9,9 @@ import os
|
||||
import re
|
||||
import shutil
|
||||
import time
|
||||
from functools import wraps
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
||||
from uuid import uuid4
|
||||
|
||||
import attr
|
||||
@@ -30,7 +31,13 @@ from swagger_ui import quart_api_doc
|
||||
|
||||
from rhasspy.actor import ActorSystem, ConfigureEvent, RhasspyActor
|
||||
from rhasspy.core import RhasspyCore
|
||||
from rhasspy.events import IntentRecognized, ProfileTrainingFailed
|
||||
from rhasspy.events import (
|
||||
IntentRecognized,
|
||||
ProfileTrainingFailed,
|
||||
VoiceCommand,
|
||||
WakeWordDetected,
|
||||
WavTranscription,
|
||||
)
|
||||
from rhasspy.utils import (
|
||||
FunctionLoggingHandler,
|
||||
buffer_to_wav,
|
||||
@@ -54,6 +61,10 @@ app = Quart("rhasspy")
|
||||
app.secret_key = str(uuid4())
|
||||
app = cors(app)
|
||||
|
||||
# WAV data from last voice command
|
||||
last_voice_wav: Optional[bytes] = None
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Parse Arguments
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -269,8 +280,11 @@ async def api_speakers() -> Response:
|
||||
async def api_listen_for_wake() -> str:
|
||||
"""Make Rhasspy listen for a wake word"""
|
||||
assert core is not None
|
||||
core.listen_for_wake()
|
||||
return "OK"
|
||||
enabled_str = (await request.data).decode().strip().lower()
|
||||
enabled = enabled_str not in ["false", "off"]
|
||||
core.listen_for_wake(enabled)
|
||||
|
||||
return str(enabled)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -291,6 +305,10 @@ async def api_listen_for_command() -> Response:
|
||||
entity = request.args.get("entity")
|
||||
value = request.args.get("value")
|
||||
|
||||
# Emulate wake
|
||||
wake_json = json.dumps({"wakewordId": "default", "siteId": core.siteId})
|
||||
await add_ws_event("wake", wake_json)
|
||||
|
||||
return jsonify(
|
||||
await core.listen_for_command(
|
||||
handle=(not no_hass), timeout=timeout, entity=entity, value=value
|
||||
@@ -651,6 +669,7 @@ async def api_restart() -> str:
|
||||
@app.route("/api/speech-to-text", methods=["POST"])
|
||||
async def api_speech_to_text() -> str:
|
||||
"""Transcribe speech from WAV file."""
|
||||
global last_voice_wav
|
||||
no_header = request.args.get("noheader", "false").lower() == "true"
|
||||
assert core is not None
|
||||
|
||||
@@ -660,10 +679,20 @@ async def api_speech_to_text() -> str:
|
||||
# Wrap in WAV
|
||||
wav_data = buffer_to_wav(wav_data)
|
||||
|
||||
last_voice_wav = wav_data
|
||||
|
||||
start_time = time.perf_counter()
|
||||
result = await core.transcribe_wav(wav_data)
|
||||
end_time = time.perf_counter()
|
||||
|
||||
# Send to websocket
|
||||
await add_ws_event(
|
||||
"transcription",
|
||||
json.dumps(
|
||||
{"text": result.text, "wakewordId": "default", "siteId": core.siteId}
|
||||
),
|
||||
)
|
||||
|
||||
if prefers_json():
|
||||
return jsonify(
|
||||
{
|
||||
@@ -698,7 +727,7 @@ async def api_text_to_intent():
|
||||
|
||||
intent_json = json.dumps(intent)
|
||||
logger.debug(intent_json)
|
||||
await add_ws_event(WS_EVENT_INTENT, intent_json)
|
||||
await add_ws_event("intent", intent_json)
|
||||
|
||||
if not no_hass:
|
||||
# Send intent to Home Assistant
|
||||
@@ -713,11 +742,13 @@ async def api_text_to_intent():
|
||||
@app.route("/api/speech-to-intent", methods=["POST"])
|
||||
async def api_speech_to_intent() -> Response:
|
||||
"""Transcribe speech, recognize intent, and optionally handle."""
|
||||
global last_voice_wav
|
||||
assert core is not None
|
||||
no_hass = request.args.get("nohass", "false").lower() == "true"
|
||||
|
||||
# Prefer 16-bit 16Khz mono, but will convert with sox if needed
|
||||
wav_data = await request.data
|
||||
last_voice_wav = wav_data
|
||||
|
||||
# speech -> text
|
||||
start_time = time.time()
|
||||
@@ -725,6 +756,12 @@ async def api_speech_to_intent() -> Response:
|
||||
text = transcription.text
|
||||
logger.debug(text)
|
||||
|
||||
# Send to websocket
|
||||
await add_ws_event(
|
||||
"transcription",
|
||||
json.dumps({"text": text, "wakewordId": "default", "siteId": core.siteId}),
|
||||
)
|
||||
|
||||
# text -> intent
|
||||
intent = (await core.recognize_intent(text)).intent
|
||||
intent["speech_confidence"] = transcription.confidence
|
||||
@@ -734,7 +771,7 @@ async def api_speech_to_intent() -> Response:
|
||||
|
||||
intent_json = json.dumps(intent)
|
||||
logger.debug(intent_json)
|
||||
await add_ws_event(WS_EVENT_INTENT, intent_json)
|
||||
await add_ws_event("intent", intent_json)
|
||||
|
||||
if not no_hass:
|
||||
# Send intent to Home Assistant
|
||||
@@ -745,8 +782,6 @@ async def api_speech_to_intent() -> Response:
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
last_voice_wav: Optional[bytes] = None
|
||||
|
||||
|
||||
@app.route("/api/start-recording", methods=["POST"])
|
||||
async def api_start_recording() -> str:
|
||||
@@ -775,12 +810,18 @@ async def api_stop_recording() -> Response:
|
||||
text = transcription.text
|
||||
logger.debug(text)
|
||||
|
||||
# Send to websocket
|
||||
await add_ws_event(
|
||||
"transcription",
|
||||
json.dumps({"text": text, "wakewordId": "default", "siteId": core.siteId}),
|
||||
)
|
||||
|
||||
intent = (await core.recognize_intent(text)).intent
|
||||
intent["speech_confidence"] = transcription.confidence
|
||||
|
||||
intent_json = json.dumps(intent)
|
||||
logger.debug(intent_json)
|
||||
await add_ws_event(WS_EVENT_INTENT, intent_json)
|
||||
await add_ws_event("intent", intent_json)
|
||||
|
||||
if not no_hass:
|
||||
# Send intent to Home Assistant
|
||||
@@ -1115,26 +1156,26 @@ async def swagger_yaml() -> Response:
|
||||
# WebSocket API
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
WS_EVENT_INTENT = 0
|
||||
WS_EVENT_LOG = 1
|
||||
|
||||
ws_queues: List[List[asyncio.Queue]] = [[], []]
|
||||
ws_locks: List[asyncio.Lock] = [asyncio.Lock(), asyncio.Lock()]
|
||||
user_queues: Set[asyncio.Queue] = set()
|
||||
logging_queues: Set[asyncio.Queue] = set()
|
||||
|
||||
|
||||
async def add_ws_event(event_type: int, text: str):
|
||||
"""Send text out to all websockets for a specific event."""
|
||||
async with ws_locks[event_type]:
|
||||
for q in ws_queues[event_type]:
|
||||
await q.put(text)
|
||||
async def add_ws_event(message_type: str, text: str):
    """Send text out to all user websockets for a specific event.

    Every queue currently registered in ``user_queues`` receives a
    ``(message_type, text)`` tuple.
    """
    # Iterate over a snapshot: websocket handlers add and discard queues in
    # user_queues, and a set must not change size while it is being iterated.
    for q in tuple(user_queues):
        await q.put((message_type, text))
|
||||
|
||||
|
||||
async def log_ws_event(text: str):
    """Send logging message out to websockets.

    Every queue currently registered in ``logging_queues`` receives ``text``.
    """
    # Iterate over a snapshot: the log websocket handler adds and discards
    # queues, and a set must not change size while it is being iterated.
    for q in tuple(logging_queues):
        await q.put(text)
|
||||
|
||||
|
||||
# Send logging messages out to websocket
|
||||
logging.root.addHandler(
|
||||
FunctionLoggingHandler(
|
||||
lambda msg: asyncio.run_coroutine_threadsafe(
|
||||
add_ws_event(WS_EVENT_LOG, msg), loop
|
||||
)
|
||||
lambda msg: asyncio.run_coroutine_threadsafe(log_ws_event(msg), loop)
|
||||
)
|
||||
)
|
||||
|
||||
@@ -1144,6 +1185,8 @@ class WebSocketObserver(RhasspyActor):
|
||||
|
||||
def in_started(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in started state."""
|
||||
global last_voice_wav
|
||||
|
||||
if isinstance(message, IntentRecognized):
|
||||
# Add slots
|
||||
intent_slots = {}
|
||||
@@ -1155,29 +1198,75 @@ class WebSocketObserver(RhasspyActor):
|
||||
# Convert to JSON
|
||||
intent_json = json.dumps(message.intent)
|
||||
self._logger.debug(intent_json)
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
add_ws_event(WS_EVENT_INTENT, intent_json), loop
|
||||
asyncio.run_coroutine_threadsafe(add_ws_event("intent", intent_json), loop)
|
||||
elif isinstance(message, WakeWordDetected):
|
||||
assert core is not None
|
||||
wake_json = json.dumps({"wakewordId": message.name, "siteId": core.siteId})
|
||||
asyncio.run_coroutine_threadsafe(add_ws_event("wake", wake_json), loop)
|
||||
elif isinstance(message, WavTranscription):
|
||||
assert core is not None
|
||||
transcription_json = json.dumps(
|
||||
{
|
||||
"text": message.text,
|
||||
"wakewordId": message.wakewordId,
|
||||
"siteId": core.siteId,
|
||||
}
|
||||
)
|
||||
# Must use the "transcription" message type: that is the type the
# /api/events/text websocket endpoint forwards to clients.
asyncio.run_coroutine_threadsafe(
    add_ws_event("transcription", transcription_json), loop
)
|
||||
elif isinstance(message, VoiceCommand):
|
||||
# Save last voice command
|
||||
last_voice_wav = buffer_to_wav(message.data)
|
||||
|
||||
|
||||
def api_websocket(func):
    """Wrap a websocket route so it receives its own user event queue.

    A fresh queue is registered in ``user_queues`` while the wrapped handler
    runs, is passed to the handler as its first positional argument, and is
    always unregistered afterwards. Exceptions raised by the handler are
    logged and swallowed, in which case the wrapper returns ``None``.
    """

    @wraps(func)
    async def wrapper(*_args, **kwargs):
        # Per-connection queue; the set is only mutated, never rebound
        event_queue = asyncio.Queue()
        user_queues.add(event_queue)
        try:
            result = await func(event_queue, *_args, **kwargs)
            return result
        except Exception:
            logger.exception("api_websocket")
        finally:
            user_queues.discard(event_queue)

    return wrapper
|
||||
|
||||
|
||||
@app.websocket("/api/events/intent")
|
||||
async def api_events_intent() -> None:
|
||||
@api_websocket
|
||||
async def api_events_intent(queue) -> None:
|
||||
"""Websocket endpoint to receive intents as JSON."""
|
||||
# Add new queue for websocket
|
||||
q: asyncio.Queue = asyncio.Queue()
|
||||
async with ws_locks[WS_EVENT_INTENT]:
|
||||
ws_queues[WS_EVENT_INTENT].append(q)
|
||||
|
||||
try:
|
||||
while True:
|
||||
text = await q.get()
|
||||
while True:
|
||||
message_type, text = await queue.get()
|
||||
if message_type == "intent":
|
||||
await websocket.send(text)
|
||||
except Exception:
|
||||
logger.exception("api_events_intent")
|
||||
|
||||
# Remove queue
|
||||
async with ws_locks[WS_EVENT_INTENT]:
|
||||
ws_queues[WS_EVENT_INTENT].remove(q)
|
||||
|
||||
@app.websocket("/api/events/text")
@api_websocket
async def api_events_text(queue) -> None:
    """Websocket endpoint for transcriptions.

    Forwards the text of every "transcription" message taken from ``queue``
    to the connected websocket; messages of any other type are dropped.
    """
    while True:
        kind, payload = await queue.get()
        if kind != "transcription":
            continue
        await websocket.send(payload)
|
||||
|
||||
|
||||
@app.websocket("/api/events/wake")
@api_websocket
async def api_events_wake(queue) -> None:
    """Websocket endpoint to report wake up.

    Forwards the text of every "wake" message taken from ``queue`` to the
    connected websocket; messages of any other type are dropped.
    """
    while True:
        kind, payload = await queue.get()
        if kind != "wake":
            continue
        await websocket.send(payload)
|
||||
|
||||
|
||||
@app.websocket("/api/events/log")
|
||||
@@ -1185,8 +1274,7 @@ async def api_events_log() -> None:
|
||||
"""Websocket endpoint to receive logging messages as text."""
|
||||
# Add new queue for websocket
|
||||
q: asyncio.Queue = asyncio.Queue()
|
||||
async with ws_locks[WS_EVENT_LOG]:
|
||||
ws_queues[WS_EVENT_LOG].append(q)
|
||||
logging_queues.add(q)
|
||||
|
||||
try:
|
||||
while True:
|
||||
@@ -1196,8 +1284,7 @@ async def api_events_log() -> None:
|
||||
pass
|
||||
|
||||
# Remove queue
|
||||
async with ws_locks[WS_EVENT_LOG]:
|
||||
ws_queues[WS_EVENT_LOG].remove(q)
|
||||
logging_queues.discard(q)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
+2
-1
@@ -53,7 +53,8 @@ Application authors may want to use the [rhasspy-client](https://pypi.org/projec
|
||||
* `?timeout=<seconds>` - override default command timeout
|
||||
* `?entity=<entity>&value=<value>` - set custom entity/value in recognized intent
|
||||
* `/api/listen-for-wake`
|
||||
* POST to wake Rhasspy up and return immediately
|
||||
* POST "on" to have Rhasspy listen for a wake word
|
||||
* POST "off" to disable wake word
|
||||
* `/api/lookup`
|
||||
* POST word as plain text to look up or guess pronunciation
|
||||
* `?n=<number>` - return at most `n` guessed pronunciations
|
||||
|
||||
@@ -29,6 +29,19 @@ Add to your [profile](profiles.md):
|
||||
|
||||
Remove the `voice` option to have `espeak` use your profile's language automatically.
|
||||
|
||||
You may also pass additional arguments to the `espeak` command. For example,
|
||||
|
||||
```json
|
||||
"text_to_speech": {
|
||||
"system": "espeak",
|
||||
"espeak": {
|
||||
"arguments": ["-s", "80"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
will speak the sentence more slowly.
|
||||
|
||||
See `rhasspy.tts.EspeakSentenceSpeaker` for more details.
|
||||
|
||||
## Flite
|
||||
|
||||
@@ -2,6 +2,11 @@
|
||||
|
||||
* [RGB Light Example](#rgb-light-example)
|
||||
* [Client/Server Setup](#clientserver-setup)
|
||||
* MATRIX Labs
|
||||
* [Rhasspy Voice Assistant on MATRIX Voice and MATRIX Creator](https://www.hackster.io/matrix-labs/rhasspy-voice-assistant-on-matrix-voice-and-matrix-creator-97f92e)
|
||||
* [Adding Intents for Rhasspy Offline Voice Assistant](https://www.hackster.io/matrix-labs/adding-intents-for-rhasspy-offline-voice-assistant-faa221)
|
||||
* Rendered Obsolete
|
||||
* [Home Assistant Voice Recognition with Rhasspy](https://rendered-obsolete.github.io/2020/01/02/rhasspy.html)
|
||||
|
||||
## RGB Light Example
|
||||
|
||||
|
||||
+39
-1
@@ -142,7 +142,18 @@ More example flows are available [on Github](https://github.com/synesthesiam/rha
|
||||
|
||||
### WebSocket Events
|
||||
|
||||
Whenever a voice command is recognized, Rhasspy emits JSON events over a websocket connection available at `ws://rhasspy:12101/api/events/intent` (replace `ws://` with `wss://` if you're using [secure hosting](usage.md#secure-hosting-with-https)).
|
||||
Rhasspy supports multiple websocket event endpoints:
|
||||
|
||||
* `/api/events/intent`
|
||||
* Intent recognized or not
|
||||
* `/api/events/wake`
|
||||
* Wake word detected
|
||||
* `/api/events/text`
|
||||
* Speech transcription
|
||||
|
||||
#### WebSocket Intents
|
||||
|
||||
Whenever a voice command is recognized, Rhasspy emits JSON events over a websocket connection available at `ws://YOUR_SERVER:12101/api/events/intent` (replace `ws://` with `wss://` if you're using [secure hosting](usage.md#secure-hosting-with-https)).
|
||||
You can listen to these events in a [Node-RED](https://nodered.org) flow, and easily add offline, private voice commands to your home automation set up!
|
||||
|
||||
For the `ChangeLightState` intent from the [RGB Light Example](index.md#rgb-light-example), Rhasspy will emit a JSON event like this over the websocket:
|
||||
@@ -171,6 +182,33 @@ For the `ChangLightState` intent from the [RGB Light Example](index.md#rgb-light
|
||||
}
|
||||
```
|
||||
|
||||
#### WebSocket Wake
|
||||
|
||||
When the wake word is detected, or Rhasspy is woken up via the `/api/listen-for-command` HTTP endpoint, a JSON event is emitted at `ws://YOUR_SERVER:12101/api/events/wake` (`wss://` if using HTTPS) like:
|
||||
|
||||
```json
|
||||
{
|
||||
"wakewordId": "default",
|
||||
"siteId": "default"
|
||||
}
|
||||
```
|
||||
|
||||
The `wakewordId` is set using the model or file name of your wakeword model (e.g., `porcupine` for `porcupine.ppn`). The `siteId` comes from your `mqtt.siteId` profile setting.
|
||||
|
||||
#### WebSocket Transcriptions
|
||||
|
||||
Each time a voice command is transcribed, Rhasspy emits a JSON event at `ws://YOUR_SERVER:12101/api/events/text` (`wss://` if using HTTPS) like:
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "text from voice command",
|
||||
"wakewordId": "default",
|
||||
"siteId": "default"
|
||||
}
|
||||
```
|
||||
|
||||
The transcription is contained in the `text` property. `wakewordId` is the id of the wakeword that initiated the voice command (or `default`). The `siteId` comes from your `mqtt.siteId` profile setting.
|
||||
|
||||
## MQTT and Snips
|
||||
|
||||
Rhasspy is able to interoperate with Snips.AI services using the [Hermes protocol](https://docs.snips.ai/reference/hermes) over [MQTT](http://mqtt.org). The following components are Snips/Hermes compatible:
|
||||
|
||||
@@ -13,6 +13,11 @@ body {
|
||||
z-index: 9999;
|
||||
}
|
||||
|
||||
#logo {
|
||||
border-color: red;
|
||||
border-width: 0;
|
||||
}
|
||||
|
||||
.response {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
+2
-2
@@ -4,7 +4,7 @@ doit==0.31.1
|
||||
fuzzywuzzy[speedup]==0.17.0
|
||||
google-cloud-texttospeech==0.5.0
|
||||
html5lib==1.0.1
|
||||
json5==0.8.5
|
||||
json5==0.7.0
|
||||
multidict==4.6.1
|
||||
networkx>=2.0
|
||||
num2words==0.5.10
|
||||
@@ -15,6 +15,6 @@ pydash==4.7.6
|
||||
quart==0.6.15
|
||||
quart-cors==0.1.3
|
||||
requests==2.22.0
|
||||
rhasspy-nlu==0.1.4.1
|
||||
rhasspy-nlu==0.1.6
|
||||
swagger-ui-py==0.1.7
|
||||
webrtcvad==2.0.10
|
||||
|
||||
+1
-1
@@ -618,7 +618,7 @@ async def wav2mqtt(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
async def text2wav(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Speak a sentence and output WAV data"""
|
||||
result = await core.speak_sentence(args)
|
||||
result = await core.speak_sentence(args.sentence)
|
||||
sys.stdout.buffer.write(result.wav_data)
|
||||
|
||||
|
||||
|
||||
+20
-6
@@ -39,6 +39,7 @@ from rhasspy.events import (
|
||||
SentenceSpoken,
|
||||
SpeakSentence,
|
||||
SpeakWord,
|
||||
StopListeningForWakeWord,
|
||||
StartRecordingToBuffer,
|
||||
StopRecordingToBuffer,
|
||||
TestMicrophones,
|
||||
@@ -88,7 +89,7 @@ class RhasspyCore:
|
||||
self._session: Optional[aiohttp.ClientSession] = aiohttp.ClientSession()
|
||||
self.dialogue_manager: Optional[RhasspyActor] = None
|
||||
|
||||
self.download_status: typing.List[str] = []
|
||||
self.download_status: List[str] = []
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
@@ -98,6 +99,14 @@ class RhasspyCore:
|
||||
assert self._session is not None
|
||||
return self._session
|
||||
|
||||
@property
def siteId(self) -> str:
    """Get default MQTT siteId.

    Reads the ``mqtt.siteId`` profile setting, splits it on commas and
    returns the first entry. Falls back to ``"default"`` when the setting
    is missing or cannot be read.
    """
    try:
        # Return the value: previously it was assigned to an unused local,
        # so the property yielded None whenever the lookup succeeded.
        return self.profile.get("mqtt.siteId", "default").split(",")[0]
    except Exception:
        # Best effort: any failure reading the profile gives the default id
        return "default"
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
async def start(
|
||||
@@ -162,10 +171,14 @@ class RhasspyCore:
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def listen_for_wake(self) -> None:
|
||||
def listen_for_wake(self, enabled: bool = True) -> None:
|
||||
"""Tell Rhasspy to start listening for a wake word."""
|
||||
assert self.actor_system is not None
|
||||
self.actor_system.tell(self.dialogue_manager, ListenForWakeWord())
|
||||
|
||||
if enabled:
|
||||
self.actor_system.tell(self.dialogue_manager, ListenForWakeWord())
|
||||
else:
|
||||
self.actor_system.tell(self.dialogue_manager, StopListeningForWakeWord())
|
||||
|
||||
async def listen_for_command(
|
||||
self,
|
||||
@@ -346,9 +359,10 @@ class RhasspyCore:
|
||||
"""Generate speech/intent artifacts for profile."""
|
||||
if no_cache:
|
||||
# Delete doit database
|
||||
db_path = Path(self.profile.write_path(".doit.db"))
|
||||
if db_path.is_file():
|
||||
db_path.unlink()
|
||||
profile_dir = Path(self.profile.write_path())
|
||||
for db_path in profile_dir.glob(".doit.db*"):
|
||||
if db_path.is_file():
|
||||
db_path.unlink()
|
||||
|
||||
assert self.actor_system is not None
|
||||
with self.actor_system.private() as sys:
|
||||
|
||||
+26
-1
@@ -386,6 +386,10 @@ class DialogueManager(RhasspyActor):
|
||||
for hook_url in awake_hooks:
|
||||
self._logger.debug("POST-ing to %s", hook_url)
|
||||
requests.post(hook_url, json=hook_json)
|
||||
|
||||
# Forward to observer
|
||||
if self.observer:
|
||||
self.send(self.observer, message)
|
||||
elif isinstance(message, WakeWordNotDetected):
|
||||
self._logger.debug("Wake word NOT detected. Staying asleep.")
|
||||
self.transition("ready")
|
||||
@@ -423,6 +427,10 @@ class DialogueManager(RhasspyActor):
|
||||
wav_data = buffer_to_wav(message.data)
|
||||
self.send(self.decoder, TranscribeWav(wav_data, handle=message.handle))
|
||||
self.transition("decoding")
|
||||
|
||||
# Forward to observer
|
||||
if self.observer:
|
||||
self.send(self.observer, message)
|
||||
else:
|
||||
self.handle_any(message, sender)
|
||||
|
||||
@@ -433,6 +441,15 @@ class DialogueManager(RhasspyActor):
|
||||
def in_decoding(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in decoding state."""
|
||||
if isinstance(message, WavTranscription):
|
||||
message.wakewordId = self.wake_detected_name or "default"
|
||||
|
||||
# Fix casing
|
||||
dict_casing = self.profile.get("speech_to_text.dictionary_casing", "")
|
||||
if dict_casing == "lower":
|
||||
message.text = message.text.lower()
|
||||
elif dict_casing == "upper":
|
||||
message.text = message.text.upper()
|
||||
|
||||
# text -> intent
|
||||
self._logger.debug("%s (confidence=%s)", message.text, message.confidence)
|
||||
|
||||
@@ -447,7 +464,8 @@ class DialogueManager(RhasspyActor):
|
||||
"text": message.text,
|
||||
"likelihood": 1,
|
||||
"seconds": 0,
|
||||
"wakeId": self.wake_detected_name or "",
|
||||
"wakeId": message.wakewordId,
|
||||
"wakewordId": message.wakewordId,
|
||||
}
|
||||
).encode()
|
||||
|
||||
@@ -460,6 +478,10 @@ class DialogueManager(RhasspyActor):
|
||||
)
|
||||
self.send(self.mqtt, MqttPublish("hermes/asr/textCaptured", payload))
|
||||
|
||||
# Forward to observer
|
||||
if self.observer:
|
||||
self.send(self.observer, message)
|
||||
|
||||
# Pass to intent recognizer
|
||||
self.send(
|
||||
self.recognizer,
|
||||
@@ -732,6 +754,9 @@ class DialogueManager(RhasspyActor):
|
||||
elif isinstance(message, GetProblems):
|
||||
# Report problems from child actors
|
||||
self.send(sender, Problems(self.problems))
|
||||
elif isinstance(message, (ListenForWakeWord, StopListeningForWakeWord)):
|
||||
# Forward to wake actor
|
||||
self.send(self.wake, message)
|
||||
else:
|
||||
self.handle_forward(message, sender)
|
||||
|
||||
|
||||
+8
-1
@@ -390,10 +390,17 @@ class TranscribeWav:
|
||||
class WavTranscription:
|
||||
"""Response to TranscribeWav."""
|
||||
|
||||
def __init__(self, text: str, handle: bool = True, confidence: float = 1) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
text: str,
|
||||
handle: bool = True,
|
||||
confidence: float = 1,
|
||||
wakewordId: str = "default",
|
||||
) -> None:
|
||||
self.text = text
|
||||
self.confidence = confidence
|
||||
self.handle = handle
|
||||
self.wakewordId = wakewordId
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
@@ -1,12 +1,8 @@
|
||||
"""Training for intent recognizers."""
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
from collections import Counter, defaultdict
|
||||
from io import StringIO
|
||||
from typing import Any, Callable, Dict, List, Set, Type
|
||||
@@ -14,7 +10,7 @@ from urllib.parse import urljoin
|
||||
|
||||
from rhasspy.actor import RhasspyActor
|
||||
from rhasspy.events import IntentTrainingComplete, IntentTrainingFailed, TrainIntent
|
||||
from rhasspy.utils import lcm, make_sentences_by_intent, load_converters
|
||||
from rhasspy.utils import make_sentences_by_intent, load_converters
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -257,7 +257,7 @@ def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
|
||||
n = int(match.group(1))
|
||||
|
||||
# 75 -> (seventy five):75!int
|
||||
number_text = num2words(n, lang=language).replace("-", " ").strip()
|
||||
number_text = re.sub(r"[-,]\s*", " ", num2words(n, lang=language)).strip()
|
||||
assert number_text, f"Empty num2words result for {n}"
|
||||
number_words = number_text.split()
|
||||
|
||||
@@ -526,6 +526,13 @@ def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
|
||||
for word in read_dict(dict_file):
|
||||
print(word, file=vocab_file)
|
||||
|
||||
if profile.get("wake.system", "dummy") == "pocketsphinx":
|
||||
# Add words from Pocketsphinx wake keyphrase
|
||||
keyphrase = profile.get("wake.pocketsphinx.keyphrase", "")
|
||||
if keyphrase:
|
||||
for word in re.split(r"\s+", keyphrase):
|
||||
print(word, file=vocab_file)
|
||||
|
||||
@create_after(executed="language_model")
|
||||
def task_vocab():
|
||||
"""Writes all vocabulary words to a file from intent.fst."""
|
||||
|
||||
+3
-1
@@ -94,6 +94,7 @@ class EspeakSentenceSpeaker(RhasspyActor):
|
||||
self.disable_wake = True
|
||||
self.enable_wake = False
|
||||
self.wake: Optional[RhasspyActor] = None
|
||||
self.espeak_args: List[str] = []
|
||||
|
||||
def to_started(self, from_state: str) -> None:
|
||||
"""Transition to started state."""
|
||||
@@ -104,6 +105,7 @@ class EspeakSentenceSpeaker(RhasspyActor):
|
||||
self.wake = self.config.get("wake")
|
||||
self.wake_on_start = self.profile.get("rhasspy.listen_on_start", False)
|
||||
self.disable_wake = self.profile.get("text_to_speech.disable_wake", True)
|
||||
self.espeak_args = list(self.profile.get("text_to_speech.espeak.arguments", []))
|
||||
self.transition("ready")
|
||||
|
||||
def in_ready(self, message: Any, sender: RhasspyActor) -> None:
|
||||
@@ -143,7 +145,7 @@ class EspeakSentenceSpeaker(RhasspyActor):
|
||||
def speak(self, sentence: str, voice: Optional[str] = None) -> bytes:
|
||||
"""Get WAV buffer for sentence."""
|
||||
try:
|
||||
espeak_cmd = ["espeak"]
|
||||
espeak_cmd = ["espeak"] + self.espeak_args
|
||||
if voice:
|
||||
espeak_cmd.extend(["-v", str(voice)])
|
||||
|
||||
|
||||
+1
-1
@@ -407,7 +407,7 @@ def numbers_to_words(sentence: str, language: Optional[str] = None) -> str:
|
||||
number = float(word)
|
||||
|
||||
# 75 -> seventy-five -> seventy five
|
||||
words[i] = num2words(number, lang=language).replace("-", " ")
|
||||
words[i] = re.sub(r"[-,]\s*", " ", num2words(number, lang=language))
|
||||
changed = True
|
||||
except ValueError:
|
||||
pass # not a number
|
||||
|
||||
+10
-7
@@ -227,18 +227,19 @@ class PocketsphinxWakeListener(RhasspyActor):
|
||||
self.keyphrase = self.profile.get("wake.pocketsphinx.keyphrase", "")
|
||||
assert self.keyphrase, "No wake keyphrase"
|
||||
|
||||
# Fix casing
|
||||
dict_casing = self.profile.get("speech_to_text.dictionary_casing", "")
|
||||
if dict_casing == "lower":
|
||||
self.keyphrase = self.keyphrase.lower()
|
||||
elif dict_casing == "upper":
|
||||
self.keyphrase = self.keyphrase.upper()
|
||||
|
||||
# Verify that keyphrase words are in dictionary
|
||||
keyphrase_words = re.split(r"\s+", self.keyphrase)
|
||||
with open(dict_path, "r") as dict_file:
|
||||
word_dict = read_dict(dict_file)
|
||||
|
||||
dict_upper = self.profile.get("speech_to_text.dictionary_upper", False)
|
||||
for word in keyphrase_words:
|
||||
if dict_upper:
|
||||
word = word.upper()
|
||||
else:
|
||||
word = word.lower()
|
||||
|
||||
if word not in word_dict:
|
||||
self._logger.warning("%s not in dictionary", word)
|
||||
|
||||
@@ -570,7 +571,9 @@ class PreciseWakeListener(RhasspyActor):
|
||||
self.prediction_sem = threading.Semaphore()
|
||||
for _ in range(num_chunks):
|
||||
chunk = self.audio_buffer[: self.chunk_size]
|
||||
self.stream.write(chunk)
|
||||
if chunk:
|
||||
self.stream.write(chunk)
|
||||
|
||||
self.audio_buffer = self.audio_buffer[self.chunk_size :]
|
||||
|
||||
if self.send_not_detected:
|
||||
|
||||
+28
-4
@@ -3,7 +3,7 @@
|
||||
<!-- Top Bar -->
|
||||
<nav class="navbar navbar-expand-sm navbar-dark bg-dark fixed-top">
|
||||
<a href="/">
|
||||
<img class="navbar-brand" v-bind:class="spinnerClass" src="/img/logo.png">
|
||||
<img id="logo" class="navbar-brand" v-bind:class="spinnerClass" src="/img/logo.png">
|
||||
</a>
|
||||
|
||||
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
|
||||
@@ -191,7 +191,9 @@
|
||||
|
||||
version: '',
|
||||
|
||||
downloadStatus: ''
|
||||
downloadStatus: '',
|
||||
|
||||
wakeSocket: null
|
||||
}
|
||||
},
|
||||
|
||||
@@ -215,8 +217,8 @@
|
||||
this.alertText = text
|
||||
this.alertClass = 'alert-' + level
|
||||
|
||||
// Hide alert after 10 seconds
|
||||
setTimeout(this.clearAlert, 10000)
|
||||
// Hide alert after 20 seconds
|
||||
setTimeout(this.clearAlert, 20000)
|
||||
},
|
||||
|
||||
beginAsync: function() {
|
||||
@@ -365,6 +367,27 @@
|
||||
if (this.downloading) {
|
||||
setTimeout(this.updateDownloadStatus, 1000)
|
||||
}
|
||||
},
|
||||
|
||||
connectWakeSocket: function() {
|
||||
// Connect to /api/events/wake websocket
|
||||
var wsProtocol = 'ws://'
|
||||
if (window.location.protocol == 'https:') {
|
||||
wsProtocol = 'wss://'
|
||||
}
|
||||
|
||||
var wsURL = wsProtocol + window.location.host + '/api/events/wake'
|
||||
this.wakeSocket = new WebSocket(wsURL)
|
||||
this.wakeSocket.onmessage = (evt) => {
|
||||
$('#logo').css('filter', 'invert()')
|
||||
setTimeout(() => {
|
||||
$('#logo').css('filter', 'initial')
|
||||
}, 2000)
|
||||
}
|
||||
this.wakeSocket.onclose = () => {
|
||||
// Try to reconnect
|
||||
setTimeout(this.connectWakeSocket, 1000)
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -376,6 +399,7 @@
|
||||
this.getCustomWords()
|
||||
this.getUnknownWords()
|
||||
this.getProblems()
|
||||
this.connectWakeSocket()
|
||||
this.$options.sockets.onmessage = function(event) {
|
||||
this.rhasspyLog = event.data + '\n' + this.rhasspyLog
|
||||
}
|
||||
|
||||
@@ -136,7 +136,9 @@
|
||||
audioContext: null,
|
||||
recorder: null,
|
||||
|
||||
sendHass: true
|
||||
sendHass: true,
|
||||
|
||||
intentSocket: null
|
||||
}
|
||||
},
|
||||
|
||||
@@ -276,7 +278,30 @@
|
||||
playLastVoiceCommand: function(event) {
|
||||
TranscribeService.playRecording()
|
||||
.catch(err => this.$parent.error(err))
|
||||
},
|
||||
|
||||
connectIntentSocket: function() {
|
||||
// Connect to /api/events/intent websocket
|
||||
var wsProtocol = 'ws://'
|
||||
if (window.location.protocol == 'https:') {
|
||||
wsProtocol = 'wss://'
|
||||
}
|
||||
|
||||
var wsURL = wsProtocol + window.location.host + '/api/events/intent'
|
||||
this.intentSocket = new WebSocket(wsURL)
|
||||
this.intentSocket.onmessage = (evt) => {
|
||||
this.jsonSource = JSON.parse(evt.data)
|
||||
this.sentence = this.jsonSource.raw_text
|
||||
}
|
||||
this.intentSocket.onclose = () => {
|
||||
// Try to reconnect
|
||||
setTimeout(this.connectIntentSocket, 1000)
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
mounted: function() {
|
||||
this.connectIntentSocket()
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
Reference in New Issue
Block a user